Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat!: Implementation of batch ddl in dbapi #1092

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@

from enum import Enum
from typing import TYPE_CHECKING, List

from google.cloud.spanner_dbapi import parse_utils
from google.cloud.spanner_dbapi.parsed_statement import (
ParsedStatement,
StatementType,
Expand All @@ -28,6 +30,48 @@

if TYPE_CHECKING:
from google.cloud.spanner_dbapi.cursor import Cursor
from google.cloud.spanner_dbapi.connection import Connection


class BatchDdlExecutor:
    """Executor that is used when a DDL batch is started. These batches only
    accept DDL statements. All DDL statements are buffered locally and sent to
    Spanner when run_batch() is called.

    :type connection: :class:`~google.cloud.spanner_dbapi.connection.Connection`
    :param connection: The DB-API connection on whose database the buffered
        DDL statements will eventually be executed.
    """

    def __init__(self, connection: "Connection"):
        self._connection = connection
        # DDL statements buffered until run_batch() flushes them to Spanner.
        self._statements: List[str] = []

    def execute_statement(self, parsed_statement: ParsedStatement):
        """Executes the statement when ddl batch is active by buffering the
        statement in-memory.

        :type parsed_statement: ParsedStatement
        :param parsed_statement: parsed statement containing sql query

        :raises ProgrammingError: if the statement is not a DDL statement.
        """
        # Imported lazily to avoid a circular import at module load time.
        from google.cloud.spanner_dbapi import ProgrammingError

        if parsed_statement.statement_type != StatementType.DDL:
            raise ProgrammingError("Only DDL statements are allowed in batch DDL mode.")
        # A single sql string may contain several semicolon-separated DDL
        # statements; each one is buffered individually.
        self._statements.extend(
            parse_utils.parse_and_get_ddl_statements(parsed_statement.statement.sql)
        )

    def run_batch(self):
        """Executes all the buffered statements on the active ddl batch by
        making a call to Spanner.

        :raises ProgrammingError: if a client-side transaction is already
            active, since DDL cannot be executed inside a transaction.
        """
        from google.cloud.spanner_dbapi import ProgrammingError

        if self._connection._client_transaction_started:
            raise ProgrammingError(
                "Cannot execute DDL statement when transaction is already active."
            )
        # Blocks until the schema-update long-running operation completes.
        return self._connection.database.update_ddl(self._statements).result()


class BatchDmlExecutor:
Expand All @@ -52,6 +96,7 @@ def execute_statement(self, parsed_statement: ParsedStatement):
:param parsed_statement: parsed statement containing sql query and query
params
"""

from google.cloud.spanner_dbapi import ProgrammingError

if (
Expand All @@ -61,7 +106,7 @@ def execute_statement(self, parsed_statement: ParsedStatement):
raise ProgrammingError("Only DML statements are allowed in batch DML mode.")
self._statements.append(parsed_statement.statement)

def run_batch_dml(self):
def run_batch(self):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Isn't this a breaking change?

We can probably get away with it now, but this is something that we need to be more careful with, and we should try to mark whatever is not for public consumption as such as much as possible.

Either by adding a _ at the start of the method name, or by explicitly adding documentation that the method is not for public use, and can change at any moment.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ack. Added the following documentation on the method This method is internal and not for public use as it can change anytime.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Instead of renaming the method, we could mark it as deprecated and create a new one with the _ prefix which indicates that it is internal. Please avoid a major version bump if at all possible as it is disruptive for downstream users.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is not the reason because of which we are proposing a major version bump. Please check https://docs.google.com/document/d/1mSVC_AhPpdSA0xUoj4LUt2o2tVM_LDv6IdssiwWnbmw for details

"""Executes all the buffered statements on the active dml batch by
making a call to Spanner.
"""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,9 @@ def execute(cursor: "Cursor", parsed_statement: ParsedStatement):
TypeCode.TIMESTAMP,
column_values,
)
if statement_type == ClientSideStatementType.START_BATCH_DDL:
connection.start_batch_ddl()
return None
if statement_type == ClientSideStatementType.START_BATCH_DML:
connection.start_batch_dml(cursor)
return None
Expand Down
3 changes: 3 additions & 0 deletions google/cloud/spanner_dbapi/client_side_statement_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
RE_SHOW_READ_TIMESTAMP = re.compile(
r"^\s*(SHOW)\s+(VARIABLE)\s+(READ_TIMESTAMP)", re.IGNORECASE
)
RE_START_BATCH_DDL = re.compile(r"^\s*(START)\s+(BATCH)\s+(DDL)", re.IGNORECASE)
RE_START_BATCH_DML = re.compile(r"^\s*(START)\s+(BATCH)\s+(DML)", re.IGNORECASE)
RE_RUN_BATCH = re.compile(r"^\s*(RUN)\s+(BATCH)", re.IGNORECASE)
RE_ABORT_BATCH = re.compile(r"^\s*(ABORT)\s+(BATCH)", re.IGNORECASE)
Expand Down Expand Up @@ -62,6 +63,8 @@ def parse_stmt(query):
client_side_statement_type = ClientSideStatementType.SHOW_COMMIT_TIMESTAMP
elif RE_SHOW_READ_TIMESTAMP.match(query):
client_side_statement_type = ClientSideStatementType.SHOW_READ_TIMESTAMP
elif RE_START_BATCH_DDL.match(query):
client_side_statement_type = ClientSideStatementType.START_BATCH_DDL
elif RE_START_BATCH_DML.match(query):
client_side_statement_type = ClientSideStatementType.START_BATCH_DML
elif RE_BEGIN.match(query):
Expand Down
67 changes: 61 additions & 6 deletions google/cloud/spanner_dbapi/connection.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,11 @@
from google.api_core.gapic_v1.client_info import ClientInfo
from google.cloud import spanner_v1 as spanner
from google.cloud.spanner_dbapi import partition_helper
from google.cloud.spanner_dbapi.batch_dml_executor import BatchMode, BatchDmlExecutor
from google.cloud.spanner_dbapi.batch_executor import (
BatchMode,
BatchDmlExecutor,
BatchDdlExecutor,
)
from google.cloud.spanner_dbapi.parse_utils import _get_statement_type
from google.cloud.spanner_dbapi.parsed_statement import (
StatementType,
Expand Down Expand Up @@ -91,7 +95,9 @@ class Connection:
should end and that a new one should be started when the next statement is executed.
"""

def __init__(self, instance, database=None, read_only=False):
def __init__(
self, instance, database=None, read_only=False, buffer_ddl_statements=False
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

buffer_ddl_statements=False means that we are changing the default behavior in a significant way. That requires a major version bump, and should be clearly called out in the release notes. The description of the PR will automatically be included in the release notes, so please add information about this change there.

Also, in order to force a major version bump, you can add a ! to the title like this: feat!: Support explicit DDL batching

(Also note that the title of the PR will be included in the release notes, and as such is intended for public consumption. We should therefore try to keep that title as descriptive as possible for external users so they understand what the change is.)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added description and changed title to force a major version bump

):
self._instance = instance
self._database = database
self._ddl_statements = []
Expand All @@ -114,8 +120,10 @@ def __init__(self, instance, database=None, read_only=False):
# made at least one call to Spanner.
self._spanner_transaction_started = False
self._batch_mode = BatchMode.NONE
self._batch_ddl_executor: BatchDdlExecutor = None
self._batch_dml_executor: BatchDmlExecutor = None
self._transaction_helper = TransactionRetryHelper(self)
self._buffer_ddl_statements = buffer_ddl_statements

@property
def autocommit(self):
Expand All @@ -126,6 +134,15 @@ def autocommit(self):
"""
return self._autocommit

@property
def buffer_ddl_statements(self):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this a read-only property? As in; can it only be set when a connection is created, and never changed after that? If so, is that something that we can/should document? Or is it clear from the context?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes its a read only property and there are no setters for it but in python the private variable can also be overridden.

"""Whether to buffer ddl statements for this connection.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This needs more documentation. I (think I) understand what it means. To someone who is not familiar with Cloud Spanner, it is not clear what this means. It is also not clear when they should enable/disable this.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done


:rtype: bool
:returns: _buffer_ddl_statements flag value.
"""
return self._buffer_ddl_statements

@autocommit.setter
def autocommit(self, value):
"""Change this connection autocommit mode. Setting this value to True
Expand Down Expand Up @@ -365,7 +382,8 @@ def commit(self):
)
return

self.run_prior_DDL_statements()
if self.buffer_ddl_statements:
self.run_prior_DDL_statements()
try:
if self._spanner_transaction_started and not self._read_only:
self._transaction.commit()
Expand Down Expand Up @@ -463,6 +481,31 @@ def validate(self):
"Expected: [[1]]" % result
)

@check_not_closed
def start_batch_ddl(self):
    """Starts a DDL batch on this connection; subsequent DDL statements are
    buffered by a BatchDdlExecutor until the batch is run or aborted.

    :raises ProgrammingError: if another batch is already active, if the
        connection is read-only, or if the connection is configured to
        buffer DDL statements itself (buffer_ddl_statements is True).
    """
    if self._batch_mode is not BatchMode.NONE:
        raise ProgrammingError(
            "Cannot start a DDL batch when a batch is already active"
        )
    if self.read_only:
        raise ProgrammingError(
            "Cannot start a DDL batch when the connection is in read-only mode"
        )
    if self.buffer_ddl_statements:
        raise ProgrammingError(
            "Cannot start a DDL batch when _buffer_ddl_statements flag is True"
        )
    self._batch_mode = BatchMode.DDL
    self._batch_ddl_executor = BatchDdlExecutor(self)

@check_not_closed
def execute_batch_ddl_statement(self, parsed_statement: ParsedStatement):
    """Buffers the given DDL statement on the currently active DDL batch.

    This method is internal and not for public use as it can change anytime.

    :type parsed_statement: ParsedStatement
    :param parsed_statement: parsed DDL statement to buffer.

    :raises ProgrammingError: if no DDL batch is active on this connection.
    """
    if self._batch_mode is not BatchMode.DDL:
        raise ProgrammingError(
            "Cannot execute statement when the BatchMode is not DDL"
        )
    self._batch_ddl_executor.execute_statement(parsed_statement)

@check_not_closed
def start_batch_dml(self, cursor):
if self._batch_mode is not BatchMode.NONE:
Expand All @@ -486,22 +529,28 @@ def execute_batch_dml_statement(self, parsed_statement: ParsedStatement):

@check_not_closed
def run_batch(self):
    """Runs the currently active batch (DML or DDL) against Spanner.

    The batch state is always cleared, even when execution raises.

    :returns: the result set of the DML batch, or None for a DDL batch.
    :raises ProgrammingError: if no batch is active on this connection.
    """
    result_set = None
    if self._batch_mode is BatchMode.NONE:
        raise ProgrammingError("Cannot run a batch when the BatchMode is not set")
    try:
        if self._batch_mode is BatchMode.DML:
            result_set = self._batch_dml_executor.run_batch()
        elif self._batch_mode is BatchMode.DDL:
            self._batch_ddl_executor.run_batch()
    finally:
        # Reset batch state regardless of success or failure.
        self._batch_mode = BatchMode.NONE
        self._batch_dml_executor = None
        self._batch_ddl_executor = None
    return result_set

@check_not_closed
def abort_batch(self):
    """Aborts the currently active batch, discarding any buffered statements.

    :raises ProgrammingError: if no batch is active on this connection.
    """
    mode = self._batch_mode
    if mode is BatchMode.NONE:
        raise ProgrammingError("Cannot abort a batch when the BatchMode is not set")
    # Drop whichever executor belongs to the active batch kind.
    if mode is BatchMode.DML:
        self._batch_dml_executor = None
    elif mode is BatchMode.DDL:
        self._batch_ddl_executor = None
    self._batch_mode = BatchMode.NONE

@check_not_closed
Expand Down Expand Up @@ -584,10 +633,14 @@ def connect(
pool=None,
user_agent=None,
client=None,
buffer_ddl_statements=False,
route_to_leader_enabled=True,
):
"""Creates a connection to a Google Cloud Spanner database.

:type buffer_ddl_statements: bool
:param buffer_ddl_statements:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This needs a description of what it does, and when you should enable/disable it.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added


:type instance_id: str
:param instance_id: The ID of the instance to connect to.

Expand Down Expand Up @@ -658,7 +711,9 @@ def connect(

instance = client.instance(instance_id)
conn = Connection(
instance, instance.database(database_id, pool=pool) if database_id else None
instance,
instance.database(database_id, pool=pool) if database_id else None,
buffer_ddl_statements=buffer_ddl_statements,
)
if pool is not None:
conn._own_pool = False
Expand Down
50 changes: 24 additions & 26 deletions google/cloud/spanner_dbapi/cursor.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,6 @@
"""Database cursor for Google Cloud Spanner DB API."""
from collections import namedtuple

import sqlparse

from google.api_core.exceptions import Aborted
from google.api_core.exceptions import AlreadyExists
from google.api_core.exceptions import FailedPrecondition
Expand All @@ -25,7 +23,7 @@
from google.api_core.exceptions import OutOfRange

from google.cloud import spanner_v1 as spanner
from google.cloud.spanner_dbapi.batch_dml_executor import BatchMode
from google.cloud.spanner_dbapi.batch_executor import BatchMode
from google.cloud.spanner_dbapi.exceptions import IntegrityError
from google.cloud.spanner_dbapi.exceptions import InterfaceError
from google.cloud.spanner_dbapi.exceptions import OperationalError
Expand All @@ -34,7 +32,7 @@
from google.cloud.spanner_dbapi import (
_helpers,
client_side_statement_executor,
batch_dml_executor,
batch_executor,
)
from google.cloud.spanner_dbapi._helpers import ColumnInfo
from google.cloud.spanner_dbapi._helpers import CODE_TO_DISPLAY_SIZE
Expand Down Expand Up @@ -210,18 +208,8 @@ def _batch_DDLs(self, sql):
:raises: :class:`ValueError` in case not a DDL statement
present in the operation.
"""
statements = []
for ddl in sqlparse.split(sql):
if ddl:
ddl = ddl.rstrip(";")
if (
parse_utils.classify_statement(ddl).statement_type
!= StatementType.DDL
):
raise ValueError("Only DDL statements may be batched.")

statements.append(ddl)

statements = parse_utils.parse_and_get_ddl_statements(sql)
# Only queue DDL statements if they are all correctly classified.
self.connection._ddl_statements.extend(statements)

Expand Down Expand Up @@ -261,6 +249,8 @@ def _execute(self, sql, args=None, call_from_execute_many=False):
self._itr = self._result_set
else:
self._itr = PeekIterator(self._result_set)
elif self.connection._batch_mode == BatchMode.DDL:
self.connection.execute_batch_ddl_statement(self._parsed_statement)
elif self.connection._batch_mode == BatchMode.DML:
self.connection.execute_batch_dml_statement(self._parsed_statement)
elif self.connection.read_only or (
Expand All @@ -269,9 +259,18 @@ def _execute(self, sql, args=None, call_from_execute_many=False):
):
self._handle_DQL(sql, args or None)
elif self._parsed_statement.statement_type == StatementType.DDL:
self._batch_DDLs(sql)
if not self.connection._client_transaction_started:
self.connection.run_prior_DDL_statements()
if not self.connection.buffer_ddl_statements:
if not self.connection._client_transaction_started:
self._batch_DDLs(sql)
self.connection.run_prior_DDL_statements()
else:
raise ProgrammingError(
olavloite marked this conversation as resolved.
Show resolved Hide resolved
"Cannot execute DDL statement when transaction is already active"
ankiaga marked this conversation as resolved.
Show resolved Hide resolved
)
else:
self._batch_DDLs(sql)
if not self.connection._client_transaction_started:
self.connection.run_prior_DDL_statements()
else:
self._execute_in_rw_transaction()

Expand All @@ -296,9 +295,8 @@ def _execute(self, sql, args=None, call_from_execute_many=False):
self.connection._spanner_transaction_started = False

def _execute_in_rw_transaction(self):
# For every other operation, we've got to ensure that
# any prior DDL statements were run.
self.connection.run_prior_DDL_statements()
if self.connection.buffer_ddl_statements:
self.connection.run_prior_DDL_statements()
statement = self._parsed_statement.statement
if self.connection._client_transaction_started:
while True:
Expand Down Expand Up @@ -347,9 +345,8 @@ def executemany(self, operation, seq_of_params):
+ ", with executemany() method is not allowed."
)

# For every operation, we've got to ensure that any prior DDL
# statements were run.
self.connection.run_prior_DDL_statements()
if self.connection.buffer_ddl_statements:
self.connection.run_prior_DDL_statements()
if self._parsed_statement.statement_type in (
StatementType.INSERT,
StatementType.UPDATE,
Expand All @@ -360,7 +357,7 @@ def executemany(self, operation, seq_of_params):
operation, params
)
statements.append(Statement(sql, params, get_param_types(params)))
many_result_set = batch_dml_executor.run_batch_dml(self, statements)
many_result_set = batch_executor.run_batch_dml(self, statements)
else:
many_result_set = StreamedManyResultSets()
for params in seq_of_params:
Expand Down Expand Up @@ -523,7 +520,8 @@ def run_sql_in_snapshot(self, sql, params=None, param_types=None):
# hence this method exists to circumvent that limit.
if self.connection.database is None:
raise ValueError("Database needs to be passed for this operation")
self.connection.run_prior_DDL_statements()
if self.connection.buffer_ddl_statements:
self.connection.run_prior_DDL_statements()

with self.connection.database.snapshot() as snapshot:
return list(snapshot.execute_sql(sql, params, param_types))
Expand Down
11 changes: 11 additions & 0 deletions google/cloud/spanner_dbapi/parse_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,17 @@ def classify_stmt(query):
return STMT_UPDATING


def parse_and_get_ddl_statements(sql):
    """Splits ``sql`` into its individual DDL statements.

    Each statement has any trailing semicolon stripped. If any non-empty
    statement is not classified as DDL, the whole input is rejected, so
    either all statements are returned or none are.

    :type sql: str
    :param sql: one or more semicolon-separated DDL statements.

    :rtype: list
    :returns: the individual DDL statements contained in ``sql``.

    :raises ValueError: if any statement is not a DDL statement.
    """
    statements = []
    for ddl in sqlparse.split(sql):
        if ddl:
            ddl = ddl.rstrip(";")
            if classify_statement(ddl).statement_type != StatementType.DDL:
                raise ValueError("Only DDL statements may be batched.")
            statements.append(ddl)
    return statements


def classify_statement(query, args=None):
"""Determine SQL query type.

Expand Down
1 change: 1 addition & 0 deletions google/cloud/spanner_dbapi/parsed_statement.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ class ClientSideStatementType(Enum):
PARTITION_QUERY = 9
RUN_PARTITION = 10
RUN_PARTITIONED_QUERY = 11
START_BATCH_DDL = 12


@dataclass
Expand Down