diff --git a/docs/dyn/documentai_v1.projects.locations.processors.html b/docs/dyn/documentai_v1.projects.locations.processors.html index a6a12f97a7..3cbcb2b702 100644 --- a/docs/dyn/documentai_v1.projects.locations.processors.html +++ b/docs/dyn/documentai_v1.projects.locations.processors.html @@ -153,7 +153,7 @@

Method Details

}, "processOptions": { # Options for Process API # Inference-time options for the process API "fromEnd": 42, # Only process certain pages from the end, same as above. - "fromStart": 42, # Only process certain pages from the start, process all if the document has less pages. + "fromStart": 42, # Only process certain pages from the start. Process all if the document has fewer pages. "individualPageSelector": { # A list of individual page numbers. # Which pages to process (1-indexed). "pages": [ # Optional. Indices of the pages (starting from 1). 42, @@ -164,7 +164,7 @@

Method Details

"A String", ], "computeStyleInfo": True or False, # Turn on font identification model and return font style information. Deprecated, use PremiumFeatures.compute_style_info instead. - "disableCharacterBoxesDetection": True or False, # Turn off character box detector in OCR engine. Character box detection is enabled by default in OCR 2.0+ processors. + "disableCharacterBoxesDetection": True or False, # Turn off character box detector in OCR engine. Character box detection is enabled by default in OCR 2.0 (and later) processors. "enableImageQualityScores": True or False, # Enables intelligent document quality scores after OCR. Can help with diagnosing why OCR responses are of poor quality for a given input. Adds additional latency comparable to regular OCR to the process call. "enableNativePdfParsing": True or False, # Enables special handling for PDFs with existing text information. Results in better text extraction quality in such PDF inputs. "enableSymbol": True or False, # Includes symbol level OCR information if set to true. @@ -176,7 +176,7 @@

Method Details

"premiumFeatures": { # Configurations for premium OCR features. # Configurations for premium OCR features. "computeStyleInfo": True or False, # Turn on font identification model and return font style information. "enableMathOcr": True or False, # Turn on the model that can extract LaTeX math formulas. - "enableSelectionMarkDetection": True or False, # Turn on selection mark detector in OCR engine. Only available in OCR 2.0+ processors. + "enableSelectionMarkDetection": True or False, # Turn on selection mark detector in OCR engine. Only available in OCR 2.0 (and later) processors. }, }, }, @@ -1334,7 +1334,7 @@

Method Details

}, "processOptions": { # Options for Process API # Inference-time options for the process API "fromEnd": 42, # Only process certain pages from the end, same as above. - "fromStart": 42, # Only process certain pages from the start, process all if the document has less pages. + "fromStart": 42, # Only process certain pages from the start. Process all if the document has fewer pages. "individualPageSelector": { # A list of individual page numbers. # Which pages to process (1-indexed). "pages": [ # Optional. Indices of the pages (starting from 1). 42, @@ -1345,7 +1345,7 @@

Method Details

"A String", ], "computeStyleInfo": True or False, # Turn on font identification model and return font style information. Deprecated, use PremiumFeatures.compute_style_info instead. - "disableCharacterBoxesDetection": True or False, # Turn off character box detector in OCR engine. Character box detection is enabled by default in OCR 2.0+ processors. + "disableCharacterBoxesDetection": True or False, # Turn off character box detector in OCR engine. Character box detection is enabled by default in OCR 2.0 (and later) processors. "enableImageQualityScores": True or False, # Enables intelligent document quality scores after OCR. Can help with diagnosing why OCR responses are of poor quality for a given input. Adds additional latency comparable to regular OCR to the process call. "enableNativePdfParsing": True or False, # Enables special handling for PDFs with existing text information. Results in better text extraction quality in such PDF inputs. "enableSymbol": True or False, # Includes symbol level OCR information if set to true. @@ -1357,7 +1357,7 @@

Method Details

"premiumFeatures": { # Configurations for premium OCR features. # Configurations for premium OCR features. "computeStyleInfo": True or False, # Turn on font identification model and return font style information. "enableMathOcr": True or False, # Turn on the model that can extract LaTeX math formulas. - "enableSelectionMarkDetection": True or False, # Turn on selection mark detector in OCR engine. Only available in OCR 2.0+ processors. + "enableSelectionMarkDetection": True or False, # Turn on selection mark detector in OCR engine. Only available in OCR 2.0 (and later) processors. }, }, }, diff --git a/docs/dyn/documentai_v1.projects.locations.processors.processorVersions.html b/docs/dyn/documentai_v1.projects.locations.processors.processorVersions.html index b3e29e57c3..def56143d8 100644 --- a/docs/dyn/documentai_v1.projects.locations.processors.processorVersions.html +++ b/docs/dyn/documentai_v1.projects.locations.processors.processorVersions.html @@ -148,7 +148,7 @@

Method Details

}, "processOptions": { # Options for Process API # Inference-time options for the process API "fromEnd": 42, # Only process certain pages from the end, same as above. - "fromStart": 42, # Only process certain pages from the start, process all if the document has less pages. + "fromStart": 42, # Only process certain pages from the start. Process all if the document has fewer pages. "individualPageSelector": { # A list of individual page numbers. # Which pages to process (1-indexed). "pages": [ # Optional. Indices of the pages (starting from 1). 42, @@ -159,7 +159,7 @@

Method Details

"A String", ], "computeStyleInfo": True or False, # Turn on font identification model and return font style information. Deprecated, use PremiumFeatures.compute_style_info instead. - "disableCharacterBoxesDetection": True or False, # Turn off character box detector in OCR engine. Character box detection is enabled by default in OCR 2.0+ processors. + "disableCharacterBoxesDetection": True or False, # Turn off character box detector in OCR engine. Character box detection is enabled by default in OCR 2.0 (and later) processors. "enableImageQualityScores": True or False, # Enables intelligent document quality scores after OCR. Can help with diagnosing why OCR responses are of poor quality for a given input. Adds additional latency comparable to regular OCR to the process call. "enableNativePdfParsing": True or False, # Enables special handling for PDFs with existing text information. Results in better text extraction quality in such PDF inputs. "enableSymbol": True or False, # Includes symbol level OCR information if set to true. @@ -171,7 +171,7 @@

Method Details

"premiumFeatures": { # Configurations for premium OCR features. # Configurations for premium OCR features. "computeStyleInfo": True or False, # Turn on font identification model and return font style information. "enableMathOcr": True or False, # Turn on the model that can extract LaTeX math formulas. - "enableSelectionMarkDetection": True or False, # Turn on selection mark detector in OCR engine. Only available in OCR 2.0+ processors. + "enableSelectionMarkDetection": True or False, # Turn on selection mark detector in OCR engine. Only available in OCR 2.0 (and later) processors. }, }, }, @@ -1406,7 +1406,7 @@

Method Details

}, "processOptions": { # Options for Process API # Inference-time options for the process API "fromEnd": 42, # Only process certain pages from the end, same as above. - "fromStart": 42, # Only process certain pages from the start, process all if the document has less pages. + "fromStart": 42, # Only process certain pages from the start. Process all if the document has fewer pages. "individualPageSelector": { # A list of individual page numbers. # Which pages to process (1-indexed). "pages": [ # Optional. Indices of the pages (starting from 1). 42, @@ -1417,7 +1417,7 @@

Method Details

"A String", ], "computeStyleInfo": True or False, # Turn on font identification model and return font style information. Deprecated, use PremiumFeatures.compute_style_info instead. - "disableCharacterBoxesDetection": True or False, # Turn off character box detector in OCR engine. Character box detection is enabled by default in OCR 2.0+ processors. + "disableCharacterBoxesDetection": True or False, # Turn off character box detector in OCR engine. Character box detection is enabled by default in OCR 2.0 (and later) processors. "enableImageQualityScores": True or False, # Enables intelligent document quality scores after OCR. Can help with diagnosing why OCR responses are of poor quality for a given input. Adds additional latency comparable to regular OCR to the process call. "enableNativePdfParsing": True or False, # Enables special handling for PDFs with existing text information. Results in better text extraction quality in such PDF inputs. "enableSymbol": True or False, # Includes symbol level OCR information if set to true. @@ -1429,7 +1429,7 @@

Method Details

"premiumFeatures": { # Configurations for premium OCR features. # Configurations for premium OCR features. "computeStyleInfo": True or False, # Turn on font identification model and return font style information. "enableMathOcr": True or False, # Turn on the model that can extract LaTeX math formulas. - "enableSelectionMarkDetection": True or False, # Turn on selection mark detector in OCR engine. Only available in OCR 2.0+ processors. + "enableSelectionMarkDetection": True or False, # Turn on selection mark detector in OCR engine. Only available in OCR 2.0 (and later) processors. }, }, }, diff --git a/docs/dyn/documentai_v1beta3.projects.locations.processors.dataset.html b/docs/dyn/documentai_v1beta3.projects.locations.processors.dataset.html index c39a6473f5..683771b517 100644 --- a/docs/dyn/documentai_v1beta3.projects.locations.processors.dataset.html +++ b/docs/dyn/documentai_v1beta3.projects.locations.processors.dataset.html @@ -89,6 +89,12 @@

Instance Methods

importDocuments(dataset, body=None, x__xgafv=None)

Import documents into a dataset.

+

+ listDocuments(dataset, body=None, x__xgafv=None)

+

Returns a list of documents present in the dataset.

+

+ listDocuments_next()

+

Retrieves the next page of results.

updateDatasetSchema(name, body=None, updateMask=None, x__xgafv=None)

Updates a `DatasetSchema`.

@@ -1163,6 +1169,73 @@

Method Details

} +
+ listDocuments(dataset, body=None, x__xgafv=None) +
Returns a list of documents present in the dataset.
+
+Args:
+  dataset: string, Required. The resource name of the dataset to be listed. Format: projects/{project}/locations/{location}/processors/{processor}/dataset (required)
+  body: object, The request body.
+    The object takes the form of:
+
+{
+  "filter": "A String", # Optional. Query to filter the documents based on https://google.aip.dev/160. ## Currently supported query strings are: `SplitType=DATASET_SPLIT_TEST|DATASET_SPLIT_TRAIN|DATASET_SPLIT_UNASSIGNED` - `LabelingState=DOCUMENT_LABELED|DOCUMENT_UNLABELED|DOCUMENT_AUTO_LABELED` - `DisplayName=\"file_name.pdf\"` - `EntityType=abc/def` - `TagName=\"auto-labeling-running\"|\"sampled\"` Note: - Only `AND`, `=` and `!=` are supported. e.g. `DisplayName=file_name AND EntityType!=abc` IS supported. - Wildcard `*` is supported only in `DisplayName` filter - No duplicate filter keys are allowed, e.g. `EntityType=a AND EntityType=b` is NOT supported. - String match is case sensitive (for filter `DisplayName` & `EntityType`).
+  "pageSize": 42, # The maximum number of documents to return. The service may return fewer than this value. If unspecified, at most 20 documents will be returned. The maximum value is 100; values above 100 will be coerced to 100.
+  "pageToken": "A String", # A page token, received from a previous `ListDocuments` call. Provide this to retrieve the subsequent page. When paginating, all other parameters provided to `ListDocuments` must match the call that provided the page token.
+  "returnTotalSize": True or False, # Optional. Controls if the ListDocuments request requires a total size of matched documents. See ListDocumentsResponse.total_size. Enabling this flag may adversely impact performance. Defaults to false.
+  "skip": 42, # Optional. Number of results to skip beginning from the `page_token` if provided. https://google.aip.dev/158#skipping-results. It must be a non-negative integer. Negative values will be rejected. Note that this is not the number of pages to skip. If this value causes the cursor to move past the end of results, `ListDocumentsResponse.document_metadata` and `ListDocumentsResponse.next_page_token` will be empty.
+}
+
+  x__xgafv: string, V1 error format.
+    Allowed values
+      1 - v1 error format
+      2 - v2 error format
+
+Returns:
+  An object of the form:
+
+    {
+  "documentMetadata": [ # Document metadata corresponding to the listed documents.
+    { # Metadata about a document.
+      "datasetType": "A String", # Type of the dataset split to which the document belongs.
+      "displayName": "A String", # The display name of the document.
+      "documentId": { # Document Identifier. # Document identifier.
+        "gcsManagedDocId": { # Identifies a document uniquely within the scope of a dataset in the user-managed Cloud Storage option. # A document id within user-managed Cloud Storage.
+          "cwDocId": "A String", # Id of the document (indexed) managed by Content Warehouse.
+          "gcsUri": "A String", # Required. The Cloud Storage URI where the actual document is stored.
+        },
+        "revisionRef": { # The revision reference specifies which revision on the document to read. # Points to a specific revision of the document if set.
+          "latestProcessorVersion": "A String", # Reads the revision generated by the processor version. The format takes the full resource name of processor version. `projects/{project}/locations/{location}/processors/{processor}/processorVersions/{processorVersion}`
+          "revisionCase": "A String", # Reads the revision by the predefined case.
+          "revisionId": "A String", # Reads the revision given by the id.
+        },
+        "unmanagedDocId": { # Identifies a document uniquely within the scope of a dataset in unmanaged option. # A document id within unmanaged dataset.
+          "docId": "A String", # Required. The id of the document.
+        },
+      },
+      "labelingState": "A String", # Labelling state of the document.
+      "pageCount": 42, # Number of pages in the document.
+    },
+  ],
+  "nextPageToken": "A String", # A token, which can be sent as `page_token` to retrieve the next page. If this field is omitted, there are no subsequent pages.
+  "totalSize": 42, # Total count of documents queried.
+}
+
+ +
+ listDocuments_next() +
Retrieves the next page of results.
+
+        Args:
+          previous_request: The request for the previous page. (required)
+          previous_response: The response from the request for the previous page. (required)
+
+        Returns:
+          A request object that you can call 'execute()' on to request the next
+          page. Returns None if there are no more items in the collection.
+        
+
+
updateDatasetSchema(name, body=None, updateMask=None, x__xgafv=None)
Updates a `DatasetSchema`.
diff --git a/docs/dyn/documentai_v1beta3.projects.locations.processors.html b/docs/dyn/documentai_v1beta3.projects.locations.processors.html
index e8320a83f2..340daaff4b 100644
--- a/docs/dyn/documentai_v1beta3.projects.locations.processors.html
+++ b/docs/dyn/documentai_v1beta3.projects.locations.processors.html
@@ -170,7 +170,7 @@ 

Method Details

}, "processOptions": { # Options for Process API # Inference-time options for the process API "fromEnd": 42, # Only process certain pages from the end, same as above. - "fromStart": 42, # Only process certain pages from the start, process all if the document has less pages. + "fromStart": 42, # Only process certain pages from the start. Process all if the document has fewer pages. "individualPageSelector": { # A list of individual page numbers. # Which pages to process (1-indexed). "pages": [ # Optional. Indices of the pages (starting from 1). 42, @@ -181,7 +181,7 @@

Method Details

"A String", ], "computeStyleInfo": True or False, # Turn on font identification model and return font style information. Deprecated, use PremiumFeatures.compute_style_info instead. - "disableCharacterBoxesDetection": True or False, # Turn off character box detector in OCR engine. Character box detection is enabled by default in OCR 2.0+ processors. + "disableCharacterBoxesDetection": True or False, # Turn off character box detector in OCR engine. Character box detection is enabled by default in OCR 2.0 (and later) processors. "enableImageQualityScores": True or False, # Enables intelligent document quality scores after OCR. Can help with diagnosing why OCR responses are of poor quality for a given input. Adds additional latency comparable to regular OCR to the process call. "enableNativePdfParsing": True or False, # Enables special handling for PDFs with existing text information. Results in better text extraction quality in such PDF inputs. "enableSymbol": True or False, # Includes symbol level OCR information if set to true. @@ -193,7 +193,7 @@

Method Details

"premiumFeatures": { # Configurations for premium OCR features. # Configurations for premium OCR features. "computeStyleInfo": True or False, # Turn on font identification model and return font style information. "enableMathOcr": True or False, # Turn on the model that can extract LaTeX math formulas. - "enableSelectionMarkDetection": True or False, # Turn on selection mark detector in OCR engine. Only available in OCR 2.0+ processors. + "enableSelectionMarkDetection": True or False, # Turn on selection mark detector in OCR engine. Only available in OCR 2.0 (and later) processors. }, }, "schemaOverride": { # The schema defines the output of the processed document by a processor. # Optional. Override the schema of the ProcessorVersion. Will return an Invalid Argument error if this field is set when the underlying ProcessorVersion doesn't support schema override. @@ -2240,7 +2240,7 @@

Method Details

}, "processOptions": { # Options for Process API # Inference-time options for the process API "fromEnd": 42, # Only process certain pages from the end, same as above. - "fromStart": 42, # Only process certain pages from the start, process all if the document has less pages. + "fromStart": 42, # Only process certain pages from the start. Process all if the document has fewer pages. "individualPageSelector": { # A list of individual page numbers. # Which pages to process (1-indexed). "pages": [ # Optional. Indices of the pages (starting from 1). 42, @@ -2251,7 +2251,7 @@

Method Details

"A String", ], "computeStyleInfo": True or False, # Turn on font identification model and return font style information. Deprecated, use PremiumFeatures.compute_style_info instead. - "disableCharacterBoxesDetection": True or False, # Turn off character box detector in OCR engine. Character box detection is enabled by default in OCR 2.0+ processors. + "disableCharacterBoxesDetection": True or False, # Turn off character box detector in OCR engine. Character box detection is enabled by default in OCR 2.0 (and later) processors. "enableImageQualityScores": True or False, # Enables intelligent document quality scores after OCR. Can help with diagnosing why OCR responses are of poor quality for a given input. Adds additional latency comparable to regular OCR to the process call. "enableNativePdfParsing": True or False, # Enables special handling for PDFs with existing text information. Results in better text extraction quality in such PDF inputs. "enableSymbol": True or False, # Includes symbol level OCR information if set to true. @@ -2263,7 +2263,7 @@

Method Details

"premiumFeatures": { # Configurations for premium OCR features. # Configurations for premium OCR features. "computeStyleInfo": True or False, # Turn on font identification model and return font style information. "enableMathOcr": True or False, # Turn on the model that can extract LaTeX math formulas. - "enableSelectionMarkDetection": True or False, # Turn on selection mark detector in OCR engine. Only available in OCR 2.0+ processors. + "enableSelectionMarkDetection": True or False, # Turn on selection mark detector in OCR engine. Only available in OCR 2.0 (and later) processors. }, }, "schemaOverride": { # The schema defines the output of the processed document by a processor. # Optional. Override the schema of the ProcessorVersion. Will return an Invalid Argument error if this field is set when the underlying ProcessorVersion doesn't support schema override. @@ -3233,7 +3233,7 @@

Method Details

body: object, The request body. The object takes the form of: -{ # A singleton resource under a Processor which configures a collection of documents. Next Id: 8. +{ # A singleton resource under a Processor which configures a collection of documents. "documentWarehouseConfig": { # Configuration specific to the Document AI Warehouse-based implementation. # Optional. Document AI Warehouse-based dataset configuration. "collection": "A String", # Output only. The collection in Document AI Warehouse associated with the dataset. "schema": "A String", # Output only. The schema in Document AI Warehouse associated with the dataset. diff --git a/docs/dyn/documentai_v1beta3.projects.locations.processors.processorVersions.html b/docs/dyn/documentai_v1beta3.projects.locations.processors.processorVersions.html index 3c5f0f6b4f..f1488d5cf3 100644 --- a/docs/dyn/documentai_v1beta3.projects.locations.processors.processorVersions.html +++ b/docs/dyn/documentai_v1beta3.projects.locations.processors.processorVersions.html @@ -160,7 +160,7 @@

Method Details

}, "processOptions": { # Options for Process API # Inference-time options for the process API "fromEnd": 42, # Only process certain pages from the end, same as above. - "fromStart": 42, # Only process certain pages from the start, process all if the document has less pages. + "fromStart": 42, # Only process certain pages from the start. Process all if the document has fewer pages. "individualPageSelector": { # A list of individual page numbers. # Which pages to process (1-indexed). "pages": [ # Optional. Indices of the pages (starting from 1). 42, @@ -171,7 +171,7 @@

Method Details

"A String", ], "computeStyleInfo": True or False, # Turn on font identification model and return font style information. Deprecated, use PremiumFeatures.compute_style_info instead. - "disableCharacterBoxesDetection": True or False, # Turn off character box detector in OCR engine. Character box detection is enabled by default in OCR 2.0+ processors. + "disableCharacterBoxesDetection": True or False, # Turn off character box detector in OCR engine. Character box detection is enabled by default in OCR 2.0 (and later) processors. "enableImageQualityScores": True or False, # Enables intelligent document quality scores after OCR. Can help with diagnosing why OCR responses are of poor quality for a given input. Adds additional latency comparable to regular OCR to the process call. "enableNativePdfParsing": True or False, # Enables special handling for PDFs with existing text information. Results in better text extraction quality in such PDF inputs. "enableSymbol": True or False, # Includes symbol level OCR information if set to true. @@ -183,7 +183,7 @@

Method Details

"premiumFeatures": { # Configurations for premium OCR features. # Configurations for premium OCR features. "computeStyleInfo": True or False, # Turn on font identification model and return font style information. "enableMathOcr": True or False, # Turn on the model that can extract LaTeX math formulas. - "enableSelectionMarkDetection": True or False, # Turn on selection mark detector in OCR engine. Only available in OCR 2.0+ processors. + "enableSelectionMarkDetection": True or False, # Turn on selection mark detector in OCR engine. Only available in OCR 2.0 (and later) processors. }, }, "schemaOverride": { # The schema defines the output of the processed document by a processor. # Optional. Override the schema of the ProcessorVersion. Will return an Invalid Argument error if this field is set when the underlying ProcessorVersion doesn't support schema override. @@ -2377,7 +2377,7 @@

Method Details

}, "processOptions": { # Options for Process API # Inference-time options for the process API "fromEnd": 42, # Only process certain pages from the end, same as above. - "fromStart": 42, # Only process certain pages from the start, process all if the document has less pages. + "fromStart": 42, # Only process certain pages from the start. Process all if the document has fewer pages. "individualPageSelector": { # A list of individual page numbers. # Which pages to process (1-indexed). "pages": [ # Optional. Indices of the pages (starting from 1). 42, @@ -2388,7 +2388,7 @@

Method Details

"A String", ], "computeStyleInfo": True or False, # Turn on font identification model and return font style information. Deprecated, use PremiumFeatures.compute_style_info instead. - "disableCharacterBoxesDetection": True or False, # Turn off character box detector in OCR engine. Character box detection is enabled by default in OCR 2.0+ processors. + "disableCharacterBoxesDetection": True or False, # Turn off character box detector in OCR engine. Character box detection is enabled by default in OCR 2.0 (and later) processors. "enableImageQualityScores": True or False, # Enables intelligent document quality scores after OCR. Can help with diagnosing why OCR responses are of poor quality for a given input. Adds additional latency comparable to regular OCR to the process call. "enableNativePdfParsing": True or False, # Enables special handling for PDFs with existing text information. Results in better text extraction quality in such PDF inputs. "enableSymbol": True or False, # Includes symbol level OCR information if set to true. @@ -2400,7 +2400,7 @@

Method Details

"premiumFeatures": { # Configurations for premium OCR features. # Configurations for premium OCR features. "computeStyleInfo": True or False, # Turn on font identification model and return font style information. "enableMathOcr": True or False, # Turn on the model that can extract LaTeX math formulas. - "enableSelectionMarkDetection": True or False, # Turn on selection mark detector in OCR engine. Only available in OCR 2.0+ processors. + "enableSelectionMarkDetection": True or False, # Turn on selection mark detector in OCR engine. Only available in OCR 2.0 (and later) processors. }, }, "schemaOverride": { # The schema defines the output of the processed document by a processor. # Optional. Override the schema of the ProcessorVersion. Will return an Invalid Argument error if this field is set when the underlying ProcessorVersion doesn't support schema override. diff --git a/googleapiclient/discovery_cache/documents/documentai.v1.json b/googleapiclient/discovery_cache/documents/documentai.v1.json index 75c7aeaf4b..2b0590d42c 100644 --- a/googleapiclient/discovery_cache/documents/documentai.v1.json +++ b/googleapiclient/discovery_cache/documents/documentai.v1.json @@ -1042,7 +1042,7 @@ } } }, - "revision": "20230914", + "revision": "20230923", "rootUrl": "https://documentai.googleapis.com/", "schemas": { "GoogleCloudDocumentaiUiv1beta3AutoLabelDocumentsMetadata": { @@ -4034,7 +4034,7 @@ "type": "boolean" }, "disableCharacterBoxesDetection": { - "description": "Turn off character box detector in OCR engine. Character box detection is enabled by default in OCR 2.0+ processors.", + "description": "Turn off character box detector in OCR engine. Character box detection is enabled by default in OCR 2.0 (and later) processors.", "type": "boolean" }, "enableImageQualityScores": { @@ -4087,7 +4087,7 @@ "type": "boolean" }, "enableSelectionMarkDetection": { - "description": "Turn on selection mark detector in OCR engine. 
Only available in OCR 2.0+ processors.", + "description": "Turn on selection mark detector in OCR engine. Only available in OCR 2.0 (and later) processors.", "type": "boolean" } }, @@ -4103,7 +4103,7 @@ "type": "integer" }, "fromStart": { - "description": "Only process certain pages from the start, process all if the document has less pages.", + "description": "Only process certain pages from the start. Process all if the document has fewer pages.", "format": "int32", "type": "integer" }, @@ -7670,7 +7670,7 @@ "type": "object" }, "GoogleCloudDocumentaiV1beta3Dataset": { - "description": "A singleton resource under a Processor which configures a collection of documents. Next Id: 8.", + "description": "A singleton resource under a Processor which configures a collection of documents.", "id": "GoogleCloudDocumentaiV1beta3Dataset", "properties": { "documentWarehouseConfig": { diff --git a/googleapiclient/discovery_cache/documents/documentai.v1beta2.json b/googleapiclient/discovery_cache/documents/documentai.v1beta2.json index e93f8d0c0a..69f80f2faa 100644 --- a/googleapiclient/discovery_cache/documents/documentai.v1beta2.json +++ b/googleapiclient/discovery_cache/documents/documentai.v1beta2.json @@ -292,7 +292,7 @@ } } }, - "revision": "20230914", + "revision": "20230923", "rootUrl": "https://documentai.googleapis.com/", "schemas": { "GoogleCloudDocumentaiUiv1beta3AutoLabelDocumentsMetadata": { @@ -4722,7 +4722,7 @@ "type": "object" }, "GoogleCloudDocumentaiV1beta3Dataset": { - "description": "A singleton resource under a Processor which configures a collection of documents. 
Next Id: 8.", + "description": "A singleton resource under a Processor which configures a collection of documents.", "id": "GoogleCloudDocumentaiV1beta3Dataset", "properties": { "documentWarehouseConfig": { diff --git a/googleapiclient/discovery_cache/documents/documentai.v1beta3.json b/googleapiclient/discovery_cache/documents/documentai.v1beta3.json index 0437fc8510..d6c3c8ec0a 100644 --- a/googleapiclient/discovery_cache/documents/documentai.v1beta3.json +++ b/googleapiclient/discovery_cache/documents/documentai.v1beta3.json @@ -828,6 +828,34 @@ "https://www.googleapis.com/auth/cloud-platform" ] }, + "listDocuments": { + "description": "Returns a list of documents present in the dataset.", + "flatPath": "v1beta3/projects/{projectsId}/locations/{locationsId}/processors/{processorsId}/dataset:listDocuments", + "httpMethod": "POST", + "id": "documentai.projects.locations.processors.dataset.listDocuments", + "parameterOrder": [ + "dataset" + ], + "parameters": { + "dataset": { + "description": "Required. The resource name of the dataset to be listed. 
Format: projects/{project}/locations/{location}/processors/{processor}/dataset", + "location": "path", + "pattern": "^projects/[^/]+/locations/[^/]+/processors/[^/]+/dataset$", + "required": true, + "type": "string" + } + }, + "path": "v1beta3/{+dataset}:listDocuments", + "request": { + "$ref": "GoogleCloudDocumentaiV1beta3ListDocumentsRequest" + }, + "response": { + "$ref": "GoogleCloudDocumentaiV1beta3ListDocumentsResponse" + }, + "scopes": [ + "https://www.googleapis.com/auth/cloud-platform" + ] + }, "updateDatasetSchema": { "description": "Updates a `DatasetSchema`.", "flatPath": "v1beta3/projects/{projectsId}/locations/{locationsId}/processors/{processorsId}/dataset/datasetSchema", @@ -1256,7 +1284,7 @@ } } }, - "revision": "20230914", + "revision": "20230923", "rootUrl": "https://documentai.googleapis.com/", "schemas": { "GoogleCloudDocumentaiUiv1beta3AutoLabelDocumentsMetadata": { @@ -5662,7 +5690,7 @@ "type": "object" }, "GoogleCloudDocumentaiV1beta3Dataset": { - "description": "A singleton resource under a Processor which configures a collection of documents. Next Id: 8.", + "description": "A singleton resource under a Processor which configures a collection of documents.", "id": "GoogleCloudDocumentaiV1beta3Dataset", "properties": { "documentWarehouseConfig": { @@ -6063,6 +6091,58 @@ }, "type": "object" }, + "GoogleCloudDocumentaiV1beta3DocumentMetadata": { + "description": "Metadata about a document.", + "id": "GoogleCloudDocumentaiV1beta3DocumentMetadata", + "properties": { + "datasetType": { + "description": "Type of the dataset split to which the document belongs.", + "enum": [ + "DATASET_SPLIT_TYPE_UNSPECIFIED", + "DATASET_SPLIT_TRAIN", + "DATASET_SPLIT_TEST", + "DATASET_SPLIT_UNASSIGNED" + ], + "enumDescriptions": [ + "Default value if the enum is not set.", + "Identifies the train documents.", + "Identifies the test documents.", + "Identifies the unassigned documents." 
+ ], + "type": "string" + }, + "displayName": { + "description": "The display name of the document.", + "type": "string" + }, + "documentId": { + "$ref": "GoogleCloudDocumentaiV1beta3DocumentId", + "description": "Document identifier." + }, + "labelingState": { + "description": "Labelling state of the document.", + "enum": [ + "DOCUMENT_LABELING_STATE_UNSPECIFIED", + "DOCUMENT_LABELED", + "DOCUMENT_UNLABELED", + "DOCUMENT_AUTO_LABELED" + ], + "enumDescriptions": [ + "Default value if the enum is not set.", + "Document has been labelled.", + "Document has not been labelled.", + "Document has been auto-labelled." + ], + "type": "string" + }, + "pageCount": { + "description": "Number of pages in the document.", + "format": "int32", + "type": "integer" + } + }, + "type": "object" + }, "GoogleCloudDocumentaiV1beta3DocumentOutputConfig": { "description": "Config that controls the output of documents. All documents will be written as a JSON file.", "id": "GoogleCloudDocumentaiV1beta3DocumentOutputConfig", @@ -7807,6 +7887,56 @@ }, "type": "object" }, + "GoogleCloudDocumentaiV1beta3ListDocumentsRequest": { + "id": "GoogleCloudDocumentaiV1beta3ListDocumentsRequest", + "properties": { + "filter": { + "description": "Optional. Query to filter the documents based on https://google.aip.dev/160. ## Currently supported query strings are: `SplitType=DATASET_SPLIT_TEST|DATASET_SPLIT_TRAIN|DATASET_SPLIT_UNASSIGNED` - `LabelingState=DOCUMENT_LABELED|DOCUMENT_UNLABELED|DOCUMENT_AUTO_LABELED` - `DisplayName=\\\"file_name.pdf\\\"` - `EntityType=abc/def` - `TagName=\\\"auto-labeling-running\\\"|\\\"sampled\\\"` Note: - Only `AND`, `=` and `!=` are supported. e.g. `DisplayName=file_name AND EntityType!=abc` IS supported. - Wildcard `*` is supported only in `DisplayName` filter - No duplicate filter keys are allowed, e.g. `EntityType=a AND EntityType=b` is NOT supported. 
- String match is case sensitive (for filter `DisplayName` & `EntityType`).", + "type": "string" + }, + "pageSize": { + "description": "The maximum number of documents to return. The service may return fewer than this value. If unspecified, at most 20 documents will be returned. The maximum value is 100; values above 100 will be coerced to 100.", + "format": "int32", + "type": "integer" + }, + "pageToken": { + "description": "A page token, received from a previous `ListDocuments` call. Provide this to retrieve the subsequent page. When paginating, all other parameters provided to `ListDocuments` must match the call that provided the page token.", + "type": "string" + }, + "returnTotalSize": { + "description": "Optional. Controls if the ListDocuments request requires a total size of matched documents. See ListDocumentsResponse.total_size. Enabling this flag may adversely impact performance. Defaults to false.", + "type": "boolean" + }, + "skip": { + "description": "Optional. Number of results to skip beginning from the `page_token` if provided. https://google.aip.dev/158#skipping-results. It must be a non-negative integer. Negative values will be rejected. Note that this is not the number of pages to skip. If this value causes the cursor to move past the end of results, `ListDocumentsResponse.document_metadata` and `ListDocumentsResponse.next_page_token` will be empty.", + "format": "int32", + "type": "integer" + } + }, + "type": "object" + }, + "GoogleCloudDocumentaiV1beta3ListDocumentsResponse": { + "id": "GoogleCloudDocumentaiV1beta3ListDocumentsResponse", + "properties": { + "documentMetadata": { + "description": "Document metadata corresponding to the listed documents.", + "items": { + "$ref": "GoogleCloudDocumentaiV1beta3DocumentMetadata" + }, + "type": "array" + }, + "nextPageToken": { + "description": "A token, which can be sent as `page_token` to retrieve the next page. 
If this field is omitted, there are no subsequent pages.", + "type": "string" + }, + "totalSize": { + "description": "Total count of documents queried.", + "format": "int32", + "type": "integer" + } + }, + "type": "object" + }, "GoogleCloudDocumentaiV1beta3ListEvaluationsResponse": { "description": "The response from `ListEvaluations`.", "id": "GoogleCloudDocumentaiV1beta3ListEvaluationsResponse", @@ -7913,7 +8043,7 @@ "type": "boolean" }, "disableCharacterBoxesDetection": { - "description": "Turn off character box detector in OCR engine. Character box detection is enabled by default in OCR 2.0+ processors.", + "description": "Turn off character box detector in OCR engine. Character box detection is enabled by default in OCR 2.0 (and later) processors.", "type": "boolean" }, "enableImageQualityScores": { @@ -7966,7 +8096,7 @@ "type": "boolean" }, "enableSelectionMarkDetection": { - "description": "Turn on selection mark detector in OCR engine. Only available in OCR 2.0+ processors.", + "description": "Turn on selection mark detector in OCR engine. Only available in OCR 2.0 (and later) processors.", "type": "boolean" } }, @@ -7982,7 +8112,7 @@ "type": "integer" }, "fromStart": { - "description": "Only process certain pages from the start, process all if the document has less pages.", + "description": "Only process certain pages from the start. Process all if the document has fewer pages.", "format": "int32", "type": "integer" },