feat(documentai): update the api

#### documentai:v1beta3 The following keys were added: - resources.projects.resources.locations.resources.processors.resources.dataset.methods.listDocuments (Total Keys: 12) - schemas.GoogleCloudDocumentaiV1beta3DocumentMetadata (Total Keys: 8) - schemas.GoogleCloudDocumentaiV1beta3ListDocumentsRequest (Total Keys: 9) - schemas.GoogleCloudDocumentaiV1beta3ListDocumentsResponse (Total Keys: 7)
googleapis · Oct 3, 2023 · 53c4c4e · 53c4c4e
1 parent 6819fc0
commit 53c4c4e
Show file tree

Hide file tree

Showing 8 changed files with 240 additions and 37 deletions.
diff --git a/docs/dyn/documentai_v1.projects.locations.processors.html b/docs/dyn/documentai_v1.projects.locations.processors.html
@@ -153,7 +153,7 @@ <h3>Method Details</h3>
   },
   &quot;processOptions&quot;: { # Options for Process API # Inference-time options for the process API
     &quot;fromEnd&quot;: 42, # Only process certain pages from the end, same as above.
-    &quot;fromStart&quot;: 42, # Only process certain pages from the start, process all if the document has less pages.
+    &quot;fromStart&quot;: 42, # Only process certain pages from the start. Process all if the document has fewer pages.
     &quot;individualPageSelector&quot;: { # A list of individual page numbers. # Which pages to process (1-indexed).
       &quot;pages&quot;: [ # Optional. Indices of the pages (starting from 1).
         42,
@@ -164,7 +164,7 @@ <h3>Method Details</h3>
         &quot;A String&quot;,
       ],
       &quot;computeStyleInfo&quot;: True or False, # Turn on font identification model and return font style information. Deprecated, use PremiumFeatures.compute_style_info instead.
-      &quot;disableCharacterBoxesDetection&quot;: True or False, # Turn off character box detector in OCR engine. Character box detection is enabled by default in OCR 2.0+ processors.
+      &quot;disableCharacterBoxesDetection&quot;: True or False, # Turn off character box detector in OCR engine. Character box detection is enabled by default in OCR 2.0 (and later) processors.
       &quot;enableImageQualityScores&quot;: True or False, # Enables intelligent document quality scores after OCR. Can help with diagnosing why OCR responses are of poor quality for a given input. Adds additional latency comparable to regular OCR to the process call.
       &quot;enableNativePdfParsing&quot;: True or False, # Enables special handling for PDFs with existing text information. Results in better text extraction quality in such PDF inputs.
       &quot;enableSymbol&quot;: True or False, # Includes symbol level OCR information if set to true.
@@ -176,7 +176,7 @@ <h3>Method Details</h3>
       &quot;premiumFeatures&quot;: { # Configurations for premium OCR features. # Configurations for premium OCR features.
         &quot;computeStyleInfo&quot;: True or False, # Turn on font identification model and return font style information.
         &quot;enableMathOcr&quot;: True or False, # Turn on the model that can extract LaTeX math formulas.
-        &quot;enableSelectionMarkDetection&quot;: True or False, # Turn on selection mark detector in OCR engine. Only available in OCR 2.0+ processors.
+        &quot;enableSelectionMarkDetection&quot;: True or False, # Turn on selection mark detector in OCR engine. Only available in OCR 2.0 (and later) processors.
       },
     },
   },
@@ -1334,7 +1334,7 @@ <h3>Method Details</h3>
   },
   &quot;processOptions&quot;: { # Options for Process API # Inference-time options for the process API
     &quot;fromEnd&quot;: 42, # Only process certain pages from the end, same as above.
-    &quot;fromStart&quot;: 42, # Only process certain pages from the start, process all if the document has less pages.
+    &quot;fromStart&quot;: 42, # Only process certain pages from the start. Process all if the document has fewer pages.
     &quot;individualPageSelector&quot;: { # A list of individual page numbers. # Which pages to process (1-indexed).
       &quot;pages&quot;: [ # Optional. Indices of the pages (starting from 1).
         42,
@@ -1345,7 +1345,7 @@ <h3>Method Details</h3>
         &quot;A String&quot;,
       ],
       &quot;computeStyleInfo&quot;: True or False, # Turn on font identification model and return font style information. Deprecated, use PremiumFeatures.compute_style_info instead.
-      &quot;disableCharacterBoxesDetection&quot;: True or False, # Turn off character box detector in OCR engine. Character box detection is enabled by default in OCR 2.0+ processors.
+      &quot;disableCharacterBoxesDetection&quot;: True or False, # Turn off character box detector in OCR engine. Character box detection is enabled by default in OCR 2.0 (and later) processors.
       &quot;enableImageQualityScores&quot;: True or False, # Enables intelligent document quality scores after OCR. Can help with diagnosing why OCR responses are of poor quality for a given input. Adds additional latency comparable to regular OCR to the process call.
       &quot;enableNativePdfParsing&quot;: True or False, # Enables special handling for PDFs with existing text information. Results in better text extraction quality in such PDF inputs.
       &quot;enableSymbol&quot;: True or False, # Includes symbol level OCR information if set to true.
@@ -1357,7 +1357,7 @@ <h3>Method Details</h3>
       &quot;premiumFeatures&quot;: { # Configurations for premium OCR features. # Configurations for premium OCR features.
         &quot;computeStyleInfo&quot;: True or False, # Turn on font identification model and return font style information.
         &quot;enableMathOcr&quot;: True or False, # Turn on the model that can extract LaTeX math formulas.
-        &quot;enableSelectionMarkDetection&quot;: True or False, # Turn on selection mark detector in OCR engine. Only available in OCR 2.0+ processors.
+        &quot;enableSelectionMarkDetection&quot;: True or False, # Turn on selection mark detector in OCR engine. Only available in OCR 2.0 (and later) processors.
       },
     },
   },

diff --git a/docs/dyn/documentai_v1.projects.locations.processors.processorVersions.html b/docs/dyn/documentai_v1.projects.locations.processors.processorVersions.html
@@ -148,7 +148,7 @@ <h3>Method Details</h3>
   },
   &quot;processOptions&quot;: { # Options for Process API # Inference-time options for the process API
     &quot;fromEnd&quot;: 42, # Only process certain pages from the end, same as above.
-    &quot;fromStart&quot;: 42, # Only process certain pages from the start, process all if the document has less pages.
+    &quot;fromStart&quot;: 42, # Only process certain pages from the start. Process all if the document has fewer pages.
     &quot;individualPageSelector&quot;: { # A list of individual page numbers. # Which pages to process (1-indexed).
       &quot;pages&quot;: [ # Optional. Indices of the pages (starting from 1).
         42,
@@ -159,7 +159,7 @@ <h3>Method Details</h3>
         &quot;A String&quot;,
       ],
       &quot;computeStyleInfo&quot;: True or False, # Turn on font identification model and return font style information. Deprecated, use PremiumFeatures.compute_style_info instead.
-      &quot;disableCharacterBoxesDetection&quot;: True or False, # Turn off character box detector in OCR engine. Character box detection is enabled by default in OCR 2.0+ processors.
+      &quot;disableCharacterBoxesDetection&quot;: True or False, # Turn off character box detector in OCR engine. Character box detection is enabled by default in OCR 2.0 (and later) processors.
       &quot;enableImageQualityScores&quot;: True or False, # Enables intelligent document quality scores after OCR. Can help with diagnosing why OCR responses are of poor quality for a given input. Adds additional latency comparable to regular OCR to the process call.
       &quot;enableNativePdfParsing&quot;: True or False, # Enables special handling for PDFs with existing text information. Results in better text extraction quality in such PDF inputs.
       &quot;enableSymbol&quot;: True or False, # Includes symbol level OCR information if set to true.
@@ -171,7 +171,7 @@ <h3>Method Details</h3>
       &quot;premiumFeatures&quot;: { # Configurations for premium OCR features. # Configurations for premium OCR features.
         &quot;computeStyleInfo&quot;: True or False, # Turn on font identification model and return font style information.
         &quot;enableMathOcr&quot;: True or False, # Turn on the model that can extract LaTeX math formulas.
-        &quot;enableSelectionMarkDetection&quot;: True or False, # Turn on selection mark detector in OCR engine. Only available in OCR 2.0+ processors.
+        &quot;enableSelectionMarkDetection&quot;: True or False, # Turn on selection mark detector in OCR engine. Only available in OCR 2.0 (and later) processors.
       },
     },
   },
@@ -1406,7 +1406,7 @@ <h3>Method Details</h3>
   },
   &quot;processOptions&quot;: { # Options for Process API # Inference-time options for the process API
     &quot;fromEnd&quot;: 42, # Only process certain pages from the end, same as above.
-    &quot;fromStart&quot;: 42, # Only process certain pages from the start, process all if the document has less pages.
+    &quot;fromStart&quot;: 42, # Only process certain pages from the start. Process all if the document has fewer pages.
     &quot;individualPageSelector&quot;: { # A list of individual page numbers. # Which pages to process (1-indexed).
       &quot;pages&quot;: [ # Optional. Indices of the pages (starting from 1).
         42,
@@ -1417,7 +1417,7 @@ <h3>Method Details</h3>
         &quot;A String&quot;,
       ],
       &quot;computeStyleInfo&quot;: True or False, # Turn on font identification model and return font style information. Deprecated, use PremiumFeatures.compute_style_info instead.
-      &quot;disableCharacterBoxesDetection&quot;: True or False, # Turn off character box detector in OCR engine. Character box detection is enabled by default in OCR 2.0+ processors.
+      &quot;disableCharacterBoxesDetection&quot;: True or False, # Turn off character box detector in OCR engine. Character box detection is enabled by default in OCR 2.0 (and later) processors.
       &quot;enableImageQualityScores&quot;: True or False, # Enables intelligent document quality scores after OCR. Can help with diagnosing why OCR responses are of poor quality for a given input. Adds additional latency comparable to regular OCR to the process call.
       &quot;enableNativePdfParsing&quot;: True or False, # Enables special handling for PDFs with existing text information. Results in better text extraction quality in such PDF inputs.
       &quot;enableSymbol&quot;: True or False, # Includes symbol level OCR information if set to true.
@@ -1429,7 +1429,7 @@ <h3>Method Details</h3>
       &quot;premiumFeatures&quot;: { # Configurations for premium OCR features. # Configurations for premium OCR features.
         &quot;computeStyleInfo&quot;: True or False, # Turn on font identification model and return font style information.
         &quot;enableMathOcr&quot;: True or False, # Turn on the model that can extract LaTeX math formulas.
-        &quot;enableSelectionMarkDetection&quot;: True or False, # Turn on selection mark detector in OCR engine. Only available in OCR 2.0+ processors.
+        &quot;enableSelectionMarkDetection&quot;: True or False, # Turn on selection mark detector in OCR engine. Only available in OCR 2.0 (and later) processors.
       },
     },
   },

diff --git a/docs/dyn/documentai_v1beta3.projects.locations.processors.dataset.html b/docs/dyn/documentai_v1beta3.projects.locations.processors.dataset.html
@@ -89,6 +89,12 @@ <h2>Instance Methods</h2>
 <p class="toc_element">
   <code><a href="#importDocuments">importDocuments(dataset, body=None, x__xgafv=None)</a></code></p>
 <p class="firstline">Import documents into a dataset.</p>
+<p class="toc_element">
+  <code><a href="#listDocuments">listDocuments(dataset, body=None, x__xgafv=None)</a></code></p>
+<p class="firstline">Returns a list of documents present in the dataset.</p>
+<p class="toc_element">
+  <code><a href="#listDocuments_next">listDocuments_next()</a></code></p>
+<p class="firstline">Retrieves the next page of results.</p>
 <p class="toc_element">
   <code><a href="#updateDatasetSchema">updateDatasetSchema(name, body=None, updateMask=None, x__xgafv=None)</a></code></p>
 <p class="firstline">Updates a `DatasetSchema`.</p>
@@ -1163,6 +1169,73 @@ <h3>Method Details</h3>
 }</pre>
 </div>
 
+<div class="method">
+    <code class="details" id="listDocuments">listDocuments(dataset, body=None, x__xgafv=None)</code>
+  <pre>Returns a list of documents present in the dataset.
+
+Args:
+  dataset: string, Required. The resource name of the dataset to be listed. Format: projects/{project}/locations/{location}/processors/{processor}/dataset (required)
+  body: object, The request body.
+    The object takes the form of:
+
+{
+  &quot;filter&quot;: &quot;A String&quot;, # Optional. Query to filter the documents based on https://google.aip.dev/160. ## Currently supported query strings are: `SplitType=DATASET_SPLIT_TEST|DATASET_SPLIT_TRAIN|DATASET_SPLIT_UNASSIGNED` - `LabelingState=DOCUMENT_LABELED|DOCUMENT_UNLABELED|DOCUMENT_AUTO_LABELED` - `DisplayName=\&quot;file_name.pdf\&quot;` - `EntityType=abc/def` - `TagName=\&quot;auto-labeling-running\&quot;|\&quot;sampled\&quot;` Note: - Only `AND`, `=` and `!=` are supported. e.g. `DisplayName=file_name AND EntityType!=abc` IS supported. - Wildcard `*` is supported only in `DisplayName` filter. - No duplicate filter keys are allowed, e.g. `EntityType=a AND EntityType=b` is NOT supported. - String match is case sensitive (for filter `DisplayName` &amp; `EntityType`).
+  &quot;pageSize&quot;: 42, # The maximum number of documents to return. The service may return fewer than this value. If unspecified, at most 20 documents will be returned. The maximum value is 100; values above 100 will be coerced to 100.
+  &quot;pageToken&quot;: &quot;A String&quot;, # A page token, received from a previous `ListDocuments` call. Provide this to retrieve the subsequent page. When paginating, all other parameters provided to `ListDocuments` must match the call that provided the page token.
+  &quot;returnTotalSize&quot;: True or False, # Optional. Controls if the ListDocuments request requires a total size of matched documents. See ListDocumentsResponse.total_size. Enabling this flag may adversely impact performance. Defaults to false.
+  &quot;skip&quot;: 42, # Optional. Number of results to skip beginning from the `page_token` if provided. https://google.aip.dev/158#skipping-results. It must be a non-negative integer. Negative values will be rejected. Note that this is not the number of pages to skip. If this value causes the cursor to move past the end of results, `ListDocumentsResponse.document_metadata` and `ListDocumentsResponse.next_page_token` will be empty.
+}
+
+  x__xgafv: string, V1 error format.
+    Allowed values
+      1 - v1 error format
+      2 - v2 error format
+
+Returns:
+  An object of the form:
+
+    {
+  &quot;documentMetadata&quot;: [ # Document metadata corresponding to the listed documents.
+    { # Metadata about a document.
+      &quot;datasetType&quot;: &quot;A String&quot;, # Type of the dataset split to which the document belongs.
+      &quot;displayName&quot;: &quot;A String&quot;, # The display name of the document.
+      &quot;documentId&quot;: { # Document Identifier. # Document identifier.
+        &quot;gcsManagedDocId&quot;: { # Identifies a document uniquely within the scope of a dataset in the user-managed Cloud Storage option. # A document id within user-managed Cloud Storage.
+          &quot;cwDocId&quot;: &quot;A String&quot;, # Id of the document (indexed) managed by Content Warehouse.
+          &quot;gcsUri&quot;: &quot;A String&quot;, # Required. The Cloud Storage URI where the actual document is stored.
+        },
+        &quot;revisionRef&quot;: { # The revision reference specifies which revision on the document to read. # Points to a specific revision of the document if set.
+          &quot;latestProcessorVersion&quot;: &quot;A String&quot;, # Reads the revision generated by the processor version. The format takes the full resource name of processor version. `projects/{project}/locations/{location}/processors/{processor}/processorVersions/{processorVersion}`
+          &quot;revisionCase&quot;: &quot;A String&quot;, # Reads the revision by the predefined case.
+          &quot;revisionId&quot;: &quot;A String&quot;, # Reads the revision given by the id.
+        },
+        &quot;unmanagedDocId&quot;: { # Identifies a document uniquely within the scope of a dataset in unmanaged option. # A document id within unmanaged dataset.
+          &quot;docId&quot;: &quot;A String&quot;, # Required. The id of the document.
+        },
+      },
+      &quot;labelingState&quot;: &quot;A String&quot;, # Labeling state of the document.
+      &quot;pageCount&quot;: 42, # Number of pages in the document.
+    },
+  ],
+  &quot;nextPageToken&quot;: &quot;A String&quot;, # A token, which can be sent as `page_token` to retrieve the next page. If this field is omitted, there are no subsequent pages.
+  &quot;totalSize&quot;: 42, # Total count of documents queried.
+}</pre>
+</div>
+
+<div class="method">
+    <code class="details" id="listDocuments_next">listDocuments_next()</code>
+  <pre>Retrieves the next page of results.
+
+        Args:
+          previous_request: The request for the previous page. (required)
+          previous_response: The response from the request for the previous page. (required)
+
+        Returns:
+          A request object that you can call &#x27;execute()&#x27; on to request the next
+          page. Returns None if there are no more items in the collection.
+        </pre>
+</div>
+
 <div class="method">
     <code class="details" id="updateDatasetSchema">updateDatasetSchema(name, body=None, updateMask=None, x__xgafv=None)</code>
   <pre>Updates a `DatasetSchema`.