Skip to content

Commit

Permalink
feat: make page_range field public
Browse files Browse the repository at this point in the history
fix: `OcrConfig.compute_style_info` is deprecated. Use `PremiumFeatures.compute_style_info` instead.

PiperOrigin-RevId: 568225060
  • Loading branch information
Google APIs authored and Copybara-Service committed Sep 25, 2023
1 parent 38894df commit 3cd21b1
Show file tree
Hide file tree
Showing 2 changed files with 49 additions and 3 deletions.
28 changes: 25 additions & 3 deletions google/cloud/documentai/v1/document_io.proto
Expand Up @@ -39,7 +39,7 @@ message RawDocument {
// the following:
// `*`, `?`, `[`, `]`, `%`, `{`, `}`,`'`, `\"`, `,`
// `~`, `=` and `:` are reserved.
// If not specified, a default ID will be generated.
// If not specified, a default ID is generated.
string display_name = 3;
}

Expand Down Expand Up @@ -122,6 +122,19 @@ message OcrConfig {
repeated string language_hints = 1;
}

// Configurations for premium OCR features. Every option is a boolean switch
// that turns one feature on when set to true.
message PremiumFeatures {
// NOTE(review): field numbers here start at 3; confirm whether 1 and 2 need
// a `reserved` statement.

// Turns on the selection mark detector in the OCR engine. Only available in
// OCR 2.0+ processors.
bool enable_selection_mark_detection = 3;

// Turns on the font identification model and returns font style information.
bool compute_style_info = 4;

// Turns on the model that can extract LaTeX math formulas.
bool enable_math_ocr = 5;
}

// Hints for the OCR model.
Hints hints = 2;

Expand All @@ -146,7 +159,16 @@ message OcrConfig {
// Includes symbol level OCR information if set to true.
bool enable_symbol = 6;

// Turn on font id model and returns font style information.
// Use PremiumFeatures.compute_style_info instead.
// Turn on font identification model and return font style information.
// Deprecated, use
// [PremiumFeatures.compute_style_info][google.cloud.documentai.v1.OcrConfig.PremiumFeatures.compute_style_info]
// instead.
bool compute_style_info = 8 [deprecated = true];

// Turn off character box detector in OCR engine. Character box detection is
// enabled by default in OCR 2.0+ processors.
bool disable_character_boxes_detection = 10;

// Configurations for premium OCR features.
PremiumFeatures premium_features = 11;
}
24 changes: 24 additions & 0 deletions google/cloud/documentai/v1/document_processor_service.proto
Expand Up @@ -328,6 +328,30 @@ service DocumentProcessorService {

// Options for Process API
message ProcessOptions {
// Selects pages by listing their individual page numbers.
message IndividualPageSelector {
// Optional. Page numbers of the pages to select. Numbering starts from 1.
repeated int32 pages = 1 [(google.api.field_behavior) = OPTIONAL];
}

// A subset of pages to process. If not specified, all pages are processed.
// NOTICE: If a page range is set, only the given pages are extracted from
// the document and processed. In the output document,
// [Document.Page.page_number][google.cloud.documentai.v1.Document.Page.page_number]
// refers to the page number in the original document. This configuration
// only applies to sync requests.
oneof page_range {
// Which pages to process, given as individual page numbers (1-indexed).
IndividualPageSelector individual_page_selector = 5;

// Only process certain pages, counted from the start of the document.
// Processes all pages if the document has fewer pages.
// NOTE(review): presumably the value is the number of pages to process;
// confirm.
int32 from_start = 6;

// Only process certain pages, counted from the end of the document.
// Processes all pages if the document has fewer pages.
// NOTE(review): presumably the value is the number of pages to process;
// confirm.
int32 from_end = 7;
}

// Only applicable to `OCR_PROCESSOR`. Returns error if set on other
// processor types.
OcrConfig ocr_config = 1;
Expand Down

0 comments on commit 3cd21b1

Please sign in to comment.