Skip to content

Commit

Permalink
feat: Support a new Layout Processor in Document AI
Browse files Browse the repository at this point in the history
docs: keep the API doc up-to-date with recent changes

PiperOrigin-RevId: 621233157
  • Loading branch information
Google APIs authored and Copybara-Service committed Apr 2, 2024
1 parent 8d326d5 commit d5020ff
Show file tree
Hide file tree
Showing 3 changed files with 173 additions and 4 deletions.
137 changes: 137 additions & 0 deletions google/cloud/documentai/v1beta3/document.proto
Expand Up @@ -897,6 +897,137 @@ message Document {
repeated Provenance provenance = 3 [deprecated = true];
}

// Parsed layout of a document, expressed as the collection of blocks the
// document is divided into.
message DocumentLayout {
  // A single layout block. A block carries one of the supported kinds of
  // content: text, table, or list.
  message DocumentLayoutBlock {
    // Where a block starts and ends in the document, in pages.
    message LayoutPageSpan {
      // Page on which the block starts.
      int32 page_start = 1;

      // Page on which the block ends.
      int32 page_end = 2;
    }

    // A block of text.
    message LayoutTextBlock {
      // The text content held by this block.
      string text = 1;

      // Kind of text in this block. Available options are: `paragraph`,
      // `subtitle`, `heading-1`, `heading-2`, `heading-3`, `heading-4`,
      // `heading-5`, `header`, `footer`.
      string type = 2;

      // Child blocks of this text block. Because the element type is
      // `DocumentLayoutBlock` itself, blocks can nest to form a hierarchy.
      repeated DocumentLayoutBlock blocks = 3;
    }

    // A block holding a table.
    message LayoutTableBlock {
      // Rows forming the header at the top of the table.
      repeated LayoutTableRow header_rows = 1;

      // Rows forming the body, i.e. the main content of the table.
      repeated LayoutTableRow body_rows = 2;

      // Caption (title) of the table.
      string caption = 3;
    }

    // One row of a table.
    message LayoutTableRow {
      // The cells that make up this row.
      repeated LayoutTableCell cells = 1;
    }

    // One cell within a table row.
    message LayoutTableCell {
      // The blocks contained in this cell. Because the element type is
      // `DocumentLayoutBlock` itself, cell content can nest to form a
      // hierarchy.
      repeated DocumentLayoutBlock blocks = 1;

      // Number of rows spanned by this cell.
      int32 row_span = 2;

      // Number of columns spanned by this cell.
      int32 col_span = 3;
    }

    // A block holding a list.
    message LayoutListBlock {
      // The entries that make up this list.
      repeated LayoutListEntry list_entries = 1;

      // Kind of the list_entries (if exist). Available options are `ordered`
      // and `unordered`.
      string type = 2;
    }

    // One entry of a list.
    message LayoutListEntry {
      // The blocks contained in this entry. Because the element type is
      // `DocumentLayoutBlock` itself, entry content can nest to form a
      // hierarchy.
      repeated DocumentLayoutBlock blocks = 1;
    }

    // Content of the block. As a oneof, at most one of the members below is
    // set at a time.
    oneof block {
      // Text content.
      LayoutTextBlock text_block = 2;

      // Table content/structure.
      LayoutTableBlock table_block = 3;

      // List content/structure.
      LayoutListBlock list_block = 4;
    }

    // Identifier of this block.
    string block_id = 1;

    // Pages spanned by this block.
    LayoutPageSpan page_span = 5;
  }

  // The blocks found in the document.
  repeated DocumentLayoutBlock blocks = 1;
}

// The chunks a document has been divided into.
message ChunkedDocument {
  // A single chunk of the document.
  message Chunk {
    // Where a chunk starts and ends in the document, in pages.
    message ChunkPageSpan {
      // Page on which the chunk starts.
      int32 page_start = 1;

      // Page on which the chunk ends.
      int32 page_end = 2;
    }

    // Identifier of this chunk.
    string chunk_id = 1;

    // IDs of all the parsed document layout source blocks that were used to
    // generate this chunk.
    repeated string source_block_ids = 2;

    // The text content of this chunk.
    string content = 3;

    // Pages spanned by this chunk.
    ChunkPageSpan page_span = 4;
  }

  // All chunks of the document.
  repeated Chunk chunks = 1;
}

// Original source document from the user.
oneof source {
// Optional. Currently supports Google Cloud Storage URI of the form
Expand Down Expand Up @@ -950,6 +1081,12 @@ message Document {

// Placeholder. Revision history of this document.
repeated Revision revisions = 13;

// Parsed layout of the document.
DocumentLayout document_layout = 17;

// Document chunked based on chunking config.
ChunkedDocument chunked_document = 18;
}

// The revision reference specifies which revision on the document to read.
Expand Down
32 changes: 32 additions & 0 deletions google/cloud/documentai/v1beta3/document_processor_service.proto
Expand Up @@ -342,6 +342,34 @@ service DocumentProcessorService {

// Options for Process API
message ProcessOptions {
// Serving config for layout parser processor.
// Holds a single optional sub-config, `chunking_config`.
message LayoutConfig {
// Serving config for chunking. Every field is annotated OPTIONAL.
message ChunkingConfig {
// Optional. The chunk sizes to use when splitting documents, in order of
// level.
// NOTE(review): this is a singular `int32`, so it carries one value even
// though the description speaks of "sizes ... in order of level". The unit
// is not stated here either -- confirm the intended semantics.
int32 chunk_size = 1 [(google.api.field_behavior) = OPTIONAL];

// Optional. Whether or not to include ancestor headings when splitting.
bool include_ancestor_headings = 2
[(google.api.field_behavior) = OPTIONAL];

// Optional. The number of tokens to group together when evaluating
// semantic similarity.
// NOTE(review): declared as `bool` although the description is a count.
// This looks like a type mismatch. The type is left untouched because
// changing it would alter generated code for existing callers -- confirm
// with the API owners.
bool semantic_chunking_group_size = 3
[(google.api.field_behavior) = OPTIONAL];

// Optional. The percentile of cosine dissimilarity that must be exceeded
// between a group of tokens and the next. The smaller this number is, the
// more chunks will be generated.
// NOTE(review): presumably a value in the range 0-100; the valid range is
// not stated here -- TODO confirm.
int32 breakpoint_percentile_threshold = 4
[(google.api.field_behavior) = OPTIONAL];
}

// Optional. Config for chunking in layout parser processor.
ChunkingConfig chunking_config = 1 [(google.api.field_behavior) = OPTIONAL];
}

// A list of individual page numbers.
message IndividualPageSelector {
// Optional. Indices of the pages (starting from 1).
Expand Down Expand Up @@ -370,6 +398,10 @@ message ProcessOptions {
// Returns error if set on other processor types.
OcrConfig ocr_config = 1;

// Optional. Only applicable to `LAYOUT_PARSER_PROCESSOR`.
// Returns error if set on other processor types.
LayoutConfig layout_config = 9 [(google.api.field_behavior) = OPTIONAL];

// Optional. Override the schema of the
// [ProcessorVersion][google.cloud.documentai.v1beta3.ProcessorVersion]. Will
// return an Invalid Argument error if this field is set when the underlying
Expand Down
8 changes: 4 additions & 4 deletions google/cloud/documentai/v1beta3/processor.proto
Expand Up @@ -94,19 +94,19 @@ message ProcessorVersion {
MODEL_TYPE_CUSTOM = 2;
}

// The resource name of the processor version.
// Identifier. The resource name of the processor version.
// Format:
// `projects/{project}/locations/{location}/processors/{processor}/processorVersions/{processor_version}`
string name = 1;
string name = 1 [(google.api.field_behavior) = IDENTIFIER];

// The display name of the processor version.
string display_name = 2;

// The schema of the processor version. Describes the output.
DocumentSchema document_schema = 12;

// The state of the processor version.
State state = 6;
// Output only. The state of the processor version.
State state = 6 [(google.api.field_behavior) = OUTPUT_ONLY];

// The time the processor version was created.
google.protobuf.Timestamp create_time = 7;
Expand Down

0 comments on commit d5020ff

Please sign in to comment.