Skip to content

Commit

Permalink
feat: support import data from Cloud Spanner, BigTable, SQL and Fires…
Browse files Browse the repository at this point in the history
…tore

feat: support standalone ranking API
feat: support layout detection and more chunking features
feat: support advanced search boosting
docs: keep the API doc up-to-date with recent changes

PiperOrigin-RevId: 621906335
  • Loading branch information
Google APIs authored and Copybara-Service committed Apr 4, 2024
1 parent f0ad215 commit 624b052
Show file tree
Hide file tree
Showing 11 changed files with 738 additions and 48 deletions.
4 changes: 4 additions & 0 deletions google/cloud/discoveryengine/v1alpha/BUILD.bazel
Expand Up @@ -43,6 +43,7 @@ proto_library(
"estimate_billing_service.proto",
"import_config.proto",
"purge_config.proto",
"rank_service.proto",
"recommendation_service.proto",
"schema.proto",
"schema_service.proto",
Expand Down Expand Up @@ -143,6 +144,8 @@ java_gapic_test(
"com.google.cloud.discoveryengine.v1alpha.EngineServiceClientTest",
"com.google.cloud.discoveryengine.v1alpha.EstimateBillingServiceClientHttpJsonTest",
"com.google.cloud.discoveryengine.v1alpha.EstimateBillingServiceClientTest",
"com.google.cloud.discoveryengine.v1alpha.RankServiceClientHttpJsonTest",
"com.google.cloud.discoveryengine.v1alpha.RankServiceClientTest",
"com.google.cloud.discoveryengine.v1alpha.RecommendationServiceClientHttpJsonTest",
"com.google.cloud.discoveryengine.v1alpha.RecommendationServiceClientTest",
"com.google.cloud.discoveryengine.v1alpha.SchemaServiceClientHttpJsonTest",
Expand Down Expand Up @@ -403,6 +406,7 @@ load(

# C# proto library target built from `:discoveryengine_proto`.
# `extra_opts` is explicitly left empty.
csharp_proto_library(
name = "discoveryengine_csharp_proto",
extra_opts = [],
deps = [":discoveryengine_proto"],
)

Expand Down
36 changes: 35 additions & 1 deletion google/cloud/discoveryengine/v1alpha/chunk.proto
Expand Up @@ -48,6 +48,34 @@ message Chunk {
string title = 2;
}

// Page span of the chunk, expressed as a start page and an end page.
message PageSpan {
// The start page of the chunk.
int32 page_start = 1;

// The end page of the chunk.
int32 page_end = 2;
}

// Metadata of the current chunk: the chunks that precede and follow it.
// This message is only populated on
// [SearchService.Search][google.cloud.discoveryengine.v1alpha.SearchService.Search]
// API.
message ChunkMetadata {
// The previous chunks of the current chunk. The number of chunks returned is
// controlled by
// [SearchRequest.ContentSearchSpec.ChunkSpec.num_previous_chunks][google.cloud.discoveryengine.v1alpha.SearchRequest.ContentSearchSpec.ChunkSpec.num_previous_chunks].
// This field is only populated on
// [SearchService.Search][google.cloud.discoveryengine.v1alpha.SearchService.Search]
// API.
repeated Chunk previous_chunks = 1;

// The next chunks of the current chunk. The number of chunks returned is
// controlled by
// [SearchRequest.ContentSearchSpec.ChunkSpec.num_next_chunks][google.cloud.discoveryengine.v1alpha.SearchRequest.ContentSearchSpec.ChunkSpec.num_next_chunks].
// This field is only populated on
// [SearchService.Search][google.cloud.discoveryengine.v1alpha.SearchService.Search]
// API.
repeated Chunk next_chunks = 2;
}

// The full resource name of the chunk.
// Format:
// `projects/{project}/locations/{location}/collections/{collection}/dataStores/{data_store}/branches/{branch}/documents/{document_id}/chunks/{chunk_id}`.
Expand All @@ -56,7 +84,7 @@ message Chunk {
// characters.
string name = 1;

// Unique chunk id of the current chunk.
// Unique chunk ID of the current chunk.
string id = 2;

// Content is a string from a document (parsed content).
Expand All @@ -69,4 +97,10 @@ message Chunk {
// It contains derived data that are not in the original input document.
google.protobuf.Struct derived_struct_data = 4
[(google.api.field_behavior) = OUTPUT_ONLY];

// Page span of the chunk.
PageSpan page_span = 6;

// Output only. Metadata of the current chunk.
ChunkMetadata chunk_metadata = 7 [(google.api.field_behavior) = OUTPUT_ONLY];
}
20 changes: 20 additions & 0 deletions google/cloud/discoveryengine/v1alpha/common.proto
Expand Up @@ -39,6 +39,18 @@ option (google.api.resource_definition) = {
type: "discoveryengine.googleapis.com/Location"
pattern: "projects/{project}/locations/{location}"
};
// GroundingConfig resource, nested directly under a project location.
option (google.api.resource_definition) = {
type: "discoveryengine.googleapis.com/GroundingConfig"
pattern: "projects/{project}/locations/{location}/groundingConfigs/{grounding_config}"
};
// RankingConfig resource, nested directly under a project location.
option (google.api.resource_definition) = {
type: "discoveryengine.googleapis.com/RankingConfig"
pattern: "projects/{project}/locations/{location}/rankingConfigs/{ranking_config}"
};
// FHIR store resource. Its type lives under the `healthcare.googleapis.com`
// service name rather than `discoveryengine.googleapis.com`, and it is nested
// under a dataset within a project location.
option (google.api.resource_definition) = {
type: "healthcare.googleapis.com/FhirStore"
pattern: "projects/{project}/locations/{location}/datasets/{dataset}/fhirStores/{fhir_store}"
};

// The industry vertical associated with the
// [DataStore][google.cloud.discoveryengine.v1alpha.DataStore].
Expand All @@ -52,6 +64,9 @@ enum IndustryVertical {

// The media industry vertical.
MEDIA = 2;

// The healthcare FHIR vertical.
HEALTHCARE_FHIR = 7;
}

// The type of solution.
Expand All @@ -67,6 +82,11 @@ enum SolutionType {

// Used for use cases related to the Generative AI agent.
SOLUTION_TYPE_CHAT = 3;

// Used for use cases related to the Generative Chat agent.
// It's used for the Generative chat engine only; the associated data stores
// must be enrolled with the `SOLUTION_TYPE_CHAT` solution.
SOLUTION_TYPE_GENERATIVE_CHAT = 4;
}

// Tiers of search features. Different tiers might have different
Expand Down
Expand Up @@ -12,6 +12,7 @@ apis:
- name: google.cloud.discoveryengine.v1alpha.DocumentService
- name: google.cloud.discoveryengine.v1alpha.EngineService
- name: google.cloud.discoveryengine.v1alpha.EstimateBillingService
- name: google.cloud.discoveryengine.v1alpha.RankService
- name: google.cloud.discoveryengine.v1alpha.RecommendationService
- name: google.cloud.discoveryengine.v1alpha.SchemaService
- name: google.cloud.discoveryengine.v1alpha.SearchService
Expand Down Expand Up @@ -45,6 +46,9 @@ types:
- name: google.cloud.discoveryengine.v1alpha.EstimateDataSizeMetadata
- name: google.cloud.discoveryengine.v1alpha.EstimateDataSizeResponse
- name: google.cloud.discoveryengine.v1alpha.FieldConfig
- name: google.cloud.discoveryengine.v1alpha.GroundingConfig
- name: google.cloud.discoveryengine.v1alpha.ImportCompletionSuggestionsMetadata
- name: google.cloud.discoveryengine.v1alpha.ImportCompletionSuggestionsResponse
- name: google.cloud.discoveryengine.v1alpha.ImportDocumentsMetadata
- name: google.cloud.discoveryengine.v1alpha.ImportDocumentsResponse
- name: google.cloud.discoveryengine.v1alpha.ImportSuggestionDenyListEntriesMetadata
Expand Down Expand Up @@ -87,6 +91,7 @@ http:
- get: '/v1alpha/{name=projects/*/locations/*/dataStores/*/branches/*/operations/*}'
- get: '/v1alpha/{name=projects/*/locations/*/dataStores/*/models/*/operations/*}'
- get: '/v1alpha/{name=projects/*/locations/*/dataStores/*/operations/*}'
- get: '/v1alpha/{name=projects/*/locations/*/evaluations/*/operations/*}'
- get: '/v1alpha/{name=projects/*/locations/*/operations/*}'
- get: '/v1alpha/{name=projects/*/operations/*}'
- selector: google.longrunning.Operations.ListOperations
Expand Down Expand Up @@ -148,6 +153,10 @@ authentication:
oauth:
canonical_scopes: |-
https://www.googleapis.com/auth/cloud-platform
- selector: google.cloud.discoveryengine.v1alpha.RankService.Rank
oauth:
canonical_scopes: |-
https://www.googleapis.com/auth/cloud-platform
- selector: google.cloud.discoveryengine.v1alpha.RecommendationService.Recommend
oauth:
canonical_scopes: |-
Expand Down
19 changes: 19 additions & 0 deletions google/cloud/discoveryengine/v1alpha/document.proto
Expand Up @@ -202,3 +202,22 @@ message Document {
google.protobuf.Timestamp index_time = 13
[(google.api.field_behavior) = OUTPUT_ONLY];
}

// The processed form of a
// [Document][google.cloud.discoveryengine.v1alpha.Document], as returned by
// [DocumentService.GetProcessedDocument][google.cloud.discoveryengine.v1alpha.DocumentService.GetProcessedDocument].
message ProcessedDocument {
// Output format of the processed document.
oneof processed_data_format {
// The JSON string representation of the processed document.
string json_data = 2;
}

// Required. Full resource name of the referenced document, in the format
// `projects/*/locations/*/collections/*/dataStores/*/branches/*/documents/*`.
string document = 1 [
(google.api.field_behavior) = REQUIRED,
(google.api.resource_reference) = {
type: "discoveryengine.googleapis.com/Document"
}
];
}
64 changes: 64 additions & 0 deletions google/cloud/discoveryengine/v1alpha/document_service.proto
Expand Up @@ -158,6 +158,19 @@ service DocumentService {
metadata_type: "google.cloud.discoveryengine.v1alpha.PurgeDocumentsMetadata"
};
}

// Gets the parsed layout information for a
// [Document][google.cloud.discoveryengine.v1alpha.Document].
rpc GetProcessedDocument(GetProcessedDocumentRequest)
returns (ProcessedDocument) {
// Exposed over HTTP GET as the `:getProcessedDocument` custom method on the
// document resource name. The additional binding accepts the same name with
// the `collections/*` path segment included.
option (google.api.http) = {
get: "/v1alpha/{name=projects/*/locations/*/dataStores/*/branches/*/documents/*}:getProcessedDocument"
additional_bindings {
get: "/v1alpha/{name=projects/*/locations/*/collections/*/dataStores/*/branches/*/documents/*}:getProcessedDocument"
}
};
// Declares a flattened method signature that takes only `name`.
option (google.api.method_signature) = "name";
}
}

// Request message for
Expand Down Expand Up @@ -322,3 +335,54 @@ message DeleteDocumentRequest {
}
];
}

// Request message for
// [DocumentService.GetProcessedDocument][google.cloud.discoveryengine.v1alpha.DocumentService.GetProcessedDocument]
// method.
message GetProcessedDocumentRequest {
// The type of processing to return in the response.
enum ProcessedDocumentType {
// Default value.
PROCESSED_DOCUMENT_TYPE_UNSPECIFIED = 0;

// Available for all data store parsing configs.
PARSED_DOCUMENT = 1;

// Only available if ChunkingConfig is enabled on the data store.
CHUNKED_DOCUMENT = 2;
}

// The format of the returned processed document. If unspecified, defaults to
// JSON.
enum ProcessedDocumentFormat {
// Default value.
PROCESSED_DOCUMENT_FORMAT_UNSPECIFIED = 0;

// Output format will be a JSON string representation of the processed
// document.
JSON = 1;
}

// Required. Full resource name of
// [Document][google.cloud.discoveryengine.v1alpha.Document], such as
// `projects/{project}/locations/{location}/collections/{collection}/dataStores/{data_store}/branches/{branch}/documents/{document}`.
//
// If the caller does not have permission to access the
// [Document][google.cloud.discoveryengine.v1alpha.Document], regardless of
// whether or not it exists, a `PERMISSION_DENIED` error is returned.
//
// If the requested [Document][google.cloud.discoveryengine.v1alpha.Document]
// does not exist, a `NOT_FOUND` error is returned.
string name = 1 [
(google.api.field_behavior) = REQUIRED,
(google.api.resource_reference) = {
type: "discoveryengine.googleapis.com/Document"
}
];

// Required. What type of processing to return.
ProcessedDocumentType processed_document_type = 2
[(google.api.field_behavior) = REQUIRED];

// What format output should be. If unspecified, defaults to JSON.
ProcessedDocumentFormat processed_document_format = 3;
}

0 comments on commit 624b052

Please sign in to comment.