From 624b052e0d1306adf64587e0251574ae01719714 Mon Sep 17 00:00:00 2001 From: Google APIs Date: Thu, 4 Apr 2024 10:44:55 -0700 Subject: [PATCH] feat: support import data from Cloud Spanner, BigTable, SQL and Firestore feat: support standalone ranking API feat: support layout detection and more chunking features feat: support advanced search boosting docs: keep the API doc up-to-date with recent changes PiperOrigin-RevId: 621906335 --- .../cloud/discoveryengine/v1alpha/BUILD.bazel | 4 + .../cloud/discoveryengine/v1alpha/chunk.proto | 36 +- .../discoveryengine/v1alpha/common.proto | 20 ++ .../v1alpha/discoveryengine_v1alpha.yaml | 9 + .../discoveryengine/v1alpha/document.proto | 19 + .../v1alpha/document_service.proto | 64 ++++ .../v1alpha/import_config.proto | 333 ++++++++++++++++-- .../v1alpha/rank_service.proto | 115 ++++++ .../v1alpha/search_service.proto | 176 +++++++-- .../v1alpha/search_tuning_service.proto | 7 +- .../v1alpha/serving_config_service.proto | 3 +- 11 files changed, 738 insertions(+), 48 deletions(-) create mode 100644 google/cloud/discoveryengine/v1alpha/rank_service.proto diff --git a/google/cloud/discoveryengine/v1alpha/BUILD.bazel b/google/cloud/discoveryengine/v1alpha/BUILD.bazel index 136e7526f731c..6e8266f484f78 100644 --- a/google/cloud/discoveryengine/v1alpha/BUILD.bazel +++ b/google/cloud/discoveryengine/v1alpha/BUILD.bazel @@ -43,6 +43,7 @@ proto_library( "estimate_billing_service.proto", "import_config.proto", "purge_config.proto", + "rank_service.proto", "recommendation_service.proto", "schema.proto", "schema_service.proto", @@ -143,6 +144,8 @@ java_gapic_test( "com.google.cloud.discoveryengine.v1alpha.EngineServiceClientTest", "com.google.cloud.discoveryengine.v1alpha.EstimateBillingServiceClientHttpJsonTest", "com.google.cloud.discoveryengine.v1alpha.EstimateBillingServiceClientTest", + "com.google.cloud.discoveryengine.v1alpha.RankServiceClientHttpJsonTest", + "com.google.cloud.discoveryengine.v1alpha.RankServiceClientTest", 
"com.google.cloud.discoveryengine.v1alpha.RecommendationServiceClientHttpJsonTest", "com.google.cloud.discoveryengine.v1alpha.RecommendationServiceClientTest", "com.google.cloud.discoveryengine.v1alpha.SchemaServiceClientHttpJsonTest", @@ -403,6 +406,7 @@ load( csharp_proto_library( name = "discoveryengine_csharp_proto", + extra_opts = [], deps = [":discoveryengine_proto"], ) diff --git a/google/cloud/discoveryengine/v1alpha/chunk.proto b/google/cloud/discoveryengine/v1alpha/chunk.proto index 0d3578d264528..a30c5dd274f2d 100644 --- a/google/cloud/discoveryengine/v1alpha/chunk.proto +++ b/google/cloud/discoveryengine/v1alpha/chunk.proto @@ -48,6 +48,34 @@ message Chunk { string title = 2; } + // Page span of the chunk. + message PageSpan { + // The start page of the chunk. + int32 page_start = 1; + + // The end page of the chunk. + int32 page_end = 2; + } + + // Metadata of the current chunk. This field is only populated on + // [SearchService.Search][google.cloud.discoveryengine.v1alpha.SearchService.Search] + // API. + message ChunkMetadata { + // The previous chunks of the current chunk. The number is controlled by + // [SearchRequest.ContentSearchSpec.ChunkSpec.num_previous_chunks][google.cloud.discoveryengine.v1alpha.SearchRequest.ContentSearchSpec.ChunkSpec.num_previous_chunks]. + // This field is only populated on + // [SearchService.Search][google.cloud.discoveryengine.v1alpha.SearchService.Search] + // API. + repeated Chunk previous_chunks = 1; + + // The next chunks of the current chunk. The number is controlled by + // [SearchRequest.ContentSearchSpec.ChunkSpec.num_next_chunks][google.cloud.discoveryengine.v1alpha.SearchRequest.ContentSearchSpec.ChunkSpec.num_next_chunks]. + // This field is only populated on + // [SearchService.Search][google.cloud.discoveryengine.v1alpha.SearchService.Search] + // API. + repeated Chunk next_chunks = 2; + } + // The full resource name of the chunk. 
// Format: // `projects/{project}/locations/{location}/collections/{collection}/dataStores/{data_store}/branches/{branch}/documents/{document_id}/chunks/{chunk_id}`. @@ -56,7 +84,7 @@ message Chunk { // characters. string name = 1; - // Unique chunk id of the current chunk. + // Unique chunk ID of the current chunk. string id = 2; // Content is a string from a document (parsed content). @@ -69,4 +97,10 @@ message Chunk { // It contains derived data that are not in the original input document. google.protobuf.Struct derived_struct_data = 4 [(google.api.field_behavior) = OUTPUT_ONLY]; + + // Page span of the chunk. + PageSpan page_span = 6; + + // Output only. Metadata of the current chunk. + ChunkMetadata chunk_metadata = 7 [(google.api.field_behavior) = OUTPUT_ONLY]; } diff --git a/google/cloud/discoveryengine/v1alpha/common.proto b/google/cloud/discoveryengine/v1alpha/common.proto index c096c9c489800..026a6bf6a92e8 100644 --- a/google/cloud/discoveryengine/v1alpha/common.proto +++ b/google/cloud/discoveryengine/v1alpha/common.proto @@ -39,6 +39,18 @@ option (google.api.resource_definition) = { type: "discoveryengine.googleapis.com/Location" pattern: "projects/{project}/locations/{location}" }; +option (google.api.resource_definition) = { + type: "discoveryengine.googleapis.com/GroundingConfig" + pattern: "projects/{project}/locations/{location}/groundingConfigs/{grounding_config}" +}; +option (google.api.resource_definition) = { + type: "discoveryengine.googleapis.com/RankingConfig" + pattern: "projects/{project}/locations/{location}/rankingConfigs/{ranking_config}" +}; +option (google.api.resource_definition) = { + type: "healthcare.googleapis.com/FhirStore" + pattern: "projects/{project}/locations/{location}/datasets/{dataset}/fhirStores/{fhir_store}" +}; // The industry vertical associated with the // [DataStore][google.cloud.discoveryengine.v1alpha.DataStore]. @@ -52,6 +64,9 @@ enum IndustryVertical { // The media industry vertical. 
MEDIA = 2; + + // The healthcare FHIR vertical. + HEALTHCARE_FHIR = 7; } // The type of solution. @@ -67,6 +82,11 @@ enum SolutionType { // Used for use cases related to the Generative AI agent. SOLUTION_TYPE_CHAT = 3; + + // Used for use cases related to the Generative Chat agent. + // It's used for Generative chat engine only, the associated data stores + // must be enrolled with `SOLUTION_TYPE_CHAT` solution. + SOLUTION_TYPE_GENERATIVE_CHAT = 4; } // Tiers of search features. Different tiers might have different diff --git a/google/cloud/discoveryengine/v1alpha/discoveryengine_v1alpha.yaml b/google/cloud/discoveryengine/v1alpha/discoveryengine_v1alpha.yaml index 85f0b637506a6..3d8a8580b948f 100644 --- a/google/cloud/discoveryengine/v1alpha/discoveryengine_v1alpha.yaml +++ b/google/cloud/discoveryengine/v1alpha/discoveryengine_v1alpha.yaml @@ -12,6 +12,7 @@ apis: - name: google.cloud.discoveryengine.v1alpha.DocumentService - name: google.cloud.discoveryengine.v1alpha.EngineService - name: google.cloud.discoveryengine.v1alpha.EstimateBillingService +- name: google.cloud.discoveryengine.v1alpha.RankService - name: google.cloud.discoveryengine.v1alpha.RecommendationService - name: google.cloud.discoveryengine.v1alpha.SchemaService - name: google.cloud.discoveryengine.v1alpha.SearchService @@ -45,6 +46,9 @@ types: - name: google.cloud.discoveryengine.v1alpha.EstimateDataSizeMetadata - name: google.cloud.discoveryengine.v1alpha.EstimateDataSizeResponse - name: google.cloud.discoveryengine.v1alpha.FieldConfig +- name: google.cloud.discoveryengine.v1alpha.GroundingConfig +- name: google.cloud.discoveryengine.v1alpha.ImportCompletionSuggestionsMetadata +- name: google.cloud.discoveryengine.v1alpha.ImportCompletionSuggestionsResponse - name: google.cloud.discoveryengine.v1alpha.ImportDocumentsMetadata - name: google.cloud.discoveryengine.v1alpha.ImportDocumentsResponse - name: google.cloud.discoveryengine.v1alpha.ImportSuggestionDenyListEntriesMetadata @@ -87,6 +91,7 @@ 
http: - get: '/v1alpha/{name=projects/*/locations/*/dataStores/*/branches/*/operations/*}' - get: '/v1alpha/{name=projects/*/locations/*/dataStores/*/models/*/operations/*}' - get: '/v1alpha/{name=projects/*/locations/*/dataStores/*/operations/*}' + - get: '/v1alpha/{name=projects/*/locations/*/evaluations/*/operations/*}' - get: '/v1alpha/{name=projects/*/locations/*/operations/*}' - get: '/v1alpha/{name=projects/*/operations/*}' - selector: google.longrunning.Operations.ListOperations @@ -148,6 +153,10 @@ authentication: oauth: canonical_scopes: |- https://www.googleapis.com/auth/cloud-platform + - selector: google.cloud.discoveryengine.v1alpha.RankService.Rank + oauth: + canonical_scopes: |- + https://www.googleapis.com/auth/cloud-platform - selector: google.cloud.discoveryengine.v1alpha.RecommendationService.Recommend oauth: canonical_scopes: |- diff --git a/google/cloud/discoveryengine/v1alpha/document.proto b/google/cloud/discoveryengine/v1alpha/document.proto index 88fd88868243a..cead31c531a5a 100644 --- a/google/cloud/discoveryengine/v1alpha/document.proto +++ b/google/cloud/discoveryengine/v1alpha/document.proto @@ -202,3 +202,22 @@ message Document { google.protobuf.Timestamp index_time = 13 [(google.api.field_behavior) = OUTPUT_ONLY]; } + +// Document captures all raw metadata information of items to be recommended or +// searched. +message ProcessedDocument { + // Output format of the processed document. + oneof processed_data_format { + // The JSON string representation of the processed document. + string json_data = 2; + } + + // Required. Full resource name of the referenced document, in the format + // `projects/*/locations/*/collections/*/dataStores/*/branches/*/documents/*`. 
+ string document = 1 [ + (google.api.field_behavior) = REQUIRED, + (google.api.resource_reference) = { + type: "discoveryengine.googleapis.com/Document" + } + ]; +} diff --git a/google/cloud/discoveryengine/v1alpha/document_service.proto b/google/cloud/discoveryengine/v1alpha/document_service.proto index c4d18d23ffd44..6e954a9f6cd2e 100644 --- a/google/cloud/discoveryengine/v1alpha/document_service.proto +++ b/google/cloud/discoveryengine/v1alpha/document_service.proto @@ -158,6 +158,19 @@ service DocumentService { metadata_type: "google.cloud.discoveryengine.v1alpha.PurgeDocumentsMetadata" }; } + + // Gets the parsed layout information for a + // [Document][google.cloud.discoveryengine.v1alpha.Document]. + rpc GetProcessedDocument(GetProcessedDocumentRequest) + returns (ProcessedDocument) { + option (google.api.http) = { + get: "/v1alpha/{name=projects/*/locations/*/dataStores/*/branches/*/documents/*}:getProcessedDocument" + additional_bindings { + get: "/v1alpha/{name=projects/*/locations/*/collections/*/dataStores/*/branches/*/documents/*}:getProcessedDocument" + } + }; + option (google.api.method_signature) = "name"; + } } // Request message for @@ -322,3 +335,54 @@ message DeleteDocumentRequest { } ]; } + +// Request message for +// [DocumentService.GetProcessedDocument][google.cloud.discoveryengine.v1alpha.DocumentService.GetProcessedDocument] +// method. +message GetProcessedDocumentRequest { + // The type of processing to return in the response. + enum ProcessedDocumentType { + // Default value. + PROCESSED_DOCUMENT_TYPE_UNSPECIFIED = 0; + + // Available for all data store parsing configs. + PARSED_DOCUMENT = 1; + + // Only available if ChunkingConfig is enabled on the data store. + CHUNKED_DOCUMENT = 2; + } + + // The format of the returned processed document. If unspecified, defaults to + // JSON. + enum ProcessedDocumentFormat { + // Default value. 
+ PROCESSED_DOCUMENT_FORMAT_UNSPECIFIED = 0; + + // output format will be a JSON string representation of processed document. + JSON = 1; + } + + // Required. Full resource name of + // [Document][google.cloud.discoveryengine.v1alpha.Document], such as + // `projects/{project}/locations/{location}/collections/{collection}/dataStores/{data_store}/branches/{branch}/documents/{document}`. + // + // If the caller does not have permission to access the + // [Document][google.cloud.discoveryengine.v1alpha.Document], regardless of + // whether or not it exists, a `PERMISSION_DENIED` error is returned. + // + // If the requested [Document][google.cloud.discoveryengine.v1alpha.Document] + // does not exist, a `NOT_FOUND` error is returned. + string name = 1 [ + (google.api.field_behavior) = REQUIRED, + (google.api.resource_reference) = { + type: "discoveryengine.googleapis.com/Document" + } + ]; + + // Required. What type of processing to return. + ProcessedDocumentType processed_document_type = 2 + [(google.api.field_behavior) = REQUIRED]; + + // What format output should be. If unspecified, defaults to JSON. 
+ ProcessedDocumentFormat processed_document_format = 3; +} diff --git a/google/cloud/discoveryengine/v1alpha/import_config.proto b/google/cloud/discoveryengine/v1alpha/import_config.proto index f6c45aecf186e..062406a9b5f9a 100644 --- a/google/cloud/discoveryengine/v1alpha/import_config.proto +++ b/google/cloud/discoveryengine/v1alpha/import_config.proto @@ -21,6 +21,7 @@ import "google/api/resource.proto"; import "google/cloud/discoveryengine/v1alpha/completion.proto"; import "google/cloud/discoveryengine/v1alpha/document.proto"; import "google/cloud/discoveryengine/v1alpha/user_event.proto"; +import "google/protobuf/field_mask.proto"; import "google/protobuf/timestamp.proto"; import "google/rpc/status.proto"; import "google/type/date.proto"; @@ -123,6 +124,233 @@ message BigQuerySource { string data_schema = 6; } +// The Spanner source for importing data +message SpannerSource { + // The project ID that the Spanner source is in with a length limit of 128 + // characters. If not specified, inherits the project ID from the parent + // request. + string project_id = 1; + + // Required. The instance ID of the source Spanner table. + string instance_id = 2 [(google.api.field_behavior) = REQUIRED]; + + // Required. The database ID of the source Spanner table. + string database_id = 3 [(google.api.field_behavior) = REQUIRED]; + + // Required. The table name of the Spanner database that needs to be imported. + string table_id = 4 [(google.api.field_behavior) = REQUIRED]; + + // Whether to apply data boost on Spanner export. Enabling this option will + // incur additional cost. More info can be found + // [here](https://cloud.google.com/spanner/docs/databoost/databoost-overview#billing_and_quotas). + bool enable_data_boost = 5; +} + +// The Bigtable Options object that contains information to support +// the import. +message BigtableOptions { + // The column family of the Bigtable. 
+ message BigtableColumnFamily { + // The field name to use for this column family in the document. The + // name has to match the pattern `[a-zA-Z0-9][a-zA-Z0-9-_]*`. If not set, + // it is parsed from the family name with best effort. However, due to + // different naming patterns, field name collisions could happen, where + // parsing behavior is undefined. + string field_name = 1; + + // The encoding mode of the values when the type is not STRING. + // Acceptable encoding values are: + // + // * `TEXT`: indicates values are alphanumeric text strings. + // * `BINARY`: indicates values are encoded using `HBase Bytes.toBytes` + // family of functions. This can be overridden for a specific column + // by listing that column in `columns` and specifying an encoding for it. + Encoding encoding = 2; + + // The type of values in this column family. + // The values are expected to be encoded using `HBase Bytes.toBytes` + // function when the encoding value is set to `BINARY`. + Type type = 3; + + // The list of objects that contains column level information for each + // column. If a column is not present in this list it will be ignored. + repeated BigtableColumn columns = 4; + } + + // The column of the Bigtable. + message BigtableColumn { + // Required. Qualifier of the column. If it cannot be decoded with utf-8, + // use a base-64 encoded string instead. + bytes qualifier = 1 [(google.api.field_behavior) = REQUIRED]; + + // The field name to use for this column in the document. The name has to + // match the pattern `[a-zA-Z0-9][a-zA-Z0-9-_]*`. + // If not set, it is parsed from the qualifier bytes with best effort. + // However, due to different naming patterns, field name collisions could + // happen, where parsing behavior is undefined. + string field_name = 2; + + // The encoding mode of the values when the type is not `STRING`. + // Acceptable encoding values are: + // + // * `TEXT`: indicates values are alphanumeric text strings. 
+ // * `BINARY`: indicates values are encoded using `HBase Bytes.toBytes` + // family of functions. This can be overridden for a specific column + // by listing that column in `columns` and specifying an encoding for it. + Encoding encoding = 3; + + // The type of values in this column family. + // The values are expected to be encoded using `HBase Bytes.toBytes` + // function when the encoding value is set to `BINARY`. + Type type = 4; + } + + // The type of values in a Bigtable column or column family. + // The values are expected to be encoded using + // [HBase + // Bytes.toBytes](https://hbase.apache.org/apidocs/org/apache/hadoop/hbase/util/Bytes.html) + // function when the encoding value is set to `BINARY`. + enum Type { + // The type is unspecified. + TYPE_UNSPECIFIED = 0; + + // String type. + STRING = 1; + + // Numerical type. + NUMBER = 2; + + // Integer type. + INTEGER = 3; + + // Variable length integer type. + VAR_INTEGER = 4; + + // BigDecimal type. + BIG_NUMERIC = 5; + + // Boolean type. + BOOLEAN = 6; + + // JSON type. + JSON = 7; + } + + // The encoding mode of a Bigtable column or column family. + enum Encoding { + // The encoding is unspecified. + ENCODING_UNSPECIFIED = 0; + + // Text encoding. + TEXT = 1; + + // Binary encoding. + BINARY = 2; + } + + // The field name used for saving row key value in the document. The name has + // to match the pattern `[a-zA-Z0-9][a-zA-Z0-9-_]*`. + string key_field_name = 1; + + // The mapping from family names to an object that contains column families + // level information for the given column family. If a family is not present + // in this map it will be ignored. + map<string, BigtableColumnFamily> families = 2; +} + +// The Cloud Bigtable source for importing data. +message BigtableSource { + // The project ID that the Bigtable source is in with a length limit of 128 + // characters. If not specified, inherits the project ID from the parent + // request. + string project_id = 1; + + // Required. 
The instance ID of the Cloud Bigtable that needs to be imported. + string instance_id = 2 [(google.api.field_behavior) = REQUIRED]; + + // Required. The table ID of the Cloud Bigtable that needs to be imported. + string table_id = 3 [(google.api.field_behavior) = REQUIRED]; + + // Required. Bigtable options that contains information needed when parsing + // data into typed structures. For example, column type annotations. + BigtableOptions bigtable_options = 4 [(google.api.field_behavior) = REQUIRED]; +} + +// Cloud FhirStore source import data from. +message FhirStoreSource { + // Required. The full resource name of the FHIR store to import data from, in + // the format of + // `projects/{project}/locations/{location}/datasets/{dataset}/fhirStores/{fhir_store}`. + string fhir_store = 1 [ + (google.api.field_behavior) = REQUIRED, + (google.api.resource_reference) = { + type: "healthcare.googleapis.com/FhirStore" + } + ]; + + // Intermediate Cloud Storage directory used for the import with a length + // limit of 2,000 characters. Can be specified if one wants to have the + // FhirStore export to a specific Cloud Storage directory. + string gcs_staging_dir = 2; +} + +// Cloud SQL source import data from. +message CloudSqlSource { + // The project ID that the Cloud SQL source is in with a length limit of 128 + // characters. If not specified, inherits the project ID from the parent + // request. + string project_id = 1; + + // Required. The Cloud SQL instance to copy the data from with a length limit + // of 256 characters. + string instance_id = 2 [(google.api.field_behavior) = REQUIRED]; + + // Required. The Cloud SQL database to copy the data from with a length limit + // of 256 characters. + string database_id = 3 [(google.api.field_behavior) = REQUIRED]; + + // Required. The Cloud SQL table to copy the data from with a length limit of + // 256 characters. 
+ string table_id = 4 [(google.api.field_behavior) = REQUIRED]; + + // Intermediate Cloud Storage directory used for the import with a length + // limit of 2,000 characters. Can be specified if one wants to have the + // Cloud SQL export to a specific Cloud Storage directory. + // + // Please ensure that the Cloud SQL service account has the necessary Cloud + // Storage Admin permissions to access the specified Cloud Storage directory. + string gcs_staging_dir = 5; + + // Option for serverless export. Enabling this option will incur + // additional cost. More info can be found + // [here](https://cloud.google.com/sql/pricing#serverless). + bool offload = 6; +} + +// Firestore source import data from. +message FirestoreSource { + // The project ID that the Firestore source is in with a length limit of 128 + // characters. If not specified, inherits the project ID from the parent + // request. + string project_id = 1; + + // Required. The Firestore database to copy the data from with a length limit + // of 256 characters. + string database_id = 2 [(google.api.field_behavior) = REQUIRED]; + + // Required. The Firestore collection to copy the data from with a length + // limit of 1,500 characters. + string collection_id = 3 [(google.api.field_behavior) = REQUIRED]; + + // Intermediate Cloud Storage directory used for the import with a length + // limit of 2,000 characters. Can be specified if one wants to have the + // Firestore export to a specific Cloud Storage directory. + // + // Please ensure that the Firestore service account has the necessary Cloud + // Storage Admin permissions to access the specified Cloud Storage directory. + string gcs_staging_dir = 4; +} + // Configuration of destination for Import related errors. message ImportErrorConfig { // Required. Errors destination. @@ -220,6 +448,9 @@ message ImportDocumentsMetadata { // Count of entries that encountered errors while processing. 
int64 failure_count = 4; + + // Total count of entries that were processed. + int64 total_count = 5; } // Request message for Import methods. @@ -256,6 +487,21 @@ message ImportDocumentsRequest { // BigQuery input source. BigQuerySource bigquery_source = 4; + + // FhirStore input source. + FhirStoreSource fhir_store_source = 10; + + // Spanner input source. + SpannerSource spanner_source = 11; + + // Cloud SQL input source. + CloudSqlSource cloud_sql_source = 12; + + // Firestore input source. + FirestoreSource firestore_source = 13; + + // Cloud Bigtable input source. + BigtableSource bigtable_source = 15; } // Required. The parent branch resource name, such as @@ -276,6 +522,10 @@ message ImportDocumentsRequest { // [ReconciliationMode.INCREMENTAL][google.cloud.discoveryengine.v1alpha.ImportDocumentsRequest.ReconciliationMode.INCREMENTAL]. ReconciliationMode reconciliation_mode = 6; + // Indicates which fields in the provided imported documents to update. If + // not set, the default is to update all fields. + google.protobuf.FieldMask update_mask = 7; + // Whether to automatically generate IDs for the documents if absent. // // If set to `true`, @@ -289,45 +539,54 @@ message ImportDocumentsRequest { // [id_field][google.cloud.discoveryengine.v1alpha.ImportDocumentsRequest.id_field], // otherwise, documents without IDs fail to be imported. // - // Only set this field when using - // [GcsSource][google.cloud.discoveryengine.v1alpha.GcsSource] or - // [BigQuerySource][google.cloud.discoveryengine.v1alpha.BigQuerySource], and - // when + // Supported data sources: + // + // * [GcsSource][google.cloud.discoveryengine.v1alpha.GcsSource]. // [GcsSource.data_schema][google.cloud.discoveryengine.v1alpha.GcsSource.data_schema] - // or + // must be `custom` or `csv`. Otherwise, an INVALID_ARGUMENT error is thrown. + // * [BigQuerySource][google.cloud.discoveryengine.v1alpha.BigQuerySource]. 
// [BigQuerySource.data_schema][google.cloud.discoveryengine.v1alpha.BigQuerySource.data_schema] - // is `custom` or `csv`. Otherwise, an INVALID_ARGUMENT error is thrown. + // must be `custom` or `csv`. Otherwise, an INVALID_ARGUMENT error is thrown. + // * [SpannerSource][google.cloud.discoveryengine.v1alpha.SpannerSource]. + // * [CloudSqlSource][google.cloud.discoveryengine.v1alpha.CloudSqlSource]. + // * [FirestoreSource][google.cloud.discoveryengine.v1alpha.FirestoreSource]. + // * [BigtableSource][google.cloud.discoveryengine.v1alpha.BigtableSource]. bool auto_generate_ids = 8; - // The field in the Cloud Storage and BigQuery sources that indicates the - // unique IDs of the documents. + // The field indicates the ID field or column to be used as unique IDs of + // the documents. // // For [GcsSource][google.cloud.discoveryengine.v1alpha.GcsSource] it is the // key of the JSON field. For instance, `my_id` for JSON `{"my_id": - // "some_uuid"}`. For - // [BigQuerySource][google.cloud.discoveryengine.v1alpha.BigQuerySource] it is - // the column name of the BigQuery table where the unique ids are stored. + // "some_uuid"}`. For others, it may be the column name of the table where the + // unique ids are stored. // - // The values of the JSON field or the BigQuery column are used as the + // The values of the JSON field or the table column are used as the // [Document.id][google.cloud.discoveryengine.v1alpha.Document.id]s. The JSON - // field or the BigQuery column must be of string type, and the values must be + // field or the table column must be of string type, and the values must be // set as valid strings conform to // [RFC-1034](https://tools.ietf.org/html/rfc1034) with 1-63 characters. // Otherwise, documents without valid IDs fail to be imported. 
// - // Only set this field when using - // [GcsSource][google.cloud.discoveryengine.v1alpha.GcsSource] or - // [BigQuerySource][google.cloud.discoveryengine.v1alpha.BigQuerySource], and - // when - // [GcsSource.data_schema][google.cloud.discoveryengine.v1alpha.GcsSource.data_schema] - // or - // [BigQuerySource.data_schema][google.cloud.discoveryengine.v1alpha.BigQuerySource.data_schema] - // is `custom`. And only set this field when + // Only set this field when // [auto_generate_ids][google.cloud.discoveryengine.v1alpha.ImportDocumentsRequest.auto_generate_ids] // is unset or set as `false`. Otherwise, an INVALID_ARGUMENT error is thrown. // // If it is unset, a default value `_id` is used when importing from the // allowed data sources. + // + // Supported data sources: + // + // * [GcsSource][google.cloud.discoveryengine.v1alpha.GcsSource]. + // [GcsSource.data_schema][google.cloud.discoveryengine.v1alpha.GcsSource.data_schema] + // must be `custom` or `csv`. Otherwise, an INVALID_ARGUMENT error is thrown. + // * [BigQuerySource][google.cloud.discoveryengine.v1alpha.BigQuerySource]. + // [BigQuerySource.data_schema][google.cloud.discoveryengine.v1alpha.BigQuerySource.data_schema] + // must be `custom` or `csv`. Otherwise, an INVALID_ARGUMENT error is thrown. + // * [SpannerSource][google.cloud.discoveryengine.v1alpha.SpannerSource]. + // * [CloudSqlSource][google.cloud.discoveryengine.v1alpha.CloudSqlSource]. + // * [FirestoreSource][google.cloud.discoveryengine.v1alpha.FirestoreSource]. + // * [BigtableSource][google.cloud.discoveryengine.v1alpha.BigtableSource]. string id_field = 9; } @@ -405,3 +664,35 @@ message ImportSuggestionDenyListEntriesMetadata { // finish time. google.protobuf.Timestamp update_time = 2; } + +// Response of the +// [CompletionService.ImportCompletionSuggestions][google.cloud.discoveryengine.v1alpha.CompletionService.ImportCompletionSuggestions] +// method. 
If the long running operation is done, this message is returned by +// the google.longrunning.Operations.response field if the operation is +// successful. +message ImportCompletionSuggestionsResponse { + // A sample of errors encountered while processing the request. + repeated google.rpc.Status error_samples = 1; + + // Count of + // [CompletionSuggestion][google.cloud.discoveryengine.v1alpha.CompletionSuggestion]s + // successfully imported. + int64 success_count = 2; + + // Count of + // [CompletionSuggestion][google.cloud.discoveryengine.v1alpha.CompletionSuggestion]s + // that failed to be imported. + int64 failure_count = 3; +} + +// Metadata related to the progress of the ImportCompletionSuggestions +// operation. This will be returned by the google.longrunning.Operation.metadata +// field. +message ImportCompletionSuggestionsMetadata { + // Operation create time. + google.protobuf.Timestamp create_time = 1; + + // Operation last update time. If the operation is done, this is also the + // finish time. + google.protobuf.Timestamp update_time = 2; +} diff --git a/google/cloud/discoveryengine/v1alpha/rank_service.proto b/google/cloud/discoveryengine/v1alpha/rank_service.proto new file mode 100644 index 0000000000000..b10073f2f38be --- /dev/null +++ b/google/cloud/discoveryengine/v1alpha/rank_service.proto @@ -0,0 +1,115 @@ +// Copyright 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +syntax = "proto3"; + +package google.cloud.discoveryengine.v1alpha; + +import "google/api/annotations.proto"; +import "google/api/client.proto"; +import "google/api/field_behavior.proto"; +import "google/api/resource.proto"; + +option csharp_namespace = "Google.Cloud.DiscoveryEngine.V1Alpha"; +option go_package = "cloud.google.com/go/discoveryengine/apiv1alpha/discoveryenginepb;discoveryenginepb"; +option java_multiple_files = true; +option java_outer_classname = "RankServiceProto"; +option java_package = "com.google.cloud.discoveryengine.v1alpha"; +option objc_class_prefix = "DISCOVERYENGINE"; +option php_namespace = "Google\\Cloud\\DiscoveryEngine\\V1alpha"; +option ruby_package = "Google::Cloud::DiscoveryEngine::V1alpha"; + +// Service for ranking text records. +service RankService { + option (google.api.default_host) = "discoveryengine.googleapis.com"; + option (google.api.oauth_scopes) = + "https://www.googleapis.com/auth/cloud-platform"; + + // Ranks a list of text records based on the given input query. + rpc Rank(RankRequest) returns (RankResponse) { + option (google.api.http) = { + post: "/v1alpha/{ranking_config=projects/*/locations/*/rankingConfigs/*}:rank" + body: "*" + }; + } +} + +// Record message for +// [RankService.Rank][google.cloud.discoveryengine.v1alpha.RankService.Rank] +// method. +message RankingRecord { + // The unique ID to represent the record. + string id = 1; + + // The title of the record. Empty by default. + // At least one of + // [title][google.cloud.discoveryengine.v1alpha.RankingRecord.title] or + // [content][google.cloud.discoveryengine.v1alpha.RankingRecord.content] + // should be set otherwise an INVALID_ARGUMENT error is thrown. + string title = 2; + + // The content of the record. Empty by default. 
+ // At least one of + // [title][google.cloud.discoveryengine.v1alpha.RankingRecord.title] or + // [content][google.cloud.discoveryengine.v1alpha.RankingRecord.content] + // should be set otherwise an INVALID_ARGUMENT error is thrown. + string content = 3; + + // The score of this record based on the given query and selected model. + float score = 4; +} + +// Request message for +// [RankService.Rank][google.cloud.discoveryengine.v1alpha.RankService.Rank] +// method. +message RankRequest { + // Required. The resource name of the rank service config, such as + // `projects/{project_num}/locations/{location_id}/rankingConfigs/default_ranking_config`. + string ranking_config = 1 [ + (google.api.field_behavior) = REQUIRED, + (google.api.resource_reference) = { + type: "discoveryengine.googleapis.com/RankingConfig" + } + ]; + + // The identifier of the model to use. It is one of: + // + // * `semantic-ranker-512@latest`: Semantic ranking model with maximum input + // token size 512. + // + // It is set to `semantic-ranker-512@latest` by default if unspecified. + string model = 2; + + // The number of results to return. If this is unset or no bigger than zero, + // returns all results. + int32 top_n = 3; + + // The query to use. + string query = 4; + + // Required. A list of records to rank. At most 200 records to rank. + repeated RankingRecord records = 5 [(google.api.field_behavior) = REQUIRED]; + + // If true, the response will contain only record ID and score. By default, it + // is false, the response will contain record details. + bool ignore_record_details_in_response = 6; +} + +// Response message for +// [RankService.Rank][google.cloud.discoveryengine.v1alpha.RankService.Rank] +// method. +message RankResponse { + // A list of records sorted by descending score. 
+ repeated RankingRecord records = 5; +} diff --git a/google/cloud/discoveryengine/v1alpha/search_service.proto b/google/cloud/discoveryengine/v1alpha/search_service.proto index 920a6ed82fdf1..047a21975058e 100644 --- a/google/cloud/discoveryengine/v1alpha/search_service.proto +++ b/google/cloud/discoveryengine/v1alpha/search_service.proto @@ -218,6 +218,80 @@ message SearchRequest { message BoostSpec { // Boost applies to documents which match a condition. message ConditionBoostSpec { + // Specification for custom ranking based on customer specified attribute + // value. It provides more controls for customized ranking than the simple + // (condition, boost) combination above. + message BoostControlSpec { + // The control points used to define the curve. The curve defined + // through these control points can only be monotonically increasing + // or decreasing (constant values are acceptable). + message ControlPoint { + // Can be one of: + // 1. The numerical field value. + // 2. The duration spec for freshness: + // The value must be formatted as an XSD `dayTimeDuration` value (a + // restricted subset of an ISO 8601 duration value). The pattern for + // this is: `[nD][T[nH][nM][nS]]`. + string attribute_value = 1; + + // The value between -1 to 1 by which to boost the score if the + // attribute_value evaluates to the value specified above. + float boost_amount = 2; + } + + // The attribute (or function) for which the custom ranking is to be + // applied. + enum AttributeType { + // Unspecified AttributeType. + ATTRIBUTE_TYPE_UNSPECIFIED = 0; + + // The value of the numerical field will be used to dynamically update + // the boost amount. In this case, the attribute_value (the x value) + // of the control point will be the actual value of the numerical + // field for which the boost_amount is specified.
+ NUMERICAL = 1; + + // For the freshness use case the attribute value will be the duration + // between the current time and the date in the datetime field + // specified. The value must be formatted as an XSD `dayTimeDuration` + // value (a restricted subset of an ISO 8601 duration value). The + // pattern for this is: `[nD][T[nH][nM][nS]]`. + // E.g. `5D`, `3DT12H30M`, `T24H`. + FRESHNESS = 2; + } + + // The interpolation type to be applied. Default will be linear + // (Piecewise Linear). + enum InterpolationType { + // Interpolation type is unspecified. In this case, it defaults to + // Linear. + INTERPOLATION_TYPE_UNSPECIFIED = 0; + + // Piecewise linear interpolation will be applied. + LINEAR = 1; + } + + // The name of the field whose value will be used to determine the + // boost amount. + string field_name = 1; + + // The attribute type to be used to determine the boost amount. The + // attribute value can be derived from the field value of the specified + // field_name. In the case of numerical it is straightforward i.e. + // attribute_value = numerical_field_value. In the case of freshness + // however, attribute_value = (time.now() - datetime_field_value). + AttributeType attribute_type = 2; + + // The interpolation type to be applied to connect the control points + // listed below. + InterpolationType interpolation_type = 3; + + // The control points used to define the curve. The monotonic function + // (defined through the interpolation_type above) passes through the + // control points listed here. + repeated ControlPoint control_points = 4; + } + // An expression which specifies a boost condition. The syntax and // supported fields are the same as a filter expression. See // [SearchRequest.filter][google.cloud.discoveryengine.v1alpha.SearchRequest.filter] @@ -233,21 +307,27 @@ message SearchRequest { // Strength of the condition boost, which should be in [-1, 1]. Negative // boost means demotion. Default is 0.0. 
// - // Setting to 1.0 gives the document a big promotion. However, it does not - // necessarily mean that the boosted document will be the top result at - // all times, nor that other documents will be excluded. Results could - // still be shown even when none of them matches the condition. And - // results that are significantly more relevant to the search query can - // still trump your heavily favored but irrelevant documents. + // Setting to 1.0 gives the document a big promotion. However, it does + // not necessarily mean that the boosted document will be the top result + // at all times, nor that other documents will be excluded. Results + // could still be shown even when none of them matches the condition. + // And results that are significantly more relevant to the search query + // can still trump your heavily favored but irrelevant documents. // // Setting to -1.0 gives the document a big demotion. However, results // that are deeply relevant might still be shown. The document will have - // an upstream battle to get a fairly high ranking, but it is not blocked - // out completely. + // an upstream battle to get a fairly high ranking, but it is not + // blocked out completely. // // Setting to 0.0 means no boost applied. The boosting condition is - // ignored. + // ignored. Only one of the (condition, boost) combination or the + // boost_control_spec below are set. If both are set then the global boost + // is ignored and the more fine-grained boost_control_spec is applied. float boost = 2; + + // Complex specification for custom ranking based on customer defined + // attribute value. + BoostControlSpec boost_control_spec = 3; } // Condition boost specifications. If a document matches multiple conditions @@ -348,11 +428,13 @@ message SearchRequest { // Supported values are: // // * `stable`: string. Default value when no value is specified. Uses a - // generally available, fine-tuned version of the text-bison@001 - // model. - // * `preview`: string. 
(Public preview) Uses a fine-tuned version of - // the text-bison@002 model. This model works only for summaries in - // English. + // generally available, fine-tuned model. For more information, see + // [Answer generation model versions and + // lifecycle](https://cloud.google.com/generative-ai-app-builder/docs/answer-generation-models). + // * `preview`: string. (Public preview) Uses a preview model. For more + // information, see + // [Answer generation model versions and + // lifecycle](https://cloud.google.com/generative-ai-app-builder/docs/answer-generation-models). string version = 1; } @@ -419,6 +501,14 @@ message SearchRequest { // If specified, the spec will be used to modify the model specification // provided to the LLM. ModelSpec model_spec = 7; + + // If true, answer will be generated from most relevant chunks from top + // search results. This feature will improve summary quality. + // Please note that with this feature enabled, not all top search results + // will be referenced and included in the reference list, so the citation + // source index only points to the search results listed in the reference + // list. + bool use_semantic_chunks = 8; } // A specification for configuring the extractive content in a search @@ -459,10 +549,9 @@ message SearchRequest { int32 max_extractive_segment_count = 2; // Specifies whether to return the confidence score from the extractive - // segments in each search result. The default value is `false`. - // - // Note: this is a priavte preview feature and only works for allowlisted - // users, please reach out to Cloud Support team if you want to use it. + // segments in each search result. This feature is available only for new + // or allowlisted data stores. To allowlist your data store, please + // contact your Customer Engineer. The default value is `false`. 
bool return_extractive_segment_score = 3; // Specifies whether to also include the adjacent from each selected @@ -476,6 +565,23 @@ message SearchRequest { int32 num_next_segments = 5; } + // Specifies the chunk spec to be returned from the search response. + // Only available if the + // [SearchRequest.ContentSearchSpec.search_result_mode][google.cloud.discoveryengine.v1alpha.SearchRequest.ContentSearchSpec.search_result_mode] + // is set to + // [CHUNKS][google.cloud.discoveryengine.v1alpha.SearchRequest.ContentSearchSpec.SearchResultMode.CHUNKS] + message ChunkSpec { + // The number of previous chunks to be returned of the current chunk. The + // maximum allowed value is 3. + // If not specified, no previous chunks will be returned. + int32 num_previous_chunks = 1; + + // The number of next chunks to be returned of the current chunk. The + // maximum allowed value is 3. + // If not specified, no next chunks will be returned. + int32 num_next_chunks = 2; + } + // Specifies the search result mode. If unspecified, the // search result mode is based on // [DataStore.DocumentProcessingConfig.chunking_config][]: @@ -513,6 +619,13 @@ message SearchRequest { // it defaults to `CHUNKS`. // * Otherwise, it defaults to `DOCUMENTS`. SearchResultMode search_result_mode = 4; + + // Specifies the chunk spec to be returned from the search response. + // Only available if the + // [SearchRequest.ContentSearchSpec.search_result_mode][google.cloud.discoveryengine.v1alpha.SearchRequest.ContentSearchSpec.search_result_mode] + // is set to + // [CHUNKS][google.cloud.discoveryengine.v1alpha.SearchRequest.ContentSearchSpec.SearchResultMode.CHUNKS] + ChunkSpec chunk_spec = 5; } // The specification that uses customized query embedding vector to do @@ -560,11 +673,15 @@ message SearchRequest { ImageQuery image_query = 19; // Maximum number of - // [Document][google.cloud.discoveryengine.v1alpha.Document]s to return. If - // unspecified, defaults to a reasonable value. 
The maximum allowed value is - // 100. Values above 100 are coerced to 100. + // [Document][google.cloud.discoveryengine.v1alpha.Document]s to return. The + // maximum allowed value depends on the data type. Values above the maximum + // value are coerced to the maximum value. // - // If this field is negative, an `INVALID_ARGUMENT` is returned. + // * Websites with basic indexing: Default `10`, Maximum `25`. + // * Websites with advanced indexing: Default `25`, Maximum `50`. + // * Other: Default `50`, Maximum `100`. + // + // If this field is negative, an `INVALID_ARGUMENT` is returned. int32 page_size = 4; // A page token received from a previous @@ -701,7 +818,8 @@ message SearchRequest { // // If // [SearchRequest.EmbeddingSpec.EmbeddingVector.field_path][google.cloud.discoveryengine.v1alpha.SearchRequest.EmbeddingSpec.EmbeddingVector.field_path] - // is not provided, it will use [ServingConfig.EmbeddingConfig.field_path][]. + // is not provided, it will use + // [ServingConfig.EmbeddingConfig.field_path][google.cloud.discoveryengine.v1alpha.ServingConfig.embedding_config]. EmbeddingSpec embedding_spec = 23; // The ranking expression controls the customized ranking on retrieval @@ -867,6 +985,15 @@ message SearchResponse { // Document reference. message Reference { + // Chunk content. + message ChunkContent { + // Chunk textual content. + string content = 1; + + // Page identifier. + string page_identifier = 2; + } + // Title of the document. string title = 1; @@ -884,6 +1011,9 @@ message SearchResponse { // Cloud Storage or HTTP uri for the document. string uri = 3; + + // List of cited chunk contents derived from document content. + repeated ChunkContent chunk_contents = 4; } // Summary with metadata information. 
diff --git a/google/cloud/discoveryengine/v1alpha/search_tuning_service.proto b/google/cloud/discoveryengine/v1alpha/search_tuning_service.proto index 334edae3e3879..92d0479a9b907 100644 --- a/google/cloud/discoveryengine/v1alpha/search_tuning_service.proto +++ b/google/cloud/discoveryengine/v1alpha/search_tuning_service.proto @@ -65,8 +65,8 @@ message TrainCustomModelRequest { // A newline delimited jsonl/ndjson file. // // For search-tuning model, each line should have the _id, title - // and text. Example: {"_id": "doc1", title: "relevant doc", "text": - // "relevant text"} + // and text. Example: + // `{"_id": "doc1", title: "relevant doc", "text": "relevant text"}` string corpus_data_path = 1; // The gcs query data which could be associated in train data. @@ -137,6 +137,9 @@ message TrainCustomModelResponse { // * **in-progress**: Model training is in progress. // * **ready**: The model is ready for serving. string model_status = 3; + + // The metrics of the trained model. + map metrics = 4; } // Metadata related to the progress of the TrainCustomModel operation. This is diff --git a/google/cloud/discoveryengine/v1alpha/serving_config_service.proto b/google/cloud/discoveryengine/v1alpha/serving_config_service.proto index 22fd94fd53116..0cba972b1e673 100644 --- a/google/cloud/discoveryengine/v1alpha/serving_config_service.proto +++ b/google/cloud/discoveryengine/v1alpha/serving_config_service.proto @@ -32,7 +32,8 @@ option objc_class_prefix = "DISCOVERYENGINE"; option php_namespace = "Google\\Cloud\\DiscoveryEngine\\V1alpha"; option ruby_package = "Google::Cloud::DiscoveryEngine::V1alpha"; -// Service for operations related to [ServingConfig][]. +// Service for operations related to +// [ServingConfig][google.cloud.discoveryengine.v1alpha.ServingConfig]. service ServingConfigService { option (google.api.default_host) = "discoveryengine.googleapis.com"; option (google.api.oauth_scopes) =