// File: google/ai/generativelanguage/v1beta/generative_service.proto

// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.ai.generativelanguage.v1beta;

import "google/ai/generativelanguage/v1beta/citation.proto";
import "google/ai/generativelanguage/v1beta/content.proto";
import "google/ai/generativelanguage/v1beta/retriever.proto";
import "google/ai/generativelanguage/v1beta/safety.proto";
import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";
import "google/api/resource.proto";

// Go import path for the generated code; the identifier after `;` is the Go
// package name.
option go_package = "cloud.google.com/go/ai/generativelanguage/apiv1beta/generativelanguagepb;generativelanguagepb";
// Emit each top-level message, enum and service as its own Java file rather
// than nesting them inside the outer class.
option java_multiple_files = true;
// Name of the Java outer (wrapper) class generated for this file.
option java_outer_classname = "GenerativeServiceProto";
// Java package for the generated classes.
option java_package = "com.google.ai.generativelanguage.v1beta";

// API for using Large Models that generate multimodal content and have
// additional capabilities beyond text generation.
//
// Every method is exposed over HTTP as a custom `POST` method on the resource
// named by the request's `model` field (taken from the URL path); all other
// request fields are mapped to the HTTP body (`body: "*"`).
service GenerativeService {
  option (google.api.default_host) = "generativelanguage.googleapis.com";

  // Generates a response from the model given an input
  // `GenerateContentRequest`.
  //
  // In addition to `models/*`, the HTTP mapping accepts `tunedModels/*` as the
  // value of `model`.
  rpc GenerateContent(GenerateContentRequest)
      returns (GenerateContentResponse) {
    option (google.api.http) = {
      post: "/v1beta/{model=models/*}:generateContent"
      body: "*"
      additional_bindings {
        post: "/v1beta/{model=tunedModels/*}:generateContent"
        body: "*"
      }
    };
    option (google.api.method_signature) = "model,contents";
  }

  // Generates a grounded answer from the model given an input
  // `GenerateAnswerRequest`.
  rpc GenerateAnswer(GenerateAnswerRequest) returns (GenerateAnswerResponse) {
    option (google.api.http) = {
      post: "/v1beta/{model=models/*}:generateAnswer"
      body: "*"
    };
    option (google.api.method_signature) =
        "model,contents,safety_settings,answer_style";
  }

  // Generates a streamed response from the model given an input
  // `GenerateContentRequest`.
  //
  // The response is a server-side stream of `GenerateContentResponse`
  // messages. Only `models/*` is bound here; unlike `GenerateContent`, there is
  // no `tunedModels/*` binding.
  rpc StreamGenerateContent(GenerateContentRequest)
      returns (stream GenerateContentResponse) {
    option (google.api.http) = {
      post: "/v1beta/{model=models/*}:streamGenerateContent"
      body: "*"
    };
    option (google.api.method_signature) = "model,contents";
  }

  // Generates an embedding from the model given an input `Content`.
  rpc EmbedContent(EmbedContentRequest) returns (EmbedContentResponse) {
    option (google.api.http) = {
      post: "/v1beta/{model=models/*}:embedContent"
      body: "*"
    };
    option (google.api.method_signature) = "model,content";
  }

  // Generates multiple embeddings from the model, one per
  // `EmbedContentRequest` in the batch, in a synchronous call.
  rpc BatchEmbedContents(BatchEmbedContentsRequest)
      returns (BatchEmbedContentsResponse) {
    option (google.api.http) = {
      post: "/v1beta/{model=models/*}:batchEmbedContents"
      body: "*"
    };
    option (google.api.method_signature) = "model,requests";
  }

  // Runs a model's tokenizer on input content and returns the token count.
  rpc CountTokens(CountTokensRequest) returns (CountTokensResponse) {
    option (google.api.http) = {
      post: "/v1beta/{model=models/*}:countTokens"
      body: "*"
    };
    option (google.api.method_signature) = "model,contents";
  }
}

// Type of task for which the embedding will be used.
//
// Used by `EmbedContentRequest.task_type`.
enum TaskType {
  // Unset value, which will default to one of the other enum values.
  TASK_TYPE_UNSPECIFIED = 0;

  // Specifies the given text is a query in a search/retrieval setting.
  RETRIEVAL_QUERY = 1;

  // Specifies the given text is a document from the corpus being searched.
  RETRIEVAL_DOCUMENT = 2;

  // Specifies the given text will be used for semantic textual similarity
  // (STS).
  SEMANTIC_SIMILARITY = 3;

  // Specifies that the given text will be classified.
  CLASSIFICATION = 4;

  // Specifies that the embeddings will be used for clustering.
  CLUSTERING = 5;
}

// Request to generate a completion from the model.
//
// Used as the request of both `GenerateContent` and `StreamGenerateContent`.
message GenerateContentRequest {
  // Required. The name of the `Model` to use for generating the completion.
  //
  // Format: `models/{model}`. The `GenerateContent` HTTP mapping additionally
  // accepts names matching `tunedModels/*`.
  string model = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "generativelanguage.googleapis.com/Model"
    }
  ];

  // Required. The content of the current conversation with the model.
  //
  // For single-turn queries, this is a single instance. For multi-turn queries,
  // this is a repeated field that contains conversation history + latest
  // request.
  repeated Content contents = 2 [(google.api.field_behavior) = REQUIRED];

  // Optional. A list of `Tools` the model may use to generate the next
  // response.
  //
  // A `Tool` is a piece of code that enables the system to interact with
  // external systems to perform an action, or set of actions, outside of
  // knowledge and scope of the model. The only supported tool is currently
  // `Function`.
  repeated Tool tools = 5 [(google.api.field_behavior) = OPTIONAL];

  // Optional. A list of unique `SafetySetting` instances for blocking unsafe
  // content.
  //
  // This will be enforced on the `GenerateContentRequest.contents` and
  // `GenerateContentResponse.candidates`. There should not be more than one
  // setting for each `SafetyCategory` type. The API will block any contents and
  // responses that fail to meet the thresholds set by these settings. This list
  // overrides the default settings for each `SafetyCategory` specified in the
  // safety_settings. If there is no `SafetySetting` for a given
  // `SafetyCategory` provided in the list, the API will use the default safety
  // setting for that category. Harm categories HARM_CATEGORY_HATE_SPEECH,
  // HARM_CATEGORY_SEXUALLY_EXPLICIT, HARM_CATEGORY_DANGEROUS_CONTENT,
  // HARM_CATEGORY_HARASSMENT are supported.
  repeated SafetySetting safety_settings = 3
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. Configuration options for model generation and outputs.
  optional GenerationConfig generation_config = 4
      [(google.api.field_behavior) = OPTIONAL];
}

// Configuration options for model generation and outputs. Not all parameters
// may be configurable for every model.
//
// All scalar fields are declared `optional`, so an unset field can be told
// apart from one explicitly set to its zero value.
message GenerationConfig {
  // Optional. Number of generated responses to return.
  //
  // This value must be between [1, 8], inclusive. If unset, this will default
  // to 1.
  optional int32 candidate_count = 1 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The set of character sequences (up to 5) that will stop output
  // generation. If specified, the API will stop at the first appearance of a
  // stop sequence. The stop sequence will not be included as part of the
  // response.
  repeated string stop_sequences = 2 [(google.api.field_behavior) = OPTIONAL];

  // NOTE(review): field number 3 is not used by this message. If it belonged
  // to a removed field it should be declared `reserved` - TODO confirm.

  // Optional. The maximum number of tokens to include in a candidate.
  //
  // Note: The default value varies by model, see the `Model.output_token_limit`
  // attribute of the `Model` returned from the `getModel` function.
  optional int32 max_output_tokens = 4 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Controls the randomness of the output.
  //
  // Note: The default value varies by model, see the `Model.temperature`
  // attribute of the `Model` returned from the `getModel` function.
  //
  // Values can range from [0.0, infinity).
  optional float temperature = 5 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The maximum cumulative probability of tokens to consider when
  // sampling.
  //
  // The model uses combined Top-k and nucleus sampling.
  //
  // Tokens are sorted based on their assigned probabilities so that only the
  // most likely tokens are considered. Top-k sampling directly limits the
  // maximum number of tokens to consider, while nucleus sampling limits the
  // number of tokens based on the cumulative probability.
  //
  // Note: The default value varies by model, see the `Model.top_p`
  // attribute of the `Model` returned from the `getModel` function.
  optional float top_p = 6 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The maximum number of tokens to consider when sampling.
  //
  // The model uses combined Top-k and nucleus sampling.
  //
  // Top-k sampling considers the set of `top_k` most probable tokens.
  //
  // Note: The default value varies by model, see the `Model.top_k`
  // attribute of the `Model` returned from the `getModel` function.
  optional int32 top_k = 7 [(google.api.field_behavior) = OPTIONAL];
}

// Configuration for retrieving grounding content from a `Corpus` or
// `Document` created using the Semantic Retriever API.
//
// Supplied through `GenerateAnswerRequest.semantic_retriever`.
message SemanticRetrieverConfig {
  // Required. Name of the resource for retrieval, e.g. `corpora/123` or
  // `corpora/123/documents/abc`.
  string source = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. Query to use for similarity matching `Chunk`s in the given
  // resource.
  Content query = 2 [(google.api.field_behavior) = REQUIRED];

  // Optional. Filters for selecting `Document`s and/or `Chunk`s from the
  // resource.
  repeated MetadataFilter metadata_filters = 3
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. Maximum number of relevant `Chunk`s to retrieve.
  optional int32 max_chunks_count = 4 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Minimum relevance score for retrieved relevant `Chunk`s.
  optional float minimum_relevance_score = 5
      [(google.api.field_behavior) = OPTIONAL];
}

// Response from the model supporting multiple candidates.
//
// Note on safety ratings and content filtering. They are reported for both
// the prompt in `GenerateContentResponse.prompt_feedback` and for each
// candidate in `finish_reason` and in `safety_ratings`. The API contract is
// that:
//  - either all requested candidates are returned or no candidates at all
//  - no candidates are returned only if there was something wrong with the
//    prompt (see `prompt_feedback`)
//  - feedback on each candidate is reported on `finish_reason` and
//    `safety_ratings`.
message GenerateContentResponse {
  // Feedback metadata for the prompt specified in
  // `GenerateContentRequest.contents`.
  message PromptFeedback {
    // Specifies the reason why the prompt was blocked.
    enum BlockReason {
      // Default value. This value is unused.
      BLOCK_REASON_UNSPECIFIED = 0;

      // Prompt was blocked due to safety reasons. You can inspect
      // `safety_ratings` to understand which safety category blocked it.
      SAFETY = 1;

      // Prompt was blocked due to unknown reasons.
      OTHER = 2;
    }

    // Optional. If set, the prompt was blocked and no candidates are returned.
    // Rephrase your prompt.
    BlockReason block_reason = 1 [(google.api.field_behavior) = OPTIONAL];

    // Ratings for safety of the prompt.
    // There is at most one rating per category.
    repeated SafetyRating safety_ratings = 2;
  }

  // Candidate responses from the model.
  repeated Candidate candidates = 1;

  // Returns the prompt's feedback related to the content filters.
  PromptFeedback prompt_feedback = 2;
}

// A response candidate generated from the model.
//
// Returned in `GenerateContentResponse.candidates` and as
// `GenerateAnswerResponse.answer`.
message Candidate {
  // Defines the reason why the model stopped generating tokens.
  enum FinishReason {
    // Default value. This value is unused.
    FINISH_REASON_UNSPECIFIED = 0;

    // Natural stop point of the model or provided stop sequence.
    STOP = 1;

    // The maximum number of tokens as specified in the request was reached.
    MAX_TOKENS = 2;

    // The candidate content was flagged for safety reasons.
    SAFETY = 3;

    // The candidate content was flagged for recitation reasons.
    RECITATION = 4;

    // Unknown reason.
    OTHER = 5;
  }

  // NOTE(review): field number 4 is not used by this message. If it belonged
  // to a removed field it should be declared `reserved` - TODO confirm.

  // Output only. Index of the candidate in the list of candidates.
  optional int32 index = 3 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Generated content returned from the model.
  Content content = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Optional. Output only. The reason why the model stopped generating tokens.
  //
  // If empty, the model has not stopped generating the tokens.
  //
  // This field intentionally carries two field behaviors: it is set only by
  // the server, and it may be absent.
  FinishReason finish_reason = 2 [
    (google.api.field_behavior) = OPTIONAL,
    (google.api.field_behavior) = OUTPUT_ONLY
  ];

  // List of ratings for the safety of a response candidate.
  //
  // There is at most one rating per category.
  repeated SafetyRating safety_ratings = 5;

  // Output only. Citation information for model-generated candidate.
  //
  // This field may be populated with recitation information for any text
  // included in the `content`. These are passages that are "recited" from
  // copyrighted material in the foundational LLM's training data.
  CitationMetadata citation_metadata = 6
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Token count for this candidate.
  int32 token_count = 7 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Attribution information for sources that contributed to a
  // grounded answer.
  //
  // This field is populated for `GenerateAnswer` calls.
  repeated GroundingAttribution grounding_attributions = 8
      [(google.api.field_behavior) = OUTPUT_ONLY];
}

// Identifier for the source contributing to this attribution.
//
// The source is either an inline grounding passage or a `Chunk` retrieved via
// the Semantic Retriever, matching the two kinds of grounding source that
// `GenerateAnswerRequest` accepts.
message AttributionSourceId {
  // Identifier for a part within a `GroundingPassage`.
  message GroundingPassageId {
    // Output only. ID of the passage matching the `GenerateAnswerRequest`'s
    // `GroundingPassage.id`.
    string passage_id = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. Index of the part within the `GenerateAnswerRequest`'s
    // `GroundingPassage.content`.
    int32 part_index = 2 [(google.api.field_behavior) = OUTPUT_ONLY];
  }

  // Identifier for a `Chunk` retrieved via Semantic Retriever specified in the
  // `GenerateAnswerRequest` using `SemanticRetrieverConfig`.
  message SemanticRetrieverChunk {
    // Output only. Name of the source matching the request's
    // `SemanticRetrieverConfig.source`. Example: `corpora/123` or
    // `corpora/123/documents/abc`
    string source = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. Name of the `Chunk` containing the attributed text.
    // Example: `corpora/123/documents/abc/chunks/xyz`
    string chunk = 2 [(google.api.field_behavior) = OUTPUT_ONLY];
  }

  // The kind of source being identified. At most one of the members is set.
  oneof source {
    // Identifier for an inline passage.
    GroundingPassageId grounding_passage = 1;

    // Identifier for a `Chunk` fetched via Semantic Retriever.
    SemanticRetrieverChunk semantic_retriever_chunk = 2;
  }
}

// Attribution for a source that contributed to an answer.
//
// Returned in `Candidate.grounding_attributions`.
message GroundingAttribution {
  // NOTE(review): field number 1 is not used by this message. If it belonged
  // to a removed field it should be declared `reserved` - TODO confirm.

  // Output only. Identifier for the source contributing to this attribution.
  AttributionSourceId source_id = 3 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Grounding source content that makes up this attribution.
  Content content = 2;
}

// Request to generate a grounded answer from the model.
message GenerateAnswerRequest {
  // Style for grounded answers.
  enum AnswerStyle {
    // Unspecified answer style.
    ANSWER_STYLE_UNSPECIFIED = 0;

    // Succinct but abstract style.
    ABSTRACTIVE = 1;

    // Very brief and extractive style.
    EXTRACTIVE = 2;

    // Verbose style including extra details. The response may be formatted as a
    // sentence, paragraph, multiple paragraphs, or bullet points, etc.
    VERBOSE = 3;
  }

  // The sources in which to ground the answer. At most one of the members can
  // be set.
  oneof grounding_source {
    // Passages provided inline with the request.
    GroundingPassages inline_passages = 6;

    // Content retrieved from resources created via the Semantic Retriever
    // API.
    SemanticRetrieverConfig semantic_retriever = 7;
  }

  // Required. The name of the `Model` to use for generating the grounded
  // response.
  //
  // Format: `models/{model}`.
  string model = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "generativelanguage.googleapis.com/Model"
    }
  ];

  // Required. The content of the current conversation with the model. For
  // single-turn queries, this is a single question to answer. For multi-turn
  // queries, this is a repeated field that contains conversation history and
  // the last `Content` in the list containing the question.
  //
  // Note: GenerateAnswer currently only supports queries in English.
  repeated Content contents = 2 [(google.api.field_behavior) = REQUIRED];

  // Required. Style in which answers should be returned.
  AnswerStyle answer_style = 5 [(google.api.field_behavior) = REQUIRED];

  // Optional. A list of unique `SafetySetting` instances for blocking unsafe
  // content.
  //
  // This will be enforced on the `GenerateAnswerRequest.contents` and
  // `GenerateAnswerResponse.answer`. There should not be more than one
  // setting for each `SafetyCategory` type. The API will block any contents and
  // responses that fail to meet the thresholds set by these settings. This list
  // overrides the default settings for each `SafetyCategory` specified in the
  // safety_settings. If there is no `SafetySetting` for a given
  // `SafetyCategory` provided in the list, the API will use the default safety
  // setting for that category. Harm categories HARM_CATEGORY_HATE_SPEECH,
  // HARM_CATEGORY_SEXUALLY_EXPLICIT, HARM_CATEGORY_DANGEROUS_CONTENT,
  // HARM_CATEGORY_HARASSMENT are supported.
  repeated SafetySetting safety_settings = 3
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. Controls the randomness of the output.
  //
  // Values can range from [0.0,1.0], inclusive. A value closer to 1.0 will
  // produce responses that are more varied and creative, while a value closer
  // to 0.0 will typically result in more straightforward responses from the
  // model. A low temperature (~0.2) is usually recommended for
  // Attributed-Question-Answering use cases.
  optional float temperature = 4 [(google.api.field_behavior) = OPTIONAL];
}

// Response from the model for a grounded answer.
message GenerateAnswerResponse {
  // Feedback related to the input data used to answer the question, as opposed
  // to model-generated response to the question.
  message InputFeedback {
    // Specifies the reason why the input was blocked.
    enum BlockReason {
      // Default value. This value is unused.
      BLOCK_REASON_UNSPECIFIED = 0;

      // Input was blocked due to safety reasons. You can inspect
      // `safety_ratings` to understand which safety category blocked it.
      SAFETY = 1;

      // Input was blocked due to other reasons.
      OTHER = 2;
    }

    // Optional. If set, the input was blocked and no candidates are returned.
    // Rephrase your input.
    optional BlockReason block_reason = 1
        [(google.api.field_behavior) = OPTIONAL];

    // Ratings for safety of the input.
    // There is at most one rating per category.
    repeated SafetyRating safety_ratings = 2;
  }

  // Candidate answer from the model.
  //
  // Note: The model *always* attempts to provide a grounded answer, even when
  // the answer is unlikely to be answerable from the given passages.
  // In that case, a low-quality or ungrounded answer may be provided, along
  // with a low `answerable_probability`.
  Candidate answer = 1;

  // Output only. The model's estimate of the probability that its answer is
  // correct and grounded in the input passages.
  //
  // A low answerable_probability indicates that the answer might not be
  // grounded in the sources.
  //
  // When `answerable_probability` is low, some clients may wish to:
  //
  // * Display a message to the effect of "We couldn't answer that question" to
  // the user.
  // * Fall back to a general-purpose LLM that answers the question from world
  // knowledge. The threshold and nature of such fallbacks will depend on
  // individual clients' use cases. 0.5 is a good starting threshold.
  optional float answerable_probability = 2
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Feedback related to the input data used to answer the
  // question, as opposed to model-generated response to the question.
  //
  // "Input data" can be one or more of the following:
  //
  // - Question specified by the last entry in `GenerateAnswerRequest.contents`
  // - Conversation history specified by the other entries in
  // `GenerateAnswerRequest.contents`
  // - Grounding sources (`GenerateAnswerRequest.semantic_retriever` or
  // `GenerateAnswerRequest.inline_passages`)
  optional InputFeedback input_feedback = 3
      [(google.api.field_behavior) = OUTPUT_ONLY];
}

// Request containing the `Content` for the model to embed.
//
// Used directly by `EmbedContent` and as the element type of
// `BatchEmbedContentsRequest.requests`.
message EmbedContentRequest {
  // Required. The model's resource name. This serves as an ID for the Model to
  // use.
  //
  // This name should match a model name returned by the `ListModels` method.
  //
  // Format: `models/{model}`
  string model = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "generativelanguage.googleapis.com/Model"
    }
  ];

  // Required. The content to embed. Only the `parts.text` fields will be
  // counted.
  Content content = 2 [(google.api.field_behavior) = REQUIRED];

  // Optional. Task type for which the embeddings will be used. Can only be
  // set for `models/embedding-001`.
  optional TaskType task_type = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. A title for the text. Only applicable when `task_type` is
  // `RETRIEVAL_DOCUMENT`.
  //
  // Note: Specifying a `title` for `RETRIEVAL_DOCUMENT` provides better quality
  // embeddings for retrieval.
  optional string title = 4 [(google.api.field_behavior) = OPTIONAL];
}

// A list of floats representing an embedding.
//
// Returned by `EmbedContentResponse.embedding` and
// `BatchEmbedContentsResponse.embeddings`.
message ContentEmbedding {
  // The embedding values.
  repeated float values = 1;
}

// The response to an `EmbedContentRequest`, returned by `EmbedContent`.
message EmbedContentResponse {
  // Output only. The embedding generated from the input content.
  ContentEmbedding embedding = 1 [(google.api.field_behavior) = OUTPUT_ONLY];
}

// Batch request to get embeddings from the model for a list of prompts.
message BatchEmbedContentsRequest {
  // Required. The model's resource name. This serves as an ID for the Model to
  // use.
  //
  // This name should match a model name returned by the `ListModels` method.
  //
  // Format: `models/{model}`
  string model = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "generativelanguage.googleapis.com/Model"
    }
  ];

  // Required. Embed requests for the batch. The model in each of these requests
  // must match the model specified in `BatchEmbedContentsRequest.model`.
  repeated EmbedContentRequest requests = 2
      [(google.api.field_behavior) = REQUIRED];
}

// The response to a `BatchEmbedContentsRequest`, returned by
// `BatchEmbedContents`.
message BatchEmbedContentsResponse {
  // Output only. The embeddings for each request, in the same order as provided
  // in the batch request.
  repeated ContentEmbedding embeddings = 1
      [(google.api.field_behavior) = OUTPUT_ONLY];
}

// Request to count the number of tokens in the `contents` sent to a model.
//
// Models may tokenize text differently, so each model may return a different
// `total_tokens`.
message CountTokensRequest {
  // Required. The model's resource name. This serves as an ID for the Model to
  // use.
  //
  // This name should match a model name returned by the `ListModels` method.
  //
  // Format: `models/{model}`
  string model = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "generativelanguage.googleapis.com/Model"
    }
  ];

  // Required. The input given to the model as a prompt.
  repeated Content contents = 2 [(google.api.field_behavior) = REQUIRED];
}

// A response from `CountTokens`.
//
// It returns the model's `total_tokens` for the request's `contents`.
message CountTokensResponse {
  // The number of tokens that the `model` tokenizes the request's `contents`
  // into.
  //
  // Always non-negative.
  int32 total_tokens = 1;
}