/
generative_service.proto
645 lines (553 loc) · 24.1 KB
/
generative_service.proto
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";
// Versioned package that namespaces every definition in this file.
package google.ai.generativelanguage.v1beta;
// Schemas from the same `google/ai/generativelanguage/v1beta` directory.
import "google/ai/generativelanguage/v1beta/citation.proto";
import "google/ai/generativelanguage/v1beta/content.proto";
import "google/ai/generativelanguage/v1beta/retriever.proto";
import "google/ai/generativelanguage/v1beta/safety.proto";
// Shared `google.api` annotation schemas (HTTP bindings, client options,
// field behavior, resource references).
import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";
import "google/api/resource.proto";
// Code-generation options for the Go and Java outputs.
option go_package = "cloud.google.com/go/ai/generativelanguage/apiv1beta/generativelanguagepb;generativelanguagepb";
option java_multiple_files = true;
option java_outer_classname = "GenerativeServiceProto";
option java_package = "com.google.ai.generativelanguage.v1beta";
// Service for working with large models that produce multimodal content and
// offer capabilities that go beyond text generation.
service GenerativeService {
  option (google.api.default_host) = "generativelanguage.googleapis.com";

  // Produces a model response for the supplied `GenerateContentRequest`.
  rpc GenerateContent(GenerateContentRequest)
      returns (GenerateContentResponse) {
    option (google.api.http) = {
      post: "/v1beta/{model=models/*}:generateContent"
      body: "*"
      additional_bindings {
        post: "/v1beta/{model=tunedModels/*}:generateContent"
        body: "*"
      }
    };
    option (google.api.method_signature) = "model,contents";
  }

  // Produces a grounded answer from the model for the supplied
  // `GenerateAnswerRequest`.
  rpc GenerateAnswer(GenerateAnswerRequest) returns (GenerateAnswerResponse) {
    option (google.api.http) = {
      post: "/v1beta/{model=models/*}:generateAnswer"
      body: "*"
    };
    option (google.api.method_signature) =
        "model,contents,safety_settings,answer_style";
  }

  // Produces a streamed model response for the supplied
  // `GenerateContentRequest`.
  rpc StreamGenerateContent(GenerateContentRequest)
      returns (stream GenerateContentResponse) {
    option (google.api.http) = {
      post: "/v1beta/{model=models/*}:streamGenerateContent"
      body: "*"
    };
    option (google.api.method_signature) = "model,contents";
  }

  // Produces an embedding from the model for the supplied input `Content`.
  rpc EmbedContent(EmbedContentRequest) returns (EmbedContentResponse) {
    option (google.api.http) = {
      post: "/v1beta/{model=models/*}:embedContent"
      body: "*"
    };
    option (google.api.method_signature) = "model,content";
  }

  // Produces multiple embeddings from the model for the supplied input text
  // in one synchronous call.
  rpc BatchEmbedContents(BatchEmbedContentsRequest)
      returns (BatchEmbedContentsResponse) {
    option (google.api.http) = {
      post: "/v1beta/{model=models/*}:batchEmbedContents"
      body: "*"
    };
    option (google.api.method_signature) = "model,requests";
  }

  // Runs the model's tokenizer over the input content and reports the
  // resulting token count.
  rpc CountTokens(CountTokensRequest) returns (CountTokensResponse) {
    option (google.api.http) = {
      post: "/v1beta/{model=models/*}:countTokens"
      body: "*"
    };
    option (google.api.method_signature) = "model,contents";
  }
}
// The kind of task the embedding is going to be used for.
enum TaskType {
  // Unset value; it defaults to one of the other enum values.
  TASK_TYPE_UNSPECIFIED = 0;

  // The given text is a query in a search/retrieval setting.
  RETRIEVAL_QUERY = 1;

  // The given text is a document from the corpus being searched.
  RETRIEVAL_DOCUMENT = 2;

  // The given text will be used for semantic textual similarity (STS).
  SEMANTIC_SIMILARITY = 3;

  // The given text will be classified.
  CLASSIFICATION = 4;

  // The embeddings will be used for clustering.
  CLUSTERING = 5;
}
// Request asking the model to generate a completion.
message GenerateContentRequest {
  // Required. Name of the `Model` that should generate the completion.
  //
  // Format: `models/{model}`.
  string model = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "generativelanguage.googleapis.com/Model"
    }
  ];

  // Required. Content of the current conversation with the model.
  //
  // A single-turn query carries a single instance. A multi-turn query is a
  // repeated field holding the conversation history plus the latest request.
  repeated Content contents = 2 [(google.api.field_behavior) = REQUIRED];

  // Optional. `Tools` the model may use to generate the next response.
  //
  // A `Tool` is a piece of code that lets the system interact with external
  // systems to perform an action, or set of actions, outside of the knowledge
  // and scope of the model. `Function` is currently the only supported tool.
  repeated Tool tools = 5 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Unique `SafetySetting` instances used to block unsafe content.
  //
  // The settings are enforced on both `GenerateContentRequest.contents` and
  // `GenerateContentResponse.candidates`. Provide at most one setting per
  // `SafetyCategory` type. Any contents or responses that fail to meet the
  // thresholds in these settings are blocked by the API. Each `SafetyCategory`
  // listed here overrides the default setting for that category; a category
  // with no `SafetySetting` in the list keeps the API's default safety
  // setting. Supported harm categories: HARM_CATEGORY_HATE_SPEECH,
  // HARM_CATEGORY_SEXUALLY_EXPLICIT, HARM_CATEGORY_DANGEROUS_CONTENT and
  // HARM_CATEGORY_HARASSMENT.
  repeated SafetySetting safety_settings = 3
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. Options that configure model generation and its outputs.
  optional GenerationConfig generation_config = 4
      [(google.api.field_behavior) = OPTIONAL];
}
// Options that configure model generation and its outputs. A given model may
// not allow every parameter to be configured.
message GenerationConfig {
  // Optional. How many generated responses to return.
  //
  // Must lie in the range [1, 8], inclusive. Defaults to 1 when unset.
  optional int32 candidate_count = 1 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Up to 5 character sequences that stop output generation. When
  // specified, the API stops at the first appearance of a stop sequence, and
  // that stop sequence is not included in the response.
  repeated string stop_sequences = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Upper bound on the number of tokens in a candidate.
  //
  // Note: The default differs per model; see the `Model.output_token_limit`
  // attribute of the `Model` returned from the `getModel` function.
  optional int32 max_output_tokens = 4
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. Controls how random the output is.
  //
  // Note: The default differs per model; see the `Model.temperature`
  // attribute of the `Model` returned from the `getModel` function.
  //
  // Values can range from [0.0, infinity).
  optional float temperature = 5 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Maximum cumulative probability of the tokens considered during
  // sampling.
  //
  // The model combines Top-k and nucleus sampling.
  //
  // Tokens are sorted by their assigned probabilities so that only the most
  // likely tokens are considered. Top-k sampling directly caps how many tokens
  // are considered, whereas nucleus sampling caps the number of tokens based
  // on their cumulative probability.
  //
  // Note: The default differs per model; see the `Model.top_p`
  // attribute of the `Model` returned from the `getModel` function.
  optional float top_p = 6 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Maximum number of tokens considered during sampling.
  //
  // The model combines Top-k and nucleus sampling.
  //
  // Top-k sampling considers the set of the `top_k` most probable tokens.
  //
  // Note: The default differs per model; see the `Model.top_k`
  // attribute of the `Model` returned from the `getModel` function.
  optional int32 top_k = 7 [(google.api.field_behavior) = OPTIONAL];
}
// Settings for retrieving grounding content from a `Corpus` or `Document`
// that was created with the Semantic Retriever API.
message SemanticRetrieverConfig {
  // Required. Name of the resource to retrieve from, e.g. corpora/123 or
  // corpora/123/documents/abc.
  string source = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. Query used for similarity matching of `Chunk`s in the given
  // resource.
  Content query = 2 [(google.api.field_behavior) = REQUIRED];

  // Optional. Filters that select `Document`s and/or `Chunk`s from the
  // resource.
  repeated MetadataFilter metadata_filters = 3
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. Maximum number of relevant `Chunk`s to retrieve.
  optional int32 max_chunks_count = 4
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. Minimum relevance score that retrieved relevant `Chunk`s must
  // have.
  optional float minimum_relevance_score = 5
      [(google.api.field_behavior) = OPTIONAL];
}
// Model response that can carry multiple candidates.
//
// Safety ratings and content filtering are reported for the prompt in
// `GenerateContentResponse.prompt_feedback` and for each candidate in
// `finish_reason` and in `safety_ratings`. The API contract is:
//  - Either every requested candidate is returned, or no candidates at all.
//  - No candidates are returned only when something was wrong with the
//    prompt (see `prompt_feedback`).
//  - Feedback on each candidate is reported through `finish_reason` and
//    `safety_ratings`.
message GenerateContentResponse {
  // Feedback metadata for the prompt specified in
  // `GenerateContentRequest.contents`.
  message PromptFeedback {
    // Reason why the prompt was blocked.
    enum BlockReason {
      // Default value. This value is unused.
      BLOCK_REASON_UNSPECIFIED = 0;

      // The prompt was blocked for safety reasons. Inspect `safety_ratings`
      // to see which safety category blocked it.
      SAFETY = 1;

      // The prompt was blocked for unknown reasons.
      OTHER = 2;
    }

    // Optional. If set, the prompt was blocked and no candidates are returned.
    // Rephrase your prompt.
    BlockReason block_reason = 1 [(google.api.field_behavior) = OPTIONAL];

    // Safety ratings for the prompt.
    // There is at most one rating per category.
    repeated SafetyRating safety_ratings = 2;
  }

  // Candidate responses produced by the model.
  repeated Candidate candidates = 1;

  // Feedback on the prompt with respect to the content filters.
  PromptFeedback prompt_feedback = 2;
}
// A single response candidate produced by the model.
message Candidate {
  // Reason why the model stopped generating tokens.
  enum FinishReason {
    // Default value. This value is unused.
    FINISH_REASON_UNSPECIFIED = 0;

    // The model reached a natural stop point or a provided stop sequence.
    STOP = 1;

    // The maximum number of tokens specified in the request was reached.
    MAX_TOKENS = 2;

    // The candidate content was flagged for safety reasons.
    SAFETY = 3;

    // The candidate content was flagged for recitation reasons.
    RECITATION = 4;

    // Unknown reason.
    OTHER = 5;
  }

  // Output only. Position of this candidate in the list of candidates.
  optional int32 index = 3 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Generated content returned by the model.
  Content content = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Optional. Output only. Why the model stopped generating tokens.
  //
  // When empty, the model has not stopped generating tokens.
  FinishReason finish_reason = 2 [
    (google.api.field_behavior) = OPTIONAL,
    (google.api.field_behavior) = OUTPUT_ONLY
  ];

  // Ratings for the safety of a response candidate.
  //
  // There is at most one rating per category.
  repeated SafetyRating safety_ratings = 5;

  // Output only. Citation information for the model-generated candidate.
  //
  // May be populated with recitation information for any text included in
  // the `content`. These are passages that are "recited" from copyrighted
  // material in the foundational LLM's training data.
  CitationMetadata citation_metadata = 6
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Token count for this candidate.
  int32 token_count = 7 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Attribution information for the sources that contributed to
  // a grounded answer.
  //
  // Populated for `GenerateAnswer` calls.
  repeated GroundingAttribution grounding_attributions = 8
      [(google.api.field_behavior) = OUTPUT_ONLY];
}
// Identifies the source that contributes to an attribution.
message AttributionSourceId {
  // Identifies a part within a `GroundingPassage`.
  message GroundingPassageId {
    // Output only. ID of the passage, matching the `GenerateAnswerRequest`'s
    // `GroundingPassage.id`.
    string passage_id = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. Index of the part within the `GenerateAnswerRequest`'s
    // `GroundingPassage.content`.
    int32 part_index = 2 [(google.api.field_behavior) = OUTPUT_ONLY];
  }

  // Identifies a `Chunk` retrieved via the Semantic Retriever that was
  // specified in the `GenerateAnswerRequest` using `SemanticRetrieverConfig`.
  message SemanticRetrieverChunk {
    // Output only. Name of the source, matching the request's
    // `SemanticRetrieverConfig.source`. Example: `corpora/123` or
    // `corpora/123/documents/abc`
    string source = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. Name of the `Chunk` that contains the attributed text.
    // Example: `corpora/123/documents/abc/chunks/xyz`
    string chunk = 2 [(google.api.field_behavior) = OUTPUT_ONLY];
  }

  oneof source {
    // Identifier of an inline passage.
    GroundingPassageId grounding_passage = 1;

    // Identifier of a `Chunk` fetched via Semantic Retriever.
    SemanticRetrieverChunk semantic_retriever_chunk = 2;
  }
}
// Attribution for one source that contributed to an answer.
message GroundingAttribution {
  // Output only. Identifier of the source contributing to this attribution.
  AttributionSourceId source_id = 3
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Grounding source content that this attribution is made up of.
  Content content = 2;
}
// Request asking the model to generate a grounded answer.
message GenerateAnswerRequest {
  // Style of a grounded answer.
  enum AnswerStyle {
    // Unspecified answer style.
    ANSWER_STYLE_UNSPECIFIED = 0;

    // Succinct but abstract style.
    ABSTRACTIVE = 1;

    // Very brief and extractive style.
    EXTRACTIVE = 2;

    // Verbose style that includes extra details. The response may be
    // formatted as a sentence, paragraph, multiple paragraphs, or bullet
    // points, etc.
    VERBOSE = 3;
  }

  // The sources in which the answer is grounded.
  oneof grounding_source {
    // Passages supplied inline with the request.
    GroundingPassages inline_passages = 6;

    // Content retrieved from resources created via the Semantic Retriever
    // API.
    SemanticRetrieverConfig semantic_retriever = 7;
  }

  // Required. Name of the `Model` that should generate the grounded
  // response.
  //
  // Format: `models/{model}`.
  string model = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "generativelanguage.googleapis.com/Model"
    }
  ];

  // Required. Content of the current conversation with the model. A
  // single-turn query carries a single question to answer. A multi-turn query
  // is a repeated field holding the conversation history, with the last
  // `Content` in the list containing the question.
  //
  // Note: GenerateAnswer currently only supports queries in English.
  repeated Content contents = 2 [(google.api.field_behavior) = REQUIRED];

  // Required. Style in which answers should be returned.
  AnswerStyle answer_style = 5 [(google.api.field_behavior) = REQUIRED];

  // Optional. Unique `SafetySetting` instances used to block unsafe content.
  //
  // The settings are enforced on both `GenerateAnswerRequest.contents` and
  // `GenerateAnswerResponse.answer`. Provide at most one setting per
  // `SafetyCategory` type. Any contents or responses that fail to meet the
  // thresholds in these settings are blocked by the API. Each `SafetyCategory`
  // listed here overrides the default setting for that category; a category
  // with no `SafetySetting` in the list keeps the API's default safety
  // setting. Supported harm categories: HARM_CATEGORY_HATE_SPEECH,
  // HARM_CATEGORY_SEXUALLY_EXPLICIT, HARM_CATEGORY_DANGEROUS_CONTENT and
  // HARM_CATEGORY_HARASSMENT.
  repeated SafetySetting safety_settings = 3
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. Controls how random the output is.
  //
  // Values can range from [0.0,1.0], inclusive. Values closer to 1.0 produce
  // responses that are more varied and creative, while values closer to 0.0
  // typically yield more straightforward responses from the model. A low
  // temperature (~0.2) is usually recommended for
  // Attributed-Question-Answering use cases.
  optional float temperature = 4 [(google.api.field_behavior) = OPTIONAL];
}
// Model response carrying a grounded answer.
message GenerateAnswerResponse {
  // Feedback about the input data used to answer the question, as opposed to
  // the model-generated response to the question.
  message InputFeedback {
    // Reason why the input was blocked.
    enum BlockReason {
      // Default value. This value is unused.
      BLOCK_REASON_UNSPECIFIED = 0;

      // The input was blocked for safety reasons. Inspect `safety_ratings`
      // to see which safety category blocked it.
      SAFETY = 1;

      // The input was blocked for other reasons.
      OTHER = 2;
    }

    // Optional. If set, the input was blocked and no candidates are returned.
    // Rephrase your input.
    optional BlockReason block_reason = 1
        [(google.api.field_behavior) = OPTIONAL];

    // Safety ratings for the input.
    // There is at most one rating per category.
    repeated SafetyRating safety_ratings = 2;
  }

  // Candidate answer from the model.
  //
  // Note: The model *always* attempts to provide a grounded answer, even when
  // the answer is unlikely to be answerable from the given passages.
  // In that case, a low-quality or ungrounded answer may be provided, along
  // with a low `answerable_probability`.
  Candidate answer = 1;

  // Output only. The model's estimate of the probability that its answer is
  // correct and grounded in the input passages.
  //
  // A low answerable_probability indicates that the answer might not be
  // grounded in the sources.
  //
  // When `answerable_probability` is low, some clients may wish to:
  //
  // * Display a message to the effect of "We couldn't answer that question"
  //   to the user.
  // * Fall back to a general-purpose LLM that answers the question from world
  //   knowledge. The threshold and nature of such fallbacks will depend on
  //   individual clients' use cases. 0.5 is a good starting threshold.
  optional float answerable_probability = 2
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Feedback about the input data used to answer the question,
  // as opposed to the model-generated response to the question.
  //
  // "Input data" can be one or more of the following:
  //
  // - Question specified by the last entry in
  //   `GenerateAnswerRequest.contents`
  // - Conversation history specified by the other entries in
  //   `GenerateAnswerRequest.contents`
  // - Grounding sources (`GenerateAnswerRequest.semantic_retriever` or
  //   `GenerateAnswerRequest.inline_passages`)
  optional InputFeedback input_feedback = 3
      [(google.api.field_behavior) = OUTPUT_ONLY];
}
// Request carrying the `Content` that the model should embed.
message EmbedContentRequest {
  // Required. Resource name of the model, serving as the ID of the Model to
  // use.
  //
  // The name should match a model name returned by the `ListModels` method.
  //
  // Format: `models/{model}`
  string model = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "generativelanguage.googleapis.com/Model"
    }
  ];

  // Required. The content to embed. Only the `parts.text` fields will be
  // counted.
  Content content = 2 [(google.api.field_behavior) = REQUIRED];

  // Optional. Task type the embeddings will be used for. Can only be set for
  // `models/embedding-001`.
  optional TaskType task_type = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Title for the text. Only applicable when TaskType is
  // `RETRIEVAL_DOCUMENT`.
  //
  // Note: Specifying a `title` for `RETRIEVAL_DOCUMENT` provides better
  // quality embeddings for retrieval.
  optional string title = 4 [(google.api.field_behavior) = OPTIONAL];
}
// An embedding, represented as a list of floats.
message ContentEmbedding {
  // Values of the embedding.
  repeated float values = 1;
}
// Response returned for an `EmbedContentRequest`.
message EmbedContentResponse {
  // Output only. Embedding generated from the input content.
  ContentEmbedding embedding = 1 [(google.api.field_behavior) = OUTPUT_ONLY];
}
// Batch request that obtains embeddings from the model for a list of prompts.
message BatchEmbedContentsRequest {
  // Required. Resource name of the model, serving as the ID of the Model to
  // use.
  //
  // The name should match a model name returned by the `ListModels` method.
  //
  // Format: `models/{model}`
  string model = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "generativelanguage.googleapis.com/Model"
    }
  ];

  // Required. Embed requests that make up the batch. The model in each of
  // these requests must match the model specified in
  // `BatchEmbedContentsRequest.model`.
  repeated EmbedContentRequest requests = 2
      [(google.api.field_behavior) = REQUIRED];
}
// Response returned for a `BatchEmbedContentsRequest`.
message BatchEmbedContentsResponse {
  // Output only. Embeddings for each request, in the same order in which the
  // requests were provided in the batch request.
  repeated ContentEmbedding embeddings = 1
      [(google.api.field_behavior) = OUTPUT_ONLY];
}
// Request to count the number of tokens in the `prompt` sent to a model.
//
// Models may tokenize text differently, so each model may return a different
// `token_count`.
message CountTokensRequest {
  // Required. Resource name of the model, serving as the ID of the Model to
  // use.
  //
  // The name should match a model name returned by the `ListModels` method.
  //
  // Format: `models/{model}`
  string model = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "generativelanguage.googleapis.com/Model"
    }
  ];

  // Required. Input given to the model as a prompt.
  repeated Content contents = 2 [(google.api.field_behavior) = REQUIRED];
}
// Response returned by `CountTokens`.
//
// Carries the model's `token_count` for the `prompt`.
message CountTokensResponse {
  // Number of tokens that the `model` tokenizes the `prompt` into.
  //
  // Always non-negative.
  int32 total_tokens = 1;
}