feat: add ComputeTokens and CountTokens API

PiperOrigin-RevId: 582364082
googleapis · Nov 14, 2023 · fdea53f · fdea53f
1 parent edbf77c
commit fdea53f
Show file tree

Hide file tree

Showing 3 changed files with 111 additions and 0 deletions.
diff --git a/google/cloud/aiplatform/v1/BUILD.bazel b/google/cloud/aiplatform/v1/BUILD.bazel
@@ -76,6 +76,7 @@ proto_library(
         "job_service.proto",
         "job_state.proto",
         "lineage_subgraph.proto",
+        "llm_utility_service.proto",
         "machine_resources.proto",
         "manual_batch_tuning_parameters.proto",
         "match_service.proto",
@@ -216,6 +217,7 @@ java_gapic_test(
         "com.google.cloud.aiplatform.v1.IndexEndpointServiceClientTest",
         "com.google.cloud.aiplatform.v1.IndexServiceClientTest",
         "com.google.cloud.aiplatform.v1.JobServiceClientTest",
+        "com.google.cloud.aiplatform.v1.LlmUtilityServiceClientTest",
         "com.google.cloud.aiplatform.v1.MetadataServiceClientTest",
         "com.google.cloud.aiplatform.v1.MigrationServiceClientTest",
         "com.google.cloud.aiplatform.v1.ModelServiceClientTest",

diff --git a/google/cloud/aiplatform/v1/aiplatform_v1.yaml b/google/cloud/aiplatform/v1/aiplatform_v1.yaml
@@ -14,6 +14,7 @@ apis:
 - name: google.cloud.aiplatform.v1.IndexEndpointService
 - name: google.cloud.aiplatform.v1.IndexService
 - name: google.cloud.aiplatform.v1.JobService
+- name: google.cloud.aiplatform.v1.LlmUtilityService
 - name: google.cloud.aiplatform.v1.MatchService
 - name: google.cloud.aiplatform.v1.MetadataService
 - name: google.cloud.aiplatform.v1.MigrationService
@@ -610,6 +611,14 @@ authentication:
       canonical_scopes: |-
         https://www.googleapis.com/auth/cloud-platform,
         https://www.googleapis.com/auth/cloud-platform.read-only
+  - selector: google.cloud.aiplatform.v1.LlmUtilityService.ComputeTokens
+    oauth:
+      canonical_scopes: |-
+        https://www.googleapis.com/auth/cloud-platform
+  - selector: google.cloud.aiplatform.v1.LlmUtilityService.CountTokens
+    oauth:
+      canonical_scopes: |-
+        https://www.googleapis.com/auth/cloud-platform
   - selector: google.cloud.aiplatform.v1.MatchService.FindNeighbors
     oauth:
       canonical_scopes: |-

diff --git a/google/cloud/aiplatform/v1/llm_utility_service.proto b/google/cloud/aiplatform/v1/llm_utility_service.proto
@@ -0,0 +1,100 @@
+// Copyright 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+syntax = "proto3";
+
+package google.cloud.aiplatform.v1;
+
+import "google/api/annotations.proto";
+import "google/api/client.proto";
+import "google/api/field_behavior.proto";
+import "google/api/resource.proto";
+import "google/cloud/aiplatform/v1/prediction_service.proto";
+import "google/protobuf/struct.proto";
+
+option csharp_namespace = "Google.Cloud.AIPlatform.V1";
+option go_package = "cloud.google.com/go/aiplatform/apiv1/aiplatformpb;aiplatformpb";
+option java_multiple_files = true;
+option java_outer_classname = "LlmUtilityServiceProto";
+option java_package = "com.google.cloud.aiplatform.v1";
+option php_namespace = "Google\\Cloud\\AIPlatform\\V1";
+option ruby_package = "Google::Cloud::AIPlatform::V1";
+
+// Service for LLM-related utility functions: counting and computing tokens.
+service LlmUtilityService {
+  option (google.api.default_host) = "aiplatform.googleapis.com";
+  option (google.api.oauth_scopes) =
+      "https://www.googleapis.com/auth/cloud-platform";
+
+  // Performs token counting for an Endpoint or publisher model.
+  rpc CountTokens(CountTokensRequest) returns (CountTokensResponse) {
+    option (google.api.http) = {
+      post: "/v1/{endpoint=projects/*/locations/*/endpoints/*}:countTokens"
+      body: "*"
+      additional_bindings {
+        post: "/v1/{endpoint=projects/*/locations/*/publishers/*/models/*}:countTokens"
+        body: "*"
+      }
+    };
+    option (google.api.method_signature) = "endpoint,instances";
+  }
+
+  // Returns the tokens and token ids computed from the input instances.
+  rpc ComputeTokens(ComputeTokensRequest) returns (ComputeTokensResponse) {
+    option (google.api.http) = {
+      post: "/v1/{endpoint=projects/*/locations/*/endpoints/*}:computeTokens"
+      body: "*"
+      additional_bindings {
+        post: "/v1/{endpoint=projects/*/locations/*/publishers/*/models/*}:computeTokens"
+        body: "*"
+      }
+    };
+    option (google.api.method_signature) = "endpoint,instances";
+  }
+}
+
+// Request message for the LlmUtilityService.ComputeTokens RPC.
+message ComputeTokensRequest {
+  // Required. Resource name of the Endpoint for which to compute the lists of
+  // tokens and token ids.
+  string endpoint = 1 [
+    (google.api.field_behavior) = REQUIRED,
+    (google.api.resource_reference) = {
+      type: "aiplatform.googleapis.com/Endpoint"
+    }
+  ];
+
+  // Required. The instances that are the input to the token computing call.
+  // The schema is identical to the prediction schema of the text model, even
+  // for non-text models such as chat models or Codey models.
+  repeated google.protobuf.Value instances = 2
+      [(google.api.field_behavior) = REQUIRED];
+}
+
+// Tokens info: a list of tokens paired with the corresponding list of token ids.
+message TokensInfo {
+  // The tokens from the input, each carried as raw bytes.
+  repeated bytes tokens = 1;
+
+  // The token ids from the input; presumably index-aligned with `tokens`.
+  repeated int64 token_ids = 2;
+}
+
+// Response message for the LlmUtilityService.ComputeTokens RPC.
+message ComputeTokensResponse {
+  // Lists of tokens info from the input. A ComputeTokensRequest may contain
+  // multiple instances, each carrying a prompt, so a list of tokens info is
+  // returned (presumably one entry per instance; confirm with the service).
+  repeated TokensInfo tokens_info = 1;
+}