Skip to content

Commit

Permalink
feat: add PredictionService.ServerStreamingPredict method
Browse files Browse the repository at this point in the history
feat: add `StreamingPredictRequest` type
feat: add `StreamingPredictResponse` type
feat: add `Tensor` type

PiperOrigin-RevId: 551672526
  • Loading branch information
Google APIs authored and Copybara-Service committed Jul 27, 2023
1 parent 149aec4 commit 1b650d6
Show file tree
Hide file tree
Showing 4 changed files with 283 additions and 0 deletions.
54 changes: 54 additions & 0 deletions google/cloud/aiplatform/v1/prediction_service.proto
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import "google/api/field_behavior.proto";
import "google/api/httpbody.proto";
import "google/api/resource.proto";
import "google/cloud/aiplatform/v1/explanation.proto";
import "google/cloud/aiplatform/v1/types.proto";
import "google/protobuf/struct.proto";

option csharp_namespace = "Google.Cloud.AIPlatform.V1";
Expand Down Expand Up @@ -74,6 +75,20 @@ service PredictionService {
option (google.api.method_signature) = "endpoint,http_body";
}

// Server-side streaming online prediction for Vertex LLM streaming.
//
// The caller sends a single StreamingPredictRequest and receives a stream of
// StreamingPredictResponse messages. Over HTTP the `endpoint` path variable
// may match either an endpoint resource
// (`projects/*/locations/*/endpoints/*`) or a publisher model
// (`projects/*/locations/*/publishers/*/models/*`).
rpc ServerStreamingPredict(StreamingPredictRequest)
    returns (stream StreamingPredictResponse) {
  option (google.api.http) = {
    post: "/v1/{endpoint=projects/*/locations/*/endpoints/*}:serverStreamingPredict"
    body: "*"
    additional_bindings {
      post: "/v1/{endpoint=projects/*/locations/*/publishers/*/models/*}:serverStreamingPredict"
      body: "*"
    }
  };
}

// Perform an online explanation.
//
// If
Expand Down Expand Up @@ -158,6 +173,11 @@ message PredictResponse {
// name][google.cloud.aiplatform.v1.Model.display_name] of the Model which is
// deployed as the DeployedModel that this prediction hits.
string model_display_name = 4 [(google.api.field_behavior) = OUTPUT_ONLY];

// Output only. Request-level metadata returned by the model. The metadata
// type will be dependent upon the model implementation.
google.protobuf.Value metadata = 6
[(google.api.field_behavior) = OUTPUT_ONLY];
}

// Request message for
Expand Down Expand Up @@ -191,6 +211,40 @@ message RawPredictRequest {
google.api.HttpBody http_body = 2;
}

// Request message for
// [PredictionService.ServerStreamingPredict][google.cloud.aiplatform.v1.PredictionService.ServerStreamingPredict].
//
// When several request messages are sent on one stream, the first message
// must contain the
// [endpoint][google.cloud.aiplatform.v1.StreamingPredictRequest.endpoint]
// field and optionally
// [inputs][google.cloud.aiplatform.v1.StreamingPredictRequest.inputs]; the
// subsequent messages must contain
// [inputs][google.cloud.aiplatform.v1.StreamingPredictRequest.inputs].
// ServerStreamingPredict takes a single request message, so that one message
// carries both.
message StreamingPredictRequest {
  // Required. The name of the Endpoint requested to serve the prediction.
  // Format:
  // `projects/{project}/locations/{location}/endpoints/{endpoint}`
  //
  // The HTTP binding of ServerStreamingPredict additionally matches
  // `projects/*/locations/*/publishers/*/models/*` for this field.
  string endpoint = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "aiplatform.googleapis.com/Endpoint"
    }
  ];

  // The prediction input.
  repeated Tensor inputs = 2;

  // The parameters that govern the prediction.
  Tensor parameters = 3;
}

// Response message for
// [PredictionService.ServerStreamingPredict][google.cloud.aiplatform.v1.PredictionService.ServerStreamingPredict].
//
// One message of this type is emitted per element of the response stream.
message StreamingPredictResponse {
  // The prediction output.
  repeated Tensor outputs = 1;

  // The parameters that govern the prediction.
  Tensor parameters = 2;
}

// Request message for
// [PredictionService.Explain][google.cloud.aiplatform.v1.PredictionService.Explain].
message ExplainRequest {
Expand Down
87 changes: 87 additions & 0 deletions google/cloud/aiplatform/v1/types.proto
Original file line number Diff line number Diff line change
Expand Up @@ -47,3 +47,90 @@ message StringArray {
// A list of string values.
repeated string values = 1;
}

// A tensor value type.
//
// The `*_val` fields are type specific representations that make it easy to
// create tensor protos in all languages. Only the representation
// corresponding to `dtype` can be set. The repeated scalar fields hold the
// flattened representation of the tensor in row major order.
//
// NOTE(review): field number 4 is not assigned to any field below. If it
// belonged to a removed field it should be declared `reserved 4;` so it is
// never reused -- confirm against the schema history.
message Tensor {
  // Data type of the tensor.
  enum DataType {
    // Not a legal value for DataType. Used to indicate a DataType field has not
    // been set.
    DATA_TYPE_UNSPECIFIED = 0;

    // Data types that all computation devices are expected to be
    // capable of supporting.
    //
    // BOOL values are carried in `bool_val`.
    BOOL = 1;

    // STRING values are carried in `string_val` or `bytes_val`.
    STRING = 2;

    // FLOAT values are carried in `float_val`.
    FLOAT = 3;

    // DOUBLE values are carried in `double_val`.
    DOUBLE = 4;

    // INT8 values are carried in `int_val`.
    INT8 = 5;

    // INT16 values are carried in `int_val`.
    INT16 = 6;

    // INT32 values are carried in `int_val`.
    INT32 = 7;

    // INT64 values are carried in `int64_val`.
    INT64 = 8;

    // UINT8 values are carried in `uint_val`.
    UINT8 = 9;

    // UINT16 values are carried in `uint_val`.
    UINT16 = 10;

    // UINT32 values are carried in `uint_val`.
    UINT32 = 11;

    // UINT64 values are carried in `uint64_val`.
    UINT64 = 12;
  }

  // The data type of tensor.
  DataType dtype = 1;

  // Shape of the tensor.
  repeated int64 shape = 2;

  // Values for dtype
  // [BOOL][google.cloud.aiplatform.v1.Tensor.DataType.BOOL].
  repeated bool bool_val = 3;

  // Values for dtype
  // [STRING][google.cloud.aiplatform.v1.Tensor.DataType.STRING].
  repeated string string_val = 14;

  // Values for dtype
  // [STRING][google.cloud.aiplatform.v1.Tensor.DataType.STRING], represented
  // as bytes.
  repeated bytes bytes_val = 15;

  // Values for dtype
  // [FLOAT][google.cloud.aiplatform.v1.Tensor.DataType.FLOAT].
  repeated float float_val = 5;

  // Values for dtype
  // [DOUBLE][google.cloud.aiplatform.v1.Tensor.DataType.DOUBLE].
  repeated double double_val = 6;

  // Values for dtypes
  // [INT8][google.cloud.aiplatform.v1.Tensor.DataType.INT8],
  // [INT16][google.cloud.aiplatform.v1.Tensor.DataType.INT16] and
  // [INT32][google.cloud.aiplatform.v1.Tensor.DataType.INT32].
  repeated int32 int_val = 7;

  // Values for dtype
  // [INT64][google.cloud.aiplatform.v1.Tensor.DataType.INT64].
  repeated int64 int64_val = 8;

  // Values for dtypes
  // [UINT8][google.cloud.aiplatform.v1.Tensor.DataType.UINT8],
  // [UINT16][google.cloud.aiplatform.v1.Tensor.DataType.UINT16] and
  // [UINT32][google.cloud.aiplatform.v1.Tensor.DataType.UINT32].
  repeated uint32 uint_val = 9;

  // Values for dtype
  // [UINT64][google.cloud.aiplatform.v1.Tensor.DataType.UINT64].
  repeated uint64 uint64_val = 10;

  // A list of tensor values.
  repeated Tensor list_val = 11;

  // A map of string to tensor.
  map<string, Tensor> struct_val = 12;

  // Serialized raw tensor content.
  bytes tensor_val = 13;
}
55 changes: 55 additions & 0 deletions google/cloud/aiplatform/v1beta1/prediction_service.proto
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import "google/api/field_behavior.proto";
import "google/api/httpbody.proto";
import "google/api/resource.proto";
import "google/cloud/aiplatform/v1beta1/explanation.proto";
import "google/cloud/aiplatform/v1beta1/types.proto";
import "google/protobuf/struct.proto";

option csharp_namespace = "Google.Cloud.AIPlatform.V1Beta1";
Expand Down Expand Up @@ -74,6 +75,20 @@ service PredictionService {
option (google.api.method_signature) = "endpoint,http_body";
}

// Server-side streaming online prediction for Vertex LLM streaming.
//
// The caller sends a single StreamingPredictRequest and receives a stream of
// StreamingPredictResponse messages. Over HTTP the `endpoint` path variable
// may match either an endpoint resource
// (`projects/*/locations/*/endpoints/*`) or a publisher model
// (`projects/*/locations/*/publishers/*/models/*`).
rpc ServerStreamingPredict(StreamingPredictRequest)
    returns (stream StreamingPredictResponse) {
  option (google.api.http) = {
    post: "/v1beta1/{endpoint=projects/*/locations/*/endpoints/*}:serverStreamingPredict"
    body: "*"
    additional_bindings {
      post: "/v1beta1/{endpoint=projects/*/locations/*/publishers/*/models/*}:serverStreamingPredict"
      body: "*"
    }
  };
}

// Perform an online explanation.
//
// If
Expand Down Expand Up @@ -160,6 +175,11 @@ message PredictResponse {
// name][google.cloud.aiplatform.v1beta1.Model.display_name] of the Model
// which is deployed as the DeployedModel that this prediction hits.
string model_display_name = 4 [(google.api.field_behavior) = OUTPUT_ONLY];

// Output only. Request-level metadata returned by the model. The metadata
// type will be dependent upon the model implementation.
google.protobuf.Value metadata = 6
[(google.api.field_behavior) = OUTPUT_ONLY];
}

// Request message for
Expand Down Expand Up @@ -193,6 +213,41 @@ message RawPredictRequest {
google.api.HttpBody http_body = 2;
}

// Request message for
// [PredictionService.ServerStreamingPredict][google.cloud.aiplatform.v1beta1.PredictionService.ServerStreamingPredict].
//
// When several request messages are sent on one stream, the first message
// must contain the
// [endpoint][google.cloud.aiplatform.v1beta1.StreamingPredictRequest.endpoint]
// field and optionally
// [inputs][google.cloud.aiplatform.v1beta1.StreamingPredictRequest.inputs];
// the subsequent messages must contain
// [inputs][google.cloud.aiplatform.v1beta1.StreamingPredictRequest.inputs].
// ServerStreamingPredict takes a single request message, so that one message
// carries both.
message StreamingPredictRequest {
  // Required. The name of the Endpoint requested to serve the prediction.
  // Format:
  // `projects/{project}/locations/{location}/endpoints/{endpoint}`
  //
  // The HTTP binding of ServerStreamingPredict additionally matches
  // `projects/*/locations/*/publishers/*/models/*` for this field.
  string endpoint = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "aiplatform.googleapis.com/Endpoint"
    }
  ];

  // The prediction input.
  repeated Tensor inputs = 2;

  // The parameters that govern the prediction.
  Tensor parameters = 3;
}

// Response message for
// [PredictionService.ServerStreamingPredict][google.cloud.aiplatform.v1beta1.PredictionService.ServerStreamingPredict].
//
// One message of this type is emitted per element of the response stream.
message StreamingPredictResponse {
  // The prediction output.
  repeated Tensor outputs = 1;

  // The parameters that govern the prediction.
  Tensor parameters = 2;
}

// Request message for
// [PredictionService.Explain][google.cloud.aiplatform.v1beta1.PredictionService.Explain].
message ExplainRequest {
Expand Down
87 changes: 87 additions & 0 deletions google/cloud/aiplatform/v1beta1/types.proto
Original file line number Diff line number Diff line change
Expand Up @@ -47,3 +47,90 @@ message StringArray {
// A list of string values.
repeated string values = 1;
}

// A tensor value type.
//
// The `*_val` fields are type specific representations that make it easy to
// create tensor protos in all languages. Only the representation
// corresponding to `dtype` can be set. The repeated scalar fields hold the
// flattened representation of the tensor in row major order.
//
// NOTE(review): field number 4 is not assigned to any field below. If it
// belonged to a removed field it should be declared `reserved 4;` so it is
// never reused -- confirm against the schema history.
message Tensor {
  // Data type of the tensor.
  enum DataType {
    // Not a legal value for DataType. Used to indicate a DataType field has not
    // been set.
    DATA_TYPE_UNSPECIFIED = 0;

    // Data types that all computation devices are expected to be
    // capable of supporting.
    //
    // BOOL values are carried in `bool_val`.
    BOOL = 1;

    // STRING values are carried in `string_val` or `bytes_val`.
    STRING = 2;

    // FLOAT values are carried in `float_val`.
    FLOAT = 3;

    // DOUBLE values are carried in `double_val`.
    DOUBLE = 4;

    // INT8 values are carried in `int_val`.
    INT8 = 5;

    // INT16 values are carried in `int_val`.
    INT16 = 6;

    // INT32 values are carried in `int_val`.
    INT32 = 7;

    // INT64 values are carried in `int64_val`.
    INT64 = 8;

    // UINT8 values are carried in `uint_val`.
    UINT8 = 9;

    // UINT16 values are carried in `uint_val`.
    UINT16 = 10;

    // UINT32 values are carried in `uint_val`.
    UINT32 = 11;

    // UINT64 values are carried in `uint64_val`.
    UINT64 = 12;
  }

  // The data type of tensor.
  DataType dtype = 1;

  // Shape of the tensor.
  repeated int64 shape = 2;

  // Values for dtype
  // [BOOL][google.cloud.aiplatform.v1beta1.Tensor.DataType.BOOL].
  repeated bool bool_val = 3;

  // Values for dtype
  // [STRING][google.cloud.aiplatform.v1beta1.Tensor.DataType.STRING].
  repeated string string_val = 14;

  // Values for dtype
  // [STRING][google.cloud.aiplatform.v1beta1.Tensor.DataType.STRING],
  // represented as bytes.
  repeated bytes bytes_val = 15;

  // Values for dtype
  // [FLOAT][google.cloud.aiplatform.v1beta1.Tensor.DataType.FLOAT].
  repeated float float_val = 5;

  // Values for dtype
  // [DOUBLE][google.cloud.aiplatform.v1beta1.Tensor.DataType.DOUBLE].
  repeated double double_val = 6;

  // Values for dtypes
  // [INT8][google.cloud.aiplatform.v1beta1.Tensor.DataType.INT8],
  // [INT16][google.cloud.aiplatform.v1beta1.Tensor.DataType.INT16] and
  // [INT32][google.cloud.aiplatform.v1beta1.Tensor.DataType.INT32].
  repeated int32 int_val = 7;

  // Values for dtype
  // [INT64][google.cloud.aiplatform.v1beta1.Tensor.DataType.INT64].
  repeated int64 int64_val = 8;

  // Values for dtypes
  // [UINT8][google.cloud.aiplatform.v1beta1.Tensor.DataType.UINT8],
  // [UINT16][google.cloud.aiplatform.v1beta1.Tensor.DataType.UINT16] and
  // [UINT32][google.cloud.aiplatform.v1beta1.Tensor.DataType.UINT32].
  repeated uint32 uint_val = 9;

  // Values for dtype
  // [UINT64][google.cloud.aiplatform.v1beta1.Tensor.DataType.UINT64].
  repeated uint64 uint64_val = 10;

  // A list of tensor values.
  repeated Tensor list_val = 11;

  // A map of string to tensor.
  map<string, Tensor> struct_val = 12;

  // Serialized raw tensor content.
  bytes tensor_val = 13;
}

0 comments on commit 1b650d6

Please sign in to comment.