feat: add processing strategy to batch recognition requests

PiperOrigin-RevId: 530882015
googleapis · May 10, 2023 · 189bdfa · 189bdfa
1 parent 620a023
commit 189bdfa
Show file tree

Hide file tree

Showing 3 changed files with 30 additions and 57 deletions.
diff --git a/google/cloud/speech/v2/BUILD.bazel b/google/cloud/speech/v2/BUILD.bazel
@@ -236,6 +236,7 @@ php_gapic_library(
     name = "speech_php_gapic",
     srcs = [":speech_proto_with_info"],
     grpc_service_config = "speech_grpc_service_config.json",
+    migration_mode = "PRE_MIGRATION_SURFACE_ONLY",
     rest_numeric_enums = True,
     service_yaml = "speech_v2.yaml",
     transport = "grpc+rest",

diff --git a/google/cloud/speech/v2/cloud_speech.proto b/google/cloud/speech/v2/cloud_speech.proto
@@ -444,8 +444,8 @@ message ListRecognizersRequest {
   ];
 
   // The maximum number of Recognizers to return. The service may return fewer
-  // than this value. If unspecified, at most 20 Recognizers will be returned.
-  // The maximum value is 20; values above 20 will be coerced to 20.
+  // than this value. If unspecified, at most 5 Recognizers will be returned.
+  // The maximum value is 100; values above 100 will be coerced to 100.
   int32 page_size = 2;
 
   // A page token, received from a previous
@@ -588,56 +588,19 @@ message Recognizer {
   // Required. Which model to use for recognition requests. Select the model
   // best suited to your domain to get best results.
   //
-  // Supported models:
-  //
-  // - `latest_long`
-  //
-  //   Best for long form content like media or conversation.
-  //
-  // - `latest_short`
-  //
-  //   Best for short form content like commands or single shot directed speech.
-  //   When using this model, the service will stop transcribing audio after the
-  //   first utterance is detected and completed.
-  //
-  //   When using this model,
-  //   [SEPARATE_RECOGNITION_PER_CHANNEL][google.cloud.speech.v2.RecognitionFeatures.MultiChannelMode.SEPARATE_RECOGNITION_PER_CHANNEL]
-  //   is not supported; multi-channel audio is accepted, but only the first
-  //   channel will be processed and transcribed.
-  //
-  // - `telephony`
-  //
-  //   Best for audio that originated from a phone call (typically recorded at
-  //   an 8khz sampling rate).
-  //
-  // - `medical_conversation`
-  //
-  //   For conversations between a medical provider—for example, a doctor or
-  //   nurse—and a patient. Use this model when both a provider and a patient
-  //   are speaking. Words uttered by each speaker are automatically detected
-  //   and labeled in the returned transcript.
-  //
-  //   For supported features please see [medical models
-  //   documentation](https://cloud.google.com/speech-to-text/docs/medical-models).
-  //
-  // - `medical_dictation`
-  //
-  //   For dictated notes spoken by a single medical provider—for example, a
-  //   doctor dictating notes about a patient's blood test results.
-  //
-  //   For supported features please see [medical models
-  //   documentation](https://cloud.google.com/speech-to-text/docs/medical-models).
-  //
-  // - `usm`
-  //
-  //   The next generation of Speech-to-Text models from Google.
+  // Guidance for choosing which model to use can be found in the [Transcription
+  // Models
+  // Documentation](https://cloud.google.com/speech-to-text/v2/docs/transcription-model)
+  // and the models supported in each region can be found in the [Table of
+  // Supported
+  // Models](https://cloud.google.com/speech-to-text/v2/docs/speech-to-text-supported-languages).
   string model = 4 [(google.api.field_behavior) = REQUIRED];
 
   // Required. The language of the supplied audio as a
   // [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag.
   //
-  // Supported languages for each model are listed at:
-  // https://cloud.google.com/speech-to-text/docs/languages
+  // Supported languages for each model are listed in the [Table of Supported
+  // Models](https://cloud.google.com/speech-to-text/v2/docs/speech-to-text-supported-languages).
   //
   // If additional languages are provided, recognition result will contain
   // recognition in the most likely language detected. The recognition result
@@ -1178,6 +1141,17 @@ message StreamingRecognizeRequest {
 // [BatchRecognize][google.cloud.speech.v2.Speech.BatchRecognize]
 // method.
 message BatchRecognizeRequest {
+  // Possible processing strategies for batch requests.
+  enum ProcessingStrategy {
+    // Default value for the processing strategy. The request is processed as
+    // soon as it is received.
+    PROCESSING_STRATEGY_UNSPECIFIED = 0;
+
+    // If selected, processes the request during lower utilization periods for a
+    // price discount. The request is fulfilled within 24 hours.
+    DYNAMIC_BATCHING = 1;
+  }
+
   // Required. Resource name of the recognizer to be used for ASR.
   string recognizer = 1 [
     (google.api.field_behavior) = REQUIRED,
@@ -1215,6 +1189,9 @@ message BatchRecognizeRequest {
 
   // Configuration options for where to output the transcripts of each file.
   RecognitionOutputConfig recognition_output_config = 6;
+
+  // Processing strategy to use for this request.
+  ProcessingStrategy processing_strategy = 7;
 }
 
 // Output configurations for Cloud Storage.
@@ -1793,10 +1770,10 @@ message ListCustomClassesRequest {
     }
   ];
 
-  // Number of results per requests. A valid page_size ranges from 0 to 20
+  // Number of results per request. A valid page_size ranges from 0 to 100
   // inclusive. If the page_size is zero or unspecified, a page size of 5 will
-  // be chosen. If the page size exceeds 20, it will be coerced down to 20. Note
-  // that a call might return fewer results than the requested page size.
+  // be chosen. If the page size exceeds 100, it will be coerced down to 100.
+  // Note that a call might return fewer results than the requested page size.
   int32 page_size = 2;
 
   // A page token, received from a previous
@@ -1948,8 +1925,8 @@ message ListPhraseSetsRequest {
   ];
 
   // The maximum number of PhraseSets to return. The service may return fewer
-  // than this value. If unspecified, at most 20 PhraseSets will be returned.
-  // The maximum value is 20; values above 20 will be coerced to 20.
+  // than this value. If unspecified, at most 5 PhraseSets will be returned.
+  // The maximum value is 100; values above 100 will be coerced to 100.
   int32 page_size = 2;
 
   // A page token, received from a previous

diff --git a/google/cloud/speech/v2/speech_v2.yaml b/google/cloud/speech/v2/speech_v2.yaml
@@ -32,11 +32,6 @@ documentation:
   - selector: google.cloud.location.Locations.ListLocations
     description: Lists information about the supported locations for this service.
 
-  - selector: google.longrunning.Operations.ListOperations
-    description: |-
-      Lists operations that match the specified filter in the request. If
-      the server doesn't support this method, it returns `UNIMPLEMENTED`.
-
 backend:
   rules:
   - selector: google.cloud.location.Locations.GetLocation