Skip to content

Commit

Permalink
feat: add processing strategy to batch recognition requests
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 530882015
  • Loading branch information
Google APIs authored and Copybara-Service committed May 10, 2023
1 parent 620a023 commit 189bdfa
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 57 deletions.
1 change: 1 addition & 0 deletions google/cloud/speech/v2/BUILD.bazel
Expand Up @@ -236,6 +236,7 @@ php_gapic_library(
name = "speech_php_gapic",
srcs = [":speech_proto_with_info"],
grpc_service_config = "speech_grpc_service_config.json",
migration_mode = "PRE_MIGRATION_SURFACE_ONLY",
rest_numeric_enums = True,
service_yaml = "speech_v2.yaml",
transport = "grpc+rest",
Expand Down
81 changes: 29 additions & 52 deletions google/cloud/speech/v2/cloud_speech.proto
Expand Up @@ -444,8 +444,8 @@ message ListRecognizersRequest {
];

// The maximum number of Recognizers to return. The service may return fewer
// than this value. If unspecified, at most 20 Recognizers will be returned.
// The maximum value is 20; values above 20 will be coerced to 20.
// than this value. If unspecified, at most 5 Recognizers will be returned.
// The maximum value is 100; values above 100 will be coerced to 100.
int32 page_size = 2;

// A page token, received from a previous
Expand Down Expand Up @@ -588,56 +588,19 @@ message Recognizer {
// Required. Which model to use for recognition requests. Select the model
// best suited to your domain to get best results.
//
// Supported models:
//
// - `latest_long`
//
// Best for long form content like media or conversation.
//
// - `latest_short`
//
// Best for short form content like commands or single shot directed speech.
// When using this model, the service will stop transcribing audio after the
// first utterance is detected and completed.
//
// When using this model,
// [SEPARATE_RECOGNITION_PER_CHANNEL][google.cloud.speech.v2.RecognitionFeatures.MultiChannelMode.SEPARATE_RECOGNITION_PER_CHANNEL]
// is not supported; multi-channel audio is accepted, but only the first
// channel will be processed and transcribed.
//
// - `telephony`
//
// Best for audio that originated from a phone call (typically recorded at
// an 8 kHz sampling rate).
//
// - `medical_conversation`
//
// For conversations between a medical provider—for example, a doctor or
// nurse—and a patient. Use this model when both a provider and a patient
// are speaking. Words uttered by each speaker are automatically detected
// and labeled in the returned transcript.
//
// For supported features please see [medical models
// documentation](https://cloud.google.com/speech-to-text/docs/medical-models).
//
// - `medical_dictation`
//
// For dictated notes spoken by a single medical provider—for example, a
// doctor dictating notes about a patient's blood test results.
//
// For supported features please see [medical models
// documentation](https://cloud.google.com/speech-to-text/docs/medical-models).
//
// - `usm`
//
// The next generation of Speech-to-Text models from Google.
// Guidance for choosing which model to use can be found in the [Transcription
// Models
// Documentation](https://cloud.google.com/speech-to-text/v2/docs/transcription-model)
// and the models supported in each region can be found in the [Table Of
// Supported
// Models](https://cloud.google.com/speech-to-text/v2/docs/speech-to-text-supported-languages).
string model = 4 [(google.api.field_behavior) = REQUIRED];

// Required. The language of the supplied audio as a
// [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag.
//
// Supported languages for each model are listed at:
// https://cloud.google.com/speech-to-text/docs/languages
// Supported languages for each model are listed in the [Table of Supported
// Models](https://cloud.google.com/speech-to-text/v2/docs/speech-to-text-supported-languages).
//
// If additional languages are provided, recognition result will contain
// recognition in the most likely language detected. The recognition result
Expand Down Expand Up @@ -1178,6 +1141,17 @@ message StreamingRecognizeRequest {
// [BatchRecognize][google.cloud.speech.v2.Speech.BatchRecognize]
// method.
message BatchRecognizeRequest {
// Possible processing strategies for batch requests.
enum ProcessingStrategy {
// Default value for the processing strategy. The request is processed as
// soon as it is received.
PROCESSING_STRATEGY_UNSPECIFIED = 0;

// If selected, processes the request during lower utilization periods for a
// price discount. The request is fulfilled within 24 hours.
DYNAMIC_BATCHING = 1;
}

// Required. Resource name of the recognizer to be used for ASR.
string recognizer = 1 [
(google.api.field_behavior) = REQUIRED,
Expand Down Expand Up @@ -1215,6 +1189,9 @@ message BatchRecognizeRequest {

// Configuration options for where to output the transcripts of each file.
RecognitionOutputConfig recognition_output_config = 6;

// Processing strategy to use for this request.
ProcessingStrategy processing_strategy = 7;
}

// Output configurations for Cloud Storage.
Expand Down Expand Up @@ -1793,10 +1770,10 @@ message ListCustomClassesRequest {
}
];

// Number of results per request. A valid page_size ranges from 0 to 20
// Number of results per request. A valid page_size ranges from 0 to 100
// inclusive. If the page_size is zero or unspecified, a page size of 5 will
// be chosen. If the page size exceeds 20, it will be coerced down to 20. Note
// that a call might return fewer results than the requested page size.
// be chosen. If the page size exceeds 100, it will be coerced down to 100.
// Note that a call might return fewer results than the requested page size.
int32 page_size = 2;

// A page token, received from a previous
Expand Down Expand Up @@ -1948,8 +1925,8 @@ message ListPhraseSetsRequest {
];

// The maximum number of PhraseSets to return. The service may return fewer
// than this value. If unspecified, at most 20 PhraseSets will be returned.
// The maximum value is 20; values above 20 will be coerced to 20.
// than this value. If unspecified, at most 5 PhraseSets will be returned.
// The maximum value is 100; values above 100 will be coerced to 100.
int32 page_size = 2;

// A page token, received from a previous
Expand Down
5 changes: 0 additions & 5 deletions google/cloud/speech/v2/speech_v2.yaml
Expand Up @@ -32,11 +32,6 @@ documentation:
- selector: google.cloud.location.Locations.ListLocations
description: Lists information about the supported locations for this service.

- selector: google.longrunning.Operations.ListOperations
description: |-
Lists operations that match the specified filter in the request. If
the server doesn't support this method, it returns `UNIMPLEMENTED`.
backend:
rules:
- selector: google.cloud.location.Locations.GetLocation
Expand Down

0 comments on commit 189bdfa

Please sign in to comment.