Skip to content

Commit

Permalink
feat: add model and language_codes fields in RecognitionConfig
Browse files Browse the repository at this point in the history
…message + enable default `_` recognizer

Enables specifying `model` and `language_codes` in requests without having to specify them in the Recognizer (they can still be specified in the Recognizer in the `default_recognition_config` field).

Also enables using the recognizer ID `_` to perform recognition without explicitly creating a Recognizer resource.

The top-level `model` and `language_codes` fields are deprecated in favor of the new fields added in the `RecognitionConfig` message. The old fields continue to work.

PiperOrigin-RevId: 545698919
  • Loading branch information
Google APIs authored and Copybara-Service committed Jul 5, 2023
1 parent 52df7f9 commit e73fc8f
Show file tree
Hide file tree
Showing 3 changed files with 61 additions and 22 deletions.
5 changes: 4 additions & 1 deletion google/cloud/speech/v2/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -225,7 +225,9 @@ php_gapic_library(
rest_numeric_enums = True,
service_yaml = "speech_v2.yaml",
transport = "grpc+rest",
deps = [":speech_php_proto"],
deps = [
":speech_php_proto",
],
)

# Open Source Packages
Expand Down Expand Up @@ -334,6 +336,7 @@ load(

csharp_proto_library(
name = "speech_csharp_proto",
extra_opts = [],
deps = [":speech_proto"],
)

Expand Down
74 changes: 53 additions & 21 deletions google/cloud/speech/v2/cloud_speech.proto
Original file line number Diff line number Diff line change
Expand Up @@ -585,7 +585,7 @@ message Recognizer {
// characters or less.
string display_name = 3;

// Required. Which model to use for recognition requests. Select the model
// Optional. Which model to use for recognition requests. Select the model
// best suited to your domain to get best results.
//
// Guidance for choosing which model to use can be found in the [Transcription
Expand All @@ -594,9 +594,9 @@ message Recognizer {
// and the models supported in each region can be found in the [Table Of
// Supported
// Models](https://cloud.google.com/speech-to-text/v2/docs/speech-to-text-supported-languages).
string model = 4 [(google.api.field_behavior) = REQUIRED];
string model = 4 [deprecated = true, (google.api.field_behavior) = OPTIONAL];

// Required. The language of the supplied audio as a
// Optional. The language of the supplied audio as a
// [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag.
//
// Supported languages for each model are listed in the [Table of Supported
Expand All @@ -608,7 +608,8 @@ message Recognizer {
// When you create or update a Recognizer, these values are
// stored in normalized BCP-47 form. For example, "en-us" is stored as
// "en-US".
repeated string language_codes = 17 [(google.api.field_behavior) = REQUIRED];
repeated string language_codes = 17
[deprecated = true, (google.api.field_behavior) = OPTIONAL];

// Default configuration to use for requests with this Recognizer.
// This can be overwritten by inline configuration in the
Expand Down Expand Up @@ -867,6 +868,30 @@ message RecognitionConfig {
ExplicitDecodingConfig explicit_decoding_config = 8;
}

// Optional. Which model to use for recognition requests. Select the model
// best suited to your domain to get best results.
//
// Guidance for choosing which model to use can be found in the [Transcription
// Models
// Documentation](https://cloud.google.com/speech-to-text/v2/docs/transcription-model)
// and the models supported in each region can be found in the [Table Of
// Supported
// Models](https://cloud.google.com/speech-to-text/v2/docs/speech-to-text-supported-languages).
string model = 9 [(google.api.field_behavior) = OPTIONAL];

// Optional. The language of the supplied audio as a
// [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag.
// Language tags are normalized to BCP-47 before they are used, e.g. "en-us"
// becomes "en-US".
//
// Supported languages for each model are listed in the [Table of Supported
// Models](https://cloud.google.com/speech-to-text/v2/docs/speech-to-text-supported-languages).
//
// If additional languages are provided, the recognition result will contain
// recognition in the most likely language detected. The recognition result
// will include the language tag of the language detected in the audio.
repeated string language_codes = 10 [(google.api.field_behavior) = OPTIONAL];

// Speech recognition features to enable.
RecognitionFeatures features = 2;

Expand All @@ -883,7 +908,8 @@ message RecognitionConfig {
message RecognizeRequest {
// Required. The name of the Recognizer to use during recognition. The
// expected format is
// `projects/{project}/locations/{location}/recognizers/{recognizer}`.
// `projects/{project}/locations/{location}/recognizers/{recognizer}`. The
// {recognizer} segment may be set to `_` to use an empty implicit Recognizer.
string recognizer = 3 [
(google.api.field_behavior) = REQUIRED,
(google.api.resource_reference) = {
Expand Down Expand Up @@ -1100,24 +1126,27 @@ message StreamingRecognitionConfig {
// [StreamingRecognize][google.cloud.speech.v2.Speech.StreamingRecognize]
// method. Multiple
// [StreamingRecognizeRequest][google.cloud.speech.v2.StreamingRecognizeRequest]
// messages are sent. The first message must contain a
// messages are sent in one call.
//
// If the [Recognizer][google.cloud.speech.v2.Recognizer] referenced by
// [recognizer][google.cloud.speech.v2.StreamingRecognizeRequest.recognizer]
// contains a fully specified request configuration, then the stream may
// contain only messages that have only
// [audio][google.cloud.speech.v2.StreamingRecognizeRequest.audio] set.
//
// Otherwise the first message must contain a
// [recognizer][google.cloud.speech.v2.StreamingRecognizeRequest.recognizer] and
// optionally a
// a
// [streaming_config][google.cloud.speech.v2.StreamingRecognizeRequest.streaming_config]
// message and must not contain
// [audio][google.cloud.speech.v2.StreamingRecognizeRequest.audio]. All
// subsequent messages must contain
// [audio][google.cloud.speech.v2.StreamingRecognizeRequest.audio] and must not
// contain a
// [streaming_config][google.cloud.speech.v2.StreamingRecognizeRequest.streaming_config]
// message.
// message that together fully specify the request configuration and must not
// contain [audio][google.cloud.speech.v2.StreamingRecognizeRequest.audio]. All
// subsequent messages must only have
// [audio][google.cloud.speech.v2.StreamingRecognizeRequest.audio] set.
message StreamingRecognizeRequest {
// Required. Streaming recognition should start with an initial request having
// a `recognizer`. Subsequent requests carry the audio data to be recognized.
//
// The initial request with configuration can be omitted if the Recognizer
// being used has a
// [default_recognition_config][google.cloud.speech.v2.Recognizer.default_recognition_config].
// Required. The name of the Recognizer to use during recognition. The
// expected format is
// `projects/{project}/locations/{location}/recognizers/{recognizer}`. The
// {recognizer} segment may be set to `_` to use an empty implicit Recognizer.
string recognizer = 3 [
(google.api.field_behavior) = REQUIRED,
(google.api.resource_reference) = {
Expand Down Expand Up @@ -1152,7 +1181,10 @@ message BatchRecognizeRequest {
DYNAMIC_BATCHING = 1;
}

// Required. Resource name of the recognizer to be used for ASR.
// Required. The name of the Recognizer to use during recognition. The
// expected format is
// `projects/{project}/locations/{location}/recognizers/{recognizer}`. The
// {recognizer} segment may be set to `_` to use an empty implicit Recognizer.
string recognizer = 1 [
(google.api.field_behavior) = REQUIRED,
(google.api.resource_reference) = {
Expand Down
4 changes: 4 additions & 0 deletions google/cloud/speech/v2/speech_v2.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,10 @@ backend:
- selector: google.cloud.location.Locations.ListLocations
deadline: 355.0
- selector: 'google.cloud.speech.v2.Speech.*'
deadline: 10.0
- selector: google.cloud.speech.v2.Speech.Recognize
deadline: 185.0
- selector: google.cloud.speech.v2.Speech.StreamingRecognize
deadline: 355.0
- selector: 'google.longrunning.Operations.*'
deadline: 355.0
Expand Down

0 comments on commit e73fc8f

Please sign in to comment.