diff --git a/docs/dyn/speech_v1.speech.html b/docs/dyn/speech_v1.speech.html
index e383e104007..564bd0de49c 100644
--- a/docs/dyn/speech_v1.speech.html
+++ b/docs/dyn/speech_v1.speech.html
@@ -134,7 +134,7 @@ Method Details
     "alternativeLanguageCodes": [ # A list of up to 3 additional [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tags, listing possible alternative languages of the supplied audio. See [Language Support](https://cloud.google.com/speech-to-text/docs/languages) for a list of the currently supported language codes. If alternative languages are listed, recognition result will contain recognition in the most likely language detected including the main language_code. The recognition result will include the language tag of the language detected in the audio. Note: This feature is only supported for Voice Command and Voice Search use cases and performance may vary for other use cases (e.g., phone call transcription).
       "A String",
     ],
-    "audioChannelCount": 42, # The number of channels in the input audio data. ONLY set this for MULTI-CHANNEL recognition. Valid values for LINEAR16 and FLAC are `1`-`8`. Valid values for OGG_OPUS are '1'-'254'. Valid value for MULAW, AMR, AMR_WB and SPEEX_WITH_HEADER_BYTE is only `1`. If `0` or omitted, defaults to one channel (mono). Note: We only recognize the first channel by default. To perform independent recognition on each channel set `enable_separate_recognition_per_channel` to 'true'.
+    "audioChannelCount": 42, # The number of channels in the input audio data. ONLY set this for MULTI-CHANNEL recognition. Valid values for LINEAR16, OGG_OPUS and FLAC are `1`-`8`. Valid value for MULAW, AMR, AMR_WB and SPEEX_WITH_HEADER_BYTE is only `1`. If `0` or omitted, defaults to one channel (mono). Note: We only recognize the first channel by default. To perform independent recognition on each channel set `enable_separate_recognition_per_channel` to 'true'.
     "diarizationConfig": { # Config to enable speaker diarization. # Config to enable speaker diarization and set additional parameters to make diarization better suited for your application. Note: When this is enabled, we send all the words from the beginning of the audio for the top alternative in every consecutive STREAMING responses. This is done in order to improve our speaker tags as our models learn to identify the speakers in the conversation over time. For non-streaming requests, the diarization results will be provided only in the top alternative of the FINAL SpeechRecognitionResult.
       "enableSpeakerDiarization": True or False, # If 'true', enables speaker detection for each recognized word in the top alternative of the recognition result using a speaker_tag provided in the WordInfo.
       "maxSpeakerCount": 42, # Maximum number of speakers in the conversation. This range gives you more flexibility by allowing the system to automatically determine the correct number of speakers. If not set, the default value is 6.
@@ -252,7 +252,7 @@ Method Details
     "alternativeLanguageCodes": [ # A list of up to 3 additional [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tags, listing possible alternative languages of the supplied audio. See [Language Support](https://cloud.google.com/speech-to-text/docs/languages) for a list of the currently supported language codes. If alternative languages are listed, recognition result will contain recognition in the most likely language detected including the main language_code. The recognition result will include the language tag of the language detected in the audio. Note: This feature is only supported for Voice Command and Voice Search use cases and performance may vary for other use cases (e.g., phone call transcription).
       "A String",
     ],
-    "audioChannelCount": 42, # The number of channels in the input audio data. ONLY set this for MULTI-CHANNEL recognition. Valid values for LINEAR16 and FLAC are `1`-`8`. Valid values for OGG_OPUS are '1'-'254'. Valid value for MULAW, AMR, AMR_WB and SPEEX_WITH_HEADER_BYTE is only `1`. If `0` or omitted, defaults to one channel (mono). Note: We only recognize the first channel by default. To perform independent recognition on each channel set `enable_separate_recognition_per_channel` to 'true'.
+    "audioChannelCount": 42, # The number of channels in the input audio data. ONLY set this for MULTI-CHANNEL recognition. Valid values for LINEAR16, OGG_OPUS and FLAC are `1`-`8`. Valid value for MULAW, AMR, AMR_WB and SPEEX_WITH_HEADER_BYTE is only `1`. If `0` or omitted, defaults to one channel (mono). Note: We only recognize the first channel by default. To perform independent recognition on each channel set `enable_separate_recognition_per_channel` to 'true'.
     "diarizationConfig": { # Config to enable speaker diarization. # Config to enable speaker diarization and set additional parameters to make diarization better suited for your application. Note: When this is enabled, we send all the words from the beginning of the audio for the top alternative in every consecutive STREAMING responses. This is done in order to improve our speaker tags as our models learn to identify the speakers in the conversation over time. For non-streaming requests, the diarization results will be provided only in the top alternative of the FINAL SpeechRecognitionResult.
       "enableSpeakerDiarization": True or False, # If 'true', enables speaker detection for each recognized word in the top alternative of the recognition result using a speaker_tag provided in the WordInfo.
       "maxSpeakerCount": 42, # Maximum number of speakers in the conversation. This range gives you more flexibility by allowing the system to automatically determine the correct number of speakers. If not set, the default value is 6.
@@ -325,6 +325,10 @@ Method Details
       "resultEndTime": "A String", # Time offset of the end of this result relative to the beginning of the audio.
     },
   ],
+  "speechAdaptationInfo": { # Information on speech adaptation use in results # Provides information on adaptation behavior in response
+    "adaptationTimeout": True or False, # Whether there was a timeout when applying speech adaptation. If true, adaptation had no effect in the response transcript.
+    "timeoutMessage": "A String", # If set, returns a message specifying which part of the speech adaptation request timed out.
+  },
   "totalBilledTime": "A String", # When available, billed audio seconds for the corresponding request.
 }

diff --git a/docs/dyn/speech_v1p1beta1.speech.html b/docs/dyn/speech_v1p1beta1.speech.html
index 6d2b781ca6b..bb74b8983a6 100644
--- a/docs/dyn/speech_v1p1beta1.speech.html
+++ b/docs/dyn/speech_v1p1beta1.speech.html
@@ -134,7 +134,7 @@ Method Details
     "alternativeLanguageCodes": [ # A list of up to 3 additional [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tags, listing possible alternative languages of the supplied audio. See [Language Support](https://cloud.google.com/speech-to-text/docs/languages) for a list of the currently supported language codes. If alternative languages are listed, recognition result will contain recognition in the most likely language detected including the main language_code. The recognition result will include the language tag of the language detected in the audio. Note: This feature is only supported for Voice Command and Voice Search use cases and performance may vary for other use cases (e.g., phone call transcription).
       "A String",
     ],
-    "audioChannelCount": 42, # The number of channels in the input audio data. ONLY set this for MULTI-CHANNEL recognition. Valid values for LINEAR16 and FLAC are `1`-`8`. Valid values for OGG_OPUS are '1'-'254'. Valid value for MULAW, AMR, AMR_WB and SPEEX_WITH_HEADER_BYTE is only `1`. If `0` or omitted, defaults to one channel (mono). Note: We only recognize the first channel by default. To perform independent recognition on each channel set `enable_separate_recognition_per_channel` to 'true'.
+    "audioChannelCount": 42, # The number of channels in the input audio data. ONLY set this for MULTI-CHANNEL recognition. Valid values for LINEAR16, OGG_OPUS and FLAC are `1`-`8`. Valid value for MULAW, AMR, AMR_WB and SPEEX_WITH_HEADER_BYTE is only `1`. If `0` or omitted, defaults to one channel (mono). Note: We only recognize the first channel by default. To perform independent recognition on each channel set `enable_separate_recognition_per_channel` to 'true'.
     "diarizationConfig": { # Config to enable speaker diarization. # Config to enable speaker diarization and set additional parameters to make diarization better suited for your application. Note: When this is enabled, we send all the words from the beginning of the audio for the top alternative in every consecutive STREAMING responses. This is done in order to improve our speaker tags as our models learn to identify the speakers in the conversation over time. For non-streaming requests, the diarization results will be provided only in the top alternative of the FINAL SpeechRecognitionResult.
       "enableSpeakerDiarization": True or False, # If 'true', enables speaker detection for each recognized word in the top alternative of the recognition result using a speaker_tag provided in the WordInfo.
       "maxSpeakerCount": 42, # Maximum number of speakers in the conversation. This range gives you more flexibility by allowing the system to automatically determine the correct number of speakers. If not set, the default value is 6.
@@ -264,7 +264,7 @@ Method Details
     "alternativeLanguageCodes": [ # A list of up to 3 additional [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tags, listing possible alternative languages of the supplied audio. See [Language Support](https://cloud.google.com/speech-to-text/docs/languages) for a list of the currently supported language codes. If alternative languages are listed, recognition result will contain recognition in the most likely language detected including the main language_code. The recognition result will include the language tag of the language detected in the audio. Note: This feature is only supported for Voice Command and Voice Search use cases and performance may vary for other use cases (e.g., phone call transcription).
       "A String",
     ],
-    "audioChannelCount": 42, # The number of channels in the input audio data. ONLY set this for MULTI-CHANNEL recognition. Valid values for LINEAR16 and FLAC are `1`-`8`. Valid values for OGG_OPUS are '1'-'254'. Valid value for MULAW, AMR, AMR_WB and SPEEX_WITH_HEADER_BYTE is only `1`. If `0` or omitted, defaults to one channel (mono). Note: We only recognize the first channel by default. To perform independent recognition on each channel set `enable_separate_recognition_per_channel` to 'true'.
+    "audioChannelCount": 42, # The number of channels in the input audio data. ONLY set this for MULTI-CHANNEL recognition. Valid values for LINEAR16, OGG_OPUS and FLAC are `1`-`8`. Valid value for MULAW, AMR, AMR_WB and SPEEX_WITH_HEADER_BYTE is only `1`. If `0` or omitted, defaults to one channel (mono). Note: We only recognize the first channel by default. To perform independent recognition on each channel set `enable_separate_recognition_per_channel` to 'true'.
     "diarizationConfig": { # Config to enable speaker diarization. # Config to enable speaker diarization and set additional parameters to make diarization better suited for your application. Note: When this is enabled, we send all the words from the beginning of the audio for the top alternative in every consecutive STREAMING responses. This is done in order to improve our speaker tags as our models learn to identify the speakers in the conversation over time. For non-streaming requests, the diarization results will be provided only in the top alternative of the FINAL SpeechRecognitionResult.
       "enableSpeakerDiarization": True or False, # If 'true', enables speaker detection for each recognized word in the top alternative of the recognition result using a speaker_tag provided in the WordInfo.
       "maxSpeakerCount": 42, # Maximum number of speakers in the conversation. This range gives you more flexibility by allowing the system to automatically determine the correct number of speakers. If not set, the default value is 6.
@@ -349,6 +349,10 @@ Method Details
       "resultEndTime": "A String", # Time offset of the end of this result relative to the beginning of the audio.
     },
   ],
+  "speechAdaptationInfo": { # Information on speech adaptation use in results # Provides information on adaptation behavior in response
+    "adaptationTimeout": True or False, # Whether there was a timeout when applying speech adaptation. If true, adaptation had no effect in the response transcript.
+    "timeoutMessage": "A String", # If set, returns a message specifying which part of the speech adaptation request timed out.
+  },
   "totalBilledTime": "A String", # When available, billed audio seconds for the corresponding request.
 }
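
The new speechAdaptationInfo block documented above is surfaced on recognize and long-running recognize responses. A hedged sketch of checking it on a v1p1beta1 recognize() call; the Cloud Storage URI and the inline phrase set are placeholders, not values taken from this change:

from googleapiclient.discovery import build

speech = build("speech", "v1p1beta1")

body = {
    "config": {
        "languageCode": "en-US",
        # Illustrative inline adaptation; large adaptation configs are the
        # case most likely to hit the timeout the new field reports.
        "adaptation": {
            "phraseSets": [
                {"phrases": [{"value": "speech adaptation", "boost": 10}]},
            ],
        },
    },
    "audio": {"uri": "gs://my-bucket/audio.flac"},  # placeholder URI
}

response = speech.speech().recognize(body=body).execute()

info = response.get("speechAdaptationInfo", {})
if info.get("adaptationTimeout"):
    # Per the new field docs, adaptation had no effect on this transcript.
    print("Adaptation timed out:", info.get("timeoutMessage", ""))
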
"resultEndTime": "A String", # Time offset of the end of this result relative to the beginning of the audio. }, ], + "speechAdaptationInfo": { # Information on speech adaptation use in results # Provides information on adaptation behavior in response + "adaptationTimeout": True or False, # Whether there was a timeout when applying speech adaptation. If true, adaptation had no effect in the response transcript. + "timeoutMessage": "A String", # If set, returns a message specifying which part of the speech adaptation request timed out. + }, "totalBilledTime": "A String", # When available, billed audio seconds for the corresponding request. } diff --git a/googleapiclient/discovery_cache/documents/speech.v1.json b/googleapiclient/discovery_cache/documents/speech.v1.json index ed2fe585b12..91178679dbd 100644 --- a/googleapiclient/discovery_cache/documents/speech.v1.json +++ b/googleapiclient/discovery_cache/documents/speech.v1.json @@ -524,7 +524,7 @@ } } }, - "revision": "20221007", + "revision": "20221020", "rootUrl": "https://speech.googleapis.com/", "schemas": { "ClassItem": { @@ -720,6 +720,10 @@ }, "type": "array" }, + "speechAdaptationInfo": { + "$ref": "SpeechAdaptationInfo", + "description": "Provides information on speech adaptation behavior in response" + }, "totalBilledTime": { "description": "When available, billed audio seconds for the corresponding request.", "format": "google-duration", @@ -834,7 +838,7 @@ "type": "array" }, "audioChannelCount": { - "description": "The number of channels in the input audio data. ONLY set this for MULTI-CHANNEL recognition. Valid values for LINEAR16 and FLAC are `1`-`8`. Valid values for OGG_OPUS are '1'-'254'. Valid value for MULAW, AMR, AMR_WB and SPEEX_WITH_HEADER_BYTE is only `1`. If `0` or omitted, defaults to one channel (mono). Note: We only recognize the first channel by default. To perform independent recognition on each channel set `enable_separate_recognition_per_channel` to 'true'.", + "description": "The number of channels in the input audio data. ONLY set this for MULTI-CHANNEL recognition. Valid values for LINEAR16, OGG_OPUS and FLAC are `1`-`8`. Valid value for MULAW, AMR, AMR_WB and SPEEX_WITH_HEADER_BYTE is only `1`. If `0` or omitted, defaults to one channel (mono). Note: We only recognize the first channel by default. To perform independent recognition on each channel set `enable_separate_recognition_per_channel` to 'true'.", "format": "int32", "type": "integer" }, @@ -1065,6 +1069,10 @@ }, "type": "array" }, + "speechAdaptationInfo": { + "$ref": "SpeechAdaptationInfo", + "description": "Provides information on adaptation behavior in response" + }, "totalBilledTime": { "description": "When available, billed audio seconds for the corresponding request.", "format": "google-duration", @@ -1128,6 +1136,21 @@ }, "type": "object" }, + "SpeechAdaptationInfo": { + "description": "Information on speech adaptation use in results", + "id": "SpeechAdaptationInfo", + "properties": { + "adaptationTimeout": { + "description": "Whether there was a timeout when applying speech adaptation. 
If true, adaptation had no effect in the response transcript.", + "type": "boolean" + }, + "timeoutMessage": { + "description": "If set, returns a message specifying which part of the speech adaptation request timed out.", + "type": "string" + } + }, + "type": "object" + }, "SpeechContext": { "description": "Provides \"hints\" to the speech recognizer to favor specific words and phrases in the results.", "id": "SpeechContext", diff --git a/googleapiclient/discovery_cache/documents/speech.v1p1beta1.json b/googleapiclient/discovery_cache/documents/speech.v1p1beta1.json index 11f0c372a8f..76d5e574c6a 100644 --- a/googleapiclient/discovery_cache/documents/speech.v1p1beta1.json +++ b/googleapiclient/discovery_cache/documents/speech.v1p1beta1.json @@ -524,7 +524,7 @@ } } }, - "revision": "20221007", + "revision": "20221020", "rootUrl": "https://speech.googleapis.com/", "schemas": { "ClassItem": { @@ -744,6 +744,10 @@ }, "type": "array" }, + "speechAdaptationInfo": { + "$ref": "SpeechAdaptationInfo", + "description": "Provides information on speech adaptation behavior in response" + }, "totalBilledTime": { "description": "When available, billed audio seconds for the corresponding request.", "format": "google-duration", @@ -858,7 +862,7 @@ "type": "array" }, "audioChannelCount": { - "description": "The number of channels in the input audio data. ONLY set this for MULTI-CHANNEL recognition. Valid values for LINEAR16 and FLAC are `1`-`8`. Valid values for OGG_OPUS are '1'-'254'. Valid value for MULAW, AMR, AMR_WB and SPEEX_WITH_HEADER_BYTE is only `1`. If `0` or omitted, defaults to one channel (mono). Note: We only recognize the first channel by default. To perform independent recognition on each channel set `enable_separate_recognition_per_channel` to 'true'.", + "description": "The number of channels in the input audio data. ONLY set this for MULTI-CHANNEL recognition. Valid values for LINEAR16, OGG_OPUS and FLAC are `1`-`8`. Valid value for MULAW, AMR, AMR_WB and SPEEX_WITH_HEADER_BYTE is only `1`. If `0` or omitted, defaults to one channel (mono). Note: We only recognize the first channel by default. To perform independent recognition on each channel set `enable_separate_recognition_per_channel` to 'true'.", "format": "int32", "type": "integer" }, @@ -1109,6 +1113,10 @@ }, "type": "array" }, + "speechAdaptationInfo": { + "$ref": "SpeechAdaptationInfo", + "description": "Provides information on adaptation behavior in response" + }, "totalBilledTime": { "description": "When available, billed audio seconds for the corresponding request.", "format": "google-duration", @@ -1172,6 +1180,21 @@ }, "type": "object" }, + "SpeechAdaptationInfo": { + "description": "Information on speech adaptation use in results", + "id": "SpeechAdaptationInfo", + "properties": { + "adaptationTimeout": { + "description": "Whether there was a timeout when applying speech adaptation. If true, adaptation had no effect in the response transcript.", + "type": "boolean" + }, + "timeoutMessage": { + "description": "If set, returns a message specifying which part of the speech adaptation request timed out.", + "type": "string" + } + }, + "type": "object" + }, "SpeechContext": { "description": "Provides \"hints\" to the speech recognizer to favor specific words and phrases in the results.", "id": "SpeechContext",
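
For completeness, a small sanity check that the regenerated discovery documents carry the bumped revision and the new SpeechAdaptationInfo schema; paths assume the script runs from a checkout of this repository:

import json

for name in ("speech.v1.json", "speech.v1p1beta1.json"):
    path = f"googleapiclient/discovery_cache/documents/{name}"
    with open(path) as f:
        doc = json.load(f)
    assert doc["revision"] == "20221020"
    props = doc["schemas"]["SpeechAdaptationInfo"]["properties"]
    # Expect exactly the two fields added in this revision.
    print(name, sorted(props))  # ['adaptationTimeout', 'timeoutMessage']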