docs/dyn/speech_v1.speech.html (+6, -2)
@@ -134,7 +134,7 @@ <h3>Method Details</h3>
  "alternativeLanguageCodes": [ # A list of up to 3 additional [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tags, listing possible alternative languages of the supplied audio. See [Language Support](https://cloud.google.com/speech-to-text/docs/languages) for a list of the currently supported language codes. If alternative languages are listed, recognition result will contain recognition in the most likely language detected including the main language_code. The recognition result will include the language tag of the language detected in the audio. Note: This feature is only supported for Voice Command and Voice Search use cases and performance may vary for other use cases (e.g., phone call transcription).
  "A String",
  ],
- "audioChannelCount": 42, # The number of channels in the input audio data. ONLY set this for MULTI-CHANNEL recognition. Valid values for LINEAR16and FLAC are `1`-`8`. Valid values for OGG_OPUS are '1'-'254'. Valid value for MULAW, AMR, AMR_WB and SPEEX_WITH_HEADER_BYTE is only `1`. If `0` or omitted, defaults to one channel (mono). Note: We only recognize the first channel by default. To perform independent recognition on each channel set `enable_separate_recognition_per_channel` to 'true'.
+ "audioChannelCount": 42, # The number of channels in the input audio data. ONLY set this for MULTI-CHANNEL recognition. Valid values for LINEAR16, OGG_OPUS and FLAC are `1`-`8`. Valid value for MULAW, AMR, AMR_WB and SPEEX_WITH_HEADER_BYTE is only `1`. If `0` or omitted, defaults to one channel (mono). Note: We only recognize the first channel by default. To perform independent recognition on each channel set `enable_separate_recognition_per_channel` to 'true'.
  "diarizationConfig": { # Config to enable speaker diarization. # Config to enable speaker diarization and set additional parameters to make diarization better suited for your application. Note: When this is enabled, we send all the words from the beginning of the audio for the top alternative in every consecutive STREAMING responses. This is done in order to improve our speaker tags as our models learn to identify the speakers in the conversation over time. For non-streaming requests, the diarization results will be provided only in the top alternative of the FINAL SpeechRecognitionResult.
  "enableSpeakerDiarization": True or False, # If 'true', enables speaker detection for each recognized word in the top alternative of the recognition result using a speaker_tag provided in the WordInfo.
  "maxSpeakerCount": 42, # Maximum number of speakers in the conversation. This range gives you more flexibility by allowing the system to automatically determine the correct number of speakers. If not set, the default value is 6.
@@ -252,7 +252,7 @@ <h3>Method Details</h3>
  "alternativeLanguageCodes": [ # A list of up to 3 additional [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tags, listing possible alternative languages of the supplied audio. See [Language Support](https://cloud.google.com/speech-to-text/docs/languages) for a list of the currently supported language codes. If alternative languages are listed, recognition result will contain recognition in the most likely language detected including the main language_code. The recognition result will include the language tag of the language detected in the audio. Note: This feature is only supported for Voice Command and Voice Search use cases and performance may vary for other use cases (e.g., phone call transcription).
  "A String",
  ],
- "audioChannelCount": 42, # The number of channels in the input audio data. ONLY set this for MULTI-CHANNEL recognition. Valid values for LINEAR16and FLAC are `1`-`8`. Valid values for OGG_OPUS are '1'-'254'. Valid value for MULAW, AMR, AMR_WB and SPEEX_WITH_HEADER_BYTE is only `1`. If `0` or omitted, defaults to one channel (mono). Note: We only recognize the first channel by default. To perform independent recognition on each channel set `enable_separate_recognition_per_channel` to 'true'.
+ "audioChannelCount": 42, # The number of channels in the input audio data. ONLY set this for MULTI-CHANNEL recognition. Valid values for LINEAR16, OGG_OPUS and FLAC are `1`-`8`. Valid value for MULAW, AMR, AMR_WB and SPEEX_WITH_HEADER_BYTE is only `1`. If `0` or omitted, defaults to one channel (mono). Note: We only recognize the first channel by default. To perform independent recognition on each channel set `enable_separate_recognition_per_channel` to 'true'.
  "diarizationConfig": { # Config to enable speaker diarization. # Config to enable speaker diarization and set additional parameters to make diarization better suited for your application. Note: When this is enabled, we send all the words from the beginning of the audio for the top alternative in every consecutive STREAMING responses. This is done in order to improve our speaker tags as our models learn to identify the speakers in the conversation over time. For non-streaming requests, the diarization results will be provided only in the top alternative of the FINAL SpeechRecognitionResult.
  "enableSpeakerDiarization": True or False, # If 'true', enables speaker detection for each recognized word in the top alternative of the recognition result using a speaker_tag provided in the WordInfo.
  "maxSpeakerCount": 42, # Maximum number of speakers in the conversation. This range gives you more flexibility by allowing the system to automatically determine the correct number of speakers. If not set, the default value is 6.
@@ -325,6 +325,10 @@ <h3>Method Details</h3>
  "resultEndTime": "A String", # Time offset of the end of this result relative to the beginning of the audio.
  },
  ],
+ "speechAdaptationInfo": { # Information on speech adaptation use in results # Provides information on adaptation behavior in response
+ "adaptationTimeout": True or False, # Whether there was a timeout when applying speech adaptation. If true, adaptation had no effect in the response transcript.
+ "timeoutMessage": "A String", # If set, returns a message specifying which part of the speech adaptation request timed out.
+ },
  "totalBilledTime": "A String", # When available, billed audio seconds for the corresponding request.
docs/dyn/speech_v1p1beta1.speech.html (+6, -2)
@@ -134,7 +134,7 @@ <h3>Method Details</h3>
  "alternativeLanguageCodes": [ # A list of up to 3 additional [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tags, listing possible alternative languages of the supplied audio. See [Language Support](https://cloud.google.com/speech-to-text/docs/languages) for a list of the currently supported language codes. If alternative languages are listed, recognition result will contain recognition in the most likely language detected including the main language_code. The recognition result will include the language tag of the language detected in the audio. Note: This feature is only supported for Voice Command and Voice Search use cases and performance may vary for other use cases (e.g., phone call transcription).
  "A String",
  ],
- "audioChannelCount": 42, # The number of channels in the input audio data. ONLY set this for MULTI-CHANNEL recognition. Valid values for LINEAR16and FLAC are `1`-`8`. Valid values for OGG_OPUS are '1'-'254'. Valid value for MULAW, AMR, AMR_WB and SPEEX_WITH_HEADER_BYTE is only `1`. If `0` or omitted, defaults to one channel (mono). Note: We only recognize the first channel by default. To perform independent recognition on each channel set `enable_separate_recognition_per_channel` to 'true'.
+ "audioChannelCount": 42, # The number of channels in the input audio data. ONLY set this for MULTI-CHANNEL recognition. Valid values for LINEAR16, OGG_OPUS and FLAC are `1`-`8`. Valid value for MULAW, AMR, AMR_WB and SPEEX_WITH_HEADER_BYTE is only `1`. If `0` or omitted, defaults to one channel (mono). Note: We only recognize the first channel by default. To perform independent recognition on each channel set `enable_separate_recognition_per_channel` to 'true'.
  "diarizationConfig": { # Config to enable speaker diarization. # Config to enable speaker diarization and set additional parameters to make diarization better suited for your application. Note: When this is enabled, we send all the words from the beginning of the audio for the top alternative in every consecutive STREAMING responses. This is done in order to improve our speaker tags as our models learn to identify the speakers in the conversation over time. For non-streaming requests, the diarization results will be provided only in the top alternative of the FINAL SpeechRecognitionResult.
  "enableSpeakerDiarization": True or False, # If 'true', enables speaker detection for each recognized word in the top alternative of the recognition result using a speaker_tag provided in the WordInfo.
  "maxSpeakerCount": 42, # Maximum number of speakers in the conversation. This range gives you more flexibility by allowing the system to automatically determine the correct number of speakers. If not set, the default value is 6.
@@ -264,7 +264,7 @@ <h3>Method Details</h3>
  "alternativeLanguageCodes": [ # A list of up to 3 additional [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tags, listing possible alternative languages of the supplied audio. See [Language Support](https://cloud.google.com/speech-to-text/docs/languages) for a list of the currently supported language codes. If alternative languages are listed, recognition result will contain recognition in the most likely language detected including the main language_code. The recognition result will include the language tag of the language detected in the audio. Note: This feature is only supported for Voice Command and Voice Search use cases and performance may vary for other use cases (e.g., phone call transcription).
  "A String",
  ],
- "audioChannelCount": 42, # The number of channels in the input audio data. ONLY set this for MULTI-CHANNEL recognition. Valid values for LINEAR16and FLAC are `1`-`8`. Valid values for OGG_OPUS are '1'-'254'. Valid value for MULAW, AMR, AMR_WB and SPEEX_WITH_HEADER_BYTE is only `1`. If `0` or omitted, defaults to one channel (mono). Note: We only recognize the first channel by default. To perform independent recognition on each channel set `enable_separate_recognition_per_channel` to 'true'.
+ "audioChannelCount": 42, # The number of channels in the input audio data. ONLY set this for MULTI-CHANNEL recognition. Valid values for LINEAR16, OGG_OPUS and FLAC are `1`-`8`. Valid value for MULAW, AMR, AMR_WB and SPEEX_WITH_HEADER_BYTE is only `1`. If `0` or omitted, defaults to one channel (mono). Note: We only recognize the first channel by default. To perform independent recognition on each channel set `enable_separate_recognition_per_channel` to 'true'.
  "diarizationConfig": { # Config to enable speaker diarization. # Config to enable speaker diarization and set additional parameters to make diarization better suited for your application. Note: When this is enabled, we send all the words from the beginning of the audio for the top alternative in every consecutive STREAMING responses. This is done in order to improve our speaker tags as our models learn to identify the speakers in the conversation over time. For non-streaming requests, the diarization results will be provided only in the top alternative of the FINAL SpeechRecognitionResult.
  "enableSpeakerDiarization": True or False, # If 'true', enables speaker detection for each recognized word in the top alternative of the recognition result using a speaker_tag provided in the WordInfo.
  "maxSpeakerCount": 42, # Maximum number of speakers in the conversation. This range gives you more flexibility by allowing the system to automatically determine the correct number of speakers. If not set, the default value is 6.
@@ -349,6 +349,10 @@ <h3>Method Details</h3>
  "resultEndTime": "A String", # Time offset of the end of this result relative to the beginning of the audio.
  },
  ],
+ "speechAdaptationInfo": { # Information on speech adaptation use in results # Provides information on adaptation behavior in response
+ "adaptationTimeout": True or False, # Whether there was a timeout when applying speech adaptation. If true, adaptation had no effect in the response transcript.
+ "timeoutMessage": "A String", # If set, returns a message specifying which part of the speech adaptation request timed out.
+ },
  "totalBilledTime": "A String", # When available, billed audio seconds for the corresponding request.
googleapiclient/discovery_cache/documents/speech.v1.json (+25, -2)
@@ -524,7 +524,7 @@
  }
  }
  },
- "revision": "20221007",
+ "revision": "20221020",
  "rootUrl": "https://speech.googleapis.com/",
  "schemas": {
  "ClassItem": {
@@ -720,6 +720,10 @@
  },
  "type": "array"
  },
+ "speechAdaptationInfo": {
+ "$ref": "SpeechAdaptationInfo",
+ "description": "Provides information on speech adaptation behavior in response"
+ },
  "totalBilledTime": {
  "description": "When available, billed audio seconds for the corresponding request.",
  "format": "google-duration",
@@ -834,7 +838,7 @@
  "type": "array"
  },
  "audioChannelCount": {
- "description": "The number of channels in the input audio data. ONLY set this for MULTI-CHANNEL recognition. Valid values for LINEAR16and FLAC are `1`-`8`. Valid values for OGG_OPUS are '1'-'254'. Valid value for MULAW, AMR, AMR_WB and SPEEX_WITH_HEADER_BYTE is only `1`. If `0` or omitted, defaults to one channel (mono). Note: We only recognize the first channel by default. To perform independent recognition on each channel set `enable_separate_recognition_per_channel` to 'true'.",
+ "description": "The number of channels in the input audio data. ONLY set this for MULTI-CHANNEL recognition. Valid values for LINEAR16, OGG_OPUS and FLAC are `1`-`8`. Valid value for MULAW, AMR, AMR_WB and SPEEX_WITH_HEADER_BYTE is only `1`. If `0` or omitted, defaults to one channel (mono). Note: We only recognize the first channel by default. To perform independent recognition on each channel set `enable_separate_recognition_per_channel` to 'true'.",
  "format": "int32",
  "type": "integer"
  },
@@ -1065,6 +1069,10 @@
  },
  "type": "array"
  },
+ "speechAdaptationInfo": {
+ "$ref": "SpeechAdaptationInfo",
+ "description": "Provides information on adaptation behavior in response"
+ },
  "totalBilledTime": {
  "description": "When available, billed audio seconds for the corresponding request.",
  "format": "google-duration",
@@ -1128,6 +1136,21 @@
  },
  "type": "object"
  },
+ "SpeechAdaptationInfo": {
+ "description": "Information on speech adaptation use in results",
+ "id": "SpeechAdaptationInfo",
+ "properties": {
+ "adaptationTimeout": {
+ "description": "Whether there was a timeout when applying speech adaptation. If true, adaptation had no effect in the response transcript.",
+ "type": "boolean"
+ },
+ "timeoutMessage": {
+ "description": "If set, returns a message specifying which part of the speech adaptation request timed out.",
+ "type": "string"
+ }
+ },
+ "type": "object"
+ },
  "SpeechContext": {
  "description": "Provides \"hints\" to the speech recognizer to favor specific words and phrases in the results.",
googleapiclient/discovery_cache/documents/speech.v1p1beta1.json (+25, -2)
@@ -524,7 +524,7 @@
  }
  }
  },
- "revision": "20221007",
+ "revision": "20221020",
  "rootUrl": "https://speech.googleapis.com/",
  "schemas": {
  "ClassItem": {
@@ -744,6 +744,10 @@
  },
  "type": "array"
  },
+ "speechAdaptationInfo": {
+ "$ref": "SpeechAdaptationInfo",
+ "description": "Provides information on speech adaptation behavior in response"
+ },
  "totalBilledTime": {
  "description": "When available, billed audio seconds for the corresponding request.",
  "format": "google-duration",
@@ -858,7 +862,7 @@
  "type": "array"
  },
  "audioChannelCount": {
- "description": "The number of channels in the input audio data. ONLY set this for MULTI-CHANNEL recognition. Valid values for LINEAR16and FLAC are `1`-`8`. Valid values for OGG_OPUS are '1'-'254'. Valid value for MULAW, AMR, AMR_WB and SPEEX_WITH_HEADER_BYTE is only `1`. If `0` or omitted, defaults to one channel (mono). Note: We only recognize the first channel by default. To perform independent recognition on each channel set `enable_separate_recognition_per_channel` to 'true'.",
+ "description": "The number of channels in the input audio data. ONLY set this for MULTI-CHANNEL recognition. Valid values for LINEAR16, OGG_OPUS and FLAC are `1`-`8`. Valid value for MULAW, AMR, AMR_WB and SPEEX_WITH_HEADER_BYTE is only `1`. If `0` or omitted, defaults to one channel (mono). Note: We only recognize the first channel by default. To perform independent recognition on each channel set `enable_separate_recognition_per_channel` to 'true'.",
  "format": "int32",
  "type": "integer"
  },
@@ -1109,6 +1113,10 @@
  },
  "type": "array"
  },
+ "speechAdaptationInfo": {
+ "$ref": "SpeechAdaptationInfo",
+ "description": "Provides information on adaptation behavior in response"
+ },
  "totalBilledTime": {
  "description": "When available, billed audio seconds for the corresponding request.",
  "format": "google-duration",
@@ -1172,6 +1180,21 @@
  },
  "type": "object"
  },
+ "SpeechAdaptationInfo": {
+ "description": "Information on speech adaptation use in results",
+ "id": "SpeechAdaptationInfo",
+ "properties": {
+ "adaptationTimeout": {
+ "description": "Whether there was a timeout when applying speech adaptation. If true, adaptation had no effect in the response transcript.",
+ "type": "boolean"
+ },
+ "timeoutMessage": {
+ "description": "If set, returns a message specifying which part of the speech adaptation request timed out.",
+ "type": "string"
+ }
+ },
+ "type": "object"
+ },
  "SpeechContext": {
  "description": "Provides \"hints\" to the speech recognizer to favor specific words and phrases in the results.",