feat(speech): update the api
#### speech:v1

The following keys were added:
- schemas.LongRunningRecognizeResponse.properties.speechAdaptationInfo.$ref (Total Keys: 1)
- schemas.RecognizeResponse.properties.speechAdaptationInfo.$ref (Total Keys: 1)
- schemas.SpeechAdaptationInfo (Total Keys: 4)

#### speech:v1p1beta1

The following keys were added:
- schemas.LongRunningRecognizeResponse.properties.speechAdaptationInfo.$ref (Total Keys: 1)
- schemas.RecognizeResponse.properties.speechAdaptationInfo.$ref (Total Keys: 1)
- schemas.SpeechAdaptationInfo (Total Keys: 4)
yoshi-automation committed Nov 1, 2022 · 1 parent 3d80092 · commit 03f32f8
Showing 4 changed files with 62 additions and 8 deletions.
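In plain terms: this revision adds a `speechAdaptationInfo` field to both `RecognizeResponse` and `LongRunningRecognizeResponse`. The following minimal sketch (not part of the commit; the bucket URI and phrase values are placeholders) shows how a caller of the generated Python client might read the new field after a synchronous recognize request that uses speech adaptation:

from googleapiclient.discovery import build

speech = build("speech", "v1")

body = {
    "config": {
        "languageCode": "en-US",
        # Speech adaptation: bias recognition toward expected phrases.
        "adaptation": {
            "phraseSets": [{"phrases": [{"value": "weather", "boost": 10.0}]}],
        },
    },
    "audio": {"uri": "gs://my-bucket/clip.flac"},  # placeholder URI
}
response = speech.speech().recognize(body=body).execute()

# New in this revision: adaptation diagnostics ride along on the response.
info = response.get("speechAdaptationInfo", {})
if info.get("adaptationTimeout"):
    # Per the schema, a timeout means adaptation had no effect on the transcript.
    print("Adaptation timed out:", info.get("timeoutMessage", ""))
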
8 changes: 6 additions & 2 deletions docs/dyn/speech_v1.speech.html
@@ -134,7 +134,7 @@ <h3>Method Details</h3>
"alternativeLanguageCodes": [ # A list of up to 3 additional [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tags, listing possible alternative languages of the supplied audio. See [Language Support](https://cloud.google.com/speech-to-text/docs/languages) for a list of the currently supported language codes. If alternative languages are listed, recognition result will contain recognition in the most likely language detected including the main language_code. The recognition result will include the language tag of the language detected in the audio. Note: This feature is only supported for Voice Command and Voice Search use cases and performance may vary for other use cases (e.g., phone call transcription).
"A String",
],
-"audioChannelCount": 42, # The number of channels in the input audio data. ONLY set this for MULTI-CHANNEL recognition. Valid values for LINEAR16 and FLAC are `1`-`8`. Valid values for OGG_OPUS are '1'-'254'. Valid value for MULAW, AMR, AMR_WB and SPEEX_WITH_HEADER_BYTE is only `1`. If `0` or omitted, defaults to one channel (mono). Note: We only recognize the first channel by default. To perform independent recognition on each channel set `enable_separate_recognition_per_channel` to 'true'.
+"audioChannelCount": 42, # The number of channels in the input audio data. ONLY set this for MULTI-CHANNEL recognition. Valid values for LINEAR16, OGG_OPUS and FLAC are `1`-`8`. Valid value for MULAW, AMR, AMR_WB and SPEEX_WITH_HEADER_BYTE is only `1`. If `0` or omitted, defaults to one channel (mono). Note: We only recognize the first channel by default. To perform independent recognition on each channel set `enable_separate_recognition_per_channel` to 'true'.
"diarizationConfig": { # Config to enable speaker diarization. # Config to enable speaker diarization and set additional parameters to make diarization better suited for your application. Note: When this is enabled, we send all the words from the beginning of the audio for the top alternative in every consecutive STREAMING responses. This is done in order to improve our speaker tags as our models learn to identify the speakers in the conversation over time. For non-streaming requests, the diarization results will be provided only in the top alternative of the FINAL SpeechRecognitionResult.
"enableSpeakerDiarization": True or False, # If 'true', enables speaker detection for each recognized word in the top alternative of the recognition result using a speaker_tag provided in the WordInfo.
"maxSpeakerCount": 42, # Maximum number of speakers in the conversation. This range gives you more flexibility by allowing the system to automatically determine the correct number of speakers. If not set, the default value is 6.
@@ -252,7 +252,7 @@ <h3>Method Details</h3>
"alternativeLanguageCodes": [ # A list of up to 3 additional [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tags, listing possible alternative languages of the supplied audio. See [Language Support](https://cloud.google.com/speech-to-text/docs/languages) for a list of the currently supported language codes. If alternative languages are listed, recognition result will contain recognition in the most likely language detected including the main language_code. The recognition result will include the language tag of the language detected in the audio. Note: This feature is only supported for Voice Command and Voice Search use cases and performance may vary for other use cases (e.g., phone call transcription).
"A String",
],
-"audioChannelCount": 42, # The number of channels in the input audio data. ONLY set this for MULTI-CHANNEL recognition. Valid values for LINEAR16 and FLAC are `1`-`8`. Valid values for OGG_OPUS are '1'-'254'. Valid value for MULAW, AMR, AMR_WB and SPEEX_WITH_HEADER_BYTE is only `1`. If `0` or omitted, defaults to one channel (mono). Note: We only recognize the first channel by default. To perform independent recognition on each channel set `enable_separate_recognition_per_channel` to 'true'.
+"audioChannelCount": 42, # The number of channels in the input audio data. ONLY set this for MULTI-CHANNEL recognition. Valid values for LINEAR16, OGG_OPUS and FLAC are `1`-`8`. Valid value for MULAW, AMR, AMR_WB and SPEEX_WITH_HEADER_BYTE is only `1`. If `0` or omitted, defaults to one channel (mono). Note: We only recognize the first channel by default. To perform independent recognition on each channel set `enable_separate_recognition_per_channel` to 'true'.
"diarizationConfig": { # Config to enable speaker diarization. # Config to enable speaker diarization and set additional parameters to make diarization better suited for your application. Note: When this is enabled, we send all the words from the beginning of the audio for the top alternative in every consecutive STREAMING responses. This is done in order to improve our speaker tags as our models learn to identify the speakers in the conversation over time. For non-streaming requests, the diarization results will be provided only in the top alternative of the FINAL SpeechRecognitionResult.
"enableSpeakerDiarization": True or False, # If 'true', enables speaker detection for each recognized word in the top alternative of the recognition result using a speaker_tag provided in the WordInfo.
"maxSpeakerCount": 42, # Maximum number of speakers in the conversation. This range gives you more flexibility by allowing the system to automatically determine the correct number of speakers. If not set, the default value is 6.
@@ -325,6 +325,10 @@ <h3>Method Details</h3>
"resultEndTime": "A String", # Time offset of the end of this result relative to the beginning of the audio.
},
],
+"speechAdaptationInfo": { # Information on speech adaptation use in results # Provides information on adaptation behavior in response
+"adaptationTimeout": True or False, # Whether there was a timeout when applying speech adaptation. If true, adaptation had no effect in the response transcript.
+"timeoutMessage": "A String", # If set, returns a message specifying which part of the speech adaptation request timed out.
+},
"totalBilledTime": "A String", # When available, billed audio seconds for the corresponding request.
}</pre>
</div>
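
Aside from the new response field, the hunks above also tighten the documented `audioChannelCount` range: OGG_OPUS moves from '1'-'254' into the same `1`-`8` range as LINEAR16 and FLAC. Purely as an illustration of the new wording (all values below are placeholders), a multi-channel OGG_OPUS config might look like:

config = {
    "encoding": "OGG_OPUS",
    "sampleRateHertz": 48000,
    "languageCode": "en-US",
    "audioChannelCount": 2,  # must now be 1-8 for OGG_OPUS
    # Recognize each channel separately instead of only the first.
    "enableSeparateRecognitionPerChannel": True,
}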
8 changes: 6 additions & 2 deletions docs/dyn/speech_v1p1beta1.speech.html
@@ -134,7 +134,7 @@ <h3>Method Details</h3>
"alternativeLanguageCodes": [ # A list of up to 3 additional [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tags, listing possible alternative languages of the supplied audio. See [Language Support](https://cloud.google.com/speech-to-text/docs/languages) for a list of the currently supported language codes. If alternative languages are listed, recognition result will contain recognition in the most likely language detected including the main language_code. The recognition result will include the language tag of the language detected in the audio. Note: This feature is only supported for Voice Command and Voice Search use cases and performance may vary for other use cases (e.g., phone call transcription).
"A String",
],
-"audioChannelCount": 42, # The number of channels in the input audio data. ONLY set this for MULTI-CHANNEL recognition. Valid values for LINEAR16 and FLAC are `1`-`8`. Valid values for OGG_OPUS are '1'-'254'. Valid value for MULAW, AMR, AMR_WB and SPEEX_WITH_HEADER_BYTE is only `1`. If `0` or omitted, defaults to one channel (mono). Note: We only recognize the first channel by default. To perform independent recognition on each channel set `enable_separate_recognition_per_channel` to 'true'.
+"audioChannelCount": 42, # The number of channels in the input audio data. ONLY set this for MULTI-CHANNEL recognition. Valid values for LINEAR16, OGG_OPUS and FLAC are `1`-`8`. Valid value for MULAW, AMR, AMR_WB and SPEEX_WITH_HEADER_BYTE is only `1`. If `0` or omitted, defaults to one channel (mono). Note: We only recognize the first channel by default. To perform independent recognition on each channel set `enable_separate_recognition_per_channel` to 'true'.
"diarizationConfig": { # Config to enable speaker diarization. # Config to enable speaker diarization and set additional parameters to make diarization better suited for your application. Note: When this is enabled, we send all the words from the beginning of the audio for the top alternative in every consecutive STREAMING responses. This is done in order to improve our speaker tags as our models learn to identify the speakers in the conversation over time. For non-streaming requests, the diarization results will be provided only in the top alternative of the FINAL SpeechRecognitionResult.
"enableSpeakerDiarization": True or False, # If 'true', enables speaker detection for each recognized word in the top alternative of the recognition result using a speaker_tag provided in the WordInfo.
"maxSpeakerCount": 42, # Maximum number of speakers in the conversation. This range gives you more flexibility by allowing the system to automatically determine the correct number of speakers. If not set, the default value is 6.
@@ -264,7 +264,7 @@ <h3>Method Details</h3>
"alternativeLanguageCodes": [ # A list of up to 3 additional [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tags, listing possible alternative languages of the supplied audio. See [Language Support](https://cloud.google.com/speech-to-text/docs/languages) for a list of the currently supported language codes. If alternative languages are listed, recognition result will contain recognition in the most likely language detected including the main language_code. The recognition result will include the language tag of the language detected in the audio. Note: This feature is only supported for Voice Command and Voice Search use cases and performance may vary for other use cases (e.g., phone call transcription).
"A String",
],
-"audioChannelCount": 42, # The number of channels in the input audio data. ONLY set this for MULTI-CHANNEL recognition. Valid values for LINEAR16 and FLAC are `1`-`8`. Valid values for OGG_OPUS are '1'-'254'. Valid value for MULAW, AMR, AMR_WB and SPEEX_WITH_HEADER_BYTE is only `1`. If `0` or omitted, defaults to one channel (mono). Note: We only recognize the first channel by default. To perform independent recognition on each channel set `enable_separate_recognition_per_channel` to 'true'.
+"audioChannelCount": 42, # The number of channels in the input audio data. ONLY set this for MULTI-CHANNEL recognition. Valid values for LINEAR16, OGG_OPUS and FLAC are `1`-`8`. Valid value for MULAW, AMR, AMR_WB and SPEEX_WITH_HEADER_BYTE is only `1`. If `0` or omitted, defaults to one channel (mono). Note: We only recognize the first channel by default. To perform independent recognition on each channel set `enable_separate_recognition_per_channel` to 'true'.
"diarizationConfig": { # Config to enable speaker diarization. # Config to enable speaker diarization and set additional parameters to make diarization better suited for your application. Note: When this is enabled, we send all the words from the beginning of the audio for the top alternative in every consecutive STREAMING responses. This is done in order to improve our speaker tags as our models learn to identify the speakers in the conversation over time. For non-streaming requests, the diarization results will be provided only in the top alternative of the FINAL SpeechRecognitionResult.
"enableSpeakerDiarization": True or False, # If 'true', enables speaker detection for each recognized word in the top alternative of the recognition result using a speaker_tag provided in the WordInfo.
"maxSpeakerCount": 42, # Maximum number of speakers in the conversation. This range gives you more flexibility by allowing the system to automatically determine the correct number of speakers. If not set, the default value is 6.
@@ -349,6 +349,10 @@ <h3>Method Details</h3>
"resultEndTime": "A String", # Time offset of the end of this result relative to the beginning of the audio.
},
],
+"speechAdaptationInfo": { # Information on speech adaptation use in results # Provides information on adaptation behavior in response
+"adaptationTimeout": True or False, # Whether there was a timeout when applying speech adaptation. If true, adaptation had no effect in the response transcript.
+"timeoutMessage": "A String", # If set, returns a message specifying which part of the speech adaptation request timed out.
+},
"totalBilledTime": "A String", # When available, billed audio seconds for the corresponding request.
}</pre>
</div>
27 changes: 25 additions & 2 deletions googleapiclient/discovery_cache/documents/speech.v1.json
@@ -524,7 +524,7 @@
}
}
},
"revision": "20221007",
"revision": "20221020",
"rootUrl": "https://speech.googleapis.com/",
"schemas": {
"ClassItem": {
@@ -720,6 +720,10 @@
},
"type": "array"
},
"speechAdaptationInfo": {
"$ref": "SpeechAdaptationInfo",
"description": "Provides information on speech adaptation behavior in response"
},
"totalBilledTime": {
"description": "When available, billed audio seconds for the corresponding request.",
"format": "google-duration",
@@ -834,7 +838,7 @@
"type": "array"
},
"audioChannelCount": {
"description": "The number of channels in the input audio data. ONLY set this for MULTI-CHANNEL recognition. Valid values for LINEAR16 and FLAC are `1`-`8`. Valid values for OGG_OPUS are '1'-'254'. Valid value for MULAW, AMR, AMR_WB and SPEEX_WITH_HEADER_BYTE is only `1`. If `0` or omitted, defaults to one channel (mono). Note: We only recognize the first channel by default. To perform independent recognition on each channel set `enable_separate_recognition_per_channel` to 'true'.",
"description": "The number of channels in the input audio data. ONLY set this for MULTI-CHANNEL recognition. Valid values for LINEAR16, OGG_OPUS and FLAC are `1`-`8`. Valid value for MULAW, AMR, AMR_WB and SPEEX_WITH_HEADER_BYTE is only `1`. If `0` or omitted, defaults to one channel (mono). Note: We only recognize the first channel by default. To perform independent recognition on each channel set `enable_separate_recognition_per_channel` to 'true'.",
"format": "int32",
"type": "integer"
},
@@ -1065,6 +1069,10 @@
},
"type": "array"
},
"speechAdaptationInfo": {
"$ref": "SpeechAdaptationInfo",
"description": "Provides information on adaptation behavior in response"
},
"totalBilledTime": {
"description": "When available, billed audio seconds for the corresponding request.",
"format": "google-duration",
@@ -1128,6 +1136,21 @@
},
"type": "object"
},
"SpeechAdaptationInfo": {
"description": "Information on speech adaptation use in results",
"id": "SpeechAdaptationInfo",
"properties": {
"adaptationTimeout": {
"description": "Whether there was a timeout when applying speech adaptation. If true, adaptation had no effect in the response transcript.",
"type": "boolean"
},
"timeoutMessage": {
"description": "If set, returns a message specifying which part of the speech adaptation request timed out.",
"type": "string"
}
},
"type": "object"
},
"SpeechContext": {
"description": "Provides \"hints\" to the speech recognizer to favor specific words and phrases in the results.",
"id": "SpeechContext",
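The v1 discovery document above wires `SpeechAdaptationInfo` into both response schemas, so the asynchronous path can surface it too. A hedged sketch (placeholder URI; a deliberately naive fixed-interval poll) of reading the field off a completed long-running operation:

import time

from googleapiclient.discovery import build

speech = build("speech", "v1")

op = speech.speech().longrunningrecognize(
    body={
        "config": {"languageCode": "en-US"},
        "audio": {"uri": "gs://my-bucket/long-audio.flac"},  # placeholder
    }
).execute()

while not op.get("done"):
    time.sleep(10)  # real code would use exponential backoff
    op = speech.operations().get(name=op["name"]).execute()

# The completed operation embeds a LongRunningRecognizeResponse.
info = op.get("response", {}).get("speechAdaptationInfo", {})
print("adaptation timed out:", info.get("adaptationTimeout", False))
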
27 changes: 25 additions & 2 deletions googleapiclient/discovery_cache/documents/speech.v1p1beta1.json
@@ -524,7 +524,7 @@
}
}
},
"revision": "20221007",
"revision": "20221020",
"rootUrl": "https://speech.googleapis.com/",
"schemas": {
"ClassItem": {
@@ -744,6 +744,10 @@
},
"type": "array"
},
"speechAdaptationInfo": {
"$ref": "SpeechAdaptationInfo",
"description": "Provides information on speech adaptation behavior in response"
},
"totalBilledTime": {
"description": "When available, billed audio seconds for the corresponding request.",
"format": "google-duration",
@@ -858,7 +862,7 @@
"type": "array"
},
"audioChannelCount": {
"description": "The number of channels in the input audio data. ONLY set this for MULTI-CHANNEL recognition. Valid values for LINEAR16 and FLAC are `1`-`8`. Valid values for OGG_OPUS are '1'-'254'. Valid value for MULAW, AMR, AMR_WB and SPEEX_WITH_HEADER_BYTE is only `1`. If `0` or omitted, defaults to one channel (mono). Note: We only recognize the first channel by default. To perform independent recognition on each channel set `enable_separate_recognition_per_channel` to 'true'.",
"description": "The number of channels in the input audio data. ONLY set this for MULTI-CHANNEL recognition. Valid values for LINEAR16, OGG_OPUS and FLAC are `1`-`8`. Valid value for MULAW, AMR, AMR_WB and SPEEX_WITH_HEADER_BYTE is only `1`. If `0` or omitted, defaults to one channel (mono). Note: We only recognize the first channel by default. To perform independent recognition on each channel set `enable_separate_recognition_per_channel` to 'true'.",
"format": "int32",
"type": "integer"
},
@@ -1109,6 +1113,10 @@
},
"type": "array"
},
"speechAdaptationInfo": {
"$ref": "SpeechAdaptationInfo",
"description": "Provides information on adaptation behavior in response"
},
"totalBilledTime": {
"description": "When available, billed audio seconds for the corresponding request.",
"format": "google-duration",
@@ -1172,6 +1180,21 @@
},
"type": "object"
},
"SpeechAdaptationInfo": {
"description": "Information on speech adaptation use in results",
"id": "SpeechAdaptationInfo",
"properties": {
"adaptationTimeout": {
"description": "Whether there was a timeout when applying speech adaptation. If true, adaptation had no effect in the response transcript.",
"type": "boolean"
},
"timeoutMessage": {
"description": "If set, returns a message specifying which part of the speech adaptation request timed out.",
"type": "string"
}
},
"type": "object"
},
"SpeechContext": {
"description": "Provides \"hints\" to the speech recognizer to favor specific words and phrases in the results.",
"id": "SpeechContext",
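The v1p1beta1 surface receives the identical schema, so beta callers only swap the version string passed to build(). A final sketch (placeholder URI again; the field is typically absent unless the request used speech adaptation):

from googleapiclient.discovery import build

beta = build("speech", "v1p1beta1")
resp = beta.speech().recognize(
    body={
        "config": {"languageCode": "en-US"},
        "audio": {"uri": "gs://my-bucket/clip.flac"},  # placeholder
    }
).execute()
print(resp.get("speechAdaptationInfo"))  # None when the field is absent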
