Skip to content

Commit

Permalink
Merge pull request #62 from stevensu1977/main
Browse files Browse the repository at this point in the history
Add AWS Polly,Transcribe
  • Loading branch information
liou666 committed Aug 18, 2023
2 parents 781d58c + 52a02b1 commit 5a9047c
Show file tree
Hide file tree
Showing 8 changed files with 307 additions and 59 deletions.
11 changes: 11 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,17 @@ xattr -rd com.apple.quarantine /path/to/Polyglot.app
+ 选择“F0”价格层,并单击“创建”
+ 创建完成后,转到新服务的“概述”页面,即可查看密钥和终结点

</details>
<details>
<summary>3. aws语音服务如何申请?</summary>

要申请 AWS 语音服务的 Identity Pool ID ,可以按照以下步骤进行:

+ 登录 AWS 门户 (https://console.aws.amazon.com/)
+ 转到“Cognito 认证服务”页面并单击“Create Identity Pool”按钮
+ 在“IAM Role”窗口中,创建Role, 添加Polly, Transcribe 权限即可
+ 创建完成后,转到“概述”页面,即可查看 Identity Pool ID

</details>

<!-- ## 捐赠
Expand Down
6 changes: 6 additions & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -34,13 +34,19 @@
"release:version": "npx standard-version && git push origin --follow-tags"
},
"dependencies": {
"@aws-sdk/client-cognito-identity": "^3.363.0",
"@aws-sdk/client-polly": "^3.363.0",
"@aws-sdk/client-transcribe-streaming": "^3.363.0",
"@aws-sdk/credential-provider-cognito-identity": "^3.363.0",
"@iconify-json/svg-spinners": "^1.1.1",
"@vueuse/core": "^9.13.0",
"api2d": "^0.1.18",
"aws-sdk": "^2.1409.0",
"dexie": "^3.2.3",
"electron-updater": "^5.3.0",
"element-plus": "^2.3.3",
"eventsource-parser": "^0.1.0",
"microphone-stream": "^6.0.1",
"microsoft-cognitiveservices-speech-sdk": "^1.26.0",
"pinia": "^2.0.33",
"pinia-plugin-persistedstate": "^3.1.0",
Expand Down
10 changes: 10 additions & 0 deletions src/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,16 @@ export const supportLanguageMap = {
'zh-TW': '中文(台湾普通话)',
} as Record<string, string>

// AWS regions selectable for the Polly/Transcribe speech services.
// NOTE(review): not an exhaustive AWS region list — presumably limited to
// regions where both Polly and Transcribe streaming are available; confirm
// before extending.
export const awsRegions = [
  'us-east-1',
  'us-east-2',
  'us-west-1',
  'us-west-2',
  'ap-east-1',
  'ap-southeast-1',
  'eu-central-1',
]

export const azureRegions = [
'australiaeast',
'australiasoutheast',
Expand Down
2 changes: 2 additions & 0 deletions src/constant.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ export const OPEN_KEY = 'openKey'
export const OPEN_PROXY = 'openProxy'
export const AZURE_REGION = 'azureRegion'
export const AZURE_KEY = 'azureKey'
// localStorage keys for the AWS speech-service settings
// (read via useLocalStorage in useGlobalSetting).
export const AWS_REGION = 'awsRegion'
export const AWS_COGNITO_IDENTITY_POOL_ID = 'awsCognitoIdentityPoolId'
export const AZURE_TRANSLATE_KEY = 'azureTranslateKey'
export const VOICE_API_NAME = 'voiceApiName'
export const IS_ALWAYS_RECOGNITION = 'isAlwaysRecognition'
Expand Down
7 changes: 6 additions & 1 deletion src/hooks/useGlobalSetting.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { AUTO_PLAY, AZURE_KEY, AZURE_REGION, AZURE_TRANSLATE_KEY, CHAT_API_NAME, CHAT_REMEMBER_COUNT, IS_ALWAYS_RECOGNITION, OPEN_KEY, OPEN_MAX_TOKEN, OPEN_MODEL, OPEN_PROXY, SELF_AVATAR_URL, TTS_PASSWORD, VOICE_API_NAME } from '@/constant'
import { AUTO_PLAY,AWS_COGNITO_IDENTITY_POOL_ID, AWS_KEY,AWS_REGION,AWS_SECRET_KEY,AZURE_KEY, AZURE_REGION, AZURE_TRANSLATE_KEY, CHAT_API_NAME, CHAT_REMEMBER_COUNT, IS_ALWAYS_RECOGNITION, OPEN_KEY, OPEN_MAX_TOKEN, OPEN_MODEL, OPEN_PROXY, SELF_AVATAR_URL, TTS_PASSWORD, VOICE_API_NAME } from '@/constant'

import { getAvatarUrl } from '@/utils'

Expand All @@ -7,6 +7,9 @@ export const useGlobalSetting = () => {
const openProxy = useLocalStorage(OPEN_PROXY, '')
const azureRegion = useLocalStorage(AZURE_REGION, 'eastasia')
const azureKey = useLocalStorage(AZURE_KEY, '')
const awsRegion = useLocalStorage(AWS_REGION, 'us-east-1')
const awsCognitoIdentityId = useLocalStorage(AWS_COGNITO_IDENTITY_POOL_ID, '')

const openModel = useLocalStorage(OPEN_MODEL, 'gpt-3.5-turbo')
const selfAvatar = useLocalStorage(SELF_AVATAR_URL, getAvatarUrl('self.png'))
const chatApiName = useLocalStorage(CHAT_API_NAME, 'openAI')
Expand All @@ -22,6 +25,8 @@ export const useGlobalSetting = () => {
openKey,
openProxy,
openModel,
awsRegion,
awsCognitoIdentityId,
azureRegion,
azureKey,
selfAvatar,
Expand Down
176 changes: 169 additions & 7 deletions src/hooks/useSpeechService.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,16 @@ import {
SpeechSynthesizer,
} from 'microsoft-cognitiveservices-speech-sdk'

import MicrophoneStream from 'microphone-stream';
import { CognitoIdentityClient } from "@aws-sdk/client-cognito-identity";
import {fromCognitoIdentityPool} from "@aws-sdk/credential-provider-cognito-identity";
import { Polly,SynthesizeSpeechInput,DescribeVoicesCommand } from "@aws-sdk/client-polly";
import {
TranscribeStreamingClient,
StartStreamTranscriptionCommand,
} from '@aws-sdk/client-transcribe-streaming';


const defaultAzureRegion = import.meta.env.VITE_REGION
const defaultAzureKey = import.meta.env.VITE_SCRIPTION_KEY
const accessPassword = import.meta.env.VITE_TTS_ACCESS_PASSWORD
Expand All @@ -17,8 +27,13 @@ interface Config {
isFetchAllVoice?: boolean
}
export const useSpeechService = ({ langs = <const>['fr-FR', 'ja-JP', 'en-US', 'zh-CN', 'zh-HK', 'ko-KR', 'de-DE'], isFetchAllVoice = true }: Config = {}) => {
const { azureKey, azureRegion, ttsPassword } = useGlobalSetting()
const { azureKey, azureRegion, ttsPassword,voiceApiName } = useGlobalSetting()
const { awsCognitoIdentityId, awsRegion, } = useGlobalSetting()


if(voiceApiName.value==="AWS"){
isFetchAllVoice=false;
}
const resultAzureKey = computed(() => {
if (!azureKey.value) {
if (accessPassword !== ttsPassword.value)
Expand Down Expand Up @@ -58,6 +73,7 @@ export const useSpeechService = ({ langs = <const>['fr-FR', 'ja-JP', 'en-US', 'z
const audioBlob = ref<Blob>(new Blob())

const allVoices = ref<VoiceInfo[]>([])
const allAWSVoices = ref<any[]>([])

const recognizer = ref<SpeechRecognizer>(new SpeechRecognizer(speechConfig.value))
const synthesizer = ref<SpeechSynthesizer>(new SpeechSynthesizer(speechConfig.value))
Expand All @@ -74,8 +90,28 @@ export const useSpeechService = ({ langs = <const>['fr-FR', 'ja-JP', 'en-US', 'z
immediate: true,
})

// 语音识别

  // AWS Polly (text-to-speech) and Transcribe (speech-to-text) client setup.
  // Both clients authenticate through a Cognito identity pool, so only the
  // pool id and region from user settings are needed — no long-lived AWS keys.
  // NOTE(review): clients are built once when the hook runs, capturing the
  // region/pool id at that moment; later settings changes are not picked up
  // until the hook re-runs — confirm this is intended.
  const audioAWS = new Audio();
  // Active microphone capture stream; created by startAWSRecognizeSpeech and
  // stopped by stopAWSRecognizeSpeech.
  let micStream: MicrophoneStream | undefined = undefined
  const polly = new Polly({
    region: awsRegion.value ?? "us-east-1",
    credentials: fromCognitoIdentityPool({
      client: new CognitoIdentityClient({ region: awsRegion.value ?? "us-east-1" }),
      identityPoolId: awsCognitoIdentityId.value
    }),
  });

  // Streaming speech-to-text client; same Cognito credential setup as Polly.
  const transcribe = new TranscribeStreamingClient({
    region: awsRegion.value ?? "us-east-1",
    credentials: fromCognitoIdentityPool({
      client: new CognitoIdentityClient({ region: awsRegion.value ?? "us-east-1" }),
      identityPoolId: awsCognitoIdentityId.value
    }),
  });


// AZure 语音识别
const audioRecorder = async () => {
// 暂时通过 mediaRecorder 方式实现录音保存,后续可能会改为直接通过 SpeechRecognizer 实现保存

Expand Down Expand Up @@ -250,16 +286,41 @@ export const useSpeechService = ({ langs = <const>['fr-FR', 'ja-JP', 'en-US', 'z
catch (error) {
allVoices.value = []
}
const res = await synthesizer.value.getVoicesAsync()
if (res.errorDetails) {
console.error(`获取语音列表失败:${res.errorDetails}, 请检查语音配置`)
return []
}
return res.voices
}else{
return []
}

const res = await synthesizer.value.getVoicesAsync()
if (res.errorDetails) {
console.error(`获取语音列表失败:${res.errorDetails}, 请检查语音配置`)
return []

}

// 获取AWS 语音列表
async function getAWSVoices() {
const params = {
LanguageCode: "en-US"
};

try {
const data = await polly.describeVoices(params)
if(data.Voices){
allAWSVoices.value=data.Voices.map((item)=>{
return {"id":item.Id,"gender":item.Gender}
})
}
return data.Voices??[];
} catch (error) {
console.error("Error retrieving AWS voices:", error);
return [];
}
return res.voices

}


function applySynthesizerConfiguration() {
// 通过playback结束事件来判断播放结束
player.value = new SpeakerAudioDestination()
Expand All @@ -279,6 +340,100 @@ export const useSpeechService = ({ langs = <const>['fr-FR', 'ja-JP', 'en-US', 'z
synthesizer.value = new SpeechSynthesizer(speechConfig.value, speakConfig)
}

/* AWS Vocie service */
const startAWSRecognizeSpeech = async (cb?: (text: string) => void) => {

micStream = new MicrophoneStream();
// // this part should be put into an async function

micStream.setStream(
await window.navigator.mediaDevices.getUserMedia({
video: false,
audio: true,
})
);


//构造audioSream
isRecognizing.value = true
const MAX_AUDIO_CHUNK_SIZE = 48000

const audioStream = async function* () {
for await (const chunk of micStream as unknown as Iterable<Buffer>) {
if (chunk.length <= MAX_AUDIO_CHUNK_SIZE) {
yield {
AudioEvent: {
AudioChunk: pcmEncodeChunk(chunk),
},
}
}
}
};

//PCM 编码
const pcmEncodeChunk = (chunk: any) => {
const input = MicrophoneStream.toRaw(chunk);
var offset = 0;
var buffer = new ArrayBuffer(input.length * 2);
var view = new DataView(buffer);
for (var i = 0; i < input.length; i++, offset += 2) {
var s = Math.max(-1, Math.min(1, input[i]));
view.setInt16(offset, s < 0 ? s * 0x8000 : s * 0x7fff, true);
}
return Buffer.from(buffer);
};
//Transcribe stream command 初始化
const command = new StartStreamTranscriptionCommand({
LanguageCode: language.value,
MediaEncoding: "pcm",
MediaSampleRateHertz: 44100,
AudioStream: audioStream(),
});

const response = await transcribe.send(command);
let resultText = ""
if (response.TranscriptResultStream) {
for await (const event of response.TranscriptResultStream) {
if (event.TranscriptEvent) {
const results = event.TranscriptEvent?.Transcript?.Results;
results?.map((result: any) => {
(result.Alternatives || []).map((alternative: any) => {
const transcript = alternative.Items.map((item: any) => item.Content).join(" ");
resultText = transcript;
cb && cb(transcript)
});
});
}
}
isRecognizing.value = false
}
return resultText

}

const stopAWSRecognizeSpeech = () => {
micStream?.stop()
}


//语音合成
const awsTextToSpeak = async (text: string, voice?: string) => {
const params: SynthesizeSpeechInput = {
Text: text,
OutputFormat: 'mp3',
VoiceId: 'Joanna', // Replace with the desired voice ID (e.g., Joanna, Matthew, etc.)
};

const response = await polly.synthesizeSpeech(params);

if (response.AudioStream) {
const buffer = await response.AudioStream.transformToByteArray();
audioAWS.src = URL.createObjectURL(new Blob([buffer], { type: 'audio/mpeg' }));
audioAWS.play();
}
}


return {
languages,
language,
Expand All @@ -289,16 +444,23 @@ export const useSpeechService = ({ langs = <const>['fr-FR', 'ja-JP', 'en-US', 'z
isRecognizReadying,
startRecognizeSpeech,
stopRecognizeSpeech,
startAWSRecognizeSpeech,
stopAWSRecognizeSpeech,
recognizeSpeech,
textToSpeak,
awsTextToSpeak,
ssmlToSpeak,
stopTextToSpeak,
getVoices,
getAWSVoices,
allVoices,
allAWSVoices,
isSynthesizing,
rate,
style,
audioBlob,
player,
audioAWS,

}
}

0 comments on commit 5a9047c

Please sign in to comment.