csukuangfj committed on
Commit
5c1c58f
·
1 Parent(s): ec2c459

update model

Browse files
app-tts.js CHANGED
@@ -123,8 +123,10 @@ speedInput.oninput = function() {
123
 
124
  function updateUiForModelType() {
125
  const isZipVoice = ttsInstanceInfo.modelType === 4;
126
- speakerIdSection.classList.toggle('hidden', isZipVoice);
127
- referenceAudioSection.classList.toggle('hidden', !isZipVoice);
 
 
128
  referenceTextSection.classList.toggle('hidden', !isZipVoice);
129
  }
130
 
@@ -193,9 +195,11 @@ function downloadBlob(blob, filename) {
193
 
194
  generateBtn.onclick = async function() {
195
  const isZipVoice = ttsInstanceInfo.modelType === 4;
 
 
196
 
197
  let speakerId = speakerIdInput.value;
198
- if (!isZipVoice) {
199
  if (speakerId.trim().length == 0) {
200
  alert('Please input a speakerId');
201
  return;
@@ -224,7 +228,7 @@ generateBtn.onclick = async function() {
224
  console.log('speed', speedInput.value);
225
  console.log('text', text);
226
 
227
- if (isZipVoice) {
228
  if (!referenceAudioInput.files || referenceAudioInput.files.length === 0) {
229
  alert('Please select a reference audio file');
230
  return;
@@ -236,24 +240,27 @@ generateBtn.onclick = async function() {
236
  return;
237
  }
238
 
239
- const referenceText = referenceTextInput.value.trim();
240
- if (referenceText.length === 0) {
241
- alert('Please input the transcript of the reference audio');
242
- return;
243
- }
244
-
245
  const referenceAudio = await readReferenceAudio(referenceFile);
246
  const genConfig = {
247
  speed: parseFloat(speedInput.value),
248
  referenceAudio: referenceAudio.samples,
249
  referenceSampleRate: referenceAudio.sampleRate,
250
- referenceText: referenceText,
251
- numSteps: 4,
252
- extra: {
253
- min_char_in_sentence: 10,
254
- },
255
  };
256
 
 
 
 
 
 
 
 
 
 
 
 
 
 
257
  generateBtn.disabled = true;
258
  setGenerationStatus('Generating audio...');
259
 
 
123
 
124
  function updateUiForModelType() {
125
  const isZipVoice = ttsInstanceInfo.modelType === 4;
126
+ const isPocketTts = ttsInstanceInfo.modelType === 5;
127
+ const useGenerationConfig = isZipVoice || isPocketTts;
128
+ speakerIdSection.classList.toggle('hidden', useGenerationConfig);
129
+ referenceAudioSection.classList.toggle('hidden', !useGenerationConfig);
130
  referenceTextSection.classList.toggle('hidden', !isZipVoice);
131
  }
132
 
 
195
 
196
  generateBtn.onclick = async function() {
197
  const isZipVoice = ttsInstanceInfo.modelType === 4;
198
+ const isPocketTts = ttsInstanceInfo.modelType === 5;
199
+ const useGenerationConfig = isZipVoice || isPocketTts;
200
 
201
  let speakerId = speakerIdInput.value;
202
+ if (!useGenerationConfig) {
203
  if (speakerId.trim().length == 0) {
204
  alert('Please input a speakerId');
205
  return;
 
228
  console.log('speed', speedInput.value);
229
  console.log('text', text);
230
 
231
+ if (useGenerationConfig) {
232
  if (!referenceAudioInput.files || referenceAudioInput.files.length === 0) {
233
  alert('Please select a reference audio file');
234
  return;
 
240
  return;
241
  }
242
 
 
 
 
 
 
 
243
  const referenceAudio = await readReferenceAudio(referenceFile);
244
  const genConfig = {
245
  speed: parseFloat(speedInput.value),
246
  referenceAudio: referenceAudio.samples,
247
  referenceSampleRate: referenceAudio.sampleRate,
248
+ numSteps: isPocketTts ? 5 : 4,
 
 
 
 
249
  };
250
 
251
+ if (isZipVoice) {
252
+ const referenceText = referenceTextInput.value.trim();
253
+ if (referenceText.length === 0) {
254
+ alert('Please input the transcript of the reference audio');
255
+ return;
256
+ }
257
+
258
+ genConfig.referenceText = referenceText;
259
+ genConfig.extra = {
260
+ min_char_in_sentence: 10,
261
+ };
262
+ }
263
+
264
  generateBtn.disabled = true;
265
  setGenerationStatus('Generating audio...');
266
 
sherpa-onnx-tts.js CHANGED
@@ -985,6 +985,17 @@ function createOfflineTts(Module, myConfig) {
985
  guidanceScale: 1.0,
986
  };
987
 
 
 
 
 
 
 
 
 
 
 
 
988
  let ruleFsts = '';
989
 
990
  switch (modelType) {
@@ -1031,6 +1042,17 @@ function createOfflineTts(Module, myConfig) {
1031
  offlineTtsZipVoiceModelConfig.dataDir = './espeak-ng-data';
1032
  offlineTtsZipVoiceModelConfig.lexicon = './lexicon.txt';
1033
  break;
 
 
 
 
 
 
 
 
 
 
 
1034
  }
1035
 
1036
  const offlineTtsModelConfig = {
@@ -1039,6 +1061,7 @@ function createOfflineTts(Module, myConfig) {
1039
  offlineTtsKokoroModelConfig: offlineTtsKokoroModelConfig,
1040
  offlineTtsKittenModelConfig: offlineTtsKittenModelConfig,
1041
  offlineTtsZipVoiceModelConfig: offlineTtsZipVoiceModelConfig,
 
1042
  numThreads: 1,
1043
  debug: 1,
1044
  provider: 'cpu',
 
985
  guidanceScale: 1.0,
986
  };
987
 
988
+ const offlineTtsPocketModelConfig = {
989
+ lmFlow: '',
990
+ lmMain: '',
991
+ encoder: '',
992
+ decoder: '',
993
+ textConditioner: '',
994
+ vocabJson: '',
995
+ tokenScoresJson: '',
996
+ voiceEmbeddingCacheCapacity: 50,
997
+ };
998
+
999
  let ruleFsts = '';
1000
 
1001
  switch (modelType) {
 
1042
  offlineTtsZipVoiceModelConfig.dataDir = './espeak-ng-data';
1043
  offlineTtsZipVoiceModelConfig.lexicon = './lexicon.txt';
1044
  break;
1045
+ case 5:
1046
+ // pocket tts
1047
+ // https://k2-fsa.github.io/sherpa/onnx/tts/pocket.html
1048
+ offlineTtsPocketModelConfig.lmFlow = './lm_flow.int8.onnx';
1049
+ offlineTtsPocketModelConfig.lmMain = './lm_main.int8.onnx';
1050
+ offlineTtsPocketModelConfig.encoder = './encoder.onnx';
1051
+ offlineTtsPocketModelConfig.decoder = './decoder.int8.onnx';
1052
+ offlineTtsPocketModelConfig.textConditioner = './text_conditioner.onnx';
1053
+ offlineTtsPocketModelConfig.vocabJson = './vocab.json';
1054
+ offlineTtsPocketModelConfig.tokenScoresJson = './token_scores.json';
1055
+ break;
1056
  }
1057
 
1058
  const offlineTtsModelConfig = {
 
1061
  offlineTtsKokoroModelConfig: offlineTtsKokoroModelConfig,
1062
  offlineTtsKittenModelConfig: offlineTtsKittenModelConfig,
1063
  offlineTtsZipVoiceModelConfig: offlineTtsZipVoiceModelConfig,
1064
+ offlineTtsPocketModelConfig: offlineTtsPocketModelConfig,
1065
  numThreads: 1,
1066
  debug: 1,
1067
  provider: 'cpu',
sherpa-onnx-wasm-main-tts.data CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:619888141185f0c6dea5926ab9bb8a525383d1546b96f92d7346323297a73899
3
- size 96524422
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bcf45b1441eb0aa228a3c6de1ea62a25f5a691eb99fdae68f3d6dc10f5e995f7
3
+ size 96525193
sherpa-onnx-wasm-main-tts.js CHANGED
The diff for this file is too large to render. See raw diff