add central model
Browse files- Dockerfile +1 -0
- infer_onnx.py +9 -7
- spk_to_id_2.json +1 -1
Dockerfile
CHANGED
|
@@ -44,6 +44,7 @@ RUN pip install -r requirements.txt
|
|
| 44 |
RUN huggingface-cli download BSC-LT/matcha-tts-cat-multiaccent matcha_multispeaker_cat_bal_opset_15_10_steps.onnx --local-dir $HOME/app/
|
| 45 |
RUN huggingface-cli download BSC-LT/matcha-tts-cat-multiaccent matcha_multispeaker_cat_occ_opset_15_10_steps.onnx --local-dir $HOME/app/
|
| 46 |
RUN huggingface-cli download BSC-LT/matcha-tts-cat-multiaccent matcha_multispeaker_cat_val_opset_15_10_steps.onnx --local-dir $HOME/app/
|
|
|
|
| 47 |
# Fetch the Matcha config; the filename and --local-dir must be separate arguments.
RUN huggingface-cli download BSC-LT/matcha-tts-cat-multiaccent config.yaml --local-dir $HOME/app/
|
| 48 |
|
| 49 |
RUN huggingface-cli download BSC-LT/vocos-mel-22khz-cat mel_spec_22khz_cat.onnx --local-dir $HOME/app/
|
|
|
|
| 44 |
RUN huggingface-cli download BSC-LT/matcha-tts-cat-multiaccent matcha_multispeaker_cat_bal_opset_15_10_steps.onnx --local-dir $HOME/app/
|
| 45 |
RUN huggingface-cli download BSC-LT/matcha-tts-cat-multiaccent matcha_multispeaker_cat_occ_opset_15_10_steps.onnx --local-dir $HOME/app/
|
| 46 |
RUN huggingface-cli download BSC-LT/matcha-tts-cat-multiaccent matcha_multispeaker_cat_val_opset_15_10_steps.onnx --local-dir $HOME/app/
|
| 47 |
+
RUN huggingface-cli download BSC-LT/matcha-tts-cat-multiaccent matcha_multispeaker_cat_cen_opset_15_10_steps.onnx --local-dir $HOME/app/
|
| 48 |
# Fetch the Matcha config; the filename and --local-dir must be separate arguments.
RUN huggingface-cli download BSC-LT/matcha-tts-cat-multiaccent config.yaml --local-dir $HOME/app/
|
| 49 |
|
| 50 |
RUN huggingface-cli download BSC-LT/vocos-mel-22khz-cat mel_spec_22khz_cat.onnx --local-dir $HOME/app/
|
infer_onnx.py
CHANGED
|
@@ -36,7 +36,7 @@ def process_text(i: int, text: str, device: torch.device, cleaner:str):
|
|
| 36 |
|
| 37 |
# paths
|
| 38 |
MODEL_PATH_MATCHA_MEL_BAL="matcha_multispeaker_cat_bal_opset_15_10_steps.onnx"
|
| 39 |
-
|
| 40 |
MODEL_PATH_MATCHA_MEL_OCC="matcha_multispeaker_cat_occ_opset_15_10_steps.onnx"
|
| 41 |
MODEL_PATH_MATCHA_MEL_VAL="matcha_multispeaker_cat_val_opset_15_10_steps.onnx"
|
| 42 |
MODEL_PATH_VOCOS="mel_spec_22khz_cat.onnx"
|
|
@@ -46,7 +46,7 @@ SPEAKER_ID_DICT="spk_to_id_2.json"
|
|
| 46 |
# Load models
|
| 47 |
sess_options = onnxruntime.SessionOptions()
|
| 48 |
model_matcha_mel_bal = onnxruntime.InferenceSession(str(MODEL_PATH_MATCHA_MEL_BAL), sess_options=sess_options, providers=["CPUExecutionProvider"])
|
| 49 |
-
|
| 50 |
model_matcha_mel_occ = onnxruntime.InferenceSession(str(MODEL_PATH_MATCHA_MEL_OCC), sess_options=sess_options, providers=["CPUExecutionProvider"])
|
| 51 |
model_matcha_mel_val = onnxruntime.InferenceSession(str(MODEL_PATH_MATCHA_MEL_VAL), sess_options=sess_options, providers=["CPUExecutionProvider"])
|
| 52 |
model_vocos = onnxruntime.InferenceSession(str(MODEL_PATH_VOCOS), sess_options=sess_options, providers=["CPUExecutionProvider"])
|
|
@@ -55,12 +55,14 @@ speaker_id_dict = json.load(open(SPEAKER_ID_DICT))
|
|
| 55 |
accents = [e for e in speaker_id_dict.keys()]
|
| 56 |
|
| 57 |
models={"balear":model_matcha_mel_bal,
|
| 58 |
-
"occidental": model_matcha_mel_occ,
|
| 59 |
-
"valencia": model_matcha_mel_val
|
|
|
|
| 60 |
|
| 61 |
cleaners={"balear": "catalan_balear_cleaners",
|
| 62 |
-
"occidental": "catalan_occidental_cleaners",
|
| 63 |
-
"valencia": "catalan_valencia_cleaners"
|
|
|
|
| 64 |
|
| 65 |
|
| 66 |
speakers = [sp for sp in speaker_id_dict[DEFAULT_ACCENT].keys()]
|
|
@@ -197,7 +199,7 @@ description = """
|
|
| 197 |
|
| 198 |
For vocoders we use [Vocos](https://huggingface.co/BSC-LT/vocos-mel-22khz-cat) trained in a catalan set of ~28 hours.
|
| 199 |
|
| 200 |
-
[Matcha](https://huggingface.co/BSC-LT/matcha-tts-cat-
|
| 201 |
|
| 202 |
"""
|
| 203 |
|
|
|
|
| 36 |
|
| 37 |
# paths
|
| 38 |
MODEL_PATH_MATCHA_MEL_BAL="matcha_multispeaker_cat_bal_opset_15_10_steps.onnx"
|
| 39 |
+
MODEL_PATH_MATCHA_MEL_CAT="matcha_multispeaker_cat_cen_opset_15_10_steps.onnx"
|
| 40 |
MODEL_PATH_MATCHA_MEL_OCC="matcha_multispeaker_cat_occ_opset_15_10_steps.onnx"
|
| 41 |
MODEL_PATH_MATCHA_MEL_VAL="matcha_multispeaker_cat_val_opset_15_10_steps.onnx"
|
| 42 |
MODEL_PATH_VOCOS="mel_spec_22khz_cat.onnx"
|
|
|
|
| 46 |
# Load models
|
| 47 |
sess_options = onnxruntime.SessionOptions()
|
| 48 |
model_matcha_mel_bal = onnxruntime.InferenceSession(str(MODEL_PATH_MATCHA_MEL_BAL), sess_options=sess_options, providers=["CPUExecutionProvider"])
|
| 49 |
+
model_matcha_mel_cat = onnxruntime.InferenceSession(str(MODEL_PATH_MATCHA_MEL_CAT), sess_options=sess_options, providers=["CPUExecutionProvider"])
|
| 50 |
model_matcha_mel_occ = onnxruntime.InferenceSession(str(MODEL_PATH_MATCHA_MEL_OCC), sess_options=sess_options, providers=["CPUExecutionProvider"])
|
| 51 |
model_matcha_mel_val = onnxruntime.InferenceSession(str(MODEL_PATH_MATCHA_MEL_VAL), sess_options=sess_options, providers=["CPUExecutionProvider"])
|
| 52 |
model_vocos = onnxruntime.InferenceSession(str(MODEL_PATH_VOCOS), sess_options=sess_options, providers=["CPUExecutionProvider"])
|
|
|
|
| 55 |
accents = [e for e in speaker_id_dict.keys()]
|
| 56 |
|
| 57 |
models={"balear":model_matcha_mel_bal,
|
| 58 |
+
"nord-occidental": model_matcha_mel_occ,
|
| 59 |
+
"valencia": model_matcha_mel_val,
|
| 60 |
+
"central": model_matcha_mel_val}
|
| 61 |
|
| 62 |
cleaners={"balear": "catalan_balear_cleaners",
|
| 63 |
+
"nord-occidental": "catalan_occidental_cleaners",
|
| 64 |
+
"valencia": "catalan_valencia_cleaners",
|
| 65 |
+
"central": "catalan_cleaners"}
|
| 66 |
|
| 67 |
|
| 68 |
speakers = [sp for sp in speaker_id_dict[DEFAULT_ACCENT].keys()]
|
|
|
|
| 199 |
|
| 200 |
For vocoders we use [Vocos](https://huggingface.co/BSC-LT/vocos-mel-22khz-cat) trained in a catalan set of ~28 hours.
|
| 201 |
|
| 202 |
+
[Matcha](https://huggingface.co/BSC-LT/matcha-tts-cat-multispeaker) was trained using openslr69 and festcat datasets
|
| 203 |
|
| 204 |
"""
|
| 205 |
|
spk_to_id_2.json
CHANGED
|
@@ -7,7 +7,7 @@
|
|
| 7 |
"grau": 0,
|
| 8 |
"elia": 1
|
| 9 |
},
|
| 10 |
-
"occidental":{
|
| 11 |
"pere": 0,
|
| 12 |
"emma": 1
|
| 13 |
},
|
|
|
|
| 7 |
"grau": 0,
|
| 8 |
"elia": 1
|
| 9 |
},
|
| 10 |
+
"nord-occidental":{
|
| 11 |
"pere": 0,
|
| 12 |
"emma": 1
|
| 13 |
},
|