Text-to-Speech
Transformers
Safetensors
higgs_multimodal_qwen3
text-generation
speech-generation
voice-agent
expressive-speech
controllable-tts
multilingual-tts
Instructions to use bosonai/higgs-tts-3-4b with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use bosonai/higgs-tts-3-4b with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-to-speech", model="bosonai/higgs-tts-3-4b")
```

```python
# Load model directly
from transformers import AutoModelForSeq2SeqLM

model = AutoModelForSeq2SeqLM.from_pretrained("bosonai/higgs-tts-3-4b", dtype="auto")
```
- Notebooks
- Google Colab
- Kaggle
| { | |
| "add_prefix_space": false, | |
| "backend": "tokenizers", | |
| "bos_token": null, | |
| "clean_up_tokenization_spaces": false, | |
| "eos_token": "<|endoftext|>", | |
| "errors": "replace", | |
| "extra_special_tokens": [ | |
| "<|asr|>", | |
| "<|streaming_asr|>", | |
| "<|tts|>", | |
| "<|streaming_tts|>", | |
| "<|audio_cont_txt|>", | |
| "<|audio|>", | |
| "<|audio_end|>", | |
| "<|text|>", | |
| "<|text_end|>", | |
| "<|eoc|>", | |
| "<|user|>", | |
| "<|assistant|>", | |
| "<|system|>", | |
| "<|await_audio|>", | |
| "<|ref_audio|>", | |
| "<|ref_text|>", | |
| "<|emotion:elation|>", | |
| "<|emotion:amusement|>", | |
| "<|emotion:enthusiasm|>", | |
| "<|emotion:determination|>", | |
| "<|emotion:pride|>", | |
| "<|emotion:contentment|>", | |
| "<|emotion:affection|>", | |
| "<|emotion:relief|>", | |
| "<|emotion:contemplation|>", | |
| "<|emotion:confusion|>", | |
| "<|emotion:surprise|>", | |
| "<|emotion:awe|>", | |
| "<|emotion:longing|>", | |
| "<|emotion:arousal|>", | |
| "<|emotion:anger|>", | |
| "<|emotion:fear|>", | |
| "<|emotion:disgust|>", | |
| "<|emotion:bitterness|>", | |
| "<|emotion:sadness|>", | |
| "<|emotion:shame|>", | |
| "<|emotion:helplessness|>", | |
| "<|env:music|>", | |
| "<|env:noise|>", | |
| "<|style:singing|>", | |
| "<|style:shouting|>", | |
| "<|style:whispering|>", | |
| "<|sfx:cough|>", | |
| "<|sfx:laughter|>", | |
| "<|sfx:crying|>", | |
| "<|sfx:screaming|>", | |
| "<|sfx:burping|>", | |
| "<|sfx:humming|>", | |
| "<|sfx:sigh|>", | |
| "<|sfx:sniff|>", | |
| "<|sfx:sneeze|>", | |
| "<|prosody:speed_very_slow|>", | |
| "<|prosody:speed_slow|>", | |
| "<|prosody:speed_fast|>", | |
| "<|prosody:speed_very_fast|>", | |
| "<|prosody:pitch_low|>", | |
| "<|prosody:pitch_high|>", | |
| "<|prosody:pause|>", | |
| "<|prosody:long_pause|>", | |
| "<|chatml|>", | |
| "<|prosody:expressive_high|>", | |
| "<|prosody:expressive_low|>" | |
| ], | |
| "is_local": true, | |
| "model_max_length": 131072, | |
| "pad_token": "<|endoftext|>", | |
| "split_special_tokens": false, | |
| "tokenizer_class": "Qwen2Tokenizer", | |
| "unk_token": null | |
| } | |