# Hydra-style training configuration for the model named F5TTS_Base.
#
# Nesting is significant: the ${...} interpolations in this file look keys up
# by dotted path (model.name, model.mel_spec.mel_spec_type, model.tokenizer,
# datasets.name), so every section must keep the hierarchy shown here.

hydra:
  run:
    # Per-run output directory: the checkpoint folder name (model name,
    # mel-spec type, tokenizer, dataset name) followed by the launch date and
    # time as two further path components.
    dir: ckpts/${model.name}_${model.mel_spec.mel_spec_type}_${model.tokenizer}_${datasets.name}/${now:%Y-%m-%d}/${now:%H-%M-%S}


datasets:
  name: Emilia_ZH_EN  # referenced as ${datasets.name} in the output paths
  batch_size_per_gpu: 38400  # presumably counted in the unit given by batch_size_type; confirm
  batch_size_type: frame
  max_samples: 64  # NOTE(review): looks like a per-batch sequence cap; confirm against the data loader
  num_workers: 16


optim:
  epochs: 11
  learning_rate: 7.5e-5
  num_warmup_updates: 20000
  grad_accumulation_steps: 1
  max_grad_norm: 1.0  # presumably the gradient-clipping threshold; confirm against the trainer
  bnb_optimizer: False


model:
  name: F5TTS_Base  # referenced as ${model.name} in the output paths
  tokenizer: pinyin  # referenced as ${model.tokenizer} in the output paths
  tokenizer_path: null
  backbone: DiT
  arch:
    dim: 1024
    depth: 22
    heads: 16
    ff_mult: 2
    text_dim: 512
    text_mask_padding: False
    conv_layers: 6
    pe_attn_head: 1
    checkpoint_activations: False
  mel_spec:
    target_sample_rate: 24000
    n_mel_channels: 100
    hop_length: 256
    win_length: 1024
    n_fft: 1024
    mel_spec_type: vocos  # referenced as ${model.mel_spec.mel_spec_type} in the output paths
  vocoder:
    is_local: False
    local_path: null  # presumably only consulted when is_local is True; confirm


ckpts:
  logger: wandb
  log_samples: True
  save_per_updates: 50000
  keep_last_n_checkpoints: -1  # NOTE(review): -1 presumably means "keep all"; confirm against the trainer
  last_per_updates: 5000
  # Same folder name as hydra.run.dir, without the date/time suffix.
  save_dir: ckpts/${model.name}_${model.mel_spec.mel_spec_type}_${model.tokenizer}_${datasets.name}