{
"model_id": "nvidia/parakeet-tdt-0.6b-v3",
"format": "coreml-fp16",
"sample_rate": 16000,
"max_audio_seconds": 30.0,
"max_audio_samples": 480000,
"max_symbol_steps": 1,
"vocab_size": 8192,
"joint_extra_outputs": 5,
"checkpoint": {
"type": "pretrained",
"model_id": "nvidia/parakeet-tdt-0.6b-v3"
},
"coreml": {
"compute_units": "CPU_AND_NEURAL_ENGINE",
"compute_precision": "FLOAT16"
},
"components": {
"mel_encoder": {
"description": "Fused preprocessor+encoder: raw audio to encoder frames in one model (ANE-accelerated).",
"inputs": {
"audio_signal": [1, 480000],
"audio_length": [1]
},
"outputs": {
"encoder": [1, 1024, 375],
"encoder_length": [1]
},
"path": "parakeet_mel_encoder_30s.mlpackage"
},
"decoder": {
"description": "LSTM prediction network (CPU).",
"inputs": {
"targets": [1, 1],
"target_length": [1],
"h_in": [2, 1, 640],
"c_in": [2, 1, 640]
},
"outputs": {
"decoder": [1, 640, 1],
"h_out": [2, 1, 640],
"c_out": [2, 1, 640]
},
"path": "parakeet_decoder.mlpackage"
},
"joint_logits_single_step": {
"description": "Joint network exposing full-vocab token logits and duration logits (CPU). Enables host-side medical term boosting.",
"inputs": {
"encoder_step": [1, 1024, 1],
"decoder_step": [1, 640, 1]
},
"outputs": {
"token_logits": [1, 1, 1, 8193],
"duration_logits": [1, 1, 1, 5]
},
"path": "parakeet_joint_logits_single_step.mlpackage"
}
}
}