Mediform
/

parakeet-tdt-0.6b-v3-coreml-fp16

Model card | Files and versions

parakeet-tdt-0.6b-v3-coreml-fp16 / metadata.json

jkrause's picture

Fix metadata.json

aabffb5 verified 3 months ago

history blame contribute delete

1.66 kB

	{
	"model_id": "nvidia/parakeet-tdt-0.6b-v3",
	"format": "coreml-fp16",
	"sample_rate": 16000,
	"max_audio_seconds": 30.0,
	"max_audio_samples": 480000,
	"max_symbol_steps": 1,
	"vocab_size": 8192,
	"joint_extra_outputs": 5,
	"checkpoint": {
	"type": "pretrained",
	"model_id": "nvidia/parakeet-tdt-0.6b-v3"
	},
	"coreml": {
	"compute_units": "CPU_AND_NEURAL_ENGINE",
	"compute_precision": "FLOAT16"
	},
	"components": {
	"mel_encoder": {
	"description": "Fused preprocessor+encoder: raw audio to encoder frames in one model (ANE-accelerated).",
	"inputs": {
	"audio_signal": [1, 480000],
	"audio_length": [1]
	},
	"outputs": {
	"encoder": [1, 1024, 375],
	"encoder_length": [1]
	},
	"path": "parakeet_mel_encoder_30s.mlpackage"
	},
	"decoder": {
	"description": "LSTM prediction network (CPU).",
	"inputs": {
	"targets": [1, 1],
	"target_length": [1],
	"h_in": [2, 1, 640],
	"c_in": [2, 1, 640]
	},
	"outputs": {
	"decoder": [1, 640, 1],
	"h_out": [2, 1, 640],
	"c_out": [2, 1, 640]
	},
	"path": "parakeet_decoder.mlpackage"
	},
	"joint_logits_single_step": {
	"description": "Joint network exposing full-vocab token logits and duration logits (CPU). Enables host-side medical term boosting.",
	"inputs": {
	"encoder_step": [1, 1024, 1],
	"decoder_step": [1, 640, 1]
	},
	"outputs": {
	"token_logits": [1, 1, 1, 8193],
	"duration_logits": [1, 1, 1, 5]
	},
	"path": "parakeet_joint_logits_single_step.mlpackage"
	}
	}
	}