{
  "model_type": "averformer-v4",
  "architecture": "AVERFormerV4",
  "corpus": "MELD",
  "num_classes": 7,
  "emotions": [
    "neutral",
    "joy",
    "sadness",
    "anger",
    "fear",
    "disgust",
    "surprise"
  ],
  "audio_backbone": "microsoft/wavlm-large",
  "video_backbone": "MCG-NJU/videomae-large",
  "text_backbone": "microsoft/deberta-v3-large",
  "fusion_layers": 2,
  "lora_r": 16,
  "unfreeze_audio": 0,
  "unfreeze_video": 0,
  "unfreeze_text": 0,
  "fusion_dim": 512,
  "use_text": true,
  "audio_format": {
    "sample_rate": 16000,
    "channels": 1,
    "expected_seconds": "4.0 (rolling buffer; flexible)"
  },
  "video_format": {
    "num_frames": 16,
    "height": 224,
    "width": 224,
    "imagenet_mean": [
      0.485,
      0.456,
      0.406
    ],
    "imagenet_std": [
      0.229,
      0.224,
      0.225
    ],
    "value_range": "[0, 1]"
  },
  "text_format": {
    "tokenizer": "microsoft/deberta-v3-large",
    "max_length": 256,
    "n_ctx_turns": 3,
    "speaker_embedding": true
  },
  "select_metric": "wF1",
  "source_checkpoint": "pytorch_model.pth",
  "training_script": "train_v5_spec.py / train_v6_supcon.py / train_v10_mcncl.py",
  "val_score": 0.3815709082075283,
  "best_epoch": 4,
  "seed": 1337
}