mhussainahmad
/

averformer-meld-v4

emotion-recognition

Model card | Files and versions

averformer-meld-v4 / config.json

mhussainahmad's picture

Upload config.json with huggingface_hub

f86fe7c verified about 1 month ago

History Blame Contribute Delete

1.26 kB

	{
	"model_type": "averformer-v4",
	"architecture": "AVERFormerV4",
	"corpus": "MELD",
	"num_classes": 7,
	"emotions": [
	"neutral",
	"joy",
	"sadness",
	"anger",
	"fear",
	"disgust",
	"surprise"
	],
	"audio_backbone": "microsoft/wavlm-large",
	"video_backbone": "MCG-NJU/videomae-large",
	"text_backbone": "microsoft/deberta-v3-large",
	"fusion_layers": 2,
	"lora_r": 16,
	"unfreeze_audio": 0,
	"unfreeze_video": 0,
	"unfreeze_text": 0,
	"fusion_dim": 512,
	"use_text": true,
	"audio_format": {
	"sample_rate": 16000,
	"channels": 1,
	"expected_seconds": "4.0 (rolling buffer; flexible)"
	},
	"video_format": {
	"num_frames": 16,
	"height": 224,
	"width": 224,
	"imagenet_mean": [
	0.485,
	0.456,
	0.406
	],
	"imagenet_std": [
	0.229,
	0.224,
	0.225
	],
	"value_range": "[0, 1]"
	},
	"text_format": {
	"tokenizer": "microsoft/deberta-v3-large",
	"max_length": 256,
	"n_ctx_turns": 3,
	"speaker_embedding": true
	},
	"select_metric": "wF1",
	"source_checkpoint": "pytorch_model.pth",
	"training_script": "train_v5_spec.py / train_v6_supcon.py / train_v10_mcncl.py",
	"val_score": 0.3815709082075283,
	"best_epoch": 4,
	"seed": 1337
	}