{ "experiment_name": "gqa_ru_qwen35_0_8b_lora_fast_v1", "split": "val", "max_samples": 50, "generate_predictions": true, "config": { "experiment_name": "gqa_ru_qwen35_0_8b_lora_fast_v1", "seed": 42, "data": { "name": "gqa_ru", "train_path": "/home/lockr/projects/VK_education_vllm/data/gqa_ru/train.jsonl", "val_path": "/home/lockr/projects/VK_education_vllm/data/gqa_ru/val.jsonl", "test_path": "/home/lockr/projects/VK_education_vllm/data/gqa_ru/test.jsonl", "image_root": "/home/lockr/projects/VK_education_vllm/data/gqa_ru/images", "question_field": "question", "answer_field": "answer", "metric_primary": "accuracy" }, "model": { "name": "qwen35_0_8b_fast", "model_name_or_path": "Qwen/Qwen3.5-0.8B", "processor_name_or_path": "Qwen/Qwen3.5-0.8B", "trust_remote_code": true, "max_seq_length": 1024, "image_resolution": 336 }, "train": { "name": "lora_ft_fast", "seed": 42, "mixed_precision": "bf16", "batch_size": 8, "grad_accumulation_steps": 1, "num_train_epochs": 1.0, "learning_rate": 0.0002, "weight_decay": 0.01, "warmup_ratio": 0.03, "lr_scheduler_type": "cosine", "max_grad_norm": 1.0, "lora": { "r": 16, "alpha": 32, "dropout": 0.05, "target_modules": [ "q_proj", "k_proj", "v_proj", "o_proj" ] }, "checkpoint": { "output_dir": "/mnt/c/users/doly2/.codex/worktrees/1137ba20-2e76-4ec5-9fd7-afc51f663806/vk_education_vllm/checkpoints/lora_ft_fast", "save_strategy": "steps", "save_steps": 250, "eval_steps": 250, "save_total_limit": 5, "load_best_model_at_end": true, "metric_for_best_model": "eval_loss", "greater_is_better": false }, "logging": { "logging_steps": 5, "report_to": [ "tensorboard", "wandb" ] } }, "eval": { "batch_size": 16, "max_new_tokens": 32 }, "inference": { "batch_size": 8, "max_new_tokens": 64 }, "run": { "output_dir": "/mnt/c/users/doly2/.codex/worktrees/1137ba20-2e76-4ec5-9fd7-afc51f663806/vk_education_vllm/runs/gqa_ru_qwen35_0_8b_lora_fast_v1", "checkpoint_dir": "/mnt/c/users/doly2/.codex/worktrees/1137ba20-2e76-4ec5-9fd7-afc51f663806/vk_education_vllm/checkpoints/gqa_ru_qwen35_0_8b_lora_fast_v1", "eval_metric": "accuracy" }, "raw_paths": { "data_config": "/mnt/c/users/doly2/.codex/worktrees/1137ba20-2e76-4ec5-9fd7-afc51f663806/vk_education_vllm/configs/data/gqa_ru.yaml", "model_config": "/mnt/c/users/doly2/.codex/worktrees/1137ba20-2e76-4ec5-9fd7-afc51f663806/vk_education_vllm/configs/model/qwen35_0_8b_fast.yaml", "train_config": "/mnt/c/users/doly2/.codex/worktrees/1137ba20-2e76-4ec5-9fd7-afc51f663806/vk_education_vllm/configs/train/lora_ft_fast.yaml", "eval_config": "/mnt/c/users/doly2/.codex/worktrees/1137ba20-2e76-4ec5-9fd7-afc51f663806/vk_education_vllm/configs/eval/default.yaml", "inference_config": "/mnt/c/users/doly2/.codex/worktrees/1137ba20-2e76-4ec5-9fd7-afc51f663806/vk_education_vllm/configs/inference/default.yaml" } }, "models": { "base": { "samples": 50.0, "answer_tokens": 121.0, "answer_loss": 5.196026565614811, "answer_perplexity": 180.55339760224993, "eval_runtime": 55.80415617499966, "samples_per_second": 0.8959906112226113, "exact_match": 0.18, "token_f1": 0.2, "predictions_path": "runs/gqa_ru_qwen35_0_8b_lora_fast_v1/eval_val_50_generate/val_base_predictions.jsonl" }, "adapter": { "samples": 50.0, "answer_tokens": 121.0, "answer_loss": 2.629122363634346, "answer_perplexity": 13.861599118677365, "eval_runtime": 71.44314903100167, "samples_per_second": 0.6998571686461253, "exact_match": 0.36, "token_f1": 0.36, "predictions_path": "runs/gqa_ru_qwen35_0_8b_lora_fast_v1/eval_val_50_generate/val_adapter_predictions.jsonl" } }, "comparison": { "answer_loss_delta_adapter_minus_base": -2.566904201980465, "answer_loss_relative_improvement": 0.4940129095888755, "exact_match_delta_adapter_minus_base": 0.18 } }