{ "experiment_name": "gqa_ru_qwen35_0_8b_lora_fast_v1", "split": "val", "max_samples": 200, "generate_predictions": false, "config": { "experiment_name": "gqa_ru_qwen35_0_8b_lora_fast_v1", "seed": 42, "data": { "name": "gqa_ru", "train_path": "/home/lockr/projects/VK_education_vllm/data/gqa_ru/train.jsonl", "val_path": "/home/lockr/projects/VK_education_vllm/data/gqa_ru/val.jsonl", "test_path": "/home/lockr/projects/VK_education_vllm/data/gqa_ru/test.jsonl", "image_root": "/home/lockr/projects/VK_education_vllm/data/gqa_ru/images", "question_field": "question", "answer_field": "answer", "metric_primary": "accuracy" }, "model": { "name": "qwen35_0_8b_fast", "model_name_or_path": "Qwen/Qwen3.5-0.8B", "processor_name_or_path": "Qwen/Qwen3.5-0.8B", "trust_remote_code": true, "max_seq_length": 1024, "image_resolution": 336 }, "train": { "name": "lora_ft_fast", "seed": 42, "mixed_precision": "bf16", "batch_size": 8, "grad_accumulation_steps": 1, "num_train_epochs": 1.0, "learning_rate": 0.0002, "weight_decay": 0.01, "warmup_ratio": 0.03, "lr_scheduler_type": "cosine", "max_grad_norm": 1.0, "lora": { "r": 16, "alpha": 32, "dropout": 0.05, "target_modules": [ "q_proj", "k_proj", "v_proj", "o_proj" ] }, "checkpoint": { "output_dir": "/mnt/c/users/doly2/.codex/worktrees/1137ba20-2e76-4ec5-9fd7-afc51f663806/vk_education_vllm/checkpoints/lora_ft_fast", "save_strategy": "steps", "save_steps": 250, "eval_steps": 250, "save_total_limit": 5, "load_best_model_at_end": true, "metric_for_best_model": "eval_loss", "greater_is_better": false }, "logging": { "logging_steps": 5, "report_to": [ "tensorboard", "wandb" ] } }, "eval": { "batch_size": 16, "max_new_tokens": 32 }, "inference": { "batch_size": 8, "max_new_tokens": 64 }, "run": { "output_dir": "/mnt/c/users/doly2/.codex/worktrees/1137ba20-2e76-4ec5-9fd7-afc51f663806/vk_education_vllm/runs/gqa_ru_qwen35_0_8b_lora_fast_v1", "checkpoint_dir": "/mnt/c/users/doly2/.codex/worktrees/1137ba20-2e76-4ec5-9fd7-afc51f663806/vk_education_vllm/checkpoints/gqa_ru_qwen35_0_8b_lora_fast_v1", "eval_metric": "accuracy" }, "raw_paths": { "data_config": "/mnt/c/users/doly2/.codex/worktrees/1137ba20-2e76-4ec5-9fd7-afc51f663806/vk_education_vllm/configs/data/gqa_ru.yaml", "model_config": "/mnt/c/users/doly2/.codex/worktrees/1137ba20-2e76-4ec5-9fd7-afc51f663806/vk_education_vllm/configs/model/qwen35_0_8b_fast.yaml", "train_config": "/mnt/c/users/doly2/.codex/worktrees/1137ba20-2e76-4ec5-9fd7-afc51f663806/vk_education_vllm/configs/train/lora_ft_fast.yaml", "eval_config": "/mnt/c/users/doly2/.codex/worktrees/1137ba20-2e76-4ec5-9fd7-afc51f663806/vk_education_vllm/configs/eval/default.yaml", "inference_config": "/mnt/c/users/doly2/.codex/worktrees/1137ba20-2e76-4ec5-9fd7-afc51f663806/vk_education_vllm/configs/inference/default.yaml" } }, "models": { "base": { "samples": 200.0, "answer_tokens": 496.0, "answer_loss": 5.169734188625889, "answer_perplexity": 175.8680835335284, "eval_runtime": 14.529228995001176, "samples_per_second": 13.765355344651157, "predictions_path": "" }, "adapter": { "samples": 200.0, "answer_tokens": 496.0, "answer_loss": 2.53404495023912, "answer_perplexity": 12.604387280790764, "eval_runtime": 11.7076815229957, "samples_per_second": 17.082801544197203, "predictions_path": "" } }, "comparison": { "answer_loss_delta_adapter_minus_base": -2.635689238386769, "answer_loss_relative_improvement": 0.5098307073863952 } }