{
  "experiment_name": "gqa_ru_qwen35_0_8b_lora_fast_v1",
  "split": "val",
  "max_samples": 50,
  "generate_predictions": true,
  "config": {
    "experiment_name": "gqa_ru_qwen35_0_8b_lora_fast_v1",
    "seed": 42,
    "data": {
      "name": "gqa_ru",
      "train_path": "/home/lockr/projects/VK_education_vllm/data/gqa_ru/train.jsonl",
      "val_path": "/home/lockr/projects/VK_education_vllm/data/gqa_ru/val.jsonl",
      "test_path": "/home/lockr/projects/VK_education_vllm/data/gqa_ru/test.jsonl",
      "image_root": "/home/lockr/projects/VK_education_vllm/data/gqa_ru/images",
      "question_field": "question",
      "answer_field": "answer",
      "metric_primary": "accuracy"
    },
    "model": {
      "name": "qwen35_0_8b_fast",
      "model_name_or_path": "Qwen/Qwen3.5-0.8B",
      "processor_name_or_path": "Qwen/Qwen3.5-0.8B",
      "trust_remote_code": true,
      "max_seq_length": 1024,
      "image_resolution": 336
    },
    "train": {
      "name": "lora_ft_fast",
      "seed": 42,
      "mixed_precision": "bf16",
      "batch_size": 8,
      "grad_accumulation_steps": 1,
      "num_train_epochs": 1.0,
      "learning_rate": 0.0002,
      "weight_decay": 0.01,
      "warmup_ratio": 0.03,
      "lr_scheduler_type": "cosine",
      "max_grad_norm": 1.0,
      "lora": {
        "r": 16,
        "alpha": 32,
        "dropout": 0.05,
        "target_modules": [
          "q_proj",
          "k_proj",
          "v_proj",
          "o_proj"
        ]
      },
      "checkpoint": {
        "output_dir": "/mnt/c/users/doly2/.codex/worktrees/1137ba20-2e76-4ec5-9fd7-afc51f663806/vk_education_vllm/checkpoints/lora_ft_fast",
        "save_strategy": "steps",
        "save_steps": 250,
        "eval_steps": 250,
        "save_total_limit": 5,
        "load_best_model_at_end": true,
        "metric_for_best_model": "eval_loss",
        "greater_is_better": false
      },
      "logging": {
        "logging_steps": 5,
        "report_to": [
          "tensorboard",
          "wandb"
        ]
      }
    },
    "eval": {
      "batch_size": 16,
      "max_new_tokens": 32
    },
    "inference": {
      "batch_size": 8,
      "max_new_tokens": 64
    },
    "run": {
      "output_dir": "/mnt/c/users/doly2/.codex/worktrees/1137ba20-2e76-4ec5-9fd7-afc51f663806/vk_education_vllm/runs/gqa_ru_qwen35_0_8b_lora_fast_v1",
      "checkpoint_dir": "/mnt/c/users/doly2/.codex/worktrees/1137ba20-2e76-4ec5-9fd7-afc51f663806/vk_education_vllm/checkpoints/gqa_ru_qwen35_0_8b_lora_fast_v1",
      "eval_metric": "accuracy"
    },
    "raw_paths": {
      "data_config": "/mnt/c/users/doly2/.codex/worktrees/1137ba20-2e76-4ec5-9fd7-afc51f663806/vk_education_vllm/configs/data/gqa_ru.yaml",
      "model_config": "/mnt/c/users/doly2/.codex/worktrees/1137ba20-2e76-4ec5-9fd7-afc51f663806/vk_education_vllm/configs/model/qwen35_0_8b_fast.yaml",
      "train_config": "/mnt/c/users/doly2/.codex/worktrees/1137ba20-2e76-4ec5-9fd7-afc51f663806/vk_education_vllm/configs/train/lora_ft_fast.yaml",
      "eval_config": "/mnt/c/users/doly2/.codex/worktrees/1137ba20-2e76-4ec5-9fd7-afc51f663806/vk_education_vllm/configs/eval/default.yaml",
      "inference_config": "/mnt/c/users/doly2/.codex/worktrees/1137ba20-2e76-4ec5-9fd7-afc51f663806/vk_education_vllm/configs/inference/default.yaml"
    }
  },
  "models": {
    "base": {
      "samples": 50.0,
      "answer_tokens": 121.0,
      "answer_loss": 5.196026565614811,
      "answer_perplexity": 180.55339760224993,
      "eval_runtime": 55.80415617499966,
      "samples_per_second": 0.8959906112226113,
      "exact_match": 0.18,
      "token_f1": 0.2,
      "predictions_path": "runs/gqa_ru_qwen35_0_8b_lora_fast_v1/eval_val_50_generate/val_base_predictions.jsonl"
    },
    "adapter": {
      "samples": 50.0,
      "answer_tokens": 121.0,
      "answer_loss": 2.629122363634346,
      "answer_perplexity": 13.861599118677365,
      "eval_runtime": 71.44314903100167,
      "samples_per_second": 0.6998571686461253,
      "exact_match": 0.36,
      "token_f1": 0.36,
      "predictions_path": "runs/gqa_ru_qwen35_0_8b_lora_fast_v1/eval_val_50_generate/val_adapter_predictions.jsonl"
    }
  },
  "comparison": {
    "answer_loss_delta_adapter_minus_base": -2.566904201980465,
    "answer_loss_relative_improvement": 0.4940129095888755,
    "exact_match_delta_adapter_minus_base": 0.18
  }
}