{
  "experiment_name": "gqa_ru_qwen35_0_8b_lora_fast_v1",
  "split": "val",
  "max_samples": 200,
  "generate_predictions": false,
  "config": {
    "experiment_name": "gqa_ru_qwen35_0_8b_lora_fast_v1",
    "seed": 42,
    "data": {
      "name": "gqa_ru",
      "train_path": "/home/lockr/projects/VK_education_vllm/data/gqa_ru/train.jsonl",
      "val_path": "/home/lockr/projects/VK_education_vllm/data/gqa_ru/val.jsonl",
      "test_path": "/home/lockr/projects/VK_education_vllm/data/gqa_ru/test.jsonl",
      "image_root": "/home/lockr/projects/VK_education_vllm/data/gqa_ru/images",
      "question_field": "question",
      "answer_field": "answer",
      "metric_primary": "accuracy"
    },
    "model": {
      "name": "qwen35_0_8b_fast",
      "model_name_or_path": "Qwen/Qwen3.5-0.8B",
      "processor_name_or_path": "Qwen/Qwen3.5-0.8B",
      "trust_remote_code": true,
      "max_seq_length": 1024,
      "image_resolution": 336
    },
    "train": {
      "name": "lora_ft_fast",
      "seed": 42,
      "mixed_precision": "bf16",
      "batch_size": 8,
      "grad_accumulation_steps": 1,
      "num_train_epochs": 1.0,
      "learning_rate": 0.0002,
      "weight_decay": 0.01,
      "warmup_ratio": 0.03,
      "lr_scheduler_type": "cosine",
      "max_grad_norm": 1.0,
      "lora": {
        "r": 16,
        "alpha": 32,
        "dropout": 0.05,
        "target_modules": [
          "q_proj",
          "k_proj",
          "v_proj",
          "o_proj"
        ]
      },
      "checkpoint": {
        "output_dir": "/mnt/c/users/doly2/.codex/worktrees/1137ba20-2e76-4ec5-9fd7-afc51f663806/vk_education_vllm/checkpoints/lora_ft_fast",
        "save_strategy": "steps",
        "save_steps": 250,
        "eval_steps": 250,
        "save_total_limit": 5,
        "load_best_model_at_end": true,
        "metric_for_best_model": "eval_loss",
        "greater_is_better": false
      },
      "logging": {
        "logging_steps": 5,
        "report_to": [
          "tensorboard",
          "wandb"
        ]
      }
    },
    "eval": {
      "batch_size": 16,
      "max_new_tokens": 32
    },
    "inference": {
      "batch_size": 8,
      "max_new_tokens": 64
    },
    "run": {
      "output_dir": "/mnt/c/users/doly2/.codex/worktrees/1137ba20-2e76-4ec5-9fd7-afc51f663806/vk_education_vllm/runs/gqa_ru_qwen35_0_8b_lora_fast_v1",
      "checkpoint_dir": "/mnt/c/users/doly2/.codex/worktrees/1137ba20-2e76-4ec5-9fd7-afc51f663806/vk_education_vllm/checkpoints/gqa_ru_qwen35_0_8b_lora_fast_v1",
      "eval_metric": "accuracy"
    },
    "raw_paths": {
      "data_config": "/mnt/c/users/doly2/.codex/worktrees/1137ba20-2e76-4ec5-9fd7-afc51f663806/vk_education_vllm/configs/data/gqa_ru.yaml",
      "model_config": "/mnt/c/users/doly2/.codex/worktrees/1137ba20-2e76-4ec5-9fd7-afc51f663806/vk_education_vllm/configs/model/qwen35_0_8b_fast.yaml",
      "train_config": "/mnt/c/users/doly2/.codex/worktrees/1137ba20-2e76-4ec5-9fd7-afc51f663806/vk_education_vllm/configs/train/lora_ft_fast.yaml",
      "eval_config": "/mnt/c/users/doly2/.codex/worktrees/1137ba20-2e76-4ec5-9fd7-afc51f663806/vk_education_vllm/configs/eval/default.yaml",
      "inference_config": "/mnt/c/users/doly2/.codex/worktrees/1137ba20-2e76-4ec5-9fd7-afc51f663806/vk_education_vllm/configs/inference/default.yaml"
    }
  },
  "models": {
    "base": {
      "samples": 200.0,
      "answer_tokens": 496.0,
      "answer_loss": 5.169734188625889,
      "answer_perplexity": 175.8680835335284,
      "eval_runtime": 14.529228995001176,
      "samples_per_second": 13.765355344651157,
      "predictions_path": ""
    },
    "adapter": {
      "samples": 200.0,
      "answer_tokens": 496.0,
      "answer_loss": 2.53404495023912,
      "answer_perplexity": 12.604387280790764,
      "eval_runtime": 11.7076815229957,
      "samples_per_second": 17.082801544197203,
      "predictions_path": ""
    }
  },
  "comparison": {
    "answer_loss_delta_adapter_minus_base": -2.635689238386769,
    "answer_loss_relative_improvement": 0.5098307073863952
  }
}