{
  "model_name_or_path": "Qwen/Qwen3-8B",
  "dataset_name": "nraptisss/TMF921-intent-to-config-research-sota",
  "train_split": "train_sota",
  "eval_split": "validation",
  "output_dir": "/home/user/work/Pepe2/tmf921-intent-training/runs/qwen3-8b-qlora-20260501-083834/outputs/adapter",
  "hub_model_id": "nraptisss/Qwen3-8B-TMF921-Intent-QLoRA-qwen3-8b-qlora-20260501-083834",
  "max_length": 2048,
  "packing": false,
  "assistant_only_loss": true,
  "dataset_num_proc": 8,
  "load_in_4bit": true,
  "bnb_4bit_quant_type": "nf4",
  "bnb_4bit_use_double_quant": true,
  "lora_r": 64,
  "lora_alpha": 16,
  "lora_dropout": 0.05,
  "lora_target_modules": "all-linear",
  "epochs": 2,
  "learning_rate": 0.0002,
  "lr_scheduler_type": "constant",
  "warmup_steps": 0,
  "weight_decay": 0.0,
  "max_grad_norm": 0.3,
  "per_device_train_batch_size": 2,
  "gradient_accumulation_steps": 8,
  "per_device_eval_batch_size": 2,
  "bf16": true,
  "gradient_checkpointing": true,
  "optim": "paged_adamw_32bit",
  "logging_steps": 10,
  "eval_steps": 250,
  "save_steps": 250,
  "save_total_limit": 3,
  "run_name": "qwen3-8b-qlora-20260501-083834",
  "project": null,
  "trackio_space_id": null,
  "push_to_hub": true,
  "eval_splits": [
    "test_in_distribution",
    "test_template_ood",
    "test_use_case_ood",
    "test_sector_ood",
    "test_adversarial"
  ],
  "generation_max_new_tokens": 2048,
  "generation_temperature": 0.0,
  "generation_top_p": 1.0,
  "eval_max_samples_per_split": null
}