{ "model_name_or_path": "Qwen/Qwen3-8B", "dataset_name": "nraptisss/TMF921-intent-to-config-research-sota", "train_split": "train_sota", "eval_split": "validation", "output_dir": "/home/user/work/Pepe2/tmf921-intent-training/runs/qwen3-8b-qlora-20260501-083834/outputs/adapter", "hub_model_id": "nraptisss/Qwen3-8B-TMF921-Intent-QLoRA-qwen3-8b-qlora-20260501-083834", "max_length": 2048, "packing": false, "assistant_only_loss": true, "dataset_num_proc": 8, "load_in_4bit": true, "bnb_4bit_quant_type": "nf4", "bnb_4bit_use_double_quant": true, "lora_r": 64, "lora_alpha": 16, "lora_dropout": 0.05, "lora_target_modules": "all-linear", "epochs": 2, "learning_rate": 0.0002, "lr_scheduler_type": "constant", "warmup_steps": 0, "weight_decay": 0.0, "max_grad_norm": 0.3, "per_device_train_batch_size": 2, "gradient_accumulation_steps": 8, "per_device_eval_batch_size": 2, "bf16": true, "gradient_checkpointing": true, "optim": "paged_adamw_32bit", "logging_steps": 10, "eval_steps": 250, "save_steps": 250, "save_total_limit": 3, "run_name": "qwen3-8b-qlora-20260501-083834", "project": null, "trackio_space_id": null, "push_to_hub": true, "eval_splits": [ "test_in_distribution", "test_template_ood", "test_use_case_ood", "test_sector_ood", "test_adversarial" ], "generation_max_new_tokens": 2048, "generation_temperature": 0.0, "generation_top_p": 1.0, "eval_max_samples_per_split": null }