---
# Recommended single-GPU RTX 6000 Ada 48/50GB recipe.
# Based on QLoRA + TRL SFTTrainer.
# Dataset rows are verified to fit 2048 tokens with Qwen3.

# Model, dataset, and output locations
model_name_or_path: Qwen/Qwen3-8B
dataset_name: nraptisss/TMF921-intent-to-config-research-sota
train_split: train_sota
eval_split: validation
output_dir: outputs/qwen3-8b-tmf921-qlora
hub_model_id: nraptisss/Qwen3-8B-TMF921-Intent-QLoRA-ResearchSOTA

# Sequence/data
# max_length matches the 2048-token bound the dataset rows were verified against.
max_length: 2048
# Safer default. Set true only after installing flash-attn successfully.
packing: false
assistant_only_loss: true
dataset_num_proc: 8

# QLoRA
load_in_4bit: true
bnb_4bit_quant_type: nf4
bnb_4bit_use_double_quant: true
lora_r: 64
lora_alpha: 16
lora_dropout: 0.05
lora_target_modules: all-linear

# Optimization
epochs: 2
# Kept in plain decimal form: exponent notation without a decimal point
# (e.g. 2e-4) is loaded as a string by YAML 1.1 parsers such as PyYAML.
learning_rate: 0.0002
lr_scheduler_type: constant
warmup_steps: 0
weight_decay: 0.0
max_grad_norm: 0.3
# 2 sequences per step x 8 accumulation steps = 16 sequences per optimizer
# update on the single GPU this recipe targets.
per_device_train_batch_size: 2
gradient_accumulation_steps: 8
per_device_eval_batch_size: 2
bf16: true
gradient_checkpointing: true
optim: paged_adamw_32bit

# Logging/eval/checkpointing
logging_steps: 10
# eval_steps and save_steps share the same interval (250).
eval_steps: 250
save_steps: 250
save_total_limit: 3
run_name: qwen3-8b-tmf921-qlora-r64
project: tmf921-intent-sft
trackio_space_id: null  # e.g. nraptisss/tmf921-trackio
push_to_hub: true

# Generation eval after training
eval_splits:
  - test_in_distribution
  - test_template_ood
  - test_use_case_ood
  - test_sector_ood
  - test_adversarial
generation_max_new_tokens: 2048
# NOTE(review): temperature 0.0 presumably selects greedy decoding in the
# evaluation script - confirm against the consumer of this file.
generation_temperature: 0.0
generation_top_p: 1.0
# NOTE(review): null presumably means "no per-split sample cap" - confirm.
eval_max_samples_per_split: null