# Recommended single-GPU RTX 6000 Ada 48/50GB recipe.
# Based on QLoRA + TRL SFTTrainer. Dataset rows are verified to fit within 2048 tokens under the Qwen3 tokenizer.
# Base model and training dataset identifiers.
# NOTE(review): both look like Hugging Face Hub repo ids — confirm with the loading script.
model_name_or_path: Qwen/Qwen3-8B
dataset_name: nraptisss/TMF921-intent-to-config-research-sota
# Split names for training and for evaluation during training;
# presumably both must exist in the dataset named above — verify.
train_split: train_sota
eval_split: validation
# Where run artifacts are written, and the repo id presumably used for upload
# when push_to_hub is true — TODO confirm against the training script.
output_dir: outputs/qwen3-8b-tmf921-qlora
hub_model_id: nraptisss/Qwen3-8B-TMF921-Intent-QLoRA-ResearchSOTA

# Sequence/data
# Per-example token budget; same 2048-token figure the file header says the
# dataset rows were verified against.
max_length: 2048
packing: false                 # safer default. Set true only after installing flash-attn successfully.
# NOTE(review): presumably restricts the loss to assistant turns (TRL's SFTConfig
# has an option with this name) — confirm the chat template in use supports it.
assistant_only_loss: true
# Presumably the number of worker processes for dataset preprocessing —
# TODO confirm; adjust to the CPU cores available on the training host.
dataset_num_proc: 8

# QLoRA
# 4-bit base-model quantization: NF4 quant type with double quantization enabled.
# NOTE(review): presumably forwarded to a bitsandbytes quantization config — confirm.
load_in_4bit: true
bnb_4bit_quant_type: nf4
bnb_4bit_use_double_quant: true
# LoRA adapter hyperparameters. Under standard LoRA scaling (alpha / r),
# r=64 with alpha=16 yields a scaling factor of 0.25.
lora_r: 64
lora_alpha: 16
lora_dropout: 0.05
# "all-linear" is presumably the PEFT shorthand for adapting every linear
# layer — verify the script passes it through unchanged.
lora_target_modules: all-linear

# Optimization
# Number of passes over the training split (assumed from the key name — confirm).
epochs: 2
# Constant learning rate of 0.0002 with zero warmup steps and zero weight decay.
learning_rate: 0.0002
lr_scheduler_type: constant
warmup_steps: 0
weight_decay: 0.0
# NOTE(review): presumably the gradient-norm clipping threshold — confirm.
max_grad_norm: 0.3
# 2 examples per device x 8 accumulation steps = 16 examples per optimizer
# step on the single-GPU setup named in the file header.
per_device_train_batch_size: 2
gradient_accumulation_steps: 8
per_device_eval_batch_size: 2
# bf16, gradient checkpointing and a paged optimizer: presumably chosen to keep
# the run within the memory of the GPU named in the file header — TODO confirm.
bf16: true
gradient_checkpointing: true
optim: paged_adamw_32bit

# Logging/eval/checkpointing
# NOTE(review): the *_steps intervals are presumably counted in optimizer
# steps — confirm with the trainer setup.
logging_steps: 10
# Evaluation and checkpointing use the same 250-step interval.
eval_steps: 250
save_steps: 250
# Presumably caps how many checkpoints are retained — TODO confirm.
save_total_limit: 3
# Run and project labels, presumably for experiment tracking — verify which
# tracker reads them.
run_name: qwen3-8b-tmf921-qlora-r64
project: tmf921-intent-sft
trackio_space_id: null         # e.g. nraptisss/tmf921-trackio
# Presumably uploads the result to the Hub repo given by hub_model_id — confirm.
push_to_hub: true

# Generation eval after training
# Dataset splits evaluated by generation. The names suggest one in-distribution
# set plus template/use-case/sector OOD sets and an adversarial set —
# presumably each must exist in the dataset; verify.
eval_splits:
  - test_in_distribution
  - test_template_ood
  - test_use_case_ood
  - test_sector_ood
  - test_adversarial
# Decoding settings. Temperature 0.0 with top_p 1.0 presumably means greedy
# decoding — verify how the eval script treats a temperature of 0.
generation_max_new_tokens: 2048
generation_temperature: 0.0
generation_top_p: 1.0
# NOTE(review): null presumably means "no cap" (evaluate every row of each
# split) — TODO confirm.
eval_max_samples_per_split: null