---
# MLX-LM LoRA config for fine-tuning VibeThinker-3B into a bug-bounty triage
# model.
model: "WeiboAI/VibeThinker-3B"
train: true

# Data dir must contain train.jsonl and valid.jsonl (chat / messages format).
data: "data/sft"

fine_tune_type: lora
optimizer: adamw
seed: 13

# Apply LoRA to all 36 transformer blocks for richer adaptation (M5 Max has
# headroom).
# NOTE(review): assumes the base model has exactly 36 blocks - confirm against
# the model's config before changing this value.
num_layers: 36

batch_size: 8
iters: 2000
max_seq_length: 2048
grad_checkpoint: true
# Keep the decimal point and signed exponent so the value is resolved as a
# float rather than a string by stricter YAML loaders.
learning_rate: 1.0e-4

# Only learn from the assistant turn, not the (long) system+report prompt.
mask_prompt: true

# Reporting, evaluation and checkpoint cadence, in training steps. Evaluation
# and checkpointing share the same 200-step interval.
steps_per_report: 10
steps_per_eval: 200
val_batches: 25
save_every: 200
adapter_path: "adapters"

lora_parameters:
  # Module-name suffixes that receive adapters: the four attention projections
  # and the three MLP projections listed below.
  keys:
    - "self_attn.q_proj"
    - "self_attn.k_proj"
    - "self_attn.v_proj"
    - "self_attn.o_proj"
    - "mlp.gate_proj"
    - "mlp.up_proj"
    - "mlp.down_proj"
  rank: 16
  scale: 20.0
  dropout: 0.0