# MLX-LM LoRA config for fine-tuning VibeThinker-3B into a bug-bounty triage model.
# Training run (not inference/eval-only) on top of the base model named below.
train: true
model: 'WeiboAI/VibeThinker-3B'

# Dataset directory. It has to provide both train.jsonl and valid.jsonl,
# each in chat ("messages") format.
data: 'data/sft'

# Fixed seed for this run, followed by the optimizer and the adapter type.
seed: 13
optimizer: adamw
fine_tune_type: lora

# Apply LoRA to every transformer block for richer adaptation (M5 Max has
# headroom). -1 is MLX-LM's "all layers" value, so the setting no longer
# hard-codes the base model's depth (previously 36) and keeps meaning "all"
# if the base checkpoint is swapped for one with a different layer count.
num_layers: -1

# Run length and per-step size.
iters: 2000
batch_size: 8

# Sequence-length cap, with gradient checkpointing switched on
# (presumably to keep memory in check at this length — TODO confirm).
max_seq_length: 2048
grad_checkpoint: true

# Train on the assistant turn only; the (long) system+report prompt is
# excluded from what the model learns from.
mask_prompt: true

learning_rate: 0.0001  # same value as 1.0e-4, written in plain decimal

# Reporting / evaluation / checkpoint cadence. Evaluation and saving share
# the same interval (200), while progress is reported every 10 steps.
save_every: 200
steps_per_eval: 200
val_batches: 25
steps_per_report: 10

# Path used for the LoRA adapter weights (relative path).
adapter_path: 'adapters'

# LoRA hyper-parameters plus the list of module names that receive adapters:
# the four attention projections and the three MLP projections.
lora_parameters:
  rank: 16
  scale: 20.0
  dropout: 0.0
  keys:
    # attention projections
    - 'self_attn.q_proj'
    - 'self_attn.k_proj'
    - 'self_attn.v_proj'
    - 'self_attn.o_proj'
    # MLP projections
    - 'mlp.gate_proj'
    - 'mlp.up_proj'
    - 'mlp.down_proj'