macmacmacmac
/

VibeThinker-3B-BugBounty-Triage

+# MLX-LM LoRA config for fine-tuning VibeThinker-3B into a bug-bounty triage model.
+model: "WeiboAI/VibeThinker-3B"
+train: true
+# Data dir must contain train.jsonl and valid.jsonl (chat / messages format).
+data: "data/sft"
+fine_tune_type: lora
+optimizer: adamw
+# Fixed seed so repeated runs start from the same random state.
+seed: 13
+# Apply LoRA to every transformer block for richer adaptation (M5 Max has headroom).
+# -1 asks MLX-LM for "all layers", so the config neither errors out nor silently
+# covers only the last N blocks if the checkpoint's depth differs from the
+# previously hard-coded 36.
+num_layers: -1
+batch_size: 8
+iters: 2000
+max_seq_length: 2048
+# Trade extra compute for lower memory use during backprop.
+grad_checkpoint: true
+learning_rate: 1.0e-4
+# Only learn from the assistant turn, not the (long) system+report prompt.
+mask_prompt: true
+# Reporting / evaluation / checkpoint cadence, all counted in training iterations.
+steps_per_report: 10
+steps_per_eval: 200
+val_batches: 25
+save_every: 200
+# Directory the adapter weights are written to.
+adapter_path: "adapters"
+# LoRA adapter hyper-parameters and the module names that receive adapters.
+lora_parameters:
+  rank: 16
+  scale: 20.0
+  dropout: 0.0
+  # Attention projections (q/k/v/o) followed by MLP projections (gate/up/down).
+  keys:
+    - self_attn.q_proj
+    - self_attn.k_proj
+    - self_attn.v_proj
+    - self_attn.o_proj
+    - mlp.gate_proj
+    - mlp.up_proj
+    - mlp.down_proj