#!/usr/bin/env bash
#
# Run the LLaMA-Factory reward-model training job, then ask the
# api.ppinfra.com GPU-instance API to stop this instance so it does not keep
# running after the job has ended.
#
# Required environment:
#   PPINFRA_API_KEY      Bearer token for the api.ppinfra.com API.
# Optional environment:
#   PPINFRA_INSTANCE_ID  Id of the instance to stop
#                        (default: fc6c52f1e155fcb9).
#
# Exit status: the training exit code if training failed, otherwise the exit
# code of the stop request.
#
# SECURITY NOTE: an API token used to be hard-coded in this script. Treat that
# token as leaked and revoke it; supply a fresh one via PPINFRA_API_KEY.

# No `set -e` on purpose: the stop request must be attempted even when the
# training command fails.
set -u

# Fail fast, before a long training run, if the stop request could not be
# authenticated later.
: "${PPINFRA_API_KEY:?PPINFRA_API_KEY must be set (API token used to stop the instance)}"
instance_id="${PPINFRA_INSTANCE_ID:-fc6c52f1e155fcb9}"

# 1. Start training.
train_status=0
llamafactory-cli train /home/LLaMA-Factory/examples/train_qlora/mistral_14b_rm_lora.yaml ||
  train_status=$?
if [ "$train_status" -ne 0 ]; then
  printf 'warning: training exited with status %s\n' "$train_status" >&2
fi

# 2. Score data with the reward model (currently disabled).
# python /home/LLaMA-Factory/myscripts/rm_infer_wzw.py \
# --input_path /home/LLaMA-Factory/data/new_byranw_v1.json \
# --output_path /home/LLaMA-Factory/data/new_new_byranw_v1.json \
# --reward_model_path /home/LLaMA-Factory/model/rm_v1_cx \
# --batch_size 4 \
# --fp16

# 3. Once training has ended, call the API to stop the instance.
# The Authorization header is fed to curl on stdin (`--header @-`) so the token
# never appears in the process argument list. `--fail` turns HTTP errors into a
# non-zero exit status instead of a silent success.
stop_status=0
printf 'Authorization: Bearer %s\n' "$PPINFRA_API_KEY" |
  curl --request POST \
    --fail --silent --show-error \
    --retry 3 \
    --url https://api.ppinfra.com/gpu-instance/openapi/v1/gpu/instance/stop \
    --header @- \
    --header "Content-Type: application/json" \
    --data "{\"instanceId\":\"${instance_id}\"}" ||
  stop_status=$?
if [ "$stop_status" -ne 0 ]; then
  printf 'error: stop request for instance %s failed (curl exit %s)\n' \
    "$instance_id" "$stop_status" >&2
fi

# Report a training failure first; otherwise report the stop request's result.
if [ "$train_status" -ne 0 ]; then
  exit "$train_status"
fi
exit "$stop_status"