#!/usr/bin/env bash
#
# Train the reward model with LLaMA-Factory, then stop the GPU instance through
# the PPInfra API so it does not keep running after the job ends.
#
# Required environment:
#   PPINFRA_API_KEY      Bearer token for the PPInfra API. It is never stored in
#                        this file; any token that was previously committed here
#                        should be treated as leaked and rotated.
# Optional environment:
#   PPINFRA_INSTANCE_ID  ID of the instance to stop (default: fc6c52f1e155fcb9).
#
# Exit status: the training command's status if it failed, otherwise the status
# of the stop request.

# No `set -e` on purpose: the stop request must still be sent when training fails.
set -uo pipefail

# Fail fast, before a long training run, if the token is missing.
: "${PPINFRA_API_KEY:?PPINFRA_API_KEY must be set to the PPInfra API bearer token}"

readonly TRAIN_CONFIG="/home/LLaMA-Factory/examples/train_qlora/mistral_14b_rm_lora.yaml"
readonly STOP_URL="https://api.ppinfra.com/gpu-instance/openapi/v1/gpu/instance/stop"
readonly INSTANCE_ID="${PPINFRA_INSTANCE_ID:-fc6c52f1e155fcb9}"

# 1. Start training. Remember the status instead of aborting so that step 2
#    always runs.
train_status=0
llamafactory-cli train "${TRAIN_CONFIG}" || train_status=$?
if ((train_status != 0)); then
  printf 'warning: training exited with status %d\n' "${train_status}" >&2
fi

# (Optional, currently disabled) Score data with the trained reward model.
# python /home/LLaMA-Factory/myscripts/rm_infer_wzw.py \
#   --input_path /home/LLaMA-Factory/data/new_byranw_v1.json \
#   --output_path /home/LLaMA-Factory/data/new_new_byranw_v1.json \
#   --reward_model_path /home/LLaMA-Factory/model/rm_v1_cx \
#   --batch_size 4 \
#   --fp16

# 2. After training finishes, call the API to stop the instance.
#    --fail makes curl return non-zero on an HTTP error response, so a rejected
#    stop request is not mistaken for success.
printf -v stop_payload '{"instanceId":"%s"}' "${INSTANCE_ID}"
stop_status=0
curl --fail --request POST \
  --url "${STOP_URL}" \
  --header "Authorization: Bearer ${PPINFRA_API_KEY}" \
  --header "Content-Type: application/json" \
  --data "${stop_payload}" || stop_status=$?
if ((stop_status != 0)); then
  printf 'error: stop request for instance %s failed (curl status %d)\n' \
    "${INSTANCE_ID}" "${stop_status}" >&2
fi

# Report a training failure first; otherwise report the stop request's result.
if ((train_status != 0)); then
  exit "${train_status}"
fi
exit "${stop_status}"