{ "compute_tier": "T4", "base_model": "unsloth/Qwen2.5-3B-bnb-4bit", "beta": 0.1, "lr": 5e-07, "epochs": 1, "final_train_loss": 0.7901351070404052, "end_chosen_reward": -0.699219024181366, "end_rejected_reward": -0.8914896249771118, "end_reward_gap": 0.19227060079574587 }