Text Generation
Transformers
Safetensors
qwen3
alignment-handbook
Generated from Trainer
conversational
text-generation-inference
Instructions to use vukien2301/qwen3-0.6b-wpo with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use vukien2301/qwen3-0.6b-wpo with Transformers:
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="vukien2301/qwen3-0.6b-wpo")
messages = [
    {"role": "user", "content": "Who are you?"},
]
pipe(messages)

# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("vukien2301/qwen3-0.6b-wpo")
model = AutoModelForCausalLM.from_pretrained("vukien2301/qwen3-0.6b-wpo")
messages = [
    {"role": "user", "content": "Who are you?"},
]
inputs = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=True,
    return_dict=True,
    return_tensors="pt",
).to(model.device)

outputs = model.generate(**inputs, max_new_tokens=40)
print(tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:]))
- Notebooks
- Google Colab
- Kaggle
- Local Apps Settings
- vLLM
How to use vukien2301/qwen3-0.6b-wpo with vLLM:
Install from pip and serve model
# Install vLLM from pip:
pip install vllm
# Start the vLLM server:
vllm serve "vukien2301/qwen3-0.6b-wpo"
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "vukien2301/qwen3-0.6b-wpo",
        "messages": [
            {
                "role": "user",
                "content": "What is the capital of France?"
            }
        ]
    }'
Use Docker
docker model run hf.co/vukien2301/qwen3-0.6b-wpo
- SGLang
How to use vukien2301/qwen3-0.6b-wpo with SGLang:
Install from pip and serve model
# Install SGLang from pip:
pip install sglang
# Start the SGLang server:
python3 -m sglang.launch_server \
    --model-path "vukien2301/qwen3-0.6b-wpo" \
    --host 0.0.0.0 \
    --port 30000
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "vukien2301/qwen3-0.6b-wpo",
        "messages": [
            {
                "role": "user",
                "content": "What is the capital of France?"
            }
        ]
    }'
Use Docker images
docker run --gpus all \
    --shm-size 32g \
    -p 30000:30000 \
    -v ~/.cache/huggingface:/root/.cache/huggingface \
    --env "HF_TOKEN=<secret>" \
    --ipc=host \
    lmsysorg/sglang:latest \
    python3 -m sglang.launch_server \
        --model-path "vukien2301/qwen3-0.6b-wpo" \
        --host 0.0.0.0 \
        --port 30000
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "vukien2301/qwen3-0.6b-wpo",
        "messages": [
            {
                "role": "user",
                "content": "What is the capital of France?"
            }
        ]
    }'
- Docker Model Runner
How to use vukien2301/qwen3-0.6b-wpo with Docker Model Runner:
docker model run hf.co/vukien2301/qwen3-0.6b-wpo
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.0, | |
| "eval_steps": 500, | |
| "global_step": 1803, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0005546311702717693, | |
| "grad_norm": 2.562516450881958, | |
| "learning_rate": 0.0, | |
| "logits/chosen": 0.5525987148284912, | |
| "logits/rejected": 0.8582919239997864, | |
| "logps/chosen": -280.66448974609375, | |
| "logps/rejected": -198.26028442382812, | |
| "loss": 0.2528, | |
| "loss/chosen-sft": 1.1612701416015625, | |
| "loss/dpo": 0.2528122663497925, | |
| "rewards/accuracies": 0.28125, | |
| "rewards/chosen": -0.00369430985301733, | |
| "rewards/margins": -0.003086067270487547, | |
| "rewards/rejected": -0.0006082424661144614, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.0027731558513588465, | |
| "grad_norm": 1.8309712409973145, | |
| "learning_rate": 1.1049723756906076e-08, | |
| "logits/chosen": 0.6242256164550781, | |
| "logits/rejected": 0.7471870183944702, | |
| "logps/chosen": -365.79705810546875, | |
| "logps/rejected": -280.31201171875, | |
| "loss": 0.2317, | |
| "loss/chosen-sft": 1.3244354724884033, | |
| "loss/dpo": 0.23169046640396118, | |
| "rewards/accuracies": 0.421875, | |
| "rewards/chosen": -0.00040742673445492983, | |
| "rewards/margins": -0.0013081450015306473, | |
| "rewards/rejected": 0.0009007179760374129, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.005546311702717693, | |
| "grad_norm": 2.4382247924804688, | |
| "learning_rate": 2.486187845303867e-08, | |
| "logits/chosen": 0.649553656578064, | |
| "logits/rejected": 0.8831444978713989, | |
| "logps/chosen": -284.4432067871094, | |
| "logps/rejected": -248.49337768554688, | |
| "loss": 0.2305, | |
| "loss/chosen-sft": 1.1865930557250977, | |
| "loss/dpo": 0.23052072525024414, | |
| "rewards/accuracies": 0.4625000059604645, | |
| "rewards/chosen": -0.0007243253057822585, | |
| "rewards/margins": -5.907495506107807e-06, | |
| "rewards/rejected": -0.0007184178684838116, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.008319467554076539, | |
| "grad_norm": 2.216749668121338, | |
| "learning_rate": 3.867403314917127e-08, | |
| "logits/chosen": 0.7230191826820374, | |
| "logits/rejected": 0.9627832174301147, | |
| "logps/chosen": -302.41400146484375, | |
| "logps/rejected": -264.18963623046875, | |
| "loss": 0.2317, | |
| "loss/chosen-sft": 1.27865731716156, | |
| "loss/dpo": 0.23168079555034637, | |
| "rewards/accuracies": 0.518750011920929, | |
| "rewards/chosen": 0.000250987388426438, | |
| "rewards/margins": 0.0003366722376085818, | |
| "rewards/rejected": -8.568489283788949e-05, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.011092623405435386, | |
| "grad_norm": 2.429425001144409, | |
| "learning_rate": 5.248618784530386e-08, | |
| "logits/chosen": 0.7443245649337769, | |
| "logits/rejected": 0.9213382601737976, | |
| "logps/chosen": -296.67926025390625, | |
| "logps/rejected": -233.2224578857422, | |
| "loss": 0.2435, | |
| "loss/chosen-sft": 1.2470526695251465, | |
| "loss/dpo": 0.24353936314582825, | |
| "rewards/accuracies": 0.53125, | |
| "rewards/chosen": 0.000928783614654094, | |
| "rewards/margins": 0.0006695252959616482, | |
| "rewards/rejected": 0.00025925817317329347, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.013865779256794232, | |
| "grad_norm": 2.4279961585998535, | |
| "learning_rate": 6.629834254143646e-08, | |
| "logits/chosen": 0.8054523468017578, | |
| "logits/rejected": 1.0713064670562744, | |
| "logps/chosen": -301.8001403808594, | |
| "logps/rejected": -265.5172424316406, | |
| "loss": 0.2257, | |
| "loss/chosen-sft": 1.2987916469573975, | |
| "loss/dpo": 0.22568467259407043, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.0008613772806711495, | |
| "rewards/margins": -5.407678145274986e-06, | |
| "rewards/rejected": -0.0008559696143493056, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.016638935108153077, | |
| "grad_norm": 1.9755202531814575, | |
| "learning_rate": 8.011049723756906e-08, | |
| "logits/chosen": 0.8093868494033813, | |
| "logits/rejected": 0.8907510042190552, | |
| "logps/chosen": -317.31488037109375, | |
| "logps/rejected": -238.03701782226562, | |
| "loss": 0.2252, | |
| "loss/chosen-sft": 1.2658107280731201, | |
| "loss/dpo": 0.225176140666008, | |
| "rewards/accuracies": 0.4937500059604645, | |
| "rewards/chosen": -0.0001813205162761733, | |
| "rewards/margins": -0.0007404539501294494, | |
| "rewards/rejected": 0.0005591334775090218, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.019412090959511925, | |
| "grad_norm": 2.0302202701568604, | |
| "learning_rate": 9.392265193370165e-08, | |
| "logits/chosen": 0.7306667566299438, | |
| "logits/rejected": 0.9124480485916138, | |
| "logps/chosen": -326.53631591796875, | |
| "logps/rejected": -278.6661682128906, | |
| "loss": 0.2324, | |
| "loss/chosen-sft": 1.2703967094421387, | |
| "loss/dpo": 0.23235292732715607, | |
| "rewards/accuracies": 0.5375000238418579, | |
| "rewards/chosen": -9.17384386411868e-05, | |
| "rewards/margins": 0.00045947995386086404, | |
| "rewards/rejected": -0.0005512182251550257, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.022185246810870772, | |
| "grad_norm": 2.4935853481292725, | |
| "learning_rate": 1.0773480662983425e-07, | |
| "logits/chosen": 0.8364327549934387, | |
| "logits/rejected": 0.9892117381095886, | |
| "logps/chosen": -299.20513916015625, | |
| "logps/rejected": -260.8147888183594, | |
| "loss": 0.237, | |
| "loss/chosen-sft": 1.221801519393921, | |
| "loss/dpo": 0.23703160881996155, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": 0.00010150570597033948, | |
| "rewards/margins": 0.00015612409333698452, | |
| "rewards/rejected": -5.461848195409402e-05, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.024958402662229616, | |
| "grad_norm": 2.5195465087890625, | |
| "learning_rate": 1.2154696132596685e-07, | |
| "logits/chosen": 0.7738581895828247, | |
| "logits/rejected": 1.0265463590621948, | |
| "logps/chosen": -318.85101318359375, | |
| "logps/rejected": -237.57666015625, | |
| "loss": 0.2307, | |
| "loss/chosen-sft": 1.2462958097457886, | |
| "loss/dpo": 0.23071476817131042, | |
| "rewards/accuracies": 0.53125, | |
| "rewards/chosen": -8.556898683309555e-05, | |
| "rewards/margins": 0.0009564283536747098, | |
| "rewards/rejected": -0.0010419972240924835, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.027731558513588463, | |
| "grad_norm": 2.201871871948242, | |
| "learning_rate": 1.3535911602209942e-07, | |
| "logits/chosen": 0.7524826526641846, | |
| "logits/rejected": 0.8071184158325195, | |
| "logps/chosen": -343.429443359375, | |
| "logps/rejected": -256.6566162109375, | |
| "loss": 0.2343, | |
| "loss/chosen-sft": 1.2330760955810547, | |
| "loss/dpo": 0.2342543601989746, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": 0.00010758457938209176, | |
| "rewards/margins": 0.0004232854989822954, | |
| "rewards/rejected": -0.00031570097780786455, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.03050471436494731, | |
| "grad_norm": 2.1549618244171143, | |
| "learning_rate": 1.4917127071823204e-07, | |
| "logits/chosen": 0.6793915033340454, | |
| "logits/rejected": 0.9533787965774536, | |
| "logps/chosen": -286.3639221191406, | |
| "logps/rejected": -248.7386932373047, | |
| "loss": 0.2127, | |
| "loss/chosen-sft": 1.2288844585418701, | |
| "loss/dpo": 0.21267767250537872, | |
| "rewards/accuracies": 0.4937500059604645, | |
| "rewards/chosen": 0.0002672958071343601, | |
| "rewards/margins": 0.0003136277082376182, | |
| "rewards/rejected": -4.6331912017194554e-05, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.033277870216306155, | |
| "grad_norm": 2.341963291168213, | |
| "learning_rate": 1.6298342541436463e-07, | |
| "logits/chosen": 0.7014708518981934, | |
| "logits/rejected": 1.0665433406829834, | |
| "logps/chosen": -310.38134765625, | |
| "logps/rejected": -242.14944458007812, | |
| "loss": 0.2263, | |
| "loss/chosen-sft": 1.2344342470169067, | |
| "loss/dpo": 0.2262984812259674, | |
| "rewards/accuracies": 0.4937500059604645, | |
| "rewards/chosen": 0.0001726890041027218, | |
| "rewards/margins": 0.000270530057605356, | |
| "rewards/rejected": -9.784109715837985e-05, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.036051026067665005, | |
| "grad_norm": 2.35201096534729, | |
| "learning_rate": 1.7679558011049722e-07, | |
| "logits/chosen": 0.7712376117706299, | |
| "logits/rejected": 0.9752995371818542, | |
| "logps/chosen": -323.18377685546875, | |
| "logps/rejected": -256.13909912109375, | |
| "loss": 0.2288, | |
| "loss/chosen-sft": 1.301064133644104, | |
| "loss/dpo": 0.228795126080513, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": 0.0009808921022340655, | |
| "rewards/margins": 0.000625107903033495, | |
| "rewards/rejected": 0.0003557841700967401, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.03882418191902385, | |
| "grad_norm": 2.6178271770477295, | |
| "learning_rate": 1.9060773480662984e-07, | |
| "logits/chosen": 0.7605900168418884, | |
| "logits/rejected": 0.8606400489807129, | |
| "logps/chosen": -351.55023193359375, | |
| "logps/rejected": -263.1789855957031, | |
| "loss": 0.2374, | |
| "loss/chosen-sft": 1.300065040588379, | |
| "loss/dpo": 0.23744645714759827, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": 0.0018242821097373962, | |
| "rewards/margins": 0.0014699746388942003, | |
| "rewards/rejected": 0.0003543071507010609, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.04159733777038269, | |
| "grad_norm": 1.9824270009994507, | |
| "learning_rate": 2.0441988950276244e-07, | |
| "logits/chosen": 0.8183063268661499, | |
| "logits/rejected": 1.056434988975525, | |
| "logps/chosen": -304.6941833496094, | |
| "logps/rejected": -246.3820343017578, | |
| "loss": 0.2305, | |
| "loss/chosen-sft": 1.2456873655319214, | |
| "loss/dpo": 0.23054738342761993, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": 0.0013592742616310716, | |
| "rewards/margins": 0.0002732311259023845, | |
| "rewards/rejected": 0.0010860430775210261, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.044370493621741544, | |
| "grad_norm": 2.122025489807129, | |
| "learning_rate": 2.1823204419889503e-07, | |
| "logits/chosen": 0.7255276441574097, | |
| "logits/rejected": 0.9549322128295898, | |
| "logps/chosen": -319.54302978515625, | |
| "logps/rejected": -265.33941650390625, | |
| "loss": 0.2322, | |
| "loss/chosen-sft": 1.2196282148361206, | |
| "loss/dpo": 0.23221342265605927, | |
| "rewards/accuracies": 0.581250011920929, | |
| "rewards/chosen": 0.002971996320411563, | |
| "rewards/margins": 0.002218194305896759, | |
| "rewards/rejected": 0.0007538016652688384, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.04714364947310039, | |
| "grad_norm": 2.24088454246521, | |
| "learning_rate": 2.320441988950276e-07, | |
| "logits/chosen": 0.9074276685714722, | |
| "logits/rejected": 0.8777278661727905, | |
| "logps/chosen": -311.9112243652344, | |
| "logps/rejected": -235.16073608398438, | |
| "loss": 0.2216, | |
| "loss/chosen-sft": 1.3307913541793823, | |
| "loss/dpo": 0.22161617875099182, | |
| "rewards/accuracies": 0.5375000238418579, | |
| "rewards/chosen": 0.0005854293704032898, | |
| "rewards/margins": 0.00022579837241210043, | |
| "rewards/rejected": 0.0003596309688873589, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.04991680532445923, | |
| "grad_norm": 2.139037609100342, | |
| "learning_rate": 2.4585635359116024e-07, | |
| "logits/chosen": 0.5329002141952515, | |
| "logits/rejected": 0.8354529142379761, | |
| "logps/chosen": -297.3415832519531, | |
| "logps/rejected": -252.3900604248047, | |
| "loss": 0.2337, | |
| "loss/chosen-sft": 1.1595966815948486, | |
| "loss/dpo": 0.23370489478111267, | |
| "rewards/accuracies": 0.59375, | |
| "rewards/chosen": 0.002842614660039544, | |
| "rewards/margins": 0.003834578674286604, | |
| "rewards/rejected": -0.0009919643634930253, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.05268996117581808, | |
| "grad_norm": 2.091688632965088, | |
| "learning_rate": 2.596685082872928e-07, | |
| "logits/chosen": 0.743569016456604, | |
| "logits/rejected": 0.9253543615341187, | |
| "logps/chosen": -319.1153564453125, | |
| "logps/rejected": -271.44854736328125, | |
| "loss": 0.2319, | |
| "loss/chosen-sft": 1.252418041229248, | |
| "loss/dpo": 0.2319067418575287, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": 0.0031713570933789015, | |
| "rewards/margins": 0.002358622383326292, | |
| "rewards/rejected": 0.0008127348264679313, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.05546311702717693, | |
| "grad_norm": 2.084585189819336, | |
| "learning_rate": 2.734806629834254e-07, | |
| "logits/chosen": 0.7346712350845337, | |
| "logits/rejected": 0.8320924639701843, | |
| "logps/chosen": -297.8607482910156, | |
| "logps/rejected": -247.6679229736328, | |
| "loss": 0.2305, | |
| "loss/chosen-sft": 1.1974223852157593, | |
| "loss/dpo": 0.23046079277992249, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": 0.0038714460097253323, | |
| "rewards/margins": 0.004991450812667608, | |
| "rewards/rejected": -0.001120004802942276, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.05823627287853577, | |
| "grad_norm": 2.2226288318634033, | |
| "learning_rate": 2.87292817679558e-07, | |
| "logits/chosen": 0.7988497614860535, | |
| "logits/rejected": 1.012565016746521, | |
| "logps/chosen": -322.70849609375, | |
| "logps/rejected": -248.58609008789062, | |
| "loss": 0.2288, | |
| "loss/chosen-sft": 1.2408934831619263, | |
| "loss/dpo": 0.228750079870224, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": 0.006251711398363113, | |
| "rewards/margins": 0.007590452674776316, | |
| "rewards/rejected": -0.001338740810751915, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.06100942872989462, | |
| "grad_norm": 2.297593593597412, | |
| "learning_rate": 3.011049723756906e-07, | |
| "logits/chosen": 0.726061224937439, | |
| "logits/rejected": 1.0324876308441162, | |
| "logps/chosen": -281.13897705078125, | |
| "logps/rejected": -251.3507537841797, | |
| "loss": 0.2497, | |
| "loss/chosen-sft": 1.165907859802246, | |
| "loss/dpo": 0.24974966049194336, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": 0.0033826460130512714, | |
| "rewards/margins": 0.0045662978664040565, | |
| "rewards/rejected": -0.0011836517369374633, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.06378258458125347, | |
| "grad_norm": 1.887861728668213, | |
| "learning_rate": 3.149171270718232e-07, | |
| "logits/chosen": 0.7413018345832825, | |
| "logits/rejected": 1.1185897588729858, | |
| "logps/chosen": -276.33709716796875, | |
| "logps/rejected": -222.5193328857422, | |
| "loss": 0.2343, | |
| "loss/chosen-sft": 1.2043964862823486, | |
| "loss/dpo": 0.23427948355674744, | |
| "rewards/accuracies": 0.6937500238418579, | |
| "rewards/chosen": 0.006615237798541784, | |
| "rewards/margins": 0.009055422618985176, | |
| "rewards/rejected": -0.002440184820443392, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.06655574043261231, | |
| "grad_norm": 1.9467929601669312, | |
| "learning_rate": 3.287292817679558e-07, | |
| "logits/chosen": 0.779278039932251, | |
| "logits/rejected": 1.0071234703063965, | |
| "logps/chosen": -309.67266845703125, | |
| "logps/rejected": -277.6393127441406, | |
| "loss": 0.2353, | |
| "loss/chosen-sft": 1.250942587852478, | |
| "loss/dpo": 0.23532333970069885, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 0.005773581098765135, | |
| "rewards/margins": 0.008276325650513172, | |
| "rewards/rejected": -0.0025027443189173937, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.06932889628397115, | |
| "grad_norm": 1.7528764009475708, | |
| "learning_rate": 3.425414364640884e-07, | |
| "logits/chosen": 0.7105622291564941, | |
| "logits/rejected": 0.9455775022506714, | |
| "logps/chosen": -247.330810546875, | |
| "logps/rejected": -234.4990692138672, | |
| "loss": 0.2187, | |
| "loss/chosen-sft": 1.204803466796875, | |
| "loss/dpo": 0.21866345405578613, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": 0.005182682536542416, | |
| "rewards/margins": 0.00711778411641717, | |
| "rewards/rejected": -0.0019351018127053976, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.07210205213533001, | |
| "grad_norm": 2.430612802505493, | |
| "learning_rate": 3.56353591160221e-07, | |
| "logits/chosen": 0.8638172149658203, | |
| "logits/rejected": 1.0668808221817017, | |
| "logps/chosen": -282.2446594238281, | |
| "logps/rejected": -227.82461547851562, | |
| "loss": 0.2437, | |
| "loss/chosen-sft": 1.202839970588684, | |
| "loss/dpo": 0.243655726313591, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": 0.0073145912028849125, | |
| "rewards/margins": 0.013259527273476124, | |
| "rewards/rejected": -0.005944933742284775, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.07487520798668885, | |
| "grad_norm": 2.054070472717285, | |
| "learning_rate": 3.7016574585635355e-07, | |
| "logits/chosen": 0.6850544214248657, | |
| "logits/rejected": 0.9055711627006531, | |
| "logps/chosen": -329.90472412109375, | |
| "logps/rejected": -257.50933837890625, | |
| "loss": 0.232, | |
| "loss/chosen-sft": 1.2098791599273682, | |
| "loss/dpo": 0.2320254147052765, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": 0.008226691745221615, | |
| "rewards/margins": 0.014872360043227673, | |
| "rewards/rejected": -0.006645667366683483, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.0776483638380477, | |
| "grad_norm": 2.211216688156128, | |
| "learning_rate": 3.8397790055248617e-07, | |
| "logits/chosen": 0.7939187288284302, | |
| "logits/rejected": 0.9478033185005188, | |
| "logps/chosen": -318.32879638671875, | |
| "logps/rejected": -252.41796875, | |
| "loss": 0.2326, | |
| "loss/chosen-sft": 1.2694523334503174, | |
| "loss/dpo": 0.2325749397277832, | |
| "rewards/accuracies": 0.6812499761581421, | |
| "rewards/chosen": 0.010530084371566772, | |
| "rewards/margins": 0.015220420435070992, | |
| "rewards/rejected": -0.004690336063504219, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.08042151968940654, | |
| "grad_norm": 2.2831854820251465, | |
| "learning_rate": 3.9779005524861873e-07, | |
| "logits/chosen": 0.7261658906936646, | |
| "logits/rejected": 0.7962777614593506, | |
| "logps/chosen": -322.5554504394531, | |
| "logps/rejected": -283.42431640625, | |
| "loss": 0.2576, | |
| "loss/chosen-sft": 1.2340948581695557, | |
| "loss/dpo": 0.25764793157577515, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": 0.0055245086550712585, | |
| "rewards/margins": 0.013468381948769093, | |
| "rewards/rejected": -0.00794387236237526, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.08319467554076539, | |
| "grad_norm": 2.4128849506378174, | |
| "learning_rate": 4.1160220994475135e-07, | |
| "logits/chosen": 0.715286374092102, | |
| "logits/rejected": 0.9870149493217468, | |
| "logps/chosen": -293.37115478515625, | |
| "logps/rejected": -232.06796264648438, | |
| "loss": 0.2465, | |
| "loss/chosen-sft": 1.1868611574172974, | |
| "loss/dpo": 0.2465299814939499, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": 0.008405817672610283, | |
| "rewards/margins": 0.021539855748414993, | |
| "rewards/rejected": -0.013134037144482136, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.08596783139212424, | |
| "grad_norm": 2.3715765476226807, | |
| "learning_rate": 4.2541436464088397e-07, | |
| "logits/chosen": 0.7736817598342896, | |
| "logits/rejected": 0.842314600944519, | |
| "logps/chosen": -321.31622314453125, | |
| "logps/rejected": -244.69161987304688, | |
| "loss": 0.2485, | |
| "loss/chosen-sft": 1.2318105697631836, | |
| "loss/dpo": 0.24850551784038544, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": 0.011977704241871834, | |
| "rewards/margins": 0.02396266534924507, | |
| "rewards/rejected": -0.011984961107373238, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.08874098724348309, | |
| "grad_norm": 2.3416831493377686, | |
| "learning_rate": 4.3922651933701654e-07, | |
| "logits/chosen": 0.7620590925216675, | |
| "logits/rejected": 1.0964124202728271, | |
| "logps/chosen": -287.9859313964844, | |
| "logps/rejected": -227.1605682373047, | |
| "loss": 0.2379, | |
| "loss/chosen-sft": 1.2391362190246582, | |
| "loss/dpo": 0.23787717521190643, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": 0.008434845134615898, | |
| "rewards/margins": 0.027770137414336205, | |
| "rewards/rejected": -0.019335290417075157, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.09151414309484193, | |
| "grad_norm": 2.5888402462005615, | |
| "learning_rate": 4.5303867403314916e-07, | |
| "logits/chosen": 0.7178218960762024, | |
| "logits/rejected": 1.0630967617034912, | |
| "logps/chosen": -300.853759765625, | |
| "logps/rejected": -226.2397918701172, | |
| "loss": 0.2489, | |
| "loss/chosen-sft": 1.2291874885559082, | |
| "loss/dpo": 0.2488756626844406, | |
| "rewards/accuracies": 0.668749988079071, | |
| "rewards/chosen": 0.0051612951792776585, | |
| "rewards/margins": 0.02443910576403141, | |
| "rewards/rejected": -0.01927780732512474, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.09428729894620078, | |
| "grad_norm": 2.380986213684082, | |
| "learning_rate": 4.668508287292817e-07, | |
| "logits/chosen": 0.769342303276062, | |
| "logits/rejected": 1.0527831315994263, | |
| "logps/chosen": -301.633056640625, | |
| "logps/rejected": -271.94549560546875, | |
| "loss": 0.2273, | |
| "loss/chosen-sft": 1.2724863290786743, | |
| "loss/dpo": 0.22729595005512238, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": 0.005723125766962767, | |
| "rewards/margins": 0.024905353784561157, | |
| "rewards/rejected": -0.019182229414582253, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.09706045479755962, | |
| "grad_norm": 2.346064329147339, | |
| "learning_rate": 4.806629834254143e-07, | |
| "logits/chosen": 0.7390652894973755, | |
| "logits/rejected": 1.0698118209838867, | |
| "logps/chosen": -336.51434326171875, | |
| "logps/rejected": -264.5447692871094, | |
| "loss": 0.2329, | |
| "loss/chosen-sft": 1.2428892850875854, | |
| "loss/dpo": 0.2328546941280365, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": 0.013324853964149952, | |
| "rewards/margins": 0.037267185747623444, | |
| "rewards/rejected": -0.023942332714796066, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.09983361064891846, | |
| "grad_norm": 2.174849271774292, | |
| "learning_rate": 4.944751381215469e-07, | |
| "logits/chosen": 0.6287063360214233, | |
| "logits/rejected": 0.839179515838623, | |
| "logps/chosen": -281.9019470214844, | |
| "logps/rejected": -246.0726318359375, | |
| "loss": 0.2247, | |
| "loss/chosen-sft": 1.2559473514556885, | |
| "loss/dpo": 0.22472822666168213, | |
| "rewards/accuracies": 0.643750011920929, | |
| "rewards/chosen": -0.0028144109528511763, | |
| "rewards/margins": 0.03012235462665558, | |
| "rewards/rejected": -0.03293676674365997, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.10260676650027732, | |
| "grad_norm": 2.126122236251831, | |
| "learning_rate": 4.999957796414774e-07, | |
| "logits/chosen": 0.7778046727180481, | |
| "logits/rejected": 0.8593734502792358, | |
| "logps/chosen": -305.8876037597656, | |
| "logps/rejected": -231.57009887695312, | |
| "loss": 0.2087, | |
| "loss/chosen-sft": 1.2814704179763794, | |
| "loss/dpo": 0.20866894721984863, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": -0.0015260865911841393, | |
| "rewards/margins": 0.032867975533008575, | |
| "rewards/rejected": -0.03439406305551529, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.10537992235163617, | |
| "grad_norm": 2.394864082336426, | |
| "learning_rate": 4.999699890776339e-07, | |
| "logits/chosen": 0.7574223279953003, | |
| "logits/rejected": 0.981839656829834, | |
| "logps/chosen": -307.5397033691406, | |
| "logps/rejected": -260.81689453125, | |
| "loss": 0.2244, | |
| "loss/chosen-sft": 1.2601521015167236, | |
| "loss/dpo": 0.22438263893127441, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.001994546502828598, | |
| "rewards/margins": 0.03860156610608101, | |
| "rewards/rejected": -0.040596116334199905, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.10815307820299501, | |
| "grad_norm": 2.5312979221343994, | |
| "learning_rate": 4.999207550094137e-07, | |
| "logits/chosen": 0.5832860469818115, | |
| "logits/rejected": 0.9633687138557434, | |
| "logps/chosen": -320.2137145996094, | |
| "logps/rejected": -255.04135131835938, | |
| "loss": 0.2567, | |
| "loss/chosen-sft": 1.1643339395523071, | |
| "loss/dpo": 0.25674429535865784, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.009322636760771275, | |
| "rewards/margins": 0.04288625717163086, | |
| "rewards/rejected": -0.05220889300107956, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.11092623405435385, | |
| "grad_norm": 2.321089506149292, | |
| "learning_rate": 4.998480820542476e-07, | |
| "logits/chosen": 0.5820528268814087, | |
| "logits/rejected": 0.7807351350784302, | |
| "logps/chosen": -270.94195556640625, | |
| "logps/rejected": -220.6343231201172, | |
| "loss": 0.2267, | |
| "loss/chosen-sft": 1.1440722942352295, | |
| "loss/dpo": 0.22673571109771729, | |
| "rewards/accuracies": 0.768750011920929, | |
| "rewards/chosen": -0.0010490523418411613, | |
| "rewards/margins": 0.05040453001856804, | |
| "rewards/rejected": -0.05145358294248581, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.1136993899057127, | |
| "grad_norm": 2.525728940963745, | |
| "learning_rate": 4.997519770277884e-07, | |
| "logits/chosen": 0.44963234663009644, | |
| "logits/rejected": 0.9135919809341431, | |
| "logps/chosen": -272.36224365234375, | |
| "logps/rejected": -245.6114044189453, | |
| "loss": 0.2211, | |
| "loss/chosen-sft": 1.1812119483947754, | |
| "loss/dpo": 0.22114984691143036, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.022441323846578598, | |
| "rewards/margins": 0.04137767106294632, | |
| "rewards/rejected": -0.06381900608539581, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.11647254575707154, | |
| "grad_norm": 2.5504720211029053, | |
| "learning_rate": 4.99632448943273e-07, | |
| "logits/chosen": 0.7174805998802185, | |
| "logits/rejected": 1.01616632938385, | |
| "logps/chosen": -292.91656494140625, | |
| "logps/rejected": -247.65097045898438, | |
| "loss": 0.2198, | |
| "loss/chosen-sft": 1.2257840633392334, | |
| "loss/dpo": 0.2198173552751541, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.025929760187864304, | |
| "rewards/margins": 0.05272764712572098, | |
| "rewards/rejected": -0.07865741103887558, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.1192457016084304, | |
| "grad_norm": 2.452855110168457, | |
| "learning_rate": 4.994895090106754e-07, | |
| "logits/chosen": 0.6810121536254883, | |
| "logits/rejected": 1.0060815811157227, | |
| "logps/chosen": -282.73846435546875, | |
| "logps/rejected": -267.6331481933594, | |
| "loss": 0.222, | |
| "loss/chosen-sft": 1.192631483078003, | |
| "loss/dpo": 0.2220323085784912, | |
| "rewards/accuracies": 0.643750011920929, | |
| "rewards/chosen": -0.02360682748258114, | |
| "rewards/margins": 0.048061199486255646, | |
| "rewards/rejected": -0.07166802138090134, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.12201885745978924, | |
| "grad_norm": 2.1740057468414307, | |
| "learning_rate": 4.993231706356567e-07, | |
| "logits/chosen": 0.6147283911705017, | |
| "logits/rejected": 0.8315596580505371, | |
| "logps/chosen": -336.437255859375, | |
| "logps/rejected": -253.25320434570312, | |
| "loss": 0.2107, | |
| "loss/chosen-sft": 1.323418378829956, | |
| "loss/dpo": 0.21066264808177948, | |
| "rewards/accuracies": 0.6937500238418579, | |
| "rewards/chosen": -0.03315647691488266, | |
| "rewards/margins": 0.057157598435878754, | |
| "rewards/rejected": -0.09031407535076141, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.12479201331114809, | |
| "grad_norm": 2.743816375732422, | |
| "learning_rate": 4.991334494183074e-07, | |
| "logits/chosen": 0.6287509202957153, | |
| "logits/rejected": 0.9196675419807434, | |
| "logps/chosen": -317.734375, | |
| "logps/rejected": -269.40240478515625, | |
| "loss": 0.2176, | |
| "loss/chosen-sft": 1.2351751327514648, | |
| "loss/dpo": 0.21760547161102295, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": -0.026932349428534508, | |
| "rewards/margins": 0.06921146810054779, | |
| "rewards/rejected": -0.09614382684230804, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.12756516916250693, | |
| "grad_norm": 2.0664100646972656, | |
| "learning_rate": 4.989203631516842e-07, | |
| "logits/chosen": 0.5628304481506348, | |
| "logits/rejected": 0.9230527877807617, | |
| "logps/chosen": -290.1402282714844, | |
| "logps/rejected": -230.798828125, | |
| "loss": 0.2193, | |
| "loss/chosen-sft": 1.1874725818634033, | |
| "loss/dpo": 0.21933284401893616, | |
| "rewards/accuracies": 0.643750011920929, | |
| "rewards/chosen": -0.04623774066567421, | |
| "rewards/margins": 0.04349591210484505, | |
| "rewards/rejected": -0.08973364531993866, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.13033832501386577, | |
| "grad_norm": 2.657771587371826, | |
| "learning_rate": 4.986839318201412e-07, | |
| "logits/chosen": 0.5794273018836975, | |
| "logits/rejected": 0.8498503565788269, | |
| "logps/chosen": -283.7383117675781, | |
| "logps/rejected": -270.8831787109375, | |
| "loss": 0.2113, | |
| "loss/chosen-sft": 1.2316051721572876, | |
| "loss/dpo": 0.2112795114517212, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.04904647916555405, | |
| "rewards/margins": 0.06235337257385254, | |
| "rewards/rejected": -0.11139985173940659, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.13311148086522462, | |
| "grad_norm": 2.395413875579834, | |
| "learning_rate": 4.984241775974562e-07, | |
| "logits/chosen": 0.6039844751358032, | |
| "logits/rejected": 0.9748314619064331, | |
| "logps/chosen": -343.57330322265625, | |
| "logps/rejected": -282.4112548828125, | |
| "loss": 0.2046, | |
| "loss/chosen-sft": 1.2928663492202759, | |
| "loss/dpo": 0.20455893874168396, | |
| "rewards/accuracies": 0.668749988079071, | |
| "rewards/chosen": -0.05615786463022232, | |
| "rewards/margins": 0.057152897119522095, | |
| "rewards/rejected": -0.11331076920032501, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.13588463671658346, | |
| "grad_norm": 2.410879373550415, | |
| "learning_rate": 4.981411248447506e-07, | |
| "logits/chosen": 0.5458131432533264, | |
| "logits/rejected": 0.7987755537033081, | |
| "logps/chosen": -320.3564453125, | |
| "logps/rejected": -274.52349853515625, | |
| "loss": 0.2127, | |
| "loss/chosen-sft": 1.2482545375823975, | |
| "loss/dpo": 0.21267366409301758, | |
| "rewards/accuracies": 0.706250011920929, | |
| "rewards/chosen": -0.05543135479092598, | |
| "rewards/margins": 0.07835234701633453, | |
| "rewards/rejected": -0.13378369808197021, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.1386577925679423, | |
| "grad_norm": 2.126710891723633, | |
| "learning_rate": 4.978348001082048e-07, | |
| "logits/chosen": 0.5291799306869507, | |
| "logits/rejected": 0.7740551829338074, | |
| "logps/chosen": -305.97369384765625, | |
| "logps/rejected": -258.32623291015625, | |
| "loss": 0.1986, | |
| "loss/chosen-sft": 1.317022442817688, | |
| "loss/dpo": 0.19859442114830017, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": -0.056786395609378815, | |
| "rewards/margins": 0.06887609511613846, | |
| "rewards/rejected": -0.12566249072551727, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.14143094841930118, | |
| "grad_norm": 2.286869525909424, | |
| "learning_rate": 4.975052321165688e-07, | |
| "logits/chosen": 0.4996607303619385, | |
| "logits/rejected": 0.8591309785842896, | |
| "logps/chosen": -315.6490783691406, | |
| "logps/rejected": -246.1661834716797, | |
| "loss": 0.2036, | |
| "loss/chosen-sft": 1.2581641674041748, | |
| "loss/dpo": 0.2035764753818512, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.0983358696103096, | |
| "rewards/margins": 0.06135006994009018, | |
| "rewards/rejected": -0.15968593955039978, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.14420410427066002, | |
| "grad_norm": 2.3020002841949463, | |
| "learning_rate": 4.971524517784676e-07, | |
| "logits/chosen": 0.6534574627876282, | |
| "logits/rejected": 0.9783307313919067, | |
| "logps/chosen": -315.9989318847656, | |
| "logps/rejected": -300.7061767578125, | |
| "loss": 0.1887, | |
| "loss/chosen-sft": 1.3003921508789062, | |
| "loss/dpo": 0.18872812390327454, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": -0.0870845764875412, | |
| "rewards/margins": 0.09078534692525864, | |
| "rewards/rejected": -0.17786994576454163, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.14697726012201887, | |
| "grad_norm": 2.2956702709198, | |
| "learning_rate": 4.967764921795026e-07, | |
| "logits/chosen": 0.4762963354587555, | |
| "logits/rejected": 0.7974573373794556, | |
| "logps/chosen": -320.36944580078125, | |
| "logps/rejected": -253.922119140625, | |
| "loss": 0.1965, | |
| "loss/chosen-sft": 1.2646160125732422, | |
| "loss/dpo": 0.19646450877189636, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.10069359838962555, | |
| "rewards/margins": 0.07882802188396454, | |
| "rewards/rejected": -0.1795216202735901, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.1497504159733777, | |
| "grad_norm": 2.5840351581573486, | |
| "learning_rate": 4.963773885791484e-07, | |
| "logits/chosen": 0.46905121207237244, | |
| "logits/rejected": 0.6983851194381714, | |
| "logps/chosen": -319.40997314453125, | |
| "logps/rejected": -264.07574462890625, | |
| "loss": 0.2039, | |
| "loss/chosen-sft": 1.2846953868865967, | |
| "loss/dpo": 0.2038877308368683, | |
| "rewards/accuracies": 0.59375, | |
| "rewards/chosen": -0.14658531546592712, | |
| "rewards/margins": 0.03527539223432541, | |
| "rewards/rejected": -0.18186071515083313, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.15252357182473655, | |
| "grad_norm": 2.3462185859680176, | |
| "learning_rate": 4.959551784074461e-07, | |
| "logits/chosen": 0.5223512053489685, | |
| "logits/rejected": 0.673526406288147, | |
| "logps/chosen": -353.185546875, | |
| "logps/rejected": -287.7161865234375, | |
| "loss": 0.1814, | |
| "loss/chosen-sft": 1.3422802686691284, | |
| "loss/dpo": 0.18135803937911987, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.11449305713176727, | |
| "rewards/margins": 0.09264171123504639, | |
| "rewards/rejected": -0.20713476836681366, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.1552967276760954, | |
| "grad_norm": 2.2738516330718994, | |
| "learning_rate": 4.955099012614933e-07, | |
| "logits/chosen": 0.589896023273468, | |
| "logits/rejected": 0.8485240936279297, | |
| "logps/chosen": -340.37811279296875, | |
| "logps/rejected": -278.1029968261719, | |
| "loss": 0.1715, | |
| "loss/chosen-sft": 1.3321316242218018, | |
| "loss/dpo": 0.17148999869823456, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.12517675757408142, | |
| "rewards/margins": 0.09786481410264969, | |
| "rewards/rejected": -0.2230415791273117, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.15806988352745424, | |
| "grad_norm": 2.3262434005737305, | |
| "learning_rate": 4.9504159890173e-07, | |
| "logits/chosen": 0.5216140747070312, | |
| "logits/rejected": 0.760550320148468, | |
| "logps/chosen": -334.56842041015625, | |
| "logps/rejected": -273.66644287109375, | |
| "loss": 0.1817, | |
| "loss/chosen-sft": 1.3172911405563354, | |
| "loss/dpo": 0.18172022700309753, | |
| "rewards/accuracies": 0.6937500238418579, | |
| "rewards/chosen": -0.15752620995044708, | |
| "rewards/margins": 0.09383035451173782, | |
| "rewards/rejected": -0.2513565421104431, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.16084303937881309, | |
| "grad_norm": 2.1715376377105713, | |
| "learning_rate": 4.945503152480221e-07, | |
| "logits/chosen": 0.4839824140071869, | |
| "logits/rejected": 0.7215232849121094, | |
| "logps/chosen": -348.7749938964844, | |
| "logps/rejected": -296.0406188964844, | |
| "loss": 0.1779, | |
| "loss/chosen-sft": 1.3408844470977783, | |
| "loss/dpo": 0.17787811160087585, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.13581573963165283, | |
| "rewards/margins": 0.10350732505321503, | |
| "rewards/rejected": -0.23932304978370667, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.16361619523017193, | |
| "grad_norm": 1.9679666757583618, | |
| "learning_rate": 4.940360963755426e-07, | |
| "logits/chosen": 0.31268057227134705, | |
| "logits/rejected": 0.45721864700317383, | |
| "logps/chosen": -306.3217468261719, | |
| "logps/rejected": -259.17010498046875, | |
| "loss": 0.168, | |
| "loss/chosen-sft": 1.3279306888580322, | |
| "loss/dpo": 0.16799867153167725, | |
| "rewards/accuracies": 0.7437499761581421, | |
| "rewards/chosen": -0.14948979020118713, | |
| "rewards/margins": 0.11183127015829086, | |
| "rewards/rejected": -0.2613210082054138, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.16638935108153077, | |
| "grad_norm": 2.079015016555786, | |
| "learning_rate": 4.934989905104502e-07, | |
| "logits/chosen": 0.38664117455482483, | |
| "logits/rejected": 0.5652385354042053, | |
| "logps/chosen": -327.77191162109375, | |
| "logps/rejected": -252.3622589111328, | |
| "loss": 0.1615, | |
| "loss/chosen-sft": 1.3207409381866455, | |
| "loss/dpo": 0.1614522486925125, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.15085890889167786, | |
| "rewards/margins": 0.1116364598274231, | |
| "rewards/rejected": -0.26249536871910095, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.16916250693288962, | |
| "grad_norm": 1.7469545602798462, | |
| "learning_rate": 4.929390480253667e-07, | |
| "logits/chosen": 0.4214795231819153, | |
| "logits/rejected": 0.5547307729721069, | |
| "logps/chosen": -345.3471984863281, | |
| "logps/rejected": -296.81610107421875, | |
| "loss": 0.1597, | |
| "loss/chosen-sft": 1.3892717361450195, | |
| "loss/dpo": 0.15971598029136658, | |
| "rewards/accuracies": 0.7437499761581421, | |
| "rewards/chosen": -0.1656077653169632, | |
| "rewards/margins": 0.13407020270824432, | |
| "rewards/rejected": -0.2996779680252075, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.1719356627842485, | |
| "grad_norm": 2.5472302436828613, | |
| "learning_rate": 4.923563214346525e-07, | |
| "logits/chosen": 0.2179243117570877, | |
| "logits/rejected": 0.5859929323196411, | |
| "logps/chosen": -359.1659240722656, | |
| "logps/rejected": -323.3959045410156, | |
| "loss": 0.1693, | |
| "loss/chosen-sft": 1.3262033462524414, | |
| "loss/dpo": 0.16931983828544617, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -0.1735382229089737, | |
| "rewards/margins": 0.17630478739738464, | |
| "rewards/rejected": -0.34984302520751953, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.17470881863560733, | |
| "grad_norm": 1.927839756011963, | |
| "learning_rate": 4.917508653894817e-07, | |
| "logits/chosen": 0.2809959352016449, | |
| "logits/rejected": 0.6429028511047363, | |
| "logps/chosen": -306.2704162597656, | |
| "logps/rejected": -285.570556640625, | |
| "loss": 0.1554, | |
| "loss/chosen-sft": 1.3090779781341553, | |
| "loss/dpo": 0.15539118647575378, | |
| "rewards/accuracies": 0.6812499761581421, | |
| "rewards/chosen": -0.19202136993408203, | |
| "rewards/margins": 0.13031750917434692, | |
| "rewards/rejected": -0.32233884930610657, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.17748197448696618, | |
| "grad_norm": 1.4578968286514282, | |
| "learning_rate": 4.911227366727166e-07, | |
| "logits/chosen": 0.3806017339229584, | |
| "logits/rejected": 0.646203339099884, | |
| "logps/chosen": -343.3798828125, | |
| "logps/rejected": -279.1783752441406, | |
| "loss": 0.1326, | |
| "loss/chosen-sft": 1.4282009601593018, | |
| "loss/dpo": 0.1326414793729782, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.20244988799095154, | |
| "rewards/margins": 0.11332936584949493, | |
| "rewards/rejected": -0.31577926874160767, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.18025513033832502, | |
| "grad_norm": 2.2041287422180176, | |
| "learning_rate": 4.904719941935818e-07, | |
| "logits/chosen": 0.3938923478126526, | |
| "logits/rejected": 0.5521587133407593, | |
| "logps/chosen": -330.3848876953125, | |
| "logps/rejected": -284.7240905761719, | |
| "loss": 0.1679, | |
| "loss/chosen-sft": 1.335784673690796, | |
| "loss/dpo": 0.16794539988040924, | |
| "rewards/accuracies": 0.6937500238418579, | |
| "rewards/chosen": -0.2126191109418869, | |
| "rewards/margins": 0.12985429167747498, | |
| "rewards/rejected": -0.34247341752052307, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.18302828618968386, | |
| "grad_norm": 2.1968367099761963, | |
| "learning_rate": 4.897986989821405e-07, | |
| "logits/chosen": 0.27726611495018005, | |
| "logits/rejected": 0.42112284898757935, | |
| "logps/chosen": -304.1949462890625, | |
| "logps/rejected": -284.40496826171875, | |
| "loss": 0.1461, | |
| "loss/chosen-sft": 1.3867931365966797, | |
| "loss/dpo": 0.1461399793624878, | |
| "rewards/accuracies": 0.668749988079071, | |
| "rewards/chosen": -0.24662432074546814, | |
| "rewards/margins": 0.13204023241996765, | |
| "rewards/rejected": -0.3786645829677582, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.1858014420410427, | |
| "grad_norm": 1.8825585842132568, | |
| "learning_rate": 4.891029141835697e-07, | |
| "logits/chosen": 0.320846825838089, | |
| "logits/rejected": 0.5570532083511353, | |
| "logps/chosen": -327.5889587402344, | |
| "logps/rejected": -277.48101806640625, | |
| "loss": 0.1485, | |
| "loss/chosen-sft": 1.314846396446228, | |
| "loss/dpo": 0.1484725922346115, | |
| "rewards/accuracies": 0.7562500238418579, | |
| "rewards/chosen": -0.23770102858543396, | |
| "rewards/margins": 0.13969966769218445, | |
| "rewards/rejected": -0.3774007260799408, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.18857459789240155, | |
| "grad_norm": 2.134202718734741, | |
| "learning_rate": 4.883847050522388e-07, | |
| "logits/chosen": 0.32603126764297485, | |
| "logits/rejected": 0.5855879783630371, | |
| "logps/chosen": -306.82940673828125, | |
| "logps/rejected": -306.5520324707031, | |
| "loss": 0.1386, | |
| "loss/chosen-sft": 1.3495581150054932, | |
| "loss/dpo": 0.138578861951828, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.25742700695991516, | |
| "rewards/margins": 0.12370799481868744, | |
| "rewards/rejected": -0.3811350464820862, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.1913477537437604, | |
| "grad_norm": 1.7875416278839111, | |
| "learning_rate": 4.876441389455892e-07, | |
| "logits/chosen": 0.24733588099479675, | |
| "logits/rejected": 0.45660385489463806, | |
| "logps/chosen": -326.81988525390625, | |
| "logps/rejected": -290.23175048828125, | |
| "loss": 0.1307, | |
| "loss/chosen-sft": 1.372218132019043, | |
| "loss/dpo": 0.130675807595253, | |
| "rewards/accuracies": 0.6937500238418579, | |
| "rewards/chosen": -0.24057714641094208, | |
| "rewards/margins": 0.166746586561203, | |
| "rewards/rejected": -0.4073237478733063, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.19412090959511924, | |
| "grad_norm": 1.9340115785598755, | |
| "learning_rate": 4.868812853178174e-07, | |
| "logits/chosen": 0.2431308925151825, | |
| "logits/rejected": 0.5894955992698669, | |
| "logps/chosen": -318.0989990234375, | |
| "logps/rejected": -283.68896484375, | |
| "loss": 0.1256, | |
| "loss/chosen-sft": 1.354346513748169, | |
| "loss/dpo": 0.12558932602405548, | |
| "rewards/accuracies": 0.6812499761581421, | |
| "rewards/chosen": -0.2501407861709595, | |
| "rewards/margins": 0.15478388965129852, | |
| "rewards/rejected": -0.4049246907234192, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.19689406544647808, | |
| "grad_norm": 2.164641857147217, | |
| "learning_rate": 4.860962157133614e-07, | |
| "logits/chosen": 0.042165856808423996, | |
| "logits/rejected": 0.2751336395740509, | |
| "logps/chosen": -312.33636474609375, | |
| "logps/rejected": -293.5239562988281, | |
| "loss": 0.1378, | |
| "loss/chosen-sft": 1.307612657546997, | |
| "loss/dpo": 0.1378055065870285, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.2775038182735443, | |
| "rewards/margins": 0.1561960130929947, | |
| "rewards/rejected": -0.4336997866630554, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.19966722129783693, | |
| "grad_norm": 1.9318678379058838, | |
| "learning_rate": 4.852890037601906e-07, | |
| "logits/chosen": 0.16851834952831268, | |
| "logits/rejected": 0.38687795400619507, | |
| "logps/chosen": -339.43377685546875, | |
| "logps/rejected": -299.70611572265625, | |
| "loss": 0.1384, | |
| "loss/chosen-sft": 1.4045716524124146, | |
| "loss/dpo": 0.1384141445159912, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": -0.2960866689682007, | |
| "rewards/margins": 0.16027973592281342, | |
| "rewards/rejected": -0.4563663899898529, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.20244037714919577, | |
| "grad_norm": 1.874963402748108, | |
| "learning_rate": 4.844597251629008e-07, | |
| "logits/chosen": 0.21792784333229065, | |
| "logits/rejected": 0.3441501259803772, | |
| "logps/chosen": -325.2286682128906, | |
| "logps/rejected": -281.23638916015625, | |
| "loss": 0.1352, | |
| "loss/chosen-sft": 1.3615696430206299, | |
| "loss/dpo": 0.13523094356060028, | |
| "rewards/accuracies": 0.6312500238418579, | |
| "rewards/chosen": -0.3180539608001709, | |
| "rewards/margins": 0.1318327933549881, | |
| "rewards/rejected": -0.4498867392539978, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.20521353300055464, | |
| "grad_norm": 2.0710604190826416, | |
| "learning_rate": 4.836084576956137e-07, | |
| "logits/chosen": 0.3437557816505432, | |
| "logits/rejected": 0.5105153322219849, | |
| "logps/chosen": -339.3221435546875, | |
| "logps/rejected": -306.34954833984375, | |
| "loss": 0.1202, | |
| "loss/chosen-sft": 1.4008045196533203, | |
| "loss/dpo": 0.12022168934345245, | |
| "rewards/accuracies": 0.706250011920929, | |
| "rewards/chosen": -0.295442670583725, | |
| "rewards/margins": 0.2085741013288498, | |
| "rewards/rejected": -0.5040167570114136, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.2079866888519135, | |
| "grad_norm": 2.0409181118011475, | |
| "learning_rate": 4.827352811946839e-07, | |
| "logits/chosen": 0.10601860284805298, | |
| "logits/rejected": 0.19340696930885315, | |
| "logps/chosen": -363.87677001953125, | |
| "logps/rejected": -313.70306396484375, | |
| "loss": 0.1444, | |
| "loss/chosen-sft": 1.4021615982055664, | |
| "loss/dpo": 0.14439384639263153, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": -0.3728375732898712, | |
| "rewards/margins": 0.1328699290752411, | |
| "rewards/rejected": -0.5057075023651123, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.21075984470327233, | |
| "grad_norm": 1.8490561246871948, | |
| "learning_rate": 4.818402775512101e-07, | |
| "logits/chosen": 0.12518136203289032, | |
| "logits/rejected": 0.1990843415260315, | |
| "logps/chosen": -330.71282958984375, | |
| "logps/rejected": -289.8016052246094, | |
| "loss": 0.1272, | |
| "loss/chosen-sft": 1.3432165384292603, | |
| "loss/dpo": 0.1271766871213913, | |
| "rewards/accuracies": 0.6812499761581421, | |
| "rewards/chosen": -0.35159942507743835, | |
| "rewards/margins": 0.19611182808876038, | |
| "rewards/rejected": -0.5477112531661987, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.21353300055463117, | |
| "grad_norm": 2.0964889526367188, | |
| "learning_rate": 4.80923530703356e-07, | |
| "logits/chosen": 0.10008511692285538, | |
| "logits/rejected": 0.3669296205043793, | |
| "logps/chosen": -345.01715087890625, | |
| "logps/rejected": -335.6979064941406, | |
| "loss": 0.1178, | |
| "loss/chosen-sft": 1.375633955001831, | |
| "loss/dpo": 0.1178009957075119, | |
| "rewards/accuracies": 0.762499988079071, | |
| "rewards/chosen": -0.33166831731796265, | |
| "rewards/margins": 0.257099449634552, | |
| "rewards/rejected": -0.5887677073478699, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.21630615640599002, | |
| "grad_norm": 1.3537511825561523, | |
| "learning_rate": 4.799851266284776e-07, | |
| "logits/chosen": 0.029593368992209435, | |
| "logits/rejected": 0.2654074728488922, | |
| "logps/chosen": -345.6829528808594, | |
| "logps/rejected": -295.1934814453125, | |
| "loss": 0.1097, | |
| "loss/chosen-sft": 1.4121724367141724, | |
| "loss/dpo": 0.10970698297023773, | |
| "rewards/accuracies": 0.6812499761581421, | |
| "rewards/chosen": -0.36835533380508423, | |
| "rewards/margins": 0.22947156429290771, | |
| "rewards/rejected": -0.5978268980979919, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.21907931225734886, | |
| "grad_norm": 2.3625447750091553, | |
| "learning_rate": 4.790251533350597e-07, | |
| "logits/chosen": 0.10083159059286118, | |
| "logits/rejected": 0.31363213062286377, | |
| "logps/chosen": -332.98846435546875, | |
| "logps/rejected": -296.0115051269531, | |
| "loss": 0.1227, | |
| "loss/chosen-sft": 1.3798365592956543, | |
| "loss/dpo": 0.12265870720148087, | |
| "rewards/accuracies": 0.668749988079071, | |
| "rewards/chosen": -0.4115685820579529, | |
| "rewards/margins": 0.17796972393989563, | |
| "rewards/rejected": -0.5895382761955261, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.2218524681087077, | |
| "grad_norm": 1.8092533349990845, | |
| "learning_rate": 4.780437008544628e-07, | |
| "logits/chosen": 0.0437210276722908, | |
| "logits/rejected": 0.28447720408439636, | |
| "logps/chosen": -328.55364990234375, | |
| "logps/rejected": -299.6891174316406, | |
| "loss": 0.1123, | |
| "loss/chosen-sft": 1.4356980323791504, | |
| "loss/dpo": 0.11227130889892578, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -0.3866044580936432, | |
| "rewards/margins": 0.21935506165027618, | |
| "rewards/rejected": -0.6059595346450806, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.22462562396006655, | |
| "grad_norm": 1.755149245262146, | |
| "learning_rate": 4.770408612324783e-07, | |
| "logits/chosen": 0.06074788048863411, | |
| "logits/rejected": 0.29286664724349976, | |
| "logps/chosen": -342.78314208984375, | |
| "logps/rejected": -324.9090270996094, | |
| "loss": 0.1229, | |
| "loss/chosen-sft": 1.3950598239898682, | |
| "loss/dpo": 0.12285208702087402, | |
| "rewards/accuracies": 0.6812499761581421, | |
| "rewards/chosen": -0.40826615691185, | |
| "rewards/margins": 0.23281535506248474, | |
| "rewards/rejected": -0.6410815119743347, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.2273987798114254, | |
| "grad_norm": 1.7682974338531494, | |
| "learning_rate": 4.760167285206968e-07, | |
| "logits/chosen": 0.03603815287351608, | |
| "logits/rejected": 0.14143748581409454, | |
| "logps/chosen": -357.19000244140625, | |
| "logps/rejected": -312.27984619140625, | |
| "loss": 0.1099, | |
| "loss/chosen-sft": 1.4361236095428467, | |
| "loss/dpo": 0.10990427434444427, | |
| "rewards/accuracies": 0.668749988079071, | |
| "rewards/chosen": -0.4452105462551117, | |
| "rewards/margins": 0.2284567803144455, | |
| "rewards/rejected": -0.673667311668396, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.23017193566278424, | |
| "grad_norm": 1.7545676231384277, | |
| "learning_rate": 4.749713987676871e-07, | |
| "logits/chosen": 0.060189586132764816, | |
| "logits/rejected": 0.1871364414691925, | |
| "logps/chosen": -355.28924560546875, | |
| "logps/rejected": -317.2358093261719, | |
| "loss": 0.1093, | |
| "loss/chosen-sft": 1.3852781057357788, | |
| "loss/dpo": 0.10928022861480713, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.43528875708580017, | |
| "rewards/margins": 0.2679973840713501, | |
| "rewards/rejected": -0.7032861709594727, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.23294509151414308, | |
| "grad_norm": 1.7819030284881592, | |
| "learning_rate": 4.7390497000998853e-07, | |
| "logits/chosen": -0.06483317911624908, | |
| "logits/rejected": 0.0744047462940216, | |
| "logps/chosen": -327.4217834472656, | |
| "logps/rejected": -300.5835876464844, | |
| "loss": 0.1031, | |
| "loss/chosen-sft": 1.4440373182296753, | |
| "loss/dpo": 0.10313411056995392, | |
| "rewards/accuracies": 0.731249988079071, | |
| "rewards/chosen": -0.420682817697525, | |
| "rewards/margins": 0.2728646397590637, | |
| "rewards/rejected": -0.6935475468635559, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.23571824736550195, | |
| "grad_norm": 1.5001357793807983, | |
| "learning_rate": 4.7281754226291627e-07, | |
| "logits/chosen": -0.01330060325562954, | |
| "logits/rejected": 0.23702244460582733, | |
| "logps/chosen": -346.0107116699219, | |
| "logps/rejected": -335.9864501953125, | |
| "loss": 0.0908, | |
| "loss/chosen-sft": 1.3927044868469238, | |
| "loss/dpo": 0.09075666218996048, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -0.4789350628852844, | |
| "rewards/margins": 0.313282310962677, | |
| "rewards/rejected": -0.7922172546386719, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.2384914032168608, | |
| "grad_norm": 1.6260863542556763, | |
| "learning_rate": 4.717092175111814e-07, | |
| "logits/chosen": 0.0023514986969530582, | |
| "logits/rejected": 0.2060910165309906, | |
| "logps/chosen": -372.6448059082031, | |
| "logps/rejected": -335.29534912109375, | |
| "loss": 0.101, | |
| "loss/chosen-sft": 1.4546012878417969, | |
| "loss/dpo": 0.10103006660938263, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.5173962712287903, | |
| "rewards/margins": 0.24449630081653595, | |
| "rewards/rejected": -0.7618924975395203, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.24126455906821964, | |
| "grad_norm": 1.8368723392486572, | |
| "learning_rate": 4.7058009969932666e-07, | |
| "logits/chosen": -0.010248428210616112, | |
| "logits/rejected": 0.10792305320501328, | |
| "logps/chosen": -364.9273376464844, | |
| "logps/rejected": -310.1815490722656, | |
| "loss": 0.104, | |
| "loss/chosen-sft": 1.4473296403884888, | |
| "loss/dpo": 0.10400193929672241, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.5129069089889526, | |
| "rewards/margins": 0.27995672821998596, | |
| "rewards/rejected": -0.792863667011261, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.24403771491957849, | |
| "grad_norm": 1.4362239837646484, | |
| "learning_rate": 4.694302947219775e-07, | |
| "logits/chosen": -0.04999478533864021, | |
| "logits/rejected": 0.15826420485973358, | |
| "logps/chosen": -371.3370361328125, | |
| "logps/rejected": -349.93511962890625, | |
| "loss": 0.0862, | |
| "loss/chosen-sft": 1.5128648281097412, | |
| "loss/dpo": 0.08617158234119415, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.5207723379135132, | |
| "rewards/margins": 0.3013128638267517, | |
| "rewards/rejected": -0.8220852017402649, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.24681087077093733, | |
| "grad_norm": 1.7123464345932007, | |
| "learning_rate": 4.6825991041391067e-07, | |
| "logits/chosen": -0.131384015083313, | |
| "logits/rejected": 0.14633068442344666, | |
| "logps/chosen": -360.64166259765625, | |
| "logps/rejected": -330.9405822753906, | |
| "loss": 0.0844, | |
| "loss/chosen-sft": 1.433370590209961, | |
| "loss/dpo": 0.08443091064691544, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.49853143095970154, | |
| "rewards/margins": 0.3446124196052551, | |
| "rewards/rejected": -0.8431438207626343, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.24958402662229617, | |
| "grad_norm": 1.4186034202575684, | |
| "learning_rate": 4.670690565399415e-07, | |
| "logits/chosen": -0.17499125003814697, | |
| "logits/rejected": 0.10050486028194427, | |
| "logps/chosen": -378.93157958984375, | |
| "logps/rejected": -318.13885498046875, | |
| "loss": 0.0908, | |
| "loss/chosen-sft": 1.4939839839935303, | |
| "loss/dpo": 0.09077504277229309, | |
| "rewards/accuracies": 0.6812499761581421, | |
| "rewards/chosen": -0.5793511271476746, | |
| "rewards/margins": 0.26338380575180054, | |
| "rewards/rejected": -0.8427349328994751, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.252357182473655, | |
| "grad_norm": 1.2995809316635132, | |
| "learning_rate": 4.65857844784629e-07, | |
| "logits/chosen": -0.1015838161110878, | |
| "logits/rejected": 0.014798527583479881, | |
| "logps/chosen": -328.8960266113281, | |
| "logps/rejected": -330.8209228515625, | |
| "loss": 0.0818, | |
| "loss/chosen-sft": 1.502540946006775, | |
| "loss/dpo": 0.08181241899728775, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": -0.55858314037323, | |
| "rewards/margins": 0.30958643555641174, | |
| "rewards/rejected": -0.8681696057319641, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.25513033832501386, | |
| "grad_norm": 1.888208270072937, | |
| "learning_rate": 4.6462638874180173e-07, | |
| "logits/chosen": -0.0846543088555336, | |
| "logits/rejected": -0.07818257808685303, | |
| "logps/chosen": -361.853271484375, | |
| "logps/rejected": -330.44915771484375, | |
| "loss": 0.0959, | |
| "loss/chosen-sft": 1.524152398109436, | |
| "loss/dpo": 0.0959150493144989, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.630092978477478, | |
| "rewards/margins": 0.2400503158569336, | |
| "rewards/rejected": -0.8701432943344116, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.2579034941763727, | |
| "grad_norm": 1.4479496479034424, | |
| "learning_rate": 4.633748039039044e-07, | |
| "logits/chosen": -0.17475584149360657, | |
| "logits/rejected": -0.1427185982465744, | |
| "logps/chosen": -379.28741455078125, | |
| "logps/rejected": -354.30877685546875, | |
| "loss": 0.0791, | |
| "loss/chosen-sft": 1.561486005783081, | |
| "loss/dpo": 0.0790865495800972, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": -0.5844415426254272, | |
| "rewards/margins": 0.31326019763946533, | |
| "rewards/rejected": -0.8977017402648926, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.26067665002773155, | |
| "grad_norm": 2.0286715030670166, | |
| "learning_rate": 4.621032076511662e-07, | |
| "logits/chosen": -0.17609013617038727, | |
| "logits/rejected": -0.021662216633558273, | |
| "logps/chosen": -377.0018005371094, | |
| "logps/rejected": -337.9693603515625, | |
| "loss": 0.0829, | |
| "loss/chosen-sft": 1.5360634326934814, | |
| "loss/dpo": 0.08291391283273697, | |
| "rewards/accuracies": 0.668749988079071, | |
| "rewards/chosen": -0.652755856513977, | |
| "rewards/margins": 0.29427844285964966, | |
| "rewards/rejected": -0.9470342397689819, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.2634498058790904, | |
| "grad_norm": 1.7011218070983887, | |
| "learning_rate": 4.6081171924059245e-07, | |
| "logits/chosen": -0.164344921708107, | |
| "logits/rejected": 0.05261586979031563, | |
| "logps/chosen": -335.7785339355469, | |
| "logps/rejected": -326.3609313964844, | |
| "loss": 0.0754, | |
| "loss/chosen-sft": 1.5235470533370972, | |
| "loss/dpo": 0.07544606924057007, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": -0.571322500705719, | |
| "rewards/margins": 0.26280707120895386, | |
| "rewards/rejected": -0.8341296315193176, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.26622296173044924, | |
| "grad_norm": 2.2942631244659424, | |
| "learning_rate": 4.5950045979478004e-07, | |
| "logits/chosen": -0.2594403624534607, | |
| "logits/rejected": -0.0034357428085058928, | |
| "logps/chosen": -354.83197021484375, | |
| "logps/rejected": -359.304443359375, | |
| "loss": 0.0882, | |
| "loss/chosen-sft": 1.4360209703445435, | |
| "loss/dpo": 0.08824630081653595, | |
| "rewards/accuracies": 0.668749988079071, | |
| "rewards/chosen": -0.6372642517089844, | |
| "rewards/margins": 0.3590429425239563, | |
| "rewards/rejected": -0.9963071942329407, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.2689961175818081, | |
| "grad_norm": 2.1030640602111816, | |
| "learning_rate": 4.5816955229055776e-07, | |
| "logits/chosen": -0.3849255442619324, | |
| "logits/rejected": -0.1723778247833252, | |
| "logps/chosen": -331.4612731933594, | |
| "logps/rejected": -328.07037353515625, | |
| "loss": 0.0885, | |
| "loss/chosen-sft": 1.4904568195343018, | |
| "loss/dpo": 0.08851548284292221, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.6340426206588745, | |
| "rewards/margins": 0.27626457810401917, | |
| "rewards/rejected": -0.9103072881698608, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 0.2717692734331669, | |
| "grad_norm": 1.9212195873260498, | |
| "learning_rate": 4.56819121547453e-07, | |
| "logits/chosen": -0.13772639632225037, | |
| "logits/rejected": -0.04014406353235245, | |
| "logps/chosen": -368.6828918457031, | |
| "logps/rejected": -321.24365234375, | |
| "loss": 0.0897, | |
| "loss/chosen-sft": 1.4933507442474365, | |
| "loss/dpo": 0.08970335870981216, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.7113620638847351, | |
| "rewards/margins": 0.1993238627910614, | |
| "rewards/rejected": -0.9106858968734741, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.27454242928452577, | |
| "grad_norm": 1.695456624031067, | |
| "learning_rate": 4.554492942159855e-07, | |
| "logits/chosen": -0.42002072930336, | |
| "logits/rejected": -0.18559008836746216, | |
| "logps/chosen": -350.0926818847656, | |
| "logps/rejected": -358.0768127441406, | |
| "loss": 0.0807, | |
| "loss/chosen-sft": 1.4628541469573975, | |
| "loss/dpo": 0.08066307753324509, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.6113547086715698, | |
| "rewards/margins": 0.36685535311698914, | |
| "rewards/rejected": -0.9782101511955261, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 0.2773155851358846, | |
| "grad_norm": 2.2545158863067627, | |
| "learning_rate": 4.540601987657893e-07, | |
| "logits/chosen": -0.27345049381256104, | |
| "logits/rejected": -0.0689864382147789, | |
| "logps/chosen": -370.7053527832031, | |
| "logps/rejected": -339.49029541015625, | |
| "loss": 0.0875, | |
| "loss/chosen-sft": 1.5028795003890991, | |
| "loss/dpo": 0.08748678863048553, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": -0.6775597333908081, | |
| "rewards/margins": 0.23481640219688416, | |
| "rewards/rejected": -0.9123761057853699, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.28008874098724346, | |
| "grad_norm": 1.761813759803772, | |
| "learning_rate": 4.5265196547356453e-07, | |
| "logits/chosen": -0.4009808599948883, | |
| "logits/rejected": -0.2716490626335144, | |
| "logps/chosen": -375.43145751953125, | |
| "logps/rejected": -348.8159484863281, | |
| "loss": 0.0836, | |
| "loss/chosen-sft": 1.5295841693878174, | |
| "loss/dpo": 0.08356954157352448, | |
| "rewards/accuracies": 0.668749988079071, | |
| "rewards/chosen": -0.6790491342544556, | |
| "rewards/margins": 0.30093619227409363, | |
| "rewards/rejected": -0.979985237121582, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 0.28286189683860236, | |
| "grad_norm": 1.7265639305114746, | |
| "learning_rate": 4.5122472641085887e-07, | |
| "logits/chosen": -0.2951423227787018, | |
| "logits/rejected": -0.06516732275485992, | |
| "logps/chosen": -327.62579345703125, | |
| "logps/rejected": -322.10516357421875, | |
| "loss": 0.0822, | |
| "loss/chosen-sft": 1.4988086223602295, | |
| "loss/dpo": 0.08221860229969025, | |
| "rewards/accuracies": 0.643750011920929, | |
| "rewards/chosen": -0.6168748140335083, | |
| "rewards/margins": 0.27371373772621155, | |
| "rewards/rejected": -0.8905885815620422, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.2856350526899612, | |
| "grad_norm": 2.8561835289001465, | |
| "learning_rate": 4.497786154316815e-07, | |
| "logits/chosen": -0.3033020496368408, | |
| "logits/rejected": -0.1509987711906433, | |
| "logps/chosen": -405.4481506347656, | |
| "logps/rejected": -363.7564392089844, | |
| "loss": 0.0839, | |
| "loss/chosen-sft": 1.5500370264053345, | |
| "loss/dpo": 0.08386242389678955, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.7174540758132935, | |
| "rewards/margins": 0.30318009853363037, | |
| "rewards/rejected": -1.0206342935562134, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 0.28840820854132004, | |
| "grad_norm": 1.3990095853805542, | |
| "learning_rate": 4.483137681599495e-07, | |
| "logits/chosen": -0.4407324194908142, | |
| "logits/rejected": -0.3505704998970032, | |
| "logps/chosen": -393.1653747558594, | |
| "logps/rejected": -341.4064636230469, | |
| "loss": 0.0787, | |
| "loss/chosen-sft": 1.5403501987457275, | |
| "loss/dpo": 0.07872845977544785, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": -0.6447398066520691, | |
| "rewards/margins": 0.4145263731479645, | |
| "rewards/rejected": -1.0592660903930664, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.2911813643926789, | |
| "grad_norm": 1.8205024003982544, | |
| "learning_rate": 4.468303219767683e-07, | |
| "logits/chosen": -0.3818379044532776, | |
| "logits/rejected": -0.13233591616153717, | |
| "logps/chosen": -356.06488037109375, | |
| "logps/rejected": -356.4136047363281, | |
| "loss": 0.0827, | |
| "loss/chosen-sft": 1.497135043144226, | |
| "loss/dpo": 0.08271769434213638, | |
| "rewards/accuracies": 0.6187499761581421, | |
| "rewards/chosen": -0.6697732210159302, | |
| "rewards/margins": 0.2621932625770569, | |
| "rewards/rejected": -0.9319664835929871, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.29395452024403773, | |
| "grad_norm": 1.3097429275512695, | |
| "learning_rate": 4.453284160075473e-07, | |
| "logits/chosen": -0.4805290699005127, | |
| "logits/rejected": -0.3880520164966583, | |
| "logps/chosen": -391.4358825683594, | |
| "logps/rejected": -344.5740661621094, | |
| "loss": 0.0787, | |
| "loss/chosen-sft": 1.4888331890106201, | |
| "loss/dpo": 0.07873310893774033, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": -0.7074838280677795, | |
| "rewards/margins": 0.3189099431037903, | |
| "rewards/rejected": -1.0263937711715698, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.2967276760953966, | |
| "grad_norm": 1.8334152698516846, | |
| "learning_rate": 4.438081911089522e-07, | |
| "logits/chosen": -0.1407683938741684, | |
| "logits/rejected": -0.08939669281244278, | |
| "logps/chosen": -368.7952880859375, | |
| "logps/rejected": -324.58660888671875, | |
| "loss": 0.0787, | |
| "loss/chosen-sft": 1.536380410194397, | |
| "loss/dpo": 0.07865273952484131, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.7058097124099731, | |
| "rewards/margins": 0.27695003151893616, | |
| "rewards/rejected": -0.9827596545219421, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 0.2995008319467554, | |
| "grad_norm": 1.0951838493347168, | |
| "learning_rate": 4.422697898556945e-07, | |
| "logits/chosen": -0.1452847272157669, | |
| "logits/rejected": -0.12602165341377258, | |
| "logps/chosen": -388.03533935546875, | |
| "logps/rejected": -322.50787353515625, | |
| "loss": 0.0711, | |
| "loss/chosen-sft": 1.6297862529754639, | |
| "loss/dpo": 0.07111659646034241, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.6191781759262085, | |
| "rewards/margins": 0.31716564297676086, | |
| "rewards/rejected": -0.936343789100647, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.30227398779811426, | |
| "grad_norm": 1.171819806098938, | |
| "learning_rate": 4.4071335652716004e-07, | |
| "logits/chosen": -0.28522688150405884, | |
| "logits/rejected": 0.05751846358180046, | |
| "logps/chosen": -330.20245361328125, | |
| "logps/rejected": -353.6244812011719, | |
| "loss": 0.0786, | |
| "loss/chosen-sft": 1.4777370691299438, | |
| "loss/dpo": 0.07859226316213608, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.649788498878479, | |
| "rewards/margins": 0.34457841515541077, | |
| "rewards/rejected": -0.9943668246269226, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 0.3050471436494731, | |
| "grad_norm": 2.0230906009674072, | |
| "learning_rate": 4.391390370938777e-07, | |
| "logits/chosen": -0.2850199043750763, | |
| "logits/rejected": -0.1109178215265274, | |
| "logps/chosen": -364.53826904296875, | |
| "logps/rejected": -338.34283447265625, | |
| "loss": 0.0867, | |
| "loss/chosen-sft": 1.5412209033966064, | |
| "loss/dpo": 0.08672328293323517, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.6998955607414246, | |
| "rewards/margins": 0.25897401571273804, | |
| "rewards/rejected": -0.9588696360588074, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.30782029950083195, | |
| "grad_norm": 1.2038522958755493, | |
| "learning_rate": 4.3754697920383006e-07, | |
| "logits/chosen": -0.34611284732818604, | |
| "logits/rejected": -0.049505867063999176, | |
| "logps/chosen": -355.5877990722656, | |
| "logps/rejected": -355.5694274902344, | |
| "loss": 0.0705, | |
| "loss/chosen-sft": 1.5247033834457397, | |
| "loss/dpo": 0.0704929307103157, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -0.6597936749458313, | |
| "rewards/margins": 0.4111254811286926, | |
| "rewards/rejected": -1.070919156074524, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 0.3105934553521908, | |
| "grad_norm": 1.383131980895996, | |
| "learning_rate": 4.359373321686053e-07, | |
| "logits/chosen": -0.47573018074035645, | |
| "logits/rejected": -0.38598376512527466, | |
| "logps/chosen": -388.10736083984375, | |
| "logps/rejected": -381.851318359375, | |
| "loss": 0.0792, | |
| "loss/chosen-sft": 1.4690505266189575, | |
| "loss/dpo": 0.07924602925777435, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.6938058137893677, | |
| "rewards/margins": 0.39269718527793884, | |
| "rewards/rejected": -1.086503028869629, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.31336661120354964, | |
| "grad_norm": 1.9160140752792358, | |
| "learning_rate": 4.343102469493947e-07, | |
| "logits/chosen": -0.3587764501571655, | |
| "logits/rejected": -0.2721293866634369, | |
| "logps/chosen": -403.26788330078125, | |
| "logps/rejected": -358.91046142578125, | |
| "loss": 0.068, | |
| "loss/chosen-sft": 1.6094681024551392, | |
| "loss/dpo": 0.06795226037502289, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.7775420546531677, | |
| "rewards/margins": 0.31379085779190063, | |
| "rewards/rejected": -1.091333031654358, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 0.3161397670549085, | |
| "grad_norm": 1.0593386888504028, | |
| "learning_rate": 4.326658761428342e-07, | |
| "logits/chosen": -0.4315316677093506, | |
| "logits/rejected": -0.2712632417678833, | |
| "logps/chosen": -381.10260009765625, | |
| "logps/rejected": -344.9892883300781, | |
| "loss": 0.0726, | |
| "loss/chosen-sft": 1.552372694015503, | |
| "loss/dpo": 0.07259351015090942, | |
| "rewards/accuracies": 0.643750011920929, | |
| "rewards/chosen": -0.800487220287323, | |
| "rewards/margins": 0.31337645649909973, | |
| "rewards/rejected": -1.1138637065887451, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.3189129229062673, | |
| "grad_norm": 1.4632201194763184, | |
| "learning_rate": 4.310043739666937e-07, | |
| "logits/chosen": -0.49550876021385193, | |
| "logits/rejected": -0.3280448317527771, | |
| "logps/chosen": -401.83856201171875, | |
| "logps/rejected": -399.66729736328125, | |
| "loss": 0.0516, | |
| "loss/chosen-sft": 1.647662878036499, | |
| "loss/dpo": 0.05160484462976456, | |
| "rewards/accuracies": 0.731249988079071, | |
| "rewards/chosen": -0.8304476737976074, | |
| "rewards/margins": 0.4837685227394104, | |
| "rewards/rejected": -1.314216136932373, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 0.32168607875762617, | |
| "grad_norm": 1.8497745990753174, | |
| "learning_rate": 4.2932589624541296e-07, | |
| "logits/chosen": -0.3687261641025543, | |
| "logits/rejected": -0.27867016196250916, | |
| "logps/chosen": -376.83660888671875, | |
| "logps/rejected": -353.30670166015625, | |
| "loss": 0.0618, | |
| "loss/chosen-sft": 1.6445457935333252, | |
| "loss/dpo": 0.06179194524884224, | |
| "rewards/accuracies": 0.6187499761581421, | |
| "rewards/chosen": -0.8887649774551392, | |
| "rewards/margins": 0.33651480078697205, | |
| "rewards/rejected": -1.2252798080444336, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.324459234608985, | |
| "grad_norm": 1.1594619750976562, | |
| "learning_rate": 4.276306003954881e-07, | |
| "logits/chosen": -0.46756500005722046, | |
| "logits/rejected": -0.4428383708000183, | |
| "logps/chosen": -405.39593505859375, | |
| "logps/rejected": -375.8490905761719, | |
| "loss": 0.0648, | |
| "loss/chosen-sft": 1.6012241840362549, | |
| "loss/dpo": 0.06479503959417343, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -1.0120012760162354, | |
| "rewards/margins": 0.2799423336982727, | |
| "rewards/rejected": -1.2919435501098633, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 0.32723239046034386, | |
| "grad_norm": 1.4882887601852417, | |
| "learning_rate": 4.25918645410708e-07, | |
| "logits/chosen": -0.5149391889572144, | |
| "logits/rejected": -0.43163880705833435, | |
| "logps/chosen": -395.2956237792969, | |
| "logps/rejected": -388.7841491699219, | |
| "loss": 0.0565, | |
| "loss/chosen-sft": 1.619074821472168, | |
| "loss/dpo": 0.056492336094379425, | |
| "rewards/accuracies": 0.7562500238418579, | |
| "rewards/chosen": -0.8736220598220825, | |
| "rewards/margins": 0.49877220392227173, | |
| "rewards/rejected": -1.372394323348999, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.3300055463117027, | |
| "grad_norm": 1.6804783344268799, | |
| "learning_rate": 4.2419019184724316e-07, | |
| "logits/chosen": -0.4885106086730957, | |
| "logits/rejected": -0.2503131926059723, | |
| "logps/chosen": -417.79327392578125, | |
| "logps/rejected": -404.0755920410156, | |
| "loss": 0.0634, | |
| "loss/chosen-sft": 1.5858757495880127, | |
| "loss/dpo": 0.06336269527673721, | |
| "rewards/accuracies": 0.6812499761581421, | |
| "rewards/chosen": -0.9350301623344421, | |
| "rewards/margins": 0.4474121630191803, | |
| "rewards/rejected": -1.3824422359466553, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 0.33277870216306155, | |
| "grad_norm": 1.5922985076904297, | |
| "learning_rate": 4.224454018085878e-07, | |
| "logits/chosen": -0.3938080966472626, | |
| "logits/rejected": -0.20645618438720703, | |
| "logps/chosen": -417.41644287109375, | |
| "logps/rejected": -377.573486328125, | |
| "loss": 0.0521, | |
| "loss/chosen-sft": 1.6786199808120728, | |
| "loss/dpo": 0.05210758373141289, | |
| "rewards/accuracies": 0.668749988079071, | |
| "rewards/chosen": -0.9713231325149536, | |
| "rewards/margins": 0.3552365303039551, | |
| "rewards/rejected": -1.3265597820281982, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.3355518580144204, | |
| "grad_norm": 1.135326862335205, | |
| "learning_rate": 4.206844389303569e-07, | |
| "logits/chosen": -0.5038084387779236, | |
| "logits/rejected": -0.1152668371796608, | |
| "logps/chosen": -368.07244873046875, | |
| "logps/rejected": -371.786865234375, | |
| "loss": 0.059, | |
| "loss/chosen-sft": 1.6389780044555664, | |
| "loss/dpo": 0.058953166007995605, | |
| "rewards/accuracies": 0.6812499761581421, | |
| "rewards/chosen": -0.9387050867080688, | |
| "rewards/margins": 0.34334003925323486, | |
| "rewards/rejected": -1.2820451259613037, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 0.33832501386577923, | |
| "grad_norm": 1.6886770725250244, | |
| "learning_rate": 4.1890746836493987e-07, | |
| "logits/chosen": -0.5036560893058777, | |
| "logits/rejected": -0.3065970242023468, | |
| "logps/chosen": -398.92559814453125, | |
| "logps/rejected": -394.6751403808594, | |
| "loss": 0.063, | |
| "loss/chosen-sft": 1.580437421798706, | |
| "loss/dpo": 0.0630171000957489, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -0.9837394952774048, | |
| "rewards/margins": 0.3374081552028656, | |
| "rewards/rejected": -1.3211476802825928, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.3410981697171381, | |
| "grad_norm": 1.2646410465240479, | |
| "learning_rate": 4.171146567660112e-07, | |
| "logits/chosen": -0.34558913111686707, | |
| "logits/rejected": -0.22349652647972107, | |
| "logps/chosen": -397.49908447265625, | |
| "logps/rejected": -370.159912109375, | |
| "loss": 0.0511, | |
| "loss/chosen-sft": 1.6435205936431885, | |
| "loss/dpo": 0.05106610804796219, | |
| "rewards/accuracies": 0.6937500238418579, | |
| "rewards/chosen": -0.8979086875915527, | |
| "rewards/margins": 0.5196878910064697, | |
| "rewards/rejected": -1.4175965785980225, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 0.343871325568497, | |
| "grad_norm": 1.721474528312683, | |
| "learning_rate": 4.153061722729013e-07, | |
| "logits/chosen": -0.4159115254878998, | |
| "logits/rejected": -0.3842083811759949, | |
| "logps/chosen": -425.87567138671875, | |
| "logps/rejected": -361.23883056640625, | |
| "loss": 0.0537, | |
| "loss/chosen-sft": 1.750331163406372, | |
| "loss/dpo": 0.05366608500480652, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.9684473276138306, | |
| "rewards/margins": 0.3472323715686798, | |
| "rewards/rejected": -1.3156797885894775, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.3466444814198558, | |
| "grad_norm": 0.8142698407173157, | |
| "learning_rate": 4.1348218449482723e-07, | |
| "logits/chosen": -0.5395044088363647, | |
| "logits/rejected": -0.4873916208744049, | |
| "logps/chosen": -400.04217529296875, | |
| "logps/rejected": -377.87615966796875, | |
| "loss": 0.052, | |
| "loss/chosen-sft": 1.6478407382965088, | |
| "loss/dpo": 0.05204028636217117, | |
| "rewards/accuracies": 0.6937500238418579, | |
| "rewards/chosen": -0.9235207438468933, | |
| "rewards/margins": 0.4557490348815918, | |
| "rewards/rejected": -1.3792698383331299, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 0.34941763727121466, | |
| "grad_norm": 1.6725468635559082, | |
| "learning_rate": 4.1164286449498584e-07, | |
| "logits/chosen": -0.5160374641418457, | |
| "logits/rejected": -0.267566978931427, | |
| "logps/chosen": -390.87774658203125, | |
| "logps/rejected": -379.4105224609375, | |
| "loss": 0.0671, | |
| "loss/chosen-sft": 1.6147594451904297, | |
| "loss/dpo": 0.06710793823003769, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -1.0025107860565186, | |
| "rewards/margins": 0.2850556969642639, | |
| "rewards/rejected": -1.2875664234161377, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.3521907931225735, | |
| "grad_norm": 1.5668613910675049, | |
| "learning_rate": 4.0978838477451065e-07, | |
| "logits/chosen": -0.36808034777641296, | |
| "logits/rejected": -0.3304065763950348, | |
| "logps/chosen": -414.526123046875, | |
| "logps/rejected": -388.3052062988281, | |
| "loss": 0.0505, | |
| "loss/chosen-sft": 1.8385422229766846, | |
| "loss/dpo": 0.05049455910921097, | |
| "rewards/accuracies": 0.6812499761581421, | |
| "rewards/chosen": -0.9938969612121582, | |
| "rewards/margins": 0.41056522727012634, | |
| "rewards/rejected": -1.404462218284607, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 0.35496394897393235, | |
| "grad_norm": 1.7163115739822388, | |
| "learning_rate": 4.079189192562938e-07, | |
| "logits/chosen": -0.597520649433136, | |
| "logits/rejected": -0.5070825815200806, | |
| "logps/chosen": -420.65447998046875, | |
| "logps/rejected": -367.829345703125, | |
| "loss": 0.0641, | |
| "loss/chosen-sft": 1.6417961120605469, | |
| "loss/dpo": 0.0640825405716896, | |
| "rewards/accuracies": 0.606249988079071, | |
| "rewards/chosen": -0.9867199659347534, | |
| "rewards/margins": 0.3000028729438782, | |
| "rewards/rejected": -1.2867228984832764, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.3577371048252912, | |
| "grad_norm": 1.513505220413208, | |
| "learning_rate": 4.0603464326867456e-07, | |
| "logits/chosen": -0.3763376474380493, | |
| "logits/rejected": -0.37528449296951294, | |
| "logps/chosen": -425.38043212890625, | |
| "logps/rejected": -415.4266662597656, | |
| "loss": 0.0531, | |
| "loss/chosen-sft": 1.692238211631775, | |
| "loss/dpo": 0.053089797496795654, | |
| "rewards/accuracies": 0.762499988079071, | |
| "rewards/chosen": -0.8854948878288269, | |
| "rewards/margins": 0.5308740735054016, | |
| "rewards/rejected": -1.416368842124939, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 0.36051026067665004, | |
| "grad_norm": 2.2651264667510986, | |
| "learning_rate": 4.041357335289962e-07, | |
| "logits/chosen": -0.6403440833091736, | |
| "logits/rejected": -0.5450460314750671, | |
| "logps/chosen": -418.7191467285156, | |
| "logps/rejected": -380.0513610839844, | |
| "loss": 0.0605, | |
| "loss/chosen-sft": 1.6550674438476562, | |
| "loss/dpo": 0.060495972633361816, | |
| "rewards/accuracies": 0.668749988079071, | |
| "rewards/chosen": -1.0474960803985596, | |
| "rewards/margins": 0.3858080506324768, | |
| "rewards/rejected": -1.4333041906356812, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.3632834165280089, | |
| "grad_norm": 1.6215060949325562, | |
| "learning_rate": 4.0222236812703247e-07, | |
| "logits/chosen": -0.43776410818099976, | |
| "logits/rejected": -0.33489981293678284, | |
| "logps/chosen": -383.8864440917969, | |
| "logps/rejected": -378.69873046875, | |
| "loss": 0.0604, | |
| "loss/chosen-sft": 1.619616150856018, | |
| "loss/dpo": 0.060428131371736526, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.8916348218917847, | |
| "rewards/margins": 0.45284873247146606, | |
| "rewards/rejected": -1.3444836139678955, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 0.36605657237936773, | |
| "grad_norm": 1.3622971773147583, | |
| "learning_rate": 4.002947265082854e-07, | |
| "logits/chosen": -0.5182867646217346, | |
| "logits/rejected": -0.41873541474342346, | |
| "logps/chosen": -382.6905212402344, | |
| "logps/rejected": -354.53240966796875, | |
| "loss": 0.0451, | |
| "loss/chosen-sft": 1.6769845485687256, | |
| "loss/dpo": 0.04514864459633827, | |
| "rewards/accuracies": 0.668749988079071, | |
| "rewards/chosen": -0.910293698310852, | |
| "rewards/margins": 0.40142306685447693, | |
| "rewards/rejected": -1.3117166757583618, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.3688297282307266, | |
| "grad_norm": 1.7427003383636475, | |
| "learning_rate": 3.983529894571558e-07, | |
| "logits/chosen": -0.5258729457855225, | |
| "logits/rejected": -0.35827913880348206, | |
| "logps/chosen": -410.27655029296875, | |
| "logps/rejected": -398.0840759277344, | |
| "loss": 0.0515, | |
| "loss/chosen-sft": 1.6551071405410767, | |
| "loss/dpo": 0.051458604633808136, | |
| "rewards/accuracies": 0.6812499761581421, | |
| "rewards/chosen": -0.9875022172927856, | |
| "rewards/margins": 0.4931188225746155, | |
| "rewards/rejected": -1.4806209802627563, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 0.3716028840820854, | |
| "grad_norm": 1.5637753009796143, | |
| "learning_rate": 3.963973390799887e-07, | |
| "logits/chosen": -0.3731691241264343, | |
| "logits/rejected": -0.3140907287597656, | |
| "logps/chosen": -387.2796936035156, | |
| "logps/rejected": -389.09967041015625, | |
| "loss": 0.0573, | |
| "loss/chosen-sft": 1.664841890335083, | |
| "loss/dpo": 0.057298194617033005, | |
| "rewards/accuracies": 0.6312500238418579, | |
| "rewards/chosen": -0.9336613416671753, | |
| "rewards/margins": 0.4270879626274109, | |
| "rewards/rejected": -1.360749363899231, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.37437603993344426, | |
| "grad_norm": 1.713516116142273, | |
| "learning_rate": 3.944279587879942e-07, | |
| "logits/chosen": -0.6181343197822571, | |
| "logits/rejected": -0.4861913323402405, | |
| "logps/chosen": -399.0451354980469, | |
| "logps/rejected": -375.4527282714844, | |
| "loss": 0.0681, | |
| "loss/chosen-sft": 1.6289466619491577, | |
| "loss/dpo": 0.06814940273761749, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -1.020996332168579, | |
| "rewards/margins": 0.3115329444408417, | |
| "rewards/rejected": -1.3325293064117432, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 0.3771491957848031, | |
| "grad_norm": 1.3407127857208252, | |
| "learning_rate": 3.9244503328004606e-07, | |
| "logits/chosen": -0.7082573175430298, | |
| "logits/rejected": -0.5655766725540161, | |
| "logps/chosen": -412.4944763183594, | |
| "logps/rejected": -384.94476318359375, | |
| "loss": 0.0535, | |
| "loss/chosen-sft": 1.6391160488128662, | |
| "loss/dpo": 0.05352095887064934, | |
| "rewards/accuracies": 0.6312500238418579, | |
| "rewards/chosen": -1.062304139137268, | |
| "rewards/margins": 0.3755071461200714, | |
| "rewards/rejected": -1.4378111362457275, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.37992235163616195, | |
| "grad_norm": 0.8834872841835022, | |
| "learning_rate": 3.9044874852536013e-07, | |
| "logits/chosen": -0.5957868099212646, | |
| "logits/rejected": -0.3501487076282501, | |
| "logps/chosen": -390.2853088378906, | |
| "logps/rejected": -378.35296630859375, | |
| "loss": 0.0609, | |
| "loss/chosen-sft": 1.706976294517517, | |
| "loss/dpo": 0.06085295230150223, | |
| "rewards/accuracies": 0.668749988079071, | |
| "rewards/chosen": -1.0856157541275024, | |
| "rewards/margins": 0.3497942090034485, | |
| "rewards/rejected": -1.4354099035263062, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 0.3826955074875208, | |
| "grad_norm": 1.8584004640579224, | |
| "learning_rate": 3.8843929174605283e-07, | |
| "logits/chosen": -0.5268598794937134, | |
| "logits/rejected": -0.5594683885574341, | |
| "logps/chosen": -399.52593994140625, | |
| "logps/rejected": -352.721923828125, | |
| "loss": 0.0588, | |
| "loss/chosen-sft": 1.7252442836761475, | |
| "loss/dpo": 0.058761436492204666, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.9334037899971008, | |
| "rewards/margins": 0.39858493208885193, | |
| "rewards/rejected": -1.3319886922836304, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.38546866333887964, | |
| "grad_norm": 1.2674587965011597, | |
| "learning_rate": 3.8641685139958234e-07, | |
| "logits/chosen": -0.5243244171142578, | |
| "logits/rejected": -0.3986015319824219, | |
| "logps/chosen": -428.97662353515625, | |
| "logps/rejected": -402.95074462890625, | |
| "loss": 0.0568, | |
| "loss/chosen-sft": 1.6090924739837646, | |
| "loss/dpo": 0.056836675852537155, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.9936995506286621, | |
| "rewards/margins": 0.45986804366111755, | |
| "rewards/rejected": -1.4535675048828125, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 0.3882418191902385, | |
| "grad_norm": 1.1967881917953491, | |
| "learning_rate": 3.8438161716107453e-07, | |
| "logits/chosen": -0.35420164465904236, | |
| "logits/rejected": -0.16235880553722382, | |
| "logps/chosen": -388.86260986328125, | |
| "logps/rejected": -373.76336669921875, | |
| "loss": 0.0531, | |
| "loss/chosen-sft": 1.7332239151000977, | |
| "loss/dpo": 0.05314627289772034, | |
| "rewards/accuracies": 0.6937500238418579, | |
| "rewards/chosen": -0.9368470907211304, | |
| "rewards/margins": 0.39190053939819336, | |
| "rewards/rejected": -1.3287477493286133, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.3910149750415973, | |
| "grad_norm": 1.0293500423431396, | |
| "learning_rate": 3.8233377990553376e-07, | |
| "logits/chosen": -0.5038026571273804, | |
| "logits/rejected": -0.45793700218200684, | |
| "logps/chosen": -392.74078369140625, | |
| "logps/rejected": -383.5316467285156, | |
| "loss": 0.0469, | |
| "loss/chosen-sft": 1.6530288457870483, | |
| "loss/dpo": 0.04691758006811142, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.9149090051651001, | |
| "rewards/margins": 0.5396536588668823, | |
| "rewards/rejected": -1.4545625448226929, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 0.39378813089295617, | |
| "grad_norm": 1.4593671560287476, | |
| "learning_rate": 3.80273531689942e-07, | |
| "logits/chosen": -0.5035001039505005, | |
| "logits/rejected": -0.39359813928604126, | |
| "logps/chosen": -410.6475524902344, | |
| "logps/rejected": -410.08355712890625, | |
| "loss": 0.0516, | |
| "loss/chosen-sft": 1.5783100128173828, | |
| "loss/dpo": 0.05163818597793579, | |
| "rewards/accuracies": 0.762499988079071, | |
| "rewards/chosen": -0.9400428533554077, | |
| "rewards/margins": 0.5864478945732117, | |
| "rewards/rejected": -1.5264908075332642, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.396561286744315, | |
| "grad_norm": 1.9627400636672974, | |
| "learning_rate": 3.7820106573524645e-07, | |
| "logits/chosen": -0.3761715888977051, | |
| "logits/rejected": -0.33153462409973145, | |
| "logps/chosen": -387.8844909667969, | |
| "logps/rejected": -367.91168212890625, | |
| "loss": 0.0642, | |
| "loss/chosen-sft": 1.6243873834609985, | |
| "loss/dpo": 0.0642186850309372, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -1.0104358196258545, | |
| "rewards/margins": 0.39246731996536255, | |
| "rewards/rejected": -1.4029031991958618, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 0.39933444259567386, | |
| "grad_norm": 1.1319611072540283, | |
| "learning_rate": 3.7611657640823825e-07, | |
| "logits/chosen": -0.5168325901031494, | |
| "logits/rejected": -0.41257476806640625, | |
| "logps/chosen": -418.1272888183594, | |
| "logps/rejected": -379.2960510253906, | |
| "loss": 0.0503, | |
| "loss/chosen-sft": 1.699724793434143, | |
| "loss/dpo": 0.050268955528736115, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": -1.030914545059204, | |
| "rewards/margins": 0.43790435791015625, | |
| "rewards/rejected": -1.46881902217865, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.4021075984470327, | |
| "grad_norm": 1.7412174940109253, | |
| "learning_rate": 3.74020259203324e-07, | |
| "logits/chosen": -0.6365585327148438, | |
| "logits/rejected": -0.4046885371208191, | |
| "logps/chosen": -392.642822265625, | |
| "logps/rejected": -416.04986572265625, | |
| "loss": 0.0541, | |
| "loss/chosen-sft": 1.6220734119415283, | |
| "loss/dpo": 0.05405275151133537, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -1.0092906951904297, | |
| "rewards/margins": 0.5087220072746277, | |
| "rewards/rejected": -1.5180127620697021, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 0.40488075429839154, | |
| "grad_norm": 1.3539364337921143, | |
| "learning_rate": 3.7191231072419095e-07, | |
| "logits/chosen": -0.41355252265930176, | |
| "logits/rejected": -0.23317034542560577, | |
| "logps/chosen": -427.7068786621094, | |
| "logps/rejected": -423.05108642578125, | |
| "loss": 0.045, | |
| "loss/chosen-sft": 1.7072865962982178, | |
| "loss/dpo": 0.04499746486544609, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": -1.0050982236862183, | |
| "rewards/margins": 0.5913174748420715, | |
| "rewards/rejected": -1.5964157581329346, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.40765391014975044, | |
| "grad_norm": 2.4430134296417236, | |
| "learning_rate": 3.6979292866536864e-07, | |
| "logits/chosen": -0.5284135937690735, | |
| "logits/rejected": -0.39231494069099426, | |
| "logps/chosen": -444.33062744140625, | |
| "logps/rejected": -440.982177734375, | |
| "loss": 0.0645, | |
| "loss/chosen-sft": 1.6972286701202393, | |
| "loss/dpo": 0.06453467905521393, | |
| "rewards/accuracies": 0.706250011920929, | |
| "rewards/chosen": -1.0674129724502563, | |
| "rewards/margins": 0.47073474526405334, | |
| "rewards/rejected": -1.5381478071212769, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 0.4104270660011093, | |
| "grad_norm": 1.5764257907867432, | |
| "learning_rate": 3.6766231179368815e-07, | |
| "logits/chosen": -0.6887901425361633, | |
| "logits/rejected": -0.5167983174324036, | |
| "logps/chosen": -443.1559143066406, | |
| "logps/rejected": -440.83489990234375, | |
| "loss": 0.0519, | |
| "loss/chosen-sft": 1.635066270828247, | |
| "loss/dpo": 0.05188404396176338, | |
| "rewards/accuracies": 0.6812499761581421, | |
| "rewards/chosen": -1.104542851448059, | |
| "rewards/margins": 0.5738195180892944, | |
| "rewards/rejected": -1.678362250328064, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.41320022185246813, | |
| "grad_norm": 1.0604594945907593, | |
| "learning_rate": 3.6552065992964043e-07, | |
| "logits/chosen": -0.6154565215110779, | |
| "logits/rejected": -0.3693044185638428, | |
| "logps/chosen": -406.3134460449219, | |
| "logps/rejected": -377.28228759765625, | |
| "loss": 0.0516, | |
| "loss/chosen-sft": 1.6899700164794922, | |
| "loss/dpo": 0.05156542733311653, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -1.0907108783721924, | |
| "rewards/margins": 0.35733407735824585, | |
| "rewards/rejected": -1.448045015335083, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 0.415973377703827, | |
| "grad_norm": 1.4795838594436646, | |
| "learning_rate": 3.6336817392863625e-07, | |
| "logits/chosen": -0.4953575134277344, | |
| "logits/rejected": -0.3335438668727875, | |
| "logps/chosen": -399.979736328125, | |
| "logps/rejected": -392.7442321777344, | |
| "loss": 0.0494, | |
| "loss/chosen-sft": 1.6881647109985352, | |
| "loss/dpo": 0.04941638186573982, | |
| "rewards/accuracies": 0.6812499761581421, | |
| "rewards/chosen": -1.1530383825302124, | |
| "rewards/margins": 0.4033992886543274, | |
| "rewards/rejected": -1.5564377307891846, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.4187465335551858, | |
| "grad_norm": 1.2639776468276978, | |
| "learning_rate": 3.6120505566216906e-07, | |
| "logits/chosen": -0.669465184211731, | |
| "logits/rejected": -0.5370808839797974, | |
| "logps/chosen": -412.94366455078125, | |
| "logps/rejected": -406.2000427246094, | |
| "loss": 0.0412, | |
| "loss/chosen-sft": 1.7509901523590088, | |
| "loss/dpo": 0.04121355339884758, | |
| "rewards/accuracies": 0.6812499761581421, | |
| "rewards/chosen": -1.085228681564331, | |
| "rewards/margins": 0.5851390361785889, | |
| "rewards/rejected": -1.6703678369522095, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 0.42151968940654466, | |
| "grad_norm": 1.776004672050476, | |
| "learning_rate": 3.5903150799888215e-07, | |
| "logits/chosen": -0.562536358833313, | |
| "logits/rejected": -0.49020713567733765, | |
| "logps/chosen": -412.8077087402344, | |
| "logps/rejected": -399.482421875, | |
| "loss": 0.0529, | |
| "loss/chosen-sft": 1.6743816137313843, | |
| "loss/dpo": 0.052908383309841156, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": -1.026395320892334, | |
| "rewards/margins": 0.47851258516311646, | |
| "rewards/rejected": -1.5049078464508057, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.4242928452579035, | |
| "grad_norm": 1.3225823640823364, | |
| "learning_rate": 3.5684773478554255e-07, | |
| "logits/chosen": -0.5152966976165771, | |
| "logits/rejected": -0.5033225417137146, | |
| "logps/chosen": -405.97271728515625, | |
| "logps/rejected": -377.90875244140625, | |
| "loss": 0.0571, | |
| "loss/chosen-sft": 1.7368265390396118, | |
| "loss/dpo": 0.05706434324383736, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -1.0174944400787354, | |
| "rewards/margins": 0.47621211409568787, | |
| "rewards/rejected": -1.493706464767456, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 0.42706600110926235, | |
| "grad_norm": 1.3815367221832275, | |
| "learning_rate": 3.546539408279235e-07, | |
| "logits/chosen": -0.49167943000793457, | |
| "logits/rejected": -0.2837482988834381, | |
| "logps/chosen": -405.2371520996094, | |
| "logps/rejected": -391.68670654296875, | |
| "loss": 0.0601, | |
| "loss/chosen-sft": 1.7111915349960327, | |
| "loss/dpo": 0.060050565749406815, | |
| "rewards/accuracies": 0.668749988079071, | |
| "rewards/chosen": -1.0395854711532593, | |
| "rewards/margins": 0.37245672941207886, | |
| "rewards/rejected": -1.412042260169983, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.4298391569606212, | |
| "grad_norm": 1.1186020374298096, | |
| "learning_rate": 3.5245033187159647e-07, | |
| "logits/chosen": -0.5566674470901489, | |
| "logits/rejected": -0.4645940661430359, | |
| "logps/chosen": -405.1397399902344, | |
| "logps/rejected": -374.271728515625, | |
| "loss": 0.0514, | |
| "loss/chosen-sft": 1.6212804317474365, | |
| "loss/dpo": 0.05135621875524521, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.9940582513809204, | |
| "rewards/margins": 0.45282116532325745, | |
| "rewards/rejected": -1.4468793869018555, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 0.43261231281198004, | |
| "grad_norm": 1.5393182039260864, | |
| "learning_rate": 3.502371145826352e-07, | |
| "logits/chosen": -0.5995725989341736, | |
| "logits/rejected": -0.5144038200378418, | |
| "logps/chosen": -400.0671081542969, | |
| "logps/rejected": -385.246337890625, | |
| "loss": 0.0464, | |
| "loss/chosen-sft": 1.6027672290802002, | |
| "loss/dpo": 0.04639893397688866, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.9513114094734192, | |
| "rewards/margins": 0.5679217576980591, | |
| "rewards/rejected": -1.5192331075668335, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.4353854686633389, | |
| "grad_norm": 1.7037338018417358, | |
| "learning_rate": 3.4801449652823374e-07, | |
| "logits/chosen": -0.8535438776016235, | |
| "logits/rejected": -0.6764947175979614, | |
| "logps/chosen": -405.5221252441406, | |
| "logps/rejected": -374.32440185546875, | |
| "loss": 0.0488, | |
| "loss/chosen-sft": 1.583449363708496, | |
| "loss/dpo": 0.048822566866874695, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.958821177482605, | |
| "rewards/margins": 0.5157214999198914, | |
| "rewards/rejected": -1.4745426177978516, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 0.4381586245146977, | |
| "grad_norm": 1.703331708908081, | |
| "learning_rate": 3.4578268615723924e-07, | |
| "logits/chosen": -0.561161458492279, | |
| "logits/rejected": -0.5696666836738586, | |
| "logps/chosen": -420.22381591796875, | |
| "logps/rejected": -396.88299560546875, | |
| "loss": 0.0437, | |
| "loss/chosen-sft": 1.7269833087921143, | |
| "loss/dpo": 0.04372150078415871, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -1.0612952709197998, | |
| "rewards/margins": 0.5599007606506348, | |
| "rewards/rejected": -1.6211960315704346, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.44093178036605657, | |
| "grad_norm": 1.3786600828170776, | |
| "learning_rate": 3.4354189278060317e-07, | |
| "logits/chosen": -0.7774502635002136, | |
| "logits/rejected": -0.6075506806373596, | |
| "logps/chosen": -412.4271545410156, | |
| "logps/rejected": -393.829345703125, | |
| "loss": 0.0453, | |
| "loss/chosen-sft": 1.675945520401001, | |
| "loss/dpo": 0.04528175666928291, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -1.1789729595184326, | |
| "rewards/margins": 0.48232507705688477, | |
| "rewards/rejected": -1.6612980365753174, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 0.4437049362174154, | |
| "grad_norm": 1.5878552198410034, | |
| "learning_rate": 3.412923265517503e-07, | |
| "logits/chosen": -0.7400108575820923, | |
| "logits/rejected": -0.7512958645820618, | |
| "logps/chosen": -412.6996154785156, | |
| "logps/rejected": -379.6897277832031, | |
| "loss": 0.0488, | |
| "loss/chosen-sft": 1.7094510793685913, | |
| "loss/dpo": 0.048782043159008026, | |
| "rewards/accuracies": 0.6312500238418579, | |
| "rewards/chosen": -1.1738940477371216, | |
| "rewards/margins": 0.429801881313324, | |
| "rewards/rejected": -1.6036958694458008, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.44647809206877426, | |
| "grad_norm": 1.7441812753677368, | |
| "learning_rate": 3.390341984468699e-07, | |
| "logits/chosen": -0.7113388776779175, | |
| "logits/rejected": -0.6850872039794922, | |
| "logps/chosen": -428.8340759277344, | |
| "logps/rejected": -400.9315490722656, | |
| "loss": 0.0473, | |
| "loss/chosen-sft": 1.7859251499176025, | |
| "loss/dpo": 0.047270990908145905, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -1.1826239824295044, | |
| "rewards/margins": 0.4750004708766937, | |
| "rewards/rejected": -1.6576244831085205, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 0.4492512479201331, | |
| "grad_norm": 1.530585765838623, | |
| "learning_rate": 3.367677202451292e-07, | |
| "logits/chosen": -0.6908737421035767, | |
| "logits/rejected": -0.5597144365310669, | |
| "logps/chosen": -455.83758544921875, | |
| "logps/rejected": -452.01385498046875, | |
| "loss": 0.0584, | |
| "loss/chosen-sft": 1.6678813695907593, | |
| "loss/dpo": 0.05841095373034477, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -1.1806871891021729, | |
| "rewards/margins": 0.6453709602355957, | |
| "rewards/rejected": -1.8260581493377686, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.45202440377149194, | |
| "grad_norm": 0.9433002471923828, | |
| "learning_rate": 3.3449310450881164e-07, | |
| "logits/chosen": -0.7155448198318481, | |
| "logits/rejected": -0.6245291829109192, | |
| "logps/chosen": -421.4974060058594, | |
| "logps/rejected": -427.9629821777344, | |
| "loss": 0.0364, | |
| "loss/chosen-sft": 1.7820746898651123, | |
| "loss/dpo": 0.03638064116239548, | |
| "rewards/accuracies": 0.7562500238418579, | |
| "rewards/chosen": -1.212754249572754, | |
| "rewards/margins": 0.6936885714530945, | |
| "rewards/rejected": -1.9064428806304932, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 0.4547975596228508, | |
| "grad_norm": 1.6761059761047363, | |
| "learning_rate": 3.322105645633813e-07, | |
| "logits/chosen": -0.6909006834030151, | |
| "logits/rejected": -0.5668486952781677, | |
| "logps/chosen": -440.548095703125, | |
| "logps/rejected": -421.02392578125, | |
| "loss": 0.0613, | |
| "loss/chosen-sft": 1.691504716873169, | |
| "loss/dpo": 0.06133972853422165, | |
| "rewards/accuracies": 0.6312500238418579, | |
| "rewards/chosen": -1.250173807144165, | |
| "rewards/margins": 0.3592910170555115, | |
| "rewards/rejected": -1.6094646453857422, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.45757071547420963, | |
| "grad_norm": 1.7704241275787354, | |
| "learning_rate": 3.299203144774767e-07, | |
| "logits/chosen": -0.717354953289032, | |
| "logits/rejected": -0.6114916801452637, | |
| "logps/chosen": -436.1394958496094, | |
| "logps/rejected": -417.92218017578125, | |
| "loss": 0.0443, | |
| "loss/chosen-sft": 1.7568438053131104, | |
| "loss/dpo": 0.044312089681625366, | |
| "rewards/accuracies": 0.706250011920929, | |
| "rewards/chosen": -1.234349250793457, | |
| "rewards/margins": 0.4998023509979248, | |
| "rewards/rejected": -1.7341516017913818, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 0.4603438713255685, | |
| "grad_norm": 1.0728057622909546, | |
| "learning_rate": 3.276225690428338e-07, | |
| "logits/chosen": -0.7584112882614136, | |
| "logits/rejected": -0.7765355110168457, | |
| "logps/chosen": -411.444580078125, | |
| "logps/rejected": -426.4891052246094, | |
| "loss": 0.0411, | |
| "loss/chosen-sft": 1.7121307849884033, | |
| "loss/dpo": 0.04111065715551376, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -1.1426109075546265, | |
| "rewards/margins": 0.6041598916053772, | |
| "rewards/rejected": -1.7467708587646484, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.4631170271769273, | |
| "grad_norm": 1.3771647214889526, | |
| "learning_rate": 3.2531754375414206e-07, | |
| "logits/chosen": -0.7121502161026001, | |
| "logits/rejected": -0.732271671295166, | |
| "logps/chosen": -458.24688720703125, | |
| "logps/rejected": -436.6095275878906, | |
| "loss": 0.0516, | |
| "loss/chosen-sft": 1.7345997095108032, | |
| "loss/dpo": 0.051555633544921875, | |
| "rewards/accuracies": 0.6812499761581421, | |
| "rewards/chosen": -1.2151943445205688, | |
| "rewards/margins": 0.511760413646698, | |
| "rewards/rejected": -1.7269548177719116, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 0.46589018302828616, | |
| "grad_norm": 1.6047500371932983, | |
| "learning_rate": 3.230054547888339e-07, | |
| "logits/chosen": -0.763964831829071, | |
| "logits/rejected": -0.7086871862411499, | |
| "logps/chosen": -406.05316162109375, | |
| "logps/rejected": -391.0845642089844, | |
| "loss": 0.0415, | |
| "loss/chosen-sft": 1.747057318687439, | |
| "loss/dpo": 0.04153294861316681, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": -1.1723990440368652, | |
| "rewards/margins": 0.46168699860572815, | |
| "rewards/rejected": -1.634086012840271, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.468663338879645, | |
| "grad_norm": 1.445021152496338, | |
| "learning_rate": 3.2068651898681076e-07, | |
| "logits/chosen": -0.742651104927063, | |
| "logits/rejected": -0.7188843488693237, | |
| "logps/chosen": -458.99652099609375, | |
| "logps/rejected": -454.03533935546875, | |
| "loss": 0.0491, | |
| "loss/chosen-sft": 1.6705642938613892, | |
| "loss/dpo": 0.0490889772772789, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -1.282627820968628, | |
| "rewards/margins": 0.5290722846984863, | |
| "rewards/rejected": -1.8117001056671143, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 0.4714364947310039, | |
| "grad_norm": 2.1770212650299072, | |
| "learning_rate": 3.183609538301065e-07, | |
| "logits/chosen": -0.8837400674819946, | |
| "logits/rejected": -0.7734067440032959, | |
| "logps/chosen": -432.4523010253906, | |
| "logps/rejected": -402.35064697265625, | |
| "loss": 0.041, | |
| "loss/chosen-sft": 1.7752116918563843, | |
| "loss/dpo": 0.040977347642183304, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -1.2817370891571045, | |
| "rewards/margins": 0.4654463827610016, | |
| "rewards/rejected": -1.7471835613250732, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.47420965058236275, | |
| "grad_norm": 1.3028136491775513, | |
| "learning_rate": 3.1602897742249077e-07, | |
| "logits/chosen": -0.9581116437911987, | |
| "logits/rejected": -0.7644520998001099, | |
| "logps/chosen": -424.92352294921875, | |
| "logps/rejected": -427.76806640625, | |
| "loss": 0.0447, | |
| "loss/chosen-sft": 1.6508781909942627, | |
| "loss/dpo": 0.04467002674937248, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -1.2735633850097656, | |
| "rewards/margins": 0.5769675970077515, | |
| "rewards/rejected": -1.850530982017517, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 0.4769828064337216, | |
| "grad_norm": 1.5738519430160522, | |
| "learning_rate": 3.136908084690142e-07, | |
| "logits/chosen": -0.7455793619155884, | |
| "logits/rejected": -0.6809624433517456, | |
| "logps/chosen": -430.9115295410156, | |
| "logps/rejected": -438.87164306640625, | |
| "loss": 0.0372, | |
| "loss/chosen-sft": 1.8043369054794312, | |
| "loss/dpo": 0.037248264998197556, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": -1.165959119796753, | |
| "rewards/margins": 0.6415104866027832, | |
| "rewards/rejected": -1.8074697256088257, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.47975596228508044, | |
| "grad_norm": 0.9466845989227295, | |
| "learning_rate": 3.113466662554971e-07, | |
| "logits/chosen": -0.8632648587226868, | |
| "logits/rejected": -0.7621570825576782, | |
| "logps/chosen": -416.5755310058594, | |
| "logps/rejected": -404.36383056640625, | |
| "loss": 0.0405, | |
| "loss/chosen-sft": 1.7127773761749268, | |
| "loss/dpo": 0.04047512635588646, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -1.1657226085662842, | |
| "rewards/margins": 0.5985251665115356, | |
| "rewards/rejected": -1.7642476558685303, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 0.4825291181364393, | |
| "grad_norm": 1.4616957902908325, | |
| "learning_rate": 3.0899677062796356e-07, | |
| "logits/chosen": -0.9384894371032715, | |
| "logits/rejected": -0.7627253532409668, | |
| "logps/chosen": -454.59613037109375, | |
| "logps/rejected": -447.65667724609375, | |
| "loss": 0.0443, | |
| "loss/chosen-sft": 1.7974565029144287, | |
| "loss/dpo": 0.04427001625299454, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -1.3299897909164429, | |
| "rewards/margins": 0.5200726389884949, | |
| "rewards/rejected": -1.850062370300293, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.4853022739877981, | |
| "grad_norm": 1.1953011751174927, | |
| "learning_rate": 3.066413419720231e-07, | |
| "logits/chosen": -1.0708543062210083, | |
| "logits/rejected": -0.9470682144165039, | |
| "logps/chosen": -411.50323486328125, | |
| "logps/rejected": -438.43023681640625, | |
| "loss": 0.0373, | |
| "loss/chosen-sft": 1.778357744216919, | |
| "loss/dpo": 0.03726748377084732, | |
| "rewards/accuracies": 0.6812499761581421, | |
| "rewards/chosen": -1.4992625713348389, | |
| "rewards/margins": 0.5397129058837891, | |
| "rewards/rejected": -2.038975477218628, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 0.48807542983915697, | |
| "grad_norm": 1.2079335451126099, | |
| "learning_rate": 3.042806011922021e-07, | |
| "logits/chosen": -0.9137675166130066, | |
| "logits/rejected": -0.8694272041320801, | |
| "logps/chosen": -465.96954345703125, | |
| "logps/rejected": -473.0062561035156, | |
| "loss": 0.0423, | |
| "loss/chosen-sft": 1.8091964721679688, | |
| "loss/dpo": 0.04231221228837967, | |
| "rewards/accuracies": 0.668749988079071, | |
| "rewards/chosen": -1.3828151226043701, | |
| "rewards/margins": 0.6629751920700073, | |
| "rewards/rejected": -2.045790195465088, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.4908485856905158, | |
| "grad_norm": 2.0538318157196045, | |
| "learning_rate": 3.019147696912256e-07, | |
| "logits/chosen": -0.8203264474868774, | |
| "logits/rejected": -0.8118319511413574, | |
| "logps/chosen": -457.15557861328125, | |
| "logps/rejected": -443.97412109375, | |
| "loss": 0.0359, | |
| "loss/chosen-sft": 1.819947600364685, | |
| "loss/dpo": 0.03590567782521248, | |
| "rewards/accuracies": 0.6312500238418579, | |
| "rewards/chosen": -1.4309660196304321, | |
| "rewards/margins": 0.6293816566467285, | |
| "rewards/rejected": -2.060347557067871, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 0.49362174154187466, | |
| "grad_norm": 1.3179012537002563, | |
| "learning_rate": 2.9954406934925353e-07, | |
| "logits/chosen": -1.0511844158172607, | |
| "logits/rejected": -0.8907807469367981, | |
| "logps/chosen": -441.5658264160156, | |
| "logps/rejected": -442.07025146484375, | |
| "loss": 0.0387, | |
| "loss/chosen-sft": 1.8559125661849976, | |
| "loss/dpo": 0.0386546328663826, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -1.4241468906402588, | |
| "rewards/margins": 0.45265036821365356, | |
| "rewards/rejected": -1.8767973184585571, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.4963948973932335, | |
| "grad_norm": 1.4792488813400269, | |
| "learning_rate": 2.9716872250307153e-07, | |
| "logits/chosen": -0.9463046789169312, | |
| "logits/rejected": -0.8543429374694824, | |
| "logps/chosen": -422.39288330078125, | |
| "logps/rejected": -453.1312561035156, | |
| "loss": 0.0381, | |
| "loss/chosen-sft": 1.7985944747924805, | |
| "loss/dpo": 0.0381343699991703, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -1.2584816217422485, | |
| "rewards/margins": 0.6484702825546265, | |
| "rewards/rejected": -1.906951904296875, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 0.49916805324459235, | |
| "grad_norm": 1.479712724685669, | |
| "learning_rate": 2.9478895192523867e-07, | |
| "logits/chosen": -0.9726383090019226, | |
| "logits/rejected": -0.8202565312385559, | |
| "logps/chosen": -414.5870056152344, | |
| "logps/rejected": -435.7919006347656, | |
| "loss": 0.0542, | |
| "loss/chosen-sft": 1.651619553565979, | |
| "loss/dpo": 0.05416691303253174, | |
| "rewards/accuracies": 0.6937500238418579, | |
| "rewards/chosen": -1.2731552124023438, | |
| "rewards/margins": 0.5164347290992737, | |
| "rewards/rejected": -1.7895901203155518, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.5019412090959512, | |
| "grad_norm": 1.1789251565933228, | |
| "learning_rate": 2.9240498080319503e-07, | |
| "logits/chosen": -0.8077837228775024, | |
| "logits/rejected": -0.7600533366203308, | |
| "logps/chosen": -395.1336975097656, | |
| "logps/rejected": -399.80316162109375, | |
| "loss": 0.0351, | |
| "loss/chosen-sft": 1.7661956548690796, | |
| "loss/dpo": 0.03513758257031441, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -1.110447883605957, | |
| "rewards/margins": 0.6378698348999023, | |
| "rewards/rejected": -1.7483177185058594, | |
| "step": 905 | |
| }, | |
| { | |
| "epoch": 0.50471436494731, | |
| "grad_norm": 1.0820693969726562, | |
| "learning_rate": 2.9001703271832987e-07, | |
| "logits/chosen": -0.839047908782959, | |
| "logits/rejected": -0.7069706916809082, | |
| "logps/chosen": -418.8855895996094, | |
| "logps/rejected": -439.6836853027344, | |
| "loss": 0.0325, | |
| "loss/chosen-sft": 1.841740369796753, | |
| "loss/dpo": 0.03246615082025528, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": -1.2718727588653564, | |
| "rewards/margins": 0.6148180365562439, | |
| "rewards/rejected": -1.8866908550262451, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.5074875207986689, | |
| "grad_norm": 1.1299471855163574, | |
| "learning_rate": 2.8762533162501306e-07, | |
| "logits/chosen": -0.8962306976318359, | |
| "logits/rejected": -0.7364431619644165, | |
| "logps/chosen": -440.30419921875, | |
| "logps/rejected": -406.437744140625, | |
| "loss": 0.048, | |
| "loss/chosen-sft": 1.8074274063110352, | |
| "loss/dpo": 0.04800194129347801, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -1.4012686014175415, | |
| "rewards/margins": 0.2889956831932068, | |
| "rewards/rejected": -1.690264344215393, | |
| "step": 915 | |
| }, | |
| { | |
| "epoch": 0.5102606766500277, | |
| "grad_norm": 1.7517938613891602, | |
| "learning_rate": 2.852301018295914e-07, | |
| "logits/chosen": -1.0834687948226929, | |
| "logits/rejected": -0.7870742082595825, | |
| "logps/chosen": -392.78662109375, | |
| "logps/rejected": -423.45294189453125, | |
| "loss": 0.0458, | |
| "loss/chosen-sft": 1.6456248760223389, | |
| "loss/dpo": 0.045752983540296555, | |
| "rewards/accuracies": 0.668749988079071, | |
| "rewards/chosen": -1.2369292974472046, | |
| "rewards/margins": 0.5108424425125122, | |
| "rewards/rejected": -1.7477716207504272, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.5130338325013866, | |
| "grad_norm": 1.3080066442489624, | |
| "learning_rate": 2.828315679693518e-07, | |
| "logits/chosen": -0.7610379457473755, | |
| "logits/rejected": -0.6710953712463379, | |
| "logps/chosen": -434.26953125, | |
| "logps/rejected": -448.9422912597656, | |
| "loss": 0.0385, | |
| "loss/chosen-sft": 1.773113489151001, | |
| "loss/dpo": 0.03848852962255478, | |
| "rewards/accuracies": 0.6937500238418579, | |
| "rewards/chosen": -1.2653634548187256, | |
| "rewards/margins": 0.6251915097236633, | |
| "rewards/rejected": -1.8905551433563232, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 0.5158069883527454, | |
| "grad_norm": 1.5302928686141968, | |
| "learning_rate": 2.80429954991454e-07, | |
| "logits/chosen": -0.8365219831466675, | |
| "logits/rejected": -0.787796139717102, | |
| "logps/chosen": -418.86383056640625, | |
| "logps/rejected": -427.78790283203125, | |
| "loss": 0.0488, | |
| "loss/chosen-sft": 1.701751470565796, | |
| "loss/dpo": 0.048776544630527496, | |
| "rewards/accuracies": 0.7437499761581421, | |
| "rewards/chosen": -1.2631399631500244, | |
| "rewards/margins": 0.5425049066543579, | |
| "rewards/rejected": -1.8056447505950928, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.5185801442041043, | |
| "grad_norm": 1.1883572340011597, | |
| "learning_rate": 2.7802548813183364e-07, | |
| "logits/chosen": -0.8014055490493774, | |
| "logits/rejected": -0.8269286155700684, | |
| "logps/chosen": -445.94378662109375, | |
| "logps/rejected": -428.35107421875, | |
| "loss": 0.0446, | |
| "loss/chosen-sft": 1.728753685951233, | |
| "loss/dpo": 0.04457592964172363, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -1.1154412031173706, | |
| "rewards/margins": 0.6285417675971985, | |
| "rewards/rejected": -1.7439830303192139, | |
| "step": 935 | |
| }, | |
| { | |
| "epoch": 0.5213533000554631, | |
| "grad_norm": 1.7934128046035767, | |
| "learning_rate": 2.756183928940784e-07, | |
| "logits/chosen": -0.7623527646064758, | |
| "logits/rejected": -0.7455809712409973, | |
| "logps/chosen": -425.8876037597656, | |
| "logps/rejected": -401.5950927734375, | |
| "loss": 0.0398, | |
| "loss/chosen-sft": 1.7588945627212524, | |
| "loss/dpo": 0.03979960083961487, | |
| "rewards/accuracies": 0.668749988079071, | |
| "rewards/chosen": -1.2177762985229492, | |
| "rewards/margins": 0.4867513179779053, | |
| "rewards/rejected": -1.704527497291565, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.5241264559068219, | |
| "grad_norm": 1.373268723487854, | |
| "learning_rate": 2.7320889502827905e-07, | |
| "logits/chosen": -0.8963130712509155, | |
| "logits/rejected": -0.8474391102790833, | |
| "logps/chosen": -429.3999938964844, | |
| "logps/rejected": -427.1219177246094, | |
| "loss": 0.0446, | |
| "loss/chosen-sft": 1.7064225673675537, | |
| "loss/dpo": 0.04461617022752762, | |
| "rewards/accuracies": 0.7437499761581421, | |
| "rewards/chosen": -1.2002841234207153, | |
| "rewards/margins": 0.6481701731681824, | |
| "rewards/rejected": -1.848454236984253, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 0.5268996117581808, | |
| "grad_norm": 1.2301034927368164, | |
| "learning_rate": 2.707972205098576e-07, | |
| "logits/chosen": -0.9842405319213867, | |
| "logits/rejected": -1.0120937824249268, | |
| "logps/chosen": -438.3604431152344, | |
| "logps/rejected": -427.14764404296875, | |
| "loss": 0.0411, | |
| "loss/chosen-sft": 1.813296914100647, | |
| "loss/dpo": 0.041075654327869415, | |
| "rewards/accuracies": 0.706250011920929, | |
| "rewards/chosen": -1.2374821901321411, | |
| "rewards/margins": 0.6461740732192993, | |
| "rewards/rejected": -1.8836562633514404, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.5296727676095396, | |
| "grad_norm": 1.549978256225586, | |
| "learning_rate": 2.68383595518374e-07, | |
| "logits/chosen": -0.8700895309448242, | |
| "logits/rejected": -0.8361631631851196, | |
| "logps/chosen": -442.2728576660156, | |
| "logps/rejected": -430.29986572265625, | |
| "loss": 0.0374, | |
| "loss/chosen-sft": 1.7138087749481201, | |
| "loss/dpo": 0.03742986172437668, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -1.300048589706421, | |
| "rewards/margins": 0.5438514947891235, | |
| "rewards/rejected": -1.8438999652862549, | |
| "step": 955 | |
| }, | |
| { | |
| "epoch": 0.5324459234608985, | |
| "grad_norm": 1.7318216562271118, | |
| "learning_rate": 2.659682464163138e-07, | |
| "logits/chosen": -1.0281853675842285, | |
| "logits/rejected": -1.0226585865020752, | |
| "logps/chosen": -469.50238037109375, | |
| "logps/rejected": -455.428955078125, | |
| "loss": 0.0578, | |
| "loss/chosen-sft": 1.7266900539398193, | |
| "loss/dpo": 0.057835765182971954, | |
| "rewards/accuracies": 0.706250011920929, | |
| "rewards/chosen": -1.2760839462280273, | |
| "rewards/margins": 0.5891796946525574, | |
| "rewards/rejected": -1.8652637004852295, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.5352190793122573, | |
| "grad_norm": 1.1333941221237183, | |
| "learning_rate": 2.6355139972785885e-07, | |
| "logits/chosen": -0.9396898150444031, | |
| "logits/rejected": -0.849763035774231, | |
| "logps/chosen": -423.746337890625, | |
| "logps/rejected": -441.26837158203125, | |
| "loss": 0.0365, | |
| "loss/chosen-sft": 1.7641470432281494, | |
| "loss/dpo": 0.03645756468176842, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": -1.1291122436523438, | |
| "rewards/margins": 0.7151139378547668, | |
| "rewards/rejected": -1.8442262411117554, | |
| "step": 965 | |
| }, | |
| { | |
| "epoch": 0.5379922351636162, | |
| "grad_norm": 1.0045897960662842, | |
| "learning_rate": 2.6113328211764235e-07, | |
| "logits/chosen": -1.0510257482528687, | |
| "logits/rejected": -1.082379698753357, | |
| "logps/chosen": -449.94561767578125, | |
| "logps/rejected": -432.62933349609375, | |
| "loss": 0.0391, | |
| "loss/chosen-sft": 1.7299797534942627, | |
| "loss/dpo": 0.039098747074604034, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -1.1797751188278198, | |
| "rewards/margins": 0.6614251136779785, | |
| "rewards/rejected": -1.8412002325057983, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.540765391014975, | |
| "grad_norm": 1.4864047765731812, | |
| "learning_rate": 2.5871412036949153e-07, | |
| "logits/chosen": -0.956852912902832, | |
| "logits/rejected": -0.8994203805923462, | |
| "logps/chosen": -428.5096130371094, | |
| "logps/rejected": -419.07330322265625, | |
| "loss": 0.0463, | |
| "loss/chosen-sft": 1.774070143699646, | |
| "loss/dpo": 0.04627335071563721, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -1.2709102630615234, | |
| "rewards/margins": 0.5661468505859375, | |
| "rewards/rejected": -1.837057113647461, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 0.5435385468663338, | |
| "grad_norm": 1.1833720207214355, | |
| "learning_rate": 2.5629414136515825e-07, | |
| "logits/chosen": -0.7828256487846375, | |
| "logits/rejected": -0.6822995543479919, | |
| "logps/chosen": -444.83697509765625, | |
| "logps/rejected": -446.899658203125, | |
| "loss": 0.0362, | |
| "loss/chosen-sft": 1.8193594217300415, | |
| "loss/dpo": 0.03620842099189758, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -1.2841075658798218, | |
| "rewards/margins": 0.670619547367096, | |
| "rewards/rejected": -1.9547271728515625, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.5463117027176927, | |
| "grad_norm": 1.4741407632827759, | |
| "learning_rate": 2.5387357206304077e-07, | |
| "logits/chosen": -0.9925743341445923, | |
| "logits/rejected": -0.7799035310745239, | |
| "logps/chosen": -441.3125, | |
| "logps/rejected": -427.43115234375, | |
| "loss": 0.0341, | |
| "loss/chosen-sft": 1.7645461559295654, | |
| "loss/dpo": 0.034115031361579895, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": -1.332214117050171, | |
| "rewards/margins": 0.5084002017974854, | |
| "rewards/rejected": -1.8406140804290771, | |
| "step": 985 | |
| }, | |
| { | |
| "epoch": 0.5490848585690515, | |
| "grad_norm": 2.5760786533355713, | |
| "learning_rate": 2.514526394768989e-07, | |
| "logits/chosen": -0.8460060358047485, | |
| "logits/rejected": -0.8547107577323914, | |
| "logps/chosen": -451.8817443847656, | |
| "logps/rejected": -449.7991638183594, | |
| "loss": 0.0474, | |
| "loss/chosen-sft": 1.7884471416473389, | |
| "loss/dpo": 0.04735780879855156, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -1.3389358520507812, | |
| "rewards/margins": 0.6204281449317932, | |
| "rewards/rejected": -1.9593639373779297, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.5518580144204104, | |
| "grad_norm": 1.9366549253463745, | |
| "learning_rate": 2.490315706545631e-07, | |
| "logits/chosen": -0.8773177862167358, | |
| "logits/rejected": -0.9315664172172546, | |
| "logps/chosen": -423.4974670410156, | |
| "logps/rejected": -398.5281066894531, | |
| "loss": 0.0402, | |
| "loss/chosen-sft": 1.7923309803009033, | |
| "loss/dpo": 0.04023148491978645, | |
| "rewards/accuracies": 0.6187499761581421, | |
| "rewards/chosen": -1.3467055559158325, | |
| "rewards/margins": 0.4263473451137543, | |
| "rewards/rejected": -1.7730529308319092, | |
| "step": 995 | |
| }, | |
| { | |
| "epoch": 0.5546311702717692, | |
| "grad_norm": 1.0191140174865723, | |
| "learning_rate": 2.466105926566405e-07, | |
| "logits/chosen": -0.8884904980659485, | |
| "logits/rejected": -0.8517130017280579, | |
| "logps/chosen": -412.35595703125, | |
| "logps/rejected": -398.6607360839844, | |
| "loss": 0.0425, | |
| "loss/chosen-sft": 1.7367550134658813, | |
| "loss/dpo": 0.0425165630877018, | |
| "rewards/accuracies": 0.643750011920929, | |
| "rewards/chosen": -1.207376480102539, | |
| "rewards/margins": 0.44255828857421875, | |
| "rewards/rejected": -1.6499347686767578, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.5574043261231281, | |
| "grad_norm": 1.5985798835754395, | |
| "learning_rate": 2.441899325352205e-07, | |
| "logits/chosen": -0.8503344655036926, | |
| "logits/rejected": -0.7258619070053101, | |
| "logps/chosen": -405.3533630371094, | |
| "logps/rejected": -423.4427795410156, | |
| "loss": 0.038, | |
| "loss/chosen-sft": 1.7873739004135132, | |
| "loss/dpo": 0.03796621412038803, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": -1.2806193828582764, | |
| "rewards/margins": 0.5564595460891724, | |
| "rewards/rejected": -1.8370788097381592, | |
| "step": 1005 | |
| }, | |
| { | |
| "epoch": 0.5601774819744869, | |
| "grad_norm": 2.0438764095306396, | |
| "learning_rate": 2.417698173125804e-07, | |
| "logits/chosen": -0.8835655450820923, | |
| "logits/rejected": -0.8549100160598755, | |
| "logps/chosen": -443.4007263183594, | |
| "logps/rejected": -412.9955139160156, | |
| "loss": 0.0476, | |
| "loss/chosen-sft": 1.781747817993164, | |
| "loss/dpo": 0.04762732982635498, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -1.2374616861343384, | |
| "rewards/margins": 0.514583945274353, | |
| "rewards/rejected": -1.7520456314086914, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.5629506378258459, | |
| "grad_norm": 2.029576539993286, | |
| "learning_rate": 2.393504739598938e-07, | |
| "logits/chosen": -0.8470139503479004, | |
| "logits/rejected": -0.8256238102912903, | |
| "logps/chosen": -434.9107971191406, | |
| "logps/rejected": -436.6976623535156, | |
| "loss": 0.047, | |
| "loss/chosen-sft": 1.7188364267349243, | |
| "loss/dpo": 0.04702724516391754, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -1.2836484909057617, | |
| "rewards/margins": 0.6734278202056885, | |
| "rewards/rejected": -1.9570764303207397, | |
| "step": 1015 | |
| }, | |
| { | |
| "epoch": 0.5657237936772047, | |
| "grad_norm": 1.5274903774261475, | |
| "learning_rate": 2.3693212937594436e-07, | |
| "logits/chosen": -0.6356366872787476, | |
| "logits/rejected": -0.5753142237663269, | |
| "logps/chosen": -411.853271484375, | |
| "logps/rejected": -436.2920837402344, | |
| "loss": 0.0407, | |
| "loss/chosen-sft": 1.8935306072235107, | |
| "loss/dpo": 0.040696293115615845, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -1.258502721786499, | |
| "rewards/margins": 0.4864071011543274, | |
| "rewards/rejected": -1.7449098825454712, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.5684969495285636, | |
| "grad_norm": 1.1039131879806519, | |
| "learning_rate": 2.3451501036584604e-07, | |
| "logits/chosen": -0.7690117359161377, | |
| "logits/rejected": -0.7704351544380188, | |
| "logps/chosen": -429.619140625, | |
| "logps/rejected": -413.0948791503906, | |
| "loss": 0.0304, | |
| "loss/chosen-sft": 1.7215017080307007, | |
| "loss/dpo": 0.030385727062821388, | |
| "rewards/accuracies": 0.706250011920929, | |
| "rewards/chosen": -1.2474838495254517, | |
| "rewards/margins": 0.6526226997375488, | |
| "rewards/rejected": -1.900106430053711, | |
| "step": 1025 | |
| }, | |
| { | |
| "epoch": 0.5712701053799224, | |
| "grad_norm": 2.5751590728759766, | |
| "learning_rate": 2.3209934361977194e-07, | |
| "logits/chosen": -0.9362251162528992, | |
| "logits/rejected": -0.8153011202812195, | |
| "logps/chosen": -431.95391845703125, | |
| "logps/rejected": -430.9054260253906, | |
| "loss": 0.0599, | |
| "loss/chosen-sft": 1.7196757793426514, | |
| "loss/dpo": 0.05986684560775757, | |
| "rewards/accuracies": 0.6812499761581421, | |
| "rewards/chosen": -1.2645565271377563, | |
| "rewards/margins": 0.46636122465133667, | |
| "rewards/rejected": -1.7309175729751587, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.5740432612312812, | |
| "grad_norm": 1.936566710472107, | |
| "learning_rate": 2.296853556916941e-07, | |
| "logits/chosen": -0.6564992070198059, | |
| "logits/rejected": -0.532072126865387, | |
| "logps/chosen": -406.83099365234375, | |
| "logps/rejected": -410.1024475097656, | |
| "loss": 0.0359, | |
| "loss/chosen-sft": 1.8352649211883545, | |
| "loss/dpo": 0.03590982407331467, | |
| "rewards/accuracies": 0.6812499761581421, | |
| "rewards/chosen": -1.2317606210708618, | |
| "rewards/margins": 0.5972455739974976, | |
| "rewards/rejected": -1.8290061950683594, | |
| "step": 1035 | |
| }, | |
| { | |
| "epoch": 0.5768164170826401, | |
| "grad_norm": 1.4029645919799805, | |
| "learning_rate": 2.2727327297813613e-07, | |
| "logits/chosen": -0.9582229852676392, | |
| "logits/rejected": -0.9992238283157349, | |
| "logps/chosen": -444.89752197265625, | |
| "logps/rejected": -424.8453674316406, | |
| "loss": 0.0355, | |
| "loss/chosen-sft": 1.804992914199829, | |
| "loss/dpo": 0.03546611964702606, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": -1.2539708614349365, | |
| "rewards/margins": 0.5192979574203491, | |
| "rewards/rejected": -1.7732689380645752, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.5795895729339989, | |
| "grad_norm": 1.8364492654800415, | |
| "learning_rate": 2.2486332169694095e-07, | |
| "logits/chosen": -0.7399333715438843, | |
| "logits/rejected": -0.7711300253868103, | |
| "logps/chosen": -425.58721923828125, | |
| "logps/rejected": -407.39349365234375, | |
| "loss": 0.0461, | |
| "loss/chosen-sft": 1.7470451593399048, | |
| "loss/dpo": 0.046109430491924286, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": -1.3421345949172974, | |
| "rewards/margins": 0.5037655830383301, | |
| "rewards/rejected": -1.8459001779556274, | |
| "step": 1045 | |
| }, | |
| { | |
| "epoch": 0.5823627287853578, | |
| "grad_norm": 1.4886951446533203, | |
| "learning_rate": 2.224557278660539e-07, | |
| "logits/chosen": -0.7063448429107666, | |
| "logits/rejected": -0.7334424257278442, | |
| "logps/chosen": -429.91693115234375, | |
| "logps/rejected": -450.59893798828125, | |
| "loss": 0.0326, | |
| "loss/chosen-sft": 1.7843490839004517, | |
| "loss/dpo": 0.03259655088186264, | |
| "rewards/accuracies": 0.7437499761581421, | |
| "rewards/chosen": -1.2502191066741943, | |
| "rewards/margins": 0.8139023780822754, | |
| "rewards/rejected": -2.064121723175049, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.5851358846367166, | |
| "grad_norm": 1.7785381078720093, | |
| "learning_rate": 2.200507172823268e-07, | |
| "logits/chosen": -0.822382926940918, | |
| "logits/rejected": -0.698542058467865, | |
| "logps/chosen": -440.42431640625, | |
| "logps/rejected": -438.05169677734375, | |
| "loss": 0.0392, | |
| "loss/chosen-sft": 1.7739006280899048, | |
| "loss/dpo": 0.0392305888235569, | |
| "rewards/accuracies": 0.643750011920929, | |
| "rewards/chosen": -1.3763277530670166, | |
| "rewards/margins": 0.5221567153930664, | |
| "rewards/rejected": -1.8984845876693726, | |
| "step": 1055 | |
| }, | |
| { | |
| "epoch": 0.5879090404880755, | |
| "grad_norm": 1.6011799573898315, | |
| "learning_rate": 2.176485155003405e-07, | |
| "logits/chosen": -0.7036235928535461, | |
| "logits/rejected": -0.6139532327651978, | |
| "logps/chosen": -427.38519287109375, | |
| "logps/rejected": -418.2265625, | |
| "loss": 0.0372, | |
| "loss/chosen-sft": 1.8186620473861694, | |
| "loss/dpo": 0.03721202537417412, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -1.3542938232421875, | |
| "rewards/margins": 0.4948766827583313, | |
| "rewards/rejected": -1.8491706848144531, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.5906821963394343, | |
| "grad_norm": 1.0207229852676392, | |
| "learning_rate": 2.1524934781125164e-07, | |
| "logits/chosen": -0.977032482624054, | |
| "logits/rejected": -0.9711772799491882, | |
| "logps/chosen": -445.39263916015625, | |
| "logps/rejected": -431.14007568359375, | |
| "loss": 0.0468, | |
| "loss/chosen-sft": 1.7722899913787842, | |
| "loss/dpo": 0.04676477983593941, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -1.318066120147705, | |
| "rewards/margins": 0.6112589836120605, | |
| "rewards/rejected": -1.9293251037597656, | |
| "step": 1065 | |
| }, | |
| { | |
| "epoch": 0.5934553521907932, | |
| "grad_norm": 1.426311731338501, | |
| "learning_rate": 2.1285343922166393e-07, | |
| "logits/chosen": -0.8198713064193726, | |
| "logits/rejected": -0.7067451477050781, | |
| "logps/chosen": -472.1852111816406, | |
| "logps/rejected": -444.33935546875, | |
| "loss": 0.0446, | |
| "loss/chosen-sft": 1.7742096185684204, | |
| "loss/dpo": 0.04458652064204216, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -1.3653810024261475, | |
| "rewards/margins": 0.4912968575954437, | |
| "rewards/rejected": -1.8566780090332031, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.596228508042152, | |
| "grad_norm": 1.9216793775558472, | |
| "learning_rate": 2.104610144325252e-07, | |
| "logits/chosen": -0.9030396342277527, | |
| "logits/rejected": -0.9881137609481812, | |
| "logps/chosen": -442.3429260253906, | |
| "logps/rejected": -423.65838623046875, | |
| "loss": 0.0469, | |
| "loss/chosen-sft": 1.7527765035629272, | |
| "loss/dpo": 0.0468582846224308, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -1.2682268619537354, | |
| "rewards/margins": 0.5394538640975952, | |
| "rewards/rejected": -1.8076804876327515, | |
| "step": 1075 | |
| }, | |
| { | |
| "epoch": 0.5990016638935108, | |
| "grad_norm": 1.1101206541061401, | |
| "learning_rate": 2.0807229781805415e-07, | |
| "logits/chosen": -0.9745734930038452, | |
| "logits/rejected": -0.7166577577590942, | |
| "logps/chosen": -407.92266845703125, | |
| "logps/rejected": -429.6647033691406, | |
| "loss": 0.0472, | |
| "loss/chosen-sft": 1.707130789756775, | |
| "loss/dpo": 0.04721622169017792, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -1.232958197593689, | |
| "rewards/margins": 0.6351754069328308, | |
| "rewards/rejected": -1.8681339025497437, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.6017748197448697, | |
| "grad_norm": 0.9766126871109009, | |
| "learning_rate": 2.056875134046976e-07, | |
| "logits/chosen": -0.772416889667511, | |
| "logits/rejected": -0.6425715684890747, | |
| "logps/chosen": -451.41204833984375, | |
| "logps/rejected": -415.15338134765625, | |
| "loss": 0.0364, | |
| "loss/chosen-sft": 1.8030576705932617, | |
| "loss/dpo": 0.036396466195583344, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -1.2050635814666748, | |
| "rewards/margins": 0.5893860459327698, | |
| "rewards/rejected": -1.7944495677947998, | |
| "step": 1085 | |
| }, | |
| { | |
| "epoch": 0.6045479755962285, | |
| "grad_norm": 1.6555626392364502, | |
| "learning_rate": 2.0330688485011926e-07, | |
| "logits/chosen": -0.8580873608589172, | |
| "logits/rejected": -0.8450067639350891, | |
| "logps/chosen": -426.8406677246094, | |
| "logps/rejected": -436.485595703125, | |
| "loss": 0.0461, | |
| "loss/chosen-sft": 1.7769439220428467, | |
| "loss/dpo": 0.04613155499100685, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -1.2692389488220215, | |
| "rewards/margins": 0.5166956186294556, | |
| "rewards/rejected": -1.7859344482421875, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.6073211314475874, | |
| "grad_norm": 1.8412526845932007, | |
| "learning_rate": 2.0093063542222508e-07, | |
| "logits/chosen": -0.8168280720710754, | |
| "logits/rejected": -0.662460446357727, | |
| "logps/chosen": -419.52154541015625, | |
| "logps/rejected": -474.30926513671875, | |
| "loss": 0.037, | |
| "loss/chosen-sft": 1.7386878728866577, | |
| "loss/dpo": 0.03695227950811386, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": -1.2162078619003296, | |
| "rewards/margins": 0.7796363830566406, | |
| "rewards/rejected": -1.9958442449569702, | |
| "step": 1095 | |
| }, | |
| { | |
| "epoch": 0.6100942872989462, | |
| "grad_norm": 0.9794536232948303, | |
| "learning_rate": 1.9855898797822295e-07, | |
| "logits/chosen": -0.6690505743026733, | |
| "logits/rejected": -0.6739727258682251, | |
| "logps/chosen": -416.6888732910156, | |
| "logps/rejected": -418.7158203125, | |
| "loss": 0.0298, | |
| "loss/chosen-sft": 1.8950073719024658, | |
| "loss/dpo": 0.029840370640158653, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -1.3056199550628662, | |
| "rewards/margins": 0.519034743309021, | |
| "rewards/rejected": -1.8246548175811768, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.6128674431503051, | |
| "grad_norm": 1.5625451803207397, | |
| "learning_rate": 1.9619216494372258e-07, | |
| "logits/chosen": -0.8960970044136047, | |
| "logits/rejected": -0.851621150970459, | |
| "logps/chosen": -416.8877868652344, | |
| "logps/rejected": -431.9337463378906, | |
| "loss": 0.0397, | |
| "loss/chosen-sft": 1.755854606628418, | |
| "loss/dpo": 0.039700526744127274, | |
| "rewards/accuracies": 0.6312500238418579, | |
| "rewards/chosen": -1.3231550455093384, | |
| "rewards/margins": 0.486478716135025, | |
| "rewards/rejected": -1.8096338510513306, | |
| "step": 1105 | |
| }, | |
| { | |
| "epoch": 0.6156405990016639, | |
| "grad_norm": 0.9809625744819641, | |
| "learning_rate": 1.9383038829187523e-07, | |
| "logits/chosen": -0.8659313321113586, | |
| "logits/rejected": -0.7378997802734375, | |
| "logps/chosen": -466.25006103515625, | |
| "logps/rejected": -437.8448181152344, | |
| "loss": 0.0338, | |
| "loss/chosen-sft": 1.7783132791519165, | |
| "loss/dpo": 0.03376453369855881, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -1.262998342514038, | |
| "rewards/margins": 0.7884011268615723, | |
| "rewards/rejected": -2.0513997077941895, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.6184137548530227, | |
| "grad_norm": 1.4257228374481201, | |
| "learning_rate": 1.914738795225556e-07, | |
| "logits/chosen": -0.8100174069404602, | |
| "logits/rejected": -0.9278467893600464, | |
| "logps/chosen": -423.9007263183594, | |
| "logps/rejected": -412.9117126464844, | |
| "loss": 0.0436, | |
| "loss/chosen-sft": 1.7985398769378662, | |
| "loss/dpo": 0.04363011568784714, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": -1.3259937763214111, | |
| "rewards/margins": 0.5338584184646606, | |
| "rewards/rejected": -1.8598521947860718, | |
| "step": 1115 | |
| }, | |
| { | |
| "epoch": 0.6211869107043816, | |
| "grad_norm": 1.4828506708145142, | |
| "learning_rate": 1.8912285964158856e-07, | |
| "logits/chosen": -0.8464866876602173, | |
| "logits/rejected": -0.8284673690795898, | |
| "logps/chosen": -445.93475341796875, | |
| "logps/rejected": -457.19384765625, | |
| "loss": 0.0344, | |
| "loss/chosen-sft": 1.7983877658843994, | |
| "loss/dpo": 0.03439199924468994, | |
| "rewards/accuracies": 0.7437499761581421, | |
| "rewards/chosen": -1.3726943731307983, | |
| "rewards/margins": 0.6521696448326111, | |
| "rewards/rejected": -2.0248641967773438, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.6239600665557404, | |
| "grad_norm": 1.2299528121948242, | |
| "learning_rate": 1.8677754914002231e-07, | |
| "logits/chosen": -0.9932361841201782, | |
| "logits/rejected": -1.02671480178833, | |
| "logps/chosen": -434.72509765625, | |
| "logps/rejected": -444.66766357421875, | |
| "loss": 0.0389, | |
| "loss/chosen-sft": 1.8104565143585205, | |
| "loss/dpo": 0.038856539875268936, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": -1.2957760095596313, | |
| "rewards/margins": 0.6685920357704163, | |
| "rewards/rejected": -1.9643681049346924, | |
| "step": 1125 | |
| }, | |
| { | |
| "epoch": 0.6267332224070993, | |
| "grad_norm": 1.7034136056900024, | |
| "learning_rate": 1.8443816797344896e-07, | |
| "logits/chosen": -0.8144285082817078, | |
| "logits/rejected": -0.6793054342269897, | |
| "logps/chosen": -461.71258544921875, | |
| "logps/rejected": -455.9671325683594, | |
| "loss": 0.0474, | |
| "loss/chosen-sft": 1.8361161947250366, | |
| "loss/dpo": 0.047358639538288116, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -1.4464315176010132, | |
| "rewards/margins": 0.4728009104728699, | |
| "rewards/rejected": -1.9192323684692383, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.6295063782584581, | |
| "grad_norm": 1.4374454021453857, | |
| "learning_rate": 1.821049355413767e-07, | |
| "logits/chosen": -0.8650090098381042, | |
| "logits/rejected": -0.6468401551246643, | |
| "logps/chosen": -443.62274169921875, | |
| "logps/rejected": -443.31427001953125, | |
| "loss": 0.0332, | |
| "loss/chosen-sft": 1.8839133977890015, | |
| "loss/dpo": 0.03318975493311882, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -1.5003621578216553, | |
| "rewards/margins": 0.500784158706665, | |
| "rewards/rejected": -2.0011465549468994, | |
| "step": 1135 | |
| }, | |
| { | |
| "epoch": 0.632279534109817, | |
| "grad_norm": 1.7502909898757935, | |
| "learning_rate": 1.7977807066665267e-07, | |
| "logits/chosen": -0.9573124647140503, | |
| "logits/rejected": -0.8072667121887207, | |
| "logps/chosen": -418.649658203125, | |
| "logps/rejected": -418.47589111328125, | |
| "loss": 0.0452, | |
| "loss/chosen-sft": 1.7528860569000244, | |
| "loss/dpo": 0.045168764889240265, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -1.3320482969284058, | |
| "rewards/margins": 0.5464748740196228, | |
| "rewards/rejected": -1.8785232305526733, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.6350526899611758, | |
| "grad_norm": 1.0295249223709106, | |
| "learning_rate": 1.7745779157494096e-07, | |
| "logits/chosen": -0.8213936686515808, | |
| "logits/rejected": -0.7543411254882812, | |
| "logps/chosen": -437.8804626464844, | |
| "logps/rejected": -436.56982421875, | |
| "loss": 0.0303, | |
| "loss/chosen-sft": 1.7841157913208008, | |
| "loss/dpo": 0.030281806364655495, | |
| "rewards/accuracies": 0.706250011920929, | |
| "rewards/chosen": -1.393178939819336, | |
| "rewards/margins": 0.679972231388092, | |
| "rewards/rejected": -2.073151111602783, | |
| "step": 1145 | |
| }, | |
| { | |
| "epoch": 0.6378258458125347, | |
| "grad_norm": 1.2215420007705688, | |
| "learning_rate": 1.7514431587425622e-07, | |
| "logits/chosen": -0.7637904286384583, | |
| "logits/rejected": -0.8227758407592773, | |
| "logps/chosen": -429.562255859375, | |
| "logps/rejected": -432.89971923828125, | |
| "loss": 0.0361, | |
| "loss/chosen-sft": 1.8592332601547241, | |
| "loss/dpo": 0.03613410145044327, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": -1.4023336172103882, | |
| "rewards/margins": 0.5858246684074402, | |
| "rewards/rejected": -1.9881582260131836, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.6405990016638935, | |
| "grad_norm": 1.6888597011566162, | |
| "learning_rate": 1.728378605345553e-07, | |
| "logits/chosen": -0.7143627405166626, | |
| "logits/rejected": -0.7377561330795288, | |
| "logps/chosen": -465.07403564453125, | |
| "logps/rejected": -435.6494140625, | |
| "loss": 0.0425, | |
| "loss/chosen-sft": 1.8083655834197998, | |
| "loss/dpo": 0.04253358393907547, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -1.460288166999817, | |
| "rewards/margins": 0.5263352990150452, | |
| "rewards/rejected": -1.9866234064102173, | |
| "step": 1155 | |
| }, | |
| { | |
| "epoch": 0.6433721575152523, | |
| "grad_norm": 1.5432595014572144, | |
| "learning_rate": 1.705386418673882e-07, | |
| "logits/chosen": -0.8244765996932983, | |
| "logits/rejected": -0.7667987942695618, | |
| "logps/chosen": -437.60687255859375, | |
| "logps/rejected": -436.830078125, | |
| "loss": 0.0467, | |
| "loss/chosen-sft": 1.7204382419586182, | |
| "loss/dpo": 0.04673684015870094, | |
| "rewards/accuracies": 0.643750011920929, | |
| "rewards/chosen": -1.34583580493927, | |
| "rewards/margins": 0.5071083903312683, | |
| "rewards/rejected": -1.8529441356658936, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.6461453133666112, | |
| "grad_norm": 1.2096738815307617, | |
| "learning_rate": 1.6824687550561208e-07, | |
| "logits/chosen": -0.7485244274139404, | |
| "logits/rejected": -0.7693239450454712, | |
| "logps/chosen": -464.8460388183594, | |
| "logps/rejected": -450.24713134765625, | |
| "loss": 0.0358, | |
| "loss/chosen-sft": 1.8007644414901733, | |
| "loss/dpo": 0.03579232841730118, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": -1.335431694984436, | |
| "rewards/margins": 0.6632755994796753, | |
| "rewards/rejected": -1.9987071752548218, | |
| "step": 1165 | |
| }, | |
| { | |
| "epoch": 0.64891846921797, | |
| "grad_norm": 1.1814310550689697, | |
| "learning_rate": 1.659627763831671e-07, | |
| "logits/chosen": -0.7120985984802246, | |
| "logits/rejected": -0.6295477747917175, | |
| "logps/chosen": -436.17083740234375, | |
| "logps/rejected": -424.6572265625, | |
| "loss": 0.0357, | |
| "loss/chosen-sft": 1.8579403162002563, | |
| "loss/dpo": 0.035725630819797516, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -1.285359263420105, | |
| "rewards/margins": 0.5941758155822754, | |
| "rewards/rejected": -1.8795350790023804, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.6516916250693289, | |
| "grad_norm": 1.1809978485107422, | |
| "learning_rate": 1.6368655871491975e-07, | |
| "logits/chosen": -0.7668045163154602, | |
| "logits/rejected": -0.7772132754325867, | |
| "logps/chosen": -462.0221252441406, | |
| "logps/rejected": -435.96954345703125, | |
| "loss": 0.0424, | |
| "loss/chosen-sft": 1.831925630569458, | |
| "loss/dpo": 0.04238344356417656, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -1.3251338005065918, | |
| "rewards/margins": 0.5784658193588257, | |
| "rewards/rejected": -1.903599500656128, | |
| "step": 1175 | |
| }, | |
| { | |
| "epoch": 0.6544647809206877, | |
| "grad_norm": 1.225167155265808, | |
| "learning_rate": 1.6141843597657172e-07, | |
| "logits/chosen": -0.8141440153121948, | |
| "logits/rejected": -0.7542480230331421, | |
| "logps/chosen": -421.45635986328125, | |
| "logps/rejected": -441.95025634765625, | |
| "loss": 0.0411, | |
| "loss/chosen-sft": 1.7617318630218506, | |
| "loss/dpo": 0.04108492285013199, | |
| "rewards/accuracies": 0.706250011920929, | |
| "rewards/chosen": -1.3422725200653076, | |
| "rewards/margins": 0.5261791944503784, | |
| "rewards/rejected": -1.8684518337249756, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.6572379367720466, | |
| "grad_norm": 1.3029569387435913, | |
| "learning_rate": 1.5915862088463968e-07, | |
| "logits/chosen": -0.996296226978302, | |
| "logits/rejected": -0.9346402287483215, | |
| "logps/chosen": -446.56622314453125, | |
| "logps/rejected": -449.5144958496094, | |
| "loss": 0.0419, | |
| "loss/chosen-sft": 1.6928361654281616, | |
| "loss/dpo": 0.041938044130802155, | |
| "rewards/accuracies": 0.6937500238418579, | |
| "rewards/chosen": -1.3604621887207031, | |
| "rewards/margins": 0.7254354357719421, | |
| "rewards/rejected": -2.085897922515869, | |
| "step": 1185 | |
| }, | |
| { | |
| "epoch": 0.6600110926234054, | |
| "grad_norm": 1.26889967918396, | |
| "learning_rate": 1.5690732537650546e-07, | |
| "logits/chosen": -0.7706629037857056, | |
| "logits/rejected": -0.7561334371566772, | |
| "logps/chosen": -435.36602783203125, | |
| "logps/rejected": -470.29901123046875, | |
| "loss": 0.0294, | |
| "loss/chosen-sft": 1.8877112865447998, | |
| "loss/dpo": 0.029442256316542625, | |
| "rewards/accuracies": 0.7562500238418579, | |
| "rewards/chosen": -1.4551079273223877, | |
| "rewards/margins": 0.6810146570205688, | |
| "rewards/rejected": -2.136122703552246, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.6627842484747642, | |
| "grad_norm": 1.2422409057617188, | |
| "learning_rate": 1.546647605905393e-07, | |
| "logits/chosen": -0.8117157816886902, | |
| "logits/rejected": -0.7619314193725586, | |
| "logps/chosen": -432.6480407714844, | |
| "logps/rejected": -441.0057678222656, | |
| "loss": 0.0352, | |
| "loss/chosen-sft": 1.8744285106658936, | |
| "loss/dpo": 0.035161614418029785, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": -1.305821180343628, | |
| "rewards/margins": 0.5970739722251892, | |
| "rewards/rejected": -1.9028953313827515, | |
| "step": 1195 | |
| }, | |
| { | |
| "epoch": 0.6655574043261231, | |
| "grad_norm": 0.8602780699729919, | |
| "learning_rate": 1.52431136846298e-07, | |
| "logits/chosen": -0.9312012791633606, | |
| "logits/rejected": -0.7855014204978943, | |
| "logps/chosen": -424.40911865234375, | |
| "logps/rejected": -425.8111877441406, | |
| "loss": 0.0254, | |
| "loss/chosen-sft": 1.8612302541732788, | |
| "loss/dpo": 0.025435030460357666, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -1.3491990566253662, | |
| "rewards/margins": 0.6118738055229187, | |
| "rewards/rejected": -1.9610726833343506, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.6683305601774819, | |
| "grad_norm": 1.010581135749817, | |
| "learning_rate": 1.5020666362480084e-07, | |
| "logits/chosen": -0.8483907580375671, | |
| "logits/rejected": -0.7495467066764832, | |
| "logps/chosen": -495.5743103027344, | |
| "logps/rejected": -482.33209228515625, | |
| "loss": 0.0387, | |
| "loss/chosen-sft": 1.8878498077392578, | |
| "loss/dpo": 0.03866366669535637, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": -1.4326591491699219, | |
| "rewards/margins": 0.7254186868667603, | |
| "rewards/rejected": -2.1580777168273926, | |
| "step": 1205 | |
| }, | |
| { | |
| "epoch": 0.6711037160288408, | |
| "grad_norm": 1.6181249618530273, | |
| "learning_rate": 1.4799154954888222e-07, | |
| "logits/chosen": -1.0427324771881104, | |
| "logits/rejected": -0.809046745300293, | |
| "logps/chosen": -415.38909912109375, | |
| "logps/rejected": -420.14453125, | |
| "loss": 0.0316, | |
| "loss/chosen-sft": 1.8586212396621704, | |
| "loss/dpo": 0.031649235635995865, | |
| "rewards/accuracies": 0.643750011920929, | |
| "rewards/chosen": -1.452163577079773, | |
| "rewards/margins": 0.537935197353363, | |
| "rewards/rejected": -1.9900987148284912, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.6738768718801996, | |
| "grad_norm": 0.7974721789360046, | |
| "learning_rate": 1.4578600236362697e-07, | |
| "logits/chosen": -0.8521813154220581, | |
| "logits/rejected": -0.6828973889350891, | |
| "logps/chosen": -456.5556640625, | |
| "logps/rejected": -470.51971435546875, | |
| "loss": 0.0311, | |
| "loss/chosen-sft": 1.945521593093872, | |
| "loss/dpo": 0.03109545074403286, | |
| "rewards/accuracies": 0.706250011920929, | |
| "rewards/chosen": -1.4207074642181396, | |
| "rewards/margins": 0.7211933732032776, | |
| "rewards/rejected": -2.1419005393981934, | |
| "step": 1215 | |
| }, | |
| { | |
| "epoch": 0.6766500277315585, | |
| "grad_norm": 1.4059048891067505, | |
| "learning_rate": 1.435902289168861e-07, | |
| "logits/chosen": -0.9025594592094421, | |
| "logits/rejected": -0.835253894329071, | |
| "logps/chosen": -480.26593017578125, | |
| "logps/rejected": -467.8526306152344, | |
| "loss": 0.0337, | |
| "loss/chosen-sft": 1.9281394481658936, | |
| "loss/dpo": 0.033659420907497406, | |
| "rewards/accuracies": 0.706250011920929, | |
| "rewards/chosen": -1.5400809049606323, | |
| "rewards/margins": 0.5880559086799622, | |
| "rewards/rejected": -2.1281368732452393, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.6794231835829173, | |
| "grad_norm": 1.8963831663131714, | |
| "learning_rate": 1.4140443513987807e-07, | |
| "logits/chosen": -1.0080327987670898, | |
| "logits/rejected": -0.9464845657348633, | |
| "logps/chosen": -455.85968017578125, | |
| "logps/rejected": -438.92840576171875, | |
| "loss": 0.0337, | |
| "loss/chosen-sft": 1.840531349182129, | |
| "loss/dpo": 0.033664338290691376, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -1.5052850246429443, | |
| "rewards/margins": 0.6105053424835205, | |
| "rewards/rejected": -2.115790843963623, | |
| "step": 1225 | |
| }, | |
| { | |
| "epoch": 0.6821963394342762, | |
| "grad_norm": 1.1899809837341309, | |
| "learning_rate": 1.3922882602787523e-07, | |
| "logits/chosen": -0.7441499829292297, | |
| "logits/rejected": -0.7358977198600769, | |
| "logps/chosen": -479.97064208984375, | |
| "logps/rejected": -476.0712890625, | |
| "loss": 0.0317, | |
| "loss/chosen-sft": 1.9372377395629883, | |
| "loss/dpo": 0.03166855126619339, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -1.5703338384628296, | |
| "rewards/margins": 0.6448060870170593, | |
| "rewards/rejected": -2.2151401042938232, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.684969495285635, | |
| "grad_norm": 1.3064395189285278, | |
| "learning_rate": 1.3706360562097797e-07, | |
| "logits/chosen": -0.9031554460525513, | |
| "logits/rejected": -0.8757888674736023, | |
| "logps/chosen": -471.3316345214844, | |
| "logps/rejected": -446.17120361328125, | |
| "loss": 0.0316, | |
| "loss/chosen-sft": 1.8330347537994385, | |
| "loss/dpo": 0.031595904380083084, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -1.457507848739624, | |
| "rewards/margins": 0.67474764585495, | |
| "rewards/rejected": -2.1322555541992188, | |
| "step": 1235 | |
| }, | |
| { | |
| "epoch": 0.687742651136994, | |
| "grad_norm": 1.1494730710983276, | |
| "learning_rate": 1.3490897698497983e-07, | |
| "logits/chosen": -0.9943927526473999, | |
| "logits/rejected": -0.9441978335380554, | |
| "logps/chosen": -467.54266357421875, | |
| "logps/rejected": -473.6890563964844, | |
| "loss": 0.0293, | |
| "loss/chosen-sft": 1.9200853109359741, | |
| "loss/dpo": 0.02934952639043331, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -1.563489556312561, | |
| "rewards/margins": 0.7292519211769104, | |
| "rewards/rejected": -2.292741298675537, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.6905158069883528, | |
| "grad_norm": 1.2559505701065063, | |
| "learning_rate": 1.3276514219232142e-07, | |
| "logits/chosen": -0.6624242067337036, | |
| "logits/rejected": -0.8091727495193481, | |
| "logps/chosen": -471.83941650390625, | |
| "logps/rejected": -457.29510498046875, | |
| "loss": 0.0342, | |
| "loss/chosen-sft": 1.9265739917755127, | |
| "loss/dpo": 0.034207794815301895, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": -1.4902698993682861, | |
| "rewards/margins": 0.6235011219978333, | |
| "rewards/rejected": -2.1137709617614746, | |
| "step": 1245 | |
| }, | |
| { | |
| "epoch": 0.6932889628397116, | |
| "grad_norm": 1.1430951356887817, | |
| "learning_rate": 1.3063230230314027e-07, | |
| "logits/chosen": -0.9455582499504089, | |
| "logits/rejected": -0.8169571757316589, | |
| "logps/chosen": -444.04193115234375, | |
| "logps/rejected": -469.19354248046875, | |
| "loss": 0.0347, | |
| "loss/chosen-sft": 1.893571138381958, | |
| "loss/dpo": 0.034717872738838196, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": -1.5202562808990479, | |
| "rewards/margins": 0.6696333289146423, | |
| "rewards/rejected": -2.189889669418335, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.6960621186910705, | |
| "grad_norm": 1.2610682249069214, | |
| "learning_rate": 1.2851065734641364e-07, | |
| "logits/chosen": -0.9905312657356262, | |
| "logits/rejected": -0.8740830421447754, | |
| "logps/chosen": -422.3661193847656, | |
| "logps/rejected": -433.2808532714844, | |
| "loss": 0.0288, | |
| "loss/chosen-sft": 1.7975590229034424, | |
| "loss/dpo": 0.028823431581258774, | |
| "rewards/accuracies": 0.6937500238418579, | |
| "rewards/chosen": -1.442055583000183, | |
| "rewards/margins": 0.5924633741378784, | |
| "rewards/rejected": -2.0345191955566406, | |
| "step": 1255 | |
| }, | |
| { | |
| "epoch": 0.6988352745424293, | |
| "grad_norm": 0.9623591303825378, | |
| "learning_rate": 1.2640040630119916e-07, | |
| "logits/chosen": -0.7741963267326355, | |
| "logits/rejected": -0.8283836245536804, | |
| "logps/chosen": -455.494140625, | |
| "logps/rejected": -467.2837829589844, | |
| "loss": 0.0324, | |
| "loss/chosen-sft": 2.0400707721710205, | |
| "loss/dpo": 0.0323946438729763, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": -1.560781717300415, | |
| "rewards/margins": 0.5312785506248474, | |
| "rewards/rejected": -2.0920603275299072, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.7016084303937882, | |
| "grad_norm": 1.337944746017456, | |
| "learning_rate": 1.243017470779729e-07, | |
| "logits/chosen": -1.0406231880187988, | |
| "logits/rejected": -0.8966327905654907, | |
| "logps/chosen": -426.70361328125, | |
| "logps/rejected": -453.76031494140625, | |
| "loss": 0.0359, | |
| "loss/chosen-sft": 1.8434947729110718, | |
| "loss/dpo": 0.03585369139909744, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -1.4888029098510742, | |
| "rewards/margins": 0.7223333120346069, | |
| "rewards/rejected": -2.2111363410949707, | |
| "step": 1265 | |
| }, | |
| { | |
| "epoch": 0.704381586245147, | |
| "grad_norm": 0.8494482040405273, | |
| "learning_rate": 1.222148765000694e-07, | |
| "logits/chosen": -0.8672950863838196, | |
| "logits/rejected": -0.8264138102531433, | |
| "logps/chosen": -444.71246337890625, | |
| "logps/rejected": -443.8768615722656, | |
| "loss": 0.0316, | |
| "loss/chosen-sft": 1.826744794845581, | |
| "loss/dpo": 0.03159435838460922, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -1.518831491470337, | |
| "rewards/margins": 0.6539059281349182, | |
| "rewards/rejected": -2.1727373600006104, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 0.7071547420965059, | |
| "grad_norm": 1.5342941284179688, | |
| "learning_rate": 1.2013999028522104e-07, | |
| "logits/chosen": -0.9066953659057617, | |
| "logits/rejected": -0.9141793251037598, | |
| "logps/chosen": -475.5211486816406, | |
| "logps/rejected": -463.80810546875, | |
| "loss": 0.0517, | |
| "loss/chosen-sft": 1.7181308269500732, | |
| "loss/dpo": 0.05173926800489426, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -1.5922820568084717, | |
| "rewards/margins": 0.48251956701278687, | |
| "rewards/rejected": -2.0748016834259033, | |
| "step": 1275 | |
| }, | |
| { | |
| "epoch": 0.7099278979478647, | |
| "grad_norm": 1.1075633764266968, | |
| "learning_rate": 1.1807728302720418e-07, | |
| "logits/chosen": -0.8835026025772095, | |
| "logits/rejected": -0.8582647442817688, | |
| "logps/chosen": -450.48870849609375, | |
| "logps/rejected": -440.99871826171875, | |
| "loss": 0.0319, | |
| "loss/chosen-sft": 1.9053627252578735, | |
| "loss/dpo": 0.03186877816915512, | |
| "rewards/accuracies": 0.668749988079071, | |
| "rewards/chosen": -1.4612672328948975, | |
| "rewards/margins": 0.6721949577331543, | |
| "rewards/rejected": -2.133462429046631, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.7127010537992235, | |
| "grad_norm": 2.3948493003845215, | |
| "learning_rate": 1.1602694817758773e-07, | |
| "logits/chosen": -0.9616036415100098, | |
| "logits/rejected": -0.9405485987663269, | |
| "logps/chosen": -455.7767639160156, | |
| "logps/rejected": -471.544677734375, | |
| "loss": 0.0506, | |
| "loss/chosen-sft": 1.7842986583709717, | |
| "loss/dpo": 0.0505877360701561, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -1.4995403289794922, | |
| "rewards/margins": 0.608048141002655, | |
| "rewards/rejected": -2.107588291168213, | |
| "step": 1285 | |
| }, | |
| { | |
| "epoch": 0.7154742096505824, | |
| "grad_norm": 1.8153194189071655, | |
| "learning_rate": 1.139891780275912e-07, | |
| "logits/chosen": -1.0241485834121704, | |
| "logits/rejected": -0.897274374961853, | |
| "logps/chosen": -420.4510803222656, | |
| "logps/rejected": -436.85003662109375, | |
| "loss": 0.0345, | |
| "loss/chosen-sft": 1.7239220142364502, | |
| "loss/dpo": 0.034504033625125885, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": -1.3688867092132568, | |
| "rewards/margins": 0.6113638877868652, | |
| "rewards/rejected": -1.980250597000122, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.7182473655019412, | |
| "grad_norm": 1.2911590337753296, | |
| "learning_rate": 1.119641636900502e-07, | |
| "logits/chosen": -0.9305152893066406, | |
| "logits/rejected": -0.9541549682617188, | |
| "logps/chosen": -439.82513427734375, | |
| "logps/rejected": -432.65167236328125, | |
| "loss": 0.0283, | |
| "loss/chosen-sft": 1.806671142578125, | |
| "loss/dpo": 0.02832832559943199, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -1.3666812181472778, | |
| "rewards/margins": 0.6614538431167603, | |
| "rewards/rejected": -2.028134822845459, | |
| "step": 1295 | |
| }, | |
| { | |
| "epoch": 0.7210205213533001, | |
| "grad_norm": 1.451220989227295, | |
| "learning_rate": 1.0995209508149306e-07, | |
| "logits/chosen": -0.9968698620796204, | |
| "logits/rejected": -0.9646986722946167, | |
| "logps/chosen": -460.3720703125, | |
| "logps/rejected": -457.362548828125, | |
| "loss": 0.0421, | |
| "loss/chosen-sft": 1.8072360754013062, | |
| "loss/dpo": 0.04210533946752548, | |
| "rewards/accuracies": 0.668749988079071, | |
| "rewards/chosen": -1.4575574398040771, | |
| "rewards/margins": 0.5370498895645142, | |
| "rewards/rejected": -1.9946073293685913, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.7237936772046589, | |
| "grad_norm": 1.028705358505249, | |
| "learning_rate": 1.0795316090432893e-07, | |
| "logits/chosen": -0.7744545936584473, | |
| "logits/rejected": -0.6203697323799133, | |
| "logps/chosen": -474.93524169921875, | |
| "logps/rejected": -458.9364318847656, | |
| "loss": 0.0387, | |
| "loss/chosen-sft": 1.8132940530776978, | |
| "loss/dpo": 0.03869001194834709, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -1.3626388311386108, | |
| "rewards/margins": 0.6128473281860352, | |
| "rewards/rejected": -1.975486397743225, | |
| "step": 1305 | |
| }, | |
| { | |
| "epoch": 0.7265668330560178, | |
| "grad_norm": 1.3705142736434937, | |
| "learning_rate": 1.0596754862915136e-07, | |
| "logits/chosen": -1.0295013189315796, | |
| "logits/rejected": -0.9204280972480774, | |
| "logps/chosen": -434.82366943359375, | |
| "logps/rejected": -449.77423095703125, | |
| "loss": 0.0357, | |
| "loss/chosen-sft": 1.7468935251235962, | |
| "loss/dpo": 0.035688553005456924, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -1.2829020023345947, | |
| "rewards/margins": 0.707922101020813, | |
| "rewards/rejected": -1.9908241033554077, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 0.7293399889073766, | |
| "grad_norm": 0.8645944595336914, | |
| "learning_rate": 1.0399544447715494e-07, | |
| "logits/chosen": -0.797434389591217, | |
| "logits/rejected": -0.6160916090011597, | |
| "logps/chosen": -464.4483947753906, | |
| "logps/rejected": -458.0086364746094, | |
| "loss": 0.0418, | |
| "loss/chosen-sft": 1.9083738327026367, | |
| "loss/dpo": 0.04177068918943405, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -1.5665171146392822, | |
| "rewards/margins": 0.468144029378891, | |
| "rewards/rejected": -2.034661054611206, | |
| "step": 1315 | |
| }, | |
| { | |
| "epoch": 0.7321131447587355, | |
| "grad_norm": 1.3339563608169556, | |
| "learning_rate": 1.0203703340267192e-07, | |
| "logits/chosen": -0.8840950727462769, | |
| "logits/rejected": -0.8916382789611816, | |
| "logps/chosen": -446.3101501464844, | |
| "logps/rejected": -463.23077392578125, | |
| "loss": 0.0404, | |
| "loss/chosen-sft": 1.7909902334213257, | |
| "loss/dpo": 0.0403718575835228, | |
| "rewards/accuracies": 0.6812499761581421, | |
| "rewards/chosen": -1.371553659439087, | |
| "rewards/margins": 0.7114600539207458, | |
| "rewards/rejected": -2.0830137729644775, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.7348863006100943, | |
| "grad_norm": 1.4580516815185547, | |
| "learning_rate": 1.0009249907582485e-07, | |
| "logits/chosen": -1.012226939201355, | |
| "logits/rejected": -0.7340652346611023, | |
| "logps/chosen": -443.7181701660156, | |
| "logps/rejected": -456.22320556640625, | |
| "loss": 0.0308, | |
| "loss/chosen-sft": 1.804347276687622, | |
| "loss/dpo": 0.030787784606218338, | |
| "rewards/accuracies": 0.6812499761581421, | |
| "rewards/chosen": -1.5312182903289795, | |
| "rewards/margins": 0.6057604551315308, | |
| "rewards/rejected": -2.1369788646698, | |
| "step": 1325 | |
| }, | |
| { | |
| "epoch": 0.7376594564614531, | |
| "grad_norm": 0.9017294645309448, | |
| "learning_rate": 9.816202386530199e-08, | |
| "logits/chosen": -0.8607552647590637, | |
| "logits/rejected": -0.8176922798156738, | |
| "logps/chosen": -476.98260498046875, | |
| "logps/rejected": -490.8758239746094, | |
| "loss": 0.0357, | |
| "loss/chosen-sft": 1.7674789428710938, | |
| "loss/dpo": 0.035682059824466705, | |
| "rewards/accuracies": 0.706250011920929, | |
| "rewards/chosen": -1.4640976190567017, | |
| "rewards/margins": 0.7083105444908142, | |
| "rewards/rejected": -2.172408103942871, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 0.740432612312812, | |
| "grad_norm": 1.749300241470337, | |
| "learning_rate": 9.62457888212535e-08, | |
| "logits/chosen": -0.9406915903091431, | |
| "logits/rejected": -0.8014345169067383, | |
| "logps/chosen": -438.89849853515625, | |
| "logps/rejected": -451.3016662597656, | |
| "loss": 0.0358, | |
| "loss/chosen-sft": 1.861494779586792, | |
| "loss/dpo": 0.03579792380332947, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -1.349241018295288, | |
| "rewards/margins": 0.7064675688743591, | |
| "rewards/rejected": -2.055708408355713, | |
| "step": 1335 | |
| }, | |
| { | |
| "epoch": 0.7432057681641708, | |
| "grad_norm": 1.16977059841156, | |
| "learning_rate": 9.434397365831162e-08, | |
| "logits/chosen": -0.8669630289077759, | |
| "logits/rejected": -0.8108338117599487, | |
| "logps/chosen": -437.13848876953125, | |
| "logps/rejected": -468.7396545410156, | |
| "loss": 0.0317, | |
| "loss/chosen-sft": 1.831624984741211, | |
| "loss/dpo": 0.03170974552631378, | |
| "rewards/accuracies": 0.7437499761581421, | |
| "rewards/chosen": -1.3475593328475952, | |
| "rewards/margins": 0.8466382026672363, | |
| "rewards/rejected": -2.194197654724121, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 0.7459789240155297, | |
| "grad_norm": 2.1936497688293457, | |
| "learning_rate": 9.245675673873577e-08, | |
| "logits/chosen": -0.9459794163703918, | |
| "logits/rejected": -0.9377508163452148, | |
| "logps/chosen": -516.6649780273438, | |
| "logps/rejected": -491.7989807128906, | |
| "loss": 0.0465, | |
| "loss/chosen-sft": 1.8350709676742554, | |
| "loss/dpo": 0.04651065915822983, | |
| "rewards/accuracies": 0.6812499761581421, | |
| "rewards/chosen": -1.5745521783828735, | |
| "rewards/margins": 0.6551668047904968, | |
| "rewards/rejected": -2.2297191619873047, | |
| "step": 1345 | |
| }, | |
| { | |
| "epoch": 0.7487520798668885, | |
| "grad_norm": 1.4931093454360962, | |
| "learning_rate": 9.058431505568562e-08, | |
| "logits/chosen": -0.7636462450027466, | |
| "logits/rejected": -0.8169956207275391, | |
| "logps/chosen": -464.9522399902344, | |
| "logps/rejected": -456.91650390625, | |
| "loss": 0.0312, | |
| "loss/chosen-sft": 1.9652206897735596, | |
| "loss/dpo": 0.031192084774374962, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -1.4719616174697876, | |
| "rewards/margins": 0.6275067329406738, | |
| "rewards/rejected": -2.099468469619751, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.7515252357182474, | |
| "grad_norm": 1.4099466800689697, | |
| "learning_rate": 8.872682421662068e-08, | |
| "logits/chosen": -0.787927508354187, | |
| "logits/rejected": -0.7053574323654175, | |
| "logps/chosen": -463.33087158203125, | |
| "logps/rejected": -486.23675537109375, | |
| "loss": 0.0261, | |
| "loss/chosen-sft": 1.898769736289978, | |
| "loss/dpo": 0.026135969907045364, | |
| "rewards/accuracies": 0.731249988079071, | |
| "rewards/chosen": -1.4200894832611084, | |
| "rewards/margins": 0.8304470181465149, | |
| "rewards/rejected": -2.2505364418029785, | |
| "step": 1355 | |
| }, | |
| { | |
| "epoch": 0.7542983915696062, | |
| "grad_norm": 1.183593511581421, | |
| "learning_rate": 8.688445842683173e-08, | |
| "logits/chosen": -0.9655888676643372, | |
| "logits/rejected": -1.0269839763641357, | |
| "logps/chosen": -465.0248107910156, | |
| "logps/rejected": -462.23638916015625, | |
| "loss": 0.037, | |
| "loss/chosen-sft": 1.8918052911758423, | |
| "loss/dpo": 0.036974623799324036, | |
| "rewards/accuracies": 0.668749988079071, | |
| "rewards/chosen": -1.49183988571167, | |
| "rewards/margins": 0.5933989882469177, | |
| "rewards/rejected": -2.0852386951446533, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 0.757071547420965, | |
| "grad_norm": 1.654320240020752, | |
| "learning_rate": 8.505739047310257e-08, | |
| "logits/chosen": -0.9955303072929382, | |
| "logits/rejected": -0.8278782963752747, | |
| "logps/chosen": -464.4917907714844, | |
| "logps/rejected": -458.9212951660156, | |
| "loss": 0.0295, | |
| "loss/chosen-sft": 1.8392751216888428, | |
| "loss/dpo": 0.02950271964073181, | |
| "rewards/accuracies": 0.6937500238418579, | |
| "rewards/chosen": -1.4409606456756592, | |
| "rewards/margins": 0.6720392107963562, | |
| "rewards/rejected": -2.11299991607666, | |
| "step": 1365 | |
| }, | |
| { | |
| "epoch": 0.7598447032723239, | |
| "grad_norm": 2.021111249923706, | |
| "learning_rate": 8.324579170750518e-08, | |
| "logits/chosen": -1.111428141593933, | |
| "logits/rejected": -1.0972769260406494, | |
| "logps/chosen": -464.07928466796875, | |
| "logps/rejected": -461.49462890625, | |
| "loss": 0.0334, | |
| "loss/chosen-sft": 1.842139482498169, | |
| "loss/dpo": 0.03337743133306503, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -1.577466368675232, | |
| "rewards/margins": 0.6026979684829712, | |
| "rewards/rejected": -2.1801645755767822, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 0.7626178591236827, | |
| "grad_norm": 2.152736186981201, | |
| "learning_rate": 8.14498320313296e-08, | |
| "logits/chosen": -0.8720847964286804, | |
| "logits/rejected": -0.8584582209587097, | |
| "logps/chosen": -433.6831970214844, | |
| "logps/rejected": -444.64703369140625, | |
| "loss": 0.0365, | |
| "loss/chosen-sft": 1.9097425937652588, | |
| "loss/dpo": 0.03647807240486145, | |
| "rewards/accuracies": 0.668749988079071, | |
| "rewards/chosen": -1.5121186971664429, | |
| "rewards/margins": 0.6217208504676819, | |
| "rewards/rejected": -2.1338393688201904, | |
| "step": 1375 | |
| }, | |
| { | |
| "epoch": 0.7653910149750416, | |
| "grad_norm": 1.0486515760421753, | |
| "learning_rate": 7.966967987914932e-08, | |
| "logits/chosen": -0.8114673495292664, | |
| "logits/rejected": -0.8360360264778137, | |
| "logps/chosen": -451.40081787109375, | |
| "logps/rejected": -462.27850341796875, | |
| "loss": 0.0316, | |
| "loss/chosen-sft": 1.915459394454956, | |
| "loss/dpo": 0.03155887499451637, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": -1.4397701025009155, | |
| "rewards/margins": 0.7389817237854004, | |
| "rewards/rejected": -2.1787517070770264, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 0.7681641708264004, | |
| "grad_norm": 1.6471340656280518, | |
| "learning_rate": 7.7905502203025e-08, | |
| "logits/chosen": -0.8942529559135437, | |
| "logits/rejected": -0.7852484583854675, | |
| "logps/chosen": -430.1280822753906, | |
| "logps/rejected": -426.33160400390625, | |
| "loss": 0.0318, | |
| "loss/chosen-sft": 1.8815257549285889, | |
| "loss/dpo": 0.03184288740158081, | |
| "rewards/accuracies": 0.668749988079071, | |
| "rewards/chosen": -1.3281621932983398, | |
| "rewards/margins": 0.6897465586662292, | |
| "rewards/rejected": -2.0179085731506348, | |
| "step": 1385 | |
| }, | |
| { | |
| "epoch": 0.7709373266777593, | |
| "grad_norm": 1.7929314374923706, | |
| "learning_rate": 7.615746445684665e-08, | |
| "logits/chosen": -1.137683391571045, | |
| "logits/rejected": -1.0650242567062378, | |
| "logps/chosen": -483.86932373046875, | |
| "logps/rejected": -496.49847412109375, | |
| "loss": 0.0351, | |
| "loss/chosen-sft": 1.70965576171875, | |
| "loss/dpo": 0.03509928658604622, | |
| "rewards/accuracies": 0.706250011920929, | |
| "rewards/chosen": -1.5360954999923706, | |
| "rewards/margins": 0.8399691581726074, | |
| "rewards/rejected": -2.3760645389556885, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 0.7737104825291181, | |
| "grad_norm": 1.5007047653198242, | |
| "learning_rate": 7.442573058081644e-08, | |
| "logits/chosen": -1.022068738937378, | |
| "logits/rejected": -0.8677648305892944, | |
| "logps/chosen": -446.630859375, | |
| "logps/rejected": -453.5487365722656, | |
| "loss": 0.0413, | |
| "loss/chosen-sft": 1.7940679788589478, | |
| "loss/dpo": 0.04133762791752815, | |
| "rewards/accuracies": 0.6812499761581421, | |
| "rewards/chosen": -1.49905264377594, | |
| "rewards/margins": 0.5288316011428833, | |
| "rewards/rejected": -2.0278842449188232, | |
| "step": 1395 | |
| }, | |
| { | |
| "epoch": 0.776483638380477, | |
| "grad_norm": 1.9248064756393433, | |
| "learning_rate": 7.271046298607365e-08, | |
| "logits/chosen": -0.8683999180793762, | |
| "logits/rejected": -0.9180633425712585, | |
| "logps/chosen": -461.62811279296875, | |
| "logps/rejected": -475.0498046875, | |
| "loss": 0.0317, | |
| "loss/chosen-sft": 1.8975083827972412, | |
| "loss/dpo": 0.03167451545596123, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": -1.472059726715088, | |
| "rewards/margins": 0.7187052965164185, | |
| "rewards/rejected": -2.190764904022217, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.7792567942318358, | |
| "grad_norm": 1.3342360258102417, | |
| "learning_rate": 7.101182253946281e-08, | |
| "logits/chosen": -0.8948928117752075, | |
| "logits/rejected": -0.6546001434326172, | |
| "logps/chosen": -442.4454650878906, | |
| "logps/rejected": -462.285888671875, | |
| "loss": 0.0349, | |
| "loss/chosen-sft": 1.9095041751861572, | |
| "loss/dpo": 0.034947969019412994, | |
| "rewards/accuracies": 0.6312500238418579, | |
| "rewards/chosen": -1.5290088653564453, | |
| "rewards/margins": 0.6571269631385803, | |
| "rewards/rejected": -2.186135768890381, | |
| "step": 1405 | |
| }, | |
| { | |
| "epoch": 0.7820299500831946, | |
| "grad_norm": 1.7256712913513184, | |
| "learning_rate": 6.932996854844658e-08, | |
| "logits/chosen": -1.0015538930892944, | |
| "logits/rejected": -0.9933465719223022, | |
| "logps/chosen": -425.962646484375, | |
| "logps/rejected": -465.14178466796875, | |
| "loss": 0.0328, | |
| "loss/chosen-sft": 1.6965240240097046, | |
| "loss/dpo": 0.03280189260840416, | |
| "rewards/accuracies": 0.731249988079071, | |
| "rewards/chosen": -1.3612712621688843, | |
| "rewards/margins": 0.8677095174789429, | |
| "rewards/rejected": -2.228980779647827, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 0.7848031059345535, | |
| "grad_norm": 1.550096035003662, | |
| "learning_rate": 6.766505874616571e-08, | |
| "logits/chosen": -0.9519731402397156, | |
| "logits/rejected": -0.9299766421318054, | |
| "logps/chosen": -441.6609802246094, | |
| "logps/rejected": -446.9225158691406, | |
| "loss": 0.0292, | |
| "loss/chosen-sft": 1.765045166015625, | |
| "loss/dpo": 0.029176611453294754, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": -1.4031174182891846, | |
| "rewards/margins": 0.6740747094154358, | |
| "rewards/rejected": -2.0771920680999756, | |
| "step": 1415 | |
| }, | |
| { | |
| "epoch": 0.7875762617859123, | |
| "grad_norm": 1.3679091930389404, | |
| "learning_rate": 6.601724927664492e-08, | |
| "logits/chosen": -0.7503564953804016, | |
| "logits/rejected": -0.7818638682365417, | |
| "logps/chosen": -428.4849548339844, | |
| "logps/rejected": -410.2527770996094, | |
| "loss": 0.0353, | |
| "loss/chosen-sft": 1.8490569591522217, | |
| "loss/dpo": 0.03528434783220291, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": -1.3704160451889038, | |
| "rewards/margins": 0.5347402691841125, | |
| "rewards/rejected": -1.9051563739776611, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 0.7903494176372712, | |
| "grad_norm": 1.4390736818313599, | |
| "learning_rate": 6.438669468015018e-08, | |
| "logits/chosen": -1.0825473070144653, | |
| "logits/rejected": -1.1610548496246338, | |
| "logps/chosen": -407.60589599609375, | |
| "logps/rejected": -412.09747314453125, | |
| "loss": 0.04, | |
| "loss/chosen-sft": 1.8394687175750732, | |
| "loss/dpo": 0.04001585766673088, | |
| "rewards/accuracies": 0.6187499761581421, | |
| "rewards/chosen": -1.425935983657837, | |
| "rewards/margins": 0.44705772399902344, | |
| "rewards/rejected": -1.8729937076568604, | |
| "step": 1425 | |
| }, | |
| { | |
| "epoch": 0.79312257348863, | |
| "grad_norm": 1.4306334257125854, | |
| "learning_rate": 6.277354787869385e-08, | |
| "logits/chosen": -1.0177534818649292, | |
| "logits/rejected": -0.9789898991584778, | |
| "logps/chosen": -461.71197509765625, | |
| "logps/rejected": -433.2428283691406, | |
| "loss": 0.0298, | |
| "loss/chosen-sft": 1.7876182794570923, | |
| "loss/dpo": 0.029793113470077515, | |
| "rewards/accuracies": 0.668749988079071, | |
| "rewards/chosen": -1.4716134071350098, | |
| "rewards/margins": 0.5654765367507935, | |
| "rewards/rejected": -2.0370900630950928, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 0.7958957293399889, | |
| "grad_norm": 1.1099703311920166, | |
| "learning_rate": 6.117796016169374e-08, | |
| "logits/chosen": -1.059214472770691, | |
| "logits/rejected": -1.051519751548767, | |
| "logps/chosen": -492.03424072265625, | |
| "logps/rejected": -475.771484375, | |
| "loss": 0.0285, | |
| "loss/chosen-sft": 1.937116265296936, | |
| "loss/dpo": 0.02849132940173149, | |
| "rewards/accuracies": 0.6812499761581421, | |
| "rewards/chosen": -1.6160300970077515, | |
| "rewards/margins": 0.6453025937080383, | |
| "rewards/rejected": -2.2613329887390137, | |
| "step": 1435 | |
| }, | |
| { | |
| "epoch": 0.7986688851913477, | |
| "grad_norm": 1.9658957719802856, | |
| "learning_rate": 5.9600081171784e-08, | |
| "logits/chosen": -0.9570498466491699, | |
| "logits/rejected": -0.9356196522712708, | |
| "logps/chosen": -460.81982421875, | |
| "logps/rejected": -476.683837890625, | |
| "loss": 0.0404, | |
| "loss/chosen-sft": 1.7493022680282593, | |
| "loss/dpo": 0.04036666080355644, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -1.3861143589019775, | |
| "rewards/margins": 0.7235819697380066, | |
| "rewards/rejected": -2.109696388244629, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.8014420410427066, | |
| "grad_norm": 1.9839755296707153, | |
| "learning_rate": 5.8040058890781035e-08, | |
| "logits/chosen": -0.8933590054512024, | |
| "logits/rejected": -0.9177519083023071, | |
| "logps/chosen": -447.789794921875, | |
| "logps/rejected": -467.09344482421875, | |
| "loss": 0.0316, | |
| "loss/chosen-sft": 1.9308321475982666, | |
| "loss/dpo": 0.03159358352422714, | |
| "rewards/accuracies": 0.6937500238418579, | |
| "rewards/chosen": -1.4023634195327759, | |
| "rewards/margins": 0.8167144656181335, | |
| "rewards/rejected": -2.2190778255462646, | |
| "step": 1445 | |
| }, | |
| { | |
| "epoch": 0.8042151968940654, | |
| "grad_norm": 1.4874529838562012, | |
| "learning_rate": 5.6498039625804574e-08, | |
| "logits/chosen": -0.9050949215888977, | |
| "logits/rejected": -0.8587920069694519, | |
| "logps/chosen": -458.5428161621094, | |
| "logps/rejected": -460.24542236328125, | |
| "loss": 0.0277, | |
| "loss/chosen-sft": 1.8650929927825928, | |
| "loss/dpo": 0.027697661891579628, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -1.4219458103179932, | |
| "rewards/margins": 0.7902048826217651, | |
| "rewards/rejected": -2.2121505737304688, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.8069883527454242, | |
| "grad_norm": 1.2237356901168823, | |
| "learning_rate": 5.4974167995556955e-08, | |
| "logits/chosen": -1.0649144649505615, | |
| "logits/rejected": -0.894222617149353, | |
| "logps/chosen": -449.2327575683594, | |
| "logps/rejected": -476.51300048828125, | |
| "loss": 0.0296, | |
| "loss/chosen-sft": 1.8358997106552124, | |
| "loss/dpo": 0.029581155627965927, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": -1.4774913787841797, | |
| "rewards/margins": 0.7366743087768555, | |
| "rewards/rejected": -2.2141659259796143, | |
| "step": 1455 | |
| }, | |
| { | |
| "epoch": 0.8097615085967831, | |
| "grad_norm": 1.6183357238769531, | |
| "learning_rate": 5.346858691675915e-08, | |
| "logits/chosen": -0.8394588232040405, | |
| "logits/rejected": -0.8869924545288086, | |
| "logps/chosen": -450.36273193359375, | |
| "logps/rejected": -425.9990234375, | |
| "loss": 0.0354, | |
| "loss/chosen-sft": 1.9527273178100586, | |
| "loss/dpo": 0.035379212349653244, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -1.555987000465393, | |
| "rewards/margins": 0.40505796670913696, | |
| "rewards/rejected": -1.9610449075698853, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 0.812534664448142, | |
| "grad_norm": 1.2106329202651978, | |
| "learning_rate": 5.198143759074813e-08, | |
| "logits/chosen": -0.8181321024894714, | |
| "logits/rejected": -0.8268686532974243, | |
| "logps/chosen": -422.52069091796875, | |
| "logps/rejected": -439.79437255859375, | |
| "loss": 0.0218, | |
| "loss/chosen-sft": 1.9324089288711548, | |
| "loss/dpo": 0.02178550697863102, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -1.4317119121551514, | |
| "rewards/margins": 0.7136107087135315, | |
| "rewards/rejected": -2.145322322845459, | |
| "step": 1465 | |
| }, | |
| { | |
| "epoch": 0.8153078202995009, | |
| "grad_norm": 2.4801158905029297, | |
| "learning_rate": 5.051285949023354e-08, | |
| "logits/chosen": -0.9226022958755493, | |
| "logits/rejected": -0.8704797625541687, | |
| "logps/chosen": -494.6982421875, | |
| "logps/rejected": -470.52734375, | |
| "loss": 0.0366, | |
| "loss/chosen-sft": 1.9025272130966187, | |
| "loss/dpo": 0.036645907908678055, | |
| "rewards/accuracies": 0.668749988079071, | |
| "rewards/chosen": -1.6271445751190186, | |
| "rewards/margins": 0.5947997570037842, | |
| "rewards/rejected": -2.2219443321228027, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 0.8180809761508597, | |
| "grad_norm": 1.554540753364563, | |
| "learning_rate": 4.906299034621761e-08, | |
| "logits/chosen": -0.918652355670929, | |
| "logits/rejected": -0.7777332067489624, | |
| "logps/chosen": -438.8077697753906, | |
| "logps/rejected": -469.95928955078125, | |
| "loss": 0.0358, | |
| "loss/chosen-sft": 1.8588802814483643, | |
| "loss/dpo": 0.035846300423145294, | |
| "rewards/accuracies": 0.762499988079071, | |
| "rewards/chosen": -1.3037010431289673, | |
| "rewards/margins": 0.817149817943573, | |
| "rewards/rejected": -2.1208510398864746, | |
| "step": 1475 | |
| }, | |
| { | |
| "epoch": 0.8208541320022186, | |
| "grad_norm": 1.5162110328674316, | |
| "learning_rate": 4.7631966135077974e-08, | |
| "logits/chosen": -0.9379565119743347, | |
| "logits/rejected": -0.8222628831863403, | |
| "logps/chosen": -460.7021484375, | |
| "logps/rejected": -467.64739990234375, | |
| "loss": 0.027, | |
| "loss/chosen-sft": 1.943677306175232, | |
| "loss/dpo": 0.026978474110364914, | |
| "rewards/accuracies": 0.643750011920929, | |
| "rewards/chosen": -1.556178331375122, | |
| "rewards/margins": 0.6195975542068481, | |
| "rewards/rejected": -2.1757760047912598, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 0.8236272878535774, | |
| "grad_norm": 1.4927583932876587, | |
| "learning_rate": 4.621992106581504e-08, | |
| "logits/chosen": -1.0209442377090454, | |
| "logits/rejected": -0.7870509028434753, | |
| "logps/chosen": -411.0484924316406, | |
| "logps/rejected": -458.4707946777344, | |
| "loss": 0.0281, | |
| "loss/chosen-sft": 1.8953702449798584, | |
| "loss/dpo": 0.02814427576959133, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -1.4694321155548096, | |
| "rewards/margins": 0.6849014163017273, | |
| "rewards/rejected": -2.1543333530426025, | |
| "step": 1485 | |
| }, | |
| { | |
| "epoch": 0.8264004437049363, | |
| "grad_norm": 1.1416542530059814, | |
| "learning_rate": 4.482698756746506e-08, | |
| "logits/chosen": -0.8766797184944153, | |
| "logits/rejected": -0.6899703145027161, | |
| "logps/chosen": -426.66851806640625, | |
| "logps/rejected": -439.741455078125, | |
| "loss": 0.0395, | |
| "loss/chosen-sft": 1.8772623538970947, | |
| "loss/dpo": 0.039542876183986664, | |
| "rewards/accuracies": 0.6187499761581421, | |
| "rewards/chosen": -1.4856750965118408, | |
| "rewards/margins": 0.5127314329147339, | |
| "rewards/rejected": -1.9984066486358643, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 0.8291735995562951, | |
| "grad_norm": 1.0494132041931152, | |
| "learning_rate": 4.3453296276680375e-08, | |
| "logits/chosen": -0.7310534715652466, | |
| "logits/rejected": -0.7772519588470459, | |
| "logps/chosen": -459.5819396972656, | |
| "logps/rejected": -443.21087646484375, | |
| "loss": 0.0361, | |
| "loss/chosen-sft": 1.916404366493225, | |
| "loss/dpo": 0.03613205999135971, | |
| "rewards/accuracies": 0.6812499761581421, | |
| "rewards/chosen": -1.4593141078948975, | |
| "rewards/margins": 0.5708898305892944, | |
| "rewards/rejected": -2.0302040576934814, | |
| "step": 1495 | |
| }, | |
| { | |
| "epoch": 0.831946755407654, | |
| "grad_norm": 1.4319831132888794, | |
| "learning_rate": 4.209897602547768e-08, | |
| "logits/chosen": -0.8863110542297363, | |
| "logits/rejected": -0.8555914759635925, | |
| "logps/chosen": -444.46142578125, | |
| "logps/rejected": -446.90155029296875, | |
| "loss": 0.0231, | |
| "loss/chosen-sft": 1.974479079246521, | |
| "loss/dpo": 0.023065898567438126, | |
| "rewards/accuracies": 0.6312500238418579, | |
| "rewards/chosen": -1.5314444303512573, | |
| "rewards/margins": 0.6271489858627319, | |
| "rewards/rejected": -2.1585934162139893, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.8347199112590128, | |
| "grad_norm": 1.5065139532089233, | |
| "learning_rate": 4.076415382915527e-08, | |
| "logits/chosen": -0.9296368360519409, | |
| "logits/rejected": -0.8830710649490356, | |
| "logps/chosen": -458.8507385253906, | |
| "logps/rejected": -464.94677734375, | |
| "loss": 0.0392, | |
| "loss/chosen-sft": 1.758873701095581, | |
| "loss/dpo": 0.039175163954496384, | |
| "rewards/accuracies": 0.7437499761581421, | |
| "rewards/chosen": -1.4754732847213745, | |
| "rewards/margins": 0.8373439908027649, | |
| "rewards/rejected": -2.312817335128784, | |
| "step": 1505 | |
| }, | |
| { | |
| "epoch": 0.8374930671103716, | |
| "grad_norm": 2.0096213817596436, | |
| "learning_rate": 3.944895487438102e-08, | |
| "logits/chosen": -0.8368681073188782, | |
| "logits/rejected": -0.8462071418762207, | |
| "logps/chosen": -440.0353088378906, | |
| "logps/rejected": -455.104736328125, | |
| "loss": 0.0277, | |
| "loss/chosen-sft": 1.8118178844451904, | |
| "loss/dpo": 0.02766679786145687, | |
| "rewards/accuracies": 0.6812499761581421, | |
| "rewards/chosen": -1.4319641590118408, | |
| "rewards/margins": 0.7568520307540894, | |
| "rewards/rejected": -2.1888160705566406, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 0.8402662229617305, | |
| "grad_norm": 1.322721242904663, | |
| "learning_rate": 3.8153502507451727e-08, | |
| "logits/chosen": -0.8090510368347168, | |
| "logits/rejected": -0.7667346596717834, | |
| "logps/chosen": -448.12725830078125, | |
| "logps/rejected": -465.86297607421875, | |
| "loss": 0.0352, | |
| "loss/chosen-sft": 1.8181946277618408, | |
| "loss/dpo": 0.03516048565506935, | |
| "rewards/accuracies": 0.668749988079071, | |
| "rewards/chosen": -1.5605225563049316, | |
| "rewards/margins": 0.6109769940376282, | |
| "rewards/rejected": -2.171499729156494, | |
| "step": 1515 | |
| }, | |
| { | |
| "epoch": 0.8430393788130893, | |
| "grad_norm": 0.6515668034553528, | |
| "learning_rate": 3.687791822272493e-08, | |
| "logits/chosen": -0.8708721995353699, | |
| "logits/rejected": -0.8009916543960571, | |
| "logps/chosen": -413.84649658203125, | |
| "logps/rejected": -422.46844482421875, | |
| "loss": 0.0289, | |
| "loss/chosen-sft": 1.8942821025848389, | |
| "loss/dpo": 0.028922390192747116, | |
| "rewards/accuracies": 0.6812499761581421, | |
| "rewards/chosen": -1.4152357578277588, | |
| "rewards/margins": 0.6280101537704468, | |
| "rewards/rejected": -2.043246030807495, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 0.8458125346644482, | |
| "grad_norm": 1.2580747604370117, | |
| "learning_rate": 3.562232165122461e-08, | |
| "logits/chosen": -0.9807391166687012, | |
| "logits/rejected": -0.8759809732437134, | |
| "logps/chosen": -468.5948791503906, | |
| "logps/rejected": -469.4264221191406, | |
| "loss": 0.0312, | |
| "loss/chosen-sft": 1.8805710077285767, | |
| "loss/dpo": 0.03115806356072426, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -1.6106830835342407, | |
| "rewards/margins": 0.6084949374198914, | |
| "rewards/rejected": -2.2191781997680664, | |
| "step": 1525 | |
| }, | |
| { | |
| "epoch": 0.848585690515807, | |
| "grad_norm": 1.6682928800582886, | |
| "learning_rate": 3.438683054942154e-08, | |
| "logits/chosen": -0.9435014724731445, | |
| "logits/rejected": -0.9378792643547058, | |
| "logps/chosen": -497.65216064453125, | |
| "logps/rejected": -514.5195922851562, | |
| "loss": 0.0276, | |
| "loss/chosen-sft": 1.8878179788589478, | |
| "loss/dpo": 0.02764343097805977, | |
| "rewards/accuracies": 0.706250011920929, | |
| "rewards/chosen": -1.5859915018081665, | |
| "rewards/margins": 0.8114764094352722, | |
| "rewards/rejected": -2.397467851638794, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 0.8513588463671659, | |
| "grad_norm": 1.870586633682251, | |
| "learning_rate": 3.3171560788189555e-08, | |
| "logits/chosen": -0.7878917455673218, | |
| "logits/rejected": -0.8808261752128601, | |
| "logps/chosen": -448.91168212890625, | |
| "logps/rejected": -445.90191650390625, | |
| "loss": 0.03, | |
| "loss/chosen-sft": 1.9762178659439087, | |
| "loss/dpo": 0.029997188597917557, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -1.4799820184707642, | |
| "rewards/margins": 0.5908954739570618, | |
| "rewards/rejected": -2.0708775520324707, | |
| "step": 1535 | |
| }, | |
| { | |
| "epoch": 0.8541320022185247, | |
| "grad_norm": 1.2771514654159546, | |
| "learning_rate": 3.19766263419384e-08, | |
| "logits/chosen": -0.9804418683052063, | |
| "logits/rejected": -0.9339153170585632, | |
| "logps/chosen": -451.52972412109375, | |
| "logps/rejected": -487.1279296875, | |
| "loss": 0.0356, | |
| "loss/chosen-sft": 1.733094573020935, | |
| "loss/dpo": 0.03555992618203163, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -1.3957600593566895, | |
| "rewards/margins": 0.8448952436447144, | |
| "rewards/rejected": -2.2406554222106934, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 0.8569051580698835, | |
| "grad_norm": 1.825315237045288, | |
| "learning_rate": 3.080213927792471e-08, | |
| "logits/chosen": -0.9238882064819336, | |
| "logits/rejected": -0.9216313362121582, | |
| "logps/chosen": -449.4173278808594, | |
| "logps/rejected": -422.06097412109375, | |
| "loss": 0.0379, | |
| "loss/chosen-sft": 1.8662887811660767, | |
| "loss/dpo": 0.037861116230487823, | |
| "rewards/accuracies": 0.643750011920929, | |
| "rewards/chosen": -1.5062415599822998, | |
| "rewards/margins": 0.5276464223861694, | |
| "rewards/rejected": -2.0338878631591797, | |
| "step": 1545 | |
| }, | |
| { | |
| "epoch": 0.8596783139212424, | |
| "grad_norm": 1.1899453401565552, | |
| "learning_rate": 2.9648209745741838e-08, | |
| "logits/chosen": -0.9589411020278931, | |
| "logits/rejected": -0.8292596936225891, | |
| "logps/chosen": -456.76092529296875, | |
| "logps/rejected": -475.18206787109375, | |
| "loss": 0.0399, | |
| "loss/chosen-sft": 1.9160964488983154, | |
| "loss/dpo": 0.03985082358121872, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -1.461181879043579, | |
| "rewards/margins": 0.6755915880203247, | |
| "rewards/rejected": -2.1367735862731934, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.8624514697726012, | |
| "grad_norm": 0.9635495543479919, | |
| "learning_rate": 2.8514945966989085e-08, | |
| "logits/chosen": -1.0600335597991943, | |
| "logits/rejected": -0.9049699902534485, | |
| "logps/chosen": -442.63555908203125, | |
| "logps/rejected": -459.53570556640625, | |
| "loss": 0.0311, | |
| "loss/chosen-sft": 1.8001188039779663, | |
| "loss/dpo": 0.03108590468764305, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -1.4419844150543213, | |
| "rewards/margins": 0.7059625387191772, | |
| "rewards/rejected": -2.147946834564209, | |
| "step": 1555 | |
| }, | |
| { | |
| "epoch": 0.8652246256239601, | |
| "grad_norm": 1.15142822265625, | |
| "learning_rate": 2.7402454225122744e-08, | |
| "logits/chosen": -1.0275518894195557, | |
| "logits/rejected": -0.8722109794616699, | |
| "logps/chosen": -416.63787841796875, | |
| "logps/rejected": -473.12542724609375, | |
| "loss": 0.0299, | |
| "loss/chosen-sft": 1.8142344951629639, | |
| "loss/dpo": 0.029919322580099106, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": -1.4069955348968506, | |
| "rewards/margins": 0.8083630800247192, | |
| "rewards/rejected": -2.2153587341308594, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 0.8679977814753189, | |
| "grad_norm": 1.1056207418441772, | |
| "learning_rate": 2.631083885548749e-08, | |
| "logits/chosen": -0.9130992889404297, | |
| "logits/rejected": -0.7697763442993164, | |
| "logps/chosen": -439.4908752441406, | |
| "logps/rejected": -480.8926696777344, | |
| "loss": 0.0255, | |
| "loss/chosen-sft": 1.9055465459823608, | |
| "loss/dpo": 0.025480791926383972, | |
| "rewards/accuracies": 0.7875000238418579, | |
| "rewards/chosen": -1.5075581073760986, | |
| "rewards/margins": 0.8769866824150085, | |
| "rewards/rejected": -2.384544849395752, | |
| "step": 1565 | |
| }, | |
| { | |
| "epoch": 0.8707709373266778, | |
| "grad_norm": 1.4552932977676392, | |
| "learning_rate": 2.524020223553208e-08, | |
| "logits/chosen": -0.8160026669502258, | |
| "logits/rejected": -0.8994795083999634, | |
| "logps/chosen": -484.19598388671875, | |
| "logps/rejected": -469.77947998046875, | |
| "loss": 0.0308, | |
| "loss/chosen-sft": 1.9661228656768799, | |
| "loss/dpo": 0.03077336587011814, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -1.574336051940918, | |
| "rewards/margins": 0.6738288998603821, | |
| "rewards/rejected": -2.2481648921966553, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 0.8735440931780366, | |
| "grad_norm": 1.7502833604812622, | |
| "learning_rate": 2.4190644775207075e-08, | |
| "logits/chosen": -0.9263548851013184, | |
| "logits/rejected": -0.8413556814193726, | |
| "logps/chosen": -437.6075744628906, | |
| "logps/rejected": -467.7953186035156, | |
| "loss": 0.0372, | |
| "loss/chosen-sft": 1.796007513999939, | |
| "loss/dpo": 0.03721100836992264, | |
| "rewards/accuracies": 0.668749988079071, | |
| "rewards/chosen": -1.3773536682128906, | |
| "rewards/margins": 0.7799967527389526, | |
| "rewards/rejected": -2.157350540161133, | |
| "step": 1575 | |
| }, | |
| { | |
| "epoch": 0.8763172490293955, | |
| "grad_norm": 0.8901669383049011, | |
| "learning_rate": 2.316226490754844e-08, | |
| "logits/chosen": -0.9022026062011719, | |
| "logits/rejected": -0.9546858072280884, | |
| "logps/chosen": -455.98046875, | |
| "logps/rejected": -470.28582763671875, | |
| "loss": 0.0353, | |
| "loss/chosen-sft": 1.8491671085357666, | |
| "loss/dpo": 0.035286836326122284, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -1.4982842206954956, | |
| "rewards/margins": 0.7641604542732239, | |
| "rewards/rejected": -2.2624447345733643, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 0.8790904048807543, | |
| "grad_norm": 0.9218027591705322, | |
| "learning_rate": 2.215515907944576e-08, | |
| "logits/chosen": -0.9104015231132507, | |
| "logits/rejected": -0.8896605372428894, | |
| "logps/chosen": -470.4886169433594, | |
| "logps/rejected": -467.1653747558594, | |
| "loss": 0.0378, | |
| "loss/chosen-sft": 1.9249181747436523, | |
| "loss/dpo": 0.037777096033096313, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -1.5565952062606812, | |
| "rewards/margins": 0.5057957768440247, | |
| "rewards/rejected": -2.0623910427093506, | |
| "step": 1585 | |
| }, | |
| { | |
| "epoch": 0.8818635607321131, | |
| "grad_norm": 1.4500895738601685, | |
| "learning_rate": 2.1169421742596923e-08, | |
| "logits/chosen": -0.8355404734611511, | |
| "logits/rejected": -0.8188692331314087, | |
| "logps/chosen": -468.78759765625, | |
| "logps/rejected": -436.91619873046875, | |
| "loss": 0.0279, | |
| "loss/chosen-sft": 1.9327714443206787, | |
| "loss/dpo": 0.02788035199046135, | |
| "rewards/accuracies": 0.6812499761581421, | |
| "rewards/chosen": -1.53583824634552, | |
| "rewards/margins": 0.596767783164978, | |
| "rewards/rejected": -2.132605791091919, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 0.884636716583472, | |
| "grad_norm": 1.5247975587844849, | |
| "learning_rate": 2.02051453446499e-08, | |
| "logits/chosen": -0.9076545834541321, | |
| "logits/rejected": -0.8217021822929382, | |
| "logps/chosen": -462.9772033691406, | |
| "logps/rejected": -458.1619567871094, | |
| "loss": 0.0357, | |
| "loss/chosen-sft": 1.872582197189331, | |
| "loss/dpo": 0.035654447972774506, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -1.4506585597991943, | |
| "rewards/margins": 0.6560076475143433, | |
| "rewards/rejected": -2.106666088104248, | |
| "step": 1595 | |
| }, | |
| { | |
| "epoch": 0.8874098724348308, | |
| "grad_norm": 2.1406707763671875, | |
| "learning_rate": 1.9262420320532768e-08, | |
| "logits/chosen": -0.9324251413345337, | |
| "logits/rejected": -0.7913103103637695, | |
| "logps/chosen": -460.52789306640625, | |
| "logps/rejected": -489.23614501953125, | |
| "loss": 0.0312, | |
| "loss/chosen-sft": 1.8192570209503174, | |
| "loss/dpo": 0.03123651072382927, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -1.4744288921356201, | |
| "rewards/margins": 0.7909864187240601, | |
| "rewards/rejected": -2.2654154300689697, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.8901830282861897, | |
| "grad_norm": 2.196462631225586, | |
| "learning_rate": 1.8341335083971815e-08, | |
| "logits/chosen": -0.7642068862915039, | |
| "logits/rejected": -0.7473465800285339, | |
| "logps/chosen": -469.9163513183594, | |
| "logps/rejected": -457.0773010253906, | |
| "loss": 0.0376, | |
| "loss/chosen-sft": 1.9092369079589844, | |
| "loss/dpo": 0.037584852427244186, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -1.5675512552261353, | |
| "rewards/margins": 0.54124915599823, | |
| "rewards/rejected": -2.1088004112243652, | |
| "step": 1605 | |
| }, | |
| { | |
| "epoch": 0.8929561841375485, | |
| "grad_norm": 1.6157978773117065, | |
| "learning_rate": 1.7441976019200166e-08, | |
| "logits/chosen": -1.053679347038269, | |
| "logits/rejected": -0.9105228185653687, | |
| "logps/chosen": -452.0389709472656, | |
| "logps/rejected": -464.1913146972656, | |
| "loss": 0.0342, | |
| "loss/chosen-sft": 1.905279517173767, | |
| "loss/dpo": 0.034248754382133484, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -1.5297168493270874, | |
| "rewards/margins": 0.5347718596458435, | |
| "rewards/rejected": -2.064488649368286, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 0.8957293399889074, | |
| "grad_norm": 1.4368631839752197, | |
| "learning_rate": 1.6564427472855662e-08, | |
| "logits/chosen": -0.928158164024353, | |
| "logits/rejected": -0.781406819820404, | |
| "logps/chosen": -456.40380859375, | |
| "logps/rejected": -482.193603515625, | |
| "loss": 0.0359, | |
| "loss/chosen-sft": 1.7659343481063843, | |
| "loss/dpo": 0.03594128414988518, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -1.445878267288208, | |
| "rewards/margins": 0.774651825428009, | |
| "rewards/rejected": -2.2205300331115723, | |
| "step": 1615 | |
| }, | |
| { | |
| "epoch": 0.8985024958402662, | |
| "grad_norm": 1.0532777309417725, | |
| "learning_rate": 1.570877174607088e-08, | |
| "logits/chosen": -0.9026380777359009, | |
| "logits/rejected": -0.7336758375167847, | |
| "logps/chosen": -474.20599365234375, | |
| "logps/rejected": -458.268798828125, | |
| "loss": 0.039, | |
| "loss/chosen-sft": 1.8853695392608643, | |
| "loss/dpo": 0.03895549476146698, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -1.5534743070602417, | |
| "rewards/margins": 0.538535475730896, | |
| "rewards/rejected": -2.0920097827911377, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 0.901275651691625, | |
| "grad_norm": 1.640979290008545, | |
| "learning_rate": 1.4875089086754111e-08, | |
| "logits/chosen": -0.9097537994384766, | |
| "logits/rejected": -0.68541419506073, | |
| "logps/chosen": -440.2255859375, | |
| "logps/rejected": -470.017333984375, | |
| "loss": 0.0278, | |
| "loss/chosen-sft": 1.8272202014923096, | |
| "loss/dpo": 0.027846310287714005, | |
| "rewards/accuracies": 0.706250011920929, | |
| "rewards/chosen": -1.4420444965362549, | |
| "rewards/margins": 0.7163407206535339, | |
| "rewards/rejected": -2.1583850383758545, | |
| "step": 1625 | |
| }, | |
| { | |
| "epoch": 0.9040488075429839, | |
| "grad_norm": 1.3717536926269531, | |
| "learning_rate": 1.4063457682063573e-08, | |
| "logits/chosen": -1.025506854057312, | |
| "logits/rejected": -0.8326213955879211, | |
| "logps/chosen": -396.4437561035156, | |
| "logps/rejected": -455.72265625, | |
| "loss": 0.0323, | |
| "loss/chosen-sft": 1.8843291997909546, | |
| "loss/dpo": 0.032339178025722504, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": -1.4136439561843872, | |
| "rewards/margins": 0.7241330146789551, | |
| "rewards/rejected": -2.137777090072632, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 0.9068219633943427, | |
| "grad_norm": 1.1654001474380493, | |
| "learning_rate": 1.3273953651074393e-08, | |
| "logits/chosen": -1.0155668258666992, | |
| "logits/rejected": -0.751710057258606, | |
| "logps/chosen": -461.4085998535156, | |
| "logps/rejected": -451.8213806152344, | |
| "loss": 0.0251, | |
| "loss/chosen-sft": 1.8744451999664307, | |
| "loss/dpo": 0.02511768974363804, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": -1.5510212182998657, | |
| "rewards/margins": 0.5998275876045227, | |
| "rewards/rejected": -2.150848865509033, | |
| "step": 1635 | |
| }, | |
| { | |
| "epoch": 0.9095951192457016, | |
| "grad_norm": 1.6467472314834595, | |
| "learning_rate": 1.250665103763987e-08, | |
| "logits/chosen": -0.8909111022949219, | |
| "logits/rejected": -0.8106695413589478, | |
| "logps/chosen": -450.2090759277344, | |
| "logps/rejected": -483.452880859375, | |
| "loss": 0.0419, | |
| "loss/chosen-sft": 1.795325517654419, | |
| "loss/dpo": 0.04194999486207962, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -1.475182056427002, | |
| "rewards/margins": 0.7644132971763611, | |
| "rewards/rejected": -2.239595413208008, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 0.9123682750970604, | |
| "grad_norm": 1.179222583770752, | |
| "learning_rate": 1.1761621803447336e-08, | |
| "logits/chosen": -0.8358624577522278, | |
| "logits/rejected": -0.7992157936096191, | |
| "logps/chosen": -449.79736328125, | |
| "logps/rejected": -441.9453125, | |
| "loss": 0.0362, | |
| "loss/chosen-sft": 1.9050792455673218, | |
| "loss/dpo": 0.03616093099117279, | |
| "rewards/accuracies": 0.706250011920929, | |
| "rewards/chosen": -1.381279468536377, | |
| "rewards/margins": 0.6649306416511536, | |
| "rewards/rejected": -2.0462100505828857, | |
| "step": 1645 | |
| }, | |
| { | |
| "epoch": 0.9151414309484193, | |
| "grad_norm": 1.244621753692627, | |
| "learning_rate": 1.1038935821268941e-08, | |
| "logits/chosen": -0.833348274230957, | |
| "logits/rejected": -0.6609139442443848, | |
| "logps/chosen": -409.50445556640625, | |
| "logps/rejected": -461.4767150878906, | |
| "loss": 0.0329, | |
| "loss/chosen-sft": 1.8607597351074219, | |
| "loss/dpo": 0.032938309013843536, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": -1.4654487371444702, | |
| "rewards/margins": 0.6329339146614075, | |
| "rewards/rejected": -2.0983824729919434, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.9179145867997781, | |
| "grad_norm": 1.552565336227417, | |
| "learning_rate": 1.0338660868408927e-08, | |
| "logits/chosen": -0.7576335668563843, | |
| "logits/rejected": -0.7207523584365845, | |
| "logps/chosen": -497.5868225097656, | |
| "logps/rejected": -482.0480041503906, | |
| "loss": 0.0367, | |
| "loss/chosen-sft": 1.8623535633087158, | |
| "loss/dpo": 0.036711666733026505, | |
| "rewards/accuracies": 0.6812499761581421, | |
| "rewards/chosen": -1.4475281238555908, | |
| "rewards/margins": 0.6721404790878296, | |
| "rewards/rejected": -2.119668483734131, | |
| "step": 1655 | |
| }, | |
| { | |
| "epoch": 0.920687742651137, | |
| "grad_norm": 2.2845067977905273, | |
| "learning_rate": 9.660862620346877e-09, | |
| "logits/chosen": -0.7926570177078247, | |
| "logits/rejected": -0.7714422345161438, | |
| "logps/chosen": -473.17706298828125, | |
| "logps/rejected": -469.4546813964844, | |
| "loss": 0.0379, | |
| "loss/chosen-sft": 1.8994096517562866, | |
| "loss/dpo": 0.0379011332988739, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -1.5939931869506836, | |
| "rewards/margins": 0.5131052136421204, | |
| "rewards/rejected": -2.107098340988159, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 0.9234608985024958, | |
| "grad_norm": 1.7006827592849731, | |
| "learning_rate": 9.005604644578473e-09, | |
| "logits/chosen": -0.8427762985229492, | |
| "logits/rejected": -0.8619502186775208, | |
| "logps/chosen": -480.4871520996094, | |
| "logps/rejected": -467.28363037109375, | |
| "loss": 0.0539, | |
| "loss/chosen-sft": 1.8251368999481201, | |
| "loss/dpo": 0.05387115478515625, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -1.6016905307769775, | |
| "rewards/margins": 0.5135762691497803, | |
| "rewards/rejected": -2.115266799926758, | |
| "step": 1665 | |
| }, | |
| { | |
| "epoch": 0.9262340543538546, | |
| "grad_norm": 1.814226508140564, | |
| "learning_rate": 8.372948394653717e-09, | |
| "logits/chosen": -0.8938384056091309, | |
| "logits/rejected": -0.8742619752883911, | |
| "logps/chosen": -495.4317321777344, | |
| "logps/rejected": -477.78497314453125, | |
| "loss": 0.0345, | |
| "loss/chosen-sft": 1.8645175695419312, | |
| "loss/dpo": 0.03449907898902893, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -1.5643129348754883, | |
| "rewards/margins": 0.605742871761322, | |
| "rewards/rejected": -2.170055866241455, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 0.9290072102052135, | |
| "grad_norm": 2.352726697921753, | |
| "learning_rate": 7.762953204413475e-09, | |
| "logits/chosen": -1.0139451026916504, | |
| "logits/rejected": -1.1322184801101685, | |
| "logps/chosen": -484.33172607421875, | |
| "logps/rejected": -444.97344970703125, | |
| "loss": 0.044, | |
| "loss/chosen-sft": 1.884155511856079, | |
| "loss/dpo": 0.04397551342844963, | |
| "rewards/accuracies": 0.6312500238418579, | |
| "rewards/chosen": -1.5331767797470093, | |
| "rewards/margins": 0.5316352844238281, | |
| "rewards/rejected": -2.064812183380127, | |
| "step": 1675 | |
| }, | |
| { | |
| "epoch": 0.9317803660565723, | |
| "grad_norm": 1.378034234046936, | |
| "learning_rate": 7.175676282424964e-09, | |
| "logits/chosen": -0.9042679667472839, | |
| "logits/rejected": -0.8096022605895996, | |
| "logps/chosen": -460.42950439453125, | |
| "logps/rejected": -475.254638671875, | |
| "loss": 0.0342, | |
| "loss/chosen-sft": 1.8950878381729126, | |
| "loss/dpo": 0.03422309830784798, | |
| "rewards/accuracies": 0.6812499761581421, | |
| "rewards/chosen": -1.513983130455017, | |
| "rewards/margins": 0.7315508127212524, | |
| "rewards/rejected": -2.2455339431762695, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 0.9345535219079312, | |
| "grad_norm": 1.6129722595214844, | |
| "learning_rate": 6.611172706616291e-09, | |
| "logits/chosen": -0.9313820004463196, | |
| "logits/rejected": -0.8965193033218384, | |
| "logps/chosen": -446.47283935546875, | |
| "logps/rejected": -424.822509765625, | |
| "loss": 0.0305, | |
| "loss/chosen-sft": 1.8703186511993408, | |
| "loss/dpo": 0.030467282980680466, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": -1.5669549703598022, | |
| "rewards/margins": 0.5169414281845093, | |
| "rewards/rejected": -2.0838963985443115, | |
| "step": 1685 | |
| }, | |
| { | |
| "epoch": 0.93732667775929, | |
| "grad_norm": 1.3904266357421875, | |
| "learning_rate": 6.069495419111003e-09, | |
| "logits/chosen": -0.8602801561355591, | |
| "logits/rejected": -0.8798410296440125, | |
| "logps/chosen": -514.514892578125, | |
| "logps/rejected": -513.6451416015625, | |
| "loss": 0.0346, | |
| "loss/chosen-sft": 1.8502308130264282, | |
| "loss/dpo": 0.03457609936594963, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": -1.4935173988342285, | |
| "rewards/margins": 0.9913633465766907, | |
| "rewards/rejected": -2.4848809242248535, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 0.940099833610649, | |
| "grad_norm": 1.7256861925125122, | |
| "learning_rate": 5.550695221263002e-09, | |
| "logits/chosen": -1.1261186599731445, | |
| "logits/rejected": -1.107710599899292, | |
| "logps/chosen": -490.0545959472656, | |
| "logps/rejected": -447.8335876464844, | |
| "loss": 0.0385, | |
| "loss/chosen-sft": 1.7753324508666992, | |
| "loss/dpo": 0.038456808775663376, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -1.554856538772583, | |
| "rewards/margins": 0.5833019018173218, | |
| "rewards/rejected": -2.1381585597991943, | |
| "step": 1695 | |
| }, | |
| { | |
| "epoch": 0.9428729894620078, | |
| "grad_norm": 1.7229971885681152, | |
| "learning_rate": 5.054820768891854e-09, | |
| "logits/chosen": -0.7668136954307556, | |
| "logits/rejected": -0.7551401257514954, | |
| "logps/chosen": -470.37451171875, | |
| "logps/rejected": -467.57177734375, | |
| "loss": 0.0284, | |
| "loss/chosen-sft": 1.8557796478271484, | |
| "loss/dpo": 0.028406251221895218, | |
| "rewards/accuracies": 0.6812499761581421, | |
| "rewards/chosen": -1.5322498083114624, | |
| "rewards/margins": 0.6786109805107117, | |
| "rewards/rejected": -2.2108609676361084, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.9456461453133667, | |
| "grad_norm": 2.3088462352752686, | |
| "learning_rate": 4.581918567719917e-09, | |
| "logits/chosen": -0.9519344568252563, | |
| "logits/rejected": -0.9092620015144348, | |
| "logps/chosen": -490.27813720703125, | |
| "logps/rejected": -480.19171142578125, | |
| "loss": 0.0364, | |
| "loss/chosen-sft": 1.8951151371002197, | |
| "loss/dpo": 0.03644304722547531, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": -1.4337555170059204, | |
| "rewards/margins": 0.7906380891799927, | |
| "rewards/rejected": -2.224393367767334, | |
| "step": 1705 | |
| }, | |
| { | |
| "epoch": 0.9484193011647255, | |
| "grad_norm": 1.4663399457931519, | |
| "learning_rate": 4.132032969010546e-09, | |
| "logits/chosen": -0.9776910543441772, | |
| "logits/rejected": -0.8744028210639954, | |
| "logps/chosen": -475.00274658203125, | |
| "logps/rejected": -476.331298828125, | |
| "loss": 0.0408, | |
| "loss/chosen-sft": 1.855271339416504, | |
| "loss/dpo": 0.040752846747636795, | |
| "rewards/accuracies": 0.643750011920929, | |
| "rewards/chosen": -1.570431113243103, | |
| "rewards/margins": 0.5336312055587769, | |
| "rewards/rejected": -2.10406231880188, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 0.9511924570160843, | |
| "grad_norm": 1.3314710855484009, | |
| "learning_rate": 3.705206165408703e-09, | |
| "logits/chosen": -0.9017612338066101, | |
| "logits/rejected": -0.9057533144950867, | |
| "logps/chosen": -448.4554138183594, | |
| "logps/rejected": -430.2477111816406, | |
| "loss": 0.0334, | |
| "loss/chosen-sft": 1.8261409997940063, | |
| "loss/dpo": 0.03343508765101433, | |
| "rewards/accuracies": 0.643750011920929, | |
| "rewards/chosen": -1.4459483623504639, | |
| "rewards/margins": 0.5900785326957703, | |
| "rewards/rejected": -2.0360267162323, | |
| "step": 1715 | |
| }, | |
| { | |
| "epoch": 0.9539656128674432, | |
| "grad_norm": 1.0403763055801392, | |
| "learning_rate": 3.301478186983897e-09, | |
| "logits/chosen": -0.9940805435180664, | |
| "logits/rejected": -1.1318776607513428, | |
| "logps/chosen": -436.7303161621094, | |
| "logps/rejected": -440.62969970703125, | |
| "loss": 0.0342, | |
| "loss/chosen-sft": 1.7668132781982422, | |
| "loss/dpo": 0.03422313928604126, | |
| "rewards/accuracies": 0.7437499761581421, | |
| "rewards/chosen": -1.4205049276351929, | |
| "rewards/margins": 0.8040353655815125, | |
| "rewards/rejected": -2.2245402336120605, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 0.956738768718802, | |
| "grad_norm": 1.7417303323745728, | |
| "learning_rate": 2.9208868974759937e-09, | |
| "logits/chosen": -0.8438510894775391, | |
| "logits/rejected": -0.8072006106376648, | |
| "logps/chosen": -452.6871643066406, | |
| "logps/rejected": -420.52227783203125, | |
| "loss": 0.0271, | |
| "loss/chosen-sft": 1.9397671222686768, | |
| "loss/dpo": 0.027111122384667397, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": -1.4771382808685303, | |
| "rewards/margins": 0.5044761896133423, | |
| "rewards/rejected": -1.981614351272583, | |
| "step": 1725 | |
| }, | |
| { | |
| "epoch": 0.9595119245701609, | |
| "grad_norm": 1.5373972654342651, | |
| "learning_rate": 2.5634679907440006e-09, | |
| "logits/chosen": -0.9225784540176392, | |
| "logits/rejected": -0.8528642654418945, | |
| "logps/chosen": -441.6475524902344, | |
| "logps/rejected": -410.14324951171875, | |
| "loss": 0.0367, | |
| "loss/chosen-sft": 1.814764380455017, | |
| "loss/dpo": 0.03674568608403206, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -1.4582865238189697, | |
| "rewards/margins": 0.40831345319747925, | |
| "rewards/rejected": -1.8665997982025146, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 0.9622850804215197, | |
| "grad_norm": 1.2935632467269897, | |
| "learning_rate": 2.229254987418744e-09, | |
| "logits/chosen": -0.8527067303657532, | |
| "logits/rejected": -0.7993227243423462, | |
| "logps/chosen": -499.4395446777344, | |
| "logps/rejected": -482.21197509765625, | |
| "loss": 0.0354, | |
| "loss/chosen-sft": 1.8621113300323486, | |
| "loss/dpo": 0.03540179878473282, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -1.4600284099578857, | |
| "rewards/margins": 0.7969890832901001, | |
| "rewards/rejected": -2.2570176124572754, | |
| "step": 1735 | |
| }, | |
| { | |
| "epoch": 0.9650582362728786, | |
| "grad_norm": 1.218867540359497, | |
| "learning_rate": 1.9182792317588294e-09, | |
| "logits/chosen": -0.9154025912284851, | |
| "logits/rejected": -0.8462129831314087, | |
| "logps/chosen": -430.40594482421875, | |
| "logps/rejected": -414.01397705078125, | |
| "loss": 0.0362, | |
| "loss/chosen-sft": 1.7988450527191162, | |
| "loss/dpo": 0.03615463525056839, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -1.418278694152832, | |
| "rewards/margins": 0.5125333666801453, | |
| "rewards/rejected": -1.930812120437622, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 0.9678313921242374, | |
| "grad_norm": 1.9518085718154907, | |
| "learning_rate": 1.6305698887113806e-09, | |
| "logits/chosen": -0.9541261792182922, | |
| "logits/rejected": -0.8191477656364441, | |
| "logps/chosen": -407.2529602050781, | |
| "logps/rejected": -422.18865966796875, | |
| "loss": 0.0323, | |
| "loss/chosen-sft": 1.8834625482559204, | |
| "loss/dpo": 0.0322595052421093, | |
| "rewards/accuracies": 0.7437499761581421, | |
| "rewards/chosen": -1.3437700271606445, | |
| "rewards/margins": 0.6855727434158325, | |
| "rewards/rejected": -2.0293426513671875, | |
| "step": 1745 | |
| }, | |
| { | |
| "epoch": 0.9706045479755963, | |
| "grad_norm": 1.7286250591278076, | |
| "learning_rate": 1.366153941176451e-09, | |
| "logits/chosen": -1.052943468093872, | |
| "logits/rejected": -0.9355376362800598, | |
| "logps/chosen": -460.63153076171875, | |
| "logps/rejected": -460.62994384765625, | |
| "loss": 0.0427, | |
| "loss/chosen-sft": 1.877771019935608, | |
| "loss/dpo": 0.042682547122240067, | |
| "rewards/accuracies": 0.606249988079071, | |
| "rewards/chosen": -1.5018032789230347, | |
| "rewards/margins": 0.6279391050338745, | |
| "rewards/rejected": -2.129742383956909, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.9733777038269551, | |
| "grad_norm": 0.8861921429634094, | |
| "learning_rate": 1.1250561874766029e-09, | |
| "logits/chosen": -1.0122451782226562, | |
| "logits/rejected": -0.9112497568130493, | |
| "logps/chosen": -472.03948974609375, | |
| "logps/rejected": -466.75006103515625, | |
| "loss": 0.0358, | |
| "loss/chosen-sft": 1.8839130401611328, | |
| "loss/dpo": 0.03582266345620155, | |
| "rewards/accuracies": 0.6812499761581421, | |
| "rewards/chosen": -1.5027332305908203, | |
| "rewards/margins": 0.59858238697052, | |
| "rewards/rejected": -2.101315498352051, | |
| "step": 1755 | |
| }, | |
| { | |
| "epoch": 0.9761508596783139, | |
| "grad_norm": 1.1699074506759644, | |
| "learning_rate": 9.072992390312117e-10, | |
| "logits/chosen": -0.8916594386100769, | |
| "logits/rejected": -0.7321555018424988, | |
| "logps/chosen": -414.146728515625, | |
| "logps/rejected": -429.60125732421875, | |
| "loss": 0.0415, | |
| "loss/chosen-sft": 1.8041099309921265, | |
| "loss/dpo": 0.04147591441869736, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -1.4231741428375244, | |
| "rewards/margins": 0.5474594235420227, | |
| "rewards/rejected": -1.9706335067749023, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 0.9789240155296728, | |
| "grad_norm": 2.7589733600616455, | |
| "learning_rate": 7.12903518235719e-10, | |
| "logits/chosen": -1.0372343063354492, | |
| "logits/rejected": -0.8465463519096375, | |
| "logps/chosen": -406.49755859375, | |
| "logps/rejected": -411.850341796875, | |
| "loss": 0.0382, | |
| "loss/chosen-sft": 1.7656471729278564, | |
| "loss/dpo": 0.03815629705786705, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -1.410011649131775, | |
| "rewards/margins": 0.5866178274154663, | |
| "rewards/rejected": -1.9966297149658203, | |
| "step": 1765 | |
| }, | |
| { | |
| "epoch": 0.9816971713810316, | |
| "grad_norm": 1.103261947631836, | |
| "learning_rate": 5.418872565464139e-10, | |
| "logits/chosen": -0.8118406534194946, | |
| "logits/rejected": -0.6884601712226868, | |
| "logps/chosen": -441.5873107910156, | |
| "logps/rejected": -420.6312561035156, | |
| "loss": 0.0308, | |
| "loss/chosen-sft": 1.925108551979065, | |
| "loss/dpo": 0.03077712655067444, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -1.486276388168335, | |
| "rewards/margins": 0.5229099988937378, | |
| "rewards/rejected": -2.0091867446899414, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 0.9844703272323905, | |
| "grad_norm": 1.2194581031799316, | |
| "learning_rate": 3.942664927706063e-10, | |
| "logits/chosen": -1.0195016860961914, | |
| "logits/rejected": -0.9845107793807983, | |
| "logps/chosen": -432.424560546875, | |
| "logps/rejected": -431.7875061035156, | |
| "loss": 0.0297, | |
| "loss/chosen-sft": 1.9019267559051514, | |
| "loss/dpo": 0.02968726120889187, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": -1.5260287523269653, | |
| "rewards/margins": 0.49342551827430725, | |
| "rewards/rejected": -2.01945424079895, | |
| "step": 1775 | |
| }, | |
| { | |
| "epoch": 0.9872434830837493, | |
| "grad_norm": 1.3297661542892456, | |
| "learning_rate": 2.700550715623029e-10, | |
| "logits/chosen": -0.9309781193733215, | |
| "logits/rejected": -0.765326201915741, | |
| "logps/chosen": -445.7706604003906, | |
| "logps/rejected": -450.1620178222656, | |
| "loss": 0.0314, | |
| "loss/chosen-sft": 1.8302927017211914, | |
| "loss/dpo": 0.03142596781253815, | |
| "rewards/accuracies": 0.6812499761581421, | |
| "rewards/chosen": -1.3721158504486084, | |
| "rewards/margins": 0.6550136804580688, | |
| "rewards/rejected": -2.0271294116973877, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 0.9900166389351082, | |
| "grad_norm": 1.6393502950668335, | |
| "learning_rate": 1.692646421239674e-10, | |
| "logits/chosen": -0.9880639910697937, | |
| "logits/rejected": -0.8702710270881653, | |
| "logps/chosen": -454.1924743652344, | |
| "logps/rejected": -474.16741943359375, | |
| "loss": 0.0314, | |
| "loss/chosen-sft": 1.8532609939575195, | |
| "loss/dpo": 0.031443167477846146, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -1.4502158164978027, | |
| "rewards/margins": 0.7252721786499023, | |
| "rewards/rejected": -2.175487995147705, | |
| "step": 1785 | |
| }, | |
| { | |
| "epoch": 0.992789794786467, | |
| "grad_norm": 1.782798409461975, | |
| "learning_rate": 9.190465711375606e-11, | |
| "logits/chosen": -0.8094294667243958, | |
| "logits/rejected": -0.825986385345459, | |
| "logps/chosen": -438.8473205566406, | |
| "logps/rejected": -475.4677734375, | |
| "loss": 0.0401, | |
| "loss/chosen-sft": 1.7468044757843018, | |
| "loss/dpo": 0.04005669802427292, | |
| "rewards/accuracies": 0.7437499761581421, | |
| "rewards/chosen": -1.3847625255584717, | |
| "rewards/margins": 0.7623748779296875, | |
| "rewards/rejected": -2.147137403488159, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 0.9955629506378258, | |
| "grad_norm": 1.64493989944458, | |
| "learning_rate": 3.798237175925423e-11, | |
| "logits/chosen": -0.9432379007339478, | |
| "logits/rejected": -0.890951931476593, | |
| "logps/chosen": -427.1351623535156, | |
| "logps/rejected": -463.54705810546875, | |
| "loss": 0.0352, | |
| "loss/chosen-sft": 1.7833278179168701, | |
| "loss/dpo": 0.03518053516745567, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -1.4109861850738525, | |
| "rewards/margins": 0.7486652135848999, | |
| "rewards/rejected": -2.159651279449463, | |
| "step": 1795 | |
| }, | |
| { | |
| "epoch": 0.9983361064891847, | |
| "grad_norm": 1.9120142459869385, | |
| "learning_rate": 7.502843176826478e-12, | |
| "logits/chosen": -0.9462081789970398, | |
| "logits/rejected": -0.8944603204727173, | |
| "logps/chosen": -472.4647521972656, | |
| "logps/rejected": -491.3990173339844, | |
| "loss": 0.0391, | |
| "loss/chosen-sft": 1.819483757019043, | |
| "loss/dpo": 0.03906460851430893, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -1.514047384262085, | |
| "rewards/margins": 0.6475565433502197, | |
| "rewards/rejected": -2.161604166030884, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "step": 1803, | |
| "total_flos": 0.0, | |
| "train_loss": 0.08027586419418396, | |
| "train_runtime": 2186.6137, | |
| "train_samples_per_second": 26.375, | |
| "train_steps_per_second": 0.825 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 1803, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 32, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |