Text Generation
PEFT
TensorBoard
Safetensors
English
medical
radiology
medical-coding
icd-10
cpt
llama-3
llama-3-70b
lora
healthcare
clinical
conversational
Instructions to use vineetdaniels/NYXMed-V17-Model with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- PEFT
How to use vineetdaniels/NYXMed-V17-Model with PEFT:
```python
from peft import PeftModel
from transformers import AutoModelForCausalLM

base_model = AutoModelForCausalLM.from_pretrained("vineetdaniels/NYXMed-V16-Model")
model = PeftModel.from_pretrained(base_model, "vineetdaniels/NYXMed-V17-Model")
```
- Notebooks
- Google Colab
- Kaggle
| [ | |
| { | |
| "step": 1405, | |
| "epoch": 0.3977634652133909, | |
| "wallclock": "2026-05-23T01:59:19.731727", | |
| "loss": 0.1505, | |
| "grad_norm": 0.860001266002655, | |
| "learning_rate": 9.27049077439764e-06 | |
| }, | |
| { | |
| "step": 1410, | |
| "epoch": 0.39917899355934605, | |
| "wallclock": "2026-05-23T02:01:12.843146", | |
| "loss": 0.137, | |
| "grad_norm": 0.8940677642822266, | |
| "learning_rate": 9.264517869578343e-06, | |
| "step_time_sec": 113.11 | |
| }, | |
| { | |
| "step": 1415, | |
| "epoch": 0.40059452190530115, | |
| "wallclock": "2026-05-23T02:03:05.328968", | |
| "loss": 0.1519, | |
| "grad_norm": 0.8663320541381836, | |
| "learning_rate": 9.258522553059383e-06, | |
| "step_time_sec": 112.49 | |
| }, | |
| { | |
| "step": 1420, | |
| "epoch": 0.4020100502512563, | |
| "wallclock": "2026-05-23T02:04:58.249080", | |
| "loss": 0.1329, | |
| "grad_norm": 0.8876581192016602, | |
| "learning_rate": 9.252504856348483e-06, | |
| "step_time_sec": 112.92 | |
| }, | |
| { | |
| "step": 1425, | |
| "epoch": 0.4034255785972114, | |
| "wallclock": "2026-05-23T02:06:50.925025", | |
| "loss": 0.1339, | |
| "grad_norm": 0.7425838708877563, | |
| "learning_rate": 9.246464811070978e-06, | |
| "step_time_sec": 112.68 | |
| }, | |
| { | |
| "step": 1430, | |
| "epoch": 0.40484110694316655, | |
| "wallclock": "2026-05-23T02:08:44.125444", | |
| "loss": 0.1263, | |
| "grad_norm": 0.8344400525093079, | |
| "learning_rate": 9.240402448969655e-06, | |
| "step_time_sec": 113.2 | |
| }, | |
| { | |
| "step": 1435, | |
| "epoch": 0.40625663528912165, | |
| "wallclock": "2026-05-23T02:10:37.926903", | |
| "loss": 0.1374, | |
| "grad_norm": 0.920082688331604, | |
| "learning_rate": 9.234317801904584e-06, | |
| "step_time_sec": 113.8 | |
| }, | |
| { | |
| "step": 1440, | |
| "epoch": 0.4076721636350768, | |
| "wallclock": "2026-05-23T02:12:30.509342", | |
| "loss": 0.1522, | |
| "grad_norm": 0.9682347178459167, | |
| "learning_rate": 9.228210901852953e-06, | |
| "step_time_sec": 112.58 | |
| }, | |
| { | |
| "step": 1445, | |
| "epoch": 0.4090876919810319, | |
| "wallclock": "2026-05-23T02:14:22.744101", | |
| "loss": 0.1435, | |
| "grad_norm": 0.8033989667892456, | |
| "learning_rate": 9.222081780908894e-06, | |
| "step_time_sec": 112.23 | |
| }, | |
| { | |
| "step": 1450, | |
| "epoch": 0.41050322032698705, | |
| "wallclock": "2026-05-23T02:16:16.036698", | |
| "loss": 0.132, | |
| "grad_norm": 1.0462369918823242, | |
| "learning_rate": 9.215930471283323e-06, | |
| "step_time_sec": 113.29, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 33.45, | |
| "mem_reserved_gb": 64.34 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 1455, | |
| "epoch": 0.4119187486729422, | |
| "wallclock": "2026-05-23T02:18:08.338694", | |
| "loss": 0.1657, | |
| "grad_norm": 2.268519401550293, | |
| "learning_rate": 9.209757005303761e-06, | |
| "step_time_sec": 112.3 | |
| }, | |
| { | |
| "step": 1460, | |
| "epoch": 0.4133342770188973, | |
| "wallclock": "2026-05-23T02:20:01.532146", | |
| "loss": 0.1165, | |
| "grad_norm": 0.7390187978744507, | |
| "learning_rate": 9.203561415414174e-06, | |
| "step_time_sec": 113.19 | |
| }, | |
| { | |
| "step": 1465, | |
| "epoch": 0.41474980536485245, | |
| "wallclock": "2026-05-23T02:21:55.232651", | |
| "loss": 0.12, | |
| "grad_norm": 0.884283721446991, | |
| "learning_rate": 9.197343734174798e-06, | |
| "step_time_sec": 113.7 | |
| }, | |
| { | |
| "step": 1470, | |
| "epoch": 0.41616533371080755, | |
| "wallclock": "2026-05-23T02:23:47.669724", | |
| "loss": 0.1227, | |
| "grad_norm": 0.7426964640617371, | |
| "learning_rate": 9.191103994261963e-06, | |
| "step_time_sec": 112.44 | |
| }, | |
| { | |
| "step": 1475, | |
| "epoch": 0.4175808620567627, | |
| "wallclock": "2026-05-23T02:25:40.551477", | |
| "loss": 0.1423, | |
| "grad_norm": 1.1171990633010864, | |
| "learning_rate": 9.184842228467929e-06, | |
| "step_time_sec": 112.88 | |
| }, | |
| { | |
| "step": 1480, | |
| "epoch": 0.4189963904027178, | |
| "wallclock": "2026-05-23T02:27:34.235355", | |
| "loss": 0.1356, | |
| "grad_norm": 1.0424611568450928, | |
| "learning_rate": 9.178558469700712e-06, | |
| "step_time_sec": 113.68 | |
| }, | |
| { | |
| "step": 1485, | |
| "epoch": 0.42041191874867295, | |
| "wallclock": "2026-05-23T02:29:26.581237", | |
| "loss": 0.1192, | |
| "grad_norm": 0.7916944026947021, | |
| "learning_rate": 9.172252750983904e-06, | |
| "step_time_sec": 112.35 | |
| }, | |
| { | |
| "step": 1490, | |
| "epoch": 0.42182744709462805, | |
| "wallclock": "2026-05-23T02:31:19.100873", | |
| "loss": 0.1178, | |
| "grad_norm": 0.6911448240280151, | |
| "learning_rate": 9.165925105456513e-06, | |
| "step_time_sec": 112.52 | |
| }, | |
| { | |
| "step": 1495, | |
| "epoch": 0.4232429754405832, | |
| "wallclock": "2026-05-23T02:33:12.432128", | |
| "loss": 0.1268, | |
| "grad_norm": 1.207095980644226, | |
| "learning_rate": 9.159575566372774e-06, | |
| "step_time_sec": 113.33 | |
| }, | |
| { | |
| "step": 1500, | |
| "epoch": 0.4246585037865383, | |
| "wallclock": "2026-05-23T02:35:05.236376", | |
| "loss": 0.1249, | |
| "grad_norm": 0.8602229952812195, | |
| "learning_rate": 9.153204167101984e-06, | |
| "step_time_sec": 112.8, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 33.45, | |
| "mem_reserved_gb": 65.95 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 1500, | |
| "epoch": 0.4246585037865383, | |
| "wallclock": "2026-05-23T02:35:56.386847", | |
| "eval_loss": 0.14635811746120453, | |
| "eval_runtime": 51.064, | |
| "eval_samples_per_second": 4.896, | |
| "eval_steps_per_second": 1.234, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 39.05, | |
| "mem_reserved_gb": 65.95 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 1505, | |
| "epoch": 0.42607403213249345, | |
| "wallclock": "2026-05-23T02:39:31.314975", | |
| "loss": 0.132, | |
| "grad_norm": 0.9033521413803101, | |
| "learning_rate": 9.146810941128326e-06, | |
| "step_time_sec": 266.08 | |
| }, | |
| { | |
| "step": 1510, | |
| "epoch": 0.4274895604784486, | |
| "wallclock": "2026-05-23T02:41:24.639692", | |
| "loss": 0.1235, | |
| "grad_norm": 0.9021329879760742, | |
| "learning_rate": 9.140395922050687e-06, | |
| "step_time_sec": 113.32 | |
| }, | |
| { | |
| "step": 1515, | |
| "epoch": 0.4289050888244037, | |
| "wallclock": "2026-05-23T02:43:21.636680", | |
| "loss": 0.1443, | |
| "grad_norm": 0.8108121752738953, | |
| "learning_rate": 9.133959143582485e-06, | |
| "step_time_sec": 117.0 | |
| }, | |
| { | |
| "step": 1520, | |
| "epoch": 0.43032061717035885, | |
| "wallclock": "2026-05-23T02:45:14.801586", | |
| "loss": 0.1256, | |
| "grad_norm": 0.9193041920661926, | |
| "learning_rate": 9.127500639551497e-06, | |
| "step_time_sec": 113.16 | |
| }, | |
| { | |
| "step": 1525, | |
| "epoch": 0.43173614551631395, | |
| "wallclock": "2026-05-23T02:47:07.650420", | |
| "loss": 0.1356, | |
| "grad_norm": 0.8465185761451721, | |
| "learning_rate": 9.12102044389967e-06, | |
| "step_time_sec": 112.85 | |
| }, | |
| { | |
| "step": 1530, | |
| "epoch": 0.4331516738622691, | |
| "wallclock": "2026-05-23T02:49:00.408689", | |
| "loss": 0.1384, | |
| "grad_norm": 0.973936140537262, | |
| "learning_rate": 9.114518590682955e-06, | |
| "step_time_sec": 112.76 | |
| }, | |
| { | |
| "step": 1535, | |
| "epoch": 0.4345672022082242, | |
| "wallclock": "2026-05-23T02:50:52.832459", | |
| "loss": 0.1274, | |
| "grad_norm": 1.2166610956192017, | |
| "learning_rate": 9.107995114071116e-06, | |
| "step_time_sec": 112.42 | |
| }, | |
| { | |
| "step": 1540, | |
| "epoch": 0.43598273055417935, | |
| "wallclock": "2026-05-23T02:52:44.842922", | |
| "loss": 0.122, | |
| "grad_norm": 0.985847532749176, | |
| "learning_rate": 9.101450048347562e-06, | |
| "step_time_sec": 112.01 | |
| }, | |
| { | |
| "step": 1545, | |
| "epoch": 0.43739825890013445, | |
| "wallclock": "2026-05-23T02:54:38.307969", | |
| "loss": 0.1365, | |
| "grad_norm": 0.7600606083869934, | |
| "learning_rate": 9.094883427909156e-06, | |
| "step_time_sec": 113.47 | |
| }, | |
| { | |
| "step": 1550, | |
| "epoch": 0.4388137872460896, | |
| "wallclock": "2026-05-23T02:56:31.349254", | |
| "loss": 0.1379, | |
| "grad_norm": 0.7994720339775085, | |
| "learning_rate": 9.088295287266042e-06, | |
| "step_time_sec": 113.04, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 33.45, | |
| "mem_reserved_gb": 69.86 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 1555, | |
| "epoch": 0.44022931559204476, | |
| "wallclock": "2026-05-23T02:58:24.231104", | |
| "loss": 0.1325, | |
| "grad_norm": 0.8235543370246887, | |
| "learning_rate": 9.081685661041463e-06, | |
| "step_time_sec": 112.88 | |
| }, | |
| { | |
| "step": 1560, | |
| "epoch": 0.44164484393799985, | |
| "wallclock": "2026-05-23T03:00:19.009451", | |
| "loss": 0.1112, | |
| "grad_norm": 1.33493173122406, | |
| "learning_rate": 9.075054583971575e-06, | |
| "step_time_sec": 114.78 | |
| }, | |
| { | |
| "step": 1565, | |
| "epoch": 0.443060372283955, | |
| "wallclock": "2026-05-23T03:02:11.720414", | |
| "loss": 0.1274, | |
| "grad_norm": 0.6676927804946899, | |
| "learning_rate": 9.068402090905263e-06, | |
| "step_time_sec": 112.71 | |
| }, | |
| { | |
| "step": 1570, | |
| "epoch": 0.4444759006299101, | |
| "wallclock": "2026-05-23T03:04:04.443929", | |
| "loss": 0.1158, | |
| "grad_norm": 2.0362584590911865, | |
| "learning_rate": 9.06172821680397e-06, | |
| "step_time_sec": 112.72 | |
| }, | |
| { | |
| "step": 1575, | |
| "epoch": 0.44589142897586526, | |
| "wallclock": "2026-05-23T03:05:58.111861", | |
| "loss": 0.1459, | |
| "grad_norm": 0.8041182160377502, | |
| "learning_rate": 9.055032996741492e-06, | |
| "step_time_sec": 113.67 | |
| }, | |
| { | |
| "step": 1580, | |
| "epoch": 0.44730695732182035, | |
| "wallclock": "2026-05-23T03:07:51.100629", | |
| "loss": 0.1209, | |
| "grad_norm": 0.6887193918228149, | |
| "learning_rate": 9.048316465903823e-06, | |
| "step_time_sec": 112.99 | |
| }, | |
| { | |
| "step": 1585, | |
| "epoch": 0.4487224856677755, | |
| "wallclock": "2026-05-23T03:09:44.719059", | |
| "loss": 0.1472, | |
| "grad_norm": 0.9417322278022766, | |
| "learning_rate": 9.041578659588938e-06, | |
| "step_time_sec": 113.62 | |
| }, | |
| { | |
| "step": 1590, | |
| "epoch": 0.4501380140137306, | |
| "wallclock": "2026-05-23T03:11:39.177916", | |
| "loss": 0.1198, | |
| "grad_norm": 0.7076205611228943, | |
| "learning_rate": 9.034819613206631e-06, | |
| "step_time_sec": 114.46 | |
| }, | |
| { | |
| "step": 1595, | |
| "epoch": 0.45155354235968576, | |
| "wallclock": "2026-05-23T03:13:32.601273", | |
| "loss": 0.1576, | |
| "grad_norm": 0.8126243948936462, | |
| "learning_rate": 9.028039362278318e-06, | |
| "step_time_sec": 113.42 | |
| }, | |
| { | |
| "step": 1600, | |
| "epoch": 0.45296907070564085, | |
| "wallclock": "2026-05-23T03:15:25.341230", | |
| "loss": 0.1392, | |
| "grad_norm": 0.8675165176391602, | |
| "learning_rate": 9.021237942436855e-06, | |
| "step_time_sec": 112.74, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 33.45, | |
| "mem_reserved_gb": 69.86 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 1600, | |
| "epoch": 0.45296907070564085, | |
| "wallclock": "2026-05-23T03:16:17.416846", | |
| "eval_loss": 0.14519159495830536, | |
| "eval_runtime": 51.9828, | |
| "eval_samples_per_second": 4.809, | |
| "eval_steps_per_second": 1.212, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 39.05, | |
| "mem_reserved_gb": 69.86 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 1605, | |
| "epoch": 0.454384599051596, | |
| "wallclock": "2026-05-23T03:19:54.242069", | |
| "loss": 0.1252, | |
| "grad_norm": 0.7624632716178894, | |
| "learning_rate": 9.01441538942635e-06, | |
| "step_time_sec": 268.9 | |
| }, | |
| { | |
| "step": 1610, | |
| "epoch": 0.45580012739755116, | |
| "wallclock": "2026-05-23T03:21:47.588042", | |
| "loss": 0.1305, | |
| "grad_norm": 0.5635123252868652, | |
| "learning_rate": 9.007571739101968e-06, | |
| "step_time_sec": 113.35 | |
| }, | |
| { | |
| "step": 1615, | |
| "epoch": 0.45721565574350626, | |
| "wallclock": "2026-05-23T03:23:41.226600", | |
| "loss": 0.1117, | |
| "grad_norm": 0.7951876521110535, | |
| "learning_rate": 9.000707027429757e-06, | |
| "step_time_sec": 113.64 | |
| }, | |
| { | |
| "step": 1620, | |
| "epoch": 0.4586311840894614, | |
| "wallclock": "2026-05-23T03:25:36.047456", | |
| "loss": 0.1283, | |
| "grad_norm": 1.121505618095398, | |
| "learning_rate": 8.993821290486442e-06, | |
| "step_time_sec": 114.82 | |
| }, | |
| { | |
| "step": 1625, | |
| "epoch": 0.4600467124354165, | |
| "wallclock": "2026-05-23T03:27:30.028714", | |
| "loss": 0.1127, | |
| "grad_norm": 0.9441781640052795, | |
| "learning_rate": 8.98691456445925e-06, | |
| "step_time_sec": 113.98 | |
| }, | |
| { | |
| "step": 1630, | |
| "epoch": 0.46146224078137166, | |
| "wallclock": "2026-05-23T03:29:23.551875", | |
| "loss": 0.1246, | |
| "grad_norm": 0.8297203779220581, | |
| "learning_rate": 8.979986885645712e-06, | |
| "step_time_sec": 113.52 | |
| }, | |
| { | |
| "step": 1635, | |
| "epoch": 0.46287776912732675, | |
| "wallclock": "2026-05-23T03:31:17.421607", | |
| "loss": 0.1365, | |
| "grad_norm": 1.1671549081802368, | |
| "learning_rate": 8.973038290453475e-06, | |
| "step_time_sec": 113.87 | |
| }, | |
| { | |
| "step": 1640, | |
| "epoch": 0.4642932974732819, | |
| "wallclock": "2026-05-23T03:33:12.304973", | |
| "loss": 0.1158, | |
| "grad_norm": 0.8376030325889587, | |
| "learning_rate": 8.966068815400108e-06, | |
| "step_time_sec": 114.88 | |
| }, | |
| { | |
| "step": 1645, | |
| "epoch": 0.465708825819237, | |
| "wallclock": "2026-05-23T03:35:06.915657", | |
| "loss": 0.1276, | |
| "grad_norm": 0.9669609069824219, | |
| "learning_rate": 8.95907849711291e-06, | |
| "step_time_sec": 114.61 | |
| }, | |
| { | |
| "step": 1650, | |
| "epoch": 0.46712435416519216, | |
| "wallclock": "2026-05-23T03:36:59.993882", | |
| "loss": 0.1638, | |
| "grad_norm": 1.0771512985229492, | |
| "learning_rate": 8.952067372328726e-06, | |
| "step_time_sec": 113.08, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 33.45, | |
| "mem_reserved_gb": 69.86 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 1655, | |
| "epoch": 0.4685398825111473, | |
| "wallclock": "2026-05-23T03:38:54.654794", | |
| "loss": 0.1403, | |
| "grad_norm": 0.7746709585189819, | |
| "learning_rate": 8.94503547789374e-06, | |
| "step_time_sec": 114.66 | |
| }, | |
| { | |
| "step": 1660, | |
| "epoch": 0.4699554108571024, | |
| "wallclock": "2026-05-23T03:40:47.102060", | |
| "loss": 0.1352, | |
| "grad_norm": 1.1372244358062744, | |
| "learning_rate": 8.937982850763293e-06, | |
| "step_time_sec": 112.45 | |
| }, | |
| { | |
| "step": 1665, | |
| "epoch": 0.47137093920305756, | |
| "wallclock": "2026-05-23T03:42:40.432111", | |
| "loss": 0.1537, | |
| "grad_norm": 0.8946406245231628, | |
| "learning_rate": 8.930909528001682e-06, | |
| "step_time_sec": 113.33 | |
| }, | |
| { | |
| "step": 1670, | |
| "epoch": 0.47278646754901266, | |
| "wallclock": "2026-05-23T03:44:34.489209", | |
| "loss": 0.1252, | |
| "grad_norm": 0.6626783013343811, | |
| "learning_rate": 8.923815546781968e-06, | |
| "step_time_sec": 114.06 | |
| }, | |
| { | |
| "step": 1675, | |
| "epoch": 0.4742019958949678, | |
| "wallclock": "2026-05-23T03:46:29.929407", | |
| "loss": 0.1148, | |
| "grad_norm": 0.7032930850982666, | |
| "learning_rate": 8.916700944385783e-06, | |
| "step_time_sec": 115.44 | |
| }, | |
| { | |
| "step": 1680, | |
| "epoch": 0.4756175242409229, | |
| "wallclock": "2026-05-23T03:48:23.844510", | |
| "loss": 0.139, | |
| "grad_norm": 0.9184028506278992, | |
| "learning_rate": 8.90956575820313e-06, | |
| "step_time_sec": 113.92 | |
| }, | |
| { | |
| "step": 1685, | |
| "epoch": 0.47703305258687806, | |
| "wallclock": "2026-05-23T03:50:18.747236", | |
| "loss": 0.1439, | |
| "grad_norm": 0.9489091038703918, | |
| "learning_rate": 8.902410025732182e-06, | |
| "step_time_sec": 114.9 | |
| }, | |
| { | |
| "step": 1690, | |
| "epoch": 0.47844858093283316, | |
| "wallclock": "2026-05-23T03:52:12.030744", | |
| "loss": 0.1063, | |
| "grad_norm": 0.8725413680076599, | |
| "learning_rate": 8.895233784579098e-06, | |
| "step_time_sec": 113.28 | |
| }, | |
| { | |
| "step": 1695, | |
| "epoch": 0.4798641092787883, | |
| "wallclock": "2026-05-23T03:54:05.237973", | |
| "loss": 0.1254, | |
| "grad_norm": 0.8798477649688721, | |
| "learning_rate": 8.888037072457817e-06, | |
| "step_time_sec": 113.21 | |
| }, | |
| { | |
| "step": 1700, | |
| "epoch": 0.48127963762474346, | |
| "wallclock": "2026-05-23T03:55:59.391912", | |
| "loss": 0.1357, | |
| "grad_norm": 0.8217583298683167, | |
| "learning_rate": 8.88081992718986e-06, | |
| "step_time_sec": 114.15, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 33.45, | |
| "mem_reserved_gb": 69.86 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 1700, | |
| "epoch": 0.48127963762474346, | |
| "wallclock": "2026-05-23T03:56:51.332412", | |
| "eval_loss": 0.14282415807247162, | |
| "eval_runtime": 51.857, | |
| "eval_samples_per_second": 4.821, | |
| "eval_steps_per_second": 1.215, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 39.05, | |
| "mem_reserved_gb": 69.86 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 1705, | |
| "epoch": 0.48269516597069856, | |
| "wallclock": "2026-05-23T04:00:27.956136", | |
| "loss": 0.1428, | |
| "grad_norm": 0.7931806445121765, | |
| "learning_rate": 8.873582386704132e-06, | |
| "step_time_sec": 268.56 | |
| }, | |
| { | |
| "step": 1710, | |
| "epoch": 0.4841106943166537, | |
| "wallclock": "2026-05-23T04:02:22.110676", | |
| "loss": 0.1402, | |
| "grad_norm": 1.0113517045974731, | |
| "learning_rate": 8.86632448903672e-06, | |
| "step_time_sec": 114.15 | |
| }, | |
| { | |
| "step": 1715, | |
| "epoch": 0.4855262226626088, | |
| "wallclock": "2026-05-23T04:04:17.103828", | |
| "loss": 0.1213, | |
| "grad_norm": 0.9483981132507324, | |
| "learning_rate": 8.859046272330698e-06, | |
| "step_time_sec": 114.99 | |
| }, | |
| { | |
| "step": 1720, | |
| "epoch": 0.48694175100856396, | |
| "wallclock": "2026-05-23T04:06:09.837485", | |
| "loss": 0.1287, | |
| "grad_norm": 0.8060489296913147, | |
| "learning_rate": 8.851747774835927e-06, | |
| "step_time_sec": 112.73 | |
| }, | |
| { | |
| "step": 1725, | |
| "epoch": 0.48835727935451906, | |
| "wallclock": "2026-05-23T04:08:03.048184", | |
| "loss": 0.1348, | |
| "grad_norm": 1.2514666318893433, | |
| "learning_rate": 8.84442903490885e-06, | |
| "step_time_sec": 113.21 | |
| }, | |
| { | |
| "step": 1730, | |
| "epoch": 0.4897728077004742, | |
| "wallclock": "2026-05-23T04:09:56.786981", | |
| "loss": 0.1261, | |
| "grad_norm": 0.8523698449134827, | |
| "learning_rate": 8.837090091012289e-06, | |
| "step_time_sec": 113.74 | |
| }, | |
| { | |
| "step": 1735, | |
| "epoch": 0.4911883360464293, | |
| "wallclock": "2026-05-23T04:11:50.314356", | |
| "loss": 0.1365, | |
| "grad_norm": 1.0180977582931519, | |
| "learning_rate": 8.82973098171525e-06, | |
| "step_time_sec": 113.53 | |
| }, | |
| { | |
| "step": 1740, | |
| "epoch": 0.49260386439238446, | |
| "wallclock": "2026-05-23T04:13:43.729160", | |
| "loss": 0.1338, | |
| "grad_norm": 0.5706004500389099, | |
| "learning_rate": 8.822351745692714e-06, | |
| "step_time_sec": 113.41 | |
| }, | |
| { | |
| "step": 1745, | |
| "epoch": 0.49401939273833956, | |
| "wallclock": "2026-05-23T04:15:36.906607", | |
| "loss": 0.1225, | |
| "grad_norm": 0.8971516489982605, | |
| "learning_rate": 8.814952421725434e-06, | |
| "step_time_sec": 113.18 | |
| }, | |
| { | |
| "step": 1750, | |
| "epoch": 0.4954349210842947, | |
| "wallclock": "2026-05-23T04:17:31.144814", | |
| "loss": 0.1199, | |
| "grad_norm": 0.8799176812171936, | |
| "learning_rate": 8.807533048699734e-06, | |
| "step_time_sec": 114.24, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 33.45, | |
| "mem_reserved_gb": 69.86 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 1755, | |
| "epoch": 0.49685044943024986, | |
| "wallclock": "2026-05-23T04:19:24.124359", | |
| "loss": 0.1161, | |
| "grad_norm": 0.7670193910598755, | |
| "learning_rate": 8.800093665607307e-06, | |
| "step_time_sec": 112.98 | |
| }, | |
| { | |
| "step": 1760, | |
| "epoch": 0.49826597777620496, | |
| "wallclock": "2026-05-23T04:21:16.253579", | |
| "loss": 0.1362, | |
| "grad_norm": 1.0961898565292358, | |
| "learning_rate": 8.792634311545002e-06, | |
| "step_time_sec": 112.13 | |
| }, | |
| { | |
| "step": 1765, | |
| "epoch": 0.4996815061221601, | |
| "wallclock": "2026-05-23T04:23:08.900369", | |
| "loss": 0.1246, | |
| "grad_norm": 0.9300926923751831, | |
| "learning_rate": 8.785155025714626e-06, | |
| "step_time_sec": 112.65 | |
| }, | |
| { | |
| "step": 1770, | |
| "epoch": 0.5010970344681153, | |
| "wallclock": "2026-05-23T04:25:01.641415", | |
| "loss": 0.13, | |
| "grad_norm": 0.9323188066482544, | |
| "learning_rate": 8.777655847422734e-06, | |
| "step_time_sec": 112.74 | |
| }, | |
| { | |
| "step": 1775, | |
| "epoch": 0.5025125628140703, | |
| "wallclock": "2026-05-23T04:26:53.919382", | |
| "loss": 0.1228, | |
| "grad_norm": 0.8098039627075195, | |
| "learning_rate": 8.770136816080426e-06, | |
| "step_time_sec": 112.28 | |
| }, | |
| { | |
| "step": 1780, | |
| "epoch": 0.5039280911600255, | |
| "wallclock": "2026-05-23T04:28:47.742000", | |
| "loss": 0.1395, | |
| "grad_norm": 0.857759952545166, | |
| "learning_rate": 8.76259797120313e-06, | |
| "step_time_sec": 113.82 | |
| }, | |
| { | |
| "step": 1785, | |
| "epoch": 0.5053436195059806, | |
| "wallclock": "2026-05-23T04:30:40.247364", | |
| "loss": 0.1259, | |
| "grad_norm": 0.847581148147583, | |
| "learning_rate": 8.755039352410414e-06, | |
| "step_time_sec": 112.51 | |
| }, | |
| { | |
| "step": 1790, | |
| "epoch": 0.5067591478519358, | |
| "wallclock": "2026-05-23T04:32:33.726589", | |
| "loss": 0.1352, | |
| "grad_norm": 0.7166717052459717, | |
| "learning_rate": 8.747460999425755e-06, | |
| "step_time_sec": 113.48 | |
| }, | |
| { | |
| "step": 1795, | |
| "epoch": 0.5081746761978909, | |
| "wallclock": "2026-05-23T04:34:27.718052", | |
| "loss": 0.1319, | |
| "grad_norm": 1.0256786346435547, | |
| "learning_rate": 8.739862952076346e-06, | |
| "step_time_sec": 113.99 | |
| }, | |
| { | |
| "step": 1800, | |
| "epoch": 0.509590204543846, | |
| "wallclock": "2026-05-23T04:36:20.348096", | |
| "loss": 0.1174, | |
| "grad_norm": 0.7882758975028992, | |
| "learning_rate": 8.732245250292878e-06, | |
| "step_time_sec": 112.63, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 33.45, | |
| "mem_reserved_gb": 69.86 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 1800, | |
| "epoch": 0.509590204543846, | |
| "wallclock": "2026-05-23T04:37:12.805799", | |
| "eval_loss": 0.14175137877464294, | |
| "eval_runtime": 52.3687, | |
| "eval_samples_per_second": 4.774, | |
| "eval_steps_per_second": 1.203, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 39.05, | |
| "mem_reserved_gb": 69.86 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 1805, | |
| "epoch": 0.5110057328898011, | |
| "wallclock": "2026-05-23T04:40:48.723033", | |
| "loss": 0.1259, | |
| "grad_norm": 0.9180939793586731, | |
| "learning_rate": 8.72460793410934e-06, | |
| "step_time_sec": 268.37 | |
| }, | |
| { | |
| "step": 1810, | |
| "epoch": 0.5124212612357563, | |
| "wallclock": "2026-05-23T04:42:42.010667", | |
| "loss": 0.1238, | |
| "grad_norm": 0.8965495228767395, | |
| "learning_rate": 8.716951043662796e-06, | |
| "step_time_sec": 113.29 | |
| }, | |
| { | |
| "step": 1815, | |
| "epoch": 0.5138367895817114, | |
| "wallclock": "2026-05-23T04:44:35.309189", | |
| "loss": 0.1064, | |
| "grad_norm": 0.9334513545036316, | |
| "learning_rate": 8.709274619193182e-06, | |
| "step_time_sec": 113.3 | |
| }, | |
| { | |
| "step": 1820, | |
| "epoch": 0.5152523179276665, | |
| "wallclock": "2026-05-23T04:46:29.001032", | |
| "loss": 0.1171, | |
| "grad_norm": 0.7548913955688477, | |
| "learning_rate": 8.701578701043097e-06, | |
| "step_time_sec": 113.69 | |
| }, | |
| { | |
| "step": 1825, | |
| "epoch": 0.5166678462736216, | |
| "wallclock": "2026-05-23T04:48:22.554066", | |
| "loss": 0.1248, | |
| "grad_norm": 1.022698998451233, | |
| "learning_rate": 8.693863329657576e-06, | |
| "step_time_sec": 113.55 | |
| }, | |
| { | |
| "step": 1830, | |
| "epoch": 0.5180833746195768, | |
| "wallclock": "2026-05-23T04:50:15.543925", | |
| "loss": 0.1423, | |
| "grad_norm": 1.0240012407302856, | |
| "learning_rate": 8.686128545583906e-06, | |
| "step_time_sec": 112.99 | |
| }, | |
| { | |
| "step": 1835, | |
| "epoch": 0.5194989029655319, | |
| "wallclock": "2026-05-23T04:52:08.302700", | |
| "loss": 0.1373, | |
| "grad_norm": 1.0934542417526245, | |
| "learning_rate": 8.678374389471375e-06, | |
| "step_time_sec": 112.76 | |
| }, | |
| { | |
| "step": 1840, | |
| "epoch": 0.520914431311487, | |
| "wallclock": "2026-05-23T04:54:00.947870", | |
| "loss": 0.1463, | |
| "grad_norm": 1.0597333908081055, | |
| "learning_rate": 8.670600902071096e-06, | |
| "step_time_sec": 112.65 | |
| }, | |
| { | |
| "step": 1845, | |
| "epoch": 0.5223299596574421, | |
| "wallclock": "2026-05-23T04:55:54.818374", | |
| "loss": 0.1206, | |
| "grad_norm": 0.7178345918655396, | |
| "learning_rate": 8.662808124235765e-06, | |
| "step_time_sec": 113.87 | |
| }, | |
| { | |
| "step": 1850, | |
| "epoch": 0.5237454880033973, | |
| "wallclock": "2026-05-23T04:57:48.527340", | |
| "loss": 0.1075, | |
| "grad_norm": 1.275473952293396, | |
| "learning_rate": 8.65499609691946e-06, | |
| "step_time_sec": 113.71, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 33.45, | |
| "mem_reserved_gb": 69.86 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 1855, | |
| "epoch": 0.5251610163493524, | |
| "wallclock": "2026-05-23T04:59:41.867913", | |
| "loss": 0.1023, | |
| "grad_norm": 0.5519967675209045, | |
| "learning_rate": 8.647164861177422e-06, | |
| "step_time_sec": 113.34 | |
| }, | |
| { | |
| "step": 1860, | |
| "epoch": 0.5265765446953076, | |
| "wallclock": "2026-05-23T05:01:36.546653", | |
| "loss": 0.1367, | |
| "grad_norm": 0.9184526205062866, | |
| "learning_rate": 8.639314458165839e-06, | |
| "step_time_sec": 114.68 | |
| }, | |
| { | |
| "step": 1865, | |
| "epoch": 0.5279920730412626, | |
| "wallclock": "2026-05-23T05:03:30.220915", | |
| "loss": 0.1332, | |
| "grad_norm": 0.938758373260498, | |
| "learning_rate": 8.631444929141635e-06, | |
| "step_time_sec": 113.67 | |
| }, | |
| { | |
| "step": 1870, | |
| "epoch": 0.5294076013872178, | |
| "wallclock": "2026-05-23T05:05:24.720616", | |
| "loss": 0.107, | |
| "grad_norm": 0.8511345982551575, | |
| "learning_rate": 8.62355631546224e-06, | |
| "step_time_sec": 114.5 | |
| }, | |
| { | |
| "step": 1875, | |
| "epoch": 0.5308231297331729, | |
| "wallclock": "2026-05-23T05:07:19.388697", | |
| "loss": 0.1276, | |
| "grad_norm": 1.1140179634094238, | |
| "learning_rate": 8.615648658585392e-06, | |
| "step_time_sec": 114.67 | |
| }, | |
| { | |
| "step": 1880, | |
| "epoch": 0.5322386580791281, | |
| "wallclock": "2026-05-23T05:09:13.975351", | |
| "loss": 0.117, | |
| "grad_norm": 0.6539268493652344, | |
| "learning_rate": 8.607722000068898e-06, | |
| "step_time_sec": 114.59 | |
| }, | |
| { | |
| "step": 1885, | |
| "epoch": 0.5336541864250831, | |
| "wallclock": "2026-05-23T05:11:08.325687", | |
| "loss": 0.1193, | |
| "grad_norm": 0.8391310572624207, | |
| "learning_rate": 8.599776381570433e-06, | |
| "step_time_sec": 114.35 | |
| }, | |
| { | |
| "step": 1890, | |
| "epoch": 0.5350697147710383, | |
| "wallclock": "2026-05-23T05:13:02.941530", | |
| "loss": 0.1264, | |
| "grad_norm": 0.844965398311615, | |
| "learning_rate": 8.59181184484731e-06, | |
| "step_time_sec": 114.62 | |
| }, | |
| { | |
| "step": 1895, | |
| "epoch": 0.5364852431169934, | |
| "wallclock": "2026-05-23T05:14:56.481372", | |
| "loss": 0.1396, | |
| "grad_norm": 0.7179044485092163, | |
| "learning_rate": 8.583828431756272e-06, | |
| "step_time_sec": 113.54 | |
| }, | |
| { | |
| "step": 1900, | |
| "epoch": 0.5379007714629486, | |
| "wallclock": "2026-05-23T05:16:51.210427", | |
| "loss": 0.0974, | |
| "grad_norm": 0.8166824579238892, | |
| "learning_rate": 8.575826184253254e-06, | |
| "step_time_sec": 114.73, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 33.45, | |
| "mem_reserved_gb": 69.86 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 1900, | |
| "epoch": 0.5379007714629486, | |
| "wallclock": "2026-05-23T05:17:43.683064", | |
| "eval_loss": 0.14031976461410522, | |
| "eval_runtime": 52.3833, | |
| "eval_samples_per_second": 4.773, | |
| "eval_steps_per_second": 1.203, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 39.05, | |
| "mem_reserved_gb": 69.86 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 1905, | |
| "epoch": 0.5393162998089037, | |
| "wallclock": "2026-05-23T05:21:18.913048", | |
| "loss": 0.1219, | |
| "grad_norm": 1.006734013557434, | |
| "learning_rate": 8.567805144393176e-06, | |
| "step_time_sec": 267.7 | |
| }, | |
| { | |
| "step": 1910, | |
| "epoch": 0.5407318281548588, | |
| "wallclock": "2026-05-23T05:23:13.218448", | |
| "loss": 0.1284, | |
| "grad_norm": 0.8619974255561829, | |
| "learning_rate": 8.559765354329728e-06, | |
| "step_time_sec": 114.31 | |
| }, | |
| { | |
| "step": 1915, | |
| "epoch": 0.5421473565008139, | |
| "wallclock": "2026-05-23T05:25:08.140980", | |
| "loss": 0.1184, | |
| "grad_norm": 1.2139092683792114, | |
| "learning_rate": 8.55170685631513e-06, | |
| "step_time_sec": 114.92 | |
| }, | |
| { | |
| "step": 1920, | |
| "epoch": 0.5435628848467691, | |
| "wallclock": "2026-05-23T05:27:03.707486", | |
| "loss": 0.1129, | |
| "grad_norm": 0.9047484397888184, | |
| "learning_rate": 8.54362969269992e-06, | |
| "step_time_sec": 115.57 | |
| }, | |
| { | |
| "step": 1925, | |
| "epoch": 0.5449784131927242, | |
| "wallclock": "2026-05-23T05:28:57.612333", | |
| "loss": 0.1163, | |
| "grad_norm": 0.6891061663627625, | |
| "learning_rate": 8.535533905932739e-06, | |
| "step_time_sec": 113.9 | |
| }, | |
| { | |
| "step": 1930, | |
| "epoch": 0.5463939415386793, | |
| "wallclock": "2026-05-23T05:30:52.594285", | |
| "loss": 0.1164, | |
| "grad_norm": 0.6650737524032593, | |
| "learning_rate": 8.527419538560088e-06, | |
| "step_time_sec": 114.98 | |
| }, | |
| { | |
| "step": 1935, | |
| "epoch": 0.5478094698846344, | |
| "wallclock": "2026-05-23T05:32:48.432100", | |
| "loss": 0.1187, | |
| "grad_norm": 1.1412484645843506, | |
| "learning_rate": 8.51928663322613e-06, | |
| "step_time_sec": 115.84 | |
| }, | |
| { | |
| "step": 1940, | |
| "epoch": 0.5492249982305896, | |
| "wallclock": "2026-05-23T05:34:43.177149", | |
| "loss": 0.1342, | |
| "grad_norm": 0.7133747339248657, | |
| "learning_rate": 8.511135232672442e-06, | |
| "step_time_sec": 114.75 | |
| }, | |
| { | |
| "step": 1945, | |
| "epoch": 0.5506405265765447, | |
| "wallclock": "2026-05-23T05:36:42.534792", | |
| "loss": 0.1132, | |
| "grad_norm": 1.0151540040969849, | |
| "learning_rate": 8.502965379737802e-06, | |
| "step_time_sec": 119.36 | |
| }, | |
| { | |
| "step": 1950, | |
| "epoch": 0.5520560549224999, | |
| "wallclock": "2026-05-23T05:38:46.820577", | |
| "loss": 0.1273, | |
| "grad_norm": 1.6805675029754639, | |
| "learning_rate": 8.494777117357964e-06, | |
| "step_time_sec": 124.29, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 33.45, | |
| "mem_reserved_gb": 69.86 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 1955, | |
| "epoch": 0.5534715832684549, | |
| "wallclock": "2026-05-23T05:40:50.018680", | |
| "loss": 0.1142, | |
| "grad_norm": 0.9018206596374512, | |
| "learning_rate": 8.486570488565432e-06, | |
| "step_time_sec": 123.2 | |
| }, | |
| { | |
| "step": 1960, | |
| "epoch": 0.5548871116144101, | |
| "wallclock": "2026-05-23T05:42:53.671070", | |
| "loss": 0.1258, | |
| "grad_norm": 0.7533476948738098, | |
| "learning_rate": 8.478345536489232e-06, | |
| "step_time_sec": 123.65 | |
| }, | |
| { | |
| "step": 1965, | |
| "epoch": 0.5563026399603652, | |
| "wallclock": "2026-05-23T05:44:56.957810", | |
| "loss": 0.1218, | |
| "grad_norm": 1.134895920753479, | |
| "learning_rate": 8.470102304354685e-06, | |
| "step_time_sec": 123.29 | |
| }, | |
| { | |
| "step": 1970, | |
| "epoch": 0.5577181683063204, | |
| "wallclock": "2026-05-23T05:47:01.054040", | |
| "loss": 0.1344, | |
| "grad_norm": 0.9846596717834473, | |
| "learning_rate": 8.461840835483179e-06, | |
| "step_time_sec": 124.1 | |
| }, | |
| { | |
| "step": 1975, | |
| "epoch": 0.5591336966522754, | |
| "wallclock": "2026-05-23T05:49:04.326418", | |
| "loss": 0.1272, | |
| "grad_norm": 0.8339362144470215, | |
| "learning_rate": 8.45356117329195e-06, | |
| "step_time_sec": 123.27 | |
| }, | |
| { | |
| "step": 1980, | |
| "epoch": 0.5605492249982306, | |
| "wallclock": "2026-05-23T05:51:07.881648", | |
| "loss": 0.1041, | |
| "grad_norm": 1.041932463645935, | |
| "learning_rate": 8.445263361293839e-06, | |
| "step_time_sec": 123.56 | |
| }, | |
| { | |
| "step": 1985, | |
| "epoch": 0.5619647533441857, | |
| "wallclock": "2026-05-23T05:53:11.738690", | |
| "loss": 0.1492, | |
| "grad_norm": 0.9378158450126648, | |
| "learning_rate": 8.436947443097074e-06, | |
| "step_time_sec": 123.86 | |
| }, | |
| { | |
| "step": 1990, | |
| "epoch": 0.5633802816901409, | |
| "wallclock": "2026-05-23T05:55:16.469073", | |
| "loss": 0.1055, | |
| "grad_norm": 1.0052165985107422, | |
| "learning_rate": 8.428613462405042e-06, | |
| "step_time_sec": 124.73 | |
| }, | |
| { | |
| "step": 1995, | |
| "epoch": 0.564795810036096, | |
| "wallclock": "2026-05-23T05:57:21.072731", | |
| "loss": 0.1157, | |
| "grad_norm": 0.9656962752342224, | |
| "learning_rate": 8.42026146301605e-06, | |
| "step_time_sec": 124.6 | |
| }, | |
| { | |
| "step": 2000, | |
| "epoch": 0.5662113383820511, | |
| "wallclock": "2026-05-23T05:59:27.133239", | |
| "loss": 0.1099, | |
| "grad_norm": 0.6400126814842224, | |
| "learning_rate": 8.411891488823102e-06, | |
| "step_time_sec": 126.06, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 33.45, | |
| "mem_reserved_gb": 69.86 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 2000, | |
| "epoch": 0.5662113383820511, | |
| "wallclock": "2026-05-23T06:00:26.330552", | |
| "eval_loss": 0.13213595747947693, | |
| "eval_runtime": 59.102, | |
| "eval_samples_per_second": 4.23, | |
| "eval_steps_per_second": 1.066, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 39.05, | |
| "mem_reserved_gb": 69.86 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 2005, | |
| "epoch": 0.5676268667280062, | |
| "wallclock": "2026-05-23T06:04:02.423533", | |
| "loss": 0.1243, | |
| "grad_norm": 1.0383392572402954, | |
| "learning_rate": 8.40350358381367e-06, | |
| "step_time_sec": 275.29 | |
| }, | |
| { | |
| "step": 2010, | |
| "epoch": 0.5690423950739614, | |
| "wallclock": "2026-05-23T06:05:55.119665", | |
| "loss": 0.1192, | |
| "grad_norm": 1.1544498205184937, | |
| "learning_rate": 8.39509779206945e-06, | |
| "step_time_sec": 112.7 | |
| }, | |
| { | |
| "step": 2015, | |
| "epoch": 0.5704579234199165, | |
| "wallclock": "2026-05-23T06:07:49.815988", | |
| "loss": 0.125, | |
| "grad_norm": 1.1813828945159912, | |
| "learning_rate": 8.386674157766156e-06, | |
| "step_time_sec": 114.7 | |
| }, | |
| { | |
| "step": 2020, | |
| "epoch": 0.5718734517658716, | |
| "wallclock": "2026-05-23T06:09:44.079892", | |
| "loss": 0.0941, | |
| "grad_norm": 0.582125723361969, | |
| "learning_rate": 8.378232725173253e-06, | |
| "step_time_sec": 114.26 | |
| }, | |
| { | |
| "step": 2025, | |
| "epoch": 0.5732889801118267, | |
| "wallclock": "2026-05-23T06:11:37.953666", | |
| "loss": 0.1276, | |
| "grad_norm": 0.8630328178405762, | |
| "learning_rate": 8.369773538653756e-06, | |
| "step_time_sec": 113.87 | |
| }, | |
| { | |
| "step": 2030, | |
| "epoch": 0.5747045084577819, | |
| "wallclock": "2026-05-23T06:13:33.538279", | |
| "loss": 0.1139, | |
| "grad_norm": 0.7153676748275757, | |
| "learning_rate": 8.361296642663977e-06, | |
| "step_time_sec": 115.58 | |
| }, | |
| { | |
| "step": 2035, | |
| "epoch": 0.576120036803737, | |
| "wallclock": "2026-05-23T06:15:28.201077", | |
| "loss": 0.1186, | |
| "grad_norm": 1.0687501430511475, | |
| "learning_rate": 8.352802081753304e-06, | |
| "step_time_sec": 114.66 | |
| }, | |
| { | |
| "step": 2040, | |
| "epoch": 0.5775355651496922, | |
| "wallclock": "2026-05-23T06:17:21.826972", | |
| "loss": 0.0957, | |
| "grad_norm": 0.7276541590690613, | |
| "learning_rate": 8.344289900563955e-06, | |
| "step_time_sec": 113.63 | |
| }, | |
| { | |
| "step": 2045, | |
| "epoch": 0.5789510934956472, | |
| "wallclock": "2026-05-23T06:19:15.755614", | |
| "loss": 0.1418, | |
| "grad_norm": 1.2831865549087524, | |
| "learning_rate": 8.335760143830753e-06, | |
| "step_time_sec": 113.93 | |
| }, | |
| { | |
| "step": 2050, | |
| "epoch": 0.5803666218416024, | |
| "wallclock": "2026-05-23T06:21:10.146824", | |
| "loss": 0.0902, | |
| "grad_norm": 0.8044394850730896, | |
| "learning_rate": 8.327212856380886e-06, | |
| "step_time_sec": 114.39, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 33.45, | |
| "mem_reserved_gb": 69.86 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 2055, | |
| "epoch": 0.5817821501875575, | |
| "wallclock": "2026-05-23T06:23:03.760022", | |
| "loss": 0.1254, | |
| "grad_norm": 0.8785029053688049, | |
| "learning_rate": 8.318648083133675e-06, | |
| "step_time_sec": 113.61 | |
| }, | |
| { | |
| "step": 2060, | |
| "epoch": 0.5831976785335127, | |
| "wallclock": "2026-05-23T06:24:58.159811", | |
| "loss": 0.1295, | |
| "grad_norm": 0.8821666240692139, | |
| "learning_rate": 8.310065869100332e-06, | |
| "step_time_sec": 114.4 | |
| }, | |
| { | |
| "step": 2065, | |
| "epoch": 0.5846132068794677, | |
| "wallclock": "2026-05-23T06:26:51.514557", | |
| "loss": 0.1296, | |
| "grad_norm": 1.0319464206695557, | |
| "learning_rate": 8.301466259383729e-06, | |
| "step_time_sec": 113.35 | |
| }, | |
| { | |
| "step": 2070, | |
| "epoch": 0.5860287352254229, | |
| "wallclock": "2026-05-23T06:28:45.591485", | |
| "loss": 0.1134, | |
| "grad_norm": 0.7893862724304199, | |
| "learning_rate": 8.292849299178158e-06, | |
| "step_time_sec": 114.08 | |
| }, | |
| { | |
| "step": 2075, | |
| "epoch": 0.587444263571378, | |
| "wallclock": "2026-05-23T06:30:40.034770", | |
| "loss": 0.1123, | |
| "grad_norm": 0.8960036635398865, | |
| "learning_rate": 8.284215033769098e-06, | |
| "step_time_sec": 114.44 | |
| }, | |
| { | |
| "step": 2080, | |
| "epoch": 0.5888597919173332, | |
| "wallclock": "2026-05-23T06:32:33.013250", | |
| "loss": 0.1019, | |
| "grad_norm": 0.7732668519020081, | |
| "learning_rate": 8.275563508532972e-06, | |
| "step_time_sec": 112.98 | |
| }, | |
| { | |
| "step": 2085, | |
| "epoch": 0.5902753202632883, | |
| "wallclock": "2026-05-23T06:34:26.451713", | |
| "loss": 0.1159, | |
| "grad_norm": 1.014701008796692, | |
| "learning_rate": 8.266894768936907e-06, | |
| "step_time_sec": 113.44 | |
| }, | |
| { | |
| "step": 2090, | |
| "epoch": 0.5916908486092434, | |
| "wallclock": "2026-05-23T06:36:20.092613", | |
| "loss": 0.117, | |
| "grad_norm": 1.0048466920852661, | |
| "learning_rate": 8.258208860538498e-06, | |
| "step_time_sec": 113.64 | |
| }, | |
| { | |
| "step": 2095, | |
| "epoch": 0.5931063769551985, | |
| "wallclock": "2026-05-23T06:38:13.619925", | |
| "loss": 0.1295, | |
| "grad_norm": 1.0775166749954224, | |
| "learning_rate": 8.249505828985575e-06, | |
| "step_time_sec": 113.53 | |
| }, | |
| { | |
| "step": 2100, | |
| "epoch": 0.5945219053011537, | |
| "wallclock": "2026-05-23T06:40:07.681597", | |
| "loss": 0.1198, | |
| "grad_norm": 1.339026689529419, | |
| "learning_rate": 8.240785720015954e-06, | |
| "step_time_sec": 114.06, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 33.45, | |
| "mem_reserved_gb": 69.86 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 2100, | |
| "epoch": 0.5945219053011537, | |
| "wallclock": "2026-05-23T06:40:59.897976", | |
| "eval_loss": 0.1282491832971573, | |
| "eval_runtime": 52.1233, | |
| "eval_samples_per_second": 4.796, | |
| "eval_steps_per_second": 1.209, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 39.05, | |
| "mem_reserved_gb": 69.86 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 2105, | |
| "epoch": 0.5959374336471088, | |
| "wallclock": "2026-05-23T06:44:35.507980", | |
| "loss": 0.0943, | |
| "grad_norm": 0.7660958766937256, | |
| "learning_rate": 8.232048579457194e-06, | |
| "step_time_sec": 267.83 | |
| }, | |
| { | |
| "step": 2110, | |
| "epoch": 0.5973529619930639, | |
| "wallclock": "2026-05-23T06:46:29.777766", | |
| "loss": 0.11, | |
| "grad_norm": 0.9617125391960144, | |
| "learning_rate": 8.22329445322637e-06, | |
| "step_time_sec": 114.27 | |
| }, | |
| { | |
| "step": 2115, | |
| "epoch": 0.598768490339019, | |
| "wallclock": "2026-05-23T06:48:22.536086", | |
| "loss": 0.1132, | |
| "grad_norm": 1.1251046657562256, | |
| "learning_rate": 8.214523387329815e-06, | |
| "step_time_sec": 112.76 | |
| }, | |
| { | |
| "step": 2120, | |
| "epoch": 0.6001840186849742, | |
| "wallclock": "2026-05-23T06:50:15.285691", | |
| "loss": 0.1012, | |
| "grad_norm": 0.8359034657478333, | |
| "learning_rate": 8.205735427862897e-06, | |
| "step_time_sec": 112.75 | |
| }, | |
| { | |
| "step": 2125, | |
| "epoch": 0.6015995470309293, | |
| "wallclock": "2026-05-23T06:52:10.239923", | |
| "loss": 0.0948, | |
| "grad_norm": 0.8290632963180542, | |
| "learning_rate": 8.196930621009756e-06, | |
| "step_time_sec": 114.95 | |
| }, | |
| { | |
| "step": 2130, | |
| "epoch": 0.6030150753768844, | |
| "wallclock": "2026-05-23T06:54:03.899054", | |
| "loss": 0.1103, | |
| "grad_norm": 0.707132875919342, | |
| "learning_rate": 8.188109013043076e-06, | |
| "step_time_sec": 113.66 | |
| }, | |
| { | |
| "step": 2135, | |
| "epoch": 0.6044306037228395, | |
| "wallclock": "2026-05-23T06:55:56.905229", | |
| "loss": 0.111, | |
| "grad_norm": 0.940647542476654, | |
| "learning_rate": 8.179270650323839e-06, | |
| "step_time_sec": 113.01 | |
| }, | |
| { | |
| "step": 2140, | |
| "epoch": 0.6058461320687947, | |
| "wallclock": "2026-05-23T06:57:51.331282", | |
| "loss": 0.1101, | |
| "grad_norm": 0.7413908243179321, | |
| "learning_rate": 8.170415579301076e-06, | |
| "step_time_sec": 114.43 | |
| }, | |
| { | |
| "step": 2145, | |
| "epoch": 0.6072616604147498, | |
| "wallclock": "2026-05-23T06:59:44.905917", | |
| "loss": 0.1021, | |
| "grad_norm": 1.1988078355789185, | |
| "learning_rate": 8.161543846511628e-06, | |
| "step_time_sec": 113.57 | |
| }, | |
| { | |
| "step": 2150, | |
| "epoch": 0.608677188760705, | |
| "wallclock": "2026-05-23T07:01:39.153468", | |
| "loss": 0.1143, | |
| "grad_norm": 1.0968750715255737, | |
| "learning_rate": 8.152655498579903e-06, | |
| "step_time_sec": 114.25, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 33.45, | |
| "mem_reserved_gb": 69.86 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 2155, | |
| "epoch": 0.61009271710666, | |
| "wallclock": "2026-05-23T07:03:33.733115", | |
| "loss": 0.1268, | |
| "grad_norm": 0.8552664518356323, | |
| "learning_rate": 8.143750582217625e-06, | |
| "step_time_sec": 114.58 | |
| }, | |
| { | |
| "step": 2160, | |
| "epoch": 0.6115082454526152, | |
| "wallclock": "2026-05-23T07:05:27.710732", | |
| "loss": 0.1103, | |
| "grad_norm": 0.7791701555252075, | |
| "learning_rate": 8.13482914422359e-06, | |
| "step_time_sec": 113.98 | |
| }, | |
| { | |
| "step": 2165, | |
| "epoch": 0.6129237737985703, | |
| "wallclock": "2026-05-23T07:07:22.028971", | |
| "loss": 0.1155, | |
| "grad_norm": 0.7360658645629883, | |
| "learning_rate": 8.125891231483425e-06, | |
| "step_time_sec": 114.32 | |
| }, | |
| { | |
| "step": 2170, | |
| "epoch": 0.6143393021445255, | |
| "wallclock": "2026-05-23T07:09:16.562706", | |
| "loss": 0.1132, | |
| "grad_norm": 1.0679337978363037, | |
| "learning_rate": 8.11693689096934e-06, | |
| "step_time_sec": 114.53 | |
| }, | |
| { | |
| "step": 2175, | |
| "epoch": 0.6157548304904805, | |
| "wallclock": "2026-05-23T07:11:10.858404", | |
| "loss": 0.129, | |
| "grad_norm": 0.9493758082389832, | |
| "learning_rate": 8.107966169739871e-06, | |
| "step_time_sec": 114.3 | |
| }, | |
| { | |
| "step": 2180, | |
| "epoch": 0.6171703588364357, | |
| "wallclock": "2026-05-23T07:13:03.638564", | |
| "loss": 0.1302, | |
| "grad_norm": 0.9018224477767944, | |
| "learning_rate": 8.09897911493965e-06, | |
| "step_time_sec": 112.78 | |
| }, | |
| { | |
| "step": 2185, | |
| "epoch": 0.6185858871823908, | |
| "wallclock": "2026-05-23T07:14:57.306827", | |
| "loss": 0.1218, | |
| "grad_norm": 0.8794463276863098, | |
| "learning_rate": 8.089975773799143e-06, | |
| "step_time_sec": 113.67 | |
| }, | |
| { | |
| "step": 2190, | |
| "epoch": 0.620001415528346, | |
| "wallclock": "2026-05-23T07:16:51.323807", | |
| "loss": 0.11, | |
| "grad_norm": 0.8043993711471558, | |
| "learning_rate": 8.080956193634409e-06, | |
| "step_time_sec": 114.02 | |
| }, | |
| { | |
| "step": 2195, | |
| "epoch": 0.6214169438743011, | |
| "wallclock": "2026-05-23T07:18:45.611509", | |
| "loss": 0.0976, | |
| "grad_norm": 1.1800931692123413, | |
| "learning_rate": 8.07192042184685e-06, | |
| "step_time_sec": 114.29 | |
| }, | |
| { | |
| "step": 2200, | |
| "epoch": 0.6228324722202562, | |
| "wallclock": "2026-05-23T07:20:38.621541", | |
| "loss": 0.1349, | |
| "grad_norm": 1.5049303770065308, | |
| "learning_rate": 8.062868505922958e-06, | |
| "step_time_sec": 113.01, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 33.45, | |
| "mem_reserved_gb": 69.86 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 2200, | |
| "epoch": 0.6228324722202562, | |
| "wallclock": "2026-05-23T07:21:30.438443", | |
| "eval_loss": 0.12787169218063354, | |
| "eval_runtime": 51.72, | |
| "eval_samples_per_second": 4.834, | |
| "eval_steps_per_second": 1.218, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 39.05, | |
| "mem_reserved_gb": 69.86 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 2205, | |
| "epoch": 0.6242480005662113, | |
| "wallclock": "2026-05-23T07:25:03.848081", | |
| "loss": 0.1167, | |
| "grad_norm": 0.8288029432296753, | |
| "learning_rate": 8.053800493434072e-06, | |
| "step_time_sec": 265.23 | |
| }, | |
| { | |
| "step": 2210, | |
| "epoch": 0.6256635289121665, | |
| "wallclock": "2026-05-23T07:26:56.740627", | |
| "loss": 0.1079, | |
| "grad_norm": 0.8604945540428162, | |
| "learning_rate": 8.044716432036126e-06, | |
| "step_time_sec": 112.89 | |
| }, | |
| { | |
| "step": 2215, | |
| "epoch": 0.6270790572581216, | |
| "wallclock": "2026-05-23T07:28:54.914996", | |
| "loss": 0.1164, | |
| "grad_norm": 0.997947633266449, | |
| "learning_rate": 8.035616369469392e-06, | |
| "step_time_sec": 118.17 | |
| }, | |
| { | |
| "step": 2220, | |
| "epoch": 0.6284945856040767, | |
| "wallclock": "2026-05-23T07:30:49.126331", | |
| "loss": 0.102, | |
| "grad_norm": 0.8771962523460388, | |
| "learning_rate": 8.02650035355824e-06, | |
| "step_time_sec": 114.21 | |
| }, | |
| { | |
| "step": 2225, | |
| "epoch": 0.6299101139500318, | |
| "wallclock": "2026-05-23T07:32:43.224804", | |
| "loss": 0.1177, | |
| "grad_norm": 0.909534752368927, | |
| "learning_rate": 8.017368432210875e-06, | |
| "step_time_sec": 114.1 | |
| }, | |
| { | |
| "step": 2230, | |
| "epoch": 0.631325642295987, | |
| "wallclock": "2026-05-23T07:34:37.246776", | |
| "loss": 0.1316, | |
| "grad_norm": 1.185617446899414, | |
| "learning_rate": 8.008220653419097e-06, | |
| "step_time_sec": 114.02 | |
| }, | |
| { | |
| "step": 2235, | |
| "epoch": 0.6327411706419421, | |
| "wallclock": "2026-05-23T07:36:31.707708", | |
| "loss": 0.0931, | |
| "grad_norm": 0.9247961044311523, | |
| "learning_rate": 7.99905706525804e-06, | |
| "step_time_sec": 114.46 | |
| }, | |
| { | |
| "step": 2240, | |
| "epoch": 0.6341566989878973, | |
| "wallclock": "2026-05-23T07:38:24.836647", | |
| "loss": 0.0937, | |
| "grad_norm": 0.9448702931404114, | |
| "learning_rate": 7.989877715885925e-06, | |
| "step_time_sec": 113.13 | |
| }, | |
| { | |
| "step": 2245, | |
| "epoch": 0.6355722273338523, | |
| "wallclock": "2026-05-23T07:40:18.101149", | |
| "loss": 0.1124, | |
| "grad_norm": 0.9247167110443115, | |
| "learning_rate": 7.980682653543799e-06, | |
| "step_time_sec": 113.26 | |
| }, | |
| { | |
| "step": 2250, | |
| "epoch": 0.6369877556798075, | |
| "wallclock": "2026-05-23T07:42:13.210519", | |
| "loss": 0.1081, | |
| "grad_norm": 1.228428602218628, | |
| "learning_rate": 7.97147192655529e-06, | |
| "step_time_sec": 115.11, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 33.45, | |
| "mem_reserved_gb": 69.86 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 2255, | |
| "epoch": 0.6384032840257626, | |
| "wallclock": "2026-05-23T07:44:07.337158", | |
| "loss": 0.1082, | |
| "grad_norm": 0.8680895566940308, | |
| "learning_rate": 7.962245583326354e-06, | |
| "step_time_sec": 114.13 | |
| }, | |
| { | |
| "step": 2260, | |
| "epoch": 0.6398188123717178, | |
| "wallclock": "2026-05-23T07:46:01.112153", | |
| "loss": 0.1073, | |
| "grad_norm": 0.7317308783531189, | |
| "learning_rate": 7.953003672345009e-06, | |
| "step_time_sec": 113.77 | |
| }, | |
| { | |
| "step": 2265, | |
| "epoch": 0.6412343407176728, | |
| "wallclock": "2026-05-23T07:47:55.256008", | |
| "loss": 0.1213, | |
| "grad_norm": 0.9891361594200134, | |
| "learning_rate": 7.943746242181091e-06, | |
| "step_time_sec": 114.14 | |
| }, | |
| { | |
| "step": 2270, | |
| "epoch": 0.642649869063628, | |
| "wallclock": "2026-05-23T07:49:48.031699", | |
| "loss": 0.1084, | |
| "grad_norm": 0.8852012753486633, | |
| "learning_rate": 7.934473341485998e-06, | |
| "step_time_sec": 112.78 | |
| }, | |
| { | |
| "step": 2275, | |
| "epoch": 0.6440653974095831, | |
| "wallclock": "2026-05-23T07:51:41.760762", | |
| "loss": 0.1015, | |
| "grad_norm": 0.6731085181236267, | |
| "learning_rate": 7.925185018992426e-06, | |
| "step_time_sec": 113.73 | |
| }, | |
| { | |
| "step": 2280, | |
| "epoch": 0.6454809257555383, | |
| "wallclock": "2026-05-23T07:53:37.755943", | |
| "loss": 0.0927, | |
| "grad_norm": 0.8080906271934509, | |
| "learning_rate": 7.91588132351412e-06, | |
| "step_time_sec": 116.0 | |
| }, | |
| { | |
| "step": 2285, | |
| "epoch": 0.6468964541014934, | |
| "wallclock": "2026-05-23T07:55:31.246122", | |
| "loss": 0.117, | |
| "grad_norm": 0.9637818336486816, | |
| "learning_rate": 7.906562303945622e-06, | |
| "step_time_sec": 113.49 | |
| }, | |
| { | |
| "step": 2290, | |
| "epoch": 0.6483119824474485, | |
| "wallclock": "2026-05-23T07:57:25.355025", | |
| "loss": 0.1148, | |
| "grad_norm": 0.8999826908111572, | |
| "learning_rate": 7.897228009262003e-06, | |
| "step_time_sec": 114.11 | |
| }, | |
| { | |
| "step": 2295, | |
| "epoch": 0.6497275107934036, | |
| "wallclock": "2026-05-23T07:59:20.568291", | |
| "loss": 0.1202, | |
| "grad_norm": 0.655300498008728, | |
| "learning_rate": 7.887878488518608e-06, | |
| "step_time_sec": 115.21 | |
| }, | |
| { | |
| "step": 2300, | |
| "epoch": 0.6511430391393588, | |
| "wallclock": "2026-05-23T08:01:15.440455", | |
| "loss": 0.1164, | |
| "grad_norm": 1.327991247177124, | |
| "learning_rate": 7.878513790850805e-06, | |
| "step_time_sec": 114.87, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 33.45, | |
| "mem_reserved_gb": 69.86 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 2300, | |
| "epoch": 0.6511430391393588, | |
| "wallclock": "2026-05-23T08:02:07.666082", | |
| "eval_loss": 0.12934190034866333, | |
| "eval_runtime": 52.1298, | |
| "eval_samples_per_second": 4.796, | |
| "eval_steps_per_second": 1.209, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 39.05, | |
| "mem_reserved_gb": 69.86 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 2305, | |
| "epoch": 0.6525585674853139, | |
| "wallclock": "2026-05-23T08:05:41.525707", | |
| "loss": 0.0931, | |
| "grad_norm": 1.3085259199142456, | |
| "learning_rate": 7.869133965473723e-06, | |
| "step_time_sec": 266.09 | |
| }, | |
| { | |
| "step": 2310, | |
| "epoch": 0.653974095831269, | |
| "wallclock": "2026-05-23T08:07:39.677572", | |
| "loss": 0.1252, | |
| "grad_norm": 0.9861677289009094, | |
| "learning_rate": 7.859739061681992e-06, | |
| "step_time_sec": 118.15 | |
| }, | |
| { | |
| "step": 2315, | |
| "epoch": 0.6553896241772241, | |
| "wallclock": "2026-05-23T08:09:33.975162", | |
| "loss": 0.1131, | |
| "grad_norm": 0.685297966003418, | |
| "learning_rate": 7.850329128849482e-06, | |
| "step_time_sec": 114.3 | |
| }, | |
| { | |
| "step": 2320, | |
| "epoch": 0.6568051525231793, | |
| "wallclock": "2026-05-23T08:11:28.536426", | |
| "loss": 0.1087, | |
| "grad_norm": 0.8919675946235657, | |
| "learning_rate": 7.840904216429053e-06, | |
| "step_time_sec": 114.56 | |
| }, | |
| { | |
| "step": 2325, | |
| "epoch": 0.6582206808691344, | |
| "wallclock": "2026-05-23T08:13:23.250581", | |
| "loss": 0.1037, | |
| "grad_norm": 0.9594758152961731, | |
| "learning_rate": 7.83146437395228e-06, | |
| "step_time_sec": 114.71 | |
| }, | |
| { | |
| "step": 2330, | |
| "epoch": 0.6596362092150896, | |
| "wallclock": "2026-05-23T08:15:17.659280", | |
| "loss": 0.1021, | |
| "grad_norm": 0.79726243019104, | |
| "learning_rate": 7.82200965102921e-06, | |
| "step_time_sec": 114.41 | |
| }, | |
| { | |
| "step": 2335, | |
| "epoch": 0.6610517375610446, | |
| "wallclock": "2026-05-23T08:17:10.906487", | |
| "loss": 0.1267, | |
| "grad_norm": 1.4677671194076538, | |
| "learning_rate": 7.812540097348085e-06, | |
| "step_time_sec": 113.25 | |
| }, | |
| { | |
| "step": 2340, | |
| "epoch": 0.6624672659069998, | |
| "wallclock": "2026-05-23T08:19:05.623865", | |
| "loss": 0.1022, | |
| "grad_norm": 0.8115029335021973, | |
| "learning_rate": 7.803055762675096e-06, | |
| "step_time_sec": 114.72 | |
| }, | |
| { | |
| "step": 2345, | |
| "epoch": 0.6638827942529549, | |
| "wallclock": "2026-05-23T08:21:00.057684", | |
| "loss": 0.097, | |
| "grad_norm": 0.7353535890579224, | |
| "learning_rate": 7.793556696854105e-06, | |
| "step_time_sec": 114.43 | |
| }, | |
| { | |
| "step": 2350, | |
| "epoch": 0.6652983225989101, | |
| "wallclock": "2026-05-23T08:22:52.623668", | |
| "loss": 0.1056, | |
| "grad_norm": 0.9155029058456421, | |
| "learning_rate": 7.784042949806401e-06, | |
| "step_time_sec": 112.57, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 33.45, | |
| "mem_reserved_gb": 69.86 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 2355, | |
| "epoch": 0.6667138509448651, | |
| "wallclock": "2026-05-23T08:24:46.327258", | |
| "loss": 0.119, | |
| "grad_norm": 1.1471012830734253, | |
| "learning_rate": 7.77451457153042e-06, | |
| "step_time_sec": 113.7 | |
| }, | |
| { | |
| "step": 2360, | |
| "epoch": 0.6681293792908203, | |
| "wallclock": "2026-05-23T08:26:40.729225", | |
| "loss": 0.1122, | |
| "grad_norm": 1.1479600667953491, | |
| "learning_rate": 7.764971612101497e-06, | |
| "step_time_sec": 114.4 | |
| }, | |
| { | |
| "step": 2365, | |
| "epoch": 0.6695449076367754, | |
| "wallclock": "2026-05-23T08:28:34.893479", | |
| "loss": 0.1187, | |
| "grad_norm": 0.990744411945343, | |
| "learning_rate": 7.755414121671596e-06, | |
| "step_time_sec": 114.16 | |
| }, | |
| { | |
| "step": 2370, | |
| "epoch": 0.6709604359827306, | |
| "wallclock": "2026-05-23T08:30:29.655994", | |
| "loss": 0.1045, | |
| "grad_norm": 0.8785448670387268, | |
| "learning_rate": 7.745842150469043e-06, | |
| "step_time_sec": 114.76 | |
| }, | |
| { | |
| "step": 2375, | |
| "epoch": 0.6723759643286856, | |
| "wallclock": "2026-05-23T08:32:24.847718", | |
| "loss": 0.1015, | |
| "grad_norm": 1.0024092197418213, | |
| "learning_rate": 7.736255748798272e-06, | |
| "step_time_sec": 115.19 | |
| }, | |
| { | |
| "step": 2380, | |
| "epoch": 0.6737914926746408, | |
| "wallclock": "2026-05-23T08:34:19.407078", | |
| "loss": 0.1087, | |
| "grad_norm": 1.0146054029464722, | |
| "learning_rate": 7.726654967039546e-06, | |
| "step_time_sec": 114.56 | |
| }, | |
| { | |
| "step": 2385, | |
| "epoch": 0.6752070210205959, | |
| "wallclock": "2026-05-23T08:36:13.103873", | |
| "loss": 0.1194, | |
| "grad_norm": 1.0869743824005127, | |
| "learning_rate": 7.717039855648711e-06, | |
| "step_time_sec": 113.7 | |
| }, | |
| { | |
| "step": 2390, | |
| "epoch": 0.6766225493665511, | |
| "wallclock": "2026-05-23T08:38:07.793063", | |
| "loss": 0.1053, | |
| "grad_norm": 0.6551274061203003, | |
| "learning_rate": 7.707410465156916e-06, | |
| "step_time_sec": 114.69 | |
| }, | |
| { | |
| "step": 2395, | |
| "epoch": 0.6780380777125062, | |
| "wallclock": "2026-05-23T08:40:01.316930", | |
| "loss": 0.0985, | |
| "grad_norm": 0.9398195147514343, | |
| "learning_rate": 7.69776684617035e-06, | |
| "step_time_sec": 113.52 | |
| }, | |
| { | |
| "step": 2400, | |
| "epoch": 0.6794536060584613, | |
| "wallclock": "2026-05-23T08:41:54.704114", | |
| "loss": 0.1208, | |
| "grad_norm": 1.1209269762039185, | |
| "learning_rate": 7.688109049369984e-06, | |
| "step_time_sec": 113.39, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 33.45, | |
| "mem_reserved_gb": 69.86 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 2400, | |
| "epoch": 0.6794536060584613, | |
| "wallclock": "2026-05-23T08:42:47.203641", | |
| "eval_loss": 0.11854572594165802, | |
| "eval_runtime": 52.4158, | |
| "eval_samples_per_second": 4.77, | |
| "eval_steps_per_second": 1.202, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 39.05, | |
| "mem_reserved_gb": 69.86 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 2405, | |
| "epoch": 0.6808691344044164, | |
| "wallclock": "2026-05-23T08:46:24.285045", | |
| "loss": 0.1037, | |
| "grad_norm": 0.8943494558334351, | |
| "learning_rate": 7.678437125511293e-06, | |
| "step_time_sec": 269.58 | |
| }, | |
| { | |
| "step": 2410, | |
| "epoch": 0.6822846627503716, | |
| "wallclock": "2026-05-23T08:48:17.119278", | |
| "loss": 0.1201, | |
| "grad_norm": 1.3184447288513184, | |
| "learning_rate": 7.668751125423997e-06, | |
| "step_time_sec": 112.83 | |
| }, | |
| { | |
| "step": 2415, | |
| "epoch": 0.6837001910963267, | |
| "wallclock": "2026-05-23T08:50:10.316231", | |
| "loss": 0.127, | |
| "grad_norm": 1.2354567050933838, | |
| "learning_rate": 7.659051100011796e-06, | |
| "step_time_sec": 113.2 | |
| }, | |
| { | |
| "step": 2420, | |
| "epoch": 0.6851157194422818, | |
| "wallclock": "2026-05-23T08:52:04.524428", | |
| "loss": 0.0854, | |
| "grad_norm": 0.7846460342407227, | |
| "learning_rate": 7.649337100252091e-06, | |
| "step_time_sec": 114.21 | |
| }, | |
| { | |
| "step": 2425, | |
| "epoch": 0.6865312477882369, | |
| "wallclock": "2026-05-23T08:53:59.953373", | |
| "loss": 0.1035, | |
| "grad_norm": 0.6973745226860046, | |
| "learning_rate": 7.639609177195732e-06, | |
| "step_time_sec": 115.43 | |
| }, | |
| { | |
| "step": 2430, | |
| "epoch": 0.6879467761341921, | |
| "wallclock": "2026-05-23T08:55:54.650826", | |
| "loss": 0.1035, | |
| "grad_norm": 0.8783355951309204, | |
| "learning_rate": 7.629867381966739e-06, | |
| "step_time_sec": 114.7 | |
| }, | |
| { | |
| "step": 2435, | |
| "epoch": 0.6893623044801472, | |
| "wallclock": "2026-05-23T08:57:49.808654", | |
| "loss": 0.1103, | |
| "grad_norm": 0.8976749777793884, | |
| "learning_rate": 7.6201117657620284e-06, | |
| "step_time_sec": 115.16 | |
| }, | |
| { | |
| "step": 2440, | |
| "epoch": 0.6907778328261024, | |
| "wallclock": "2026-05-23T08:59:43.041184", | |
| "loss": 0.1041, | |
| "grad_norm": 1.3639253377914429, | |
| "learning_rate": 7.610342379851159e-06, | |
| "step_time_sec": 113.23 | |
| }, | |
| { | |
| "step": 2445, | |
| "epoch": 0.6921933611720574, | |
| "wallclock": "2026-05-23T09:01:36.414580", | |
| "loss": 0.1172, | |
| "grad_norm": 1.34951651096344, | |
| "learning_rate": 7.600559275576054e-06, | |
| "step_time_sec": 113.37 | |
| }, | |
| { | |
| "step": 2450, | |
| "epoch": 0.6936088895180126, | |
| "wallclock": "2026-05-23T09:03:31.256289", | |
| "loss": 0.1272, | |
| "grad_norm": 1.2545363903045654, | |
| "learning_rate": 7.590762504350729e-06, | |
| "step_time_sec": 114.84, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 33.45, | |
| "mem_reserved_gb": 69.86 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 2455, | |
| "epoch": 0.6950244178639677, | |
| "wallclock": "2026-05-23T09:05:26.347778", | |
| "loss": 0.1098, | |
| "grad_norm": 0.902570903301239, | |
| "learning_rate": 7.580952117661028e-06, | |
| "step_time_sec": 115.09 | |
| }, | |
| { | |
| "step": 2460, | |
| "epoch": 0.6964399462099229, | |
| "wallclock": "2026-05-23T09:07:22.291465", | |
| "loss": 0.1261, | |
| "grad_norm": 1.299424171447754, | |
| "learning_rate": 7.571128167064347e-06, | |
| "step_time_sec": 115.94 | |
| }, | |
| { | |
| "step": 2465, | |
| "epoch": 0.6978554745558779, | |
| "wallclock": "2026-05-23T09:09:16.390275", | |
| "loss": 0.1101, | |
| "grad_norm": 0.9918133020401001, | |
| "learning_rate": 7.5612907041893645e-06, | |
| "step_time_sec": 114.1 | |
| }, | |
| { | |
| "step": 2470, | |
| "epoch": 0.6992710029018331, | |
| "wallclock": "2026-05-23T09:11:10.300186", | |
| "loss": 0.0887, | |
| "grad_norm": 0.9212543964385986, | |
| "learning_rate": 7.551439780735775e-06, | |
| "step_time_sec": 113.91 | |
| }, | |
| { | |
| "step": 2475, | |
| "epoch": 0.7006865312477882, | |
| "wallclock": "2026-05-23T09:13:02.291441", | |
| "loss": 0.1198, | |
| "grad_norm": 1.1632072925567627, | |
| "learning_rate": 7.541575448474012e-06, | |
| "step_time_sec": 111.99 | |
| }, | |
| { | |
| "step": 2480, | |
| "epoch": 0.7021020595937434, | |
| "wallclock": "2026-05-23T09:14:55.310823", | |
| "loss": 0.0919, | |
| "grad_norm": 0.9132311940193176, | |
| "learning_rate": 7.531697759244978e-06, | |
| "step_time_sec": 113.02 | |
| }, | |
| { | |
| "step": 2485, | |
| "epoch": 0.7035175879396985, | |
| "wallclock": "2026-05-23T09:16:49.595016", | |
| "loss": 0.1046, | |
| "grad_norm": 0.9931870698928833, | |
| "learning_rate": 7.521806764959769e-06, | |
| "step_time_sec": 114.28 | |
| }, | |
| { | |
| "step": 2490, | |
| "epoch": 0.7049331162856536, | |
| "wallclock": "2026-05-23T09:18:43.462544", | |
| "loss": 0.0934, | |
| "grad_norm": 0.810712993144989, | |
| "learning_rate": 7.511902517599407e-06, | |
| "step_time_sec": 113.87 | |
| }, | |
| { | |
| "step": 2495, | |
| "epoch": 0.7063486446316087, | |
| "wallclock": "2026-05-23T09:20:37.403219", | |
| "loss": 0.1027, | |
| "grad_norm": 1.004841685295105, | |
| "learning_rate": 7.501985069214561e-06, | |
| "step_time_sec": 113.94 | |
| }, | |
| { | |
| "step": 2500, | |
| "epoch": 0.7077641729775639, | |
| "wallclock": "2026-05-23T09:22:33.235203", | |
| "loss": 0.0982, | |
| "grad_norm": 0.7684575319290161, | |
| "learning_rate": 7.492054471925282e-06, | |
| "step_time_sec": 115.83, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 33.45, | |
| "mem_reserved_gb": 74.24 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 2500, | |
| "epoch": 0.7077641729775639, | |
| "wallclock": "2026-05-23T09:23:26.146278", | |
| "eval_loss": 0.11603201180696487, | |
| "eval_runtime": 52.8156, | |
| "eval_samples_per_second": 4.733, | |
| "eval_steps_per_second": 1.193, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 39.05, | |
| "mem_reserved_gb": 74.24 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 2505, | |
| "epoch": 0.709179701323519, | |
| "wallclock": "2026-05-23T09:27:05.152438", | |
| "loss": 0.1083, | |
| "grad_norm": 0.8736166954040527, | |
| "learning_rate": 7.482110777920719e-06, | |
| "step_time_sec": 271.92 | |
| }, | |
| { | |
| "step": 2510, | |
| "epoch": 0.7105952296694741, | |
| "wallclock": "2026-05-23T09:28:58.645822", | |
| "loss": 0.1197, | |
| "grad_norm": 1.1975699663162231, | |
| "learning_rate": 7.472154039458851e-06, | |
| "step_time_sec": 113.49 | |
| }, | |
| { | |
| "step": 2515, | |
| "epoch": 0.7120107580154292, | |
| "wallclock": "2026-05-23T09:30:54.443603", | |
| "loss": 0.1261, | |
| "grad_norm": 1.4840281009674072, | |
| "learning_rate": 7.462184308866209e-06, | |
| "step_time_sec": 115.8 | |
| }, | |
| { | |
| "step": 2520, | |
| "epoch": 0.7134262863613844, | |
| "wallclock": "2026-05-23T09:32:48.921941", | |
| "loss": 0.1001, | |
| "grad_norm": 0.9024205803871155, | |
| "learning_rate": 7.452201638537605e-06, | |
| "step_time_sec": 114.48 | |
| }, | |
| { | |
| "step": 2525, | |
| "epoch": 0.7148418147073395, | |
| "wallclock": "2026-05-23T09:34:43.594377", | |
| "loss": 0.0883, | |
| "grad_norm": 2.425753355026245, | |
| "learning_rate": 7.442206080935852e-06, | |
| "step_time_sec": 114.67 | |
| }, | |
| { | |
| "step": 2530, | |
| "epoch": 0.7162573430532947, | |
| "wallclock": "2026-05-23T09:36:38.043629", | |
| "loss": 0.1033, | |
| "grad_norm": 0.9202796816825867, | |
| "learning_rate": 7.432197688591494e-06, | |
| "step_time_sec": 114.45 | |
| }, | |
| { | |
| "step": 2535, | |
| "epoch": 0.7176728713992497, | |
| "wallclock": "2026-05-23T09:38:33.443082", | |
| "loss": 0.1229, | |
| "grad_norm": 0.8916212320327759, | |
| "learning_rate": 7.422176514102524e-06, | |
| "step_time_sec": 115.4 | |
| }, | |
| { | |
| "step": 2540, | |
| "epoch": 0.7190883997452049, | |
| "wallclock": "2026-05-23T09:40:26.131948", | |
| "loss": 0.0948, | |
| "grad_norm": 0.7314426898956299, | |
| "learning_rate": 7.41214261013411e-06, | |
| "step_time_sec": 112.69 | |
| }, | |
| { | |
| "step": 2545, | |
| "epoch": 0.72050392809116, | |
| "wallclock": "2026-05-23T09:42:19.625497", | |
| "loss": 0.1031, | |
| "grad_norm": 1.2673311233520508, | |
| "learning_rate": 7.402096029418317e-06, | |
| "step_time_sec": 113.49 | |
| }, | |
| { | |
| "step": 2550, | |
| "epoch": 0.7219194564371152, | |
| "wallclock": "2026-05-23T09:44:13.852248", | |
| "loss": 0.1199, | |
| "grad_norm": 0.9767388701438904, | |
| "learning_rate": 7.3920368247538384e-06, | |
| "step_time_sec": 114.23, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 33.45, | |
| "mem_reserved_gb": 74.24 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 2555, | |
| "epoch": 0.7233349847830702, | |
| "wallclock": "2026-05-23T09:46:07.447283", | |
| "loss": 0.1087, | |
| "grad_norm": 1.0202505588531494, | |
| "learning_rate": 7.381965049005703e-06, | |
| "step_time_sec": 113.6 | |
| }, | |
| { | |
| "step": 2560, | |
| "epoch": 0.7247505131290254, | |
| "wallclock": "2026-05-23T09:48:01.720028", | |
| "loss": 0.0971, | |
| "grad_norm": 1.1415823698043823, | |
| "learning_rate": 7.371880755105008e-06, | |
| "step_time_sec": 114.27 | |
| }, | |
| { | |
| "step": 2565, | |
| "epoch": 0.7261660414749805, | |
| "wallclock": "2026-05-23T09:49:56.656471", | |
| "loss": 0.1001, | |
| "grad_norm": 1.0273898839950562, | |
| "learning_rate": 7.361783996048641e-06, | |
| "step_time_sec": 114.94 | |
| }, | |
| { | |
| "step": 2570, | |
| "epoch": 0.7275815698209357, | |
| "wallclock": "2026-05-23T09:51:49.820193", | |
| "loss": 0.1057, | |
| "grad_norm": 1.1736416816711426, | |
| "learning_rate": 7.3516748248989955e-06, | |
| "step_time_sec": 113.16 | |
| }, | |
| { | |
| "step": 2575, | |
| "epoch": 0.7289970981668908, | |
| "wallclock": "2026-05-23T09:53:43.050372", | |
| "loss": 0.1056, | |
| "grad_norm": 0.8515759706497192, | |
| "learning_rate": 7.341553294783699e-06, | |
| "step_time_sec": 113.23 | |
| }, | |
| { | |
| "step": 2580, | |
| "epoch": 0.7304126265128459, | |
| "wallclock": "2026-05-23T09:55:37.522835", | |
| "loss": 0.1058, | |
| "grad_norm": 0.8394744992256165, | |
| "learning_rate": 7.3314194588953256e-06, | |
| "step_time_sec": 114.47 | |
| }, | |
| { | |
| "step": 2585, | |
| "epoch": 0.731828154858801, | |
| "wallclock": "2026-05-23T09:57:31.953180", | |
| "loss": 0.1082, | |
| "grad_norm": 0.7621601819992065, | |
| "learning_rate": 7.3212733704911235e-06, | |
| "step_time_sec": 114.43 | |
| }, | |
| { | |
| "step": 2590, | |
| "epoch": 0.7332436832047562, | |
| "wallclock": "2026-05-23T09:59:25.144746", | |
| "loss": 0.1147, | |
| "grad_norm": 1.1607191562652588, | |
| "learning_rate": 7.311115082892733e-06, | |
| "step_time_sec": 113.19 | |
| }, | |
| { | |
| "step": 2595, | |
| "epoch": 0.7346592115507113, | |
| "wallclock": "2026-05-23T10:01:19.943656", | |
| "loss": 0.1141, | |
| "grad_norm": 0.9936063289642334, | |
| "learning_rate": 7.300944649485908e-06, | |
| "step_time_sec": 114.8 | |
| }, | |
| { | |
| "step": 2600, | |
| "epoch": 0.7360747398966664, | |
| "wallclock": "2026-05-23T10:03:14.923839", | |
| "loss": 0.1048, | |
| "grad_norm": 0.7679593563079834, | |
| "learning_rate": 7.2907621237202275e-06, | |
| "step_time_sec": 114.98, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 33.45, | |
| "mem_reserved_gb": 74.24 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 2600, | |
| "epoch": 0.7360747398966664, | |
| "wallclock": "2026-05-23T10:04:07.231100", | |
| "eval_loss": 0.11498851329088211, | |
| "eval_runtime": 52.2032, | |
| "eval_samples_per_second": 4.789, | |
| "eval_steps_per_second": 1.207, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 39.05, | |
| "mem_reserved_gb": 74.24 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 2605, | |
| "epoch": 0.7374902682426215, | |
| "wallclock": "2026-05-23T10:07:42.813659", | |
| "loss": 0.0785, | |
| "grad_norm": 0.8581358790397644, | |
| "learning_rate": 7.280567559108825e-06, | |
| "step_time_sec": 267.89 | |
| }, | |
| { | |
| "step": 2610, | |
| "epoch": 0.7389057965885767, | |
| "wallclock": "2026-05-23T10:09:36.902094", | |
| "loss": 0.0795, | |
| "grad_norm": 1.4470053911209106, | |
| "learning_rate": 7.270361009228104e-06, | |
| "step_time_sec": 114.09 | |
| }, | |
| { | |
| "step": 2615, | |
| "epoch": 0.7403213249345318, | |
| "wallclock": "2026-05-23T10:11:32.049560", | |
| "loss": 0.1029, | |
| "grad_norm": 1.1154381036758423, | |
| "learning_rate": 7.260142527717449e-06, | |
| "step_time_sec": 115.15 | |
| }, | |
| { | |
| "step": 2620, | |
| "epoch": 0.741736853280487, | |
| "wallclock": "2026-05-23T10:13:28.524157", | |
| "loss": 0.114, | |
| "grad_norm": 1.143662929534912, | |
| "learning_rate": 7.249912168278954e-06, | |
| "step_time_sec": 116.47 | |
| }, | |
| { | |
| "step": 2625, | |
| "epoch": 0.743152381626442, | |
| "wallclock": "2026-05-23T10:15:25.719237", | |
| "loss": 0.1157, | |
| "grad_norm": 1.3383020162582397, | |
| "learning_rate": 7.23966998467714e-06, | |
| "step_time_sec": 117.2 | |
| }, | |
| { | |
| "step": 2630, | |
| "epoch": 0.7445679099723972, | |
| "wallclock": "2026-05-23T10:17:20.106607", | |
| "loss": 0.097, | |
| "grad_norm": 1.3460333347320557, | |
| "learning_rate": 7.229416030738661e-06, | |
| "step_time_sec": 114.39 | |
| }, | |
| { | |
| "step": 2635, | |
| "epoch": 0.7459834383183523, | |
| "wallclock": "2026-05-23T10:19:12.833927", | |
| "loss": 0.0934, | |
| "grad_norm": 1.0922449827194214, | |
| "learning_rate": 7.219150360352032e-06, | |
| "step_time_sec": 112.73 | |
| }, | |
| { | |
| "step": 2640, | |
| "epoch": 0.7473989666643075, | |
| "wallclock": "2026-05-23T10:21:07.756043", | |
| "loss": 0.1099, | |
| "grad_norm": 0.9513120651245117, | |
| "learning_rate": 7.208873027467345e-06, | |
| "step_time_sec": 114.92 | |
| }, | |
| { | |
| "step": 2645, | |
| "epoch": 0.7488144950102625, | |
| "wallclock": "2026-05-23T10:23:00.826108", | |
| "loss": 0.1106, | |
| "grad_norm": 0.9753119945526123, | |
| "learning_rate": 7.198584086095979e-06, | |
| "step_time_sec": 113.07 | |
| }, | |
| { | |
| "step": 2650, | |
| "epoch": 0.7502300233562177, | |
| "wallclock": "2026-05-23T10:24:56.030014", | |
| "loss": 0.0936, | |
| "grad_norm": 1.4077311754226685, | |
| "learning_rate": 7.188283590310322e-06, | |
| "step_time_sec": 115.2, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 33.45, | |
| "mem_reserved_gb": 74.24 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 2655, | |
| "epoch": 0.7516455517021728, | |
| "wallclock": "2026-05-23T10:26:50.125758", | |
| "loss": 0.1239, | |
| "grad_norm": 0.8350121378898621, | |
| "learning_rate": 7.177971594243486e-06, | |
| "step_time_sec": 114.1 | |
| }, | |
| { | |
| "step": 2660, | |
| "epoch": 0.753061080048128, | |
| "wallclock": "2026-05-23T10:28:44.428938", | |
| "loss": 0.1024, | |
| "grad_norm": 1.0880407094955444, | |
| "learning_rate": 7.167648152089017e-06, | |
| "step_time_sec": 114.3 | |
| }, | |
| { | |
| "step": 2665, | |
| "epoch": 0.754476608394083, | |
| "wallclock": "2026-05-23T10:30:38.443004", | |
| "loss": 0.1, | |
| "grad_norm": 1.0360862016677856, | |
| "learning_rate": 7.157313318100622e-06, | |
| "step_time_sec": 114.01 | |
| }, | |
| { | |
| "step": 2670, | |
| "epoch": 0.7558921367400382, | |
| "wallclock": "2026-05-23T10:32:32.126033", | |
| "loss": 0.1126, | |
| "grad_norm": 1.0407313108444214, | |
| "learning_rate": 7.14696714659187e-06, | |
| "step_time_sec": 113.68 | |
| }, | |
| { | |
| "step": 2675, | |
| "epoch": 0.7573076650859933, | |
| "wallclock": "2026-05-23T10:34:25.914981", | |
| "loss": 0.0933, | |
| "grad_norm": 1.346449613571167, | |
| "learning_rate": 7.136609691935914e-06, | |
| "step_time_sec": 113.79 | |
| }, | |
| { | |
| "step": 2680, | |
| "epoch": 0.7587231934319485, | |
| "wallclock": "2026-05-23T10:36:21.156179", | |
| "loss": 0.0893, | |
| "grad_norm": 1.2292298078536987, | |
| "learning_rate": 7.1262410085652075e-06, | |
| "step_time_sec": 115.24 | |
| }, | |
| { | |
| "step": 2685, | |
| "epoch": 0.7601387217779036, | |
| "wallclock": "2026-05-23T10:38:15.006638", | |
| "loss": 0.1224, | |
| "grad_norm": 1.2744159698486328, | |
| "learning_rate": 7.115861150971215e-06, | |
| "step_time_sec": 113.85 | |
| }, | |
| { | |
| "step": 2690, | |
| "epoch": 0.7615542501238587, | |
| "wallclock": "2026-05-23T10:40:09.527798", | |
| "loss": 0.0863, | |
| "grad_norm": 1.0019073486328125, | |
| "learning_rate": 7.105470173704121e-06, | |
| "step_time_sec": 114.52 | |
| }, | |
| { | |
| "step": 2695, | |
| "epoch": 0.7629697784698138, | |
| "wallclock": "2026-05-23T10:42:03.509958", | |
| "loss": 0.098, | |
| "grad_norm": 1.0547888278961182, | |
| "learning_rate": 7.095068131372552e-06, | |
| "step_time_sec": 113.98 | |
| }, | |
| { | |
| "step": 2700, | |
| "epoch": 0.764385306815769, | |
| "wallclock": "2026-05-23T10:44:00.051414", | |
| "loss": 0.116, | |
| "grad_norm": 0.9419006109237671, | |
| "learning_rate": 7.0846550786432885e-06, | |
| "step_time_sec": 116.54, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 33.45, | |
| "mem_reserved_gb": 74.24 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 2700, | |
| "epoch": 0.764385306815769, | |
| "wallclock": "2026-05-23T10:44:52.913063", | |
| "eval_loss": 0.110720694065094, | |
| "eval_runtime": 52.7686, | |
| "eval_samples_per_second": 4.738, | |
| "eval_steps_per_second": 1.194, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 39.05, | |
| "mem_reserved_gb": 74.24 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 2705, | |
| "epoch": 0.7658008351617241, | |
| "wallclock": "2026-05-23T10:48:31.237934", | |
| "loss": 0.0998, | |
| "grad_norm": 1.0532370805740356, | |
| "learning_rate": 7.074231070240969e-06, | |
| "step_time_sec": 271.19 | |
| }, | |
| { | |
| "step": 2710, | |
| "epoch": 0.7672163635076792, | |
| "wallclock": "2026-05-23T10:50:25.703822", | |
| "loss": 0.1059, | |
| "grad_norm": 1.1707059144973755, | |
| "learning_rate": 7.063796160947811e-06, | |
| "step_time_sec": 114.47 | |
| }, | |
| { | |
| "step": 2715, | |
| "epoch": 0.7686318918536343, | |
| "wallclock": "2026-05-23T10:52:20.230127", | |
| "loss": 0.0836, | |
| "grad_norm": 1.0319560766220093, | |
| "learning_rate": 7.0533504056033234e-06, | |
| "step_time_sec": 114.53 | |
| }, | |
| { | |
| "step": 2720, | |
| "epoch": 0.7700474201995895, | |
| "wallclock": "2026-05-23T10:54:15.428683", | |
| "loss": 0.0971, | |
| "grad_norm": 1.1601600646972656, | |
| "learning_rate": 7.042893859104008e-06, | |
| "step_time_sec": 115.2 | |
| }, | |
| { | |
| "step": 2725, | |
| "epoch": 0.7714629485455446, | |
| "wallclock": "2026-05-23T10:56:09.560972", | |
| "loss": 0.0808, | |
| "grad_norm": 1.0438365936279297, | |
| "learning_rate": 7.032426576403084e-06, | |
| "step_time_sec": 114.13 | |
| }, | |
| { | |
| "step": 2730, | |
| "epoch": 0.7728784768914998, | |
| "wallclock": "2026-05-23T10:58:03.848888", | |
| "loss": 0.102, | |
| "grad_norm": 1.1061596870422363, | |
| "learning_rate": 7.021948612510194e-06, | |
| "step_time_sec": 114.29 | |
| }, | |
| { | |
| "step": 2735, | |
| "epoch": 0.7742940052374548, | |
| "wallclock": "2026-05-23T10:59:58.320211", | |
| "loss": 0.0984, | |
| "grad_norm": 0.7871215343475342, | |
| "learning_rate": 7.011460022491111e-06, | |
| "step_time_sec": 114.47 | |
| }, | |
| { | |
| "step": 2740, | |
| "epoch": 0.77570953358341, | |
| "wallclock": "2026-05-23T11:01:53.123512", | |
| "loss": 0.0861, | |
| "grad_norm": 0.9695367813110352, | |
| "learning_rate": 7.000960861467454e-06, | |
| "step_time_sec": 114.8 | |
| }, | |
| { | |
| "step": 2745, | |
| "epoch": 0.7771250619293651, | |
| "wallclock": "2026-05-23T11:03:47.400982", | |
| "loss": 0.0988, | |
| "grad_norm": 0.9494866132736206, | |
| "learning_rate": 6.990451184616399e-06, | |
| "step_time_sec": 114.28 | |
| }, | |
| { | |
| "step": 2750, | |
| "epoch": 0.7785405902753203, | |
| "wallclock": "2026-05-23T11:05:41.439834", | |
| "loss": 0.0848, | |
| "grad_norm": 0.8476992249488831, | |
| "learning_rate": 6.979931047170382e-06, | |
| "step_time_sec": 114.04, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 33.45, | |
| "mem_reserved_gb": 74.24 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 2755, | |
| "epoch": 0.7799561186212753, | |
| "wallclock": "2026-05-23T11:07:35.107884", | |
| "loss": 0.0877, | |
| "grad_norm": 0.9056459069252014, | |
| "learning_rate": 6.969400504416816e-06, | |
| "step_time_sec": 113.67 | |
| }, | |
| { | |
| "step": 2760, | |
| "epoch": 0.7813716469672305, | |
| "wallclock": "2026-05-23T11:09:27.251632", | |
| "loss": 0.0942, | |
| "grad_norm": 1.1086695194244385, | |
| "learning_rate": 6.9588596116978015e-06, | |
| "step_time_sec": 112.14 | |
| }, | |
| { | |
| "step": 2765, | |
| "epoch": 0.7827871753131856, | |
| "wallclock": "2026-05-23T11:11:21.586762", | |
| "loss": 0.1023, | |
| "grad_norm": 1.655490756034851, | |
| "learning_rate": 6.948308424409824e-06, | |
| "step_time_sec": 114.34 | |
| }, | |
| { | |
| "step": 2770, | |
| "epoch": 0.7842027036591408, | |
| "wallclock": "2026-05-23T11:13:15.016276", | |
| "loss": 0.1057, | |
| "grad_norm": 0.9345031380653381, | |
| "learning_rate": 6.937746998003477e-06, | |
| "step_time_sec": 113.43 | |
| }, | |
| { | |
| "step": 2775, | |
| "epoch": 0.785618232005096, | |
| "wallclock": "2026-05-23T11:15:09.371971", | |
| "loss": 0.0827, | |
| "grad_norm": 1.2220042943954468, | |
| "learning_rate": 6.927175387983165e-06, | |
| "step_time_sec": 114.36 | |
| }, | |
| { | |
| "step": 2780, | |
| "epoch": 0.787033760351051, | |
| "wallclock": "2026-05-23T11:17:03.650861", | |
| "loss": 0.1028, | |
| "grad_norm": 0.8835825324058533, | |
| "learning_rate": 6.9165936499068065e-06, | |
| "step_time_sec": 114.28 | |
| }, | |
| { | |
| "step": 2785, | |
| "epoch": 0.7884492886970061, | |
| "wallclock": "2026-05-23T11:18:57.806390", | |
| "loss": 0.095, | |
| "grad_norm": 1.1001851558685303, | |
| "learning_rate": 6.906001839385551e-06, | |
| "step_time_sec": 114.16 | |
| }, | |
| { | |
| "step": 2790, | |
| "epoch": 0.7898648170429613, | |
| "wallclock": "2026-05-23T11:20:52.228547", | |
| "loss": 0.0906, | |
| "grad_norm": 0.7298992276191711, | |
| "learning_rate": 6.895400012083482e-06, | |
| "step_time_sec": 114.42 | |
| }, | |
| { | |
| "step": 2795, | |
| "epoch": 0.7912803453889165, | |
| "wallclock": "2026-05-23T11:22:47.415434", | |
| "loss": 0.1135, | |
| "grad_norm": 0.8096187710762024, | |
| "learning_rate": 6.884788223717326e-06, | |
| "step_time_sec": 115.19 | |
| }, | |
| { | |
| "step": 2800, | |
| "epoch": 0.7926958737348715, | |
| "wallclock": "2026-05-23T11:24:42.100846", | |
| "loss": 0.0896, | |
| "grad_norm": 0.7147625088691711, | |
| "learning_rate": 6.874166530056153e-06, | |
| "step_time_sec": 114.69, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 33.45, | |
| "mem_reserved_gb": 74.24 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 2800, | |
| "epoch": 0.7926958737348715, | |
| "wallclock": "2026-05-23T11:25:35.071421", | |
| "eval_loss": 0.10768646746873856, | |
| "eval_runtime": 52.8634, | |
| "eval_samples_per_second": 4.729, | |
| "eval_steps_per_second": 1.192, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 39.05, | |
| "mem_reserved_gb": 74.24 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 2805, | |
| "epoch": 0.7941114020808266, | |
| "wallclock": "2026-05-23T11:29:12.349456", | |
| "loss": 0.104, | |
| "grad_norm": 1.1961441040039062, | |
| "learning_rate": 6.863534986921098e-06, | |
| "step_time_sec": 270.25 | |
| }, | |
| { | |
| "step": 2810, | |
| "epoch": 0.7955269304267818, | |
| "wallclock": "2026-05-23T11:31:06.641212", | |
| "loss": 0.0879, | |
| "grad_norm": 0.8926189541816711, | |
| "learning_rate": 6.852893650185051e-06, | |
| "step_time_sec": 114.29 | |
| }, | |
| { | |
| "step": 2815, | |
| "epoch": 0.796942458772737, | |
| "wallclock": "2026-05-23T11:33:01.446223", | |
| "loss": 0.0953, | |
| "grad_norm": 0.6535293459892273, | |
| "learning_rate": 6.842242575772374e-06, | |
| "step_time_sec": 114.81 | |
| }, | |
| { | |
| "step": 2820, | |
| "epoch": 0.7983579871186921, | |
| "wallclock": "2026-05-23T11:34:55.451278", | |
| "loss": 0.0921, | |
| "grad_norm": 1.124362587928772, | |
| "learning_rate": 6.831581819658608e-06, | |
| "step_time_sec": 114.01 | |
| }, | |
| { | |
| "step": 2825, | |
| "epoch": 0.7997735154646471, | |
| "wallclock": "2026-05-23T11:36:49.662998", | |
| "loss": 0.1037, | |
| "grad_norm": 0.7776113152503967, | |
| "learning_rate": 6.820911437870169e-06, | |
| "step_time_sec": 114.21 | |
| }, | |
| { | |
| "step": 2830, | |
| "epoch": 0.8011890438106023, | |
| "wallclock": "2026-05-23T11:38:45.282209", | |
| "loss": 0.0958, | |
| "grad_norm": 1.1590611934661865, | |
| "learning_rate": 6.810231486484064e-06, | |
| "step_time_sec": 115.62 | |
| }, | |
| { | |
| "step": 2835, | |
| "epoch": 0.8026045721565574, | |
| "wallclock": "2026-05-23T11:40:40.066510", | |
| "loss": 0.0928, | |
| "grad_norm": 0.9135128259658813, | |
| "learning_rate": 6.79954202162759e-06, | |
| "step_time_sec": 114.78 | |
| }, | |
| { | |
| "step": 2840, | |
| "epoch": 0.8040201005025126, | |
| "wallclock": "2026-05-23T11:42:35.293647", | |
| "loss": 0.1171, | |
| "grad_norm": 1.5331295728683472, | |
| "learning_rate": 6.788843099478041e-06, | |
| "step_time_sec": 115.23 | |
| }, | |
| { | |
| "step": 2845, | |
| "epoch": 0.8054356288484676, | |
| "wallclock": "2026-05-23T11:44:32.410482", | |
| "loss": 0.0892, | |
| "grad_norm": 0.802897036075592, | |
| "learning_rate": 6.778134776262413e-06, | |
| "step_time_sec": 117.12 | |
| }, | |
| { | |
| "step": 2850, | |
| "epoch": 0.8068511571944228, | |
| "wallclock": "2026-05-23T11:46:26.926697", | |
| "loss": 0.1081, | |
| "grad_norm": 1.0739949941635132, | |
| "learning_rate": 6.76741710825711e-06, | |
| "step_time_sec": 114.52, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 33.45, | |
| "mem_reserved_gb": 74.24 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 2855, | |
| "epoch": 0.808266685540378, | |
| "wallclock": "2026-05-23T11:48:20.898137", | |
| "loss": 0.0906, | |
| "grad_norm": 1.2039380073547363, | |
| "learning_rate": 6.756690151787643e-06, | |
| "step_time_sec": 113.97 | |
| }, | |
| { | |
| "step": 2860, | |
| "epoch": 0.8096822138863331, | |
| "wallclock": "2026-05-23T11:50:16.624972", | |
| "loss": 0.0765, | |
| "grad_norm": 0.9947606325149536, | |
| "learning_rate": 6.74595396322834e-06, | |
| "step_time_sec": 115.73 | |
| }, | |
| { | |
| "step": 2865, | |
| "epoch": 0.8110977422322883, | |
| "wallclock": "2026-05-23T11:52:08.742942", | |
| "loss": 0.0886, | |
| "grad_norm": 1.0721163749694824, | |
| "learning_rate": 6.735208599002048e-06, | |
| "step_time_sec": 112.12 | |
| }, | |
| { | |
| "step": 2870, | |
| "epoch": 0.8125132705782433, | |
| "wallclock": "2026-05-23T11:54:01.649122", | |
| "loss": 0.0956, | |
| "grad_norm": 0.9984346628189087, | |
| "learning_rate": 6.724454115579832e-06, | |
| "step_time_sec": 112.91 | |
| }, | |
| { | |
| "step": 2875, | |
| "epoch": 0.8139287989241984, | |
| "wallclock": "2026-05-23T11:55:55.986094", | |
| "loss": 0.1013, | |
| "grad_norm": 0.8976569771766663, | |
| "learning_rate": 6.713690569480685e-06, | |
| "step_time_sec": 114.34 | |
| }, | |
| { | |
| "step": 2880, | |
| "epoch": 0.8153443272701536, | |
| "wallclock": "2026-05-23T11:57:50.253656", | |
| "loss": 0.11, | |
| "grad_norm": 1.3766424655914307, | |
| "learning_rate": 6.7029180172712295e-06, | |
| "step_time_sec": 114.27 | |
| }, | |
| { | |
| "step": 2885, | |
| "epoch": 0.8167598556161088, | |
| "wallclock": "2026-05-23T11:59:45.942765", | |
| "loss": 0.0871, | |
| "grad_norm": 0.7033481597900391, | |
| "learning_rate": 6.6921365155654126e-06, | |
| "step_time_sec": 115.69 | |
| }, | |
| { | |
| "step": 2890, | |
| "epoch": 0.8181753839620638, | |
| "wallclock": "2026-05-23T12:01:42.248219", | |
| "loss": 0.0872, | |
| "grad_norm": 1.1330105066299438, | |
| "learning_rate": 6.6813461210242215e-06, | |
| "step_time_sec": 116.31 | |
| }, | |
| { | |
| "step": 2895, | |
| "epoch": 0.819590912308019, | |
| "wallclock": "2026-05-23T12:03:36.053753", | |
| "loss": 0.1055, | |
| "grad_norm": 1.2184752225875854, | |
| "learning_rate": 6.670546890355374e-06, | |
| "step_time_sec": 113.81 | |
| }, | |
| { | |
| "step": 2900, | |
| "epoch": 0.8210064406539741, | |
| "wallclock": "2026-05-23T12:05:30.028128", | |
| "loss": 0.1058, | |
| "grad_norm": 0.665178656578064, | |
| "learning_rate": 6.659738880313025e-06, | |
| "step_time_sec": 113.97, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 33.45, | |
| "mem_reserved_gb": 74.24 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 2900, | |
| "epoch": 0.8210064406539741, | |
| "wallclock": "2026-05-23T12:06:22.012953", | |
| "eval_loss": 0.10808777064085007, | |
| "eval_runtime": 51.8846, | |
| "eval_samples_per_second": 4.818, | |
| "eval_steps_per_second": 1.214, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 39.05, | |
| "mem_reserved_gb": 74.24 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 2905, | |
| "epoch": 0.8224219689999293, | |
| "wallclock": "2026-05-23T12:09:56.926177", | |
| "loss": 0.0969, | |
| "grad_norm": 1.386168122291565, | |
| "learning_rate": 6.648922147697471e-06, | |
| "step_time_sec": 266.9 | |
| }, | |
| { | |
| "step": 2910, | |
| "epoch": 0.8238374973458844, | |
| "wallclock": "2026-05-23T12:11:52.616991", | |
| "loss": 0.0787, | |
| "grad_norm": 1.0408498048782349, | |
| "learning_rate": 6.63809674935485e-06, | |
| "step_time_sec": 115.69 | |
| }, | |
| { | |
| "step": 2915, | |
| "epoch": 0.8252530256918394, | |
| "wallclock": "2026-05-23T12:13:47.509339", | |
| "loss": 0.106, | |
| "grad_norm": 1.0766488313674927, | |
| "learning_rate": 6.6272627421768366e-06, | |
| "step_time_sec": 114.89 | |
| }, | |
| { | |
| "step": 2920, | |
| "epoch": 0.8266685540377946, | |
| "wallclock": "2026-05-23T12:15:43.081952", | |
| "loss": 0.0859, | |
| "grad_norm": 0.8988505005836487, | |
| "learning_rate": 6.616420183100353e-06, | |
| "step_time_sec": 115.57 | |
| }, | |
| { | |
| "step": 2925, | |
| "epoch": 0.8280840823837498, | |
| "wallclock": "2026-05-23T12:17:37.913984", | |
| "loss": 0.09, | |
| "grad_norm": 1.0285881757736206, | |
| "learning_rate": 6.605569129107263e-06, | |
| "step_time_sec": 114.83 | |
| }, | |
| { | |
| "step": 2930, | |
| "epoch": 0.8294996107297049, | |
| "wallclock": "2026-05-23T12:19:32.363447", | |
| "loss": 0.0921, | |
| "grad_norm": 1.0034139156341553, | |
| "learning_rate": 6.594709637224075e-06, | |
| "step_time_sec": 114.45 | |
| }, | |
| { | |
| "step": 2935, | |
| "epoch": 0.83091513907566, | |
| "wallclock": "2026-05-23T12:21:27.369008", | |
| "loss": 0.0802, | |
| "grad_norm": 0.8240336775779724, | |
| "learning_rate": 6.583841764521641e-06, | |
| "step_time_sec": 115.01 | |
| }, | |
| { | |
| "step": 2940, | |
| "epoch": 0.8323306674216151, | |
| "wallclock": "2026-05-23T12:23:23.007495", | |
| "loss": 0.095, | |
| "grad_norm": 1.2371604442596436, | |
| "learning_rate": 6.572965568114859e-06, | |
| "step_time_sec": 115.64 | |
| }, | |
| { | |
| "step": 2945, | |
| "epoch": 0.8337461957675703, | |
| "wallclock": "2026-05-23T12:25:16.996557", | |
| "loss": 0.095, | |
| "grad_norm": 1.1819149255752563, | |
| "learning_rate": 6.562081105162369e-06, | |
| "step_time_sec": 113.99 | |
| }, | |
| { | |
| "step": 2950, | |
| "epoch": 0.8351617241135254, | |
| "wallclock": "2026-05-23T12:27:11.125332", | |
| "loss": 0.08, | |
| "grad_norm": 1.0016002655029297, | |
| "learning_rate": 6.551188432866257e-06, | |
| "step_time_sec": 114.13, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 33.45, | |
| "mem_reserved_gb": 74.24 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 2955, | |
| "epoch": 0.8365772524594804, | |
| "wallclock": "2026-05-23T12:29:06.207323", | |
| "loss": 0.1015, | |
| "grad_norm": 1.2419204711914062, | |
| "learning_rate": 6.5402876084717514e-06, | |
| "step_time_sec": 115.08 | |
| }, | |
| { | |
| "step": 2960, | |
| "epoch": 0.8379927808054356, | |
| "wallclock": "2026-05-23T12:31:00.634331", | |
| "loss": 0.0848, | |
| "grad_norm": 1.0234307050704956, | |
| "learning_rate": 6.529378689266923e-06, | |
| "step_time_sec": 114.43 | |
| }, | |
| { | |
| "step": 2965, | |
| "epoch": 0.8394083091513908, | |
| "wallclock": "2026-05-23T12:32:54.709674", | |
| "loss": 0.1004, | |
| "grad_norm": 1.3117458820343018, | |
| "learning_rate": 6.518461732582385e-06, | |
| "step_time_sec": 114.08 | |
| }, | |
| { | |
| "step": 2970, | |
| "epoch": 0.8408238374973459, | |
| "wallclock": "2026-05-23T12:34:50.639025", | |
| "loss": 0.0861, | |
| "grad_norm": 0.6640080213546753, | |
| "learning_rate": 6.507536795790989e-06, | |
| "step_time_sec": 115.93 | |
| }, | |
| { | |
| "step": 2975, | |
| "epoch": 0.8422393658433011, | |
| "wallclock": "2026-05-23T12:36:44.816198", | |
| "loss": 0.0921, | |
| "grad_norm": 0.7706874012947083, | |
| "learning_rate": 6.496603936307525e-06, | |
| "step_time_sec": 114.18 | |
| }, | |
| { | |
| "step": 2980, | |
| "epoch": 0.8436548941892561, | |
| "wallclock": "2026-05-23T12:38:41.632400", | |
| "loss": 0.0774, | |
| "grad_norm": 0.9700288772583008, | |
| "learning_rate": 6.4856632115884245e-06, | |
| "step_time_sec": 116.82 | |
| }, | |
| { | |
| "step": 2985, | |
| "epoch": 0.8450704225352113, | |
| "wallclock": "2026-05-23T12:40:37.751686", | |
| "loss": 0.0827, | |
| "grad_norm": 1.0276799201965332, | |
| "learning_rate": 6.4747146791314456e-06, | |
| "step_time_sec": 116.12 | |
| }, | |
| { | |
| "step": 2990, | |
| "epoch": 0.8464859508811664, | |
| "wallclock": "2026-05-23T12:42:31.949658", | |
| "loss": 0.1038, | |
| "grad_norm": 1.124481439590454, | |
| "learning_rate": 6.4637583964753855e-06, | |
| "step_time_sec": 114.2 | |
| }, | |
| { | |
| "step": 2995, | |
| "epoch": 0.8479014792271216, | |
| "wallclock": "2026-05-23T12:44:25.803641", | |
| "loss": 0.1034, | |
| "grad_norm": 1.4556708335876465, | |
| "learning_rate": 6.452794421199772e-06, | |
| "step_time_sec": 113.85 | |
| }, | |
| { | |
| "step": 3000, | |
| "epoch": 0.8493170075730766, | |
| "wallclock": "2026-05-23T12:46:20.662475", | |
| "loss": 0.0808, | |
| "grad_norm": 0.7637086510658264, | |
| "learning_rate": 6.441822810924555e-06, | |
| "step_time_sec": 114.86, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 33.45, | |
| "mem_reserved_gb": 74.24 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 3000, | |
| "epoch": 0.8493170075730766, | |
| "wallclock": "2026-05-23T12:47:14.237339", | |
| "eval_loss": 0.10439032316207886, | |
| "eval_runtime": 53.4665, | |
| "eval_samples_per_second": 4.676, | |
| "eval_steps_per_second": 1.178, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 39.05, | |
| "mem_reserved_gb": 74.24 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 3005, | |
| "epoch": 0.8507325359190318, | |
| "wallclock": "2026-05-23T12:50:47.509195", | |
| "loss": 0.0871, | |
| "grad_norm": 1.2917267084121704, | |
| "learning_rate": 6.430843623309815e-06, | |
| "step_time_sec": 266.85 | |
| }, | |
| { | |
| "step": 3010, | |
| "epoch": 0.8521480642649869, | |
| "wallclock": "2026-05-23T12:52:42.040424", | |
| "loss": 0.0832, | |
| "grad_norm": 1.129804015159607, | |
| "learning_rate": 6.419856916055453e-06, | |
| "step_time_sec": 114.53 | |
| }, | |
| { | |
| "step": 3015, | |
| "epoch": 0.8535635926109421, | |
| "wallclock": "2026-05-23T12:54:39.916099", | |
| "loss": 0.0821, | |
| "grad_norm": 0.72309809923172, | |
| "learning_rate": 6.408862746900884e-06, | |
| "step_time_sec": 117.88 | |
| }, | |
| { | |
| "step": 3020, | |
| "epoch": 0.8549791209568972, | |
| "wallclock": "2026-05-23T12:56:33.338244", | |
| "loss": 0.0764, | |
| "grad_norm": 0.6796430349349976, | |
| "learning_rate": 6.397861173624745e-06, | |
| "step_time_sec": 113.42 | |
| }, | |
| { | |
| "step": 3025, | |
| "epoch": 0.8563946493028523, | |
| "wallclock": "2026-05-23T12:58:26.302630", | |
| "loss": 0.0984, | |
| "grad_norm": 1.0264241695404053, | |
| "learning_rate": 6.386852254044582e-06, | |
| "step_time_sec": 112.96 | |
| }, | |
| { | |
| "step": 3030, | |
| "epoch": 0.8578101776488074, | |
| "wallclock": "2026-05-23T13:00:19.802002", | |
| "loss": 0.0875, | |
| "grad_norm": 1.4211701154708862, | |
| "learning_rate": 6.375836046016547e-06, | |
| "step_time_sec": 113.5 | |
| }, | |
| { | |
| "step": 3035, | |
| "epoch": 0.8592257059947626, | |
| "wallclock": "2026-05-23T13:02:13.721659", | |
| "loss": 0.0833, | |
| "grad_norm": 1.0724290609359741, | |
| "learning_rate": 6.3648126074350955e-06, | |
| "step_time_sec": 113.92 | |
| }, | |
| { | |
| "step": 3040, | |
| "epoch": 0.8606412343407177, | |
| "wallclock": "2026-05-23T13:04:08.151856", | |
| "loss": 0.0943, | |
| "grad_norm": 0.9527065753936768, | |
| "learning_rate": 6.353781996232689e-06, | |
| "step_time_sec": 114.43 | |
| }, | |
| { | |
| "step": 3045, | |
| "epoch": 0.8620567626866728, | |
| "wallclock": "2026-05-23T13:06:02.084910", | |
| "loss": 0.0915, | |
| "grad_norm": 0.9171473979949951, | |
| "learning_rate": 6.342744270379471e-06, | |
| "step_time_sec": 113.93 | |
| }, | |
| { | |
| "step": 3050, | |
| "epoch": 0.8634722910326279, | |
| "wallclock": "2026-05-23T13:07:56.129979", | |
| "loss": 0.0772, | |
| "grad_norm": 1.1974050998687744, | |
| "learning_rate": 6.331699487882987e-06, | |
| "step_time_sec": 114.05, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 33.45, | |
| "mem_reserved_gb": 74.24 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 3055, | |
| "epoch": 0.8648878193785831, | |
| "wallclock": "2026-05-23T13:09:50.014721", | |
| "loss": 0.079, | |
| "grad_norm": 1.2516535520553589, | |
| "learning_rate": 6.320647706787866e-06, | |
| "step_time_sec": 113.88 | |
| }, | |
| { | |
| "step": 3060, | |
| "epoch": 0.8663033477245382, | |
| "wallclock": "2026-05-23T13:11:43.877111", | |
| "loss": 0.0819, | |
| "grad_norm": 0.8899274468421936, | |
| "learning_rate": 6.30958898517551e-06, | |
| "step_time_sec": 113.86 | |
| }, | |
| { | |
| "step": 3065, | |
| "epoch": 0.8677188760704934, | |
| "wallclock": "2026-05-23T13:13:37.608088", | |
| "loss": 0.0766, | |
| "grad_norm": 0.6620562076568604, | |
| "learning_rate": 6.298523381163805e-06, | |
| "step_time_sec": 113.73 | |
| }, | |
| { | |
| "step": 3070, | |
| "epoch": 0.8691344044164484, | |
| "wallclock": "2026-05-23T13:15:31.210530", | |
| "loss": 0.0868, | |
| "grad_norm": 1.2216447591781616, | |
| "learning_rate": 6.287450952906802e-06, | |
| "step_time_sec": 113.6 | |
| }, | |
| { | |
| "step": 3075, | |
| "epoch": 0.8705499327624036, | |
| "wallclock": "2026-05-23T13:17:25.173111", | |
| "loss": 0.0823, | |
| "grad_norm": 1.8554191589355469, | |
| "learning_rate": 6.276371758594416e-06, | |
| "step_time_sec": 113.96 | |
| }, | |
| { | |
| "step": 3080, | |
| "epoch": 0.8719654611083587, | |
| "wallclock": "2026-05-23T13:19:19.783902", | |
| "loss": 0.1078, | |
| "grad_norm": 1.097886085510254, | |
| "learning_rate": 6.265285856452123e-06, | |
| "step_time_sec": 114.61 | |
| }, | |
| { | |
| "step": 3085, | |
| "epoch": 0.8733809894543139, | |
| "wallclock": "2026-05-23T13:21:13.021188", | |
| "loss": 0.1032, | |
| "grad_norm": 0.9588475227355957, | |
| "learning_rate": 6.254193304740648e-06, | |
| "step_time_sec": 113.24 | |
| }, | |
| { | |
| "step": 3090, | |
| "epoch": 0.8747965178002689, | |
| "wallclock": "2026-05-23T13:23:05.522960", | |
| "loss": 0.0746, | |
| "grad_norm": 0.9044705629348755, | |
| "learning_rate": 6.243094161755664e-06, | |
| "step_time_sec": 112.5 | |
| }, | |
| { | |
| "step": 3095, | |
| "epoch": 0.876212046146224, | |
| "wallclock": "2026-05-23T13:25:01.305677", | |
| "loss": 0.0996, | |
| "grad_norm": 1.350035309791565, | |
| "learning_rate": 6.231988485827483e-06, | |
| "step_time_sec": 115.78 | |
| }, | |
| { | |
| "step": 3100, | |
| "epoch": 0.8776275744921792, | |
| "wallclock": "2026-05-23T13:26:54.684490", | |
| "loss": 0.0947, | |
| "grad_norm": 1.00934898853302, | |
| "learning_rate": 6.220876335320752e-06, | |
| "step_time_sec": 113.38, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 33.45, | |
| "mem_reserved_gb": 74.24 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 3100, | |
| "epoch": 0.8776275744921792, | |
| "wallclock": "2026-05-23T13:27:47.250665", | |
| "eval_loss": 0.10196959972381592, | |
| "eval_runtime": 52.4629, | |
| "eval_samples_per_second": 4.765, | |
| "eval_steps_per_second": 1.201, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 39.05, | |
| "mem_reserved_gb": 74.24 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 3105, | |
| "epoch": 0.8790431028381344, | |
| "wallclock": "2026-05-23T13:31:22.745504", | |
| "loss": 0.076, | |
| "grad_norm": 1.0037931203842163, | |
| "learning_rate": 6.209757768634142e-06, | |
| "step_time_sec": 268.06 | |
| }, | |
| { | |
| "step": 3110, | |
| "epoch": 0.8804586311840895, | |
| "wallclock": "2026-05-23T13:33:17.015868", | |
| "loss": 0.1029, | |
| "grad_norm": 1.383480191230774, | |
| "learning_rate": 6.1986328442000425e-06, | |
| "step_time_sec": 114.27 | |
| }, | |
| { | |
| "step": 3115, | |
| "epoch": 0.8818741595300446, | |
| "wallclock": "2026-05-23T13:35:09.821778", | |
| "loss": 0.0865, | |
| "grad_norm": 1.2162877321243286, | |
| "learning_rate": 6.18750162048426e-06, | |
| "step_time_sec": 112.81 | |
| }, | |
| { | |
| "step": 3120, | |
| "epoch": 0.8832896878759997, | |
| "wallclock": "2026-05-23T13:37:02.349154", | |
| "loss": 0.0919, | |
| "grad_norm": 0.8934468626976013, | |
| "learning_rate": 6.176364155985701e-06, | |
| "step_time_sec": 112.53 | |
| }, | |
| { | |
| "step": 3125, | |
| "epoch": 0.8847052162219549, | |
| "wallclock": "2026-05-23T13:38:56.196594", | |
| "loss": 0.0939, | |
| "grad_norm": 0.848867654800415, | |
| "learning_rate": 6.165220509236076e-06, | |
| "step_time_sec": 113.85 | |
| }, | |
| { | |
| "step": 3130, | |
| "epoch": 0.88612074456791, | |
| "wallclock": "2026-05-23T13:40:49.415671", | |
| "loss": 0.0871, | |
| "grad_norm": 1.3182566165924072, | |
| "learning_rate": 6.1540707387995775e-06, | |
| "step_time_sec": 113.22 | |
| }, | |
| { | |
| "step": 3135, | |
| "epoch": 0.887536272913865, | |
| "wallclock": "2026-05-23T13:42:44.238969", | |
| "loss": 0.1092, | |
| "grad_norm": 1.0121556520462036, | |
| "learning_rate": 6.1429149032725875e-06, | |
| "step_time_sec": 114.82 | |
| }, | |
| { | |
| "step": 3140, | |
| "epoch": 0.8889518012598202, | |
| "wallclock": "2026-05-23T13:44:39.595399", | |
| "loss": 0.0762, | |
| "grad_norm": 1.2405686378479004, | |
| "learning_rate": 6.13175306128336e-06, | |
| "step_time_sec": 115.36 | |
| }, | |
| { | |
| "step": 3145, | |
| "epoch": 0.8903673296057754, | |
| "wallclock": "2026-05-23T13:46:34.620333", | |
| "loss": 0.0945, | |
| "grad_norm": 1.2402104139328003, | |
| "learning_rate": 6.120585271491713e-06, | |
| "step_time_sec": 115.02 | |
| }, | |
| { | |
| "step": 3150, | |
| "epoch": 0.8917828579517305, | |
| "wallclock": "2026-05-23T13:48:29.023383", | |
| "loss": 0.0857, | |
| "grad_norm": 0.910408616065979, | |
| "learning_rate": 6.1094115925887235e-06, | |
| "step_time_sec": 114.4, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 33.45, | |
| "mem_reserved_gb": 74.24 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 3155, | |
| "epoch": 0.8931983862976857, | |
| "wallclock": "2026-05-23T13:50:23.390785", | |
| "loss": 0.0891, | |
| "grad_norm": 0.9833782315254211, | |
| "learning_rate": 6.098232083296423e-06, | |
| "step_time_sec": 114.37 | |
| }, | |
| { | |
| "step": 3160, | |
| "epoch": 0.8946139146436407, | |
| "wallclock": "2026-05-23T13:52:17.597934", | |
| "loss": 0.0749, | |
| "grad_norm": 1.2741199731826782, | |
| "learning_rate": 6.087046802367476e-06, | |
| "step_time_sec": 114.21 | |
| }, | |
| { | |
| "step": 3165, | |
| "epoch": 0.8960294429895959, | |
| "wallclock": "2026-05-23T13:54:11.564969", | |
| "loss": 0.0801, | |
| "grad_norm": 1.4001483917236328, | |
| "learning_rate": 6.075855808584886e-06, | |
| "step_time_sec": 113.97 | |
| }, | |
| { | |
| "step": 3170, | |
| "epoch": 0.897444971335551, | |
| "wallclock": "2026-05-23T13:56:05.962888", | |
| "loss": 0.0825, | |
| "grad_norm": 0.909929633140564, | |
| "learning_rate": 6.064659160761676e-06, | |
| "step_time_sec": 114.4 | |
| }, | |
| { | |
| "step": 3175, | |
| "epoch": 0.8988604996815062, | |
| "wallclock": "2026-05-23T13:58:01.036489", | |
| "loss": 0.0584, | |
| "grad_norm": 0.8718348145484924, | |
| "learning_rate": 6.053456917740585e-06, | |
| "step_time_sec": 115.07 | |
| }, | |
| { | |
| "step": 3180, | |
| "epoch": 0.9002760280274612, | |
| "wallclock": "2026-05-23T13:59:54.455386", | |
| "loss": 0.0979, | |
| "grad_norm": 1.4148125648498535, | |
| "learning_rate": 6.042249138393753e-06, | |
| "step_time_sec": 113.42 | |
| }, | |
| { | |
| "step": 3185, | |
| "epoch": 0.9016915563734164, | |
| "wallclock": "2026-05-23T14:01:49.207549", | |
| "loss": 0.0914, | |
| "grad_norm": 0.9834646582603455, | |
| "learning_rate": 6.031035881622422e-06, | |
| "step_time_sec": 114.75 | |
| }, | |
| { | |
| "step": 3190, | |
| "epoch": 0.9031070847193715, | |
| "wallclock": "2026-05-23T14:03:45.252526", | |
| "loss": 0.1002, | |
| "grad_norm": 1.3153408765792847, | |
| "learning_rate": 6.019817206356615e-06, | |
| "step_time_sec": 116.04 | |
| }, | |
| { | |
| "step": 3195, | |
| "epoch": 0.9045226130653267, | |
| "wallclock": "2026-05-23T14:05:40.611460", | |
| "loss": 0.0856, | |
| "grad_norm": 0.9440031051635742, | |
| "learning_rate": 6.008593171554833e-06, | |
| "step_time_sec": 115.36 | |
| }, | |
| { | |
| "step": 3200, | |
| "epoch": 0.9059381414112817, | |
| "wallclock": "2026-05-23T14:07:53.973146", | |
| "loss": 0.0969, | |
| "grad_norm": 1.2231155633926392, | |
| "learning_rate": 5.997363836203744e-06, | |
| "step_time_sec": 133.36, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 33.45, | |
| "mem_reserved_gb": 74.24 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 3200, | |
| "epoch": 0.9059381414112817, | |
| "wallclock": "2026-05-23T14:09:02.006751", | |
| "eval_loss": 0.09937935322523117, | |
| "eval_runtime": 67.9272, | |
| "eval_samples_per_second": 3.68, | |
| "eval_steps_per_second": 0.927, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 39.05, | |
| "mem_reserved_gb": 74.24 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 3205, | |
| "epoch": 0.9073536697572369, | |
| "wallclock": "2026-05-23T14:12:39.937912", | |
| "loss": 0.1062, | |
| "grad_norm": 1.7902098894119263, | |
| "learning_rate": 5.98612925931787e-06, | |
| "step_time_sec": 285.96 | |
| }, | |
| { | |
| "step": 3210, | |
| "epoch": 0.908769198103192, | |
| "wallclock": "2026-05-23T14:14:33.769972", | |
| "loss": 0.0924, | |
| "grad_norm": 1.207891583442688, | |
| "learning_rate": 5.974889499939283e-06, | |
| "step_time_sec": 113.83 | |
| }, | |
| { | |
| "step": 3215, | |
| "epoch": 0.9101847264491472, | |
| "wallclock": "2026-05-23T14:16:28.806370", | |
| "loss": 0.0685, | |
| "grad_norm": 1.0537505149841309, | |
| "learning_rate": 5.96364461713729e-06, | |
| "step_time_sec": 115.04 | |
| }, | |
| { | |
| "step": 3220, | |
| "epoch": 0.9116002547951023, | |
| "wallclock": "2026-05-23T14:18:23.631952", | |
| "loss": 0.0881, | |
| "grad_norm": 1.5671195983886719, | |
| "learning_rate": 5.952394670008119e-06, | |
| "step_time_sec": 114.83 | |
| }, | |
| { | |
| "step": 3225, | |
| "epoch": 0.9130157831410574, | |
| "wallclock": "2026-05-23T14:20:17.612828", | |
| "loss": 0.1033, | |
| "grad_norm": 1.521396517753601, | |
| "learning_rate": 5.94113971767462e-06, | |
| "step_time_sec": 113.98 | |
| }, | |
| { | |
| "step": 3230, | |
| "epoch": 0.9144313114870125, | |
| "wallclock": "2026-05-23T14:22:11.445585", | |
| "loss": 0.0618, | |
| "grad_norm": 0.9208618402481079, | |
| "learning_rate": 5.9298798192859434e-06, | |
| "step_time_sec": 113.83 | |
| }, | |
| { | |
| "step": 3235, | |
| "epoch": 0.9158468398329677, | |
| "wallclock": "2026-05-23T14:24:06.144035", | |
| "loss": 0.0873, | |
| "grad_norm": 1.1370309591293335, | |
| "learning_rate": 5.9186150340172325e-06, | |
| "step_time_sec": 114.7 | |
| }, | |
| { | |
| "step": 3240, | |
| "epoch": 0.9172623681789228, | |
| "wallclock": "2026-05-23T14:25:59.240016", | |
| "loss": 0.0803, | |
| "grad_norm": 1.02957022190094, | |
| "learning_rate": 5.907345421069314e-06, | |
| "step_time_sec": 113.1 | |
| }, | |
| { | |
| "step": 3245, | |
| "epoch": 0.9186778965248779, | |
| "wallclock": "2026-05-23T14:27:52.197012", | |
| "loss": 0.0811, | |
| "grad_norm": 1.390236496925354, | |
| "learning_rate": 5.896071039668388e-06, | |
| "step_time_sec": 112.96 | |
| }, | |
| { | |
| "step": 3250, | |
| "epoch": 0.920093424870833, | |
| "wallclock": "2026-05-23T14:29:46.705683", | |
| "loss": 0.0873, | |
| "grad_norm": 1.451936960220337, | |
| "learning_rate": 5.8847919490657114e-06, | |
| "step_time_sec": 114.51, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 33.45, | |
| "mem_reserved_gb": 74.24 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 3255, | |
| "epoch": 0.9215089532167882, | |
| "wallclock": "2026-05-23T14:31:40.040364", | |
| "loss": 0.087, | |
| "grad_norm": 1.092909574508667, | |
| "learning_rate": 5.873508208537291e-06, | |
| "step_time_sec": 113.33 | |
| }, | |
| { | |
| "step": 3260, | |
| "epoch": 0.9229244815627433, | |
| "wallclock": "2026-05-23T14:33:33.153697", | |
| "loss": 0.0701, | |
| "grad_norm": 1.0500355958938599, | |
| "learning_rate": 5.8622198773835725e-06, | |
| "step_time_sec": 113.11 | |
| }, | |
| { | |
| "step": 3265, | |
| "epoch": 0.9243400099086985, | |
| "wallclock": "2026-05-23T14:35:26.975118", | |
| "loss": 0.0858, | |
| "grad_norm": 1.4215220212936401, | |
| "learning_rate": 5.850927014929124e-06, | |
| "step_time_sec": 113.82 | |
| }, | |
| { | |
| "step": 3270, | |
| "epoch": 0.9257555382546535, | |
| "wallclock": "2026-05-23T14:37:21.438405", | |
| "loss": 0.0904, | |
| "grad_norm": 1.1870381832122803, | |
| "learning_rate": 5.83962968052233e-06, | |
| "step_time_sec": 114.46 | |
| }, | |
| { | |
| "step": 3275, | |
| "epoch": 0.9271710666006087, | |
| "wallclock": "2026-05-23T14:39:15.451984", | |
| "loss": 0.0791, | |
| "grad_norm": 0.9800876379013062, | |
| "learning_rate": 5.828327933535075e-06, | |
| "step_time_sec": 114.01 | |
| }, | |
| { | |
| "step": 3280, | |
| "epoch": 0.9285865949465638, | |
| "wallclock": "2026-05-23T14:41:09.339469", | |
| "loss": 0.0825, | |
| "grad_norm": 1.2808606624603271, | |
| "learning_rate": 5.817021833362434e-06, | |
| "step_time_sec": 113.89 | |
| }, | |
| { | |
| "step": 3285, | |
| "epoch": 0.930002123292519, | |
| "wallclock": "2026-05-23T14:43:02.863576", | |
| "loss": 0.1006, | |
| "grad_norm": 0.8630105257034302, | |
| "learning_rate": 5.805711439422361e-06, | |
| "step_time_sec": 113.52 | |
| }, | |
| { | |
| "step": 3290, | |
| "epoch": 0.931417651638474, | |
| "wallclock": "2026-05-23T14:44:56.304812", | |
| "loss": 0.0951, | |
| "grad_norm": 1.7691140174865723, | |
| "learning_rate": 5.794396811155372e-06, | |
| "step_time_sec": 113.44 | |
| }, | |
| { | |
| "step": 3295, | |
| "epoch": 0.9328331799844292, | |
| "wallclock": "2026-05-23T14:46:51.225086", | |
| "loss": 0.0859, | |
| "grad_norm": 1.175764799118042, | |
| "learning_rate": 5.78307800802424e-06, | |
| "step_time_sec": 114.92 | |
| }, | |
| { | |
| "step": 3300, | |
| "epoch": 0.9342487083303843, | |
| "wallclock": "2026-05-23T14:48:45.011673", | |
| "loss": 0.0789, | |
| "grad_norm": 1.243912696838379, | |
| "learning_rate": 5.771755089513678e-06, | |
| "step_time_sec": 113.79, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 33.45, | |
| "mem_reserved_gb": 74.24 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 3300, | |
| "epoch": 0.9342487083303843, | |
| "wallclock": "2026-05-23T14:49:37.428370", | |
| "eval_loss": 0.09591619670391083, | |
| "eval_runtime": 52.3234, | |
| "eval_samples_per_second": 4.778, | |
| "eval_steps_per_second": 1.204, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 39.05, | |
| "mem_reserved_gb": 74.24 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 3305, | |
| "epoch": 0.9356642366763395, | |
| "wallclock": "2026-05-23T14:53:13.024206", | |
| "loss": 0.0852, | |
| "grad_norm": 1.0862802267074585, | |
| "learning_rate": 5.760428115130021e-06, | |
| "step_time_sec": 268.01 | |
| }, | |
| { | |
| "step": 3310, | |
| "epoch": 0.9370797650222946, | |
| "wallclock": "2026-05-23T14:55:06.032103", | |
| "loss": 0.0869, | |
| "grad_norm": 1.217329502105713, | |
| "learning_rate": 5.749097144400929e-06, | |
| "step_time_sec": 113.01 | |
| }, | |
| { | |
| "step": 3315, | |
| "epoch": 0.9384952933682497, | |
| "wallclock": "2026-05-23T14:57:00.935813", | |
| "loss": 0.0695, | |
| "grad_norm": 0.9838262796401978, | |
| "learning_rate": 5.737762236875057e-06, | |
| "step_time_sec": 114.9 | |
| }, | |
| { | |
| "step": 3320, | |
| "epoch": 0.9399108217142048, | |
| "wallclock": "2026-05-23T14:58:55.441455", | |
| "loss": 0.0866, | |
| "grad_norm": 1.0086387395858765, | |
| "learning_rate": 5.726423452121751e-06, | |
| "step_time_sec": 114.51 | |
| }, | |
| { | |
| "step": 3325, | |
| "epoch": 0.94132635006016, | |
| "wallclock": "2026-05-23T15:00:49.538622", | |
| "loss": 0.0939, | |
| "grad_norm": 1.14065420627594, | |
| "learning_rate": 5.7150808497307345e-06, | |
| "step_time_sec": 114.1 | |
| }, | |
| { | |
| "step": 3330, | |
| "epoch": 0.9427418784061151, | |
| "wallclock": "2026-05-23T15:02:45.245821", | |
| "loss": 0.0974, | |
| "grad_norm": 1.3234528303146362, | |
| "learning_rate": 5.7037344893117956e-06, | |
| "step_time_sec": 115.71 | |
| }, | |
| { | |
| "step": 3335, | |
| "epoch": 0.9441574067520702, | |
| "wallclock": "2026-05-23T15:04:37.650997", | |
| "loss": 0.0788, | |
| "grad_norm": 1.4045474529266357, | |
| "learning_rate": 5.692384430494466e-06, | |
| "step_time_sec": 112.41 | |
| }, | |
| { | |
| "step": 3340, | |
| "epoch": 0.9455729350980253, | |
| "wallclock": "2026-05-23T15:06:32.368133", | |
| "loss": 0.0849, | |
| "grad_norm": 1.256629228591919, | |
| "learning_rate": 5.6810307329277226e-06, | |
| "step_time_sec": 114.72 | |
| }, | |
| { | |
| "step": 3345, | |
| "epoch": 0.9469884634439805, | |
| "wallclock": "2026-05-23T15:08:26.827602", | |
| "loss": 0.0824, | |
| "grad_norm": 1.130339503288269, | |
| "learning_rate": 5.669673456279659e-06, | |
| "step_time_sec": 114.46 | |
| }, | |
| { | |
| "step": 3350, | |
| "epoch": 0.9484039917899356, | |
| "wallclock": "2026-05-23T15:10:21.207468", | |
| "loss": 0.0693, | |
| "grad_norm": 1.282491683959961, | |
| "learning_rate": 5.65831266023718e-06, | |
| "step_time_sec": 114.38, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 33.45, | |
| "mem_reserved_gb": 74.24 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 3355, | |
| "epoch": 0.9498195201358908, | |
| "wallclock": "2026-05-23T15:12:15.228993", | |
| "loss": 0.0891, | |
| "grad_norm": 1.3946020603179932, | |
| "learning_rate": 5.646948404505686e-06, | |
| "step_time_sec": 114.02 | |
| }, | |
| { | |
| "step": 3360, | |
| "epoch": 0.9512350484818458, | |
| "wallclock": "2026-05-23T15:14:09.438263", | |
| "loss": 0.0761, | |
| "grad_norm": 1.1103034019470215, | |
| "learning_rate": 5.635580748808763e-06, | |
| "step_time_sec": 114.21 | |
| }, | |
| { | |
| "step": 3365, | |
| "epoch": 0.952650576827801, | |
| "wallclock": "2026-05-23T15:16:05.154445", | |
| "loss": 0.0839, | |
| "grad_norm": 1.1873400211334229, | |
| "learning_rate": 5.624209752887858e-06, | |
| "step_time_sec": 115.72 | |
| }, | |
| { | |
| "step": 3370, | |
| "epoch": 0.9540661051737561, | |
| "wallclock": "2026-05-23T15:17:59.804004", | |
| "loss": 0.0777, | |
| "grad_norm": 0.7737529277801514, | |
| "learning_rate": 5.612835476501979e-06, | |
| "step_time_sec": 114.65 | |
| }, | |
| { | |
| "step": 3375, | |
| "epoch": 0.9554816335197113, | |
| "wallclock": "2026-05-23T15:19:54.892005", | |
| "loss": 0.0812, | |
| "grad_norm": 0.9554314613342285, | |
| "learning_rate": 5.601457979427369e-06, | |
| "step_time_sec": 115.09 | |
| }, | |
| { | |
| "step": 3380, | |
| "epoch": 0.9568971618656663, | |
| "wallclock": "2026-05-23T15:21:50.036983", | |
| "loss": 0.0792, | |
| "grad_norm": 1.1392946243286133, | |
| "learning_rate": 5.5900773214572016e-06, | |
| "step_time_sec": 115.14 | |
| }, | |
| { | |
| "step": 3385, | |
| "epoch": 0.9583126902116215, | |
| "wallclock": "2026-05-23T15:23:43.201325", | |
| "loss": 0.0727, | |
| "grad_norm": 1.3224341869354248, | |
| "learning_rate": 5.578693562401257e-06, | |
| "step_time_sec": 113.16 | |
| }, | |
| { | |
| "step": 3390, | |
| "epoch": 0.9597282185575766, | |
| "wallclock": "2026-05-23T15:25:36.809850", | |
| "loss": 0.0755, | |
| "grad_norm": 1.0473873615264893, | |
| "learning_rate": 5.567306762085619e-06, | |
| "step_time_sec": 113.61 | |
| }, | |
| { | |
| "step": 3395, | |
| "epoch": 0.9611437469035318, | |
| "wallclock": "2026-05-23T15:27:31.712929", | |
| "loss": 0.086, | |
| "grad_norm": 0.9381260871887207, | |
| "learning_rate": 5.555916980352349e-06, | |
| "step_time_sec": 114.9 | |
| }, | |
| { | |
| "step": 3400, | |
| "epoch": 0.9625592752494869, | |
| "wallclock": "2026-05-23T15:29:26.406120", | |
| "loss": 0.0665, | |
| "grad_norm": 1.002871036529541, | |
| "learning_rate": 5.544524277059179e-06, | |
| "step_time_sec": 114.69, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 33.45, | |
| "mem_reserved_gb": 74.24 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 3400, | |
| "epoch": 0.9625592752494869, | |
| "wallclock": "2026-05-23T15:30:19.956625", | |
| "eval_loss": 0.09332611411809921, | |
| "eval_runtime": 53.4588, | |
| "eval_samples_per_second": 4.676, | |
| "eval_steps_per_second": 1.178, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 39.05, | |
| "mem_reserved_gb": 74.24 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 3405, | |
| "epoch": 0.963974803595442, | |
| "wallclock": "2026-05-23T15:33:54.931063", | |
| "loss": 0.0858, | |
| "grad_norm": 1.1658086776733398, | |
| "learning_rate": 5.5331287120791954e-06, | |
| "step_time_sec": 268.52 | |
| }, | |
| { | |
| "step": 3410, | |
| "epoch": 0.9653903319413971, | |
| "wallclock": "2026-05-23T15:35:48.621597", | |
| "loss": 0.0581, | |
| "grad_norm": 0.7146378755569458, | |
| "learning_rate": 5.5217303453005225e-06, | |
| "step_time_sec": 113.69 | |
| }, | |
| { | |
| "step": 3415, | |
| "epoch": 0.9668058602873523, | |
| "wallclock": "2026-05-23T15:37:42.046818", | |
| "loss": 0.0755, | |
| "grad_norm": 1.1899656057357788, | |
| "learning_rate": 5.51032923662601e-06, | |
| "step_time_sec": 113.43 | |
| }, | |
| { | |
| "step": 3420, | |
| "epoch": 0.9682213886333074, | |
| "wallclock": "2026-05-23T15:39:35.748693", | |
| "loss": 0.0946, | |
| "grad_norm": 1.0844637155532837, | |
| "learning_rate": 5.498925445972918e-06, | |
| "step_time_sec": 113.7 | |
| }, | |
| { | |
| "step": 3425, | |
| "epoch": 0.9696369169792625, | |
| "wallclock": "2026-05-23T15:41:30.464137", | |
| "loss": 0.0793, | |
| "grad_norm": 1.2279070615768433, | |
| "learning_rate": 5.4875190332726e-06, | |
| "step_time_sec": 114.72 | |
| }, | |
| { | |
| "step": 3430, | |
| "epoch": 0.9710524453252176, | |
| "wallclock": "2026-05-23T15:43:26.510045", | |
| "loss": 0.0798, | |
| "grad_norm": 0.8382053971290588, | |
| "learning_rate": 5.476110058470192e-06, | |
| "step_time_sec": 116.05 | |
| }, | |
| { | |
| "step": 3435, | |
| "epoch": 0.9724679736711728, | |
| "wallclock": "2026-05-23T15:45:21.432258", | |
| "loss": 0.0813, | |
| "grad_norm": 0.9018872976303101, | |
| "learning_rate": 5.464698581524292e-06, | |
| "step_time_sec": 114.92 | |
| }, | |
| { | |
| "step": 3440, | |
| "epoch": 0.9738835020171279, | |
| "wallclock": "2026-05-23T15:47:17.457363", | |
| "loss": 0.0759, | |
| "grad_norm": 1.4535553455352783, | |
| "learning_rate": 5.453284662406646e-06, | |
| "step_time_sec": 116.03 | |
| }, | |
| { | |
| "step": 3445, | |
| "epoch": 0.9752990303630831, | |
| "wallclock": "2026-05-23T15:49:11.750836", | |
| "loss": 0.0817, | |
| "grad_norm": 1.2279826402664185, | |
| "learning_rate": 5.4418683611018416e-06, | |
| "step_time_sec": 114.29 | |
| }, | |
| { | |
| "step": 3450, | |
| "epoch": 0.9767145587090381, | |
| "wallclock": "2026-05-23T15:51:05.914914", | |
| "loss": 0.074, | |
| "grad_norm": 1.2694281339645386, | |
| "learning_rate": 5.430449737606978e-06, | |
| "step_time_sec": 114.16, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 33.45, | |
| "mem_reserved_gb": 74.24 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 3455, | |
| "epoch": 0.9781300870549933, | |
| "wallclock": "2026-05-23T15:53:02.864640", | |
| "loss": 0.088, | |
| "grad_norm": 1.1676980257034302, | |
| "learning_rate": 5.4190288519313626e-06, | |
| "step_time_sec": 116.95 | |
| }, | |
| { | |
| "step": 3460, | |
| "epoch": 0.9795456154009484, | |
| "wallclock": "2026-05-23T15:54:57.888514", | |
| "loss": 0.0917, | |
| "grad_norm": 1.2617217302322388, | |
| "learning_rate": 5.407605764096193e-06, | |
| "step_time_sec": 115.02 | |
| }, | |
| { | |
| "step": 3465, | |
| "epoch": 0.9809611437469036, | |
| "wallclock": "2026-05-23T15:56:52.536839", | |
| "loss": 0.0839, | |
| "grad_norm": 2.16770339012146, | |
| "learning_rate": 5.396180534134234e-06, | |
| "step_time_sec": 114.65 | |
| }, | |
| { | |
| "step": 3470, | |
| "epoch": 0.9823766720928586, | |
| "wallclock": "2026-05-23T15:58:47.764833", | |
| "loss": 0.077, | |
| "grad_norm": 1.0116336345672607, | |
| "learning_rate": 5.384753222089515e-06, | |
| "step_time_sec": 115.23 | |
| }, | |
| { | |
| "step": 3475, | |
| "epoch": 0.9837922004388138, | |
| "wallclock": "2026-05-23T16:00:43.362477", | |
| "loss": 0.0812, | |
| "grad_norm": 1.185133457183838, | |
| "learning_rate": 5.373323888017003e-06, | |
| "step_time_sec": 115.6 | |
| }, | |
| { | |
| "step": 3480, | |
| "epoch": 0.9852077287847689, | |
| "wallclock": "2026-05-23T16:02:37.887940", | |
| "loss": 0.0719, | |
| "grad_norm": 1.3264069557189941, | |
| "learning_rate": 5.361892591982291e-06, | |
| "step_time_sec": 114.53 | |
| }, | |
| { | |
| "step": 3485, | |
| "epoch": 0.9866232571307241, | |
| "wallclock": "2026-05-23T16:04:31.882696", | |
| "loss": 0.064, | |
| "grad_norm": 0.7329959273338318, | |
| "learning_rate": 5.350459394061287e-06, | |
| "step_time_sec": 113.99 | |
| }, | |
| { | |
| "step": 3490, | |
| "epoch": 0.9880387854766791, | |
| "wallclock": "2026-05-23T16:06:25.692519", | |
| "loss": 0.0819, | |
| "grad_norm": 0.8542604446411133, | |
| "learning_rate": 5.339024354339892e-06, | |
| "step_time_sec": 113.81 | |
| }, | |
| { | |
| "step": 3495, | |
| "epoch": 0.9894543138226343, | |
| "wallclock": "2026-05-23T16:08:20.147221", | |
| "loss": 0.0867, | |
| "grad_norm": 1.266552448272705, | |
| "learning_rate": 5.327587532913685e-06, | |
| "step_time_sec": 114.45 | |
| }, | |
| { | |
| "step": 3500, | |
| "epoch": 0.9908698421685894, | |
| "wallclock": "2026-05-23T16:10:14.094051", | |
| "loss": 0.0898, | |
| "grad_norm": 1.8799265623092651, | |
| "learning_rate": 5.31614898988761e-06, | |
| "step_time_sec": 113.95, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 33.45, | |
| "mem_reserved_gb": 74.24 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 3500, | |
| "epoch": 0.9908698421685894, | |
| "wallclock": "2026-05-23T16:11:07.029960", | |
| "eval_loss": 0.08754169940948486, | |
| "eval_runtime": 52.8398, | |
| "eval_samples_per_second": 4.731, | |
| "eval_steps_per_second": 1.192, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 39.05, | |
| "mem_reserved_gb": 74.24 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 3505, | |
| "epoch": 0.9922853705145446, | |
| "wallclock": "2026-05-23T16:14:43.226754", | |
| "loss": 0.0769, | |
| "grad_norm": 1.471697211265564, | |
| "learning_rate": 5.3047087853756585e-06, | |
| "step_time_sec": 269.13 | |
| }, | |
| { | |
| "step": 3510, | |
| "epoch": 0.9937008988604997, | |
| "wallclock": "2026-05-23T16:16:37.120854", | |
| "loss": 0.0858, | |
| "grad_norm": 1.3194319009780884, | |
| "learning_rate": 5.2932669795005545e-06, | |
| "step_time_sec": 113.89 | |
| }, | |
| { | |
| "step": 3515, | |
| "epoch": 0.9951164272064548, | |
| "wallclock": "2026-05-23T16:18:30.428191", | |
| "loss": 0.0649, | |
| "grad_norm": 1.7350393533706665, | |
| "learning_rate": 5.281823632393436e-06, | |
| "step_time_sec": 113.31 | |
| }, | |
| { | |
| "step": 3520, | |
| "epoch": 0.9965319555524099, | |
| "wallclock": "2026-05-23T16:20:22.921865", | |
| "loss": 0.08, | |
| "grad_norm": 1.3547072410583496, | |
| "learning_rate": 5.270378804193543e-06, | |
| "step_time_sec": 112.49 | |
| }, | |
| { | |
| "step": 3525, | |
| "epoch": 0.9979474838983651, | |
| "wallclock": "2026-05-23T16:22:17.038164", | |
| "loss": 0.0836, | |
| "grad_norm": 1.2849969863891602, | |
| "learning_rate": 5.258932555047897e-06, | |
| "step_time_sec": 114.12 | |
| }, | |
| { | |
| "step": 3530, | |
| "epoch": 0.9993630122443202, | |
| "wallclock": "2026-05-23T16:24:11.663981", | |
| "loss": 0.0811, | |
| "grad_norm": 0.9789690971374512, | |
| "learning_rate": 5.247484945110988e-06, | |
| "step_time_sec": 114.63 | |
| }, | |
| { | |
| "step": 3535, | |
| "epoch": 1.0007785405902754, | |
| "wallclock": "2026-05-23T16:26:14.307733", | |
| "loss": 0.0578, | |
| "grad_norm": 0.6540358066558838, | |
| "learning_rate": 5.23603603454446e-06, | |
| "step_time_sec": 122.64 | |
| }, | |
| { | |
| "step": 3540, | |
| "epoch": 1.0021940689362305, | |
| "wallclock": "2026-05-23T16:28:05.823018", | |
| "loss": 0.0478, | |
| "grad_norm": 0.8033650517463684, | |
| "learning_rate": 5.2245858835167854e-06, | |
| "step_time_sec": 111.52 | |
| }, | |
| { | |
| "step": 3545, | |
| "epoch": 1.0036095972821857, | |
| "wallclock": "2026-05-23T16:29:56.996787", | |
| "loss": 0.0555, | |
| "grad_norm": 1.4636964797973633, | |
| "learning_rate": 5.213134552202963e-06, | |
| "step_time_sec": 111.17 | |
| }, | |
| { | |
| "step": 3550, | |
| "epoch": 1.0050251256281406, | |
| "wallclock": "2026-05-23T16:31:49.397682", | |
| "loss": 0.0424, | |
| "grad_norm": 0.8096024990081787, | |
| "learning_rate": 5.201682100784194e-06, | |
| "step_time_sec": 112.4, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 33.45, | |
| "mem_reserved_gb": 74.24 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 3555, | |
| "epoch": 1.0064406539740958, | |
| "wallclock": "2026-05-23T16:33:40.456471", | |
| "loss": 0.0427, | |
| "grad_norm": 1.144333839416504, | |
| "learning_rate": 5.190228589447559e-06, | |
| "step_time_sec": 111.06 | |
| }, | |
| { | |
| "step": 3560, | |
| "epoch": 1.007856182320051, | |
| "wallclock": "2026-05-23T16:35:31.855205", | |
| "loss": 0.0432, | |
| "grad_norm": 1.8258119821548462, | |
| "learning_rate": 5.1787740783857164e-06, | |
| "step_time_sec": 111.4 | |
| }, | |
| { | |
| "step": 3565, | |
| "epoch": 1.009271710666006, | |
| "wallclock": "2026-05-23T16:37:24.317151", | |
| "loss": 0.0428, | |
| "grad_norm": 1.1291868686676025, | |
| "learning_rate": 5.167318627796577e-06, | |
| "step_time_sec": 112.46 | |
| }, | |
| { | |
| "step": 3570, | |
| "epoch": 1.0106872390119612, | |
| "wallclock": "2026-05-23T16:39:16.531936", | |
| "loss": 0.0518, | |
| "grad_norm": 2.1567795276641846, | |
| "learning_rate": 5.155862297882985e-06, | |
| "step_time_sec": 112.21 | |
| }, | |
| { | |
| "step": 3575, | |
| "epoch": 1.0121027673579164, | |
| "wallclock": "2026-05-23T16:41:08.942223", | |
| "loss": 0.045, | |
| "grad_norm": 1.2312395572662354, | |
| "learning_rate": 5.1444051488524115e-06, | |
| "step_time_sec": 112.41 | |
| }, | |
| { | |
| "step": 3580, | |
| "epoch": 1.0135182957038715, | |
| "wallclock": "2026-05-23T16:43:01.368164", | |
| "loss": 0.0472, | |
| "grad_norm": 1.38804030418396, | |
| "learning_rate": 5.13294724091663e-06, | |
| "step_time_sec": 112.43 | |
| }, | |
| { | |
| "step": 3585, | |
| "epoch": 1.0149338240498267, | |
| "wallclock": "2026-05-23T16:44:53.319138", | |
| "loss": 0.052, | |
| "grad_norm": 1.0492668151855469, | |
| "learning_rate": 5.1214886342914e-06, | |
| "step_time_sec": 111.95 | |
| }, | |
| { | |
| "step": 3590, | |
| "epoch": 1.0163493523957818, | |
| "wallclock": "2026-05-23T16:46:44.915175", | |
| "loss": 0.0447, | |
| "grad_norm": 0.9781032204627991, | |
| "learning_rate": 5.110029389196155e-06, | |
| "step_time_sec": 111.6 | |
| }, | |
| { | |
| "step": 3595, | |
| "epoch": 1.0177648807417368, | |
| "wallclock": "2026-05-23T16:48:37.406030", | |
| "loss": 0.0549, | |
| "grad_norm": 1.2402184009552002, | |
| "learning_rate": 5.0985695658536875e-06, | |
| "step_time_sec": 112.49 | |
| }, | |
| { | |
| "step": 3600, | |
| "epoch": 1.019180409087692, | |
| "wallclock": "2026-05-23T16:50:28.649642", | |
| "loss": 0.0433, | |
| "grad_norm": 1.0172066688537598, | |
| "learning_rate": 5.08710922448982e-06, | |
| "step_time_sec": 111.24, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 33.45, | |
| "mem_reserved_gb": 74.24 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 3600, | |
| "epoch": 1.019180409087692, | |
| "wallclock": "2026-05-23T16:51:23.450008", | |
| "eval_loss": 0.08236898481845856, | |
| "eval_runtime": 54.7048, | |
| "eval_samples_per_second": 4.57, | |
| "eval_steps_per_second": 1.152, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 39.05, | |
| "mem_reserved_gb": 74.24 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 3605, | |
| "epoch": 1.020595937433647, | |
| "wallclock": "2026-05-23T16:55:03.292833", | |
| "loss": 0.0511, | |
| "grad_norm": 1.146644949913025, | |
| "learning_rate": 5.0756484253331075e-06, | |
| "step_time_sec": 274.64 | |
| }, | |
| { | |
| "step": 3610, | |
| "epoch": 1.0220114657796022, | |
| "wallclock": "2026-05-23T16:56:56.732361", | |
| "loss": 0.0483, | |
| "grad_norm": 1.9536317586898804, | |
| "learning_rate": 5.0641872286145025e-06, | |
| "step_time_sec": 113.44 | |
| }, | |
| { | |
| "step": 3615, | |
| "epoch": 1.0234269941255574, | |
| "wallclock": "2026-05-23T16:58:49.705561", | |
| "loss": 0.0479, | |
| "grad_norm": 0.8863971829414368, | |
| "learning_rate": 5.052725694567052e-06, | |
| "step_time_sec": 112.97 | |
| }, | |
| { | |
| "step": 3620, | |
| "epoch": 1.0248425224715125, | |
| "wallclock": "2026-05-23T17:00:43.544919", | |
| "loss": 0.0442, | |
| "grad_norm": 0.6922377943992615, | |
| "learning_rate": 5.0412638834255755e-06, | |
| "step_time_sec": 113.84 | |
| }, | |
| { | |
| "step": 3625, | |
| "epoch": 1.0262580508174677, | |
| "wallclock": "2026-05-23T17:02:36.269444", | |
| "loss": 0.0559, | |
| "grad_norm": 1.2457826137542725, | |
| "learning_rate": 5.029801855426345e-06, | |
| "step_time_sec": 112.72 | |
| }, | |
| { | |
| "step": 3630, | |
| "epoch": 1.0276735791634228, | |
| "wallclock": "2026-05-23T17:04:30.429515", | |
| "loss": 0.0469, | |
| "grad_norm": 1.0091979503631592, | |
| "learning_rate": 5.018339670806775e-06, | |
| "step_time_sec": 114.16 | |
| }, | |
| { | |
| "step": 3635, | |
| "epoch": 1.0290891075093778, | |
| "wallclock": "2026-05-23T17:06:23.820278", | |
| "loss": 0.0491, | |
| "grad_norm": 1.115814208984375, | |
| "learning_rate": 5.006877389805106e-06, | |
| "step_time_sec": 113.39 | |
| }, | |
| { | |
| "step": 3640, | |
| "epoch": 1.030504635855333, | |
| "wallclock": "2026-05-23T17:08:16.705614", | |
| "loss": 0.0435, | |
| "grad_norm": 1.3016657829284668, | |
| "learning_rate": 4.995415072660077e-06, | |
| "step_time_sec": 112.89 | |
| }, | |
| { | |
| "step": 3645, | |
| "epoch": 1.031920164201288, | |
| "wallclock": "2026-05-23T17:10:11.282102", | |
| "loss": 0.0492, | |
| "grad_norm": 1.312011957168579, | |
| "learning_rate": 4.983952779610626e-06, | |
| "step_time_sec": 114.58 | |
| }, | |
| { | |
| "step": 3650, | |
| "epoch": 1.0333356925472432, | |
| "wallclock": "2026-05-23T17:12:06.127157", | |
| "loss": 0.0436, | |
| "grad_norm": 0.9364621639251709, | |
| "learning_rate": 4.9724905708955575e-06, | |
| "step_time_sec": 114.85, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 33.45, | |
| "mem_reserved_gb": 74.24 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 3655, | |
| "epoch": 1.0347512208931984, | |
| "wallclock": "2026-05-23T17:14:00.741230", | |
| "loss": 0.0565, | |
| "grad_norm": 1.8892085552215576, | |
| "learning_rate": 4.9610285067532345e-06, | |
| "step_time_sec": 114.61 | |
| }, | |
| { | |
| "step": 3660, | |
| "epoch": 1.0361667492391535, | |
| "wallclock": "2026-05-23T17:15:55.594162", | |
| "loss": 0.0476, | |
| "grad_norm": 0.8621354103088379, | |
| "learning_rate": 4.949566647421264e-06, | |
| "step_time_sec": 114.85 | |
| }, | |
| { | |
| "step": 3665, | |
| "epoch": 1.0375822775851087, | |
| "wallclock": "2026-05-23T17:17:49.683934", | |
| "loss": 0.0406, | |
| "grad_norm": 0.8497494459152222, | |
| "learning_rate": 4.938105053136173e-06, | |
| "step_time_sec": 114.09 | |
| }, | |
| { | |
| "step": 3670, | |
| "epoch": 1.0389978059310638, | |
| "wallclock": "2026-05-23T17:19:44.142869", | |
| "loss": 0.0365, | |
| "grad_norm": 1.4974132776260376, | |
| "learning_rate": 4.926643784133095e-06, | |
| "step_time_sec": 114.46 | |
| }, | |
| { | |
| "step": 3675, | |
| "epoch": 1.040413334277019, | |
| "wallclock": "2026-05-23T17:21:38.802035", | |
| "loss": 0.0487, | |
| "grad_norm": 0.9692957997322083, | |
| "learning_rate": 4.915182900645454e-06, | |
| "step_time_sec": 114.66 | |
| }, | |
| { | |
| "step": 3680, | |
| "epoch": 1.0418288626229741, | |
| "wallclock": "2026-05-23T17:23:32.339493", | |
| "loss": 0.0506, | |
| "grad_norm": 1.0823785066604614, | |
| "learning_rate": 4.903722462904653e-06, | |
| "step_time_sec": 113.54 | |
| }, | |
| { | |
| "step": 3685, | |
| "epoch": 1.043244390968929, | |
| "wallclock": "2026-05-23T17:25:27.273367", | |
| "loss": 0.0385, | |
| "grad_norm": 0.6259887218475342, | |
| "learning_rate": 4.892262531139747e-06, | |
| "step_time_sec": 114.93 | |
| }, | |
| { | |
| "step": 3690, | |
| "epoch": 1.0446599193148842, | |
| "wallclock": "2026-05-23T17:27:22.317617", | |
| "loss": 0.0461, | |
| "grad_norm": 0.6526616811752319, | |
| "learning_rate": 4.880803165577132e-06, | |
| "step_time_sec": 115.04 | |
| }, | |
| { | |
| "step": 3695, | |
| "epoch": 1.0460754476608394, | |
| "wallclock": "2026-05-23T17:29:17.202916", | |
| "loss": 0.0497, | |
| "grad_norm": 1.2579582929611206, | |
| "learning_rate": 4.869344426440234e-06, | |
| "step_time_sec": 114.89 | |
| }, | |
| { | |
| "step": 3700, | |
| "epoch": 1.0474909760067945, | |
| "wallclock": "2026-05-23T17:31:10.559777", | |
| "loss": 0.0453, | |
| "grad_norm": 1.414987325668335, | |
| "learning_rate": 4.857886373949179e-06, | |
| "step_time_sec": 113.36, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 33.45, | |
| "mem_reserved_gb": 74.24 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 3700, | |
| "epoch": 1.0474909760067945, | |
| "wallclock": "2026-05-23T17:32:02.738420", | |
| "eval_loss": 0.08643540740013123, | |
| "eval_runtime": 52.0828, | |
| "eval_samples_per_second": 4.8, | |
| "eval_steps_per_second": 1.21, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 39.05, | |
| "mem_reserved_gb": 74.24 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 3705, | |
| "epoch": 1.0489065043527497, | |
| "wallclock": "2026-05-23T17:35:38.532817", | |
| "loss": 0.0536, | |
| "grad_norm": 1.0652250051498413, | |
| "learning_rate": 4.846429068320488e-06, | |
| "step_time_sec": 267.97 | |
| }, | |
| { | |
| "step": 3710, | |
| "epoch": 1.0503220326987048, | |
| "wallclock": "2026-05-23T17:37:30.356092", | |
| "loss": 0.0482, | |
| "grad_norm": 1.0449877977371216, | |
| "learning_rate": 4.834972569766762e-06, | |
| "step_time_sec": 111.82 | |
| }, | |
| { | |
| "step": 3715, | |
| "epoch": 1.05173756104466, | |
| "wallclock": "2026-05-23T17:39:23.408283", | |
| "loss": 0.0397, | |
| "grad_norm": 0.9513642191886902, | |
| "learning_rate": 4.823516938496352e-06, | |
| "step_time_sec": 113.05 | |
| }, | |
| { | |
| "step": 3720, | |
| "epoch": 1.0531530893906151, | |
| "wallclock": "2026-05-23T17:41:16.527974", | |
| "loss": 0.0328, | |
| "grad_norm": 1.774491548538208, | |
| "learning_rate": 4.812062234713054e-06, | |
| "step_time_sec": 113.12 | |
| }, | |
| { | |
| "step": 3725, | |
| "epoch": 1.05456861773657, | |
| "wallclock": "2026-05-23T17:43:08.761213", | |
| "loss": 0.0485, | |
| "grad_norm": 1.2329373359680176, | |
| "learning_rate": 4.800608518615793e-06, | |
| "step_time_sec": 112.23 | |
| }, | |
| { | |
| "step": 3730, | |
| "epoch": 1.0559841460825252, | |
| "wallclock": "2026-05-23T17:45:02.615351", | |
| "loss": 0.0527, | |
| "grad_norm": 1.060661792755127, | |
| "learning_rate": 4.789155850398301e-06, | |
| "step_time_sec": 113.85 | |
| }, | |
| { | |
| "step": 3735, | |
| "epoch": 1.0573996744284804, | |
| "wallclock": "2026-05-23T17:46:56.000441", | |
| "loss": 0.0642, | |
| "grad_norm": 1.075607180595398, | |
| "learning_rate": 4.777704290248799e-06, | |
| "step_time_sec": 113.39 | |
| }, | |
| { | |
| "step": 3740, | |
| "epoch": 1.0588152027744355, | |
| "wallclock": "2026-05-23T17:48:49.574582", | |
| "loss": 0.0388, | |
| "grad_norm": 0.9697294235229492, | |
| "learning_rate": 4.766253898349694e-06, | |
| "step_time_sec": 113.57 | |
| }, | |
| { | |
| "step": 3745, | |
| "epoch": 1.0602307311203907, | |
| "wallclock": "2026-05-23T17:50:41.983236", | |
| "loss": 0.0409, | |
| "grad_norm": 1.6531593799591064, | |
| "learning_rate": 4.754804734877245e-06, | |
| "step_time_sec": 112.41 | |
| }, | |
| { | |
| "step": 3750, | |
| "epoch": 1.0616462594663458, | |
| "wallclock": "2026-05-23T17:52:35.437590", | |
| "loss": 0.0355, | |
| "grad_norm": 1.1890569925308228, | |
| "learning_rate": 4.743356860001256e-06, | |
| "step_time_sec": 113.45, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 33.45, | |
| "mem_reserved_gb": 74.24 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 3755, | |
| "epoch": 1.063061787812301, | |
| "wallclock": "2026-05-23T17:54:28.614407", | |
| "loss": 0.0418, | |
| "grad_norm": 1.71039879322052, | |
| "learning_rate": 4.731910333884766e-06, | |
| "step_time_sec": 113.18 | |
| }, | |
| { | |
| "step": 3760, | |
| "epoch": 1.0644773161582561, | |
| "wallclock": "2026-05-23T17:56:21.627594", | |
| "loss": 0.0414, | |
| "grad_norm": 2.179187774658203, | |
| "learning_rate": 4.720465216683718e-06, | |
| "step_time_sec": 113.01 | |
| }, | |
| { | |
| "step": 3765, | |
| "epoch": 1.0658928445042113, | |
| "wallclock": "2026-05-23T17:58:15.543251", | |
| "loss": 0.0499, | |
| "grad_norm": 1.6056452989578247, | |
| "learning_rate": 4.70902156854665e-06, | |
| "step_time_sec": 113.92 | |
| }, | |
| { | |
| "step": 3770, | |
| "epoch": 1.0673083728501664, | |
| "wallclock": "2026-05-23T18:00:08.857393", | |
| "loss": 0.0444, | |
| "grad_norm": 1.382399320602417, | |
| "learning_rate": 4.697579449614389e-06, | |
| "step_time_sec": 113.31 | |
| }, | |
| { | |
| "step": 3775, | |
| "epoch": 1.0687239011961214, | |
| "wallclock": "2026-05-23T18:02:01.237195", | |
| "loss": 0.0639, | |
| "grad_norm": 1.6109445095062256, | |
| "learning_rate": 4.686138920019717e-06, | |
| "step_time_sec": 112.38 | |
| }, | |
| { | |
| "step": 3780, | |
| "epoch": 1.0701394295420765, | |
| "wallclock": "2026-05-23T18:03:54.810260", | |
| "loss": 0.0491, | |
| "grad_norm": 1.0975931882858276, | |
| "learning_rate": 4.674700039887062e-06, | |
| "step_time_sec": 113.57 | |
| }, | |
| { | |
| "step": 3785, | |
| "epoch": 1.0715549578880317, | |
| "wallclock": "2026-05-23T18:05:49.257162", | |
| "loss": 0.0489, | |
| "grad_norm": 0.7262698411941528, | |
| "learning_rate": 4.6632628693321925e-06, | |
| "step_time_sec": 114.45 | |
| }, | |
| { | |
| "step": 3790, | |
| "epoch": 1.0729704862339868, | |
| "wallclock": "2026-05-23T18:07:42.422565", | |
| "loss": 0.0488, | |
| "grad_norm": 1.3172861337661743, | |
| "learning_rate": 4.651827468461885e-06, | |
| "step_time_sec": 113.17 | |
| }, | |
| { | |
| "step": 3795, | |
| "epoch": 1.074386014579942, | |
| "wallclock": "2026-05-23T18:09:36.139435", | |
| "loss": 0.0466, | |
| "grad_norm": 1.1381676197052002, | |
| "learning_rate": 4.640393897373614e-06, | |
| "step_time_sec": 113.72 | |
| }, | |
| { | |
| "step": 3800, | |
| "epoch": 1.0758015429258971, | |
| "wallclock": "2026-05-23T18:11:30.211725", | |
| "loss": 0.0403, | |
| "grad_norm": 1.1041913032531738, | |
| "learning_rate": 4.628962216155249e-06, | |
| "step_time_sec": 114.07, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 33.45, | |
| "mem_reserved_gb": 74.24 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 3800, | |
| "epoch": 1.0758015429258971, | |
| "wallclock": "2026-05-23T18:12:22.307956", | |
| "eval_loss": 0.08441882580518723, | |
| "eval_runtime": 51.9948, | |
| "eval_samples_per_second": 4.808, | |
| "eval_steps_per_second": 1.212, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 39.05, | |
| "mem_reserved_gb": 74.24 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 3805, | |
| "epoch": 1.0772170712718523, | |
| "wallclock": "2026-05-23T18:15:57.451185", | |
| "loss": 0.0379, | |
| "grad_norm": 1.2826017141342163, | |
| "learning_rate": 4.617532484884715e-06, | |
| "step_time_sec": 267.24 | |
| }, | |
| { | |
| "step": 3810, | |
| "epoch": 1.0786325996178074, | |
| "wallclock": "2026-05-23T18:17:50.121618", | |
| "loss": 0.0338, | |
| "grad_norm": 1.091307282447815, | |
| "learning_rate": 4.606104763629693e-06, | |
| "step_time_sec": 112.67 | |
| }, | |
| { | |
| "step": 3815, | |
| "epoch": 1.0800481279637624, | |
| "wallclock": "2026-05-23T18:19:44.910197", | |
| "loss": 0.058, | |
| "grad_norm": 1.0848028659820557, | |
| "learning_rate": 4.594679112447307e-06, | |
| "step_time_sec": 114.79 | |
| }, | |
| { | |
| "step": 3820, | |
| "epoch": 1.0814636563097175, | |
| "wallclock": "2026-05-23T18:21:40.024155", | |
| "loss": 0.049, | |
| "grad_norm": 1.1905133724212646, | |
| "learning_rate": 4.5832555913837925e-06, | |
| "step_time_sec": 115.11 | |
| }, | |
| { | |
| "step": 3825, | |
| "epoch": 1.0828791846556727, | |
| "wallclock": "2026-05-23T18:23:34.370594", | |
| "loss": 0.067, | |
| "grad_norm": 1.21793532371521, | |
| "learning_rate": 4.571834260474195e-06, | |
| "step_time_sec": 114.35 | |
| }, | |
| { | |
| "step": 3830, | |
| "epoch": 1.0842947130016278, | |
| "wallclock": "2026-05-23T18:25:28.664784", | |
| "loss": 0.0395, | |
| "grad_norm": 1.1224967241287231, | |
| "learning_rate": 4.560415179742052e-06, | |
| "step_time_sec": 114.29 | |
| }, | |
| { | |
| "step": 3835, | |
| "epoch": 1.085710241347583, | |
| "wallclock": "2026-05-23T18:27:22.873783", | |
| "loss": 0.0559, | |
| "grad_norm": 0.9353971481323242, | |
| "learning_rate": 4.5489984091990735e-06, | |
| "step_time_sec": 114.21 | |
| }, | |
| { | |
| "step": 3840, | |
| "epoch": 1.0871257696935381, | |
| "wallclock": "2026-05-23T18:29:17.206871", | |
| "loss": 0.0554, | |
| "grad_norm": 0.6831589937210083, | |
| "learning_rate": 4.537584008844823e-06, | |
| "step_time_sec": 114.33 | |
| }, | |
| { | |
| "step": 3845, | |
| "epoch": 1.0885412980394933, | |
| "wallclock": "2026-05-23T18:31:12.398612", | |
| "loss": 0.0525, | |
| "grad_norm": 1.0940909385681152, | |
| "learning_rate": 4.526172038666419e-06, | |
| "step_time_sec": 115.19 | |
| }, | |
| { | |
| "step": 3850, | |
| "epoch": 1.0899568263854484, | |
| "wallclock": "2026-05-23T18:33:06.117469", | |
| "loss": 0.0573, | |
| "grad_norm": 0.8475215435028076, | |
| "learning_rate": 4.514762558638199e-06, | |
| "step_time_sec": 113.72, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 33.45, | |
| "mem_reserved_gb": 74.24 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 3855, | |
| "epoch": 1.0913723547314036, | |
| "wallclock": "2026-05-23T18:34:59.556884", | |
| "loss": 0.0483, | |
| "grad_norm": 1.0420924425125122, | |
| "learning_rate": 4.503355628721417e-06, | |
| "step_time_sec": 113.44 | |
| }, | |
| { | |
| "step": 3860, | |
| "epoch": 1.0927878830773585, | |
| "wallclock": "2026-05-23T18:36:53.139554", | |
| "loss": 0.0524, | |
| "grad_norm": 1.5134800672531128, | |
| "learning_rate": 4.491951308863926e-06, | |
| "step_time_sec": 113.58 | |
| }, | |
| { | |
| "step": 3865, | |
| "epoch": 1.0942034114233137, | |
| "wallclock": "2026-05-23T18:38:46.961705", | |
| "loss": 0.0327, | |
| "grad_norm": 1.369831919670105, | |
| "learning_rate": 4.480549658999862e-06, | |
| "step_time_sec": 113.82 | |
| }, | |
| { | |
| "step": 3870, | |
| "epoch": 1.0956189397692688, | |
| "wallclock": "2026-05-23T18:40:39.105763", | |
| "loss": 0.0467, | |
| "grad_norm": 1.45563542842865, | |
| "learning_rate": 4.469150739049327e-06, | |
| "step_time_sec": 112.14 | |
| }, | |
| { | |
| "step": 3875, | |
| "epoch": 1.097034468115224, | |
| "wallclock": "2026-05-23T18:42:31.937185", | |
| "loss": 0.0471, | |
| "grad_norm": 0.9477264881134033, | |
| "learning_rate": 4.45775460891808e-06, | |
| "step_time_sec": 112.83 | |
| }, | |
| { | |
| "step": 3880, | |
| "epoch": 1.0984499964611791, | |
| "wallclock": "2026-05-23T18:44:25.360640", | |
| "loss": 0.0492, | |
| "grad_norm": 0.7854604721069336, | |
| "learning_rate": 4.446361328497215e-06, | |
| "step_time_sec": 113.42 | |
| }, | |
| { | |
| "step": 3885, | |
| "epoch": 1.0998655248071343, | |
| "wallclock": "2026-05-23T18:46:17.165445", | |
| "loss": 0.0427, | |
| "grad_norm": 0.7942948341369629, | |
| "learning_rate": 4.434970957662849e-06, | |
| "step_time_sec": 111.8 | |
| }, | |
| { | |
| "step": 3890, | |
| "epoch": 1.1012810531530894, | |
| "wallclock": "2026-05-23T18:48:10.450485", | |
| "loss": 0.043, | |
| "grad_norm": 1.1920311450958252, | |
| "learning_rate": 4.423583556275814e-06, | |
| "step_time_sec": 113.29 | |
| }, | |
| { | |
| "step": 3895, | |
| "epoch": 1.1026965814990446, | |
| "wallclock": "2026-05-23T18:50:04.687540", | |
| "loss": 0.0502, | |
| "grad_norm": 1.4759620428085327, | |
| "learning_rate": 4.41219918418133e-06, | |
| "step_time_sec": 114.24 | |
| }, | |
| { | |
| "step": 3900, | |
| "epoch": 1.1041121098449997, | |
| "wallclock": "2026-05-23T18:51:57.525567", | |
| "loss": 0.0514, | |
| "grad_norm": 1.4128731489181519, | |
| "learning_rate": 4.400817901208697e-06, | |
| "step_time_sec": 112.84, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 33.45, | |
| "mem_reserved_gb": 74.24 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 3900, | |
| "epoch": 1.1041121098449997, | |
| "wallclock": "2026-05-23T18:52:50.285022", | |
| "eval_loss": 0.08410802483558655, | |
| "eval_runtime": 52.6672, | |
| "eval_samples_per_second": 4.747, | |
| "eval_steps_per_second": 1.196, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 39.05, | |
| "mem_reserved_gb": 74.24 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 3900, | |
| "epoch": 1.1041121098449997, | |
| "wallclock": "2026-05-23T18:54:34.984762", | |
| "train_runtime": 61042.0543, | |
| "train_samples_per_second": 3.703, | |
| "train_steps_per_second": 0.116, | |
| "total_flos": 1.0157637889163264e+16, | |
| "train_loss": 0.0625501875159068, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 33.45, | |
| "mem_reserved_gb": 74.24 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 3900, | |
| "epoch": 1.1041121098449997, | |
| "wallclock": "2026-05-23T18:55:51.257083", | |
| "eval_loss": 0.08236898481845856, | |
| "eval_runtime": 62.4976, | |
| "eval_samples_per_second": 4.0, | |
| "eval_steps_per_second": 1.008, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 39.05, | |
| "mem_reserved_gb": 74.24 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| } | |
| ] |