NYXMed-V17-Model / training_metrics.json
vineetdaniels's picture
Model save
c060c67 verified
Raw
History Blame Contribute Delete
157 kB
[
{
"step": 1405,
"epoch": 0.3977634652133909,
"wallclock": "2026-05-23T01:59:19.731727",
"loss": 0.1505,
"grad_norm": 0.860001266002655,
"learning_rate": 9.27049077439764e-06
},
{
"step": 1410,
"epoch": 0.39917899355934605,
"wallclock": "2026-05-23T02:01:12.843146",
"loss": 0.137,
"grad_norm": 0.8940677642822266,
"learning_rate": 9.264517869578343e-06,
"step_time_sec": 113.11
},
{
"step": 1415,
"epoch": 0.40059452190530115,
"wallclock": "2026-05-23T02:03:05.328968",
"loss": 0.1519,
"grad_norm": 0.8663320541381836,
"learning_rate": 9.258522553059383e-06,
"step_time_sec": 112.49
},
{
"step": 1420,
"epoch": 0.4020100502512563,
"wallclock": "2026-05-23T02:04:58.249080",
"loss": 0.1329,
"grad_norm": 0.8876581192016602,
"learning_rate": 9.252504856348483e-06,
"step_time_sec": 112.92
},
{
"step": 1425,
"epoch": 0.4034255785972114,
"wallclock": "2026-05-23T02:06:50.925025",
"loss": 0.1339,
"grad_norm": 0.7425838708877563,
"learning_rate": 9.246464811070978e-06,
"step_time_sec": 112.68
},
{
"step": 1430,
"epoch": 0.40484110694316655,
"wallclock": "2026-05-23T02:08:44.125444",
"loss": 0.1263,
"grad_norm": 0.8344400525093079,
"learning_rate": 9.240402448969655e-06,
"step_time_sec": 113.2
},
{
"step": 1435,
"epoch": 0.40625663528912165,
"wallclock": "2026-05-23T02:10:37.926903",
"loss": 0.1374,
"grad_norm": 0.920082688331604,
"learning_rate": 9.234317801904584e-06,
"step_time_sec": 113.8
},
{
"step": 1440,
"epoch": 0.4076721636350768,
"wallclock": "2026-05-23T02:12:30.509342",
"loss": 0.1522,
"grad_norm": 0.9682347178459167,
"learning_rate": 9.228210901852953e-06,
"step_time_sec": 112.58
},
{
"step": 1445,
"epoch": 0.4090876919810319,
"wallclock": "2026-05-23T02:14:22.744101",
"loss": 0.1435,
"grad_norm": 0.8033989667892456,
"learning_rate": 9.222081780908894e-06,
"step_time_sec": 112.23
},
{
"step": 1450,
"epoch": 0.41050322032698705,
"wallclock": "2026-05-23T02:16:16.036698",
"loss": 0.132,
"grad_norm": 1.0462369918823242,
"learning_rate": 9.215930471283323e-06,
"step_time_sec": 113.29,
"gpu": [
{
"gpu": 0,
"mem_allocated_gb": 33.45,
"mem_reserved_gb": 64.34
},
{
"gpu": 1,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 2,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 3,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
}
]
},
{
"step": 1455,
"epoch": 0.4119187486729422,
"wallclock": "2026-05-23T02:18:08.338694",
"loss": 0.1657,
"grad_norm": 2.268519401550293,
"learning_rate": 9.209757005303761e-06,
"step_time_sec": 112.3
},
{
"step": 1460,
"epoch": 0.4133342770188973,
"wallclock": "2026-05-23T02:20:01.532146",
"loss": 0.1165,
"grad_norm": 0.7390187978744507,
"learning_rate": 9.203561415414174e-06,
"step_time_sec": 113.19
},
{
"step": 1465,
"epoch": 0.41474980536485245,
"wallclock": "2026-05-23T02:21:55.232651",
"loss": 0.12,
"grad_norm": 0.884283721446991,
"learning_rate": 9.197343734174798e-06,
"step_time_sec": 113.7
},
{
"step": 1470,
"epoch": 0.41616533371080755,
"wallclock": "2026-05-23T02:23:47.669724",
"loss": 0.1227,
"grad_norm": 0.7426964640617371,
"learning_rate": 9.191103994261963e-06,
"step_time_sec": 112.44
},
{
"step": 1475,
"epoch": 0.4175808620567627,
"wallclock": "2026-05-23T02:25:40.551477",
"loss": 0.1423,
"grad_norm": 1.1171990633010864,
"learning_rate": 9.184842228467929e-06,
"step_time_sec": 112.88
},
{
"step": 1480,
"epoch": 0.4189963904027178,
"wallclock": "2026-05-23T02:27:34.235355",
"loss": 0.1356,
"grad_norm": 1.0424611568450928,
"learning_rate": 9.178558469700712e-06,
"step_time_sec": 113.68
},
{
"step": 1485,
"epoch": 0.42041191874867295,
"wallclock": "2026-05-23T02:29:26.581237",
"loss": 0.1192,
"grad_norm": 0.7916944026947021,
"learning_rate": 9.172252750983904e-06,
"step_time_sec": 112.35
},
{
"step": 1490,
"epoch": 0.42182744709462805,
"wallclock": "2026-05-23T02:31:19.100873",
"loss": 0.1178,
"grad_norm": 0.6911448240280151,
"learning_rate": 9.165925105456513e-06,
"step_time_sec": 112.52
},
{
"step": 1495,
"epoch": 0.4232429754405832,
"wallclock": "2026-05-23T02:33:12.432128",
"loss": 0.1268,
"grad_norm": 1.207095980644226,
"learning_rate": 9.159575566372774e-06,
"step_time_sec": 113.33
},
{
"step": 1500,
"epoch": 0.4246585037865383,
"wallclock": "2026-05-23T02:35:05.236376",
"loss": 0.1249,
"grad_norm": 0.8602229952812195,
"learning_rate": 9.153204167101984e-06,
"step_time_sec": 112.8,
"gpu": [
{
"gpu": 0,
"mem_allocated_gb": 33.45,
"mem_reserved_gb": 65.95
},
{
"gpu": 1,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 2,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 3,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
}
]
},
{
"step": 1500,
"epoch": 0.4246585037865383,
"wallclock": "2026-05-23T02:35:56.386847",
"eval_loss": 0.14635811746120453,
"eval_runtime": 51.064,
"eval_samples_per_second": 4.896,
"eval_steps_per_second": 1.234,
"gpu": [
{
"gpu": 0,
"mem_allocated_gb": 39.05,
"mem_reserved_gb": 65.95
},
{
"gpu": 1,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 2,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 3,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
}
]
},
{
"step": 1505,
"epoch": 0.42607403213249345,
"wallclock": "2026-05-23T02:39:31.314975",
"loss": 0.132,
"grad_norm": 0.9033521413803101,
"learning_rate": 9.146810941128326e-06,
"step_time_sec": 266.08
},
{
"step": 1510,
"epoch": 0.4274895604784486,
"wallclock": "2026-05-23T02:41:24.639692",
"loss": 0.1235,
"grad_norm": 0.9021329879760742,
"learning_rate": 9.140395922050687e-06,
"step_time_sec": 113.32
},
{
"step": 1515,
"epoch": 0.4289050888244037,
"wallclock": "2026-05-23T02:43:21.636680",
"loss": 0.1443,
"grad_norm": 0.8108121752738953,
"learning_rate": 9.133959143582485e-06,
"step_time_sec": 117.0
},
{
"step": 1520,
"epoch": 0.43032061717035885,
"wallclock": "2026-05-23T02:45:14.801586",
"loss": 0.1256,
"grad_norm": 0.9193041920661926,
"learning_rate": 9.127500639551497e-06,
"step_time_sec": 113.16
},
{
"step": 1525,
"epoch": 0.43173614551631395,
"wallclock": "2026-05-23T02:47:07.650420",
"loss": 0.1356,
"grad_norm": 0.8465185761451721,
"learning_rate": 9.12102044389967e-06,
"step_time_sec": 112.85
},
{
"step": 1530,
"epoch": 0.4331516738622691,
"wallclock": "2026-05-23T02:49:00.408689",
"loss": 0.1384,
"grad_norm": 0.973936140537262,
"learning_rate": 9.114518590682955e-06,
"step_time_sec": 112.76
},
{
"step": 1535,
"epoch": 0.4345672022082242,
"wallclock": "2026-05-23T02:50:52.832459",
"loss": 0.1274,
"grad_norm": 1.2166610956192017,
"learning_rate": 9.107995114071116e-06,
"step_time_sec": 112.42
},
{
"step": 1540,
"epoch": 0.43598273055417935,
"wallclock": "2026-05-23T02:52:44.842922",
"loss": 0.122,
"grad_norm": 0.985847532749176,
"learning_rate": 9.101450048347562e-06,
"step_time_sec": 112.01
},
{
"step": 1545,
"epoch": 0.43739825890013445,
"wallclock": "2026-05-23T02:54:38.307969",
"loss": 0.1365,
"grad_norm": 0.7600606083869934,
"learning_rate": 9.094883427909156e-06,
"step_time_sec": 113.47
},
{
"step": 1550,
"epoch": 0.4388137872460896,
"wallclock": "2026-05-23T02:56:31.349254",
"loss": 0.1379,
"grad_norm": 0.7994720339775085,
"learning_rate": 9.088295287266042e-06,
"step_time_sec": 113.04,
"gpu": [
{
"gpu": 0,
"mem_allocated_gb": 33.45,
"mem_reserved_gb": 69.86
},
{
"gpu": 1,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 2,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 3,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
}
]
},
{
"step": 1555,
"epoch": 0.44022931559204476,
"wallclock": "2026-05-23T02:58:24.231104",
"loss": 0.1325,
"grad_norm": 0.8235543370246887,
"learning_rate": 9.081685661041463e-06,
"step_time_sec": 112.88
},
{
"step": 1560,
"epoch": 0.44164484393799985,
"wallclock": "2026-05-23T03:00:19.009451",
"loss": 0.1112,
"grad_norm": 1.33493173122406,
"learning_rate": 9.075054583971575e-06,
"step_time_sec": 114.78
},
{
"step": 1565,
"epoch": 0.443060372283955,
"wallclock": "2026-05-23T03:02:11.720414",
"loss": 0.1274,
"grad_norm": 0.6676927804946899,
"learning_rate": 9.068402090905263e-06,
"step_time_sec": 112.71
},
{
"step": 1570,
"epoch": 0.4444759006299101,
"wallclock": "2026-05-23T03:04:04.443929",
"loss": 0.1158,
"grad_norm": 2.0362584590911865,
"learning_rate": 9.06172821680397e-06,
"step_time_sec": 112.72
},
{
"step": 1575,
"epoch": 0.44589142897586526,
"wallclock": "2026-05-23T03:05:58.111861",
"loss": 0.1459,
"grad_norm": 0.8041182160377502,
"learning_rate": 9.055032996741492e-06,
"step_time_sec": 113.67
},
{
"step": 1580,
"epoch": 0.44730695732182035,
"wallclock": "2026-05-23T03:07:51.100629",
"loss": 0.1209,
"grad_norm": 0.6887193918228149,
"learning_rate": 9.048316465903823e-06,
"step_time_sec": 112.99
},
{
"step": 1585,
"epoch": 0.4487224856677755,
"wallclock": "2026-05-23T03:09:44.719059",
"loss": 0.1472,
"grad_norm": 0.9417322278022766,
"learning_rate": 9.041578659588938e-06,
"step_time_sec": 113.62
},
{
"step": 1590,
"epoch": 0.4501380140137306,
"wallclock": "2026-05-23T03:11:39.177916",
"loss": 0.1198,
"grad_norm": 0.7076205611228943,
"learning_rate": 9.034819613206631e-06,
"step_time_sec": 114.46
},
{
"step": 1595,
"epoch": 0.45155354235968576,
"wallclock": "2026-05-23T03:13:32.601273",
"loss": 0.1576,
"grad_norm": 0.8126243948936462,
"learning_rate": 9.028039362278318e-06,
"step_time_sec": 113.42
},
{
"step": 1600,
"epoch": 0.45296907070564085,
"wallclock": "2026-05-23T03:15:25.341230",
"loss": 0.1392,
"grad_norm": 0.8675165176391602,
"learning_rate": 9.021237942436855e-06,
"step_time_sec": 112.74,
"gpu": [
{
"gpu": 0,
"mem_allocated_gb": 33.45,
"mem_reserved_gb": 69.86
},
{
"gpu": 1,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 2,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 3,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
}
]
},
{
"step": 1600,
"epoch": 0.45296907070564085,
"wallclock": "2026-05-23T03:16:17.416846",
"eval_loss": 0.14519159495830536,
"eval_runtime": 51.9828,
"eval_samples_per_second": 4.809,
"eval_steps_per_second": 1.212,
"gpu": [
{
"gpu": 0,
"mem_allocated_gb": 39.05,
"mem_reserved_gb": 69.86
},
{
"gpu": 1,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 2,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 3,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
}
]
},
{
"step": 1605,
"epoch": 0.454384599051596,
"wallclock": "2026-05-23T03:19:54.242069",
"loss": 0.1252,
"grad_norm": 0.7624632716178894,
"learning_rate": 9.01441538942635e-06,
"step_time_sec": 268.9
},
{
"step": 1610,
"epoch": 0.45580012739755116,
"wallclock": "2026-05-23T03:21:47.588042",
"loss": 0.1305,
"grad_norm": 0.5635123252868652,
"learning_rate": 9.007571739101968e-06,
"step_time_sec": 113.35
},
{
"step": 1615,
"epoch": 0.45721565574350626,
"wallclock": "2026-05-23T03:23:41.226600",
"loss": 0.1117,
"grad_norm": 0.7951876521110535,
"learning_rate": 9.000707027429757e-06,
"step_time_sec": 113.64
},
{
"step": 1620,
"epoch": 0.4586311840894614,
"wallclock": "2026-05-23T03:25:36.047456",
"loss": 0.1283,
"grad_norm": 1.121505618095398,
"learning_rate": 8.993821290486442e-06,
"step_time_sec": 114.82
},
{
"step": 1625,
"epoch": 0.4600467124354165,
"wallclock": "2026-05-23T03:27:30.028714",
"loss": 0.1127,
"grad_norm": 0.9441781640052795,
"learning_rate": 8.98691456445925e-06,
"step_time_sec": 113.98
},
{
"step": 1630,
"epoch": 0.46146224078137166,
"wallclock": "2026-05-23T03:29:23.551875",
"loss": 0.1246,
"grad_norm": 0.8297203779220581,
"learning_rate": 8.979986885645712e-06,
"step_time_sec": 113.52
},
{
"step": 1635,
"epoch": 0.46287776912732675,
"wallclock": "2026-05-23T03:31:17.421607",
"loss": 0.1365,
"grad_norm": 1.1671549081802368,
"learning_rate": 8.973038290453475e-06,
"step_time_sec": 113.87
},
{
"step": 1640,
"epoch": 0.4642932974732819,
"wallclock": "2026-05-23T03:33:12.304973",
"loss": 0.1158,
"grad_norm": 0.8376030325889587,
"learning_rate": 8.966068815400108e-06,
"step_time_sec": 114.88
},
{
"step": 1645,
"epoch": 0.465708825819237,
"wallclock": "2026-05-23T03:35:06.915657",
"loss": 0.1276,
"grad_norm": 0.9669609069824219,
"learning_rate": 8.95907849711291e-06,
"step_time_sec": 114.61
},
{
"step": 1650,
"epoch": 0.46712435416519216,
"wallclock": "2026-05-23T03:36:59.993882",
"loss": 0.1638,
"grad_norm": 1.0771512985229492,
"learning_rate": 8.952067372328726e-06,
"step_time_sec": 113.08,
"gpu": [
{
"gpu": 0,
"mem_allocated_gb": 33.45,
"mem_reserved_gb": 69.86
},
{
"gpu": 1,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 2,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 3,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
}
]
},
{
"step": 1655,
"epoch": 0.4685398825111473,
"wallclock": "2026-05-23T03:38:54.654794",
"loss": 0.1403,
"grad_norm": 0.7746709585189819,
"learning_rate": 8.94503547789374e-06,
"step_time_sec": 114.66
},
{
"step": 1660,
"epoch": 0.4699554108571024,
"wallclock": "2026-05-23T03:40:47.102060",
"loss": 0.1352,
"grad_norm": 1.1372244358062744,
"learning_rate": 8.937982850763293e-06,
"step_time_sec": 112.45
},
{
"step": 1665,
"epoch": 0.47137093920305756,
"wallclock": "2026-05-23T03:42:40.432111",
"loss": 0.1537,
"grad_norm": 0.8946406245231628,
"learning_rate": 8.930909528001682e-06,
"step_time_sec": 113.33
},
{
"step": 1670,
"epoch": 0.47278646754901266,
"wallclock": "2026-05-23T03:44:34.489209",
"loss": 0.1252,
"grad_norm": 0.6626783013343811,
"learning_rate": 8.923815546781968e-06,
"step_time_sec": 114.06
},
{
"step": 1675,
"epoch": 0.4742019958949678,
"wallclock": "2026-05-23T03:46:29.929407",
"loss": 0.1148,
"grad_norm": 0.7032930850982666,
"learning_rate": 8.916700944385783e-06,
"step_time_sec": 115.44
},
{
"step": 1680,
"epoch": 0.4756175242409229,
"wallclock": "2026-05-23T03:48:23.844510",
"loss": 0.139,
"grad_norm": 0.9184028506278992,
"learning_rate": 8.90956575820313e-06,
"step_time_sec": 113.92
},
{
"step": 1685,
"epoch": 0.47703305258687806,
"wallclock": "2026-05-23T03:50:18.747236",
"loss": 0.1439,
"grad_norm": 0.9489091038703918,
"learning_rate": 8.902410025732182e-06,
"step_time_sec": 114.9
},
{
"step": 1690,
"epoch": 0.47844858093283316,
"wallclock": "2026-05-23T03:52:12.030744",
"loss": 0.1063,
"grad_norm": 0.8725413680076599,
"learning_rate": 8.895233784579098e-06,
"step_time_sec": 113.28
},
{
"step": 1695,
"epoch": 0.4798641092787883,
"wallclock": "2026-05-23T03:54:05.237973",
"loss": 0.1254,
"grad_norm": 0.8798477649688721,
"learning_rate": 8.888037072457817e-06,
"step_time_sec": 113.21
},
{
"step": 1700,
"epoch": 0.48127963762474346,
"wallclock": "2026-05-23T03:55:59.391912",
"loss": 0.1357,
"grad_norm": 0.8217583298683167,
"learning_rate": 8.88081992718986e-06,
"step_time_sec": 114.15,
"gpu": [
{
"gpu": 0,
"mem_allocated_gb": 33.45,
"mem_reserved_gb": 69.86
},
{
"gpu": 1,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 2,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 3,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
}
]
},
{
"step": 1700,
"epoch": 0.48127963762474346,
"wallclock": "2026-05-23T03:56:51.332412",
"eval_loss": 0.14282415807247162,
"eval_runtime": 51.857,
"eval_samples_per_second": 4.821,
"eval_steps_per_second": 1.215,
"gpu": [
{
"gpu": 0,
"mem_allocated_gb": 39.05,
"mem_reserved_gb": 69.86
},
{
"gpu": 1,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 2,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 3,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
}
]
},
{
"step": 1705,
"epoch": 0.48269516597069856,
"wallclock": "2026-05-23T04:00:27.956136",
"loss": 0.1428,
"grad_norm": 0.7931806445121765,
"learning_rate": 8.873582386704132e-06,
"step_time_sec": 268.56
},
{
"step": 1710,
"epoch": 0.4841106943166537,
"wallclock": "2026-05-23T04:02:22.110676",
"loss": 0.1402,
"grad_norm": 1.0113517045974731,
"learning_rate": 8.86632448903672e-06,
"step_time_sec": 114.15
},
{
"step": 1715,
"epoch": 0.4855262226626088,
"wallclock": "2026-05-23T04:04:17.103828",
"loss": 0.1213,
"grad_norm": 0.9483981132507324,
"learning_rate": 8.859046272330698e-06,
"step_time_sec": 114.99
},
{
"step": 1720,
"epoch": 0.48694175100856396,
"wallclock": "2026-05-23T04:06:09.837485",
"loss": 0.1287,
"grad_norm": 0.8060489296913147,
"learning_rate": 8.851747774835927e-06,
"step_time_sec": 112.73
},
{
"step": 1725,
"epoch": 0.48835727935451906,
"wallclock": "2026-05-23T04:08:03.048184",
"loss": 0.1348,
"grad_norm": 1.2514666318893433,
"learning_rate": 8.84442903490885e-06,
"step_time_sec": 113.21
},
{
"step": 1730,
"epoch": 0.4897728077004742,
"wallclock": "2026-05-23T04:09:56.786981",
"loss": 0.1261,
"grad_norm": 0.8523698449134827,
"learning_rate": 8.837090091012289e-06,
"step_time_sec": 113.74
},
{
"step": 1735,
"epoch": 0.4911883360464293,
"wallclock": "2026-05-23T04:11:50.314356",
"loss": 0.1365,
"grad_norm": 1.0180977582931519,
"learning_rate": 8.82973098171525e-06,
"step_time_sec": 113.53
},
{
"step": 1740,
"epoch": 0.49260386439238446,
"wallclock": "2026-05-23T04:13:43.729160",
"loss": 0.1338,
"grad_norm": 0.5706004500389099,
"learning_rate": 8.822351745692714e-06,
"step_time_sec": 113.41
},
{
"step": 1745,
"epoch": 0.49401939273833956,
"wallclock": "2026-05-23T04:15:36.906607",
"loss": 0.1225,
"grad_norm": 0.8971516489982605,
"learning_rate": 8.814952421725434e-06,
"step_time_sec": 113.18
},
{
"step": 1750,
"epoch": 0.4954349210842947,
"wallclock": "2026-05-23T04:17:31.144814",
"loss": 0.1199,
"grad_norm": 0.8799176812171936,
"learning_rate": 8.807533048699734e-06,
"step_time_sec": 114.24,
"gpu": [
{
"gpu": 0,
"mem_allocated_gb": 33.45,
"mem_reserved_gb": 69.86
},
{
"gpu": 1,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 2,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 3,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
}
]
},
{
"step": 1755,
"epoch": 0.49685044943024986,
"wallclock": "2026-05-23T04:19:24.124359",
"loss": 0.1161,
"grad_norm": 0.7670193910598755,
"learning_rate": 8.800093665607307e-06,
"step_time_sec": 112.98
},
{
"step": 1760,
"epoch": 0.49826597777620496,
"wallclock": "2026-05-23T04:21:16.253579",
"loss": 0.1362,
"grad_norm": 1.0961898565292358,
"learning_rate": 8.792634311545002e-06,
"step_time_sec": 112.13
},
{
"step": 1765,
"epoch": 0.4996815061221601,
"wallclock": "2026-05-23T04:23:08.900369",
"loss": 0.1246,
"grad_norm": 0.9300926923751831,
"learning_rate": 8.785155025714626e-06,
"step_time_sec": 112.65
},
{
"step": 1770,
"epoch": 0.5010970344681153,
"wallclock": "2026-05-23T04:25:01.641415",
"loss": 0.13,
"grad_norm": 0.9323188066482544,
"learning_rate": 8.777655847422734e-06,
"step_time_sec": 112.74
},
{
"step": 1775,
"epoch": 0.5025125628140703,
"wallclock": "2026-05-23T04:26:53.919382",
"loss": 0.1228,
"grad_norm": 0.8098039627075195,
"learning_rate": 8.770136816080426e-06,
"step_time_sec": 112.28
},
{
"step": 1780,
"epoch": 0.5039280911600255,
"wallclock": "2026-05-23T04:28:47.742000",
"loss": 0.1395,
"grad_norm": 0.857759952545166,
"learning_rate": 8.76259797120313e-06,
"step_time_sec": 113.82
},
{
"step": 1785,
"epoch": 0.5053436195059806,
"wallclock": "2026-05-23T04:30:40.247364",
"loss": 0.1259,
"grad_norm": 0.847581148147583,
"learning_rate": 8.755039352410414e-06,
"step_time_sec": 112.51
},
{
"step": 1790,
"epoch": 0.5067591478519358,
"wallclock": "2026-05-23T04:32:33.726589",
"loss": 0.1352,
"grad_norm": 0.7166717052459717,
"learning_rate": 8.747460999425755e-06,
"step_time_sec": 113.48
},
{
"step": 1795,
"epoch": 0.5081746761978909,
"wallclock": "2026-05-23T04:34:27.718052",
"loss": 0.1319,
"grad_norm": 1.0256786346435547,
"learning_rate": 8.739862952076346e-06,
"step_time_sec": 113.99
},
{
"step": 1800,
"epoch": 0.509590204543846,
"wallclock": "2026-05-23T04:36:20.348096",
"loss": 0.1174,
"grad_norm": 0.7882758975028992,
"learning_rate": 8.732245250292878e-06,
"step_time_sec": 112.63,
"gpu": [
{
"gpu": 0,
"mem_allocated_gb": 33.45,
"mem_reserved_gb": 69.86
},
{
"gpu": 1,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 2,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 3,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
}
]
},
{
"step": 1800,
"epoch": 0.509590204543846,
"wallclock": "2026-05-23T04:37:12.805799",
"eval_loss": 0.14175137877464294,
"eval_runtime": 52.3687,
"eval_samples_per_second": 4.774,
"eval_steps_per_second": 1.203,
"gpu": [
{
"gpu": 0,
"mem_allocated_gb": 39.05,
"mem_reserved_gb": 69.86
},
{
"gpu": 1,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 2,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 3,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
}
]
},
{
"step": 1805,
"epoch": 0.5110057328898011,
"wallclock": "2026-05-23T04:40:48.723033",
"loss": 0.1259,
"grad_norm": 0.9180939793586731,
"learning_rate": 8.72460793410934e-06,
"step_time_sec": 268.37
},
{
"step": 1810,
"epoch": 0.5124212612357563,
"wallclock": "2026-05-23T04:42:42.010667",
"loss": 0.1238,
"grad_norm": 0.8965495228767395,
"learning_rate": 8.716951043662796e-06,
"step_time_sec": 113.29
},
{
"step": 1815,
"epoch": 0.5138367895817114,
"wallclock": "2026-05-23T04:44:35.309189",
"loss": 0.1064,
"grad_norm": 0.9334513545036316,
"learning_rate": 8.709274619193182e-06,
"step_time_sec": 113.3
},
{
"step": 1820,
"epoch": 0.5152523179276665,
"wallclock": "2026-05-23T04:46:29.001032",
"loss": 0.1171,
"grad_norm": 0.7548913955688477,
"learning_rate": 8.701578701043097e-06,
"step_time_sec": 113.69
},
{
"step": 1825,
"epoch": 0.5166678462736216,
"wallclock": "2026-05-23T04:48:22.554066",
"loss": 0.1248,
"grad_norm": 1.022698998451233,
"learning_rate": 8.693863329657576e-06,
"step_time_sec": 113.55
},
{
"step": 1830,
"epoch": 0.5180833746195768,
"wallclock": "2026-05-23T04:50:15.543925",
"loss": 0.1423,
"grad_norm": 1.0240012407302856,
"learning_rate": 8.686128545583906e-06,
"step_time_sec": 112.99
},
{
"step": 1835,
"epoch": 0.5194989029655319,
"wallclock": "2026-05-23T04:52:08.302700",
"loss": 0.1373,
"grad_norm": 1.0934542417526245,
"learning_rate": 8.678374389471375e-06,
"step_time_sec": 112.76
},
{
"step": 1840,
"epoch": 0.520914431311487,
"wallclock": "2026-05-23T04:54:00.947870",
"loss": 0.1463,
"grad_norm": 1.0597333908081055,
"learning_rate": 8.670600902071096e-06,
"step_time_sec": 112.65
},
{
"step": 1845,
"epoch": 0.5223299596574421,
"wallclock": "2026-05-23T04:55:54.818374",
"loss": 0.1206,
"grad_norm": 0.7178345918655396,
"learning_rate": 8.662808124235765e-06,
"step_time_sec": 113.87
},
{
"step": 1850,
"epoch": 0.5237454880033973,
"wallclock": "2026-05-23T04:57:48.527340",
"loss": 0.1075,
"grad_norm": 1.275473952293396,
"learning_rate": 8.65499609691946e-06,
"step_time_sec": 113.71,
"gpu": [
{
"gpu": 0,
"mem_allocated_gb": 33.45,
"mem_reserved_gb": 69.86
},
{
"gpu": 1,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 2,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 3,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
}
]
},
{
"step": 1855,
"epoch": 0.5251610163493524,
"wallclock": "2026-05-23T04:59:41.867913",
"loss": 0.1023,
"grad_norm": 0.5519967675209045,
"learning_rate": 8.647164861177422e-06,
"step_time_sec": 113.34
},
{
"step": 1860,
"epoch": 0.5265765446953076,
"wallclock": "2026-05-23T05:01:36.546653",
"loss": 0.1367,
"grad_norm": 0.9184526205062866,
"learning_rate": 8.639314458165839e-06,
"step_time_sec": 114.68
},
{
"step": 1865,
"epoch": 0.5279920730412626,
"wallclock": "2026-05-23T05:03:30.220915",
"loss": 0.1332,
"grad_norm": 0.938758373260498,
"learning_rate": 8.631444929141635e-06,
"step_time_sec": 113.67
},
{
"step": 1870,
"epoch": 0.5294076013872178,
"wallclock": "2026-05-23T05:05:24.720616",
"loss": 0.107,
"grad_norm": 0.8511345982551575,
"learning_rate": 8.62355631546224e-06,
"step_time_sec": 114.5
},
{
"step": 1875,
"epoch": 0.5308231297331729,
"wallclock": "2026-05-23T05:07:19.388697",
"loss": 0.1276,
"grad_norm": 1.1140179634094238,
"learning_rate": 8.615648658585392e-06,
"step_time_sec": 114.67
},
{
"step": 1880,
"epoch": 0.5322386580791281,
"wallclock": "2026-05-23T05:09:13.975351",
"loss": 0.117,
"grad_norm": 0.6539268493652344,
"learning_rate": 8.607722000068898e-06,
"step_time_sec": 114.59
},
{
"step": 1885,
"epoch": 0.5336541864250831,
"wallclock": "2026-05-23T05:11:08.325687",
"loss": 0.1193,
"grad_norm": 0.8391310572624207,
"learning_rate": 8.599776381570433e-06,
"step_time_sec": 114.35
},
{
"step": 1890,
"epoch": 0.5350697147710383,
"wallclock": "2026-05-23T05:13:02.941530",
"loss": 0.1264,
"grad_norm": 0.844965398311615,
"learning_rate": 8.59181184484731e-06,
"step_time_sec": 114.62
},
{
"step": 1895,
"epoch": 0.5364852431169934,
"wallclock": "2026-05-23T05:14:56.481372",
"loss": 0.1396,
"grad_norm": 0.7179044485092163,
"learning_rate": 8.583828431756272e-06,
"step_time_sec": 113.54
},
{
"step": 1900,
"epoch": 0.5379007714629486,
"wallclock": "2026-05-23T05:16:51.210427",
"loss": 0.0974,
"grad_norm": 0.8166824579238892,
"learning_rate": 8.575826184253254e-06,
"step_time_sec": 114.73,
"gpu": [
{
"gpu": 0,
"mem_allocated_gb": 33.45,
"mem_reserved_gb": 69.86
},
{
"gpu": 1,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 2,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 3,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
}
]
},
{
"step": 1900,
"epoch": 0.5379007714629486,
"wallclock": "2026-05-23T05:17:43.683064",
"eval_loss": 0.14031976461410522,
"eval_runtime": 52.3833,
"eval_samples_per_second": 4.773,
"eval_steps_per_second": 1.203,
"gpu": [
{
"gpu": 0,
"mem_allocated_gb": 39.05,
"mem_reserved_gb": 69.86
},
{
"gpu": 1,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 2,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 3,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
}
]
},
{
"step": 1905,
"epoch": 0.5393162998089037,
"wallclock": "2026-05-23T05:21:18.913048",
"loss": 0.1219,
"grad_norm": 1.006734013557434,
"learning_rate": 8.567805144393176e-06,
"step_time_sec": 267.7
},
{
"step": 1910,
"epoch": 0.5407318281548588,
"wallclock": "2026-05-23T05:23:13.218448",
"loss": 0.1284,
"grad_norm": 0.8619974255561829,
"learning_rate": 8.559765354329728e-06,
"step_time_sec": 114.31
},
{
"step": 1915,
"epoch": 0.5421473565008139,
"wallclock": "2026-05-23T05:25:08.140980",
"loss": 0.1184,
"grad_norm": 1.2139092683792114,
"learning_rate": 8.55170685631513e-06,
"step_time_sec": 114.92
},
{
"step": 1920,
"epoch": 0.5435628848467691,
"wallclock": "2026-05-23T05:27:03.707486",
"loss": 0.1129,
"grad_norm": 0.9047484397888184,
"learning_rate": 8.54362969269992e-06,
"step_time_sec": 115.57
},
{
"step": 1925,
"epoch": 0.5449784131927242,
"wallclock": "2026-05-23T05:28:57.612333",
"loss": 0.1163,
"grad_norm": 0.6891061663627625,
"learning_rate": 8.535533905932739e-06,
"step_time_sec": 113.9
},
{
"step": 1930,
"epoch": 0.5463939415386793,
"wallclock": "2026-05-23T05:30:52.594285",
"loss": 0.1164,
"grad_norm": 0.6650737524032593,
"learning_rate": 8.527419538560088e-06,
"step_time_sec": 114.98
},
{
"step": 1935,
"epoch": 0.5478094698846344,
"wallclock": "2026-05-23T05:32:48.432100",
"loss": 0.1187,
"grad_norm": 1.1412484645843506,
"learning_rate": 8.51928663322613e-06,
"step_time_sec": 115.84
},
{
"step": 1940,
"epoch": 0.5492249982305896,
"wallclock": "2026-05-23T05:34:43.177149",
"loss": 0.1342,
"grad_norm": 0.7133747339248657,
"learning_rate": 8.511135232672442e-06,
"step_time_sec": 114.75
},
{
"step": 1945,
"epoch": 0.5506405265765447,
"wallclock": "2026-05-23T05:36:42.534792",
"loss": 0.1132,
"grad_norm": 1.0151540040969849,
"learning_rate": 8.502965379737802e-06,
"step_time_sec": 119.36
},
{
"step": 1950,
"epoch": 0.5520560549224999,
"wallclock": "2026-05-23T05:38:46.820577",
"loss": 0.1273,
"grad_norm": 1.6805675029754639,
"learning_rate": 8.494777117357964e-06,
"step_time_sec": 124.29,
"gpu": [
{
"gpu": 0,
"mem_allocated_gb": 33.45,
"mem_reserved_gb": 69.86
},
{
"gpu": 1,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 2,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 3,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
}
]
},
{
"step": 1955,
"epoch": 0.5534715832684549,
"wallclock": "2026-05-23T05:40:50.018680",
"loss": 0.1142,
"grad_norm": 0.9018206596374512,
"learning_rate": 8.486570488565432e-06,
"step_time_sec": 123.2
},
{
"step": 1960,
"epoch": 0.5548871116144101,
"wallclock": "2026-05-23T05:42:53.671070",
"loss": 0.1258,
"grad_norm": 0.7533476948738098,
"learning_rate": 8.478345536489232e-06,
"step_time_sec": 123.65
},
{
"step": 1965,
"epoch": 0.5563026399603652,
"wallclock": "2026-05-23T05:44:56.957810",
"loss": 0.1218,
"grad_norm": 1.134895920753479,
"learning_rate": 8.470102304354685e-06,
"step_time_sec": 123.29
},
{
"step": 1970,
"epoch": 0.5577181683063204,
"wallclock": "2026-05-23T05:47:01.054040",
"loss": 0.1344,
"grad_norm": 0.9846596717834473,
"learning_rate": 8.461840835483179e-06,
"step_time_sec": 124.1
},
{
"step": 1975,
"epoch": 0.5591336966522754,
"wallclock": "2026-05-23T05:49:04.326418",
"loss": 0.1272,
"grad_norm": 0.8339362144470215,
"learning_rate": 8.45356117329195e-06,
"step_time_sec": 123.27
},
{
"step": 1980,
"epoch": 0.5605492249982306,
"wallclock": "2026-05-23T05:51:07.881648",
"loss": 0.1041,
"grad_norm": 1.041932463645935,
"learning_rate": 8.445263361293839e-06,
"step_time_sec": 123.56
},
{
"step": 1985,
"epoch": 0.5619647533441857,
"wallclock": "2026-05-23T05:53:11.738690",
"loss": 0.1492,
"grad_norm": 0.9378158450126648,
"learning_rate": 8.436947443097074e-06,
"step_time_sec": 123.86
},
{
"step": 1990,
"epoch": 0.5633802816901409,
"wallclock": "2026-05-23T05:55:16.469073",
"loss": 0.1055,
"grad_norm": 1.0052165985107422,
"learning_rate": 8.428613462405042e-06,
"step_time_sec": 124.73
},
{
"step": 1995,
"epoch": 0.564795810036096,
"wallclock": "2026-05-23T05:57:21.072731",
"loss": 0.1157,
"grad_norm": 0.9656962752342224,
"learning_rate": 8.42026146301605e-06,
"step_time_sec": 124.6
},
{
"step": 2000,
"epoch": 0.5662113383820511,
"wallclock": "2026-05-23T05:59:27.133239",
"loss": 0.1099,
"grad_norm": 0.6400126814842224,
"learning_rate": 8.411891488823102e-06,
"step_time_sec": 126.06,
"gpu": [
{
"gpu": 0,
"mem_allocated_gb": 33.45,
"mem_reserved_gb": 69.86
},
{
"gpu": 1,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 2,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 3,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
}
]
},
{
"step": 2000,
"epoch": 0.5662113383820511,
"wallclock": "2026-05-23T06:00:26.330552",
"eval_loss": 0.13213595747947693,
"eval_runtime": 59.102,
"eval_samples_per_second": 4.23,
"eval_steps_per_second": 1.066,
"gpu": [
{
"gpu": 0,
"mem_allocated_gb": 39.05,
"mem_reserved_gb": 69.86
},
{
"gpu": 1,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 2,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 3,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
}
]
},
{
"step": 2005,
"epoch": 0.5676268667280062,
"wallclock": "2026-05-23T06:04:02.423533",
"loss": 0.1243,
"grad_norm": 1.0383392572402954,
"learning_rate": 8.40350358381367e-06,
"step_time_sec": 275.29
},
{
"step": 2010,
"epoch": 0.5690423950739614,
"wallclock": "2026-05-23T06:05:55.119665",
"loss": 0.1192,
"grad_norm": 1.1544498205184937,
"learning_rate": 8.39509779206945e-06,
"step_time_sec": 112.7
},
{
"step": 2015,
"epoch": 0.5704579234199165,
"wallclock": "2026-05-23T06:07:49.815988",
"loss": 0.125,
"grad_norm": 1.1813828945159912,
"learning_rate": 8.386674157766156e-06,
"step_time_sec": 114.7
},
{
"step": 2020,
"epoch": 0.5718734517658716,
"wallclock": "2026-05-23T06:09:44.079892",
"loss": 0.0941,
"grad_norm": 0.582125723361969,
"learning_rate": 8.378232725173253e-06,
"step_time_sec": 114.26
},
{
"step": 2025,
"epoch": 0.5732889801118267,
"wallclock": "2026-05-23T06:11:37.953666",
"loss": 0.1276,
"grad_norm": 0.8630328178405762,
"learning_rate": 8.369773538653756e-06,
"step_time_sec": 113.87
},
{
"step": 2030,
"epoch": 0.5747045084577819,
"wallclock": "2026-05-23T06:13:33.538279",
"loss": 0.1139,
"grad_norm": 0.7153676748275757,
"learning_rate": 8.361296642663977e-06,
"step_time_sec": 115.58
},
{
"step": 2035,
"epoch": 0.576120036803737,
"wallclock": "2026-05-23T06:15:28.201077",
"loss": 0.1186,
"grad_norm": 1.0687501430511475,
"learning_rate": 8.352802081753304e-06,
"step_time_sec": 114.66
},
{
"step": 2040,
"epoch": 0.5775355651496922,
"wallclock": "2026-05-23T06:17:21.826972",
"loss": 0.0957,
"grad_norm": 0.7276541590690613,
"learning_rate": 8.344289900563955e-06,
"step_time_sec": 113.63
},
{
"step": 2045,
"epoch": 0.5789510934956472,
"wallclock": "2026-05-23T06:19:15.755614",
"loss": 0.1418,
"grad_norm": 1.2831865549087524,
"learning_rate": 8.335760143830753e-06,
"step_time_sec": 113.93
},
{
"step": 2050,
"epoch": 0.5803666218416024,
"wallclock": "2026-05-23T06:21:10.146824",
"loss": 0.0902,
"grad_norm": 0.8044394850730896,
"learning_rate": 8.327212856380886e-06,
"step_time_sec": 114.39,
"gpu": [
{
"gpu": 0,
"mem_allocated_gb": 33.45,
"mem_reserved_gb": 69.86
},
{
"gpu": 1,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 2,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 3,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
}
]
},
{
"step": 2055,
"epoch": 0.5817821501875575,
"wallclock": "2026-05-23T06:23:03.760022",
"loss": 0.1254,
"grad_norm": 0.8785029053688049,
"learning_rate": 8.318648083133675e-06,
"step_time_sec": 113.61
},
{
"step": 2060,
"epoch": 0.5831976785335127,
"wallclock": "2026-05-23T06:24:58.159811",
"loss": 0.1295,
"grad_norm": 0.8821666240692139,
"learning_rate": 8.310065869100332e-06,
"step_time_sec": 114.4
},
{
"step": 2065,
"epoch": 0.5846132068794677,
"wallclock": "2026-05-23T06:26:51.514557",
"loss": 0.1296,
"grad_norm": 1.0319464206695557,
"learning_rate": 8.301466259383729e-06,
"step_time_sec": 113.35
},
{
"step": 2070,
"epoch": 0.5860287352254229,
"wallclock": "2026-05-23T06:28:45.591485",
"loss": 0.1134,
"grad_norm": 0.7893862724304199,
"learning_rate": 8.292849299178158e-06,
"step_time_sec": 114.08
},
{
"step": 2075,
"epoch": 0.587444263571378,
"wallclock": "2026-05-23T06:30:40.034770",
"loss": 0.1123,
"grad_norm": 0.8960036635398865,
"learning_rate": 8.284215033769098e-06,
"step_time_sec": 114.44
},
{
"step": 2080,
"epoch": 0.5888597919173332,
"wallclock": "2026-05-23T06:32:33.013250",
"loss": 0.1019,
"grad_norm": 0.7732668519020081,
"learning_rate": 8.275563508532972e-06,
"step_time_sec": 112.98
},
{
"step": 2085,
"epoch": 0.5902753202632883,
"wallclock": "2026-05-23T06:34:26.451713",
"loss": 0.1159,
"grad_norm": 1.014701008796692,
"learning_rate": 8.266894768936907e-06,
"step_time_sec": 113.44
},
{
"step": 2090,
"epoch": 0.5916908486092434,
"wallclock": "2026-05-23T06:36:20.092613",
"loss": 0.117,
"grad_norm": 1.0048466920852661,
"learning_rate": 8.258208860538498e-06,
"step_time_sec": 113.64
},
{
"step": 2095,
"epoch": 0.5931063769551985,
"wallclock": "2026-05-23T06:38:13.619925",
"loss": 0.1295,
"grad_norm": 1.0775166749954224,
"learning_rate": 8.249505828985575e-06,
"step_time_sec": 113.53
},
{
"step": 2100,
"epoch": 0.5945219053011537,
"wallclock": "2026-05-23T06:40:07.681597",
"loss": 0.1198,
"grad_norm": 1.339026689529419,
"learning_rate": 8.240785720015954e-06,
"step_time_sec": 114.06,
"gpu": [
{
"gpu": 0,
"mem_allocated_gb": 33.45,
"mem_reserved_gb": 69.86
},
{
"gpu": 1,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 2,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 3,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
}
]
},
{
"step": 2100,
"epoch": 0.5945219053011537,
"wallclock": "2026-05-23T06:40:59.897976",
"eval_loss": 0.1282491832971573,
"eval_runtime": 52.1233,
"eval_samples_per_second": 4.796,
"eval_steps_per_second": 1.209,
"gpu": [
{
"gpu": 0,
"mem_allocated_gb": 39.05,
"mem_reserved_gb": 69.86
},
{
"gpu": 1,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 2,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 3,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
}
]
},
{
"step": 2105,
"epoch": 0.5959374336471088,
"wallclock": "2026-05-23T06:44:35.507980",
"loss": 0.0943,
"grad_norm": 0.7660958766937256,
"learning_rate": 8.232048579457194e-06,
"step_time_sec": 267.83
},
{
"step": 2110,
"epoch": 0.5973529619930639,
"wallclock": "2026-05-23T06:46:29.777766",
"loss": 0.11,
"grad_norm": 0.9617125391960144,
"learning_rate": 8.22329445322637e-06,
"step_time_sec": 114.27
},
{
"step": 2115,
"epoch": 0.598768490339019,
"wallclock": "2026-05-23T06:48:22.536086",
"loss": 0.1132,
"grad_norm": 1.1251046657562256,
"learning_rate": 8.214523387329815e-06,
"step_time_sec": 112.76
},
{
"step": 2120,
"epoch": 0.6001840186849742,
"wallclock": "2026-05-23T06:50:15.285691",
"loss": 0.1012,
"grad_norm": 0.8359034657478333,
"learning_rate": 8.205735427862897e-06,
"step_time_sec": 112.75
},
{
"step": 2125,
"epoch": 0.6015995470309293,
"wallclock": "2026-05-23T06:52:10.239923",
"loss": 0.0948,
"grad_norm": 0.8290632963180542,
"learning_rate": 8.196930621009756e-06,
"step_time_sec": 114.95
},
{
"step": 2130,
"epoch": 0.6030150753768844,
"wallclock": "2026-05-23T06:54:03.899054",
"loss": 0.1103,
"grad_norm": 0.707132875919342,
"learning_rate": 8.188109013043076e-06,
"step_time_sec": 113.66
},
{
"step": 2135,
"epoch": 0.6044306037228395,
"wallclock": "2026-05-23T06:55:56.905229",
"loss": 0.111,
"grad_norm": 0.940647542476654,
"learning_rate": 8.179270650323839e-06,
"step_time_sec": 113.01
},
{
"step": 2140,
"epoch": 0.6058461320687947,
"wallclock": "2026-05-23T06:57:51.331282",
"loss": 0.1101,
"grad_norm": 0.7413908243179321,
"learning_rate": 8.170415579301076e-06,
"step_time_sec": 114.43
},
{
"step": 2145,
"epoch": 0.6072616604147498,
"wallclock": "2026-05-23T06:59:44.905917",
"loss": 0.1021,
"grad_norm": 1.1988078355789185,
"learning_rate": 8.161543846511628e-06,
"step_time_sec": 113.57
},
{
"step": 2150,
"epoch": 0.608677188760705,
"wallclock": "2026-05-23T07:01:39.153468",
"loss": 0.1143,
"grad_norm": 1.0968750715255737,
"learning_rate": 8.152655498579903e-06,
"step_time_sec": 114.25,
"gpu": [
{
"gpu": 0,
"mem_allocated_gb": 33.45,
"mem_reserved_gb": 69.86
},
{
"gpu": 1,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 2,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 3,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
}
]
},
{
"step": 2155,
"epoch": 0.61009271710666,
"wallclock": "2026-05-23T07:03:33.733115",
"loss": 0.1268,
"grad_norm": 0.8552664518356323,
"learning_rate": 8.143750582217625e-06,
"step_time_sec": 114.58
},
{
"step": 2160,
"epoch": 0.6115082454526152,
"wallclock": "2026-05-23T07:05:27.710732",
"loss": 0.1103,
"grad_norm": 0.7791701555252075,
"learning_rate": 8.13482914422359e-06,
"step_time_sec": 113.98
},
{
"step": 2165,
"epoch": 0.6129237737985703,
"wallclock": "2026-05-23T07:07:22.028971",
"loss": 0.1155,
"grad_norm": 0.7360658645629883,
"learning_rate": 8.125891231483425e-06,
"step_time_sec": 114.32
},
{
"step": 2170,
"epoch": 0.6143393021445255,
"wallclock": "2026-05-23T07:09:16.562706",
"loss": 0.1132,
"grad_norm": 1.0679337978363037,
"learning_rate": 8.11693689096934e-06,
"step_time_sec": 114.53
},
{
"step": 2175,
"epoch": 0.6157548304904805,
"wallclock": "2026-05-23T07:11:10.858404",
"loss": 0.129,
"grad_norm": 0.9493758082389832,
"learning_rate": 8.107966169739871e-06,
"step_time_sec": 114.3
},
{
"step": 2180,
"epoch": 0.6171703588364357,
"wallclock": "2026-05-23T07:13:03.638564",
"loss": 0.1302,
"grad_norm": 0.9018224477767944,
"learning_rate": 8.09897911493965e-06,
"step_time_sec": 112.78
},
{
"step": 2185,
"epoch": 0.6185858871823908,
"wallclock": "2026-05-23T07:14:57.306827",
"loss": 0.1218,
"grad_norm": 0.8794463276863098,
"learning_rate": 8.089975773799143e-06,
"step_time_sec": 113.67
},
{
"step": 2190,
"epoch": 0.620001415528346,
"wallclock": "2026-05-23T07:16:51.323807",
"loss": 0.11,
"grad_norm": 0.8043993711471558,
"learning_rate": 8.080956193634409e-06,
"step_time_sec": 114.02
},
{
"step": 2195,
"epoch": 0.6214169438743011,
"wallclock": "2026-05-23T07:18:45.611509",
"loss": 0.0976,
"grad_norm": 1.1800931692123413,
"learning_rate": 8.07192042184685e-06,
"step_time_sec": 114.29
},
{
"step": 2200,
"epoch": 0.6228324722202562,
"wallclock": "2026-05-23T07:20:38.621541",
"loss": 0.1349,
"grad_norm": 1.5049303770065308,
"learning_rate": 8.062868505922958e-06,
"step_time_sec": 113.01,
"gpu": [
{
"gpu": 0,
"mem_allocated_gb": 33.45,
"mem_reserved_gb": 69.86
},
{
"gpu": 1,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 2,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 3,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
}
]
},
{
"step": 2200,
"epoch": 0.6228324722202562,
"wallclock": "2026-05-23T07:21:30.438443",
"eval_loss": 0.12787169218063354,
"eval_runtime": 51.72,
"eval_samples_per_second": 4.834,
"eval_steps_per_second": 1.218,
"gpu": [
{
"gpu": 0,
"mem_allocated_gb": 39.05,
"mem_reserved_gb": 69.86
},
{
"gpu": 1,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 2,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 3,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
}
]
},
{
"step": 2205,
"epoch": 0.6242480005662113,
"wallclock": "2026-05-23T07:25:03.848081",
"loss": 0.1167,
"grad_norm": 0.8288029432296753,
"learning_rate": 8.053800493434072e-06,
"step_time_sec": 265.23
},
{
"step": 2210,
"epoch": 0.6256635289121665,
"wallclock": "2026-05-23T07:26:56.740627",
"loss": 0.1079,
"grad_norm": 0.8604945540428162,
"learning_rate": 8.044716432036126e-06,
"step_time_sec": 112.89
},
{
"step": 2215,
"epoch": 0.6270790572581216,
"wallclock": "2026-05-23T07:28:54.914996",
"loss": 0.1164,
"grad_norm": 0.997947633266449,
"learning_rate": 8.035616369469392e-06,
"step_time_sec": 118.17
},
{
"step": 2220,
"epoch": 0.6284945856040767,
"wallclock": "2026-05-23T07:30:49.126331",
"loss": 0.102,
"grad_norm": 0.8771962523460388,
"learning_rate": 8.02650035355824e-06,
"step_time_sec": 114.21
},
{
"step": 2225,
"epoch": 0.6299101139500318,
"wallclock": "2026-05-23T07:32:43.224804",
"loss": 0.1177,
"grad_norm": 0.909534752368927,
"learning_rate": 8.017368432210875e-06,
"step_time_sec": 114.1
},
{
"step": 2230,
"epoch": 0.631325642295987,
"wallclock": "2026-05-23T07:34:37.246776",
"loss": 0.1316,
"grad_norm": 1.185617446899414,
"learning_rate": 8.008220653419097e-06,
"step_time_sec": 114.02
},
{
"step": 2235,
"epoch": 0.6327411706419421,
"wallclock": "2026-05-23T07:36:31.707708",
"loss": 0.0931,
"grad_norm": 0.9247961044311523,
"learning_rate": 7.99905706525804e-06,
"step_time_sec": 114.46
},
{
"step": 2240,
"epoch": 0.6341566989878973,
"wallclock": "2026-05-23T07:38:24.836647",
"loss": 0.0937,
"grad_norm": 0.9448702931404114,
"learning_rate": 7.989877715885925e-06,
"step_time_sec": 113.13
},
{
"step": 2245,
"epoch": 0.6355722273338523,
"wallclock": "2026-05-23T07:40:18.101149",
"loss": 0.1124,
"grad_norm": 0.9247167110443115,
"learning_rate": 7.980682653543799e-06,
"step_time_sec": 113.26
},
{
"step": 2250,
"epoch": 0.6369877556798075,
"wallclock": "2026-05-23T07:42:13.210519",
"loss": 0.1081,
"grad_norm": 1.228428602218628,
"learning_rate": 7.97147192655529e-06,
"step_time_sec": 115.11,
"gpu": [
{
"gpu": 0,
"mem_allocated_gb": 33.45,
"mem_reserved_gb": 69.86
},
{
"gpu": 1,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 2,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 3,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
}
]
},
{
"step": 2255,
"epoch": 0.6384032840257626,
"wallclock": "2026-05-23T07:44:07.337158",
"loss": 0.1082,
"grad_norm": 0.8680895566940308,
"learning_rate": 7.962245583326354e-06,
"step_time_sec": 114.13
},
{
"step": 2260,
"epoch": 0.6398188123717178,
"wallclock": "2026-05-23T07:46:01.112153",
"loss": 0.1073,
"grad_norm": 0.7317308783531189,
"learning_rate": 7.953003672345009e-06,
"step_time_sec": 113.77
},
{
"step": 2265,
"epoch": 0.6412343407176728,
"wallclock": "2026-05-23T07:47:55.256008",
"loss": 0.1213,
"grad_norm": 0.9891361594200134,
"learning_rate": 7.943746242181091e-06,
"step_time_sec": 114.14
},
{
"step": 2270,
"epoch": 0.642649869063628,
"wallclock": "2026-05-23T07:49:48.031699",
"loss": 0.1084,
"grad_norm": 0.8852012753486633,
"learning_rate": 7.934473341485998e-06,
"step_time_sec": 112.78
},
{
"step": 2275,
"epoch": 0.6440653974095831,
"wallclock": "2026-05-23T07:51:41.760762",
"loss": 0.1015,
"grad_norm": 0.6731085181236267,
"learning_rate": 7.925185018992426e-06,
"step_time_sec": 113.73
},
{
"step": 2280,
"epoch": 0.6454809257555383,
"wallclock": "2026-05-23T07:53:37.755943",
"loss": 0.0927,
"grad_norm": 0.8080906271934509,
"learning_rate": 7.91588132351412e-06,
"step_time_sec": 116.0
},
{
"step": 2285,
"epoch": 0.6468964541014934,
"wallclock": "2026-05-23T07:55:31.246122",
"loss": 0.117,
"grad_norm": 0.9637818336486816,
"learning_rate": 7.906562303945622e-06,
"step_time_sec": 113.49
},
{
"step": 2290,
"epoch": 0.6483119824474485,
"wallclock": "2026-05-23T07:57:25.355025",
"loss": 0.1148,
"grad_norm": 0.8999826908111572,
"learning_rate": 7.897228009262003e-06,
"step_time_sec": 114.11
},
{
"step": 2295,
"epoch": 0.6497275107934036,
"wallclock": "2026-05-23T07:59:20.568291",
"loss": 0.1202,
"grad_norm": 0.655300498008728,
"learning_rate": 7.887878488518608e-06,
"step_time_sec": 115.21
},
{
"step": 2300,
"epoch": 0.6511430391393588,
"wallclock": "2026-05-23T08:01:15.440455",
"loss": 0.1164,
"grad_norm": 1.327991247177124,
"learning_rate": 7.878513790850805e-06,
"step_time_sec": 114.87,
"gpu": [
{
"gpu": 0,
"mem_allocated_gb": 33.45,
"mem_reserved_gb": 69.86
},
{
"gpu": 1,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 2,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 3,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
}
]
},
{
"step": 2300,
"epoch": 0.6511430391393588,
"wallclock": "2026-05-23T08:02:07.666082",
"eval_loss": 0.12934190034866333,
"eval_runtime": 52.1298,
"eval_samples_per_second": 4.796,
"eval_steps_per_second": 1.209,
"gpu": [
{
"gpu": 0,
"mem_allocated_gb": 39.05,
"mem_reserved_gb": 69.86
},
{
"gpu": 1,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 2,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 3,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
}
]
},
{
"step": 2305,
"epoch": 0.6525585674853139,
"wallclock": "2026-05-23T08:05:41.525707",
"loss": 0.0931,
"grad_norm": 1.3085259199142456,
"learning_rate": 7.869133965473723e-06,
"step_time_sec": 266.09
},
{
"step": 2310,
"epoch": 0.653974095831269,
"wallclock": "2026-05-23T08:07:39.677572",
"loss": 0.1252,
"grad_norm": 0.9861677289009094,
"learning_rate": 7.859739061681992e-06,
"step_time_sec": 118.15
},
{
"step": 2315,
"epoch": 0.6553896241772241,
"wallclock": "2026-05-23T08:09:33.975162",
"loss": 0.1131,
"grad_norm": 0.685297966003418,
"learning_rate": 7.850329128849482e-06,
"step_time_sec": 114.3
},
{
"step": 2320,
"epoch": 0.6568051525231793,
"wallclock": "2026-05-23T08:11:28.536426",
"loss": 0.1087,
"grad_norm": 0.8919675946235657,
"learning_rate": 7.840904216429053e-06,
"step_time_sec": 114.56
},
{
"step": 2325,
"epoch": 0.6582206808691344,
"wallclock": "2026-05-23T08:13:23.250581",
"loss": 0.1037,
"grad_norm": 0.9594758152961731,
"learning_rate": 7.83146437395228e-06,
"step_time_sec": 114.71
},
{
"step": 2330,
"epoch": 0.6596362092150896,
"wallclock": "2026-05-23T08:15:17.659280",
"loss": 0.1021,
"grad_norm": 0.79726243019104,
"learning_rate": 7.82200965102921e-06,
"step_time_sec": 114.41
},
{
"step": 2335,
"epoch": 0.6610517375610446,
"wallclock": "2026-05-23T08:17:10.906487",
"loss": 0.1267,
"grad_norm": 1.4677671194076538,
"learning_rate": 7.812540097348085e-06,
"step_time_sec": 113.25
},
{
"step": 2340,
"epoch": 0.6624672659069998,
"wallclock": "2026-05-23T08:19:05.623865",
"loss": 0.1022,
"grad_norm": 0.8115029335021973,
"learning_rate": 7.803055762675096e-06,
"step_time_sec": 114.72
},
{
"step": 2345,
"epoch": 0.6638827942529549,
"wallclock": "2026-05-23T08:21:00.057684",
"loss": 0.097,
"grad_norm": 0.7353535890579224,
"learning_rate": 7.793556696854105e-06,
"step_time_sec": 114.43
},
{
"step": 2350,
"epoch": 0.6652983225989101,
"wallclock": "2026-05-23T08:22:52.623668",
"loss": 0.1056,
"grad_norm": 0.9155029058456421,
"learning_rate": 7.784042949806401e-06,
"step_time_sec": 112.57,
"gpu": [
{
"gpu": 0,
"mem_allocated_gb": 33.45,
"mem_reserved_gb": 69.86
},
{
"gpu": 1,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 2,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 3,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
}
]
},
{
"step": 2355,
"epoch": 0.6667138509448651,
"wallclock": "2026-05-23T08:24:46.327258",
"loss": 0.119,
"grad_norm": 1.1471012830734253,
"learning_rate": 7.77451457153042e-06,
"step_time_sec": 113.7
},
{
"step": 2360,
"epoch": 0.6681293792908203,
"wallclock": "2026-05-23T08:26:40.729225",
"loss": 0.1122,
"grad_norm": 1.1479600667953491,
"learning_rate": 7.764971612101497e-06,
"step_time_sec": 114.4
},
{
"step": 2365,
"epoch": 0.6695449076367754,
"wallclock": "2026-05-23T08:28:34.893479",
"loss": 0.1187,
"grad_norm": 0.990744411945343,
"learning_rate": 7.755414121671596e-06,
"step_time_sec": 114.16
},
{
"step": 2370,
"epoch": 0.6709604359827306,
"wallclock": "2026-05-23T08:30:29.655994",
"loss": 0.1045,
"grad_norm": 0.8785448670387268,
"learning_rate": 7.745842150469043e-06,
"step_time_sec": 114.76
},
{
"step": 2375,
"epoch": 0.6723759643286856,
"wallclock": "2026-05-23T08:32:24.847718",
"loss": 0.1015,
"grad_norm": 1.0024092197418213,
"learning_rate": 7.736255748798272e-06,
"step_time_sec": 115.19
},
{
"step": 2380,
"epoch": 0.6737914926746408,
"wallclock": "2026-05-23T08:34:19.407078",
"loss": 0.1087,
"grad_norm": 1.0146054029464722,
"learning_rate": 7.726654967039546e-06,
"step_time_sec": 114.56
},
{
"step": 2385,
"epoch": 0.6752070210205959,
"wallclock": "2026-05-23T08:36:13.103873",
"loss": 0.1194,
"grad_norm": 1.0869743824005127,
"learning_rate": 7.717039855648711e-06,
"step_time_sec": 113.7
},
{
"step": 2390,
"epoch": 0.6766225493665511,
"wallclock": "2026-05-23T08:38:07.793063",
"loss": 0.1053,
"grad_norm": 0.6551274061203003,
"learning_rate": 7.707410465156916e-06,
"step_time_sec": 114.69
},
{
"step": 2395,
"epoch": 0.6780380777125062,
"wallclock": "2026-05-23T08:40:01.316930",
"loss": 0.0985,
"grad_norm": 0.9398195147514343,
"learning_rate": 7.69776684617035e-06,
"step_time_sec": 113.52
},
{
"step": 2400,
"epoch": 0.6794536060584613,
"wallclock": "2026-05-23T08:41:54.704114",
"loss": 0.1208,
"grad_norm": 1.1209269762039185,
"learning_rate": 7.688109049369984e-06,
"step_time_sec": 113.39,
"gpu": [
{
"gpu": 0,
"mem_allocated_gb": 33.45,
"mem_reserved_gb": 69.86
},
{
"gpu": 1,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 2,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 3,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
}
]
},
{
"step": 2400,
"epoch": 0.6794536060584613,
"wallclock": "2026-05-23T08:42:47.203641",
"eval_loss": 0.11854572594165802,
"eval_runtime": 52.4158,
"eval_samples_per_second": 4.77,
"eval_steps_per_second": 1.202,
"gpu": [
{
"gpu": 0,
"mem_allocated_gb": 39.05,
"mem_reserved_gb": 69.86
},
{
"gpu": 1,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 2,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 3,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
}
]
},
{
"step": 2405,
"epoch": 0.6808691344044164,
"wallclock": "2026-05-23T08:46:24.285045",
"loss": 0.1037,
"grad_norm": 0.8943494558334351,
"learning_rate": 7.678437125511293e-06,
"step_time_sec": 269.58
},
{
"step": 2410,
"epoch": 0.6822846627503716,
"wallclock": "2026-05-23T08:48:17.119278",
"loss": 0.1201,
"grad_norm": 1.3184447288513184,
"learning_rate": 7.668751125423997e-06,
"step_time_sec": 112.83
},
{
"step": 2415,
"epoch": 0.6837001910963267,
"wallclock": "2026-05-23T08:50:10.316231",
"loss": 0.127,
"grad_norm": 1.2354567050933838,
"learning_rate": 7.659051100011796e-06,
"step_time_sec": 113.2
},
{
"step": 2420,
"epoch": 0.6851157194422818,
"wallclock": "2026-05-23T08:52:04.524428",
"loss": 0.0854,
"grad_norm": 0.7846460342407227,
"learning_rate": 7.649337100252091e-06,
"step_time_sec": 114.21
},
{
"step": 2425,
"epoch": 0.6865312477882369,
"wallclock": "2026-05-23T08:53:59.953373",
"loss": 0.1035,
"grad_norm": 0.6973745226860046,
"learning_rate": 7.639609177195732e-06,
"step_time_sec": 115.43
},
{
"step": 2430,
"epoch": 0.6879467761341921,
"wallclock": "2026-05-23T08:55:54.650826",
"loss": 0.1035,
"grad_norm": 0.8783355951309204,
"learning_rate": 7.629867381966739e-06,
"step_time_sec": 114.7
},
{
"step": 2435,
"epoch": 0.6893623044801472,
"wallclock": "2026-05-23T08:57:49.808654",
"loss": 0.1103,
"grad_norm": 0.8976749777793884,
"learning_rate": 7.6201117657620284e-06,
"step_time_sec": 115.16
},
{
"step": 2440,
"epoch": 0.6907778328261024,
"wallclock": "2026-05-23T08:59:43.041184",
"loss": 0.1041,
"grad_norm": 1.3639253377914429,
"learning_rate": 7.610342379851159e-06,
"step_time_sec": 113.23
},
{
"step": 2445,
"epoch": 0.6921933611720574,
"wallclock": "2026-05-23T09:01:36.414580",
"loss": 0.1172,
"grad_norm": 1.34951651096344,
"learning_rate": 7.600559275576054e-06,
"step_time_sec": 113.37
},
{
"step": 2450,
"epoch": 0.6936088895180126,
"wallclock": "2026-05-23T09:03:31.256289",
"loss": 0.1272,
"grad_norm": 1.2545363903045654,
"learning_rate": 7.590762504350729e-06,
"step_time_sec": 114.84,
"gpu": [
{
"gpu": 0,
"mem_allocated_gb": 33.45,
"mem_reserved_gb": 69.86
},
{
"gpu": 1,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 2,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 3,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
}
]
},
{
"step": 2455,
"epoch": 0.6950244178639677,
"wallclock": "2026-05-23T09:05:26.347778",
"loss": 0.1098,
"grad_norm": 0.902570903301239,
"learning_rate": 7.580952117661028e-06,
"step_time_sec": 115.09
},
{
"step": 2460,
"epoch": 0.6964399462099229,
"wallclock": "2026-05-23T09:07:22.291465",
"loss": 0.1261,
"grad_norm": 1.299424171447754,
"learning_rate": 7.571128167064347e-06,
"step_time_sec": 115.94
},
{
"step": 2465,
"epoch": 0.6978554745558779,
"wallclock": "2026-05-23T09:09:16.390275",
"loss": 0.1101,
"grad_norm": 0.9918133020401001,
"learning_rate": 7.5612907041893645e-06,
"step_time_sec": 114.1
},
{
"step": 2470,
"epoch": 0.6992710029018331,
"wallclock": "2026-05-23T09:11:10.300186",
"loss": 0.0887,
"grad_norm": 0.9212543964385986,
"learning_rate": 7.551439780735775e-06,
"step_time_sec": 113.91
},
{
"step": 2475,
"epoch": 0.7006865312477882,
"wallclock": "2026-05-23T09:13:02.291441",
"loss": 0.1198,
"grad_norm": 1.1632072925567627,
"learning_rate": 7.541575448474012e-06,
"step_time_sec": 111.99
},
{
"step": 2480,
"epoch": 0.7021020595937434,
"wallclock": "2026-05-23T09:14:55.310823",
"loss": 0.0919,
"grad_norm": 0.9132311940193176,
"learning_rate": 7.531697759244978e-06,
"step_time_sec": 113.02
},
{
"step": 2485,
"epoch": 0.7035175879396985,
"wallclock": "2026-05-23T09:16:49.595016",
"loss": 0.1046,
"grad_norm": 0.9931870698928833,
"learning_rate": 7.521806764959769e-06,
"step_time_sec": 114.28
},
{
"step": 2490,
"epoch": 0.7049331162856536,
"wallclock": "2026-05-23T09:18:43.462544",
"loss": 0.0934,
"grad_norm": 0.810712993144989,
"learning_rate": 7.511902517599407e-06,
"step_time_sec": 113.87
},
{
"step": 2495,
"epoch": 0.7063486446316087,
"wallclock": "2026-05-23T09:20:37.403219",
"loss": 0.1027,
"grad_norm": 1.004841685295105,
"learning_rate": 7.501985069214561e-06,
"step_time_sec": 113.94
},
{
"step": 2500,
"epoch": 0.7077641729775639,
"wallclock": "2026-05-23T09:22:33.235203",
"loss": 0.0982,
"grad_norm": 0.7684575319290161,
"learning_rate": 7.492054471925282e-06,
"step_time_sec": 115.83,
"gpu": [
{
"gpu": 0,
"mem_allocated_gb": 33.45,
"mem_reserved_gb": 74.24
},
{
"gpu": 1,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 2,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 3,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
}
]
},
{
"step": 2500,
"epoch": 0.7077641729775639,
"wallclock": "2026-05-23T09:23:26.146278",
"eval_loss": 0.11603201180696487,
"eval_runtime": 52.8156,
"eval_samples_per_second": 4.733,
"eval_steps_per_second": 1.193,
"gpu": [
{
"gpu": 0,
"mem_allocated_gb": 39.05,
"mem_reserved_gb": 74.24
},
{
"gpu": 1,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 2,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 3,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
}
]
},
{
"step": 2505,
"epoch": 0.709179701323519,
"wallclock": "2026-05-23T09:27:05.152438",
"loss": 0.1083,
"grad_norm": 0.8736166954040527,
"learning_rate": 7.482110777920719e-06,
"step_time_sec": 271.92
},
{
"step": 2510,
"epoch": 0.7105952296694741,
"wallclock": "2026-05-23T09:28:58.645822",
"loss": 0.1197,
"grad_norm": 1.1975699663162231,
"learning_rate": 7.472154039458851e-06,
"step_time_sec": 113.49
},
{
"step": 2515,
"epoch": 0.7120107580154292,
"wallclock": "2026-05-23T09:30:54.443603",
"loss": 0.1261,
"grad_norm": 1.4840281009674072,
"learning_rate": 7.462184308866209e-06,
"step_time_sec": 115.8
},
{
"step": 2520,
"epoch": 0.7134262863613844,
"wallclock": "2026-05-23T09:32:48.921941",
"loss": 0.1001,
"grad_norm": 0.9024205803871155,
"learning_rate": 7.452201638537605e-06,
"step_time_sec": 114.48
},
{
"step": 2525,
"epoch": 0.7148418147073395,
"wallclock": "2026-05-23T09:34:43.594377",
"loss": 0.0883,
"grad_norm": 2.425753355026245,
"learning_rate": 7.442206080935852e-06,
"step_time_sec": 114.67
},
{
"step": 2530,
"epoch": 0.7162573430532947,
"wallclock": "2026-05-23T09:36:38.043629",
"loss": 0.1033,
"grad_norm": 0.9202796816825867,
"learning_rate": 7.432197688591494e-06,
"step_time_sec": 114.45
},
{
"step": 2535,
"epoch": 0.7176728713992497,
"wallclock": "2026-05-23T09:38:33.443082",
"loss": 0.1229,
"grad_norm": 0.8916212320327759,
"learning_rate": 7.422176514102524e-06,
"step_time_sec": 115.4
},
{
"step": 2540,
"epoch": 0.7190883997452049,
"wallclock": "2026-05-23T09:40:26.131948",
"loss": 0.0948,
"grad_norm": 0.7314426898956299,
"learning_rate": 7.41214261013411e-06,
"step_time_sec": 112.69
},
{
"step": 2545,
"epoch": 0.72050392809116,
"wallclock": "2026-05-23T09:42:19.625497",
"loss": 0.1031,
"grad_norm": 1.2673311233520508,
"learning_rate": 7.402096029418317e-06,
"step_time_sec": 113.49
},
{
"step": 2550,
"epoch": 0.7219194564371152,
"wallclock": "2026-05-23T09:44:13.852248",
"loss": 0.1199,
"grad_norm": 0.9767388701438904,
"learning_rate": 7.3920368247538384e-06,
"step_time_sec": 114.23,
"gpu": [
{
"gpu": 0,
"mem_allocated_gb": 33.45,
"mem_reserved_gb": 74.24
},
{
"gpu": 1,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 2,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 3,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
}
]
},
{
"step": 2555,
"epoch": 0.7233349847830702,
"wallclock": "2026-05-23T09:46:07.447283",
"loss": 0.1087,
"grad_norm": 1.0202505588531494,
"learning_rate": 7.381965049005703e-06,
"step_time_sec": 113.6
},
{
"step": 2560,
"epoch": 0.7247505131290254,
"wallclock": "2026-05-23T09:48:01.720028",
"loss": 0.0971,
"grad_norm": 1.1415823698043823,
"learning_rate": 7.371880755105008e-06,
"step_time_sec": 114.27
},
{
"step": 2565,
"epoch": 0.7261660414749805,
"wallclock": "2026-05-23T09:49:56.656471",
"loss": 0.1001,
"grad_norm": 1.0273898839950562,
"learning_rate": 7.361783996048641e-06,
"step_time_sec": 114.94
},
{
"step": 2570,
"epoch": 0.7275815698209357,
"wallclock": "2026-05-23T09:51:49.820193",
"loss": 0.1057,
"grad_norm": 1.1736416816711426,
"learning_rate": 7.3516748248989955e-06,
"step_time_sec": 113.16
},
{
"step": 2575,
"epoch": 0.7289970981668908,
"wallclock": "2026-05-23T09:53:43.050372",
"loss": 0.1056,
"grad_norm": 0.8515759706497192,
"learning_rate": 7.341553294783699e-06,
"step_time_sec": 113.23
},
{
"step": 2580,
"epoch": 0.7304126265128459,
"wallclock": "2026-05-23T09:55:37.522835",
"loss": 0.1058,
"grad_norm": 0.8394744992256165,
"learning_rate": 7.3314194588953256e-06,
"step_time_sec": 114.47
},
{
"step": 2585,
"epoch": 0.731828154858801,
"wallclock": "2026-05-23T09:57:31.953180",
"loss": 0.1082,
"grad_norm": 0.7621601819992065,
"learning_rate": 7.3212733704911235e-06,
"step_time_sec": 114.43
},
{
"step": 2590,
"epoch": 0.7332436832047562,
"wallclock": "2026-05-23T09:59:25.144746",
"loss": 0.1147,
"grad_norm": 1.1607191562652588,
"learning_rate": 7.311115082892733e-06,
"step_time_sec": 113.19
},
{
"step": 2595,
"epoch": 0.7346592115507113,
"wallclock": "2026-05-23T10:01:19.943656",
"loss": 0.1141,
"grad_norm": 0.9936063289642334,
"learning_rate": 7.300944649485908e-06,
"step_time_sec": 114.8
},
{
"step": 2600,
"epoch": 0.7360747398966664,
"wallclock": "2026-05-23T10:03:14.923839",
"loss": 0.1048,
"grad_norm": 0.7679593563079834,
"learning_rate": 7.2907621237202275e-06,
"step_time_sec": 114.98,
"gpu": [
{
"gpu": 0,
"mem_allocated_gb": 33.45,
"mem_reserved_gb": 74.24
},
{
"gpu": 1,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 2,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 3,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
}
]
},
{
"step": 2600,
"epoch": 0.7360747398966664,
"wallclock": "2026-05-23T10:04:07.231100",
"eval_loss": 0.11498851329088211,
"eval_runtime": 52.2032,
"eval_samples_per_second": 4.789,
"eval_steps_per_second": 1.207,
"gpu": [
{
"gpu": 0,
"mem_allocated_gb": 39.05,
"mem_reserved_gb": 74.24
},
{
"gpu": 1,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 2,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 3,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
}
]
},
{
"step": 2605,
"epoch": 0.7374902682426215,
"wallclock": "2026-05-23T10:07:42.813659",
"loss": 0.0785,
"grad_norm": 0.8581358790397644,
"learning_rate": 7.280567559108825e-06,
"step_time_sec": 267.89
},
{
"step": 2610,
"epoch": 0.7389057965885767,
"wallclock": "2026-05-23T10:09:36.902094",
"loss": 0.0795,
"grad_norm": 1.4470053911209106,
"learning_rate": 7.270361009228104e-06,
"step_time_sec": 114.09
},
{
"step": 2615,
"epoch": 0.7403213249345318,
"wallclock": "2026-05-23T10:11:32.049560",
"loss": 0.1029,
"grad_norm": 1.1154381036758423,
"learning_rate": 7.260142527717449e-06,
"step_time_sec": 115.15
},
{
"step": 2620,
"epoch": 0.741736853280487,
"wallclock": "2026-05-23T10:13:28.524157",
"loss": 0.114,
"grad_norm": 1.143662929534912,
"learning_rate": 7.249912168278954e-06,
"step_time_sec": 116.47
},
{
"step": 2625,
"epoch": 0.743152381626442,
"wallclock": "2026-05-23T10:15:25.719237",
"loss": 0.1157,
"grad_norm": 1.3383020162582397,
"learning_rate": 7.23966998467714e-06,
"step_time_sec": 117.2
},
{
"step": 2630,
"epoch": 0.7445679099723972,
"wallclock": "2026-05-23T10:17:20.106607",
"loss": 0.097,
"grad_norm": 1.3460333347320557,
"learning_rate": 7.229416030738661e-06,
"step_time_sec": 114.39
},
{
"step": 2635,
"epoch": 0.7459834383183523,
"wallclock": "2026-05-23T10:19:12.833927",
"loss": 0.0934,
"grad_norm": 1.0922449827194214,
"learning_rate": 7.219150360352032e-06,
"step_time_sec": 112.73
},
{
"step": 2640,
"epoch": 0.7473989666643075,
"wallclock": "2026-05-23T10:21:07.756043",
"loss": 0.1099,
"grad_norm": 0.9513120651245117,
"learning_rate": 7.208873027467345e-06,
"step_time_sec": 114.92
},
{
"step": 2645,
"epoch": 0.7488144950102625,
"wallclock": "2026-05-23T10:23:00.826108",
"loss": 0.1106,
"grad_norm": 0.9753119945526123,
"learning_rate": 7.198584086095979e-06,
"step_time_sec": 113.07
},
{
"step": 2650,
"epoch": 0.7502300233562177,
"wallclock": "2026-05-23T10:24:56.030014",
"loss": 0.0936,
"grad_norm": 1.4077311754226685,
"learning_rate": 7.188283590310322e-06,
"step_time_sec": 115.2,
"gpu": [
{
"gpu": 0,
"mem_allocated_gb": 33.45,
"mem_reserved_gb": 74.24
},
{
"gpu": 1,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 2,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 3,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
}
]
},
{
"step": 2655,
"epoch": 0.7516455517021728,
"wallclock": "2026-05-23T10:26:50.125758",
"loss": 0.1239,
"grad_norm": 0.8350121378898621,
"learning_rate": 7.177971594243486e-06,
"step_time_sec": 114.1
},
{
"step": 2660,
"epoch": 0.753061080048128,
"wallclock": "2026-05-23T10:28:44.428938",
"loss": 0.1024,
"grad_norm": 1.0880407094955444,
"learning_rate": 7.167648152089017e-06,
"step_time_sec": 114.3
},
{
"step": 2665,
"epoch": 0.754476608394083,
"wallclock": "2026-05-23T10:30:38.443004",
"loss": 0.1,
"grad_norm": 1.0360862016677856,
"learning_rate": 7.157313318100622e-06,
"step_time_sec": 114.01
},
{
"step": 2670,
"epoch": 0.7558921367400382,
"wallclock": "2026-05-23T10:32:32.126033",
"loss": 0.1126,
"grad_norm": 1.0407313108444214,
"learning_rate": 7.14696714659187e-06,
"step_time_sec": 113.68
},
{
"step": 2675,
"epoch": 0.7573076650859933,
"wallclock": "2026-05-23T10:34:25.914981",
"loss": 0.0933,
"grad_norm": 1.346449613571167,
"learning_rate": 7.136609691935914e-06,
"step_time_sec": 113.79
},
{
"step": 2680,
"epoch": 0.7587231934319485,
"wallclock": "2026-05-23T10:36:21.156179",
"loss": 0.0893,
"grad_norm": 1.2292298078536987,
"learning_rate": 7.1262410085652075e-06,
"step_time_sec": 115.24
},
{
"step": 2685,
"epoch": 0.7601387217779036,
"wallclock": "2026-05-23T10:38:15.006638",
"loss": 0.1224,
"grad_norm": 1.2744159698486328,
"learning_rate": 7.115861150971215e-06,
"step_time_sec": 113.85
},
{
"step": 2690,
"epoch": 0.7615542501238587,
"wallclock": "2026-05-23T10:40:09.527798",
"loss": 0.0863,
"grad_norm": 1.0019073486328125,
"learning_rate": 7.105470173704121e-06,
"step_time_sec": 114.52
},
{
"step": 2695,
"epoch": 0.7629697784698138,
"wallclock": "2026-05-23T10:42:03.509958",
"loss": 0.098,
"grad_norm": 1.0547888278961182,
"learning_rate": 7.095068131372552e-06,
"step_time_sec": 113.98
},
{
"step": 2700,
"epoch": 0.764385306815769,
"wallclock": "2026-05-23T10:44:00.051414",
"loss": 0.116,
"grad_norm": 0.9419006109237671,
"learning_rate": 7.0846550786432885e-06,
"step_time_sec": 116.54,
"gpu": [
{
"gpu": 0,
"mem_allocated_gb": 33.45,
"mem_reserved_gb": 74.24
},
{
"gpu": 1,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 2,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 3,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
}
]
},
{
"step": 2700,
"epoch": 0.764385306815769,
"wallclock": "2026-05-23T10:44:52.913063",
"eval_loss": 0.110720694065094,
"eval_runtime": 52.7686,
"eval_samples_per_second": 4.738,
"eval_steps_per_second": 1.194,
"gpu": [
{
"gpu": 0,
"mem_allocated_gb": 39.05,
"mem_reserved_gb": 74.24
},
{
"gpu": 1,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 2,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 3,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
}
]
},
{
"step": 2705,
"epoch": 0.7658008351617241,
"wallclock": "2026-05-23T10:48:31.237934",
"loss": 0.0998,
"grad_norm": 1.0532370805740356,
"learning_rate": 7.074231070240969e-06,
"step_time_sec": 271.19
},
{
"step": 2710,
"epoch": 0.7672163635076792,
"wallclock": "2026-05-23T10:50:25.703822",
"loss": 0.1059,
"grad_norm": 1.1707059144973755,
"learning_rate": 7.063796160947811e-06,
"step_time_sec": 114.47
},
{
"step": 2715,
"epoch": 0.7686318918536343,
"wallclock": "2026-05-23T10:52:20.230127",
"loss": 0.0836,
"grad_norm": 1.0319560766220093,
"learning_rate": 7.0533504056033234e-06,
"step_time_sec": 114.53
},
{
"step": 2720,
"epoch": 0.7700474201995895,
"wallclock": "2026-05-23T10:54:15.428683",
"loss": 0.0971,
"grad_norm": 1.1601600646972656,
"learning_rate": 7.042893859104008e-06,
"step_time_sec": 115.2
},
{
"step": 2725,
"epoch": 0.7714629485455446,
"wallclock": "2026-05-23T10:56:09.560972",
"loss": 0.0808,
"grad_norm": 1.0438365936279297,
"learning_rate": 7.032426576403084e-06,
"step_time_sec": 114.13
},
{
"step": 2730,
"epoch": 0.7728784768914998,
"wallclock": "2026-05-23T10:58:03.848888",
"loss": 0.102,
"grad_norm": 1.1061596870422363,
"learning_rate": 7.021948612510194e-06,
"step_time_sec": 114.29
},
{
"step": 2735,
"epoch": 0.7742940052374548,
"wallclock": "2026-05-23T10:59:58.320211",
"loss": 0.0984,
"grad_norm": 0.7871215343475342,
"learning_rate": 7.011460022491111e-06,
"step_time_sec": 114.47
},
{
"step": 2740,
"epoch": 0.77570953358341,
"wallclock": "2026-05-23T11:01:53.123512",
"loss": 0.0861,
"grad_norm": 0.9695367813110352,
"learning_rate": 7.000960861467454e-06,
"step_time_sec": 114.8
},
{
"step": 2745,
"epoch": 0.7771250619293651,
"wallclock": "2026-05-23T11:03:47.400982",
"loss": 0.0988,
"grad_norm": 0.9494866132736206,
"learning_rate": 6.990451184616399e-06,
"step_time_sec": 114.28
},
{
"step": 2750,
"epoch": 0.7785405902753203,
"wallclock": "2026-05-23T11:05:41.439834",
"loss": 0.0848,
"grad_norm": 0.8476992249488831,
"learning_rate": 6.979931047170382e-06,
"step_time_sec": 114.04,
"gpu": [
{
"gpu": 0,
"mem_allocated_gb": 33.45,
"mem_reserved_gb": 74.24
},
{
"gpu": 1,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 2,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 3,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
}
]
},
{
"step": 2755,
"epoch": 0.7799561186212753,
"wallclock": "2026-05-23T11:07:35.107884",
"loss": 0.0877,
"grad_norm": 0.9056459069252014,
"learning_rate": 6.969400504416816e-06,
"step_time_sec": 113.67
},
{
"step": 2760,
"epoch": 0.7813716469672305,
"wallclock": "2026-05-23T11:09:27.251632",
"loss": 0.0942,
"grad_norm": 1.1086695194244385,
"learning_rate": 6.9588596116978015e-06,
"step_time_sec": 112.14
},
{
"step": 2765,
"epoch": 0.7827871753131856,
"wallclock": "2026-05-23T11:11:21.586762",
"loss": 0.1023,
"grad_norm": 1.655490756034851,
"learning_rate": 6.948308424409824e-06,
"step_time_sec": 114.34
},
{
"step": 2770,
"epoch": 0.7842027036591408,
"wallclock": "2026-05-23T11:13:15.016276",
"loss": 0.1057,
"grad_norm": 0.9345031380653381,
"learning_rate": 6.937746998003477e-06,
"step_time_sec": 113.43
},
{
"step": 2775,
"epoch": 0.785618232005096,
"wallclock": "2026-05-23T11:15:09.371971",
"loss": 0.0827,
"grad_norm": 1.2220042943954468,
"learning_rate": 6.927175387983165e-06,
"step_time_sec": 114.36
},
{
"step": 2780,
"epoch": 0.787033760351051,
"wallclock": "2026-05-23T11:17:03.650861",
"loss": 0.1028,
"grad_norm": 0.8835825324058533,
"learning_rate": 6.9165936499068065e-06,
"step_time_sec": 114.28
},
{
"step": 2785,
"epoch": 0.7884492886970061,
"wallclock": "2026-05-23T11:18:57.806390",
"loss": 0.095,
"grad_norm": 1.1001851558685303,
"learning_rate": 6.906001839385551e-06,
"step_time_sec": 114.16
},
{
"step": 2790,
"epoch": 0.7898648170429613,
"wallclock": "2026-05-23T11:20:52.228547",
"loss": 0.0906,
"grad_norm": 0.7298992276191711,
"learning_rate": 6.895400012083482e-06,
"step_time_sec": 114.42
},
{
"step": 2795,
"epoch": 0.7912803453889165,
"wallclock": "2026-05-23T11:22:47.415434",
"loss": 0.1135,
"grad_norm": 0.8096187710762024,
"learning_rate": 6.884788223717326e-06,
"step_time_sec": 115.19
},
{
"step": 2800,
"epoch": 0.7926958737348715,
"wallclock": "2026-05-23T11:24:42.100846",
"loss": 0.0896,
"grad_norm": 0.7147625088691711,
"learning_rate": 6.874166530056153e-06,
"step_time_sec": 114.69,
"gpu": [
{
"gpu": 0,
"mem_allocated_gb": 33.45,
"mem_reserved_gb": 74.24
},
{
"gpu": 1,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 2,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 3,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
}
]
},
{
"step": 2800,
"epoch": 0.7926958737348715,
"wallclock": "2026-05-23T11:25:35.071421",
"eval_loss": 0.10768646746873856,
"eval_runtime": 52.8634,
"eval_samples_per_second": 4.729,
"eval_steps_per_second": 1.192,
"gpu": [
{
"gpu": 0,
"mem_allocated_gb": 39.05,
"mem_reserved_gb": 74.24
},
{
"gpu": 1,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 2,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 3,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
}
]
},
{
"step": 2805,
"epoch": 0.7941114020808266,
"wallclock": "2026-05-23T11:29:12.349456",
"loss": 0.104,
"grad_norm": 1.1961441040039062,
"learning_rate": 6.863534986921098e-06,
"step_time_sec": 270.25
},
{
"step": 2810,
"epoch": 0.7955269304267818,
"wallclock": "2026-05-23T11:31:06.641212",
"loss": 0.0879,
"grad_norm": 0.8926189541816711,
"learning_rate": 6.852893650185051e-06,
"step_time_sec": 114.29
},
{
"step": 2815,
"epoch": 0.796942458772737,
"wallclock": "2026-05-23T11:33:01.446223",
"loss": 0.0953,
"grad_norm": 0.6535293459892273,
"learning_rate": 6.842242575772374e-06,
"step_time_sec": 114.81
},
{
"step": 2820,
"epoch": 0.7983579871186921,
"wallclock": "2026-05-23T11:34:55.451278",
"loss": 0.0921,
"grad_norm": 1.124362587928772,
"learning_rate": 6.831581819658608e-06,
"step_time_sec": 114.01
},
{
"step": 2825,
"epoch": 0.7997735154646471,
"wallclock": "2026-05-23T11:36:49.662998",
"loss": 0.1037,
"grad_norm": 0.7776113152503967,
"learning_rate": 6.820911437870169e-06,
"step_time_sec": 114.21
},
{
"step": 2830,
"epoch": 0.8011890438106023,
"wallclock": "2026-05-23T11:38:45.282209",
"loss": 0.0958,
"grad_norm": 1.1590611934661865,
"learning_rate": 6.810231486484064e-06,
"step_time_sec": 115.62
},
{
"step": 2835,
"epoch": 0.8026045721565574,
"wallclock": "2026-05-23T11:40:40.066510",
"loss": 0.0928,
"grad_norm": 0.9135128259658813,
"learning_rate": 6.79954202162759e-06,
"step_time_sec": 114.78
},
{
"step": 2840,
"epoch": 0.8040201005025126,
"wallclock": "2026-05-23T11:42:35.293647",
"loss": 0.1171,
"grad_norm": 1.5331295728683472,
"learning_rate": 6.788843099478041e-06,
"step_time_sec": 115.23
},
{
"step": 2845,
"epoch": 0.8054356288484676,
"wallclock": "2026-05-23T11:44:32.410482",
"loss": 0.0892,
"grad_norm": 0.802897036075592,
"learning_rate": 6.778134776262413e-06,
"step_time_sec": 117.12
},
{
"step": 2850,
"epoch": 0.8068511571944228,
"wallclock": "2026-05-23T11:46:26.926697",
"loss": 0.1081,
"grad_norm": 1.0739949941635132,
"learning_rate": 6.76741710825711e-06,
"step_time_sec": 114.52,
"gpu": [
{
"gpu": 0,
"mem_allocated_gb": 33.45,
"mem_reserved_gb": 74.24
},
{
"gpu": 1,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 2,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 3,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
}
]
},
{
"step": 2855,
"epoch": 0.808266685540378,
"wallclock": "2026-05-23T11:48:20.898137",
"loss": 0.0906,
"grad_norm": 1.2039380073547363,
"learning_rate": 6.756690151787643e-06,
"step_time_sec": 113.97
},
{
"step": 2860,
"epoch": 0.8096822138863331,
"wallclock": "2026-05-23T11:50:16.624972",
"loss": 0.0765,
"grad_norm": 0.9947606325149536,
"learning_rate": 6.74595396322834e-06,
"step_time_sec": 115.73
},
{
"step": 2865,
"epoch": 0.8110977422322883,
"wallclock": "2026-05-23T11:52:08.742942",
"loss": 0.0886,
"grad_norm": 1.0721163749694824,
"learning_rate": 6.735208599002048e-06,
"step_time_sec": 112.12
},
{
"step": 2870,
"epoch": 0.8125132705782433,
"wallclock": "2026-05-23T11:54:01.649122",
"loss": 0.0956,
"grad_norm": 0.9984346628189087,
"learning_rate": 6.724454115579832e-06,
"step_time_sec": 112.91
},
{
"step": 2875,
"epoch": 0.8139287989241984,
"wallclock": "2026-05-23T11:55:55.986094",
"loss": 0.1013,
"grad_norm": 0.8976569771766663,
"learning_rate": 6.713690569480685e-06,
"step_time_sec": 114.34
},
{
"step": 2880,
"epoch": 0.8153443272701536,
"wallclock": "2026-05-23T11:57:50.253656",
"loss": 0.11,
"grad_norm": 1.3766424655914307,
"learning_rate": 6.7029180172712295e-06,
"step_time_sec": 114.27
},
{
"step": 2885,
"epoch": 0.8167598556161088,
"wallclock": "2026-05-23T11:59:45.942765",
"loss": 0.0871,
"grad_norm": 0.7033481597900391,
"learning_rate": 6.6921365155654126e-06,
"step_time_sec": 115.69
},
{
"step": 2890,
"epoch": 0.8181753839620638,
"wallclock": "2026-05-23T12:01:42.248219",
"loss": 0.0872,
"grad_norm": 1.1330105066299438,
"learning_rate": 6.6813461210242215e-06,
"step_time_sec": 116.31
},
{
"step": 2895,
"epoch": 0.819590912308019,
"wallclock": "2026-05-23T12:03:36.053753",
"loss": 0.1055,
"grad_norm": 1.2184752225875854,
"learning_rate": 6.670546890355374e-06,
"step_time_sec": 113.81
},
{
"step": 2900,
"epoch": 0.8210064406539741,
"wallclock": "2026-05-23T12:05:30.028128",
"loss": 0.1058,
"grad_norm": 0.665178656578064,
"learning_rate": 6.659738880313025e-06,
"step_time_sec": 113.97,
"gpu": [
{
"gpu": 0,
"mem_allocated_gb": 33.45,
"mem_reserved_gb": 74.24
},
{
"gpu": 1,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 2,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 3,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
}
]
},
{
"step": 2900,
"epoch": 0.8210064406539741,
"wallclock": "2026-05-23T12:06:22.012953",
"eval_loss": 0.10808777064085007,
"eval_runtime": 51.8846,
"eval_samples_per_second": 4.818,
"eval_steps_per_second": 1.214,
"gpu": [
{
"gpu": 0,
"mem_allocated_gb": 39.05,
"mem_reserved_gb": 74.24
},
{
"gpu": 1,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 2,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 3,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
}
]
},
{
"step": 2905,
"epoch": 0.8224219689999293,
"wallclock": "2026-05-23T12:09:56.926177",
"loss": 0.0969,
"grad_norm": 1.386168122291565,
"learning_rate": 6.648922147697471e-06,
"step_time_sec": 266.9
},
{
"step": 2910,
"epoch": 0.8238374973458844,
"wallclock": "2026-05-23T12:11:52.616991",
"loss": 0.0787,
"grad_norm": 1.0408498048782349,
"learning_rate": 6.63809674935485e-06,
"step_time_sec": 115.69
},
{
"step": 2915,
"epoch": 0.8252530256918394,
"wallclock": "2026-05-23T12:13:47.509339",
"loss": 0.106,
"grad_norm": 1.0766488313674927,
"learning_rate": 6.6272627421768366e-06,
"step_time_sec": 114.89
},
{
"step": 2920,
"epoch": 0.8266685540377946,
"wallclock": "2026-05-23T12:15:43.081952",
"loss": 0.0859,
"grad_norm": 0.8988505005836487,
"learning_rate": 6.616420183100353e-06,
"step_time_sec": 115.57
},
{
"step": 2925,
"epoch": 0.8280840823837498,
"wallclock": "2026-05-23T12:17:37.913984",
"loss": 0.09,
"grad_norm": 1.0285881757736206,
"learning_rate": 6.605569129107263e-06,
"step_time_sec": 114.83
},
{
"step": 2930,
"epoch": 0.8294996107297049,
"wallclock": "2026-05-23T12:19:32.363447",
"loss": 0.0921,
"grad_norm": 1.0034139156341553,
"learning_rate": 6.594709637224075e-06,
"step_time_sec": 114.45
},
{
"step": 2935,
"epoch": 0.83091513907566,
"wallclock": "2026-05-23T12:21:27.369008",
"loss": 0.0802,
"grad_norm": 0.8240336775779724,
"learning_rate": 6.583841764521641e-06,
"step_time_sec": 115.01
},
{
"step": 2940,
"epoch": 0.8323306674216151,
"wallclock": "2026-05-23T12:23:23.007495",
"loss": 0.095,
"grad_norm": 1.2371604442596436,
"learning_rate": 6.572965568114859e-06,
"step_time_sec": 115.64
},
{
"step": 2945,
"epoch": 0.8337461957675703,
"wallclock": "2026-05-23T12:25:16.996557",
"loss": 0.095,
"grad_norm": 1.1819149255752563,
"learning_rate": 6.562081105162369e-06,
"step_time_sec": 113.99
},
{
"step": 2950,
"epoch": 0.8351617241135254,
"wallclock": "2026-05-23T12:27:11.125332",
"loss": 0.08,
"grad_norm": 1.0016002655029297,
"learning_rate": 6.551188432866257e-06,
"step_time_sec": 114.13,
"gpu": [
{
"gpu": 0,
"mem_allocated_gb": 33.45,
"mem_reserved_gb": 74.24
},
{
"gpu": 1,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 2,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 3,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
}
]
},
{
"step": 2955,
"epoch": 0.8365772524594804,
"wallclock": "2026-05-23T12:29:06.207323",
"loss": 0.1015,
"grad_norm": 1.2419204711914062,
"learning_rate": 6.5402876084717514e-06,
"step_time_sec": 115.08
},
{
"step": 2960,
"epoch": 0.8379927808054356,
"wallclock": "2026-05-23T12:31:00.634331",
"loss": 0.0848,
"grad_norm": 1.0234307050704956,
"learning_rate": 6.529378689266923e-06,
"step_time_sec": 114.43
},
{
"step": 2965,
"epoch": 0.8394083091513908,
"wallclock": "2026-05-23T12:32:54.709674",
"loss": 0.1004,
"grad_norm": 1.3117458820343018,
"learning_rate": 6.518461732582385e-06,
"step_time_sec": 114.08
},
{
"step": 2970,
"epoch": 0.8408238374973459,
"wallclock": "2026-05-23T12:34:50.639025",
"loss": 0.0861,
"grad_norm": 0.6640080213546753,
"learning_rate": 6.507536795790989e-06,
"step_time_sec": 115.93
},
{
"step": 2975,
"epoch": 0.8422393658433011,
"wallclock": "2026-05-23T12:36:44.816198",
"loss": 0.0921,
"grad_norm": 0.7706874012947083,
"learning_rate": 6.496603936307525e-06,
"step_time_sec": 114.18
},
{
"step": 2980,
"epoch": 0.8436548941892561,
"wallclock": "2026-05-23T12:38:41.632400",
"loss": 0.0774,
"grad_norm": 0.9700288772583008,
"learning_rate": 6.4856632115884245e-06,
"step_time_sec": 116.82
},
{
"step": 2985,
"epoch": 0.8450704225352113,
"wallclock": "2026-05-23T12:40:37.751686",
"loss": 0.0827,
"grad_norm": 1.0276799201965332,
"learning_rate": 6.4747146791314456e-06,
"step_time_sec": 116.12
},
{
"step": 2990,
"epoch": 0.8464859508811664,
"wallclock": "2026-05-23T12:42:31.949658",
"loss": 0.1038,
"grad_norm": 1.124481439590454,
"learning_rate": 6.4637583964753855e-06,
"step_time_sec": 114.2
},
{
"step": 2995,
"epoch": 0.8479014792271216,
"wallclock": "2026-05-23T12:44:25.803641",
"loss": 0.1034,
"grad_norm": 1.4556708335876465,
"learning_rate": 6.452794421199772e-06,
"step_time_sec": 113.85
},
{
"step": 3000,
"epoch": 0.8493170075730766,
"wallclock": "2026-05-23T12:46:20.662475",
"loss": 0.0808,
"grad_norm": 0.7637086510658264,
"learning_rate": 6.441822810924555e-06,
"step_time_sec": 114.86,
"gpu": [
{
"gpu": 0,
"mem_allocated_gb": 33.45,
"mem_reserved_gb": 74.24
},
{
"gpu": 1,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 2,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 3,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
}
]
},
{
"step": 3000,
"epoch": 0.8493170075730766,
"wallclock": "2026-05-23T12:47:14.237339",
"eval_loss": 0.10439032316207886,
"eval_runtime": 53.4665,
"eval_samples_per_second": 4.676,
"eval_steps_per_second": 1.178,
"gpu": [
{
"gpu": 0,
"mem_allocated_gb": 39.05,
"mem_reserved_gb": 74.24
},
{
"gpu": 1,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 2,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 3,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
}
]
},
{
"step": 3005,
"epoch": 0.8507325359190318,
"wallclock": "2026-05-23T12:50:47.509195",
"loss": 0.0871,
"grad_norm": 1.2917267084121704,
"learning_rate": 6.430843623309815e-06,
"step_time_sec": 266.85
},
{
"step": 3010,
"epoch": 0.8521480642649869,
"wallclock": "2026-05-23T12:52:42.040424",
"loss": 0.0832,
"grad_norm": 1.129804015159607,
"learning_rate": 6.419856916055453e-06,
"step_time_sec": 114.53
},
{
"step": 3015,
"epoch": 0.8535635926109421,
"wallclock": "2026-05-23T12:54:39.916099",
"loss": 0.0821,
"grad_norm": 0.72309809923172,
"learning_rate": 6.408862746900884e-06,
"step_time_sec": 117.88
},
{
"step": 3020,
"epoch": 0.8549791209568972,
"wallclock": "2026-05-23T12:56:33.338244",
"loss": 0.0764,
"grad_norm": 0.6796430349349976,
"learning_rate": 6.397861173624745e-06,
"step_time_sec": 113.42
},
{
"step": 3025,
"epoch": 0.8563946493028523,
"wallclock": "2026-05-23T12:58:26.302630",
"loss": 0.0984,
"grad_norm": 1.0264241695404053,
"learning_rate": 6.386852254044582e-06,
"step_time_sec": 112.96
},
{
"step": 3030,
"epoch": 0.8578101776488074,
"wallclock": "2026-05-23T13:00:19.802002",
"loss": 0.0875,
"grad_norm": 1.4211701154708862,
"learning_rate": 6.375836046016547e-06,
"step_time_sec": 113.5
},
{
"step": 3035,
"epoch": 0.8592257059947626,
"wallclock": "2026-05-23T13:02:13.721659",
"loss": 0.0833,
"grad_norm": 1.0724290609359741,
"learning_rate": 6.3648126074350955e-06,
"step_time_sec": 113.92
},
{
"step": 3040,
"epoch": 0.8606412343407177,
"wallclock": "2026-05-23T13:04:08.151856",
"loss": 0.0943,
"grad_norm": 0.9527065753936768,
"learning_rate": 6.353781996232689e-06,
"step_time_sec": 114.43
},
{
"step": 3045,
"epoch": 0.8620567626866728,
"wallclock": "2026-05-23T13:06:02.084910",
"loss": 0.0915,
"grad_norm": 0.9171473979949951,
"learning_rate": 6.342744270379471e-06,
"step_time_sec": 113.93
},
{
"step": 3050,
"epoch": 0.8634722910326279,
"wallclock": "2026-05-23T13:07:56.129979",
"loss": 0.0772,
"grad_norm": 1.1974050998687744,
"learning_rate": 6.331699487882987e-06,
"step_time_sec": 114.05,
"gpu": [
{
"gpu": 0,
"mem_allocated_gb": 33.45,
"mem_reserved_gb": 74.24
},
{
"gpu": 1,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 2,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 3,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
}
]
},
{
"step": 3055,
"epoch": 0.8648878193785831,
"wallclock": "2026-05-23T13:09:50.014721",
"loss": 0.079,
"grad_norm": 1.2516535520553589,
"learning_rate": 6.320647706787866e-06,
"step_time_sec": 113.88
},
{
"step": 3060,
"epoch": 0.8663033477245382,
"wallclock": "2026-05-23T13:11:43.877111",
"loss": 0.0819,
"grad_norm": 0.8899274468421936,
"learning_rate": 6.30958898517551e-06,
"step_time_sec": 113.86
},
{
"step": 3065,
"epoch": 0.8677188760704934,
"wallclock": "2026-05-23T13:13:37.608088",
"loss": 0.0766,
"grad_norm": 0.6620562076568604,
"learning_rate": 6.298523381163805e-06,
"step_time_sec": 113.73
},
{
"step": 3070,
"epoch": 0.8691344044164484,
"wallclock": "2026-05-23T13:15:31.210530",
"loss": 0.0868,
"grad_norm": 1.2216447591781616,
"learning_rate": 6.287450952906802e-06,
"step_time_sec": 113.6
},
{
"step": 3075,
"epoch": 0.8705499327624036,
"wallclock": "2026-05-23T13:17:25.173111",
"loss": 0.0823,
"grad_norm": 1.8554191589355469,
"learning_rate": 6.276371758594416e-06,
"step_time_sec": 113.96
},
{
"step": 3080,
"epoch": 0.8719654611083587,
"wallclock": "2026-05-23T13:19:19.783902",
"loss": 0.1078,
"grad_norm": 1.097886085510254,
"learning_rate": 6.265285856452123e-06,
"step_time_sec": 114.61
},
{
"step": 3085,
"epoch": 0.8733809894543139,
"wallclock": "2026-05-23T13:21:13.021188",
"loss": 0.1032,
"grad_norm": 0.9588475227355957,
"learning_rate": 6.254193304740648e-06,
"step_time_sec": 113.24
},
{
"step": 3090,
"epoch": 0.8747965178002689,
"wallclock": "2026-05-23T13:23:05.522960",
"loss": 0.0746,
"grad_norm": 0.9044705629348755,
"learning_rate": 6.243094161755664e-06,
"step_time_sec": 112.5
},
{
"step": 3095,
"epoch": 0.876212046146224,
"wallclock": "2026-05-23T13:25:01.305677",
"loss": 0.0996,
"grad_norm": 1.350035309791565,
"learning_rate": 6.231988485827483e-06,
"step_time_sec": 115.78
},
{
"step": 3100,
"epoch": 0.8776275744921792,
"wallclock": "2026-05-23T13:26:54.684490",
"loss": 0.0947,
"grad_norm": 1.00934898853302,
"learning_rate": 6.220876335320752e-06,
"step_time_sec": 113.38,
"gpu": [
{
"gpu": 0,
"mem_allocated_gb": 33.45,
"mem_reserved_gb": 74.24
},
{
"gpu": 1,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 2,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 3,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
}
]
},
{
"step": 3100,
"epoch": 0.8776275744921792,
"wallclock": "2026-05-23T13:27:47.250665",
"eval_loss": 0.10196959972381592,
"eval_runtime": 52.4629,
"eval_samples_per_second": 4.765,
"eval_steps_per_second": 1.201,
"gpu": [
{
"gpu": 0,
"mem_allocated_gb": 39.05,
"mem_reserved_gb": 74.24
},
{
"gpu": 1,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 2,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 3,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
}
]
},
{
"step": 3105,
"epoch": 0.8790431028381344,
"wallclock": "2026-05-23T13:31:22.745504",
"loss": 0.076,
"grad_norm": 1.0037931203842163,
"learning_rate": 6.209757768634142e-06,
"step_time_sec": 268.06
},
{
"step": 3110,
"epoch": 0.8804586311840895,
"wallclock": "2026-05-23T13:33:17.015868",
"loss": 0.1029,
"grad_norm": 1.383480191230774,
"learning_rate": 6.1986328442000425e-06,
"step_time_sec": 114.27
},
{
"step": 3115,
"epoch": 0.8818741595300446,
"wallclock": "2026-05-23T13:35:09.821778",
"loss": 0.0865,
"grad_norm": 1.2162877321243286,
"learning_rate": 6.18750162048426e-06,
"step_time_sec": 112.81
},
{
"step": 3120,
"epoch": 0.8832896878759997,
"wallclock": "2026-05-23T13:37:02.349154",
"loss": 0.0919,
"grad_norm": 0.8934468626976013,
"learning_rate": 6.176364155985701e-06,
"step_time_sec": 112.53
},
{
"step": 3125,
"epoch": 0.8847052162219549,
"wallclock": "2026-05-23T13:38:56.196594",
"loss": 0.0939,
"grad_norm": 0.848867654800415,
"learning_rate": 6.165220509236076e-06,
"step_time_sec": 113.85
},
{
"step": 3130,
"epoch": 0.88612074456791,
"wallclock": "2026-05-23T13:40:49.415671",
"loss": 0.0871,
"grad_norm": 1.3182566165924072,
"learning_rate": 6.1540707387995775e-06,
"step_time_sec": 113.22
},
{
"step": 3135,
"epoch": 0.887536272913865,
"wallclock": "2026-05-23T13:42:44.238969",
"loss": 0.1092,
"grad_norm": 1.0121556520462036,
"learning_rate": 6.1429149032725875e-06,
"step_time_sec": 114.82
},
{
"step": 3140,
"epoch": 0.8889518012598202,
"wallclock": "2026-05-23T13:44:39.595399",
"loss": 0.0762,
"grad_norm": 1.2405686378479004,
"learning_rate": 6.13175306128336e-06,
"step_time_sec": 115.36
},
{
"step": 3145,
"epoch": 0.8903673296057754,
"wallclock": "2026-05-23T13:46:34.620333",
"loss": 0.0945,
"grad_norm": 1.2402104139328003,
"learning_rate": 6.120585271491713e-06,
"step_time_sec": 115.02
},
{
"step": 3150,
"epoch": 0.8917828579517305,
"wallclock": "2026-05-23T13:48:29.023383",
"loss": 0.0857,
"grad_norm": 0.910408616065979,
"learning_rate": 6.1094115925887235e-06,
"step_time_sec": 114.4,
"gpu": [
{
"gpu": 0,
"mem_allocated_gb": 33.45,
"mem_reserved_gb": 74.24
},
{
"gpu": 1,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 2,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 3,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
}
]
},
{
"step": 3155,
"epoch": 0.8931983862976857,
"wallclock": "2026-05-23T13:50:23.390785",
"loss": 0.0891,
"grad_norm": 0.9833782315254211,
"learning_rate": 6.098232083296423e-06,
"step_time_sec": 114.37
},
{
"step": 3160,
"epoch": 0.8946139146436407,
"wallclock": "2026-05-23T13:52:17.597934",
"loss": 0.0749,
"grad_norm": 1.2741199731826782,
"learning_rate": 6.087046802367476e-06,
"step_time_sec": 114.21
},
{
"step": 3165,
"epoch": 0.8960294429895959,
"wallclock": "2026-05-23T13:54:11.564969",
"loss": 0.0801,
"grad_norm": 1.4001483917236328,
"learning_rate": 6.075855808584886e-06,
"step_time_sec": 113.97
},
{
"step": 3170,
"epoch": 0.897444971335551,
"wallclock": "2026-05-23T13:56:05.962888",
"loss": 0.0825,
"grad_norm": 0.909929633140564,
"learning_rate": 6.064659160761676e-06,
"step_time_sec": 114.4
},
{
"step": 3175,
"epoch": 0.8988604996815062,
"wallclock": "2026-05-23T13:58:01.036489",
"loss": 0.0584,
"grad_norm": 0.8718348145484924,
"learning_rate": 6.053456917740585e-06,
"step_time_sec": 115.07
},
{
"step": 3180,
"epoch": 0.9002760280274612,
"wallclock": "2026-05-23T13:59:54.455386",
"loss": 0.0979,
"grad_norm": 1.4148125648498535,
"learning_rate": 6.042249138393753e-06,
"step_time_sec": 113.42
},
{
"step": 3185,
"epoch": 0.9016915563734164,
"wallclock": "2026-05-23T14:01:49.207549",
"loss": 0.0914,
"grad_norm": 0.9834646582603455,
"learning_rate": 6.031035881622422e-06,
"step_time_sec": 114.75
},
{
"step": 3190,
"epoch": 0.9031070847193715,
"wallclock": "2026-05-23T14:03:45.252526",
"loss": 0.1002,
"grad_norm": 1.3153408765792847,
"learning_rate": 6.019817206356615e-06,
"step_time_sec": 116.04
},
{
"step": 3195,
"epoch": 0.9045226130653267,
"wallclock": "2026-05-23T14:05:40.611460",
"loss": 0.0856,
"grad_norm": 0.9440031051635742,
"learning_rate": 6.008593171554833e-06,
"step_time_sec": 115.36
},
{
"step": 3200,
"epoch": 0.9059381414112817,
"wallclock": "2026-05-23T14:07:53.973146",
"loss": 0.0969,
"grad_norm": 1.2231155633926392,
"learning_rate": 5.997363836203744e-06,
"step_time_sec": 133.36,
"gpu": [
{
"gpu": 0,
"mem_allocated_gb": 33.45,
"mem_reserved_gb": 74.24
},
{
"gpu": 1,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 2,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 3,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
}
]
},
{
"step": 3200,
"epoch": 0.9059381414112817,
"wallclock": "2026-05-23T14:09:02.006751",
"eval_loss": 0.09937935322523117,
"eval_runtime": 67.9272,
"eval_samples_per_second": 3.68,
"eval_steps_per_second": 0.927,
"gpu": [
{
"gpu": 0,
"mem_allocated_gb": 39.05,
"mem_reserved_gb": 74.24
},
{
"gpu": 1,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 2,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 3,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
}
]
},
{
"step": 3205,
"epoch": 0.9073536697572369,
"wallclock": "2026-05-23T14:12:39.937912",
"loss": 0.1062,
"grad_norm": 1.7902098894119263,
"learning_rate": 5.98612925931787e-06,
"step_time_sec": 285.96
},
{
"step": 3210,
"epoch": 0.908769198103192,
"wallclock": "2026-05-23T14:14:33.769972",
"loss": 0.0924,
"grad_norm": 1.207891583442688,
"learning_rate": 5.974889499939283e-06,
"step_time_sec": 113.83
},
{
"step": 3215,
"epoch": 0.9101847264491472,
"wallclock": "2026-05-23T14:16:28.806370",
"loss": 0.0685,
"grad_norm": 1.0537505149841309,
"learning_rate": 5.96364461713729e-06,
"step_time_sec": 115.04
},
{
"step": 3220,
"epoch": 0.9116002547951023,
"wallclock": "2026-05-23T14:18:23.631952",
"loss": 0.0881,
"grad_norm": 1.5671195983886719,
"learning_rate": 5.952394670008119e-06,
"step_time_sec": 114.83
},
{
"step": 3225,
"epoch": 0.9130157831410574,
"wallclock": "2026-05-23T14:20:17.612828",
"loss": 0.1033,
"grad_norm": 1.521396517753601,
"learning_rate": 5.94113971767462e-06,
"step_time_sec": 113.98
},
{
"step": 3230,
"epoch": 0.9144313114870125,
"wallclock": "2026-05-23T14:22:11.445585",
"loss": 0.0618,
"grad_norm": 0.9208618402481079,
"learning_rate": 5.9298798192859434e-06,
"step_time_sec": 113.83
},
{
"step": 3235,
"epoch": 0.9158468398329677,
"wallclock": "2026-05-23T14:24:06.144035",
"loss": 0.0873,
"grad_norm": 1.1370309591293335,
"learning_rate": 5.9186150340172325e-06,
"step_time_sec": 114.7
},
{
"step": 3240,
"epoch": 0.9172623681789228,
"wallclock": "2026-05-23T14:25:59.240016",
"loss": 0.0803,
"grad_norm": 1.02957022190094,
"learning_rate": 5.907345421069314e-06,
"step_time_sec": 113.1
},
{
"step": 3245,
"epoch": 0.9186778965248779,
"wallclock": "2026-05-23T14:27:52.197012",
"loss": 0.0811,
"grad_norm": 1.390236496925354,
"learning_rate": 5.896071039668388e-06,
"step_time_sec": 112.96
},
{
"step": 3250,
"epoch": 0.920093424870833,
"wallclock": "2026-05-23T14:29:46.705683",
"loss": 0.0873,
"grad_norm": 1.451936960220337,
"learning_rate": 5.8847919490657114e-06,
"step_time_sec": 114.51,
"gpu": [
{
"gpu": 0,
"mem_allocated_gb": 33.45,
"mem_reserved_gb": 74.24
},
{
"gpu": 1,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 2,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 3,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
}
]
},
{
"step": 3255,
"epoch": 0.9215089532167882,
"wallclock": "2026-05-23T14:31:40.040364",
"loss": 0.087,
"grad_norm": 1.092909574508667,
"learning_rate": 5.873508208537291e-06,
"step_time_sec": 113.33
},
{
"step": 3260,
"epoch": 0.9229244815627433,
"wallclock": "2026-05-23T14:33:33.153697",
"loss": 0.0701,
"grad_norm": 1.0500355958938599,
"learning_rate": 5.8622198773835725e-06,
"step_time_sec": 113.11
},
{
"step": 3265,
"epoch": 0.9243400099086985,
"wallclock": "2026-05-23T14:35:26.975118",
"loss": 0.0858,
"grad_norm": 1.4215220212936401,
"learning_rate": 5.850927014929124e-06,
"step_time_sec": 113.82
},
{
"step": 3270,
"epoch": 0.9257555382546535,
"wallclock": "2026-05-23T14:37:21.438405",
"loss": 0.0904,
"grad_norm": 1.1870381832122803,
"learning_rate": 5.83962968052233e-06,
"step_time_sec": 114.46
},
{
"step": 3275,
"epoch": 0.9271710666006087,
"wallclock": "2026-05-23T14:39:15.451984",
"loss": 0.0791,
"grad_norm": 0.9800876379013062,
"learning_rate": 5.828327933535075e-06,
"step_time_sec": 114.01
},
{
"step": 3280,
"epoch": 0.9285865949465638,
"wallclock": "2026-05-23T14:41:09.339469",
"loss": 0.0825,
"grad_norm": 1.2808606624603271,
"learning_rate": 5.817021833362434e-06,
"step_time_sec": 113.89
},
{
"step": 3285,
"epoch": 0.930002123292519,
"wallclock": "2026-05-23T14:43:02.863576",
"loss": 0.1006,
"grad_norm": 0.8630105257034302,
"learning_rate": 5.805711439422361e-06,
"step_time_sec": 113.52
},
{
"step": 3290,
"epoch": 0.931417651638474,
"wallclock": "2026-05-23T14:44:56.304812",
"loss": 0.0951,
"grad_norm": 1.7691140174865723,
"learning_rate": 5.794396811155372e-06,
"step_time_sec": 113.44
},
{
"step": 3295,
"epoch": 0.9328331799844292,
"wallclock": "2026-05-23T14:46:51.225086",
"loss": 0.0859,
"grad_norm": 1.175764799118042,
"learning_rate": 5.78307800802424e-06,
"step_time_sec": 114.92
},
{
"step": 3300,
"epoch": 0.9342487083303843,
"wallclock": "2026-05-23T14:48:45.011673",
"loss": 0.0789,
"grad_norm": 1.243912696838379,
"learning_rate": 5.771755089513678e-06,
"step_time_sec": 113.79,
"gpu": [
{
"gpu": 0,
"mem_allocated_gb": 33.45,
"mem_reserved_gb": 74.24
},
{
"gpu": 1,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 2,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 3,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
}
]
},
{
"step": 3300,
"epoch": 0.9342487083303843,
"wallclock": "2026-05-23T14:49:37.428370",
"eval_loss": 0.09591619670391083,
"eval_runtime": 52.3234,
"eval_samples_per_second": 4.778,
"eval_steps_per_second": 1.204,
"gpu": [
{
"gpu": 0,
"mem_allocated_gb": 39.05,
"mem_reserved_gb": 74.24
},
{
"gpu": 1,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 2,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 3,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
}
]
},
{
"step": 3305,
"epoch": 0.9356642366763395,
"wallclock": "2026-05-23T14:53:13.024206",
"loss": 0.0852,
"grad_norm": 1.0862802267074585,
"learning_rate": 5.760428115130021e-06,
"step_time_sec": 268.01
},
{
"step": 3310,
"epoch": 0.9370797650222946,
"wallclock": "2026-05-23T14:55:06.032103",
"loss": 0.0869,
"grad_norm": 1.217329502105713,
"learning_rate": 5.749097144400929e-06,
"step_time_sec": 113.01
},
{
"step": 3315,
"epoch": 0.9384952933682497,
"wallclock": "2026-05-23T14:57:00.935813",
"loss": 0.0695,
"grad_norm": 0.9838262796401978,
"learning_rate": 5.737762236875057e-06,
"step_time_sec": 114.9
},
{
"step": 3320,
"epoch": 0.9399108217142048,
"wallclock": "2026-05-23T14:58:55.441455",
"loss": 0.0866,
"grad_norm": 1.0086387395858765,
"learning_rate": 5.726423452121751e-06,
"step_time_sec": 114.51
},
{
"step": 3325,
"epoch": 0.94132635006016,
"wallclock": "2026-05-23T15:00:49.538622",
"loss": 0.0939,
"grad_norm": 1.14065420627594,
"learning_rate": 5.7150808497307345e-06,
"step_time_sec": 114.1
},
{
"step": 3330,
"epoch": 0.9427418784061151,
"wallclock": "2026-05-23T15:02:45.245821",
"loss": 0.0974,
"grad_norm": 1.3234528303146362,
"learning_rate": 5.7037344893117956e-06,
"step_time_sec": 115.71
},
{
"step": 3335,
"epoch": 0.9441574067520702,
"wallclock": "2026-05-23T15:04:37.650997",
"loss": 0.0788,
"grad_norm": 1.4045474529266357,
"learning_rate": 5.692384430494466e-06,
"step_time_sec": 112.41
},
{
"step": 3340,
"epoch": 0.9455729350980253,
"wallclock": "2026-05-23T15:06:32.368133",
"loss": 0.0849,
"grad_norm": 1.256629228591919,
"learning_rate": 5.6810307329277226e-06,
"step_time_sec": 114.72
},
{
"step": 3345,
"epoch": 0.9469884634439805,
"wallclock": "2026-05-23T15:08:26.827602",
"loss": 0.0824,
"grad_norm": 1.130339503288269,
"learning_rate": 5.669673456279659e-06,
"step_time_sec": 114.46
},
{
"step": 3350,
"epoch": 0.9484039917899356,
"wallclock": "2026-05-23T15:10:21.207468",
"loss": 0.0693,
"grad_norm": 1.282491683959961,
"learning_rate": 5.65831266023718e-06,
"step_time_sec": 114.38,
"gpu": [
{
"gpu": 0,
"mem_allocated_gb": 33.45,
"mem_reserved_gb": 74.24
},
{
"gpu": 1,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 2,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 3,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
}
]
},
{
"step": 3355,
"epoch": 0.9498195201358908,
"wallclock": "2026-05-23T15:12:15.228993",
"loss": 0.0891,
"grad_norm": 1.3946020603179932,
"learning_rate": 5.646948404505686e-06,
"step_time_sec": 114.02
},
{
"step": 3360,
"epoch": 0.9512350484818458,
"wallclock": "2026-05-23T15:14:09.438263",
"loss": 0.0761,
"grad_norm": 1.1103034019470215,
"learning_rate": 5.635580748808763e-06,
"step_time_sec": 114.21
},
{
"step": 3365,
"epoch": 0.952650576827801,
"wallclock": "2026-05-23T15:16:05.154445",
"loss": 0.0839,
"grad_norm": 1.1873400211334229,
"learning_rate": 5.624209752887858e-06,
"step_time_sec": 115.72
},
{
"step": 3370,
"epoch": 0.9540661051737561,
"wallclock": "2026-05-23T15:17:59.804004",
"loss": 0.0777,
"grad_norm": 0.7737529277801514,
"learning_rate": 5.612835476501979e-06,
"step_time_sec": 114.65
},
{
"step": 3375,
"epoch": 0.9554816335197113,
"wallclock": "2026-05-23T15:19:54.892005",
"loss": 0.0812,
"grad_norm": 0.9554314613342285,
"learning_rate": 5.601457979427369e-06,
"step_time_sec": 115.09
},
{
"step": 3380,
"epoch": 0.9568971618656663,
"wallclock": "2026-05-23T15:21:50.036983",
"loss": 0.0792,
"grad_norm": 1.1392946243286133,
"learning_rate": 5.5900773214572016e-06,
"step_time_sec": 115.14
},
{
"step": 3385,
"epoch": 0.9583126902116215,
"wallclock": "2026-05-23T15:23:43.201325",
"loss": 0.0727,
"grad_norm": 1.3224341869354248,
"learning_rate": 5.578693562401257e-06,
"step_time_sec": 113.16
},
{
"step": 3390,
"epoch": 0.9597282185575766,
"wallclock": "2026-05-23T15:25:36.809850",
"loss": 0.0755,
"grad_norm": 1.0473873615264893,
"learning_rate": 5.567306762085619e-06,
"step_time_sec": 113.61
},
{
"step": 3395,
"epoch": 0.9611437469035318,
"wallclock": "2026-05-23T15:27:31.712929",
"loss": 0.086,
"grad_norm": 0.9381260871887207,
"learning_rate": 5.555916980352349e-06,
"step_time_sec": 114.9
},
{
"step": 3400,
"epoch": 0.9625592752494869,
"wallclock": "2026-05-23T15:29:26.406120",
"loss": 0.0665,
"grad_norm": 1.002871036529541,
"learning_rate": 5.544524277059179e-06,
"step_time_sec": 114.69,
"gpu": [
{
"gpu": 0,
"mem_allocated_gb": 33.45,
"mem_reserved_gb": 74.24
},
{
"gpu": 1,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 2,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 3,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
}
]
},
{
"step": 3400,
"epoch": 0.9625592752494869,
"wallclock": "2026-05-23T15:30:19.956625",
"eval_loss": 0.09332611411809921,
"eval_runtime": 53.4588,
"eval_samples_per_second": 4.676,
"eval_steps_per_second": 1.178,
"gpu": [
{
"gpu": 0,
"mem_allocated_gb": 39.05,
"mem_reserved_gb": 74.24
},
{
"gpu": 1,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 2,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 3,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
}
]
},
{
"step": 3405,
"epoch": 0.963974803595442,
"wallclock": "2026-05-23T15:33:54.931063",
"loss": 0.0858,
"grad_norm": 1.1658086776733398,
"learning_rate": 5.5331287120791954e-06,
"step_time_sec": 268.52
},
{
"step": 3410,
"epoch": 0.9653903319413971,
"wallclock": "2026-05-23T15:35:48.621597",
"loss": 0.0581,
"grad_norm": 0.7146378755569458,
"learning_rate": 5.5217303453005225e-06,
"step_time_sec": 113.69
},
{
"step": 3415,
"epoch": 0.9668058602873523,
"wallclock": "2026-05-23T15:37:42.046818",
"loss": 0.0755,
"grad_norm": 1.1899656057357788,
"learning_rate": 5.51032923662601e-06,
"step_time_sec": 113.43
},
{
"step": 3420,
"epoch": 0.9682213886333074,
"wallclock": "2026-05-23T15:39:35.748693",
"loss": 0.0946,
"grad_norm": 1.0844637155532837,
"learning_rate": 5.498925445972918e-06,
"step_time_sec": 113.7
},
{
"step": 3425,
"epoch": 0.9696369169792625,
"wallclock": "2026-05-23T15:41:30.464137",
"loss": 0.0793,
"grad_norm": 1.2279070615768433,
"learning_rate": 5.4875190332726e-06,
"step_time_sec": 114.72
},
{
"step": 3430,
"epoch": 0.9710524453252176,
"wallclock": "2026-05-23T15:43:26.510045",
"loss": 0.0798,
"grad_norm": 0.8382053971290588,
"learning_rate": 5.476110058470192e-06,
"step_time_sec": 116.05
},
{
"step": 3435,
"epoch": 0.9724679736711728,
"wallclock": "2026-05-23T15:45:21.432258",
"loss": 0.0813,
"grad_norm": 0.9018872976303101,
"learning_rate": 5.464698581524292e-06,
"step_time_sec": 114.92
},
{
"step": 3440,
"epoch": 0.9738835020171279,
"wallclock": "2026-05-23T15:47:17.457363",
"loss": 0.0759,
"grad_norm": 1.4535553455352783,
"learning_rate": 5.453284662406646e-06,
"step_time_sec": 116.03
},
{
"step": 3445,
"epoch": 0.9752990303630831,
"wallclock": "2026-05-23T15:49:11.750836",
"loss": 0.0817,
"grad_norm": 1.2279826402664185,
"learning_rate": 5.4418683611018416e-06,
"step_time_sec": 114.29
},
{
"step": 3450,
"epoch": 0.9767145587090381,
"wallclock": "2026-05-23T15:51:05.914914",
"loss": 0.074,
"grad_norm": 1.2694281339645386,
"learning_rate": 5.430449737606978e-06,
"step_time_sec": 114.16,
"gpu": [
{
"gpu": 0,
"mem_allocated_gb": 33.45,
"mem_reserved_gb": 74.24
},
{
"gpu": 1,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 2,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 3,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
}
]
},
{
"step": 3455,
"epoch": 0.9781300870549933,
"wallclock": "2026-05-23T15:53:02.864640",
"loss": 0.088,
"grad_norm": 1.1676980257034302,
"learning_rate": 5.4190288519313626e-06,
"step_time_sec": 116.95
},
{
"step": 3460,
"epoch": 0.9795456154009484,
"wallclock": "2026-05-23T15:54:57.888514",
"loss": 0.0917,
"grad_norm": 1.2617217302322388,
"learning_rate": 5.407605764096193e-06,
"step_time_sec": 115.02
},
{
"step": 3465,
"epoch": 0.9809611437469036,
"wallclock": "2026-05-23T15:56:52.536839",
"loss": 0.0839,
"grad_norm": 2.16770339012146,
"learning_rate": 5.396180534134234e-06,
"step_time_sec": 114.65
},
{
"step": 3470,
"epoch": 0.9823766720928586,
"wallclock": "2026-05-23T15:58:47.764833",
"loss": 0.077,
"grad_norm": 1.0116336345672607,
"learning_rate": 5.384753222089515e-06,
"step_time_sec": 115.23
},
{
"step": 3475,
"epoch": 0.9837922004388138,
"wallclock": "2026-05-23T16:00:43.362477",
"loss": 0.0812,
"grad_norm": 1.185133457183838,
"learning_rate": 5.373323888017003e-06,
"step_time_sec": 115.6
},
{
"step": 3480,
"epoch": 0.9852077287847689,
"wallclock": "2026-05-23T16:02:37.887940",
"loss": 0.0719,
"grad_norm": 1.3264069557189941,
"learning_rate": 5.361892591982291e-06,
"step_time_sec": 114.53
},
{
"step": 3485,
"epoch": 0.9866232571307241,
"wallclock": "2026-05-23T16:04:31.882696",
"loss": 0.064,
"grad_norm": 0.7329959273338318,
"learning_rate": 5.350459394061287e-06,
"step_time_sec": 113.99
},
{
"step": 3490,
"epoch": 0.9880387854766791,
"wallclock": "2026-05-23T16:06:25.692519",
"loss": 0.0819,
"grad_norm": 0.8542604446411133,
"learning_rate": 5.339024354339892e-06,
"step_time_sec": 113.81
},
{
"step": 3495,
"epoch": 0.9894543138226343,
"wallclock": "2026-05-23T16:08:20.147221",
"loss": 0.0867,
"grad_norm": 1.266552448272705,
"learning_rate": 5.327587532913685e-06,
"step_time_sec": 114.45
},
{
"step": 3500,
"epoch": 0.9908698421685894,
"wallclock": "2026-05-23T16:10:14.094051",
"loss": 0.0898,
"grad_norm": 1.8799265623092651,
"learning_rate": 5.31614898988761e-06,
"step_time_sec": 113.95,
"gpu": [
{
"gpu": 0,
"mem_allocated_gb": 33.45,
"mem_reserved_gb": 74.24
},
{
"gpu": 1,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 2,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 3,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
}
]
},
{
"step": 3500,
"epoch": 0.9908698421685894,
"wallclock": "2026-05-23T16:11:07.029960",
"eval_loss": 0.08754169940948486,
"eval_runtime": 52.8398,
"eval_samples_per_second": 4.731,
"eval_steps_per_second": 1.192,
"gpu": [
{
"gpu": 0,
"mem_allocated_gb": 39.05,
"mem_reserved_gb": 74.24
},
{
"gpu": 1,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 2,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 3,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
}
]
},
{
"step": 3505,
"epoch": 0.9922853705145446,
"wallclock": "2026-05-23T16:14:43.226754",
"loss": 0.0769,
"grad_norm": 1.471697211265564,
"learning_rate": 5.3047087853756585e-06,
"step_time_sec": 269.13
},
{
"step": 3510,
"epoch": 0.9937008988604997,
"wallclock": "2026-05-23T16:16:37.120854",
"loss": 0.0858,
"grad_norm": 1.3194319009780884,
"learning_rate": 5.2932669795005545e-06,
"step_time_sec": 113.89
},
{
"step": 3515,
"epoch": 0.9951164272064548,
"wallclock": "2026-05-23T16:18:30.428191",
"loss": 0.0649,
"grad_norm": 1.7350393533706665,
"learning_rate": 5.281823632393436e-06,
"step_time_sec": 113.31
},
{
"step": 3520,
"epoch": 0.9965319555524099,
"wallclock": "2026-05-23T16:20:22.921865",
"loss": 0.08,
"grad_norm": 1.3547072410583496,
"learning_rate": 5.270378804193543e-06,
"step_time_sec": 112.49
},
{
"step": 3525,
"epoch": 0.9979474838983651,
"wallclock": "2026-05-23T16:22:17.038164",
"loss": 0.0836,
"grad_norm": 1.2849969863891602,
"learning_rate": 5.258932555047897e-06,
"step_time_sec": 114.12
},
{
"step": 3530,
"epoch": 0.9993630122443202,
"wallclock": "2026-05-23T16:24:11.663981",
"loss": 0.0811,
"grad_norm": 0.9789690971374512,
"learning_rate": 5.247484945110988e-06,
"step_time_sec": 114.63
},
{
"step": 3535,
"epoch": 1.0007785405902754,
"wallclock": "2026-05-23T16:26:14.307733",
"loss": 0.0578,
"grad_norm": 0.6540358066558838,
"learning_rate": 5.23603603454446e-06,
"step_time_sec": 122.64
},
{
"step": 3540,
"epoch": 1.0021940689362305,
"wallclock": "2026-05-23T16:28:05.823018",
"loss": 0.0478,
"grad_norm": 0.8033650517463684,
"learning_rate": 5.2245858835167854e-06,
"step_time_sec": 111.52
},
{
"step": 3545,
"epoch": 1.0036095972821857,
"wallclock": "2026-05-23T16:29:56.996787",
"loss": 0.0555,
"grad_norm": 1.4636964797973633,
"learning_rate": 5.213134552202963e-06,
"step_time_sec": 111.17
},
{
"step": 3550,
"epoch": 1.0050251256281406,
"wallclock": "2026-05-23T16:31:49.397682",
"loss": 0.0424,
"grad_norm": 0.8096024990081787,
"learning_rate": 5.201682100784194e-06,
"step_time_sec": 112.4,
"gpu": [
{
"gpu": 0,
"mem_allocated_gb": 33.45,
"mem_reserved_gb": 74.24
},
{
"gpu": 1,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 2,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 3,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
}
]
},
{
"step": 3555,
"epoch": 1.0064406539740958,
"wallclock": "2026-05-23T16:33:40.456471",
"loss": 0.0427,
"grad_norm": 1.144333839416504,
"learning_rate": 5.190228589447559e-06,
"step_time_sec": 111.06
},
{
"step": 3560,
"epoch": 1.007856182320051,
"wallclock": "2026-05-23T16:35:31.855205",
"loss": 0.0432,
"grad_norm": 1.8258119821548462,
"learning_rate": 5.1787740783857164e-06,
"step_time_sec": 111.4
},
{
"step": 3565,
"epoch": 1.009271710666006,
"wallclock": "2026-05-23T16:37:24.317151",
"loss": 0.0428,
"grad_norm": 1.1291868686676025,
"learning_rate": 5.167318627796577e-06,
"step_time_sec": 112.46
},
{
"step": 3570,
"epoch": 1.0106872390119612,
"wallclock": "2026-05-23T16:39:16.531936",
"loss": 0.0518,
"grad_norm": 2.1567795276641846,
"learning_rate": 5.155862297882985e-06,
"step_time_sec": 112.21
},
{
"step": 3575,
"epoch": 1.0121027673579164,
"wallclock": "2026-05-23T16:41:08.942223",
"loss": 0.045,
"grad_norm": 1.2312395572662354,
"learning_rate": 5.1444051488524115e-06,
"step_time_sec": 112.41
},
{
"step": 3580,
"epoch": 1.0135182957038715,
"wallclock": "2026-05-23T16:43:01.368164",
"loss": 0.0472,
"grad_norm": 1.38804030418396,
"learning_rate": 5.13294724091663e-06,
"step_time_sec": 112.43
},
{
"step": 3585,
"epoch": 1.0149338240498267,
"wallclock": "2026-05-23T16:44:53.319138",
"loss": 0.052,
"grad_norm": 1.0492668151855469,
"learning_rate": 5.1214886342914e-06,
"step_time_sec": 111.95
},
{
"step": 3590,
"epoch": 1.0163493523957818,
"wallclock": "2026-05-23T16:46:44.915175",
"loss": 0.0447,
"grad_norm": 0.9781032204627991,
"learning_rate": 5.110029389196155e-06,
"step_time_sec": 111.6
},
{
"step": 3595,
"epoch": 1.0177648807417368,
"wallclock": "2026-05-23T16:48:37.406030",
"loss": 0.0549,
"grad_norm": 1.2402184009552002,
"learning_rate": 5.0985695658536875e-06,
"step_time_sec": 112.49
},
{
"step": 3600,
"epoch": 1.019180409087692,
"wallclock": "2026-05-23T16:50:28.649642",
"loss": 0.0433,
"grad_norm": 1.0172066688537598,
"learning_rate": 5.08710922448982e-06,
"step_time_sec": 111.24,
"gpu": [
{
"gpu": 0,
"mem_allocated_gb": 33.45,
"mem_reserved_gb": 74.24
},
{
"gpu": 1,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 2,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 3,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
}
]
},
{
"step": 3600,
"epoch": 1.019180409087692,
"wallclock": "2026-05-23T16:51:23.450008",
"eval_loss": 0.08236898481845856,
"eval_runtime": 54.7048,
"eval_samples_per_second": 4.57,
"eval_steps_per_second": 1.152,
"gpu": [
{
"gpu": 0,
"mem_allocated_gb": 39.05,
"mem_reserved_gb": 74.24
},
{
"gpu": 1,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 2,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 3,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
}
]
},
{
"step": 3605,
"epoch": 1.020595937433647,
"wallclock": "2026-05-23T16:55:03.292833",
"loss": 0.0511,
"grad_norm": 1.146644949913025,
"learning_rate": 5.0756484253331075e-06,
"step_time_sec": 274.64
},
{
"step": 3610,
"epoch": 1.0220114657796022,
"wallclock": "2026-05-23T16:56:56.732361",
"loss": 0.0483,
"grad_norm": 1.9536317586898804,
"learning_rate": 5.0641872286145025e-06,
"step_time_sec": 113.44
},
{
"step": 3615,
"epoch": 1.0234269941255574,
"wallclock": "2026-05-23T16:58:49.705561",
"loss": 0.0479,
"grad_norm": 0.8863971829414368,
"learning_rate": 5.052725694567052e-06,
"step_time_sec": 112.97
},
{
"step": 3620,
"epoch": 1.0248425224715125,
"wallclock": "2026-05-23T17:00:43.544919",
"loss": 0.0442,
"grad_norm": 0.6922377943992615,
"learning_rate": 5.0412638834255755e-06,
"step_time_sec": 113.84
},
{
"step": 3625,
"epoch": 1.0262580508174677,
"wallclock": "2026-05-23T17:02:36.269444",
"loss": 0.0559,
"grad_norm": 1.2457826137542725,
"learning_rate": 5.029801855426345e-06,
"step_time_sec": 112.72
},
{
"step": 3630,
"epoch": 1.0276735791634228,
"wallclock": "2026-05-23T17:04:30.429515",
"loss": 0.0469,
"grad_norm": 1.0091979503631592,
"learning_rate": 5.018339670806775e-06,
"step_time_sec": 114.16
},
{
"step": 3635,
"epoch": 1.0290891075093778,
"wallclock": "2026-05-23T17:06:23.820278",
"loss": 0.0491,
"grad_norm": 1.115814208984375,
"learning_rate": 5.006877389805106e-06,
"step_time_sec": 113.39
},
{
"step": 3640,
"epoch": 1.030504635855333,
"wallclock": "2026-05-23T17:08:16.705614",
"loss": 0.0435,
"grad_norm": 1.3016657829284668,
"learning_rate": 4.995415072660077e-06,
"step_time_sec": 112.89
},
{
"step": 3645,
"epoch": 1.031920164201288,
"wallclock": "2026-05-23T17:10:11.282102",
"loss": 0.0492,
"grad_norm": 1.312011957168579,
"learning_rate": 4.983952779610626e-06,
"step_time_sec": 114.58
},
{
"step": 3650,
"epoch": 1.0333356925472432,
"wallclock": "2026-05-23T17:12:06.127157",
"loss": 0.0436,
"grad_norm": 0.9364621639251709,
"learning_rate": 4.9724905708955575e-06,
"step_time_sec": 114.85,
"gpu": [
{
"gpu": 0,
"mem_allocated_gb": 33.45,
"mem_reserved_gb": 74.24
},
{
"gpu": 1,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 2,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 3,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
}
]
},
{
"step": 3655,
"epoch": 1.0347512208931984,
"wallclock": "2026-05-23T17:14:00.741230",
"loss": 0.0565,
"grad_norm": 1.8892085552215576,
"learning_rate": 4.9610285067532345e-06,
"step_time_sec": 114.61
},
{
"step": 3660,
"epoch": 1.0361667492391535,
"wallclock": "2026-05-23T17:15:55.594162",
"loss": 0.0476,
"grad_norm": 0.8621354103088379,
"learning_rate": 4.949566647421264e-06,
"step_time_sec": 114.85
},
{
"step": 3665,
"epoch": 1.0375822775851087,
"wallclock": "2026-05-23T17:17:49.683934",
"loss": 0.0406,
"grad_norm": 0.8497494459152222,
"learning_rate": 4.938105053136173e-06,
"step_time_sec": 114.09
},
{
"step": 3670,
"epoch": 1.0389978059310638,
"wallclock": "2026-05-23T17:19:44.142869",
"loss": 0.0365,
"grad_norm": 1.4974132776260376,
"learning_rate": 4.926643784133095e-06,
"step_time_sec": 114.46
},
{
"step": 3675,
"epoch": 1.040413334277019,
"wallclock": "2026-05-23T17:21:38.802035",
"loss": 0.0487,
"grad_norm": 0.9692957997322083,
"learning_rate": 4.915182900645454e-06,
"step_time_sec": 114.66
},
{
"step": 3680,
"epoch": 1.0418288626229741,
"wallclock": "2026-05-23T17:23:32.339493",
"loss": 0.0506,
"grad_norm": 1.0823785066604614,
"learning_rate": 4.903722462904653e-06,
"step_time_sec": 113.54
},
{
"step": 3685,
"epoch": 1.043244390968929,
"wallclock": "2026-05-23T17:25:27.273367",
"loss": 0.0385,
"grad_norm": 0.6259887218475342,
"learning_rate": 4.892262531139747e-06,
"step_time_sec": 114.93
},
{
"step": 3690,
"epoch": 1.0446599193148842,
"wallclock": "2026-05-23T17:27:22.317617",
"loss": 0.0461,
"grad_norm": 0.6526616811752319,
"learning_rate": 4.880803165577132e-06,
"step_time_sec": 115.04
},
{
"step": 3695,
"epoch": 1.0460754476608394,
"wallclock": "2026-05-23T17:29:17.202916",
"loss": 0.0497,
"grad_norm": 1.2579582929611206,
"learning_rate": 4.869344426440234e-06,
"step_time_sec": 114.89
},
{
"step": 3700,
"epoch": 1.0474909760067945,
"wallclock": "2026-05-23T17:31:10.559777",
"loss": 0.0453,
"grad_norm": 1.414987325668335,
"learning_rate": 4.857886373949179e-06,
"step_time_sec": 113.36,
"gpu": [
{
"gpu": 0,
"mem_allocated_gb": 33.45,
"mem_reserved_gb": 74.24
},
{
"gpu": 1,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 2,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 3,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
}
]
},
{
"step": 3700,
"epoch": 1.0474909760067945,
"wallclock": "2026-05-23T17:32:02.738420",
"eval_loss": 0.08643540740013123,
"eval_runtime": 52.0828,
"eval_samples_per_second": 4.8,
"eval_steps_per_second": 1.21,
"gpu": [
{
"gpu": 0,
"mem_allocated_gb": 39.05,
"mem_reserved_gb": 74.24
},
{
"gpu": 1,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 2,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 3,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
}
]
},
{
"step": 3705,
"epoch": 1.0489065043527497,
"wallclock": "2026-05-23T17:35:38.532817",
"loss": 0.0536,
"grad_norm": 1.0652250051498413,
"learning_rate": 4.846429068320488e-06,
"step_time_sec": 267.97
},
{
"step": 3710,
"epoch": 1.0503220326987048,
"wallclock": "2026-05-23T17:37:30.356092",
"loss": 0.0482,
"grad_norm": 1.0449877977371216,
"learning_rate": 4.834972569766762e-06,
"step_time_sec": 111.82
},
{
"step": 3715,
"epoch": 1.05173756104466,
"wallclock": "2026-05-23T17:39:23.408283",
"loss": 0.0397,
"grad_norm": 0.9513642191886902,
"learning_rate": 4.823516938496352e-06,
"step_time_sec": 113.05
},
{
"step": 3720,
"epoch": 1.0531530893906151,
"wallclock": "2026-05-23T17:41:16.527974",
"loss": 0.0328,
"grad_norm": 1.774491548538208,
"learning_rate": 4.812062234713054e-06,
"step_time_sec": 113.12
},
{
"step": 3725,
"epoch": 1.05456861773657,
"wallclock": "2026-05-23T17:43:08.761213",
"loss": 0.0485,
"grad_norm": 1.2329373359680176,
"learning_rate": 4.800608518615793e-06,
"step_time_sec": 112.23
},
{
"step": 3730,
"epoch": 1.0559841460825252,
"wallclock": "2026-05-23T17:45:02.615351",
"loss": 0.0527,
"grad_norm": 1.060661792755127,
"learning_rate": 4.789155850398301e-06,
"step_time_sec": 113.85
},
{
"step": 3735,
"epoch": 1.0573996744284804,
"wallclock": "2026-05-23T17:46:56.000441",
"loss": 0.0642,
"grad_norm": 1.075607180595398,
"learning_rate": 4.777704290248799e-06,
"step_time_sec": 113.39
},
{
"step": 3740,
"epoch": 1.0588152027744355,
"wallclock": "2026-05-23T17:48:49.574582",
"loss": 0.0388,
"grad_norm": 0.9697294235229492,
"learning_rate": 4.766253898349694e-06,
"step_time_sec": 113.57
},
{
"step": 3745,
"epoch": 1.0602307311203907,
"wallclock": "2026-05-23T17:50:41.983236",
"loss": 0.0409,
"grad_norm": 1.6531593799591064,
"learning_rate": 4.754804734877245e-06,
"step_time_sec": 112.41
},
{
"step": 3750,
"epoch": 1.0616462594663458,
"wallclock": "2026-05-23T17:52:35.437590",
"loss": 0.0355,
"grad_norm": 1.1890569925308228,
"learning_rate": 4.743356860001256e-06,
"step_time_sec": 113.45,
"gpu": [
{
"gpu": 0,
"mem_allocated_gb": 33.45,
"mem_reserved_gb": 74.24
},
{
"gpu": 1,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 2,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 3,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
}
]
},
{
"step": 3755,
"epoch": 1.063061787812301,
"wallclock": "2026-05-23T17:54:28.614407",
"loss": 0.0418,
"grad_norm": 1.71039879322052,
"learning_rate": 4.731910333884766e-06,
"step_time_sec": 113.18
},
{
"step": 3760,
"epoch": 1.0644773161582561,
"wallclock": "2026-05-23T17:56:21.627594",
"loss": 0.0414,
"grad_norm": 2.179187774658203,
"learning_rate": 4.720465216683718e-06,
"step_time_sec": 113.01
},
{
"step": 3765,
"epoch": 1.0658928445042113,
"wallclock": "2026-05-23T17:58:15.543251",
"loss": 0.0499,
"grad_norm": 1.6056452989578247,
"learning_rate": 4.70902156854665e-06,
"step_time_sec": 113.92
},
{
"step": 3770,
"epoch": 1.0673083728501664,
"wallclock": "2026-05-23T18:00:08.857393",
"loss": 0.0444,
"grad_norm": 1.382399320602417,
"learning_rate": 4.697579449614389e-06,
"step_time_sec": 113.31
},
{
"step": 3775,
"epoch": 1.0687239011961214,
"wallclock": "2026-05-23T18:02:01.237195",
"loss": 0.0639,
"grad_norm": 1.6109445095062256,
"learning_rate": 4.686138920019717e-06,
"step_time_sec": 112.38
},
{
"step": 3780,
"epoch": 1.0701394295420765,
"wallclock": "2026-05-23T18:03:54.810260",
"loss": 0.0491,
"grad_norm": 1.0975931882858276,
"learning_rate": 4.674700039887062e-06,
"step_time_sec": 113.57
},
{
"step": 3785,
"epoch": 1.0715549578880317,
"wallclock": "2026-05-23T18:05:49.257162",
"loss": 0.0489,
"grad_norm": 0.7262698411941528,
"learning_rate": 4.6632628693321925e-06,
"step_time_sec": 114.45
},
{
"step": 3790,
"epoch": 1.0729704862339868,
"wallclock": "2026-05-23T18:07:42.422565",
"loss": 0.0488,
"grad_norm": 1.3172861337661743,
"learning_rate": 4.651827468461885e-06,
"step_time_sec": 113.17
},
{
"step": 3795,
"epoch": 1.074386014579942,
"wallclock": "2026-05-23T18:09:36.139435",
"loss": 0.0466,
"grad_norm": 1.1381676197052002,
"learning_rate": 4.640393897373614e-06,
"step_time_sec": 113.72
},
{
"step": 3800,
"epoch": 1.0758015429258971,
"wallclock": "2026-05-23T18:11:30.211725",
"loss": 0.0403,
"grad_norm": 1.1041913032531738,
"learning_rate": 4.628962216155249e-06,
"step_time_sec": 114.07,
"gpu": [
{
"gpu": 0,
"mem_allocated_gb": 33.45,
"mem_reserved_gb": 74.24
},
{
"gpu": 1,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 2,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 3,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
}
]
},
{
"step": 3800,
"epoch": 1.0758015429258971,
"wallclock": "2026-05-23T18:12:22.307956",
"eval_loss": 0.08441882580518723,
"eval_runtime": 51.9948,
"eval_samples_per_second": 4.808,
"eval_steps_per_second": 1.212,
"gpu": [
{
"gpu": 0,
"mem_allocated_gb": 39.05,
"mem_reserved_gb": 74.24
},
{
"gpu": 1,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 2,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 3,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
}
]
},
{
"step": 3805,
"epoch": 1.0772170712718523,
"wallclock": "2026-05-23T18:15:57.451185",
"loss": 0.0379,
"grad_norm": 1.2826017141342163,
"learning_rate": 4.617532484884715e-06,
"step_time_sec": 267.24
},
{
"step": 3810,
"epoch": 1.0786325996178074,
"wallclock": "2026-05-23T18:17:50.121618",
"loss": 0.0338,
"grad_norm": 1.091307282447815,
"learning_rate": 4.606104763629693e-06,
"step_time_sec": 112.67
},
{
"step": 3815,
"epoch": 1.0800481279637624,
"wallclock": "2026-05-23T18:19:44.910197",
"loss": 0.058,
"grad_norm": 1.0848028659820557,
"learning_rate": 4.594679112447307e-06,
"step_time_sec": 114.79
},
{
"step": 3820,
"epoch": 1.0814636563097175,
"wallclock": "2026-05-23T18:21:40.024155",
"loss": 0.049,
"grad_norm": 1.1905133724212646,
"learning_rate": 4.5832555913837925e-06,
"step_time_sec": 115.11
},
{
"step": 3825,
"epoch": 1.0828791846556727,
"wallclock": "2026-05-23T18:23:34.370594",
"loss": 0.067,
"grad_norm": 1.21793532371521,
"learning_rate": 4.571834260474195e-06,
"step_time_sec": 114.35
},
{
"step": 3830,
"epoch": 1.0842947130016278,
"wallclock": "2026-05-23T18:25:28.664784",
"loss": 0.0395,
"grad_norm": 1.1224967241287231,
"learning_rate": 4.560415179742052e-06,
"step_time_sec": 114.29
},
{
"step": 3835,
"epoch": 1.085710241347583,
"wallclock": "2026-05-23T18:27:22.873783",
"loss": 0.0559,
"grad_norm": 0.9353971481323242,
"learning_rate": 4.5489984091990735e-06,
"step_time_sec": 114.21
},
{
"step": 3840,
"epoch": 1.0871257696935381,
"wallclock": "2026-05-23T18:29:17.206871",
"loss": 0.0554,
"grad_norm": 0.6831589937210083,
"learning_rate": 4.537584008844823e-06,
"step_time_sec": 114.33
},
{
"step": 3845,
"epoch": 1.0885412980394933,
"wallclock": "2026-05-23T18:31:12.398612",
"loss": 0.0525,
"grad_norm": 1.0940909385681152,
"learning_rate": 4.526172038666419e-06,
"step_time_sec": 115.19
},
{
"step": 3850,
"epoch": 1.0899568263854484,
"wallclock": "2026-05-23T18:33:06.117469",
"loss": 0.0573,
"grad_norm": 0.8475215435028076,
"learning_rate": 4.514762558638199e-06,
"step_time_sec": 113.72,
"gpu": [
{
"gpu": 0,
"mem_allocated_gb": 33.45,
"mem_reserved_gb": 74.24
},
{
"gpu": 1,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 2,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 3,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
}
]
},
{
"step": 3855,
"epoch": 1.0913723547314036,
"wallclock": "2026-05-23T18:34:59.556884",
"loss": 0.0483,
"grad_norm": 1.0420924425125122,
"learning_rate": 4.503355628721417e-06,
"step_time_sec": 113.44
},
{
"step": 3860,
"epoch": 1.0927878830773585,
"wallclock": "2026-05-23T18:36:53.139554",
"loss": 0.0524,
"grad_norm": 1.5134800672531128,
"learning_rate": 4.491951308863926e-06,
"step_time_sec": 113.58
},
{
"step": 3865,
"epoch": 1.0942034114233137,
"wallclock": "2026-05-23T18:38:46.961705",
"loss": 0.0327,
"grad_norm": 1.369831919670105,
"learning_rate": 4.480549658999862e-06,
"step_time_sec": 113.82
},
{
"step": 3870,
"epoch": 1.0956189397692688,
"wallclock": "2026-05-23T18:40:39.105763",
"loss": 0.0467,
"grad_norm": 1.45563542842865,
"learning_rate": 4.469150739049327e-06,
"step_time_sec": 112.14
},
{
"step": 3875,
"epoch": 1.097034468115224,
"wallclock": "2026-05-23T18:42:31.937185",
"loss": 0.0471,
"grad_norm": 0.9477264881134033,
"learning_rate": 4.45775460891808e-06,
"step_time_sec": 112.83
},
{
"step": 3880,
"epoch": 1.0984499964611791,
"wallclock": "2026-05-23T18:44:25.360640",
"loss": 0.0492,
"grad_norm": 0.7854604721069336,
"learning_rate": 4.446361328497215e-06,
"step_time_sec": 113.42
},
{
"step": 3885,
"epoch": 1.0998655248071343,
"wallclock": "2026-05-23T18:46:17.165445",
"loss": 0.0427,
"grad_norm": 0.7942948341369629,
"learning_rate": 4.434970957662849e-06,
"step_time_sec": 111.8
},
{
"step": 3890,
"epoch": 1.1012810531530894,
"wallclock": "2026-05-23T18:48:10.450485",
"loss": 0.043,
"grad_norm": 1.1920311450958252,
"learning_rate": 4.423583556275814e-06,
"step_time_sec": 113.29
},
{
"step": 3895,
"epoch": 1.1026965814990446,
"wallclock": "2026-05-23T18:50:04.687540",
"loss": 0.0502,
"grad_norm": 1.4759620428085327,
"learning_rate": 4.41219918418133e-06,
"step_time_sec": 114.24
},
{
"step": 3900,
"epoch": 1.1041121098449997,
"wallclock": "2026-05-23T18:51:57.525567",
"loss": 0.0514,
"grad_norm": 1.4128731489181519,
"learning_rate": 4.400817901208697e-06,
"step_time_sec": 112.84,
"gpu": [
{
"gpu": 0,
"mem_allocated_gb": 33.45,
"mem_reserved_gb": 74.24
},
{
"gpu": 1,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 2,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 3,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
}
]
},
{
"step": 3900,
"epoch": 1.1041121098449997,
"wallclock": "2026-05-23T18:52:50.285022",
"eval_loss": 0.08410802483558655,
"eval_runtime": 52.6672,
"eval_samples_per_second": 4.747,
"eval_steps_per_second": 1.196,
"gpu": [
{
"gpu": 0,
"mem_allocated_gb": 39.05,
"mem_reserved_gb": 74.24
},
{
"gpu": 1,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 2,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 3,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
}
]
},
{
"step": 3900,
"epoch": 1.1041121098449997,
"wallclock": "2026-05-23T18:54:34.984762",
"train_runtime": 61042.0543,
"train_samples_per_second": 3.703,
"train_steps_per_second": 0.116,
"total_flos": 1.0157637889163264e+16,
"train_loss": 0.0625501875159068,
"gpu": [
{
"gpu": 0,
"mem_allocated_gb": 33.45,
"mem_reserved_gb": 74.24
},
{
"gpu": 1,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 2,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 3,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
}
]
},
{
"step": 3900,
"epoch": 1.1041121098449997,
"wallclock": "2026-05-23T18:55:51.257083",
"eval_loss": 0.08236898481845856,
"eval_runtime": 62.4976,
"eval_samples_per_second": 4.0,
"eval_steps_per_second": 1.008,
"gpu": [
{
"gpu": 0,
"mem_allocated_gb": 39.05,
"mem_reserved_gb": 74.24
},
{
"gpu": 1,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 2,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
},
{
"gpu": 3,
"mem_allocated_gb": 0.0,
"mem_reserved_gb": 0.0
}
]
}
]