Text Generation
PEFT
TensorBoard
Safetensors
English
medical
radiology
medical-coding
icd-10
cpt
llama-3
lora
healthcare
conversational
Instructions to use vineetdaniels/NYXMed-V18-Model with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- PEFT
How to use vineetdaniels/NYXMed-V18-Model with PEFT:
```python
from peft import PeftModel
from transformers import AutoModelForCausalLM

base_model = AutoModelForCausalLM.from_pretrained("vineetdaniels/NYXMed-V17-Merged")
model = PeftModel.from_pretrained(base_model, "vineetdaniels/NYXMed-V18-Model")
```
- Notebooks
- Google Colab
- Kaggle
| [ | |
| { | |
| "step": 1, | |
| "epoch": 0.0005407963225850064, | |
| "wallclock": "2026-06-24T03:50:12.865305", | |
| "loss": 0.4579, | |
| "grad_norm": 2.3048150539398193, | |
| "learning_rate": 9.00900900900901e-08 | |
| }, | |
| { | |
| "step": 5, | |
| "epoch": 0.002703981612925032, | |
| "wallclock": "2026-06-24T03:51:38.176434", | |
| "loss": 0.3614, | |
| "grad_norm": 1.8860491514205933, | |
| "learning_rate": 4.504504504504505e-07, | |
| "step_time_sec": 85.31 | |
| }, | |
| { | |
| "step": 10, | |
| "epoch": 0.005407963225850064, | |
| "wallclock": "2026-06-24T03:53:23.303779", | |
| "loss": 0.4862, | |
| "grad_norm": 2.008655548095703, | |
| "learning_rate": 9.00900900900901e-07, | |
| "step_time_sec": 105.13 | |
| }, | |
| { | |
| "step": 15, | |
| "epoch": 0.008111944838775096, | |
| "wallclock": "2026-06-24T03:55:09.377081", | |
| "loss": 0.4649, | |
| "grad_norm": 2.2208123207092285, | |
| "learning_rate": 1.3513513513513515e-06, | |
| "step_time_sec": 106.07 | |
| }, | |
| { | |
| "step": 20, | |
| "epoch": 0.010815926451700129, | |
| "wallclock": "2026-06-24T03:56:56.093355", | |
| "loss": 0.4136, | |
| "grad_norm": 1.8327311277389526, | |
| "learning_rate": 1.801801801801802e-06, | |
| "step_time_sec": 106.72 | |
| }, | |
| { | |
| "step": 25, | |
| "epoch": 0.01351990806462516, | |
| "wallclock": "2026-06-24T03:58:40.595516", | |
| "loss": 0.4941, | |
| "grad_norm": 2.889826536178589, | |
| "learning_rate": 2.2522522522522524e-06, | |
| "step_time_sec": 104.5 | |
| }, | |
| { | |
| "step": 30, | |
| "epoch": 0.01622388967755019, | |
| "wallclock": "2026-06-24T04:00:26.401533", | |
| "loss": 0.3599, | |
| "grad_norm": 2.0262131690979004, | |
| "learning_rate": 2.702702702702703e-06, | |
| "step_time_sec": 105.81 | |
| }, | |
| { | |
| "step": 35, | |
| "epoch": 0.018927871290475226, | |
| "wallclock": "2026-06-24T04:02:12.105552", | |
| "loss": 0.2975, | |
| "grad_norm": 1.627108097076416, | |
| "learning_rate": 3.1531531531531532e-06, | |
| "step_time_sec": 105.7 | |
| }, | |
| { | |
| "step": 40, | |
| "epoch": 0.021631852903400257, | |
| "wallclock": "2026-06-24T04:03:56.186851", | |
| "loss": 0.3563, | |
| "grad_norm": 1.6855164766311646, | |
| "learning_rate": 3.603603603603604e-06, | |
| "step_time_sec": 104.08 | |
| }, | |
| { | |
| "step": 45, | |
| "epoch": 0.02433583451632529, | |
| "wallclock": "2026-06-24T04:05:41.990511", | |
| "loss": 0.3596, | |
| "grad_norm": 1.661110520362854, | |
| "learning_rate": 4.0540540540540545e-06, | |
| "step_time_sec": 105.8 | |
| }, | |
| { | |
| "step": 50, | |
| "epoch": 0.02703981612925032, | |
| "wallclock": "2026-06-24T04:07:27.997794", | |
| "loss": 0.267, | |
| "grad_norm": 1.2917487621307373, | |
| "learning_rate": 4.504504504504505e-06, | |
| "step_time_sec": 106.01, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 33.45, | |
| "mem_reserved_gb": 75.27 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 55, | |
| "epoch": 0.029743797742175355, | |
| "wallclock": "2026-06-24T04:09:12.604118", | |
| "loss": 0.2226, | |
| "grad_norm": 0.9745638370513916, | |
| "learning_rate": 4.954954954954955e-06, | |
| "step_time_sec": 104.61 | |
| }, | |
| { | |
| "step": 60, | |
| "epoch": 0.03244777935510038, | |
| "wallclock": "2026-06-24T04:10:57.892162", | |
| "loss": 0.1948, | |
| "grad_norm": 1.2490293979644775, | |
| "learning_rate": 5.405405405405406e-06, | |
| "step_time_sec": 105.29 | |
| }, | |
| { | |
| "step": 65, | |
| "epoch": 0.03515176096802542, | |
| "wallclock": "2026-06-24T04:12:44.688074", | |
| "loss": 0.2015, | |
| "grad_norm": 0.9993012547492981, | |
| "learning_rate": 5.855855855855856e-06, | |
| "step_time_sec": 106.8 | |
| }, | |
| { | |
| "step": 70, | |
| "epoch": 0.03785574258095045, | |
| "wallclock": "2026-06-24T04:14:30.291858", | |
| "loss": 0.2271, | |
| "grad_norm": 1.3109948635101318, | |
| "learning_rate": 6.3063063063063065e-06, | |
| "step_time_sec": 105.6 | |
| }, | |
| { | |
| "step": 75, | |
| "epoch": 0.040559724193875484, | |
| "wallclock": "2026-06-24T04:16:14.219008", | |
| "loss": 0.157, | |
| "grad_norm": 0.6500820517539978, | |
| "learning_rate": 6.7567567567567575e-06, | |
| "step_time_sec": 103.93 | |
| }, | |
| { | |
| "step": 80, | |
| "epoch": 0.043263705806800515, | |
| "wallclock": "2026-06-24T04:17:59.413498", | |
| "loss": 0.1579, | |
| "grad_norm": 0.8443478345870972, | |
| "learning_rate": 7.207207207207208e-06, | |
| "step_time_sec": 105.19 | |
| }, | |
| { | |
| "step": 85, | |
| "epoch": 0.045967687419725546, | |
| "wallclock": "2026-06-24T04:19:44.517567", | |
| "loss": 0.1641, | |
| "grad_norm": 0.9615593552589417, | |
| "learning_rate": 7.657657657657658e-06, | |
| "step_time_sec": 105.1 | |
| }, | |
| { | |
| "step": 90, | |
| "epoch": 0.04867166903265058, | |
| "wallclock": "2026-06-24T04:21:28.592924", | |
| "loss": 0.1288, | |
| "grad_norm": 0.6482295989990234, | |
| "learning_rate": 8.108108108108109e-06, | |
| "step_time_sec": 104.08 | |
| }, | |
| { | |
| "step": 95, | |
| "epoch": 0.05137565064557561, | |
| "wallclock": "2026-06-24T04:23:14.290954", | |
| "loss": 0.136, | |
| "grad_norm": 0.8641292452812195, | |
| "learning_rate": 8.55855855855856e-06, | |
| "step_time_sec": 105.7 | |
| }, | |
| { | |
| "step": 100, | |
| "epoch": 0.05407963225850064, | |
| "wallclock": "2026-06-24T04:24:58.500503", | |
| "loss": 0.1148, | |
| "grad_norm": 0.7579247355461121, | |
| "learning_rate": 9.00900900900901e-06, | |
| "step_time_sec": 104.21, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 33.45, | |
| "mem_reserved_gb": 77.06 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 100, | |
| "epoch": 0.05407963225850064, | |
| "wallclock": "2026-06-24T04:26:38.179343", | |
| "eval_loss": 0.09886857122182846, | |
| "eval_runtime": 99.676, | |
| "eval_samples_per_second": 5.016, | |
| "eval_steps_per_second": 1.254, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 39.05, | |
| "mem_reserved_gb": 77.07 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 105, | |
| "epoch": 0.05678361387142567, | |
| "wallclock": "2026-06-24T04:29:47.276194", | |
| "loss": 0.1357, | |
| "grad_norm": 1.0714149475097656, | |
| "learning_rate": 9.45945945945946e-06, | |
| "step_time_sec": 288.78 | |
| }, | |
| { | |
| "step": 110, | |
| "epoch": 0.05948759548435071, | |
| "wallclock": "2026-06-24T04:31:33.882914", | |
| "loss": 0.1446, | |
| "grad_norm": 0.9079675078392029, | |
| "learning_rate": 9.90990990990991e-06, | |
| "step_time_sec": 106.61 | |
| }, | |
| { | |
| "step": 115, | |
| "epoch": 0.06219157709727574, | |
| "wallclock": "2026-06-24T04:33:20.601103", | |
| "loss": 0.1169, | |
| "grad_norm": 0.7082911729812622, | |
| "learning_rate": 9.999969317090495e-06, | |
| "step_time_sec": 106.72 | |
| }, | |
| { | |
| "step": 120, | |
| "epoch": 0.06489555871020077, | |
| "wallclock": "2026-06-24T04:35:06.071476", | |
| "loss": 0.1294, | |
| "grad_norm": 0.8290165066719055, | |
| "learning_rate": 9.99984466841603e-06, | |
| "step_time_sec": 105.47 | |
| }, | |
| { | |
| "step": 125, | |
| "epoch": 0.0675995403231258, | |
| "wallclock": "2026-06-24T04:36:51.012116", | |
| "loss": 0.0967, | |
| "grad_norm": 3.919275999069214, | |
| "learning_rate": 9.999624138683289e-06, | |
| "step_time_sec": 104.94 | |
| }, | |
| { | |
| "step": 130, | |
| "epoch": 0.07030352193605084, | |
| "wallclock": "2026-06-24T04:38:36.077195", | |
| "loss": 0.1226, | |
| "grad_norm": 0.9278262853622437, | |
| "learning_rate": 9.999307732121325e-06, | |
| "step_time_sec": 105.07 | |
| }, | |
| { | |
| "step": 135, | |
| "epoch": 0.07300750354897587, | |
| "wallclock": "2026-06-24T04:40:20.812264", | |
| "loss": 0.1098, | |
| "grad_norm": 0.7507790923118591, | |
| "learning_rate": 9.998895454797807e-06, | |
| "step_time_sec": 104.74 | |
| }, | |
| { | |
| "step": 140, | |
| "epoch": 0.0757114851619009, | |
| "wallclock": "2026-06-24T04:42:06.675192", | |
| "loss": 0.1013, | |
| "grad_norm": 0.42517712712287903, | |
| "learning_rate": 9.998387314618898e-06, | |
| "step_time_sec": 105.86 | |
| }, | |
| { | |
| "step": 145, | |
| "epoch": 0.07841546677482593, | |
| "wallclock": "2026-06-24T04:43:52.811351", | |
| "loss": 0.1198, | |
| "grad_norm": 0.9795618653297424, | |
| "learning_rate": 9.997783321329104e-06, | |
| "step_time_sec": 106.14 | |
| }, | |
| { | |
| "step": 150, | |
| "epoch": 0.08111944838775097, | |
| "wallclock": "2026-06-24T04:45:37.500620", | |
| "loss": 0.1097, | |
| "grad_norm": 0.8131667375564575, | |
| "learning_rate": 9.997083486511088e-06, | |
| "step_time_sec": 104.69, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 33.45, | |
| "mem_reserved_gb": 80.98 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 155, | |
| "epoch": 0.08382343000067599, | |
| "wallclock": "2026-06-24T04:47:23.099655", | |
| "loss": 0.1064, | |
| "grad_norm": 1.0449674129486084, | |
| "learning_rate": 9.996287823585446e-06, | |
| "step_time_sec": 105.6 | |
| }, | |
| { | |
| "step": 160, | |
| "epoch": 0.08652741161360103, | |
| "wallclock": "2026-06-24T04:49:08.308222", | |
| "loss": 0.1413, | |
| "grad_norm": 1.2222431898117065, | |
| "learning_rate": 9.995396347810456e-06, | |
| "step_time_sec": 105.21 | |
| }, | |
| { | |
| "step": 165, | |
| "epoch": 0.08923139322652605, | |
| "wallclock": "2026-06-24T04:50:53.614369", | |
| "loss": 0.095, | |
| "grad_norm": 0.6692535877227783, | |
| "learning_rate": 9.994409076281776e-06, | |
| "step_time_sec": 105.31 | |
| }, | |
| { | |
| "step": 170, | |
| "epoch": 0.09193537483945109, | |
| "wallclock": "2026-06-24T04:52:39.312428", | |
| "loss": 0.0987, | |
| "grad_norm": 0.7257323861122131, | |
| "learning_rate": 9.99332602793212e-06, | |
| "step_time_sec": 105.7 | |
| }, | |
| { | |
| "step": 175, | |
| "epoch": 0.09463935645237613, | |
| "wallclock": "2026-06-24T04:54:25.104575", | |
| "loss": 0.1172, | |
| "grad_norm": 0.694538414478302, | |
| "learning_rate": 9.992147223530901e-06, | |
| "step_time_sec": 105.79 | |
| }, | |
| { | |
| "step": 180, | |
| "epoch": 0.09734333806530115, | |
| "wallclock": "2026-06-24T04:56:09.832372", | |
| "loss": 0.0912, | |
| "grad_norm": 0.5451284646987915, | |
| "learning_rate": 9.99087268568382e-06, | |
| "step_time_sec": 104.73 | |
| }, | |
| { | |
| "step": 185, | |
| "epoch": 0.1000473196782262, | |
| "wallclock": "2026-06-24T04:57:55.711549", | |
| "loss": 0.1088, | |
| "grad_norm": 0.7407487034797668, | |
| "learning_rate": 9.989502438832447e-06, | |
| "step_time_sec": 105.88 | |
| }, | |
| { | |
| "step": 190, | |
| "epoch": 0.10275130129115122, | |
| "wallclock": "2026-06-24T04:59:41.200392", | |
| "loss": 0.1331, | |
| "grad_norm": 0.6032689809799194, | |
| "learning_rate": 9.988036509253742e-06, | |
| "step_time_sec": 105.49 | |
| }, | |
| { | |
| "step": 195, | |
| "epoch": 0.10545528290407626, | |
| "wallclock": "2026-06-24T05:01:24.975629", | |
| "loss": 0.091, | |
| "grad_norm": 0.7505941390991211, | |
| "learning_rate": 9.986474925059551e-06, | |
| "step_time_sec": 103.78 | |
| }, | |
| { | |
| "step": 200, | |
| "epoch": 0.10815926451700128, | |
| "wallclock": "2026-06-24T05:03:10.604700", | |
| "loss": 0.1116, | |
| "grad_norm": 0.6309108138084412, | |
| "learning_rate": 9.984817716196075e-06, | |
| "step_time_sec": 105.63, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 33.45, | |
| "mem_reserved_gb": 80.98 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 200, | |
| "epoch": 0.10815926451700128, | |
| "wallclock": "2026-06-24T05:04:50.512683", | |
| "eval_loss": 0.08997273445129395, | |
| "eval_runtime": 99.9051, | |
| "eval_samples_per_second": 5.005, | |
| "eval_steps_per_second": 1.251, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 39.05, | |
| "mem_reserved_gb": 80.98 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 205, | |
| "epoch": 0.11086324612992632, | |
| "wallclock": "2026-06-24T05:07:50.102603", | |
| "loss": 0.0916, | |
| "grad_norm": 0.4750153422355652, | |
| "learning_rate": 9.983064914443293e-06, | |
| "step_time_sec": 279.5 | |
| }, | |
| { | |
| "step": 210, | |
| "epoch": 0.11356722774285134, | |
| "wallclock": "2026-06-24T05:09:36.370868", | |
| "loss": 0.1137, | |
| "grad_norm": 0.6414338946342468, | |
| "learning_rate": 9.981216553414342e-06, | |
| "step_time_sec": 106.27 | |
| }, | |
| { | |
| "step": 215, | |
| "epoch": 0.11627120935577638, | |
| "wallclock": "2026-06-24T05:11:30.304900", | |
| "loss": 0.1261, | |
| "grad_norm": 0.7359138131141663, | |
| "learning_rate": 9.979272668554885e-06, | |
| "step_time_sec": 113.93 | |
| }, | |
| { | |
| "step": 220, | |
| "epoch": 0.11897519096870142, | |
| "wallclock": "2026-06-24T05:13:14.700481", | |
| "loss": 0.1016, | |
| "grad_norm": 0.8333423137664795, | |
| "learning_rate": 9.97723329714243e-06, | |
| "step_time_sec": 104.4 | |
| }, | |
| { | |
| "step": 225, | |
| "epoch": 0.12167917258162644, | |
| "wallclock": "2026-06-24T05:14:58.874076", | |
| "loss": 0.1084, | |
| "grad_norm": 0.7175215482711792, | |
| "learning_rate": 9.97509847828561e-06, | |
| "step_time_sec": 104.17 | |
| }, | |
| { | |
| "step": 230, | |
| "epoch": 0.12438315419455148, | |
| "wallclock": "2026-06-24T05:16:44.695629", | |
| "loss": 0.1165, | |
| "grad_norm": 0.5170373320579529, | |
| "learning_rate": 9.972868252923433e-06, | |
| "step_time_sec": 105.82 | |
| }, | |
| { | |
| "step": 235, | |
| "epoch": 0.12708713580747652, | |
| "wallclock": "2026-06-24T05:18:30.420350", | |
| "loss": 0.1014, | |
| "grad_norm": 1.0086610317230225, | |
| "learning_rate": 9.970542663824504e-06, | |
| "step_time_sec": 105.72 | |
| }, | |
| { | |
| "step": 240, | |
| "epoch": 0.12979111742040153, | |
| "wallclock": "2026-06-24T05:20:15.400259", | |
| "loss": 0.102, | |
| "grad_norm": 0.6341211199760437, | |
| "learning_rate": 9.968121755586196e-06, | |
| "step_time_sec": 104.98 | |
| }, | |
| { | |
| "step": 245, | |
| "epoch": 0.13249509903332657, | |
| "wallclock": "2026-06-24T05:22:01.027986", | |
| "loss": 0.1059, | |
| "grad_norm": 0.7365284562110901, | |
| "learning_rate": 9.965605574633798e-06, | |
| "step_time_sec": 105.63 | |
| }, | |
| { | |
| "step": 250, | |
| "epoch": 0.1351990806462516, | |
| "wallclock": "2026-06-24T05:23:47.012942", | |
| "loss": 0.0803, | |
| "grad_norm": 0.6329382061958313, | |
| "learning_rate": 9.96299416921963e-06, | |
| "step_time_sec": 105.98, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 33.45, | |
| "mem_reserved_gb": 80.98 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 255, | |
| "epoch": 0.13790306225917665, | |
| "wallclock": "2026-06-24T05:25:33.107736", | |
| "loss": 0.1085, | |
| "grad_norm": 0.6384260654449463, | |
| "learning_rate": 9.960287589422111e-06, | |
| "step_time_sec": 106.09 | |
| }, | |
| { | |
| "step": 260, | |
| "epoch": 0.14060704387210168, | |
| "wallclock": "2026-06-24T05:27:19.205894", | |
| "loss": 0.0953, | |
| "grad_norm": 0.7029681205749512, | |
| "learning_rate": 9.957485887144797e-06, | |
| "step_time_sec": 106.1 | |
| }, | |
| { | |
| "step": 265, | |
| "epoch": 0.1433110254850267, | |
| "wallclock": "2026-06-24T05:29:05.191995", | |
| "loss": 0.0855, | |
| "grad_norm": 0.7882628440856934, | |
| "learning_rate": 9.954589116115398e-06, | |
| "step_time_sec": 105.99 | |
| }, | |
| { | |
| "step": 270, | |
| "epoch": 0.14601500709795173, | |
| "wallclock": "2026-06-24T05:30:49.706053", | |
| "loss": 0.0996, | |
| "grad_norm": 0.8211791515350342, | |
| "learning_rate": 9.95159733188473e-06, | |
| "step_time_sec": 104.51 | |
| }, | |
| { | |
| "step": 275, | |
| "epoch": 0.14871898871087677, | |
| "wallclock": "2026-06-24T05:32:37.121576", | |
| "loss": 0.1183, | |
| "grad_norm": 0.8806095719337463, | |
| "learning_rate": 9.948510591825666e-06, | |
| "step_time_sec": 107.42 | |
| }, | |
| { | |
| "step": 280, | |
| "epoch": 0.1514229703238018, | |
| "wallclock": "2026-06-24T05:34:21.610766", | |
| "loss": 0.0954, | |
| "grad_norm": 0.7867270708084106, | |
| "learning_rate": 9.945328955132023e-06, | |
| "step_time_sec": 104.49 | |
| }, | |
| { | |
| "step": 285, | |
| "epoch": 0.15412695193672682, | |
| "wallclock": "2026-06-24T05:36:07.329379", | |
| "loss": 0.0886, | |
| "grad_norm": 0.7445922493934631, | |
| "learning_rate": 9.942052482817436e-06, | |
| "step_time_sec": 105.72 | |
| }, | |
| { | |
| "step": 290, | |
| "epoch": 0.15683093354965186, | |
| "wallclock": "2026-06-24T05:37:53.691605", | |
| "loss": 0.0762, | |
| "grad_norm": 0.4893661141395569, | |
| "learning_rate": 9.938681237714186e-06, | |
| "step_time_sec": 106.36 | |
| }, | |
| { | |
| "step": 295, | |
| "epoch": 0.1595349151625769, | |
| "wallclock": "2026-06-24T05:39:38.617866", | |
| "loss": 0.1037, | |
| "grad_norm": 0.7313506603240967, | |
| "learning_rate": 9.935215284471989e-06, | |
| "step_time_sec": 104.93 | |
| }, | |
| { | |
| "step": 300, | |
| "epoch": 0.16223889677550193, | |
| "wallclock": "2026-06-24T05:41:23.828815", | |
| "loss": 0.0868, | |
| "grad_norm": 0.7617091536521912, | |
| "learning_rate": 9.93165468955676e-06, | |
| "step_time_sec": 105.21, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 33.45, | |
| "mem_reserved_gb": 80.98 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 300, | |
| "epoch": 0.16223889677550193, | |
| "wallclock": "2026-06-24T05:43:03.826680", | |
| "eval_loss": 0.0827580988407135, | |
| "eval_runtime": 99.9942, | |
| "eval_samples_per_second": 5.0, | |
| "eval_steps_per_second": 1.25, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 39.05, | |
| "mem_reserved_gb": 80.98 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 305, | |
| "epoch": 0.16494287838842697, | |
| "wallclock": "2026-06-24T05:46:13.578643", | |
| "loss": 0.074, | |
| "grad_norm": 0.4504067301750183, | |
| "learning_rate": 9.927999521249347e-06, | |
| "step_time_sec": 289.75 | |
| }, | |
| { | |
| "step": 310, | |
| "epoch": 0.16764686000135198, | |
| "wallclock": "2026-06-24T05:48:00.303575", | |
| "loss": 0.1073, | |
| "grad_norm": 0.6431950330734253, | |
| "learning_rate": 9.924249849644205e-06, | |
| "step_time_sec": 106.72 | |
| }, | |
| { | |
| "step": 315, | |
| "epoch": 0.17035084161427702, | |
| "wallclock": "2026-06-24T05:49:45.680208", | |
| "loss": 0.1112, | |
| "grad_norm": 0.9043431878089905, | |
| "learning_rate": 9.920405746648067e-06, | |
| "step_time_sec": 105.38 | |
| }, | |
| { | |
| "step": 320, | |
| "epoch": 0.17305482322720206, | |
| "wallclock": "2026-06-24T05:51:32.404247", | |
| "loss": 0.0764, | |
| "grad_norm": 0.6045661568641663, | |
| "learning_rate": 9.916467285978556e-06, | |
| "step_time_sec": 106.72 | |
| }, | |
| { | |
| "step": 325, | |
| "epoch": 0.1757588048401271, | |
| "wallclock": "2026-06-24T05:53:18.181343", | |
| "loss": 0.0956, | |
| "grad_norm": 0.8464241623878479, | |
| "learning_rate": 9.912434543162769e-06, | |
| "step_time_sec": 105.78 | |
| }, | |
| { | |
| "step": 330, | |
| "epoch": 0.1784627864530521, | |
| "wallclock": "2026-06-24T05:55:03.598110", | |
| "loss": 0.1038, | |
| "grad_norm": 0.43105682730674744, | |
| "learning_rate": 9.908307595535842e-06, | |
| "step_time_sec": 105.42 | |
| }, | |
| { | |
| "step": 335, | |
| "epoch": 0.18116676806597715, | |
| "wallclock": "2026-06-24T05:56:50.010277", | |
| "loss": 0.1136, | |
| "grad_norm": 0.4054422676563263, | |
| "learning_rate": 9.904086522239455e-06, | |
| "step_time_sec": 106.41 | |
| }, | |
| { | |
| "step": 340, | |
| "epoch": 0.18387074967890218, | |
| "wallclock": "2026-06-24T05:58:35.002723", | |
| "loss": 0.081, | |
| "grad_norm": 0.7263162732124329, | |
| "learning_rate": 9.899771404220318e-06, | |
| "step_time_sec": 104.99 | |
| }, | |
| { | |
| "step": 345, | |
| "epoch": 0.18657473129182722, | |
| "wallclock": "2026-06-24T06:00:19.132697", | |
| "loss": 0.1075, | |
| "grad_norm": 1.7756342887878418, | |
| "learning_rate": 9.895362324228616e-06, | |
| "step_time_sec": 104.13 | |
| }, | |
| { | |
| "step": 350, | |
| "epoch": 0.18927871290475226, | |
| "wallclock": "2026-06-24T06:02:04.923652", | |
| "loss": 0.0862, | |
| "grad_norm": 0.4385850131511688, | |
| "learning_rate": 9.890859366816429e-06, | |
| "step_time_sec": 105.79, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 33.45, | |
| "mem_reserved_gb": 80.98 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 355, | |
| "epoch": 0.19198269451767727, | |
| "wallclock": "2026-06-24T06:03:51.291232", | |
| "loss": 0.1074, | |
| "grad_norm": 0.5257728099822998, | |
| "learning_rate": 9.886262618336103e-06, | |
| "step_time_sec": 106.37 | |
| }, | |
| { | |
| "step": 360, | |
| "epoch": 0.1946866761306023, | |
| "wallclock": "2026-06-24T06:05:36.306087", | |
| "loss": 0.1147, | |
| "grad_norm": 0.6658884882926941, | |
| "learning_rate": 9.881572166938598e-06, | |
| "step_time_sec": 105.01 | |
| }, | |
| { | |
| "step": 365, | |
| "epoch": 0.19739065774352735, | |
| "wallclock": "2026-06-24T06:07:22.917361", | |
| "loss": 0.0827, | |
| "grad_norm": 0.5998860597610474, | |
| "learning_rate": 9.876788102571797e-06, | |
| "step_time_sec": 106.61 | |
| }, | |
| { | |
| "step": 370, | |
| "epoch": 0.2000946393564524, | |
| "wallclock": "2026-06-24T06:09:08.111845", | |
| "loss": 0.1197, | |
| "grad_norm": 0.8167080879211426, | |
| "learning_rate": 9.871910516978782e-06, | |
| "step_time_sec": 105.19 | |
| }, | |
| { | |
| "step": 375, | |
| "epoch": 0.2027986209693774, | |
| "wallclock": "2026-06-24T06:10:53.235112", | |
| "loss": 0.1089, | |
| "grad_norm": 0.8197498321533203, | |
| "learning_rate": 9.86693950369607e-06, | |
| "step_time_sec": 105.12 | |
| }, | |
| { | |
| "step": 380, | |
| "epoch": 0.20550260258230243, | |
| "wallclock": "2026-06-24T06:12:39.106309", | |
| "loss": 0.0881, | |
| "grad_norm": 0.5486798882484436, | |
| "learning_rate": 9.861875158051831e-06, | |
| "step_time_sec": 105.87 | |
| }, | |
| { | |
| "step": 385, | |
| "epoch": 0.20820658419522747, | |
| "wallclock": "2026-06-24T06:14:24.784134", | |
| "loss": 0.0849, | |
| "grad_norm": 0.6048823595046997, | |
| "learning_rate": 9.85671757716404e-06, | |
| "step_time_sec": 105.68 | |
| }, | |
| { | |
| "step": 390, | |
| "epoch": 0.2109105658081525, | |
| "wallclock": "2026-06-24T06:16:09.415638", | |
| "loss": 0.1078, | |
| "grad_norm": 0.4019126296043396, | |
| "learning_rate": 9.851466859938637e-06, | |
| "step_time_sec": 104.63 | |
| }, | |
| { | |
| "step": 395, | |
| "epoch": 0.21361454742107755, | |
| "wallclock": "2026-06-24T06:17:56.691186", | |
| "loss": 0.1119, | |
| "grad_norm": 0.6954424381256104, | |
| "learning_rate": 9.84612310706761e-06, | |
| "step_time_sec": 107.28 | |
| }, | |
| { | |
| "step": 400, | |
| "epoch": 0.21631852903400256, | |
| "wallclock": "2026-06-24T06:19:42.292515", | |
| "loss": 0.0945, | |
| "grad_norm": 0.6359832882881165, | |
| "learning_rate": 9.840686421027085e-06, | |
| "step_time_sec": 105.6, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 33.45, | |
| "mem_reserved_gb": 80.98 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 400, | |
| "epoch": 0.21631852903400256, | |
| "wallclock": "2026-06-24T06:21:22.500688", | |
| "eval_loss": 0.08142668008804321, | |
| "eval_runtime": 100.2043, | |
| "eval_samples_per_second": 4.99, | |
| "eval_steps_per_second": 1.247, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 39.05, | |
| "mem_reserved_gb": 80.98 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 405, | |
| "epoch": 0.2190225106469276, | |
| "wallclock": "2026-06-24T06:24:29.504207", | |
| "loss": 0.0882, | |
| "grad_norm": 0.6086682081222534, | |
| "learning_rate": 9.835156906075338e-06, | |
| "step_time_sec": 287.21 | |
| }, | |
| { | |
| "step": 410, | |
| "epoch": 0.22172649225985264, | |
| "wallclock": "2026-06-24T06:26:16.103755", | |
| "loss": 0.1007, | |
| "grad_norm": 1.0316163301467896, | |
| "learning_rate": 9.829534668250814e-06, | |
| "step_time_sec": 106.6 | |
| }, | |
| { | |
| "step": 415, | |
| "epoch": 0.22443047387277767, | |
| "wallclock": "2026-06-24T06:28:01.409742", | |
| "loss": 0.093, | |
| "grad_norm": 0.7095230221748352, | |
| "learning_rate": 9.823819815370084e-06, | |
| "step_time_sec": 105.31 | |
| }, | |
| { | |
| "step": 420, | |
| "epoch": 0.22713445548570269, | |
| "wallclock": "2026-06-24T06:29:47.402659", | |
| "loss": 0.1019, | |
| "grad_norm": 0.7305953502655029, | |
| "learning_rate": 9.818012457025782e-06, | |
| "step_time_sec": 105.99 | |
| }, | |
| { | |
| "step": 425, | |
| "epoch": 0.22983843709862772, | |
| "wallclock": "2026-06-24T06:31:34.377021", | |
| "loss": 0.1021, | |
| "grad_norm": 0.5319082140922546, | |
| "learning_rate": 9.812112704584503e-06, | |
| "step_time_sec": 106.97 | |
| }, | |
| { | |
| "step": 430, | |
| "epoch": 0.23254241871155276, | |
| "wallclock": "2026-06-24T06:33:19.008040", | |
| "loss": 0.1063, | |
| "grad_norm": 0.8568723797798157, | |
| "learning_rate": 9.806120671184658e-06, | |
| "step_time_sec": 104.63 | |
| }, | |
| { | |
| "step": 435, | |
| "epoch": 0.2352464003244778, | |
| "wallclock": "2026-06-24T06:35:03.722758", | |
| "loss": 0.0605, | |
| "grad_norm": 0.45772790908813477, | |
| "learning_rate": 9.80003647173432e-06, | |
| "step_time_sec": 104.71 | |
| }, | |
| { | |
| "step": 440, | |
| "epoch": 0.23795038193740284, | |
| "wallclock": "2026-06-24T06:36:50.225935", | |
| "loss": 0.0991, | |
| "grad_norm": 0.7904582023620605, | |
| "learning_rate": 9.793860222909012e-06, | |
| "step_time_sec": 106.5 | |
| }, | |
| { | |
| "step": 445, | |
| "epoch": 0.24065436355032785, | |
| "wallclock": "2026-06-24T06:38:35.809144", | |
| "loss": 0.0913, | |
| "grad_norm": 0.5529101490974426, | |
| "learning_rate": 9.787592043149467e-06, | |
| "step_time_sec": 105.58 | |
| }, | |
| { | |
| "step": 450, | |
| "epoch": 0.2433583451632529, | |
| "wallclock": "2026-06-24T06:40:20.203877", | |
| "loss": 0.0679, | |
| "grad_norm": 0.7900363206863403, | |
| "learning_rate": 9.78123205265936e-06, | |
| "step_time_sec": 104.39, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 33.45, | |
| "mem_reserved_gb": 80.98 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 455, | |
| "epoch": 0.24606232677617793, | |
| "wallclock": "2026-06-24T06:42:06.492257", | |
| "loss": 0.1056, | |
| "grad_norm": 0.6831784248352051, | |
| "learning_rate": 9.774780373403003e-06, | |
| "step_time_sec": 106.29 | |
| }, | |
| { | |
| "step": 460, | |
| "epoch": 0.24876630838910296, | |
| "wallclock": "2026-06-24T06:43:52.409118", | |
| "loss": 0.0933, | |
| "grad_norm": 0.5478017330169678, | |
| "learning_rate": 9.768237129103009e-06, | |
| "step_time_sec": 105.92 | |
| }, | |
| { | |
| "step": 465, | |
| "epoch": 0.251470290002028, | |
| "wallclock": "2026-06-24T06:45:37.500906", | |
| "loss": 0.089, | |
| "grad_norm": 0.4542732238769531, | |
| "learning_rate": 9.761602445237914e-06, | |
| "step_time_sec": 105.09 | |
| }, | |
| { | |
| "step": 470, | |
| "epoch": 0.25417427161495304, | |
| "wallclock": "2026-06-24T06:47:23.183901", | |
| "loss": 0.1057, | |
| "grad_norm": 2.437464714050293, | |
| "learning_rate": 9.75487644903977e-06, | |
| "step_time_sec": 105.68 | |
| }, | |
| { | |
| "step": 475, | |
| "epoch": 0.25687825322787805, | |
| "wallclock": "2026-06-24T06:49:09.011851", | |
| "loss": 0.0774, | |
| "grad_norm": 0.6896166801452637, | |
| "learning_rate": 9.748059269491711e-06, | |
| "step_time_sec": 105.83 | |
| }, | |
| { | |
| "step": 480, | |
| "epoch": 0.25958223484080306, | |
| "wallclock": "2026-06-24T06:50:54.196439", | |
| "loss": 0.0913, | |
| "grad_norm": 0.5685729384422302, | |
| "learning_rate": 9.741151037325481e-06, | |
| "step_time_sec": 105.18 | |
| }, | |
| { | |
| "step": 485, | |
| "epoch": 0.2622862164537281, | |
| "wallclock": "2026-06-24T06:52:39.978133", | |
| "loss": 0.086, | |
| "grad_norm": 0.8516511917114258, | |
| "learning_rate": 9.73415188501891e-06, | |
| "step_time_sec": 105.78 | |
| }, | |
| { | |
| "step": 490, | |
| "epoch": 0.26499019806665314, | |
| "wallclock": "2026-06-24T06:54:27.309828", | |
| "loss": 0.0872, | |
| "grad_norm": 0.7482581734657288, | |
| "learning_rate": 9.727061946793402e-06, | |
| "step_time_sec": 107.33 | |
| }, | |
| { | |
| "step": 495, | |
| "epoch": 0.2676941796795782, | |
| "wallclock": "2026-06-24T06:56:12.188135", | |
| "loss": 0.0733, | |
| "grad_norm": 0.544495701789856, | |
| "learning_rate": 9.71988135861133e-06, | |
| "step_time_sec": 104.88 | |
| }, | |
| { | |
| "step": 500, | |
| "epoch": 0.2703981612925032, | |
| "wallclock": "2026-06-24T06:57:57.321125", | |
| "loss": 0.0771, | |
| "grad_norm": 0.6160959005355835, | |
| "learning_rate": 9.712610258173453e-06, | |
| "step_time_sec": 105.13, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 33.45, | |
| "mem_reserved_gb": 80.98 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 500, | |
| "epoch": 0.2703981612925032, | |
| "wallclock": "2026-06-24T06:59:37.554773", | |
| "eval_loss": 0.0792667418718338, | |
| "eval_runtime": 100.2297, | |
| "eval_samples_per_second": 4.989, | |
| "eval_steps_per_second": 1.247, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 39.05, | |
| "mem_reserved_gb": 80.99 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 505, | |
| "epoch": 0.2731021429054282, | |
| "wallclock": "2026-06-24T07:02:45.894476", | |
| "loss": 0.0798, | |
| "grad_norm": 0.8482615351676941, | |
| "learning_rate": 9.705248784916267e-06, | |
| "step_time_sec": 288.57 | |
| }, | |
| { | |
| "step": 510, | |
| "epoch": 0.2758061245183533, | |
| "wallclock": "2026-06-24T07:04:30.505457", | |
| "loss": 0.0955, | |
| "grad_norm": 0.5648516416549683, | |
| "learning_rate": 9.697797080009323e-06, | |
| "step_time_sec": 104.61 | |
| }, | |
| { | |
| "step": 515, | |
| "epoch": 0.2785101061312783, | |
| "wallclock": "2026-06-24T07:06:15.804916", | |
| "loss": 0.082, | |
| "grad_norm": 0.6227542757987976, | |
| "learning_rate": 9.690255286352532e-06, | |
| "step_time_sec": 105.3 | |
| }, | |
| { | |
| "step": 520, | |
| "epoch": 0.28121408774420337, | |
| "wallclock": "2026-06-24T07:08:01.704738", | |
| "loss": 0.1104, | |
| "grad_norm": 0.7219036221504211, | |
| "learning_rate": 9.682623548573418e-06, | |
| "step_time_sec": 105.9 | |
| }, | |
| { | |
| "step": 525, | |
| "epoch": 0.2839180693571284, | |
| "wallclock": "2026-06-24T07:09:47.411077", | |
| "loss": 0.0873, | |
| "grad_norm": 0.5870639681816101, | |
| "learning_rate": 9.674902013024348e-06, | |
| "step_time_sec": 105.71 | |
| }, | |
| { | |
| "step": 530, | |
| "epoch": 0.2866220509700534, | |
| "wallclock": "2026-06-24T07:11:33.115669", | |
| "loss": 0.1001, | |
| "grad_norm": 0.5214188694953918, | |
| "learning_rate": 9.667090827779721e-06, | |
| "step_time_sec": 105.7 | |
| }, | |
| { | |
| "step": 535, | |
| "epoch": 0.28932603258297845, | |
| "wallclock": "2026-06-24T07:13:18.886097", | |
| "loss": 0.0888, | |
| "grad_norm": 0.5477219223976135, | |
| "learning_rate": 9.659190142633133e-06, | |
| "step_time_sec": 105.77 | |
| }, | |
| { | |
| "step": 540, | |
| "epoch": 0.29203001419590346, | |
| "wallclock": "2026-06-24T07:15:03.294899", | |
| "loss": 0.0893, | |
| "grad_norm": 0.6372500061988831, | |
| "learning_rate": 9.651200109094498e-06, | |
| "step_time_sec": 104.41 | |
| }, | |
| { | |
| "step": 545, | |
| "epoch": 0.2947339958088285, | |
| "wallclock": "2026-06-24T07:16:49.807494", | |
| "loss": 0.0706, | |
| "grad_norm": 0.6478589177131653, | |
| "learning_rate": 9.643120880387155e-06, | |
| "step_time_sec": 106.51 | |
| }, | |
| { | |
| "step": 550, | |
| "epoch": 0.29743797742175354, | |
| "wallclock": "2026-06-24T07:18:36.819978", | |
| "loss": 0.0848, | |
| "grad_norm": 0.7352571487426758, | |
| "learning_rate": 9.634952611444914e-06, | |
| "step_time_sec": 107.01, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 33.45, | |
| "mem_reserved_gb": 80.99 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 555, | |
| "epoch": 0.30014195903467855, | |
| "wallclock": "2026-06-24T07:20:22.813541", | |
| "loss": 0.0956, | |
| "grad_norm": 0.8457625508308411, | |
| "learning_rate": 9.626695458909098e-06, | |
| "step_time_sec": 105.99 | |
| }, | |
| { | |
| "step": 560, | |
| "epoch": 0.3028459406476036, | |
| "wallclock": "2026-06-24T07:22:08.475130", | |
| "loss": 0.082, | |
| "grad_norm": 0.8473530411720276, | |
| "learning_rate": 9.618349581125529e-06, | |
| "step_time_sec": 105.66 | |
| }, | |
| { | |
| "step": 565, | |
| "epoch": 0.30554992226052863, | |
| "wallclock": "2026-06-24T07:23:54.730055", | |
| "loss": 0.0666, | |
| "grad_norm": 0.7220405340194702, | |
| "learning_rate": 9.609915138141497e-06, | |
| "step_time_sec": 106.25 | |
| }, | |
| { | |
| "step": 570, | |
| "epoch": 0.30825390387345364, | |
| "wallclock": "2026-06-24T07:25:39.415962", | |
| "loss": 0.0714, | |
| "grad_norm": 0.6538407206535339, | |
| "learning_rate": 9.601392291702693e-06, | |
| "step_time_sec": 104.69 | |
| }, | |
| { | |
| "step": 575, | |
| "epoch": 0.3109578854863787, | |
| "wallclock": "2026-06-24T07:27:23.791188", | |
| "loss": 0.073, | |
| "grad_norm": 0.851050853729248, | |
| "learning_rate": 9.592781205250102e-06, | |
| "step_time_sec": 104.38 | |
| }, | |
| { | |
| "step": 580, | |
| "epoch": 0.3136618670993037, | |
| "wallclock": "2026-06-24T07:29:08.982414", | |
| "loss": 0.0972, | |
| "grad_norm": 0.7455153465270996, | |
| "learning_rate": 9.584082043916867e-06, | |
| "step_time_sec": 105.19 | |
| }, | |
| { | |
| "step": 585, | |
| "epoch": 0.3163658487122288, | |
| "wallclock": "2026-06-24T07:30:54.304933", | |
| "loss": 0.0728, | |
| "grad_norm": 0.39666956663131714, | |
| "learning_rate": 9.575294974525131e-06, | |
| "step_time_sec": 105.32 | |
| }, | |
| { | |
| "step": 590, | |
| "epoch": 0.3190698303251538, | |
| "wallclock": "2026-06-24T07:32:40.198440", | |
| "loss": 0.0719, | |
| "grad_norm": 0.283635675907135, | |
| "learning_rate": 9.566420165582832e-06, | |
| "step_time_sec": 105.89 | |
| }, | |
| { | |
| "step": 595, | |
| "epoch": 0.3217738119380788, | |
| "wallclock": "2026-06-24T07:34:26.091391", | |
| "loss": 0.0892, | |
| "grad_norm": 0.6910920739173889, | |
| "learning_rate": 9.557457787280474e-06, | |
| "step_time_sec": 105.89 | |
| }, | |
| { | |
| "step": 600, | |
| "epoch": 0.32447779355100387, | |
| "wallclock": "2026-06-24T07:36:12.209290", | |
| "loss": 0.0895, | |
| "grad_norm": 0.6658245325088501, | |
| "learning_rate": 9.548408011487857e-06, | |
| "step_time_sec": 106.12, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 33.45, | |
| "mem_reserved_gb": 80.99 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 600, | |
| "epoch": 0.32447779355100387, | |
| "wallclock": "2026-06-24T07:37:52.689311", | |
| "eval_loss": 0.07775916159152985, | |
| "eval_runtime": 100.4763, | |
| "eval_samples_per_second": 4.976, | |
| "eval_steps_per_second": 1.244, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 39.05, | |
| "mem_reserved_gb": 80.99 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 605, | |
| "epoch": 0.3271817751639289, | |
| "wallclock": "2026-06-24T07:40:59.587964", | |
| "loss": 0.0984, | |
| "grad_norm": 0.6905266046524048, | |
| "learning_rate": 9.539271011750787e-06, | |
| "step_time_sec": 287.38 | |
| }, | |
| { | |
| "step": 610, | |
| "epoch": 0.32988575677685394, | |
| "wallclock": "2026-06-24T07:42:47.180462", | |
| "loss": 0.0823, | |
| "grad_norm": 0.6062604784965515, | |
| "learning_rate": 9.530046963287753e-06, | |
| "step_time_sec": 107.59 | |
| }, | |
| { | |
| "step": 615, | |
| "epoch": 0.33258973838977895, | |
| "wallclock": "2026-06-24T07:44:31.276448", | |
| "loss": 0.0767, | |
| "grad_norm": 0.7175081968307495, | |
| "learning_rate": 9.520736042986551e-06, | |
| "step_time_sec": 104.1 | |
| }, | |
| { | |
| "step": 620, | |
| "epoch": 0.33529372000270397, | |
| "wallclock": "2026-06-24T07:46:15.885770", | |
| "loss": 0.0882, | |
| "grad_norm": 0.898894190788269, | |
| "learning_rate": 9.51133842940091e-06, | |
| "step_time_sec": 104.61 | |
| }, | |
| { | |
| "step": 625, | |
| "epoch": 0.33799770161562903, | |
| "wallclock": "2026-06-24T07:48:01.530170", | |
| "loss": 0.0873, | |
| "grad_norm": 0.5385039448738098, | |
| "learning_rate": 9.501854302747053e-06, | |
| "step_time_sec": 105.64 | |
| }, | |
| { | |
| "step": 630, | |
| "epoch": 0.34070168322855404, | |
| "wallclock": "2026-06-24T07:49:46.498449", | |
| "loss": 0.0817, | |
| "grad_norm": 0.5420588850975037, | |
| "learning_rate": 9.492283844900255e-06, | |
| "step_time_sec": 104.97 | |
| }, | |
| { | |
| "step": 635, | |
| "epoch": 0.34340566484147905, | |
| "wallclock": "2026-06-24T07:51:31.809385", | |
| "loss": 0.0879, | |
| "grad_norm": 1.3086037635803223, | |
| "learning_rate": 9.482627239391335e-06, | |
| "step_time_sec": 105.31 | |
| }, | |
| { | |
| "step": 640, | |
| "epoch": 0.3461096464544041, | |
| "wallclock": "2026-06-24T07:53:18.306307", | |
| "loss": 0.0728, | |
| "grad_norm": 0.6617655158042908, | |
| "learning_rate": 9.472884671403164e-06, | |
| "step_time_sec": 106.5 | |
| }, | |
| { | |
| "step": 645, | |
| "epoch": 0.34881362806732913, | |
| "wallclock": "2026-06-24T07:55:03.697153", | |
| "loss": 0.0593, | |
| "grad_norm": 0.6209415197372437, | |
| "learning_rate": 9.46305632776709e-06, | |
| "step_time_sec": 105.39 | |
| }, | |
| { | |
| "step": 650, | |
| "epoch": 0.3515176096802542, | |
| "wallclock": "2026-06-24T07:56:48.600267", | |
| "loss": 0.0816, | |
| "grad_norm": 1.021694302558899, | |
| "learning_rate": 9.453142396959364e-06, | |
| "step_time_sec": 104.9, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 33.45, | |
| "mem_reserved_gb": 80.99 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 655, | |
| "epoch": 0.3542215912931792, | |
| "wallclock": "2026-06-24T07:58:33.519930", | |
| "loss": 0.0802, | |
| "grad_norm": 0.8565160036087036, | |
| "learning_rate": 9.443143069097531e-06, | |
| "step_time_sec": 104.92 | |
| }, | |
| { | |
| "step": 660, | |
| "epoch": 0.3569255729061042, | |
| "wallclock": "2026-06-24T08:00:17.709412", | |
| "loss": 0.073, | |
| "grad_norm": 0.8225128650665283, | |
| "learning_rate": 9.433058535936775e-06, | |
| "step_time_sec": 104.19 | |
| }, | |
| { | |
| "step": 665, | |
| "epoch": 0.3596295545190293, | |
| "wallclock": "2026-06-24T08:02:02.897175", | |
| "loss": 0.0805, | |
| "grad_norm": 0.8371864557266235, | |
| "learning_rate": 9.422888990866243e-06, | |
| "step_time_sec": 105.19 | |
| }, | |
| { | |
| "step": 670, | |
| "epoch": 0.3623335361319543, | |
| "wallclock": "2026-06-24T08:03:49.320420", | |
| "loss": 0.0855, | |
| "grad_norm": 0.6681428551673889, | |
| "learning_rate": 9.412634628905345e-06, | |
| "step_time_sec": 106.42 | |
| }, | |
| { | |
| "step": 675, | |
| "epoch": 0.36503751774487936, | |
| "wallclock": "2026-06-24T08:05:33.903831", | |
| "loss": 0.0768, | |
| "grad_norm": 0.6769019365310669, | |
| "learning_rate": 9.402295646700005e-06, | |
| "step_time_sec": 104.58 | |
| }, | |
| { | |
| "step": 680, | |
| "epoch": 0.36774149935780437, | |
| "wallclock": "2026-06-24T08:07:19.895810", | |
| "loss": 0.0829, | |
| "grad_norm": 0.5479181408882141, | |
| "learning_rate": 9.391872242518895e-06, | |
| "step_time_sec": 105.99 | |
| }, | |
| { | |
| "step": 685, | |
| "epoch": 0.3704454809707294, | |
| "wallclock": "2026-06-24T08:09:05.010792", | |
| "loss": 0.0745, | |
| "grad_norm": 0.499809205532074, | |
| "learning_rate": 9.381364616249627e-06, | |
| "step_time_sec": 105.11 | |
| }, | |
| { | |
| "step": 690, | |
| "epoch": 0.37314946258365445, | |
| "wallclock": "2026-06-24T08:10:50.298200", | |
| "loss": 0.0735, | |
| "grad_norm": 1.0203771591186523, | |
| "learning_rate": 9.370772969394927e-06, | |
| "step_time_sec": 105.29 | |
| }, | |
| { | |
| "step": 695, | |
| "epoch": 0.37585344419657946, | |
| "wallclock": "2026-06-24T08:12:35.415602", | |
| "loss": 0.0705, | |
| "grad_norm": 0.7761706113815308, | |
| "learning_rate": 9.360097505068767e-06, | |
| "step_time_sec": 105.12 | |
| }, | |
| { | |
| "step": 700, | |
| "epoch": 0.3785574258095045, | |
| "wallclock": "2026-06-24T08:14:21.710932", | |
| "loss": 0.0984, | |
| "grad_norm": 2.708282709121704, | |
| "learning_rate": 9.349338427992471e-06, | |
| "step_time_sec": 106.3, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 33.45, | |
| "mem_reserved_gb": 80.99 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 700, | |
| "epoch": 0.3785574258095045, | |
| "wallclock": "2026-06-24T08:16:02.035763", | |
| "eval_loss": 0.07459608465433121, | |
| "eval_runtime": 100.3142, | |
| "eval_samples_per_second": 4.984, | |
| "eval_steps_per_second": 1.246, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 39.05, | |
| "mem_reserved_gb": 80.99 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 705, | |
| "epoch": 0.38126140742242953, | |
| "wallclock": "2026-06-24T08:19:08.313913", | |
| "loss": 0.0745, | |
| "grad_norm": 0.4694831073284149, | |
| "learning_rate": 9.338495944490788e-06, | |
| "step_time_sec": 286.6 | |
| }, | |
| { | |
| "step": 710, | |
| "epoch": 0.38396538903535454, | |
| "wallclock": "2026-06-24T08:20:53.768411", | |
| "loss": 0.1006, | |
| "grad_norm": 0.715364396572113, | |
| "learning_rate": 9.327570262487934e-06, | |
| "step_time_sec": 105.45 | |
| }, | |
| { | |
| "step": 715, | |
| "epoch": 0.3866693706482796, | |
| "wallclock": "2026-06-24T08:22:39.591734", | |
| "loss": 0.0863, | |
| "grad_norm": 0.5401411056518555, | |
| "learning_rate": 9.316561591503612e-06, | |
| "step_time_sec": 105.82 | |
| }, | |
| { | |
| "step": 720, | |
| "epoch": 0.3893733522612046, | |
| "wallclock": "2026-06-24T08:24:24.650372", | |
| "loss": 0.0955, | |
| "grad_norm": 0.5890225768089294, | |
| "learning_rate": 9.305470142648982e-06, | |
| "step_time_sec": 105.06 | |
| }, | |
| { | |
| "step": 725, | |
| "epoch": 0.39207733387412963, | |
| "wallclock": "2026-06-24T08:26:09.603072", | |
| "loss": 0.0733, | |
| "grad_norm": 0.6464399695396423, | |
| "learning_rate": 9.294296128622625e-06, | |
| "step_time_sec": 104.95 | |
| }, | |
| { | |
| "step": 730, | |
| "epoch": 0.3947813154870547, | |
| "wallclock": "2026-06-24T08:27:56.312801", | |
| "loss": 0.0723, | |
| "grad_norm": 0.5359171628952026, | |
| "learning_rate": 9.283039763706455e-06, | |
| "step_time_sec": 106.71 | |
| }, | |
| { | |
| "step": 735, | |
| "epoch": 0.3974852970999797, | |
| "wallclock": "2026-06-24T08:29:41.888724", | |
| "loss": 0.0767, | |
| "grad_norm": 0.7463257908821106, | |
| "learning_rate": 9.27170126376161e-06, | |
| "step_time_sec": 105.58 | |
| }, | |
| { | |
| "step": 740, | |
| "epoch": 0.4001892787129048, | |
| "wallclock": "2026-06-24T08:31:26.792262", | |
| "loss": 0.0696, | |
| "grad_norm": 0.8311108946800232, | |
| "learning_rate": 9.260280846224328e-06, | |
| "step_time_sec": 104.9 | |
| }, | |
| { | |
| "step": 745, | |
| "epoch": 0.4028932603258298, | |
| "wallclock": "2026-06-24T08:33:12.308668", | |
| "loss": 0.0674, | |
| "grad_norm": 0.7888720631599426, | |
| "learning_rate": 9.24877873010175e-06, | |
| "step_time_sec": 105.52 | |
| }, | |
| { | |
| "step": 750, | |
| "epoch": 0.4055972419387548, | |
| "wallclock": "2026-06-24T08:34:57.915757", | |
| "loss": 0.0689, | |
| "grad_norm": 0.5358040928840637, | |
| "learning_rate": 9.237195135967746e-06, | |
| "step_time_sec": 105.61, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 33.45, | |
| "mem_reserved_gb": 80.99 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 755, | |
| "epoch": 0.40830122355167986, | |
| "wallclock": "2026-06-24T08:36:43.108457", | |
| "loss": 0.0991, | |
| "grad_norm": 0.7390076518058777, | |
| "learning_rate": 9.225530285958669e-06, | |
| "step_time_sec": 105.19 | |
| }, | |
| { | |
| "step": 760, | |
| "epoch": 0.41100520516460487, | |
| "wallclock": "2026-06-24T08:38:29.912596", | |
| "loss": 0.0848, | |
| "grad_norm": 0.6667785048484802, | |
| "learning_rate": 9.213784403769097e-06, | |
| "step_time_sec": 106.8 | |
| }, | |
| { | |
| "step": 765, | |
| "epoch": 0.41370918677752994, | |
| "wallclock": "2026-06-24T08:40:15.181668", | |
| "loss": 0.064, | |
| "grad_norm": 1.133137583732605, | |
| "learning_rate": 9.201957714647554e-06, | |
| "step_time_sec": 105.27 | |
| }, | |
| { | |
| "step": 770, | |
| "epoch": 0.41641316839045495, | |
| "wallclock": "2026-06-24T08:42:01.118538", | |
| "loss": 0.0654, | |
| "grad_norm": 0.8551876544952393, | |
| "learning_rate": 9.19005044539218e-06, | |
| "step_time_sec": 105.94 | |
| }, | |
| { | |
| "step": 775, | |
| "epoch": 0.41911715000337996, | |
| "wallclock": "2026-06-24T08:43:47.610874", | |
| "loss": 0.0846, | |
| "grad_norm": 0.7466854453086853, | |
| "learning_rate": 9.178062824346383e-06, | |
| "step_time_sec": 106.49 | |
| }, | |
| { | |
| "step": 780, | |
| "epoch": 0.421821131616305, | |
| "wallclock": "2026-06-24T08:45:33.303287", | |
| "loss": 0.0912, | |
| "grad_norm": 0.5469369292259216, | |
| "learning_rate": 9.165995081394463e-06, | |
| "step_time_sec": 105.69 | |
| }, | |
| { | |
| "step": 785, | |
| "epoch": 0.42452511322923003, | |
| "wallclock": "2026-06-24T08:47:18.578926", | |
| "loss": 0.0864, | |
| "grad_norm": 0.9799915552139282, | |
| "learning_rate": 9.153847447957205e-06, | |
| "step_time_sec": 105.28 | |
| }, | |
| { | |
| "step": 790, | |
| "epoch": 0.4272290948421551, | |
| "wallclock": "2026-06-24T08:49:06.294584", | |
| "loss": 0.0954, | |
| "grad_norm": 0.6794901490211487, | |
| "learning_rate": 9.141620156987432e-06, | |
| "step_time_sec": 107.72 | |
| }, | |
| { | |
| "step": 795, | |
| "epoch": 0.4299330764550801, | |
| "wallclock": "2026-06-24T08:50:52.095214", | |
| "loss": 0.0547, | |
| "grad_norm": 0.6824802160263062, | |
| "learning_rate": 9.12931344296555e-06, | |
| "step_time_sec": 105.8 | |
| }, | |
| { | |
| "step": 800, | |
| "epoch": 0.4326370580680051, | |
| "wallclock": "2026-06-24T08:52:36.490555", | |
| "loss": 0.0677, | |
| "grad_norm": 0.5517615675926208, | |
| "learning_rate": 9.116927541895042e-06, | |
| "step_time_sec": 104.4, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 33.45, | |
| "mem_reserved_gb": 80.99 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 800, | |
| "epoch": 0.4326370580680051, | |
| "wallclock": "2026-06-24T08:54:16.950971", | |
| "eval_loss": 0.07352492958307266, | |
| "eval_runtime": 100.4561, | |
| "eval_samples_per_second": 4.977, | |
| "eval_steps_per_second": 1.244, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 39.05, | |
| "mem_reserved_gb": 80.99 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 805, | |
| "epoch": 0.4353410396809302, | |
| "wallclock": "2026-06-24T08:57:27.878085", | |
| "loss": 0.0604, | |
| "grad_norm": 0.561560869216919, | |
| "learning_rate": 9.10446269129795e-06, | |
| "step_time_sec": 291.39 | |
| }, | |
| { | |
| "step": 810, | |
| "epoch": 0.4380450212938552, | |
| "wallclock": "2026-06-24T08:59:12.110183", | |
| "loss": 0.0793, | |
| "grad_norm": 0.784087061882019, | |
| "learning_rate": 9.091919130210313e-06, | |
| "step_time_sec": 104.23 | |
| }, | |
| { | |
| "step": 815, | |
| "epoch": 0.4407490029067802, | |
| "wallclock": "2026-06-24T09:00:57.619395", | |
| "loss": 0.0523, | |
| "grad_norm": 0.47488337755203247, | |
| "learning_rate": 9.079297099177585e-06, | |
| "step_time_sec": 105.51 | |
| }, | |
| { | |
| "step": 820, | |
| "epoch": 0.4434529845197053, | |
| "wallclock": "2026-06-24T09:02:44.201504", | |
| "loss": 0.092, | |
| "grad_norm": 0.6607430577278137, | |
| "learning_rate": 9.066596840250024e-06, | |
| "step_time_sec": 106.58 | |
| }, | |
| { | |
| "step": 825, | |
| "epoch": 0.4461569661326303, | |
| "wallclock": "2026-06-24T09:04:30.495107", | |
| "loss": 0.0559, | |
| "grad_norm": 0.5975196361541748, | |
| "learning_rate": 9.053818596978051e-06, | |
| "step_time_sec": 106.29 | |
| }, | |
| { | |
| "step": 830, | |
| "epoch": 0.44886094774555535, | |
| "wallclock": "2026-06-24T09:06:14.400764", | |
| "loss": 0.0749, | |
| "grad_norm": 0.5973978042602539, | |
| "learning_rate": 9.040962614407574e-06, | |
| "step_time_sec": 103.91 | |
| }, | |
| { | |
| "step": 835, | |
| "epoch": 0.45156492935848036, | |
| "wallclock": "2026-06-24T09:08:00.823060", | |
| "loss": 0.0673, | |
| "grad_norm": 0.8808339238166809, | |
| "learning_rate": 9.028029139075297e-06, | |
| "step_time_sec": 106.42 | |
| }, | |
| { | |
| "step": 840, | |
| "epoch": 0.45426891097140537, | |
| "wallclock": "2026-06-24T09:09:46.093782", | |
| "loss": 0.0975, | |
| "grad_norm": 0.9540690779685974, | |
| "learning_rate": 9.015018419003982e-06, | |
| "step_time_sec": 105.27 | |
| }, | |
| { | |
| "step": 845, | |
| "epoch": 0.45697289258433044, | |
| "wallclock": "2026-06-24T09:11:31.785014", | |
| "loss": 0.0815, | |
| "grad_norm": 0.7579560875892639, | |
| "learning_rate": 9.001930703697708e-06, | |
| "step_time_sec": 105.69 | |
| }, | |
| { | |
| "step": 850, | |
| "epoch": 0.45967687419725545, | |
| "wallclock": "2026-06-24T09:13:18.016135", | |
| "loss": 0.077, | |
| "grad_norm": 1.2188389301300049, | |
| "learning_rate": 8.988766244137065e-06, | |
| "step_time_sec": 106.23, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 33.45, | |
| "mem_reserved_gb": 80.99 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 855, | |
| "epoch": 0.4623808558101805, | |
| "wallclock": "2026-06-24T09:15:04.661348", | |
| "loss": 0.0777, | |
| "grad_norm": 0.7465812563896179, | |
| "learning_rate": 8.975525292774362e-06, | |
| "step_time_sec": 106.65 | |
| }, | |
| { | |
| "step": 860, | |
| "epoch": 0.4650848374231055, | |
| "wallclock": "2026-06-24T09:16:51.200338", | |
| "loss": 0.065, | |
| "grad_norm": 0.634141206741333, | |
| "learning_rate": 8.962208103528774e-06, | |
| "step_time_sec": 106.54 | |
| }, | |
| { | |
| "step": 865, | |
| "epoch": 0.46778881903603053, | |
| "wallclock": "2026-06-24T09:18:37.705663", | |
| "loss": 0.0723, | |
| "grad_norm": 0.5434172749519348, | |
| "learning_rate": 8.948814931781472e-06, | |
| "step_time_sec": 106.51 | |
| }, | |
| { | |
| "step": 870, | |
| "epoch": 0.4704928006489556, | |
| "wallclock": "2026-06-24T09:20:22.682651", | |
| "loss": 0.0674, | |
| "grad_norm": 0.851901650428772, | |
| "learning_rate": 8.935346034370732e-06, | |
| "step_time_sec": 104.98 | |
| }, | |
| { | |
| "step": 875, | |
| "epoch": 0.4731967822618806, | |
| "wallclock": "2026-06-24T09:22:08.000109", | |
| "loss": 0.0648, | |
| "grad_norm": 0.5568099617958069, | |
| "learning_rate": 8.921801669587005e-06, | |
| "step_time_sec": 105.32 | |
| }, | |
| { | |
| "step": 880, | |
| "epoch": 0.4759007638748057, | |
| "wallclock": "2026-06-24T09:23:53.107265", | |
| "loss": 0.073, | |
| "grad_norm": 0.726121723651886, | |
| "learning_rate": 8.908182097167965e-06, | |
| "step_time_sec": 105.11 | |
| }, | |
| { | |
| "step": 885, | |
| "epoch": 0.4786047454877307, | |
| "wallclock": "2026-06-24T09:25:38.993222", | |
| "loss": 0.0676, | |
| "grad_norm": 0.6540066003799438, | |
| "learning_rate": 8.894487578293534e-06, | |
| "step_time_sec": 105.89 | |
| }, | |
| { | |
| "step": 890, | |
| "epoch": 0.4813087271006557, | |
| "wallclock": "2026-06-24T09:27:25.285677", | |
| "loss": 0.0699, | |
| "grad_norm": 0.5976990461349487, | |
| "learning_rate": 8.880718375580857e-06, | |
| "step_time_sec": 106.29 | |
| }, | |
| { | |
| "step": 895, | |
| "epoch": 0.48401270871358076, | |
| "wallclock": "2026-06-24T09:29:12.234716", | |
| "loss": 0.0687, | |
| "grad_norm": 0.5673884749412537, | |
| "learning_rate": 8.866874753079286e-06, | |
| "step_time_sec": 106.95 | |
| }, | |
| { | |
| "step": 900, | |
| "epoch": 0.4867166903265058, | |
| "wallclock": "2026-06-24T09:30:57.290947", | |
| "loss": 0.0865, | |
| "grad_norm": 0.825077474117279, | |
| "learning_rate": 8.852956976265304e-06, | |
| "step_time_sec": 105.06, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 33.45, | |
| "mem_reserved_gb": 80.99 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 900, | |
| "epoch": 0.4867166903265058, | |
| "wallclock": "2026-06-24T09:32:37.746275", | |
| "eval_loss": 0.0734986960887909, | |
| "eval_runtime": 100.4508, | |
| "eval_samples_per_second": 4.978, | |
| "eval_steps_per_second": 1.244, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 39.05, | |
| "mem_reserved_gb": 80.99 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 905, | |
| "epoch": 0.4894206719394308, | |
| "wallclock": "2026-06-24T09:35:44.600337", | |
| "loss": 0.0661, | |
| "grad_norm": 0.6024107933044434, | |
| "learning_rate": 8.838965312037435e-06, | |
| "step_time_sec": 287.31 | |
| }, | |
| { | |
| "step": 910, | |
| "epoch": 0.49212465355235585, | |
| "wallclock": "2026-06-24T09:37:31.615349", | |
| "loss": 0.0864, | |
| "grad_norm": 0.7744714617729187, | |
| "learning_rate": 8.824900028711128e-06, | |
| "step_time_sec": 107.02 | |
| }, | |
| { | |
| "step": 915, | |
| "epoch": 0.49482863516528086, | |
| "wallclock": "2026-06-24T09:39:17.106852", | |
| "loss": 0.0608, | |
| "grad_norm": 0.6232128143310547, | |
| "learning_rate": 8.810761396013616e-06, | |
| "step_time_sec": 105.49 | |
| }, | |
| { | |
| "step": 920, | |
| "epoch": 0.4975326167782059, | |
| "wallclock": "2026-06-24T09:41:01.831415", | |
| "loss": 0.0667, | |
| "grad_norm": 0.7434114217758179, | |
| "learning_rate": 8.796549685078732e-06, | |
| "step_time_sec": 104.72 | |
| }, | |
| { | |
| "step": 925, | |
| "epoch": 0.500236598391131, | |
| "wallclock": "2026-06-24T09:42:49.107296", | |
| "loss": 0.0545, | |
| "grad_norm": 0.5467560887336731, | |
| "learning_rate": 8.782265168441722e-06, | |
| "step_time_sec": 107.28 | |
| }, | |
| { | |
| "step": 930, | |
| "epoch": 0.502940580004056, | |
| "wallclock": "2026-06-24T09:44:33.913707", | |
| "loss": 0.0608, | |
| "grad_norm": 0.49254247546195984, | |
| "learning_rate": 8.76790812003401e-06, | |
| "step_time_sec": 104.81 | |
| }, | |
| { | |
| "step": 935, | |
| "epoch": 0.505644561616981, | |
| "wallclock": "2026-06-24T09:46:19.229213", | |
| "loss": 0.0684, | |
| "grad_norm": 0.48784705996513367, | |
| "learning_rate": 8.753478815177947e-06, | |
| "step_time_sec": 105.32 | |
| }, | |
| { | |
| "step": 940, | |
| "epoch": 0.5083485432299061, | |
| "wallclock": "2026-06-24T09:48:05.411373", | |
| "loss": 0.0731, | |
| "grad_norm": 0.48523765802383423, | |
| "learning_rate": 8.738977530581534e-06, | |
| "step_time_sec": 106.18 | |
| }, | |
| { | |
| "step": 945, | |
| "epoch": 0.511052524842831, | |
| "wallclock": "2026-06-24T09:49:51.295620", | |
| "loss": 0.0843, | |
| "grad_norm": 1.2344911098480225, | |
| "learning_rate": 8.724404544333111e-06, | |
| "step_time_sec": 105.88 | |
| }, | |
| { | |
| "step": 950, | |
| "epoch": 0.5137565064557561, | |
| "wallclock": "2026-06-24T09:51:36.494251", | |
| "loss": 0.0754, | |
| "grad_norm": 0.9410877227783203, | |
| "learning_rate": 8.709760135896033e-06, | |
| "step_time_sec": 105.2, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 33.45, | |
| "mem_reserved_gb": 80.99 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 955, | |
| "epoch": 0.5164604880686812, | |
| "wallclock": "2026-06-24T09:53:21.420428", | |
| "loss": 0.0843, | |
| "grad_norm": 0.3976033329963684, | |
| "learning_rate": 8.695044586103297e-06, | |
| "step_time_sec": 104.93 | |
| }, | |
| { | |
| "step": 960, | |
| "epoch": 0.5191644696816061, | |
| "wallclock": "2026-06-24T09:55:05.899797", | |
| "loss": 0.0742, | |
| "grad_norm": 0.7704766392707825, | |
| "learning_rate": 8.680258177152166e-06, | |
| "step_time_sec": 104.48 | |
| }, | |
| { | |
| "step": 965, | |
| "epoch": 0.5218684512945312, | |
| "wallclock": "2026-06-24T09:56:51.000492", | |
| "loss": 0.0685, | |
| "grad_norm": 0.7557464838027954, | |
| "learning_rate": 8.665401192598761e-06, | |
| "step_time_sec": 105.1 | |
| }, | |
| { | |
| "step": 970, | |
| "epoch": 0.5245724329074563, | |
| "wallclock": "2026-06-24T09:58:36.814098", | |
| "loss": 0.0646, | |
| "grad_norm": 0.8066175580024719, | |
| "learning_rate": 8.65047391735261e-06, | |
| "step_time_sec": 105.81 | |
| }, | |
| { | |
| "step": 975, | |
| "epoch": 0.5272764145203812, | |
| "wallclock": "2026-06-24T10:00:21.726492", | |
| "loss": 0.0703, | |
| "grad_norm": 1.2292455434799194, | |
| "learning_rate": 8.635476637671197e-06, | |
| "step_time_sec": 104.91 | |
| }, | |
| { | |
| "step": 980, | |
| "epoch": 0.5299803961333063, | |
| "wallclock": "2026-06-24T10:02:07.309600", | |
| "loss": 0.0722, | |
| "grad_norm": 0.7355031967163086, | |
| "learning_rate": 8.620409641154465e-06, | |
| "step_time_sec": 105.58 | |
| }, | |
| { | |
| "step": 985, | |
| "epoch": 0.5326843777462313, | |
| "wallclock": "2026-06-24T10:03:54.319283", | |
| "loss": 0.0593, | |
| "grad_norm": 0.9767148494720459, | |
| "learning_rate": 8.605273216739307e-06, | |
| "step_time_sec": 107.01 | |
| }, | |
| { | |
| "step": 990, | |
| "epoch": 0.5353883593591564, | |
| "wallclock": "2026-06-24T10:05:40.086818", | |
| "loss": 0.0597, | |
| "grad_norm": 0.6078879237174988, | |
| "learning_rate": 8.590067654694017e-06, | |
| "step_time_sec": 105.77 | |
| }, | |
| { | |
| "step": 995, | |
| "epoch": 0.5380923409720814, | |
| "wallclock": "2026-06-24T10:07:24.909120", | |
| "loss": 0.0819, | |
| "grad_norm": 0.5737846493721008, | |
| "learning_rate": 8.574793246612727e-06, | |
| "step_time_sec": 104.82 | |
| }, | |
| { | |
| "step": 1000, | |
| "epoch": 0.5407963225850064, | |
| "wallclock": "2026-06-24T10:09:11.121871", | |
| "loss": 0.0704, | |
| "grad_norm": 0.5743271708488464, | |
| "learning_rate": 8.559450285409825e-06, | |
| "step_time_sec": 106.21, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 33.45, | |
| "mem_reserved_gb": 80.99 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 1000, | |
| "epoch": 0.5407963225850064, | |
| "wallclock": "2026-06-24T10:10:51.453842", | |
| "eval_loss": 0.07477952539920807, | |
| "eval_runtime": 100.3276, | |
| "eval_samples_per_second": 4.984, | |
| "eval_steps_per_second": 1.246, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 39.05, | |
| "mem_reserved_gb": 80.99 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 1005, | |
| "epoch": 0.5435003041979315, | |
| "wallclock": "2026-06-24T10:13:58.794483", | |
| "loss": 0.0445, | |
| "grad_norm": 0.3843560218811035, | |
| "learning_rate": 8.544039065314317e-06, | |
| "step_time_sec": 287.67 | |
| }, | |
| { | |
| "step": 1010, | |
| "epoch": 0.5462042858108564, | |
| "wallclock": "2026-06-24T10:15:44.821711", | |
| "loss": 0.0824, | |
| "grad_norm": 0.788098156452179, | |
| "learning_rate": 8.528559881864209e-06, | |
| "step_time_sec": 106.03 | |
| }, | |
| { | |
| "step": 1015, | |
| "epoch": 0.5489082674237815, | |
| "wallclock": "2026-06-24T10:17:30.511914", | |
| "loss": 0.0593, | |
| "grad_norm": 0.5601520538330078, | |
| "learning_rate": 8.513013031900814e-06, | |
| "step_time_sec": 105.69 | |
| }, | |
| { | |
| "step": 1020, | |
| "epoch": 0.5516122490367066, | |
| "wallclock": "2026-06-24T10:19:15.717942", | |
| "loss": 0.0578, | |
| "grad_norm": 0.4331408739089966, | |
| "learning_rate": 8.497398813563086e-06, | |
| "step_time_sec": 105.21 | |
| }, | |
| { | |
| "step": 1025, | |
| "epoch": 0.5543162306496315, | |
| "wallclock": "2026-06-24T10:21:00.311987", | |
| "loss": 0.0772, | |
| "grad_norm": 0.7381686568260193, | |
| "learning_rate": 8.48171752628188e-06, | |
| "step_time_sec": 104.59 | |
| }, | |
| { | |
| "step": 1030, | |
| "epoch": 0.5570202122625566, | |
| "wallclock": "2026-06-24T10:22:46.121305", | |
| "loss": 0.052, | |
| "grad_norm": 0.7812600135803223, | |
| "learning_rate": 8.46596947077422e-06, | |
| "step_time_sec": 105.81 | |
| }, | |
| { | |
| "step": 1035, | |
| "epoch": 0.5597241938754817, | |
| "wallclock": "2026-06-24T10:24:31.783704", | |
| "loss": 0.0757, | |
| "grad_norm": 0.7333759069442749, | |
| "learning_rate": 8.450154949037539e-06, | |
| "step_time_sec": 105.66 | |
| }, | |
| { | |
| "step": 1040, | |
| "epoch": 0.5624281754884067, | |
| "wallclock": "2026-06-24T10:26:17.283088", | |
| "loss": 0.0588, | |
| "grad_norm": 0.7570787668228149, | |
| "learning_rate": 8.434274264343869e-06, | |
| "step_time_sec": 105.5 | |
| }, | |
| { | |
| "step": 1045, | |
| "epoch": 0.5651321571013317, | |
| "wallclock": "2026-06-24T10:28:03.723823", | |
| "loss": 0.0491, | |
| "grad_norm": 0.42195039987564087, | |
| "learning_rate": 8.418327721234044e-06, | |
| "step_time_sec": 106.44 | |
| }, | |
| { | |
| "step": 1050, | |
| "epoch": 0.5678361387142568, | |
| "wallclock": "2026-06-24T10:29:49.107217", | |
| "loss": 0.0524, | |
| "grad_norm": 0.5051612257957458, | |
| "learning_rate": 8.40231562551185e-06, | |
| "step_time_sec": 105.38, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 33.45, | |
| "mem_reserved_gb": 80.99 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 1055, | |
| "epoch": 0.5705401203271818, | |
| "wallclock": "2026-06-24T10:31:34.010033", | |
| "loss": 0.0582, | |
| "grad_norm": 0.6454283595085144, | |
| "learning_rate": 8.386238284238163e-06, | |
| "step_time_sec": 104.9 | |
| }, | |
| { | |
| "step": 1060, | |
| "epoch": 0.5732441019401068, | |
| "wallclock": "2026-06-24T10:33:20.785609", | |
| "loss": 0.0835, | |
| "grad_norm": 0.9067649841308594, | |
| "learning_rate": 8.37009600572506e-06, | |
| "step_time_sec": 106.78 | |
| }, | |
| { | |
| "step": 1065, | |
| "epoch": 0.5759480835530318, | |
| "wallclock": "2026-06-24T10:35:06.993088", | |
| "loss": 0.0608, | |
| "grad_norm": 0.5329049825668335, | |
| "learning_rate": 8.35388909952991e-06, | |
| "step_time_sec": 106.21 | |
| }, | |
| { | |
| "step": 1070, | |
| "epoch": 0.5786520651659569, | |
| "wallclock": "2026-06-24T10:36:51.613853", | |
| "loss": 0.0732, | |
| "grad_norm": 1.0739482641220093, | |
| "learning_rate": 8.337617876449427e-06, | |
| "step_time_sec": 104.62 | |
| }, | |
| { | |
| "step": 1075, | |
| "epoch": 0.5813560467788819, | |
| "wallclock": "2026-06-24T10:38:38.500861", | |
| "loss": 0.0712, | |
| "grad_norm": 0.5759013295173645, | |
| "learning_rate": 8.321282648513727e-06, | |
| "step_time_sec": 106.89 | |
| }, | |
| { | |
| "step": 1080, | |
| "epoch": 0.5840600283918069, | |
| "wallclock": "2026-06-24T10:40:24.115300", | |
| "loss": 0.06, | |
| "grad_norm": 0.5534053444862366, | |
| "learning_rate": 8.304883728980325e-06, | |
| "step_time_sec": 105.61 | |
| }, | |
| { | |
| "step": 1085, | |
| "epoch": 0.586764010004732, | |
| "wallclock": "2026-06-24T10:42:09.998730", | |
| "loss": 0.0613, | |
| "grad_norm": 0.7383453845977783, | |
| "learning_rate": 8.288421432328146e-06, | |
| "step_time_sec": 105.88 | |
| }, | |
| { | |
| "step": 1090, | |
| "epoch": 0.589467991617657, | |
| "wallclock": "2026-06-24T10:43:56.874611", | |
| "loss": 0.0611, | |
| "grad_norm": 0.5191856026649475, | |
| "learning_rate": 8.271896074251483e-06, | |
| "step_time_sec": 106.88 | |
| }, | |
| { | |
| "step": 1095, | |
| "epoch": 0.592171973230582, | |
| "wallclock": "2026-06-24T10:45:41.729013", | |
| "loss": 0.0571, | |
| "grad_norm": 0.5893298983573914, | |
| "learning_rate": 8.255307971653941e-06, | |
| "step_time_sec": 104.85 | |
| }, | |
| { | |
| "step": 1100, | |
| "epoch": 0.5948759548435071, | |
| "wallclock": "2026-06-24T10:47:28.787718", | |
| "loss": 0.0579, | |
| "grad_norm": 0.6628295183181763, | |
| "learning_rate": 8.238657442642375e-06, | |
| "step_time_sec": 107.06, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 33.45, | |
| "mem_reserved_gb": 80.99 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 1100, | |
| "epoch": 0.5948759548435071, | |
| "wallclock": "2026-06-24T10:49:09.237050", | |
| "eval_loss": 0.07260795682668686, | |
| "eval_runtime": 100.4446, | |
| "eval_samples_per_second": 4.978, | |
| "eval_steps_per_second": 1.244, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 39.05, | |
| "mem_reserved_gb": 80.99 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 1105, | |
| "epoch": 0.5975799364564321, | |
| "wallclock": "2026-06-24T10:52:18.585949", | |
| "loss": 0.0701, | |
| "grad_norm": 0.9406186938285828, | |
| "learning_rate": 8.221944806520768e-06, | |
| "step_time_sec": 289.8 | |
| }, | |
| { | |
| "step": 1110, | |
| "epoch": 0.6002839180693571, | |
| "wallclock": "2026-06-24T10:54:03.822666", | |
| "loss": 0.0709, | |
| "grad_norm": 0.6916760206222534, | |
| "learning_rate": 8.205170383784125e-06, | |
| "step_time_sec": 105.24 | |
| }, | |
| { | |
| "step": 1115, | |
| "epoch": 0.6029878996822822, | |
| "wallclock": "2026-06-24T10:55:49.426292", | |
| "loss": 0.0591, | |
| "grad_norm": 0.4082253575325012, | |
| "learning_rate": 8.188334496112322e-06, | |
| "step_time_sec": 105.6 | |
| }, | |
| { | |
| "step": 1120, | |
| "epoch": 0.6056918812952072, | |
| "wallclock": "2026-06-24T10:57:36.587027", | |
| "loss": 0.053, | |
| "grad_norm": 0.5415107011795044, | |
| "learning_rate": 8.171437466363934e-06, | |
| "step_time_sec": 107.16 | |
| }, | |
| { | |
| "step": 1125, | |
| "epoch": 0.6083958629081322, | |
| "wallclock": "2026-06-24T10:59:22.605930", | |
| "loss": 0.0612, | |
| "grad_norm": 0.5975248217582703, | |
| "learning_rate": 8.154479618570046e-06, | |
| "step_time_sec": 106.02 | |
| }, | |
| { | |
| "step": 1130, | |
| "epoch": 0.6110998445210573, | |
| "wallclock": "2026-06-24T11:01:07.592225", | |
| "loss": 0.0632, | |
| "grad_norm": 0.36586880683898926, | |
| "learning_rate": 8.137461277928039e-06, | |
| "step_time_sec": 104.99 | |
| }, | |
| { | |
| "step": 1135, | |
| "epoch": 0.6138038261339823, | |
| "wallclock": "2026-06-24T11:02:52.501190", | |
| "loss": 0.0638, | |
| "grad_norm": 0.6821796298027039, | |
| "learning_rate": 8.120382770795354e-06, | |
| "step_time_sec": 104.91 | |
| }, | |
| { | |
| "step": 1140, | |
| "epoch": 0.6165078077469073, | |
| "wallclock": "2026-06-24T11:04:37.321268", | |
| "loss": 0.0672, | |
| "grad_norm": 0.7406355142593384, | |
| "learning_rate": 8.103244424683232e-06, | |
| "step_time_sec": 104.82 | |
| }, | |
| { | |
| "step": 1145, | |
| "epoch": 0.6192117893598323, | |
| "wallclock": "2026-06-24T11:06:23.997767", | |
| "loss": 0.0551, | |
| "grad_norm": 0.6757558584213257, | |
| "learning_rate": 8.086046568250438e-06, | |
| "step_time_sec": 106.68 | |
| }, | |
| { | |
| "step": 1150, | |
| "epoch": 0.6219157709727574, | |
| "wallclock": "2026-06-24T11:08:10.298796", | |
| "loss": 0.0585, | |
| "grad_norm": 0.6179367899894714, | |
| "learning_rate": 8.06878953129695e-06, | |
| "step_time_sec": 106.3, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 33.45, | |
| "mem_reserved_gb": 80.99 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 1155, | |
| "epoch": 0.6246197525856824, | |
| "wallclock": "2026-06-24T11:09:56.115384", | |
| "loss": 0.0607, | |
| "grad_norm": 0.9675397872924805, | |
| "learning_rate": 8.051473644757644e-06, | |
| "step_time_sec": 105.82 | |
| }, | |
| { | |
| "step": 1160, | |
| "epoch": 0.6273237341986074, | |
| "wallclock": "2026-06-24T11:11:41.181597", | |
| "loss": 0.0779, | |
| "grad_norm": 0.6827834248542786, | |
| "learning_rate": 8.034099240695942e-06, | |
| "step_time_sec": 105.07 | |
| }, | |
| { | |
| "step": 1165, | |
| "epoch": 0.6300277158115325, | |
| "wallclock": "2026-06-24T11:13:27.479419", | |
| "loss": 0.0539, | |
| "grad_norm": 0.43536603450775146, | |
| "learning_rate": 8.016666652297443e-06, | |
| "step_time_sec": 106.3 | |
| }, | |
| { | |
| "step": 1170, | |
| "epoch": 0.6327316974244576, | |
| "wallclock": "2026-06-24T11:15:14.490172", | |
| "loss": 0.0508, | |
| "grad_norm": 0.5545168519020081, | |
| "learning_rate": 7.999176213863536e-06, | |
| "step_time_sec": 107.01 | |
| }, | |
| { | |
| "step": 1175, | |
| "epoch": 0.6354356790373825, | |
| "wallclock": "2026-06-24T11:17:00.009949", | |
| "loss": 0.0414, | |
| "grad_norm": 0.43939608335494995, | |
| "learning_rate": 7.981628260804992e-06, | |
| "step_time_sec": 105.52 | |
| }, | |
| { | |
| "step": 1180, | |
| "epoch": 0.6381396606503076, | |
| "wallclock": "2026-06-24T11:18:49.614174", | |
| "loss": 0.0585, | |
| "grad_norm": 0.7514466047286987, | |
| "learning_rate": 7.964023129635528e-06, | |
| "step_time_sec": 109.6 | |
| }, | |
| { | |
| "step": 1185, | |
| "epoch": 0.6408436422632326, | |
| "wallclock": "2026-06-24T11:20:35.886257", | |
| "loss": 0.0742, | |
| "grad_norm": 1.2430953979492188, | |
| "learning_rate": 7.946361157965354e-06, | |
| "step_time_sec": 106.27 | |
| }, | |
| { | |
| "step": 1190, | |
| "epoch": 0.6435476238761576, | |
| "wallclock": "2026-06-24T11:22:21.402749", | |
| "loss": 0.0556, | |
| "grad_norm": 0.6524196863174438, | |
| "learning_rate": 7.928642684494696e-06, | |
| "step_time_sec": 105.52 | |
| }, | |
| { | |
| "step": 1195, | |
| "epoch": 0.6462516054890827, | |
| "wallclock": "2026-06-24T11:24:07.831963", | |
| "loss": 0.066, | |
| "grad_norm": 0.7481945157051086, | |
| "learning_rate": 7.910868049007312e-06, | |
| "step_time_sec": 106.43 | |
| }, | |
| { | |
| "step": 1200, | |
| "epoch": 0.6489555871020077, | |
| "wallclock": "2026-06-24T11:25:53.724397", | |
| "loss": 0.0712, | |
| "grad_norm": 0.9145833849906921, | |
| "learning_rate": 7.893037592363959e-06, | |
| "step_time_sec": 105.89, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 33.45, | |
| "mem_reserved_gb": 80.99 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 1200, | |
| "epoch": 0.6489555871020077, | |
| "wallclock": "2026-06-24T11:27:34.112458", | |
| "eval_loss": 0.0755784884095192, | |
| "eval_runtime": 100.3831, | |
| "eval_samples_per_second": 4.981, | |
| "eval_steps_per_second": 1.245, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 39.05, | |
| "mem_reserved_gb": 80.99 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 1205, | |
| "epoch": 0.6516595687149327, | |
| "wallclock": "2026-06-24T11:30:41.010276", | |
| "loss": 0.0515, | |
| "grad_norm": 0.7790878415107727, | |
| "learning_rate": 7.875151656495874e-06, | |
| "step_time_sec": 287.29 | |
| }, | |
| { | |
| "step": 1210, | |
| "epoch": 0.6543635503278578, | |
| "wallclock": "2026-06-24T11:32:27.481096", | |
| "loss": 0.0566, | |
| "grad_norm": 0.6310415267944336, | |
| "learning_rate": 7.8572105843982e-06, | |
| "step_time_sec": 106.47 | |
| }, | |
| { | |
| "step": 1215, | |
| "epoch": 0.6570675319407828, | |
| "wallclock": "2026-06-24T11:34:13.893600", | |
| "loss": 0.0451, | |
| "grad_norm": 0.5569303631782532, | |
| "learning_rate": 7.839214720123427e-06, | |
| "step_time_sec": 106.41 | |
| }, | |
| { | |
| "step": 1220, | |
| "epoch": 0.6597715135537079, | |
| "wallclock": "2026-06-24T11:35:58.515537", | |
| "loss": 0.0688, | |
| "grad_norm": 0.636441171169281, | |
| "learning_rate": 7.821164408774772e-06, | |
| "step_time_sec": 104.62 | |
| }, | |
| { | |
| "step": 1225, | |
| "epoch": 0.6624754951666328, | |
| "wallclock": "2026-06-24T11:37:42.397948", | |
| "loss": 0.0697, | |
| "grad_norm": 0.7517639398574829, | |
| "learning_rate": 7.803059996499584e-06, | |
| "step_time_sec": 103.88 | |
| }, | |
| { | |
| "step": 1230, | |
| "epoch": 0.6651794767795579, | |
| "wallclock": "2026-06-24T11:39:28.808346", | |
| "loss": 0.0575, | |
| "grad_norm": 0.5596706867218018, | |
| "learning_rate": 7.78490183048269e-06, | |
| "step_time_sec": 106.41 | |
| }, | |
| { | |
| "step": 1235, | |
| "epoch": 0.667883458392483, | |
| "wallclock": "2026-06-24T11:41:14.200078", | |
| "loss": 0.0586, | |
| "grad_norm": 0.645969033241272, | |
| "learning_rate": 7.76669025893974e-06, | |
| "step_time_sec": 105.39 | |
| }, | |
| { | |
| "step": 1240, | |
| "epoch": 0.6705874400054079, | |
| "wallclock": "2026-06-24T11:43:00.685648", | |
| "loss": 0.0658, | |
| "grad_norm": 0.7119715213775635, | |
| "learning_rate": 7.748425631110536e-06, | |
| "step_time_sec": 106.49 | |
| }, | |
| { | |
| "step": 1245, | |
| "epoch": 0.673291421618333, | |
| "wallclock": "2026-06-24T11:44:47.803121", | |
| "loss": 0.07, | |
| "grad_norm": 1.2201249599456787, | |
| "learning_rate": 7.730108297252328e-06, | |
| "step_time_sec": 107.12 | |
| }, | |
| { | |
| "step": 1250, | |
| "epoch": 0.6759954032312581, | |
| "wallclock": "2026-06-24T11:46:32.404383", | |
| "loss": 0.0473, | |
| "grad_norm": 0.7548292875289917, | |
| "learning_rate": 7.7117386086331e-06, | |
| "step_time_sec": 104.6, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 33.45, | |
| "mem_reserved_gb": 80.99 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 1255, | |
| "epoch": 0.678699384844183, | |
| "wallclock": "2026-06-24T11:48:18.393499", | |
| "loss": 0.0855, | |
| "grad_norm": 1.1629971265792847, | |
| "learning_rate": 7.693316917524832e-06, | |
| "step_time_sec": 105.99 | |
| }, | |
| { | |
| "step": 1260, | |
| "epoch": 0.6814033664571081, | |
| "wallclock": "2026-06-24T11:50:04.895492", | |
| "loss": 0.0511, | |
| "grad_norm": 0.798232913017273, | |
| "learning_rate": 7.674843577196747e-06, | |
| "step_time_sec": 106.5 | |
| }, | |
| { | |
| "step": 1265, | |
| "epoch": 0.6841073480700331, | |
| "wallclock": "2026-06-24T11:51:50.622300", | |
| "loss": 0.055, | |
| "grad_norm": 0.5960519909858704, | |
| "learning_rate": 7.656318941908534e-06, | |
| "step_time_sec": 105.73 | |
| }, | |
| { | |
| "step": 1270, | |
| "epoch": 0.6868113296829581, | |
| "wallclock": "2026-06-24T11:53:37.318998", | |
| "loss": 0.069, | |
| "grad_norm": 0.8142486810684204, | |
| "learning_rate": 7.637743366903559e-06, | |
| "step_time_sec": 106.7 | |
| }, | |
| { | |
| "step": 1275, | |
| "epoch": 0.6895153112958832, | |
| "wallclock": "2026-06-24T11:55:24.097132", | |
| "loss": 0.0486, | |
| "grad_norm": 0.6205362677574158, | |
| "learning_rate": 7.61911720840204e-06, | |
| "step_time_sec": 106.78 | |
| }, | |
| { | |
| "step": 1280, | |
| "epoch": 0.6922192929088082, | |
| "wallclock": "2026-06-24T11:57:09.718166", | |
| "loss": 0.0653, | |
| "grad_norm": 1.1235874891281128, | |
| "learning_rate": 7.60044082359424e-06, | |
| "step_time_sec": 105.62 | |
| }, | |
| { | |
| "step": 1285, | |
| "epoch": 0.6949232745217333, | |
| "wallclock": "2026-06-24T11:58:56.721018", | |
| "loss": 0.078, | |
| "grad_norm": 0.8355940580368042, | |
| "learning_rate": 7.581714570633586e-06, | |
| "step_time_sec": 107.0 | |
| }, | |
| { | |
| "step": 1290, | |
| "epoch": 0.6976272561346583, | |
| "wallclock": "2026-06-24T12:00:43.212505", | |
| "loss": 0.0592, | |
| "grad_norm": 2.8660950660705566, | |
| "learning_rate": 7.562938808629829e-06, | |
| "step_time_sec": 106.49 | |
| }, | |
| { | |
| "step": 1295, | |
| "epoch": 0.7003312377475833, | |
| "wallclock": "2026-06-24T12:02:28.492093", | |
| "loss": 0.058, | |
| "grad_norm": 0.8744626045227051, | |
| "learning_rate": 7.54411389764214e-06, | |
| "step_time_sec": 105.28 | |
| }, | |
| { | |
| "step": 1300, | |
| "epoch": 0.7030352193605084, | |
| "wallclock": "2026-06-24T12:04:14.101813", | |
| "loss": 0.0608, | |
| "grad_norm": 0.6016539931297302, | |
| "learning_rate": 7.52524019867221e-06, | |
| "step_time_sec": 105.61, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 33.45, | |
| "mem_reserved_gb": 80.99 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 1300, | |
| "epoch": 0.7030352193605084, | |
| "wallclock": "2026-06-24T12:05:54.677349", | |
| "eval_loss": 0.07137385755777359, | |
| "eval_runtime": 100.5705, | |
| "eval_samples_per_second": 4.972, | |
| "eval_steps_per_second": 1.243, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 39.05, | |
| "mem_reserved_gb": 80.99 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 1305, | |
| "epoch": 0.7057392009734333, | |
| "wallclock": "2026-06-24T12:08:54.483469", | |
| "loss": 0.0709, | |
| "grad_norm": 0.6734678149223328, | |
| "learning_rate": 7.506318073657331e-06, | |
| "step_time_sec": 280.38 | |
| }, | |
| { | |
| "step": 1310, | |
| "epoch": 0.7084431825863584, | |
| "wallclock": "2026-06-24T12:10:38.696936", | |
| "loss": 0.0623, | |
| "grad_norm": 0.7641857266426086, | |
| "learning_rate": 7.4873478854634476e-06, | |
| "step_time_sec": 104.21 | |
| }, | |
| { | |
| "step": 1315, | |
| "epoch": 0.7111471641992835, | |
| "wallclock": "2026-06-24T12:12:32.495225", | |
| "loss": 0.0587, | |
| "grad_norm": 0.8493006229400635, | |
| "learning_rate": 7.4683299978782076e-06, | |
| "step_time_sec": 113.8 | |
| }, | |
| { | |
| "step": 1320, | |
| "epoch": 0.7138511458122084, | |
| "wallclock": "2026-06-24T12:14:17.906835", | |
| "loss": 0.0474, | |
| "grad_norm": 0.4841386377811432, | |
| "learning_rate": 7.449264775603979e-06, | |
| "step_time_sec": 105.41 | |
| }, | |
| { | |
| "step": 1325, | |
| "epoch": 0.7165551274251335, | |
| "wallclock": "2026-06-24T12:16:05.319711", | |
| "loss": 0.0603, | |
| "grad_norm": 0.872616171836853, | |
| "learning_rate": 7.430152584250856e-06, | |
| "step_time_sec": 107.41 | |
| }, | |
| { | |
| "step": 1330, | |
| "epoch": 0.7192591090380586, | |
| "wallclock": "2026-06-24T12:17:51.521993", | |
| "loss": 0.0525, | |
| "grad_norm": 0.7304244041442871, | |
| "learning_rate": 7.410993790329652e-06, | |
| "step_time_sec": 106.2 | |
| }, | |
| { | |
| "step": 1335, | |
| "epoch": 0.7219630906509835, | |
| "wallclock": "2026-06-24T12:19:38.816730", | |
| "loss": 0.0441, | |
| "grad_norm": 0.5004603266716003, | |
| "learning_rate": 7.3917887612448665e-06, | |
| "step_time_sec": 107.29 | |
| }, | |
| { | |
| "step": 1340, | |
| "epoch": 0.7246670722639086, | |
| "wallclock": "2026-06-24T12:21:24.716857", | |
| "loss": 0.0669, | |
| "grad_norm": 0.6454601287841797, | |
| "learning_rate": 7.372537865287648e-06, | |
| "step_time_sec": 105.9 | |
| }, | |
| { | |
| "step": 1345, | |
| "epoch": 0.7273710538768337, | |
| "wallclock": "2026-06-24T12:23:10.411567", | |
| "loss": 0.0422, | |
| "grad_norm": 0.9636154174804688, | |
| "learning_rate": 7.353241471628716e-06, | |
| "step_time_sec": 105.69 | |
| }, | |
| { | |
| "step": 1350, | |
| "epoch": 0.7300750354897587, | |
| "wallclock": "2026-06-24T12:24:56.017994", | |
| "loss": 0.0456, | |
| "grad_norm": 0.6495915651321411, | |
| "learning_rate": 7.3338999503112975e-06, | |
| "step_time_sec": 105.61, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 33.45, | |
| "mem_reserved_gb": 80.99 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 1355, | |
| "epoch": 0.7327790171026837, | |
| "wallclock": "2026-06-24T12:26:41.498254", | |
| "loss": 0.0547, | |
| "grad_norm": 0.8502314686775208, | |
| "learning_rate": 7.314513672244021e-06, | |
| "step_time_sec": 105.48 | |
| }, | |
| { | |
| "step": 1360, | |
| "epoch": 0.7354829987156087, | |
| "wallclock": "2026-06-24T12:28:28.488384", | |
| "loss": 0.0607, | |
| "grad_norm": 0.5915205478668213, | |
| "learning_rate": 7.295083009193808e-06, | |
| "step_time_sec": 106.99 | |
| }, | |
| { | |
| "step": 1365, | |
| "epoch": 0.7381869803285338, | |
| "wallclock": "2026-06-24T12:30:17.161947", | |
| "loss": 0.0654, | |
| "grad_norm": 0.7883327603340149, | |
| "learning_rate": 7.275608333778742e-06, | |
| "step_time_sec": 108.67 | |
| }, | |
| { | |
| "step": 1370, | |
| "epoch": 0.7408909619414588, | |
| "wallclock": "2026-06-24T12:32:05.107393", | |
| "loss": 0.0552, | |
| "grad_norm": 0.7381963133811951, | |
| "learning_rate": 7.256090019460922e-06, | |
| "step_time_sec": 107.95 | |
| }, | |
| { | |
| "step": 1375, | |
| "epoch": 0.7435949435543838, | |
| "wallclock": "2026-06-24T12:33:52.735316", | |
| "loss": 0.0649, | |
| "grad_norm": 0.8336455821990967, | |
| "learning_rate": 7.236528440539303e-06, | |
| "step_time_sec": 107.63 | |
| }, | |
| { | |
| "step": 1380, | |
| "epoch": 0.7462989251673089, | |
| "wallclock": "2026-06-24T12:35:40.395535", | |
| "loss": 0.0393, | |
| "grad_norm": 0.5212644338607788, | |
| "learning_rate": 7.2169239721425154e-06, | |
| "step_time_sec": 107.66 | |
| }, | |
| { | |
| "step": 1385, | |
| "epoch": 0.7490029067802338, | |
| "wallclock": "2026-06-24T12:37:27.680116", | |
| "loss": 0.0633, | |
| "grad_norm": 0.8368508815765381, | |
| "learning_rate": 7.197276990221677e-06, | |
| "step_time_sec": 107.28 | |
| }, | |
| { | |
| "step": 1390, | |
| "epoch": 0.7517068883931589, | |
| "wallclock": "2026-06-24T12:39:14.615045", | |
| "loss": 0.0497, | |
| "grad_norm": 0.7919797897338867, | |
| "learning_rate": 7.177587871543172e-06, | |
| "step_time_sec": 106.93 | |
| }, | |
| { | |
| "step": 1395, | |
| "epoch": 0.754410870006084, | |
| "wallclock": "2026-06-24T12:41:03.202011", | |
| "loss": 0.0564, | |
| "grad_norm": 0.8120989799499512, | |
| "learning_rate": 7.157856993681442e-06, | |
| "step_time_sec": 108.59 | |
| }, | |
| { | |
| "step": 1400, | |
| "epoch": 0.757114851619009, | |
| "wallclock": "2026-06-24T12:42:50.343742", | |
| "loss": 0.0647, | |
| "grad_norm": 0.6419529318809509, | |
| "learning_rate": 7.138084735011727e-06, | |
| "step_time_sec": 107.14, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 33.45, | |
| "mem_reserved_gb": 80.99 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 1400, | |
| "epoch": 0.757114851619009, | |
| "wallclock": "2026-06-24T12:44:31.432549", | |
| "eval_loss": 0.07097452133893967, | |
| "eval_runtime": 101.0814, | |
| "eval_samples_per_second": 4.947, | |
| "eval_steps_per_second": 1.237, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 39.05, | |
| "mem_reserved_gb": 80.99 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 1405, | |
| "epoch": 0.759818833231934, | |
| "wallclock": "2026-06-24T12:47:46.178602", | |
| "loss": 0.0585, | |
| "grad_norm": 0.5049331188201904, | |
| "learning_rate": 7.118271474702828e-06, | |
| "step_time_sec": 295.83 | |
| }, | |
| { | |
| "step": 1410, | |
| "epoch": 0.7625228148448591, | |
| "wallclock": "2026-06-24T12:49:34.393720", | |
| "loss": 0.0597, | |
| "grad_norm": 0.8283151984214783, | |
| "learning_rate": 7.098417592709819e-06, | |
| "step_time_sec": 108.22 | |
| }, | |
| { | |
| "step": 1415, | |
| "epoch": 0.7652267964577841, | |
| "wallclock": "2026-06-24T12:51:22.319403", | |
| "loss": 0.052, | |
| "grad_norm": 0.6273178458213806, | |
| "learning_rate": 7.078523469766772e-06, | |
| "step_time_sec": 107.93 | |
| }, | |
| { | |
| "step": 1420, | |
| "epoch": 0.7679307780707091, | |
| "wallclock": "2026-06-24T12:53:09.791827", | |
| "loss": 0.0504, | |
| "grad_norm": 0.6756861805915833, | |
| "learning_rate": 7.0585894873794514e-06, | |
| "step_time_sec": 107.47 | |
| }, | |
| { | |
| "step": 1425, | |
| "epoch": 0.7706347596836342, | |
| "wallclock": "2026-06-24T12:54:57.620245", | |
| "loss": 0.0341, | |
| "grad_norm": 0.5247818231582642, | |
| "learning_rate": 7.038616027817998e-06, | |
| "step_time_sec": 107.83 | |
| }, | |
| { | |
| "step": 1430, | |
| "epoch": 0.7733387412965592, | |
| "wallclock": "2026-06-24T12:56:43.618402", | |
| "loss": 0.0617, | |
| "grad_norm": 0.5578892230987549, | |
| "learning_rate": 7.018603474109601e-06, | |
| "step_time_sec": 106.0 | |
| }, | |
| { | |
| "step": 1435, | |
| "epoch": 0.7760427229094842, | |
| "wallclock": "2026-06-24T12:58:29.201049", | |
| "loss": 0.0443, | |
| "grad_norm": 0.8692203760147095, | |
| "learning_rate": 6.9985522100311465e-06, | |
| "step_time_sec": 105.58 | |
| }, | |
| { | |
| "step": 1440, | |
| "epoch": 0.7787467045224092, | |
| "wallclock": "2026-06-24T13:00:16.509392", | |
| "loss": 0.0692, | |
| "grad_norm": 0.6393124461174011, | |
| "learning_rate": 6.978462620101865e-06, | |
| "step_time_sec": 107.31 | |
| }, | |
| { | |
| "step": 1445, | |
| "epoch": 0.7814506861353343, | |
| "wallclock": "2026-06-24T13:02:04.488002", | |
| "loss": 0.0524, | |
| "grad_norm": 0.66062992811203, | |
| "learning_rate": 6.958335089575952e-06, | |
| "step_time_sec": 107.98 | |
| }, | |
| { | |
| "step": 1450, | |
| "epoch": 0.7841546677482593, | |
| "wallclock": "2026-06-24T13:03:51.464399", | |
| "loss": 0.0606, | |
| "grad_norm": 0.3925676643848419, | |
| "learning_rate": 6.938170004435186e-06, | |
| "step_time_sec": 106.98, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 33.45, | |
| "mem_reserved_gb": 80.99 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 1455, | |
| "epoch": 0.7868586493611843, | |
| "wallclock": "2026-06-24T13:05:39.613199", | |
| "loss": 0.0539, | |
| "grad_norm": 0.7175688147544861, | |
| "learning_rate": 6.91796775138152e-06, | |
| "step_time_sec": 108.15 | |
| }, | |
| { | |
| "step": 1460, | |
| "epoch": 0.7895626309741094, | |
| "wallclock": "2026-06-24T13:07:26.686179", | |
| "loss": 0.0571, | |
| "grad_norm": 0.6691136360168457, | |
| "learning_rate": 6.89772871782967e-06, | |
| "step_time_sec": 107.07 | |
| }, | |
| { | |
| "step": 1465, | |
| "epoch": 0.7922666125870345, | |
| "wallclock": "2026-06-24T13:09:14.110537", | |
| "loss": 0.0749, | |
| "grad_norm": 0.963224470615387, | |
| "learning_rate": 6.877453291899685e-06, | |
| "step_time_sec": 107.42 | |
| }, | |
| { | |
| "step": 1470, | |
| "epoch": 0.7949705941999594, | |
| "wallclock": "2026-06-24T13:11:01.097029", | |
| "loss": 0.0665, | |
| "grad_norm": 0.702336311340332, | |
| "learning_rate": 6.857141862409504e-06, | |
| "step_time_sec": 106.99 | |
| }, | |
| { | |
| "step": 1475, | |
| "epoch": 0.7976745758128845, | |
| "wallclock": "2026-06-24T13:12:48.608856", | |
| "loss": 0.0502, | |
| "grad_norm": 0.5416118502616882, | |
| "learning_rate": 6.836794818867496e-06, | |
| "step_time_sec": 107.51 | |
| }, | |
| { | |
| "step": 1480, | |
| "epoch": 0.8003785574258095, | |
| "wallclock": "2026-06-24T13:14:36.033474", | |
| "loss": 0.0441, | |
| "grad_norm": 0.5691907405853271, | |
| "learning_rate": 6.816412551464999e-06, | |
| "step_time_sec": 107.42 | |
| }, | |
| { | |
| "step": 1485, | |
| "epoch": 0.8030825390387345, | |
| "wallclock": "2026-06-24T13:16:24.108145", | |
| "loss": 0.0626, | |
| "grad_norm": 0.6911583542823792, | |
| "learning_rate": 6.795995451068828e-06, | |
| "step_time_sec": 108.07 | |
| }, | |
| { | |
| "step": 1490, | |
| "epoch": 0.8057865206516596, | |
| "wallclock": "2026-06-24T13:18:10.811010", | |
| "loss": 0.0563, | |
| "grad_norm": 1.3713301420211792, | |
| "learning_rate": 6.775543909213786e-06, | |
| "step_time_sec": 106.7 | |
| }, | |
| { | |
| "step": 1495, | |
| "epoch": 0.8084905022645846, | |
| "wallclock": "2026-06-24T13:19:58.721530", | |
| "loss": 0.0483, | |
| "grad_norm": 0.7632337212562561, | |
| "learning_rate": 6.755058318095151e-06, | |
| "step_time_sec": 107.91 | |
| }, | |
| { | |
| "step": 1500, | |
| "epoch": 0.8111944838775096, | |
| "wallclock": "2026-06-24T13:21:45.388432", | |
| "loss": 0.0697, | |
| "grad_norm": 1.1038848161697388, | |
| "learning_rate": 6.73453907056116e-06, | |
| "step_time_sec": 106.67, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 33.45, | |
| "mem_reserved_gb": 80.99 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 1500, | |
| "epoch": 0.8111944838775096, | |
| "wallclock": "2026-06-24T13:23:26.179575", | |
| "eval_loss": 0.0741763636469841, | |
| "eval_runtime": 100.7848, | |
| "eval_samples_per_second": 4.961, | |
| "eval_steps_per_second": 1.24, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 39.05, | |
| "mem_reserved_gb": 80.99 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 1505, | |
| "epoch": 0.8138984654904347, | |
| "wallclock": "2026-06-24T13:26:37.503409", | |
| "loss": 0.0618, | |
| "grad_norm": 0.8092767000198364, | |
| "learning_rate": 6.71398656010547e-06, | |
| "step_time_sec": 292.11 | |
| }, | |
| { | |
| "step": 1510, | |
| "epoch": 0.8166024471033597, | |
| "wallclock": "2026-06-24T13:28:25.707697", | |
| "loss": 0.0432, | |
| "grad_norm": 0.6367549300193787, | |
| "learning_rate": 6.693401180859618e-06, | |
| "step_time_sec": 108.2 | |
| }, | |
| { | |
| "step": 1515, | |
| "epoch": 0.8193064287162848, | |
| "wallclock": "2026-06-24T13:30:13.512145", | |
| "loss": 0.0434, | |
| "grad_norm": 0.7922583222389221, | |
| "learning_rate": 6.672783327585454e-06, | |
| "step_time_sec": 107.8 | |
| }, | |
| { | |
| "step": 1520, | |
| "epoch": 0.8220104103292097, | |
| "wallclock": "2026-06-24T13:32:01.383536", | |
| "loss": 0.061, | |
| "grad_norm": 0.7766749858856201, | |
| "learning_rate": 6.65213339566758e-06, | |
| "step_time_sec": 107.87 | |
| }, | |
| { | |
| "step": 1525, | |
| "epoch": 0.8247143919421348, | |
| "wallclock": "2026-06-24T13:33:48.783229", | |
| "loss": 0.0369, | |
| "grad_norm": 0.5121834874153137, | |
| "learning_rate": 6.631451781105767e-06, | |
| "step_time_sec": 107.4 | |
| }, | |
| { | |
| "step": 1530, | |
| "epoch": 0.8274183735550599, | |
| "wallclock": "2026-06-24T13:35:36.299372", | |
| "loss": 0.0582, | |
| "grad_norm": 0.726270318031311, | |
| "learning_rate": 6.6107388805073495e-06, | |
| "step_time_sec": 107.52 | |
| }, | |
| { | |
| "step": 1535, | |
| "epoch": 0.8301223551679848, | |
| "wallclock": "2026-06-24T13:37:23.706841", | |
| "loss": 0.0468, | |
| "grad_norm": 0.6746184825897217, | |
| "learning_rate": 6.589995091079636e-06, | |
| "step_time_sec": 107.41 | |
| }, | |
| { | |
| "step": 1540, | |
| "epoch": 0.8328263367809099, | |
| "wallclock": "2026-06-24T13:39:10.512639", | |
| "loss": 0.0595, | |
| "grad_norm": 0.8106797337532043, | |
| "learning_rate": 6.569220810622281e-06, | |
| "step_time_sec": 106.81 | |
| }, | |
| { | |
| "step": 1545, | |
| "epoch": 0.835530318393835, | |
| "wallclock": "2026-06-24T13:40:56.792388", | |
| "loss": 0.0539, | |
| "grad_norm": 0.7323052287101746, | |
| "learning_rate": 6.548416437519658e-06, | |
| "step_time_sec": 106.28 | |
| }, | |
| { | |
| "step": 1550, | |
| "epoch": 0.8382343000067599, | |
| "wallclock": "2026-06-24T13:42:44.227103", | |
| "loss": 0.0491, | |
| "grad_norm": 0.6671241521835327, | |
| "learning_rate": 6.5275823707332275e-06, | |
| "step_time_sec": 107.43, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 33.45, | |
| "mem_reserved_gb": 80.99 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 1555, | |
| "epoch": 0.840938281619685, | |
| "wallclock": "2026-06-24T13:44:32.006313", | |
| "loss": 0.0463, | |
| "grad_norm": 1.1120103597640991, | |
| "learning_rate": 6.50671900979387e-06, | |
| "step_time_sec": 107.78 | |
| }, | |
| { | |
| "step": 1560, | |
| "epoch": 0.84364226323261, | |
| "wallclock": "2026-06-24T13:46:19.183463", | |
| "loss": 0.0542, | |
| "grad_norm": 0.3518182039260864, | |
| "learning_rate": 6.485826754794245e-06, | |
| "step_time_sec": 107.18 | |
| }, | |
| { | |
| "step": 1565, | |
| "epoch": 0.846346244845535, | |
| "wallclock": "2026-06-24T13:48:06.385481", | |
| "loss": 0.0379, | |
| "grad_norm": 0.7423526644706726, | |
| "learning_rate": 6.464906006381101e-06, | |
| "step_time_sec": 107.2 | |
| }, | |
| { | |
| "step": 1570, | |
| "epoch": 0.8490502264584601, | |
| "wallclock": "2026-06-24T13:49:54.323330", | |
| "loss": 0.0477, | |
| "grad_norm": 0.7195249795913696, | |
| "learning_rate": 6.443957165747601e-06, | |
| "step_time_sec": 107.94 | |
| }, | |
| { | |
| "step": 1575, | |
| "epoch": 0.8517542080713851, | |
| "wallclock": "2026-06-24T13:51:40.924711", | |
| "loss": 0.058, | |
| "grad_norm": 1.0430902242660522, | |
| "learning_rate": 6.422980634625627e-06, | |
| "step_time_sec": 106.6 | |
| }, | |
| { | |
| "step": 1580, | |
| "epoch": 0.8544581896843102, | |
| "wallclock": "2026-06-24T13:53:26.389981", | |
| "loss": 0.054, | |
| "grad_norm": 0.8965272903442383, | |
| "learning_rate": 6.4019768152780785e-06, | |
| "step_time_sec": 105.47 | |
| }, | |
| { | |
| "step": 1585, | |
| "epoch": 0.8571621712972352, | |
| "wallclock": "2026-06-24T13:55:13.357681", | |
| "loss": 0.0538, | |
| "grad_norm": 0.9105026125907898, | |
| "learning_rate": 6.380946110491151e-06, | |
| "step_time_sec": 106.97 | |
| }, | |
| { | |
| "step": 1590, | |
| "epoch": 0.8598661529101602, | |
| "wallclock": "2026-06-24T13:57:00.092764", | |
| "loss": 0.0405, | |
| "grad_norm": 0.7773502469062805, | |
| "learning_rate": 6.359888923566621e-06, | |
| "step_time_sec": 106.74 | |
| }, | |
| { | |
| "step": 1595, | |
| "epoch": 0.8625701345230853, | |
| "wallclock": "2026-06-24T13:58:47.207895", | |
| "loss": 0.0522, | |
| "grad_norm": 1.0928678512573242, | |
| "learning_rate": 6.338805658314106e-06, | |
| "step_time_sec": 107.12 | |
| }, | |
| { | |
| "step": 1600, | |
| "epoch": 0.8652741161360102, | |
| "wallclock": "2026-06-24T14:00:34.321798", | |
| "loss": 0.0346, | |
| "grad_norm": 0.37700727581977844, | |
| "learning_rate": 6.317696719043327e-06, | |
| "step_time_sec": 107.11, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 33.45, | |
| "mem_reserved_gb": 80.99 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 1600, | |
| "epoch": 0.8652741161360102, | |
| "wallclock": "2026-06-24T14:02:14.979680", | |
| "eval_loss": 0.07683192193508148, | |
| "eval_runtime": 100.6515, | |
| "eval_samples_per_second": 4.968, | |
| "eval_steps_per_second": 1.242, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 39.05, | |
| "mem_reserved_gb": 80.99 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 1605, | |
| "epoch": 0.8679780977489353, | |
| "wallclock": "2026-06-24T14:05:26.609530", | |
| "loss": 0.0504, | |
| "grad_norm": 0.8107773065567017, | |
| "learning_rate": 6.2965625105563445e-06, | |
| "step_time_sec": 292.29 | |
| }, | |
| { | |
| "step": 1610, | |
| "epoch": 0.8706820793618604, | |
| "wallclock": "2026-06-24T14:07:13.558796", | |
| "loss": 0.0366, | |
| "grad_norm": 1.109079122543335, | |
| "learning_rate": 6.275403438139801e-06, | |
| "step_time_sec": 106.95 | |
| }, | |
| { | |
| "step": 1615, | |
| "epoch": 0.8733860609747853, | |
| "wallclock": "2026-06-24T14:09:00.902964", | |
| "loss": 0.0516, | |
| "grad_norm": 0.990442156791687, | |
| "learning_rate": 6.254219907557159e-06, | |
| "step_time_sec": 107.34 | |
| }, | |
| { | |
| "step": 1620, | |
| "epoch": 0.8760900425877104, | |
| "wallclock": "2026-06-24T14:10:48.617807", | |
| "loss": 0.0418, | |
| "grad_norm": 0.7781974077224731, | |
| "learning_rate": 6.2330123250409e-06, | |
| "step_time_sec": 107.71 | |
| }, | |
| { | |
| "step": 1625, | |
| "epoch": 0.8787940242006355, | |
| "wallclock": "2026-06-24T14:12:35.112434", | |
| "loss": 0.0574, | |
| "grad_norm": 1.2163763046264648, | |
| "learning_rate": 6.211781097284754e-06, | |
| "step_time_sec": 106.49 | |
| }, | |
| { | |
| "step": 1630, | |
| "epoch": 0.8814980058135604, | |
| "wallclock": "2026-06-24T14:14:21.209253", | |
| "loss": 0.0626, | |
| "grad_norm": 0.9669123291969299, | |
| "learning_rate": 6.190526631435882e-06, | |
| "step_time_sec": 106.1 | |
| }, | |
| { | |
| "step": 1635, | |
| "epoch": 0.8842019874264855, | |
| "wallclock": "2026-06-24T14:16:08.902128", | |
| "loss": 0.04, | |
| "grad_norm": 1.140141487121582, | |
| "learning_rate": 6.169249335087085e-06, | |
| "step_time_sec": 107.69 | |
| }, | |
| { | |
| "step": 1640, | |
| "epoch": 0.8869059690394105, | |
| "wallclock": "2026-06-24T14:17:56.627117", | |
| "loss": 0.0813, | |
| "grad_norm": 1.00438392162323, | |
| "learning_rate": 6.1479496162689775e-06, | |
| "step_time_sec": 107.72 | |
| }, | |
| { | |
| "step": 1645, | |
| "epoch": 0.8896099506523356, | |
| "wallclock": "2026-06-24T14:19:43.930288", | |
| "loss": 0.051, | |
| "grad_norm": 1.1830681562423706, | |
| "learning_rate": 6.1266278834421634e-06, | |
| "step_time_sec": 107.3 | |
| }, | |
| { | |
| "step": 1650, | |
| "epoch": 0.8923139322652606, | |
| "wallclock": "2026-06-24T14:21:30.620817", | |
| "loss": 0.048, | |
| "grad_norm": 0.7539001107215881, | |
| "learning_rate": 6.105284545489408e-06, | |
| "step_time_sec": 106.69, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 33.45, | |
| "mem_reserved_gb": 80.99 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 1655, | |
| "epoch": 0.8950179138781856, | |
| "wallclock": "2026-06-24T14:23:17.090603", | |
| "loss": 0.044, | |
| "grad_norm": 0.8819478750228882, | |
| "learning_rate": 6.083920011707794e-06, | |
| "step_time_sec": 106.47 | |
| }, | |
| { | |
| "step": 1660, | |
| "epoch": 0.8977218954911107, | |
| "wallclock": "2026-06-24T14:25:03.986530", | |
| "loss": 0.0369, | |
| "grad_norm": 0.6605198383331299, | |
| "learning_rate": 6.062534691800865e-06, | |
| "step_time_sec": 106.9 | |
| }, | |
| { | |
| "step": 1665, | |
| "epoch": 0.9004258771040357, | |
| "wallclock": "2026-06-24T14:26:50.818521", | |
| "loss": 0.0434, | |
| "grad_norm": 0.5586560368537903, | |
| "learning_rate": 6.04112899587079e-06, | |
| "step_time_sec": 106.83 | |
| }, | |
| { | |
| "step": 1670, | |
| "epoch": 0.9031298587169607, | |
| "wallclock": "2026-06-24T14:28:38.292626", | |
| "loss": 0.0537, | |
| "grad_norm": 0.6612546443939209, | |
| "learning_rate": 6.019703334410473e-06, | |
| "step_time_sec": 107.47 | |
| }, | |
| { | |
| "step": 1675, | |
| "epoch": 0.9058338403298858, | |
| "wallclock": "2026-06-24T14:30:25.839650", | |
| "loss": 0.045, | |
| "grad_norm": 0.8835639357566833, | |
| "learning_rate": 5.998258118295699e-06, | |
| "step_time_sec": 107.55 | |
| }, | |
| { | |
| "step": 1680, | |
| "epoch": 0.9085378219428107, | |
| "wallclock": "2026-06-24T14:32:15.497667", | |
| "loss": 0.0351, | |
| "grad_norm": 0.7877563834190369, | |
| "learning_rate": 5.9767937587772464e-06, | |
| "step_time_sec": 109.66 | |
| }, | |
| { | |
| "step": 1685, | |
| "epoch": 0.9112418035557358, | |
| "wallclock": "2026-06-24T14:34:02.801842", | |
| "loss": 0.0423, | |
| "grad_norm": 0.8421223759651184, | |
| "learning_rate": 5.955310667473003e-06, | |
| "step_time_sec": 107.3 | |
| }, | |
| { | |
| "step": 1690, | |
| "epoch": 0.9139457851686609, | |
| "wallclock": "2026-06-24T14:35:49.804897", | |
| "loss": 0.0549, | |
| "grad_norm": 0.9553209543228149, | |
| "learning_rate": 5.933809256360076e-06, | |
| "step_time_sec": 107.0 | |
| }, | |
| { | |
| "step": 1695, | |
| "epoch": 0.9166497667815859, | |
| "wallclock": "2026-06-24T14:37:39.298110", | |
| "loss": 0.0365, | |
| "grad_norm": 0.9886178374290466, | |
| "learning_rate": 5.912289937766882e-06, | |
| "step_time_sec": 109.49 | |
| }, | |
| { | |
| "step": 1700, | |
| "epoch": 0.9193537483945109, | |
| "wallclock": "2026-06-24T14:39:25.987650", | |
| "loss": 0.0488, | |
| "grad_norm": 0.7625762820243835, | |
| "learning_rate": 5.890753124365252e-06, | |
| "step_time_sec": 106.69, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 33.45, | |
| "mem_reserved_gb": 80.99 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 1700, | |
| "epoch": 0.9193537483945109, | |
| "wallclock": "2026-06-24T14:41:06.647821", | |
| "eval_loss": 0.0766952782869339, | |
| "eval_runtime": 100.6532, | |
| "eval_samples_per_second": 4.968, | |
| "eval_steps_per_second": 1.242, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 39.05, | |
| "mem_reserved_gb": 80.99 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 1700, | |
| "epoch": 0.9193537483945109, | |
| "wallclock": "2026-06-24T14:42:31.431895", | |
| "train_runtime": 39166.0066, | |
| "train_samples_per_second": 3.021, | |
| "train_steps_per_second": 0.094, | |
| "total_flos": 5114610608766976.0, | |
| "train_loss": 0.08617227443877389, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 33.45, | |
| "mem_reserved_gb": 80.99 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| }, | |
| { | |
| "step": 1700, | |
| "epoch": 0.9193537483945109, | |
| "wallclock": "2026-06-24T14:44:29.019553", | |
| "eval_loss": 0.07097452133893967, | |
| "eval_runtime": 100.1105, | |
| "eval_samples_per_second": 4.994, | |
| "eval_steps_per_second": 1.249, | |
| "gpu": [ | |
| { | |
| "gpu": 0, | |
| "mem_allocated_gb": 39.05, | |
| "mem_reserved_gb": 80.99 | |
| }, | |
| { | |
| "gpu": 1, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 2, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| }, | |
| { | |
| "gpu": 3, | |
| "mem_allocated_gb": 0.0, | |
| "mem_reserved_gb": 0.0 | |
| } | |
| ] | |
| } | |
| ] |