diff --git "a/checkpoint-10399/trainer_state.json" "b/checkpoint-10399/trainer_state.json" new file mode 100644--- /dev/null +++ "b/checkpoint-10399/trainer_state.json" @@ -0,0 +1,104024 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 10399, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 9.616771649757177e-05, + "grad_norm": 2429.848849799292, + "learning_rate": 0.0, + "loss": 29.1834, + "memory/device_mem_reserved(gib)": 56.89, + "memory/max_mem_active(gib)": 25.29, + "memory/max_mem_allocated(gib)": 25.04, + "step": 1 + }, + { + "epoch": 0.00019233543299514353, + "grad_norm": 2420.607988566772, + "learning_rate": 4.854368932038835e-08, + "loss": 28.8005, + "memory/device_mem_reserved(gib)": 74.34, + "memory/max_mem_active(gib)": 25.88, + "memory/max_mem_allocated(gib)": 25.88, + "step": 2 + }, + { + "epoch": 0.0002885031494927153, + "grad_norm": 2416.54095316768, + "learning_rate": 9.70873786407767e-08, + "loss": 28.5905, + "memory/device_mem_reserved(gib)": 74.34, + "memory/max_mem_active(gib)": 25.88, + "memory/max_mem_allocated(gib)": 25.88, + "step": 3 + }, + { + "epoch": 0.00038467086599028706, + "grad_norm": 2349.0089401125633, + "learning_rate": 1.4563106796116507e-07, + "loss": 28.3037, + "memory/device_mem_reserved(gib)": 74.34, + "memory/max_mem_active(gib)": 25.88, + "memory/max_mem_allocated(gib)": 25.88, + "step": 4 + }, + { + "epoch": 0.0004808385824878588, + "grad_norm": 2181.5873755657663, + "learning_rate": 1.941747572815534e-07, + "loss": 26.0038, + "memory/device_mem_reserved(gib)": 93.86, + "memory/max_mem_active(gib)": 32.5, + "memory/max_mem_allocated(gib)": 32.5, + "step": 5 + }, + { + "epoch": 0.0005770062989854306, + "grad_norm": 1882.5620947775483, + "learning_rate": 2.4271844660194176e-07, + "loss": 20.1584, + "memory/device_mem_reserved(gib)": 93.86, + "memory/max_mem_active(gib)": 32.5, + "memory/max_mem_allocated(gib)": 32.5, + "step": 6 + }, + { + "epoch": 0.0006731740154830024, + "grad_norm": 1732.017828714566, + "learning_rate": 2.9126213592233014e-07, + "loss": 17.9048, + "memory/device_mem_reserved(gib)": 93.86, + "memory/max_mem_active(gib)": 32.5, + "memory/max_mem_allocated(gib)": 32.5, + "step": 7 + }, + { + "epoch": 0.0007693417319805741, + "grad_norm": 1195.3482294419946, + "learning_rate": 3.398058252427185e-07, + "loss": 8.302, + "memory/device_mem_reserved(gib)": 93.86, + "memory/max_mem_active(gib)": 32.5, + "memory/max_mem_allocated(gib)": 32.5, + "step": 8 + }, + { + "epoch": 0.0008655094484781459, + "grad_norm": 1262.2426239646948, + "learning_rate": 3.883495145631068e-07, + "loss": 7.1896, + "memory/device_mem_reserved(gib)": 93.86, + "memory/max_mem_active(gib)": 32.5, + "memory/max_mem_allocated(gib)": 32.5, + "step": 9 + }, + { + "epoch": 0.0009616771649757176, + "grad_norm": 1270.2178312346512, + "learning_rate": 4.368932038834952e-07, + "loss": 3.9999, + "memory/device_mem_reserved(gib)": 93.86, + "memory/max_mem_active(gib)": 32.5, + "memory/max_mem_allocated(gib)": 32.5, + "step": 10 + }, + { + "epoch": 0.0010578448814732894, + "grad_norm": 218.3421285493393, + "learning_rate": 4.854368932038835e-07, + "loss": 0.686, + "memory/device_mem_reserved(gib)": 93.86, + "memory/max_mem_active(gib)": 32.5, + "memory/max_mem_allocated(gib)": 32.5, + "step": 11 + }, + { + "epoch": 0.0011540125979708612, + "grad_norm": 105.97370148885034, + "learning_rate": 5.339805825242719e-07, + "loss": 0.4284, + "memory/device_mem_reserved(gib)": 93.86, + "memory/max_mem_active(gib)": 32.5, + "memory/max_mem_allocated(gib)": 32.5, + "step": 12 + }, + { + "epoch": 0.0012501803144684329, + "grad_norm": 226.19628673710397, + "learning_rate": 5.825242718446603e-07, + "loss": 0.5214, + "memory/device_mem_reserved(gib)": 93.86, + "memory/max_mem_active(gib)": 32.5, + "memory/max_mem_allocated(gib)": 32.5, + "step": 13 + }, + { + "epoch": 0.0013463480309660047, + "grad_norm": 129.00456476039471, + "learning_rate": 6.310679611650486e-07, + "loss": 0.3974, + "memory/device_mem_reserved(gib)": 93.86, + "memory/max_mem_active(gib)": 32.5, + "memory/max_mem_allocated(gib)": 32.5, + "step": 14 + }, + { + "epoch": 0.0014425157474635764, + "grad_norm": 29.4587184036619, + "learning_rate": 6.79611650485437e-07, + "loss": 0.3381, + "memory/device_mem_reserved(gib)": 93.86, + "memory/max_mem_active(gib)": 32.5, + "memory/max_mem_allocated(gib)": 32.5, + "step": 15 + }, + { + "epoch": 0.0015386834639611483, + "grad_norm": 288.82571967897724, + "learning_rate": 7.281553398058253e-07, + "loss": 0.6504, + "memory/device_mem_reserved(gib)": 93.86, + "memory/max_mem_active(gib)": 32.5, + "memory/max_mem_allocated(gib)": 32.5, + "step": 16 + }, + { + "epoch": 0.0016348511804587201, + "grad_norm": 44.600734980453055, + "learning_rate": 7.766990291262136e-07, + "loss": 0.2899, + "memory/device_mem_reserved(gib)": 93.86, + "memory/max_mem_active(gib)": 32.5, + "memory/max_mem_allocated(gib)": 32.5, + "step": 17 + }, + { + "epoch": 0.0017310188969562918, + "grad_norm": 76.79192469279319, + "learning_rate": 8.25242718446602e-07, + "loss": 0.3752, + "memory/device_mem_reserved(gib)": 93.86, + "memory/max_mem_active(gib)": 32.5, + "memory/max_mem_allocated(gib)": 32.5, + "step": 18 + }, + { + "epoch": 0.0018271866134538636, + "grad_norm": 87.7936808111718, + "learning_rate": 8.737864077669904e-07, + "loss": 0.353, + "memory/device_mem_reserved(gib)": 93.86, + "memory/max_mem_active(gib)": 32.5, + "memory/max_mem_allocated(gib)": 32.5, + "step": 19 + }, + { + "epoch": 0.0019233543299514353, + "grad_norm": 47.98466269907861, + "learning_rate": 9.223300970873787e-07, + "loss": 0.3238, + "memory/device_mem_reserved(gib)": 93.86, + "memory/max_mem_active(gib)": 32.5, + "memory/max_mem_allocated(gib)": 32.5, + "step": 20 + }, + { + "epoch": 0.002019522046449007, + "grad_norm": 47.044522507264716, + "learning_rate": 9.70873786407767e-07, + "loss": 0.2679, + "memory/device_mem_reserved(gib)": 93.86, + "memory/max_mem_active(gib)": 32.5, + "memory/max_mem_allocated(gib)": 32.5, + "step": 21 + }, + { + "epoch": 0.0021156897629465788, + "grad_norm": 51.53644110399519, + "learning_rate": 1.0194174757281554e-06, + "loss": 0.33, + "memory/device_mem_reserved(gib)": 93.86, + "memory/max_mem_active(gib)": 32.87, + "memory/max_mem_allocated(gib)": 32.87, + "step": 22 + }, + { + "epoch": 0.0022118574794441504, + "grad_norm": 37.074920702339625, + "learning_rate": 1.0679611650485437e-06, + "loss": 0.2827, + "memory/device_mem_reserved(gib)": 93.86, + "memory/max_mem_active(gib)": 32.87, + "memory/max_mem_allocated(gib)": 32.87, + "step": 23 + }, + { + "epoch": 0.0023080251959417225, + "grad_norm": 47.75540583802018, + "learning_rate": 1.1165048543689322e-06, + "loss": 0.2675, + "memory/device_mem_reserved(gib)": 93.86, + "memory/max_mem_active(gib)": 32.87, + "memory/max_mem_allocated(gib)": 32.87, + "step": 24 + }, + { + "epoch": 0.002404192912439294, + "grad_norm": 32.77271618705602, + "learning_rate": 1.1650485436893206e-06, + "loss": 0.2174, + "memory/device_mem_reserved(gib)": 93.86, + "memory/max_mem_active(gib)": 32.87, + "memory/max_mem_allocated(gib)": 32.87, + "step": 25 + }, + { + "epoch": 0.0025003606289368658, + "grad_norm": 34.58052680686949, + "learning_rate": 1.213592233009709e-06, + "loss": 0.2643, + "memory/device_mem_reserved(gib)": 93.86, + "memory/max_mem_active(gib)": 32.87, + "memory/max_mem_allocated(gib)": 32.87, + "step": 26 + }, + { + "epoch": 0.002596528345434438, + "grad_norm": 18.816159527451884, + "learning_rate": 1.2621359223300972e-06, + "loss": 0.241, + "memory/device_mem_reserved(gib)": 93.86, + "memory/max_mem_active(gib)": 32.87, + "memory/max_mem_allocated(gib)": 32.87, + "step": 27 + }, + { + "epoch": 0.0026926960619320095, + "grad_norm": 16.897501644378597, + "learning_rate": 1.3106796116504856e-06, + "loss": 0.2279, + "memory/device_mem_reserved(gib)": 93.86, + "memory/max_mem_active(gib)": 32.87, + "memory/max_mem_allocated(gib)": 32.87, + "step": 28 + }, + { + "epoch": 0.002788863778429581, + "grad_norm": 26.413763742311463, + "learning_rate": 1.359223300970874e-06, + "loss": 0.2322, + "memory/device_mem_reserved(gib)": 93.86, + "memory/max_mem_active(gib)": 32.87, + "memory/max_mem_allocated(gib)": 32.87, + "step": 29 + }, + { + "epoch": 0.002885031494927153, + "grad_norm": 17.610775079162263, + "learning_rate": 1.4077669902912622e-06, + "loss": 0.2278, + "memory/device_mem_reserved(gib)": 93.86, + "memory/max_mem_active(gib)": 32.87, + "memory/max_mem_allocated(gib)": 32.87, + "step": 30 + }, + { + "epoch": 0.002981199211424725, + "grad_norm": 38.092325044435356, + "learning_rate": 1.4563106796116506e-06, + "loss": 0.2291, + "memory/device_mem_reserved(gib)": 93.86, + "memory/max_mem_active(gib)": 32.87, + "memory/max_mem_allocated(gib)": 32.87, + "step": 31 + }, + { + "epoch": 0.0030773669279222965, + "grad_norm": 25.489962747154216, + "learning_rate": 1.5048543689320389e-06, + "loss": 0.2383, + "memory/device_mem_reserved(gib)": 93.86, + "memory/max_mem_active(gib)": 32.87, + "memory/max_mem_allocated(gib)": 32.87, + "step": 32 + }, + { + "epoch": 0.003173534644419868, + "grad_norm": 22.025874188800135, + "learning_rate": 1.5533980582524272e-06, + "loss": 0.2244, + "memory/device_mem_reserved(gib)": 93.86, + "memory/max_mem_active(gib)": 32.87, + "memory/max_mem_allocated(gib)": 32.87, + "step": 33 + }, + { + "epoch": 0.0032697023609174402, + "grad_norm": 15.841792477252564, + "learning_rate": 1.6019417475728158e-06, + "loss": 0.2175, + "memory/device_mem_reserved(gib)": 93.86, + "memory/max_mem_active(gib)": 32.87, + "memory/max_mem_allocated(gib)": 32.87, + "step": 34 + }, + { + "epoch": 0.003365870077415012, + "grad_norm": 30.300414646762174, + "learning_rate": 1.650485436893204e-06, + "loss": 0.2569, + "memory/device_mem_reserved(gib)": 93.86, + "memory/max_mem_active(gib)": 32.87, + "memory/max_mem_allocated(gib)": 32.87, + "step": 35 + }, + { + "epoch": 0.0034620377939125835, + "grad_norm": 17.981889059774556, + "learning_rate": 1.6990291262135924e-06, + "loss": 0.2098, + "memory/device_mem_reserved(gib)": 93.86, + "memory/max_mem_active(gib)": 32.87, + "memory/max_mem_allocated(gib)": 32.87, + "step": 36 + }, + { + "epoch": 0.003558205510410155, + "grad_norm": 13.66506920457067, + "learning_rate": 1.7475728155339808e-06, + "loss": 0.1984, + "memory/device_mem_reserved(gib)": 93.86, + "memory/max_mem_active(gib)": 32.87, + "memory/max_mem_allocated(gib)": 32.87, + "step": 37 + }, + { + "epoch": 0.0036543732269077272, + "grad_norm": 22.336131933646683, + "learning_rate": 1.796116504854369e-06, + "loss": 0.2296, + "memory/device_mem_reserved(gib)": 93.86, + "memory/max_mem_active(gib)": 32.87, + "memory/max_mem_allocated(gib)": 32.87, + "step": 38 + }, + { + "epoch": 0.003750540943405299, + "grad_norm": 16.37482443929349, + "learning_rate": 1.8446601941747574e-06, + "loss": 0.2509, + "memory/device_mem_reserved(gib)": 93.86, + "memory/max_mem_active(gib)": 32.87, + "memory/max_mem_allocated(gib)": 32.87, + "step": 39 + }, + { + "epoch": 0.0038467086599028705, + "grad_norm": 19.319249353753708, + "learning_rate": 1.8932038834951458e-06, + "loss": 0.1983, + "memory/device_mem_reserved(gib)": 93.86, + "memory/max_mem_active(gib)": 32.87, + "memory/max_mem_allocated(gib)": 32.87, + "step": 40 + }, + { + "epoch": 0.003942876376400443, + "grad_norm": 20.16531416594172, + "learning_rate": 1.941747572815534e-06, + "loss": 0.2005, + "memory/device_mem_reserved(gib)": 113.58, + "memory/max_mem_active(gib)": 32.87, + "memory/max_mem_allocated(gib)": 32.87, + "step": 41 + }, + { + "epoch": 0.004039044092898014, + "grad_norm": 14.703703389843385, + "learning_rate": 1.9902912621359226e-06, + "loss": 0.2083, + "memory/device_mem_reserved(gib)": 113.58, + "memory/max_mem_active(gib)": 32.87, + "memory/max_mem_allocated(gib)": 32.87, + "step": 42 + }, + { + "epoch": 0.004135211809395586, + "grad_norm": 16.127237535933748, + "learning_rate": 2.0388349514563107e-06, + "loss": 0.1831, + "memory/device_mem_reserved(gib)": 113.58, + "memory/max_mem_active(gib)": 32.87, + "memory/max_mem_allocated(gib)": 32.87, + "step": 43 + }, + { + "epoch": 0.0042313795258931575, + "grad_norm": 23.759102058959286, + "learning_rate": 2.0873786407766993e-06, + "loss": 0.231, + "memory/device_mem_reserved(gib)": 113.58, + "memory/max_mem_active(gib)": 32.87, + "memory/max_mem_allocated(gib)": 32.87, + "step": 44 + }, + { + "epoch": 0.004327547242390729, + "grad_norm": 7.725386725027137, + "learning_rate": 2.1359223300970874e-06, + "loss": 0.2101, + "memory/device_mem_reserved(gib)": 113.58, + "memory/max_mem_active(gib)": 32.87, + "memory/max_mem_allocated(gib)": 32.87, + "step": 45 + }, + { + "epoch": 0.004423714958888301, + "grad_norm": 7.551769048374523, + "learning_rate": 2.184466019417476e-06, + "loss": 0.1611, + "memory/device_mem_reserved(gib)": 113.58, + "memory/max_mem_active(gib)": 32.87, + "memory/max_mem_allocated(gib)": 32.87, + "step": 46 + }, + { + "epoch": 0.004519882675385873, + "grad_norm": 8.80034090723396, + "learning_rate": 2.2330097087378645e-06, + "loss": 0.1614, + "memory/device_mem_reserved(gib)": 113.58, + "memory/max_mem_active(gib)": 32.87, + "memory/max_mem_allocated(gib)": 32.87, + "step": 47 + }, + { + "epoch": 0.004616050391883445, + "grad_norm": 11.096188157310623, + "learning_rate": 2.2815533980582526e-06, + "loss": 0.1861, + "memory/device_mem_reserved(gib)": 113.58, + "memory/max_mem_active(gib)": 32.87, + "memory/max_mem_allocated(gib)": 32.87, + "step": 48 + }, + { + "epoch": 0.004712218108381017, + "grad_norm": 10.367448513392098, + "learning_rate": 2.330097087378641e-06, + "loss": 0.1898, + "memory/device_mem_reserved(gib)": 113.58, + "memory/max_mem_active(gib)": 32.87, + "memory/max_mem_allocated(gib)": 32.87, + "step": 49 + }, + { + "epoch": 0.004808385824878588, + "grad_norm": 39.81010932115165, + "learning_rate": 2.3786407766990293e-06, + "loss": 0.283, + "memory/device_mem_reserved(gib)": 113.58, + "memory/max_mem_active(gib)": 32.87, + "memory/max_mem_allocated(gib)": 32.87, + "step": 50 + }, + { + "epoch": 0.00490455354137616, + "grad_norm": 39.9480075012853, + "learning_rate": 2.427184466019418e-06, + "loss": 0.3093, + "memory/device_mem_reserved(gib)": 113.58, + "memory/max_mem_active(gib)": 32.87, + "memory/max_mem_allocated(gib)": 32.87, + "step": 51 + }, + { + "epoch": 0.0050007212578737316, + "grad_norm": 5.802413854348412, + "learning_rate": 2.475728155339806e-06, + "loss": 0.1788, + "memory/device_mem_reserved(gib)": 113.58, + "memory/max_mem_active(gib)": 32.87, + "memory/max_mem_allocated(gib)": 32.87, + "step": 52 + }, + { + "epoch": 0.005096888974371303, + "grad_norm": 28.384648859910623, + "learning_rate": 2.5242718446601945e-06, + "loss": 0.288, + "memory/device_mem_reserved(gib)": 113.58, + "memory/max_mem_active(gib)": 32.87, + "memory/max_mem_allocated(gib)": 32.87, + "step": 53 + }, + { + "epoch": 0.005193056690868876, + "grad_norm": 21.924104711005878, + "learning_rate": 2.5728155339805826e-06, + "loss": 0.2172, + "memory/device_mem_reserved(gib)": 113.58, + "memory/max_mem_active(gib)": 32.87, + "memory/max_mem_allocated(gib)": 32.87, + "step": 54 + }, + { + "epoch": 0.005289224407366447, + "grad_norm": 12.250178037697422, + "learning_rate": 2.621359223300971e-06, + "loss": 0.1874, + "memory/device_mem_reserved(gib)": 113.58, + "memory/max_mem_active(gib)": 32.87, + "memory/max_mem_allocated(gib)": 32.87, + "step": 55 + }, + { + "epoch": 0.005385392123864019, + "grad_norm": 13.668642962517177, + "learning_rate": 2.6699029126213593e-06, + "loss": 0.1929, + "memory/device_mem_reserved(gib)": 113.58, + "memory/max_mem_active(gib)": 32.87, + "memory/max_mem_allocated(gib)": 32.87, + "step": 56 + }, + { + "epoch": 0.005481559840361591, + "grad_norm": 17.97320602825451, + "learning_rate": 2.718446601941748e-06, + "loss": 0.2018, + "memory/device_mem_reserved(gib)": 113.58, + "memory/max_mem_active(gib)": 32.87, + "memory/max_mem_allocated(gib)": 32.87, + "step": 57 + }, + { + "epoch": 0.005577727556859162, + "grad_norm": 9.902366420767839, + "learning_rate": 2.766990291262136e-06, + "loss": 0.1938, + "memory/device_mem_reserved(gib)": 113.58, + "memory/max_mem_active(gib)": 32.87, + "memory/max_mem_allocated(gib)": 32.87, + "step": 58 + }, + { + "epoch": 0.005673895273356734, + "grad_norm": 25.30814467716048, + "learning_rate": 2.8155339805825245e-06, + "loss": 0.2255, + "memory/device_mem_reserved(gib)": 113.58, + "memory/max_mem_active(gib)": 32.87, + "memory/max_mem_allocated(gib)": 32.87, + "step": 59 + }, + { + "epoch": 0.005770062989854306, + "grad_norm": 23.513533502895516, + "learning_rate": 2.8640776699029126e-06, + "loss": 0.2283, + "memory/device_mem_reserved(gib)": 113.58, + "memory/max_mem_active(gib)": 32.87, + "memory/max_mem_allocated(gib)": 32.87, + "step": 60 + }, + { + "epoch": 0.005866230706351878, + "grad_norm": 7.541750548493176, + "learning_rate": 2.912621359223301e-06, + "loss": 0.1582, + "memory/device_mem_reserved(gib)": 113.58, + "memory/max_mem_active(gib)": 32.87, + "memory/max_mem_allocated(gib)": 32.87, + "step": 61 + }, + { + "epoch": 0.00596239842284945, + "grad_norm": 9.302872799089787, + "learning_rate": 2.9611650485436892e-06, + "loss": 0.1681, + "memory/device_mem_reserved(gib)": 113.58, + "memory/max_mem_active(gib)": 32.87, + "memory/max_mem_allocated(gib)": 32.87, + "step": 62 + }, + { + "epoch": 0.006058566139347021, + "grad_norm": 12.873282533168988, + "learning_rate": 3.0097087378640778e-06, + "loss": 0.1926, + "memory/device_mem_reserved(gib)": 113.58, + "memory/max_mem_active(gib)": 32.87, + "memory/max_mem_allocated(gib)": 32.87, + "step": 63 + }, + { + "epoch": 0.006154733855844593, + "grad_norm": 6.8694865278040735, + "learning_rate": 3.058252427184466e-06, + "loss": 0.1907, + "memory/device_mem_reserved(gib)": 113.58, + "memory/max_mem_active(gib)": 32.87, + "memory/max_mem_allocated(gib)": 32.87, + "step": 64 + }, + { + "epoch": 0.006250901572342165, + "grad_norm": 9.561246061012298, + "learning_rate": 3.1067961165048544e-06, + "loss": 0.1717, + "memory/device_mem_reserved(gib)": 113.58, + "memory/max_mem_active(gib)": 32.87, + "memory/max_mem_allocated(gib)": 32.87, + "step": 65 + }, + { + "epoch": 0.006347069288839736, + "grad_norm": 10.050960897489002, + "learning_rate": 3.1553398058252434e-06, + "loss": 0.1652, + "memory/device_mem_reserved(gib)": 113.58, + "memory/max_mem_active(gib)": 32.87, + "memory/max_mem_allocated(gib)": 32.87, + "step": 66 + }, + { + "epoch": 0.006443237005337308, + "grad_norm": 12.250583743193253, + "learning_rate": 3.2038834951456315e-06, + "loss": 0.219, + "memory/device_mem_reserved(gib)": 113.58, + "memory/max_mem_active(gib)": 32.87, + "memory/max_mem_allocated(gib)": 32.87, + "step": 67 + }, + { + "epoch": 0.0065394047218348805, + "grad_norm": 4.319824278616158, + "learning_rate": 3.25242718446602e-06, + "loss": 0.167, + "memory/device_mem_reserved(gib)": 113.58, + "memory/max_mem_active(gib)": 32.87, + "memory/max_mem_allocated(gib)": 32.87, + "step": 68 + }, + { + "epoch": 0.006635572438332452, + "grad_norm": 17.101495819097085, + "learning_rate": 3.300970873786408e-06, + "loss": 0.2172, + "memory/device_mem_reserved(gib)": 113.58, + "memory/max_mem_active(gib)": 32.87, + "memory/max_mem_allocated(gib)": 32.87, + "step": 69 + }, + { + "epoch": 0.006731740154830024, + "grad_norm": 7.829657021208238, + "learning_rate": 3.3495145631067967e-06, + "loss": 0.1802, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 32.87, + "memory/max_mem_allocated(gib)": 32.87, + "step": 70 + }, + { + "epoch": 0.006827907871327595, + "grad_norm": 27.99617291546435, + "learning_rate": 3.398058252427185e-06, + "loss": 0.2729, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 32.87, + "memory/max_mem_allocated(gib)": 32.87, + "step": 71 + }, + { + "epoch": 0.006924075587825167, + "grad_norm": 31.835724383460487, + "learning_rate": 3.4466019417475734e-06, + "loss": 0.2638, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 32.87, + "memory/max_mem_allocated(gib)": 32.87, + "step": 72 + }, + { + "epoch": 0.007020243304322739, + "grad_norm": 6.173236920100566, + "learning_rate": 3.4951456310679615e-06, + "loss": 0.1371, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 32.87, + "memory/max_mem_allocated(gib)": 32.87, + "step": 73 + }, + { + "epoch": 0.00711641102082031, + "grad_norm": 11.923802301819183, + "learning_rate": 3.54368932038835e-06, + "loss": 0.1831, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 32.87, + "memory/max_mem_allocated(gib)": 32.87, + "step": 74 + }, + { + "epoch": 0.007212578737317882, + "grad_norm": 5.992791172305156, + "learning_rate": 3.592233009708738e-06, + "loss": 0.1828, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 32.87, + "memory/max_mem_allocated(gib)": 32.87, + "step": 75 + }, + { + "epoch": 0.0073087464538154545, + "grad_norm": 13.638307903419584, + "learning_rate": 3.6407766990291267e-06, + "loss": 0.2208, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 32.87, + "memory/max_mem_allocated(gib)": 32.87, + "step": 76 + }, + { + "epoch": 0.007404914170313026, + "grad_norm": 16.624560803910434, + "learning_rate": 3.689320388349515e-06, + "loss": 0.1877, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 32.87, + "memory/max_mem_allocated(gib)": 32.87, + "step": 77 + }, + { + "epoch": 0.007501081886810598, + "grad_norm": 13.708429333385892, + "learning_rate": 3.7378640776699034e-06, + "loss": 0.1848, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 32.87, + "memory/max_mem_allocated(gib)": 32.87, + "step": 78 + }, + { + "epoch": 0.007597249603308169, + "grad_norm": 11.337855054927994, + "learning_rate": 3.7864077669902915e-06, + "loss": 0.2035, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 32.87, + "memory/max_mem_allocated(gib)": 32.87, + "step": 79 + }, + { + "epoch": 0.007693417319805741, + "grad_norm": 13.282578966144504, + "learning_rate": 3.83495145631068e-06, + "loss": 0.1994, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 32.87, + "memory/max_mem_allocated(gib)": 32.87, + "step": 80 + }, + { + "epoch": 0.007789585036303313, + "grad_norm": 16.392346847256224, + "learning_rate": 3.883495145631068e-06, + "loss": 0.2278, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 32.87, + "memory/max_mem_allocated(gib)": 32.87, + "step": 81 + }, + { + "epoch": 0.007885752752800885, + "grad_norm": 8.519879958500223, + "learning_rate": 3.932038834951457e-06, + "loss": 0.1809, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 32.87, + "memory/max_mem_allocated(gib)": 32.87, + "step": 82 + }, + { + "epoch": 0.007981920469298456, + "grad_norm": 13.231023268893429, + "learning_rate": 3.980582524271845e-06, + "loss": 0.1869, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 32.87, + "memory/max_mem_allocated(gib)": 32.87, + "step": 83 + }, + { + "epoch": 0.008078088185796028, + "grad_norm": 8.927698385523552, + "learning_rate": 4.029126213592233e-06, + "loss": 0.1964, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 34.54, + "memory/max_mem_allocated(gib)": 34.54, + "step": 84 + }, + { + "epoch": 0.0081742559022936, + "grad_norm": 11.221512367812354, + "learning_rate": 4.0776699029126215e-06, + "loss": 0.1756, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 34.54, + "memory/max_mem_allocated(gib)": 34.54, + "step": 85 + }, + { + "epoch": 0.008270423618791172, + "grad_norm": 6.483826232264867, + "learning_rate": 4.12621359223301e-06, + "loss": 0.1476, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 34.54, + "memory/max_mem_allocated(gib)": 34.54, + "step": 86 + }, + { + "epoch": 0.008366591335288744, + "grad_norm": 7.483868710103482, + "learning_rate": 4.1747572815533986e-06, + "loss": 0.2029, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 34.54, + "memory/max_mem_allocated(gib)": 34.54, + "step": 87 + }, + { + "epoch": 0.008462759051786315, + "grad_norm": 8.300887626164297, + "learning_rate": 4.223300970873786e-06, + "loss": 0.218, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 34.54, + "memory/max_mem_allocated(gib)": 34.54, + "step": 88 + }, + { + "epoch": 0.008558926768283888, + "grad_norm": 12.311339081677396, + "learning_rate": 4.271844660194175e-06, + "loss": 0.2164, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 34.54, + "memory/max_mem_allocated(gib)": 34.54, + "step": 89 + }, + { + "epoch": 0.008655094484781458, + "grad_norm": 5.977902040062802, + "learning_rate": 4.320388349514563e-06, + "loss": 0.2318, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 34.54, + "memory/max_mem_allocated(gib)": 34.54, + "step": 90 + }, + { + "epoch": 0.008751262201279031, + "grad_norm": 15.053061531962772, + "learning_rate": 4.368932038834952e-06, + "loss": 0.2144, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 34.54, + "memory/max_mem_allocated(gib)": 34.54, + "step": 91 + }, + { + "epoch": 0.008847429917776602, + "grad_norm": 6.653637537266505, + "learning_rate": 4.4174757281553404e-06, + "loss": 0.1953, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 34.54, + "memory/max_mem_allocated(gib)": 34.54, + "step": 92 + }, + { + "epoch": 0.008943597634274174, + "grad_norm": 19.463147646560138, + "learning_rate": 4.466019417475729e-06, + "loss": 0.2489, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 34.54, + "memory/max_mem_allocated(gib)": 34.54, + "step": 93 + }, + { + "epoch": 0.009039765350771747, + "grad_norm": 15.936273579132537, + "learning_rate": 4.514563106796117e-06, + "loss": 0.2406, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 34.54, + "memory/max_mem_allocated(gib)": 34.54, + "step": 94 + }, + { + "epoch": 0.009135933067269317, + "grad_norm": 7.758140392642136, + "learning_rate": 4.563106796116505e-06, + "loss": 0.2094, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 34.54, + "memory/max_mem_allocated(gib)": 34.54, + "step": 95 + }, + { + "epoch": 0.00923210078376689, + "grad_norm": 8.831126991059358, + "learning_rate": 4.611650485436894e-06, + "loss": 0.2075, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 34.54, + "memory/max_mem_allocated(gib)": 34.54, + "step": 96 + }, + { + "epoch": 0.00932826850026446, + "grad_norm": 10.295580069594434, + "learning_rate": 4.660194174757282e-06, + "loss": 0.1957, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 34.54, + "memory/max_mem_allocated(gib)": 34.54, + "step": 97 + }, + { + "epoch": 0.009424436216762033, + "grad_norm": 4.573234879475048, + "learning_rate": 4.70873786407767e-06, + "loss": 0.1826, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 34.54, + "memory/max_mem_allocated(gib)": 34.54, + "step": 98 + }, + { + "epoch": 0.009520603933259604, + "grad_norm": 12.526366525177544, + "learning_rate": 4.7572815533980585e-06, + "loss": 0.204, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 34.54, + "memory/max_mem_allocated(gib)": 34.54, + "step": 99 + }, + { + "epoch": 0.009616771649757177, + "grad_norm": 10.047038570637612, + "learning_rate": 4.805825242718447e-06, + "loss": 0.1877, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 34.54, + "memory/max_mem_allocated(gib)": 34.54, + "step": 100 + }, + { + "epoch": 0.009712939366254749, + "grad_norm": 14.5028486877356, + "learning_rate": 4.854368932038836e-06, + "loss": 0.2092, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 34.54, + "memory/max_mem_allocated(gib)": 34.54, + "step": 101 + }, + { + "epoch": 0.00980910708275232, + "grad_norm": 14.55648884840812, + "learning_rate": 4.902912621359223e-06, + "loss": 0.227, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 34.54, + "memory/max_mem_allocated(gib)": 34.54, + "step": 102 + }, + { + "epoch": 0.009905274799249892, + "grad_norm": 5.5515396287251395, + "learning_rate": 4.951456310679612e-06, + "loss": 0.193, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 34.54, + "memory/max_mem_allocated(gib)": 34.54, + "step": 103 + }, + { + "epoch": 0.010001442515747463, + "grad_norm": 9.34808128011466, + "learning_rate": 5e-06, + "loss": 0.1992, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 34.54, + "memory/max_mem_allocated(gib)": 34.54, + "step": 104 + }, + { + "epoch": 0.010097610232245036, + "grad_norm": 4.3469805230749, + "learning_rate": 4.999999883621518e-06, + "loss": 0.1516, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 34.54, + "memory/max_mem_allocated(gib)": 34.54, + "step": 105 + }, + { + "epoch": 0.010193777948742606, + "grad_norm": 4.28253154530092, + "learning_rate": 4.999999534486084e-06, + "loss": 0.1964, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 34.54, + "memory/max_mem_allocated(gib)": 34.54, + "step": 106 + }, + { + "epoch": 0.010289945665240179, + "grad_norm": 5.724300800585411, + "learning_rate": 4.9999989525937294e-06, + "loss": 0.169, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 34.54, + "memory/max_mem_allocated(gib)": 34.54, + "step": 107 + }, + { + "epoch": 0.010386113381737751, + "grad_norm": 3.9643351200848627, + "learning_rate": 4.9999981379445085e-06, + "loss": 0.1782, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 34.54, + "memory/max_mem_allocated(gib)": 34.54, + "step": 108 + }, + { + "epoch": 0.010482281098235322, + "grad_norm": 16.490282744567253, + "learning_rate": 4.9999970905384985e-06, + "loss": 0.2601, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 34.54, + "memory/max_mem_allocated(gib)": 34.54, + "step": 109 + }, + { + "epoch": 0.010578448814732895, + "grad_norm": 5.926472057353583, + "learning_rate": 4.999995810375795e-06, + "loss": 0.2004, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 34.54, + "memory/max_mem_allocated(gib)": 34.54, + "step": 110 + }, + { + "epoch": 0.010674616531230465, + "grad_norm": 3.881050058226854, + "learning_rate": 4.999994297456517e-06, + "loss": 0.2054, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 34.54, + "memory/max_mem_allocated(gib)": 34.54, + "step": 111 + }, + { + "epoch": 0.010770784247728038, + "grad_norm": 11.496227480950454, + "learning_rate": 4.999992551780808e-06, + "loss": 0.2042, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 34.54, + "memory/max_mem_allocated(gib)": 34.54, + "step": 112 + }, + { + "epoch": 0.010866951964225609, + "grad_norm": 5.41393975720306, + "learning_rate": 4.9999905733488285e-06, + "loss": 0.1748, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 34.54, + "memory/max_mem_allocated(gib)": 34.54, + "step": 113 + }, + { + "epoch": 0.010963119680723181, + "grad_norm": 15.6405504174374, + "learning_rate": 4.999988362160763e-06, + "loss": 0.2225, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 34.54, + "memory/max_mem_allocated(gib)": 34.54, + "step": 114 + }, + { + "epoch": 0.011059287397220754, + "grad_norm": 23.493888126874733, + "learning_rate": 4.999985918216818e-06, + "loss": 0.3391, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 34.54, + "memory/max_mem_allocated(gib)": 34.54, + "step": 115 + }, + { + "epoch": 0.011155455113718325, + "grad_norm": 5.5279583347241, + "learning_rate": 4.999983241517219e-06, + "loss": 0.2128, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 34.54, + "memory/max_mem_allocated(gib)": 34.54, + "step": 116 + }, + { + "epoch": 0.011251622830215897, + "grad_norm": 22.494929380831046, + "learning_rate": 4.999980332062218e-06, + "loss": 0.3322, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 34.54, + "memory/max_mem_allocated(gib)": 34.54, + "step": 117 + }, + { + "epoch": 0.011347790546713468, + "grad_norm": 19.13068720747119, + "learning_rate": 4.999977189852084e-06, + "loss": 0.2479, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 34.54, + "memory/max_mem_allocated(gib)": 34.54, + "step": 118 + }, + { + "epoch": 0.01144395826321104, + "grad_norm": 3.794946153473758, + "learning_rate": 4.999973814887111e-06, + "loss": 0.1921, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 34.54, + "memory/max_mem_allocated(gib)": 34.54, + "step": 119 + }, + { + "epoch": 0.011540125979708611, + "grad_norm": 8.141643405385118, + "learning_rate": 4.999970207167611e-06, + "loss": 0.1887, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 120 + }, + { + "epoch": 0.011636293696206184, + "grad_norm": 9.6042991531712, + "learning_rate": 4.999966366693922e-06, + "loss": 0.2155, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 121 + }, + { + "epoch": 0.011732461412703756, + "grad_norm": 5.746829918212715, + "learning_rate": 4.999962293466402e-06, + "loss": 0.1866, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 122 + }, + { + "epoch": 0.011828629129201327, + "grad_norm": 8.256783850947832, + "learning_rate": 4.9999579874854275e-06, + "loss": 0.1949, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 123 + }, + { + "epoch": 0.0119247968456989, + "grad_norm": 4.96536795404654, + "learning_rate": 4.999953448751402e-06, + "loss": 0.1929, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 124 + }, + { + "epoch": 0.01202096456219647, + "grad_norm": 3.0808697003979604, + "learning_rate": 4.9999486772647466e-06, + "loss": 0.1573, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 125 + }, + { + "epoch": 0.012117132278694043, + "grad_norm": 6.265338132871403, + "learning_rate": 4.999943673025905e-06, + "loss": 0.181, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 126 + }, + { + "epoch": 0.012213299995191614, + "grad_norm": 6.425520857374259, + "learning_rate": 4.999938436035345e-06, + "loss": 0.1923, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 127 + }, + { + "epoch": 0.012309467711689186, + "grad_norm": 13.319122243333634, + "learning_rate": 4.999932966293553e-06, + "loss": 0.2526, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 128 + }, + { + "epoch": 0.012405635428186759, + "grad_norm": 8.742388589741768, + "learning_rate": 4.999927263801039e-06, + "loss": 0.2026, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 129 + }, + { + "epoch": 0.01250180314468433, + "grad_norm": 8.039450542100651, + "learning_rate": 4.999921328558333e-06, + "loss": 0.1813, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 130 + }, + { + "epoch": 0.012597970861181902, + "grad_norm": 10.543976818106907, + "learning_rate": 4.9999151605659875e-06, + "loss": 0.23, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 131 + }, + { + "epoch": 0.012694138577679473, + "grad_norm": 3.2332337416427848, + "learning_rate": 4.999908759824578e-06, + "loss": 0.1606, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 132 + }, + { + "epoch": 0.012790306294177045, + "grad_norm": 4.720055172955144, + "learning_rate": 4.9999021263347005e-06, + "loss": 0.1705, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 133 + }, + { + "epoch": 0.012886474010674616, + "grad_norm": 5.68056124053232, + "learning_rate": 4.99989526009697e-06, + "loss": 0.1851, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 134 + }, + { + "epoch": 0.012982641727172188, + "grad_norm": 5.330797705083614, + "learning_rate": 4.9998881611120285e-06, + "loss": 0.1932, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 135 + }, + { + "epoch": 0.013078809443669761, + "grad_norm": 5.879785918293704, + "learning_rate": 4.9998808293805355e-06, + "loss": 0.1833, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 136 + }, + { + "epoch": 0.013174977160167332, + "grad_norm": 4.46809144647565, + "learning_rate": 4.999873264903175e-06, + "loss": 0.1902, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 137 + }, + { + "epoch": 0.013271144876664904, + "grad_norm": 4.604635899545463, + "learning_rate": 4.99986546768065e-06, + "loss": 0.1793, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 138 + }, + { + "epoch": 0.013367312593162475, + "grad_norm": 5.635947153374676, + "learning_rate": 4.999857437713686e-06, + "loss": 0.1796, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 139 + }, + { + "epoch": 0.013463480309660047, + "grad_norm": 7.033126029674587, + "learning_rate": 4.999849175003032e-06, + "loss": 0.1642, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 140 + }, + { + "epoch": 0.013559648026157618, + "grad_norm": 6.887802868269277, + "learning_rate": 4.9998406795494566e-06, + "loss": 0.1864, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 141 + }, + { + "epoch": 0.01365581574265519, + "grad_norm": 8.076552948325903, + "learning_rate": 4.999831951353751e-06, + "loss": 0.2253, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 142 + }, + { + "epoch": 0.013751983459152762, + "grad_norm": 8.907575249769963, + "learning_rate": 4.999822990416727e-06, + "loss": 0.1561, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 143 + }, + { + "epoch": 0.013848151175650334, + "grad_norm": 4.786174271385518, + "learning_rate": 4.9998137967392205e-06, + "loss": 0.2102, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 144 + }, + { + "epoch": 0.013944318892147907, + "grad_norm": 8.219277569050085, + "learning_rate": 4.999804370322086e-06, + "loss": 0.1614, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 145 + }, + { + "epoch": 0.014040486608645477, + "grad_norm": 2.957116088274576, + "learning_rate": 4.999794711166202e-06, + "loss": 0.1693, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 146 + }, + { + "epoch": 0.01413665432514305, + "grad_norm": 2.8139580993876874, + "learning_rate": 4.999784819272468e-06, + "loss": 0.1607, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 147 + }, + { + "epoch": 0.01423282204164062, + "grad_norm": 7.294522428971027, + "learning_rate": 4.999774694641803e-06, + "loss": 0.1582, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 148 + }, + { + "epoch": 0.014328989758138193, + "grad_norm": 4.622019080522286, + "learning_rate": 4.9997643372751515e-06, + "loss": 0.1773, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 149 + }, + { + "epoch": 0.014425157474635764, + "grad_norm": 8.380738039576391, + "learning_rate": 4.9997537471734774e-06, + "loss": 0.1773, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 150 + }, + { + "epoch": 0.014521325191133336, + "grad_norm": 7.911356409993633, + "learning_rate": 4.999742924337767e-06, + "loss": 0.2159, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 151 + }, + { + "epoch": 0.014617492907630909, + "grad_norm": 4.901778498548959, + "learning_rate": 4.999731868769027e-06, + "loss": 0.1646, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 152 + }, + { + "epoch": 0.01471366062412848, + "grad_norm": 14.429758794634704, + "learning_rate": 4.9997205804682875e-06, + "loss": 0.2218, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 153 + }, + { + "epoch": 0.014809828340626052, + "grad_norm": 3.221352910820699, + "learning_rate": 4.9997090594365994e-06, + "loss": 0.1806, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 154 + }, + { + "epoch": 0.014905996057123623, + "grad_norm": 9.084768156339187, + "learning_rate": 4.999697305675034e-06, + "loss": 0.1961, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 155 + }, + { + "epoch": 0.015002163773621196, + "grad_norm": 8.411415481151808, + "learning_rate": 4.999685319184688e-06, + "loss": 0.1823, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 156 + }, + { + "epoch": 0.015098331490118766, + "grad_norm": 4.747046840404914, + "learning_rate": 4.999673099966675e-06, + "loss": 0.1606, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 157 + }, + { + "epoch": 0.015194499206616339, + "grad_norm": 7.823006045989041, + "learning_rate": 4.999660648022136e-06, + "loss": 0.1703, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 158 + }, + { + "epoch": 0.015290666923113911, + "grad_norm": 2.74477690183639, + "learning_rate": 4.999647963352225e-06, + "loss": 0.1485, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 159 + }, + { + "epoch": 0.015386834639611482, + "grad_norm": 10.733689702228247, + "learning_rate": 4.999635045958129e-06, + "loss": 0.2253, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 160 + }, + { + "epoch": 0.015483002356109055, + "grad_norm": 4.444510380300314, + "learning_rate": 4.999621895841046e-06, + "loss": 0.1746, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 161 + }, + { + "epoch": 0.015579170072606625, + "grad_norm": 5.680299148392844, + "learning_rate": 4.999608513002202e-06, + "loss": 0.1811, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 162 + }, + { + "epoch": 0.015675337789104196, + "grad_norm": 3.6438076631254486, + "learning_rate": 4.999594897442844e-06, + "loss": 0.1507, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 163 + }, + { + "epoch": 0.01577150550560177, + "grad_norm": 3.530248931722717, + "learning_rate": 4.999581049164238e-06, + "loss": 0.1825, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 164 + }, + { + "epoch": 0.01586767322209934, + "grad_norm": 3.638400740772143, + "learning_rate": 4.9995669681676735e-06, + "loss": 0.1811, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 165 + }, + { + "epoch": 0.015963840938596912, + "grad_norm": 7.119948595100235, + "learning_rate": 4.999552654454463e-06, + "loss": 0.1754, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 166 + }, + { + "epoch": 0.016060008655094486, + "grad_norm": 9.293921031343356, + "learning_rate": 4.999538108025938e-06, + "loss": 0.1911, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 167 + }, + { + "epoch": 0.016156176371592057, + "grad_norm": 6.923397363309511, + "learning_rate": 4.999523328883451e-06, + "loss": 0.2019, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 168 + }, + { + "epoch": 0.016252344088089628, + "grad_norm": 7.349236328382989, + "learning_rate": 4.999508317028382e-06, + "loss": 0.2007, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 169 + }, + { + "epoch": 0.0163485118045872, + "grad_norm": 8.20335709242232, + "learning_rate": 4.999493072462126e-06, + "loss": 0.1909, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 170 + }, + { + "epoch": 0.016444679521084773, + "grad_norm": 6.2324832459577975, + "learning_rate": 4.999477595186103e-06, + "loss": 0.1628, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 171 + }, + { + "epoch": 0.016540847237582344, + "grad_norm": 8.097030658838715, + "learning_rate": 4.999461885201753e-06, + "loss": 0.2023, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 172 + }, + { + "epoch": 0.016637014954079914, + "grad_norm": 7.850481310815464, + "learning_rate": 4.99944594251054e-06, + "loss": 0.2036, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 173 + }, + { + "epoch": 0.01673318267057749, + "grad_norm": 6.928622688906619, + "learning_rate": 4.999429767113947e-06, + "loss": 0.1667, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 174 + }, + { + "epoch": 0.01682935038707506, + "grad_norm": 6.120353255957257, + "learning_rate": 4.999413359013482e-06, + "loss": 0.2018, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 175 + }, + { + "epoch": 0.01692551810357263, + "grad_norm": 7.706889948992412, + "learning_rate": 4.999396718210671e-06, + "loss": 0.2421, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 176 + }, + { + "epoch": 0.0170216858200702, + "grad_norm": 6.898149583730028, + "learning_rate": 4.9993798447070625e-06, + "loss": 0.1999, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 177 + }, + { + "epoch": 0.017117853536567775, + "grad_norm": 3.5976593219053448, + "learning_rate": 4.99936273850423e-06, + "loss": 0.1718, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 178 + }, + { + "epoch": 0.017214021253065346, + "grad_norm": 4.473341206885504, + "learning_rate": 4.999345399603764e-06, + "loss": 0.172, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 179 + }, + { + "epoch": 0.017310188969562917, + "grad_norm": 7.281756039833025, + "learning_rate": 4.999327828007281e-06, + "loss": 0.194, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 180 + }, + { + "epoch": 0.01740635668606049, + "grad_norm": 5.951996527988666, + "learning_rate": 4.999310023716415e-06, + "loss": 0.2027, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 181 + }, + { + "epoch": 0.017502524402558062, + "grad_norm": 6.707074039045628, + "learning_rate": 4.999291986732823e-06, + "loss": 0.1864, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 182 + }, + { + "epoch": 0.017598692119055633, + "grad_norm": 4.313309052277139, + "learning_rate": 4.999273717058186e-06, + "loss": 0.1709, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 183 + }, + { + "epoch": 0.017694859835553203, + "grad_norm": 6.115817131074366, + "learning_rate": 4.9992552146942054e-06, + "loss": 0.1805, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 184 + }, + { + "epoch": 0.017791027552050778, + "grad_norm": 7.156097978433799, + "learning_rate": 4.999236479642602e-06, + "loss": 0.2179, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 185 + }, + { + "epoch": 0.01788719526854835, + "grad_norm": 3.845557781584584, + "learning_rate": 4.999217511905121e-06, + "loss": 0.1854, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 186 + }, + { + "epoch": 0.01798336298504592, + "grad_norm": 4.866413119365121, + "learning_rate": 4.999198311483529e-06, + "loss": 0.1768, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 187 + }, + { + "epoch": 0.018079530701543493, + "grad_norm": 5.431988074213398, + "learning_rate": 4.9991788783796115e-06, + "loss": 0.2027, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 188 + }, + { + "epoch": 0.018175698418041064, + "grad_norm": 7.862450705656546, + "learning_rate": 4.99915921259518e-06, + "loss": 0.2044, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 189 + }, + { + "epoch": 0.018271866134538635, + "grad_norm": 8.621245662138753, + "learning_rate": 4.9991393141320645e-06, + "loss": 0.1737, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 190 + }, + { + "epoch": 0.018368033851036206, + "grad_norm": 3.1471633828300054, + "learning_rate": 4.9991191829921175e-06, + "loss": 0.2014, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 191 + }, + { + "epoch": 0.01846420156753378, + "grad_norm": 7.1693291455327, + "learning_rate": 4.999098819177214e-06, + "loss": 0.2094, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 192 + }, + { + "epoch": 0.01856036928403135, + "grad_norm": 2.9929510898371414, + "learning_rate": 4.9990782226892495e-06, + "loss": 0.1548, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 193 + }, + { + "epoch": 0.01865653700052892, + "grad_norm": 10.490630395046658, + "learning_rate": 4.999057393530141e-06, + "loss": 0.2142, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 194 + }, + { + "epoch": 0.018752704717026496, + "grad_norm": 6.06022019131897, + "learning_rate": 4.999036331701828e-06, + "loss": 0.1766, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 195 + }, + { + "epoch": 0.018848872433524066, + "grad_norm": 5.017911014500965, + "learning_rate": 4.999015037206272e-06, + "loss": 0.2019, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 196 + }, + { + "epoch": 0.018945040150021637, + "grad_norm": 4.38886653666375, + "learning_rate": 4.9989935100454555e-06, + "loss": 0.2036, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 197 + }, + { + "epoch": 0.019041207866519208, + "grad_norm": 5.030871601764532, + "learning_rate": 4.998971750221382e-06, + "loss": 0.1562, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 198 + }, + { + "epoch": 0.019137375583016782, + "grad_norm": 3.3233762584771074, + "learning_rate": 4.998949757736079e-06, + "loss": 0.1824, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 199 + }, + { + "epoch": 0.019233543299514353, + "grad_norm": 2.9398602859261542, + "learning_rate": 4.998927532591592e-06, + "loss": 0.1878, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 200 + }, + { + "epoch": 0.019329711016011924, + "grad_norm": 6.160369433266719, + "learning_rate": 4.998905074789991e-06, + "loss": 0.1643, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 201 + }, + { + "epoch": 0.019425878732509498, + "grad_norm": 3.005263570875574, + "learning_rate": 4.998882384333368e-06, + "loss": 0.1517, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 202 + }, + { + "epoch": 0.01952204644900707, + "grad_norm": 6.893266443005762, + "learning_rate": 4.998859461223834e-06, + "loss": 0.2037, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 203 + }, + { + "epoch": 0.01961821416550464, + "grad_norm": 4.487316134843178, + "learning_rate": 4.998836305463524e-06, + "loss": 0.2045, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 204 + }, + { + "epoch": 0.01971438188200221, + "grad_norm": 5.467556743615876, + "learning_rate": 4.998812917054593e-06, + "loss": 0.1694, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 205 + }, + { + "epoch": 0.019810549598499785, + "grad_norm": 8.771664176855545, + "learning_rate": 4.99878929599922e-06, + "loss": 0.1914, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 206 + }, + { + "epoch": 0.019906717314997355, + "grad_norm": 3.615114228141093, + "learning_rate": 4.998765442299603e-06, + "loss": 0.174, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 207 + }, + { + "epoch": 0.020002885031494926, + "grad_norm": 7.423189427942231, + "learning_rate": 4.998741355957963e-06, + "loss": 0.18, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 208 + }, + { + "epoch": 0.0200990527479925, + "grad_norm": 3.5470179503892125, + "learning_rate": 4.998717036976544e-06, + "loss": 0.1556, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 209 + }, + { + "epoch": 0.02019522046449007, + "grad_norm": 9.16088453043861, + "learning_rate": 4.998692485357607e-06, + "loss": 0.2151, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 210 + }, + { + "epoch": 0.020291388180987642, + "grad_norm": 3.2361149837296375, + "learning_rate": 4.998667701103441e-06, + "loss": 0.1505, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 211 + }, + { + "epoch": 0.020387555897485213, + "grad_norm": 3.3649120527983136, + "learning_rate": 4.998642684216352e-06, + "loss": 0.1984, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 212 + }, + { + "epoch": 0.020483723613982787, + "grad_norm": 12.550355869284216, + "learning_rate": 4.9986174346986694e-06, + "loss": 0.2054, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 213 + }, + { + "epoch": 0.020579891330480358, + "grad_norm": 6.230357557677488, + "learning_rate": 4.9985919525527435e-06, + "loss": 0.1892, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 214 + }, + { + "epoch": 0.02067605904697793, + "grad_norm": 6.790973793582754, + "learning_rate": 4.9985662377809476e-06, + "loss": 0.1811, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 215 + }, + { + "epoch": 0.020772226763475503, + "grad_norm": 9.990881966784746, + "learning_rate": 4.998540290385675e-06, + "loss": 0.2084, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 216 + }, + { + "epoch": 0.020868394479973074, + "grad_norm": 2.95533729500904, + "learning_rate": 4.998514110369342e-06, + "loss": 0.1806, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 217 + }, + { + "epoch": 0.020964562196470644, + "grad_norm": 9.597194231101746, + "learning_rate": 4.998487697734386e-06, + "loss": 0.1809, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 218 + }, + { + "epoch": 0.021060729912968215, + "grad_norm": 7.739942934654333, + "learning_rate": 4.9984610524832664e-06, + "loss": 0.1621, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 219 + }, + { + "epoch": 0.02115689762946579, + "grad_norm": 5.790567187272488, + "learning_rate": 4.998434174618464e-06, + "loss": 0.2037, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 220 + }, + { + "epoch": 0.02125306534596336, + "grad_norm": 4.2326076873479, + "learning_rate": 4.99840706414248e-06, + "loss": 0.1995, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 221 + }, + { + "epoch": 0.02134923306246093, + "grad_norm": 2.6000189511276464, + "learning_rate": 4.998379721057839e-06, + "loss": 0.1712, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 222 + }, + { + "epoch": 0.021445400778958505, + "grad_norm": 9.415185111206084, + "learning_rate": 4.998352145367087e-06, + "loss": 0.1836, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 223 + }, + { + "epoch": 0.021541568495456076, + "grad_norm": 5.0550207770309, + "learning_rate": 4.998324337072792e-06, + "loss": 0.1744, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 224 + }, + { + "epoch": 0.021637736211953647, + "grad_norm": 3.8731766555805005, + "learning_rate": 4.998296296177542e-06, + "loss": 0.1628, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 225 + }, + { + "epoch": 0.021733903928451218, + "grad_norm": 3.836312860172965, + "learning_rate": 4.998268022683947e-06, + "loss": 0.1812, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 226 + }, + { + "epoch": 0.021830071644948792, + "grad_norm": 4.195234787870878, + "learning_rate": 4.998239516594642e-06, + "loss": 0.1953, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 227 + }, + { + "epoch": 0.021926239361446363, + "grad_norm": 4.735643789470154, + "learning_rate": 4.998210777912279e-06, + "loss": 0.1748, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 228 + }, + { + "epoch": 0.022022407077943933, + "grad_norm": 8.941768863735732, + "learning_rate": 4.998181806639534e-06, + "loss": 0.2079, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 229 + }, + { + "epoch": 0.022118574794441508, + "grad_norm": 9.420246546075628, + "learning_rate": 4.998152602779104e-06, + "loss": 0.1846, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 230 + }, + { + "epoch": 0.02221474251093908, + "grad_norm": 2.4046353354559873, + "learning_rate": 4.998123166333708e-06, + "loss": 0.1749, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 231 + }, + { + "epoch": 0.02231091022743665, + "grad_norm": 6.704223696437119, + "learning_rate": 4.998093497306088e-06, + "loss": 0.1748, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 232 + }, + { + "epoch": 0.02240707794393422, + "grad_norm": 4.146744474354103, + "learning_rate": 4.9980635956990044e-06, + "loss": 0.1625, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 233 + }, + { + "epoch": 0.022503245660431794, + "grad_norm": 4.043191843174161, + "learning_rate": 4.998033461515242e-06, + "loss": 0.1944, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 234 + }, + { + "epoch": 0.022599413376929365, + "grad_norm": 12.731416994056344, + "learning_rate": 4.9980030947576064e-06, + "loss": 0.2017, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 235 + }, + { + "epoch": 0.022695581093426936, + "grad_norm": 4.151837920241551, + "learning_rate": 4.997972495428924e-06, + "loss": 0.1658, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 236 + }, + { + "epoch": 0.02279174880992451, + "grad_norm": 3.439934914452828, + "learning_rate": 4.997941663532045e-06, + "loss": 0.1809, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 237 + }, + { + "epoch": 0.02288791652642208, + "grad_norm": 10.581916536506544, + "learning_rate": 4.99791059906984e-06, + "loss": 0.206, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 238 + }, + { + "epoch": 0.02298408424291965, + "grad_norm": 8.125138850289897, + "learning_rate": 4.9978793020452e-06, + "loss": 0.1919, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 239 + }, + { + "epoch": 0.023080251959417222, + "grad_norm": 2.612456808285792, + "learning_rate": 4.997847772461038e-06, + "loss": 0.1554, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 240 + }, + { + "epoch": 0.023176419675914797, + "grad_norm": 10.135685752543976, + "learning_rate": 4.997816010320293e-06, + "loss": 0.2414, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 241 + }, + { + "epoch": 0.023272587392412367, + "grad_norm": 12.034023094024187, + "learning_rate": 4.997784015625919e-06, + "loss": 0.2392, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 242 + }, + { + "epoch": 0.023368755108909938, + "grad_norm": 10.56294197739104, + "learning_rate": 4.997751788380895e-06, + "loss": 0.2179, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 243 + }, + { + "epoch": 0.023464922825407512, + "grad_norm": 6.358274197361932, + "learning_rate": 4.997719328588224e-06, + "loss": 0.1777, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 244 + }, + { + "epoch": 0.023561090541905083, + "grad_norm": 8.562198790012854, + "learning_rate": 4.997686636250926e-06, + "loss": 0.2129, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 245 + }, + { + "epoch": 0.023657258258402654, + "grad_norm": 4.483651157620859, + "learning_rate": 4.997653711372044e-06, + "loss": 0.1844, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 246 + }, + { + "epoch": 0.023753425974900225, + "grad_norm": 7.35643081462999, + "learning_rate": 4.997620553954645e-06, + "loss": 0.1965, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 247 + }, + { + "epoch": 0.0238495936913978, + "grad_norm": 7.377519938540657, + "learning_rate": 4.997587164001815e-06, + "loss": 0.1901, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 248 + }, + { + "epoch": 0.02394576140789537, + "grad_norm": 3.9881994247494057, + "learning_rate": 4.997553541516664e-06, + "loss": 0.2166, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 249 + }, + { + "epoch": 0.02404192912439294, + "grad_norm": 12.729279433614375, + "learning_rate": 4.9975196865023215e-06, + "loss": 0.2162, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 250 + }, + { + "epoch": 0.024138096840890515, + "grad_norm": 12.81474928206202, + "learning_rate": 4.997485598961939e-06, + "loss": 0.2033, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 251 + }, + { + "epoch": 0.024234264557388085, + "grad_norm": 7.089982582590072, + "learning_rate": 4.9974512788986915e-06, + "loss": 0.2031, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 252 + }, + { + "epoch": 0.024330432273885656, + "grad_norm": 4.164769562960317, + "learning_rate": 4.997416726315773e-06, + "loss": 0.2008, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 253 + }, + { + "epoch": 0.024426599990383227, + "grad_norm": 6.39262289048171, + "learning_rate": 4.9973819412164e-06, + "loss": 0.1819, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 254 + }, + { + "epoch": 0.0245227677068808, + "grad_norm": 5.538295010519253, + "learning_rate": 4.997346923603814e-06, + "loss": 0.1839, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 255 + }, + { + "epoch": 0.024618935423378372, + "grad_norm": 4.714549866146675, + "learning_rate": 4.997311673481272e-06, + "loss": 0.207, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 256 + }, + { + "epoch": 0.024715103139875943, + "grad_norm": 7.245886844380704, + "learning_rate": 4.997276190852057e-06, + "loss": 0.1659, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 257 + }, + { + "epoch": 0.024811270856373517, + "grad_norm": 4.696544512886312, + "learning_rate": 4.997240475719474e-06, + "loss": 0.1545, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 258 + }, + { + "epoch": 0.024907438572871088, + "grad_norm": 6.790073170494659, + "learning_rate": 4.997204528086845e-06, + "loss": 0.165, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 259 + }, + { + "epoch": 0.02500360628936866, + "grad_norm": 10.470467656359233, + "learning_rate": 4.997168347957521e-06, + "loss": 0.203, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 260 + }, + { + "epoch": 0.02509977400586623, + "grad_norm": 6.436800388671853, + "learning_rate": 4.997131935334866e-06, + "loss": 0.182, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 261 + }, + { + "epoch": 0.025195941722363804, + "grad_norm": 6.548682314279288, + "learning_rate": 4.997095290222274e-06, + "loss": 0.1734, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 262 + }, + { + "epoch": 0.025292109438861374, + "grad_norm": 13.02443421187475, + "learning_rate": 4.997058412623154e-06, + "loss": 0.2025, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 263 + }, + { + "epoch": 0.025388277155358945, + "grad_norm": 6.521783096034669, + "learning_rate": 4.99702130254094e-06, + "loss": 0.219, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 264 + }, + { + "epoch": 0.02548444487185652, + "grad_norm": 5.093827184899447, + "learning_rate": 4.9969839599790885e-06, + "loss": 0.1616, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 265 + }, + { + "epoch": 0.02558061258835409, + "grad_norm": 7.632979303625395, + "learning_rate": 4.996946384941075e-06, + "loss": 0.1454, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 266 + }, + { + "epoch": 0.02567678030485166, + "grad_norm": 3.7130307582886366, + "learning_rate": 4.996908577430397e-06, + "loss": 0.1764, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 267 + }, + { + "epoch": 0.025772948021349232, + "grad_norm": 7.856597258496008, + "learning_rate": 4.996870537450576e-06, + "loss": 0.1827, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 268 + }, + { + "epoch": 0.025869115737846806, + "grad_norm": 4.8058172011325775, + "learning_rate": 4.996832265005154e-06, + "loss": 0.1842, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 269 + }, + { + "epoch": 0.025965283454344377, + "grad_norm": 3.3599185664772757, + "learning_rate": 4.996793760097694e-06, + "loss": 0.1584, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 270 + }, + { + "epoch": 0.026061451170841948, + "grad_norm": 9.234032199706705, + "learning_rate": 4.996755022731779e-06, + "loss": 0.2156, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 271 + }, + { + "epoch": 0.026157618887339522, + "grad_norm": 7.310613479341366, + "learning_rate": 4.996716052911017e-06, + "loss": 0.1606, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 272 + }, + { + "epoch": 0.026253786603837093, + "grad_norm": 6.565220487504083, + "learning_rate": 4.996676850639036e-06, + "loss": 0.1854, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 273 + }, + { + "epoch": 0.026349954320334663, + "grad_norm": 3.953485774425993, + "learning_rate": 4.996637415919486e-06, + "loss": 0.1697, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 274 + }, + { + "epoch": 0.026446122036832234, + "grad_norm": 9.128688801994159, + "learning_rate": 4.996597748756039e-06, + "loss": 0.2093, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 275 + }, + { + "epoch": 0.02654228975332981, + "grad_norm": 5.1117025850718125, + "learning_rate": 4.996557849152387e-06, + "loss": 0.1614, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 276 + }, + { + "epoch": 0.02663845746982738, + "grad_norm": 3.90278855505874, + "learning_rate": 4.9965177171122456e-06, + "loss": 0.1618, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 277 + }, + { + "epoch": 0.02673462518632495, + "grad_norm": 9.394858709102149, + "learning_rate": 4.996477352639351e-06, + "loss": 0.2261, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 278 + }, + { + "epoch": 0.02683079290282252, + "grad_norm": 2.218885973442878, + "learning_rate": 4.9964367557374605e-06, + "loss": 0.1419, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 279 + }, + { + "epoch": 0.026926960619320095, + "grad_norm": 2.8088404079436438, + "learning_rate": 4.996395926410354e-06, + "loss": 0.1816, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 280 + }, + { + "epoch": 0.027023128335817666, + "grad_norm": 5.3837237449326425, + "learning_rate": 4.996354864661835e-06, + "loss": 0.1218, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 281 + }, + { + "epoch": 0.027119296052315237, + "grad_norm": 5.950869767030157, + "learning_rate": 4.996313570495724e-06, + "loss": 0.1863, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 282 + }, + { + "epoch": 0.02721546376881281, + "grad_norm": 3.528667711644685, + "learning_rate": 4.9962720439158665e-06, + "loss": 0.1582, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 283 + }, + { + "epoch": 0.02731163148531038, + "grad_norm": 2.250646657313932, + "learning_rate": 4.996230284926128e-06, + "loss": 0.1605, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 284 + }, + { + "epoch": 0.027407799201807952, + "grad_norm": 7.516155331423873, + "learning_rate": 4.996188293530397e-06, + "loss": 0.1762, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 285 + }, + { + "epoch": 0.027503966918305523, + "grad_norm": 3.686664747247407, + "learning_rate": 4.996146069732583e-06, + "loss": 0.1634, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 286 + }, + { + "epoch": 0.027600134634803097, + "grad_norm": 3.3946418280531114, + "learning_rate": 4.996103613536617e-06, + "loss": 0.1806, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 287 + }, + { + "epoch": 0.027696302351300668, + "grad_norm": 5.46977730942251, + "learning_rate": 4.996060924946452e-06, + "loss": 0.1703, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 288 + }, + { + "epoch": 0.02779247006779824, + "grad_norm": 2.825609328047587, + "learning_rate": 4.9960180039660626e-06, + "loss": 0.1777, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 289 + }, + { + "epoch": 0.027888637784295813, + "grad_norm": 3.576143740028724, + "learning_rate": 4.9959748505994445e-06, + "loss": 0.2191, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 290 + }, + { + "epoch": 0.027984805500793384, + "grad_norm": 6.811940268057713, + "learning_rate": 4.9959314648506156e-06, + "loss": 0.1539, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 291 + }, + { + "epoch": 0.028080973217290955, + "grad_norm": 3.876212313850951, + "learning_rate": 4.995887846723615e-06, + "loss": 0.1886, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 292 + }, + { + "epoch": 0.028177140933788525, + "grad_norm": 3.851202929841782, + "learning_rate": 4.995843996222503e-06, + "loss": 0.1646, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 293 + }, + { + "epoch": 0.0282733086502861, + "grad_norm": 7.6449725725731, + "learning_rate": 4.995799913351364e-06, + "loss": 0.2174, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 294 + }, + { + "epoch": 0.02836947636678367, + "grad_norm": 2.3784879368443232, + "learning_rate": 4.995755598114301e-06, + "loss": 0.1659, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 295 + }, + { + "epoch": 0.02846564408328124, + "grad_norm": 4.707064588251631, + "learning_rate": 4.99571105051544e-06, + "loss": 0.173, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 296 + }, + { + "epoch": 0.028561811799778816, + "grad_norm": 2.279867527162495, + "learning_rate": 4.995666270558929e-06, + "loss": 0.1592, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 297 + }, + { + "epoch": 0.028657979516276386, + "grad_norm": 8.676166273816325, + "learning_rate": 4.995621258248937e-06, + "loss": 0.1808, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 298 + }, + { + "epoch": 0.028754147232773957, + "grad_norm": 6.482530938213627, + "learning_rate": 4.995576013589654e-06, + "loss": 0.1993, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 299 + }, + { + "epoch": 0.028850314949271528, + "grad_norm": 2.4777183477436027, + "learning_rate": 4.995530536585293e-06, + "loss": 0.1531, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 300 + }, + { + "epoch": 0.028946482665769102, + "grad_norm": 11.162540997928469, + "learning_rate": 4.995484827240088e-06, + "loss": 0.2394, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 301 + }, + { + "epoch": 0.029042650382266673, + "grad_norm": 7.127393417793652, + "learning_rate": 4.995438885558294e-06, + "loss": 0.1867, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 302 + }, + { + "epoch": 0.029138818098764244, + "grad_norm": 4.421540162595737, + "learning_rate": 4.995392711544189e-06, + "loss": 0.1872, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 303 + }, + { + "epoch": 0.029234985815261818, + "grad_norm": 3.760008351368607, + "learning_rate": 4.995346305202073e-06, + "loss": 0.1747, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 304 + }, + { + "epoch": 0.02933115353175939, + "grad_norm": 3.0651356495285897, + "learning_rate": 4.995299666536265e-06, + "loss": 0.1807, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 305 + }, + { + "epoch": 0.02942732124825696, + "grad_norm": 9.60576909481522, + "learning_rate": 4.995252795551106e-06, + "loss": 0.1716, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 306 + }, + { + "epoch": 0.02952348896475453, + "grad_norm": 5.556907763117439, + "learning_rate": 4.995205692250964e-06, + "loss": 0.1925, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 307 + }, + { + "epoch": 0.029619656681252104, + "grad_norm": 3.403963946922097, + "learning_rate": 4.99515835664022e-06, + "loss": 0.1955, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 308 + }, + { + "epoch": 0.029715824397749675, + "grad_norm": 2.432642761030167, + "learning_rate": 4.995110788723284e-06, + "loss": 0.1439, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 309 + }, + { + "epoch": 0.029811992114247246, + "grad_norm": 2.962038230710917, + "learning_rate": 4.995062988504583e-06, + "loss": 0.1608, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 310 + }, + { + "epoch": 0.02990815983074482, + "grad_norm": 2.7599414036831385, + "learning_rate": 4.9950149559885685e-06, + "loss": 0.1699, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 311 + }, + { + "epoch": 0.03000432754724239, + "grad_norm": 2.98184778329603, + "learning_rate": 4.994966691179712e-06, + "loss": 0.1317, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 312 + }, + { + "epoch": 0.030100495263739962, + "grad_norm": 3.3833666521287338, + "learning_rate": 4.994918194082506e-06, + "loss": 0.1625, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 313 + }, + { + "epoch": 0.030196662980237533, + "grad_norm": 3.836126663603744, + "learning_rate": 4.994869464701467e-06, + "loss": 0.1607, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 314 + }, + { + "epoch": 0.030292830696735107, + "grad_norm": 4.614000180620492, + "learning_rate": 4.994820503041132e-06, + "loss": 0.1833, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 315 + }, + { + "epoch": 0.030388998413232678, + "grad_norm": 3.8353298369725257, + "learning_rate": 4.99477130910606e-06, + "loss": 0.1887, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 316 + }, + { + "epoch": 0.03048516612973025, + "grad_norm": 7.677655638366124, + "learning_rate": 4.994721882900829e-06, + "loss": 0.1848, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 317 + }, + { + "epoch": 0.030581333846227823, + "grad_norm": 3.9696475393083195, + "learning_rate": 4.994672224430042e-06, + "loss": 0.1768, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 318 + }, + { + "epoch": 0.030677501562725393, + "grad_norm": 10.936529487497348, + "learning_rate": 4.994622333698323e-06, + "loss": 0.2168, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 319 + }, + { + "epoch": 0.030773669279222964, + "grad_norm": 13.240052150589905, + "learning_rate": 4.994572210710315e-06, + "loss": 0.2432, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 320 + }, + { + "epoch": 0.030869836995720535, + "grad_norm": 7.260816648475866, + "learning_rate": 4.9945218554706864e-06, + "loss": 0.1712, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 321 + }, + { + "epoch": 0.03096600471221811, + "grad_norm": 3.2539029682589664, + "learning_rate": 4.994471267984125e-06, + "loss": 0.1758, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 322 + }, + { + "epoch": 0.03106217242871568, + "grad_norm": 7.054521155583637, + "learning_rate": 4.99442044825534e-06, + "loss": 0.1932, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 323 + }, + { + "epoch": 0.03115834014521325, + "grad_norm": 4.51228830721065, + "learning_rate": 4.994369396289063e-06, + "loss": 0.1664, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 324 + }, + { + "epoch": 0.03125450786171082, + "grad_norm": 5.831876541567009, + "learning_rate": 4.994318112090048e-06, + "loss": 0.2059, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 325 + }, + { + "epoch": 0.03135067557820839, + "grad_norm": 3.5102124955402405, + "learning_rate": 4.994266595663069e-06, + "loss": 0.1796, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 326 + }, + { + "epoch": 0.03144684329470597, + "grad_norm": 2.7101010844746254, + "learning_rate": 4.994214847012922e-06, + "loss": 0.1475, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 327 + }, + { + "epoch": 0.03154301101120354, + "grad_norm": 2.945979298851669, + "learning_rate": 4.994162866144425e-06, + "loss": 0.1602, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 328 + }, + { + "epoch": 0.03163917872770111, + "grad_norm": 2.4322098308153803, + "learning_rate": 4.994110653062419e-06, + "loss": 0.1686, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 329 + }, + { + "epoch": 0.03173534644419868, + "grad_norm": 3.541206328932846, + "learning_rate": 4.994058207771764e-06, + "loss": 0.1498, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 330 + }, + { + "epoch": 0.03183151416069625, + "grad_norm": 3.945871943114513, + "learning_rate": 4.994005530277342e-06, + "loss": 0.1637, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 331 + }, + { + "epoch": 0.031927681877193824, + "grad_norm": 4.761081029135694, + "learning_rate": 4.9939526205840585e-06, + "loss": 0.1615, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 332 + }, + { + "epoch": 0.032023849593691395, + "grad_norm": 10.660325015041401, + "learning_rate": 4.993899478696839e-06, + "loss": 0.2026, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 333 + }, + { + "epoch": 0.03212001731018897, + "grad_norm": 3.1030991189559214, + "learning_rate": 4.993846104620633e-06, + "loss": 0.1601, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 334 + }, + { + "epoch": 0.03221618502668654, + "grad_norm": 6.877373454461039, + "learning_rate": 4.993792498360407e-06, + "loss": 0.1706, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 335 + }, + { + "epoch": 0.032312352743184114, + "grad_norm": 6.810275997524485, + "learning_rate": 4.993738659921153e-06, + "loss": 0.1991, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 336 + }, + { + "epoch": 0.032408520459681685, + "grad_norm": 5.065406337048652, + "learning_rate": 4.993684589307885e-06, + "loss": 0.1697, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 337 + }, + { + "epoch": 0.032504688176179256, + "grad_norm": 5.719038921638035, + "learning_rate": 4.993630286525634e-06, + "loss": 0.1764, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 338 + }, + { + "epoch": 0.032600855892676826, + "grad_norm": 9.392516047217333, + "learning_rate": 4.993575751579458e-06, + "loss": 0.1684, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 339 + }, + { + "epoch": 0.0326970236091744, + "grad_norm": 2.3012547428029815, + "learning_rate": 4.993520984474435e-06, + "loss": 0.1768, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 340 + }, + { + "epoch": 0.032793191325671975, + "grad_norm": 2.6089207753828516, + "learning_rate": 4.993465985215662e-06, + "loss": 0.1663, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 341 + }, + { + "epoch": 0.032889359042169546, + "grad_norm": 3.4365647851399603, + "learning_rate": 4.993410753808261e-06, + "loss": 0.1958, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 342 + }, + { + "epoch": 0.032985526758667116, + "grad_norm": 3.664368420780544, + "learning_rate": 4.993355290257373e-06, + "loss": 0.1598, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 343 + }, + { + "epoch": 0.03308169447516469, + "grad_norm": 8.013054668500535, + "learning_rate": 4.993299594568163e-06, + "loss": 0.1805, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 344 + }, + { + "epoch": 0.03317786219166226, + "grad_norm": 4.377947000494754, + "learning_rate": 4.993243666745815e-06, + "loss": 0.1537, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 345 + }, + { + "epoch": 0.03327402990815983, + "grad_norm": 3.7624730759803646, + "learning_rate": 4.9931875067955385e-06, + "loss": 0.156, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 346 + }, + { + "epoch": 0.0333701976246574, + "grad_norm": 9.235417295008057, + "learning_rate": 4.9931311147225594e-06, + "loss": 0.233, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 347 + }, + { + "epoch": 0.03346636534115498, + "grad_norm": 10.01824446388851, + "learning_rate": 4.993074490532129e-06, + "loss": 0.2098, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 348 + }, + { + "epoch": 0.03356253305765255, + "grad_norm": 4.503114625948515, + "learning_rate": 4.993017634229519e-06, + "loss": 0.1612, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 349 + }, + { + "epoch": 0.03365870077415012, + "grad_norm": 11.281342325547232, + "learning_rate": 4.992960545820025e-06, + "loss": 0.2149, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 350 + }, + { + "epoch": 0.03375486849064769, + "grad_norm": 9.882461365076743, + "learning_rate": 4.992903225308958e-06, + "loss": 0.2057, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 351 + }, + { + "epoch": 0.03385103620714526, + "grad_norm": 8.89287677387412, + "learning_rate": 4.992845672701658e-06, + "loss": 0.1772, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 352 + }, + { + "epoch": 0.03394720392364283, + "grad_norm": 4.1086350896035295, + "learning_rate": 4.992787888003483e-06, + "loss": 0.1818, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 353 + }, + { + "epoch": 0.0340433716401404, + "grad_norm": 2.677133620717531, + "learning_rate": 4.992729871219811e-06, + "loss": 0.166, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 354 + }, + { + "epoch": 0.03413953935663798, + "grad_norm": 3.0148737478642973, + "learning_rate": 4.9926716223560455e-06, + "loss": 0.175, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 355 + }, + { + "epoch": 0.03423570707313555, + "grad_norm": 2.1933695449019632, + "learning_rate": 4.992613141417608e-06, + "loss": 0.1697, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 356 + }, + { + "epoch": 0.03433187478963312, + "grad_norm": 4.760912056133944, + "learning_rate": 4.992554428409945e-06, + "loss": 0.1892, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 357 + }, + { + "epoch": 0.03442804250613069, + "grad_norm": 2.821952076079073, + "learning_rate": 4.992495483338522e-06, + "loss": 0.1593, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 358 + }, + { + "epoch": 0.03452421022262826, + "grad_norm": 4.47853973659214, + "learning_rate": 4.992436306208826e-06, + "loss": 0.1652, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 359 + }, + { + "epoch": 0.03462037793912583, + "grad_norm": 5.855206630185654, + "learning_rate": 4.9923768970263675e-06, + "loss": 0.1942, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 360 + }, + { + "epoch": 0.034716545655623404, + "grad_norm": 2.6642661013562194, + "learning_rate": 4.992317255796678e-06, + "loss": 0.1769, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 361 + }, + { + "epoch": 0.03481271337212098, + "grad_norm": 2.366887889260156, + "learning_rate": 4.99225738252531e-06, + "loss": 0.1726, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 362 + }, + { + "epoch": 0.03490888108861855, + "grad_norm": 2.462289221130738, + "learning_rate": 4.992197277217837e-06, + "loss": 0.1427, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 363 + }, + { + "epoch": 0.035005048805116123, + "grad_norm": 2.4176875754226086, + "learning_rate": 4.992136939879857e-06, + "loss": 0.1488, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 364 + }, + { + "epoch": 0.035101216521613694, + "grad_norm": 2.961556730110372, + "learning_rate": 4.992076370516985e-06, + "loss": 0.1962, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 365 + }, + { + "epoch": 0.035197384238111265, + "grad_norm": 2.842196786588062, + "learning_rate": 4.992015569134862e-06, + "loss": 0.1741, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 366 + }, + { + "epoch": 0.035293551954608836, + "grad_norm": 2.4864980527789755, + "learning_rate": 4.991954535739148e-06, + "loss": 0.1432, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 367 + }, + { + "epoch": 0.03538971967110641, + "grad_norm": 2.8573208988924246, + "learning_rate": 4.991893270335526e-06, + "loss": 0.1715, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 368 + }, + { + "epoch": 0.035485887387603984, + "grad_norm": 6.910167439619617, + "learning_rate": 4.991831772929698e-06, + "loss": 0.194, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 369 + }, + { + "epoch": 0.035582055104101555, + "grad_norm": 4.880830730900904, + "learning_rate": 4.991770043527393e-06, + "loss": 0.1544, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 370 + }, + { + "epoch": 0.035678222820599126, + "grad_norm": 2.7073899638915413, + "learning_rate": 4.991708082134356e-06, + "loss": 0.16, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 371 + }, + { + "epoch": 0.0357743905370967, + "grad_norm": 8.307122877312372, + "learning_rate": 4.991645888756356e-06, + "loss": 0.2012, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 372 + }, + { + "epoch": 0.03587055825359427, + "grad_norm": 5.877330167247571, + "learning_rate": 4.991583463399183e-06, + "loss": 0.1774, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 373 + }, + { + "epoch": 0.03596672597009184, + "grad_norm": 2.678504614105921, + "learning_rate": 4.99152080606865e-06, + "loss": 0.1746, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 374 + }, + { + "epoch": 0.03606289368658941, + "grad_norm": 8.040561678224764, + "learning_rate": 4.99145791677059e-06, + "loss": 0.1756, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 375 + }, + { + "epoch": 0.03615906140308699, + "grad_norm": 3.2616236781985912, + "learning_rate": 4.9913947955108575e-06, + "loss": 0.1788, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 376 + }, + { + "epoch": 0.03625522911958456, + "grad_norm": 2.6147566036734062, + "learning_rate": 4.991331442295331e-06, + "loss": 0.1866, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 377 + }, + { + "epoch": 0.03635139683608213, + "grad_norm": 9.226709384299498, + "learning_rate": 4.9912678571299075e-06, + "loss": 0.2106, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 378 + }, + { + "epoch": 0.0364475645525797, + "grad_norm": 4.714903100572042, + "learning_rate": 4.9912040400205075e-06, + "loss": 0.1751, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 379 + }, + { + "epoch": 0.03654373226907727, + "grad_norm": 4.2384730651151425, + "learning_rate": 4.991139990973071e-06, + "loss": 0.1872, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 380 + }, + { + "epoch": 0.03663989998557484, + "grad_norm": 4.842272284122241, + "learning_rate": 4.991075709993565e-06, + "loss": 0.1712, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 381 + }, + { + "epoch": 0.03673606770207241, + "grad_norm": 2.7251649964358533, + "learning_rate": 4.9910111970879695e-06, + "loss": 0.1908, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 382 + }, + { + "epoch": 0.03683223541856999, + "grad_norm": 5.2369994539992675, + "learning_rate": 4.990946452262294e-06, + "loss": 0.2043, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 383 + }, + { + "epoch": 0.03692840313506756, + "grad_norm": 4.518824435045372, + "learning_rate": 4.990881475522566e-06, + "loss": 0.168, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 384 + }, + { + "epoch": 0.03702457085156513, + "grad_norm": 2.5358918970713633, + "learning_rate": 4.9908162668748335e-06, + "loss": 0.1387, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 385 + }, + { + "epoch": 0.0371207385680627, + "grad_norm": 7.328353882608724, + "learning_rate": 4.990750826325169e-06, + "loss": 0.171, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 386 + }, + { + "epoch": 0.03721690628456027, + "grad_norm": 9.4581423506621, + "learning_rate": 4.990685153879664e-06, + "loss": 0.2063, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 387 + }, + { + "epoch": 0.03731307400105784, + "grad_norm": 6.609498143548996, + "learning_rate": 4.990619249544435e-06, + "loss": 0.1692, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 388 + }, + { + "epoch": 0.037409241717555414, + "grad_norm": 4.488238125581944, + "learning_rate": 4.990553113325616e-06, + "loss": 0.1774, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 389 + }, + { + "epoch": 0.03750540943405299, + "grad_norm": 5.4691171665018885, + "learning_rate": 4.990486745229364e-06, + "loss": 0.1757, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 390 + }, + { + "epoch": 0.03760157715055056, + "grad_norm": 3.9565311618534924, + "learning_rate": 4.99042014526186e-06, + "loss": 0.168, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 391 + }, + { + "epoch": 0.03769774486704813, + "grad_norm": 4.0528797034900945, + "learning_rate": 4.9903533134293035e-06, + "loss": 0.1682, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 392 + }, + { + "epoch": 0.037793912583545704, + "grad_norm": 6.640025021359708, + "learning_rate": 4.990286249737916e-06, + "loss": 0.17, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 393 + }, + { + "epoch": 0.037890080300043275, + "grad_norm": 2.0952797392287303, + "learning_rate": 4.9902189541939435e-06, + "loss": 0.1545, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 394 + }, + { + "epoch": 0.037986248016540845, + "grad_norm": 5.474268622712886, + "learning_rate": 4.9901514268036495e-06, + "loss": 0.1899, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 395 + }, + { + "epoch": 0.038082415733038416, + "grad_norm": 4.204638776141392, + "learning_rate": 4.990083667573321e-06, + "loss": 0.1768, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 396 + }, + { + "epoch": 0.038178583449535994, + "grad_norm": 3.323036718672076, + "learning_rate": 4.990015676509268e-06, + "loss": 0.2039, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 397 + }, + { + "epoch": 0.038274751166033565, + "grad_norm": 8.892998601854913, + "learning_rate": 4.989947453617819e-06, + "loss": 0.1708, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 398 + }, + { + "epoch": 0.038370918882531135, + "grad_norm": 9.295995911187648, + "learning_rate": 4.989878998905327e-06, + "loss": 0.1689, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 399 + }, + { + "epoch": 0.038467086599028706, + "grad_norm": 2.322097884566123, + "learning_rate": 4.989810312378165e-06, + "loss": 0.1753, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 400 + }, + { + "epoch": 0.03856325431552628, + "grad_norm": 2.2043952396411357, + "learning_rate": 4.989741394042728e-06, + "loss": 0.1537, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 401 + }, + { + "epoch": 0.03865942203202385, + "grad_norm": 6.9127627416192015, + "learning_rate": 4.989672243905432e-06, + "loss": 0.188, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 402 + }, + { + "epoch": 0.03875558974852142, + "grad_norm": 5.70341594775111, + "learning_rate": 4.989602861972715e-06, + "loss": 0.1713, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 403 + }, + { + "epoch": 0.038851757465018996, + "grad_norm": 4.970756178324331, + "learning_rate": 4.9895332482510374e-06, + "loss": 0.1965, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 404 + }, + { + "epoch": 0.03894792518151657, + "grad_norm": 6.445387255316524, + "learning_rate": 4.989463402746879e-06, + "loss": 0.1517, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 405 + }, + { + "epoch": 0.03904409289801414, + "grad_norm": 2.691523271193351, + "learning_rate": 4.989393325466745e-06, + "loss": 0.1716, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 406 + }, + { + "epoch": 0.03914026061451171, + "grad_norm": 2.451272761755522, + "learning_rate": 4.989323016417158e-06, + "loss": 0.1411, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 407 + }, + { + "epoch": 0.03923642833100928, + "grad_norm": 4.665968313711161, + "learning_rate": 4.989252475604664e-06, + "loss": 0.1575, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 408 + }, + { + "epoch": 0.03933259604750685, + "grad_norm": 4.552460106929886, + "learning_rate": 4.989181703035831e-06, + "loss": 0.176, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 409 + }, + { + "epoch": 0.03942876376400442, + "grad_norm": 2.383009679343778, + "learning_rate": 4.989110698717248e-06, + "loss": 0.1782, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 410 + }, + { + "epoch": 0.039524931480502, + "grad_norm": 6.704110224411036, + "learning_rate": 4.989039462655526e-06, + "loss": 0.2146, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 411 + }, + { + "epoch": 0.03962109919699957, + "grad_norm": 3.986082458271216, + "learning_rate": 4.988967994857297e-06, + "loss": 0.1592, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 412 + }, + { + "epoch": 0.03971726691349714, + "grad_norm": 3.9016513489872664, + "learning_rate": 4.988896295329215e-06, + "loss": 0.1946, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 413 + }, + { + "epoch": 0.03981343462999471, + "grad_norm": 3.6233197052483255, + "learning_rate": 4.988824364077955e-06, + "loss": 0.1676, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 414 + }, + { + "epoch": 0.03990960234649228, + "grad_norm": 1.9498698004308326, + "learning_rate": 4.988752201110214e-06, + "loss": 0.1499, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 415 + }, + { + "epoch": 0.04000577006298985, + "grad_norm": 2.321628189939708, + "learning_rate": 4.988679806432712e-06, + "loss": 0.1505, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 416 + }, + { + "epoch": 0.04010193777948742, + "grad_norm": 2.4347781100257717, + "learning_rate": 4.988607180052188e-06, + "loss": 0.1516, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 417 + }, + { + "epoch": 0.040198105495985, + "grad_norm": 3.453554728815473, + "learning_rate": 4.9885343219754025e-06, + "loss": 0.192, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 418 + }, + { + "epoch": 0.04029427321248257, + "grad_norm": 2.402139698550924, + "learning_rate": 4.988461232209141e-06, + "loss": 0.1408, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 419 + }, + { + "epoch": 0.04039044092898014, + "grad_norm": 3.4350660011950436, + "learning_rate": 4.988387910760206e-06, + "loss": 0.2022, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 420 + }, + { + "epoch": 0.04048660864547771, + "grad_norm": 3.9270346480266833, + "learning_rate": 4.988314357635427e-06, + "loss": 0.1692, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 421 + }, + { + "epoch": 0.040582776361975284, + "grad_norm": 3.0176516386712278, + "learning_rate": 4.988240572841649e-06, + "loss": 0.1823, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 422 + }, + { + "epoch": 0.040678944078472855, + "grad_norm": 6.575327569334861, + "learning_rate": 4.988166556385744e-06, + "loss": 0.1772, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 423 + }, + { + "epoch": 0.040775111794970426, + "grad_norm": 5.276632214525805, + "learning_rate": 4.9880923082746015e-06, + "loss": 0.1761, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 424 + }, + { + "epoch": 0.040871279511468, + "grad_norm": 4.425500444117387, + "learning_rate": 4.988017828515135e-06, + "loss": 0.1903, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 425 + }, + { + "epoch": 0.040967447227965574, + "grad_norm": 8.008042178026828, + "learning_rate": 4.987943117114278e-06, + "loss": 0.2023, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 426 + }, + { + "epoch": 0.041063614944463145, + "grad_norm": 4.306820691126665, + "learning_rate": 4.987868174078987e-06, + "loss": 0.2157, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 427 + }, + { + "epoch": 0.041159782660960716, + "grad_norm": 3.904204929171988, + "learning_rate": 4.98779299941624e-06, + "loss": 0.171, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 428 + }, + { + "epoch": 0.041255950377458286, + "grad_norm": 9.177034508306177, + "learning_rate": 4.9877175931330345e-06, + "loss": 0.2128, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 429 + }, + { + "epoch": 0.04135211809395586, + "grad_norm": 2.6561340304836674, + "learning_rate": 4.987641955236392e-06, + "loss": 0.1548, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 430 + }, + { + "epoch": 0.04144828581045343, + "grad_norm": 4.548944032415058, + "learning_rate": 4.987566085733354e-06, + "loss": 0.1859, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 431 + }, + { + "epoch": 0.041544453526951006, + "grad_norm": 5.2099363964543155, + "learning_rate": 4.987489984630985e-06, + "loss": 0.1731, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 432 + }, + { + "epoch": 0.041640621243448576, + "grad_norm": 2.322034170589204, + "learning_rate": 4.987413651936369e-06, + "loss": 0.1473, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 433 + }, + { + "epoch": 0.04173678895994615, + "grad_norm": 6.761920166734431, + "learning_rate": 4.987337087656614e-06, + "loss": 0.1731, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 434 + }, + { + "epoch": 0.04183295667644372, + "grad_norm": 3.292110055813091, + "learning_rate": 4.987260291798848e-06, + "loss": 0.1488, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 435 + }, + { + "epoch": 0.04192912439294129, + "grad_norm": 4.440964789651542, + "learning_rate": 4.987183264370221e-06, + "loss": 0.1522, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 436 + }, + { + "epoch": 0.04202529210943886, + "grad_norm": 4.697602384313586, + "learning_rate": 4.987106005377904e-06, + "loss": 0.1386, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 437 + }, + { + "epoch": 0.04212145982593643, + "grad_norm": 2.843665100393653, + "learning_rate": 4.98702851482909e-06, + "loss": 0.1912, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 438 + }, + { + "epoch": 0.04221762754243401, + "grad_norm": 6.498041022981036, + "learning_rate": 4.986950792730995e-06, + "loss": 0.1623, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 439 + }, + { + "epoch": 0.04231379525893158, + "grad_norm": 7.440305022338962, + "learning_rate": 4.986872839090853e-06, + "loss": 0.1566, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 440 + }, + { + "epoch": 0.04240996297542915, + "grad_norm": 2.792123868131241, + "learning_rate": 4.986794653915923e-06, + "loss": 0.1545, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 441 + }, + { + "epoch": 0.04250613069192672, + "grad_norm": 7.581829898887273, + "learning_rate": 4.986716237213484e-06, + "loss": 0.1903, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 442 + }, + { + "epoch": 0.04260229840842429, + "grad_norm": 10.707447508082462, + "learning_rate": 4.986637588990836e-06, + "loss": 0.2459, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 443 + }, + { + "epoch": 0.04269846612492186, + "grad_norm": 6.907382295278481, + "learning_rate": 4.986558709255302e-06, + "loss": 0.1934, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 444 + }, + { + "epoch": 0.04279463384141943, + "grad_norm": 1.9082804275780505, + "learning_rate": 4.986479598014228e-06, + "loss": 0.1539, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 445 + }, + { + "epoch": 0.04289080155791701, + "grad_norm": 4.847306508988469, + "learning_rate": 4.986400255274976e-06, + "loss": 0.1784, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 446 + }, + { + "epoch": 0.04298696927441458, + "grad_norm": 5.42616763732497, + "learning_rate": 4.986320681044935e-06, + "loss": 0.1738, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 447 + }, + { + "epoch": 0.04308313699091215, + "grad_norm": 2.773670636455946, + "learning_rate": 4.986240875331513e-06, + "loss": 0.1712, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 448 + }, + { + "epoch": 0.04317930470740972, + "grad_norm": 2.7977829570062465, + "learning_rate": 4.986160838142141e-06, + "loss": 0.1821, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 449 + }, + { + "epoch": 0.043275472423907294, + "grad_norm": 3.194066433203487, + "learning_rate": 4.98608056948427e-06, + "loss": 0.1669, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 450 + }, + { + "epoch": 0.043371640140404864, + "grad_norm": 4.839349938355792, + "learning_rate": 4.986000069365372e-06, + "loss": 0.2197, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 451 + }, + { + "epoch": 0.043467807856902435, + "grad_norm": 3.41641494152851, + "learning_rate": 4.985919337792944e-06, + "loss": 0.1474, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 452 + }, + { + "epoch": 0.04356397557340001, + "grad_norm": 3.333733180406721, + "learning_rate": 4.985838374774501e-06, + "loss": 0.1582, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 453 + }, + { + "epoch": 0.043660143289897584, + "grad_norm": 2.104605679968661, + "learning_rate": 4.9857571803175805e-06, + "loss": 0.1728, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 454 + }, + { + "epoch": 0.043756311006395154, + "grad_norm": 2.920663846360698, + "learning_rate": 4.985675754429744e-06, + "loss": 0.1567, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 455 + }, + { + "epoch": 0.043852478722892725, + "grad_norm": 6.11301307609859, + "learning_rate": 4.9855940971185705e-06, + "loss": 0.1792, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 456 + }, + { + "epoch": 0.043948646439390296, + "grad_norm": 2.576753661250764, + "learning_rate": 4.985512208391663e-06, + "loss": 0.1763, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 457 + }, + { + "epoch": 0.04404481415588787, + "grad_norm": 2.125213818427191, + "learning_rate": 4.9854300882566455e-06, + "loss": 0.1573, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 458 + }, + { + "epoch": 0.04414098187238544, + "grad_norm": 5.4804995888868095, + "learning_rate": 4.985347736721165e-06, + "loss": 0.2023, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 459 + }, + { + "epoch": 0.044237149588883015, + "grad_norm": 5.5279416182828385, + "learning_rate": 4.985265153792887e-06, + "loss": 0.1491, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 460 + }, + { + "epoch": 0.044333317305380586, + "grad_norm": 3.424341965750189, + "learning_rate": 4.9851823394795e-06, + "loss": 0.1821, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 461 + }, + { + "epoch": 0.04442948502187816, + "grad_norm": 6.954979146409939, + "learning_rate": 4.985099293788715e-06, + "loss": 0.1697, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 462 + }, + { + "epoch": 0.04452565273837573, + "grad_norm": 7.767765475336896, + "learning_rate": 4.985016016728264e-06, + "loss": 0.1596, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 463 + }, + { + "epoch": 0.0446218204548733, + "grad_norm": 6.8232154288785996, + "learning_rate": 4.9849325083059e-06, + "loss": 0.199, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 464 + }, + { + "epoch": 0.04471798817137087, + "grad_norm": 3.6617173056575467, + "learning_rate": 4.984848768529398e-06, + "loss": 0.1908, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 465 + }, + { + "epoch": 0.04481415588786844, + "grad_norm": 5.608510588245433, + "learning_rate": 4.984764797406555e-06, + "loss": 0.1792, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 466 + }, + { + "epoch": 0.04491032360436602, + "grad_norm": 2.7162395529804337, + "learning_rate": 4.984680594945187e-06, + "loss": 0.1848, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 467 + }, + { + "epoch": 0.04500649132086359, + "grad_norm": 2.50312030831415, + "learning_rate": 4.9845961611531356e-06, + "loss": 0.1945, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 468 + }, + { + "epoch": 0.04510265903736116, + "grad_norm": 3.6303401091209158, + "learning_rate": 4.984511496038261e-06, + "loss": 0.1854, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 469 + }, + { + "epoch": 0.04519882675385873, + "grad_norm": 2.0556836196309085, + "learning_rate": 4.9844265996084455e-06, + "loss": 0.1848, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 470 + }, + { + "epoch": 0.0452949944703563, + "grad_norm": 1.9617049878018755, + "learning_rate": 4.9843414718715936e-06, + "loss": 0.1422, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 471 + }, + { + "epoch": 0.04539116218685387, + "grad_norm": 5.431904663178899, + "learning_rate": 4.98425611283563e-06, + "loss": 0.1711, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 472 + }, + { + "epoch": 0.04548732990335144, + "grad_norm": 2.300704504306812, + "learning_rate": 4.984170522508504e-06, + "loss": 0.1274, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 473 + }, + { + "epoch": 0.04558349761984902, + "grad_norm": 2.847398746694956, + "learning_rate": 4.984084700898182e-06, + "loss": 0.1454, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 474 + }, + { + "epoch": 0.04567966533634659, + "grad_norm": 2.9118735025603546, + "learning_rate": 4.983998648012655e-06, + "loss": 0.171, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 475 + }, + { + "epoch": 0.04577583305284416, + "grad_norm": 6.507922372154419, + "learning_rate": 4.983912363859935e-06, + "loss": 0.1764, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 476 + }, + { + "epoch": 0.04587200076934173, + "grad_norm": 3.864918757922529, + "learning_rate": 4.983825848448055e-06, + "loss": 0.1904, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 477 + }, + { + "epoch": 0.0459681684858393, + "grad_norm": 2.444405120367698, + "learning_rate": 4.983739101785071e-06, + "loss": 0.1374, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 478 + }, + { + "epoch": 0.046064336202336874, + "grad_norm": 5.909843890464606, + "learning_rate": 4.983652123879058e-06, + "loss": 0.2013, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 479 + }, + { + "epoch": 0.046160503918834445, + "grad_norm": 3.2655593878478872, + "learning_rate": 4.983564914738113e-06, + "loss": 0.1879, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 480 + }, + { + "epoch": 0.04625667163533202, + "grad_norm": 3.0767410904207657, + "learning_rate": 4.983477474370358e-06, + "loss": 0.1373, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 481 + }, + { + "epoch": 0.04635283935182959, + "grad_norm": 4.029708212570924, + "learning_rate": 4.983389802783933e-06, + "loss": 0.2082, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 482 + }, + { + "epoch": 0.046449007068327164, + "grad_norm": 1.823465381278136, + "learning_rate": 4.983301899986999e-06, + "loss": 0.1624, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 483 + }, + { + "epoch": 0.046545174784824735, + "grad_norm": 4.942678342388377, + "learning_rate": 4.983213765987742e-06, + "loss": 0.1826, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 484 + }, + { + "epoch": 0.046641342501322305, + "grad_norm": 2.0670954914434416, + "learning_rate": 4.983125400794366e-06, + "loss": 0.1177, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 485 + }, + { + "epoch": 0.046737510217819876, + "grad_norm": 2.599428507910075, + "learning_rate": 4.983036804415099e-06, + "loss": 0.1454, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 486 + }, + { + "epoch": 0.04683367793431745, + "grad_norm": 4.5764284876050665, + "learning_rate": 4.982947976858189e-06, + "loss": 0.1569, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 487 + }, + { + "epoch": 0.046929845650815025, + "grad_norm": 3.0176229575962754, + "learning_rate": 4.982858918131906e-06, + "loss": 0.1719, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 488 + }, + { + "epoch": 0.047026013367312595, + "grad_norm": 7.153895964563839, + "learning_rate": 4.982769628244543e-06, + "loss": 0.2266, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 489 + }, + { + "epoch": 0.047122181083810166, + "grad_norm": 4.514503132324562, + "learning_rate": 4.982680107204411e-06, + "loss": 0.14, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 490 + }, + { + "epoch": 0.04721834880030774, + "grad_norm": 4.611497995262337, + "learning_rate": 4.982590355019846e-06, + "loss": 0.1906, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 491 + }, + { + "epoch": 0.04731451651680531, + "grad_norm": 4.699213234213201, + "learning_rate": 4.982500371699204e-06, + "loss": 0.1778, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 492 + }, + { + "epoch": 0.04741068423330288, + "grad_norm": 2.378169345206849, + "learning_rate": 4.982410157250863e-06, + "loss": 0.1379, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 493 + }, + { + "epoch": 0.04750685194980045, + "grad_norm": 6.811290109185354, + "learning_rate": 4.982319711683221e-06, + "loss": 0.1779, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 494 + }, + { + "epoch": 0.04760301966629803, + "grad_norm": 5.680237491028969, + "learning_rate": 4.9822290350047e-06, + "loss": 0.2091, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 495 + }, + { + "epoch": 0.0476991873827956, + "grad_norm": 2.0191231432809933, + "learning_rate": 4.982138127223742e-06, + "loss": 0.1839, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 496 + }, + { + "epoch": 0.04779535509929317, + "grad_norm": 4.895570154626528, + "learning_rate": 4.98204698834881e-06, + "loss": 0.1652, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 497 + }, + { + "epoch": 0.04789152281579074, + "grad_norm": 3.752861826000052, + "learning_rate": 4.9819556183883905e-06, + "loss": 0.1669, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 498 + }, + { + "epoch": 0.04798769053228831, + "grad_norm": 1.970447760168094, + "learning_rate": 4.981864017350989e-06, + "loss": 0.1522, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 499 + }, + { + "epoch": 0.04808385824878588, + "grad_norm": 3.531795928784247, + "learning_rate": 4.981772185245135e-06, + "loss": 0.1729, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 500 + }, + { + "epoch": 0.04818002596528345, + "grad_norm": 6.6789311052748115, + "learning_rate": 4.981680122079378e-06, + "loss": 0.1827, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 501 + }, + { + "epoch": 0.04827619368178103, + "grad_norm": 2.05496641161877, + "learning_rate": 4.981587827862289e-06, + "loss": 0.1554, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 502 + }, + { + "epoch": 0.0483723613982786, + "grad_norm": 2.394480111652223, + "learning_rate": 4.98149530260246e-06, + "loss": 0.1823, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 503 + }, + { + "epoch": 0.04846852911477617, + "grad_norm": 4.607605905713183, + "learning_rate": 4.981402546308508e-06, + "loss": 0.1677, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 504 + }, + { + "epoch": 0.04856469683127374, + "grad_norm": 2.394391142065083, + "learning_rate": 4.981309558989066e-06, + "loss": 0.1487, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 505 + }, + { + "epoch": 0.04866086454777131, + "grad_norm": 3.550959080396734, + "learning_rate": 4.981216340652793e-06, + "loss": 0.1714, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 506 + }, + { + "epoch": 0.04875703226426888, + "grad_norm": 4.950802743008274, + "learning_rate": 4.981122891308367e-06, + "loss": 0.1492, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 507 + }, + { + "epoch": 0.048853199980766454, + "grad_norm": 2.9343740767255357, + "learning_rate": 4.9810292109644894e-06, + "loss": 0.1681, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 508 + }, + { + "epoch": 0.04894936769726403, + "grad_norm": 2.554105602869106, + "learning_rate": 4.980935299629882e-06, + "loss": 0.1731, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 509 + }, + { + "epoch": 0.0490455354137616, + "grad_norm": 3.5544364490672122, + "learning_rate": 4.980841157313287e-06, + "loss": 0.1743, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 510 + }, + { + "epoch": 0.04914170313025917, + "grad_norm": 3.166081787024299, + "learning_rate": 4.98074678402347e-06, + "loss": 0.1836, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 511 + }, + { + "epoch": 0.049237870846756744, + "grad_norm": 3.4064835473276207, + "learning_rate": 4.9806521797692184e-06, + "loss": 0.1546, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 512 + }, + { + "epoch": 0.049334038563254315, + "grad_norm": 7.267952169354215, + "learning_rate": 4.980557344559339e-06, + "loss": 0.1959, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 513 + }, + { + "epoch": 0.049430206279751886, + "grad_norm": 2.145171233003628, + "learning_rate": 4.980462278402661e-06, + "loss": 0.1773, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 514 + }, + { + "epoch": 0.049526373996249456, + "grad_norm": 4.463133120880382, + "learning_rate": 4.9803669813080356e-06, + "loss": 0.1479, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 515 + }, + { + "epoch": 0.049622541712747034, + "grad_norm": 6.0984901927749435, + "learning_rate": 4.9802714532843355e-06, + "loss": 0.1741, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 516 + }, + { + "epoch": 0.049718709429244605, + "grad_norm": 7.766859958660499, + "learning_rate": 4.980175694340454e-06, + "loss": 0.1801, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 517 + }, + { + "epoch": 0.049814877145742176, + "grad_norm": 6.231382531411317, + "learning_rate": 4.980079704485308e-06, + "loss": 0.2133, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 518 + }, + { + "epoch": 0.049911044862239747, + "grad_norm": 2.4752982407836392, + "learning_rate": 4.9799834837278335e-06, + "loss": 0.154, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 519 + }, + { + "epoch": 0.05000721257873732, + "grad_norm": 8.797263984499606, + "learning_rate": 4.9798870320769884e-06, + "loss": 0.1905, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 520 + }, + { + "epoch": 0.05010338029523489, + "grad_norm": 7.568775082605878, + "learning_rate": 4.979790349541754e-06, + "loss": 0.1912, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 521 + }, + { + "epoch": 0.05019954801173246, + "grad_norm": 5.261363693853022, + "learning_rate": 4.97969343613113e-06, + "loss": 0.205, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 522 + }, + { + "epoch": 0.05029571572823004, + "grad_norm": 5.0296230456892905, + "learning_rate": 4.9795962918541395e-06, + "loss": 0.1649, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 523 + }, + { + "epoch": 0.05039188344472761, + "grad_norm": 8.214098145317136, + "learning_rate": 4.979498916719829e-06, + "loss": 0.1776, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 524 + }, + { + "epoch": 0.05048805116122518, + "grad_norm": 4.6145292576950965, + "learning_rate": 4.979401310737262e-06, + "loss": 0.1255, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 525 + }, + { + "epoch": 0.05058421887772275, + "grad_norm": 4.702560171411328, + "learning_rate": 4.979303473915527e-06, + "loss": 0.1615, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 526 + }, + { + "epoch": 0.05068038659422032, + "grad_norm": 7.777518722344153, + "learning_rate": 4.979205406263733e-06, + "loss": 0.1934, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 527 + }, + { + "epoch": 0.05077655431071789, + "grad_norm": 10.827745834673063, + "learning_rate": 4.979107107791009e-06, + "loss": 0.2025, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 528 + }, + { + "epoch": 0.05087272202721546, + "grad_norm": 13.303156551477722, + "learning_rate": 4.979008578506509e-06, + "loss": 0.221, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 529 + }, + { + "epoch": 0.05096888974371304, + "grad_norm": 4.951431339637838, + "learning_rate": 4.9789098184194055e-06, + "loss": 0.1653, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 530 + }, + { + "epoch": 0.05106505746021061, + "grad_norm": 4.283075381153029, + "learning_rate": 4.978810827538893e-06, + "loss": 0.1868, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 531 + }, + { + "epoch": 0.05116122517670818, + "grad_norm": 8.070838545534823, + "learning_rate": 4.978711605874187e-06, + "loss": 0.2192, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 532 + }, + { + "epoch": 0.05125739289320575, + "grad_norm": 6.169383653173978, + "learning_rate": 4.978612153434527e-06, + "loss": 0.1884, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 533 + }, + { + "epoch": 0.05135356060970332, + "grad_norm": 2.0265370847377397, + "learning_rate": 4.978512470229171e-06, + "loss": 0.1491, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 534 + }, + { + "epoch": 0.05144972832620089, + "grad_norm": 3.9869438640510344, + "learning_rate": 4.9784125562674005e-06, + "loss": 0.1652, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 535 + }, + { + "epoch": 0.051545896042698464, + "grad_norm": 4.705634806424531, + "learning_rate": 4.978312411558518e-06, + "loss": 0.1304, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 536 + }, + { + "epoch": 0.05164206375919604, + "grad_norm": 2.851039947519514, + "learning_rate": 4.9782120361118465e-06, + "loss": 0.163, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 537 + }, + { + "epoch": 0.05173823147569361, + "grad_norm": 6.684460996473741, + "learning_rate": 4.978111429936732e-06, + "loss": 0.1863, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 538 + }, + { + "epoch": 0.05183439919219118, + "grad_norm": 2.8153876230113264, + "learning_rate": 4.978010593042541e-06, + "loss": 0.1841, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 539 + }, + { + "epoch": 0.051930566908688754, + "grad_norm": 2.2106268207595385, + "learning_rate": 4.9779095254386605e-06, + "loss": 0.157, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 540 + }, + { + "epoch": 0.052026734625186324, + "grad_norm": 3.778459478501077, + "learning_rate": 4.9778082271345015e-06, + "loss": 0.1754, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 541 + }, + { + "epoch": 0.052122902341683895, + "grad_norm": 4.357650615832147, + "learning_rate": 4.977706698139495e-06, + "loss": 0.1578, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 542 + }, + { + "epoch": 0.052219070058181466, + "grad_norm": 5.264689589007369, + "learning_rate": 4.977604938463094e-06, + "loss": 0.1807, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 543 + }, + { + "epoch": 0.052315237774679044, + "grad_norm": 3.999055386949776, + "learning_rate": 4.977502948114772e-06, + "loss": 0.2063, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 544 + }, + { + "epoch": 0.052411405491176614, + "grad_norm": 2.712694577082044, + "learning_rate": 4.977400727104024e-06, + "loss": 0.1683, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 545 + }, + { + "epoch": 0.052507573207674185, + "grad_norm": 2.8239162324562765, + "learning_rate": 4.977298275440368e-06, + "loss": 0.1721, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 546 + }, + { + "epoch": 0.052603740924171756, + "grad_norm": 6.008573711476039, + "learning_rate": 4.9771955931333424e-06, + "loss": 0.1755, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 547 + }, + { + "epoch": 0.05269990864066933, + "grad_norm": 4.351264654029344, + "learning_rate": 4.977092680192507e-06, + "loss": 0.173, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 548 + }, + { + "epoch": 0.0527960763571669, + "grad_norm": 4.531653800125192, + "learning_rate": 4.976989536627443e-06, + "loss": 0.1326, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 549 + }, + { + "epoch": 0.05289224407366447, + "grad_norm": 2.2987978644179408, + "learning_rate": 4.976886162447754e-06, + "loss": 0.1841, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 550 + }, + { + "epoch": 0.052988411790162046, + "grad_norm": 3.2837888829417756, + "learning_rate": 4.976782557663065e-06, + "loss": 0.1523, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 551 + }, + { + "epoch": 0.05308457950665962, + "grad_norm": 2.79213429591128, + "learning_rate": 4.976678722283019e-06, + "loss": 0.1469, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 552 + }, + { + "epoch": 0.05318074722315719, + "grad_norm": 3.9658035840997314, + "learning_rate": 4.976574656317287e-06, + "loss": 0.1635, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 553 + }, + { + "epoch": 0.05327691493965476, + "grad_norm": 1.9140683168594785, + "learning_rate": 4.9764703597755555e-06, + "loss": 0.1122, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 554 + }, + { + "epoch": 0.05337308265615233, + "grad_norm": 3.259394266508305, + "learning_rate": 4.976365832667536e-06, + "loss": 0.1786, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 555 + }, + { + "epoch": 0.0534692503726499, + "grad_norm": 2.3887835908135355, + "learning_rate": 4.976261075002959e-06, + "loss": 0.1468, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 556 + }, + { + "epoch": 0.05356541808914747, + "grad_norm": 2.642051500041381, + "learning_rate": 4.9761560867915794e-06, + "loss": 0.1716, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 557 + }, + { + "epoch": 0.05366158580564504, + "grad_norm": 3.364932665958031, + "learning_rate": 4.9760508680431705e-06, + "loss": 0.1564, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 558 + }, + { + "epoch": 0.05375775352214262, + "grad_norm": 2.6558433079092363, + "learning_rate": 4.975945418767529e-06, + "loss": 0.1695, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 559 + }, + { + "epoch": 0.05385392123864019, + "grad_norm": 2.638767009142752, + "learning_rate": 4.975839738974473e-06, + "loss": 0.1677, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 560 + }, + { + "epoch": 0.05395008895513776, + "grad_norm": 1.9096007436068543, + "learning_rate": 4.975733828673841e-06, + "loss": 0.1638, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 561 + }, + { + "epoch": 0.05404625667163533, + "grad_norm": 2.6705238832564255, + "learning_rate": 4.975627687875494e-06, + "loss": 0.1674, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 562 + }, + { + "epoch": 0.0541424243881329, + "grad_norm": 2.7201172743141298, + "learning_rate": 4.975521316589312e-06, + "loss": 0.1433, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 563 + }, + { + "epoch": 0.05423859210463047, + "grad_norm": 2.5433709826677204, + "learning_rate": 4.975414714825201e-06, + "loss": 0.1525, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 564 + }, + { + "epoch": 0.054334759821128044, + "grad_norm": 2.7344849510884877, + "learning_rate": 4.975307882593085e-06, + "loss": 0.1496, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 565 + }, + { + "epoch": 0.05443092753762562, + "grad_norm": 3.4030406169283456, + "learning_rate": 4.975200819902911e-06, + "loss": 0.1815, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 566 + }, + { + "epoch": 0.05452709525412319, + "grad_norm": 2.5500259244251384, + "learning_rate": 4.975093526764645e-06, + "loss": 0.1436, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 567 + }, + { + "epoch": 0.05462326297062076, + "grad_norm": 4.137502417032435, + "learning_rate": 4.974986003188278e-06, + "loss": 0.1479, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 568 + }, + { + "epoch": 0.054719430687118334, + "grad_norm": 2.94470571107236, + "learning_rate": 4.97487824918382e-06, + "loss": 0.1628, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 569 + }, + { + "epoch": 0.054815598403615905, + "grad_norm": 3.9328154317929, + "learning_rate": 4.974770264761305e-06, + "loss": 0.1571, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 570 + }, + { + "epoch": 0.054911766120113475, + "grad_norm": 3.3762253864084597, + "learning_rate": 4.974662049930783e-06, + "loss": 0.1728, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 571 + }, + { + "epoch": 0.055007933836611046, + "grad_norm": 2.924935354875349, + "learning_rate": 4.974553604702332e-06, + "loss": 0.182, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 572 + }, + { + "epoch": 0.055104101553108624, + "grad_norm": 3.8588105835679474, + "learning_rate": 4.974444929086048e-06, + "loss": 0.1976, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 573 + }, + { + "epoch": 0.055200269269606195, + "grad_norm": 2.725224592714331, + "learning_rate": 4.974336023092049e-06, + "loss": 0.1667, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 574 + }, + { + "epoch": 0.055296436986103766, + "grad_norm": 2.1990158657377203, + "learning_rate": 4.974226886730474e-06, + "loss": 0.1513, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 575 + }, + { + "epoch": 0.055392604702601336, + "grad_norm": 1.9804366956826587, + "learning_rate": 4.974117520011484e-06, + "loss": 0.1502, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 576 + }, + { + "epoch": 0.05548877241909891, + "grad_norm": 3.8232948959205855, + "learning_rate": 4.974007922945261e-06, + "loss": 0.1514, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 577 + }, + { + "epoch": 0.05558494013559648, + "grad_norm": 2.7691356300027015, + "learning_rate": 4.973898095542009e-06, + "loss": 0.1746, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 578 + }, + { + "epoch": 0.05568110785209405, + "grad_norm": 3.660551045475443, + "learning_rate": 4.973788037811954e-06, + "loss": 0.1711, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 579 + }, + { + "epoch": 0.055777275568591626, + "grad_norm": 3.169641065661556, + "learning_rate": 4.9736777497653425e-06, + "loss": 0.1715, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 580 + }, + { + "epoch": 0.0558734432850892, + "grad_norm": 2.436539342312907, + "learning_rate": 4.973567231412442e-06, + "loss": 0.1692, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 581 + }, + { + "epoch": 0.05596961100158677, + "grad_norm": 4.944467510059276, + "learning_rate": 4.973456482763542e-06, + "loss": 0.1878, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 582 + }, + { + "epoch": 0.05606577871808434, + "grad_norm": 3.9005363000860442, + "learning_rate": 4.973345503828955e-06, + "loss": 0.1786, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 583 + }, + { + "epoch": 0.05616194643458191, + "grad_norm": 2.480867792497361, + "learning_rate": 4.973234294619011e-06, + "loss": 0.2123, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 584 + }, + { + "epoch": 0.05625811415107948, + "grad_norm": 6.193561356556977, + "learning_rate": 4.973122855144066e-06, + "loss": 0.1711, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 585 + }, + { + "epoch": 0.05635428186757705, + "grad_norm": 7.204708573655963, + "learning_rate": 4.973011185414494e-06, + "loss": 0.2223, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 586 + }, + { + "epoch": 0.05645044958407463, + "grad_norm": 2.485418991377669, + "learning_rate": 4.972899285440692e-06, + "loss": 0.2041, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 587 + }, + { + "epoch": 0.0565466173005722, + "grad_norm": 2.29296180629462, + "learning_rate": 4.97278715523308e-06, + "loss": 0.1555, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 588 + }, + { + "epoch": 0.05664278501706977, + "grad_norm": 6.670625068590319, + "learning_rate": 4.972674794802095e-06, + "loss": 0.1892, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 589 + }, + { + "epoch": 0.05673895273356734, + "grad_norm": 3.747973310216784, + "learning_rate": 4.972562204158199e-06, + "loss": 0.1445, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 590 + }, + { + "epoch": 0.05683512045006491, + "grad_norm": 3.1439131898662933, + "learning_rate": 4.972449383311875e-06, + "loss": 0.1658, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 591 + }, + { + "epoch": 0.05693128816656248, + "grad_norm": 4.329975518469634, + "learning_rate": 4.972336332273626e-06, + "loss": 0.1618, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 592 + }, + { + "epoch": 0.05702745588306005, + "grad_norm": 2.0940355384691327, + "learning_rate": 4.972223051053979e-06, + "loss": 0.1592, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 593 + }, + { + "epoch": 0.05712362359955763, + "grad_norm": 4.59685412854628, + "learning_rate": 4.972109539663479e-06, + "loss": 0.1682, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 594 + }, + { + "epoch": 0.0572197913160552, + "grad_norm": 4.769841004349341, + "learning_rate": 4.971995798112695e-06, + "loss": 0.1672, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 595 + }, + { + "epoch": 0.05731595903255277, + "grad_norm": 2.1563724063117164, + "learning_rate": 4.9718818264122185e-06, + "loss": 0.1369, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 596 + }, + { + "epoch": 0.05741212674905034, + "grad_norm": 3.611149488473277, + "learning_rate": 4.971767624572657e-06, + "loss": 0.208, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 597 + }, + { + "epoch": 0.057508294465547914, + "grad_norm": 4.025768409365946, + "learning_rate": 4.971653192604645e-06, + "loss": 0.1572, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 598 + }, + { + "epoch": 0.057604462182045485, + "grad_norm": 2.1634057329181386, + "learning_rate": 4.971538530518836e-06, + "loss": 0.1482, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 599 + }, + { + "epoch": 0.057700629898543056, + "grad_norm": 4.0315399665692135, + "learning_rate": 4.971423638325906e-06, + "loss": 0.1768, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 600 + }, + { + "epoch": 0.05779679761504063, + "grad_norm": 3.588010276252449, + "learning_rate": 4.971308516036551e-06, + "loss": 0.1611, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 601 + }, + { + "epoch": 0.057892965331538204, + "grad_norm": 2.659570778847642, + "learning_rate": 4.971193163661489e-06, + "loss": 0.1995, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 602 + }, + { + "epoch": 0.057989133048035775, + "grad_norm": 1.9714657149960442, + "learning_rate": 4.971077581211461e-06, + "loss": 0.1613, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 603 + }, + { + "epoch": 0.058085300764533346, + "grad_norm": 3.6275304730460727, + "learning_rate": 4.970961768697228e-06, + "loss": 0.151, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 604 + }, + { + "epoch": 0.05818146848103092, + "grad_norm": 4.954530569250082, + "learning_rate": 4.970845726129571e-06, + "loss": 0.1657, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 605 + }, + { + "epoch": 0.05827763619752849, + "grad_norm": 2.9807677934525083, + "learning_rate": 4.970729453519294e-06, + "loss": 0.1615, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 606 + }, + { + "epoch": 0.05837380391402606, + "grad_norm": 1.8755823520755845, + "learning_rate": 4.970612950877223e-06, + "loss": 0.1594, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 607 + }, + { + "epoch": 0.058469971630523636, + "grad_norm": 4.289821793006163, + "learning_rate": 4.970496218214205e-06, + "loss": 0.1552, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 608 + }, + { + "epoch": 0.05856613934702121, + "grad_norm": 4.316033445976973, + "learning_rate": 4.970379255541107e-06, + "loss": 0.1594, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 609 + }, + { + "epoch": 0.05866230706351878, + "grad_norm": 4.048445724410827, + "learning_rate": 4.970262062868821e-06, + "loss": 0.2182, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 610 + }, + { + "epoch": 0.05875847478001635, + "grad_norm": 3.2606567727214038, + "learning_rate": 4.970144640208254e-06, + "loss": 0.1467, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 611 + }, + { + "epoch": 0.05885464249651392, + "grad_norm": 5.501314689492141, + "learning_rate": 4.9700269875703425e-06, + "loss": 0.193, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 612 + }, + { + "epoch": 0.05895081021301149, + "grad_norm": 5.656717684909595, + "learning_rate": 4.969909104966037e-06, + "loss": 0.1569, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 613 + }, + { + "epoch": 0.05904697792950906, + "grad_norm": 4.06742769528626, + "learning_rate": 4.969790992406315e-06, + "loss": 0.2071, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 614 + }, + { + "epoch": 0.05914314564600664, + "grad_norm": 3.4994263050651706, + "learning_rate": 4.969672649902172e-06, + "loss": 0.1681, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 615 + }, + { + "epoch": 0.05923931336250421, + "grad_norm": 6.940576566219236, + "learning_rate": 4.969554077464626e-06, + "loss": 0.1768, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 616 + }, + { + "epoch": 0.05933548107900178, + "grad_norm": 4.156163287309934, + "learning_rate": 4.969435275104717e-06, + "loss": 0.1665, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 617 + }, + { + "epoch": 0.05943164879549935, + "grad_norm": 4.657335220824134, + "learning_rate": 4.969316242833505e-06, + "loss": 0.1856, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 618 + }, + { + "epoch": 0.05952781651199692, + "grad_norm": 2.031834649898916, + "learning_rate": 4.969196980662073e-06, + "loss": 0.1761, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 619 + }, + { + "epoch": 0.05962398422849449, + "grad_norm": 2.569517988346924, + "learning_rate": 4.969077488601525e-06, + "loss": 0.1686, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 620 + }, + { + "epoch": 0.05972015194499206, + "grad_norm": 3.291597749352657, + "learning_rate": 4.968957766662984e-06, + "loss": 0.1762, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 621 + }, + { + "epoch": 0.05981631966148964, + "grad_norm": 2.886353699460033, + "learning_rate": 4.9688378148576e-06, + "loss": 0.1554, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 622 + }, + { + "epoch": 0.05991248737798721, + "grad_norm": 3.34164140569111, + "learning_rate": 4.968717633196537e-06, + "loss": 0.1754, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 623 + }, + { + "epoch": 0.06000865509448478, + "grad_norm": 5.96060741007526, + "learning_rate": 4.968597221690986e-06, + "loss": 0.1744, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 624 + }, + { + "epoch": 0.06010482281098235, + "grad_norm": 2.073974496250981, + "learning_rate": 4.968476580352158e-06, + "loss": 0.1557, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 625 + }, + { + "epoch": 0.060200990527479924, + "grad_norm": 2.2309831457759133, + "learning_rate": 4.9683557091912845e-06, + "loss": 0.1539, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 626 + }, + { + "epoch": 0.060297158243977494, + "grad_norm": 2.345131402346797, + "learning_rate": 4.96823460821962e-06, + "loss": 0.148, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 627 + }, + { + "epoch": 0.060393325960475065, + "grad_norm": 3.1722253845798005, + "learning_rate": 4.9681132774484376e-06, + "loss": 0.1536, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 628 + }, + { + "epoch": 0.06048949367697264, + "grad_norm": 4.219304361624622, + "learning_rate": 4.967991716889034e-06, + "loss": 0.1698, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 629 + }, + { + "epoch": 0.060585661393470214, + "grad_norm": 4.575372582532859, + "learning_rate": 4.967869926552728e-06, + "loss": 0.1903, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 630 + }, + { + "epoch": 0.060681829109967785, + "grad_norm": 2.9039409787709975, + "learning_rate": 4.967747906450857e-06, + "loss": 0.1632, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 631 + }, + { + "epoch": 0.060777996826465355, + "grad_norm": 5.633854310484626, + "learning_rate": 4.967625656594782e-06, + "loss": 0.2059, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 632 + }, + { + "epoch": 0.060874164542962926, + "grad_norm": 7.51295692047298, + "learning_rate": 4.967503176995886e-06, + "loss": 0.168, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 633 + }, + { + "epoch": 0.0609703322594605, + "grad_norm": 4.091795115206479, + "learning_rate": 4.967380467665571e-06, + "loss": 0.179, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 634 + }, + { + "epoch": 0.06106649997595807, + "grad_norm": 2.1976459259161985, + "learning_rate": 4.967257528615261e-06, + "loss": 0.1746, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 635 + }, + { + "epoch": 0.061162667692455645, + "grad_norm": 7.512341847460649, + "learning_rate": 4.967134359856404e-06, + "loss": 0.188, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 636 + }, + { + "epoch": 0.061258835408953216, + "grad_norm": 6.50932654507481, + "learning_rate": 4.967010961400466e-06, + "loss": 0.1984, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 637 + }, + { + "epoch": 0.06135500312545079, + "grad_norm": 2.10035948833998, + "learning_rate": 4.966887333258935e-06, + "loss": 0.1305, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 638 + }, + { + "epoch": 0.06145117084194836, + "grad_norm": 5.571591197392168, + "learning_rate": 4.966763475443322e-06, + "loss": 0.1707, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 639 + }, + { + "epoch": 0.06154733855844593, + "grad_norm": 6.751639205548714, + "learning_rate": 4.966639387965158e-06, + "loss": 0.1894, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 640 + }, + { + "epoch": 0.0616435062749435, + "grad_norm": 4.305925580092031, + "learning_rate": 4.966515070835997e-06, + "loss": 0.1646, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 641 + }, + { + "epoch": 0.06173967399144107, + "grad_norm": 2.81221187275891, + "learning_rate": 4.966390524067414e-06, + "loss": 0.1529, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 642 + }, + { + "epoch": 0.06183584170793865, + "grad_norm": 6.675165373623743, + "learning_rate": 4.966265747671002e-06, + "loss": 0.1776, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 643 + }, + { + "epoch": 0.06193200942443622, + "grad_norm": 8.151172318808133, + "learning_rate": 4.966140741658379e-06, + "loss": 0.1827, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 644 + }, + { + "epoch": 0.06202817714093379, + "grad_norm": 4.340056071439435, + "learning_rate": 4.966015506041184e-06, + "loss": 0.2014, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 645 + }, + { + "epoch": 0.06212434485743136, + "grad_norm": 5.9397034462061935, + "learning_rate": 4.965890040831077e-06, + "loss": 0.1541, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 646 + }, + { + "epoch": 0.06222051257392893, + "grad_norm": 4.515578182851318, + "learning_rate": 4.9657643460397386e-06, + "loss": 0.1512, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 647 + }, + { + "epoch": 0.0623166802904265, + "grad_norm": 5.11908760883477, + "learning_rate": 4.965638421678871e-06, + "loss": 0.1619, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 648 + }, + { + "epoch": 0.06241284800692407, + "grad_norm": 2.3200141581021647, + "learning_rate": 4.965512267760198e-06, + "loss": 0.1591, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 649 + }, + { + "epoch": 0.06250901572342164, + "grad_norm": 3.0132511778375, + "learning_rate": 4.965385884295467e-06, + "loss": 0.1621, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 650 + }, + { + "epoch": 0.06260518343991922, + "grad_norm": 4.192985445532698, + "learning_rate": 4.965259271296442e-06, + "loss": 0.1687, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 651 + }, + { + "epoch": 0.06270135115641678, + "grad_norm": 2.6851052524869043, + "learning_rate": 4.965132428774913e-06, + "loss": 0.1372, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 652 + }, + { + "epoch": 0.06279751887291436, + "grad_norm": 3.7597319779491527, + "learning_rate": 4.965005356742687e-06, + "loss": 0.1795, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 653 + }, + { + "epoch": 0.06289368658941194, + "grad_norm": 2.642712806077776, + "learning_rate": 4.9648780552115975e-06, + "loss": 0.1595, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 654 + }, + { + "epoch": 0.0629898543059095, + "grad_norm": 2.605309145890935, + "learning_rate": 4.964750524193494e-06, + "loss": 0.1762, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 655 + }, + { + "epoch": 0.06308602202240708, + "grad_norm": 3.9903745212723902, + "learning_rate": 4.964622763700252e-06, + "loss": 0.1545, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 656 + }, + { + "epoch": 0.06318218973890465, + "grad_norm": 4.715133100864543, + "learning_rate": 4.964494773743766e-06, + "loss": 0.167, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 657 + }, + { + "epoch": 0.06327835745540222, + "grad_norm": 2.415380681620333, + "learning_rate": 4.964366554335952e-06, + "loss": 0.1712, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 658 + }, + { + "epoch": 0.06337452517189979, + "grad_norm": 1.966482342054406, + "learning_rate": 4.964238105488748e-06, + "loss": 0.1536, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 659 + }, + { + "epoch": 0.06347069288839736, + "grad_norm": 2.261314672694237, + "learning_rate": 4.964109427214111e-06, + "loss": 0.1561, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 660 + }, + { + "epoch": 0.06356686060489494, + "grad_norm": 4.697820045683252, + "learning_rate": 4.963980519524023e-06, + "loss": 0.165, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 661 + }, + { + "epoch": 0.0636630283213925, + "grad_norm": 1.640995270560215, + "learning_rate": 4.963851382430486e-06, + "loss": 0.1374, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 662 + }, + { + "epoch": 0.06375919603789008, + "grad_norm": 6.056926626671018, + "learning_rate": 4.963722015945522e-06, + "loss": 0.1832, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 663 + }, + { + "epoch": 0.06385536375438765, + "grad_norm": 2.2524899173921322, + "learning_rate": 4.963592420081177e-06, + "loss": 0.1681, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 664 + }, + { + "epoch": 0.06395153147088523, + "grad_norm": 2.8359071894575636, + "learning_rate": 4.963462594849515e-06, + "loss": 0.1741, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 665 + }, + { + "epoch": 0.06404769918738279, + "grad_norm": 4.269716530420733, + "learning_rate": 4.963332540262623e-06, + "loss": 0.2085, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 666 + }, + { + "epoch": 0.06414386690388037, + "grad_norm": 5.332178148150021, + "learning_rate": 4.963202256332611e-06, + "loss": 0.1885, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 667 + }, + { + "epoch": 0.06424003462037794, + "grad_norm": 3.449810844203969, + "learning_rate": 4.963071743071608e-06, + "loss": 0.1611, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 668 + }, + { + "epoch": 0.06433620233687551, + "grad_norm": 6.85183701009027, + "learning_rate": 4.962941000491764e-06, + "loss": 0.2035, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 669 + }, + { + "epoch": 0.06443237005337309, + "grad_norm": 7.580487119740309, + "learning_rate": 4.962810028605253e-06, + "loss": 0.1991, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 670 + }, + { + "epoch": 0.06452853776987065, + "grad_norm": 5.079484157424353, + "learning_rate": 4.962678827424269e-06, + "loss": 0.2046, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 671 + }, + { + "epoch": 0.06462470548636823, + "grad_norm": 1.6048760688781742, + "learning_rate": 4.962547396961026e-06, + "loss": 0.1527, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 672 + }, + { + "epoch": 0.06472087320286579, + "grad_norm": 4.79545928167941, + "learning_rate": 4.96241573722776e-06, + "loss": 0.1609, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 673 + }, + { + "epoch": 0.06481704091936337, + "grad_norm": 8.811168699850278, + "learning_rate": 4.962283848236732e-06, + "loss": 0.1946, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 674 + }, + { + "epoch": 0.06491320863586095, + "grad_norm": 4.141719691260224, + "learning_rate": 4.962151730000218e-06, + "loss": 0.1537, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 675 + }, + { + "epoch": 0.06500937635235851, + "grad_norm": 3.245721380510337, + "learning_rate": 4.962019382530521e-06, + "loss": 0.1609, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 676 + }, + { + "epoch": 0.06510554406885609, + "grad_norm": 6.137220832609861, + "learning_rate": 4.96188680583996e-06, + "loss": 0.178, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 677 + }, + { + "epoch": 0.06520171178535365, + "grad_norm": 6.944797169201436, + "learning_rate": 4.961753999940882e-06, + "loss": 0.2197, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 678 + }, + { + "epoch": 0.06529787950185123, + "grad_norm": 2.3718250877988254, + "learning_rate": 4.961620964845648e-06, + "loss": 0.1229, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 679 + }, + { + "epoch": 0.0653940472183488, + "grad_norm": 2.442906380254111, + "learning_rate": 4.961487700566646e-06, + "loss": 0.1631, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 680 + }, + { + "epoch": 0.06549021493484637, + "grad_norm": 2.666025112863461, + "learning_rate": 4.961354207116283e-06, + "loss": 0.1419, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 681 + }, + { + "epoch": 0.06558638265134395, + "grad_norm": 2.357242992626421, + "learning_rate": 4.9612204845069876e-06, + "loss": 0.1352, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 682 + }, + { + "epoch": 0.06568255036784151, + "grad_norm": 4.244740068025726, + "learning_rate": 4.9610865327512095e-06, + "loss": 0.2188, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 683 + }, + { + "epoch": 0.06577871808433909, + "grad_norm": 2.426962083832988, + "learning_rate": 4.96095235186142e-06, + "loss": 0.1594, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 684 + }, + { + "epoch": 0.06587488580083666, + "grad_norm": 2.4146020362010234, + "learning_rate": 4.9608179418501125e-06, + "loss": 0.1659, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 685 + }, + { + "epoch": 0.06597105351733423, + "grad_norm": 3.590895048402382, + "learning_rate": 4.960683302729799e-06, + "loss": 0.165, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 686 + }, + { + "epoch": 0.0660672212338318, + "grad_norm": 2.805659699236203, + "learning_rate": 4.960548434513016e-06, + "loss": 0.1661, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 687 + }, + { + "epoch": 0.06616338895032937, + "grad_norm": 3.7659612349693665, + "learning_rate": 4.960413337212321e-06, + "loss": 0.1861, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 688 + }, + { + "epoch": 0.06625955666682695, + "grad_norm": 2.1590919365519414, + "learning_rate": 4.96027801084029e-06, + "loss": 0.163, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 689 + }, + { + "epoch": 0.06635572438332452, + "grad_norm": 1.9126701818403073, + "learning_rate": 4.960142455409525e-06, + "loss": 0.1558, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 690 + }, + { + "epoch": 0.0664518920998221, + "grad_norm": 1.9730939687131455, + "learning_rate": 4.960006670932643e-06, + "loss": 0.1597, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 691 + }, + { + "epoch": 0.06654805981631966, + "grad_norm": 3.558894829499622, + "learning_rate": 4.959870657422289e-06, + "loss": 0.1657, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 692 + }, + { + "epoch": 0.06664422753281724, + "grad_norm": 2.919357032107174, + "learning_rate": 4.959734414891125e-06, + "loss": 0.2015, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 693 + }, + { + "epoch": 0.0667403952493148, + "grad_norm": 2.3145869047815437, + "learning_rate": 4.959597943351835e-06, + "loss": 0.1532, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 694 + }, + { + "epoch": 0.06683656296581238, + "grad_norm": 4.549813549939127, + "learning_rate": 4.959461242817125e-06, + "loss": 0.1456, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 695 + }, + { + "epoch": 0.06693273068230995, + "grad_norm": 4.50899516596997, + "learning_rate": 4.959324313299724e-06, + "loss": 0.1654, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 696 + }, + { + "epoch": 0.06702889839880752, + "grad_norm": 3.6240050926816063, + "learning_rate": 4.959187154812379e-06, + "loss": 0.1523, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 697 + }, + { + "epoch": 0.0671250661153051, + "grad_norm": 5.532564856105828, + "learning_rate": 4.95904976736786e-06, + "loss": 0.1611, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 698 + }, + { + "epoch": 0.06722123383180266, + "grad_norm": 2.8538292260609563, + "learning_rate": 4.958912150978957e-06, + "loss": 0.1934, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 699 + }, + { + "epoch": 0.06731740154830024, + "grad_norm": 2.0775032152674773, + "learning_rate": 4.958774305658484e-06, + "loss": 0.1299, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 700 + }, + { + "epoch": 0.0674135692647978, + "grad_norm": 2.1814222132120764, + "learning_rate": 4.958636231419276e-06, + "loss": 0.1347, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 701 + }, + { + "epoch": 0.06750973698129538, + "grad_norm": 2.6532960676749684, + "learning_rate": 4.9584979282741856e-06, + "loss": 0.1654, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 702 + }, + { + "epoch": 0.06760590469779296, + "grad_norm": 3.173721604709478, + "learning_rate": 4.95835939623609e-06, + "loss": 0.1806, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 703 + }, + { + "epoch": 0.06770207241429052, + "grad_norm": 2.354494807448274, + "learning_rate": 4.958220635317886e-06, + "loss": 0.1826, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 704 + }, + { + "epoch": 0.0677982401307881, + "grad_norm": 3.40555750205329, + "learning_rate": 4.958081645532495e-06, + "loss": 0.1356, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 705 + }, + { + "epoch": 0.06789440784728566, + "grad_norm": 4.4314339473839715, + "learning_rate": 4.9579424268928565e-06, + "loss": 0.1812, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 706 + }, + { + "epoch": 0.06799057556378324, + "grad_norm": 2.519712383750418, + "learning_rate": 4.957802979411931e-06, + "loss": 0.1307, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 707 + }, + { + "epoch": 0.0680867432802808, + "grad_norm": 4.226249592776147, + "learning_rate": 4.957663303102702e-06, + "loss": 0.1581, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 708 + }, + { + "epoch": 0.06818291099677838, + "grad_norm": 7.613993202224982, + "learning_rate": 4.957523397978174e-06, + "loss": 0.2127, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 709 + }, + { + "epoch": 0.06827907871327596, + "grad_norm": 8.590983948851617, + "learning_rate": 4.957383264051372e-06, + "loss": 0.2076, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 710 + }, + { + "epoch": 0.06837524642977352, + "grad_norm": 2.3443803928082345, + "learning_rate": 4.957242901335344e-06, + "loss": 0.1381, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 711 + }, + { + "epoch": 0.0684714141462711, + "grad_norm": 23.89811290138034, + "learning_rate": 4.957102309843157e-06, + "loss": 0.1722, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 712 + }, + { + "epoch": 0.06856758186276866, + "grad_norm": 7.242818250435129, + "learning_rate": 4.9569614895879015e-06, + "loss": 0.1876, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 713 + }, + { + "epoch": 0.06866374957926624, + "grad_norm": 6.109473217843938, + "learning_rate": 4.956820440582687e-06, + "loss": 0.1702, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 714 + }, + { + "epoch": 0.0687599172957638, + "grad_norm": 2.7075091118395727, + "learning_rate": 4.956679162840646e-06, + "loss": 0.1785, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 715 + }, + { + "epoch": 0.06885608501226138, + "grad_norm": 5.106250374702038, + "learning_rate": 4.956537656374933e-06, + "loss": 0.2216, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 716 + }, + { + "epoch": 0.06895225272875896, + "grad_norm": 6.994586347964103, + "learning_rate": 4.956395921198721e-06, + "loss": 0.212, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 717 + }, + { + "epoch": 0.06904842044525653, + "grad_norm": 6.888266333837762, + "learning_rate": 4.956253957325207e-06, + "loss": 0.205, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 718 + }, + { + "epoch": 0.0691445881617541, + "grad_norm": 2.8762394088298935, + "learning_rate": 4.956111764767608e-06, + "loss": 0.1854, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 719 + }, + { + "epoch": 0.06924075587825167, + "grad_norm": 6.339206049357467, + "learning_rate": 4.955969343539162e-06, + "loss": 0.1884, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 720 + }, + { + "epoch": 0.06933692359474924, + "grad_norm": 7.404082097683994, + "learning_rate": 4.9558266936531295e-06, + "loss": 0.1846, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 721 + }, + { + "epoch": 0.06943309131124681, + "grad_norm": 9.533804358335876, + "learning_rate": 4.955683815122792e-06, + "loss": 0.2184, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 722 + }, + { + "epoch": 0.06952925902774439, + "grad_norm": 2.726561993511579, + "learning_rate": 4.955540707961451e-06, + "loss": 0.1658, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 723 + }, + { + "epoch": 0.06962542674424196, + "grad_norm": 5.060197400180757, + "learning_rate": 4.955397372182429e-06, + "loss": 0.1595, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 724 + }, + { + "epoch": 0.06972159446073953, + "grad_norm": 5.143803599833961, + "learning_rate": 4.955253807799073e-06, + "loss": 0.1876, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 725 + }, + { + "epoch": 0.0698177621772371, + "grad_norm": 3.028629572831003, + "learning_rate": 4.955110014824749e-06, + "loss": 0.1785, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 726 + }, + { + "epoch": 0.06991392989373467, + "grad_norm": 3.88543531264699, + "learning_rate": 4.954965993272844e-06, + "loss": 0.1972, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 727 + }, + { + "epoch": 0.07001009761023225, + "grad_norm": 3.61505123836629, + "learning_rate": 4.9548217431567665e-06, + "loss": 0.1725, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 728 + }, + { + "epoch": 0.07010626532672981, + "grad_norm": 3.2116520683151206, + "learning_rate": 4.954677264489948e-06, + "loss": 0.15, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 729 + }, + { + "epoch": 0.07020243304322739, + "grad_norm": 2.3414146679902794, + "learning_rate": 4.954532557285838e-06, + "loss": 0.1586, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 730 + }, + { + "epoch": 0.07029860075972497, + "grad_norm": 1.6305436181329913, + "learning_rate": 4.954387621557911e-06, + "loss": 0.1345, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 731 + }, + { + "epoch": 0.07039476847622253, + "grad_norm": 2.4095455219785586, + "learning_rate": 4.954242457319659e-06, + "loss": 0.1652, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 732 + }, + { + "epoch": 0.07049093619272011, + "grad_norm": 3.178559368741192, + "learning_rate": 4.9540970645845985e-06, + "loss": 0.1676, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 733 + }, + { + "epoch": 0.07058710390921767, + "grad_norm": 2.2294349291529705, + "learning_rate": 4.953951443366266e-06, + "loss": 0.1447, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 734 + }, + { + "epoch": 0.07068327162571525, + "grad_norm": 1.9778747794768081, + "learning_rate": 4.953805593678218e-06, + "loss": 0.1497, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 735 + }, + { + "epoch": 0.07077943934221281, + "grad_norm": 2.1334090172656484, + "learning_rate": 4.953659515534035e-06, + "loss": 0.187, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 736 + }, + { + "epoch": 0.07087560705871039, + "grad_norm": 4.06556886341672, + "learning_rate": 4.953513208947316e-06, + "loss": 0.1628, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 737 + }, + { + "epoch": 0.07097177477520797, + "grad_norm": 3.095871431506739, + "learning_rate": 4.953366673931684e-06, + "loss": 0.1565, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 738 + }, + { + "epoch": 0.07106794249170553, + "grad_norm": 1.9122932361026033, + "learning_rate": 4.95321991050078e-06, + "loss": 0.1682, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 739 + }, + { + "epoch": 0.07116411020820311, + "grad_norm": 7.507636965325973, + "learning_rate": 4.95307291866827e-06, + "loss": 0.1915, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 740 + }, + { + "epoch": 0.07126027792470067, + "grad_norm": 5.744187860782019, + "learning_rate": 4.952925698447839e-06, + "loss": 0.165, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 741 + }, + { + "epoch": 0.07135644564119825, + "grad_norm": 3.373838204760137, + "learning_rate": 4.952778249853192e-06, + "loss": 0.1664, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 742 + }, + { + "epoch": 0.07145261335769582, + "grad_norm": 4.686742392049189, + "learning_rate": 4.952630572898058e-06, + "loss": 0.1568, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 743 + }, + { + "epoch": 0.0715487810741934, + "grad_norm": 4.396441424631186, + "learning_rate": 4.952482667596187e-06, + "loss": 0.1694, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 744 + }, + { + "epoch": 0.07164494879069097, + "grad_norm": 4.267440592258049, + "learning_rate": 4.9523345339613475e-06, + "loss": 0.1686, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 745 + }, + { + "epoch": 0.07174111650718853, + "grad_norm": 2.6706612445451343, + "learning_rate": 4.952186172007333e-06, + "loss": 0.1292, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 746 + }, + { + "epoch": 0.07183728422368611, + "grad_norm": 3.0131696221266724, + "learning_rate": 4.952037581747955e-06, + "loss": 0.1477, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 747 + }, + { + "epoch": 0.07193345194018368, + "grad_norm": 2.3027931547134206, + "learning_rate": 4.951888763197048e-06, + "loss": 0.1586, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 748 + }, + { + "epoch": 0.07202961965668125, + "grad_norm": 4.326991591229072, + "learning_rate": 4.951739716368467e-06, + "loss": 0.1721, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 749 + }, + { + "epoch": 0.07212578737317882, + "grad_norm": 3.1343552226913634, + "learning_rate": 4.951590441276091e-06, + "loss": 0.1364, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 750 + }, + { + "epoch": 0.0722219550896764, + "grad_norm": 2.0082743149183755, + "learning_rate": 4.951440937933814e-06, + "loss": 0.1591, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 751 + }, + { + "epoch": 0.07231812280617397, + "grad_norm": 3.3383307672407883, + "learning_rate": 4.95129120635556e-06, + "loss": 0.1602, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 752 + }, + { + "epoch": 0.07241429052267154, + "grad_norm": 3.030292664542476, + "learning_rate": 4.951141246555265e-06, + "loss": 0.1531, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 753 + }, + { + "epoch": 0.07251045823916911, + "grad_norm": 3.1052849055324985, + "learning_rate": 4.950991058546893e-06, + "loss": 0.1614, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 754 + }, + { + "epoch": 0.07260662595566668, + "grad_norm": 7.063521757773809, + "learning_rate": 4.950840642344426e-06, + "loss": 0.173, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 755 + }, + { + "epoch": 0.07270279367216426, + "grad_norm": 5.174925918129424, + "learning_rate": 4.950689997961869e-06, + "loss": 0.1541, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 756 + }, + { + "epoch": 0.07279896138866182, + "grad_norm": 3.5541358549240702, + "learning_rate": 4.950539125413248e-06, + "loss": 0.1614, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 757 + }, + { + "epoch": 0.0728951291051594, + "grad_norm": 4.620571363338731, + "learning_rate": 4.950388024712608e-06, + "loss": 0.1258, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 758 + }, + { + "epoch": 0.07299129682165698, + "grad_norm": 3.710795848784329, + "learning_rate": 4.950236695874016e-06, + "loss": 0.1533, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 759 + }, + { + "epoch": 0.07308746453815454, + "grad_norm": 5.240677615893311, + "learning_rate": 4.9500851389115645e-06, + "loss": 0.1916, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 760 + }, + { + "epoch": 0.07318363225465212, + "grad_norm": 2.0418619903426967, + "learning_rate": 4.949933353839362e-06, + "loss": 0.13, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 761 + }, + { + "epoch": 0.07327979997114968, + "grad_norm": 4.942276932553591, + "learning_rate": 4.94978134067154e-06, + "loss": 0.1488, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 762 + }, + { + "epoch": 0.07337596768764726, + "grad_norm": 5.698100231820038, + "learning_rate": 4.949629099422251e-06, + "loss": 0.2102, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 763 + }, + { + "epoch": 0.07347213540414482, + "grad_norm": 2.194055476128725, + "learning_rate": 4.94947663010567e-06, + "loss": 0.1419, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 764 + }, + { + "epoch": 0.0735683031206424, + "grad_norm": 2.480033339221814, + "learning_rate": 4.949323932735991e-06, + "loss": 0.1467, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 765 + }, + { + "epoch": 0.07366447083713998, + "grad_norm": 2.081918975855046, + "learning_rate": 4.949171007327433e-06, + "loss": 0.1651, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 766 + }, + { + "epoch": 0.07376063855363754, + "grad_norm": 1.9712357238740328, + "learning_rate": 4.949017853894231e-06, + "loss": 0.1427, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 767 + }, + { + "epoch": 0.07385680627013512, + "grad_norm": 2.1416066731784174, + "learning_rate": 4.948864472450646e-06, + "loss": 0.1517, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 768 + }, + { + "epoch": 0.07395297398663268, + "grad_norm": 2.4235668368937793, + "learning_rate": 4.948710863010958e-06, + "loss": 0.1546, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 769 + }, + { + "epoch": 0.07404914170313026, + "grad_norm": 2.5655192670770313, + "learning_rate": 4.948557025589467e-06, + "loss": 0.1676, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 770 + }, + { + "epoch": 0.07414530941962783, + "grad_norm": 2.751566601701056, + "learning_rate": 4.948402960200497e-06, + "loss": 0.1812, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 771 + }, + { + "epoch": 0.0742414771361254, + "grad_norm": 2.707751652824694, + "learning_rate": 4.948248666858392e-06, + "loss": 0.1573, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 772 + }, + { + "epoch": 0.07433764485262298, + "grad_norm": 5.974745664734082, + "learning_rate": 4.948094145577516e-06, + "loss": 0.1611, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 773 + }, + { + "epoch": 0.07443381256912054, + "grad_norm": 4.22030329531975, + "learning_rate": 4.9479393963722564e-06, + "loss": 0.152, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 774 + }, + { + "epoch": 0.07452998028561812, + "grad_norm": 3.003023913098642, + "learning_rate": 4.947784419257021e-06, + "loss": 0.1624, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 775 + }, + { + "epoch": 0.07462614800211569, + "grad_norm": 2.8628737577757017, + "learning_rate": 4.947629214246238e-06, + "loss": 0.1632, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 776 + }, + { + "epoch": 0.07472231571861326, + "grad_norm": 2.0762537677070387, + "learning_rate": 4.947473781354356e-06, + "loss": 0.1521, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 777 + }, + { + "epoch": 0.07481848343511083, + "grad_norm": 4.139860733904425, + "learning_rate": 4.947318120595849e-06, + "loss": 0.1543, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 778 + }, + { + "epoch": 0.0749146511516084, + "grad_norm": 5.934438167331017, + "learning_rate": 4.947162231985208e-06, + "loss": 0.1733, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 779 + }, + { + "epoch": 0.07501081886810598, + "grad_norm": 3.7719038519685144, + "learning_rate": 4.947006115536947e-06, + "loss": 0.1458, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 780 + }, + { + "epoch": 0.07510698658460355, + "grad_norm": 1.7041654445047547, + "learning_rate": 4.946849771265601e-06, + "loss": 0.1235, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 781 + }, + { + "epoch": 0.07520315430110112, + "grad_norm": 79.71291174519715, + "learning_rate": 4.9466931991857255e-06, + "loss": 0.2, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 782 + }, + { + "epoch": 0.07529932201759869, + "grad_norm": 7.037981571903286, + "learning_rate": 4.9465363993118974e-06, + "loss": 0.163, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 783 + }, + { + "epoch": 0.07539548973409627, + "grad_norm": 8.000034231726099, + "learning_rate": 4.946379371658717e-06, + "loss": 0.1875, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 784 + }, + { + "epoch": 0.07549165745059383, + "grad_norm": 2.9729162901853043, + "learning_rate": 4.946222116240802e-06, + "loss": 0.1638, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 785 + }, + { + "epoch": 0.07558782516709141, + "grad_norm": 5.062473066743448, + "learning_rate": 4.946064633072795e-06, + "loss": 0.1846, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 786 + }, + { + "epoch": 0.07568399288358899, + "grad_norm": 7.802742113150817, + "learning_rate": 4.9459069221693565e-06, + "loss": 0.2076, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 787 + }, + { + "epoch": 0.07578016060008655, + "grad_norm": 5.3396615101621165, + "learning_rate": 4.945748983545172e-06, + "loss": 0.1706, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 788 + }, + { + "epoch": 0.07587632831658413, + "grad_norm": 2.1523202097176872, + "learning_rate": 4.945590817214944e-06, + "loss": 0.1412, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 789 + }, + { + "epoch": 0.07597249603308169, + "grad_norm": 2.7482438960006355, + "learning_rate": 4.9454324231933985e-06, + "loss": 0.1563, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 790 + }, + { + "epoch": 0.07606866374957927, + "grad_norm": 2.407318227357452, + "learning_rate": 4.945273801495283e-06, + "loss": 0.1496, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 791 + }, + { + "epoch": 0.07616483146607683, + "grad_norm": 2.5515639536962635, + "learning_rate": 4.9451149521353655e-06, + "loss": 0.1815, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 792 + }, + { + "epoch": 0.07626099918257441, + "grad_norm": 5.275892690584097, + "learning_rate": 4.944955875128437e-06, + "loss": 0.1802, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 793 + }, + { + "epoch": 0.07635716689907199, + "grad_norm": 3.5547896596391495, + "learning_rate": 4.944796570489304e-06, + "loss": 0.1471, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 794 + }, + { + "epoch": 0.07645333461556955, + "grad_norm": 1.6005186782148235, + "learning_rate": 4.944637038232801e-06, + "loss": 0.1597, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 795 + }, + { + "epoch": 0.07654950233206713, + "grad_norm": 1.7193257024035025, + "learning_rate": 4.944477278373782e-06, + "loss": 0.1543, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 796 + }, + { + "epoch": 0.07664567004856469, + "grad_norm": 2.6444014572291605, + "learning_rate": 4.9443172909271174e-06, + "loss": 0.181, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 797 + }, + { + "epoch": 0.07674183776506227, + "grad_norm": 2.0341363479785537, + "learning_rate": 4.944157075907705e-06, + "loss": 0.159, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 798 + }, + { + "epoch": 0.07683800548155983, + "grad_norm": 3.0849493138752972, + "learning_rate": 4.943996633330463e-06, + "loss": 0.1871, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 799 + }, + { + "epoch": 0.07693417319805741, + "grad_norm": 2.211168446229395, + "learning_rate": 4.943835963210324e-06, + "loss": 0.1775, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 800 + }, + { + "epoch": 0.07703034091455499, + "grad_norm": 3.9209017559809745, + "learning_rate": 4.9436750655622514e-06, + "loss": 0.1461, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 801 + }, + { + "epoch": 0.07712650863105255, + "grad_norm": 1.7623120778762202, + "learning_rate": 4.943513940401223e-06, + "loss": 0.1461, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 802 + }, + { + "epoch": 0.07722267634755013, + "grad_norm": 3.6089785075558427, + "learning_rate": 4.943352587742241e-06, + "loss": 0.1464, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 803 + }, + { + "epoch": 0.0773188440640477, + "grad_norm": 3.303712329943295, + "learning_rate": 4.943191007600327e-06, + "loss": 0.1785, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 804 + }, + { + "epoch": 0.07741501178054527, + "grad_norm": 3.6696187008023444, + "learning_rate": 4.943029199990524e-06, + "loss": 0.1852, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 805 + }, + { + "epoch": 0.07751117949704284, + "grad_norm": 15.111077371405901, + "learning_rate": 4.942867164927899e-06, + "loss": 0.1553, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 806 + }, + { + "epoch": 0.07760734721354041, + "grad_norm": 2.0945978201725945, + "learning_rate": 4.942704902427537e-06, + "loss": 0.1668, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 807 + }, + { + "epoch": 0.07770351493003799, + "grad_norm": 2.228464570381408, + "learning_rate": 4.942542412504543e-06, + "loss": 0.186, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 808 + }, + { + "epoch": 0.07779968264653556, + "grad_norm": 1.971191828003088, + "learning_rate": 4.9423796951740475e-06, + "loss": 0.1442, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 809 + }, + { + "epoch": 0.07789585036303313, + "grad_norm": 3.999570220396334, + "learning_rate": 4.9422167504512e-06, + "loss": 0.1817, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 810 + }, + { + "epoch": 0.0779920180795307, + "grad_norm": 2.411305179929605, + "learning_rate": 4.942053578351169e-06, + "loss": 0.1777, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 811 + }, + { + "epoch": 0.07808818579602828, + "grad_norm": 2.1746914491052305, + "learning_rate": 4.94189017888915e-06, + "loss": 0.1419, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 812 + }, + { + "epoch": 0.07818435351252584, + "grad_norm": 2.8384965746789077, + "learning_rate": 4.9417265520803515e-06, + "loss": 0.1608, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 813 + }, + { + "epoch": 0.07828052122902342, + "grad_norm": 1.8379551772733191, + "learning_rate": 4.941562697940011e-06, + "loss": 0.1627, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 814 + }, + { + "epoch": 0.078376688945521, + "grad_norm": 2.795456802851867, + "learning_rate": 4.941398616483382e-06, + "loss": 0.171, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 815 + }, + { + "epoch": 0.07847285666201856, + "grad_norm": 4.41234524395592, + "learning_rate": 4.9412343077257415e-06, + "loss": 0.1724, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 816 + }, + { + "epoch": 0.07856902437851614, + "grad_norm": 5.556900608523529, + "learning_rate": 4.941069771682387e-06, + "loss": 0.1552, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 817 + }, + { + "epoch": 0.0786651920950137, + "grad_norm": 3.385065294192355, + "learning_rate": 4.940905008368638e-06, + "loss": 0.172, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 818 + }, + { + "epoch": 0.07876135981151128, + "grad_norm": 4.769235005855317, + "learning_rate": 4.9407400177998335e-06, + "loss": 0.1946, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 819 + }, + { + "epoch": 0.07885752752800884, + "grad_norm": 4.205054335176936, + "learning_rate": 4.9405747999913355e-06, + "loss": 0.1729, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 820 + }, + { + "epoch": 0.07895369524450642, + "grad_norm": 2.9190312997981036, + "learning_rate": 4.9404093549585245e-06, + "loss": 0.16, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 821 + }, + { + "epoch": 0.079049862961004, + "grad_norm": 2.1436243688839585, + "learning_rate": 4.9402436827168054e-06, + "loss": 0.1459, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 822 + }, + { + "epoch": 0.07914603067750156, + "grad_norm": 4.639745359340067, + "learning_rate": 4.940077783281603e-06, + "loss": 0.1612, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 823 + }, + { + "epoch": 0.07924219839399914, + "grad_norm": 4.715972984185072, + "learning_rate": 4.939911656668361e-06, + "loss": 0.1718, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 824 + }, + { + "epoch": 0.0793383661104967, + "grad_norm": 3.709577322388753, + "learning_rate": 4.939745302892549e-06, + "loss": 0.1482, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 825 + }, + { + "epoch": 0.07943453382699428, + "grad_norm": 3.0689514922610166, + "learning_rate": 4.939578721969653e-06, + "loss": 0.1369, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 826 + }, + { + "epoch": 0.07953070154349184, + "grad_norm": 3.8252976332106394, + "learning_rate": 4.939411913915183e-06, + "loss": 0.1514, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 827 + }, + { + "epoch": 0.07962686925998942, + "grad_norm": 4.926518836951817, + "learning_rate": 4.939244878744669e-06, + "loss": 0.1551, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 828 + }, + { + "epoch": 0.079723036976487, + "grad_norm": 4.278855985254627, + "learning_rate": 4.939077616473662e-06, + "loss": 0.1539, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 829 + }, + { + "epoch": 0.07981920469298456, + "grad_norm": 2.241540768144528, + "learning_rate": 4.938910127117735e-06, + "loss": 0.1472, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 830 + }, + { + "epoch": 0.07991537240948214, + "grad_norm": 3.5375728592849622, + "learning_rate": 4.938742410692482e-06, + "loss": 0.1434, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 831 + }, + { + "epoch": 0.0800115401259797, + "grad_norm": 2.494052976671417, + "learning_rate": 4.938574467213519e-06, + "loss": 0.179, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 832 + }, + { + "epoch": 0.08010770784247728, + "grad_norm": 2.537048928327182, + "learning_rate": 4.938406296696479e-06, + "loss": 0.1459, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 833 + }, + { + "epoch": 0.08020387555897485, + "grad_norm": 10.399505708663192, + "learning_rate": 4.93823789915702e-06, + "loss": 0.1851, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 834 + }, + { + "epoch": 0.08030004327547242, + "grad_norm": 2.6528933057024044, + "learning_rate": 4.938069274610823e-06, + "loss": 0.1428, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 835 + }, + { + "epoch": 0.08039621099197, + "grad_norm": 3.646597045991899, + "learning_rate": 4.937900423073585e-06, + "loss": 0.1704, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 836 + }, + { + "epoch": 0.08049237870846757, + "grad_norm": 2.6358386714927375, + "learning_rate": 4.937731344561027e-06, + "loss": 0.1481, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 837 + }, + { + "epoch": 0.08058854642496514, + "grad_norm": 2.4785378502075863, + "learning_rate": 4.937562039088891e-06, + "loss": 0.1947, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 838 + }, + { + "epoch": 0.08068471414146271, + "grad_norm": 2.098625221957694, + "learning_rate": 4.937392506672939e-06, + "loss": 0.1337, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 839 + }, + { + "epoch": 0.08078088185796028, + "grad_norm": 2.387033897682772, + "learning_rate": 4.937222747328956e-06, + "loss": 0.1498, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 840 + }, + { + "epoch": 0.08087704957445785, + "grad_norm": 1.8367618562764028, + "learning_rate": 4.9370527610727455e-06, + "loss": 0.1534, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 841 + }, + { + "epoch": 0.08097321729095543, + "grad_norm": 1.8433972981925029, + "learning_rate": 4.936882547920136e-06, + "loss": 0.1423, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 842 + }, + { + "epoch": 0.081069385007453, + "grad_norm": 2.905702419938563, + "learning_rate": 4.936712107886973e-06, + "loss": 0.1544, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 843 + }, + { + "epoch": 0.08116555272395057, + "grad_norm": 3.674545989472059, + "learning_rate": 4.936541440989126e-06, + "loss": 0.1768, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 844 + }, + { + "epoch": 0.08126172044044815, + "grad_norm": 3.562421235879005, + "learning_rate": 4.9363705472424825e-06, + "loss": 0.169, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 845 + }, + { + "epoch": 0.08135788815694571, + "grad_norm": 5.270984382019533, + "learning_rate": 4.936199426662956e-06, + "loss": 0.1693, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 846 + }, + { + "epoch": 0.08145405587344329, + "grad_norm": 6.50916133291551, + "learning_rate": 4.936028079266477e-06, + "loss": 0.1741, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 847 + }, + { + "epoch": 0.08155022358994085, + "grad_norm": 3.1718334734089755, + "learning_rate": 4.935856505068999e-06, + "loss": 0.1699, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 848 + }, + { + "epoch": 0.08164639130643843, + "grad_norm": 2.4521945184565874, + "learning_rate": 4.935684704086495e-06, + "loss": 0.1851, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 849 + }, + { + "epoch": 0.081742559022936, + "grad_norm": 4.77098764301756, + "learning_rate": 4.93551267633496e-06, + "loss": 0.1673, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 850 + }, + { + "epoch": 0.08183872673943357, + "grad_norm": 6.197952989983336, + "learning_rate": 4.9353404218304124e-06, + "loss": 0.1538, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 851 + }, + { + "epoch": 0.08193489445593115, + "grad_norm": 3.7436223667842725, + "learning_rate": 4.935167940588887e-06, + "loss": 0.1358, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 852 + }, + { + "epoch": 0.08203106217242871, + "grad_norm": 3.366270779562821, + "learning_rate": 4.934995232626444e-06, + "loss": 0.1551, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 853 + }, + { + "epoch": 0.08212722988892629, + "grad_norm": 2.0604681518195207, + "learning_rate": 4.934822297959161e-06, + "loss": 0.1378, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 854 + }, + { + "epoch": 0.08222339760542385, + "grad_norm": 4.763566306669775, + "learning_rate": 4.934649136603141e-06, + "loss": 0.1654, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 855 + }, + { + "epoch": 0.08231956532192143, + "grad_norm": 1.753231060899194, + "learning_rate": 4.934475748574506e-06, + "loss": 0.1392, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 856 + }, + { + "epoch": 0.08241573303841901, + "grad_norm": 2.1299409356395316, + "learning_rate": 4.934302133889397e-06, + "loss": 0.1631, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 857 + }, + { + "epoch": 0.08251190075491657, + "grad_norm": 4.241757036062691, + "learning_rate": 4.934128292563978e-06, + "loss": 0.1466, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 858 + }, + { + "epoch": 0.08260806847141415, + "grad_norm": 2.3243059027860857, + "learning_rate": 4.933954224614436e-06, + "loss": 0.1723, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 859 + }, + { + "epoch": 0.08270423618791171, + "grad_norm": 2.2296074635099274, + "learning_rate": 4.933779930056975e-06, + "loss": 0.1651, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 860 + }, + { + "epoch": 0.08280040390440929, + "grad_norm": 110.24656551590526, + "learning_rate": 4.933605408907824e-06, + "loss": 0.1843, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 861 + }, + { + "epoch": 0.08289657162090686, + "grad_norm": 3.40627046027653, + "learning_rate": 4.93343066118323e-06, + "loss": 0.1595, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 862 + }, + { + "epoch": 0.08299273933740443, + "grad_norm": 2.6023436269406517, + "learning_rate": 4.933255686899465e-06, + "loss": 0.1486, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 863 + }, + { + "epoch": 0.08308890705390201, + "grad_norm": 4.457108617246372, + "learning_rate": 4.933080486072817e-06, + "loss": 0.1759, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 864 + }, + { + "epoch": 0.08318507477039958, + "grad_norm": 5.053128463040712, + "learning_rate": 4.9329050587195995e-06, + "loss": 0.1496, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 865 + }, + { + "epoch": 0.08328124248689715, + "grad_norm": 2.9647449858159813, + "learning_rate": 4.932729404856143e-06, + "loss": 0.1423, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 866 + }, + { + "epoch": 0.08337741020339472, + "grad_norm": 6.403175424007541, + "learning_rate": 4.932553524498803e-06, + "loss": 0.218, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 867 + }, + { + "epoch": 0.0834735779198923, + "grad_norm": 7.104456499791435, + "learning_rate": 4.9323774176639545e-06, + "loss": 0.1554, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 868 + }, + { + "epoch": 0.08356974563638986, + "grad_norm": 5.8622684908793525, + "learning_rate": 4.9322010843679936e-06, + "loss": 0.1506, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 869 + }, + { + "epoch": 0.08366591335288744, + "grad_norm": 1.6646134612307872, + "learning_rate": 4.932024524627337e-06, + "loss": 0.1286, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 870 + }, + { + "epoch": 0.08376208106938501, + "grad_norm": 4.1587930424116175, + "learning_rate": 4.931847738458423e-06, + "loss": 0.1827, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 871 + }, + { + "epoch": 0.08385824878588258, + "grad_norm": 3.1517132456141166, + "learning_rate": 4.93167072587771e-06, + "loss": 0.1591, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 872 + }, + { + "epoch": 0.08395441650238016, + "grad_norm": 2.866324997440314, + "learning_rate": 4.93149348690168e-06, + "loss": 0.1881, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 873 + }, + { + "epoch": 0.08405058421887772, + "grad_norm": 2.9521436649368105, + "learning_rate": 4.9313160215468336e-06, + "loss": 0.1383, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 874 + }, + { + "epoch": 0.0841467519353753, + "grad_norm": 3.309371841265916, + "learning_rate": 4.931138329829693e-06, + "loss": 0.1472, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 875 + }, + { + "epoch": 0.08424291965187286, + "grad_norm": 4.591901013684351, + "learning_rate": 4.930960411766803e-06, + "loss": 0.1496, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 876 + }, + { + "epoch": 0.08433908736837044, + "grad_norm": 2.622624399317859, + "learning_rate": 4.930782267374726e-06, + "loss": 0.1638, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 877 + }, + { + "epoch": 0.08443525508486802, + "grad_norm": 5.739913601913215, + "learning_rate": 4.930603896670051e-06, + "loss": 0.1606, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 878 + }, + { + "epoch": 0.08453142280136558, + "grad_norm": 4.732084763357226, + "learning_rate": 4.930425299669381e-06, + "loss": 0.1789, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 879 + }, + { + "epoch": 0.08462759051786316, + "grad_norm": 2.1994491977801154, + "learning_rate": 4.9302464763893474e-06, + "loss": 0.1702, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 880 + }, + { + "epoch": 0.08472375823436072, + "grad_norm": 3.5463033824790795, + "learning_rate": 4.930067426846597e-06, + "loss": 0.1744, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 881 + }, + { + "epoch": 0.0848199259508583, + "grad_norm": 2.1136435613885363, + "learning_rate": 4.9298881510578014e-06, + "loss": 0.1589, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 882 + }, + { + "epoch": 0.08491609366735586, + "grad_norm": 1.7968248041546335, + "learning_rate": 4.9297086490396495e-06, + "loss": 0.1488, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 883 + }, + { + "epoch": 0.08501226138385344, + "grad_norm": 5.076990842114593, + "learning_rate": 4.9295289208088545e-06, + "loss": 0.1615, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 884 + }, + { + "epoch": 0.08510842910035102, + "grad_norm": 3.715599324955693, + "learning_rate": 4.92934896638215e-06, + "loss": 0.1585, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 885 + }, + { + "epoch": 0.08520459681684858, + "grad_norm": 2.226290665605598, + "learning_rate": 4.92916878577629e-06, + "loss": 0.143, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 886 + }, + { + "epoch": 0.08530076453334616, + "grad_norm": 3.952996600888018, + "learning_rate": 4.92898837900805e-06, + "loss": 0.1683, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 887 + }, + { + "epoch": 0.08539693224984372, + "grad_norm": 5.837810835068602, + "learning_rate": 4.9288077460942266e-06, + "loss": 0.1771, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 888 + }, + { + "epoch": 0.0854930999663413, + "grad_norm": 3.8982160111911264, + "learning_rate": 4.928626887051636e-06, + "loss": 0.146, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 889 + }, + { + "epoch": 0.08558926768283887, + "grad_norm": 2.5506047371655014, + "learning_rate": 4.9284458018971186e-06, + "loss": 0.1477, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 890 + }, + { + "epoch": 0.08568543539933644, + "grad_norm": 7.643937845009518, + "learning_rate": 4.928264490647532e-06, + "loss": 0.1804, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 891 + }, + { + "epoch": 0.08578160311583402, + "grad_norm": 5.0844017561132855, + "learning_rate": 4.928082953319757e-06, + "loss": 0.1487, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 892 + }, + { + "epoch": 0.08587777083233158, + "grad_norm": 3.1281137214219017, + "learning_rate": 4.927901189930698e-06, + "loss": 0.1716, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 893 + }, + { + "epoch": 0.08597393854882916, + "grad_norm": 4.078573418710696, + "learning_rate": 4.927719200497273e-06, + "loss": 0.1411, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 894 + }, + { + "epoch": 0.08607010626532673, + "grad_norm": 6.399622341548819, + "learning_rate": 4.927536985036429e-06, + "loss": 0.2035, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 895 + }, + { + "epoch": 0.0861662739818243, + "grad_norm": 3.4915435050439436, + "learning_rate": 4.927354543565131e-06, + "loss": 0.1515, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 896 + }, + { + "epoch": 0.08626244169832187, + "grad_norm": 3.0253805158811304, + "learning_rate": 4.9271718761003625e-06, + "loss": 0.1711, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 897 + }, + { + "epoch": 0.08635860941481945, + "grad_norm": 2.1245930805349107, + "learning_rate": 4.926988982659132e-06, + "loss": 0.1999, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 898 + }, + { + "epoch": 0.08645477713131702, + "grad_norm": 2.21557073216674, + "learning_rate": 4.926805863258468e-06, + "loss": 0.1819, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 899 + }, + { + "epoch": 0.08655094484781459, + "grad_norm": 1.824820218953632, + "learning_rate": 4.926622517915417e-06, + "loss": 0.1785, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 900 + }, + { + "epoch": 0.08664711256431216, + "grad_norm": 4.769117897338774, + "learning_rate": 4.926438946647052e-06, + "loss": 0.1551, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 901 + }, + { + "epoch": 0.08674328028080973, + "grad_norm": 1.7175508652503042, + "learning_rate": 4.926255149470461e-06, + "loss": 0.1393, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 902 + }, + { + "epoch": 0.0868394479973073, + "grad_norm": 4.379810399374017, + "learning_rate": 4.926071126402758e-06, + "loss": 0.1788, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 903 + }, + { + "epoch": 0.08693561571380487, + "grad_norm": 6.857865931048408, + "learning_rate": 4.925886877461076e-06, + "loss": 0.1897, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 904 + }, + { + "epoch": 0.08703178343030245, + "grad_norm": 4.329271287112135, + "learning_rate": 4.925702402662568e-06, + "loss": 0.1655, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 905 + }, + { + "epoch": 0.08712795114680003, + "grad_norm": 2.24867871439537, + "learning_rate": 4.92551770202441e-06, + "loss": 0.1687, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 906 + }, + { + "epoch": 0.08722411886329759, + "grad_norm": 5.306061242439935, + "learning_rate": 4.925332775563798e-06, + "loss": 0.1794, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 907 + }, + { + "epoch": 0.08732028657979517, + "grad_norm": 3.891639700403504, + "learning_rate": 4.925147623297949e-06, + "loss": 0.1944, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 908 + }, + { + "epoch": 0.08741645429629273, + "grad_norm": 1.8608591767657272, + "learning_rate": 4.924962245244101e-06, + "loss": 0.1621, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 909 + }, + { + "epoch": 0.08751262201279031, + "grad_norm": 3.488310111974946, + "learning_rate": 4.924776641419513e-06, + "loss": 0.1434, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 910 + }, + { + "epoch": 0.08760878972928787, + "grad_norm": 5.941100443419898, + "learning_rate": 4.924590811841466e-06, + "loss": 0.2054, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 911 + }, + { + "epoch": 0.08770495744578545, + "grad_norm": 5.185776499330717, + "learning_rate": 4.924404756527262e-06, + "loss": 0.1684, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 912 + }, + { + "epoch": 0.08780112516228303, + "grad_norm": 1.6836806747019533, + "learning_rate": 4.924218475494221e-06, + "loss": 0.1295, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 913 + }, + { + "epoch": 0.08789729287878059, + "grad_norm": 4.88990869875647, + "learning_rate": 4.924031968759688e-06, + "loss": 0.163, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 914 + }, + { + "epoch": 0.08799346059527817, + "grad_norm": 5.538293126314864, + "learning_rate": 4.923845236341027e-06, + "loss": 0.1311, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 915 + }, + { + "epoch": 0.08808962831177573, + "grad_norm": 1.9174757774340947, + "learning_rate": 4.923658278255622e-06, + "loss": 0.1301, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 916 + }, + { + "epoch": 0.08818579602827331, + "grad_norm": 4.6571506416404365, + "learning_rate": 4.923471094520882e-06, + "loss": 0.1853, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 35.81, + "memory/max_mem_allocated(gib)": 35.81, + "step": 917 + }, + { + "epoch": 0.08828196374477087, + "grad_norm": 4.859844630246711, + "learning_rate": 4.923283685154231e-06, + "loss": 0.1695, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 918 + }, + { + "epoch": 0.08837813146126845, + "grad_norm": 4.825577509044426, + "learning_rate": 4.923096050173121e-06, + "loss": 0.174, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 919 + }, + { + "epoch": 0.08847429917776603, + "grad_norm": 2.0145208338173024, + "learning_rate": 4.9229081895950185e-06, + "loss": 0.1354, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 920 + }, + { + "epoch": 0.0885704668942636, + "grad_norm": 6.4351899454695705, + "learning_rate": 4.922720103437414e-06, + "loss": 0.158, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 921 + }, + { + "epoch": 0.08866663461076117, + "grad_norm": 5.882850191388657, + "learning_rate": 4.92253179171782e-06, + "loss": 0.1873, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 922 + }, + { + "epoch": 0.08876280232725874, + "grad_norm": 3.2271756903530555, + "learning_rate": 4.922343254453769e-06, + "loss": 0.1746, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 923 + }, + { + "epoch": 0.08885897004375631, + "grad_norm": 4.421809248324263, + "learning_rate": 4.922154491662813e-06, + "loss": 0.1743, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 924 + }, + { + "epoch": 0.08895513776025388, + "grad_norm": 5.010369432123867, + "learning_rate": 4.921965503362528e-06, + "loss": 0.1847, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 925 + }, + { + "epoch": 0.08905130547675146, + "grad_norm": 3.1662315818707834, + "learning_rate": 4.921776289570508e-06, + "loss": 0.1323, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 926 + }, + { + "epoch": 0.08914747319324903, + "grad_norm": 3.2132309346425147, + "learning_rate": 4.921586850304369e-06, + "loss": 0.1685, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 927 + }, + { + "epoch": 0.0892436409097466, + "grad_norm": 1.8825519174543817, + "learning_rate": 4.92139718558175e-06, + "loss": 0.1559, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 928 + }, + { + "epoch": 0.08933980862624417, + "grad_norm": 4.767332497348241, + "learning_rate": 4.921207295420309e-06, + "loss": 0.1561, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 929 + }, + { + "epoch": 0.08943597634274174, + "grad_norm": 4.006485095168844, + "learning_rate": 4.921017179837724e-06, + "loss": 0.163, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 930 + }, + { + "epoch": 0.08953214405923932, + "grad_norm": 1.913166025312742, + "learning_rate": 4.920826838851696e-06, + "loss": 0.1553, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 931 + }, + { + "epoch": 0.08962831177573688, + "grad_norm": 3.326200790418381, + "learning_rate": 4.920636272479946e-06, + "loss": 0.1631, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 932 + }, + { + "epoch": 0.08972447949223446, + "grad_norm": 40.80196921200266, + "learning_rate": 4.920445480740217e-06, + "loss": 0.1798, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 933 + }, + { + "epoch": 0.08982064720873204, + "grad_norm": 3.467721400619604, + "learning_rate": 4.920254463650272e-06, + "loss": 0.1584, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 934 + }, + { + "epoch": 0.0899168149252296, + "grad_norm": 2.30444228069494, + "learning_rate": 4.920063221227895e-06, + "loss": 0.1745, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 935 + }, + { + "epoch": 0.09001298264172718, + "grad_norm": 4.826863385477717, + "learning_rate": 4.919871753490892e-06, + "loss": 0.1591, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 936 + }, + { + "epoch": 0.09010915035822474, + "grad_norm": 3.2245194937820143, + "learning_rate": 4.919680060457087e-06, + "loss": 0.2005, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 937 + }, + { + "epoch": 0.09020531807472232, + "grad_norm": 1.8449619003158328, + "learning_rate": 4.919488142144329e-06, + "loss": 0.1467, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 938 + }, + { + "epoch": 0.09030148579121988, + "grad_norm": 4.4301263607313714, + "learning_rate": 4.919295998570485e-06, + "loss": 0.178, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 939 + }, + { + "epoch": 0.09039765350771746, + "grad_norm": 4.118484154938419, + "learning_rate": 4.9191036297534455e-06, + "loss": 0.1476, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 940 + }, + { + "epoch": 0.09049382122421504, + "grad_norm": 2.24639944009791, + "learning_rate": 4.9189110357111205e-06, + "loss": 0.1308, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 941 + }, + { + "epoch": 0.0905899889407126, + "grad_norm": 2.037774289845885, + "learning_rate": 4.918718216461439e-06, + "loss": 0.1532, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 942 + }, + { + "epoch": 0.09068615665721018, + "grad_norm": 2.3358881447934667, + "learning_rate": 4.9185251720223556e-06, + "loss": 0.1776, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 943 + }, + { + "epoch": 0.09078232437370774, + "grad_norm": 4.079115616202101, + "learning_rate": 4.918331902411842e-06, + "loss": 0.1848, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 944 + }, + { + "epoch": 0.09087849209020532, + "grad_norm": 1.9972082147741848, + "learning_rate": 4.918138407647892e-06, + "loss": 0.1416, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 945 + }, + { + "epoch": 0.09097465980670288, + "grad_norm": 5.153996741063057, + "learning_rate": 4.9179446877485204e-06, + "loss": 0.1452, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 946 + }, + { + "epoch": 0.09107082752320046, + "grad_norm": 2.0180625603867353, + "learning_rate": 4.917750742731764e-06, + "loss": 0.1527, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 947 + }, + { + "epoch": 0.09116699523969804, + "grad_norm": 2.78670751726553, + "learning_rate": 4.917556572615678e-06, + "loss": 0.1218, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 948 + }, + { + "epoch": 0.0912631629561956, + "grad_norm": 2.5993487337503742, + "learning_rate": 4.917362177418342e-06, + "loss": 0.166, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 949 + }, + { + "epoch": 0.09135933067269318, + "grad_norm": 4.611472158549749, + "learning_rate": 4.917167557157854e-06, + "loss": 0.1487, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 950 + }, + { + "epoch": 0.09145549838919075, + "grad_norm": 2.186804117705656, + "learning_rate": 4.916972711852334e-06, + "loss": 0.1441, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 951 + }, + { + "epoch": 0.09155166610568832, + "grad_norm": 4.534719899734914, + "learning_rate": 4.916777641519921e-06, + "loss": 0.1718, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 952 + }, + { + "epoch": 0.09164783382218589, + "grad_norm": 5.353715961191147, + "learning_rate": 4.916582346178779e-06, + "loss": 0.178, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 953 + }, + { + "epoch": 0.09174400153868346, + "grad_norm": 2.3172585447634195, + "learning_rate": 4.916386825847089e-06, + "loss": 0.1477, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 954 + }, + { + "epoch": 0.09184016925518104, + "grad_norm": 6.4530151178338135, + "learning_rate": 4.916191080543054e-06, + "loss": 0.1691, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 955 + }, + { + "epoch": 0.0919363369716786, + "grad_norm": 6.508434927032894, + "learning_rate": 4.915995110284901e-06, + "loss": 0.2186, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 956 + }, + { + "epoch": 0.09203250468817618, + "grad_norm": 2.7173598440271935, + "learning_rate": 4.915798915090872e-06, + "loss": 0.1584, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 957 + }, + { + "epoch": 0.09212867240467375, + "grad_norm": 2.309649336583284, + "learning_rate": 4.915602494979236e-06, + "loss": 0.1734, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 958 + }, + { + "epoch": 0.09222484012117133, + "grad_norm": 4.810272570642766, + "learning_rate": 4.91540584996828e-06, + "loss": 0.1724, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 959 + }, + { + "epoch": 0.09232100783766889, + "grad_norm": 5.043948751830977, + "learning_rate": 4.91520898007631e-06, + "loss": 0.1802, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 960 + }, + { + "epoch": 0.09241717555416647, + "grad_norm": 2.3608919978700706, + "learning_rate": 4.9150118853216576e-06, + "loss": 0.1673, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 961 + }, + { + "epoch": 0.09251334327066404, + "grad_norm": 5.5749229774600755, + "learning_rate": 4.914814565722671e-06, + "loss": 0.1627, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 962 + }, + { + "epoch": 0.09260951098716161, + "grad_norm": 4.405406174211628, + "learning_rate": 4.914617021297723e-06, + "loss": 0.1849, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 963 + }, + { + "epoch": 0.09270567870365919, + "grad_norm": 2.8478886725526826, + "learning_rate": 4.914419252065204e-06, + "loss": 0.1552, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 964 + }, + { + "epoch": 0.09280184642015675, + "grad_norm": 4.911590724051824, + "learning_rate": 4.9142212580435275e-06, + "loss": 0.1819, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 965 + }, + { + "epoch": 0.09289801413665433, + "grad_norm": 1.7374794651162497, + "learning_rate": 4.914023039251128e-06, + "loss": 0.1398, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 966 + }, + { + "epoch": 0.09299418185315189, + "grad_norm": 2.6337203115601144, + "learning_rate": 4.9138245957064575e-06, + "loss": 0.1452, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 967 + }, + { + "epoch": 0.09309034956964947, + "grad_norm": 2.2642677586396247, + "learning_rate": 4.913625927427996e-06, + "loss": 0.1401, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 968 + }, + { + "epoch": 0.09318651728614705, + "grad_norm": 2.4601340928235267, + "learning_rate": 4.913427034434236e-06, + "loss": 0.1585, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 969 + }, + { + "epoch": 0.09328268500264461, + "grad_norm": 1.6747290009009026, + "learning_rate": 4.913227916743698e-06, + "loss": 0.1072, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 970 + }, + { + "epoch": 0.09337885271914219, + "grad_norm": 2.9988489857281504, + "learning_rate": 4.913028574374919e-06, + "loss": 0.1631, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 971 + }, + { + "epoch": 0.09347502043563975, + "grad_norm": 3.0581279110205024, + "learning_rate": 4.912829007346457e-06, + "loss": 0.1461, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 972 + }, + { + "epoch": 0.09357118815213733, + "grad_norm": 4.16066856468888, + "learning_rate": 4.912629215676895e-06, + "loss": 0.1645, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 973 + }, + { + "epoch": 0.0936673558686349, + "grad_norm": 3.1797436364415703, + "learning_rate": 4.912429199384833e-06, + "loss": 0.1568, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 974 + }, + { + "epoch": 0.09376352358513247, + "grad_norm": 3.207364027112306, + "learning_rate": 4.9122289584888926e-06, + "loss": 0.1517, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 975 + }, + { + "epoch": 0.09385969130163005, + "grad_norm": 2.565614005206595, + "learning_rate": 4.912028493007717e-06, + "loss": 0.1508, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 976 + }, + { + "epoch": 0.09395585901812761, + "grad_norm": 3.0864873415064844, + "learning_rate": 4.9118278029599695e-06, + "loss": 0.1839, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 977 + }, + { + "epoch": 0.09405202673462519, + "grad_norm": 2.747181915271181, + "learning_rate": 4.911626888364337e-06, + "loss": 0.1409, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 978 + }, + { + "epoch": 0.09414819445112275, + "grad_norm": 1.5965513415763337, + "learning_rate": 4.9114257492395225e-06, + "loss": 0.1406, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 979 + }, + { + "epoch": 0.09424436216762033, + "grad_norm": 1.9997676728278713, + "learning_rate": 4.911224385604255e-06, + "loss": 0.1518, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 980 + }, + { + "epoch": 0.0943405298841179, + "grad_norm": 2.119127950245357, + "learning_rate": 4.911022797477281e-06, + "loss": 0.1391, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 981 + }, + { + "epoch": 0.09443669760061547, + "grad_norm": 2.279779734794393, + "learning_rate": 4.910820984877368e-06, + "loss": 0.1802, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 982 + }, + { + "epoch": 0.09453286531711305, + "grad_norm": 3.5505921898699806, + "learning_rate": 4.910618947823306e-06, + "loss": 0.1623, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 983 + }, + { + "epoch": 0.09462903303361062, + "grad_norm": 1.9393175906546636, + "learning_rate": 4.9104166863339065e-06, + "loss": 0.1534, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 984 + }, + { + "epoch": 0.0947252007501082, + "grad_norm": 1.7826719063337946, + "learning_rate": 4.910214200427999e-06, + "loss": 0.153, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 985 + }, + { + "epoch": 0.09482136846660576, + "grad_norm": 4.664436797195586, + "learning_rate": 4.9100114901244355e-06, + "loss": 0.1874, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 986 + }, + { + "epoch": 0.09491753618310333, + "grad_norm": 1.7926916597767868, + "learning_rate": 4.909808555442091e-06, + "loss": 0.142, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 987 + }, + { + "epoch": 0.0950137038996009, + "grad_norm": 2.37616300385824, + "learning_rate": 4.9096053963998555e-06, + "loss": 0.2127, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 988 + }, + { + "epoch": 0.09510987161609848, + "grad_norm": 5.3561356524280415, + "learning_rate": 4.909402013016647e-06, + "loss": 0.1715, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 989 + }, + { + "epoch": 0.09520603933259605, + "grad_norm": 5.537297597403147, + "learning_rate": 4.909198405311399e-06, + "loss": 0.2009, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 990 + }, + { + "epoch": 0.09530220704909362, + "grad_norm": 3.4687278023350006, + "learning_rate": 4.9089945733030705e-06, + "loss": 0.1416, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 991 + }, + { + "epoch": 0.0953983747655912, + "grad_norm": 2.002631459099681, + "learning_rate": 4.908790517010637e-06, + "loss": 0.1688, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 992 + }, + { + "epoch": 0.09549454248208876, + "grad_norm": 5.187000893933616, + "learning_rate": 4.908586236453095e-06, + "loss": 0.1505, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 993 + }, + { + "epoch": 0.09559071019858634, + "grad_norm": 4.80394731609059, + "learning_rate": 4.908381731649467e-06, + "loss": 0.1779, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 994 + }, + { + "epoch": 0.0956868779150839, + "grad_norm": 2.173042715228753, + "learning_rate": 4.9081770026187915e-06, + "loss": 0.1375, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 995 + }, + { + "epoch": 0.09578304563158148, + "grad_norm": 2.828661401038235, + "learning_rate": 4.907972049380129e-06, + "loss": 0.1595, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 996 + }, + { + "epoch": 0.09587921334807906, + "grad_norm": 8.120453783428545, + "learning_rate": 4.907766871952561e-06, + "loss": 0.1776, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 997 + }, + { + "epoch": 0.09597538106457662, + "grad_norm": 3.7683191677377756, + "learning_rate": 4.907561470355191e-06, + "loss": 0.1659, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 998 + }, + { + "epoch": 0.0960715487810742, + "grad_norm": 2.8948430450944334, + "learning_rate": 4.907355844607142e-06, + "loss": 0.1622, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 999 + }, + { + "epoch": 0.09616771649757176, + "grad_norm": 2.96354385801669, + "learning_rate": 4.907149994727559e-06, + "loss": 0.195, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1000 + }, + { + "epoch": 0.09626388421406934, + "grad_norm": 3.301204546155342, + "learning_rate": 4.906943920735605e-06, + "loss": 0.1488, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1001 + }, + { + "epoch": 0.0963600519305669, + "grad_norm": 2.352015901593486, + "learning_rate": 4.906737622650469e-06, + "loss": 0.177, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1002 + }, + { + "epoch": 0.09645621964706448, + "grad_norm": 3.7158896582491976, + "learning_rate": 4.906531100491356e-06, + "loss": 0.1451, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1003 + }, + { + "epoch": 0.09655238736356206, + "grad_norm": 5.746104368559695, + "learning_rate": 4.906324354277496e-06, + "loss": 0.1687, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1004 + }, + { + "epoch": 0.09664855508005962, + "grad_norm": 4.9953807916946875, + "learning_rate": 4.906117384028134e-06, + "loss": 0.1746, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1005 + }, + { + "epoch": 0.0967447227965572, + "grad_norm": 2.5666161686925357, + "learning_rate": 4.905910189762543e-06, + "loss": 0.1679, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1006 + }, + { + "epoch": 0.09684089051305476, + "grad_norm": 4.803453647726853, + "learning_rate": 4.905702771500011e-06, + "loss": 0.1741, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1007 + }, + { + "epoch": 0.09693705822955234, + "grad_norm": 3.3985584428887785, + "learning_rate": 4.90549512925985e-06, + "loss": 0.1354, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1008 + }, + { + "epoch": 0.0970332259460499, + "grad_norm": 2.054980185086461, + "learning_rate": 4.905287263061392e-06, + "loss": 0.1538, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1009 + }, + { + "epoch": 0.09712939366254748, + "grad_norm": 2.031925141504631, + "learning_rate": 4.90507917292399e-06, + "loss": 0.1487, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1010 + }, + { + "epoch": 0.09722556137904506, + "grad_norm": 6.548882335819579, + "learning_rate": 4.904870858867019e-06, + "loss": 0.1704, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1011 + }, + { + "epoch": 0.09732172909554263, + "grad_norm": 7.741186178364465, + "learning_rate": 4.904662320909871e-06, + "loss": 0.1742, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1012 + }, + { + "epoch": 0.0974178968120402, + "grad_norm": 4.871911482610331, + "learning_rate": 4.904453559071964e-06, + "loss": 0.1625, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1013 + }, + { + "epoch": 0.09751406452853777, + "grad_norm": 2.9256475910257183, + "learning_rate": 4.904244573372733e-06, + "loss": 0.2091, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1014 + }, + { + "epoch": 0.09761023224503534, + "grad_norm": 3.8760523632066146, + "learning_rate": 4.904035363831635e-06, + "loss": 0.1868, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1015 + }, + { + "epoch": 0.09770639996153291, + "grad_norm": 5.0976022849836955, + "learning_rate": 4.903825930468149e-06, + "loss": 0.1985, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1016 + }, + { + "epoch": 0.09780256767803049, + "grad_norm": 6.016668874566554, + "learning_rate": 4.903616273301773e-06, + "loss": 0.1886, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1017 + }, + { + "epoch": 0.09789873539452806, + "grad_norm": 4.11222682600294, + "learning_rate": 4.903406392352027e-06, + "loss": 0.1621, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1018 + }, + { + "epoch": 0.09799490311102563, + "grad_norm": 1.9028753864158015, + "learning_rate": 4.903196287638451e-06, + "loss": 0.1634, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1019 + }, + { + "epoch": 0.0980910708275232, + "grad_norm": 3.7356325799470786, + "learning_rate": 4.902985959180608e-06, + "loss": 0.1589, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1020 + }, + { + "epoch": 0.09818723854402077, + "grad_norm": 3.872675183204256, + "learning_rate": 4.902775406998077e-06, + "loss": 0.1414, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1021 + }, + { + "epoch": 0.09828340626051835, + "grad_norm": 1.859892392822462, + "learning_rate": 4.902564631110464e-06, + "loss": 0.1901, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1022 + }, + { + "epoch": 0.09837957397701591, + "grad_norm": 4.792382602593547, + "learning_rate": 4.9023536315373915e-06, + "loss": 0.1567, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1023 + }, + { + "epoch": 0.09847574169351349, + "grad_norm": 6.831226592987366, + "learning_rate": 4.902142408298504e-06, + "loss": 0.1921, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1024 + }, + { + "epoch": 0.09857190941001107, + "grad_norm": 4.814642306647022, + "learning_rate": 4.901930961413468e-06, + "loss": 0.1845, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1025 + }, + { + "epoch": 0.09866807712650863, + "grad_norm": 2.0806050260402746, + "learning_rate": 4.901719290901969e-06, + "loss": 0.1648, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1026 + }, + { + "epoch": 0.09876424484300621, + "grad_norm": 2.505413741876185, + "learning_rate": 4.901507396783714e-06, + "loss": 0.1338, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1027 + }, + { + "epoch": 0.09886041255950377, + "grad_norm": 5.612510811935785, + "learning_rate": 4.901295279078431e-06, + "loss": 0.1835, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1028 + }, + { + "epoch": 0.09895658027600135, + "grad_norm": 6.522675097823008, + "learning_rate": 4.9010829378058695e-06, + "loss": 0.163, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1029 + }, + { + "epoch": 0.09905274799249891, + "grad_norm": 2.2766554703156436, + "learning_rate": 4.900870372985798e-06, + "loss": 0.1451, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1030 + }, + { + "epoch": 0.09914891570899649, + "grad_norm": 3.2913431642760185, + "learning_rate": 4.900657584638008e-06, + "loss": 0.1672, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1031 + }, + { + "epoch": 0.09924508342549407, + "grad_norm": 4.020607434441996, + "learning_rate": 4.9004445727823095e-06, + "loss": 0.1838, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1032 + }, + { + "epoch": 0.09934125114199163, + "grad_norm": 5.653545522854231, + "learning_rate": 4.900231337438535e-06, + "loss": 0.1979, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1033 + }, + { + "epoch": 0.09943741885848921, + "grad_norm": 5.650324300652377, + "learning_rate": 4.900017878626537e-06, + "loss": 0.1713, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1034 + }, + { + "epoch": 0.09953358657498677, + "grad_norm": 3.088006113717284, + "learning_rate": 4.899804196366191e-06, + "loss": 0.1438, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1035 + }, + { + "epoch": 0.09962975429148435, + "grad_norm": 1.935010922588275, + "learning_rate": 4.899590290677387e-06, + "loss": 0.1677, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1036 + }, + { + "epoch": 0.09972592200798192, + "grad_norm": 4.9573730639729465, + "learning_rate": 4.899376161580046e-06, + "loss": 0.186, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1037 + }, + { + "epoch": 0.09982208972447949, + "grad_norm": 4.239602535022115, + "learning_rate": 4.8991618090941e-06, + "loss": 0.1763, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1038 + }, + { + "epoch": 0.09991825744097707, + "grad_norm": 3.3061824371368327, + "learning_rate": 4.8989472332395065e-06, + "loss": 0.1528, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1039 + }, + { + "epoch": 0.10001442515747463, + "grad_norm": 3.998907158705342, + "learning_rate": 4.8987324340362445e-06, + "loss": 0.1669, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1040 + }, + { + "epoch": 0.10011059287397221, + "grad_norm": 6.089080003187553, + "learning_rate": 4.89851741150431e-06, + "loss": 0.1682, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1041 + }, + { + "epoch": 0.10020676059046978, + "grad_norm": 4.172749294686642, + "learning_rate": 4.898302165663725e-06, + "loss": 0.1516, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1042 + }, + { + "epoch": 0.10030292830696735, + "grad_norm": 1.6399193650583972, + "learning_rate": 4.8980866965345275e-06, + "loss": 0.1579, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1043 + }, + { + "epoch": 0.10039909602346492, + "grad_norm": 2.257778829901676, + "learning_rate": 4.8978710041367785e-06, + "loss": 0.1407, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1044 + }, + { + "epoch": 0.1004952637399625, + "grad_norm": 4.930514578449121, + "learning_rate": 4.89765508849056e-06, + "loss": 0.163, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1045 + }, + { + "epoch": 0.10059143145646007, + "grad_norm": 3.92073929283918, + "learning_rate": 4.8974389496159755e-06, + "loss": 0.163, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1046 + }, + { + "epoch": 0.10068759917295764, + "grad_norm": 2.361008183232774, + "learning_rate": 4.897222587533146e-06, + "loss": 0.1664, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1047 + }, + { + "epoch": 0.10078376688945521, + "grad_norm": 4.030839429873037, + "learning_rate": 4.897006002262217e-06, + "loss": 0.1859, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1048 + }, + { + "epoch": 0.10087993460595278, + "grad_norm": 4.650524733937511, + "learning_rate": 4.896789193823352e-06, + "loss": 0.1638, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1049 + }, + { + "epoch": 0.10097610232245036, + "grad_norm": 4.094075085927085, + "learning_rate": 4.896572162236737e-06, + "loss": 0.152, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1050 + }, + { + "epoch": 0.10107227003894792, + "grad_norm": 2.0475054261499874, + "learning_rate": 4.896354907522578e-06, + "loss": 0.1348, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1051 + }, + { + "epoch": 0.1011684377554455, + "grad_norm": 6.929145896381204, + "learning_rate": 4.8961374297011025e-06, + "loss": 0.211, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1052 + }, + { + "epoch": 0.10126460547194308, + "grad_norm": 3.842255323360258, + "learning_rate": 4.895919728792559e-06, + "loss": 0.1636, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1053 + }, + { + "epoch": 0.10136077318844064, + "grad_norm": 2.033543617599435, + "learning_rate": 4.895701804817214e-06, + "loss": 0.1484, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1054 + }, + { + "epoch": 0.10145694090493822, + "grad_norm": 2.1254713511491383, + "learning_rate": 4.895483657795358e-06, + "loss": 0.1817, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1055 + }, + { + "epoch": 0.10155310862143578, + "grad_norm": 2.4080440293470775, + "learning_rate": 4.895265287747302e-06, + "loss": 0.1644, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1056 + }, + { + "epoch": 0.10164927633793336, + "grad_norm": 2.1455006085071835, + "learning_rate": 4.895046694693374e-06, + "loss": 0.165, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1057 + }, + { + "epoch": 0.10174544405443092, + "grad_norm": 2.0431905812438016, + "learning_rate": 4.89482787865393e-06, + "loss": 0.1636, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1058 + }, + { + "epoch": 0.1018416117709285, + "grad_norm": 3.0045252961489046, + "learning_rate": 4.894608839649338e-06, + "loss": 0.184, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1059 + }, + { + "epoch": 0.10193777948742608, + "grad_norm": 3.8232319008691595, + "learning_rate": 4.894389577699994e-06, + "loss": 0.1519, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1060 + }, + { + "epoch": 0.10203394720392364, + "grad_norm": 1.7772373576708826, + "learning_rate": 4.89417009282631e-06, + "loss": 0.147, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1061 + }, + { + "epoch": 0.10213011492042122, + "grad_norm": 2.0169750171181744, + "learning_rate": 4.893950385048722e-06, + "loss": 0.1446, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1062 + }, + { + "epoch": 0.10222628263691878, + "grad_norm": 2.2917351480955825, + "learning_rate": 4.893730454387686e-06, + "loss": 0.1736, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1063 + }, + { + "epoch": 0.10232245035341636, + "grad_norm": 1.8410492714471287, + "learning_rate": 4.893510300863677e-06, + "loss": 0.1247, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1064 + }, + { + "epoch": 0.10241861806991392, + "grad_norm": 2.262822435208041, + "learning_rate": 4.893289924497191e-06, + "loss": 0.1641, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1065 + }, + { + "epoch": 0.1025147857864115, + "grad_norm": 2.4804909660856866, + "learning_rate": 4.893069325308747e-06, + "loss": 0.1508, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1066 + }, + { + "epoch": 0.10261095350290908, + "grad_norm": 2.1492089766224205, + "learning_rate": 4.892848503318884e-06, + "loss": 0.1569, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1067 + }, + { + "epoch": 0.10270712121940664, + "grad_norm": 2.256207383058035, + "learning_rate": 4.892627458548159e-06, + "loss": 0.1651, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1068 + }, + { + "epoch": 0.10280328893590422, + "grad_norm": 2.604220991041776, + "learning_rate": 4.892406191017154e-06, + "loss": 0.1669, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1069 + }, + { + "epoch": 0.10289945665240179, + "grad_norm": 2.46132355464141, + "learning_rate": 4.8921847007464685e-06, + "loss": 0.1808, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1070 + }, + { + "epoch": 0.10299562436889936, + "grad_norm": 1.8884043406659632, + "learning_rate": 4.8919629877567244e-06, + "loss": 0.1681, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1071 + }, + { + "epoch": 0.10309179208539693, + "grad_norm": 2.091129281447393, + "learning_rate": 4.8917410520685635e-06, + "loss": 0.1486, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1072 + }, + { + "epoch": 0.1031879598018945, + "grad_norm": 1.974400532062497, + "learning_rate": 4.891518893702648e-06, + "loss": 0.1421, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1073 + }, + { + "epoch": 0.10328412751839208, + "grad_norm": 2.241905426199005, + "learning_rate": 4.891296512679663e-06, + "loss": 0.1642, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1074 + }, + { + "epoch": 0.10338029523488965, + "grad_norm": 3.1732385969097314, + "learning_rate": 4.891073909020312e-06, + "loss": 0.1378, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1075 + }, + { + "epoch": 0.10347646295138722, + "grad_norm": 1.6833847855377686, + "learning_rate": 4.890851082745319e-06, + "loss": 0.1239, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1076 + }, + { + "epoch": 0.10357263066788479, + "grad_norm": 4.2333210440142235, + "learning_rate": 4.890628033875431e-06, + "loss": 0.1963, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1077 + }, + { + "epoch": 0.10366879838438237, + "grad_norm": 3.011824410795779, + "learning_rate": 4.890404762431415e-06, + "loss": 0.1662, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1078 + }, + { + "epoch": 0.10376496610087993, + "grad_norm": 3.970674046960383, + "learning_rate": 4.8901812684340565e-06, + "loss": 0.1607, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1079 + }, + { + "epoch": 0.10386113381737751, + "grad_norm": 2.4909486966448418, + "learning_rate": 4.889957551904164e-06, + "loss": 0.1856, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1080 + }, + { + "epoch": 0.10395730153387508, + "grad_norm": 7.901674277970363, + "learning_rate": 4.889733612862567e-06, + "loss": 0.1913, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1081 + }, + { + "epoch": 0.10405346925037265, + "grad_norm": 10.963075887121333, + "learning_rate": 4.889509451330114e-06, + "loss": 0.224, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1082 + }, + { + "epoch": 0.10414963696687023, + "grad_norm": 7.634374067811304, + "learning_rate": 4.889285067327676e-06, + "loss": 0.1697, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1083 + }, + { + "epoch": 0.10424580468336779, + "grad_norm": 2.425593288162242, + "learning_rate": 4.8890604608761426e-06, + "loss": 0.1697, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1084 + }, + { + "epoch": 0.10434197239986537, + "grad_norm": 5.199087904062965, + "learning_rate": 4.888835631996426e-06, + "loss": 0.1631, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1085 + }, + { + "epoch": 0.10443814011636293, + "grad_norm": 7.553810970662918, + "learning_rate": 4.888610580709458e-06, + "loss": 0.1795, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1086 + }, + { + "epoch": 0.10453430783286051, + "grad_norm": 6.616908453411606, + "learning_rate": 4.888385307036191e-06, + "loss": 0.2069, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1087 + }, + { + "epoch": 0.10463047554935809, + "grad_norm": 5.253343682372634, + "learning_rate": 4.8881598109976e-06, + "loss": 0.1458, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1088 + }, + { + "epoch": 0.10472664326585565, + "grad_norm": 3.0075143439915553, + "learning_rate": 4.887934092614679e-06, + "loss": 0.1744, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1089 + }, + { + "epoch": 0.10482281098235323, + "grad_norm": 4.631782608395664, + "learning_rate": 4.887708151908442e-06, + "loss": 0.1466, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1090 + }, + { + "epoch": 0.10491897869885079, + "grad_norm": 4.813992863354463, + "learning_rate": 4.887481988899925e-06, + "loss": 0.159, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1091 + }, + { + "epoch": 0.10501514641534837, + "grad_norm": 1.8778200556021125, + "learning_rate": 4.8872556036101845e-06, + "loss": 0.1307, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1092 + }, + { + "epoch": 0.10511131413184593, + "grad_norm": 1.8971910336599707, + "learning_rate": 4.887028996060299e-06, + "loss": 0.1503, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1093 + }, + { + "epoch": 0.10520748184834351, + "grad_norm": 3.902639989912697, + "learning_rate": 4.886802166271365e-06, + "loss": 0.1469, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1094 + }, + { + "epoch": 0.10530364956484109, + "grad_norm": 3.5587302001412486, + "learning_rate": 4.8865751142645005e-06, + "loss": 0.1416, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1095 + }, + { + "epoch": 0.10539981728133865, + "grad_norm": 2.472167327979283, + "learning_rate": 4.886347840060845e-06, + "loss": 0.1486, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1096 + }, + { + "epoch": 0.10549598499783623, + "grad_norm": 2.969537853566365, + "learning_rate": 4.886120343681559e-06, + "loss": 0.1353, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1097 + }, + { + "epoch": 0.1055921527143338, + "grad_norm": 4.053659191320208, + "learning_rate": 4.885892625147822e-06, + "loss": 0.1329, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1098 + }, + { + "epoch": 0.10568832043083137, + "grad_norm": 5.302991196127897, + "learning_rate": 4.885664684480837e-06, + "loss": 0.1858, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1099 + }, + { + "epoch": 0.10578448814732894, + "grad_norm": 2.6429091858464777, + "learning_rate": 4.885436521701824e-06, + "loss": 0.1528, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1100 + }, + { + "epoch": 0.10588065586382651, + "grad_norm": 2.216782536581722, + "learning_rate": 4.885208136832027e-06, + "loss": 0.1348, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1101 + }, + { + "epoch": 0.10597682358032409, + "grad_norm": 4.709042748361681, + "learning_rate": 4.884979529892708e-06, + "loss": 0.1355, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1102 + }, + { + "epoch": 0.10607299129682166, + "grad_norm": 2.103047795649484, + "learning_rate": 4.884750700905153e-06, + "loss": 0.1537, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1103 + }, + { + "epoch": 0.10616915901331923, + "grad_norm": 1.8419439893962397, + "learning_rate": 4.884521649890664e-06, + "loss": 0.1529, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1104 + }, + { + "epoch": 0.1062653267298168, + "grad_norm": 2.521394659007816, + "learning_rate": 4.884292376870567e-06, + "loss": 0.1647, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1105 + }, + { + "epoch": 0.10636149444631438, + "grad_norm": 4.469131909236566, + "learning_rate": 4.88406288186621e-06, + "loss": 0.1407, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1106 + }, + { + "epoch": 0.10645766216281194, + "grad_norm": 4.158946165852162, + "learning_rate": 4.883833164898957e-06, + "loss": 0.1435, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1107 + }, + { + "epoch": 0.10655382987930952, + "grad_norm": 3.8118849008561613, + "learning_rate": 4.883603225990197e-06, + "loss": 0.1554, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1108 + }, + { + "epoch": 0.1066499975958071, + "grad_norm": 2.0938043777954145, + "learning_rate": 4.8833730651613375e-06, + "loss": 0.1565, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1109 + }, + { + "epoch": 0.10674616531230466, + "grad_norm": 4.806332973855465, + "learning_rate": 4.8831426824338066e-06, + "loss": 0.1383, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1110 + }, + { + "epoch": 0.10684233302880224, + "grad_norm": 3.763337837800342, + "learning_rate": 4.882912077829054e-06, + "loss": 0.1675, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1111 + }, + { + "epoch": 0.1069385007452998, + "grad_norm": 3.228217465495403, + "learning_rate": 4.882681251368549e-06, + "loss": 0.1433, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1112 + }, + { + "epoch": 0.10703466846179738, + "grad_norm": 2.1519733189476473, + "learning_rate": 4.882450203073783e-06, + "loss": 0.1327, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1113 + }, + { + "epoch": 0.10713083617829494, + "grad_norm": 7.924880799203304, + "learning_rate": 4.882218932966267e-06, + "loss": 0.1957, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1114 + }, + { + "epoch": 0.10722700389479252, + "grad_norm": 4.73094936075264, + "learning_rate": 4.881987441067534e-06, + "loss": 0.1844, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1115 + }, + { + "epoch": 0.10732317161129008, + "grad_norm": 2.6498485053844045, + "learning_rate": 4.881755727399134e-06, + "loss": 0.1685, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1116 + }, + { + "epoch": 0.10741933932778766, + "grad_norm": 1.9263793175152928, + "learning_rate": 4.881523791982642e-06, + "loss": 0.1549, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1117 + }, + { + "epoch": 0.10751550704428524, + "grad_norm": 5.176737649172263, + "learning_rate": 4.881291634839652e-06, + "loss": 0.1601, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1118 + }, + { + "epoch": 0.1076116747607828, + "grad_norm": 5.903338413046595, + "learning_rate": 4.881059255991777e-06, + "loss": 0.1482, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1119 + }, + { + "epoch": 0.10770784247728038, + "grad_norm": 7.565181739954795, + "learning_rate": 4.8808266554606535e-06, + "loss": 0.1878, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1120 + }, + { + "epoch": 0.10780401019377794, + "grad_norm": 2.331003629032416, + "learning_rate": 4.880593833267937e-06, + "loss": 0.1188, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1121 + }, + { + "epoch": 0.10790017791027552, + "grad_norm": 3.304929853408971, + "learning_rate": 4.880360789435304e-06, + "loss": 0.1683, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1122 + }, + { + "epoch": 0.10799634562677309, + "grad_norm": 2.4256874401280877, + "learning_rate": 4.8801275239844506e-06, + "loss": 0.1521, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1123 + }, + { + "epoch": 0.10809251334327066, + "grad_norm": 2.270362643677664, + "learning_rate": 4.879894036937094e-06, + "loss": 0.1501, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1124 + }, + { + "epoch": 0.10818868105976824, + "grad_norm": 1.7677535709095766, + "learning_rate": 4.879660328314976e-06, + "loss": 0.1281, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1125 + }, + { + "epoch": 0.1082848487762658, + "grad_norm": 2.5928982129160127, + "learning_rate": 4.8794263981398516e-06, + "loss": 0.1885, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1126 + }, + { + "epoch": 0.10838101649276338, + "grad_norm": 2.1535968371637244, + "learning_rate": 4.879192246433502e-06, + "loss": 0.175, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1127 + }, + { + "epoch": 0.10847718420926095, + "grad_norm": 2.726214872471923, + "learning_rate": 4.878957873217727e-06, + "loss": 0.1758, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1128 + }, + { + "epoch": 0.10857335192575852, + "grad_norm": 2.9468213620664843, + "learning_rate": 4.8787232785143476e-06, + "loss": 0.1565, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1129 + }, + { + "epoch": 0.10866951964225609, + "grad_norm": 1.666982888921737, + "learning_rate": 4.878488462345206e-06, + "loss": 0.1283, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1130 + }, + { + "epoch": 0.10876568735875367, + "grad_norm": 4.815036339555525, + "learning_rate": 4.878253424732163e-06, + "loss": 0.1599, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1131 + }, + { + "epoch": 0.10886185507525124, + "grad_norm": 1.8820421409347683, + "learning_rate": 4.878018165697102e-06, + "loss": 0.1508, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1132 + }, + { + "epoch": 0.10895802279174881, + "grad_norm": 2.7155210106980707, + "learning_rate": 4.877782685261926e-06, + "loss": 0.1606, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1133 + }, + { + "epoch": 0.10905419050824638, + "grad_norm": 3.155103047348218, + "learning_rate": 4.877546983448559e-06, + "loss": 0.1669, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1134 + }, + { + "epoch": 0.10915035822474395, + "grad_norm": 1.6905131338039119, + "learning_rate": 4.877311060278945e-06, + "loss": 0.1348, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1135 + }, + { + "epoch": 0.10924652594124153, + "grad_norm": 4.287479894425198, + "learning_rate": 4.877074915775049e-06, + "loss": 0.2126, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1136 + }, + { + "epoch": 0.10934269365773909, + "grad_norm": 3.8034429750043413, + "learning_rate": 4.876838549958858e-06, + "loss": 0.1474, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1137 + }, + { + "epoch": 0.10943886137423667, + "grad_norm": 2.4598730186050126, + "learning_rate": 4.876601962852378e-06, + "loss": 0.1747, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1138 + }, + { + "epoch": 0.10953502909073425, + "grad_norm": 1.9715425388889836, + "learning_rate": 4.876365154477634e-06, + "loss": 0.1595, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1139 + }, + { + "epoch": 0.10963119680723181, + "grad_norm": 2.8443273886515343, + "learning_rate": 4.876128124856676e-06, + "loss": 0.1624, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1140 + }, + { + "epoch": 0.10972736452372939, + "grad_norm": 3.2689994102981457, + "learning_rate": 4.87589087401157e-06, + "loss": 0.1244, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1141 + }, + { + "epoch": 0.10982353224022695, + "grad_norm": 4.001315668736278, + "learning_rate": 4.875653401964408e-06, + "loss": 0.1686, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1142 + }, + { + "epoch": 0.10991969995672453, + "grad_norm": 1.96618260084879, + "learning_rate": 4.875415708737295e-06, + "loss": 0.1515, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1143 + }, + { + "epoch": 0.11001586767322209, + "grad_norm": 3.8245362204964035, + "learning_rate": 4.875177794352364e-06, + "loss": 0.1488, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1144 + }, + { + "epoch": 0.11011203538971967, + "grad_norm": 5.144248407724031, + "learning_rate": 4.8749396588317645e-06, + "loss": 0.1787, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1145 + }, + { + "epoch": 0.11020820310621725, + "grad_norm": 2.0876789844993544, + "learning_rate": 4.874701302197667e-06, + "loss": 0.158, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1146 + }, + { + "epoch": 0.11030437082271481, + "grad_norm": 2.475826575263896, + "learning_rate": 4.8744627244722645e-06, + "loss": 0.1385, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1147 + }, + { + "epoch": 0.11040053853921239, + "grad_norm": 3.3759263954382606, + "learning_rate": 4.874223925677767e-06, + "loss": 0.1856, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1148 + }, + { + "epoch": 0.11049670625570995, + "grad_norm": 5.106288728890496, + "learning_rate": 4.87398490583641e-06, + "loss": 0.1655, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1149 + }, + { + "epoch": 0.11059287397220753, + "grad_norm": 5.070199754925761, + "learning_rate": 4.8737456649704466e-06, + "loss": 0.1474, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1150 + }, + { + "epoch": 0.1106890416887051, + "grad_norm": 1.726050042981284, + "learning_rate": 4.8735062031021485e-06, + "loss": 0.1507, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1151 + }, + { + "epoch": 0.11078520940520267, + "grad_norm": 1.6957820897254545, + "learning_rate": 4.873266520253812e-06, + "loss": 0.1211, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1152 + }, + { + "epoch": 0.11088137712170025, + "grad_norm": 4.670959363793849, + "learning_rate": 4.873026616447752e-06, + "loss": 0.2036, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1153 + }, + { + "epoch": 0.11097754483819781, + "grad_norm": 2.2575693087233457, + "learning_rate": 4.872786491706304e-06, + "loss": 0.1543, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1154 + }, + { + "epoch": 0.11107371255469539, + "grad_norm": 1.9425923758802808, + "learning_rate": 4.8725461460518255e-06, + "loss": 0.1265, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1155 + }, + { + "epoch": 0.11116988027119296, + "grad_norm": 1.7161878112420388, + "learning_rate": 4.872305579506691e-06, + "loss": 0.1355, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1156 + }, + { + "epoch": 0.11126604798769053, + "grad_norm": 2.4475944068892197, + "learning_rate": 4.8720647920932995e-06, + "loss": 0.1744, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1157 + }, + { + "epoch": 0.1113622157041881, + "grad_norm": 3.4096428039194544, + "learning_rate": 4.871823783834068e-06, + "loss": 0.1516, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1158 + }, + { + "epoch": 0.11145838342068567, + "grad_norm": 1.7482506189802358, + "learning_rate": 4.871582554751437e-06, + "loss": 0.1191, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1159 + }, + { + "epoch": 0.11155455113718325, + "grad_norm": 3.398197861039264, + "learning_rate": 4.8713411048678635e-06, + "loss": 0.1621, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1160 + }, + { + "epoch": 0.11165071885368082, + "grad_norm": 4.2402621762178745, + "learning_rate": 4.871099434205829e-06, + "loss": 0.1619, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1161 + }, + { + "epoch": 0.1117468865701784, + "grad_norm": 2.6025775289468456, + "learning_rate": 4.870857542787833e-06, + "loss": 0.1757, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1162 + }, + { + "epoch": 0.11184305428667596, + "grad_norm": 5.146284646543006, + "learning_rate": 4.870615430636394e-06, + "loss": 0.1942, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1163 + }, + { + "epoch": 0.11193922200317354, + "grad_norm": 4.901559486448873, + "learning_rate": 4.870373097774056e-06, + "loss": 0.172, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1164 + }, + { + "epoch": 0.1120353897196711, + "grad_norm": 2.8406703465155396, + "learning_rate": 4.87013054422338e-06, + "loss": 0.1417, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1165 + }, + { + "epoch": 0.11213155743616868, + "grad_norm": 3.322887592317615, + "learning_rate": 4.86988777000695e-06, + "loss": 0.1508, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1166 + }, + { + "epoch": 0.11222772515266626, + "grad_norm": 3.7907395570949336, + "learning_rate": 4.869644775147366e-06, + "loss": 0.1506, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1167 + }, + { + "epoch": 0.11232389286916382, + "grad_norm": 3.501109083413477, + "learning_rate": 4.869401559667253e-06, + "loss": 0.1596, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1168 + }, + { + "epoch": 0.1124200605856614, + "grad_norm": 2.547564738150035, + "learning_rate": 4.869158123589256e-06, + "loss": 0.1574, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1169 + }, + { + "epoch": 0.11251622830215896, + "grad_norm": 3.9304341402713776, + "learning_rate": 4.868914466936038e-06, + "loss": 0.1791, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1170 + }, + { + "epoch": 0.11261239601865654, + "grad_norm": 3.8164373214811462, + "learning_rate": 4.868670589730284e-06, + "loss": 0.1736, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1171 + }, + { + "epoch": 0.1127085637351541, + "grad_norm": 1.7098068332253038, + "learning_rate": 4.8684264919947025e-06, + "loss": 0.1425, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1172 + }, + { + "epoch": 0.11280473145165168, + "grad_norm": 1.965937705298391, + "learning_rate": 4.868182173752015e-06, + "loss": 0.1507, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1173 + }, + { + "epoch": 0.11290089916814926, + "grad_norm": 1.9928305849859893, + "learning_rate": 4.867937635024972e-06, + "loss": 0.1779, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1174 + }, + { + "epoch": 0.11299706688464682, + "grad_norm": 2.670514090292599, + "learning_rate": 4.86769287583634e-06, + "loss": 0.1446, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1175 + }, + { + "epoch": 0.1130932346011444, + "grad_norm": 2.09146030648479, + "learning_rate": 4.867447896208906e-06, + "loss": 0.1406, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1176 + }, + { + "epoch": 0.11318940231764196, + "grad_norm": 2.8052131559109412, + "learning_rate": 4.867202696165477e-06, + "loss": 0.1582, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1177 + }, + { + "epoch": 0.11328557003413954, + "grad_norm": 2.9039293317094623, + "learning_rate": 4.866957275728885e-06, + "loss": 0.1448, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1178 + }, + { + "epoch": 0.1133817377506371, + "grad_norm": 2.057780612195322, + "learning_rate": 4.8667116349219776e-06, + "loss": 0.1648, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1179 + }, + { + "epoch": 0.11347790546713468, + "grad_norm": 2.381495785834422, + "learning_rate": 4.866465773767625e-06, + "loss": 0.1456, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1180 + }, + { + "epoch": 0.11357407318363226, + "grad_norm": 1.8583329330485268, + "learning_rate": 4.866219692288716e-06, + "loss": 0.1686, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1181 + }, + { + "epoch": 0.11367024090012982, + "grad_norm": 2.339985709864965, + "learning_rate": 4.865973390508164e-06, + "loss": 0.1544, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1182 + }, + { + "epoch": 0.1137664086166274, + "grad_norm": 3.0433325066397274, + "learning_rate": 4.865726868448898e-06, + "loss": 0.1566, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1183 + }, + { + "epoch": 0.11386257633312497, + "grad_norm": 2.3497263751571773, + "learning_rate": 4.865480126133872e-06, + "loss": 0.1579, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1184 + }, + { + "epoch": 0.11395874404962254, + "grad_norm": 1.9682197955248815, + "learning_rate": 4.865233163586057e-06, + "loss": 0.1753, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1185 + }, + { + "epoch": 0.1140549117661201, + "grad_norm": 2.137984319827862, + "learning_rate": 4.864985980828446e-06, + "loss": 0.1687, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1186 + }, + { + "epoch": 0.11415107948261768, + "grad_norm": 2.092601679023735, + "learning_rate": 4.864738577884053e-06, + "loss": 0.1865, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1187 + }, + { + "epoch": 0.11424724719911526, + "grad_norm": 1.6552818436183936, + "learning_rate": 4.8644909547759124e-06, + "loss": 0.1523, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1188 + }, + { + "epoch": 0.11434341491561283, + "grad_norm": 3.39517544549968, + "learning_rate": 4.8642431115270766e-06, + "loss": 0.1557, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1189 + }, + { + "epoch": 0.1144395826321104, + "grad_norm": 2.4924924415704894, + "learning_rate": 4.863995048160622e-06, + "loss": 0.1696, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1190 + }, + { + "epoch": 0.11453575034860797, + "grad_norm": 2.180768342433135, + "learning_rate": 4.863746764699644e-06, + "loss": 0.1509, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1191 + }, + { + "epoch": 0.11463191806510555, + "grad_norm": 3.159964900951142, + "learning_rate": 4.863498261167258e-06, + "loss": 0.1684, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1192 + }, + { + "epoch": 0.11472808578160311, + "grad_norm": 1.545600333986663, + "learning_rate": 4.863249537586601e-06, + "loss": 0.1363, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1193 + }, + { + "epoch": 0.11482425349810069, + "grad_norm": 2.766203812511668, + "learning_rate": 4.863000593980829e-06, + "loss": 0.1678, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1194 + }, + { + "epoch": 0.11492042121459826, + "grad_norm": 2.7631937676667677, + "learning_rate": 4.86275143037312e-06, + "loss": 0.1522, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1195 + }, + { + "epoch": 0.11501658893109583, + "grad_norm": 2.1937510903542075, + "learning_rate": 4.862502046786671e-06, + "loss": 0.1495, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1196 + }, + { + "epoch": 0.1151127566475934, + "grad_norm": 2.5895985574889497, + "learning_rate": 4.862252443244702e-06, + "loss": 0.1461, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1197 + }, + { + "epoch": 0.11520892436409097, + "grad_norm": 2.8224333490298577, + "learning_rate": 4.86200261977045e-06, + "loss": 0.1397, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1198 + }, + { + "epoch": 0.11530509208058855, + "grad_norm": 1.9932954208449265, + "learning_rate": 4.861752576387175e-06, + "loss": 0.1417, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1199 + }, + { + "epoch": 0.11540125979708611, + "grad_norm": 2.5173482627328627, + "learning_rate": 4.861502313118157e-06, + "loss": 0.1002, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1200 + }, + { + "epoch": 0.11549742751358369, + "grad_norm": 2.6331585285780275, + "learning_rate": 4.861251829986696e-06, + "loss": 0.1514, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1201 + }, + { + "epoch": 0.11559359523008127, + "grad_norm": 4.363205074578989, + "learning_rate": 4.8610011270161115e-06, + "loss": 0.2131, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1202 + }, + { + "epoch": 0.11568976294657883, + "grad_norm": 5.613808547690115, + "learning_rate": 4.860750204229747e-06, + "loss": 0.1751, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1203 + }, + { + "epoch": 0.11578593066307641, + "grad_norm": 2.1752085218589556, + "learning_rate": 4.860499061650962e-06, + "loss": 0.161, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1204 + }, + { + "epoch": 0.11588209837957397, + "grad_norm": 2.858482668867679, + "learning_rate": 4.8602476993031395e-06, + "loss": 0.1573, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1205 + }, + { + "epoch": 0.11597826609607155, + "grad_norm": 2.796157632179952, + "learning_rate": 4.859996117209682e-06, + "loss": 0.159, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1206 + }, + { + "epoch": 0.11607443381256911, + "grad_norm": 2.1335918220787513, + "learning_rate": 4.859744315394013e-06, + "loss": 0.1637, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1207 + }, + { + "epoch": 0.11617060152906669, + "grad_norm": 1.3933962858937001, + "learning_rate": 4.859492293879574e-06, + "loss": 0.1284, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1208 + }, + { + "epoch": 0.11626676924556427, + "grad_norm": 3.8953634435446642, + "learning_rate": 4.859240052689831e-06, + "loss": 0.1571, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1209 + }, + { + "epoch": 0.11636293696206183, + "grad_norm": 4.6695432805106485, + "learning_rate": 4.858987591848268e-06, + "loss": 0.1697, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1210 + }, + { + "epoch": 0.11645910467855941, + "grad_norm": 3.673255832704691, + "learning_rate": 4.858734911378389e-06, + "loss": 0.1699, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1211 + }, + { + "epoch": 0.11655527239505697, + "grad_norm": 4.047645298469557, + "learning_rate": 4.85848201130372e-06, + "loss": 0.1471, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1212 + }, + { + "epoch": 0.11665144011155455, + "grad_norm": 6.493235382424457, + "learning_rate": 4.858228891647807e-06, + "loss": 0.1684, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1213 + }, + { + "epoch": 0.11674760782805212, + "grad_norm": 4.51860286887835, + "learning_rate": 4.857975552434215e-06, + "loss": 0.1753, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1214 + }, + { + "epoch": 0.1168437755445497, + "grad_norm": 2.832324195815928, + "learning_rate": 4.857721993686531e-06, + "loss": 0.1649, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1215 + }, + { + "epoch": 0.11693994326104727, + "grad_norm": 2.413303524644963, + "learning_rate": 4.857468215428362e-06, + "loss": 0.1496, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1216 + }, + { + "epoch": 0.11703611097754484, + "grad_norm": 4.9969811605799155, + "learning_rate": 4.857214217683336e-06, + "loss": 0.1553, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1217 + }, + { + "epoch": 0.11713227869404241, + "grad_norm": 5.357119127225423, + "learning_rate": 4.8569600004751e-06, + "loss": 0.1537, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1218 + }, + { + "epoch": 0.11722844641053998, + "grad_norm": 4.125811049919076, + "learning_rate": 4.856705563827323e-06, + "loss": 0.1664, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1219 + }, + { + "epoch": 0.11732461412703755, + "grad_norm": 3.2743388032543725, + "learning_rate": 4.856450907763693e-06, + "loss": 0.149, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1220 + }, + { + "epoch": 0.11742078184353512, + "grad_norm": 3.233395944621555, + "learning_rate": 4.856196032307921e-06, + "loss": 0.152, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1221 + }, + { + "epoch": 0.1175169495600327, + "grad_norm": 3.2238168025249783, + "learning_rate": 4.8559409374837356e-06, + "loss": 0.1513, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1222 + }, + { + "epoch": 0.11761311727653027, + "grad_norm": 2.036560715610317, + "learning_rate": 4.855685623314886e-06, + "loss": 0.1688, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1223 + }, + { + "epoch": 0.11770928499302784, + "grad_norm": 2.443455730388245, + "learning_rate": 4.855430089825143e-06, + "loss": 0.1299, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1224 + }, + { + "epoch": 0.11780545270952542, + "grad_norm": 5.602042380852946, + "learning_rate": 4.855174337038299e-06, + "loss": 0.1443, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1225 + }, + { + "epoch": 0.11790162042602298, + "grad_norm": 3.1863310668494624, + "learning_rate": 4.854918364978163e-06, + "loss": 0.1633, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1226 + }, + { + "epoch": 0.11799778814252056, + "grad_norm": 1.6160139524171169, + "learning_rate": 4.8546621736685675e-06, + "loss": 0.1424, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1227 + }, + { + "epoch": 0.11809395585901812, + "grad_norm": 2.1693004337103923, + "learning_rate": 4.854405763133365e-06, + "loss": 0.1681, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1228 + }, + { + "epoch": 0.1181901235755157, + "grad_norm": 3.094518890234746, + "learning_rate": 4.854149133396429e-06, + "loss": 0.159, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1229 + }, + { + "epoch": 0.11828629129201328, + "grad_norm": 2.4271937945407145, + "learning_rate": 4.853892284481651e-06, + "loss": 0.1769, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1230 + }, + { + "epoch": 0.11838245900851084, + "grad_norm": 1.7016725839722098, + "learning_rate": 4.853635216412944e-06, + "loss": 0.1341, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1231 + }, + { + "epoch": 0.11847862672500842, + "grad_norm": 4.693213936445977, + "learning_rate": 4.853377929214243e-06, + "loss": 0.1534, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1232 + }, + { + "epoch": 0.11857479444150598, + "grad_norm": 4.794359563645956, + "learning_rate": 4.8531204229095016e-06, + "loss": 0.1721, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1233 + }, + { + "epoch": 0.11867096215800356, + "grad_norm": 3.0037817731619536, + "learning_rate": 4.852862697522696e-06, + "loss": 0.1513, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1234 + }, + { + "epoch": 0.11876712987450112, + "grad_norm": 4.873409455407243, + "learning_rate": 4.8526047530778175e-06, + "loss": 0.1665, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1235 + }, + { + "epoch": 0.1188632975909987, + "grad_norm": 5.919845338918447, + "learning_rate": 4.852346589598884e-06, + "loss": 0.1841, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1236 + }, + { + "epoch": 0.11895946530749628, + "grad_norm": 3.1240759939197384, + "learning_rate": 4.85208820710993e-06, + "loss": 0.1499, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1237 + }, + { + "epoch": 0.11905563302399384, + "grad_norm": 1.8277800646500475, + "learning_rate": 4.851829605635014e-06, + "loss": 0.1268, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1238 + }, + { + "epoch": 0.11915180074049142, + "grad_norm": 3.4806285221750812, + "learning_rate": 4.8515707851982104e-06, + "loss": 0.1751, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1239 + }, + { + "epoch": 0.11924796845698898, + "grad_norm": 3.320703768330059, + "learning_rate": 4.851311745823616e-06, + "loss": 0.1273, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1240 + }, + { + "epoch": 0.11934413617348656, + "grad_norm": 4.197366642104336, + "learning_rate": 4.8510524875353495e-06, + "loss": 0.1866, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1241 + }, + { + "epoch": 0.11944030388998413, + "grad_norm": 1.8180272781757445, + "learning_rate": 4.850793010357547e-06, + "loss": 0.1415, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1242 + }, + { + "epoch": 0.1195364716064817, + "grad_norm": 3.5200770827775765, + "learning_rate": 4.850533314314368e-06, + "loss": 0.1929, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1243 + }, + { + "epoch": 0.11963263932297928, + "grad_norm": 4.014294062985323, + "learning_rate": 4.85027339942999e-06, + "loss": 0.1492, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1244 + }, + { + "epoch": 0.11972880703947684, + "grad_norm": 3.655452207275855, + "learning_rate": 4.850013265728612e-06, + "loss": 0.1652, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1245 + }, + { + "epoch": 0.11982497475597442, + "grad_norm": 4.064772596514522, + "learning_rate": 4.849752913234453e-06, + "loss": 0.1477, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1246 + }, + { + "epoch": 0.11992114247247199, + "grad_norm": 3.6777712475569992, + "learning_rate": 4.8494923419717534e-06, + "loss": 0.1446, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1247 + }, + { + "epoch": 0.12001731018896956, + "grad_norm": 3.1117515600678742, + "learning_rate": 4.849231551964771e-06, + "loss": 0.1654, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1248 + }, + { + "epoch": 0.12011347790546713, + "grad_norm": 2.5320067560814885, + "learning_rate": 4.8489705432377885e-06, + "loss": 0.184, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1249 + }, + { + "epoch": 0.1202096456219647, + "grad_norm": 5.268248820961252, + "learning_rate": 4.848709315815106e-06, + "loss": 0.1705, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1250 + }, + { + "epoch": 0.12030581333846228, + "grad_norm": 2.670670809427387, + "learning_rate": 4.8484478697210435e-06, + "loss": 0.1474, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1251 + }, + { + "epoch": 0.12040198105495985, + "grad_norm": 2.271338524314211, + "learning_rate": 4.848186204979943e-06, + "loss": 0.145, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1252 + }, + { + "epoch": 0.12049814877145743, + "grad_norm": 2.8555779996127293, + "learning_rate": 4.847924321616167e-06, + "loss": 0.1315, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1253 + }, + { + "epoch": 0.12059431648795499, + "grad_norm": 1.9746769873499743, + "learning_rate": 4.847662219654095e-06, + "loss": 0.1469, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1254 + }, + { + "epoch": 0.12069048420445257, + "grad_norm": 2.9841366792364346, + "learning_rate": 4.847399899118132e-06, + "loss": 0.1285, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1255 + }, + { + "epoch": 0.12078665192095013, + "grad_norm": 2.392567615819829, + "learning_rate": 4.8471373600327e-06, + "loss": 0.1803, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1256 + }, + { + "epoch": 0.12088281963744771, + "grad_norm": 4.4326170045066915, + "learning_rate": 4.846874602422243e-06, + "loss": 0.1771, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1257 + }, + { + "epoch": 0.12097898735394529, + "grad_norm": 2.6763853162210522, + "learning_rate": 4.846611626311222e-06, + "loss": 0.1531, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1258 + }, + { + "epoch": 0.12107515507044285, + "grad_norm": 2.3588879971310837, + "learning_rate": 4.846348431724123e-06, + "loss": 0.1367, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1259 + }, + { + "epoch": 0.12117132278694043, + "grad_norm": 2.3631435509035774, + "learning_rate": 4.846085018685449e-06, + "loss": 0.1459, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1260 + }, + { + "epoch": 0.12126749050343799, + "grad_norm": 3.2307522177705663, + "learning_rate": 4.845821387219725e-06, + "loss": 0.1927, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1261 + }, + { + "epoch": 0.12136365821993557, + "grad_norm": 2.009877729067259, + "learning_rate": 4.845557537351496e-06, + "loss": 0.1749, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1262 + }, + { + "epoch": 0.12145982593643313, + "grad_norm": 2.785570346987296, + "learning_rate": 4.845293469105327e-06, + "loss": 0.1631, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1263 + }, + { + "epoch": 0.12155599365293071, + "grad_norm": 3.9421025698297187, + "learning_rate": 4.8450291825058036e-06, + "loss": 0.1757, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1264 + }, + { + "epoch": 0.12165216136942829, + "grad_norm": 1.781623793309018, + "learning_rate": 4.844764677577531e-06, + "loss": 0.1523, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1265 + }, + { + "epoch": 0.12174832908592585, + "grad_norm": 3.6051392139233824, + "learning_rate": 4.844499954345135e-06, + "loss": 0.1704, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1266 + }, + { + "epoch": 0.12184449680242343, + "grad_norm": 4.134510377998378, + "learning_rate": 4.844235012833265e-06, + "loss": 0.1494, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1267 + }, + { + "epoch": 0.121940664518921, + "grad_norm": 5.620268962835827, + "learning_rate": 4.843969853066584e-06, + "loss": 0.125, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1268 + }, + { + "epoch": 0.12203683223541857, + "grad_norm": 2.1859781781134764, + "learning_rate": 4.843704475069781e-06, + "loss": 0.1721, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1269 + }, + { + "epoch": 0.12213299995191614, + "grad_norm": 4.612112365891879, + "learning_rate": 4.843438878867563e-06, + "loss": 0.1508, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1270 + }, + { + "epoch": 0.12222916766841371, + "grad_norm": 3.419263193432412, + "learning_rate": 4.843173064484659e-06, + "loss": 0.1345, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1271 + }, + { + "epoch": 0.12232533538491129, + "grad_norm": 2.715732059959344, + "learning_rate": 4.842907031945815e-06, + "loss": 0.1594, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1272 + }, + { + "epoch": 0.12242150310140885, + "grad_norm": 3.1822066869863725, + "learning_rate": 4.842640781275801e-06, + "loss": 0.1453, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1273 + }, + { + "epoch": 0.12251767081790643, + "grad_norm": 4.27005726971356, + "learning_rate": 4.842374312499405e-06, + "loss": 0.1772, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1274 + }, + { + "epoch": 0.122613838534404, + "grad_norm": 4.448170078342762, + "learning_rate": 4.842107625641436e-06, + "loss": 0.1889, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1275 + }, + { + "epoch": 0.12271000625090157, + "grad_norm": 2.2894392326157376, + "learning_rate": 4.841840720726722e-06, + "loss": 0.1591, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1276 + }, + { + "epoch": 0.12280617396739914, + "grad_norm": 2.823609405257852, + "learning_rate": 4.841573597780116e-06, + "loss": 0.1511, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1277 + }, + { + "epoch": 0.12290234168389672, + "grad_norm": 3.062540351128736, + "learning_rate": 4.841306256826484e-06, + "loss": 0.1501, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1278 + }, + { + "epoch": 0.12299850940039429, + "grad_norm": 1.8886118137603058, + "learning_rate": 4.8410386978907195e-06, + "loss": 0.1283, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1279 + }, + { + "epoch": 0.12309467711689186, + "grad_norm": 2.552232813404578, + "learning_rate": 4.84077092099773e-06, + "loss": 0.1593, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1280 + }, + { + "epoch": 0.12319084483338943, + "grad_norm": 2.040281739001848, + "learning_rate": 4.840502926172449e-06, + "loss": 0.1787, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1281 + }, + { + "epoch": 0.123287012549887, + "grad_norm": 4.149866667760618, + "learning_rate": 4.840234713439825e-06, + "loss": 0.1344, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1282 + }, + { + "epoch": 0.12338318026638458, + "grad_norm": 4.434167682086409, + "learning_rate": 4.839966282824832e-06, + "loss": 0.1639, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1283 + }, + { + "epoch": 0.12347934798288214, + "grad_norm": 1.9379852956471968, + "learning_rate": 4.83969763435246e-06, + "loss": 0.1832, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1284 + }, + { + "epoch": 0.12357551569937972, + "grad_norm": 4.073875182096113, + "learning_rate": 4.839428768047721e-06, + "loss": 0.183, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1285 + }, + { + "epoch": 0.1236716834158773, + "grad_norm": 1.7599756040255214, + "learning_rate": 4.839159683935647e-06, + "loss": 0.1255, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1286 + }, + { + "epoch": 0.12376785113237486, + "grad_norm": 2.3007060388877956, + "learning_rate": 4.838890382041292e-06, + "loss": 0.1363, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1287 + }, + { + "epoch": 0.12386401884887244, + "grad_norm": 2.253450367337834, + "learning_rate": 4.838620862389727e-06, + "loss": 0.1738, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1288 + }, + { + "epoch": 0.12396018656537, + "grad_norm": 4.353505967363967, + "learning_rate": 4.838351125006045e-06, + "loss": 0.1564, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1289 + }, + { + "epoch": 0.12405635428186758, + "grad_norm": 14.424983036983305, + "learning_rate": 4.838081169915361e-06, + "loss": 0.1747, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1290 + }, + { + "epoch": 0.12415252199836514, + "grad_norm": 6.895650118088729, + "learning_rate": 4.837810997142807e-06, + "loss": 0.1646, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1291 + }, + { + "epoch": 0.12424868971486272, + "grad_norm": 5.602182644021274, + "learning_rate": 4.837540606713538e-06, + "loss": 0.1767, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1292 + }, + { + "epoch": 0.1243448574313603, + "grad_norm": 3.8269785945450696, + "learning_rate": 4.837269998652727e-06, + "loss": 0.1656, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1293 + }, + { + "epoch": 0.12444102514785786, + "grad_norm": 2.9820342404999796, + "learning_rate": 4.836999172985569e-06, + "loss": 0.1601, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1294 + }, + { + "epoch": 0.12453719286435544, + "grad_norm": 2.495740298086224, + "learning_rate": 4.836728129737278e-06, + "loss": 0.1759, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1295 + }, + { + "epoch": 0.124633360580853, + "grad_norm": 1.912447763618808, + "learning_rate": 4.83645686893309e-06, + "loss": 0.1457, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1296 + }, + { + "epoch": 0.12472952829735058, + "grad_norm": 3.9078915763896367, + "learning_rate": 4.8361853905982595e-06, + "loss": 0.1313, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1297 + }, + { + "epoch": 0.12482569601384814, + "grad_norm": 3.334951028629852, + "learning_rate": 4.835913694758061e-06, + "loss": 0.15, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1298 + }, + { + "epoch": 0.12492186373034572, + "grad_norm": 2.245456388296174, + "learning_rate": 4.835641781437792e-06, + "loss": 0.1653, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1299 + }, + { + "epoch": 0.1250180314468433, + "grad_norm": 3.234750704981536, + "learning_rate": 4.835369650662767e-06, + "loss": 0.181, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1300 + }, + { + "epoch": 0.12511419916334088, + "grad_norm": 3.6020628992857566, + "learning_rate": 4.835097302458322e-06, + "loss": 0.165, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1301 + }, + { + "epoch": 0.12521036687983844, + "grad_norm": 3.902850508698553, + "learning_rate": 4.834824736849816e-06, + "loss": 0.1598, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1302 + }, + { + "epoch": 0.125306534596336, + "grad_norm": 3.490977526655338, + "learning_rate": 4.834551953862622e-06, + "loss": 0.1569, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1303 + }, + { + "epoch": 0.12540270231283357, + "grad_norm": 1.935365224771563, + "learning_rate": 4.834278953522139e-06, + "loss": 0.1236, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1304 + }, + { + "epoch": 0.12549887002933116, + "grad_norm": 2.4015027795588817, + "learning_rate": 4.834005735853783e-06, + "loss": 0.1877, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1305 + }, + { + "epoch": 0.12559503774582872, + "grad_norm": 1.7210503208694317, + "learning_rate": 4.833732300882992e-06, + "loss": 0.1496, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1306 + }, + { + "epoch": 0.1256912054623263, + "grad_norm": 2.237164280206181, + "learning_rate": 4.833458648635223e-06, + "loss": 0.1666, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1307 + }, + { + "epoch": 0.12578737317882388, + "grad_norm": 1.6844060476888034, + "learning_rate": 4.833184779135954e-06, + "loss": 0.1372, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1308 + }, + { + "epoch": 0.12588354089532144, + "grad_norm": 3.164801615152479, + "learning_rate": 4.832910692410684e-06, + "loss": 0.1562, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1309 + }, + { + "epoch": 0.125979708611819, + "grad_norm": 2.3935723708702716, + "learning_rate": 4.83263638848493e-06, + "loss": 0.1796, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1310 + }, + { + "epoch": 0.12607587632831657, + "grad_norm": 3.6760722802693775, + "learning_rate": 4.832361867384231e-06, + "loss": 0.1852, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1311 + }, + { + "epoch": 0.12617204404481416, + "grad_norm": 1.9573638246583935, + "learning_rate": 4.8320871291341455e-06, + "loss": 0.1532, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1312 + }, + { + "epoch": 0.12626821176131173, + "grad_norm": 1.9035334471384797, + "learning_rate": 4.831812173760253e-06, + "loss": 0.1296, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1313 + }, + { + "epoch": 0.1263643794778093, + "grad_norm": 3.3147908239937314, + "learning_rate": 4.831537001288151e-06, + "loss": 0.161, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1314 + }, + { + "epoch": 0.12646054719430688, + "grad_norm": 2.7981335401501735, + "learning_rate": 4.831261611743461e-06, + "loss": 0.1607, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1315 + }, + { + "epoch": 0.12655671491080445, + "grad_norm": 2.595894891194408, + "learning_rate": 4.8309860051518204e-06, + "loss": 0.1883, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1316 + }, + { + "epoch": 0.126652882627302, + "grad_norm": 3.2715863853781633, + "learning_rate": 4.8307101815388906e-06, + "loss": 0.1446, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1317 + }, + { + "epoch": 0.12674905034379957, + "grad_norm": 2.7924517581672195, + "learning_rate": 4.83043414093035e-06, + "loss": 0.1887, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1318 + }, + { + "epoch": 0.12684521806029717, + "grad_norm": 3.1716575172396335, + "learning_rate": 4.8301578833519e-06, + "loss": 0.1651, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1319 + }, + { + "epoch": 0.12694138577679473, + "grad_norm": 2.95274815436, + "learning_rate": 4.829881408829262e-06, + "loss": 0.1498, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1320 + }, + { + "epoch": 0.1270375534932923, + "grad_norm": 2.7196694231595013, + "learning_rate": 4.8296047173881736e-06, + "loss": 0.1406, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1321 + }, + { + "epoch": 0.12713372120978988, + "grad_norm": 1.5254630951138395, + "learning_rate": 4.829327809054398e-06, + "loss": 0.1513, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1322 + }, + { + "epoch": 0.12722988892628745, + "grad_norm": 4.994192647977892, + "learning_rate": 4.829050683853715e-06, + "loss": 0.1602, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1323 + }, + { + "epoch": 0.127326056642785, + "grad_norm": 5.163653326093526, + "learning_rate": 4.828773341811925e-06, + "loss": 0.1515, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1324 + }, + { + "epoch": 0.12742222435928258, + "grad_norm": 1.954456850863554, + "learning_rate": 4.8284957829548515e-06, + "loss": 0.1537, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1325 + }, + { + "epoch": 0.12751839207578017, + "grad_norm": 1.9567561981775894, + "learning_rate": 4.828218007308335e-06, + "loss": 0.125, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1326 + }, + { + "epoch": 0.12761455979227773, + "grad_norm": 1.619944917698965, + "learning_rate": 4.827940014898236e-06, + "loss": 0.1397, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1327 + }, + { + "epoch": 0.1277107275087753, + "grad_norm": 1.9711635115024246, + "learning_rate": 4.827661805750438e-06, + "loss": 0.1527, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1328 + }, + { + "epoch": 0.1278068952252729, + "grad_norm": 2.438593926174103, + "learning_rate": 4.827383379890842e-06, + "loss": 0.1237, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1329 + }, + { + "epoch": 0.12790306294177045, + "grad_norm": 1.9660732736702182, + "learning_rate": 4.8271047373453705e-06, + "loss": 0.1531, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1330 + }, + { + "epoch": 0.12799923065826802, + "grad_norm": 2.062706159931422, + "learning_rate": 4.8268258781399664e-06, + "loss": 0.1625, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1331 + }, + { + "epoch": 0.12809539837476558, + "grad_norm": 3.3683215298009834, + "learning_rate": 4.826546802300592e-06, + "loss": 0.1714, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1332 + }, + { + "epoch": 0.12819156609126317, + "grad_norm": 4.115598012791214, + "learning_rate": 4.8262675098532295e-06, + "loss": 0.1796, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1333 + }, + { + "epoch": 0.12828773380776073, + "grad_norm": 1.8422314013812244, + "learning_rate": 4.825988000823882e-06, + "loss": 0.1127, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1334 + }, + { + "epoch": 0.1283839015242583, + "grad_norm": 1.96077354347414, + "learning_rate": 4.8257082752385735e-06, + "loss": 0.1681, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1335 + }, + { + "epoch": 0.1284800692407559, + "grad_norm": 1.5740893818956327, + "learning_rate": 4.825428333123346e-06, + "loss": 0.1448, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1336 + }, + { + "epoch": 0.12857623695725345, + "grad_norm": 6.646630711396161, + "learning_rate": 4.8251481745042635e-06, + "loss": 0.1458, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1337 + }, + { + "epoch": 0.12867240467375102, + "grad_norm": 1.560363952488483, + "learning_rate": 4.82486779940741e-06, + "loss": 0.1482, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1338 + }, + { + "epoch": 0.12876857239024858, + "grad_norm": 2.004899729949743, + "learning_rate": 4.824587207858888e-06, + "loss": 0.1912, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1339 + }, + { + "epoch": 0.12886474010674617, + "grad_norm": 2.517136010011621, + "learning_rate": 4.824306399884822e-06, + "loss": 0.1513, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1340 + }, + { + "epoch": 0.12896090782324374, + "grad_norm": 3.612808844022661, + "learning_rate": 4.8240253755113565e-06, + "loss": 0.1341, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1341 + }, + { + "epoch": 0.1290570755397413, + "grad_norm": 2.610733619686662, + "learning_rate": 4.823744134764655e-06, + "loss": 0.1546, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1342 + }, + { + "epoch": 0.1291532432562389, + "grad_norm": 2.274753558954632, + "learning_rate": 4.8234626776709014e-06, + "loss": 0.1324, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1343 + }, + { + "epoch": 0.12924941097273646, + "grad_norm": 2.209685839367773, + "learning_rate": 4.823181004256301e-06, + "loss": 0.168, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1344 + }, + { + "epoch": 0.12934557868923402, + "grad_norm": 3.4276532966520588, + "learning_rate": 4.822899114547078e-06, + "loss": 0.1309, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1345 + }, + { + "epoch": 0.12944174640573158, + "grad_norm": 2.8197959255450327, + "learning_rate": 4.822617008569478e-06, + "loss": 0.1663, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1346 + }, + { + "epoch": 0.12953791412222918, + "grad_norm": 1.961274757190641, + "learning_rate": 4.822334686349765e-06, + "loss": 0.1446, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1347 + }, + { + "epoch": 0.12963408183872674, + "grad_norm": 3.2246590219960187, + "learning_rate": 4.822052147914223e-06, + "loss": 0.1511, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1348 + }, + { + "epoch": 0.1297302495552243, + "grad_norm": 2.9297466359816258, + "learning_rate": 4.821769393289159e-06, + "loss": 0.1458, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1349 + }, + { + "epoch": 0.1298264172717219, + "grad_norm": 2.2016222257297495, + "learning_rate": 4.821486422500898e-06, + "loss": 0.1353, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1350 + }, + { + "epoch": 0.12992258498821946, + "grad_norm": 3.38306043059212, + "learning_rate": 4.821203235575783e-06, + "loss": 0.1389, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1351 + }, + { + "epoch": 0.13001875270471702, + "grad_norm": 5.012495454460287, + "learning_rate": 4.8209198325401815e-06, + "loss": 0.1691, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1352 + }, + { + "epoch": 0.13011492042121459, + "grad_norm": 3.870818411113048, + "learning_rate": 4.82063621342048e-06, + "loss": 0.183, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1353 + }, + { + "epoch": 0.13021108813771218, + "grad_norm": 2.6306173223335443, + "learning_rate": 4.820352378243083e-06, + "loss": 0.1698, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1354 + }, + { + "epoch": 0.13030725585420974, + "grad_norm": 3.5039643024953318, + "learning_rate": 4.820068327034416e-06, + "loss": 0.1524, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1355 + }, + { + "epoch": 0.1304034235707073, + "grad_norm": 2.6898278309585315, + "learning_rate": 4.819784059820925e-06, + "loss": 0.1722, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1356 + }, + { + "epoch": 0.1304995912872049, + "grad_norm": 2.4705413110272207, + "learning_rate": 4.819499576629077e-06, + "loss": 0.1516, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1357 + }, + { + "epoch": 0.13059575900370246, + "grad_norm": 3.7653858615697042, + "learning_rate": 4.8192148774853575e-06, + "loss": 0.1336, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1358 + }, + { + "epoch": 0.13069192672020002, + "grad_norm": 5.579379218078279, + "learning_rate": 4.818929962416273e-06, + "loss": 0.1879, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1359 + }, + { + "epoch": 0.1307880944366976, + "grad_norm": 2.90047478582572, + "learning_rate": 4.81864483144835e-06, + "loss": 0.1695, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1360 + }, + { + "epoch": 0.13088426215319518, + "grad_norm": 1.8181572764922729, + "learning_rate": 4.818359484608135e-06, + "loss": 0.1675, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1361 + }, + { + "epoch": 0.13098042986969274, + "grad_norm": 1.8175647010850677, + "learning_rate": 4.818073921922194e-06, + "loss": 0.1557, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1362 + }, + { + "epoch": 0.1310765975861903, + "grad_norm": 6.303530585909662, + "learning_rate": 4.8177881434171135e-06, + "loss": 0.1544, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1363 + }, + { + "epoch": 0.1311727653026879, + "grad_norm": 3.497213253665464, + "learning_rate": 4.817502149119502e-06, + "loss": 0.1268, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1364 + }, + { + "epoch": 0.13126893301918546, + "grad_norm": 2.288402441694263, + "learning_rate": 4.817215939055984e-06, + "loss": 0.1536, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1365 + }, + { + "epoch": 0.13136510073568303, + "grad_norm": 2.709817481991891, + "learning_rate": 4.816929513253209e-06, + "loss": 0.1479, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1366 + }, + { + "epoch": 0.1314612684521806, + "grad_norm": 4.754981167581109, + "learning_rate": 4.816642871737842e-06, + "loss": 0.1671, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1367 + }, + { + "epoch": 0.13155743616867818, + "grad_norm": 4.187087595636716, + "learning_rate": 4.816356014536571e-06, + "loss": 0.1596, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1368 + }, + { + "epoch": 0.13165360388517575, + "grad_norm": 2.491574713000908, + "learning_rate": 4.816068941676103e-06, + "loss": 0.1573, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1369 + }, + { + "epoch": 0.1317497716016733, + "grad_norm": 2.609752407904756, + "learning_rate": 4.815781653183165e-06, + "loss": 0.1819, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1370 + }, + { + "epoch": 0.1318459393181709, + "grad_norm": 2.8558701043758417, + "learning_rate": 4.815494149084506e-06, + "loss": 0.1425, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1371 + }, + { + "epoch": 0.13194210703466847, + "grad_norm": 5.635584940338874, + "learning_rate": 4.8152064294068905e-06, + "loss": 0.1685, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1372 + }, + { + "epoch": 0.13203827475116603, + "grad_norm": 4.572627744450616, + "learning_rate": 4.814918494177108e-06, + "loss": 0.1678, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1373 + }, + { + "epoch": 0.1321344424676636, + "grad_norm": 2.4746238834897505, + "learning_rate": 4.814630343421966e-06, + "loss": 0.1195, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1374 + }, + { + "epoch": 0.13223061018416118, + "grad_norm": 6.437386644338718, + "learning_rate": 4.814341977168292e-06, + "loss": 0.1718, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1375 + }, + { + "epoch": 0.13232677790065875, + "grad_norm": 3.4154502703774856, + "learning_rate": 4.814053395442933e-06, + "loss": 0.1473, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1376 + }, + { + "epoch": 0.1324229456171563, + "grad_norm": 3.454235016233218, + "learning_rate": 4.813764598272757e-06, + "loss": 0.1547, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1377 + }, + { + "epoch": 0.1325191133336539, + "grad_norm": 2.3758294198660344, + "learning_rate": 4.813475585684653e-06, + "loss": 0.1191, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1378 + }, + { + "epoch": 0.13261528105015147, + "grad_norm": 7.710789353681577, + "learning_rate": 4.813186357705528e-06, + "loss": 0.2254, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1379 + }, + { + "epoch": 0.13271144876664903, + "grad_norm": 4.835538477430256, + "learning_rate": 4.81289691436231e-06, + "loss": 0.1567, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1380 + }, + { + "epoch": 0.1328076164831466, + "grad_norm": 9.926127579646304, + "learning_rate": 4.812607255681946e-06, + "loss": 0.1625, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1381 + }, + { + "epoch": 0.1329037841996442, + "grad_norm": 2.668080763006886, + "learning_rate": 4.812317381691405e-06, + "loss": 0.1475, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1382 + }, + { + "epoch": 0.13299995191614175, + "grad_norm": 4.184769725941863, + "learning_rate": 4.8120272924176755e-06, + "loss": 0.1476, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1383 + }, + { + "epoch": 0.13309611963263931, + "grad_norm": 3.5371918440435666, + "learning_rate": 4.811736987887765e-06, + "loss": 0.141, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1384 + }, + { + "epoch": 0.1331922873491369, + "grad_norm": 2.661998217465835, + "learning_rate": 4.811446468128702e-06, + "loss": 0.1238, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1385 + }, + { + "epoch": 0.13328845506563447, + "grad_norm": 3.147313550419553, + "learning_rate": 4.811155733167534e-06, + "loss": 0.148, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1386 + }, + { + "epoch": 0.13338462278213203, + "grad_norm": 2.30651889459696, + "learning_rate": 4.810864783031331e-06, + "loss": 0.1676, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1387 + }, + { + "epoch": 0.1334807904986296, + "grad_norm": 3.030845046187503, + "learning_rate": 4.810573617747178e-06, + "loss": 0.1409, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1388 + }, + { + "epoch": 0.1335769582151272, + "grad_norm": 2.0533148437719477, + "learning_rate": 4.810282237342186e-06, + "loss": 0.1571, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1389 + }, + { + "epoch": 0.13367312593162475, + "grad_norm": 1.9392938464088323, + "learning_rate": 4.809990641843484e-06, + "loss": 0.1499, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1390 + }, + { + "epoch": 0.13376929364812232, + "grad_norm": 2.8670113514059596, + "learning_rate": 4.809698831278217e-06, + "loss": 0.1749, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1391 + }, + { + "epoch": 0.1338654613646199, + "grad_norm": 4.354495551236991, + "learning_rate": 4.8094068056735564e-06, + "loss": 0.1675, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1392 + }, + { + "epoch": 0.13396162908111747, + "grad_norm": 2.13380997782203, + "learning_rate": 4.80911456505669e-06, + "loss": 0.1425, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1393 + }, + { + "epoch": 0.13405779679761504, + "grad_norm": 1.512141379470364, + "learning_rate": 4.8088221094548246e-06, + "loss": 0.1224, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1394 + }, + { + "epoch": 0.1341539645141126, + "grad_norm": 2.0114767641217033, + "learning_rate": 4.80852943889519e-06, + "loss": 0.1376, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1395 + }, + { + "epoch": 0.1342501322306102, + "grad_norm": 2.3054706507934246, + "learning_rate": 4.808236553405035e-06, + "loss": 0.1235, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1396 + }, + { + "epoch": 0.13434629994710776, + "grad_norm": 2.6893711966269533, + "learning_rate": 4.807943453011627e-06, + "loss": 0.1426, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1397 + }, + { + "epoch": 0.13444246766360532, + "grad_norm": 4.098198880461284, + "learning_rate": 4.807650137742255e-06, + "loss": 0.1602, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1398 + }, + { + "epoch": 0.1345386353801029, + "grad_norm": 3.650775919093939, + "learning_rate": 4.807356607624228e-06, + "loss": 0.1856, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1399 + }, + { + "epoch": 0.13463480309660047, + "grad_norm": 2.639006130476288, + "learning_rate": 4.807062862684874e-06, + "loss": 0.1601, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1400 + }, + { + "epoch": 0.13473097081309804, + "grad_norm": 2.563751737727995, + "learning_rate": 4.806768902951541e-06, + "loss": 0.1706, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1401 + }, + { + "epoch": 0.1348271385295956, + "grad_norm": 1.8681024064355143, + "learning_rate": 4.806474728451598e-06, + "loss": 0.132, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1402 + }, + { + "epoch": 0.1349233062460932, + "grad_norm": 4.93825998054339, + "learning_rate": 4.806180339212432e-06, + "loss": 0.1524, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1403 + }, + { + "epoch": 0.13501947396259076, + "grad_norm": 3.5331463412169835, + "learning_rate": 4.805885735261454e-06, + "loss": 0.1583, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1404 + }, + { + "epoch": 0.13511564167908832, + "grad_norm": 2.8567964021030727, + "learning_rate": 4.80559091662609e-06, + "loss": 0.1606, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1405 + }, + { + "epoch": 0.1352118093955859, + "grad_norm": 2.853297113162807, + "learning_rate": 4.805295883333791e-06, + "loss": 0.2005, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1406 + }, + { + "epoch": 0.13530797711208348, + "grad_norm": 5.10529030343333, + "learning_rate": 4.8050006354120235e-06, + "loss": 0.1269, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1407 + }, + { + "epoch": 0.13540414482858104, + "grad_norm": 2.8975795738844257, + "learning_rate": 4.804705172888277e-06, + "loss": 0.1575, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1408 + }, + { + "epoch": 0.1355003125450786, + "grad_norm": 1.9124988880338631, + "learning_rate": 4.8044094957900585e-06, + "loss": 0.161, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1409 + }, + { + "epoch": 0.1355964802615762, + "grad_norm": 3.0536433511603427, + "learning_rate": 4.804113604144899e-06, + "loss": 0.1617, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1410 + }, + { + "epoch": 0.13569264797807376, + "grad_norm": 3.746888200247611, + "learning_rate": 4.803817497980343e-06, + "loss": 0.1535, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1411 + }, + { + "epoch": 0.13578881569457132, + "grad_norm": 2.0565381177785795, + "learning_rate": 4.803521177323962e-06, + "loss": 0.1946, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1412 + }, + { + "epoch": 0.13588498341106892, + "grad_norm": 3.354729034699641, + "learning_rate": 4.803224642203342e-06, + "loss": 0.1724, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1413 + }, + { + "epoch": 0.13598115112756648, + "grad_norm": 1.849806090832993, + "learning_rate": 4.802927892646093e-06, + "loss": 0.1533, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1414 + }, + { + "epoch": 0.13607731884406404, + "grad_norm": 3.3035128605110815, + "learning_rate": 4.802630928679843e-06, + "loss": 0.1314, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1415 + }, + { + "epoch": 0.1361734865605616, + "grad_norm": 2.502017859584253, + "learning_rate": 4.80233375033224e-06, + "loss": 0.1657, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1416 + }, + { + "epoch": 0.1362696542770592, + "grad_norm": 1.9463924588058006, + "learning_rate": 4.802036357630951e-06, + "loss": 0.1346, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1417 + }, + { + "epoch": 0.13636582199355676, + "grad_norm": 3.1750225625798594, + "learning_rate": 4.8017387506036654e-06, + "loss": 0.1924, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1418 + }, + { + "epoch": 0.13646198971005433, + "grad_norm": 2.037989019972167, + "learning_rate": 4.80144092927809e-06, + "loss": 0.1906, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1419 + }, + { + "epoch": 0.13655815742655192, + "grad_norm": 3.464042229990336, + "learning_rate": 4.801142893681955e-06, + "loss": 0.1412, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1420 + }, + { + "epoch": 0.13665432514304948, + "grad_norm": 1.64191532245769, + "learning_rate": 4.800844643843007e-06, + "loss": 0.1579, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1421 + }, + { + "epoch": 0.13675049285954705, + "grad_norm": 2.2366595391644077, + "learning_rate": 4.800546179789013e-06, + "loss": 0.1673, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1422 + }, + { + "epoch": 0.1368466605760446, + "grad_norm": 2.214506555888449, + "learning_rate": 4.8002475015477614e-06, + "loss": 0.1433, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1423 + }, + { + "epoch": 0.1369428282925422, + "grad_norm": 3.6670296455882365, + "learning_rate": 4.799948609147061e-06, + "loss": 0.139, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1424 + }, + { + "epoch": 0.13703899600903977, + "grad_norm": 4.395085473498923, + "learning_rate": 4.799649502614738e-06, + "loss": 0.1614, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1425 + }, + { + "epoch": 0.13713516372553733, + "grad_norm": 3.803825287808495, + "learning_rate": 4.799350181978641e-06, + "loss": 0.1396, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1426 + }, + { + "epoch": 0.13723133144203492, + "grad_norm": 2.010332542057895, + "learning_rate": 4.799050647266639e-06, + "loss": 0.1394, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1427 + }, + { + "epoch": 0.13732749915853248, + "grad_norm": 1.750807052049032, + "learning_rate": 4.798750898506616e-06, + "loss": 0.1439, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1428 + }, + { + "epoch": 0.13742366687503005, + "grad_norm": 3.526225539916364, + "learning_rate": 4.798450935726483e-06, + "loss": 0.1635, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1429 + }, + { + "epoch": 0.1375198345915276, + "grad_norm": 3.0573001907270205, + "learning_rate": 4.798150758954164e-06, + "loss": 0.1313, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1430 + }, + { + "epoch": 0.1376160023080252, + "grad_norm": 2.3626568808166444, + "learning_rate": 4.797850368217609e-06, + "loss": 0.1659, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1431 + }, + { + "epoch": 0.13771217002452277, + "grad_norm": 1.731787841365359, + "learning_rate": 4.797549763544784e-06, + "loss": 0.1186, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1432 + }, + { + "epoch": 0.13780833774102033, + "grad_norm": 3.5479271402929116, + "learning_rate": 4.797248944963677e-06, + "loss": 0.1905, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1433 + }, + { + "epoch": 0.13790450545751792, + "grad_norm": 2.81281803978612, + "learning_rate": 4.796947912502295e-06, + "loss": 0.1579, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1434 + }, + { + "epoch": 0.1380006731740155, + "grad_norm": 2.2899804825156806, + "learning_rate": 4.796646666188663e-06, + "loss": 0.1468, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1435 + }, + { + "epoch": 0.13809684089051305, + "grad_norm": 2.858347034830046, + "learning_rate": 4.79634520605083e-06, + "loss": 0.1578, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1436 + }, + { + "epoch": 0.13819300860701061, + "grad_norm": 2.287068169341035, + "learning_rate": 4.796043532116861e-06, + "loss": 0.179, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1437 + }, + { + "epoch": 0.1382891763235082, + "grad_norm": 2.5856867725147477, + "learning_rate": 4.795741644414844e-06, + "loss": 0.1523, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1438 + }, + { + "epoch": 0.13838534404000577, + "grad_norm": 1.8950725034724416, + "learning_rate": 4.795439542972886e-06, + "loss": 0.1519, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1439 + }, + { + "epoch": 0.13848151175650333, + "grad_norm": 2.7479922370397274, + "learning_rate": 4.795137227819113e-06, + "loss": 0.1675, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1440 + }, + { + "epoch": 0.13857767947300093, + "grad_norm": 3.706339089276095, + "learning_rate": 4.794834698981671e-06, + "loss": 0.1891, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1441 + }, + { + "epoch": 0.1386738471894985, + "grad_norm": 3.425020245975623, + "learning_rate": 4.794531956488726e-06, + "loss": 0.1621, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1442 + }, + { + "epoch": 0.13877001490599605, + "grad_norm": 1.8609428226535203, + "learning_rate": 4.794229000368464e-06, + "loss": 0.1577, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1443 + }, + { + "epoch": 0.13886618262249362, + "grad_norm": 1.613580265934467, + "learning_rate": 4.793925830649093e-06, + "loss": 0.1419, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1444 + }, + { + "epoch": 0.1389623503389912, + "grad_norm": 1.707337795715193, + "learning_rate": 4.793622447358837e-06, + "loss": 0.1136, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1445 + }, + { + "epoch": 0.13905851805548877, + "grad_norm": 2.9395118497198176, + "learning_rate": 4.793318850525943e-06, + "loss": 0.1502, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1446 + }, + { + "epoch": 0.13915468577198634, + "grad_norm": 2.2919152954194604, + "learning_rate": 4.793015040178676e-06, + "loss": 0.1493, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1447 + }, + { + "epoch": 0.13925085348848393, + "grad_norm": 2.9306115037964577, + "learning_rate": 4.792711016345322e-06, + "loss": 0.1681, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1448 + }, + { + "epoch": 0.1393470212049815, + "grad_norm": 3.7784009012316297, + "learning_rate": 4.792406779054185e-06, + "loss": 0.168, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1449 + }, + { + "epoch": 0.13944318892147906, + "grad_norm": 2.9182188193897662, + "learning_rate": 4.792102328333592e-06, + "loss": 0.1995, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1450 + }, + { + "epoch": 0.13953935663797662, + "grad_norm": 5.169669557925822, + "learning_rate": 4.791797664211889e-06, + "loss": 0.2036, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1451 + }, + { + "epoch": 0.1396355243544742, + "grad_norm": 2.886214198247278, + "learning_rate": 4.791492786717439e-06, + "loss": 0.1618, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1452 + }, + { + "epoch": 0.13973169207097177, + "grad_norm": 3.4795711921172376, + "learning_rate": 4.791187695878627e-06, + "loss": 0.1284, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1453 + }, + { + "epoch": 0.13982785978746934, + "grad_norm": 2.017417282685003, + "learning_rate": 4.79088239172386e-06, + "loss": 0.1378, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1454 + }, + { + "epoch": 0.13992402750396693, + "grad_norm": 5.37542311614928, + "learning_rate": 4.790576874281561e-06, + "loss": 0.1662, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1455 + }, + { + "epoch": 0.1400201952204645, + "grad_norm": 3.5921755317934307, + "learning_rate": 4.790271143580174e-06, + "loss": 0.1478, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1456 + }, + { + "epoch": 0.14011636293696206, + "grad_norm": 4.473297452679177, + "learning_rate": 4.789965199648165e-06, + "loss": 0.1595, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1457 + }, + { + "epoch": 0.14021253065345962, + "grad_norm": 1.8628187103306397, + "learning_rate": 4.789659042514018e-06, + "loss": 0.1521, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1458 + }, + { + "epoch": 0.1403086983699572, + "grad_norm": 3.8500101491351244, + "learning_rate": 4.789352672206235e-06, + "loss": 0.1484, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1459 + }, + { + "epoch": 0.14040486608645478, + "grad_norm": 5.047441038546661, + "learning_rate": 4.7890460887533415e-06, + "loss": 0.1691, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1460 + }, + { + "epoch": 0.14050103380295234, + "grad_norm": 1.6423177230055785, + "learning_rate": 4.788739292183883e-06, + "loss": 0.1331, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1461 + }, + { + "epoch": 0.14059720151944993, + "grad_norm": 1.8129558832362984, + "learning_rate": 4.78843228252642e-06, + "loss": 0.1634, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1462 + }, + { + "epoch": 0.1406933692359475, + "grad_norm": 2.642414345687333, + "learning_rate": 4.788125059809536e-06, + "loss": 0.133, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1463 + }, + { + "epoch": 0.14078953695244506, + "grad_norm": 2.8493495974394536, + "learning_rate": 4.787817624061838e-06, + "loss": 0.139, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1464 + }, + { + "epoch": 0.14088570466894262, + "grad_norm": 1.9488539210241227, + "learning_rate": 4.787509975311945e-06, + "loss": 0.1389, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1465 + }, + { + "epoch": 0.14098187238544022, + "grad_norm": 3.1826786024289597, + "learning_rate": 4.7872021135885024e-06, + "loss": 0.1497, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1466 + }, + { + "epoch": 0.14107804010193778, + "grad_norm": 1.9889013898769632, + "learning_rate": 4.786894038920172e-06, + "loss": 0.1618, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1467 + }, + { + "epoch": 0.14117420781843534, + "grad_norm": 2.7098090444447265, + "learning_rate": 4.7865857513356365e-06, + "loss": 0.156, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1468 + }, + { + "epoch": 0.14127037553493293, + "grad_norm": 2.730833971050497, + "learning_rate": 4.786277250863599e-06, + "loss": 0.1833, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1469 + }, + { + "epoch": 0.1413665432514305, + "grad_norm": 2.2472362670803516, + "learning_rate": 4.7859685375327804e-06, + "loss": 0.1635, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1470 + }, + { + "epoch": 0.14146271096792806, + "grad_norm": 3.817659068301691, + "learning_rate": 4.7856596113719244e-06, + "loss": 0.1647, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1471 + }, + { + "epoch": 0.14155887868442563, + "grad_norm": 2.8556137234312735, + "learning_rate": 4.785350472409792e-06, + "loss": 0.1652, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1472 + }, + { + "epoch": 0.14165504640092322, + "grad_norm": 1.9151903084856565, + "learning_rate": 4.785041120675165e-06, + "loss": 0.148, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1473 + }, + { + "epoch": 0.14175121411742078, + "grad_norm": 1.4579237171728778, + "learning_rate": 4.784731556196844e-06, + "loss": 0.1369, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1474 + }, + { + "epoch": 0.14184738183391835, + "grad_norm": 1.9628642449044251, + "learning_rate": 4.784421779003652e-06, + "loss": 0.1521, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1475 + }, + { + "epoch": 0.14194354955041594, + "grad_norm": 3.3684990348866237, + "learning_rate": 4.784111789124429e-06, + "loss": 0.1593, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1476 + }, + { + "epoch": 0.1420397172669135, + "grad_norm": 1.9281647819616472, + "learning_rate": 4.783801586588037e-06, + "loss": 0.129, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1477 + }, + { + "epoch": 0.14213588498341106, + "grad_norm": 3.073669979477227, + "learning_rate": 4.783491171423354e-06, + "loss": 0.1949, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1478 + }, + { + "epoch": 0.14223205269990863, + "grad_norm": 6.198892686919367, + "learning_rate": 4.783180543659285e-06, + "loss": 0.1911, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1479 + }, + { + "epoch": 0.14232822041640622, + "grad_norm": 7.905698075127842, + "learning_rate": 4.782869703324746e-06, + "loss": 0.1874, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1480 + }, + { + "epoch": 0.14242438813290378, + "grad_norm": 6.820363372654018, + "learning_rate": 4.78255865044868e-06, + "loss": 0.1786, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1481 + }, + { + "epoch": 0.14252055584940135, + "grad_norm": 3.348784012072715, + "learning_rate": 4.7822473850600444e-06, + "loss": 0.1313, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1482 + }, + { + "epoch": 0.14261672356589894, + "grad_norm": 2.4779657795464676, + "learning_rate": 4.781935907187821e-06, + "loss": 0.1716, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1483 + }, + { + "epoch": 0.1427128912823965, + "grad_norm": 6.395590761128773, + "learning_rate": 4.78162421686101e-06, + "loss": 0.1955, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1484 + }, + { + "epoch": 0.14280905899889407, + "grad_norm": 8.171398746409283, + "learning_rate": 4.781312314108627e-06, + "loss": 0.1927, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1485 + }, + { + "epoch": 0.14290522671539163, + "grad_norm": 4.3437840451167675, + "learning_rate": 4.781000198959714e-06, + "loss": 0.1919, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1486 + }, + { + "epoch": 0.14300139443188922, + "grad_norm": 2.09196578729743, + "learning_rate": 4.780687871443329e-06, + "loss": 0.1554, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1487 + }, + { + "epoch": 0.1430975621483868, + "grad_norm": 1.6454752913164272, + "learning_rate": 4.78037533158855e-06, + "loss": 0.1495, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1488 + }, + { + "epoch": 0.14319372986488435, + "grad_norm": 6.198453858330595, + "learning_rate": 4.780062579424476e-06, + "loss": 0.1709, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1489 + }, + { + "epoch": 0.14328989758138194, + "grad_norm": 5.463470390208941, + "learning_rate": 4.779749614980225e-06, + "loss": 0.1646, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1490 + }, + { + "epoch": 0.1433860652978795, + "grad_norm": 4.669771646424334, + "learning_rate": 4.779436438284936e-06, + "loss": 0.155, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1491 + }, + { + "epoch": 0.14348223301437707, + "grad_norm": 2.3543400345733625, + "learning_rate": 4.7791230493677645e-06, + "loss": 0.1629, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1492 + }, + { + "epoch": 0.14357840073087463, + "grad_norm": 4.178221410751321, + "learning_rate": 4.778809448257889e-06, + "loss": 0.1985, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1493 + }, + { + "epoch": 0.14367456844737223, + "grad_norm": 6.843961513072696, + "learning_rate": 4.778495634984506e-06, + "loss": 0.1745, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1494 + }, + { + "epoch": 0.1437707361638698, + "grad_norm": 5.537724578385352, + "learning_rate": 4.778181609576832e-06, + "loss": 0.1537, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1495 + }, + { + "epoch": 0.14386690388036735, + "grad_norm": 5.331799514389587, + "learning_rate": 4.777867372064105e-06, + "loss": 0.1739, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1496 + }, + { + "epoch": 0.14396307159686494, + "grad_norm": 1.5573947154479957, + "learning_rate": 4.777552922475583e-06, + "loss": 0.1757, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1497 + }, + { + "epoch": 0.1440592393133625, + "grad_norm": 3.7336658509798712, + "learning_rate": 4.777238260840538e-06, + "loss": 0.1559, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1498 + }, + { + "epoch": 0.14415540702986007, + "grad_norm": 5.00930778322853, + "learning_rate": 4.776923387188268e-06, + "loss": 0.1541, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1499 + }, + { + "epoch": 0.14425157474635764, + "grad_norm": 2.0871946035606017, + "learning_rate": 4.7766083015480876e-06, + "loss": 0.124, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1500 + }, + { + "epoch": 0.14434774246285523, + "grad_norm": 1.7663047333051936, + "learning_rate": 4.776293003949335e-06, + "loss": 0.1378, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1501 + }, + { + "epoch": 0.1444439101793528, + "grad_norm": 2.165181600738446, + "learning_rate": 4.775977494421362e-06, + "loss": 0.1515, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1502 + }, + { + "epoch": 0.14454007789585036, + "grad_norm": 4.802217644044827, + "learning_rate": 4.775661772993545e-06, + "loss": 0.2127, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1503 + }, + { + "epoch": 0.14463624561234795, + "grad_norm": 3.525884953522755, + "learning_rate": 4.775345839695279e-06, + "loss": 0.1238, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1504 + }, + { + "epoch": 0.1447324133288455, + "grad_norm": 2.1470376366145403, + "learning_rate": 4.775029694555977e-06, + "loss": 0.1659, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1505 + }, + { + "epoch": 0.14482858104534307, + "grad_norm": 2.4557533847029966, + "learning_rate": 4.774713337605073e-06, + "loss": 0.1291, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1506 + }, + { + "epoch": 0.14492474876184064, + "grad_norm": 3.7935172676252256, + "learning_rate": 4.774396768872022e-06, + "loss": 0.1437, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1507 + }, + { + "epoch": 0.14502091647833823, + "grad_norm": 2.35718922802582, + "learning_rate": 4.7740799883862966e-06, + "loss": 0.1519, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1508 + }, + { + "epoch": 0.1451170841948358, + "grad_norm": 2.064704663728152, + "learning_rate": 4.77376299617739e-06, + "loss": 0.1547, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1509 + }, + { + "epoch": 0.14521325191133336, + "grad_norm": 5.602618039845969, + "learning_rate": 4.773445792274815e-06, + "loss": 0.1834, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1510 + }, + { + "epoch": 0.14530941962783095, + "grad_norm": 3.212382371370513, + "learning_rate": 4.773128376708104e-06, + "loss": 0.157, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1511 + }, + { + "epoch": 0.1454055873443285, + "grad_norm": 1.7869060666818437, + "learning_rate": 4.77281074950681e-06, + "loss": 0.1361, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1512 + }, + { + "epoch": 0.14550175506082608, + "grad_norm": 2.3673431020439035, + "learning_rate": 4.772492910700503e-06, + "loss": 0.1298, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1513 + }, + { + "epoch": 0.14559792277732364, + "grad_norm": 3.751255529210783, + "learning_rate": 4.772174860318779e-06, + "loss": 0.1123, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1514 + }, + { + "epoch": 0.14569409049382123, + "grad_norm": 2.453019837539611, + "learning_rate": 4.771856598391245e-06, + "loss": 0.1471, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1515 + }, + { + "epoch": 0.1457902582103188, + "grad_norm": 2.6758970927743393, + "learning_rate": 4.771538124947533e-06, + "loss": 0.155, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1516 + }, + { + "epoch": 0.14588642592681636, + "grad_norm": 2.2692296047418052, + "learning_rate": 4.771219440017295e-06, + "loss": 0.1417, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1517 + }, + { + "epoch": 0.14598259364331395, + "grad_norm": 1.9204650614630887, + "learning_rate": 4.7709005436302004e-06, + "loss": 0.164, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1518 + }, + { + "epoch": 0.14607876135981152, + "grad_norm": 1.8576103229262562, + "learning_rate": 4.770581435815941e-06, + "loss": 0.1504, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1519 + }, + { + "epoch": 0.14617492907630908, + "grad_norm": 1.8169668161765522, + "learning_rate": 4.770262116604224e-06, + "loss": 0.1616, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1520 + }, + { + "epoch": 0.14627109679280664, + "grad_norm": 1.8967165233886112, + "learning_rate": 4.769942586024782e-06, + "loss": 0.1655, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1521 + }, + { + "epoch": 0.14636726450930423, + "grad_norm": 2.231307372224021, + "learning_rate": 4.769622844107361e-06, + "loss": 0.1758, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1522 + }, + { + "epoch": 0.1464634322258018, + "grad_norm": 1.7040351345874047, + "learning_rate": 4.769302890881732e-06, + "loss": 0.1497, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1523 + }, + { + "epoch": 0.14655959994229936, + "grad_norm": 2.5471944002854063, + "learning_rate": 4.7689827263776825e-06, + "loss": 0.1558, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1524 + }, + { + "epoch": 0.14665576765879695, + "grad_norm": 2.126822177578371, + "learning_rate": 4.768662350625022e-06, + "loss": 0.148, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1525 + }, + { + "epoch": 0.14675193537529452, + "grad_norm": 3.16356492414152, + "learning_rate": 4.768341763653577e-06, + "loss": 0.1632, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1526 + }, + { + "epoch": 0.14684810309179208, + "grad_norm": 1.7107658045648164, + "learning_rate": 4.7680209654931956e-06, + "loss": 0.1573, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1527 + }, + { + "epoch": 0.14694427080828965, + "grad_norm": 1.645973476813343, + "learning_rate": 4.767699956173745e-06, + "loss": 0.1665, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1528 + }, + { + "epoch": 0.14704043852478724, + "grad_norm": 2.7992712574599925, + "learning_rate": 4.767378735725111e-06, + "loss": 0.1552, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1529 + }, + { + "epoch": 0.1471366062412848, + "grad_norm": 2.3783534869567853, + "learning_rate": 4.767057304177202e-06, + "loss": 0.1832, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1530 + }, + { + "epoch": 0.14723277395778236, + "grad_norm": 3.79438881287844, + "learning_rate": 4.766735661559944e-06, + "loss": 0.1594, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1531 + }, + { + "epoch": 0.14732894167427996, + "grad_norm": 3.193003316038265, + "learning_rate": 4.766413807903281e-06, + "loss": 0.1395, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1532 + }, + { + "epoch": 0.14742510939077752, + "grad_norm": 10.852477845276347, + "learning_rate": 4.7660917432371804e-06, + "loss": 0.1491, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1533 + }, + { + "epoch": 0.14752127710727508, + "grad_norm": 1.7104936436827827, + "learning_rate": 4.765769467591626e-06, + "loss": 0.1636, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1534 + }, + { + "epoch": 0.14761744482377265, + "grad_norm": 2.309086528446379, + "learning_rate": 4.765446980996622e-06, + "loss": 0.1555, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1535 + }, + { + "epoch": 0.14771361254027024, + "grad_norm": 2.212913453734571, + "learning_rate": 4.765124283482195e-06, + "loss": 0.1547, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1536 + }, + { + "epoch": 0.1478097802567678, + "grad_norm": 2.1939414826528476, + "learning_rate": 4.764801375078389e-06, + "loss": 0.1245, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1537 + }, + { + "epoch": 0.14790594797326537, + "grad_norm": 2.4901485002800094, + "learning_rate": 4.764478255815264e-06, + "loss": 0.1353, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1538 + }, + { + "epoch": 0.14800211568976296, + "grad_norm": 3.1123923655529975, + "learning_rate": 4.764154925722908e-06, + "loss": 0.1874, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1539 + }, + { + "epoch": 0.14809828340626052, + "grad_norm": 2.4513916991559688, + "learning_rate": 4.763831384831421e-06, + "loss": 0.1345, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1540 + }, + { + "epoch": 0.1481944511227581, + "grad_norm": 2.108978988859602, + "learning_rate": 4.7635076331709264e-06, + "loss": 0.1614, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1541 + }, + { + "epoch": 0.14829061883925565, + "grad_norm": 1.9947031510158073, + "learning_rate": 4.763183670771566e-06, + "loss": 0.1787, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1542 + }, + { + "epoch": 0.14838678655575324, + "grad_norm": 2.6777616256358843, + "learning_rate": 4.7628594976635035e-06, + "loss": 0.1458, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1543 + }, + { + "epoch": 0.1484829542722508, + "grad_norm": 2.0474673739355262, + "learning_rate": 4.7625351138769175e-06, + "loss": 0.1724, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1544 + }, + { + "epoch": 0.14857912198874837, + "grad_norm": 2.037154987330932, + "learning_rate": 4.76221051944201e-06, + "loss": 0.1713, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1545 + }, + { + "epoch": 0.14867528970524596, + "grad_norm": 2.1467306618930406, + "learning_rate": 4.761885714389003e-06, + "loss": 0.1592, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1546 + }, + { + "epoch": 0.14877145742174352, + "grad_norm": 4.115416205486556, + "learning_rate": 4.761560698748135e-06, + "loss": 0.1479, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1547 + }, + { + "epoch": 0.1488676251382411, + "grad_norm": 2.486720446313294, + "learning_rate": 4.761235472549667e-06, + "loss": 0.1364, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1548 + }, + { + "epoch": 0.14896379285473865, + "grad_norm": 2.6920145754820655, + "learning_rate": 4.760910035823877e-06, + "loss": 0.156, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1549 + }, + { + "epoch": 0.14905996057123624, + "grad_norm": 3.05525653255896, + "learning_rate": 4.760584388601066e-06, + "loss": 0.1671, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1550 + }, + { + "epoch": 0.1491561282877338, + "grad_norm": 1.8154928288352226, + "learning_rate": 4.7602585309115516e-06, + "loss": 0.1687, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1551 + }, + { + "epoch": 0.14925229600423137, + "grad_norm": 2.3601973202183224, + "learning_rate": 4.759932462785672e-06, + "loss": 0.1441, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1552 + }, + { + "epoch": 0.14934846372072896, + "grad_norm": 1.850560364501208, + "learning_rate": 4.759606184253786e-06, + "loss": 0.138, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1553 + }, + { + "epoch": 0.14944463143722653, + "grad_norm": 1.8182277637946682, + "learning_rate": 4.75927969534627e-06, + "loss": 0.1433, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1554 + }, + { + "epoch": 0.1495407991537241, + "grad_norm": 1.5550313855839186, + "learning_rate": 4.758952996093521e-06, + "loss": 0.139, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1555 + }, + { + "epoch": 0.14963696687022165, + "grad_norm": 3.2894945340045303, + "learning_rate": 4.758626086525956e-06, + "loss": 0.1505, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1556 + }, + { + "epoch": 0.14973313458671925, + "grad_norm": 1.6838884367308664, + "learning_rate": 4.758298966674012e-06, + "loss": 0.1456, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1557 + }, + { + "epoch": 0.1498293023032168, + "grad_norm": 1.4119995609640088, + "learning_rate": 4.757971636568143e-06, + "loss": 0.1095, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1558 + }, + { + "epoch": 0.14992547001971437, + "grad_norm": 2.636019553775688, + "learning_rate": 4.757644096238825e-06, + "loss": 0.1631, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1559 + }, + { + "epoch": 0.15002163773621197, + "grad_norm": 3.187297253526712, + "learning_rate": 4.757316345716554e-06, + "loss": 0.2023, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1560 + }, + { + "epoch": 0.15011780545270953, + "grad_norm": 3.526872114335604, + "learning_rate": 4.7569883850318426e-06, + "loss": 0.1423, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1561 + }, + { + "epoch": 0.1502139731692071, + "grad_norm": 5.133089662756706, + "learning_rate": 4.7566602142152264e-06, + "loss": 0.1548, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1562 + }, + { + "epoch": 0.15031014088570466, + "grad_norm": 4.750716846336702, + "learning_rate": 4.756331833297259e-06, + "loss": 0.1401, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1563 + }, + { + "epoch": 0.15040630860220225, + "grad_norm": 2.163260810695798, + "learning_rate": 4.756003242308513e-06, + "loss": 0.1706, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1564 + }, + { + "epoch": 0.1505024763186998, + "grad_norm": 2.758536825657497, + "learning_rate": 4.755674441279581e-06, + "loss": 0.157, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1565 + }, + { + "epoch": 0.15059864403519738, + "grad_norm": 2.2860134971994723, + "learning_rate": 4.755345430241075e-06, + "loss": 0.1631, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1566 + }, + { + "epoch": 0.15069481175169497, + "grad_norm": 1.6849524203308879, + "learning_rate": 4.755016209223629e-06, + "loss": 0.1615, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1567 + }, + { + "epoch": 0.15079097946819253, + "grad_norm": 2.927661146464179, + "learning_rate": 4.754686778257891e-06, + "loss": 0.1302, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1568 + }, + { + "epoch": 0.1508871471846901, + "grad_norm": 2.469498602068315, + "learning_rate": 4.754357137374535e-06, + "loss": 0.1968, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1569 + }, + { + "epoch": 0.15098331490118766, + "grad_norm": 4.0875965529263905, + "learning_rate": 4.754027286604249e-06, + "loss": 0.1813, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1570 + }, + { + "epoch": 0.15107948261768525, + "grad_norm": 2.57449314375476, + "learning_rate": 4.753697225977745e-06, + "loss": 0.1444, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1571 + }, + { + "epoch": 0.15117565033418282, + "grad_norm": 1.6119514057670568, + "learning_rate": 4.753366955525751e-06, + "loss": 0.142, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1572 + }, + { + "epoch": 0.15127181805068038, + "grad_norm": 2.9269297501069165, + "learning_rate": 4.753036475279018e-06, + "loss": 0.1795, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1573 + }, + { + "epoch": 0.15136798576717797, + "grad_norm": 5.412457888326617, + "learning_rate": 4.7527057852683125e-06, + "loss": 0.1667, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1574 + }, + { + "epoch": 0.15146415348367553, + "grad_norm": 2.210668322992871, + "learning_rate": 4.752374885524424e-06, + "loss": 0.1534, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1575 + }, + { + "epoch": 0.1515603212001731, + "grad_norm": 1.7923377527890578, + "learning_rate": 4.75204377607816e-06, + "loss": 0.1425, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1576 + }, + { + "epoch": 0.15165648891667066, + "grad_norm": 3.613891291924149, + "learning_rate": 4.751712456960348e-06, + "loss": 0.1502, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1577 + }, + { + "epoch": 0.15175265663316825, + "grad_norm": 3.0155923359091354, + "learning_rate": 4.751380928201834e-06, + "loss": 0.1663, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1578 + }, + { + "epoch": 0.15184882434966582, + "grad_norm": 2.0864121314495967, + "learning_rate": 4.751049189833484e-06, + "loss": 0.1456, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1579 + }, + { + "epoch": 0.15194499206616338, + "grad_norm": 2.37638701053874, + "learning_rate": 4.750717241886186e-06, + "loss": 0.1118, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1580 + }, + { + "epoch": 0.15204115978266097, + "grad_norm": 2.1944392925877385, + "learning_rate": 4.750385084390841e-06, + "loss": 0.1591, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1581 + }, + { + "epoch": 0.15213732749915854, + "grad_norm": 2.5247184353756555, + "learning_rate": 4.750052717378378e-06, + "loss": 0.1317, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.24, + "memory/max_mem_allocated(gib)": 36.24, + "step": 1582 + }, + { + "epoch": 0.1522334952156561, + "grad_norm": 2.1856557116576294, + "learning_rate": 4.74972014087974e-06, + "loss": 0.1663, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.43, + "memory/max_mem_allocated(gib)": 36.43, + "step": 1583 + }, + { + "epoch": 0.15232966293215366, + "grad_norm": 1.9186322011585946, + "learning_rate": 4.74938735492589e-06, + "loss": 0.1372, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.43, + "memory/max_mem_allocated(gib)": 36.43, + "step": 1584 + }, + { + "epoch": 0.15242583064865126, + "grad_norm": 2.5874876510522222, + "learning_rate": 4.749054359547812e-06, + "loss": 0.1548, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.43, + "memory/max_mem_allocated(gib)": 36.43, + "step": 1585 + }, + { + "epoch": 0.15252199836514882, + "grad_norm": 2.1171171259897954, + "learning_rate": 4.748721154776508e-06, + "loss": 0.1642, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.43, + "memory/max_mem_allocated(gib)": 36.43, + "step": 1586 + }, + { + "epoch": 0.15261816608164638, + "grad_norm": 2.5809925064584944, + "learning_rate": 4.7483877406430015e-06, + "loss": 0.1169, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.43, + "memory/max_mem_allocated(gib)": 36.43, + "step": 1587 + }, + { + "epoch": 0.15271433379814398, + "grad_norm": 2.426169559511099, + "learning_rate": 4.748054117178333e-06, + "loss": 0.1279, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.43, + "memory/max_mem_allocated(gib)": 36.43, + "step": 1588 + }, + { + "epoch": 0.15281050151464154, + "grad_norm": 1.6989633216191262, + "learning_rate": 4.747720284413565e-06, + "loss": 0.1533, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.43, + "memory/max_mem_allocated(gib)": 36.43, + "step": 1589 + }, + { + "epoch": 0.1529066692311391, + "grad_norm": 1.888711083630733, + "learning_rate": 4.747386242379778e-06, + "loss": 0.1738, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.43, + "memory/max_mem_allocated(gib)": 36.43, + "step": 1590 + }, + { + "epoch": 0.15300283694763667, + "grad_norm": 1.9369924608216103, + "learning_rate": 4.747051991108071e-06, + "loss": 0.1662, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.43, + "memory/max_mem_allocated(gib)": 36.43, + "step": 1591 + }, + { + "epoch": 0.15309900466413426, + "grad_norm": 1.5651201395856122, + "learning_rate": 4.746717530629565e-06, + "loss": 0.1751, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.43, + "memory/max_mem_allocated(gib)": 36.43, + "step": 1592 + }, + { + "epoch": 0.15319517238063182, + "grad_norm": 3.3899171331409126, + "learning_rate": 4.746382860975399e-06, + "loss": 0.1376, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.43, + "memory/max_mem_allocated(gib)": 36.43, + "step": 1593 + }, + { + "epoch": 0.15329134009712939, + "grad_norm": 1.9195003234781833, + "learning_rate": 4.746047982176732e-06, + "loss": 0.1538, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.43, + "memory/max_mem_allocated(gib)": 36.43, + "step": 1594 + }, + { + "epoch": 0.15338750781362698, + "grad_norm": 3.250369626011323, + "learning_rate": 4.7457128942647415e-06, + "loss": 0.1657, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.43, + "memory/max_mem_allocated(gib)": 36.43, + "step": 1595 + }, + { + "epoch": 0.15348367553012454, + "grad_norm": 3.5331773468143757, + "learning_rate": 4.745377597270625e-06, + "loss": 0.1505, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.43, + "memory/max_mem_allocated(gib)": 36.43, + "step": 1596 + }, + { + "epoch": 0.1535798432466221, + "grad_norm": 3.982066479969497, + "learning_rate": 4.745042091225599e-06, + "loss": 0.1657, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.43, + "memory/max_mem_allocated(gib)": 36.43, + "step": 1597 + }, + { + "epoch": 0.15367601096311967, + "grad_norm": 4.1505897466786905, + "learning_rate": 4.744706376160902e-06, + "loss": 0.1668, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.43, + "memory/max_mem_allocated(gib)": 36.43, + "step": 1598 + }, + { + "epoch": 0.15377217867961726, + "grad_norm": 1.6610050090493902, + "learning_rate": 4.744370452107789e-06, + "loss": 0.158, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.43, + "memory/max_mem_allocated(gib)": 36.43, + "step": 1599 + }, + { + "epoch": 0.15386834639611482, + "grad_norm": 2.229175631591748, + "learning_rate": 4.744034319097536e-06, + "loss": 0.1431, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.43, + "memory/max_mem_allocated(gib)": 36.43, + "step": 1600 + }, + { + "epoch": 0.1539645141126124, + "grad_norm": 2.279816105734618, + "learning_rate": 4.743697977161436e-06, + "loss": 0.1438, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.43, + "memory/max_mem_allocated(gib)": 36.43, + "step": 1601 + }, + { + "epoch": 0.15406068182910998, + "grad_norm": 2.24493517987255, + "learning_rate": 4.743361426330805e-06, + "loss": 0.1693, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.43, + "memory/max_mem_allocated(gib)": 36.43, + "step": 1602 + }, + { + "epoch": 0.15415684954560754, + "grad_norm": 1.8629389300119237, + "learning_rate": 4.743024666636977e-06, + "loss": 0.1617, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.43, + "memory/max_mem_allocated(gib)": 36.43, + "step": 1603 + }, + { + "epoch": 0.1542530172621051, + "grad_norm": 1.7350371024316185, + "learning_rate": 4.742687698111305e-06, + "loss": 0.1445, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.43, + "memory/max_mem_allocated(gib)": 36.43, + "step": 1604 + }, + { + "epoch": 0.15434918497860267, + "grad_norm": 4.318647467858579, + "learning_rate": 4.74235052078516e-06, + "loss": 0.1654, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.43, + "memory/max_mem_allocated(gib)": 36.43, + "step": 1605 + }, + { + "epoch": 0.15444535269510026, + "grad_norm": 2.7366718202092732, + "learning_rate": 4.742013134689937e-06, + "loss": 0.1637, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.43, + "memory/max_mem_allocated(gib)": 36.43, + "step": 1606 + }, + { + "epoch": 0.15454152041159783, + "grad_norm": 3.4940994595507506, + "learning_rate": 4.741675539857047e-06, + "loss": 0.1771, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.43, + "memory/max_mem_allocated(gib)": 36.43, + "step": 1607 + }, + { + "epoch": 0.1546376881280954, + "grad_norm": 2.4591259841511914, + "learning_rate": 4.741337736317919e-06, + "loss": 0.147, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.43, + "memory/max_mem_allocated(gib)": 36.43, + "step": 1608 + }, + { + "epoch": 0.15473385584459298, + "grad_norm": 1.702289142190429, + "learning_rate": 4.740999724104004e-06, + "loss": 0.1506, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.43, + "memory/max_mem_allocated(gib)": 36.43, + "step": 1609 + }, + { + "epoch": 0.15483002356109055, + "grad_norm": 3.6100115550127687, + "learning_rate": 4.740661503246774e-06, + "loss": 0.1654, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.43, + "memory/max_mem_allocated(gib)": 36.43, + "step": 1610 + }, + { + "epoch": 0.1549261912775881, + "grad_norm": 1.6344689186599843, + "learning_rate": 4.740323073777716e-06, + "loss": 0.1494, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.43, + "memory/max_mem_allocated(gib)": 36.43, + "step": 1611 + }, + { + "epoch": 0.15502235899408567, + "grad_norm": 1.5375261979202617, + "learning_rate": 4.73998443572834e-06, + "loss": 0.134, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.43, + "memory/max_mem_allocated(gib)": 36.43, + "step": 1612 + }, + { + "epoch": 0.15511852671058327, + "grad_norm": 9.082616857281526, + "learning_rate": 4.739645589130174e-06, + "loss": 0.1398, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.43, + "memory/max_mem_allocated(gib)": 36.43, + "step": 1613 + }, + { + "epoch": 0.15521469442708083, + "grad_norm": 3.966615844520262, + "learning_rate": 4.7393065340147645e-06, + "loss": 0.1338, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.43, + "memory/max_mem_allocated(gib)": 36.43, + "step": 1614 + }, + { + "epoch": 0.1553108621435784, + "grad_norm": 2.1029321919797503, + "learning_rate": 4.73896727041368e-06, + "loss": 0.1663, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.43, + "memory/max_mem_allocated(gib)": 36.43, + "step": 1615 + }, + { + "epoch": 0.15540702986007598, + "grad_norm": 1.8059359062064102, + "learning_rate": 4.738627798358506e-06, + "loss": 0.1493, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.43, + "memory/max_mem_allocated(gib)": 36.43, + "step": 1616 + }, + { + "epoch": 0.15550319757657355, + "grad_norm": 1.9448463858420133, + "learning_rate": 4.738288117880849e-06, + "loss": 0.164, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.43, + "memory/max_mem_allocated(gib)": 36.43, + "step": 1617 + }, + { + "epoch": 0.1555993652930711, + "grad_norm": 3.3915019319621376, + "learning_rate": 4.737948229012332e-06, + "loss": 0.1371, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.43, + "memory/max_mem_allocated(gib)": 36.43, + "step": 1618 + }, + { + "epoch": 0.15569553300956868, + "grad_norm": 3.359614058934946, + "learning_rate": 4.737608131784603e-06, + "loss": 0.1871, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.43, + "memory/max_mem_allocated(gib)": 36.43, + "step": 1619 + }, + { + "epoch": 0.15579170072606627, + "grad_norm": 1.8551501662695853, + "learning_rate": 4.7372678262293235e-06, + "loss": 0.1361, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.43, + "memory/max_mem_allocated(gib)": 36.43, + "step": 1620 + }, + { + "epoch": 0.15588786844256383, + "grad_norm": 1.9288187343981646, + "learning_rate": 4.736927312378178e-06, + "loss": 0.1323, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.43, + "memory/max_mem_allocated(gib)": 36.43, + "step": 1621 + }, + { + "epoch": 0.1559840361590614, + "grad_norm": 1.8361894906104155, + "learning_rate": 4.736586590262869e-06, + "loss": 0.1355, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.43, + "memory/max_mem_allocated(gib)": 36.43, + "step": 1622 + }, + { + "epoch": 0.156080203875559, + "grad_norm": 1.5649653843193472, + "learning_rate": 4.7362456599151185e-06, + "loss": 0.1466, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.43, + "memory/max_mem_allocated(gib)": 36.43, + "step": 1623 + }, + { + "epoch": 0.15617637159205655, + "grad_norm": 1.453650369150626, + "learning_rate": 4.7359045213666675e-06, + "loss": 0.1352, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.43, + "memory/max_mem_allocated(gib)": 36.43, + "step": 1624 + }, + { + "epoch": 0.15627253930855411, + "grad_norm": 2.6940407080506046, + "learning_rate": 4.735563174649278e-06, + "loss": 0.1479, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.43, + "memory/max_mem_allocated(gib)": 36.43, + "step": 1625 + }, + { + "epoch": 0.15636870702505168, + "grad_norm": 1.8429908336077616, + "learning_rate": 4.735221619794731e-06, + "loss": 0.1639, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.43, + "memory/max_mem_allocated(gib)": 36.43, + "step": 1626 + }, + { + "epoch": 0.15646487474154927, + "grad_norm": 4.479013603189904, + "learning_rate": 4.734879856834825e-06, + "loss": 0.1771, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.43, + "memory/max_mem_allocated(gib)": 36.43, + "step": 1627 + }, + { + "epoch": 0.15656104245804683, + "grad_norm": 1.7824160119556802, + "learning_rate": 4.734537885801378e-06, + "loss": 0.1382, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.43, + "memory/max_mem_allocated(gib)": 36.43, + "step": 1628 + }, + { + "epoch": 0.1566572101745444, + "grad_norm": 1.8460984995552827, + "learning_rate": 4.73419570672623e-06, + "loss": 0.1467, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.43, + "memory/max_mem_allocated(gib)": 36.43, + "step": 1629 + }, + { + "epoch": 0.156753377891042, + "grad_norm": 2.170984088085211, + "learning_rate": 4.73385331964124e-06, + "loss": 0.1551, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.43, + "memory/max_mem_allocated(gib)": 36.43, + "step": 1630 + }, + { + "epoch": 0.15684954560753955, + "grad_norm": 2.9573938526820913, + "learning_rate": 4.733510724578282e-06, + "loss": 0.1478, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.43, + "memory/max_mem_allocated(gib)": 36.43, + "step": 1631 + }, + { + "epoch": 0.15694571332403712, + "grad_norm": 1.7911249211168825, + "learning_rate": 4.733167921569255e-06, + "loss": 0.1493, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.43, + "memory/max_mem_allocated(gib)": 36.43, + "step": 1632 + }, + { + "epoch": 0.15704188104053468, + "grad_norm": 2.2966279584160936, + "learning_rate": 4.732824910646074e-06, + "loss": 0.1544, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.43, + "memory/max_mem_allocated(gib)": 36.43, + "step": 1633 + }, + { + "epoch": 0.15713804875703227, + "grad_norm": 2.0180060341470334, + "learning_rate": 4.732481691840675e-06, + "loss": 0.1281, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.43, + "memory/max_mem_allocated(gib)": 36.43, + "step": 1634 + }, + { + "epoch": 0.15723421647352984, + "grad_norm": 1.7141143023679446, + "learning_rate": 4.732138265185011e-06, + "loss": 0.1224, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.43, + "memory/max_mem_allocated(gib)": 36.43, + "step": 1635 + }, + { + "epoch": 0.1573303841900274, + "grad_norm": 4.224816423479289, + "learning_rate": 4.731794630711058e-06, + "loss": 0.1696, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.43, + "memory/max_mem_allocated(gib)": 36.43, + "step": 1636 + }, + { + "epoch": 0.157426551906525, + "grad_norm": 1.6373741509595774, + "learning_rate": 4.731450788450809e-06, + "loss": 0.1467, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.43, + "memory/max_mem_allocated(gib)": 36.43, + "step": 1637 + }, + { + "epoch": 0.15752271962302256, + "grad_norm": 4.035845720746798, + "learning_rate": 4.731106738436275e-06, + "loss": 0.1591, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.43, + "memory/max_mem_allocated(gib)": 36.43, + "step": 1638 + }, + { + "epoch": 0.15761888733952012, + "grad_norm": 3.2780410090279664, + "learning_rate": 4.73076248069949e-06, + "loss": 0.1744, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.43, + "memory/max_mem_allocated(gib)": 36.43, + "step": 1639 + }, + { + "epoch": 0.15771505505601768, + "grad_norm": 4.345487199424142, + "learning_rate": 4.7304180152725035e-06, + "loss": 0.1722, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.43, + "memory/max_mem_allocated(gib)": 36.43, + "step": 1640 + }, + { + "epoch": 0.15781122277251527, + "grad_norm": 3.9416952123605555, + "learning_rate": 4.730073342187388e-06, + "loss": 0.1868, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.43, + "memory/max_mem_allocated(gib)": 36.43, + "step": 1641 + }, + { + "epoch": 0.15790739048901284, + "grad_norm": 3.0657736326499054, + "learning_rate": 4.729728461476232e-06, + "loss": 0.1437, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.43, + "memory/max_mem_allocated(gib)": 36.43, + "step": 1642 + }, + { + "epoch": 0.1580035582055104, + "grad_norm": 2.3083561323539965, + "learning_rate": 4.729383373171146e-06, + "loss": 0.162, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.43, + "memory/max_mem_allocated(gib)": 36.43, + "step": 1643 + }, + { + "epoch": 0.158099725922008, + "grad_norm": 3.32845600487202, + "learning_rate": 4.7290380773042575e-06, + "loss": 0.1902, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.43, + "memory/max_mem_allocated(gib)": 36.43, + "step": 1644 + }, + { + "epoch": 0.15819589363850556, + "grad_norm": 3.98632280589456, + "learning_rate": 4.7286925739077154e-06, + "loss": 0.1322, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.43, + "memory/max_mem_allocated(gib)": 36.43, + "step": 1645 + }, + { + "epoch": 0.15829206135500312, + "grad_norm": 1.37750213026959, + "learning_rate": 4.728346863013688e-06, + "loss": 0.1322, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.43, + "memory/max_mem_allocated(gib)": 36.43, + "step": 1646 + }, + { + "epoch": 0.15838822907150069, + "grad_norm": 4.457482602688131, + "learning_rate": 4.7280009446543595e-06, + "loss": 0.1957, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.43, + "memory/max_mem_allocated(gib)": 36.43, + "step": 1647 + }, + { + "epoch": 0.15848439678799828, + "grad_norm": 5.076826365509181, + "learning_rate": 4.727654818861937e-06, + "loss": 0.1835, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.43, + "memory/max_mem_allocated(gib)": 36.43, + "step": 1648 + }, + { + "epoch": 0.15858056450449584, + "grad_norm": 2.128296997149509, + "learning_rate": 4.727308485668647e-06, + "loss": 0.1635, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.43, + "memory/max_mem_allocated(gib)": 36.43, + "step": 1649 + }, + { + "epoch": 0.1586767322209934, + "grad_norm": 1.8471122756521934, + "learning_rate": 4.726961945106732e-06, + "loss": 0.1621, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.43, + "memory/max_mem_allocated(gib)": 36.43, + "step": 1650 + }, + { + "epoch": 0.158772899937491, + "grad_norm": 2.4841826209125943, + "learning_rate": 4.726615197208457e-06, + "loss": 0.1671, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.43, + "memory/max_mem_allocated(gib)": 36.43, + "step": 1651 + }, + { + "epoch": 0.15886906765398856, + "grad_norm": 1.9731687472817874, + "learning_rate": 4.726268242006106e-06, + "loss": 0.1535, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.43, + "memory/max_mem_allocated(gib)": 36.43, + "step": 1652 + }, + { + "epoch": 0.15896523537048612, + "grad_norm": 3.367358639089522, + "learning_rate": 4.72592107953198e-06, + "loss": 0.1575, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.43, + "memory/max_mem_allocated(gib)": 36.43, + "step": 1653 + }, + { + "epoch": 0.1590614030869837, + "grad_norm": 2.5754202188157405, + "learning_rate": 4.725573709818402e-06, + "loss": 0.1534, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.43, + "memory/max_mem_allocated(gib)": 36.43, + "step": 1654 + }, + { + "epoch": 0.15915757080348128, + "grad_norm": 1.853055143677654, + "learning_rate": 4.725226132897711e-06, + "loss": 0.1518, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.43, + "memory/max_mem_allocated(gib)": 36.43, + "step": 1655 + }, + { + "epoch": 0.15925373851997884, + "grad_norm": 2.7861452626754173, + "learning_rate": 4.724878348802271e-06, + "loss": 0.1492, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.43, + "memory/max_mem_allocated(gib)": 36.43, + "step": 1656 + }, + { + "epoch": 0.1593499062364764, + "grad_norm": 1.5101510695443439, + "learning_rate": 4.724530357564458e-06, + "loss": 0.1289, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.43, + "memory/max_mem_allocated(gib)": 36.43, + "step": 1657 + }, + { + "epoch": 0.159446073952974, + "grad_norm": 1.6877966910037523, + "learning_rate": 4.7241821592166725e-06, + "loss": 0.1328, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.43, + "memory/max_mem_allocated(gib)": 36.43, + "step": 1658 + }, + { + "epoch": 0.15954224166947156, + "grad_norm": 3.355479444496203, + "learning_rate": 4.723833753791333e-06, + "loss": 0.1549, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.43, + "memory/max_mem_allocated(gib)": 36.43, + "step": 1659 + }, + { + "epoch": 0.15963840938596913, + "grad_norm": 2.0885838755530535, + "learning_rate": 4.723485141320877e-06, + "loss": 0.1376, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.43, + "memory/max_mem_allocated(gib)": 36.43, + "step": 1660 + }, + { + "epoch": 0.1597345771024667, + "grad_norm": 3.2377529975097685, + "learning_rate": 4.723136321837761e-06, + "loss": 0.1863, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.43, + "memory/max_mem_allocated(gib)": 36.43, + "step": 1661 + }, + { + "epoch": 0.15983074481896428, + "grad_norm": 2.223238618041825, + "learning_rate": 4.722787295374461e-06, + "loss": 0.1458, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.43, + "memory/max_mem_allocated(gib)": 36.43, + "step": 1662 + }, + { + "epoch": 0.15992691253546185, + "grad_norm": 2.203492618171948, + "learning_rate": 4.722438061963471e-06, + "loss": 0.1407, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.43, + "memory/max_mem_allocated(gib)": 36.43, + "step": 1663 + }, + { + "epoch": 0.1600230802519594, + "grad_norm": 2.241314084151752, + "learning_rate": 4.7220886216373095e-06, + "loss": 0.1618, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.43, + "memory/max_mem_allocated(gib)": 36.43, + "step": 1664 + }, + { + "epoch": 0.160119247968457, + "grad_norm": 3.196204701673622, + "learning_rate": 4.7217389744285065e-06, + "loss": 0.1587, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.43, + "memory/max_mem_allocated(gib)": 36.43, + "step": 1665 + }, + { + "epoch": 0.16021541568495457, + "grad_norm": 3.551974665073654, + "learning_rate": 4.721389120369617e-06, + "loss": 0.1573, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.43, + "memory/max_mem_allocated(gib)": 36.43, + "step": 1666 + }, + { + "epoch": 0.16031158340145213, + "grad_norm": 2.1402450290133204, + "learning_rate": 4.721039059493212e-06, + "loss": 0.1601, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.43, + "memory/max_mem_allocated(gib)": 36.43, + "step": 1667 + }, + { + "epoch": 0.1604077511179497, + "grad_norm": 1.533365421068111, + "learning_rate": 4.720688791831885e-06, + "loss": 0.1363, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.43, + "memory/max_mem_allocated(gib)": 36.43, + "step": 1668 + }, + { + "epoch": 0.16050391883444728, + "grad_norm": 1.4759019847160688, + "learning_rate": 4.720338317418246e-06, + "loss": 0.1442, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.43, + "memory/max_mem_allocated(gib)": 36.43, + "step": 1669 + }, + { + "epoch": 0.16060008655094485, + "grad_norm": 1.5811417997422086, + "learning_rate": 4.719987636284924e-06, + "loss": 0.1336, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.43, + "memory/max_mem_allocated(gib)": 36.43, + "step": 1670 + }, + { + "epoch": 0.1606962542674424, + "grad_norm": 2.7369295270527036, + "learning_rate": 4.71963674846457e-06, + "loss": 0.1536, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.43, + "memory/max_mem_allocated(gib)": 36.43, + "step": 1671 + }, + { + "epoch": 0.16079242198394, + "grad_norm": 2.2808586157478907, + "learning_rate": 4.719285653989852e-06, + "loss": 0.1621, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.43, + "memory/max_mem_allocated(gib)": 36.43, + "step": 1672 + }, + { + "epoch": 0.16088858970043757, + "grad_norm": 1.674782219026824, + "learning_rate": 4.718934352893459e-06, + "loss": 0.1259, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.43, + "memory/max_mem_allocated(gib)": 36.43, + "step": 1673 + }, + { + "epoch": 0.16098475741693513, + "grad_norm": 1.6727391683412967, + "learning_rate": 4.718582845208096e-06, + "loss": 0.1225, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.43, + "memory/max_mem_allocated(gib)": 36.43, + "step": 1674 + }, + { + "epoch": 0.1610809251334327, + "grad_norm": 2.618120364906876, + "learning_rate": 4.718231130966491e-06, + "loss": 0.1298, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.43, + "memory/max_mem_allocated(gib)": 36.43, + "step": 1675 + }, + { + "epoch": 0.1611770928499303, + "grad_norm": 1.9552969309736254, + "learning_rate": 4.71787921020139e-06, + "loss": 0.1426, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.43, + "memory/max_mem_allocated(gib)": 36.43, + "step": 1676 + }, + { + "epoch": 0.16127326056642785, + "grad_norm": 3.129936717336938, + "learning_rate": 4.717527082945555e-06, + "loss": 0.1486, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.43, + "memory/max_mem_allocated(gib)": 36.43, + "step": 1677 + }, + { + "epoch": 0.16136942828292541, + "grad_norm": 2.607156145069165, + "learning_rate": 4.717174749231772e-06, + "loss": 0.1958, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.43, + "memory/max_mem_allocated(gib)": 36.43, + "step": 1678 + }, + { + "epoch": 0.161465595999423, + "grad_norm": 5.954754989762589, + "learning_rate": 4.716822209092845e-06, + "loss": 0.1618, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.43, + "memory/max_mem_allocated(gib)": 36.43, + "step": 1679 + }, + { + "epoch": 0.16156176371592057, + "grad_norm": 4.988995031411342, + "learning_rate": 4.716469462561595e-06, + "loss": 0.1508, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.43, + "memory/max_mem_allocated(gib)": 36.43, + "step": 1680 + }, + { + "epoch": 0.16165793143241813, + "grad_norm": 1.8823594298373665, + "learning_rate": 4.716116509670865e-06, + "loss": 0.1549, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.43, + "memory/max_mem_allocated(gib)": 36.43, + "step": 1681 + }, + { + "epoch": 0.1617540991489157, + "grad_norm": 1.7436389855397638, + "learning_rate": 4.715763350453514e-06, + "loss": 0.1318, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.43, + "memory/max_mem_allocated(gib)": 36.43, + "step": 1682 + }, + { + "epoch": 0.1618502668654133, + "grad_norm": 4.370959725984151, + "learning_rate": 4.715409984942424e-06, + "loss": 0.1899, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.43, + "memory/max_mem_allocated(gib)": 36.43, + "step": 1683 + }, + { + "epoch": 0.16194643458191085, + "grad_norm": 2.519900113211129, + "learning_rate": 4.7150564131704935e-06, + "loss": 0.1469, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.43, + "memory/max_mem_allocated(gib)": 36.43, + "step": 1684 + }, + { + "epoch": 0.16204260229840842, + "grad_norm": 1.8724297865518857, + "learning_rate": 4.714702635170641e-06, + "loss": 0.1824, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.43, + "memory/max_mem_allocated(gib)": 36.43, + "step": 1685 + }, + { + "epoch": 0.162138770014906, + "grad_norm": 2.766693988207766, + "learning_rate": 4.714348650975804e-06, + "loss": 0.1503, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.43, + "memory/max_mem_allocated(gib)": 36.43, + "step": 1686 + }, + { + "epoch": 0.16223493773140357, + "grad_norm": 3.0291241102754536, + "learning_rate": 4.7139944606189404e-06, + "loss": 0.165, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 36.43, + "memory/max_mem_allocated(gib)": 36.43, + "step": 1687 + }, + { + "epoch": 0.16233110544790114, + "grad_norm": 1.992025378527675, + "learning_rate": 4.7136400641330245e-06, + "loss": 0.1423, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1688 + }, + { + "epoch": 0.1624272731643987, + "grad_norm": 2.0292282613016193, + "learning_rate": 4.713285461551054e-06, + "loss": 0.1381, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1689 + }, + { + "epoch": 0.1625234408808963, + "grad_norm": 1.5998058736745184, + "learning_rate": 4.7129306529060415e-06, + "loss": 0.1466, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1690 + }, + { + "epoch": 0.16261960859739386, + "grad_norm": 2.044915757371715, + "learning_rate": 4.712575638231022e-06, + "loss": 0.1364, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1691 + }, + { + "epoch": 0.16271577631389142, + "grad_norm": 2.9741124556456056, + "learning_rate": 4.712220417559047e-06, + "loss": 0.1724, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1692 + }, + { + "epoch": 0.162811944030389, + "grad_norm": 1.8768486449908288, + "learning_rate": 4.711864990923189e-06, + "loss": 0.1271, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1693 + }, + { + "epoch": 0.16290811174688657, + "grad_norm": 1.7014135612959262, + "learning_rate": 4.71150935835654e-06, + "loss": 0.147, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1694 + }, + { + "epoch": 0.16300427946338414, + "grad_norm": 1.771430564133045, + "learning_rate": 4.711153519892209e-06, + "loss": 0.1507, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1695 + }, + { + "epoch": 0.1631004471798817, + "grad_norm": 2.254434516187504, + "learning_rate": 4.710797475563327e-06, + "loss": 0.1729, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1696 + }, + { + "epoch": 0.1631966148963793, + "grad_norm": 1.7606439214070424, + "learning_rate": 4.710441225403042e-06, + "loss": 0.1199, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1697 + }, + { + "epoch": 0.16329278261287686, + "grad_norm": 1.7384308843979286, + "learning_rate": 4.710084769444521e-06, + "loss": 0.1412, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1698 + }, + { + "epoch": 0.16338895032937442, + "grad_norm": 4.292430013614909, + "learning_rate": 4.7097281077209525e-06, + "loss": 0.1537, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1699 + }, + { + "epoch": 0.163485118045872, + "grad_norm": 2.8661842378642204, + "learning_rate": 4.709371240265543e-06, + "loss": 0.1825, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1700 + }, + { + "epoch": 0.16358128576236958, + "grad_norm": 1.852891925164844, + "learning_rate": 4.7090141671115165e-06, + "loss": 0.1213, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1701 + }, + { + "epoch": 0.16367745347886714, + "grad_norm": 3.423518852829672, + "learning_rate": 4.708656888292118e-06, + "loss": 0.1097, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1702 + }, + { + "epoch": 0.1637736211953647, + "grad_norm": 4.716190370513745, + "learning_rate": 4.708299403840611e-06, + "loss": 0.1421, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1703 + }, + { + "epoch": 0.1638697889118623, + "grad_norm": 2.987505686073353, + "learning_rate": 4.707941713790279e-06, + "loss": 0.1854, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1704 + }, + { + "epoch": 0.16396595662835986, + "grad_norm": 2.9151972015974055, + "learning_rate": 4.707583818174422e-06, + "loss": 0.196, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1705 + }, + { + "epoch": 0.16406212434485742, + "grad_norm": 3.871925144720012, + "learning_rate": 4.7072257170263635e-06, + "loss": 0.1701, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1706 + }, + { + "epoch": 0.16415829206135502, + "grad_norm": 2.5317019444348063, + "learning_rate": 4.706867410379443e-06, + "loss": 0.1771, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1707 + }, + { + "epoch": 0.16425445977785258, + "grad_norm": 1.8399595361076297, + "learning_rate": 4.706508898267019e-06, + "loss": 0.1743, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1708 + }, + { + "epoch": 0.16435062749435014, + "grad_norm": 4.138501421689619, + "learning_rate": 4.706150180722471e-06, + "loss": 0.1579, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1709 + }, + { + "epoch": 0.1644467952108477, + "grad_norm": 2.7378549900686266, + "learning_rate": 4.705791257779196e-06, + "loss": 0.135, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1710 + }, + { + "epoch": 0.1645429629273453, + "grad_norm": 3.8567592952347405, + "learning_rate": 4.7054321294706105e-06, + "loss": 0.1526, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1711 + }, + { + "epoch": 0.16463913064384286, + "grad_norm": 1.7190243822665492, + "learning_rate": 4.7050727958301505e-06, + "loss": 0.1558, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1712 + }, + { + "epoch": 0.16473529836034043, + "grad_norm": 4.328699354610528, + "learning_rate": 4.704713256891272e-06, + "loss": 0.1751, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1713 + }, + { + "epoch": 0.16483146607683802, + "grad_norm": 3.715616567586296, + "learning_rate": 4.704353512687447e-06, + "loss": 0.1827, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1714 + }, + { + "epoch": 0.16492763379333558, + "grad_norm": 1.9569762609563133, + "learning_rate": 4.703993563252172e-06, + "loss": 0.1529, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1715 + }, + { + "epoch": 0.16502380150983315, + "grad_norm": 4.332246186521662, + "learning_rate": 4.703633408618955e-06, + "loss": 0.1539, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1716 + }, + { + "epoch": 0.1651199692263307, + "grad_norm": 4.330846046423564, + "learning_rate": 4.7032730488213306e-06, + "loss": 0.1864, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1717 + }, + { + "epoch": 0.1652161369428283, + "grad_norm": 5.3080406847794785, + "learning_rate": 4.702912483892849e-06, + "loss": 0.1747, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1718 + }, + { + "epoch": 0.16531230465932586, + "grad_norm": 3.5145811985619426, + "learning_rate": 4.702551713867078e-06, + "loss": 0.1409, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1719 + }, + { + "epoch": 0.16540847237582343, + "grad_norm": 4.498908574381529, + "learning_rate": 4.702190738777608e-06, + "loss": 0.1757, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1720 + }, + { + "epoch": 0.16550464009232102, + "grad_norm": 4.512332225185438, + "learning_rate": 4.701829558658047e-06, + "loss": 0.1538, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1721 + }, + { + "epoch": 0.16560080780881858, + "grad_norm": 4.274344632503572, + "learning_rate": 4.70146817354202e-06, + "loss": 0.1697, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1722 + }, + { + "epoch": 0.16569697552531615, + "grad_norm": 3.1963124297919094, + "learning_rate": 4.701106583463174e-06, + "loss": 0.1679, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1723 + }, + { + "epoch": 0.1657931432418137, + "grad_norm": 3.712455378073907, + "learning_rate": 4.700744788455175e-06, + "loss": 0.1473, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1724 + }, + { + "epoch": 0.1658893109583113, + "grad_norm": 3.7258894687298074, + "learning_rate": 4.700382788551705e-06, + "loss": 0.1467, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1725 + }, + { + "epoch": 0.16598547867480887, + "grad_norm": 8.736273911065561, + "learning_rate": 4.700020583786469e-06, + "loss": 0.1869, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1726 + }, + { + "epoch": 0.16608164639130643, + "grad_norm": 5.486525867975805, + "learning_rate": 4.699658174193189e-06, + "loss": 0.1734, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1727 + }, + { + "epoch": 0.16617781410780402, + "grad_norm": 2.798793722885619, + "learning_rate": 4.699295559805606e-06, + "loss": 0.1366, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1728 + }, + { + "epoch": 0.1662739818243016, + "grad_norm": 3.7652589687843356, + "learning_rate": 4.6989327406574795e-06, + "loss": 0.1469, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1729 + }, + { + "epoch": 0.16637014954079915, + "grad_norm": 5.531021840919187, + "learning_rate": 4.698569716782591e-06, + "loss": 0.1981, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1730 + }, + { + "epoch": 0.16646631725729671, + "grad_norm": 5.171613730889411, + "learning_rate": 4.698206488214737e-06, + "loss": 0.1647, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1731 + }, + { + "epoch": 0.1665624849737943, + "grad_norm": 2.726225288694653, + "learning_rate": 4.697843054987738e-06, + "loss": 0.1331, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1732 + }, + { + "epoch": 0.16665865269029187, + "grad_norm": 1.8377932836518562, + "learning_rate": 4.697479417135427e-06, + "loss": 0.1498, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1733 + }, + { + "epoch": 0.16675482040678943, + "grad_norm": 6.300062799294015, + "learning_rate": 4.697115574691661e-06, + "loss": 0.1686, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1734 + }, + { + "epoch": 0.16685098812328703, + "grad_norm": 5.449042075984239, + "learning_rate": 4.6967515276903154e-06, + "loss": 0.1706, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1735 + }, + { + "epoch": 0.1669471558397846, + "grad_norm": 2.4166907714424544, + "learning_rate": 4.696387276165284e-06, + "loss": 0.1567, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1736 + }, + { + "epoch": 0.16704332355628215, + "grad_norm": 1.9856223564769941, + "learning_rate": 4.6960228201504794e-06, + "loss": 0.1389, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1737 + }, + { + "epoch": 0.16713949127277972, + "grad_norm": 2.215679711352222, + "learning_rate": 4.695658159679833e-06, + "loss": 0.1303, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1738 + }, + { + "epoch": 0.1672356589892773, + "grad_norm": 4.875339286217297, + "learning_rate": 4.695293294787298e-06, + "loss": 0.1595, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1739 + }, + { + "epoch": 0.16733182670577487, + "grad_norm": 3.3204091366511923, + "learning_rate": 4.69492822550684e-06, + "loss": 0.1465, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1740 + }, + { + "epoch": 0.16742799442227244, + "grad_norm": 3.2542158521733793, + "learning_rate": 4.6945629518724514e-06, + "loss": 0.172, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1741 + }, + { + "epoch": 0.16752416213877003, + "grad_norm": 3.07389253113286, + "learning_rate": 4.694197473918139e-06, + "loss": 0.138, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1742 + }, + { + "epoch": 0.1676203298552676, + "grad_norm": 3.797269728458725, + "learning_rate": 4.693831791677931e-06, + "loss": 0.1301, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1743 + }, + { + "epoch": 0.16771649757176516, + "grad_norm": 4.028548216864453, + "learning_rate": 4.693465905185871e-06, + "loss": 0.1459, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1744 + }, + { + "epoch": 0.16781266528826272, + "grad_norm": 3.3115545697780377, + "learning_rate": 4.693099814476026e-06, + "loss": 0.1277, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1745 + }, + { + "epoch": 0.1679088330047603, + "grad_norm": 2.543847987420865, + "learning_rate": 4.692733519582481e-06, + "loss": 0.162, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1746 + }, + { + "epoch": 0.16800500072125787, + "grad_norm": 4.6032785206688684, + "learning_rate": 4.692367020539336e-06, + "loss": 0.1984, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1747 + }, + { + "epoch": 0.16810116843775544, + "grad_norm": 2.469252165230505, + "learning_rate": 4.6920003173807145e-06, + "loss": 0.132, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1748 + }, + { + "epoch": 0.16819733615425303, + "grad_norm": 3.2373682103671713, + "learning_rate": 4.691633410140759e-06, + "loss": 0.1445, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1749 + }, + { + "epoch": 0.1682935038707506, + "grad_norm": 1.9651228546243478, + "learning_rate": 4.691266298853629e-06, + "loss": 0.1466, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1750 + }, + { + "epoch": 0.16838967158724816, + "grad_norm": 4.740474178253229, + "learning_rate": 4.690898983553501e-06, + "loss": 0.1519, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1751 + }, + { + "epoch": 0.16848583930374572, + "grad_norm": 5.143805072319282, + "learning_rate": 4.690531464274577e-06, + "loss": 0.1739, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1752 + }, + { + "epoch": 0.1685820070202433, + "grad_norm": 5.4352219609363805, + "learning_rate": 4.69016374105107e-06, + "loss": 0.1605, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1753 + }, + { + "epoch": 0.16867817473674088, + "grad_norm": 2.5536378686444627, + "learning_rate": 4.68979581391722e-06, + "loss": 0.145, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1754 + }, + { + "epoch": 0.16877434245323844, + "grad_norm": 2.9873580718741035, + "learning_rate": 4.68942768290728e-06, + "loss": 0.1825, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1755 + }, + { + "epoch": 0.16887051016973603, + "grad_norm": 2.5999864817335205, + "learning_rate": 4.689059348055524e-06, + "loss": 0.1329, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1756 + }, + { + "epoch": 0.1689666778862336, + "grad_norm": 3.64964219119091, + "learning_rate": 4.688690809396244e-06, + "loss": 0.1579, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1757 + }, + { + "epoch": 0.16906284560273116, + "grad_norm": 2.592635291191585, + "learning_rate": 4.688322066963756e-06, + "loss": 0.1616, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1758 + }, + { + "epoch": 0.16915901331922872, + "grad_norm": 1.698767162128104, + "learning_rate": 4.687953120792386e-06, + "loss": 0.157, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1759 + }, + { + "epoch": 0.16925518103572632, + "grad_norm": 2.228543480191015, + "learning_rate": 4.687583970916487e-06, + "loss": 0.1535, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1760 + }, + { + "epoch": 0.16935134875222388, + "grad_norm": 2.9773032887290607, + "learning_rate": 4.687214617370426e-06, + "loss": 0.1519, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1761 + }, + { + "epoch": 0.16944751646872144, + "grad_norm": 1.4915223230994397, + "learning_rate": 4.686845060188593e-06, + "loss": 0.1448, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1762 + }, + { + "epoch": 0.16954368418521903, + "grad_norm": 3.4353313972140374, + "learning_rate": 4.686475299405393e-06, + "loss": 0.157, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1763 + }, + { + "epoch": 0.1696398519017166, + "grad_norm": 3.6984721988178637, + "learning_rate": 4.686105335055252e-06, + "loss": 0.1735, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1764 + }, + { + "epoch": 0.16973601961821416, + "grad_norm": 3.0717451885733325, + "learning_rate": 4.685735167172616e-06, + "loss": 0.1488, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1765 + }, + { + "epoch": 0.16983218733471173, + "grad_norm": 3.4912283897079037, + "learning_rate": 4.685364795791947e-06, + "loss": 0.1599, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1766 + }, + { + "epoch": 0.16992835505120932, + "grad_norm": 4.756073315666731, + "learning_rate": 4.68499422094773e-06, + "loss": 0.189, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1767 + }, + { + "epoch": 0.17002452276770688, + "grad_norm": 2.9268584211299, + "learning_rate": 4.684623442674463e-06, + "loss": 0.1428, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1768 + }, + { + "epoch": 0.17012069048420445, + "grad_norm": 1.5062432140964122, + "learning_rate": 4.684252461006668e-06, + "loss": 0.1441, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1769 + }, + { + "epoch": 0.17021685820070204, + "grad_norm": 2.9269702156712425, + "learning_rate": 4.6838812759788864e-06, + "loss": 0.1791, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1770 + }, + { + "epoch": 0.1703130259171996, + "grad_norm": 3.63830368901965, + "learning_rate": 4.6835098876256745e-06, + "loss": 0.1754, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1771 + }, + { + "epoch": 0.17040919363369716, + "grad_norm": 2.0638744543789334, + "learning_rate": 4.683138295981611e-06, + "loss": 0.1467, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1772 + }, + { + "epoch": 0.17050536135019473, + "grad_norm": 1.9354583730827055, + "learning_rate": 4.6827665010812895e-06, + "loss": 0.13, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1773 + }, + { + "epoch": 0.17060152906669232, + "grad_norm": 3.222970261435365, + "learning_rate": 4.6823945029593275e-06, + "loss": 0.1645, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1774 + }, + { + "epoch": 0.17069769678318988, + "grad_norm": 2.31064427399832, + "learning_rate": 4.682022301650359e-06, + "loss": 0.1423, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1775 + }, + { + "epoch": 0.17079386449968745, + "grad_norm": 1.6069274663149442, + "learning_rate": 4.681649897189036e-06, + "loss": 0.1249, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1776 + }, + { + "epoch": 0.17089003221618504, + "grad_norm": 1.818308968461769, + "learning_rate": 4.681277289610031e-06, + "loss": 0.1494, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1777 + }, + { + "epoch": 0.1709861999326826, + "grad_norm": 2.0073535262644207, + "learning_rate": 4.680904478948034e-06, + "loss": 0.115, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1778 + }, + { + "epoch": 0.17108236764918017, + "grad_norm": 2.0384705283051687, + "learning_rate": 4.680531465237755e-06, + "loss": 0.1635, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1779 + }, + { + "epoch": 0.17117853536567773, + "grad_norm": 2.820375194262684, + "learning_rate": 4.680158248513924e-06, + "loss": 0.1346, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1780 + }, + { + "epoch": 0.17127470308217532, + "grad_norm": 2.2045013650049254, + "learning_rate": 4.6797848288112866e-06, + "loss": 0.1678, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1781 + }, + { + "epoch": 0.1713708707986729, + "grad_norm": 1.743479416849207, + "learning_rate": 4.679411206164611e-06, + "loss": 0.1126, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1782 + }, + { + "epoch": 0.17146703851517045, + "grad_norm": 3.618282985188849, + "learning_rate": 4.6790373806086805e-06, + "loss": 0.1576, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1783 + }, + { + "epoch": 0.17156320623166804, + "grad_norm": 3.0846367580003498, + "learning_rate": 4.678663352178301e-06, + "loss": 0.1509, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1784 + }, + { + "epoch": 0.1716593739481656, + "grad_norm": 1.908123783273653, + "learning_rate": 4.678289120908295e-06, + "loss": 0.1683, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1785 + }, + { + "epoch": 0.17175554166466317, + "grad_norm": 3.225852698695338, + "learning_rate": 4.677914686833504e-06, + "loss": 0.1588, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1786 + }, + { + "epoch": 0.17185170938116073, + "grad_norm": 4.410765445505864, + "learning_rate": 4.6775400499887894e-06, + "loss": 0.1929, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1787 + }, + { + "epoch": 0.17194787709765832, + "grad_norm": 1.4672326266621163, + "learning_rate": 4.67716521040903e-06, + "loss": 0.1618, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1788 + }, + { + "epoch": 0.1720440448141559, + "grad_norm": 1.7590896211449454, + "learning_rate": 4.6767901681291265e-06, + "loss": 0.1555, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1789 + }, + { + "epoch": 0.17214021253065345, + "grad_norm": 3.447122340124772, + "learning_rate": 4.676414923183995e-06, + "loss": 0.1683, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1790 + }, + { + "epoch": 0.17223638024715104, + "grad_norm": 4.267019154792011, + "learning_rate": 4.676039475608571e-06, + "loss": 0.1562, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1791 + }, + { + "epoch": 0.1723325479636486, + "grad_norm": 3.6598285212375785, + "learning_rate": 4.675663825437811e-06, + "loss": 0.1482, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1792 + }, + { + "epoch": 0.17242871568014617, + "grad_norm": 1.3906073006690107, + "learning_rate": 4.675287972706689e-06, + "loss": 0.1413, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1793 + }, + { + "epoch": 0.17252488339664374, + "grad_norm": 1.9364616596253301, + "learning_rate": 4.674911917450198e-06, + "loss": 0.151, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1794 + }, + { + "epoch": 0.17262105111314133, + "grad_norm": 4.077118121126141, + "learning_rate": 4.674535659703349e-06, + "loss": 0.1519, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1795 + }, + { + "epoch": 0.1727172188296389, + "grad_norm": 3.869009537807247, + "learning_rate": 4.674159199501173e-06, + "loss": 0.1779, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1796 + }, + { + "epoch": 0.17281338654613645, + "grad_norm": 2.4269611979764925, + "learning_rate": 4.67378253687872e-06, + "loss": 0.1548, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1797 + }, + { + "epoch": 0.17290955426263405, + "grad_norm": 2.178690350420728, + "learning_rate": 4.673405671871057e-06, + "loss": 0.1478, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1798 + }, + { + "epoch": 0.1730057219791316, + "grad_norm": 2.9904097710949276, + "learning_rate": 4.673028604513273e-06, + "loss": 0.185, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1799 + }, + { + "epoch": 0.17310188969562917, + "grad_norm": 4.7213820379454905, + "learning_rate": 4.6726513348404736e-06, + "loss": 0.1579, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1800 + }, + { + "epoch": 0.17319805741212674, + "grad_norm": 1.9047601135978252, + "learning_rate": 4.6722738628877816e-06, + "loss": 0.1436, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1801 + }, + { + "epoch": 0.17329422512862433, + "grad_norm": 2.4644640666078628, + "learning_rate": 4.671896188690344e-06, + "loss": 0.1073, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1802 + }, + { + "epoch": 0.1733903928451219, + "grad_norm": 2.221221208841029, + "learning_rate": 4.671518312283321e-06, + "loss": 0.1522, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1803 + }, + { + "epoch": 0.17348656056161946, + "grad_norm": 3.0371482124807545, + "learning_rate": 4.6711402337018945e-06, + "loss": 0.2079, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1804 + }, + { + "epoch": 0.17358272827811705, + "grad_norm": 1.8693951583061135, + "learning_rate": 4.670761952981264e-06, + "loss": 0.1516, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1805 + }, + { + "epoch": 0.1736788959946146, + "grad_norm": 3.192577680389678, + "learning_rate": 4.67038347015665e-06, + "loss": 0.1377, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1806 + }, + { + "epoch": 0.17377506371111218, + "grad_norm": 1.7059453533333209, + "learning_rate": 4.670004785263289e-06, + "loss": 0.1332, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1807 + }, + { + "epoch": 0.17387123142760974, + "grad_norm": 2.63221872470425, + "learning_rate": 4.669625898336439e-06, + "loss": 0.1262, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1808 + }, + { + "epoch": 0.17396739914410733, + "grad_norm": 2.1357097777525444, + "learning_rate": 4.669246809411373e-06, + "loss": 0.1252, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1809 + }, + { + "epoch": 0.1740635668606049, + "grad_norm": 3.1269909057059775, + "learning_rate": 4.668867518523388e-06, + "loss": 0.1624, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1810 + }, + { + "epoch": 0.17415973457710246, + "grad_norm": 2.2098416541452646, + "learning_rate": 4.6684880257077955e-06, + "loss": 0.1439, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1811 + }, + { + "epoch": 0.17425590229360005, + "grad_norm": 3.0583247449599056, + "learning_rate": 4.668108330999929e-06, + "loss": 0.1509, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1812 + }, + { + "epoch": 0.17435207001009762, + "grad_norm": 1.8383991989650552, + "learning_rate": 4.667728434435136e-06, + "loss": 0.1585, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1813 + }, + { + "epoch": 0.17444823772659518, + "grad_norm": 2.2007684623607093, + "learning_rate": 4.6673483360487884e-06, + "loss": 0.1423, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1814 + }, + { + "epoch": 0.17454440544309274, + "grad_norm": 1.6402547939951075, + "learning_rate": 4.666968035876274e-06, + "loss": 0.1535, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1815 + }, + { + "epoch": 0.17464057315959033, + "grad_norm": 1.915419291726314, + "learning_rate": 4.666587533952998e-06, + "loss": 0.1469, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1816 + }, + { + "epoch": 0.1747367408760879, + "grad_norm": 3.0251438528141303, + "learning_rate": 4.6662068303143906e-06, + "loss": 0.1608, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1817 + }, + { + "epoch": 0.17483290859258546, + "grad_norm": 2.5721256024478514, + "learning_rate": 4.665825924995891e-06, + "loss": 0.1458, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1818 + }, + { + "epoch": 0.17492907630908305, + "grad_norm": 2.602091726993969, + "learning_rate": 4.665444818032966e-06, + "loss": 0.1539, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1819 + }, + { + "epoch": 0.17502524402558062, + "grad_norm": 1.7602528844277727, + "learning_rate": 4.665063509461098e-06, + "loss": 0.1501, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1820 + }, + { + "epoch": 0.17512141174207818, + "grad_norm": 3.129079924449285, + "learning_rate": 4.664681999315784e-06, + "loss": 0.1577, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1821 + }, + { + "epoch": 0.17521757945857575, + "grad_norm": 1.5116970505897742, + "learning_rate": 4.664300287632548e-06, + "loss": 0.1341, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1822 + }, + { + "epoch": 0.17531374717507334, + "grad_norm": 4.521059679899405, + "learning_rate": 4.663918374446927e-06, + "loss": 0.1618, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1823 + }, + { + "epoch": 0.1754099148915709, + "grad_norm": 4.192274456012072, + "learning_rate": 4.663536259794477e-06, + "loss": 0.1731, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1824 + }, + { + "epoch": 0.17550608260806846, + "grad_norm": 1.9460819370467577, + "learning_rate": 4.663153943710775e-06, + "loss": 0.184, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1825 + }, + { + "epoch": 0.17560225032456606, + "grad_norm": 3.7888881563127126, + "learning_rate": 4.662771426231417e-06, + "loss": 0.1473, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1826 + }, + { + "epoch": 0.17569841804106362, + "grad_norm": 1.829612181557914, + "learning_rate": 4.662388707392014e-06, + "loss": 0.1509, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1827 + }, + { + "epoch": 0.17579458575756118, + "grad_norm": 1.6551843174144052, + "learning_rate": 4.662005787228199e-06, + "loss": 0.1278, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1828 + }, + { + "epoch": 0.17589075347405875, + "grad_norm": 2.063873552699673, + "learning_rate": 4.661622665775623e-06, + "loss": 0.163, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1829 + }, + { + "epoch": 0.17598692119055634, + "grad_norm": 1.929733516229497, + "learning_rate": 4.661239343069956e-06, + "loss": 0.1581, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1830 + }, + { + "epoch": 0.1760830889070539, + "grad_norm": 1.717069268371783, + "learning_rate": 4.660855819146887e-06, + "loss": 0.1491, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1831 + }, + { + "epoch": 0.17617925662355147, + "grad_norm": 4.092106825523075, + "learning_rate": 4.660472094042121e-06, + "loss": 0.1356, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1832 + }, + { + "epoch": 0.17627542434004906, + "grad_norm": 1.7602417837045496, + "learning_rate": 4.660088167791386e-06, + "loss": 0.1405, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1833 + }, + { + "epoch": 0.17637159205654662, + "grad_norm": 1.5708187569291419, + "learning_rate": 4.659704040430426e-06, + "loss": 0.1267, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1834 + }, + { + "epoch": 0.17646775977304419, + "grad_norm": 1.8130848943569835, + "learning_rate": 4.659319711995003e-06, + "loss": 0.1519, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1835 + }, + { + "epoch": 0.17656392748954175, + "grad_norm": 1.934132774272246, + "learning_rate": 4.658935182520902e-06, + "loss": 0.155, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1836 + }, + { + "epoch": 0.17666009520603934, + "grad_norm": 2.1557988547592473, + "learning_rate": 4.65855045204392e-06, + "loss": 0.1465, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1837 + }, + { + "epoch": 0.1767562629225369, + "grad_norm": 2.275692654539129, + "learning_rate": 4.65816552059988e-06, + "loss": 0.1876, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1838 + }, + { + "epoch": 0.17685243063903447, + "grad_norm": 1.8019821189810825, + "learning_rate": 4.657780388224619e-06, + "loss": 0.1546, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1839 + }, + { + "epoch": 0.17694859835553206, + "grad_norm": 5.026206793126472, + "learning_rate": 4.657395054953992e-06, + "loss": 0.1745, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1840 + }, + { + "epoch": 0.17704476607202962, + "grad_norm": 3.4025226681556076, + "learning_rate": 4.657009520823877e-06, + "loss": 0.1456, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1841 + }, + { + "epoch": 0.1771409337885272, + "grad_norm": 3.017333692051989, + "learning_rate": 4.656623785870168e-06, + "loss": 0.1461, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1842 + }, + { + "epoch": 0.17723710150502475, + "grad_norm": 3.1992148423415316, + "learning_rate": 4.656237850128776e-06, + "loss": 0.1344, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1843 + }, + { + "epoch": 0.17733326922152234, + "grad_norm": 1.5171562684373352, + "learning_rate": 4.655851713635635e-06, + "loss": 0.1293, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1844 + }, + { + "epoch": 0.1774294369380199, + "grad_norm": 1.9672312058996408, + "learning_rate": 4.655465376426694e-06, + "loss": 0.1775, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1845 + }, + { + "epoch": 0.17752560465451747, + "grad_norm": 2.1989582635242964, + "learning_rate": 4.655078838537924e-06, + "loss": 0.1317, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1846 + }, + { + "epoch": 0.17762177237101506, + "grad_norm": 2.3332668483925914, + "learning_rate": 4.65469210000531e-06, + "loss": 0.1864, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1847 + }, + { + "epoch": 0.17771794008751263, + "grad_norm": 3.3422662448364338, + "learning_rate": 4.65430516086486e-06, + "loss": 0.1695, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1848 + }, + { + "epoch": 0.1778141078040102, + "grad_norm": 1.5695860676544442, + "learning_rate": 4.653918021152599e-06, + "loss": 0.1566, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1849 + }, + { + "epoch": 0.17791027552050775, + "grad_norm": 2.0907910507606116, + "learning_rate": 4.653530680904571e-06, + "loss": 0.1418, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1850 + }, + { + "epoch": 0.17800644323700535, + "grad_norm": 1.8330736107796408, + "learning_rate": 4.653143140156838e-06, + "loss": 0.1504, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1851 + }, + { + "epoch": 0.1781026109535029, + "grad_norm": 1.7066826102248533, + "learning_rate": 4.652755398945481e-06, + "loss": 0.1573, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1852 + }, + { + "epoch": 0.17819877867000047, + "grad_norm": 1.5385144378313977, + "learning_rate": 4.6523674573066e-06, + "loss": 0.1425, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1853 + }, + { + "epoch": 0.17829494638649807, + "grad_norm": 4.6490602414989715, + "learning_rate": 4.651979315276314e-06, + "loss": 0.1813, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1854 + }, + { + "epoch": 0.17839111410299563, + "grad_norm": 2.6974769279780695, + "learning_rate": 4.65159097289076e-06, + "loss": 0.1721, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1855 + }, + { + "epoch": 0.1784872818194932, + "grad_norm": 1.603903171179299, + "learning_rate": 4.6512024301860925e-06, + "loss": 0.1382, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1856 + }, + { + "epoch": 0.17858344953599076, + "grad_norm": 1.9517245841971522, + "learning_rate": 4.650813687198487e-06, + "loss": 0.1473, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1857 + }, + { + "epoch": 0.17867961725248835, + "grad_norm": 1.5938626366503894, + "learning_rate": 4.650424743964136e-06, + "loss": 0.1385, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1858 + }, + { + "epoch": 0.1787757849689859, + "grad_norm": 1.6955621325710868, + "learning_rate": 4.6500356005192514e-06, + "loss": 0.1552, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1859 + }, + { + "epoch": 0.17887195268548348, + "grad_norm": 1.8356989457234523, + "learning_rate": 4.649646256900064e-06, + "loss": 0.1667, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1860 + }, + { + "epoch": 0.17896812040198107, + "grad_norm": 3.6311983635393528, + "learning_rate": 4.649256713142823e-06, + "loss": 0.1921, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1861 + }, + { + "epoch": 0.17906428811847863, + "grad_norm": 1.4084806134041972, + "learning_rate": 4.6488669692837954e-06, + "loss": 0.1317, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1862 + }, + { + "epoch": 0.1791604558349762, + "grad_norm": 2.0478829403922494, + "learning_rate": 4.648477025359266e-06, + "loss": 0.1593, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1863 + }, + { + "epoch": 0.17925662355147376, + "grad_norm": 1.687709843262149, + "learning_rate": 4.648086881405542e-06, + "loss": 0.1476, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1864 + }, + { + "epoch": 0.17935279126797135, + "grad_norm": 1.6152707417399188, + "learning_rate": 4.647696537458947e-06, + "loss": 0.1328, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1865 + }, + { + "epoch": 0.17944895898446891, + "grad_norm": 2.0634198997496416, + "learning_rate": 4.647305993555821e-06, + "loss": 0.1309, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1866 + }, + { + "epoch": 0.17954512670096648, + "grad_norm": 2.5290050777207886, + "learning_rate": 4.646915249732526e-06, + "loss": 0.1501, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1867 + }, + { + "epoch": 0.17964129441746407, + "grad_norm": 3.456643647723134, + "learning_rate": 4.646524306025442e-06, + "loss": 0.1737, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1868 + }, + { + "epoch": 0.17973746213396163, + "grad_norm": 2.1265479509210783, + "learning_rate": 4.646133162470965e-06, + "loss": 0.1379, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1869 + }, + { + "epoch": 0.1798336298504592, + "grad_norm": 2.3686440742345765, + "learning_rate": 4.645741819105513e-06, + "loss": 0.1726, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1870 + }, + { + "epoch": 0.17992979756695676, + "grad_norm": 2.338807606924923, + "learning_rate": 4.645350275965521e-06, + "loss": 0.155, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1871 + }, + { + "epoch": 0.18002596528345435, + "grad_norm": 1.5286869971147872, + "learning_rate": 4.644958533087443e-06, + "loss": 0.1215, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1872 + }, + { + "epoch": 0.18012213299995192, + "grad_norm": 2.1362466991316773, + "learning_rate": 4.644566590507751e-06, + "loss": 0.1376, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1873 + }, + { + "epoch": 0.18021830071644948, + "grad_norm": 3.9383963510782873, + "learning_rate": 4.644174448262935e-06, + "loss": 0.1633, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1874 + }, + { + "epoch": 0.18031446843294707, + "grad_norm": 1.6873791330898378, + "learning_rate": 4.643782106389506e-06, + "loss": 0.1234, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1875 + }, + { + "epoch": 0.18041063614944464, + "grad_norm": 2.1025201935801117, + "learning_rate": 4.643389564923993e-06, + "loss": 0.1295, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1876 + }, + { + "epoch": 0.1805068038659422, + "grad_norm": 1.5914030641980987, + "learning_rate": 4.642996823902939e-06, + "loss": 0.1638, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1877 + }, + { + "epoch": 0.18060297158243976, + "grad_norm": 1.7047459161782168, + "learning_rate": 4.642603883362913e-06, + "loss": 0.1211, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1878 + }, + { + "epoch": 0.18069913929893736, + "grad_norm": 2.5054281508214977, + "learning_rate": 4.642210743340496e-06, + "loss": 0.1577, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1879 + }, + { + "epoch": 0.18079530701543492, + "grad_norm": 1.8293341329566808, + "learning_rate": 4.641817403872293e-06, + "loss": 0.1026, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1880 + }, + { + "epoch": 0.18089147473193248, + "grad_norm": 1.9485920174958902, + "learning_rate": 4.6414238649949235e-06, + "loss": 0.1545, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1881 + }, + { + "epoch": 0.18098764244843007, + "grad_norm": 3.623162588441379, + "learning_rate": 4.641030126745028e-06, + "loss": 0.1814, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1882 + }, + { + "epoch": 0.18108381016492764, + "grad_norm": 3.042948910545865, + "learning_rate": 4.640636189159263e-06, + "loss": 0.1596, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1883 + }, + { + "epoch": 0.1811799778814252, + "grad_norm": 3.0132728191005853, + "learning_rate": 4.640242052274307e-06, + "loss": 0.1578, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1884 + }, + { + "epoch": 0.18127614559792277, + "grad_norm": 1.8517997345988777, + "learning_rate": 4.639847716126855e-06, + "loss": 0.109, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1885 + }, + { + "epoch": 0.18137231331442036, + "grad_norm": 3.904058010138637, + "learning_rate": 4.639453180753619e-06, + "loss": 0.1451, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1886 + }, + { + "epoch": 0.18146848103091792, + "grad_norm": 4.666656340152816, + "learning_rate": 4.639058446191333e-06, + "loss": 0.1726, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1887 + }, + { + "epoch": 0.18156464874741549, + "grad_norm": 2.54971249256196, + "learning_rate": 4.638663512476748e-06, + "loss": 0.1477, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1888 + }, + { + "epoch": 0.18166081646391308, + "grad_norm": 2.6414587143674564, + "learning_rate": 4.6382683796466325e-06, + "loss": 0.1356, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1889 + }, + { + "epoch": 0.18175698418041064, + "grad_norm": 3.3086750241633665, + "learning_rate": 4.637873047737775e-06, + "loss": 0.146, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1890 + }, + { + "epoch": 0.1818531518969082, + "grad_norm": 2.050217937868591, + "learning_rate": 4.637477516786982e-06, + "loss": 0.1217, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1891 + }, + { + "epoch": 0.18194931961340577, + "grad_norm": 2.0495481287963164, + "learning_rate": 4.637081786831079e-06, + "loss": 0.1449, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1892 + }, + { + "epoch": 0.18204548732990336, + "grad_norm": 1.9809881125920377, + "learning_rate": 4.636685857906908e-06, + "loss": 0.146, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1893 + }, + { + "epoch": 0.18214165504640092, + "grad_norm": 4.177785275105216, + "learning_rate": 4.636289730051332e-06, + "loss": 0.1553, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1894 + }, + { + "epoch": 0.1822378227628985, + "grad_norm": 2.3295742025882404, + "learning_rate": 4.635893403301233e-06, + "loss": 0.1469, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1895 + }, + { + "epoch": 0.18233399047939608, + "grad_norm": 2.5495589169873623, + "learning_rate": 4.635496877693507e-06, + "loss": 0.1399, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1896 + }, + { + "epoch": 0.18243015819589364, + "grad_norm": 1.7212622308132517, + "learning_rate": 4.635100153265075e-06, + "loss": 0.135, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1897 + }, + { + "epoch": 0.1825263259123912, + "grad_norm": 2.87405500950079, + "learning_rate": 4.634703230052871e-06, + "loss": 0.1344, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1898 + }, + { + "epoch": 0.18262249362888877, + "grad_norm": 2.6885099241727746, + "learning_rate": 4.63430610809385e-06, + "loss": 0.1437, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1899 + }, + { + "epoch": 0.18271866134538636, + "grad_norm": 2.727097339131977, + "learning_rate": 4.633908787424986e-06, + "loss": 0.1433, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1900 + }, + { + "epoch": 0.18281482906188393, + "grad_norm": 1.5206718952258995, + "learning_rate": 4.63351126808327e-06, + "loss": 0.129, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1901 + }, + { + "epoch": 0.1829109967783815, + "grad_norm": 3.56296953230852, + "learning_rate": 4.633113550105711e-06, + "loss": 0.1639, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1902 + }, + { + "epoch": 0.18300716449487908, + "grad_norm": 4.180181768631916, + "learning_rate": 4.63271563352934e-06, + "loss": 0.1841, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1903 + }, + { + "epoch": 0.18310333221137665, + "grad_norm": 2.0464161756796457, + "learning_rate": 4.632317518391203e-06, + "loss": 0.1532, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1904 + }, + { + "epoch": 0.1831994999278742, + "grad_norm": 3.2082145516721483, + "learning_rate": 4.631919204728365e-06, + "loss": 0.1688, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1905 + }, + { + "epoch": 0.18329566764437177, + "grad_norm": 3.193787142930617, + "learning_rate": 4.631520692577912e-06, + "loss": 0.1357, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1906 + }, + { + "epoch": 0.18339183536086937, + "grad_norm": 2.1051383422153522, + "learning_rate": 4.6311219819769445e-06, + "loss": 0.1424, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1907 + }, + { + "epoch": 0.18348800307736693, + "grad_norm": 1.6768972163142075, + "learning_rate": 4.630723072962584e-06, + "loss": 0.1447, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1908 + }, + { + "epoch": 0.1835841707938645, + "grad_norm": 1.7417780814957162, + "learning_rate": 4.630323965571971e-06, + "loss": 0.1517, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1909 + }, + { + "epoch": 0.18368033851036208, + "grad_norm": 1.5271563865752709, + "learning_rate": 4.629924659842263e-06, + "loss": 0.1359, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1910 + }, + { + "epoch": 0.18377650622685965, + "grad_norm": 1.9798481001753698, + "learning_rate": 4.629525155810637e-06, + "loss": 0.1246, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1911 + }, + { + "epoch": 0.1838726739433572, + "grad_norm": 1.746330626035113, + "learning_rate": 4.629125453514286e-06, + "loss": 0.1306, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1912 + }, + { + "epoch": 0.18396884165985478, + "grad_norm": 1.6678757938527542, + "learning_rate": 4.628725552990426e-06, + "loss": 0.1522, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1913 + }, + { + "epoch": 0.18406500937635237, + "grad_norm": 1.6520429092552003, + "learning_rate": 4.628325454276287e-06, + "loss": 0.1298, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1914 + }, + { + "epoch": 0.18416117709284993, + "grad_norm": 2.354242788055145, + "learning_rate": 4.62792515740912e-06, + "loss": 0.1186, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1915 + }, + { + "epoch": 0.1842573448093475, + "grad_norm": 2.552885538999381, + "learning_rate": 4.627524662426194e-06, + "loss": 0.1228, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1916 + }, + { + "epoch": 0.1843535125258451, + "grad_norm": 1.6150433798890107, + "learning_rate": 4.627123969364796e-06, + "loss": 0.1423, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1917 + }, + { + "epoch": 0.18444968024234265, + "grad_norm": 2.556318156144233, + "learning_rate": 4.626723078262232e-06, + "loss": 0.2013, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1918 + }, + { + "epoch": 0.18454584795884021, + "grad_norm": 2.427712205687643, + "learning_rate": 4.6263219891558245e-06, + "loss": 0.1838, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1919 + }, + { + "epoch": 0.18464201567533778, + "grad_norm": 2.85951984616984, + "learning_rate": 4.625920702082918e-06, + "loss": 0.1392, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1920 + }, + { + "epoch": 0.18473818339183537, + "grad_norm": 4.456773108312897, + "learning_rate": 4.625519217080872e-06, + "loss": 0.1578, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1921 + }, + { + "epoch": 0.18483435110833293, + "grad_norm": 2.6060267095099428, + "learning_rate": 4.625117534187067e-06, + "loss": 0.1176, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1922 + }, + { + "epoch": 0.1849305188248305, + "grad_norm": 2.499868838814011, + "learning_rate": 4.624715653438899e-06, + "loss": 0.179, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1923 + }, + { + "epoch": 0.1850266865413281, + "grad_norm": 4.104396310356637, + "learning_rate": 4.624313574873787e-06, + "loss": 0.1839, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1924 + }, + { + "epoch": 0.18512285425782565, + "grad_norm": 1.7975185496716966, + "learning_rate": 4.623911298529163e-06, + "loss": 0.1419, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1925 + }, + { + "epoch": 0.18521902197432322, + "grad_norm": 4.226265641684893, + "learning_rate": 4.623508824442481e-06, + "loss": 0.1551, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1926 + }, + { + "epoch": 0.18531518969082078, + "grad_norm": 5.3159479418853115, + "learning_rate": 4.6231061526512135e-06, + "loss": 0.1895, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1927 + }, + { + "epoch": 0.18541135740731837, + "grad_norm": 1.4175512455325276, + "learning_rate": 4.622703283192849e-06, + "loss": 0.1293, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1928 + }, + { + "epoch": 0.18550752512381594, + "grad_norm": 2.308766490246478, + "learning_rate": 4.622300216104895e-06, + "loss": 0.139, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1929 + }, + { + "epoch": 0.1856036928403135, + "grad_norm": 2.7527999725610712, + "learning_rate": 4.621896951424882e-06, + "loss": 0.141, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1930 + }, + { + "epoch": 0.1856998605568111, + "grad_norm": 2.9716047777996972, + "learning_rate": 4.6214934891903505e-06, + "loss": 0.1523, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1931 + }, + { + "epoch": 0.18579602827330866, + "grad_norm": 2.081826881415647, + "learning_rate": 4.621089829438867e-06, + "loss": 0.1553, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1932 + }, + { + "epoch": 0.18589219598980622, + "grad_norm": 2.9275927890840174, + "learning_rate": 4.620685972208012e-06, + "loss": 0.2002, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1933 + }, + { + "epoch": 0.18598836370630378, + "grad_norm": 1.6092066288587341, + "learning_rate": 4.620281917535386e-06, + "loss": 0.1297, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1934 + }, + { + "epoch": 0.18608453142280137, + "grad_norm": 4.390326837557633, + "learning_rate": 4.6198776654586076e-06, + "loss": 0.1524, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1935 + }, + { + "epoch": 0.18618069913929894, + "grad_norm": 4.16849563493153, + "learning_rate": 4.619473216015313e-06, + "loss": 0.1918, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1936 + }, + { + "epoch": 0.1862768668557965, + "grad_norm": 1.9557033597775642, + "learning_rate": 4.619068569243159e-06, + "loss": 0.151, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1937 + }, + { + "epoch": 0.1863730345722941, + "grad_norm": 3.0296974264760435, + "learning_rate": 4.618663725179819e-06, + "loss": 0.1737, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1938 + }, + { + "epoch": 0.18646920228879166, + "grad_norm": 1.7273447831852116, + "learning_rate": 4.618258683862985e-06, + "loss": 0.1473, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1939 + }, + { + "epoch": 0.18656537000528922, + "grad_norm": 3.183666807709935, + "learning_rate": 4.617853445330367e-06, + "loss": 0.1531, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1940 + }, + { + "epoch": 0.18666153772178679, + "grad_norm": 1.6387590548461997, + "learning_rate": 4.6174480096196936e-06, + "loss": 0.1427, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1941 + }, + { + "epoch": 0.18675770543828438, + "grad_norm": 3.1737820005930666, + "learning_rate": 4.617042376768713e-06, + "loss": 0.1709, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1942 + }, + { + "epoch": 0.18685387315478194, + "grad_norm": 4.610067605487461, + "learning_rate": 4.616636546815191e-06, + "loss": 0.1726, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1943 + }, + { + "epoch": 0.1869500408712795, + "grad_norm": 2.704465876112703, + "learning_rate": 4.616230519796909e-06, + "loss": 0.1435, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1944 + }, + { + "epoch": 0.1870462085877771, + "grad_norm": 1.516711484830348, + "learning_rate": 4.615824295751673e-06, + "loss": 0.1405, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1945 + }, + { + "epoch": 0.18714237630427466, + "grad_norm": 3.0866588542594924, + "learning_rate": 4.615417874717301e-06, + "loss": 0.1603, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1946 + }, + { + "epoch": 0.18723854402077222, + "grad_norm": 4.620926968300338, + "learning_rate": 4.6150112567316315e-06, + "loss": 0.1452, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1947 + }, + { + "epoch": 0.1873347117372698, + "grad_norm": 1.6401568622976002, + "learning_rate": 4.6146044418325234e-06, + "loss": 0.1561, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1948 + }, + { + "epoch": 0.18743087945376738, + "grad_norm": 1.8112342989983665, + "learning_rate": 4.614197430057852e-06, + "loss": 0.1814, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1949 + }, + { + "epoch": 0.18752704717026494, + "grad_norm": 3.3708755670221127, + "learning_rate": 4.613790221445511e-06, + "loss": 0.1898, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1950 + }, + { + "epoch": 0.1876232148867625, + "grad_norm": 2.577500663368199, + "learning_rate": 4.613382816033413e-06, + "loss": 0.182, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1951 + }, + { + "epoch": 0.1877193826032601, + "grad_norm": 1.7836697327435567, + "learning_rate": 4.612975213859487e-06, + "loss": 0.1329, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1952 + }, + { + "epoch": 0.18781555031975766, + "grad_norm": 1.563234489079954, + "learning_rate": 4.612567414961684e-06, + "loss": 0.1382, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1953 + }, + { + "epoch": 0.18791171803625523, + "grad_norm": 2.3145868156733744, + "learning_rate": 4.61215941937797e-06, + "loss": 0.1405, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1954 + }, + { + "epoch": 0.1880078857527528, + "grad_norm": 3.710935190615885, + "learning_rate": 4.611751227146331e-06, + "loss": 0.1467, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1955 + }, + { + "epoch": 0.18810405346925038, + "grad_norm": 2.223195553782686, + "learning_rate": 4.61134283830477e-06, + "loss": 0.1762, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1956 + }, + { + "epoch": 0.18820022118574795, + "grad_norm": 3.0855570120528206, + "learning_rate": 4.610934252891311e-06, + "loss": 0.1501, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1957 + }, + { + "epoch": 0.1882963889022455, + "grad_norm": 2.22449657342721, + "learning_rate": 4.610525470943992e-06, + "loss": 0.154, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1958 + }, + { + "epoch": 0.1883925566187431, + "grad_norm": 2.899272925130672, + "learning_rate": 4.610116492500874e-06, + "loss": 0.1504, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1959 + }, + { + "epoch": 0.18848872433524066, + "grad_norm": 14.980890607151256, + "learning_rate": 4.6097073176000325e-06, + "loss": 0.1381, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1960 + }, + { + "epoch": 0.18858489205173823, + "grad_norm": 4.27491213741973, + "learning_rate": 4.609297946279563e-06, + "loss": 0.1568, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1961 + }, + { + "epoch": 0.1886810597682358, + "grad_norm": 1.8209168842184273, + "learning_rate": 4.60888837857758e-06, + "loss": 0.1248, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1962 + }, + { + "epoch": 0.18877722748473338, + "grad_norm": 11.311504593553268, + "learning_rate": 4.608478614532215e-06, + "loss": 0.1358, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1963 + }, + { + "epoch": 0.18887339520123095, + "grad_norm": 2.0778030226433875, + "learning_rate": 4.608068654181617e-06, + "loss": 0.1545, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1964 + }, + { + "epoch": 0.1889695629177285, + "grad_norm": 4.225120434921866, + "learning_rate": 4.607658497563956e-06, + "loss": 0.1162, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1965 + }, + { + "epoch": 0.1890657306342261, + "grad_norm": 3.9196940201126345, + "learning_rate": 4.607248144717419e-06, + "loss": 0.1455, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1966 + }, + { + "epoch": 0.18916189835072367, + "grad_norm": 2.3634406776030032, + "learning_rate": 4.60683759568021e-06, + "loss": 0.1545, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1967 + }, + { + "epoch": 0.18925806606722123, + "grad_norm": 2.0806749631690904, + "learning_rate": 4.606426850490551e-06, + "loss": 0.1087, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1968 + }, + { + "epoch": 0.1893542337837188, + "grad_norm": 5.1082769328283515, + "learning_rate": 4.606015909186686e-06, + "loss": 0.1521, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1969 + }, + { + "epoch": 0.1894504015002164, + "grad_norm": 4.623676988821101, + "learning_rate": 4.605604771806873e-06, + "loss": 0.126, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1970 + }, + { + "epoch": 0.18954656921671395, + "grad_norm": 12.973517454726421, + "learning_rate": 4.605193438389392e-06, + "loss": 0.1333, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1971 + }, + { + "epoch": 0.18964273693321151, + "grad_norm": 3.5273761376728414, + "learning_rate": 4.6047819089725365e-06, + "loss": 0.1696, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1972 + }, + { + "epoch": 0.1897389046497091, + "grad_norm": 2.256050128470194, + "learning_rate": 4.604370183594624e-06, + "loss": 0.14, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1973 + }, + { + "epoch": 0.18983507236620667, + "grad_norm": 5.874365325990946, + "learning_rate": 4.6039582622939855e-06, + "loss": 0.1911, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1974 + }, + { + "epoch": 0.18993124008270423, + "grad_norm": 7.532347559319205, + "learning_rate": 4.603546145108972e-06, + "loss": 0.1455, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1975 + }, + { + "epoch": 0.1900274077992018, + "grad_norm": 9.01673168485166, + "learning_rate": 4.603133832077953e-06, + "loss": 0.1756, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1976 + }, + { + "epoch": 0.1901235755156994, + "grad_norm": 2.2467189398744827, + "learning_rate": 4.602721323239317e-06, + "loss": 0.1312, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1977 + }, + { + "epoch": 0.19021974323219695, + "grad_norm": 4.742036350737353, + "learning_rate": 4.602308618631468e-06, + "loss": 0.1612, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1978 + }, + { + "epoch": 0.19031591094869452, + "grad_norm": 1.911869113929992, + "learning_rate": 4.601895718292832e-06, + "loss": 0.1365, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1979 + }, + { + "epoch": 0.1904120786651921, + "grad_norm": 1.968663497905805, + "learning_rate": 4.601482622261848e-06, + "loss": 0.1318, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1980 + }, + { + "epoch": 0.19050824638168967, + "grad_norm": 14.415214171553298, + "learning_rate": 4.60106933057698e-06, + "loss": 0.1189, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1981 + }, + { + "epoch": 0.19060441409818724, + "grad_norm": 2.412741123374002, + "learning_rate": 4.600655843276704e-06, + "loss": 0.1682, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1982 + }, + { + "epoch": 0.1907005818146848, + "grad_norm": 1.8818492830156617, + "learning_rate": 4.600242160399519e-06, + "loss": 0.1502, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1983 + }, + { + "epoch": 0.1907967495311824, + "grad_norm": 2.4161516835321377, + "learning_rate": 4.599828281983938e-06, + "loss": 0.1587, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1984 + }, + { + "epoch": 0.19089291724767996, + "grad_norm": 2.8251444191943813, + "learning_rate": 4.5994142080684956e-06, + "loss": 0.1864, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1985 + }, + { + "epoch": 0.19098908496417752, + "grad_norm": 23.608274973017757, + "learning_rate": 4.598999938691742e-06, + "loss": 0.1646, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1986 + }, + { + "epoch": 0.1910852526806751, + "grad_norm": 2.3871278560764675, + "learning_rate": 4.598585473892247e-06, + "loss": 0.1706, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1987 + }, + { + "epoch": 0.19118142039717267, + "grad_norm": 3.6150388587506472, + "learning_rate": 4.5981708137086e-06, + "loss": 0.1387, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1988 + }, + { + "epoch": 0.19127758811367024, + "grad_norm": 2.6182562061071777, + "learning_rate": 4.597755958179407e-06, + "loss": 0.1417, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1989 + }, + { + "epoch": 0.1913737558301678, + "grad_norm": 1.775604854874827, + "learning_rate": 4.597340907343289e-06, + "loss": 0.15, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1990 + }, + { + "epoch": 0.1914699235466654, + "grad_norm": 7.989853785492756, + "learning_rate": 4.596925661238891e-06, + "loss": 0.1215, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1991 + }, + { + "epoch": 0.19156609126316296, + "grad_norm": 1.8694549991543001, + "learning_rate": 4.596510219904874e-06, + "loss": 0.1364, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1992 + }, + { + "epoch": 0.19166225897966052, + "grad_norm": 2.0893695367791127, + "learning_rate": 4.596094583379916e-06, + "loss": 0.1469, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1993 + }, + { + "epoch": 0.1917584266961581, + "grad_norm": 2.3356031056638633, + "learning_rate": 4.595678751702713e-06, + "loss": 0.1238, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1994 + }, + { + "epoch": 0.19185459441265568, + "grad_norm": 2.2101304331214613, + "learning_rate": 4.595262724911981e-06, + "loss": 0.1273, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1995 + }, + { + "epoch": 0.19195076212915324, + "grad_norm": 1.7447394662156794, + "learning_rate": 4.594846503046453e-06, + "loss": 0.1262, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1996 + }, + { + "epoch": 0.1920469298456508, + "grad_norm": 1.6024825483514342, + "learning_rate": 4.594430086144881e-06, + "loss": 0.1473, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1997 + }, + { + "epoch": 0.1921430975621484, + "grad_norm": 2.13287598566656, + "learning_rate": 4.594013474246034e-06, + "loss": 0.186, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1998 + }, + { + "epoch": 0.19223926527864596, + "grad_norm": 3.359730582185539, + "learning_rate": 4.5935966673887e-06, + "loss": 0.1532, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 1999 + }, + { + "epoch": 0.19233543299514352, + "grad_norm": 2.508389313701477, + "learning_rate": 4.593179665611685e-06, + "loss": 0.1581, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2000 + }, + { + "epoch": 0.19243160071164112, + "grad_norm": 3.90638853702495, + "learning_rate": 4.592762468953812e-06, + "loss": 0.1455, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2001 + }, + { + "epoch": 0.19252776842813868, + "grad_norm": 3.0095508426775073, + "learning_rate": 4.592345077453925e-06, + "loss": 0.1235, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2002 + }, + { + "epoch": 0.19262393614463624, + "grad_norm": 2.2003112177737703, + "learning_rate": 4.591927491150882e-06, + "loss": 0.1451, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2003 + }, + { + "epoch": 0.1927201038611338, + "grad_norm": 3.3057727310026643, + "learning_rate": 4.591509710083563e-06, + "loss": 0.1494, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2004 + }, + { + "epoch": 0.1928162715776314, + "grad_norm": 4.248783900009912, + "learning_rate": 4.591091734290864e-06, + "loss": 0.142, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2005 + }, + { + "epoch": 0.19291243929412896, + "grad_norm": 2.6906446812224827, + "learning_rate": 4.590673563811701e-06, + "loss": 0.1526, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2006 + }, + { + "epoch": 0.19300860701062653, + "grad_norm": 1.6672914441104263, + "learning_rate": 4.590255198685006e-06, + "loss": 0.1197, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2007 + }, + { + "epoch": 0.19310477472712412, + "grad_norm": 1.7959047882325043, + "learning_rate": 4.589836638949729e-06, + "loss": 0.1389, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2008 + }, + { + "epoch": 0.19320094244362168, + "grad_norm": 1.6224872695249377, + "learning_rate": 4.58941788464484e-06, + "loss": 0.1381, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2009 + }, + { + "epoch": 0.19329711016011925, + "grad_norm": 5.335158359448938, + "learning_rate": 4.588998935809325e-06, + "loss": 0.1926, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2010 + }, + { + "epoch": 0.1933932778766168, + "grad_norm": 2.945365792358804, + "learning_rate": 4.588579792482192e-06, + "loss": 0.1763, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2011 + }, + { + "epoch": 0.1934894455931144, + "grad_norm": 1.925591575954031, + "learning_rate": 4.5881604547024625e-06, + "loss": 0.1212, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2012 + }, + { + "epoch": 0.19358561330961196, + "grad_norm": 6.57595261721495, + "learning_rate": 4.587740922509177e-06, + "loss": 0.1219, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2013 + }, + { + "epoch": 0.19368178102610953, + "grad_norm": 5.0335748166075, + "learning_rate": 4.5873211959413975e-06, + "loss": 0.1409, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2014 + }, + { + "epoch": 0.19377794874260712, + "grad_norm": 3.707030353014794, + "learning_rate": 4.586901275038201e-06, + "loss": 0.1666, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2015 + }, + { + "epoch": 0.19387411645910468, + "grad_norm": 2.1792216832905313, + "learning_rate": 4.586481159838682e-06, + "loss": 0.1322, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2016 + }, + { + "epoch": 0.19397028417560225, + "grad_norm": 2.066240412748503, + "learning_rate": 4.586060850381956e-06, + "loss": 0.1556, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2017 + }, + { + "epoch": 0.1940664518920998, + "grad_norm": 3.2685359474925044, + "learning_rate": 4.585640346707153e-06, + "loss": 0.1434, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2018 + }, + { + "epoch": 0.1941626196085974, + "grad_norm": 2.56703456712552, + "learning_rate": 4.585219648853426e-06, + "loss": 0.1202, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2019 + }, + { + "epoch": 0.19425878732509497, + "grad_norm": 2.062845058076797, + "learning_rate": 4.584798756859941e-06, + "loss": 0.12, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2020 + }, + { + "epoch": 0.19435495504159253, + "grad_norm": 2.490575825314293, + "learning_rate": 4.584377670765884e-06, + "loss": 0.1787, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2021 + }, + { + "epoch": 0.19445112275809012, + "grad_norm": 4.645256043644444, + "learning_rate": 4.583956390610461e-06, + "loss": 0.1477, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2022 + }, + { + "epoch": 0.1945472904745877, + "grad_norm": 4.8854314326110355, + "learning_rate": 4.5835349164328926e-06, + "loss": 0.1711, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2023 + }, + { + "epoch": 0.19464345819108525, + "grad_norm": 4.174702224356266, + "learning_rate": 4.58311324827242e-06, + "loss": 0.1729, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2024 + }, + { + "epoch": 0.19473962590758281, + "grad_norm": 2.3894799435598633, + "learning_rate": 4.582691386168302e-06, + "loss": 0.1512, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2025 + }, + { + "epoch": 0.1948357936240804, + "grad_norm": 3.1138594264639363, + "learning_rate": 4.5822693301598144e-06, + "loss": 0.1834, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2026 + }, + { + "epoch": 0.19493196134057797, + "grad_norm": 2.803207568035933, + "learning_rate": 4.581847080286253e-06, + "loss": 0.1396, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2027 + }, + { + "epoch": 0.19502812905707553, + "grad_norm": 3.7549031798306163, + "learning_rate": 4.5814246365869285e-06, + "loss": 0.1374, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2028 + }, + { + "epoch": 0.19512429677357312, + "grad_norm": 2.0237184785298608, + "learning_rate": 4.581001999101173e-06, + "loss": 0.1447, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2029 + }, + { + "epoch": 0.1952204644900707, + "grad_norm": 4.595981836004327, + "learning_rate": 4.580579167868336e-06, + "loss": 0.1661, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2030 + }, + { + "epoch": 0.19531663220656825, + "grad_norm": 5.890565559244078, + "learning_rate": 4.580156142927782e-06, + "loss": 0.1839, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2031 + }, + { + "epoch": 0.19541279992306582, + "grad_norm": 2.9923904866980053, + "learning_rate": 4.579732924318898e-06, + "loss": 0.1104, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2032 + }, + { + "epoch": 0.1955089676395634, + "grad_norm": 1.8892714258174481, + "learning_rate": 4.579309512081085e-06, + "loss": 0.1561, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2033 + }, + { + "epoch": 0.19560513535606097, + "grad_norm": 3.6633719117467236, + "learning_rate": 4.578885906253765e-06, + "loss": 0.1564, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2034 + }, + { + "epoch": 0.19570130307255854, + "grad_norm": 3.271351290449148, + "learning_rate": 4.5784621068763766e-06, + "loss": 0.1358, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2035 + }, + { + "epoch": 0.19579747078905613, + "grad_norm": 4.00070899982933, + "learning_rate": 4.578038113988376e-06, + "loss": 0.1356, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2036 + }, + { + "epoch": 0.1958936385055537, + "grad_norm": 2.8994045539970608, + "learning_rate": 4.57761392762924e-06, + "loss": 0.1652, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2037 + }, + { + "epoch": 0.19598980622205125, + "grad_norm": 3.3226565670765935, + "learning_rate": 4.5771895478384596e-06, + "loss": 0.1483, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2038 + }, + { + "epoch": 0.19608597393854882, + "grad_norm": 4.583593886061687, + "learning_rate": 4.576764974655546e-06, + "loss": 0.1812, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2039 + }, + { + "epoch": 0.1961821416550464, + "grad_norm": 2.6249364758969636, + "learning_rate": 4.5763402081200295e-06, + "loss": 0.1265, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2040 + }, + { + "epoch": 0.19627830937154397, + "grad_norm": 3.0141364425039523, + "learning_rate": 4.575915248271456e-06, + "loss": 0.1585, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2041 + }, + { + "epoch": 0.19637447708804154, + "grad_norm": 2.211681695006481, + "learning_rate": 4.57549009514939e-06, + "loss": 0.1199, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2042 + }, + { + "epoch": 0.19647064480453913, + "grad_norm": 4.360340645738712, + "learning_rate": 4.575064748793416e-06, + "loss": 0.1867, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2043 + }, + { + "epoch": 0.1965668125210367, + "grad_norm": 6.023373715943655, + "learning_rate": 4.574639209243134e-06, + "loss": 0.1661, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2044 + }, + { + "epoch": 0.19666298023753426, + "grad_norm": 3.518386865489176, + "learning_rate": 4.574213476538162e-06, + "loss": 0.1765, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2045 + }, + { + "epoch": 0.19675914795403182, + "grad_norm": 2.4525210538311546, + "learning_rate": 4.5737875507181375e-06, + "loss": 0.1687, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2046 + }, + { + "epoch": 0.1968553156705294, + "grad_norm": 2.0610165226190786, + "learning_rate": 4.573361431822717e-06, + "loss": 0.1381, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2047 + }, + { + "epoch": 0.19695148338702698, + "grad_norm": 2.591456128318878, + "learning_rate": 4.5729351198915715e-06, + "loss": 0.1731, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2048 + }, + { + "epoch": 0.19704765110352454, + "grad_norm": 2.8514374167020535, + "learning_rate": 4.572508614964392e-06, + "loss": 0.1449, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2049 + }, + { + "epoch": 0.19714381882002213, + "grad_norm": 1.7786904016141722, + "learning_rate": 4.572081917080888e-06, + "loss": 0.1565, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2050 + }, + { + "epoch": 0.1972399865365197, + "grad_norm": 3.4172146178503864, + "learning_rate": 4.571655026280785e-06, + "loss": 0.1289, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2051 + }, + { + "epoch": 0.19733615425301726, + "grad_norm": 3.9801089245223347, + "learning_rate": 4.57122794260383e-06, + "loss": 0.1486, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2052 + }, + { + "epoch": 0.19743232196951482, + "grad_norm": 2.0645315675215703, + "learning_rate": 4.570800666089784e-06, + "loss": 0.1544, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2053 + }, + { + "epoch": 0.19752848968601242, + "grad_norm": 1.7858202067542976, + "learning_rate": 4.570373196778427e-06, + "loss": 0.1267, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2054 + }, + { + "epoch": 0.19762465740250998, + "grad_norm": 2.474784808812143, + "learning_rate": 4.56994553470956e-06, + "loss": 0.1738, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2055 + }, + { + "epoch": 0.19772082511900754, + "grad_norm": 1.7257687078374995, + "learning_rate": 4.569517679922997e-06, + "loss": 0.1259, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2056 + }, + { + "epoch": 0.19781699283550513, + "grad_norm": 3.2585704734625938, + "learning_rate": 4.569089632458574e-06, + "loss": 0.1471, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2057 + }, + { + "epoch": 0.1979131605520027, + "grad_norm": 2.676944300717021, + "learning_rate": 4.568661392356143e-06, + "loss": 0.1576, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2058 + }, + { + "epoch": 0.19800932826850026, + "grad_norm": 7.940382270285948, + "learning_rate": 4.568232959655574e-06, + "loss": 0.1483, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2059 + }, + { + "epoch": 0.19810549598499783, + "grad_norm": 1.6483785854550974, + "learning_rate": 4.567804334396756e-06, + "loss": 0.1239, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2060 + }, + { + "epoch": 0.19820166370149542, + "grad_norm": 2.068659082024925, + "learning_rate": 4.567375516619595e-06, + "loss": 0.1635, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2061 + }, + { + "epoch": 0.19829783141799298, + "grad_norm": 2.2682667373803853, + "learning_rate": 4.5669465063640135e-06, + "loss": 0.1551, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2062 + }, + { + "epoch": 0.19839399913449055, + "grad_norm": 2.78298808290213, + "learning_rate": 4.566517303669956e-06, + "loss": 0.1617, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2063 + }, + { + "epoch": 0.19849016685098814, + "grad_norm": 2.5117008444242197, + "learning_rate": 4.566087908577382e-06, + "loss": 0.1533, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2064 + }, + { + "epoch": 0.1985863345674857, + "grad_norm": 5.250587499539838, + "learning_rate": 4.565658321126267e-06, + "loss": 0.1571, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2065 + }, + { + "epoch": 0.19868250228398326, + "grad_norm": 2.2327519968868543, + "learning_rate": 4.56522854135661e-06, + "loss": 0.1219, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2066 + }, + { + "epoch": 0.19877867000048083, + "grad_norm": 2.919407018619772, + "learning_rate": 4.564798569308423e-06, + "loss": 0.1744, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2067 + }, + { + "epoch": 0.19887483771697842, + "grad_norm": 2.002501291976568, + "learning_rate": 4.564368405021738e-06, + "loss": 0.1391, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2068 + }, + { + "epoch": 0.19897100543347598, + "grad_norm": 2.9422459474988494, + "learning_rate": 4.563938048536603e-06, + "loss": 0.1311, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2069 + }, + { + "epoch": 0.19906717314997355, + "grad_norm": 1.737204294494121, + "learning_rate": 4.5635074998930885e-06, + "loss": 0.1576, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2070 + }, + { + "epoch": 0.19916334086647114, + "grad_norm": 1.8567003271085396, + "learning_rate": 4.563076759131277e-06, + "loss": 0.1484, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2071 + }, + { + "epoch": 0.1992595085829687, + "grad_norm": 2.1993489882570847, + "learning_rate": 4.5626458262912745e-06, + "loss": 0.1618, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2072 + }, + { + "epoch": 0.19935567629946627, + "grad_norm": 2.1574485967570647, + "learning_rate": 4.562214701413199e-06, + "loss": 0.1153, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2073 + }, + { + "epoch": 0.19945184401596383, + "grad_norm": 25.147995045917433, + "learning_rate": 4.561783384537189e-06, + "loss": 0.1693, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2074 + }, + { + "epoch": 0.19954801173246142, + "grad_norm": 2.8663340581678067, + "learning_rate": 4.561351875703406e-06, + "loss": 0.1454, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2075 + }, + { + "epoch": 0.19964417944895899, + "grad_norm": 3.2149864222458984, + "learning_rate": 4.56092017495202e-06, + "loss": 0.1337, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2076 + }, + { + "epoch": 0.19974034716545655, + "grad_norm": 2.241472826327328, + "learning_rate": 4.5604882823232255e-06, + "loss": 0.1217, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2077 + }, + { + "epoch": 0.19983651488195414, + "grad_norm": 1.7816523734640375, + "learning_rate": 4.560056197857232e-06, + "loss": 0.1409, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2078 + }, + { + "epoch": 0.1999326825984517, + "grad_norm": 5.255390764600174, + "learning_rate": 4.559623921594269e-06, + "loss": 0.1701, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2079 + }, + { + "epoch": 0.20002885031494927, + "grad_norm": 2.5116775354224643, + "learning_rate": 4.559191453574582e-06, + "loss": 0.1571, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2080 + }, + { + "epoch": 0.20012501803144683, + "grad_norm": 2.674142674686076, + "learning_rate": 4.558758793838435e-06, + "loss": 0.1361, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2081 + }, + { + "epoch": 0.20022118574794442, + "grad_norm": 1.5587346142877707, + "learning_rate": 4.5583259424261104e-06, + "loss": 0.1245, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2082 + }, + { + "epoch": 0.200317353464442, + "grad_norm": 2.7087408042994516, + "learning_rate": 4.557892899377906e-06, + "loss": 0.1806, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2083 + }, + { + "epoch": 0.20041352118093955, + "grad_norm": 1.7774070998085196, + "learning_rate": 4.5574596647341414e-06, + "loss": 0.1536, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2084 + }, + { + "epoch": 0.20050968889743714, + "grad_norm": 2.0742474591162785, + "learning_rate": 4.5570262385351514e-06, + "loss": 0.1726, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2085 + }, + { + "epoch": 0.2006058566139347, + "grad_norm": 3.7348858019395577, + "learning_rate": 4.556592620821288e-06, + "loss": 0.1511, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2086 + }, + { + "epoch": 0.20070202433043227, + "grad_norm": 2.587785100882935, + "learning_rate": 4.556158811632924e-06, + "loss": 0.1497, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2087 + }, + { + "epoch": 0.20079819204692984, + "grad_norm": 1.878358134276031, + "learning_rate": 4.555724811010447e-06, + "loss": 0.1268, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2088 + }, + { + "epoch": 0.20089435976342743, + "grad_norm": 4.274500369377992, + "learning_rate": 4.555290618994263e-06, + "loss": 0.1521, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2089 + }, + { + "epoch": 0.200990527479925, + "grad_norm": 2.4703394082539267, + "learning_rate": 4.554856235624799e-06, + "loss": 0.1433, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2090 + }, + { + "epoch": 0.20108669519642255, + "grad_norm": 3.8858501731041497, + "learning_rate": 4.5544216609424954e-06, + "loss": 0.1652, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2091 + }, + { + "epoch": 0.20118286291292015, + "grad_norm": 2.396028274946494, + "learning_rate": 4.553986894987813e-06, + "loss": 0.149, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2092 + }, + { + "epoch": 0.2012790306294177, + "grad_norm": 1.965746514377709, + "learning_rate": 4.553551937801229e-06, + "loss": 0.1287, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2093 + }, + { + "epoch": 0.20137519834591527, + "grad_norm": 2.907375068799356, + "learning_rate": 4.553116789423241e-06, + "loss": 0.1495, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2094 + }, + { + "epoch": 0.20147136606241284, + "grad_norm": 2.2158052508336707, + "learning_rate": 4.55268144989436e-06, + "loss": 0.1358, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2095 + }, + { + "epoch": 0.20156753377891043, + "grad_norm": 2.7742017692156793, + "learning_rate": 4.5522459192551175e-06, + "loss": 0.1355, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2096 + }, + { + "epoch": 0.201663701495408, + "grad_norm": 3.3168829837597484, + "learning_rate": 4.551810197546065e-06, + "loss": 0.1693, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2097 + }, + { + "epoch": 0.20175986921190556, + "grad_norm": 2.8235948574794842, + "learning_rate": 4.551374284807767e-06, + "loss": 0.129, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2098 + }, + { + "epoch": 0.20185603692840315, + "grad_norm": 2.4021742202460548, + "learning_rate": 4.55093818108081e-06, + "loss": 0.1428, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2099 + }, + { + "epoch": 0.2019522046449007, + "grad_norm": 2.5347611916933417, + "learning_rate": 4.550501886405795e-06, + "loss": 0.1747, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2100 + }, + { + "epoch": 0.20204837236139828, + "grad_norm": 2.49451651379713, + "learning_rate": 4.550065400823344e-06, + "loss": 0.1603, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2101 + }, + { + "epoch": 0.20214454007789584, + "grad_norm": 2.4765053735944442, + "learning_rate": 4.549628724374092e-06, + "loss": 0.1553, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2102 + }, + { + "epoch": 0.20224070779439343, + "grad_norm": 4.495788870235578, + "learning_rate": 4.549191857098698e-06, + "loss": 0.1591, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2103 + }, + { + "epoch": 0.202336875510891, + "grad_norm": 1.8110750748627427, + "learning_rate": 4.548754799037834e-06, + "loss": 0.1566, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2104 + }, + { + "epoch": 0.20243304322738856, + "grad_norm": 2.9060806711533935, + "learning_rate": 4.548317550232192e-06, + "loss": 0.1223, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2105 + }, + { + "epoch": 0.20252921094388615, + "grad_norm": 4.24533164936001, + "learning_rate": 4.54788011072248e-06, + "loss": 0.1646, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2106 + }, + { + "epoch": 0.20262537866038371, + "grad_norm": 10.386679398924231, + "learning_rate": 4.547442480549425e-06, + "loss": 0.159, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2107 + }, + { + "epoch": 0.20272154637688128, + "grad_norm": 2.028971782075699, + "learning_rate": 4.547004659753774e-06, + "loss": 0.141, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2108 + }, + { + "epoch": 0.20281771409337884, + "grad_norm": 3.614462158930392, + "learning_rate": 4.546566648376286e-06, + "loss": 0.1516, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2109 + }, + { + "epoch": 0.20291388180987643, + "grad_norm": 2.391143876614221, + "learning_rate": 4.546128446457743e-06, + "loss": 0.1379, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2110 + }, + { + "epoch": 0.203010049526374, + "grad_norm": 1.954811693152315, + "learning_rate": 4.5456900540389416e-06, + "loss": 0.179, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2111 + }, + { + "epoch": 0.20310621724287156, + "grad_norm": 7.301087942043066, + "learning_rate": 4.545251471160698e-06, + "loss": 0.1555, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2112 + }, + { + "epoch": 0.20320238495936915, + "grad_norm": 3.8662394172298, + "learning_rate": 4.5448126978638464e-06, + "loss": 0.1759, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2113 + }, + { + "epoch": 0.20329855267586672, + "grad_norm": 4.462251349856689, + "learning_rate": 4.544373734189237e-06, + "loss": 0.1776, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2114 + }, + { + "epoch": 0.20339472039236428, + "grad_norm": 2.622207658082742, + "learning_rate": 4.543934580177738e-06, + "loss": 0.1555, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2115 + }, + { + "epoch": 0.20349088810886184, + "grad_norm": 4.463866560173349, + "learning_rate": 4.543495235870237e-06, + "loss": 0.1656, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2116 + }, + { + "epoch": 0.20358705582535944, + "grad_norm": 7.922550341731097, + "learning_rate": 4.543055701307637e-06, + "loss": 0.1979, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2117 + }, + { + "epoch": 0.203683223541857, + "grad_norm": 3.018729188013921, + "learning_rate": 4.542615976530861e-06, + "loss": 0.126, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2118 + }, + { + "epoch": 0.20377939125835456, + "grad_norm": 3.0483893109416678, + "learning_rate": 4.5421760615808475e-06, + "loss": 0.1585, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2119 + }, + { + "epoch": 0.20387555897485216, + "grad_norm": 1.7374588061785157, + "learning_rate": 4.541735956498555e-06, + "loss": 0.1571, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2120 + }, + { + "epoch": 0.20397172669134972, + "grad_norm": 3.761295339849798, + "learning_rate": 4.541295661324957e-06, + "loss": 0.1722, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2121 + }, + { + "epoch": 0.20406789440784728, + "grad_norm": 4.688014404624404, + "learning_rate": 4.540855176101048e-06, + "loss": 0.1512, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2122 + }, + { + "epoch": 0.20416406212434485, + "grad_norm": 4.282913409509182, + "learning_rate": 4.540414500867837e-06, + "loss": 0.1814, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2123 + }, + { + "epoch": 0.20426022984084244, + "grad_norm": 2.2529752273555257, + "learning_rate": 4.539973635666352e-06, + "loss": 0.1255, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2124 + }, + { + "epoch": 0.20435639755734, + "grad_norm": 4.121725269679789, + "learning_rate": 4.539532580537641e-06, + "loss": 0.1635, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2125 + }, + { + "epoch": 0.20445256527383757, + "grad_norm": 3.129199918357228, + "learning_rate": 4.539091335522764e-06, + "loss": 0.1622, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2126 + }, + { + "epoch": 0.20454873299033516, + "grad_norm": 3.5092783929812157, + "learning_rate": 4.538649900662805e-06, + "loss": 0.1335, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2127 + }, + { + "epoch": 0.20464490070683272, + "grad_norm": 1.9452129947362085, + "learning_rate": 4.538208275998861e-06, + "loss": 0.1486, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2128 + }, + { + "epoch": 0.20474106842333029, + "grad_norm": 1.8176062928682946, + "learning_rate": 4.53776646157205e-06, + "loss": 0.1669, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2129 + }, + { + "epoch": 0.20483723613982785, + "grad_norm": 3.6190438492969412, + "learning_rate": 4.537324457423505e-06, + "loss": 0.1742, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2130 + }, + { + "epoch": 0.20493340385632544, + "grad_norm": 1.549500286825324, + "learning_rate": 4.5368822635943785e-06, + "loss": 0.1424, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2131 + }, + { + "epoch": 0.205029571572823, + "grad_norm": 7.897668286761604, + "learning_rate": 4.53643988012584e-06, + "loss": 0.1389, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2132 + }, + { + "epoch": 0.20512573928932057, + "grad_norm": 1.9219194009306955, + "learning_rate": 4.535997307059075e-06, + "loss": 0.1353, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2133 + }, + { + "epoch": 0.20522190700581816, + "grad_norm": 2.973311165349479, + "learning_rate": 4.535554544435291e-06, + "loss": 0.1614, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2134 + }, + { + "epoch": 0.20531807472231572, + "grad_norm": 2.1731126470079927, + "learning_rate": 4.535111592295709e-06, + "loss": 0.1307, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2135 + }, + { + "epoch": 0.2054142424388133, + "grad_norm": 2.6066191072364444, + "learning_rate": 4.534668450681569e-06, + "loss": 0.1898, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2136 + }, + { + "epoch": 0.20551041015531085, + "grad_norm": 1.9052223380970192, + "learning_rate": 4.534225119634128e-06, + "loss": 0.1486, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2137 + }, + { + "epoch": 0.20560657787180844, + "grad_norm": 1.4441304237330235, + "learning_rate": 4.533781599194663e-06, + "loss": 0.1284, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2138 + }, + { + "epoch": 0.205702745588306, + "grad_norm": 2.258339307208297, + "learning_rate": 4.533337889404466e-06, + "loss": 0.1653, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2139 + }, + { + "epoch": 0.20579891330480357, + "grad_norm": 3.9961367781259907, + "learning_rate": 4.532893990304848e-06, + "loss": 0.1371, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2140 + }, + { + "epoch": 0.20589508102130116, + "grad_norm": 3.2739982838141803, + "learning_rate": 4.532449901937136e-06, + "loss": 0.1703, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2141 + }, + { + "epoch": 0.20599124873779873, + "grad_norm": 2.5330161004926484, + "learning_rate": 4.532005624342678e-06, + "loss": 0.1506, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2142 + }, + { + "epoch": 0.2060874164542963, + "grad_norm": 2.113480818225016, + "learning_rate": 4.531561157562836e-06, + "loss": 0.1536, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2143 + }, + { + "epoch": 0.20618358417079385, + "grad_norm": 2.877779025892288, + "learning_rate": 4.531116501638992e-06, + "loss": 0.1619, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2144 + }, + { + "epoch": 0.20627975188729145, + "grad_norm": 1.8940515711952637, + "learning_rate": 4.530671656612544e-06, + "loss": 0.1227, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2145 + }, + { + "epoch": 0.206375919603789, + "grad_norm": 2.1604221478306296, + "learning_rate": 4.5302266225249085e-06, + "loss": 0.1554, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2146 + }, + { + "epoch": 0.20647208732028657, + "grad_norm": 3.5630999767292253, + "learning_rate": 4.529781399417519e-06, + "loss": 0.1535, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2147 + }, + { + "epoch": 0.20656825503678417, + "grad_norm": 1.951697084702034, + "learning_rate": 4.529335987331828e-06, + "loss": 0.1373, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2148 + }, + { + "epoch": 0.20666442275328173, + "grad_norm": 1.53553995202698, + "learning_rate": 4.528890386309304e-06, + "loss": 0.1187, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2149 + }, + { + "epoch": 0.2067605904697793, + "grad_norm": 8.075332327971337, + "learning_rate": 4.528444596391433e-06, + "loss": 0.1569, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2150 + }, + { + "epoch": 0.20685675818627686, + "grad_norm": 2.074229886273976, + "learning_rate": 4.527998617619721e-06, + "loss": 0.1459, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2151 + }, + { + "epoch": 0.20695292590277445, + "grad_norm": 2.1600361093269242, + "learning_rate": 4.527552450035689e-06, + "loss": 0.1484, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2152 + }, + { + "epoch": 0.207049093619272, + "grad_norm": 2.116001873200312, + "learning_rate": 4.527106093680875e-06, + "loss": 0.1333, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2153 + }, + { + "epoch": 0.20714526133576958, + "grad_norm": 2.066675427719814, + "learning_rate": 4.526659548596838e-06, + "loss": 0.1487, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2154 + }, + { + "epoch": 0.20724142905226717, + "grad_norm": 1.5242248670275933, + "learning_rate": 4.526212814825152e-06, + "loss": 0.1419, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2155 + }, + { + "epoch": 0.20733759676876473, + "grad_norm": 2.5441590186731355, + "learning_rate": 4.525765892407409e-06, + "loss": 0.1421, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2156 + }, + { + "epoch": 0.2074337644852623, + "grad_norm": 2.2094361964753473, + "learning_rate": 4.525318781385219e-06, + "loss": 0.1777, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2157 + }, + { + "epoch": 0.20752993220175986, + "grad_norm": 2.3914933137428944, + "learning_rate": 4.52487148180021e-06, + "loss": 0.168, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2158 + }, + { + "epoch": 0.20762609991825745, + "grad_norm": 4.915870417195939, + "learning_rate": 4.524423993694024e-06, + "loss": 0.1533, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2159 + }, + { + "epoch": 0.20772226763475501, + "grad_norm": 2.0059588927231196, + "learning_rate": 4.523976317108326e-06, + "loss": 0.1734, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2160 + }, + { + "epoch": 0.20781843535125258, + "grad_norm": 2.183576232495579, + "learning_rate": 4.523528452084796e-06, + "loss": 0.1363, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2161 + }, + { + "epoch": 0.20791460306775017, + "grad_norm": 2.305454985008706, + "learning_rate": 4.52308039866513e-06, + "loss": 0.1497, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2162 + }, + { + "epoch": 0.20801077078424773, + "grad_norm": 3.3612423848739086, + "learning_rate": 4.522632156891045e-06, + "loss": 0.1301, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2163 + }, + { + "epoch": 0.2081069385007453, + "grad_norm": 2.3804268798870285, + "learning_rate": 4.5221837268042715e-06, + "loss": 0.1611, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2164 + }, + { + "epoch": 0.20820310621724286, + "grad_norm": 1.9877352629220584, + "learning_rate": 4.52173510844656e-06, + "loss": 0.1364, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2165 + }, + { + "epoch": 0.20829927393374045, + "grad_norm": 3.9218464628568706, + "learning_rate": 4.521286301859679e-06, + "loss": 0.1477, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2166 + }, + { + "epoch": 0.20839544165023802, + "grad_norm": 3.1194194615963733, + "learning_rate": 4.520837307085413e-06, + "loss": 0.1524, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2167 + }, + { + "epoch": 0.20849160936673558, + "grad_norm": 1.6958269711927827, + "learning_rate": 4.520388124165564e-06, + "loss": 0.1501, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2168 + }, + { + "epoch": 0.20858777708323317, + "grad_norm": 1.5670311353230741, + "learning_rate": 4.519938753141955e-06, + "loss": 0.1073, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2169 + }, + { + "epoch": 0.20868394479973074, + "grad_norm": 1.9806133448624506, + "learning_rate": 4.51948919405642e-06, + "loss": 0.1118, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2170 + }, + { + "epoch": 0.2087801125162283, + "grad_norm": 2.0395705961665893, + "learning_rate": 4.519039446950817e-06, + "loss": 0.1395, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2171 + }, + { + "epoch": 0.20887628023272586, + "grad_norm": 2.1579063880756624, + "learning_rate": 4.518589511867018e-06, + "loss": 0.1373, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2172 + }, + { + "epoch": 0.20897244794922346, + "grad_norm": 2.086889773382307, + "learning_rate": 4.518139388846912e-06, + "loss": 0.1542, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2173 + }, + { + "epoch": 0.20906861566572102, + "grad_norm": 2.939608607481383, + "learning_rate": 4.517689077932407e-06, + "loss": 0.1506, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2174 + }, + { + "epoch": 0.20916478338221858, + "grad_norm": 2.293157586123462, + "learning_rate": 4.51723857916543e-06, + "loss": 0.1316, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2175 + }, + { + "epoch": 0.20926095109871617, + "grad_norm": 1.6410237655035995, + "learning_rate": 4.516787892587921e-06, + "loss": 0.1262, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2176 + }, + { + "epoch": 0.20935711881521374, + "grad_norm": 2.405752002414777, + "learning_rate": 4.516337018241843e-06, + "loss": 0.1581, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2177 + }, + { + "epoch": 0.2094532865317113, + "grad_norm": 3.04401422967592, + "learning_rate": 4.515885956169172e-06, + "loss": 0.1704, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2178 + }, + { + "epoch": 0.20954945424820887, + "grad_norm": 3.3137111391345404, + "learning_rate": 4.515434706411902e-06, + "loss": 0.146, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2179 + }, + { + "epoch": 0.20964562196470646, + "grad_norm": 3.6386755671272843, + "learning_rate": 4.514983269012048e-06, + "loss": 0.1584, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2180 + }, + { + "epoch": 0.20974178968120402, + "grad_norm": 1.660413396872898, + "learning_rate": 4.51453164401164e-06, + "loss": 0.1206, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2181 + }, + { + "epoch": 0.20983795739770159, + "grad_norm": 2.140739374400501, + "learning_rate": 4.514079831452724e-06, + "loss": 0.1327, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2182 + }, + { + "epoch": 0.20993412511419918, + "grad_norm": 2.094007096174126, + "learning_rate": 4.513627831377365e-06, + "loss": 0.1583, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2183 + }, + { + "epoch": 0.21003029283069674, + "grad_norm": 2.2979202439673942, + "learning_rate": 4.513175643827647e-06, + "loss": 0.1345, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2184 + }, + { + "epoch": 0.2101264605471943, + "grad_norm": 2.348527722865646, + "learning_rate": 4.5127232688456685e-06, + "loss": 0.1264, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2185 + }, + { + "epoch": 0.21022262826369187, + "grad_norm": 3.130724795044068, + "learning_rate": 4.5122707064735476e-06, + "loss": 0.1421, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2186 + }, + { + "epoch": 0.21031879598018946, + "grad_norm": 3.1758886270642646, + "learning_rate": 4.511817956753419e-06, + "loss": 0.1504, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2187 + }, + { + "epoch": 0.21041496369668702, + "grad_norm": 2.777554565954599, + "learning_rate": 4.511365019727435e-06, + "loss": 0.1506, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2188 + }, + { + "epoch": 0.2105111314131846, + "grad_norm": 4.007403673645355, + "learning_rate": 4.510911895437765e-06, + "loss": 0.1652, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2189 + }, + { + "epoch": 0.21060729912968218, + "grad_norm": 5.260667739918676, + "learning_rate": 4.510458583926596e-06, + "loss": 0.1557, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2190 + }, + { + "epoch": 0.21070346684617974, + "grad_norm": 1.3688487497785629, + "learning_rate": 4.510005085236133e-06, + "loss": 0.1153, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2191 + }, + { + "epoch": 0.2107996345626773, + "grad_norm": 2.8905560712286387, + "learning_rate": 4.509551399408598e-06, + "loss": 0.159, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2192 + }, + { + "epoch": 0.21089580227917487, + "grad_norm": 3.1502741561893766, + "learning_rate": 4.50909752648623e-06, + "loss": 0.1296, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2193 + }, + { + "epoch": 0.21099196999567246, + "grad_norm": 3.1724810276971454, + "learning_rate": 4.508643466511286e-06, + "loss": 0.1538, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2194 + }, + { + "epoch": 0.21108813771217003, + "grad_norm": 1.6147200862934776, + "learning_rate": 4.508189219526042e-06, + "loss": 0.1587, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2195 + }, + { + "epoch": 0.2111843054286676, + "grad_norm": 3.1660195441640475, + "learning_rate": 4.507734785572787e-06, + "loss": 0.1476, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2196 + }, + { + "epoch": 0.21128047314516518, + "grad_norm": 2.7833287212392555, + "learning_rate": 4.507280164693831e-06, + "loss": 0.1465, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2197 + }, + { + "epoch": 0.21137664086166275, + "grad_norm": 1.8733067086340178, + "learning_rate": 4.5068253569315e-06, + "loss": 0.1306, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2198 + }, + { + "epoch": 0.2114728085781603, + "grad_norm": 2.1416078923465554, + "learning_rate": 4.506370362328139e-06, + "loss": 0.1884, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2199 + }, + { + "epoch": 0.21156897629465787, + "grad_norm": 2.741358332411568, + "learning_rate": 4.5059151809261085e-06, + "loss": 0.1442, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2200 + }, + { + "epoch": 0.21166514401115546, + "grad_norm": 4.012738728452819, + "learning_rate": 4.505459812767788e-06, + "loss": 0.1706, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2201 + }, + { + "epoch": 0.21176131172765303, + "grad_norm": 2.0031166198116868, + "learning_rate": 4.505004257895572e-06, + "loss": 0.1753, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2202 + }, + { + "epoch": 0.2118574794441506, + "grad_norm": 2.2335223732832703, + "learning_rate": 4.504548516351875e-06, + "loss": 0.1617, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2203 + }, + { + "epoch": 0.21195364716064818, + "grad_norm": 4.359328946863823, + "learning_rate": 4.504092588179128e-06, + "loss": 0.1676, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2204 + }, + { + "epoch": 0.21204981487714575, + "grad_norm": 5.71945015940943, + "learning_rate": 4.503636473419779e-06, + "loss": 0.1938, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2205 + }, + { + "epoch": 0.2121459825936433, + "grad_norm": 1.9535610840505517, + "learning_rate": 4.5031801721162925e-06, + "loss": 0.1498, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2206 + }, + { + "epoch": 0.21224215031014088, + "grad_norm": 2.285999969335077, + "learning_rate": 4.502723684311153e-06, + "loss": 0.1507, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2207 + }, + { + "epoch": 0.21233831802663847, + "grad_norm": 2.9690351708911784, + "learning_rate": 4.50226701004686e-06, + "loss": 0.1836, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2208 + }, + { + "epoch": 0.21243448574313603, + "grad_norm": 1.438529829747233, + "learning_rate": 4.501810149365931e-06, + "loss": 0.1285, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2209 + }, + { + "epoch": 0.2125306534596336, + "grad_norm": 1.655235902505253, + "learning_rate": 4.501353102310901e-06, + "loss": 0.1505, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2210 + }, + { + "epoch": 0.2126268211761312, + "grad_norm": 3.282187062076839, + "learning_rate": 4.500895868924323e-06, + "loss": 0.1551, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2211 + }, + { + "epoch": 0.21272298889262875, + "grad_norm": 2.7625488087890524, + "learning_rate": 4.500438449248766e-06, + "loss": 0.1407, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2212 + }, + { + "epoch": 0.21281915660912631, + "grad_norm": 2.0612342334142286, + "learning_rate": 4.4999808433268185e-06, + "loss": 0.1257, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2213 + }, + { + "epoch": 0.21291532432562388, + "grad_norm": 1.739059631558733, + "learning_rate": 4.499523051201082e-06, + "loss": 0.1699, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2214 + }, + { + "epoch": 0.21301149204212147, + "grad_norm": 6.826162741314616, + "learning_rate": 4.499065072914181e-06, + "loss": 0.1273, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2215 + }, + { + "epoch": 0.21310765975861903, + "grad_norm": 3.0060152109751614, + "learning_rate": 4.498606908508754e-06, + "loss": 0.1277, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2216 + }, + { + "epoch": 0.2132038274751166, + "grad_norm": 2.318811826341658, + "learning_rate": 4.498148558027455e-06, + "loss": 0.1589, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2217 + }, + { + "epoch": 0.2132999951916142, + "grad_norm": 2.0810128050392014, + "learning_rate": 4.497690021512961e-06, + "loss": 0.1403, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2218 + }, + { + "epoch": 0.21339616290811175, + "grad_norm": 3.6724508959037623, + "learning_rate": 4.497231299007961e-06, + "loss": 0.1658, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2219 + }, + { + "epoch": 0.21349233062460932, + "grad_norm": 3.108447728288807, + "learning_rate": 4.496772390555164e-06, + "loss": 0.1664, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2220 + }, + { + "epoch": 0.21358849834110688, + "grad_norm": 2.0146605766371186, + "learning_rate": 4.496313296197296e-06, + "loss": 0.144, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2221 + }, + { + "epoch": 0.21368466605760447, + "grad_norm": 1.4791316358794768, + "learning_rate": 4.4958540159771e-06, + "loss": 0.1325, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2222 + }, + { + "epoch": 0.21378083377410204, + "grad_norm": 2.7604235748134256, + "learning_rate": 4.495394549937335e-06, + "loss": 0.1462, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2223 + }, + { + "epoch": 0.2138770014905996, + "grad_norm": 1.8887291273580333, + "learning_rate": 4.494934898120779e-06, + "loss": 0.1622, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2224 + }, + { + "epoch": 0.2139731692070972, + "grad_norm": 1.7308768234179093, + "learning_rate": 4.494475060570228e-06, + "loss": 0.1524, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2225 + }, + { + "epoch": 0.21406933692359476, + "grad_norm": 1.5330469429154334, + "learning_rate": 4.494015037328494e-06, + "loss": 0.1378, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2226 + }, + { + "epoch": 0.21416550464009232, + "grad_norm": 1.6857886882201998, + "learning_rate": 4.4935548284384044e-06, + "loss": 0.149, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2227 + }, + { + "epoch": 0.21426167235658988, + "grad_norm": 1.5078244354440737, + "learning_rate": 4.4930944339428085e-06, + "loss": 0.1393, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2228 + }, + { + "epoch": 0.21435784007308747, + "grad_norm": 1.5280933556743923, + "learning_rate": 4.492633853884569e-06, + "loss": 0.1361, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2229 + }, + { + "epoch": 0.21445400778958504, + "grad_norm": 3.6887820447407114, + "learning_rate": 4.492173088306567e-06, + "loss": 0.1679, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2230 + }, + { + "epoch": 0.2145501755060826, + "grad_norm": 1.6562736123359176, + "learning_rate": 4.491712137251702e-06, + "loss": 0.1282, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2231 + }, + { + "epoch": 0.21464634322258017, + "grad_norm": 1.598258311138271, + "learning_rate": 4.491251000762889e-06, + "loss": 0.1361, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2232 + }, + { + "epoch": 0.21474251093907776, + "grad_norm": 2.3686570130541327, + "learning_rate": 4.490789678883062e-06, + "loss": 0.1395, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2233 + }, + { + "epoch": 0.21483867865557532, + "grad_norm": 3.3289265021386347, + "learning_rate": 4.49032817165517e-06, + "loss": 0.1297, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2234 + }, + { + "epoch": 0.21493484637207289, + "grad_norm": 3.9229102877445086, + "learning_rate": 4.489866479122181e-06, + "loss": 0.1202, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2235 + }, + { + "epoch": 0.21503101408857048, + "grad_norm": 1.8104162253446674, + "learning_rate": 4.489404601327081e-06, + "loss": 0.1383, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2236 + }, + { + "epoch": 0.21512718180506804, + "grad_norm": 3.13404781655629, + "learning_rate": 4.4889425383128705e-06, + "loss": 0.1674, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2237 + }, + { + "epoch": 0.2152233495215656, + "grad_norm": 3.162268123148348, + "learning_rate": 4.48848029012257e-06, + "loss": 0.1754, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2238 + }, + { + "epoch": 0.21531951723806317, + "grad_norm": 1.9897263124214966, + "learning_rate": 4.488017856799216e-06, + "loss": 0.128, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2239 + }, + { + "epoch": 0.21541568495456076, + "grad_norm": 2.7125421687077553, + "learning_rate": 4.487555238385862e-06, + "loss": 0.1458, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2240 + }, + { + "epoch": 0.21551185267105832, + "grad_norm": 3.082996649505479, + "learning_rate": 4.487092434925579e-06, + "loss": 0.143, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2241 + }, + { + "epoch": 0.2156080203875559, + "grad_norm": 2.2135727971586947, + "learning_rate": 4.486629446461456e-06, + "loss": 0.1303, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2242 + }, + { + "epoch": 0.21570418810405348, + "grad_norm": 2.2397615730135634, + "learning_rate": 4.486166273036597e-06, + "loss": 0.163, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2243 + }, + { + "epoch": 0.21580035582055104, + "grad_norm": 2.333854566788364, + "learning_rate": 4.485702914694127e-06, + "loss": 0.1484, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2244 + }, + { + "epoch": 0.2158965235370486, + "grad_norm": 2.6377578753753808, + "learning_rate": 4.485239371477183e-06, + "loss": 0.1556, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2245 + }, + { + "epoch": 0.21599269125354617, + "grad_norm": 2.0064599355464763, + "learning_rate": 4.4847756434289244e-06, + "loss": 0.1698, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2246 + }, + { + "epoch": 0.21608885897004376, + "grad_norm": 1.9469507210213028, + "learning_rate": 4.484311730592525e-06, + "loss": 0.1508, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2247 + }, + { + "epoch": 0.21618502668654133, + "grad_norm": 5.483175505522201, + "learning_rate": 4.483847633011177e-06, + "loss": 0.1505, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2248 + }, + { + "epoch": 0.2162811944030389, + "grad_norm": 2.4842559149005052, + "learning_rate": 4.4833833507280884e-06, + "loss": 0.1536, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2249 + }, + { + "epoch": 0.21637736211953648, + "grad_norm": 3.044627831932235, + "learning_rate": 4.482918883786484e-06, + "loss": 0.1636, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2250 + }, + { + "epoch": 0.21647352983603405, + "grad_norm": 2.0766298623953854, + "learning_rate": 4.48245423222961e-06, + "loss": 0.171, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2251 + }, + { + "epoch": 0.2165696975525316, + "grad_norm": 1.8905499559575312, + "learning_rate": 4.481989396100725e-06, + "loss": 0.1501, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2252 + }, + { + "epoch": 0.21666586526902917, + "grad_norm": 2.1875718567595737, + "learning_rate": 4.481524375443105e-06, + "loss": 0.1833, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2253 + }, + { + "epoch": 0.21676203298552676, + "grad_norm": 1.469027831118352, + "learning_rate": 4.481059170300047e-06, + "loss": 0.1222, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2254 + }, + { + "epoch": 0.21685820070202433, + "grad_norm": 1.7973974370268526, + "learning_rate": 4.480593780714863e-06, + "loss": 0.1569, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2255 + }, + { + "epoch": 0.2169543684185219, + "grad_norm": 2.286453652358947, + "learning_rate": 4.480128206730881e-06, + "loss": 0.1496, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2256 + }, + { + "epoch": 0.21705053613501948, + "grad_norm": 2.9799718620811695, + "learning_rate": 4.479662448391447e-06, + "loss": 0.1601, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2257 + }, + { + "epoch": 0.21714670385151705, + "grad_norm": 1.6397446300213172, + "learning_rate": 4.4791965057399245e-06, + "loss": 0.1277, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2258 + }, + { + "epoch": 0.2172428715680146, + "grad_norm": 1.5272309476622647, + "learning_rate": 4.478730378819695e-06, + "loss": 0.1264, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2259 + }, + { + "epoch": 0.21733903928451218, + "grad_norm": 1.8787098406288412, + "learning_rate": 4.478264067674155e-06, + "loss": 0.1337, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2260 + }, + { + "epoch": 0.21743520700100977, + "grad_norm": 1.8659968734193344, + "learning_rate": 4.47779757234672e-06, + "loss": 0.132, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2261 + }, + { + "epoch": 0.21753137471750733, + "grad_norm": 1.6549869129371138, + "learning_rate": 4.477330892880824e-06, + "loss": 0.1293, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2262 + }, + { + "epoch": 0.2176275424340049, + "grad_norm": 1.854721505392942, + "learning_rate": 4.476864029319912e-06, + "loss": 0.1286, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2263 + }, + { + "epoch": 0.2177237101505025, + "grad_norm": 1.819929708405582, + "learning_rate": 4.476396981707454e-06, + "loss": 0.1176, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2264 + }, + { + "epoch": 0.21781987786700005, + "grad_norm": 1.6414297135438114, + "learning_rate": 4.4759297500869305e-06, + "loss": 0.1078, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2265 + }, + { + "epoch": 0.21791604558349761, + "grad_norm": 2.5743494968326965, + "learning_rate": 4.475462334501845e-06, + "loss": 0.1474, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2266 + }, + { + "epoch": 0.21801221329999518, + "grad_norm": 3.3232274501309287, + "learning_rate": 4.474994734995713e-06, + "loss": 0.1556, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2267 + }, + { + "epoch": 0.21810838101649277, + "grad_norm": 2.7001264164291134, + "learning_rate": 4.474526951612069e-06, + "loss": 0.172, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2268 + }, + { + "epoch": 0.21820454873299033, + "grad_norm": 1.854923580150378, + "learning_rate": 4.474058984394466e-06, + "loss": 0.118, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2269 + }, + { + "epoch": 0.2183007164494879, + "grad_norm": 2.4840042460382654, + "learning_rate": 4.473590833386474e-06, + "loss": 0.1905, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2270 + }, + { + "epoch": 0.2183968841659855, + "grad_norm": 2.60307927917448, + "learning_rate": 4.473122498631679e-06, + "loss": 0.1356, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2271 + }, + { + "epoch": 0.21849305188248305, + "grad_norm": 3.8046434021599853, + "learning_rate": 4.4726539801736815e-06, + "loss": 0.1608, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2272 + }, + { + "epoch": 0.21858921959898062, + "grad_norm": 2.052759728604495, + "learning_rate": 4.472185278056104e-06, + "loss": 0.141, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2273 + }, + { + "epoch": 0.21868538731547818, + "grad_norm": 1.7203808444806763, + "learning_rate": 4.471716392322585e-06, + "loss": 0.1567, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2274 + }, + { + "epoch": 0.21878155503197577, + "grad_norm": 1.7458817201776518, + "learning_rate": 4.4712473230167775e-06, + "loss": 0.1537, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2275 + }, + { + "epoch": 0.21887772274847334, + "grad_norm": 2.9548351172876615, + "learning_rate": 4.470778070182353e-06, + "loss": 0.1739, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2276 + }, + { + "epoch": 0.2189738904649709, + "grad_norm": 3.7275724269240293, + "learning_rate": 4.470308633863001e-06, + "loss": 0.1428, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2277 + }, + { + "epoch": 0.2190700581814685, + "grad_norm": 1.425889853800219, + "learning_rate": 4.469839014102427e-06, + "loss": 0.1339, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2278 + }, + { + "epoch": 0.21916622589796605, + "grad_norm": 1.6173525090224699, + "learning_rate": 4.469369210944354e-06, + "loss": 0.1517, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2279 + }, + { + "epoch": 0.21926239361446362, + "grad_norm": 2.4894451068016625, + "learning_rate": 4.4688992244325215e-06, + "loss": 0.1768, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2280 + }, + { + "epoch": 0.21935856133096118, + "grad_norm": 1.6796042626439813, + "learning_rate": 4.468429054610688e-06, + "loss": 0.1489, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2281 + }, + { + "epoch": 0.21945472904745877, + "grad_norm": 2.4445286132144166, + "learning_rate": 4.4679587015226255e-06, + "loss": 0.1678, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2282 + }, + { + "epoch": 0.21955089676395634, + "grad_norm": 2.758150151077313, + "learning_rate": 4.467488165212127e-06, + "loss": 0.1428, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2283 + }, + { + "epoch": 0.2196470644804539, + "grad_norm": 2.2225159791249376, + "learning_rate": 4.467017445723e-06, + "loss": 0.1622, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2284 + }, + { + "epoch": 0.2197432321969515, + "grad_norm": 1.9160532324025847, + "learning_rate": 4.466546543099069e-06, + "loss": 0.163, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2285 + }, + { + "epoch": 0.21983939991344906, + "grad_norm": 2.3734885806017556, + "learning_rate": 4.466075457384179e-06, + "loss": 0.1604, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2286 + }, + { + "epoch": 0.21993556762994662, + "grad_norm": 1.7788604633454455, + "learning_rate": 4.465604188622187e-06, + "loss": 0.1528, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2287 + }, + { + "epoch": 0.22003173534644418, + "grad_norm": 3.4021237781391, + "learning_rate": 4.4651327368569695e-06, + "loss": 0.1238, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2288 + }, + { + "epoch": 0.22012790306294178, + "grad_norm": 2.5732843559556633, + "learning_rate": 4.46466110213242e-06, + "loss": 0.1886, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2289 + }, + { + "epoch": 0.22022407077943934, + "grad_norm": 3.6414238930693315, + "learning_rate": 4.464189284492451e-06, + "loss": 0.1557, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2290 + }, + { + "epoch": 0.2203202384959369, + "grad_norm": 2.2368790336048376, + "learning_rate": 4.463717283980986e-06, + "loss": 0.1652, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2291 + }, + { + "epoch": 0.2204164062124345, + "grad_norm": 1.74404354723576, + "learning_rate": 4.463245100641974e-06, + "loss": 0.1457, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2292 + }, + { + "epoch": 0.22051257392893206, + "grad_norm": 1.4880412216613799, + "learning_rate": 4.462772734519375e-06, + "loss": 0.1437, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2293 + }, + { + "epoch": 0.22060874164542962, + "grad_norm": 5.355435851492861, + "learning_rate": 4.462300185657167e-06, + "loss": 0.1431, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2294 + }, + { + "epoch": 0.2207049093619272, + "grad_norm": 2.673738853979303, + "learning_rate": 4.461827454099346e-06, + "loss": 0.1478, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2295 + }, + { + "epoch": 0.22080107707842478, + "grad_norm": 1.4667742248324818, + "learning_rate": 4.461354539889923e-06, + "loss": 0.129, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2296 + }, + { + "epoch": 0.22089724479492234, + "grad_norm": 1.516563427449705, + "learning_rate": 4.460881443072931e-06, + "loss": 0.1293, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2297 + }, + { + "epoch": 0.2209934125114199, + "grad_norm": 1.6849815502811127, + "learning_rate": 4.460408163692414e-06, + "loss": 0.1228, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2298 + }, + { + "epoch": 0.2210895802279175, + "grad_norm": 4.152576438087265, + "learning_rate": 4.4599347017924364e-06, + "loss": 0.1625, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2299 + }, + { + "epoch": 0.22118574794441506, + "grad_norm": 2.964685818220466, + "learning_rate": 4.459461057417078e-06, + "loss": 0.1575, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2300 + }, + { + "epoch": 0.22128191566091263, + "grad_norm": 2.577411364570856, + "learning_rate": 4.4589872306104385e-06, + "loss": 0.1621, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2301 + }, + { + "epoch": 0.2213780833774102, + "grad_norm": 2.0708336971954666, + "learning_rate": 4.458513221416631e-06, + "loss": 0.1486, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2302 + }, + { + "epoch": 0.22147425109390778, + "grad_norm": 1.649099146559874, + "learning_rate": 4.458039029879787e-06, + "loss": 0.1409, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2303 + }, + { + "epoch": 0.22157041881040535, + "grad_norm": 2.542532664169132, + "learning_rate": 4.457564656044056e-06, + "loss": 0.1547, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2304 + }, + { + "epoch": 0.2216665865269029, + "grad_norm": 1.7446565170488615, + "learning_rate": 4.457090099953602e-06, + "loss": 0.1568, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2305 + }, + { + "epoch": 0.2217627542434005, + "grad_norm": 1.9218136604637825, + "learning_rate": 4.456615361652609e-06, + "loss": 0.1337, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2306 + }, + { + "epoch": 0.22185892195989806, + "grad_norm": 1.9838349896874021, + "learning_rate": 4.456140441185276e-06, + "loss": 0.1894, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2307 + }, + { + "epoch": 0.22195508967639563, + "grad_norm": 2.259099493401245, + "learning_rate": 4.455665338595819e-06, + "loss": 0.1563, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2308 + }, + { + "epoch": 0.2220512573928932, + "grad_norm": 3.2128650661896083, + "learning_rate": 4.455190053928471e-06, + "loss": 0.1526, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2309 + }, + { + "epoch": 0.22214742510939078, + "grad_norm": 1.8534494110384248, + "learning_rate": 4.454714587227485e-06, + "loss": 0.1436, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2310 + }, + { + "epoch": 0.22224359282588835, + "grad_norm": 3.4578209611370707, + "learning_rate": 4.454238938537124e-06, + "loss": 0.1271, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2311 + }, + { + "epoch": 0.2223397605423859, + "grad_norm": 4.150680539176948, + "learning_rate": 4.453763107901676e-06, + "loss": 0.1638, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2312 + }, + { + "epoch": 0.2224359282588835, + "grad_norm": 3.2896067183140807, + "learning_rate": 4.453287095365439e-06, + "loss": 0.1634, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2313 + }, + { + "epoch": 0.22253209597538107, + "grad_norm": 1.5757504504556108, + "learning_rate": 4.452810900972734e-06, + "loss": 0.156, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2314 + }, + { + "epoch": 0.22262826369187863, + "grad_norm": 1.9593710200753465, + "learning_rate": 4.452334524767895e-06, + "loss": 0.1507, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2315 + }, + { + "epoch": 0.2227244314083762, + "grad_norm": 2.835012908793872, + "learning_rate": 4.451857966795272e-06, + "loss": 0.1838, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2316 + }, + { + "epoch": 0.22282059912487379, + "grad_norm": 1.7826575363817498, + "learning_rate": 4.451381227099238e-06, + "loss": 0.1658, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2317 + }, + { + "epoch": 0.22291676684137135, + "grad_norm": 2.0216118215696746, + "learning_rate": 4.450904305724174e-06, + "loss": 0.1742, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2318 + }, + { + "epoch": 0.2230129345578689, + "grad_norm": 1.2864942453330213, + "learning_rate": 4.450427202714486e-06, + "loss": 0.1424, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2319 + }, + { + "epoch": 0.2231091022743665, + "grad_norm": 1.8852062795968776, + "learning_rate": 4.449949918114593e-06, + "loss": 0.1623, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2320 + }, + { + "epoch": 0.22320526999086407, + "grad_norm": 1.4566665524355091, + "learning_rate": 4.449472451968931e-06, + "loss": 0.1513, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2321 + }, + { + "epoch": 0.22330143770736163, + "grad_norm": 1.8796971706596652, + "learning_rate": 4.448994804321953e-06, + "loss": 0.1739, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2322 + }, + { + "epoch": 0.2233976054238592, + "grad_norm": 4.171067204290255, + "learning_rate": 4.4485169752181314e-06, + "loss": 0.1712, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2323 + }, + { + "epoch": 0.2234937731403568, + "grad_norm": 1.9176356452267929, + "learning_rate": 4.4480389647019505e-06, + "loss": 0.1333, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2324 + }, + { + "epoch": 0.22358994085685435, + "grad_norm": 1.5279276505583377, + "learning_rate": 4.447560772817916e-06, + "loss": 0.1505, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2325 + }, + { + "epoch": 0.22368610857335192, + "grad_norm": 2.146923865078497, + "learning_rate": 4.447082399610549e-06, + "loss": 0.1469, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2326 + }, + { + "epoch": 0.2237822762898495, + "grad_norm": 2.516750571084733, + "learning_rate": 4.446603845124388e-06, + "loss": 0.1408, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2327 + }, + { + "epoch": 0.22387844400634707, + "grad_norm": 2.586677407767482, + "learning_rate": 4.446125109403987e-06, + "loss": 0.1433, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2328 + }, + { + "epoch": 0.22397461172284464, + "grad_norm": 1.607145345308202, + "learning_rate": 4.445646192493917e-06, + "loss": 0.1264, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2329 + }, + { + "epoch": 0.2240707794393422, + "grad_norm": 4.256588442735172, + "learning_rate": 4.445167094438767e-06, + "loss": 0.1741, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2330 + }, + { + "epoch": 0.2241669471558398, + "grad_norm": 1.7269753106601182, + "learning_rate": 4.4446878152831414e-06, + "loss": 0.1321, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2331 + }, + { + "epoch": 0.22426311487233735, + "grad_norm": 2.283821234265774, + "learning_rate": 4.444208355071665e-06, + "loss": 0.136, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2332 + }, + { + "epoch": 0.22435928258883492, + "grad_norm": 1.8762789061354963, + "learning_rate": 4.443728713848975e-06, + "loss": 0.1465, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2333 + }, + { + "epoch": 0.2244554503053325, + "grad_norm": 2.219848329924958, + "learning_rate": 4.443248891659727e-06, + "loss": 0.1553, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2334 + }, + { + "epoch": 0.22455161802183007, + "grad_norm": 1.8553769381142893, + "learning_rate": 4.442768888548595e-06, + "loss": 0.142, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2335 + }, + { + "epoch": 0.22464778573832764, + "grad_norm": 2.1647106542739087, + "learning_rate": 4.442288704560268e-06, + "loss": 0.149, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2336 + }, + { + "epoch": 0.2247439534548252, + "grad_norm": 2.9399557429069447, + "learning_rate": 4.441808339739453e-06, + "loss": 0.1776, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2337 + }, + { + "epoch": 0.2248401211713228, + "grad_norm": 1.7813466592693876, + "learning_rate": 4.441327794130872e-06, + "loss": 0.1572, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2338 + }, + { + "epoch": 0.22493628888782036, + "grad_norm": 3.1935464194479137, + "learning_rate": 4.4408470677792666e-06, + "loss": 0.1605, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2339 + }, + { + "epoch": 0.22503245660431792, + "grad_norm": 4.715034972309358, + "learning_rate": 4.440366160729393e-06, + "loss": 0.1489, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2340 + }, + { + "epoch": 0.2251286243208155, + "grad_norm": 2.0218323617534195, + "learning_rate": 4.439885073026024e-06, + "loss": 0.1208, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2341 + }, + { + "epoch": 0.22522479203731308, + "grad_norm": 1.872821689637631, + "learning_rate": 4.439403804713952e-06, + "loss": 0.1386, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2342 + }, + { + "epoch": 0.22532095975381064, + "grad_norm": 2.58047166851578, + "learning_rate": 4.438922355837985e-06, + "loss": 0.1829, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2343 + }, + { + "epoch": 0.2254171274703082, + "grad_norm": 2.196216565228009, + "learning_rate": 4.438440726442944e-06, + "loss": 0.1785, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2344 + }, + { + "epoch": 0.2255132951868058, + "grad_norm": 1.5611892768402709, + "learning_rate": 4.4379589165736736e-06, + "loss": 0.1703, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2345 + }, + { + "epoch": 0.22560946290330336, + "grad_norm": 1.5792475307133267, + "learning_rate": 4.43747692627503e-06, + "loss": 0.1638, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2346 + }, + { + "epoch": 0.22570563061980092, + "grad_norm": 4.502437617772488, + "learning_rate": 4.436994755591887e-06, + "loss": 0.126, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2347 + }, + { + "epoch": 0.22580179833629851, + "grad_norm": 1.7319771095011665, + "learning_rate": 4.436512404569136e-06, + "loss": 0.1239, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2348 + }, + { + "epoch": 0.22589796605279608, + "grad_norm": 1.4776667246516768, + "learning_rate": 4.436029873251688e-06, + "loss": 0.1327, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2349 + }, + { + "epoch": 0.22599413376929364, + "grad_norm": 2.571753503850667, + "learning_rate": 4.435547161684466e-06, + "loss": 0.1296, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2350 + }, + { + "epoch": 0.2260903014857912, + "grad_norm": 3.497500580487125, + "learning_rate": 4.435064269912411e-06, + "loss": 0.1605, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2351 + }, + { + "epoch": 0.2261864692022888, + "grad_norm": 3.2223544881581008, + "learning_rate": 4.434581197980483e-06, + "loss": 0.1313, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2352 + }, + { + "epoch": 0.22628263691878636, + "grad_norm": 1.7549796061733456, + "learning_rate": 4.434097945933657e-06, + "loss": 0.143, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2353 + }, + { + "epoch": 0.22637880463528393, + "grad_norm": 4.450523309389014, + "learning_rate": 4.433614513816925e-06, + "loss": 0.1518, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2354 + }, + { + "epoch": 0.22647497235178152, + "grad_norm": 4.3020038945300065, + "learning_rate": 4.4331309016752955e-06, + "loss": 0.1918, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2355 + }, + { + "epoch": 0.22657114006827908, + "grad_norm": 3.5721013382043463, + "learning_rate": 4.432647109553795e-06, + "loss": 0.1887, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2356 + }, + { + "epoch": 0.22666730778477664, + "grad_norm": 1.4415565801920251, + "learning_rate": 4.432163137497465e-06, + "loss": 0.1054, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2357 + }, + { + "epoch": 0.2267634755012742, + "grad_norm": 3.424027122655799, + "learning_rate": 4.431678985551366e-06, + "loss": 0.1305, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2358 + }, + { + "epoch": 0.2268596432177718, + "grad_norm": 3.5879397166627376, + "learning_rate": 4.431194653760571e-06, + "loss": 0.1688, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2359 + }, + { + "epoch": 0.22695581093426936, + "grad_norm": 4.21760407299575, + "learning_rate": 4.430710142170176e-06, + "loss": 0.1372, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2360 + }, + { + "epoch": 0.22705197865076693, + "grad_norm": 1.4566150607036767, + "learning_rate": 4.430225450825288e-06, + "loss": 0.1182, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2361 + }, + { + "epoch": 0.22714814636726452, + "grad_norm": 4.084435469866067, + "learning_rate": 4.429740579771035e-06, + "loss": 0.1133, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2362 + }, + { + "epoch": 0.22724431408376208, + "grad_norm": 5.89547400989522, + "learning_rate": 4.4292555290525584e-06, + "loss": 0.1785, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2363 + }, + { + "epoch": 0.22734048180025965, + "grad_norm": 2.7932299554743323, + "learning_rate": 4.428770298715018e-06, + "loss": 0.1402, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2364 + }, + { + "epoch": 0.2274366495167572, + "grad_norm": 2.2938941240338275, + "learning_rate": 4.428284888803591e-06, + "loss": 0.1411, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2365 + }, + { + "epoch": 0.2275328172332548, + "grad_norm": 4.280912859333127, + "learning_rate": 4.42779929936347e-06, + "loss": 0.1279, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2366 + }, + { + "epoch": 0.22762898494975237, + "grad_norm": 6.971997439516706, + "learning_rate": 4.427313530439863e-06, + "loss": 0.1932, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2367 + }, + { + "epoch": 0.22772515266624993, + "grad_norm": 2.2747015228552274, + "learning_rate": 4.426827582077999e-06, + "loss": 0.1245, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2368 + }, + { + "epoch": 0.22782132038274752, + "grad_norm": 3.0578237145019314, + "learning_rate": 4.426341454323121e-06, + "loss": 0.1711, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2369 + }, + { + "epoch": 0.22791748809924509, + "grad_norm": 1.661456499969686, + "learning_rate": 4.425855147220487e-06, + "loss": 0.1475, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2370 + }, + { + "epoch": 0.22801365581574265, + "grad_norm": 2.785421159434939, + "learning_rate": 4.425368660815374e-06, + "loss": 0.1564, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2371 + }, + { + "epoch": 0.2281098235322402, + "grad_norm": 2.95475576615448, + "learning_rate": 4.424881995153076e-06, + "loss": 0.1427, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2372 + }, + { + "epoch": 0.2282059912487378, + "grad_norm": 3.733034410181649, + "learning_rate": 4.4243951502789025e-06, + "loss": 0.1596, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2373 + }, + { + "epoch": 0.22830215896523537, + "grad_norm": 1.9699186197324718, + "learning_rate": 4.423908126238181e-06, + "loss": 0.1447, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2374 + }, + { + "epoch": 0.22839832668173293, + "grad_norm": 2.097827067988091, + "learning_rate": 4.423420923076254e-06, + "loss": 0.1132, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2375 + }, + { + "epoch": 0.22849449439823052, + "grad_norm": 2.0849562233560173, + "learning_rate": 4.422933540838481e-06, + "loss": 0.1241, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2376 + }, + { + "epoch": 0.2285906621147281, + "grad_norm": 2.178615493398161, + "learning_rate": 4.42244597957024e-06, + "loss": 0.1547, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2377 + }, + { + "epoch": 0.22868682983122565, + "grad_norm": 1.8369593568259879, + "learning_rate": 4.4219582393169225e-06, + "loss": 0.1269, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2378 + }, + { + "epoch": 0.22878299754772322, + "grad_norm": 1.8791654431867975, + "learning_rate": 4.42147032012394e-06, + "loss": 0.1109, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2379 + }, + { + "epoch": 0.2288791652642208, + "grad_norm": 2.099631427163901, + "learning_rate": 4.420982222036719e-06, + "loss": 0.1876, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2380 + }, + { + "epoch": 0.22897533298071837, + "grad_norm": 1.6119931723783736, + "learning_rate": 4.420493945100702e-06, + "loss": 0.1468, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2381 + }, + { + "epoch": 0.22907150069721594, + "grad_norm": 10.199590599726106, + "learning_rate": 4.4200054893613484e-06, + "loss": 0.1309, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2382 + }, + { + "epoch": 0.22916766841371353, + "grad_norm": 2.788782012028489, + "learning_rate": 4.419516854864137e-06, + "loss": 0.1574, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2383 + }, + { + "epoch": 0.2292638361302111, + "grad_norm": 2.8462700496834543, + "learning_rate": 4.419028041654559e-06, + "loss": 0.1621, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2384 + }, + { + "epoch": 0.22936000384670865, + "grad_norm": 1.8162200785084253, + "learning_rate": 4.418539049778126e-06, + "loss": 0.1662, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2385 + }, + { + "epoch": 0.22945617156320622, + "grad_norm": 1.6659310706007762, + "learning_rate": 4.418049879280363e-06, + "loss": 0.1514, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2386 + }, + { + "epoch": 0.2295523392797038, + "grad_norm": 4.110686486341808, + "learning_rate": 4.417560530206814e-06, + "loss": 0.1606, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2387 + }, + { + "epoch": 0.22964850699620137, + "grad_norm": 2.92218451890772, + "learning_rate": 4.4170710026030376e-06, + "loss": 0.1341, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2388 + }, + { + "epoch": 0.22974467471269894, + "grad_norm": 2.5667847014517986, + "learning_rate": 4.416581296514612e-06, + "loss": 0.1185, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2389 + }, + { + "epoch": 0.22984084242919653, + "grad_norm": 1.9281176446504575, + "learning_rate": 4.416091411987128e-06, + "loss": 0.1779, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2390 + }, + { + "epoch": 0.2299370101456941, + "grad_norm": 3.8021764364870245, + "learning_rate": 4.4156013490661976e-06, + "loss": 0.1809, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2391 + }, + { + "epoch": 0.23003317786219166, + "grad_norm": 3.2639830075174623, + "learning_rate": 4.415111107797445e-06, + "loss": 0.1906, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2392 + }, + { + "epoch": 0.23012934557868922, + "grad_norm": 2.6878491760221523, + "learning_rate": 4.414620688226515e-06, + "loss": 0.1515, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2393 + }, + { + "epoch": 0.2302255132951868, + "grad_norm": 1.4343097563235612, + "learning_rate": 4.4141300903990655e-06, + "loss": 0.1256, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2394 + }, + { + "epoch": 0.23032168101168438, + "grad_norm": 4.406322776523549, + "learning_rate": 4.413639314360772e-06, + "loss": 0.1573, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2395 + }, + { + "epoch": 0.23041784872818194, + "grad_norm": 4.026229514926398, + "learning_rate": 4.413148360157329e-06, + "loss": 0.1242, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2396 + }, + { + "epoch": 0.23051401644467953, + "grad_norm": 4.017325879952406, + "learning_rate": 4.412657227834444e-06, + "loss": 0.1528, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2397 + }, + { + "epoch": 0.2306101841611771, + "grad_norm": 2.9585038092051796, + "learning_rate": 4.412165917437845e-06, + "loss": 0.1378, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2398 + }, + { + "epoch": 0.23070635187767466, + "grad_norm": 1.6057998502809494, + "learning_rate": 4.411674429013272e-06, + "loss": 0.1316, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2399 + }, + { + "epoch": 0.23080251959417222, + "grad_norm": 1.882573204634922, + "learning_rate": 4.411182762606484e-06, + "loss": 0.1247, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2400 + }, + { + "epoch": 0.23089868731066981, + "grad_norm": 4.703174543321979, + "learning_rate": 4.410690918263258e-06, + "loss": 0.1684, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2401 + }, + { + "epoch": 0.23099485502716738, + "grad_norm": 3.098570486396412, + "learning_rate": 4.410198896029386e-06, + "loss": 0.1581, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2402 + }, + { + "epoch": 0.23109102274366494, + "grad_norm": 3.1497198309782584, + "learning_rate": 4.4097066959506765e-06, + "loss": 0.1829, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2403 + }, + { + "epoch": 0.23118719046016253, + "grad_norm": 1.8497309179526684, + "learning_rate": 4.409214318072953e-06, + "loss": 0.1389, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2404 + }, + { + "epoch": 0.2312833581766601, + "grad_norm": 2.145074468179908, + "learning_rate": 4.4087217624420595e-06, + "loss": 0.131, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2405 + }, + { + "epoch": 0.23137952589315766, + "grad_norm": 2.9544763023386253, + "learning_rate": 4.408229029103853e-06, + "loss": 0.1353, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2406 + }, + { + "epoch": 0.23147569360965523, + "grad_norm": 3.659304215987337, + "learning_rate": 4.407736118104208e-06, + "loss": 0.1274, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2407 + }, + { + "epoch": 0.23157186132615282, + "grad_norm": 3.230210709947806, + "learning_rate": 4.407243029489018e-06, + "loss": 0.1408, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2408 + }, + { + "epoch": 0.23166802904265038, + "grad_norm": 2.3775882328661178, + "learning_rate": 4.406749763304188e-06, + "loss": 0.1057, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2409 + }, + { + "epoch": 0.23176419675914794, + "grad_norm": 2.0246225403409657, + "learning_rate": 4.406256319595645e-06, + "loss": 0.1459, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2410 + }, + { + "epoch": 0.23186036447564554, + "grad_norm": 3.5556271228872514, + "learning_rate": 4.405762698409328e-06, + "loss": 0.1656, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2411 + }, + { + "epoch": 0.2319565321921431, + "grad_norm": 3.8896394503503364, + "learning_rate": 4.4052688997911965e-06, + "loss": 0.1452, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2412 + }, + { + "epoch": 0.23205269990864066, + "grad_norm": 3.839102519501271, + "learning_rate": 4.404774923787223e-06, + "loss": 0.149, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2413 + }, + { + "epoch": 0.23214886762513823, + "grad_norm": 1.465748133617128, + "learning_rate": 4.404280770443398e-06, + "loss": 0.1476, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2414 + }, + { + "epoch": 0.23224503534163582, + "grad_norm": 1.3338343887092547, + "learning_rate": 4.40378643980573e-06, + "loss": 0.1293, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2415 + }, + { + "epoch": 0.23234120305813338, + "grad_norm": 4.094393023496669, + "learning_rate": 4.4032919319202415e-06, + "loss": 0.1553, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2416 + }, + { + "epoch": 0.23243737077463095, + "grad_norm": 3.2984100459047414, + "learning_rate": 4.402797246832971e-06, + "loss": 0.1619, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2417 + }, + { + "epoch": 0.23253353849112854, + "grad_norm": 3.6319290370580926, + "learning_rate": 4.402302384589979e-06, + "loss": 0.1758, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2418 + }, + { + "epoch": 0.2326297062076261, + "grad_norm": 2.366749119628972, + "learning_rate": 4.401807345237336e-06, + "loss": 0.1439, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2419 + }, + { + "epoch": 0.23272587392412367, + "grad_norm": 2.5513803724187705, + "learning_rate": 4.401312128821131e-06, + "loss": 0.127, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2420 + }, + { + "epoch": 0.23282204164062123, + "grad_norm": 2.1897470557523753, + "learning_rate": 4.400816735387471e-06, + "loss": 0.1267, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2421 + }, + { + "epoch": 0.23291820935711882, + "grad_norm": 2.1371569387588405, + "learning_rate": 4.400321164982479e-06, + "loss": 0.1412, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2422 + }, + { + "epoch": 0.23301437707361639, + "grad_norm": 2.4604572126104443, + "learning_rate": 4.399825417652292e-06, + "loss": 0.1312, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2423 + }, + { + "epoch": 0.23311054479011395, + "grad_norm": 1.459592642904204, + "learning_rate": 4.399329493443067e-06, + "loss": 0.1181, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2424 + }, + { + "epoch": 0.23320671250661154, + "grad_norm": 3.5649442008312255, + "learning_rate": 4.398833392400977e-06, + "loss": 0.1329, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2425 + }, + { + "epoch": 0.2333028802231091, + "grad_norm": 2.2179176972774677, + "learning_rate": 4.3983371145722085e-06, + "loss": 0.1495, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2426 + }, + { + "epoch": 0.23339904793960667, + "grad_norm": 2.2360797526914657, + "learning_rate": 4.397840660002967e-06, + "loss": 0.1291, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2427 + }, + { + "epoch": 0.23349521565610423, + "grad_norm": 2.731900616848117, + "learning_rate": 4.3973440287394744e-06, + "loss": 0.1616, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2428 + }, + { + "epoch": 0.23359138337260182, + "grad_norm": 1.5987634347969444, + "learning_rate": 4.396847220827967e-06, + "loss": 0.1161, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2429 + }, + { + "epoch": 0.2336875510890994, + "grad_norm": 1.4466117917079298, + "learning_rate": 4.3963502363147e-06, + "loss": 0.1113, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2430 + }, + { + "epoch": 0.23378371880559695, + "grad_norm": 1.8114472926526937, + "learning_rate": 4.3958530752459435e-06, + "loss": 0.1348, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2431 + }, + { + "epoch": 0.23387988652209454, + "grad_norm": 2.134733275924138, + "learning_rate": 4.3953557376679856e-06, + "loss": 0.1408, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2432 + }, + { + "epoch": 0.2339760542385921, + "grad_norm": 3.4271377523789686, + "learning_rate": 4.3948582236271295e-06, + "loss": 0.1741, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2433 + }, + { + "epoch": 0.23407222195508967, + "grad_norm": 3.1905526211676882, + "learning_rate": 4.394360533169693e-06, + "loss": 0.1378, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2434 + }, + { + "epoch": 0.23416838967158723, + "grad_norm": 1.8001526604013078, + "learning_rate": 4.393862666342017e-06, + "loss": 0.1457, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2435 + }, + { + "epoch": 0.23426455738808483, + "grad_norm": 2.192541517570524, + "learning_rate": 4.3933646231904505e-06, + "loss": 0.1463, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2436 + }, + { + "epoch": 0.2343607251045824, + "grad_norm": 2.8495782838559576, + "learning_rate": 4.392866403761363e-06, + "loss": 0.1288, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2437 + }, + { + "epoch": 0.23445689282107995, + "grad_norm": 2.3179113658520594, + "learning_rate": 4.392368008101141e-06, + "loss": 0.1393, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2438 + }, + { + "epoch": 0.23455306053757755, + "grad_norm": 1.6199419191232203, + "learning_rate": 4.391869436256187e-06, + "loss": 0.1491, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2439 + }, + { + "epoch": 0.2346492282540751, + "grad_norm": 2.739446154019609, + "learning_rate": 4.391370688272919e-06, + "loss": 0.1572, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2440 + }, + { + "epoch": 0.23474539597057267, + "grad_norm": 3.6953218928531997, + "learning_rate": 4.390871764197771e-06, + "loss": 0.1697, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2441 + }, + { + "epoch": 0.23484156368707024, + "grad_norm": 3.7616414250648127, + "learning_rate": 4.390372664077195e-06, + "loss": 0.1557, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2442 + }, + { + "epoch": 0.23493773140356783, + "grad_norm": 1.5672749443563592, + "learning_rate": 4.389873387957659e-06, + "loss": 0.1455, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2443 + }, + { + "epoch": 0.2350338991200654, + "grad_norm": 3.156134375809509, + "learning_rate": 4.3893739358856465e-06, + "loss": 0.176, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2444 + }, + { + "epoch": 0.23513006683656296, + "grad_norm": 3.1808448737857806, + "learning_rate": 4.3888743079076565e-06, + "loss": 0.16, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2445 + }, + { + "epoch": 0.23522623455306055, + "grad_norm": 2.464340968937731, + "learning_rate": 4.388374504070209e-06, + "loss": 0.1398, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2446 + }, + { + "epoch": 0.2353224022695581, + "grad_norm": 1.3827240493215385, + "learning_rate": 4.387874524419835e-06, + "loss": 0.1354, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2447 + }, + { + "epoch": 0.23541856998605568, + "grad_norm": 2.6281049254015207, + "learning_rate": 4.387374369003083e-06, + "loss": 0.1312, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2448 + }, + { + "epoch": 0.23551473770255324, + "grad_norm": 2.90136640487279, + "learning_rate": 4.386874037866521e-06, + "loss": 0.143, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2449 + }, + { + "epoch": 0.23561090541905083, + "grad_norm": 3.065558449013342, + "learning_rate": 4.38637353105673e-06, + "loss": 0.1437, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2450 + }, + { + "epoch": 0.2357070731355484, + "grad_norm": 1.8595995911182035, + "learning_rate": 4.38587284862031e-06, + "loss": 0.1382, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2451 + }, + { + "epoch": 0.23580324085204596, + "grad_norm": 3.2247346672257446, + "learning_rate": 4.385371990603874e-06, + "loss": 0.1608, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2452 + }, + { + "epoch": 0.23589940856854355, + "grad_norm": 3.4412785229506477, + "learning_rate": 4.384870957054054e-06, + "loss": 0.1361, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2453 + }, + { + "epoch": 0.23599557628504111, + "grad_norm": 2.816432559685803, + "learning_rate": 4.384369748017498e-06, + "loss": 0.153, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2454 + }, + { + "epoch": 0.23609174400153868, + "grad_norm": 2.286039574478326, + "learning_rate": 4.38386836354087e-06, + "loss": 0.1646, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2455 + }, + { + "epoch": 0.23618791171803624, + "grad_norm": 3.3883483224720052, + "learning_rate": 4.383366803670849e-06, + "loss": 0.1575, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2456 + }, + { + "epoch": 0.23628407943453383, + "grad_norm": 2.1886008280909874, + "learning_rate": 4.382865068454133e-06, + "loss": 0.1242, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2457 + }, + { + "epoch": 0.2363802471510314, + "grad_norm": 2.03025596651272, + "learning_rate": 4.382363157937436e-06, + "loss": 0.136, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2458 + }, + { + "epoch": 0.23647641486752896, + "grad_norm": 2.8354873562199505, + "learning_rate": 4.3818610721674836e-06, + "loss": 0.1454, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2459 + }, + { + "epoch": 0.23657258258402655, + "grad_norm": 1.7516148183820337, + "learning_rate": 4.381358811191025e-06, + "loss": 0.1276, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2460 + }, + { + "epoch": 0.23666875030052412, + "grad_norm": 2.1771035063514064, + "learning_rate": 4.3808563750548205e-06, + "loss": 0.1651, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2461 + }, + { + "epoch": 0.23676491801702168, + "grad_norm": 1.8264444303902658, + "learning_rate": 4.380353763805648e-06, + "loss": 0.1274, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2462 + }, + { + "epoch": 0.23686108573351924, + "grad_norm": 1.9246213793404836, + "learning_rate": 4.379850977490303e-06, + "loss": 0.1326, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2463 + }, + { + "epoch": 0.23695725345001684, + "grad_norm": 1.8150298382301058, + "learning_rate": 4.379348016155596e-06, + "loss": 0.1469, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2464 + }, + { + "epoch": 0.2370534211665144, + "grad_norm": 2.1461343148519827, + "learning_rate": 4.378844879848354e-06, + "loss": 0.1724, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2465 + }, + { + "epoch": 0.23714958888301196, + "grad_norm": 1.7722932503105346, + "learning_rate": 4.37834156861542e-06, + "loss": 0.1538, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2466 + }, + { + "epoch": 0.23724575659950956, + "grad_norm": 2.139522455070026, + "learning_rate": 4.377838082503654e-06, + "loss": 0.1389, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2467 + }, + { + "epoch": 0.23734192431600712, + "grad_norm": 1.8464279612803574, + "learning_rate": 4.377334421559933e-06, + "loss": 0.1547, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2468 + }, + { + "epoch": 0.23743809203250468, + "grad_norm": 2.6496156952668213, + "learning_rate": 4.3768305858311465e-06, + "loss": 0.1229, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2469 + }, + { + "epoch": 0.23753425974900225, + "grad_norm": 2.6203670970678807, + "learning_rate": 4.376326575364206e-06, + "loss": 0.1364, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2470 + }, + { + "epoch": 0.23763042746549984, + "grad_norm": 1.934361096246436, + "learning_rate": 4.375822390206034e-06, + "loss": 0.1507, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2471 + }, + { + "epoch": 0.2377265951819974, + "grad_norm": 2.635360264420313, + "learning_rate": 4.375318030403573e-06, + "loss": 0.1448, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2472 + }, + { + "epoch": 0.23782276289849497, + "grad_norm": 5.69725738307451, + "learning_rate": 4.374813496003779e-06, + "loss": 0.2003, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2473 + }, + { + "epoch": 0.23791893061499256, + "grad_norm": 3.8268819797637095, + "learning_rate": 4.374308787053629e-06, + "loss": 0.1667, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2474 + }, + { + "epoch": 0.23801509833149012, + "grad_norm": 2.07266030296227, + "learning_rate": 4.373803903600108e-06, + "loss": 0.1666, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2475 + }, + { + "epoch": 0.23811126604798769, + "grad_norm": 2.5134377262570275, + "learning_rate": 4.373298845690224e-06, + "loss": 0.1329, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2476 + }, + { + "epoch": 0.23820743376448525, + "grad_norm": 4.709627118282262, + "learning_rate": 4.372793613371e-06, + "loss": 0.1472, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2477 + }, + { + "epoch": 0.23830360148098284, + "grad_norm": 2.0275230843372527, + "learning_rate": 4.372288206689475e-06, + "loss": 0.1465, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2478 + }, + { + "epoch": 0.2383997691974804, + "grad_norm": 2.9807159217740953, + "learning_rate": 4.371782625692702e-06, + "loss": 0.1284, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2479 + }, + { + "epoch": 0.23849593691397797, + "grad_norm": 1.5060605250740424, + "learning_rate": 4.3712768704277535e-06, + "loss": 0.1422, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2480 + }, + { + "epoch": 0.23859210463047556, + "grad_norm": 1.8494353627475655, + "learning_rate": 4.370770940941716e-06, + "loss": 0.1571, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2481 + }, + { + "epoch": 0.23868827234697312, + "grad_norm": 3.322901709968213, + "learning_rate": 4.3702648372816915e-06, + "loss": 0.1657, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2482 + }, + { + "epoch": 0.2387844400634707, + "grad_norm": 5.437347594045105, + "learning_rate": 4.369758559494803e-06, + "loss": 0.1709, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2483 + }, + { + "epoch": 0.23888060777996825, + "grad_norm": 5.187399778208711, + "learning_rate": 4.369252107628183e-06, + "loss": 0.1676, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2484 + }, + { + "epoch": 0.23897677549646584, + "grad_norm": 1.6077679713908195, + "learning_rate": 4.368745481728987e-06, + "loss": 0.14, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2485 + }, + { + "epoch": 0.2390729432129634, + "grad_norm": 2.9267794060906294, + "learning_rate": 4.368238681844381e-06, + "loss": 0.1406, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2486 + }, + { + "epoch": 0.23916911092946097, + "grad_norm": 1.7122693014845611, + "learning_rate": 4.3677317080215485e-06, + "loss": 0.1319, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2487 + }, + { + "epoch": 0.23926527864595856, + "grad_norm": 4.061191540362085, + "learning_rate": 4.367224560307693e-06, + "loss": 0.1553, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2488 + }, + { + "epoch": 0.23936144636245613, + "grad_norm": 3.9492250951178955, + "learning_rate": 4.366717238750029e-06, + "loss": 0.1698, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2489 + }, + { + "epoch": 0.2394576140789537, + "grad_norm": 2.1843592396658154, + "learning_rate": 4.3662097433957915e-06, + "loss": 0.1507, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2490 + }, + { + "epoch": 0.23955378179545125, + "grad_norm": 4.219883462389933, + "learning_rate": 4.365702074292227e-06, + "loss": 0.1522, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2491 + }, + { + "epoch": 0.23964994951194885, + "grad_norm": 2.928625612709626, + "learning_rate": 4.365194231486604e-06, + "loss": 0.1227, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2492 + }, + { + "epoch": 0.2397461172284464, + "grad_norm": 1.5421119727875938, + "learning_rate": 4.364686215026203e-06, + "loss": 0.1235, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2493 + }, + { + "epoch": 0.23984228494494397, + "grad_norm": 2.605909328536507, + "learning_rate": 4.364178024958321e-06, + "loss": 0.1278, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2494 + }, + { + "epoch": 0.23993845266144156, + "grad_norm": 2.0364426888683322, + "learning_rate": 4.363669661330272e-06, + "loss": 0.1541, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2495 + }, + { + "epoch": 0.24003462037793913, + "grad_norm": 1.7043039634305928, + "learning_rate": 4.363161124189387e-06, + "loss": 0.1405, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2496 + }, + { + "epoch": 0.2401307880944367, + "grad_norm": 1.701190780227914, + "learning_rate": 4.362652413583012e-06, + "loss": 0.1519, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2497 + }, + { + "epoch": 0.24022695581093426, + "grad_norm": 1.9782446734933237, + "learning_rate": 4.362143529558508e-06, + "loss": 0.1461, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2498 + }, + { + "epoch": 0.24032312352743185, + "grad_norm": 1.5818701258637666, + "learning_rate": 4.361634472163255e-06, + "loss": 0.1356, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2499 + }, + { + "epoch": 0.2404192912439294, + "grad_norm": 2.648704182846576, + "learning_rate": 4.361125241444647e-06, + "loss": 0.1439, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2500 + }, + { + "epoch": 0.24051545896042698, + "grad_norm": 2.3854752444503573, + "learning_rate": 4.3606158374500955e-06, + "loss": 0.1552, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2501 + }, + { + "epoch": 0.24061162667692457, + "grad_norm": 2.8796184103435567, + "learning_rate": 4.360106260227027e-06, + "loss": 0.1646, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2502 + }, + { + "epoch": 0.24070779439342213, + "grad_norm": 2.222867184025376, + "learning_rate": 4.3595965098228846e-06, + "loss": 0.1405, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2503 + }, + { + "epoch": 0.2408039621099197, + "grad_norm": 2.5966331053876446, + "learning_rate": 4.359086586285127e-06, + "loss": 0.1558, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2504 + }, + { + "epoch": 0.24090012982641726, + "grad_norm": 3.2474130681113915, + "learning_rate": 4.358576489661229e-06, + "loss": 0.1354, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2505 + }, + { + "epoch": 0.24099629754291485, + "grad_norm": 2.3535439521805097, + "learning_rate": 4.358066219998684e-06, + "loss": 0.1549, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2506 + }, + { + "epoch": 0.24109246525941241, + "grad_norm": 2.204408409025312, + "learning_rate": 4.357555777344999e-06, + "loss": 0.1417, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2507 + }, + { + "epoch": 0.24118863297590998, + "grad_norm": 2.4126776989582384, + "learning_rate": 4.357045161747696e-06, + "loss": 0.1683, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2508 + }, + { + "epoch": 0.24128480069240757, + "grad_norm": 1.365741527948637, + "learning_rate": 4.356534373254316e-06, + "loss": 0.1125, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2509 + }, + { + "epoch": 0.24138096840890513, + "grad_norm": 1.688895636305673, + "learning_rate": 4.356023411912415e-06, + "loss": 0.1213, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2510 + }, + { + "epoch": 0.2414771361254027, + "grad_norm": 1.45180471259398, + "learning_rate": 4.355512277769565e-06, + "loss": 0.1367, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2511 + }, + { + "epoch": 0.24157330384190026, + "grad_norm": 2.4158561707882162, + "learning_rate": 4.355000970873352e-06, + "loss": 0.1516, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2512 + }, + { + "epoch": 0.24166947155839785, + "grad_norm": 1.6540879253396779, + "learning_rate": 4.354489491271383e-06, + "loss": 0.1313, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2513 + }, + { + "epoch": 0.24176563927489542, + "grad_norm": 2.474217459872148, + "learning_rate": 4.3539778390112765e-06, + "loss": 0.1368, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2514 + }, + { + "epoch": 0.24186180699139298, + "grad_norm": 1.6868201083745682, + "learning_rate": 4.353466014140669e-06, + "loss": 0.1375, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2515 + }, + { + "epoch": 0.24195797470789057, + "grad_norm": 2.263911063078163, + "learning_rate": 4.352954016707213e-06, + "loss": 0.134, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2516 + }, + { + "epoch": 0.24205414242438814, + "grad_norm": 2.044378804946721, + "learning_rate": 4.352441846758576e-06, + "loss": 0.1287, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2517 + }, + { + "epoch": 0.2421503101408857, + "grad_norm": 1.5913258048638823, + "learning_rate": 4.3519295043424445e-06, + "loss": 0.1375, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2518 + }, + { + "epoch": 0.24224647785738326, + "grad_norm": 2.374445120517673, + "learning_rate": 4.351416989506517e-06, + "loss": 0.1754, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2519 + }, + { + "epoch": 0.24234264557388085, + "grad_norm": 2.726777204624261, + "learning_rate": 4.350904302298511e-06, + "loss": 0.1305, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2520 + }, + { + "epoch": 0.24243881329037842, + "grad_norm": 3.878943889013396, + "learning_rate": 4.35039144276616e-06, + "loss": 0.144, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2521 + }, + { + "epoch": 0.24253498100687598, + "grad_norm": 2.2244181340252536, + "learning_rate": 4.34987841095721e-06, + "loss": 0.1313, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2522 + }, + { + "epoch": 0.24263114872337357, + "grad_norm": 2.7444491671710947, + "learning_rate": 4.3493652069194284e-06, + "loss": 0.1867, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2523 + }, + { + "epoch": 0.24272731643987114, + "grad_norm": 1.4848763420379776, + "learning_rate": 4.348851830700594e-06, + "loss": 0.1187, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2524 + }, + { + "epoch": 0.2428234841563687, + "grad_norm": 1.900510732434665, + "learning_rate": 4.348338282348504e-06, + "loss": 0.1484, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2525 + }, + { + "epoch": 0.24291965187286627, + "grad_norm": 1.9639143116668212, + "learning_rate": 4.347824561910973e-06, + "loss": 0.1713, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2526 + }, + { + "epoch": 0.24301581958936386, + "grad_norm": 4.377090753990501, + "learning_rate": 4.347310669435827e-06, + "loss": 0.1631, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2527 + }, + { + "epoch": 0.24311198730586142, + "grad_norm": 1.4414680313197747, + "learning_rate": 4.346796604970913e-06, + "loss": 0.1317, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2528 + }, + { + "epoch": 0.24320815502235898, + "grad_norm": 1.7436549353944253, + "learning_rate": 4.34628236856409e-06, + "loss": 0.1638, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2529 + }, + { + "epoch": 0.24330432273885658, + "grad_norm": 1.332937201534459, + "learning_rate": 4.3457679602632364e-06, + "loss": 0.1455, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2530 + }, + { + "epoch": 0.24340049045535414, + "grad_norm": 1.9333414492100809, + "learning_rate": 4.345253380116245e-06, + "loss": 0.14, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2531 + }, + { + "epoch": 0.2434966581718517, + "grad_norm": 1.7561882391687003, + "learning_rate": 4.344738628171024e-06, + "loss": 0.137, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2532 + }, + { + "epoch": 0.24359282588834927, + "grad_norm": 1.8372722680053983, + "learning_rate": 4.344223704475497e-06, + "loss": 0.1756, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2533 + }, + { + "epoch": 0.24368899360484686, + "grad_norm": 3.4673564351563058, + "learning_rate": 4.343708609077607e-06, + "loss": 0.1522, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2534 + }, + { + "epoch": 0.24378516132134442, + "grad_norm": 2.0985587551333253, + "learning_rate": 4.34319334202531e-06, + "loss": 0.1536, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2535 + }, + { + "epoch": 0.243881329037842, + "grad_norm": 1.4359749085990985, + "learning_rate": 4.34267790336658e-06, + "loss": 0.1272, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2536 + }, + { + "epoch": 0.24397749675433958, + "grad_norm": 2.2421760176124694, + "learning_rate": 4.342162293149403e-06, + "loss": 0.1415, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2537 + }, + { + "epoch": 0.24407366447083714, + "grad_norm": 1.4596639529556734, + "learning_rate": 4.341646511421786e-06, + "loss": 0.1285, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2538 + }, + { + "epoch": 0.2441698321873347, + "grad_norm": 1.9138239899878153, + "learning_rate": 4.341130558231749e-06, + "loss": 0.1688, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2539 + }, + { + "epoch": 0.24426599990383227, + "grad_norm": 1.7358024538470695, + "learning_rate": 4.3406144336273284e-06, + "loss": 0.1524, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2540 + }, + { + "epoch": 0.24436216762032986, + "grad_norm": 1.6957848040729837, + "learning_rate": 4.340098137656577e-06, + "loss": 0.1561, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2541 + }, + { + "epoch": 0.24445833533682743, + "grad_norm": 1.772783835928572, + "learning_rate": 4.3395816703675646e-06, + "loss": 0.1468, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2542 + }, + { + "epoch": 0.244554503053325, + "grad_norm": 2.560168812329499, + "learning_rate": 4.339065031808374e-06, + "loss": 0.1421, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2543 + }, + { + "epoch": 0.24465067076982258, + "grad_norm": 1.7916419370644503, + "learning_rate": 4.338548222027107e-06, + "loss": 0.1367, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2544 + }, + { + "epoch": 0.24474683848632015, + "grad_norm": 1.9262664649341352, + "learning_rate": 4.338031241071878e-06, + "loss": 0.1489, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2545 + }, + { + "epoch": 0.2448430062028177, + "grad_norm": 3.0379885207557766, + "learning_rate": 4.337514088990822e-06, + "loss": 0.1752, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2546 + }, + { + "epoch": 0.24493917391931527, + "grad_norm": 1.4250907727776025, + "learning_rate": 4.3369967658320855e-06, + "loss": 0.1104, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2547 + }, + { + "epoch": 0.24503534163581286, + "grad_norm": 2.1639889092793783, + "learning_rate": 4.336479271643833e-06, + "loss": 0.1384, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2548 + }, + { + "epoch": 0.24513150935231043, + "grad_norm": 2.44040006734636, + "learning_rate": 4.335961606474246e-06, + "loss": 0.176, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2549 + }, + { + "epoch": 0.245227677068808, + "grad_norm": 2.8684365159344862, + "learning_rate": 4.335443770371519e-06, + "loss": 0.1582, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2550 + }, + { + "epoch": 0.24532384478530558, + "grad_norm": 2.820282804193236, + "learning_rate": 4.3349257633838645e-06, + "loss": 0.1585, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2551 + }, + { + "epoch": 0.24542001250180315, + "grad_norm": 2.153053896998703, + "learning_rate": 4.33440758555951e-06, + "loss": 0.1508, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2552 + }, + { + "epoch": 0.2455161802183007, + "grad_norm": 2.572874703244823, + "learning_rate": 4.3338892369467e-06, + "loss": 0.1388, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2553 + }, + { + "epoch": 0.24561234793479828, + "grad_norm": 2.6291830048878584, + "learning_rate": 4.333370717593694e-06, + "loss": 0.1295, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2554 + }, + { + "epoch": 0.24570851565129587, + "grad_norm": 1.902833655224174, + "learning_rate": 4.332852027548768e-06, + "loss": 0.1243, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2555 + }, + { + "epoch": 0.24580468336779343, + "grad_norm": 1.7651874872446578, + "learning_rate": 4.332333166860212e-06, + "loss": 0.1819, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2556 + }, + { + "epoch": 0.245900851084291, + "grad_norm": 2.616742469475775, + "learning_rate": 4.3318141355763355e-06, + "loss": 0.14, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2557 + }, + { + "epoch": 0.24599701880078859, + "grad_norm": 1.3281736624006324, + "learning_rate": 4.33129493374546e-06, + "loss": 0.1425, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2558 + }, + { + "epoch": 0.24609318651728615, + "grad_norm": 1.6658856883260722, + "learning_rate": 4.330775561415925e-06, + "loss": 0.1456, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2559 + }, + { + "epoch": 0.2461893542337837, + "grad_norm": 2.5649623748904853, + "learning_rate": 4.330256018636086e-06, + "loss": 0.1375, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2560 + }, + { + "epoch": 0.24628552195028128, + "grad_norm": 1.8020041048524964, + "learning_rate": 4.329736305454314e-06, + "loss": 0.119, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2561 + }, + { + "epoch": 0.24638168966677887, + "grad_norm": 2.1783339609218757, + "learning_rate": 4.3292164219189945e-06, + "loss": 0.1469, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2562 + }, + { + "epoch": 0.24647785738327643, + "grad_norm": 2.100751539735779, + "learning_rate": 4.328696368078532e-06, + "loss": 0.1608, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2563 + }, + { + "epoch": 0.246574025099774, + "grad_norm": 2.5627971827171763, + "learning_rate": 4.3281761439813434e-06, + "loss": 0.147, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2564 + }, + { + "epoch": 0.2466701928162716, + "grad_norm": 2.763914859185331, + "learning_rate": 4.327655749675864e-06, + "loss": 0.1474, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2565 + }, + { + "epoch": 0.24676636053276915, + "grad_norm": 2.053317083256849, + "learning_rate": 4.327135185210543e-06, + "loss": 0.1531, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2566 + }, + { + "epoch": 0.24686252824926672, + "grad_norm": 1.7473659533242651, + "learning_rate": 4.326614450633847e-06, + "loss": 0.1618, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2567 + }, + { + "epoch": 0.24695869596576428, + "grad_norm": 2.3876364140066175, + "learning_rate": 4.326093545994258e-06, + "loss": 0.1427, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2568 + }, + { + "epoch": 0.24705486368226187, + "grad_norm": 3.397479800163436, + "learning_rate": 4.325572471340274e-06, + "loss": 0.1544, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2569 + }, + { + "epoch": 0.24715103139875944, + "grad_norm": 3.2478728618580024, + "learning_rate": 4.325051226720407e-06, + "loss": 0.1714, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2570 + }, + { + "epoch": 0.247247199115257, + "grad_norm": 2.5587016420980593, + "learning_rate": 4.324529812183188e-06, + "loss": 0.123, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2571 + }, + { + "epoch": 0.2473433668317546, + "grad_norm": 1.511011893868987, + "learning_rate": 4.32400822777716e-06, + "loss": 0.1222, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2572 + }, + { + "epoch": 0.24743953454825215, + "grad_norm": 5.129670283203776, + "learning_rate": 4.323486473550886e-06, + "loss": 0.1731, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2573 + }, + { + "epoch": 0.24753570226474972, + "grad_norm": 3.27422974573811, + "learning_rate": 4.322964549552943e-06, + "loss": 0.1379, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2574 + }, + { + "epoch": 0.24763186998124728, + "grad_norm": 1.5605959934963671, + "learning_rate": 4.322442455831923e-06, + "loss": 0.1256, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2575 + }, + { + "epoch": 0.24772803769774487, + "grad_norm": 3.189665850245758, + "learning_rate": 4.3219201924364325e-06, + "loss": 0.1551, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2576 + }, + { + "epoch": 0.24782420541424244, + "grad_norm": 2.006753589041686, + "learning_rate": 4.321397759415099e-06, + "loss": 0.156, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2577 + }, + { + "epoch": 0.24792037313074, + "grad_norm": 2.567026840248009, + "learning_rate": 4.32087515681656e-06, + "loss": 0.1483, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2578 + }, + { + "epoch": 0.2480165408472376, + "grad_norm": 3.335824855496015, + "learning_rate": 4.3203523846894715e-06, + "loss": 0.1281, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2579 + }, + { + "epoch": 0.24811270856373516, + "grad_norm": 6.088127790857105, + "learning_rate": 4.319829443082506e-06, + "loss": 0.1482, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2580 + }, + { + "epoch": 0.24820887628023272, + "grad_norm": 1.7663954521161667, + "learning_rate": 4.319306332044351e-06, + "loss": 0.1193, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2581 + }, + { + "epoch": 0.24830504399673028, + "grad_norm": 1.8994877275799134, + "learning_rate": 4.318783051623707e-06, + "loss": 0.1319, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2582 + }, + { + "epoch": 0.24840121171322788, + "grad_norm": 3.191962128140281, + "learning_rate": 4.3182596018692965e-06, + "loss": 0.1495, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2583 + }, + { + "epoch": 0.24849737942972544, + "grad_norm": 2.464331349724923, + "learning_rate": 4.317735982829852e-06, + "loss": 0.1251, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2584 + }, + { + "epoch": 0.248593547146223, + "grad_norm": 2.061719714269973, + "learning_rate": 4.317212194554125e-06, + "loss": 0.138, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2585 + }, + { + "epoch": 0.2486897148627206, + "grad_norm": 1.484361203489042, + "learning_rate": 4.31668823709088e-06, + "loss": 0.1012, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2586 + }, + { + "epoch": 0.24878588257921816, + "grad_norm": 2.397876250891789, + "learning_rate": 4.3161641104889005e-06, + "loss": 0.1451, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2587 + }, + { + "epoch": 0.24888205029571572, + "grad_norm": 3.730184698786333, + "learning_rate": 4.315639814796984e-06, + "loss": 0.1518, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2588 + }, + { + "epoch": 0.2489782180122133, + "grad_norm": 2.460428258659253, + "learning_rate": 4.315115350063942e-06, + "loss": 0.1599, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2589 + }, + { + "epoch": 0.24907438572871088, + "grad_norm": 2.0721186443343504, + "learning_rate": 4.3145907163386064e-06, + "loss": 0.1735, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2590 + }, + { + "epoch": 0.24917055344520844, + "grad_norm": 1.7028361656813016, + "learning_rate": 4.31406591366982e-06, + "loss": 0.134, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2591 + }, + { + "epoch": 0.249266721161706, + "grad_norm": 2.1352516110780146, + "learning_rate": 4.313540942106445e-06, + "loss": 0.127, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2592 + }, + { + "epoch": 0.2493628888782036, + "grad_norm": 3.052754502911557, + "learning_rate": 4.3130158016973555e-06, + "loss": 0.1706, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2593 + }, + { + "epoch": 0.24945905659470116, + "grad_norm": 1.7677617535170915, + "learning_rate": 4.312490492491446e-06, + "loss": 0.1467, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2594 + }, + { + "epoch": 0.24955522431119873, + "grad_norm": 1.6470182099581858, + "learning_rate": 4.311965014537623e-06, + "loss": 0.1484, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2595 + }, + { + "epoch": 0.2496513920276963, + "grad_norm": 1.5500481053151443, + "learning_rate": 4.311439367884809e-06, + "loss": 0.1378, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2596 + }, + { + "epoch": 0.24974755974419388, + "grad_norm": 1.8517786538796448, + "learning_rate": 4.310913552581945e-06, + "loss": 0.1693, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2597 + }, + { + "epoch": 0.24984372746069144, + "grad_norm": 2.1836332810675176, + "learning_rate": 4.310387568677985e-06, + "loss": 0.1493, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2598 + }, + { + "epoch": 0.249939895177189, + "grad_norm": 1.423149067286036, + "learning_rate": 4.3098614162219e-06, + "loss": 0.1428, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2599 + }, + { + "epoch": 0.2500360628936866, + "grad_norm": 2.2187905076530274, + "learning_rate": 4.309335095262675e-06, + "loss": 0.137, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2600 + }, + { + "epoch": 0.25013223061018414, + "grad_norm": 1.8881430612852608, + "learning_rate": 4.308808605849315e-06, + "loss": 0.1501, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2601 + }, + { + "epoch": 0.25022839832668176, + "grad_norm": 1.5868486886699973, + "learning_rate": 4.308281948030834e-06, + "loss": 0.1788, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2602 + }, + { + "epoch": 0.2503245660431793, + "grad_norm": 3.303216832935008, + "learning_rate": 4.307755121856266e-06, + "loss": 0.1412, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2603 + }, + { + "epoch": 0.2504207337596769, + "grad_norm": 5.243216779450708, + "learning_rate": 4.307228127374662e-06, + "loss": 0.1671, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2604 + }, + { + "epoch": 0.25051690147617445, + "grad_norm": 2.738802144056169, + "learning_rate": 4.306700964635085e-06, + "loss": 0.1585, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2605 + }, + { + "epoch": 0.250613069192672, + "grad_norm": 1.8673838898017294, + "learning_rate": 4.306173633686617e-06, + "loss": 0.1402, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2606 + }, + { + "epoch": 0.2507092369091696, + "grad_norm": 2.7164165285986717, + "learning_rate": 4.305646134578351e-06, + "loss": 0.1365, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2607 + }, + { + "epoch": 0.25080540462566714, + "grad_norm": 3.794079473821737, + "learning_rate": 4.305118467359402e-06, + "loss": 0.1414, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2608 + }, + { + "epoch": 0.25090157234216476, + "grad_norm": 1.9101296871122677, + "learning_rate": 4.3045906320788945e-06, + "loss": 0.1335, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2609 + }, + { + "epoch": 0.2509977400586623, + "grad_norm": 2.2738965532397, + "learning_rate": 4.304062628785973e-06, + "loss": 0.1679, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2610 + }, + { + "epoch": 0.2510939077751599, + "grad_norm": 4.307923842182, + "learning_rate": 4.303534457529796e-06, + "loss": 0.1241, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2611 + }, + { + "epoch": 0.25119007549165745, + "grad_norm": 3.9438583954809205, + "learning_rate": 4.303006118359536e-06, + "loss": 0.1554, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2612 + }, + { + "epoch": 0.251286243208155, + "grad_norm": 2.5951830279407653, + "learning_rate": 4.302477611324386e-06, + "loss": 0.1448, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2613 + }, + { + "epoch": 0.2513824109246526, + "grad_norm": 1.9279830271500462, + "learning_rate": 4.301948936473549e-06, + "loss": 0.1586, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2614 + }, + { + "epoch": 0.25147857864115014, + "grad_norm": 4.002365447534261, + "learning_rate": 4.301420093856247e-06, + "loss": 0.1583, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2615 + }, + { + "epoch": 0.25157474635764776, + "grad_norm": 3.0358263943416213, + "learning_rate": 4.300891083521717e-06, + "loss": 0.1499, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2616 + }, + { + "epoch": 0.2516709140741453, + "grad_norm": 2.233097716035774, + "learning_rate": 4.300361905519211e-06, + "loss": 0.1417, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2617 + }, + { + "epoch": 0.2517670817906429, + "grad_norm": 2.1098077054763227, + "learning_rate": 4.2998325598979955e-06, + "loss": 0.127, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2618 + }, + { + "epoch": 0.25186324950714045, + "grad_norm": 2.6876190188487206, + "learning_rate": 4.299303046707356e-06, + "loss": 0.1341, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2619 + }, + { + "epoch": 0.251959417223638, + "grad_norm": 1.9513039055948722, + "learning_rate": 4.298773365996591e-06, + "loss": 0.1438, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2620 + }, + { + "epoch": 0.2520555849401356, + "grad_norm": 1.920737002246904, + "learning_rate": 4.298243517815016e-06, + "loss": 0.1449, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2621 + }, + { + "epoch": 0.25215175265663314, + "grad_norm": 2.0507771601325917, + "learning_rate": 4.297713502211959e-06, + "loss": 0.1288, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2622 + }, + { + "epoch": 0.25224792037313076, + "grad_norm": 1.8872728950597724, + "learning_rate": 4.29718331923677e-06, + "loss": 0.1479, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2623 + }, + { + "epoch": 0.2523440880896283, + "grad_norm": 2.598787861946971, + "learning_rate": 4.296652968938807e-06, + "loss": 0.1775, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2624 + }, + { + "epoch": 0.2524402558061259, + "grad_norm": 1.652104398062554, + "learning_rate": 4.296122451367448e-06, + "loss": 0.1614, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2625 + }, + { + "epoch": 0.25253642352262345, + "grad_norm": 2.1767033524531976, + "learning_rate": 4.295591766572086e-06, + "loss": 0.151, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2626 + }, + { + "epoch": 0.252632591239121, + "grad_norm": 1.841208752475105, + "learning_rate": 4.295060914602129e-06, + "loss": 0.1504, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2627 + }, + { + "epoch": 0.2527287589556186, + "grad_norm": 3.727333118639568, + "learning_rate": 4.294529895507e-06, + "loss": 0.1539, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2628 + }, + { + "epoch": 0.25282492667211615, + "grad_norm": 1.4624087672723, + "learning_rate": 4.29399870933614e-06, + "loss": 0.1316, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2629 + }, + { + "epoch": 0.25292109438861377, + "grad_norm": 3.538248102177666, + "learning_rate": 4.293467356139003e-06, + "loss": 0.1299, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2630 + }, + { + "epoch": 0.25301726210511133, + "grad_norm": 2.020974887557321, + "learning_rate": 4.29293583596506e-06, + "loss": 0.1405, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2631 + }, + { + "epoch": 0.2531134298216089, + "grad_norm": 1.9871004700676411, + "learning_rate": 4.2924041488637966e-06, + "loss": 0.1516, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2632 + }, + { + "epoch": 0.25320959753810646, + "grad_norm": 1.509089340421768, + "learning_rate": 4.291872294884714e-06, + "loss": 0.1461, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2633 + }, + { + "epoch": 0.253305765254604, + "grad_norm": 2.803956391426246, + "learning_rate": 4.29134027407733e-06, + "loss": 0.1833, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2634 + }, + { + "epoch": 0.2534019329711016, + "grad_norm": 1.5205482220233464, + "learning_rate": 4.290808086491176e-06, + "loss": 0.1423, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2635 + }, + { + "epoch": 0.25349810068759915, + "grad_norm": 1.467814445663865, + "learning_rate": 4.2902757321758016e-06, + "loss": 0.1179, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2636 + }, + { + "epoch": 0.25359426840409677, + "grad_norm": 1.7164067249268284, + "learning_rate": 4.28974321118077e-06, + "loss": 0.1688, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2637 + }, + { + "epoch": 0.25369043612059433, + "grad_norm": 1.9230791023932665, + "learning_rate": 4.28921052355566e-06, + "loss": 0.1556, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2638 + }, + { + "epoch": 0.2537866038370919, + "grad_norm": 1.5810627589622641, + "learning_rate": 4.288677669350066e-06, + "loss": 0.124, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2639 + }, + { + "epoch": 0.25388277155358946, + "grad_norm": 2.141927089224831, + "learning_rate": 4.288144648613601e-06, + "loss": 0.16, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2640 + }, + { + "epoch": 0.253978939270087, + "grad_norm": 1.5244707567249738, + "learning_rate": 4.2876114613958865e-06, + "loss": 0.1261, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2641 + }, + { + "epoch": 0.2540751069865846, + "grad_norm": 2.1451444735091716, + "learning_rate": 4.287078107746566e-06, + "loss": 0.1222, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2642 + }, + { + "epoch": 0.25417127470308215, + "grad_norm": 1.9887971056670857, + "learning_rate": 4.286544587715297e-06, + "loss": 0.1188, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2643 + }, + { + "epoch": 0.25426744241957977, + "grad_norm": 3.785264129879613, + "learning_rate": 4.286010901351749e-06, + "loss": 0.1664, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2644 + }, + { + "epoch": 0.25436361013607733, + "grad_norm": 1.4132539759904736, + "learning_rate": 4.285477048705612e-06, + "loss": 0.1029, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2645 + }, + { + "epoch": 0.2544597778525749, + "grad_norm": 1.7372117580137638, + "learning_rate": 4.2849430298265885e-06, + "loss": 0.1352, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2646 + }, + { + "epoch": 0.25455594556907246, + "grad_norm": 2.356265546701071, + "learning_rate": 4.284408844764398e-06, + "loss": 0.1455, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2647 + }, + { + "epoch": 0.25465211328557, + "grad_norm": 2.609034262594304, + "learning_rate": 4.283874493568772e-06, + "loss": 0.1207, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2648 + }, + { + "epoch": 0.2547482810020676, + "grad_norm": 2.7484262375153072, + "learning_rate": 4.283339976289463e-06, + "loss": 0.1505, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2649 + }, + { + "epoch": 0.25484444871856515, + "grad_norm": 2.1950123251081375, + "learning_rate": 4.282805292976234e-06, + "loss": 0.1107, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2650 + }, + { + "epoch": 0.2549406164350628, + "grad_norm": 1.6178172601847265, + "learning_rate": 4.282270443678867e-06, + "loss": 0.1226, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2651 + }, + { + "epoch": 0.25503678415156034, + "grad_norm": 2.3244863769219655, + "learning_rate": 4.281735428447158e-06, + "loss": 0.1681, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2652 + }, + { + "epoch": 0.2551329518680579, + "grad_norm": 3.329136063551712, + "learning_rate": 4.281200247330917e-06, + "loss": 0.1444, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2653 + }, + { + "epoch": 0.25522911958455546, + "grad_norm": 2.5820755390744536, + "learning_rate": 4.280664900379972e-06, + "loss": 0.1749, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2654 + }, + { + "epoch": 0.255325287301053, + "grad_norm": 1.973164090031972, + "learning_rate": 4.280129387644164e-06, + "loss": 0.1827, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2655 + }, + { + "epoch": 0.2554214550175506, + "grad_norm": 1.7327976491111527, + "learning_rate": 4.279593709173352e-06, + "loss": 0.1474, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2656 + }, + { + "epoch": 0.25551762273404816, + "grad_norm": 2.318893536063179, + "learning_rate": 4.279057865017408e-06, + "loss": 0.1381, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2657 + }, + { + "epoch": 0.2556137904505458, + "grad_norm": 3.6293058138814698, + "learning_rate": 4.278521855226223e-06, + "loss": 0.1189, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2658 + }, + { + "epoch": 0.25570995816704334, + "grad_norm": 1.4507622707147598, + "learning_rate": 4.277985679849699e-06, + "loss": 0.1639, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2659 + }, + { + "epoch": 0.2558061258835409, + "grad_norm": 1.8135222627986023, + "learning_rate": 4.277449338937754e-06, + "loss": 0.1401, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2660 + }, + { + "epoch": 0.25590229360003847, + "grad_norm": 1.3569356257653054, + "learning_rate": 4.276912832540328e-06, + "loss": 0.1318, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2661 + }, + { + "epoch": 0.25599846131653603, + "grad_norm": 2.5978162211665614, + "learning_rate": 4.276376160707365e-06, + "loss": 0.1485, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2662 + }, + { + "epoch": 0.2560946290330336, + "grad_norm": 2.888783935394536, + "learning_rate": 4.275839323488834e-06, + "loss": 0.1695, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2663 + }, + { + "epoch": 0.25619079674953116, + "grad_norm": 1.6365485473880375, + "learning_rate": 4.2753023209347164e-06, + "loss": 0.1427, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2664 + }, + { + "epoch": 0.2562869644660288, + "grad_norm": 2.2905847670286317, + "learning_rate": 4.274765153095008e-06, + "loss": 0.1627, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2665 + }, + { + "epoch": 0.25638313218252634, + "grad_norm": 1.75095803157189, + "learning_rate": 4.274227820019718e-06, + "loss": 0.1534, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2666 + }, + { + "epoch": 0.2564792998990239, + "grad_norm": 2.147303150046039, + "learning_rate": 4.273690321758879e-06, + "loss": 0.1285, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2667 + }, + { + "epoch": 0.25657546761552147, + "grad_norm": 1.3189044282134001, + "learning_rate": 4.2731526583625284e-06, + "loss": 0.1161, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2668 + }, + { + "epoch": 0.25667163533201903, + "grad_norm": 1.5006119316269793, + "learning_rate": 4.272614829880728e-06, + "loss": 0.1362, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2669 + }, + { + "epoch": 0.2567678030485166, + "grad_norm": 2.3773193368178833, + "learning_rate": 4.272076836363548e-06, + "loss": 0.1354, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2670 + }, + { + "epoch": 0.25686397076501416, + "grad_norm": 2.0050038483162127, + "learning_rate": 4.271538677861079e-06, + "loss": 0.1304, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2671 + }, + { + "epoch": 0.2569601384815118, + "grad_norm": 1.6298132599729567, + "learning_rate": 4.2710003544234255e-06, + "loss": 0.1282, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2672 + }, + { + "epoch": 0.25705630619800934, + "grad_norm": 2.3035408062190057, + "learning_rate": 4.270461866100705e-06, + "loss": 0.1143, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2673 + }, + { + "epoch": 0.2571524739145069, + "grad_norm": 2.4120107015988848, + "learning_rate": 4.269923212943054e-06, + "loss": 0.166, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2674 + }, + { + "epoch": 0.25724864163100447, + "grad_norm": 2.0965789572171847, + "learning_rate": 4.269384395000622e-06, + "loss": 0.1259, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2675 + }, + { + "epoch": 0.25734480934750203, + "grad_norm": 2.0656897113167996, + "learning_rate": 4.268845412323573e-06, + "loss": 0.1157, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2676 + }, + { + "epoch": 0.2574409770639996, + "grad_norm": 2.5941781900014522, + "learning_rate": 4.268306264962091e-06, + "loss": 0.1155, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2677 + }, + { + "epoch": 0.25753714478049716, + "grad_norm": 3.895980566040003, + "learning_rate": 4.267766952966369e-06, + "loss": 0.1475, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2678 + }, + { + "epoch": 0.2576333124969948, + "grad_norm": 1.8519642774116114, + "learning_rate": 4.26722747638662e-06, + "loss": 0.1502, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2679 + }, + { + "epoch": 0.25772948021349235, + "grad_norm": 2.030945972001445, + "learning_rate": 4.266687835273071e-06, + "loss": 0.1498, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2680 + }, + { + "epoch": 0.2578256479299899, + "grad_norm": 2.2832698021423834, + "learning_rate": 4.266148029675963e-06, + "loss": 0.1532, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2681 + }, + { + "epoch": 0.2579218156464875, + "grad_norm": 2.6911973405580456, + "learning_rate": 4.265608059645554e-06, + "loss": 0.1488, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2682 + }, + { + "epoch": 0.25801798336298504, + "grad_norm": 1.7197155849721681, + "learning_rate": 4.265067925232117e-06, + "loss": 0.1491, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2683 + }, + { + "epoch": 0.2581141510794826, + "grad_norm": 2.6243821271220207, + "learning_rate": 4.26452762648594e-06, + "loss": 0.1298, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2684 + }, + { + "epoch": 0.25821031879598016, + "grad_norm": 3.166740199028551, + "learning_rate": 4.263987163457326e-06, + "loss": 0.1461, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2685 + }, + { + "epoch": 0.2583064865124778, + "grad_norm": 2.781670505714261, + "learning_rate": 4.263446536196593e-06, + "loss": 0.1138, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2686 + }, + { + "epoch": 0.25840265422897535, + "grad_norm": 1.3533631975605662, + "learning_rate": 4.262905744754075e-06, + "loss": 0.1298, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2687 + }, + { + "epoch": 0.2584988219454729, + "grad_norm": 1.767250616046382, + "learning_rate": 4.262364789180123e-06, + "loss": 0.1579, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2688 + }, + { + "epoch": 0.2585949896619705, + "grad_norm": 2.9856772641111124, + "learning_rate": 4.2618236695251e-06, + "loss": 0.1827, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2689 + }, + { + "epoch": 0.25869115737846804, + "grad_norm": 4.314190937249563, + "learning_rate": 4.261282385839386e-06, + "loss": 0.1433, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2690 + }, + { + "epoch": 0.2587873250949656, + "grad_norm": 2.8388780936284457, + "learning_rate": 4.2607409381733756e-06, + "loss": 0.1578, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2691 + }, + { + "epoch": 0.25888349281146317, + "grad_norm": 1.6389394521884866, + "learning_rate": 4.26019932657748e-06, + "loss": 0.1225, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2692 + }, + { + "epoch": 0.2589796605279608, + "grad_norm": 2.5002297855384703, + "learning_rate": 4.259657551102123e-06, + "loss": 0.1378, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2693 + }, + { + "epoch": 0.25907582824445835, + "grad_norm": 3.1660713361437587, + "learning_rate": 4.259115611797749e-06, + "loss": 0.1687, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2694 + }, + { + "epoch": 0.2591719959609559, + "grad_norm": 2.7178988584776715, + "learning_rate": 4.258573508714809e-06, + "loss": 0.1754, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2695 + }, + { + "epoch": 0.2592681636774535, + "grad_norm": 2.6782639379864137, + "learning_rate": 4.258031241903778e-06, + "loss": 0.1443, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2696 + }, + { + "epoch": 0.25936433139395104, + "grad_norm": 1.629117763967276, + "learning_rate": 4.2574888114151415e-06, + "loss": 0.1333, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2697 + }, + { + "epoch": 0.2594604991104486, + "grad_norm": 2.9441984314592116, + "learning_rate": 4.256946217299402e-06, + "loss": 0.1317, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2698 + }, + { + "epoch": 0.25955666682694617, + "grad_norm": 1.6117613037812275, + "learning_rate": 4.256403459607075e-06, + "loss": 0.1636, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2699 + }, + { + "epoch": 0.2596528345434438, + "grad_norm": 1.7254553469957534, + "learning_rate": 4.255860538388694e-06, + "loss": 0.1454, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2700 + }, + { + "epoch": 0.25974900225994135, + "grad_norm": 2.1482788252336498, + "learning_rate": 4.255317453694806e-06, + "loss": 0.1597, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2701 + }, + { + "epoch": 0.2598451699764389, + "grad_norm": 1.4360876000400473, + "learning_rate": 4.254774205575974e-06, + "loss": 0.1068, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2702 + }, + { + "epoch": 0.2599413376929365, + "grad_norm": 1.8000017742297088, + "learning_rate": 4.254230794082775e-06, + "loss": 0.1551, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2703 + }, + { + "epoch": 0.26003750540943404, + "grad_norm": 1.8801438027705186, + "learning_rate": 4.253687219265803e-06, + "loss": 0.1593, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2704 + }, + { + "epoch": 0.2601336731259316, + "grad_norm": 1.6270240163582401, + "learning_rate": 4.2531434811756675e-06, + "loss": 0.1125, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2705 + }, + { + "epoch": 0.26022984084242917, + "grad_norm": 2.0985826305838606, + "learning_rate": 4.252599579862989e-06, + "loss": 0.1041, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2706 + }, + { + "epoch": 0.2603260085589268, + "grad_norm": 3.99041705396002, + "learning_rate": 4.252055515378409e-06, + "loss": 0.1508, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2707 + }, + { + "epoch": 0.26042217627542436, + "grad_norm": 2.034709332623861, + "learning_rate": 4.2515112877725794e-06, + "loss": 0.1216, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2708 + }, + { + "epoch": 0.2605183439919219, + "grad_norm": 1.9909801609977409, + "learning_rate": 4.2509668970961706e-06, + "loss": 0.1649, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2709 + }, + { + "epoch": 0.2606145117084195, + "grad_norm": 1.7944979615688248, + "learning_rate": 4.250422343399867e-06, + "loss": 0.136, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2710 + }, + { + "epoch": 0.26071067942491705, + "grad_norm": 2.559171715474942, + "learning_rate": 4.249877626734366e-06, + "loss": 0.1322, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2711 + }, + { + "epoch": 0.2608068471414146, + "grad_norm": 2.997656229198389, + "learning_rate": 4.249332747150386e-06, + "loss": 0.1308, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2712 + }, + { + "epoch": 0.2609030148579122, + "grad_norm": 2.5109750893491505, + "learning_rate": 4.248787704698653e-06, + "loss": 0.1627, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2713 + }, + { + "epoch": 0.2609991825744098, + "grad_norm": 3.8791492777465444, + "learning_rate": 4.2482424994299145e-06, + "loss": 0.1524, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2714 + }, + { + "epoch": 0.26109535029090736, + "grad_norm": 3.9011871795902686, + "learning_rate": 4.24769713139493e-06, + "loss": 0.1539, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2715 + }, + { + "epoch": 0.2611915180074049, + "grad_norm": 1.8094027424991646, + "learning_rate": 4.247151600644474e-06, + "loss": 0.1509, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2716 + }, + { + "epoch": 0.2612876857239025, + "grad_norm": 3.0984798056447262, + "learning_rate": 4.246605907229337e-06, + "loss": 0.1523, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2717 + }, + { + "epoch": 0.26138385344040005, + "grad_norm": 2.017979572968647, + "learning_rate": 4.246060051200325e-06, + "loss": 0.168, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2718 + }, + { + "epoch": 0.2614800211568976, + "grad_norm": 1.5031861721472901, + "learning_rate": 4.24551403260826e-06, + "loss": 0.146, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2719 + }, + { + "epoch": 0.2615761888733952, + "grad_norm": 1.6983333253331283, + "learning_rate": 4.244967851503975e-06, + "loss": 0.129, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2720 + }, + { + "epoch": 0.2616723565898928, + "grad_norm": 2.9079378639239404, + "learning_rate": 4.244421507938323e-06, + "loss": 0.1665, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2721 + }, + { + "epoch": 0.26176852430639036, + "grad_norm": 1.854611354651322, + "learning_rate": 4.2438750019621705e-06, + "loss": 0.135, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2722 + }, + { + "epoch": 0.2618646920228879, + "grad_norm": 1.1801049505506078, + "learning_rate": 4.243328333626398e-06, + "loss": 0.1026, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2723 + }, + { + "epoch": 0.2619608597393855, + "grad_norm": 1.7100764510476376, + "learning_rate": 4.242781502981901e-06, + "loss": 0.1585, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2724 + }, + { + "epoch": 0.26205702745588305, + "grad_norm": 5.0209376850946645, + "learning_rate": 4.242234510079591e-06, + "loss": 0.1457, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2725 + }, + { + "epoch": 0.2621531951723806, + "grad_norm": 2.006565309160717, + "learning_rate": 4.2416873549703966e-06, + "loss": 0.1236, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2726 + }, + { + "epoch": 0.2622493628888782, + "grad_norm": 2.565723200127062, + "learning_rate": 4.2411400377052585e-06, + "loss": 0.1523, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2727 + }, + { + "epoch": 0.2623455306053758, + "grad_norm": 2.370431921133859, + "learning_rate": 4.240592558335131e-06, + "loss": 0.1295, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2728 + }, + { + "epoch": 0.26244169832187336, + "grad_norm": 3.811494530838555, + "learning_rate": 4.240044916910989e-06, + "loss": 0.1681, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2729 + }, + { + "epoch": 0.2625378660383709, + "grad_norm": 2.6924589916415558, + "learning_rate": 4.239497113483819e-06, + "loss": 0.1296, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2730 + }, + { + "epoch": 0.2626340337548685, + "grad_norm": 2.698961174362866, + "learning_rate": 4.238949148104623e-06, + "loss": 0.1281, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2731 + }, + { + "epoch": 0.26273020147136605, + "grad_norm": 2.2625720056120198, + "learning_rate": 4.238401020824416e-06, + "loss": 0.158, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2732 + }, + { + "epoch": 0.2628263691878636, + "grad_norm": 2.117236485481754, + "learning_rate": 4.2378527316942336e-06, + "loss": 0.1628, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2733 + }, + { + "epoch": 0.2629225369043612, + "grad_norm": 5.853631505913193, + "learning_rate": 4.23730428076512e-06, + "loss": 0.1712, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2734 + }, + { + "epoch": 0.2630187046208588, + "grad_norm": 5.838470738957785, + "learning_rate": 4.236755668088139e-06, + "loss": 0.1844, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2735 + }, + { + "epoch": 0.26311487233735636, + "grad_norm": 3.4223698046448208, + "learning_rate": 4.236206893714369e-06, + "loss": 0.1577, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2736 + }, + { + "epoch": 0.26321104005385393, + "grad_norm": 1.8156313561365645, + "learning_rate": 4.235657957694899e-06, + "loss": 0.145, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2737 + }, + { + "epoch": 0.2633072077703515, + "grad_norm": 2.9669135021092012, + "learning_rate": 4.23510886008084e-06, + "loss": 0.1431, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2738 + }, + { + "epoch": 0.26340337548684906, + "grad_norm": 4.107284938689361, + "learning_rate": 4.2345596009233126e-06, + "loss": 0.1364, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2739 + }, + { + "epoch": 0.2634995432033466, + "grad_norm": 2.5486926400698535, + "learning_rate": 4.234010180273455e-06, + "loss": 0.1319, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2740 + }, + { + "epoch": 0.2635957109198442, + "grad_norm": 2.611064904712184, + "learning_rate": 4.233460598182419e-06, + "loss": 0.1639, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2741 + }, + { + "epoch": 0.2636918786363418, + "grad_norm": 1.7943566207197752, + "learning_rate": 4.232910854701374e-06, + "loss": 0.1671, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2742 + }, + { + "epoch": 0.26378804635283937, + "grad_norm": 2.496752717053439, + "learning_rate": 4.232360949881501e-06, + "loss": 0.1275, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2743 + }, + { + "epoch": 0.26388421406933693, + "grad_norm": 1.8196759898996469, + "learning_rate": 4.231810883773999e-06, + "loss": 0.1463, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2744 + }, + { + "epoch": 0.2639803817858345, + "grad_norm": 3.500282120655264, + "learning_rate": 4.231260656430079e-06, + "loss": 0.1632, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2745 + }, + { + "epoch": 0.26407654950233206, + "grad_norm": 1.9900762806260965, + "learning_rate": 4.2307102679009695e-06, + "loss": 0.1492, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2746 + }, + { + "epoch": 0.2641727172188296, + "grad_norm": 1.4947264563305576, + "learning_rate": 4.230159718237914e-06, + "loss": 0.1369, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2747 + }, + { + "epoch": 0.2642688849353272, + "grad_norm": 4.485141657386912, + "learning_rate": 4.229609007492169e-06, + "loss": 0.1815, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2748 + }, + { + "epoch": 0.2643650526518248, + "grad_norm": 2.103378039836295, + "learning_rate": 4.229058135715009e-06, + "loss": 0.1249, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2749 + }, + { + "epoch": 0.26446122036832237, + "grad_norm": 3.030009144625689, + "learning_rate": 4.22850710295772e-06, + "loss": 0.1297, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2750 + }, + { + "epoch": 0.26455738808481993, + "grad_norm": 2.9345010295519733, + "learning_rate": 4.227955909271604e-06, + "loss": 0.1341, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2751 + }, + { + "epoch": 0.2646535558013175, + "grad_norm": 2.095632521065474, + "learning_rate": 4.22740455470798e-06, + "loss": 0.1594, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2752 + }, + { + "epoch": 0.26474972351781506, + "grad_norm": 5.302625333639302, + "learning_rate": 4.226853039318181e-06, + "loss": 0.1555, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2753 + }, + { + "epoch": 0.2648458912343126, + "grad_norm": 3.6229572853864376, + "learning_rate": 4.226301363153553e-06, + "loss": 0.1362, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2754 + }, + { + "epoch": 0.2649420589508102, + "grad_norm": 2.0132354589356827, + "learning_rate": 4.225749526265461e-06, + "loss": 0.1614, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2755 + }, + { + "epoch": 0.2650382266673078, + "grad_norm": 3.3155258736050754, + "learning_rate": 4.2251975287052804e-06, + "loss": 0.1279, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2756 + }, + { + "epoch": 0.26513439438380537, + "grad_norm": 2.6646981151220346, + "learning_rate": 4.224645370524405e-06, + "loss": 0.1252, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2757 + }, + { + "epoch": 0.26523056210030294, + "grad_norm": 1.5357398314745834, + "learning_rate": 4.224093051774241e-06, + "loss": 0.1358, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2758 + }, + { + "epoch": 0.2653267298168005, + "grad_norm": 3.0361140382626797, + "learning_rate": 4.223540572506212e-06, + "loss": 0.1483, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2759 + }, + { + "epoch": 0.26542289753329806, + "grad_norm": 3.9940898468259167, + "learning_rate": 4.2229879327717545e-06, + "loss": 0.1466, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2760 + }, + { + "epoch": 0.2655190652497956, + "grad_norm": 2.3540191620194575, + "learning_rate": 4.222435132622322e-06, + "loss": 0.13, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2761 + }, + { + "epoch": 0.2656152329662932, + "grad_norm": 1.9177526881915317, + "learning_rate": 4.22188217210938e-06, + "loss": 0.1752, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2762 + }, + { + "epoch": 0.2657114006827908, + "grad_norm": 1.5507994699006178, + "learning_rate": 4.221329051284413e-06, + "loss": 0.1449, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2763 + }, + { + "epoch": 0.2658075683992884, + "grad_norm": 2.5134455154893973, + "learning_rate": 4.220775770198916e-06, + "loss": 0.1551, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2764 + }, + { + "epoch": 0.26590373611578594, + "grad_norm": 3.2846759023521805, + "learning_rate": 4.220222328904402e-06, + "loss": 0.1463, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2765 + }, + { + "epoch": 0.2659999038322835, + "grad_norm": 3.4324423917507287, + "learning_rate": 4.219668727452397e-06, + "loss": 0.1638, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2766 + }, + { + "epoch": 0.26609607154878107, + "grad_norm": 1.3420945918103284, + "learning_rate": 4.219114965894444e-06, + "loss": 0.1233, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2767 + }, + { + "epoch": 0.26619223926527863, + "grad_norm": 3.64692424784706, + "learning_rate": 4.218561044282099e-06, + "loss": 0.1363, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2768 + }, + { + "epoch": 0.2662884069817762, + "grad_norm": 4.692945103810281, + "learning_rate": 4.218006962666934e-06, + "loss": 0.1504, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2769 + }, + { + "epoch": 0.2663845746982738, + "grad_norm": 2.0361232032590366, + "learning_rate": 4.217452721100535e-06, + "loss": 0.1423, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2770 + }, + { + "epoch": 0.2664807424147714, + "grad_norm": 1.929289094382446, + "learning_rate": 4.2168983196345045e-06, + "loss": 0.1474, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2771 + }, + { + "epoch": 0.26657691013126894, + "grad_norm": 1.2796718978281876, + "learning_rate": 4.216343758320458e-06, + "loss": 0.1151, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2772 + }, + { + "epoch": 0.2666730778477665, + "grad_norm": 1.5904149056764068, + "learning_rate": 4.215789037210026e-06, + "loss": 0.1374, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2773 + }, + { + "epoch": 0.26676924556426407, + "grad_norm": 2.1393837828889404, + "learning_rate": 4.2152341563548565e-06, + "loss": 0.1264, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2774 + }, + { + "epoch": 0.26686541328076163, + "grad_norm": 1.820793507910899, + "learning_rate": 4.214679115806609e-06, + "loss": 0.1407, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2775 + }, + { + "epoch": 0.2669615809972592, + "grad_norm": 2.029032861252616, + "learning_rate": 4.21412391561696e-06, + "loss": 0.135, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2776 + }, + { + "epoch": 0.2670577487137568, + "grad_norm": 1.6592768887461318, + "learning_rate": 4.2135685558376e-06, + "loss": 0.1226, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2777 + }, + { + "epoch": 0.2671539164302544, + "grad_norm": 2.438788525054358, + "learning_rate": 4.213013036520234e-06, + "loss": 0.1332, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2778 + }, + { + "epoch": 0.26725008414675194, + "grad_norm": 2.3868760142367993, + "learning_rate": 4.212457357716583e-06, + "loss": 0.1354, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2779 + }, + { + "epoch": 0.2673462518632495, + "grad_norm": 2.5447156044319357, + "learning_rate": 4.211901519478382e-06, + "loss": 0.1803, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2780 + }, + { + "epoch": 0.26744241957974707, + "grad_norm": 1.816490528643606, + "learning_rate": 4.211345521857382e-06, + "loss": 0.1127, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2781 + }, + { + "epoch": 0.26753858729624463, + "grad_norm": 1.8553963594355665, + "learning_rate": 4.2107893649053465e-06, + "loss": 0.142, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2782 + }, + { + "epoch": 0.2676347550127422, + "grad_norm": 2.246019861283282, + "learning_rate": 4.210233048674056e-06, + "loss": 0.1266, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2783 + }, + { + "epoch": 0.2677309227292398, + "grad_norm": 2.851862105772791, + "learning_rate": 4.209676573215304e-06, + "loss": 0.141, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2784 + }, + { + "epoch": 0.2678270904457374, + "grad_norm": 2.6802691185565006, + "learning_rate": 4.209119938580901e-06, + "loss": 0.1611, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2785 + }, + { + "epoch": 0.26792325816223495, + "grad_norm": 2.1015809264460223, + "learning_rate": 4.208563144822673e-06, + "loss": 0.1628, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2786 + }, + { + "epoch": 0.2680194258787325, + "grad_norm": 3.033052761758936, + "learning_rate": 4.208006191992455e-06, + "loss": 0.1312, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2787 + }, + { + "epoch": 0.2681155935952301, + "grad_norm": 2.708258915254861, + "learning_rate": 4.207449080142104e-06, + "loss": 0.1484, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2788 + }, + { + "epoch": 0.26821176131172764, + "grad_norm": 1.4621304693277255, + "learning_rate": 4.206891809323488e-06, + "loss": 0.1228, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2789 + }, + { + "epoch": 0.2683079290282252, + "grad_norm": 1.524375116704816, + "learning_rate": 4.206334379588491e-06, + "loss": 0.1268, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2790 + }, + { + "epoch": 0.2684040967447228, + "grad_norm": 1.9259830528044657, + "learning_rate": 4.205776790989008e-06, + "loss": 0.168, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2791 + }, + { + "epoch": 0.2685002644612204, + "grad_norm": 5.4265577313567865, + "learning_rate": 4.205219043576955e-06, + "loss": 0.1655, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2792 + }, + { + "epoch": 0.26859643217771795, + "grad_norm": 2.9482761498686103, + "learning_rate": 4.204661137404261e-06, + "loss": 0.1387, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2793 + }, + { + "epoch": 0.2686925998942155, + "grad_norm": 3.0431284922378583, + "learning_rate": 4.204103072522866e-06, + "loss": 0.1381, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2794 + }, + { + "epoch": 0.2687887676107131, + "grad_norm": 4.49264593676043, + "learning_rate": 4.203544848984729e-06, + "loss": 0.1614, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2795 + }, + { + "epoch": 0.26888493532721064, + "grad_norm": 2.7914497378668655, + "learning_rate": 4.2029864668418195e-06, + "loss": 0.1364, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2796 + }, + { + "epoch": 0.2689811030437082, + "grad_norm": 2.2357650145107515, + "learning_rate": 4.202427926146128e-06, + "loss": 0.1553, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2797 + }, + { + "epoch": 0.2690772707602058, + "grad_norm": 1.7540042852217193, + "learning_rate": 4.201869226949654e-06, + "loss": 0.1447, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2798 + }, + { + "epoch": 0.2691734384767034, + "grad_norm": 1.360164805301451, + "learning_rate": 4.201310369304416e-06, + "loss": 0.1157, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2799 + }, + { + "epoch": 0.26926960619320095, + "grad_norm": 1.4743585380183828, + "learning_rate": 4.200751353262442e-06, + "loss": 0.1492, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2800 + }, + { + "epoch": 0.2693657739096985, + "grad_norm": 2.0216146170740226, + "learning_rate": 4.200192178875781e-06, + "loss": 0.1637, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2801 + }, + { + "epoch": 0.2694619416261961, + "grad_norm": 1.6820833910718083, + "learning_rate": 4.199632846196491e-06, + "loss": 0.1346, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2802 + }, + { + "epoch": 0.26955810934269364, + "grad_norm": 1.4702898058065177, + "learning_rate": 4.19907335527665e-06, + "loss": 0.1297, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2803 + }, + { + "epoch": 0.2696542770591912, + "grad_norm": 2.0053763878404887, + "learning_rate": 4.198513706168345e-06, + "loss": 0.1674, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2804 + }, + { + "epoch": 0.2697504447756888, + "grad_norm": 1.8514351793664416, + "learning_rate": 4.197953898923686e-06, + "loss": 0.1256, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2805 + }, + { + "epoch": 0.2698466124921864, + "grad_norm": 2.450694424561535, + "learning_rate": 4.197393933594788e-06, + "loss": 0.1508, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2806 + }, + { + "epoch": 0.26994278020868395, + "grad_norm": 2.2707360543635033, + "learning_rate": 4.196833810233786e-06, + "loss": 0.1362, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2807 + }, + { + "epoch": 0.2700389479251815, + "grad_norm": 1.9894240649663684, + "learning_rate": 4.196273528892831e-06, + "loss": 0.1119, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2808 + }, + { + "epoch": 0.2701351156416791, + "grad_norm": 1.791822567766143, + "learning_rate": 4.195713089624085e-06, + "loss": 0.1299, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2809 + }, + { + "epoch": 0.27023128335817664, + "grad_norm": 2.757476197067628, + "learning_rate": 4.195152492479727e-06, + "loss": 0.1226, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2810 + }, + { + "epoch": 0.2703274510746742, + "grad_norm": 2.0183324683291146, + "learning_rate": 4.194591737511951e-06, + "loss": 0.1524, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2811 + }, + { + "epoch": 0.2704236187911718, + "grad_norm": 2.0456463525186885, + "learning_rate": 4.194030824772964e-06, + "loss": 0.1269, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2812 + }, + { + "epoch": 0.2705197865076694, + "grad_norm": 2.2891371505620204, + "learning_rate": 4.193469754314989e-06, + "loss": 0.1386, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2813 + }, + { + "epoch": 0.27061595422416695, + "grad_norm": 2.3826233164245947, + "learning_rate": 4.192908526190262e-06, + "loss": 0.1627, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2814 + }, + { + "epoch": 0.2707121219406645, + "grad_norm": 1.7318210602740558, + "learning_rate": 4.1923471404510366e-06, + "loss": 0.1377, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2815 + }, + { + "epoch": 0.2708082896571621, + "grad_norm": 1.6893385878984357, + "learning_rate": 4.191785597149577e-06, + "loss": 0.1391, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2816 + }, + { + "epoch": 0.27090445737365965, + "grad_norm": 1.9899922961057548, + "learning_rate": 4.191223896338168e-06, + "loss": 0.1231, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2817 + }, + { + "epoch": 0.2710006250901572, + "grad_norm": 2.4905256346847637, + "learning_rate": 4.190662038069102e-06, + "loss": 0.1488, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2818 + }, + { + "epoch": 0.27109679280665483, + "grad_norm": 1.5046031150951822, + "learning_rate": 4.1901000223946905e-06, + "loss": 0.1312, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2819 + }, + { + "epoch": 0.2711929605231524, + "grad_norm": 3.3953014316950254, + "learning_rate": 4.1895378493672615e-06, + "loss": 0.1311, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2820 + }, + { + "epoch": 0.27128912823964996, + "grad_norm": 2.3267943065323973, + "learning_rate": 4.188975519039151e-06, + "loss": 0.1443, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2821 + }, + { + "epoch": 0.2713852959561475, + "grad_norm": 1.7814779323792431, + "learning_rate": 4.188413031462716e-06, + "loss": 0.1616, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2822 + }, + { + "epoch": 0.2714814636726451, + "grad_norm": 1.4093697337212987, + "learning_rate": 4.187850386690324e-06, + "loss": 0.1261, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2823 + }, + { + "epoch": 0.27157763138914265, + "grad_norm": 2.686361925045463, + "learning_rate": 4.1872875847743605e-06, + "loss": 0.1727, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2824 + }, + { + "epoch": 0.2716737991056402, + "grad_norm": 2.6866676761051016, + "learning_rate": 4.186724625767223e-06, + "loss": 0.1723, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2825 + }, + { + "epoch": 0.27176996682213783, + "grad_norm": 1.2374994214398813, + "learning_rate": 4.186161509721324e-06, + "loss": 0.1031, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2826 + }, + { + "epoch": 0.2718661345386354, + "grad_norm": 2.016996722048179, + "learning_rate": 4.185598236689092e-06, + "loss": 0.1558, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2827 + }, + { + "epoch": 0.27196230225513296, + "grad_norm": 1.562200432623705, + "learning_rate": 4.18503480672297e-06, + "loss": 0.146, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2828 + }, + { + "epoch": 0.2720584699716305, + "grad_norm": 1.5654524212112195, + "learning_rate": 4.184471219875412e-06, + "loss": 0.1541, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2829 + }, + { + "epoch": 0.2721546376881281, + "grad_norm": 1.6635429702055236, + "learning_rate": 4.183907476198893e-06, + "loss": 0.1495, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2830 + }, + { + "epoch": 0.27225080540462565, + "grad_norm": 1.8212480890656413, + "learning_rate": 4.183343575745898e-06, + "loss": 0.1183, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2831 + }, + { + "epoch": 0.2723469731211232, + "grad_norm": 2.8703762330341, + "learning_rate": 4.182779518568925e-06, + "loss": 0.1539, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2832 + }, + { + "epoch": 0.27244314083762083, + "grad_norm": 2.8474886404926347, + "learning_rate": 4.182215304720494e-06, + "loss": 0.1639, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2833 + }, + { + "epoch": 0.2725393085541184, + "grad_norm": 2.1674959663130062, + "learning_rate": 4.181650934253132e-06, + "loss": 0.1446, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2834 + }, + { + "epoch": 0.27263547627061596, + "grad_norm": 1.9751876645029418, + "learning_rate": 4.181086407219383e-06, + "loss": 0.1552, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2835 + }, + { + "epoch": 0.2727316439871135, + "grad_norm": 1.9264600429636303, + "learning_rate": 4.180521723671807e-06, + "loss": 0.1375, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2836 + }, + { + "epoch": 0.2728278117036111, + "grad_norm": 2.497783999507007, + "learning_rate": 4.179956883662979e-06, + "loss": 0.1145, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2837 + }, + { + "epoch": 0.27292397942010865, + "grad_norm": 1.8218162604923942, + "learning_rate": 4.179391887245484e-06, + "loss": 0.1855, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2838 + }, + { + "epoch": 0.2730201471366062, + "grad_norm": 2.536382057357218, + "learning_rate": 4.178826734471928e-06, + "loss": 0.1524, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2839 + }, + { + "epoch": 0.27311631485310384, + "grad_norm": 1.569953212151433, + "learning_rate": 4.178261425394926e-06, + "loss": 0.1506, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2840 + }, + { + "epoch": 0.2732124825696014, + "grad_norm": 1.4810804705171072, + "learning_rate": 4.177695960067111e-06, + "loss": 0.1386, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2841 + }, + { + "epoch": 0.27330865028609896, + "grad_norm": 1.46899293672254, + "learning_rate": 4.177130338541129e-06, + "loss": 0.1444, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2842 + }, + { + "epoch": 0.27340481800259653, + "grad_norm": 1.4572502846433968, + "learning_rate": 4.17656456086964e-06, + "loss": 0.1228, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2843 + }, + { + "epoch": 0.2735009857190941, + "grad_norm": 1.7630285425067687, + "learning_rate": 4.1759986271053215e-06, + "loss": 0.1694, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2844 + }, + { + "epoch": 0.27359715343559166, + "grad_norm": 1.529603737217085, + "learning_rate": 4.175432537300862e-06, + "loss": 0.1473, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2845 + }, + { + "epoch": 0.2736933211520892, + "grad_norm": 1.7628431960601232, + "learning_rate": 4.174866291508967e-06, + "loss": 0.1311, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2846 + }, + { + "epoch": 0.27378948886858684, + "grad_norm": 1.4767952207586335, + "learning_rate": 4.174299889782355e-06, + "loss": 0.13, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2847 + }, + { + "epoch": 0.2738856565850844, + "grad_norm": 1.5182367807632455, + "learning_rate": 4.173733332173759e-06, + "loss": 0.1417, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2848 + }, + { + "epoch": 0.27398182430158197, + "grad_norm": 1.9203959366009467, + "learning_rate": 4.1731666187359284e-06, + "loss": 0.1504, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2849 + }, + { + "epoch": 0.27407799201807953, + "grad_norm": 2.1806336528687926, + "learning_rate": 4.172599749521625e-06, + "loss": 0.1302, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2850 + }, + { + "epoch": 0.2741741597345771, + "grad_norm": 2.4859195467641078, + "learning_rate": 4.172032724583626e-06, + "loss": 0.1567, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2851 + }, + { + "epoch": 0.27427032745107466, + "grad_norm": 2.492040143477236, + "learning_rate": 4.171465543974723e-06, + "loss": 0.157, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2852 + }, + { + "epoch": 0.2743664951675722, + "grad_norm": 1.7677268295929753, + "learning_rate": 4.170898207747722e-06, + "loss": 0.1338, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2853 + }, + { + "epoch": 0.27446266288406984, + "grad_norm": 1.6954169841709383, + "learning_rate": 4.170330715955444e-06, + "loss": 0.1279, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2854 + }, + { + "epoch": 0.2745588306005674, + "grad_norm": 2.8207902855349927, + "learning_rate": 4.169763068650724e-06, + "loss": 0.1829, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2855 + }, + { + "epoch": 0.27465499831706497, + "grad_norm": 2.081281322343631, + "learning_rate": 4.1691952658864106e-06, + "loss": 0.1254, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2856 + }, + { + "epoch": 0.27475116603356253, + "grad_norm": 1.699148313552351, + "learning_rate": 4.1686273077153696e-06, + "loss": 0.128, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2857 + }, + { + "epoch": 0.2748473337500601, + "grad_norm": 3.433800663976148, + "learning_rate": 4.1680591941904765e-06, + "loss": 0.1435, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2858 + }, + { + "epoch": 0.27494350146655766, + "grad_norm": 1.4875582684666662, + "learning_rate": 4.1674909253646285e-06, + "loss": 0.1202, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2859 + }, + { + "epoch": 0.2750396691830552, + "grad_norm": 4.399257727627472, + "learning_rate": 4.16692250129073e-06, + "loss": 0.1576, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2860 + }, + { + "epoch": 0.27513583689955284, + "grad_norm": 1.4872681717415572, + "learning_rate": 4.166353922021703e-06, + "loss": 0.1173, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2861 + }, + { + "epoch": 0.2752320046160504, + "grad_norm": 2.3491445137615043, + "learning_rate": 4.165785187610484e-06, + "loss": 0.1432, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2862 + }, + { + "epoch": 0.27532817233254797, + "grad_norm": 2.041768308479665, + "learning_rate": 4.165216298110025e-06, + "loss": 0.1299, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2863 + }, + { + "epoch": 0.27542434004904554, + "grad_norm": 1.3632814231706565, + "learning_rate": 4.16464725357329e-06, + "loss": 0.1088, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2864 + }, + { + "epoch": 0.2755205077655431, + "grad_norm": 2.4407530267336277, + "learning_rate": 4.164078054053259e-06, + "loss": 0.1492, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2865 + }, + { + "epoch": 0.27561667548204066, + "grad_norm": 1.888722888779849, + "learning_rate": 4.163508699602926e-06, + "loss": 0.1448, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2866 + }, + { + "epoch": 0.2757128431985382, + "grad_norm": 1.9664922265526679, + "learning_rate": 4.162939190275301e-06, + "loss": 0.1486, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2867 + }, + { + "epoch": 0.27580901091503585, + "grad_norm": 1.8247003493431038, + "learning_rate": 4.1623695261234025e-06, + "loss": 0.1147, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2868 + }, + { + "epoch": 0.2759051786315334, + "grad_norm": 1.8124626466727962, + "learning_rate": 4.161799707200273e-06, + "loss": 0.1339, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2869 + }, + { + "epoch": 0.276001346348031, + "grad_norm": 1.8466435670736647, + "learning_rate": 4.16122973355896e-06, + "loss": 0.1529, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2870 + }, + { + "epoch": 0.27609751406452854, + "grad_norm": 2.354897034217568, + "learning_rate": 4.160659605252533e-06, + "loss": 0.1228, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2871 + }, + { + "epoch": 0.2761936817810261, + "grad_norm": 4.064713617445389, + "learning_rate": 4.160089322334071e-06, + "loss": 0.1621, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2872 + }, + { + "epoch": 0.27628984949752367, + "grad_norm": 1.9993194839685062, + "learning_rate": 4.159518884856669e-06, + "loss": 0.1495, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2873 + }, + { + "epoch": 0.27638601721402123, + "grad_norm": 1.6221894180346943, + "learning_rate": 4.158948292873436e-06, + "loss": 0.1462, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2874 + }, + { + "epoch": 0.27648218493051885, + "grad_norm": 1.8993091004194225, + "learning_rate": 4.158377546437496e-06, + "loss": 0.1108, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2875 + }, + { + "epoch": 0.2765783526470164, + "grad_norm": 1.3961567240111696, + "learning_rate": 4.1578066456019885e-06, + "loss": 0.13, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2876 + }, + { + "epoch": 0.276674520363514, + "grad_norm": 1.6899658284038193, + "learning_rate": 4.1572355904200635e-06, + "loss": 0.138, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2877 + }, + { + "epoch": 0.27677068808001154, + "grad_norm": 4.187109896985375, + "learning_rate": 4.156664380944889e-06, + "loss": 0.15, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2878 + }, + { + "epoch": 0.2768668557965091, + "grad_norm": 3.424383879100955, + "learning_rate": 4.156093017229646e-06, + "loss": 0.1492, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2879 + }, + { + "epoch": 0.27696302351300667, + "grad_norm": 2.220895727961929, + "learning_rate": 4.15552149932753e-06, + "loss": 0.1539, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2880 + }, + { + "epoch": 0.27705919122950423, + "grad_norm": 1.4595243544707248, + "learning_rate": 4.154949827291752e-06, + "loss": 0.1254, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2881 + }, + { + "epoch": 0.27715535894600185, + "grad_norm": 2.529996601379151, + "learning_rate": 4.154378001175535e-06, + "loss": 0.1566, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2882 + }, + { + "epoch": 0.2772515266624994, + "grad_norm": 4.944719547672777, + "learning_rate": 4.153806021032118e-06, + "loss": 0.1538, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2883 + }, + { + "epoch": 0.277347694378997, + "grad_norm": 3.448524129990632, + "learning_rate": 4.153233886914754e-06, + "loss": 0.121, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2884 + }, + { + "epoch": 0.27744386209549454, + "grad_norm": 2.565205084848433, + "learning_rate": 4.15266159887671e-06, + "loss": 0.118, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2885 + }, + { + "epoch": 0.2775400298119921, + "grad_norm": 3.836104177250528, + "learning_rate": 4.152089156971268e-06, + "loss": 0.1798, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2886 + }, + { + "epoch": 0.27763619752848967, + "grad_norm": 1.7312097658667647, + "learning_rate": 4.151516561251724e-06, + "loss": 0.1239, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2887 + }, + { + "epoch": 0.27773236524498723, + "grad_norm": 3.5735288412990283, + "learning_rate": 4.150943811771387e-06, + "loss": 0.187, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2888 + }, + { + "epoch": 0.27782853296148485, + "grad_norm": 5.108297929407153, + "learning_rate": 4.150370908583583e-06, + "loss": 0.1988, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2889 + }, + { + "epoch": 0.2779247006779824, + "grad_norm": 1.8060723938453704, + "learning_rate": 4.149797851741651e-06, + "loss": 0.1452, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2890 + }, + { + "epoch": 0.27802086839448, + "grad_norm": 2.4537149419562447, + "learning_rate": 4.149224641298943e-06, + "loss": 0.1344, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2891 + }, + { + "epoch": 0.27811703611097754, + "grad_norm": 2.988048212966022, + "learning_rate": 4.148651277308827e-06, + "loss": 0.1664, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2892 + }, + { + "epoch": 0.2782132038274751, + "grad_norm": 3.3776597914002697, + "learning_rate": 4.148077759824686e-06, + "loss": 0.137, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2893 + }, + { + "epoch": 0.2783093715439727, + "grad_norm": 2.3175940788179825, + "learning_rate": 4.147504088899913e-06, + "loss": 0.1434, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2894 + }, + { + "epoch": 0.27840553926047024, + "grad_norm": 1.9162406796258775, + "learning_rate": 4.146930264587922e-06, + "loss": 0.1361, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2895 + }, + { + "epoch": 0.27850170697696786, + "grad_norm": 2.454401825442742, + "learning_rate": 4.146356286942136e-06, + "loss": 0.1339, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2896 + }, + { + "epoch": 0.2785978746934654, + "grad_norm": 2.308398876465031, + "learning_rate": 4.145782156015993e-06, + "loss": 0.1598, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2897 + }, + { + "epoch": 0.278694042409963, + "grad_norm": 1.4475147973745366, + "learning_rate": 4.145207871862947e-06, + "loss": 0.1299, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2898 + }, + { + "epoch": 0.27879021012646055, + "grad_norm": 1.703684408429225, + "learning_rate": 4.144633434536467e-06, + "loss": 0.1505, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2899 + }, + { + "epoch": 0.2788863778429581, + "grad_norm": 2.751793825029544, + "learning_rate": 4.144058844090032e-06, + "loss": 0.1365, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2900 + }, + { + "epoch": 0.2789825455594557, + "grad_norm": 1.9462329103934983, + "learning_rate": 4.14348410057714e-06, + "loss": 0.1374, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2901 + }, + { + "epoch": 0.27907871327595324, + "grad_norm": 1.8122902331643367, + "learning_rate": 4.142909204051299e-06, + "loss": 0.1535, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2902 + }, + { + "epoch": 0.27917488099245086, + "grad_norm": 1.7170059661635786, + "learning_rate": 4.142334154566036e-06, + "loss": 0.1536, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2903 + }, + { + "epoch": 0.2792710487089484, + "grad_norm": 2.8417879290492833, + "learning_rate": 4.1417589521748895e-06, + "loss": 0.1403, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2904 + }, + { + "epoch": 0.279367216425446, + "grad_norm": 3.5768855675848195, + "learning_rate": 4.14118359693141e-06, + "loss": 0.1233, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2905 + }, + { + "epoch": 0.27946338414194355, + "grad_norm": 1.219364710905213, + "learning_rate": 4.140608088889167e-06, + "loss": 0.1226, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2906 + }, + { + "epoch": 0.2795595518584411, + "grad_norm": 2.3054444265094753, + "learning_rate": 4.140032428101741e-06, + "loss": 0.1211, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2907 + }, + { + "epoch": 0.2796557195749387, + "grad_norm": 1.95983443429832, + "learning_rate": 4.139456614622728e-06, + "loss": 0.1473, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2908 + }, + { + "epoch": 0.27975188729143624, + "grad_norm": 1.5371616968292312, + "learning_rate": 4.1388806485057375e-06, + "loss": 0.1456, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2909 + }, + { + "epoch": 0.27984805500793386, + "grad_norm": 1.7753068811420736, + "learning_rate": 4.138304529804393e-06, + "loss": 0.143, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2910 + }, + { + "epoch": 0.2799442227244314, + "grad_norm": 2.248766773447542, + "learning_rate": 4.137728258572334e-06, + "loss": 0.1451, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2911 + }, + { + "epoch": 0.280040390440929, + "grad_norm": 3.5762209827218663, + "learning_rate": 4.137151834863213e-06, + "loss": 0.1385, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2912 + }, + { + "epoch": 0.28013655815742655, + "grad_norm": 2.242071580220004, + "learning_rate": 4.136575258730695e-06, + "loss": 0.1415, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2913 + }, + { + "epoch": 0.2802327258739241, + "grad_norm": 2.303697492607958, + "learning_rate": 4.135998530228463e-06, + "loss": 0.1492, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2914 + }, + { + "epoch": 0.2803288935904217, + "grad_norm": 1.9687760476030414, + "learning_rate": 4.135421649410211e-06, + "loss": 0.1564, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2915 + }, + { + "epoch": 0.28042506130691924, + "grad_norm": 1.6177380009030988, + "learning_rate": 4.1348446163296464e-06, + "loss": 0.1343, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2916 + }, + { + "epoch": 0.28052122902341686, + "grad_norm": 3.3300269284985813, + "learning_rate": 4.1342674310404955e-06, + "loss": 0.1507, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2917 + }, + { + "epoch": 0.2806173967399144, + "grad_norm": 2.5430225657617638, + "learning_rate": 4.1336900935964944e-06, + "loss": 0.1751, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2918 + }, + { + "epoch": 0.280713564456412, + "grad_norm": 1.471926940179751, + "learning_rate": 4.133112604051394e-06, + "loss": 0.1534, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2919 + }, + { + "epoch": 0.28080973217290955, + "grad_norm": 3.4235022436332, + "learning_rate": 4.1325349624589625e-06, + "loss": 0.1487, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2920 + }, + { + "epoch": 0.2809058998894071, + "grad_norm": 1.4974398078226516, + "learning_rate": 4.131957168872979e-06, + "loss": 0.1424, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2921 + }, + { + "epoch": 0.2810020676059047, + "grad_norm": 1.4202796370702788, + "learning_rate": 4.131379223347237e-06, + "loss": 0.1509, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2922 + }, + { + "epoch": 0.28109823532240225, + "grad_norm": 1.3167819641171934, + "learning_rate": 4.130801125935545e-06, + "loss": 0.1263, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2923 + }, + { + "epoch": 0.28119440303889986, + "grad_norm": 1.5902479998803598, + "learning_rate": 4.130222876691726e-06, + "loss": 0.1534, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2924 + }, + { + "epoch": 0.28129057075539743, + "grad_norm": 1.6306551164067389, + "learning_rate": 4.129644475669617e-06, + "loss": 0.1403, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2925 + }, + { + "epoch": 0.281386738471895, + "grad_norm": 3.2082082368604135, + "learning_rate": 4.1290659229230675e-06, + "loss": 0.1608, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2926 + }, + { + "epoch": 0.28148290618839256, + "grad_norm": 3.944464459457621, + "learning_rate": 4.1284872185059425e-06, + "loss": 0.1462, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2927 + }, + { + "epoch": 0.2815790739048901, + "grad_norm": 1.9175195722137972, + "learning_rate": 4.127908362472121e-06, + "loss": 0.1458, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2928 + }, + { + "epoch": 0.2816752416213877, + "grad_norm": 2.4393578087952137, + "learning_rate": 4.127329354875498e-06, + "loss": 0.1429, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2929 + }, + { + "epoch": 0.28177140933788525, + "grad_norm": 5.387294307261637, + "learning_rate": 4.126750195769978e-06, + "loss": 0.1613, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2930 + }, + { + "epoch": 0.28186757705438287, + "grad_norm": 2.6637957752225496, + "learning_rate": 4.126170885209485e-06, + "loss": 0.1389, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2931 + }, + { + "epoch": 0.28196374477088043, + "grad_norm": 2.853314395143913, + "learning_rate": 4.125591423247952e-06, + "loss": 0.1216, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2932 + }, + { + "epoch": 0.282059912487378, + "grad_norm": 3.535518772487249, + "learning_rate": 4.12501180993933e-06, + "loss": 0.1722, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2933 + }, + { + "epoch": 0.28215608020387556, + "grad_norm": 2.7198050495872548, + "learning_rate": 4.1244320453375815e-06, + "loss": 0.1078, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2934 + }, + { + "epoch": 0.2822522479203731, + "grad_norm": 4.369703766404477, + "learning_rate": 4.123852129496686e-06, + "loss": 0.1501, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2935 + }, + { + "epoch": 0.2823484156368707, + "grad_norm": 2.4326059605972343, + "learning_rate": 4.123272062470633e-06, + "loss": 0.1481, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2936 + }, + { + "epoch": 0.28244458335336825, + "grad_norm": 1.8516609571323248, + "learning_rate": 4.12269184431343e-06, + "loss": 0.1625, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2937 + }, + { + "epoch": 0.28254075106986587, + "grad_norm": 1.7124430215953925, + "learning_rate": 4.122111475079097e-06, + "loss": 0.1734, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2938 + }, + { + "epoch": 0.28263691878636343, + "grad_norm": 4.462569099537295, + "learning_rate": 4.1215309548216665e-06, + "loss": 0.157, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2939 + }, + { + "epoch": 0.282733086502861, + "grad_norm": 3.489028065656719, + "learning_rate": 4.120950283595188e-06, + "loss": 0.1578, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2940 + }, + { + "epoch": 0.28282925421935856, + "grad_norm": 2.2110480661649605, + "learning_rate": 4.120369461453723e-06, + "loss": 0.1586, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2941 + }, + { + "epoch": 0.2829254219358561, + "grad_norm": 2.0373492807570686, + "learning_rate": 4.119788488451347e-06, + "loss": 0.1229, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2942 + }, + { + "epoch": 0.2830215896523537, + "grad_norm": 2.5475891141604183, + "learning_rate": 4.119207364642152e-06, + "loss": 0.1409, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2943 + }, + { + "epoch": 0.28311775736885125, + "grad_norm": 4.2998374195284, + "learning_rate": 4.1186260900802405e-06, + "loss": 0.1785, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2944 + }, + { + "epoch": 0.28321392508534887, + "grad_norm": 3.171950426403356, + "learning_rate": 4.118044664819732e-06, + "loss": 0.1748, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2945 + }, + { + "epoch": 0.28331009280184644, + "grad_norm": 2.282820409466144, + "learning_rate": 4.117463088914758e-06, + "loss": 0.1397, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2946 + }, + { + "epoch": 0.283406260518344, + "grad_norm": 1.501587725863949, + "learning_rate": 4.1168813624194646e-06, + "loss": 0.1357, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2947 + }, + { + "epoch": 0.28350242823484156, + "grad_norm": 5.70615947684056, + "learning_rate": 4.1162994853880135e-06, + "loss": 0.1389, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2948 + }, + { + "epoch": 0.2835985959513391, + "grad_norm": 4.12255822082511, + "learning_rate": 4.115717457874579e-06, + "loss": 0.1398, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2949 + }, + { + "epoch": 0.2836947636678367, + "grad_norm": 3.353206619984528, + "learning_rate": 4.115135279933347e-06, + "loss": 0.1169, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2950 + }, + { + "epoch": 0.28379093138433426, + "grad_norm": 2.2789531520176944, + "learning_rate": 4.114552951618523e-06, + "loss": 0.1398, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2951 + }, + { + "epoch": 0.2838870991008319, + "grad_norm": 1.5861811356005335, + "learning_rate": 4.11397047298432e-06, + "loss": 0.1268, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2952 + }, + { + "epoch": 0.28398326681732944, + "grad_norm": 2.207334150420979, + "learning_rate": 4.113387844084972e-06, + "loss": 0.1348, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2953 + }, + { + "epoch": 0.284079434533827, + "grad_norm": 4.0760222496596965, + "learning_rate": 4.112805064974722e-06, + "loss": 0.159, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2954 + }, + { + "epoch": 0.28417560225032457, + "grad_norm": 2.582611595339732, + "learning_rate": 4.112222135707827e-06, + "loss": 0.126, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2955 + }, + { + "epoch": 0.28427176996682213, + "grad_norm": 3.245499423801564, + "learning_rate": 4.111639056338561e-06, + "loss": 0.1568, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2956 + }, + { + "epoch": 0.2843679376833197, + "grad_norm": 2.1836831870909235, + "learning_rate": 4.111055826921211e-06, + "loss": 0.1369, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2957 + }, + { + "epoch": 0.28446410539981726, + "grad_norm": 3.7750066957223773, + "learning_rate": 4.110472447510074e-06, + "loss": 0.1501, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2958 + }, + { + "epoch": 0.2845602731163149, + "grad_norm": 2.4820491703669227, + "learning_rate": 4.109888918159467e-06, + "loss": 0.1495, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2959 + }, + { + "epoch": 0.28465644083281244, + "grad_norm": 4.280108258822207, + "learning_rate": 4.109305238923718e-06, + "loss": 0.1329, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2960 + }, + { + "epoch": 0.28475260854931, + "grad_norm": 2.5169462574835424, + "learning_rate": 4.108721409857168e-06, + "loss": 0.1425, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2961 + }, + { + "epoch": 0.28484877626580757, + "grad_norm": 2.9450044009204794, + "learning_rate": 4.1081374310141756e-06, + "loss": 0.1663, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2962 + }, + { + "epoch": 0.28494494398230513, + "grad_norm": 1.7422047149770845, + "learning_rate": 4.107553302449107e-06, + "loss": 0.1294, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2963 + }, + { + "epoch": 0.2850411116988027, + "grad_norm": 2.5299263550373805, + "learning_rate": 4.106969024216348e-06, + "loss": 0.1243, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2964 + }, + { + "epoch": 0.28513727941530026, + "grad_norm": 1.97404951221777, + "learning_rate": 4.106384596370299e-06, + "loss": 0.1146, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2965 + }, + { + "epoch": 0.2852334471317979, + "grad_norm": 2.1825466970910745, + "learning_rate": 4.105800018965368e-06, + "loss": 0.1445, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2966 + }, + { + "epoch": 0.28532961484829544, + "grad_norm": 1.94887262608177, + "learning_rate": 4.105215292055982e-06, + "loss": 0.1439, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2967 + }, + { + "epoch": 0.285425782564793, + "grad_norm": 2.079415853003359, + "learning_rate": 4.1046304156965825e-06, + "loss": 0.1546, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2968 + }, + { + "epoch": 0.28552195028129057, + "grad_norm": 3.1208083308949246, + "learning_rate": 4.104045389941621e-06, + "loss": 0.1509, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2969 + }, + { + "epoch": 0.28561811799778813, + "grad_norm": 3.642581273064696, + "learning_rate": 4.103460214845566e-06, + "loss": 0.1716, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2970 + }, + { + "epoch": 0.2857142857142857, + "grad_norm": 1.5651533687348655, + "learning_rate": 4.102874890462898e-06, + "loss": 0.145, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2971 + }, + { + "epoch": 0.28581045343078326, + "grad_norm": 1.4878236051082008, + "learning_rate": 4.102289416848114e-06, + "loss": 0.151, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2972 + }, + { + "epoch": 0.2859066211472809, + "grad_norm": 2.5980632706755227, + "learning_rate": 4.101703794055721e-06, + "loss": 0.1505, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2973 + }, + { + "epoch": 0.28600278886377845, + "grad_norm": 2.634161560365412, + "learning_rate": 4.101118022140245e-06, + "loss": 0.1549, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2974 + }, + { + "epoch": 0.286098956580276, + "grad_norm": 2.6429107221238293, + "learning_rate": 4.10053210115622e-06, + "loss": 0.1538, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2975 + }, + { + "epoch": 0.2861951242967736, + "grad_norm": 1.6387314750300657, + "learning_rate": 4.0999460311582e-06, + "loss": 0.1337, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2976 + }, + { + "epoch": 0.28629129201327114, + "grad_norm": 2.9183695214469925, + "learning_rate": 4.099359812200746e-06, + "loss": 0.1463, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2977 + }, + { + "epoch": 0.2863874597297687, + "grad_norm": 1.6316177534574459, + "learning_rate": 4.098773444338439e-06, + "loss": 0.1631, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2978 + }, + { + "epoch": 0.28648362744626626, + "grad_norm": 2.022475883687625, + "learning_rate": 4.098186927625872e-06, + "loss": 0.1374, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2979 + }, + { + "epoch": 0.2865797951627639, + "grad_norm": 1.411741344100575, + "learning_rate": 4.09760026211765e-06, + "loss": 0.1292, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2980 + }, + { + "epoch": 0.28667596287926145, + "grad_norm": 1.5251104959443795, + "learning_rate": 4.0970134478683935e-06, + "loss": 0.119, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2981 + }, + { + "epoch": 0.286772130595759, + "grad_norm": 3.096080984387432, + "learning_rate": 4.096426484932737e-06, + "loss": 0.1341, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2982 + }, + { + "epoch": 0.2868682983122566, + "grad_norm": 1.4959330361065108, + "learning_rate": 4.095839373365327e-06, + "loss": 0.1356, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2983 + }, + { + "epoch": 0.28696446602875414, + "grad_norm": 1.9452775478684525, + "learning_rate": 4.095252113220827e-06, + "loss": 0.1398, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2984 + }, + { + "epoch": 0.2870606337452517, + "grad_norm": 4.1752325317593915, + "learning_rate": 4.094664704553912e-06, + "loss": 0.1596, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2985 + }, + { + "epoch": 0.28715680146174927, + "grad_norm": 2.4242861319804505, + "learning_rate": 4.094077147419271e-06, + "loss": 0.1382, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2986 + }, + { + "epoch": 0.2872529691782469, + "grad_norm": 1.790156994616347, + "learning_rate": 4.093489441871608e-06, + "loss": 0.1353, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2987 + }, + { + "epoch": 0.28734913689474445, + "grad_norm": 10.145961629682446, + "learning_rate": 4.092901587965639e-06, + "loss": 0.1334, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2988 + }, + { + "epoch": 0.287445304611242, + "grad_norm": 1.611369711332651, + "learning_rate": 4.092313585756095e-06, + "loss": 0.1396, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2989 + }, + { + "epoch": 0.2875414723277396, + "grad_norm": 2.438482643642364, + "learning_rate": 4.091725435297721e-06, + "loss": 0.1441, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2990 + }, + { + "epoch": 0.28763764004423714, + "grad_norm": 1.8122165271141488, + "learning_rate": 4.091137136645275e-06, + "loss": 0.1585, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2991 + }, + { + "epoch": 0.2877338077607347, + "grad_norm": 2.1157444596228965, + "learning_rate": 4.0905486898535305e-06, + "loss": 0.1581, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2992 + }, + { + "epoch": 0.28782997547723227, + "grad_norm": 1.619241881322533, + "learning_rate": 4.089960094977272e-06, + "loss": 0.1209, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2993 + }, + { + "epoch": 0.2879261431937299, + "grad_norm": 1.523380768225293, + "learning_rate": 4.089371352071301e-06, + "loss": 0.1311, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2994 + }, + { + "epoch": 0.28802231091022745, + "grad_norm": 2.139263285200855, + "learning_rate": 4.088782461190429e-06, + "loss": 0.1518, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2995 + }, + { + "epoch": 0.288118478626725, + "grad_norm": 1.4537127874272413, + "learning_rate": 4.0881934223894845e-06, + "loss": 0.1508, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2996 + }, + { + "epoch": 0.2882146463432226, + "grad_norm": 3.5119357321448623, + "learning_rate": 4.087604235723308e-06, + "loss": 0.1511, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2997 + }, + { + "epoch": 0.28831081405972014, + "grad_norm": 2.465611607272761, + "learning_rate": 4.0870149012467565e-06, + "loss": 0.1435, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2998 + }, + { + "epoch": 0.2884069817762177, + "grad_norm": 2.758665286839218, + "learning_rate": 4.086425419014696e-06, + "loss": 0.134, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 2999 + }, + { + "epoch": 0.28850314949271527, + "grad_norm": 1.7135401020809702, + "learning_rate": 4.0858357890820115e-06, + "loss": 0.1663, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3000 + }, + { + "epoch": 0.2885993172092129, + "grad_norm": 1.6758228599134462, + "learning_rate": 4.085246011503596e-06, + "loss": 0.1216, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3001 + }, + { + "epoch": 0.28869548492571045, + "grad_norm": 1.4380182406706625, + "learning_rate": 4.084656086334363e-06, + "loss": 0.1241, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3002 + }, + { + "epoch": 0.288791652642208, + "grad_norm": 3.3339308808864287, + "learning_rate": 4.0840660136292335e-06, + "loss": 0.1354, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3003 + }, + { + "epoch": 0.2888878203587056, + "grad_norm": 1.6987243573041573, + "learning_rate": 4.083475793443146e-06, + "loss": 0.1602, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3004 + }, + { + "epoch": 0.28898398807520315, + "grad_norm": 1.67902669710059, + "learning_rate": 4.082885425831052e-06, + "loss": 0.1332, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3005 + }, + { + "epoch": 0.2890801557917007, + "grad_norm": 1.585725437514706, + "learning_rate": 4.082294910847915e-06, + "loss": 0.1198, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3006 + }, + { + "epoch": 0.2891763235081983, + "grad_norm": 2.533794232026945, + "learning_rate": 4.081704248548715e-06, + "loss": 0.1586, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3007 + }, + { + "epoch": 0.2892724912246959, + "grad_norm": 1.6279977490319963, + "learning_rate": 4.081113438988443e-06, + "loss": 0.1371, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3008 + }, + { + "epoch": 0.28936865894119346, + "grad_norm": 1.8267283978227231, + "learning_rate": 4.080522482222107e-06, + "loss": 0.1225, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3009 + }, + { + "epoch": 0.289464826657691, + "grad_norm": 2.3147248682641965, + "learning_rate": 4.079931378304724e-06, + "loss": 0.1511, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3010 + }, + { + "epoch": 0.2895609943741886, + "grad_norm": 1.5589715755612659, + "learning_rate": 4.07934012729133e-06, + "loss": 0.1175, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3011 + }, + { + "epoch": 0.28965716209068615, + "grad_norm": 2.6814599605310354, + "learning_rate": 4.0787487292369715e-06, + "loss": 0.1454, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3012 + }, + { + "epoch": 0.2897533298071837, + "grad_norm": 2.357662531812302, + "learning_rate": 4.078157184196708e-06, + "loss": 0.1583, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3013 + }, + { + "epoch": 0.2898494975236813, + "grad_norm": 3.4701492704227137, + "learning_rate": 4.077565492225615e-06, + "loss": 0.1538, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3014 + }, + { + "epoch": 0.2899456652401789, + "grad_norm": 2.4697178515316205, + "learning_rate": 4.0769736533787806e-06, + "loss": 0.1609, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3015 + }, + { + "epoch": 0.29004183295667646, + "grad_norm": 3.7343777262219437, + "learning_rate": 4.076381667711306e-06, + "loss": 0.1604, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3016 + }, + { + "epoch": 0.290138000673174, + "grad_norm": 2.530523956135657, + "learning_rate": 4.075789535278309e-06, + "loss": 0.1517, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3017 + }, + { + "epoch": 0.2902341683896716, + "grad_norm": 1.5094473607421244, + "learning_rate": 4.075197256134915e-06, + "loss": 0.1255, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3018 + }, + { + "epoch": 0.29033033610616915, + "grad_norm": 4.760826222287577, + "learning_rate": 4.074604830336269e-06, + "loss": 0.1576, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3019 + }, + { + "epoch": 0.2904265038226667, + "grad_norm": 4.2548793789047465, + "learning_rate": 4.0740122579375284e-06, + "loss": 0.1588, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3020 + }, + { + "epoch": 0.2905226715391643, + "grad_norm": 3.033163959684899, + "learning_rate": 4.073419538993862e-06, + "loss": 0.1447, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3021 + }, + { + "epoch": 0.2906188392556619, + "grad_norm": 1.9622535057208308, + "learning_rate": 4.0728266735604545e-06, + "loss": 0.1517, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3022 + }, + { + "epoch": 0.29071500697215946, + "grad_norm": 4.082507593452347, + "learning_rate": 4.072233661692502e-06, + "loss": 0.1666, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3023 + }, + { + "epoch": 0.290811174688657, + "grad_norm": 2.9793851054244964, + "learning_rate": 4.071640503445217e-06, + "loss": 0.1386, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3024 + }, + { + "epoch": 0.2909073424051546, + "grad_norm": 3.073992062461636, + "learning_rate": 4.071047198873822e-06, + "loss": 0.1387, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3025 + }, + { + "epoch": 0.29100351012165215, + "grad_norm": 3.409137798652185, + "learning_rate": 4.0704537480335575e-06, + "loss": 0.1441, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3026 + }, + { + "epoch": 0.2910996778381497, + "grad_norm": 1.5869058329114796, + "learning_rate": 4.069860150979675e-06, + "loss": 0.1655, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3027 + }, + { + "epoch": 0.2911958455546473, + "grad_norm": 1.3927971182002503, + "learning_rate": 4.069266407767439e-06, + "loss": 0.1393, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3028 + }, + { + "epoch": 0.2912920132711449, + "grad_norm": 3.5805103815809454, + "learning_rate": 4.06867251845213e-06, + "loss": 0.1367, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3029 + }, + { + "epoch": 0.29138818098764246, + "grad_norm": 4.176709213481559, + "learning_rate": 4.068078483089041e-06, + "loss": 0.1541, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3030 + }, + { + "epoch": 0.29148434870414003, + "grad_norm": 1.958968404803494, + "learning_rate": 4.067484301733476e-06, + "loss": 0.1492, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3031 + }, + { + "epoch": 0.2915805164206376, + "grad_norm": 1.6288880122826892, + "learning_rate": 4.066889974440757e-06, + "loss": 0.1443, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3032 + }, + { + "epoch": 0.29167668413713516, + "grad_norm": 1.9076307945275168, + "learning_rate": 4.0662955012662165e-06, + "loss": 0.1513, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3033 + }, + { + "epoch": 0.2917728518536327, + "grad_norm": 3.052461299856914, + "learning_rate": 4.065700882265202e-06, + "loss": 0.1233, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3034 + }, + { + "epoch": 0.2918690195701303, + "grad_norm": 2.6409427979062956, + "learning_rate": 4.065106117493075e-06, + "loss": 0.17, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3035 + }, + { + "epoch": 0.2919651872866279, + "grad_norm": 2.0203349095058343, + "learning_rate": 4.064511207005209e-06, + "loss": 0.1838, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3036 + }, + { + "epoch": 0.29206135500312547, + "grad_norm": 1.7260956834463845, + "learning_rate": 4.063916150856991e-06, + "loss": 0.1383, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3037 + }, + { + "epoch": 0.29215752271962303, + "grad_norm": 1.4446083782220012, + "learning_rate": 4.0633209491038236e-06, + "loss": 0.1417, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3038 + }, + { + "epoch": 0.2922536904361206, + "grad_norm": 1.3484940025178178, + "learning_rate": 4.062725601801122e-06, + "loss": 0.1001, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3039 + }, + { + "epoch": 0.29234985815261816, + "grad_norm": 1.832688218563239, + "learning_rate": 4.062130109004313e-06, + "loss": 0.1563, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3040 + }, + { + "epoch": 0.2924460258691157, + "grad_norm": 1.8079925761546736, + "learning_rate": 4.061534470768841e-06, + "loss": 0.1469, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3041 + }, + { + "epoch": 0.2925421935856133, + "grad_norm": 2.068651028752883, + "learning_rate": 4.060938687150159e-06, + "loss": 0.152, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3042 + }, + { + "epoch": 0.2926383613021109, + "grad_norm": 1.5258399092713186, + "learning_rate": 4.060342758203737e-06, + "loss": 0.1337, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3043 + }, + { + "epoch": 0.29273452901860847, + "grad_norm": 2.3688737484568816, + "learning_rate": 4.05974668398506e-06, + "loss": 0.1321, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3044 + }, + { + "epoch": 0.29283069673510603, + "grad_norm": 1.4655138161942696, + "learning_rate": 4.05915046454962e-06, + "loss": 0.1435, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3045 + }, + { + "epoch": 0.2929268644516036, + "grad_norm": 2.4104285828251313, + "learning_rate": 4.058554099952931e-06, + "loss": 0.1488, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3046 + }, + { + "epoch": 0.29302303216810116, + "grad_norm": 1.3975426653763028, + "learning_rate": 4.057957590250512e-06, + "loss": 0.1465, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3047 + }, + { + "epoch": 0.2931191998845987, + "grad_norm": 2.14873972778045, + "learning_rate": 4.057360935497903e-06, + "loss": 0.1135, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3048 + }, + { + "epoch": 0.2932153676010963, + "grad_norm": 2.4204173400569737, + "learning_rate": 4.056764135750652e-06, + "loss": 0.1617, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3049 + }, + { + "epoch": 0.2933115353175939, + "grad_norm": 1.9484020553531798, + "learning_rate": 4.056167191064325e-06, + "loss": 0.1322, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3050 + }, + { + "epoch": 0.29340770303409147, + "grad_norm": 1.6599684304518432, + "learning_rate": 4.0555701014944975e-06, + "loss": 0.1337, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3051 + }, + { + "epoch": 0.29350387075058904, + "grad_norm": 2.853210219042054, + "learning_rate": 4.05497286709676e-06, + "loss": 0.1282, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3052 + }, + { + "epoch": 0.2936000384670866, + "grad_norm": 1.9539603676513162, + "learning_rate": 4.054375487926719e-06, + "loss": 0.156, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3053 + }, + { + "epoch": 0.29369620618358416, + "grad_norm": 1.8658225692219461, + "learning_rate": 4.053777964039989e-06, + "loss": 0.143, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3054 + }, + { + "epoch": 0.2937923739000817, + "grad_norm": 1.3487797703777704, + "learning_rate": 4.053180295492203e-06, + "loss": 0.093, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3055 + }, + { + "epoch": 0.2938885416165793, + "grad_norm": 1.5827121417981995, + "learning_rate": 4.052582482339004e-06, + "loss": 0.1461, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3056 + }, + { + "epoch": 0.2939847093330769, + "grad_norm": 1.5523807978510458, + "learning_rate": 4.051984524636054e-06, + "loss": 0.1141, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3057 + }, + { + "epoch": 0.2940808770495745, + "grad_norm": 2.247718273150611, + "learning_rate": 4.05138642243902e-06, + "loss": 0.1493, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3058 + }, + { + "epoch": 0.29417704476607204, + "grad_norm": 6.911705096078711, + "learning_rate": 4.05078817580359e-06, + "loss": 0.1344, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3059 + }, + { + "epoch": 0.2942732124825696, + "grad_norm": 2.5644280780753195, + "learning_rate": 4.0501897847854596e-06, + "loss": 0.1474, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3060 + }, + { + "epoch": 0.29436938019906717, + "grad_norm": 1.9996411343743885, + "learning_rate": 4.049591249440344e-06, + "loss": 0.1766, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3061 + }, + { + "epoch": 0.29446554791556473, + "grad_norm": 3.196454294754469, + "learning_rate": 4.048992569823965e-06, + "loss": 0.1672, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3062 + }, + { + "epoch": 0.2945617156320623, + "grad_norm": 1.3827594679417434, + "learning_rate": 4.048393745992064e-06, + "loss": 0.1239, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3063 + }, + { + "epoch": 0.2946578833485599, + "grad_norm": 3.2979945462605658, + "learning_rate": 4.047794778000394e-06, + "loss": 0.1578, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3064 + }, + { + "epoch": 0.2947540510650575, + "grad_norm": 1.4759964315538434, + "learning_rate": 4.0471956659047166e-06, + "loss": 0.1503, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3065 + }, + { + "epoch": 0.29485021878155504, + "grad_norm": 2.5734612851695378, + "learning_rate": 4.046596409760814e-06, + "loss": 0.1478, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3066 + }, + { + "epoch": 0.2949463864980526, + "grad_norm": 2.9516917061110273, + "learning_rate": 4.045997009624477e-06, + "loss": 0.1525, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3067 + }, + { + "epoch": 0.29504255421455017, + "grad_norm": 1.3179288580773563, + "learning_rate": 4.045397465551513e-06, + "loss": 0.1154, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3068 + }, + { + "epoch": 0.29513872193104773, + "grad_norm": 1.678609358753075, + "learning_rate": 4.044797777597741e-06, + "loss": 0.1547, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3069 + }, + { + "epoch": 0.2952348896475453, + "grad_norm": 1.4477654519001606, + "learning_rate": 4.044197945818992e-06, + "loss": 0.1101, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3070 + }, + { + "epoch": 0.2953310573640429, + "grad_norm": 2.037368364810704, + "learning_rate": 4.043597970271113e-06, + "loss": 0.1387, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3071 + }, + { + "epoch": 0.2954272250805405, + "grad_norm": 1.8675779926508351, + "learning_rate": 4.0429978510099645e-06, + "loss": 0.1263, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3072 + }, + { + "epoch": 0.29552339279703804, + "grad_norm": 2.182211985123866, + "learning_rate": 4.042397588091417e-06, + "loss": 0.1363, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3073 + }, + { + "epoch": 0.2956195605135356, + "grad_norm": 1.7461208812502351, + "learning_rate": 4.041797181571358e-06, + "loss": 0.1062, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3074 + }, + { + "epoch": 0.29571572823003317, + "grad_norm": 1.8017983886925826, + "learning_rate": 4.041196631505687e-06, + "loss": 0.1296, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3075 + }, + { + "epoch": 0.29581189594653073, + "grad_norm": 3.2486709477203113, + "learning_rate": 4.040595937950317e-06, + "loss": 0.1766, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3076 + }, + { + "epoch": 0.2959080636630283, + "grad_norm": 2.187724571112232, + "learning_rate": 4.039995100961174e-06, + "loss": 0.1384, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3077 + }, + { + "epoch": 0.2960042313795259, + "grad_norm": 2.0684455251550298, + "learning_rate": 4.039394120594197e-06, + "loss": 0.1211, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3078 + }, + { + "epoch": 0.2961003990960235, + "grad_norm": 1.6744215611283637, + "learning_rate": 4.03879299690534e-06, + "loss": 0.1189, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3079 + }, + { + "epoch": 0.29619656681252104, + "grad_norm": 2.3935527437640034, + "learning_rate": 4.038191729950569e-06, + "loss": 0.1233, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3080 + }, + { + "epoch": 0.2962927345290186, + "grad_norm": 2.202024172956137, + "learning_rate": 4.037590319785863e-06, + "loss": 0.162, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3081 + }, + { + "epoch": 0.2963889022455162, + "grad_norm": 2.1692172282634656, + "learning_rate": 4.0369887664672155e-06, + "loss": 0.1429, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3082 + }, + { + "epoch": 0.29648506996201374, + "grad_norm": 2.040690074204512, + "learning_rate": 4.036387070050632e-06, + "loss": 0.1458, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3083 + }, + { + "epoch": 0.2965812376785113, + "grad_norm": 2.094001154714757, + "learning_rate": 4.035785230592134e-06, + "loss": 0.1246, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3084 + }, + { + "epoch": 0.2966774053950089, + "grad_norm": 2.1607127242407, + "learning_rate": 4.035183248147752e-06, + "loss": 0.1156, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3085 + }, + { + "epoch": 0.2967735731115065, + "grad_norm": 2.140741101769555, + "learning_rate": 4.034581122773535e-06, + "loss": 0.1509, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3086 + }, + { + "epoch": 0.29686974082800405, + "grad_norm": 1.428758215744914, + "learning_rate": 4.033978854525541e-06, + "loss": 0.1233, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3087 + }, + { + "epoch": 0.2969659085445016, + "grad_norm": 2.2124904368429537, + "learning_rate": 4.033376443459842e-06, + "loss": 0.1509, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3088 + }, + { + "epoch": 0.2970620762609992, + "grad_norm": 1.6112257641013803, + "learning_rate": 4.0327738896325255e-06, + "loss": 0.1346, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3089 + }, + { + "epoch": 0.29715824397749674, + "grad_norm": 1.7802885128202084, + "learning_rate": 4.03217119309969e-06, + "loss": 0.1352, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3090 + }, + { + "epoch": 0.2972544116939943, + "grad_norm": 2.2690251150859435, + "learning_rate": 4.031568353917449e-06, + "loss": 0.1496, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3091 + }, + { + "epoch": 0.2973505794104919, + "grad_norm": 2.556737453624386, + "learning_rate": 4.030965372141927e-06, + "loss": 0.1444, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3092 + }, + { + "epoch": 0.2974467471269895, + "grad_norm": 1.8772415926415693, + "learning_rate": 4.030362247829266e-06, + "loss": 0.1639, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3093 + }, + { + "epoch": 0.29754291484348705, + "grad_norm": 1.5397080600543884, + "learning_rate": 4.029758981035617e-06, + "loss": 0.1367, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3094 + }, + { + "epoch": 0.2976390825599846, + "grad_norm": 1.6122219899455632, + "learning_rate": 4.029155571817146e-06, + "loss": 0.1297, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3095 + }, + { + "epoch": 0.2977352502764822, + "grad_norm": 1.5628352162304981, + "learning_rate": 4.028552020230031e-06, + "loss": 0.153, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3096 + }, + { + "epoch": 0.29783141799297974, + "grad_norm": 1.3878473628149257, + "learning_rate": 4.027948326330465e-06, + "loss": 0.1284, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3097 + }, + { + "epoch": 0.2979275857094773, + "grad_norm": 2.623044878612912, + "learning_rate": 4.027344490174655e-06, + "loss": 0.1358, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3098 + }, + { + "epoch": 0.2980237534259749, + "grad_norm": 1.3209434314890103, + "learning_rate": 4.026740511818818e-06, + "loss": 0.1364, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3099 + }, + { + "epoch": 0.2981199211424725, + "grad_norm": 1.638846027240983, + "learning_rate": 4.026136391319187e-06, + "loss": 0.1594, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3100 + }, + { + "epoch": 0.29821608885897005, + "grad_norm": 1.5719881026851348, + "learning_rate": 4.025532128732007e-06, + "loss": 0.1297, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3101 + }, + { + "epoch": 0.2983122565754676, + "grad_norm": 2.147675598631479, + "learning_rate": 4.024927724113537e-06, + "loss": 0.1626, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3102 + }, + { + "epoch": 0.2984084242919652, + "grad_norm": 1.9609142407478903, + "learning_rate": 4.024323177520047e-06, + "loss": 0.1619, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3103 + }, + { + "epoch": 0.29850459200846274, + "grad_norm": 2.039958579912127, + "learning_rate": 4.023718489007825e-06, + "loss": 0.1498, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3104 + }, + { + "epoch": 0.2986007597249603, + "grad_norm": 1.2352090995871785, + "learning_rate": 4.023113658633166e-06, + "loss": 0.0942, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3105 + }, + { + "epoch": 0.2986969274414579, + "grad_norm": 2.08374222962766, + "learning_rate": 4.022508686452385e-06, + "loss": 0.1229, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3106 + }, + { + "epoch": 0.2987930951579555, + "grad_norm": 1.4923196282943905, + "learning_rate": 4.021903572521802e-06, + "loss": 0.1469, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3107 + }, + { + "epoch": 0.29888926287445305, + "grad_norm": 1.3259243483330434, + "learning_rate": 4.0212983168977585e-06, + "loss": 0.127, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3108 + }, + { + "epoch": 0.2989854305909506, + "grad_norm": 4.88040813084182, + "learning_rate": 4.020692919636604e-06, + "loss": 0.1881, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3109 + }, + { + "epoch": 0.2990815983074482, + "grad_norm": 2.6929751900712704, + "learning_rate": 4.020087380794703e-06, + "loss": 0.1618, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3110 + }, + { + "epoch": 0.29917776602394575, + "grad_norm": 1.619565223946857, + "learning_rate": 4.019481700428432e-06, + "loss": 0.1084, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3111 + }, + { + "epoch": 0.2992739337404433, + "grad_norm": 1.819077510811767, + "learning_rate": 4.018875878594184e-06, + "loss": 0.1252, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3112 + }, + { + "epoch": 0.29937010145694093, + "grad_norm": 1.651928111527298, + "learning_rate": 4.01826991534836e-06, + "loss": 0.154, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3113 + }, + { + "epoch": 0.2994662691734385, + "grad_norm": 2.100762744788707, + "learning_rate": 4.017663810747377e-06, + "loss": 0.1303, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3114 + }, + { + "epoch": 0.29956243688993606, + "grad_norm": 3.126497608729077, + "learning_rate": 4.017057564847667e-06, + "loss": 0.1053, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3115 + }, + { + "epoch": 0.2996586046064336, + "grad_norm": 1.646146740126967, + "learning_rate": 4.016451177705672e-06, + "loss": 0.1264, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3116 + }, + { + "epoch": 0.2997547723229312, + "grad_norm": 3.0826669299484646, + "learning_rate": 4.015844649377849e-06, + "loss": 0.1423, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3117 + }, + { + "epoch": 0.29985094003942875, + "grad_norm": 2.123153155534617, + "learning_rate": 4.015237979920666e-06, + "loss": 0.1502, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3118 + }, + { + "epoch": 0.2999471077559263, + "grad_norm": 2.168869955949615, + "learning_rate": 4.014631169390607e-06, + "loss": 0.1685, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3119 + }, + { + "epoch": 0.30004327547242393, + "grad_norm": 1.594614002267937, + "learning_rate": 4.014024217844167e-06, + "loss": 0.1534, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3120 + }, + { + "epoch": 0.3001394431889215, + "grad_norm": 2.7141398502447864, + "learning_rate": 4.013417125337855e-06, + "loss": 0.1387, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3121 + }, + { + "epoch": 0.30023561090541906, + "grad_norm": 1.4641479156003936, + "learning_rate": 4.0128098919281934e-06, + "loss": 0.111, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3122 + }, + { + "epoch": 0.3003317786219166, + "grad_norm": 1.5789365701010565, + "learning_rate": 4.0122025176717175e-06, + "loss": 0.1396, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3123 + }, + { + "epoch": 0.3004279463384142, + "grad_norm": 1.504903155163296, + "learning_rate": 4.011595002624974e-06, + "loss": 0.1505, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3124 + }, + { + "epoch": 0.30052411405491175, + "grad_norm": 1.914091874386886, + "learning_rate": 4.010987346844526e-06, + "loss": 0.1698, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3125 + }, + { + "epoch": 0.3006202817714093, + "grad_norm": 2.877265578039474, + "learning_rate": 4.010379550386947e-06, + "loss": 0.1467, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3126 + }, + { + "epoch": 0.30071644948790693, + "grad_norm": 2.6312872118178547, + "learning_rate": 4.009771613308825e-06, + "loss": 0.1588, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3127 + }, + { + "epoch": 0.3008126172044045, + "grad_norm": 1.6256323475972347, + "learning_rate": 4.009163535666761e-06, + "loss": 0.1376, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3128 + }, + { + "epoch": 0.30090878492090206, + "grad_norm": 1.6041099612436476, + "learning_rate": 4.008555317517367e-06, + "loss": 0.1388, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3129 + }, + { + "epoch": 0.3010049526373996, + "grad_norm": 3.828041169535868, + "learning_rate": 4.007946958917273e-06, + "loss": 0.1454, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3130 + }, + { + "epoch": 0.3011011203538972, + "grad_norm": 3.5175587517823903, + "learning_rate": 4.007338459923115e-06, + "loss": 0.1273, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3131 + }, + { + "epoch": 0.30119728807039475, + "grad_norm": 1.9905804093114141, + "learning_rate": 4.006729820591548e-06, + "loss": 0.1568, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3132 + }, + { + "epoch": 0.3012934557868923, + "grad_norm": 3.4431635837676744, + "learning_rate": 4.0061210409792384e-06, + "loss": 0.1492, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3133 + }, + { + "epoch": 0.30138962350338994, + "grad_norm": 1.6494011132710613, + "learning_rate": 4.005512121142864e-06, + "loss": 0.1209, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3134 + }, + { + "epoch": 0.3014857912198875, + "grad_norm": 2.188503080959889, + "learning_rate": 4.004903061139118e-06, + "loss": 0.1546, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3135 + }, + { + "epoch": 0.30158195893638506, + "grad_norm": 3.492773060300036, + "learning_rate": 4.004293861024706e-06, + "loss": 0.1173, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3136 + }, + { + "epoch": 0.3016781266528826, + "grad_norm": 2.9972857531328887, + "learning_rate": 4.0036845208563444e-06, + "loss": 0.1363, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3137 + }, + { + "epoch": 0.3017742943693802, + "grad_norm": 3.0111432096292994, + "learning_rate": 4.003075040690766e-06, + "loss": 0.1559, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3138 + }, + { + "epoch": 0.30187046208587776, + "grad_norm": 1.5911648775629295, + "learning_rate": 4.002465420584715e-06, + "loss": 0.1239, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3139 + }, + { + "epoch": 0.3019666298023753, + "grad_norm": 3.671187990609838, + "learning_rate": 4.001855660594948e-06, + "loss": 0.1707, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3140 + }, + { + "epoch": 0.30206279751887294, + "grad_norm": 4.17474126269656, + "learning_rate": 4.001245760778235e-06, + "loss": 0.1686, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3141 + }, + { + "epoch": 0.3021589652353705, + "grad_norm": 4.154302194357014, + "learning_rate": 4.000635721191361e-06, + "loss": 0.135, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3142 + }, + { + "epoch": 0.30225513295186807, + "grad_norm": 2.468468048512689, + "learning_rate": 4.000025541891122e-06, + "loss": 0.1489, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3143 + }, + { + "epoch": 0.30235130066836563, + "grad_norm": 1.541123288094981, + "learning_rate": 3.999415222934325e-06, + "loss": 0.1272, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3144 + }, + { + "epoch": 0.3024474683848632, + "grad_norm": 3.2233076667835494, + "learning_rate": 3.998804764377796e-06, + "loss": 0.1641, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3145 + }, + { + "epoch": 0.30254363610136076, + "grad_norm": 3.7948886104242314, + "learning_rate": 3.9981941662783675e-06, + "loss": 0.1353, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3146 + }, + { + "epoch": 0.3026398038178583, + "grad_norm": 4.851439936445401, + "learning_rate": 3.99758342869289e-06, + "loss": 0.1615, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3147 + }, + { + "epoch": 0.30273597153435594, + "grad_norm": 2.946209211391996, + "learning_rate": 3.996972551678224e-06, + "loss": 0.1505, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3148 + }, + { + "epoch": 0.3028321392508535, + "grad_norm": 1.7848984366078815, + "learning_rate": 3.996361535291242e-06, + "loss": 0.1552, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3149 + }, + { + "epoch": 0.30292830696735107, + "grad_norm": 1.8623841749858971, + "learning_rate": 3.995750379588835e-06, + "loss": 0.147, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3150 + }, + { + "epoch": 0.30302447468384863, + "grad_norm": 4.868678967755518, + "learning_rate": 3.9951390846279004e-06, + "loss": 0.1592, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3151 + }, + { + "epoch": 0.3031206424003462, + "grad_norm": 3.488311879488009, + "learning_rate": 3.994527650465352e-06, + "loss": 0.1702, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3152 + }, + { + "epoch": 0.30321681011684376, + "grad_norm": 4.027957099383312, + "learning_rate": 3.993916077158118e-06, + "loss": 0.1483, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3153 + }, + { + "epoch": 0.3033129778333413, + "grad_norm": 3.222641484794737, + "learning_rate": 3.993304364763135e-06, + "loss": 0.1637, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3154 + }, + { + "epoch": 0.30340914554983894, + "grad_norm": 1.822394363837865, + "learning_rate": 3.9926925133373565e-06, + "loss": 0.1452, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3155 + }, + { + "epoch": 0.3035053132663365, + "grad_norm": 1.9269762620685915, + "learning_rate": 3.9920805229377465e-06, + "loss": 0.1172, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3156 + }, + { + "epoch": 0.30360148098283407, + "grad_norm": 3.212679143578993, + "learning_rate": 3.991468393621284e-06, + "loss": 0.1624, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3157 + }, + { + "epoch": 0.30369764869933163, + "grad_norm": 1.3750939855925934, + "learning_rate": 3.990856125444961e-06, + "loss": 0.129, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3158 + }, + { + "epoch": 0.3037938164158292, + "grad_norm": 1.214705939890834, + "learning_rate": 3.990243718465779e-06, + "loss": 0.1132, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3159 + }, + { + "epoch": 0.30388998413232676, + "grad_norm": 1.4737449118238766, + "learning_rate": 3.989631172740756e-06, + "loss": 0.1636, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3160 + }, + { + "epoch": 0.3039861518488243, + "grad_norm": 2.0694387503533274, + "learning_rate": 3.989018488326921e-06, + "loss": 0.1357, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3161 + }, + { + "epoch": 0.30408231956532195, + "grad_norm": 2.490776390841118, + "learning_rate": 3.988405665281319e-06, + "loss": 0.1372, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3162 + }, + { + "epoch": 0.3041784872818195, + "grad_norm": 2.6645059586857287, + "learning_rate": 3.987792703661001e-06, + "loss": 0.1538, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3163 + }, + { + "epoch": 0.3042746549983171, + "grad_norm": 2.310689178235655, + "learning_rate": 3.987179603523041e-06, + "loss": 0.1231, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3164 + }, + { + "epoch": 0.30437082271481464, + "grad_norm": 2.2596312815954946, + "learning_rate": 3.986566364924516e-06, + "loss": 0.107, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3165 + }, + { + "epoch": 0.3044669904313122, + "grad_norm": 1.8575237869255283, + "learning_rate": 3.985952987922521e-06, + "loss": 0.1335, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3166 + }, + { + "epoch": 0.30456315814780976, + "grad_norm": 1.7933301254674177, + "learning_rate": 3.985339472574165e-06, + "loss": 0.1581, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3167 + }, + { + "epoch": 0.30465932586430733, + "grad_norm": 1.908298113367027, + "learning_rate": 3.9847258189365664e-06, + "loss": 0.1218, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3168 + }, + { + "epoch": 0.30475549358080495, + "grad_norm": 3.4896234085051274, + "learning_rate": 3.984112027066859e-06, + "loss": 0.163, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3169 + }, + { + "epoch": 0.3048516612973025, + "grad_norm": 4.765928801387326, + "learning_rate": 3.983498097022188e-06, + "loss": 0.1589, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3170 + }, + { + "epoch": 0.3049478290138001, + "grad_norm": 2.429633188978177, + "learning_rate": 3.982884028859712e-06, + "loss": 0.1278, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3171 + }, + { + "epoch": 0.30504399673029764, + "grad_norm": 4.096738117699969, + "learning_rate": 3.982269822636602e-06, + "loss": 0.1685, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3172 + }, + { + "epoch": 0.3051401644467952, + "grad_norm": 2.202348539484553, + "learning_rate": 3.981655478410043e-06, + "loss": 0.1386, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3173 + }, + { + "epoch": 0.30523633216329277, + "grad_norm": 2.0437130139132713, + "learning_rate": 3.981040996237231e-06, + "loss": 0.1818, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3174 + }, + { + "epoch": 0.30533249987979033, + "grad_norm": 2.1095043678363705, + "learning_rate": 3.980426376175378e-06, + "loss": 0.1461, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3175 + }, + { + "epoch": 0.30542866759628795, + "grad_norm": 4.3017581957519635, + "learning_rate": 3.979811618281706e-06, + "loss": 0.1773, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3176 + }, + { + "epoch": 0.3055248353127855, + "grad_norm": 2.3591908052607464, + "learning_rate": 3.97919672261345e-06, + "loss": 0.1349, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3177 + }, + { + "epoch": 0.3056210030292831, + "grad_norm": 3.250418363154981, + "learning_rate": 3.978581689227859e-06, + "loss": 0.116, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3178 + }, + { + "epoch": 0.30571717074578064, + "grad_norm": 1.5272823269129394, + "learning_rate": 3.977966518182194e-06, + "loss": 0.1434, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3179 + }, + { + "epoch": 0.3058133384622782, + "grad_norm": 1.425594032954778, + "learning_rate": 3.97735120953373e-06, + "loss": 0.1187, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3180 + }, + { + "epoch": 0.30590950617877577, + "grad_norm": 4.578499030795914, + "learning_rate": 3.976735763339753e-06, + "loss": 0.1344, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3181 + }, + { + "epoch": 0.30600567389527333, + "grad_norm": 4.626037363926163, + "learning_rate": 3.976120179657563e-06, + "loss": 0.1886, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3182 + }, + { + "epoch": 0.30610184161177095, + "grad_norm": 2.667953377884374, + "learning_rate": 3.975504458544472e-06, + "loss": 0.1565, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3183 + }, + { + "epoch": 0.3061980093282685, + "grad_norm": 1.6453538620684893, + "learning_rate": 3.974888600057808e-06, + "loss": 0.1643, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3184 + }, + { + "epoch": 0.3062941770447661, + "grad_norm": 1.912554595686197, + "learning_rate": 3.974272604254906e-06, + "loss": 0.1482, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3185 + }, + { + "epoch": 0.30639034476126364, + "grad_norm": 4.7194322329966525, + "learning_rate": 3.973656471193118e-06, + "loss": 0.1823, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3186 + }, + { + "epoch": 0.3064865124777612, + "grad_norm": 2.4268525602130753, + "learning_rate": 3.973040200929808e-06, + "loss": 0.1397, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3187 + }, + { + "epoch": 0.30658268019425877, + "grad_norm": 1.4962777361665711, + "learning_rate": 3.972423793522352e-06, + "loss": 0.1442, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3188 + }, + { + "epoch": 0.30667884791075634, + "grad_norm": 1.4320617833475682, + "learning_rate": 3.97180724902814e-06, + "loss": 0.1388, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3189 + }, + { + "epoch": 0.30677501562725396, + "grad_norm": 1.6189737818369392, + "learning_rate": 3.971190567504573e-06, + "loss": 0.156, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3190 + }, + { + "epoch": 0.3068711833437515, + "grad_norm": 1.8735427424253501, + "learning_rate": 3.970573749009066e-06, + "loss": 0.1226, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3191 + }, + { + "epoch": 0.3069673510602491, + "grad_norm": 1.4738382335080218, + "learning_rate": 3.969956793599048e-06, + "loss": 0.1286, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3192 + }, + { + "epoch": 0.30706351877674665, + "grad_norm": 1.977945747812471, + "learning_rate": 3.969339701331957e-06, + "loss": 0.1504, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3193 + }, + { + "epoch": 0.3071596864932442, + "grad_norm": 1.603227278818987, + "learning_rate": 3.9687224722652475e-06, + "loss": 0.1389, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3194 + }, + { + "epoch": 0.3072558542097418, + "grad_norm": 1.5784120059669153, + "learning_rate": 3.968105106456385e-06, + "loss": 0.1232, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3195 + }, + { + "epoch": 0.30735202192623934, + "grad_norm": 2.8379136918567496, + "learning_rate": 3.9674876039628475e-06, + "loss": 0.1559, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3196 + }, + { + "epoch": 0.30744818964273696, + "grad_norm": 2.3540759902881, + "learning_rate": 3.966869964842127e-06, + "loss": 0.1511, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3197 + }, + { + "epoch": 0.3075443573592345, + "grad_norm": 1.580036334936654, + "learning_rate": 3.966252189151726e-06, + "loss": 0.1435, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3198 + }, + { + "epoch": 0.3076405250757321, + "grad_norm": 2.9716124072464094, + "learning_rate": 3.965634276949163e-06, + "loss": 0.1773, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3199 + }, + { + "epoch": 0.30773669279222965, + "grad_norm": 1.5606359014757925, + "learning_rate": 3.965016228291966e-06, + "loss": 0.1412, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3200 + }, + { + "epoch": 0.3078328605087272, + "grad_norm": 1.7247943798892902, + "learning_rate": 3.964398043237677e-06, + "loss": 0.1393, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3201 + }, + { + "epoch": 0.3079290282252248, + "grad_norm": 1.917998709660406, + "learning_rate": 3.963779721843852e-06, + "loss": 0.1561, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3202 + }, + { + "epoch": 0.30802519594172234, + "grad_norm": 1.8538292013853037, + "learning_rate": 3.963161264168057e-06, + "loss": 0.1624, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3203 + }, + { + "epoch": 0.30812136365821996, + "grad_norm": 1.4554443478477679, + "learning_rate": 3.962542670267874e-06, + "loss": 0.1372, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3204 + }, + { + "epoch": 0.3082175313747175, + "grad_norm": 1.8291106369195667, + "learning_rate": 3.9619239402008935e-06, + "loss": 0.1475, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3205 + }, + { + "epoch": 0.3083136990912151, + "grad_norm": 2.7357290810733472, + "learning_rate": 3.961305074024722e-06, + "loss": 0.1462, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3206 + }, + { + "epoch": 0.30840986680771265, + "grad_norm": 3.5718541397076327, + "learning_rate": 3.960686071796978e-06, + "loss": 0.1754, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3207 + }, + { + "epoch": 0.3085060345242102, + "grad_norm": 2.853081717657014, + "learning_rate": 3.960066933575293e-06, + "loss": 0.1444, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3208 + }, + { + "epoch": 0.3086022022407078, + "grad_norm": 2.028364687898123, + "learning_rate": 3.959447659417309e-06, + "loss": 0.154, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3209 + }, + { + "epoch": 0.30869836995720534, + "grad_norm": 1.6134099431071498, + "learning_rate": 3.958828249380683e-06, + "loss": 0.1373, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3210 + }, + { + "epoch": 0.30879453767370296, + "grad_norm": 3.3236064080846264, + "learning_rate": 3.958208703523083e-06, + "loss": 0.1436, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3211 + }, + { + "epoch": 0.3088907053902005, + "grad_norm": 4.201804886564339, + "learning_rate": 3.957589021902191e-06, + "loss": 0.149, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3212 + }, + { + "epoch": 0.3089868731066981, + "grad_norm": 2.414715467616343, + "learning_rate": 3.956969204575701e-06, + "loss": 0.148, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3213 + }, + { + "epoch": 0.30908304082319565, + "grad_norm": 1.6525518218616806, + "learning_rate": 3.95634925160132e-06, + "loss": 0.1291, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3214 + }, + { + "epoch": 0.3091792085396932, + "grad_norm": 3.2657069350364556, + "learning_rate": 3.955729163036768e-06, + "loss": 0.1538, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3215 + }, + { + "epoch": 0.3092753762561908, + "grad_norm": 3.579023669426612, + "learning_rate": 3.955108938939774e-06, + "loss": 0.1366, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3216 + }, + { + "epoch": 0.30937154397268835, + "grad_norm": 2.457549380521865, + "learning_rate": 3.954488579368087e-06, + "loss": 0.1478, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3217 + }, + { + "epoch": 0.30946771168918596, + "grad_norm": 2.580072248788887, + "learning_rate": 3.953868084379461e-06, + "loss": 0.1438, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3218 + }, + { + "epoch": 0.30956387940568353, + "grad_norm": 1.8359952544201745, + "learning_rate": 3.953247454031666e-06, + "loss": 0.1359, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3219 + }, + { + "epoch": 0.3096600471221811, + "grad_norm": 3.1512805493751808, + "learning_rate": 3.9526266883824865e-06, + "loss": 0.1465, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3220 + }, + { + "epoch": 0.30975621483867866, + "grad_norm": 2.328792972215604, + "learning_rate": 3.952005787489716e-06, + "loss": 0.1514, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3221 + }, + { + "epoch": 0.3098523825551762, + "grad_norm": 2.83061701720574, + "learning_rate": 3.95138475141116e-06, + "loss": 0.1745, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3222 + }, + { + "epoch": 0.3099485502716738, + "grad_norm": 2.3796922330041994, + "learning_rate": 3.950763580204643e-06, + "loss": 0.1094, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3223 + }, + { + "epoch": 0.31004471798817135, + "grad_norm": 4.024266022896973, + "learning_rate": 3.950142273927996e-06, + "loss": 0.1576, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3224 + }, + { + "epoch": 0.31014088570466897, + "grad_norm": 2.188610482462271, + "learning_rate": 3.949520832639063e-06, + "loss": 0.1443, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3225 + }, + { + "epoch": 0.31023705342116653, + "grad_norm": 1.8414505153058434, + "learning_rate": 3.9488992563957036e-06, + "loss": 0.1366, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3226 + }, + { + "epoch": 0.3103332211376641, + "grad_norm": 1.5398321997913658, + "learning_rate": 3.948277545255787e-06, + "loss": 0.1435, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3227 + }, + { + "epoch": 0.31042938885416166, + "grad_norm": 3.3033500506802835, + "learning_rate": 3.947655699277197e-06, + "loss": 0.1576, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3228 + }, + { + "epoch": 0.3105255565706592, + "grad_norm": 1.9286130689366425, + "learning_rate": 3.9470337185178296e-06, + "loss": 0.1327, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3229 + }, + { + "epoch": 0.3106217242871568, + "grad_norm": 1.6581883449265942, + "learning_rate": 3.946411603035592e-06, + "loss": 0.1322, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3230 + }, + { + "epoch": 0.31071789200365435, + "grad_norm": 1.7450794458646577, + "learning_rate": 3.945789352888406e-06, + "loss": 0.1263, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3231 + }, + { + "epoch": 0.31081405972015197, + "grad_norm": 1.9062543240053784, + "learning_rate": 3.9451669681342034e-06, + "loss": 0.125, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3232 + }, + { + "epoch": 0.31091022743664953, + "grad_norm": 3.5395615665119395, + "learning_rate": 3.944544448830931e-06, + "loss": 0.1498, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3233 + }, + { + "epoch": 0.3110063951531471, + "grad_norm": 3.894187348567796, + "learning_rate": 3.943921795036547e-06, + "loss": 0.1738, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3234 + }, + { + "epoch": 0.31110256286964466, + "grad_norm": 2.039473067243105, + "learning_rate": 3.9432990068090215e-06, + "loss": 0.1313, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3235 + }, + { + "epoch": 0.3111987305861422, + "grad_norm": 1.6573573804201271, + "learning_rate": 3.9426760842063385e-06, + "loss": 0.1266, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3236 + }, + { + "epoch": 0.3112948983026398, + "grad_norm": 1.9024300528264115, + "learning_rate": 3.9420530272864935e-06, + "loss": 0.1174, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3237 + }, + { + "epoch": 0.31139106601913735, + "grad_norm": 1.5326340632430848, + "learning_rate": 3.9414298361074956e-06, + "loss": 0.119, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3238 + }, + { + "epoch": 0.31148723373563497, + "grad_norm": 1.3883119293343622, + "learning_rate": 3.940806510727364e-06, + "loss": 0.1235, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3239 + }, + { + "epoch": 0.31158340145213254, + "grad_norm": 2.2175106882795355, + "learning_rate": 3.940183051204133e-06, + "loss": 0.1514, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3240 + }, + { + "epoch": 0.3116795691686301, + "grad_norm": 1.9379747460699328, + "learning_rate": 3.939559457595849e-06, + "loss": 0.1316, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3241 + }, + { + "epoch": 0.31177573688512766, + "grad_norm": 1.5167413308928743, + "learning_rate": 3.938935729960569e-06, + "loss": 0.1321, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3242 + }, + { + "epoch": 0.3118719046016252, + "grad_norm": 3.041942160398312, + "learning_rate": 3.938311868356366e-06, + "loss": 0.1506, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3243 + }, + { + "epoch": 0.3119680723181228, + "grad_norm": 1.4811326688271864, + "learning_rate": 3.93768787284132e-06, + "loss": 0.1185, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3244 + }, + { + "epoch": 0.31206424003462035, + "grad_norm": 1.5722549521518574, + "learning_rate": 3.93706374347353e-06, + "loss": 0.1329, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3245 + }, + { + "epoch": 0.312160407751118, + "grad_norm": 1.497922424313909, + "learning_rate": 3.936439480311102e-06, + "loss": 0.1261, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3246 + }, + { + "epoch": 0.31225657546761554, + "grad_norm": 2.328377018880371, + "learning_rate": 3.935815083412156e-06, + "loss": 0.1357, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3247 + }, + { + "epoch": 0.3123527431841131, + "grad_norm": 1.6977750719630589, + "learning_rate": 3.9351905528348285e-06, + "loss": 0.1244, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3248 + }, + { + "epoch": 0.31244891090061067, + "grad_norm": 2.5165974030157487, + "learning_rate": 3.934565888637261e-06, + "loss": 0.1284, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3249 + }, + { + "epoch": 0.31254507861710823, + "grad_norm": 3.5538730184256706, + "learning_rate": 3.933941090877615e-06, + "loss": 0.1213, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3250 + }, + { + "epoch": 0.3126412463336058, + "grad_norm": 1.926230722294267, + "learning_rate": 3.93331615961406e-06, + "loss": 0.1379, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3251 + }, + { + "epoch": 0.31273741405010336, + "grad_norm": 2.524790084929846, + "learning_rate": 3.932691094904777e-06, + "loss": 0.1222, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3252 + }, + { + "epoch": 0.312833581766601, + "grad_norm": 2.233665706802587, + "learning_rate": 3.932065896807962e-06, + "loss": 0.1581, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3253 + }, + { + "epoch": 0.31292974948309854, + "grad_norm": 2.6433833202153507, + "learning_rate": 3.931440565381824e-06, + "loss": 0.1483, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3254 + }, + { + "epoch": 0.3130259171995961, + "grad_norm": 2.3742254090772783, + "learning_rate": 3.93081510068458e-06, + "loss": 0.146, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3255 + }, + { + "epoch": 0.31312208491609367, + "grad_norm": 2.1271611068384404, + "learning_rate": 3.930189502774467e-06, + "loss": 0.1391, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3256 + }, + { + "epoch": 0.31321825263259123, + "grad_norm": 1.6330969464049647, + "learning_rate": 3.929563771709728e-06, + "loss": 0.1448, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3257 + }, + { + "epoch": 0.3133144203490888, + "grad_norm": 2.80464161571369, + "learning_rate": 3.928937907548619e-06, + "loss": 0.1501, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3258 + }, + { + "epoch": 0.31341058806558636, + "grad_norm": 3.0568794484094006, + "learning_rate": 3.928311910349411e-06, + "loss": 0.131, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3259 + }, + { + "epoch": 0.313506755782084, + "grad_norm": 3.9862287266238043, + "learning_rate": 3.927685780170385e-06, + "loss": 0.1486, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3260 + }, + { + "epoch": 0.31360292349858154, + "grad_norm": 1.4760927715176748, + "learning_rate": 3.927059517069836e-06, + "loss": 0.1097, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3261 + }, + { + "epoch": 0.3136990912150791, + "grad_norm": 2.406296759910554, + "learning_rate": 3.926433121106072e-06, + "loss": 0.185, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3262 + }, + { + "epoch": 0.31379525893157667, + "grad_norm": 1.39203832645767, + "learning_rate": 3.9258065923374104e-06, + "loss": 0.1322, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3263 + }, + { + "epoch": 0.31389142664807423, + "grad_norm": 1.5293710394640332, + "learning_rate": 3.9251799308221835e-06, + "loss": 0.1408, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3264 + }, + { + "epoch": 0.3139875943645718, + "grad_norm": 2.427841164465484, + "learning_rate": 3.924553136618736e-06, + "loss": 0.1644, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3265 + }, + { + "epoch": 0.31408376208106936, + "grad_norm": 2.2224330910804, + "learning_rate": 3.9239262097854235e-06, + "loss": 0.1431, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3266 + }, + { + "epoch": 0.314179929797567, + "grad_norm": 2.081023773019089, + "learning_rate": 3.923299150380615e-06, + "loss": 0.151, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3267 + }, + { + "epoch": 0.31427609751406455, + "grad_norm": 1.6878564334090589, + "learning_rate": 3.922671958462691e-06, + "loss": 0.1483, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3268 + }, + { + "epoch": 0.3143722652305621, + "grad_norm": 1.5499849415113671, + "learning_rate": 3.922044634090044e-06, + "loss": 0.1539, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3269 + }, + { + "epoch": 0.3144684329470597, + "grad_norm": 2.237251358206742, + "learning_rate": 3.921417177321081e-06, + "loss": 0.1481, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3270 + }, + { + "epoch": 0.31456460066355724, + "grad_norm": 3.46397092419438, + "learning_rate": 3.9207895882142194e-06, + "loss": 0.1344, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3271 + }, + { + "epoch": 0.3146607683800548, + "grad_norm": 2.8975710989655963, + "learning_rate": 3.92016186682789e-06, + "loss": 0.1394, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3272 + }, + { + "epoch": 0.31475693609655236, + "grad_norm": 1.9415776100877389, + "learning_rate": 3.919534013220535e-06, + "loss": 0.1562, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3273 + }, + { + "epoch": 0.31485310381305, + "grad_norm": 1.630928130199589, + "learning_rate": 3.9189060274506095e-06, + "loss": 0.1369, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3274 + }, + { + "epoch": 0.31494927152954755, + "grad_norm": 1.576447214300854, + "learning_rate": 3.91827790957658e-06, + "loss": 0.1287, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3275 + }, + { + "epoch": 0.3150454392460451, + "grad_norm": 2.0898190325018775, + "learning_rate": 3.917649659656927e-06, + "loss": 0.1482, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3276 + }, + { + "epoch": 0.3151416069625427, + "grad_norm": 1.5223290326400927, + "learning_rate": 3.9170212777501425e-06, + "loss": 0.157, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3277 + }, + { + "epoch": 0.31523777467904024, + "grad_norm": 1.3582691710311525, + "learning_rate": 3.916392763914729e-06, + "loss": 0.1299, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3278 + }, + { + "epoch": 0.3153339423955378, + "grad_norm": 1.657730917271063, + "learning_rate": 3.915764118209205e-06, + "loss": 0.1522, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3279 + }, + { + "epoch": 0.31543011011203537, + "grad_norm": 2.7144596724467687, + "learning_rate": 3.915135340692098e-06, + "loss": 0.1437, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3280 + }, + { + "epoch": 0.315526277828533, + "grad_norm": 1.6207273162360865, + "learning_rate": 3.914506431421948e-06, + "loss": 0.1606, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3281 + }, + { + "epoch": 0.31562244554503055, + "grad_norm": 1.7488704431977486, + "learning_rate": 3.91387739045731e-06, + "loss": 0.1316, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3282 + }, + { + "epoch": 0.3157186132615281, + "grad_norm": 2.073418226001628, + "learning_rate": 3.913248217856748e-06, + "loss": 0.1258, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3283 + }, + { + "epoch": 0.3158147809780257, + "grad_norm": 1.732776816551614, + "learning_rate": 3.912618913678842e-06, + "loss": 0.1142, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3284 + }, + { + "epoch": 0.31591094869452324, + "grad_norm": 1.4915433100771212, + "learning_rate": 3.911989477982179e-06, + "loss": 0.1394, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3285 + }, + { + "epoch": 0.3160071164110208, + "grad_norm": 1.801864823089328, + "learning_rate": 3.911359910825363e-06, + "loss": 0.1377, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3286 + }, + { + "epoch": 0.31610328412751837, + "grad_norm": 1.793406335659566, + "learning_rate": 3.910730212267009e-06, + "loss": 0.1091, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3287 + }, + { + "epoch": 0.316199451844016, + "grad_norm": 1.591280361500901, + "learning_rate": 3.910100382365741e-06, + "loss": 0.1041, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3288 + }, + { + "epoch": 0.31629561956051355, + "grad_norm": 2.49280374280744, + "learning_rate": 3.909470421180202e-06, + "loss": 0.1349, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3289 + }, + { + "epoch": 0.3163917872770111, + "grad_norm": 3.1179134584730894, + "learning_rate": 3.908840328769039e-06, + "loss": 0.1866, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3290 + }, + { + "epoch": 0.3164879549935087, + "grad_norm": 1.9797257240109918, + "learning_rate": 3.908210105190917e-06, + "loss": 0.1361, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3291 + }, + { + "epoch": 0.31658412271000624, + "grad_norm": 2.3939866194029475, + "learning_rate": 3.907579750504513e-06, + "loss": 0.1611, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3292 + }, + { + "epoch": 0.3166802904265038, + "grad_norm": 2.020024202108835, + "learning_rate": 3.906949264768513e-06, + "loss": 0.1407, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3293 + }, + { + "epoch": 0.31677645814300137, + "grad_norm": 2.7578070569246207, + "learning_rate": 3.906318648041617e-06, + "loss": 0.1144, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3294 + }, + { + "epoch": 0.316872625859499, + "grad_norm": 1.518063459015049, + "learning_rate": 3.905687900382539e-06, + "loss": 0.1304, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3295 + }, + { + "epoch": 0.31696879357599655, + "grad_norm": 2.0858004878226595, + "learning_rate": 3.905057021850001e-06, + "loss": 0.1286, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3296 + }, + { + "epoch": 0.3170649612924941, + "grad_norm": 1.533392450223872, + "learning_rate": 3.9044260125027405e-06, + "loss": 0.1328, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3297 + }, + { + "epoch": 0.3171611290089917, + "grad_norm": 1.9733979274373374, + "learning_rate": 3.903794872399506e-06, + "loss": 0.1523, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3298 + }, + { + "epoch": 0.31725729672548925, + "grad_norm": 1.664641989623424, + "learning_rate": 3.90316360159906e-06, + "loss": 0.1455, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3299 + }, + { + "epoch": 0.3173534644419868, + "grad_norm": 1.5747939156259676, + "learning_rate": 3.902532200160174e-06, + "loss": 0.137, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3300 + }, + { + "epoch": 0.3174496321584844, + "grad_norm": 2.164155940441642, + "learning_rate": 3.901900668141633e-06, + "loss": 0.1521, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3301 + }, + { + "epoch": 0.317545799874982, + "grad_norm": 1.8965429541483987, + "learning_rate": 3.901269005602235e-06, + "loss": 0.1687, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3302 + }, + { + "epoch": 0.31764196759147956, + "grad_norm": 2.3629434393885043, + "learning_rate": 3.90063721260079e-06, + "loss": 0.1552, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3303 + }, + { + "epoch": 0.3177381353079771, + "grad_norm": 2.0089126696398383, + "learning_rate": 3.900005289196119e-06, + "loss": 0.1691, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3304 + }, + { + "epoch": 0.3178343030244747, + "grad_norm": 1.8772421879784893, + "learning_rate": 3.899373235447056e-06, + "loss": 0.1281, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3305 + }, + { + "epoch": 0.31793047074097225, + "grad_norm": 3.030587970461331, + "learning_rate": 3.898741051412446e-06, + "loss": 0.1688, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3306 + }, + { + "epoch": 0.3180266384574698, + "grad_norm": 2.1407618094431586, + "learning_rate": 3.8981087371511495e-06, + "loss": 0.1358, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3307 + }, + { + "epoch": 0.3181228061739674, + "grad_norm": 3.5464103722109503, + "learning_rate": 3.897476292722034e-06, + "loss": 0.1322, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3308 + }, + { + "epoch": 0.318218973890465, + "grad_norm": 2.566887835703098, + "learning_rate": 3.896843718183983e-06, + "loss": 0.1353, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3309 + }, + { + "epoch": 0.31831514160696256, + "grad_norm": 3.5563626267002575, + "learning_rate": 3.896211013595893e-06, + "loss": 0.1617, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3310 + }, + { + "epoch": 0.3184113093234601, + "grad_norm": 1.6039079850061702, + "learning_rate": 3.895578179016667e-06, + "loss": 0.1631, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3311 + }, + { + "epoch": 0.3185074770399577, + "grad_norm": 1.7055746285680198, + "learning_rate": 3.894945214505226e-06, + "loss": 0.1161, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3312 + }, + { + "epoch": 0.31860364475645525, + "grad_norm": 2.0501804169431805, + "learning_rate": 3.894312120120499e-06, + "loss": 0.1333, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3313 + }, + { + "epoch": 0.3186998124729528, + "grad_norm": 1.4750942907155746, + "learning_rate": 3.89367889592143e-06, + "loss": 0.1118, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3314 + }, + { + "epoch": 0.3187959801894504, + "grad_norm": 1.5133471819131206, + "learning_rate": 3.893045541966975e-06, + "loss": 0.1256, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3315 + }, + { + "epoch": 0.318892147905948, + "grad_norm": 1.7434539523640515, + "learning_rate": 3.892412058316099e-06, + "loss": 0.1384, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3316 + }, + { + "epoch": 0.31898831562244556, + "grad_norm": 2.1094619190936514, + "learning_rate": 3.891778445027782e-06, + "loss": 0.134, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3317 + }, + { + "epoch": 0.3190844833389431, + "grad_norm": 2.199030716173122, + "learning_rate": 3.891144702161015e-06, + "loss": 0.1714, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3318 + }, + { + "epoch": 0.3191806510554407, + "grad_norm": 2.1020846033569436, + "learning_rate": 3.890510829774802e-06, + "loss": 0.1238, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3319 + }, + { + "epoch": 0.31927681877193825, + "grad_norm": 1.8701123155270294, + "learning_rate": 3.889876827928156e-06, + "loss": 0.1505, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3320 + }, + { + "epoch": 0.3193729864884358, + "grad_norm": 3.1679437728071003, + "learning_rate": 3.889242696680108e-06, + "loss": 0.1567, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3321 + }, + { + "epoch": 0.3194691542049334, + "grad_norm": 1.8390648671749426, + "learning_rate": 3.888608436089694e-06, + "loss": 0.1368, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3322 + }, + { + "epoch": 0.319565321921431, + "grad_norm": 1.6250607996757074, + "learning_rate": 3.887974046215968e-06, + "loss": 0.1357, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3323 + }, + { + "epoch": 0.31966148963792856, + "grad_norm": 1.7954376695425187, + "learning_rate": 3.88733952711799e-06, + "loss": 0.1578, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3324 + }, + { + "epoch": 0.31975765735442613, + "grad_norm": 1.53352062035147, + "learning_rate": 3.88670487885484e-06, + "loss": 0.1471, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3325 + }, + { + "epoch": 0.3198538250709237, + "grad_norm": 2.7796606936419885, + "learning_rate": 3.886070101485602e-06, + "loss": 0.1127, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3326 + }, + { + "epoch": 0.31994999278742126, + "grad_norm": 1.368717982759322, + "learning_rate": 3.885435195069377e-06, + "loss": 0.1071, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3327 + }, + { + "epoch": 0.3200461605039188, + "grad_norm": 2.4131948993932117, + "learning_rate": 3.8848001596652765e-06, + "loss": 0.1236, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3328 + }, + { + "epoch": 0.3201423282204164, + "grad_norm": 1.7674644213658672, + "learning_rate": 3.884164995332423e-06, + "loss": 0.1296, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3329 + }, + { + "epoch": 0.320238495936914, + "grad_norm": 1.6505323476977785, + "learning_rate": 3.883529702129954e-06, + "loss": 0.1383, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3330 + }, + { + "epoch": 0.32033466365341157, + "grad_norm": 1.6260363779543208, + "learning_rate": 3.882894280117015e-06, + "loss": 0.1274, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3331 + }, + { + "epoch": 0.32043083136990913, + "grad_norm": 1.6689420738525296, + "learning_rate": 3.882258729352768e-06, + "loss": 0.1558, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3332 + }, + { + "epoch": 0.3205269990864067, + "grad_norm": 1.6997421909809893, + "learning_rate": 3.881623049896382e-06, + "loss": 0.1176, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3333 + }, + { + "epoch": 0.32062316680290426, + "grad_norm": 1.675740619129593, + "learning_rate": 3.880987241807042e-06, + "loss": 0.1199, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3334 + }, + { + "epoch": 0.3207193345194018, + "grad_norm": 3.316518414011548, + "learning_rate": 3.880351305143945e-06, + "loss": 0.1672, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3335 + }, + { + "epoch": 0.3208155022358994, + "grad_norm": 3.0359136109263636, + "learning_rate": 3.879715239966294e-06, + "loss": 0.1501, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3336 + }, + { + "epoch": 0.320911669952397, + "grad_norm": 1.4068466281485537, + "learning_rate": 3.879079046333313e-06, + "loss": 0.1221, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3337 + }, + { + "epoch": 0.32100783766889457, + "grad_norm": 1.880479224039766, + "learning_rate": 3.87844272430423e-06, + "loss": 0.1629, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3338 + }, + { + "epoch": 0.32110400538539213, + "grad_norm": 1.6979252028796719, + "learning_rate": 3.87780627393829e-06, + "loss": 0.1396, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3339 + }, + { + "epoch": 0.3212001731018897, + "grad_norm": 2.008726837100485, + "learning_rate": 3.877169695294749e-06, + "loss": 0.1555, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3340 + }, + { + "epoch": 0.32129634081838726, + "grad_norm": 1.6943366675121712, + "learning_rate": 3.876532988432873e-06, + "loss": 0.1427, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3341 + }, + { + "epoch": 0.3213925085348848, + "grad_norm": 3.9103980551011324, + "learning_rate": 3.875896153411941e-06, + "loss": 0.1515, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3342 + }, + { + "epoch": 0.3214886762513824, + "grad_norm": 2.381790663144979, + "learning_rate": 3.875259190291245e-06, + "loss": 0.1464, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3343 + }, + { + "epoch": 0.32158484396788, + "grad_norm": 1.8405960003663842, + "learning_rate": 3.874622099130087e-06, + "loss": 0.145, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3344 + }, + { + "epoch": 0.32168101168437757, + "grad_norm": 1.503024188967063, + "learning_rate": 3.873984879987784e-06, + "loss": 0.1419, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3345 + }, + { + "epoch": 0.32177717940087514, + "grad_norm": 4.345668962311327, + "learning_rate": 3.873347532923661e-06, + "loss": 0.1704, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3346 + }, + { + "epoch": 0.3218733471173727, + "grad_norm": 4.361095459479599, + "learning_rate": 3.8727100579970575e-06, + "loss": 0.1741, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3347 + }, + { + "epoch": 0.32196951483387026, + "grad_norm": 4.168483416014258, + "learning_rate": 3.872072455267324e-06, + "loss": 0.1611, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3348 + }, + { + "epoch": 0.3220656825503678, + "grad_norm": 3.883786768562271, + "learning_rate": 3.871434724793823e-06, + "loss": 0.1395, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3349 + }, + { + "epoch": 0.3221618502668654, + "grad_norm": 1.6569998993013313, + "learning_rate": 3.87079686663593e-06, + "loss": 0.1308, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3350 + }, + { + "epoch": 0.322258017983363, + "grad_norm": 2.801784580707976, + "learning_rate": 3.87015888085303e-06, + "loss": 0.1438, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3351 + }, + { + "epoch": 0.3223541856998606, + "grad_norm": 1.8297501058712002, + "learning_rate": 3.869520767504521e-06, + "loss": 0.1193, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3352 + }, + { + "epoch": 0.32245035341635814, + "grad_norm": 1.3561502363967028, + "learning_rate": 3.868882526649814e-06, + "loss": 0.1369, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3353 + }, + { + "epoch": 0.3225465211328557, + "grad_norm": 2.0983028089391973, + "learning_rate": 3.868244158348331e-06, + "loss": 0.133, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3354 + }, + { + "epoch": 0.32264268884935327, + "grad_norm": 1.333482417888719, + "learning_rate": 3.867605662659506e-06, + "loss": 0.1237, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3355 + }, + { + "epoch": 0.32273885656585083, + "grad_norm": 1.9627287854654762, + "learning_rate": 3.8669670396427845e-06, + "loss": 0.1318, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3356 + }, + { + "epoch": 0.3228350242823484, + "grad_norm": 1.6345723005408266, + "learning_rate": 3.866328289357623e-06, + "loss": 0.1437, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3357 + }, + { + "epoch": 0.322931191998846, + "grad_norm": 1.1517016554991593, + "learning_rate": 3.865689411863493e-06, + "loss": 0.1079, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3358 + }, + { + "epoch": 0.3230273597153436, + "grad_norm": 1.3414415906569945, + "learning_rate": 3.865050407219875e-06, + "loss": 0.117, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3359 + }, + { + "epoch": 0.32312352743184114, + "grad_norm": 1.827227363863631, + "learning_rate": 3.8644112754862614e-06, + "loss": 0.0932, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3360 + }, + { + "epoch": 0.3232196951483387, + "grad_norm": 1.8975546652765443, + "learning_rate": 3.8637720167221586e-06, + "loss": 0.1535, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3361 + }, + { + "epoch": 0.32331586286483627, + "grad_norm": 1.7667641247122667, + "learning_rate": 3.863132630987081e-06, + "loss": 0.1368, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3362 + }, + { + "epoch": 0.32341203058133383, + "grad_norm": 1.7168167624798099, + "learning_rate": 3.862493118340559e-06, + "loss": 0.1287, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3363 + }, + { + "epoch": 0.3235081982978314, + "grad_norm": 3.0395865610229427, + "learning_rate": 3.861853478842132e-06, + "loss": 0.1613, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3364 + }, + { + "epoch": 0.323604366014329, + "grad_norm": 4.043319523926623, + "learning_rate": 3.861213712551353e-06, + "loss": 0.1649, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3365 + }, + { + "epoch": 0.3237005337308266, + "grad_norm": 2.8631903772340586, + "learning_rate": 3.860573819527788e-06, + "loss": 0.1385, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3366 + }, + { + "epoch": 0.32379670144732414, + "grad_norm": 2.868523368186496, + "learning_rate": 3.859933799831008e-06, + "loss": 0.1372, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3367 + }, + { + "epoch": 0.3238928691638217, + "grad_norm": 2.4117781220968935, + "learning_rate": 3.8592936535206044e-06, + "loss": 0.1684, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3368 + }, + { + "epoch": 0.32398903688031927, + "grad_norm": 2.1677473497253583, + "learning_rate": 3.858653380656175e-06, + "loss": 0.1573, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3369 + }, + { + "epoch": 0.32408520459681683, + "grad_norm": 2.0777936515063358, + "learning_rate": 3.858012981297332e-06, + "loss": 0.1384, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3370 + }, + { + "epoch": 0.3241813723133144, + "grad_norm": 2.451332372805943, + "learning_rate": 3.857372455503698e-06, + "loss": 0.1817, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3371 + }, + { + "epoch": 0.324277540029812, + "grad_norm": 1.8274149584525532, + "learning_rate": 3.856731803334906e-06, + "loss": 0.1217, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3372 + }, + { + "epoch": 0.3243737077463096, + "grad_norm": 1.5682268537634472, + "learning_rate": 3.856091024850605e-06, + "loss": 0.1249, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3373 + }, + { + "epoch": 0.32446987546280714, + "grad_norm": 2.8432284431229835, + "learning_rate": 3.855450120110452e-06, + "loss": 0.1445, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3374 + }, + { + "epoch": 0.3245660431793047, + "grad_norm": 2.4605688204080987, + "learning_rate": 3.854809089174119e-06, + "loss": 0.1399, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3375 + }, + { + "epoch": 0.3246622108958023, + "grad_norm": 3.2299612690379247, + "learning_rate": 3.8541679321012836e-06, + "loss": 0.1185, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3376 + }, + { + "epoch": 0.32475837861229984, + "grad_norm": 1.3900451468082378, + "learning_rate": 3.853526648951643e-06, + "loss": 0.1314, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3377 + }, + { + "epoch": 0.3248545463287974, + "grad_norm": 1.533417162559959, + "learning_rate": 3.852885239784902e-06, + "loss": 0.1359, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3378 + }, + { + "epoch": 0.324950714045295, + "grad_norm": 3.562088783735683, + "learning_rate": 3.852243704660777e-06, + "loss": 0.1331, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3379 + }, + { + "epoch": 0.3250468817617926, + "grad_norm": 1.4583186490917597, + "learning_rate": 3.8516020436389945e-06, + "loss": 0.142, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3380 + }, + { + "epoch": 0.32514304947829015, + "grad_norm": 1.7552816425615982, + "learning_rate": 3.850960256779298e-06, + "loss": 0.1272, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3381 + }, + { + "epoch": 0.3252392171947877, + "grad_norm": 2.442376276625991, + "learning_rate": 3.850318344141439e-06, + "loss": 0.1564, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3382 + }, + { + "epoch": 0.3253353849112853, + "grad_norm": 4.136280514509425, + "learning_rate": 3.8496763057851806e-06, + "loss": 0.1122, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3383 + }, + { + "epoch": 0.32543155262778284, + "grad_norm": 1.8863946859918195, + "learning_rate": 3.8490341417702985e-06, + "loss": 0.1115, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3384 + }, + { + "epoch": 0.3255277203442804, + "grad_norm": 1.451787539371319, + "learning_rate": 3.848391852156581e-06, + "loss": 0.1386, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3385 + }, + { + "epoch": 0.325623888060778, + "grad_norm": 1.6871552116097583, + "learning_rate": 3.8477494370038245e-06, + "loss": 0.1497, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3386 + }, + { + "epoch": 0.3257200557772756, + "grad_norm": 1.5963766734406297, + "learning_rate": 3.847106896371843e-06, + "loss": 0.1312, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3387 + }, + { + "epoch": 0.32581622349377315, + "grad_norm": 2.548289625122867, + "learning_rate": 3.846464230320457e-06, + "loss": 0.1884, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3388 + }, + { + "epoch": 0.3259123912102707, + "grad_norm": 1.810538196155107, + "learning_rate": 3.8458214389095005e-06, + "loss": 0.1678, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3389 + }, + { + "epoch": 0.3260085589267683, + "grad_norm": 3.241750871109343, + "learning_rate": 3.845178522198819e-06, + "loss": 0.1107, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3390 + }, + { + "epoch": 0.32610472664326584, + "grad_norm": 1.2728381871409447, + "learning_rate": 3.844535480248271e-06, + "loss": 0.1094, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3391 + }, + { + "epoch": 0.3262008943597634, + "grad_norm": 1.5488491363401007, + "learning_rate": 3.843892313117724e-06, + "loss": 0.1428, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3392 + }, + { + "epoch": 0.326297062076261, + "grad_norm": 1.5607184203585502, + "learning_rate": 3.8432490208670605e-06, + "loss": 0.1177, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3393 + }, + { + "epoch": 0.3263932297927586, + "grad_norm": 1.5036930827072754, + "learning_rate": 3.84260560355617e-06, + "loss": 0.1181, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3394 + }, + { + "epoch": 0.32648939750925615, + "grad_norm": 1.8606039487155044, + "learning_rate": 3.8419620612449595e-06, + "loss": 0.1618, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3395 + }, + { + "epoch": 0.3265855652257537, + "grad_norm": 1.8447447931987573, + "learning_rate": 3.841318393993342e-06, + "loss": 0.105, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3396 + }, + { + "epoch": 0.3266817329422513, + "grad_norm": 1.6060219568982275, + "learning_rate": 3.840674601861247e-06, + "loss": 0.1503, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3397 + }, + { + "epoch": 0.32677790065874884, + "grad_norm": 1.7272184975070561, + "learning_rate": 3.840030684908611e-06, + "loss": 0.1259, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3398 + }, + { + "epoch": 0.3268740683752464, + "grad_norm": 2.1296733539008925, + "learning_rate": 3.839386643195388e-06, + "loss": 0.116, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3399 + }, + { + "epoch": 0.326970236091744, + "grad_norm": 2.2347607805262437, + "learning_rate": 3.838742476781535e-06, + "loss": 0.1448, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3400 + }, + { + "epoch": 0.3270664038082416, + "grad_norm": 2.5760422688012925, + "learning_rate": 3.83809818572703e-06, + "loss": 0.128, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3401 + }, + { + "epoch": 0.32716257152473915, + "grad_norm": 2.881943200918519, + "learning_rate": 3.8374537700918555e-06, + "loss": 0.1477, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3402 + }, + { + "epoch": 0.3272587392412367, + "grad_norm": 2.0313609179367424, + "learning_rate": 3.836809229936011e-06, + "loss": 0.1074, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3403 + }, + { + "epoch": 0.3273549069577343, + "grad_norm": 2.323511549767307, + "learning_rate": 3.836164565319503e-06, + "loss": 0.1418, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3404 + }, + { + "epoch": 0.32745107467423185, + "grad_norm": 1.7396223881438762, + "learning_rate": 3.835519776302352e-06, + "loss": 0.1348, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3405 + }, + { + "epoch": 0.3275472423907294, + "grad_norm": 3.639254536819985, + "learning_rate": 3.834874862944591e-06, + "loss": 0.1259, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3406 + }, + { + "epoch": 0.32764341010722703, + "grad_norm": 4.091205106630501, + "learning_rate": 3.834229825306261e-06, + "loss": 0.1259, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3407 + }, + { + "epoch": 0.3277395778237246, + "grad_norm": 4.7852133841464655, + "learning_rate": 3.833584663447418e-06, + "loss": 0.1569, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3408 + }, + { + "epoch": 0.32783574554022216, + "grad_norm": 4.07935097750167, + "learning_rate": 3.832939377428129e-06, + "loss": 0.1667, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3409 + }, + { + "epoch": 0.3279319132567197, + "grad_norm": 1.8098349420666746, + "learning_rate": 3.83229396730847e-06, + "loss": 0.1293, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3410 + }, + { + "epoch": 0.3280280809732173, + "grad_norm": 1.7448464846793896, + "learning_rate": 3.831648433148533e-06, + "loss": 0.1273, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3411 + }, + { + "epoch": 0.32812424868971485, + "grad_norm": 3.139581639008674, + "learning_rate": 3.831002775008417e-06, + "loss": 0.1364, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3412 + }, + { + "epoch": 0.3282204164062124, + "grad_norm": 5.010695234925121, + "learning_rate": 3.830356992948234e-06, + "loss": 0.1466, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3413 + }, + { + "epoch": 0.32831658412271003, + "grad_norm": 5.733767819848105, + "learning_rate": 3.829711087028111e-06, + "loss": 0.1537, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3414 + }, + { + "epoch": 0.3284127518392076, + "grad_norm": 2.9922780951463666, + "learning_rate": 3.829065057308182e-06, + "loss": 0.1635, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3415 + }, + { + "epoch": 0.32850891955570516, + "grad_norm": 1.3880914318354887, + "learning_rate": 3.828418903848593e-06, + "loss": 0.1244, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3416 + }, + { + "epoch": 0.3286050872722027, + "grad_norm": 2.4211694278922806, + "learning_rate": 3.827772626709505e-06, + "loss": 0.1405, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3417 + }, + { + "epoch": 0.3287012549887003, + "grad_norm": 4.98945177601356, + "learning_rate": 3.827126225951087e-06, + "loss": 0.1343, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3418 + }, + { + "epoch": 0.32879742270519785, + "grad_norm": 2.9336391516760267, + "learning_rate": 3.82647970163352e-06, + "loss": 0.1221, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3419 + }, + { + "epoch": 0.3288935904216954, + "grad_norm": 1.5318796211221382, + "learning_rate": 3.825833053816998e-06, + "loss": 0.1452, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3420 + }, + { + "epoch": 0.32898975813819303, + "grad_norm": 1.7870518006962053, + "learning_rate": 3.825186282561727e-06, + "loss": 0.1539, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3421 + }, + { + "epoch": 0.3290859258546906, + "grad_norm": 1.6457005103915472, + "learning_rate": 3.824539387927921e-06, + "loss": 0.1461, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3422 + }, + { + "epoch": 0.32918209357118816, + "grad_norm": 2.940198105716714, + "learning_rate": 3.823892369975808e-06, + "loss": 0.1592, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3423 + }, + { + "epoch": 0.3292782612876857, + "grad_norm": 2.7294283916184585, + "learning_rate": 3.823245228765628e-06, + "loss": 0.1163, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3424 + }, + { + "epoch": 0.3293744290041833, + "grad_norm": 1.7003877944841186, + "learning_rate": 3.822597964357632e-06, + "loss": 0.1114, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3425 + }, + { + "epoch": 0.32947059672068085, + "grad_norm": 1.8768820718320247, + "learning_rate": 3.821950576812081e-06, + "loss": 0.1305, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3426 + }, + { + "epoch": 0.3295667644371784, + "grad_norm": 1.9340782724795846, + "learning_rate": 3.82130306618925e-06, + "loss": 0.1286, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3427 + }, + { + "epoch": 0.32966293215367604, + "grad_norm": 3.49663721108772, + "learning_rate": 3.820655432549423e-06, + "loss": 0.1074, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3428 + }, + { + "epoch": 0.3297590998701736, + "grad_norm": 1.881113695879017, + "learning_rate": 3.820007675952896e-06, + "loss": 0.1797, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3429 + }, + { + "epoch": 0.32985526758667116, + "grad_norm": 2.1244163175213866, + "learning_rate": 3.819359796459978e-06, + "loss": 0.1526, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3430 + }, + { + "epoch": 0.3299514353031687, + "grad_norm": 2.361906068217545, + "learning_rate": 3.818711794130988e-06, + "loss": 0.1678, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3431 + }, + { + "epoch": 0.3300476030196663, + "grad_norm": 2.056054790015113, + "learning_rate": 3.8180636690262565e-06, + "loss": 0.1209, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3432 + }, + { + "epoch": 0.33014377073616386, + "grad_norm": 4.5943563775019065, + "learning_rate": 3.817415421206126e-06, + "loss": 0.1621, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3433 + }, + { + "epoch": 0.3302399384526614, + "grad_norm": 1.4226068877518472, + "learning_rate": 3.816767050730951e-06, + "loss": 0.1339, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3434 + }, + { + "epoch": 0.33033610616915904, + "grad_norm": 1.8511427720188707, + "learning_rate": 3.816118557661095e-06, + "loss": 0.132, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3435 + }, + { + "epoch": 0.3304322738856566, + "grad_norm": 2.037829232553068, + "learning_rate": 3.815469942056936e-06, + "loss": 0.1711, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3436 + }, + { + "epoch": 0.33052844160215417, + "grad_norm": 3.1068338128055784, + "learning_rate": 3.81482120397886e-06, + "loss": 0.1142, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3437 + }, + { + "epoch": 0.33062460931865173, + "grad_norm": 1.8427637304342055, + "learning_rate": 3.8141723434872692e-06, + "loss": 0.1301, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3438 + }, + { + "epoch": 0.3307207770351493, + "grad_norm": 1.4721780237209685, + "learning_rate": 3.813523360642572e-06, + "loss": 0.1357, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3439 + }, + { + "epoch": 0.33081694475164686, + "grad_norm": 1.579436885743819, + "learning_rate": 3.812874255505191e-06, + "loss": 0.144, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3440 + }, + { + "epoch": 0.3309131124681444, + "grad_norm": 1.3353879994296822, + "learning_rate": 3.8122250281355607e-06, + "loss": 0.1352, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3441 + }, + { + "epoch": 0.33100928018464204, + "grad_norm": 1.2525824150822993, + "learning_rate": 3.811575678594124e-06, + "loss": 0.109, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3442 + }, + { + "epoch": 0.3311054479011396, + "grad_norm": 1.4501454039282826, + "learning_rate": 3.810926206941339e-06, + "loss": 0.1385, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3443 + }, + { + "epoch": 0.33120161561763717, + "grad_norm": 1.7571661178181932, + "learning_rate": 3.8102766132376727e-06, + "loss": 0.1605, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3444 + }, + { + "epoch": 0.33129778333413473, + "grad_norm": 2.1010742862237266, + "learning_rate": 3.8096268975436045e-06, + "loss": 0.1272, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3445 + }, + { + "epoch": 0.3313939510506323, + "grad_norm": 2.4559669294677366, + "learning_rate": 3.8089770599196234e-06, + "loss": 0.1317, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3446 + }, + { + "epoch": 0.33149011876712986, + "grad_norm": 2.104217638011683, + "learning_rate": 3.808327100426233e-06, + "loss": 0.1284, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3447 + }, + { + "epoch": 0.3315862864836274, + "grad_norm": 2.1496888174347775, + "learning_rate": 3.8076770191239444e-06, + "loss": 0.1715, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3448 + }, + { + "epoch": 0.33168245420012504, + "grad_norm": 1.665934454230772, + "learning_rate": 3.8070268160732836e-06, + "loss": 0.1399, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3449 + }, + { + "epoch": 0.3317786219166226, + "grad_norm": 1.9272273917453508, + "learning_rate": 3.8063764913347843e-06, + "loss": 0.1413, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3450 + }, + { + "epoch": 0.33187478963312017, + "grad_norm": 1.7018054719948639, + "learning_rate": 3.805726044968996e-06, + "loss": 0.1372, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3451 + }, + { + "epoch": 0.33197095734961773, + "grad_norm": 2.744949502649936, + "learning_rate": 3.8050754770364763e-06, + "loss": 0.1289, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3452 + }, + { + "epoch": 0.3320671250661153, + "grad_norm": 2.0346276672535546, + "learning_rate": 3.8044247875977937e-06, + "loss": 0.1401, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3453 + }, + { + "epoch": 0.33216329278261286, + "grad_norm": 1.6717146094114717, + "learning_rate": 3.8037739767135295e-06, + "loss": 0.1368, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3454 + }, + { + "epoch": 0.3322594604991104, + "grad_norm": 1.535600994086691, + "learning_rate": 3.803123044444278e-06, + "loss": 0.1427, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3455 + }, + { + "epoch": 0.33235562821560805, + "grad_norm": 2.364829618991945, + "learning_rate": 3.8024719908506403e-06, + "loss": 0.146, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3456 + }, + { + "epoch": 0.3324517959321056, + "grad_norm": 2.675752827046769, + "learning_rate": 3.8018208159932325e-06, + "loss": 0.1351, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3457 + }, + { + "epoch": 0.3325479636486032, + "grad_norm": 1.7351880229558918, + "learning_rate": 3.801169519932681e-06, + "loss": 0.141, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3458 + }, + { + "epoch": 0.33264413136510074, + "grad_norm": 1.368301308608558, + "learning_rate": 3.8005181027296224e-06, + "loss": 0.1226, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3459 + }, + { + "epoch": 0.3327402990815983, + "grad_norm": 1.5621821201655994, + "learning_rate": 3.7998665644447064e-06, + "loss": 0.1563, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3460 + }, + { + "epoch": 0.33283646679809586, + "grad_norm": 1.4211852530716724, + "learning_rate": 3.7992149051385925e-06, + "loss": 0.1306, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3461 + }, + { + "epoch": 0.33293263451459343, + "grad_norm": 1.3493065189932378, + "learning_rate": 3.7985631248719522e-06, + "loss": 0.1251, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3462 + }, + { + "epoch": 0.33302880223109105, + "grad_norm": 3.2600486428427042, + "learning_rate": 3.7979112237054673e-06, + "loss": 0.1415, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3463 + }, + { + "epoch": 0.3331249699475886, + "grad_norm": 1.7925200890610584, + "learning_rate": 3.797259201699833e-06, + "loss": 0.1181, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3464 + }, + { + "epoch": 0.3332211376640862, + "grad_norm": 1.5305557034551862, + "learning_rate": 3.7966070589157533e-06, + "loss": 0.1464, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3465 + }, + { + "epoch": 0.33331730538058374, + "grad_norm": 1.9100150077881293, + "learning_rate": 3.7959547954139448e-06, + "loss": 0.1421, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3466 + }, + { + "epoch": 0.3334134730970813, + "grad_norm": 2.625048425204003, + "learning_rate": 3.795302411255135e-06, + "loss": 0.1491, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3467 + }, + { + "epoch": 0.33350964081357887, + "grad_norm": 2.2978260803866997, + "learning_rate": 3.7946499065000625e-06, + "loss": 0.1335, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3468 + }, + { + "epoch": 0.33360580853007643, + "grad_norm": 3.3966807080223465, + "learning_rate": 3.7939972812094782e-06, + "loss": 0.1486, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3469 + }, + { + "epoch": 0.33370197624657405, + "grad_norm": 3.1474605708703822, + "learning_rate": 3.793344535444142e-06, + "loss": 0.1561, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3470 + }, + { + "epoch": 0.3337981439630716, + "grad_norm": 3.67471369620293, + "learning_rate": 3.7926916692648273e-06, + "loss": 0.1479, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3471 + }, + { + "epoch": 0.3338943116795692, + "grad_norm": 6.258001866504819, + "learning_rate": 3.7920386827323186e-06, + "loss": 0.166, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3472 + }, + { + "epoch": 0.33399047939606674, + "grad_norm": 4.265896666503928, + "learning_rate": 3.791385575907408e-06, + "loss": 0.1533, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3473 + }, + { + "epoch": 0.3340866471125643, + "grad_norm": 2.3219156838867696, + "learning_rate": 3.7907323488509047e-06, + "loss": 0.1805, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3474 + }, + { + "epoch": 0.33418281482906187, + "grad_norm": 1.650308435570107, + "learning_rate": 3.7900790016236232e-06, + "loss": 0.1325, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3475 + }, + { + "epoch": 0.33427898254555943, + "grad_norm": 2.2681845047868663, + "learning_rate": 3.789425534286394e-06, + "loss": 0.1293, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3476 + }, + { + "epoch": 0.33437515026205705, + "grad_norm": 1.4887954250773836, + "learning_rate": 3.788771946900056e-06, + "loss": 0.1318, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3477 + }, + { + "epoch": 0.3344713179785546, + "grad_norm": 2.4650673492280806, + "learning_rate": 3.7881182395254594e-06, + "loss": 0.1071, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3478 + }, + { + "epoch": 0.3345674856950522, + "grad_norm": 3.7989701741785766, + "learning_rate": 3.787464412223468e-06, + "loss": 0.1778, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3479 + }, + { + "epoch": 0.33466365341154974, + "grad_norm": 2.5333878653823536, + "learning_rate": 3.786810465054953e-06, + "loss": 0.1384, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3480 + }, + { + "epoch": 0.3347598211280473, + "grad_norm": 2.7136390085288675, + "learning_rate": 3.786156398080799e-06, + "loss": 0.1365, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3481 + }, + { + "epoch": 0.33485598884454487, + "grad_norm": 2.1944515815405587, + "learning_rate": 3.785502211361902e-06, + "loss": 0.148, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3482 + }, + { + "epoch": 0.33495215656104244, + "grad_norm": 2.2414388687395417, + "learning_rate": 3.784847904959169e-06, + "loss": 0.1227, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3483 + }, + { + "epoch": 0.33504832427754005, + "grad_norm": 2.7026481163734792, + "learning_rate": 3.7841934789335167e-06, + "loss": 0.1435, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3484 + }, + { + "epoch": 0.3351444919940376, + "grad_norm": 2.2875266652695947, + "learning_rate": 3.783538933345874e-06, + "loss": 0.1462, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3485 + }, + { + "epoch": 0.3352406597105352, + "grad_norm": 2.832120720787964, + "learning_rate": 3.782884268257182e-06, + "loss": 0.124, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3486 + }, + { + "epoch": 0.33533682742703275, + "grad_norm": 3.9643065521134124, + "learning_rate": 3.7822294837283915e-06, + "loss": 0.1261, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3487 + }, + { + "epoch": 0.3354329951435303, + "grad_norm": 4.68332064722307, + "learning_rate": 3.7815745798204646e-06, + "loss": 0.1131, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3488 + }, + { + "epoch": 0.3355291628600279, + "grad_norm": 4.362817533993084, + "learning_rate": 3.780919556594374e-06, + "loss": 0.1499, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3489 + }, + { + "epoch": 0.33562533057652544, + "grad_norm": 1.8757843408563293, + "learning_rate": 3.780264414111105e-06, + "loss": 0.1522, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3490 + }, + { + "epoch": 0.33572149829302306, + "grad_norm": 1.572667786905571, + "learning_rate": 3.7796091524316534e-06, + "loss": 0.1331, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3491 + }, + { + "epoch": 0.3358176660095206, + "grad_norm": 4.26108629208917, + "learning_rate": 3.7789537716170257e-06, + "loss": 0.1701, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3492 + }, + { + "epoch": 0.3359138337260182, + "grad_norm": 5.741227846530718, + "learning_rate": 3.778298271728238e-06, + "loss": 0.2191, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3493 + }, + { + "epoch": 0.33601000144251575, + "grad_norm": 4.077494037877225, + "learning_rate": 3.7776426528263223e-06, + "loss": 0.1699, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3494 + }, + { + "epoch": 0.3361061691590133, + "grad_norm": 2.4484578495393707, + "learning_rate": 3.776986914972316e-06, + "loss": 0.1748, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3495 + }, + { + "epoch": 0.3362023368755109, + "grad_norm": 1.6221920027682255, + "learning_rate": 3.776331058227271e-06, + "loss": 0.1244, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3496 + }, + { + "epoch": 0.33629850459200844, + "grad_norm": 3.0602466218762827, + "learning_rate": 3.77567508265225e-06, + "loss": 0.1331, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3497 + }, + { + "epoch": 0.33639467230850606, + "grad_norm": 2.156749811334613, + "learning_rate": 3.7750189883083244e-06, + "loss": 0.1135, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3498 + }, + { + "epoch": 0.3364908400250036, + "grad_norm": 1.7319303220702862, + "learning_rate": 3.7743627752565802e-06, + "loss": 0.1272, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3499 + }, + { + "epoch": 0.3365870077415012, + "grad_norm": 1.8992543842237517, + "learning_rate": 3.773706443558112e-06, + "loss": 0.1483, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3500 + }, + { + "epoch": 0.33668317545799875, + "grad_norm": 1.8999321623821177, + "learning_rate": 3.7730499932740263e-06, + "loss": 0.1477, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3501 + }, + { + "epoch": 0.3367793431744963, + "grad_norm": 1.947895904541882, + "learning_rate": 3.77239342446544e-06, + "loss": 0.1295, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3502 + }, + { + "epoch": 0.3368755108909939, + "grad_norm": 2.368457336347278, + "learning_rate": 3.771736737193481e-06, + "loss": 0.1357, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3503 + }, + { + "epoch": 0.33697167860749144, + "grad_norm": 3.1012675343887612, + "learning_rate": 3.7710799315192904e-06, + "loss": 0.1781, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3504 + }, + { + "epoch": 0.33706784632398906, + "grad_norm": 1.6913968415679321, + "learning_rate": 3.7704230075040177e-06, + "loss": 0.131, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3505 + }, + { + "epoch": 0.3371640140404866, + "grad_norm": 4.365446722279089, + "learning_rate": 3.7697659652088237e-06, + "loss": 0.139, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3506 + }, + { + "epoch": 0.3372601817569842, + "grad_norm": 1.1814669086800493, + "learning_rate": 3.769108804694882e-06, + "loss": 0.1101, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3507 + }, + { + "epoch": 0.33735634947348175, + "grad_norm": 4.476102305603176, + "learning_rate": 3.7684515260233757e-06, + "loss": 0.1678, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3508 + }, + { + "epoch": 0.3374525171899793, + "grad_norm": 3.6736483240391262, + "learning_rate": 3.7677941292554985e-06, + "loss": 0.1223, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3509 + }, + { + "epoch": 0.3375486849064769, + "grad_norm": 2.3445453211620175, + "learning_rate": 3.767136614452458e-06, + "loss": 0.1072, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3510 + }, + { + "epoch": 0.33764485262297445, + "grad_norm": 3.1689445372151277, + "learning_rate": 3.766478981675468e-06, + "loss": 0.1419, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3511 + }, + { + "epoch": 0.33774102033947206, + "grad_norm": 4.130364970769895, + "learning_rate": 3.7658212309857576e-06, + "loss": 0.146, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3512 + }, + { + "epoch": 0.33783718805596963, + "grad_norm": 2.7561298545969715, + "learning_rate": 3.765163362444564e-06, + "loss": 0.1238, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3513 + }, + { + "epoch": 0.3379333557724672, + "grad_norm": 3.5520213620406342, + "learning_rate": 3.7645053761131383e-06, + "loss": 0.1551, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3514 + }, + { + "epoch": 0.33802952348896476, + "grad_norm": 1.596448867697678, + "learning_rate": 3.76384727205274e-06, + "loss": 0.1194, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3515 + }, + { + "epoch": 0.3381256912054623, + "grad_norm": 1.5230898482020936, + "learning_rate": 3.7631890503246395e-06, + "loss": 0.1042, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3516 + }, + { + "epoch": 0.3382218589219599, + "grad_norm": 4.298300228437727, + "learning_rate": 3.76253071099012e-06, + "loss": 0.1391, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3517 + }, + { + "epoch": 0.33831802663845745, + "grad_norm": 2.5777152809324537, + "learning_rate": 3.7618722541104746e-06, + "loss": 0.1555, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3518 + }, + { + "epoch": 0.33841419435495507, + "grad_norm": 2.772643643798645, + "learning_rate": 3.7612136797470084e-06, + "loss": 0.1346, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3519 + }, + { + "epoch": 0.33851036207145263, + "grad_norm": 1.8692274604846095, + "learning_rate": 3.7605549879610346e-06, + "loss": 0.1288, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3520 + }, + { + "epoch": 0.3386065297879502, + "grad_norm": 1.7531208084703678, + "learning_rate": 3.7598961788138806e-06, + "loss": 0.1193, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3521 + }, + { + "epoch": 0.33870269750444776, + "grad_norm": 1.8960194936772792, + "learning_rate": 3.759237252366883e-06, + "loss": 0.1548, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3522 + }, + { + "epoch": 0.3387988652209453, + "grad_norm": 2.594643564341, + "learning_rate": 3.7585782086813898e-06, + "loss": 0.1431, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3523 + }, + { + "epoch": 0.3388950329374429, + "grad_norm": 2.0465061361814048, + "learning_rate": 3.75791904781876e-06, + "loss": 0.1287, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3524 + }, + { + "epoch": 0.33899120065394045, + "grad_norm": 1.9574383882816764, + "learning_rate": 3.7572597698403622e-06, + "loss": 0.1476, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3525 + }, + { + "epoch": 0.33908736837043807, + "grad_norm": 1.54579125206169, + "learning_rate": 3.7566003748075786e-06, + "loss": 0.1262, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3526 + }, + { + "epoch": 0.33918353608693563, + "grad_norm": 1.867485033466897, + "learning_rate": 3.7559408627818e-06, + "loss": 0.1392, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3527 + }, + { + "epoch": 0.3392797038034332, + "grad_norm": 2.0453772333764197, + "learning_rate": 3.755281233824428e-06, + "loss": 0.1861, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3528 + }, + { + "epoch": 0.33937587151993076, + "grad_norm": 1.4209162509543631, + "learning_rate": 3.754621487996878e-06, + "loss": 0.1375, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3529 + }, + { + "epoch": 0.3394720392364283, + "grad_norm": 1.6396675234160172, + "learning_rate": 3.753961625360572e-06, + "loss": 0.0957, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3530 + }, + { + "epoch": 0.3395682069529259, + "grad_norm": 3.469530562619558, + "learning_rate": 3.7533016459769454e-06, + "loss": 0.123, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3531 + }, + { + "epoch": 0.33966437466942345, + "grad_norm": 4.240424577579516, + "learning_rate": 3.752641549907445e-06, + "loss": 0.1463, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3532 + }, + { + "epoch": 0.33976054238592107, + "grad_norm": 2.010037682364415, + "learning_rate": 3.751981337213528e-06, + "loss": 0.1508, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3533 + }, + { + "epoch": 0.33985671010241864, + "grad_norm": 1.6533271858443366, + "learning_rate": 3.75132100795666e-06, + "loss": 0.1302, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3534 + }, + { + "epoch": 0.3399528778189162, + "grad_norm": 2.770262029857963, + "learning_rate": 3.750660562198321e-06, + "loss": 0.1191, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3535 + }, + { + "epoch": 0.34004904553541376, + "grad_norm": 1.7277615443056131, + "learning_rate": 3.7500000000000005e-06, + "loss": 0.123, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3536 + }, + { + "epoch": 0.3401452132519113, + "grad_norm": 1.792873983111737, + "learning_rate": 3.7493393214231976e-06, + "loss": 0.1346, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3537 + }, + { + "epoch": 0.3402413809684089, + "grad_norm": 1.8799988067689475, + "learning_rate": 3.7486785265294244e-06, + "loss": 0.1694, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3538 + }, + { + "epoch": 0.34033754868490645, + "grad_norm": 2.825125649061633, + "learning_rate": 3.748017615380202e-06, + "loss": 0.1099, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3539 + }, + { + "epoch": 0.3404337164014041, + "grad_norm": 1.6755268169498787, + "learning_rate": 3.747356588037064e-06, + "loss": 0.1247, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3540 + }, + { + "epoch": 0.34052988411790164, + "grad_norm": 2.3579825880940977, + "learning_rate": 3.7466954445615524e-06, + "loss": 0.1632, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3541 + }, + { + "epoch": 0.3406260518343992, + "grad_norm": 2.2245095474357206, + "learning_rate": 3.7460341850152233e-06, + "loss": 0.1414, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3542 + }, + { + "epoch": 0.34072221955089677, + "grad_norm": 2.432755620223817, + "learning_rate": 3.7453728094596396e-06, + "loss": 0.1367, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3543 + }, + { + "epoch": 0.34081838726739433, + "grad_norm": 2.3062831198912566, + "learning_rate": 3.74471131795638e-06, + "loss": 0.1493, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3544 + }, + { + "epoch": 0.3409145549838919, + "grad_norm": 1.816955713696935, + "learning_rate": 3.7440497105670295e-06, + "loss": 0.1325, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3545 + }, + { + "epoch": 0.34101072270038946, + "grad_norm": 2.2013545040870355, + "learning_rate": 3.743387987353184e-06, + "loss": 0.1325, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3546 + }, + { + "epoch": 0.3411068904168871, + "grad_norm": 1.6279277955185487, + "learning_rate": 3.7427261483764555e-06, + "loss": 0.1381, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3547 + }, + { + "epoch": 0.34120305813338464, + "grad_norm": 2.248649604186158, + "learning_rate": 3.7420641936984597e-06, + "loss": 0.1344, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3548 + }, + { + "epoch": 0.3412992258498822, + "grad_norm": 2.564505649888573, + "learning_rate": 3.7414021233808286e-06, + "loss": 0.1457, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3549 + }, + { + "epoch": 0.34139539356637977, + "grad_norm": 1.9378309035992056, + "learning_rate": 3.7407399374852016e-06, + "loss": 0.1696, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3550 + }, + { + "epoch": 0.34149156128287733, + "grad_norm": 1.712347603880287, + "learning_rate": 3.7400776360732304e-06, + "loss": 0.1498, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3551 + }, + { + "epoch": 0.3415877289993749, + "grad_norm": 2.3906455694923587, + "learning_rate": 3.739415219206577e-06, + "loss": 0.1131, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3552 + }, + { + "epoch": 0.34168389671587246, + "grad_norm": 6.395932462588307, + "learning_rate": 3.7387526869469153e-06, + "loss": 0.2182, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3553 + }, + { + "epoch": 0.3417800644323701, + "grad_norm": 2.198867076951673, + "learning_rate": 3.7380900393559273e-06, + "loss": 0.1454, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3554 + }, + { + "epoch": 0.34187623214886764, + "grad_norm": 1.4055789884094962, + "learning_rate": 3.737427276495308e-06, + "loss": 0.12, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3555 + }, + { + "epoch": 0.3419723998653652, + "grad_norm": 1.859608779576356, + "learning_rate": 3.7367643984267633e-06, + "loss": 0.1257, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3556 + }, + { + "epoch": 0.34206856758186277, + "grad_norm": 2.1211235483770277, + "learning_rate": 3.736101405212007e-06, + "loss": 0.1248, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3557 + }, + { + "epoch": 0.34216473529836033, + "grad_norm": 2.047969218994944, + "learning_rate": 3.7354382969127676e-06, + "loss": 0.2046, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3558 + }, + { + "epoch": 0.3422609030148579, + "grad_norm": 1.7535075314828004, + "learning_rate": 3.7347750735907812e-06, + "loss": 0.132, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3559 + }, + { + "epoch": 0.34235707073135546, + "grad_norm": 1.492055172220804, + "learning_rate": 3.7341117353077964e-06, + "loss": 0.1353, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3560 + }, + { + "epoch": 0.3424532384478531, + "grad_norm": 2.208428670764531, + "learning_rate": 3.7334482821255715e-06, + "loss": 0.1552, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3561 + }, + { + "epoch": 0.34254940616435064, + "grad_norm": 2.489016477599394, + "learning_rate": 3.732784714105876e-06, + "loss": 0.1343, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3562 + }, + { + "epoch": 0.3426455738808482, + "grad_norm": 1.4807383178091802, + "learning_rate": 3.7321210313104892e-06, + "loss": 0.135, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3563 + }, + { + "epoch": 0.3427417415973458, + "grad_norm": 1.8365927369683137, + "learning_rate": 3.7314572338012033e-06, + "loss": 0.1376, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3564 + }, + { + "epoch": 0.34283790931384334, + "grad_norm": 1.4509631456404162, + "learning_rate": 3.7307933216398183e-06, + "loss": 0.1094, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3565 + }, + { + "epoch": 0.3429340770303409, + "grad_norm": 1.8341896989880409, + "learning_rate": 3.730129294888146e-06, + "loss": 0.1222, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3566 + }, + { + "epoch": 0.34303024474683846, + "grad_norm": 2.323111857050732, + "learning_rate": 3.729465153608012e-06, + "loss": 0.1167, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3567 + }, + { + "epoch": 0.3431264124633361, + "grad_norm": 1.5916905016470626, + "learning_rate": 3.7288008978612457e-06, + "loss": 0.1276, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3568 + }, + { + "epoch": 0.34322258017983365, + "grad_norm": 1.6490346823147084, + "learning_rate": 3.7281365277096937e-06, + "loss": 0.1246, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3569 + }, + { + "epoch": 0.3433187478963312, + "grad_norm": 2.614727864239716, + "learning_rate": 3.72747204321521e-06, + "loss": 0.1463, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3570 + }, + { + "epoch": 0.3434149156128288, + "grad_norm": 1.8672676040874776, + "learning_rate": 3.726807444439661e-06, + "loss": 0.141, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3571 + }, + { + "epoch": 0.34351108332932634, + "grad_norm": 2.7099147870863574, + "learning_rate": 3.7261427314449205e-06, + "loss": 0.1538, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3572 + }, + { + "epoch": 0.3436072510458239, + "grad_norm": 1.7141354545294873, + "learning_rate": 3.725477904292877e-06, + "loss": 0.124, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3573 + }, + { + "epoch": 0.34370341876232147, + "grad_norm": 2.0430033535990795, + "learning_rate": 3.724812963045427e-06, + "loss": 0.1313, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3574 + }, + { + "epoch": 0.3437995864788191, + "grad_norm": 1.6188582406007161, + "learning_rate": 3.724147907764478e-06, + "loss": 0.1312, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3575 + }, + { + "epoch": 0.34389575419531665, + "grad_norm": 2.436569003047287, + "learning_rate": 3.72348273851195e-06, + "loss": 0.1785, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3576 + }, + { + "epoch": 0.3439919219118142, + "grad_norm": 1.972235440583619, + "learning_rate": 3.722817455349771e-06, + "loss": 0.1628, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3577 + }, + { + "epoch": 0.3440880896283118, + "grad_norm": 1.9544464787026445, + "learning_rate": 3.72215205833988e-06, + "loss": 0.136, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3578 + }, + { + "epoch": 0.34418425734480934, + "grad_norm": 2.8555768207954606, + "learning_rate": 3.7214865475442295e-06, + "loss": 0.1603, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3579 + }, + { + "epoch": 0.3442804250613069, + "grad_norm": 1.9300623963819412, + "learning_rate": 3.7208209230247785e-06, + "loss": 0.1531, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3580 + }, + { + "epoch": 0.34437659277780447, + "grad_norm": 2.0126070083800296, + "learning_rate": 3.7201551848434987e-06, + "loss": 0.1435, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3581 + }, + { + "epoch": 0.3444727604943021, + "grad_norm": 3.119035372085778, + "learning_rate": 3.719489333062373e-06, + "loss": 0.1347, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3582 + }, + { + "epoch": 0.34456892821079965, + "grad_norm": 1.4052931969534441, + "learning_rate": 3.7188233677433926e-06, + "loss": 0.1376, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3583 + }, + { + "epoch": 0.3446650959272972, + "grad_norm": 2.1219495469806615, + "learning_rate": 3.718157288948563e-06, + "loss": 0.1238, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3584 + }, + { + "epoch": 0.3447612636437948, + "grad_norm": 3.2018114666882456, + "learning_rate": 3.717491096739896e-06, + "loss": 0.1385, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3585 + }, + { + "epoch": 0.34485743136029234, + "grad_norm": 1.5464247235182766, + "learning_rate": 3.7168247911794176e-06, + "loss": 0.1485, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3586 + }, + { + "epoch": 0.3449535990767899, + "grad_norm": 2.8629680139347844, + "learning_rate": 3.7161583723291607e-06, + "loss": 0.1505, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3587 + }, + { + "epoch": 0.34504976679328747, + "grad_norm": 2.060812636705361, + "learning_rate": 3.715491840251172e-06, + "loss": 0.1447, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3588 + }, + { + "epoch": 0.3451459345097851, + "grad_norm": 1.6315806320991104, + "learning_rate": 3.7148251950075077e-06, + "loss": 0.1183, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3589 + }, + { + "epoch": 0.34524210222628265, + "grad_norm": 1.4388281784057482, + "learning_rate": 3.7141584366602333e-06, + "loss": 0.133, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3590 + }, + { + "epoch": 0.3453382699427802, + "grad_norm": 1.4303751393384996, + "learning_rate": 3.713491565271427e-06, + "loss": 0.129, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3591 + }, + { + "epoch": 0.3454344376592778, + "grad_norm": 1.4346155284879327, + "learning_rate": 3.7128245809031765e-06, + "loss": 0.1404, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3592 + }, + { + "epoch": 0.34553060537577535, + "grad_norm": 1.4264436977665564, + "learning_rate": 3.7121574836175783e-06, + "loss": 0.1028, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3593 + }, + { + "epoch": 0.3456267730922729, + "grad_norm": 1.6631512428511535, + "learning_rate": 3.7114902734767433e-06, + "loss": 0.1196, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3594 + }, + { + "epoch": 0.3457229408087705, + "grad_norm": 1.899127721548443, + "learning_rate": 3.7108229505427886e-06, + "loss": 0.1452, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3595 + }, + { + "epoch": 0.3458191085252681, + "grad_norm": 2.4606658974457822, + "learning_rate": 3.710155514877844e-06, + "loss": 0.147, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3596 + }, + { + "epoch": 0.34591527624176566, + "grad_norm": 1.6096879876801837, + "learning_rate": 3.709487966544051e-06, + "loss": 0.1157, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3597 + }, + { + "epoch": 0.3460114439582632, + "grad_norm": 2.1098071372677087, + "learning_rate": 3.7088203056035587e-06, + "loss": 0.1366, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3598 + }, + { + "epoch": 0.3461076116747608, + "grad_norm": 4.673538307545185, + "learning_rate": 3.7081525321185292e-06, + "loss": 0.1648, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3599 + }, + { + "epoch": 0.34620377939125835, + "grad_norm": 2.0692638391205636, + "learning_rate": 3.7074846461511336e-06, + "loss": 0.1914, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3600 + }, + { + "epoch": 0.3462999471077559, + "grad_norm": 1.471759615307652, + "learning_rate": 3.7068166477635546e-06, + "loss": 0.1063, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3601 + }, + { + "epoch": 0.3463961148242535, + "grad_norm": 2.721148279596545, + "learning_rate": 3.706148537017984e-06, + "loss": 0.1427, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3602 + }, + { + "epoch": 0.3464922825407511, + "grad_norm": 1.407476383208731, + "learning_rate": 3.7054803139766247e-06, + "loss": 0.111, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3603 + }, + { + "epoch": 0.34658845025724866, + "grad_norm": 1.626602961330779, + "learning_rate": 3.7048119787016907e-06, + "loss": 0.1405, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3604 + }, + { + "epoch": 0.3466846179737462, + "grad_norm": 1.3917932608624022, + "learning_rate": 3.7041435312554053e-06, + "loss": 0.1327, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3605 + }, + { + "epoch": 0.3467807856902438, + "grad_norm": 3.051109608765476, + "learning_rate": 3.7034749717000034e-06, + "loss": 0.1535, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3606 + }, + { + "epoch": 0.34687695340674135, + "grad_norm": 2.423374625609326, + "learning_rate": 3.702806300097729e-06, + "loss": 0.1448, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3607 + }, + { + "epoch": 0.3469731211232389, + "grad_norm": 1.4182664496606943, + "learning_rate": 3.702137516510838e-06, + "loss": 0.1332, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3608 + }, + { + "epoch": 0.3470692888397365, + "grad_norm": 2.43240549231704, + "learning_rate": 3.701468621001596e-06, + "loss": 0.1365, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3609 + }, + { + "epoch": 0.3471654565562341, + "grad_norm": 1.9816146080030765, + "learning_rate": 3.7007996136322787e-06, + "loss": 0.1241, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3610 + }, + { + "epoch": 0.34726162427273166, + "grad_norm": 1.8947263096983131, + "learning_rate": 3.700130494465173e-06, + "loss": 0.1327, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3611 + }, + { + "epoch": 0.3473577919892292, + "grad_norm": 1.820327698928535, + "learning_rate": 3.699461263562575e-06, + "loss": 0.158, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3612 + }, + { + "epoch": 0.3474539597057268, + "grad_norm": 3.9943351610134386, + "learning_rate": 3.698791920986792e-06, + "loss": 0.164, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3613 + }, + { + "epoch": 0.34755012742222435, + "grad_norm": 1.8726972779430116, + "learning_rate": 3.6981224668001427e-06, + "loss": 0.1248, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3614 + }, + { + "epoch": 0.3476462951387219, + "grad_norm": 1.9726570610206815, + "learning_rate": 3.697452901064954e-06, + "loss": 0.1407, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3615 + }, + { + "epoch": 0.3477424628552195, + "grad_norm": 1.7785054592274019, + "learning_rate": 3.6967832238435645e-06, + "loss": 0.143, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3616 + }, + { + "epoch": 0.3478386305717171, + "grad_norm": 2.349742116229806, + "learning_rate": 3.6961134351983245e-06, + "loss": 0.1362, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3617 + }, + { + "epoch": 0.34793479828821466, + "grad_norm": 2.6940567703922667, + "learning_rate": 3.695443535191591e-06, + "loss": 0.1529, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3618 + }, + { + "epoch": 0.34803096600471223, + "grad_norm": 1.5780188045633499, + "learning_rate": 3.6947735238857347e-06, + "loss": 0.1152, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3619 + }, + { + "epoch": 0.3481271337212098, + "grad_norm": 3.8893823571132327, + "learning_rate": 3.694103401343136e-06, + "loss": 0.1639, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3620 + }, + { + "epoch": 0.34822330143770736, + "grad_norm": 1.4814199214647712, + "learning_rate": 3.6934331676261837e-06, + "loss": 0.1184, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3621 + }, + { + "epoch": 0.3483194691542049, + "grad_norm": 1.945761450899897, + "learning_rate": 3.69276282279728e-06, + "loss": 0.1229, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3622 + }, + { + "epoch": 0.3484156368707025, + "grad_norm": 1.5285477689786275, + "learning_rate": 3.6920923669188345e-06, + "loss": 0.1272, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3623 + }, + { + "epoch": 0.3485118045872001, + "grad_norm": 1.5728643549755872, + "learning_rate": 3.6914218000532697e-06, + "loss": 0.1323, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3624 + }, + { + "epoch": 0.34860797230369767, + "grad_norm": 1.8720308303590956, + "learning_rate": 3.6907511222630167e-06, + "loss": 0.1498, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3625 + }, + { + "epoch": 0.34870414002019523, + "grad_norm": 2.9025200680555088, + "learning_rate": 3.6900803336105174e-06, + "loss": 0.114, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3626 + }, + { + "epoch": 0.3488003077366928, + "grad_norm": 2.3479285801543273, + "learning_rate": 3.6894094341582244e-06, + "loss": 0.1299, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3627 + }, + { + "epoch": 0.34889647545319036, + "grad_norm": 2.0857546719284095, + "learning_rate": 3.6887384239685996e-06, + "loss": 0.1385, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3628 + }, + { + "epoch": 0.3489926431696879, + "grad_norm": 2.5092615351787306, + "learning_rate": 3.688067303104117e-06, + "loss": 0.1641, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3629 + }, + { + "epoch": 0.3490888108861855, + "grad_norm": 1.7306786177245943, + "learning_rate": 3.6873960716272587e-06, + "loss": 0.1434, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3630 + }, + { + "epoch": 0.3491849786026831, + "grad_norm": 2.016541733865802, + "learning_rate": 3.6867247296005193e-06, + "loss": 0.145, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3631 + }, + { + "epoch": 0.34928114631918067, + "grad_norm": 1.6138686208688544, + "learning_rate": 3.686053277086401e-06, + "loss": 0.128, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3632 + }, + { + "epoch": 0.34937731403567823, + "grad_norm": 2.1284315622332413, + "learning_rate": 3.6853817141474196e-06, + "loss": 0.1613, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3633 + }, + { + "epoch": 0.3494734817521758, + "grad_norm": 2.2232722052866825, + "learning_rate": 3.684710040846099e-06, + "loss": 0.1511, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3634 + }, + { + "epoch": 0.34956964946867336, + "grad_norm": 1.8201884432782296, + "learning_rate": 3.6840382572449733e-06, + "loss": 0.1387, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3635 + }, + { + "epoch": 0.3496658171851709, + "grad_norm": 1.9744285838558, + "learning_rate": 3.683366363406588e-06, + "loss": 0.1401, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3636 + }, + { + "epoch": 0.3497619849016685, + "grad_norm": 1.8449858082166102, + "learning_rate": 3.6826943593934984e-06, + "loss": 0.1341, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3637 + }, + { + "epoch": 0.3498581526181661, + "grad_norm": 2.0439788682674074, + "learning_rate": 3.6820222452682695e-06, + "loss": 0.1473, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3638 + }, + { + "epoch": 0.34995432033466367, + "grad_norm": 1.4854109876199688, + "learning_rate": 3.6813500210934767e-06, + "loss": 0.1148, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3639 + }, + { + "epoch": 0.35005048805116123, + "grad_norm": 1.6607158590878892, + "learning_rate": 3.6806776869317074e-06, + "loss": 0.129, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3640 + }, + { + "epoch": 0.3501466557676588, + "grad_norm": 1.6477844151223484, + "learning_rate": 3.680005242845556e-06, + "loss": 0.1378, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3641 + }, + { + "epoch": 0.35024282348415636, + "grad_norm": 1.413350723570996, + "learning_rate": 3.67933268889763e-06, + "loss": 0.1332, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3642 + }, + { + "epoch": 0.3503389912006539, + "grad_norm": 1.6471807919049668, + "learning_rate": 3.678660025150545e-06, + "loss": 0.1455, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3643 + }, + { + "epoch": 0.3504351589171515, + "grad_norm": 2.2196421743605588, + "learning_rate": 3.6779872516669294e-06, + "loss": 0.1233, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3644 + }, + { + "epoch": 0.3505313266336491, + "grad_norm": 1.651124989726437, + "learning_rate": 3.67731436850942e-06, + "loss": 0.1455, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3645 + }, + { + "epoch": 0.3506274943501467, + "grad_norm": 1.7933897422826774, + "learning_rate": 3.676641375740662e-06, + "loss": 0.1585, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3646 + }, + { + "epoch": 0.35072366206664424, + "grad_norm": 1.6200208809742935, + "learning_rate": 3.6759682734233153e-06, + "loss": 0.1412, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3647 + }, + { + "epoch": 0.3508198297831418, + "grad_norm": 1.3340789540636238, + "learning_rate": 3.675295061620047e-06, + "loss": 0.1249, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3648 + }, + { + "epoch": 0.35091599749963936, + "grad_norm": 3.610010298018949, + "learning_rate": 3.6746217403935345e-06, + "loss": 0.1619, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3649 + }, + { + "epoch": 0.35101216521613693, + "grad_norm": 1.9380332855532083, + "learning_rate": 3.673948309806466e-06, + "loss": 0.1832, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3650 + }, + { + "epoch": 0.3511083329326345, + "grad_norm": 2.5007981872586447, + "learning_rate": 3.6732747699215404e-06, + "loss": 0.1315, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3651 + }, + { + "epoch": 0.3512045006491321, + "grad_norm": 2.1765789037754604, + "learning_rate": 3.6726011208014656e-06, + "loss": 0.1496, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3652 + }, + { + "epoch": 0.3513006683656297, + "grad_norm": 2.1121303705878662, + "learning_rate": 3.6719273625089595e-06, + "loss": 0.1202, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3653 + }, + { + "epoch": 0.35139683608212724, + "grad_norm": 2.2640696818685373, + "learning_rate": 3.6712534951067523e-06, + "loss": 0.146, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3654 + }, + { + "epoch": 0.3514930037986248, + "grad_norm": 1.5679394040661851, + "learning_rate": 3.6705795186575823e-06, + "loss": 0.1261, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3655 + }, + { + "epoch": 0.35158917151512237, + "grad_norm": 1.391475749667012, + "learning_rate": 3.669905433224199e-06, + "loss": 0.1351, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3656 + }, + { + "epoch": 0.35168533923161993, + "grad_norm": 3.1296178575444182, + "learning_rate": 3.6692312388693607e-06, + "loss": 0.1524, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3657 + }, + { + "epoch": 0.3517815069481175, + "grad_norm": 3.6771181868024456, + "learning_rate": 3.668556935655837e-06, + "loss": 0.1587, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3658 + }, + { + "epoch": 0.3518776746646151, + "grad_norm": 1.7934923907242286, + "learning_rate": 3.6678825236464076e-06, + "loss": 0.1352, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3659 + }, + { + "epoch": 0.3519738423811127, + "grad_norm": 1.9041850896483665, + "learning_rate": 3.667208002903863e-06, + "loss": 0.1241, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3660 + }, + { + "epoch": 0.35207001009761024, + "grad_norm": 1.7631250197239525, + "learning_rate": 3.666533373491002e-06, + "loss": 0.1348, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3661 + }, + { + "epoch": 0.3521661778141078, + "grad_norm": 1.5111019402321164, + "learning_rate": 3.6658586354706343e-06, + "loss": 0.1374, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3662 + }, + { + "epoch": 0.35226234553060537, + "grad_norm": 3.0587207227638427, + "learning_rate": 3.6651837889055804e-06, + "loss": 0.1471, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3663 + }, + { + "epoch": 0.35235851324710293, + "grad_norm": 1.3856184533678118, + "learning_rate": 3.66450883385867e-06, + "loss": 0.1258, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3664 + }, + { + "epoch": 0.3524546809636005, + "grad_norm": 1.8175122007324702, + "learning_rate": 3.663833770392744e-06, + "loss": 0.1622, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3665 + }, + { + "epoch": 0.3525508486800981, + "grad_norm": 2.859054033423153, + "learning_rate": 3.663158598570652e-06, + "loss": 0.1457, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3666 + }, + { + "epoch": 0.3526470163965957, + "grad_norm": 1.7249324242412227, + "learning_rate": 3.662483318455254e-06, + "loss": 0.1433, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3667 + }, + { + "epoch": 0.35274318411309324, + "grad_norm": 2.620740246924695, + "learning_rate": 3.661807930109422e-06, + "loss": 0.1451, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3668 + }, + { + "epoch": 0.3528393518295908, + "grad_norm": 3.1894215296880177, + "learning_rate": 3.661132433596035e-06, + "loss": 0.1316, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3669 + }, + { + "epoch": 0.35293551954608837, + "grad_norm": 1.41725740568474, + "learning_rate": 3.6604568289779847e-06, + "loss": 0.1182, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3670 + }, + { + "epoch": 0.35303168726258594, + "grad_norm": 1.4400248898854675, + "learning_rate": 3.6597811163181708e-06, + "loss": 0.1126, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3671 + }, + { + "epoch": 0.3531278549790835, + "grad_norm": 1.763914462744932, + "learning_rate": 3.6591052956795043e-06, + "loss": 0.1307, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3672 + }, + { + "epoch": 0.3532240226955811, + "grad_norm": 2.4380431792278, + "learning_rate": 3.6584293671249068e-06, + "loss": 0.1676, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3673 + }, + { + "epoch": 0.3533201904120787, + "grad_norm": 2.1397387614075876, + "learning_rate": 3.657753330717308e-06, + "loss": 0.1254, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3674 + }, + { + "epoch": 0.35341635812857625, + "grad_norm": 1.8722462941540612, + "learning_rate": 3.6570771865196494e-06, + "loss": 0.1485, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3675 + }, + { + "epoch": 0.3535125258450738, + "grad_norm": 1.9406561022528572, + "learning_rate": 3.656400934594882e-06, + "loss": 0.1634, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3676 + }, + { + "epoch": 0.3536086935615714, + "grad_norm": 1.6877110909868451, + "learning_rate": 3.655724575005967e-06, + "loss": 0.1283, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3677 + }, + { + "epoch": 0.35370486127806894, + "grad_norm": 2.824838294096814, + "learning_rate": 3.655048107815874e-06, + "loss": 0.138, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3678 + }, + { + "epoch": 0.3538010289945665, + "grad_norm": 2.0331944436977913, + "learning_rate": 3.654371533087586e-06, + "loss": 0.1232, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3679 + }, + { + "epoch": 0.3538971967110641, + "grad_norm": 1.9687860218970736, + "learning_rate": 3.6536948508840915e-06, + "loss": 0.1375, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3680 + }, + { + "epoch": 0.3539933644275617, + "grad_norm": 1.7244621686050632, + "learning_rate": 3.653018061268393e-06, + "loss": 0.1246, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3681 + }, + { + "epoch": 0.35408953214405925, + "grad_norm": 1.4317072815435237, + "learning_rate": 3.6523411643035025e-06, + "loss": 0.1191, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3682 + }, + { + "epoch": 0.3541856998605568, + "grad_norm": 2.2121060151646925, + "learning_rate": 3.6516641600524387e-06, + "loss": 0.167, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3683 + }, + { + "epoch": 0.3542818675770544, + "grad_norm": 1.373972800328741, + "learning_rate": 3.6509870485782345e-06, + "loss": 0.106, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3684 + }, + { + "epoch": 0.35437803529355194, + "grad_norm": 2.076452079174394, + "learning_rate": 3.6503098299439297e-06, + "loss": 0.158, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3685 + }, + { + "epoch": 0.3544742030100495, + "grad_norm": 1.7542220554213956, + "learning_rate": 3.6496325042125755e-06, + "loss": 0.1166, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3686 + }, + { + "epoch": 0.3545703707265471, + "grad_norm": 2.2554377554809215, + "learning_rate": 3.6489550714472337e-06, + "loss": 0.1345, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3687 + }, + { + "epoch": 0.3546665384430447, + "grad_norm": 3.18149667969303, + "learning_rate": 3.648277531710974e-06, + "loss": 0.1401, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3688 + }, + { + "epoch": 0.35476270615954225, + "grad_norm": 1.6227218622556885, + "learning_rate": 3.647599885066877e-06, + "loss": 0.1514, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3689 + }, + { + "epoch": 0.3548588738760398, + "grad_norm": 2.545332162685798, + "learning_rate": 3.6469221315780357e-06, + "loss": 0.1308, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3690 + }, + { + "epoch": 0.3549550415925374, + "grad_norm": 3.2874209368051717, + "learning_rate": 3.646244271307548e-06, + "loss": 0.122, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3691 + }, + { + "epoch": 0.35505120930903494, + "grad_norm": 2.652265098170067, + "learning_rate": 3.6455663043185264e-06, + "loss": 0.1198, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3692 + }, + { + "epoch": 0.3551473770255325, + "grad_norm": 1.5982086646161004, + "learning_rate": 3.6448882306740907e-06, + "loss": 0.13, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3693 + }, + { + "epoch": 0.3552435447420301, + "grad_norm": 3.3205443888412383, + "learning_rate": 3.6442100504373723e-06, + "loss": 0.1304, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3694 + }, + { + "epoch": 0.3553397124585277, + "grad_norm": 2.660303106039654, + "learning_rate": 3.6435317636715104e-06, + "loss": 0.1555, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3695 + }, + { + "epoch": 0.35543588017502525, + "grad_norm": 1.9269095205360303, + "learning_rate": 3.6428533704396566e-06, + "loss": 0.1393, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3696 + }, + { + "epoch": 0.3555320478915228, + "grad_norm": 1.5608135872903706, + "learning_rate": 3.6421748708049703e-06, + "loss": 0.155, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3697 + }, + { + "epoch": 0.3556282156080204, + "grad_norm": 2.041228035397127, + "learning_rate": 3.6414962648306227e-06, + "loss": 0.1508, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3698 + }, + { + "epoch": 0.35572438332451795, + "grad_norm": 2.3363323131558857, + "learning_rate": 3.640817552579793e-06, + "loss": 0.1411, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3699 + }, + { + "epoch": 0.3558205510410155, + "grad_norm": 2.038023959818221, + "learning_rate": 3.6401387341156715e-06, + "loss": 0.1414, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3700 + }, + { + "epoch": 0.35591671875751313, + "grad_norm": 4.231074850695885, + "learning_rate": 3.6394598095014577e-06, + "loss": 0.1613, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3701 + }, + { + "epoch": 0.3560128864740107, + "grad_norm": 3.627167040537172, + "learning_rate": 3.638780778800362e-06, + "loss": 0.1699, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3702 + }, + { + "epoch": 0.35610905419050826, + "grad_norm": 4.262012501176726, + "learning_rate": 3.638101642075604e-06, + "loss": 0.1626, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3703 + }, + { + "epoch": 0.3562052219070058, + "grad_norm": 2.013606017910042, + "learning_rate": 3.637422399390413e-06, + "loss": 0.1663, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3704 + }, + { + "epoch": 0.3563013896235034, + "grad_norm": 1.4345224526354203, + "learning_rate": 3.6367430508080283e-06, + "loss": 0.1118, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3705 + }, + { + "epoch": 0.35639755734000095, + "grad_norm": 2.0248974462802205, + "learning_rate": 3.636063596391699e-06, + "loss": 0.135, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3706 + }, + { + "epoch": 0.3564937250564985, + "grad_norm": 3.4947301760937997, + "learning_rate": 3.6353840362046856e-06, + "loss": 0.1386, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3707 + }, + { + "epoch": 0.35658989277299613, + "grad_norm": 2.593588244646934, + "learning_rate": 3.6347043703102547e-06, + "loss": 0.1296, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3708 + }, + { + "epoch": 0.3566860604894937, + "grad_norm": 1.426808391800696, + "learning_rate": 3.634024598771687e-06, + "loss": 0.1326, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3709 + }, + { + "epoch": 0.35678222820599126, + "grad_norm": 1.7717481648572708, + "learning_rate": 3.6333447216522703e-06, + "loss": 0.1219, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3710 + }, + { + "epoch": 0.3568783959224888, + "grad_norm": 2.8168359654499238, + "learning_rate": 3.632664739015303e-06, + "loss": 0.1448, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3711 + }, + { + "epoch": 0.3569745636389864, + "grad_norm": 3.756652705199463, + "learning_rate": 3.631984650924094e-06, + "loss": 0.1474, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3712 + }, + { + "epoch": 0.35707073135548395, + "grad_norm": 2.4083840793787576, + "learning_rate": 3.6313044574419604e-06, + "loss": 0.1322, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3713 + }, + { + "epoch": 0.3571668990719815, + "grad_norm": 1.9612883535095849, + "learning_rate": 3.6306241586322307e-06, + "loss": 0.13, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3714 + }, + { + "epoch": 0.35726306678847913, + "grad_norm": 1.7515956932713035, + "learning_rate": 3.629943754558243e-06, + "loss": 0.1506, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3715 + }, + { + "epoch": 0.3573592345049767, + "grad_norm": 1.8393172602086743, + "learning_rate": 3.6292632452833436e-06, + "loss": 0.1757, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3716 + }, + { + "epoch": 0.35745540222147426, + "grad_norm": 1.699540472188078, + "learning_rate": 3.628582630870891e-06, + "loss": 0.1477, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3717 + }, + { + "epoch": 0.3575515699379718, + "grad_norm": 1.9868592358275554, + "learning_rate": 3.627901911384252e-06, + "loss": 0.1529, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3718 + }, + { + "epoch": 0.3576477376544694, + "grad_norm": 1.4547093539818476, + "learning_rate": 3.6272210868868035e-06, + "loss": 0.1233, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3719 + }, + { + "epoch": 0.35774390537096695, + "grad_norm": 1.8464528499734332, + "learning_rate": 3.6265401574419316e-06, + "loss": 0.1418, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3720 + }, + { + "epoch": 0.3578400730874645, + "grad_norm": 4.059419906280464, + "learning_rate": 3.6258591231130335e-06, + "loss": 0.1463, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3721 + }, + { + "epoch": 0.35793624080396214, + "grad_norm": 2.1849692214008813, + "learning_rate": 3.6251779839635155e-06, + "loss": 0.1326, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3722 + }, + { + "epoch": 0.3580324085204597, + "grad_norm": 3.2774465224144813, + "learning_rate": 3.6244967400567925e-06, + "loss": 0.1459, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3723 + }, + { + "epoch": 0.35812857623695726, + "grad_norm": 1.8682501069724233, + "learning_rate": 3.623815391456291e-06, + "loss": 0.122, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3724 + }, + { + "epoch": 0.3582247439534548, + "grad_norm": 2.6787047675982603, + "learning_rate": 3.6231339382254465e-06, + "loss": 0.12, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3725 + }, + { + "epoch": 0.3583209116699524, + "grad_norm": 1.6421360209689544, + "learning_rate": 3.6224523804277035e-06, + "loss": 0.137, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3726 + }, + { + "epoch": 0.35841707938644995, + "grad_norm": 1.7714148954417337, + "learning_rate": 3.6217707181265176e-06, + "loss": 0.142, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3727 + }, + { + "epoch": 0.3585132471029475, + "grad_norm": 2.788717154961419, + "learning_rate": 3.621088951385353e-06, + "loss": 0.1644, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3728 + }, + { + "epoch": 0.35860941481944514, + "grad_norm": 1.6961281769078904, + "learning_rate": 3.620407080267685e-06, + "loss": 0.1509, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3729 + }, + { + "epoch": 0.3587055825359427, + "grad_norm": 1.6086546930253425, + "learning_rate": 3.6197251048369963e-06, + "loss": 0.1195, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3730 + }, + { + "epoch": 0.35880175025244027, + "grad_norm": 1.5472643509136699, + "learning_rate": 3.6190430251567817e-06, + "loss": 0.1295, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3731 + }, + { + "epoch": 0.35889791796893783, + "grad_norm": 1.813099120007204, + "learning_rate": 3.6183608412905447e-06, + "loss": 0.1576, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3732 + }, + { + "epoch": 0.3589940856854354, + "grad_norm": 1.3399484572024152, + "learning_rate": 3.6176785533017975e-06, + "loss": 0.1101, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3733 + }, + { + "epoch": 0.35909025340193296, + "grad_norm": 1.4337857732829618, + "learning_rate": 3.6169961612540648e-06, + "loss": 0.1099, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3734 + }, + { + "epoch": 0.3591864211184305, + "grad_norm": 1.8952842154139935, + "learning_rate": 3.6163136652108777e-06, + "loss": 0.1451, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3735 + }, + { + "epoch": 0.35928258883492814, + "grad_norm": 2.253379491614209, + "learning_rate": 3.615631065235779e-06, + "loss": 0.1452, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3736 + }, + { + "epoch": 0.3593787565514257, + "grad_norm": 2.928962644546054, + "learning_rate": 3.614948361392321e-06, + "loss": 0.1568, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3737 + }, + { + "epoch": 0.35947492426792327, + "grad_norm": 1.4895752225027905, + "learning_rate": 3.614265553744064e-06, + "loss": 0.1369, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3738 + }, + { + "epoch": 0.35957109198442083, + "grad_norm": 1.6027965963195216, + "learning_rate": 3.613582642354581e-06, + "loss": 0.1393, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3739 + }, + { + "epoch": 0.3596672597009184, + "grad_norm": 3.089144374096695, + "learning_rate": 3.6128996272874523e-06, + "loss": 0.1379, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3740 + }, + { + "epoch": 0.35976342741741596, + "grad_norm": 1.3355058872845382, + "learning_rate": 3.612216508606268e-06, + "loss": 0.1052, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3741 + }, + { + "epoch": 0.3598595951339135, + "grad_norm": 1.582858170980822, + "learning_rate": 3.6115332863746294e-06, + "loss": 0.1231, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3742 + }, + { + "epoch": 0.35995576285041114, + "grad_norm": 1.8323883715234597, + "learning_rate": 3.6108499606561454e-06, + "loss": 0.1309, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3743 + }, + { + "epoch": 0.3600519305669087, + "grad_norm": 2.1990197244080263, + "learning_rate": 3.6101665315144357e-06, + "loss": 0.1127, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3744 + }, + { + "epoch": 0.36014809828340627, + "grad_norm": 1.397136930570369, + "learning_rate": 3.6094829990131296e-06, + "loss": 0.1052, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3745 + }, + { + "epoch": 0.36024426599990383, + "grad_norm": 2.0766825463560163, + "learning_rate": 3.6087993632158668e-06, + "loss": 0.1652, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3746 + }, + { + "epoch": 0.3603404337164014, + "grad_norm": 2.590915503257744, + "learning_rate": 3.6081156241862936e-06, + "loss": 0.155, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3747 + }, + { + "epoch": 0.36043660143289896, + "grad_norm": 1.8309178426175825, + "learning_rate": 3.6074317819880694e-06, + "loss": 0.1374, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3748 + }, + { + "epoch": 0.3605327691493965, + "grad_norm": 3.0890118925691943, + "learning_rate": 3.6067478366848626e-06, + "loss": 0.1345, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3749 + }, + { + "epoch": 0.36062893686589415, + "grad_norm": 2.7784635775868005, + "learning_rate": 3.606063788340348e-06, + "loss": 0.1062, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3750 + }, + { + "epoch": 0.3607251045823917, + "grad_norm": 3.0898813811434054, + "learning_rate": 3.6053796370182154e-06, + "loss": 0.1231, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3751 + }, + { + "epoch": 0.3608212722988893, + "grad_norm": 2.564085962544186, + "learning_rate": 3.604695382782159e-06, + "loss": 0.1212, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3752 + }, + { + "epoch": 0.36091744001538684, + "grad_norm": 1.923220473991118, + "learning_rate": 3.604011025695885e-06, + "loss": 0.1521, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3753 + }, + { + "epoch": 0.3610136077318844, + "grad_norm": 4.570744902679773, + "learning_rate": 3.60332656582311e-06, + "loss": 0.1615, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3754 + }, + { + "epoch": 0.36110977544838196, + "grad_norm": 3.1546591982179297, + "learning_rate": 3.6026420032275584e-06, + "loss": 0.1132, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3755 + }, + { + "epoch": 0.36120594316487953, + "grad_norm": 3.9991514070041965, + "learning_rate": 3.6019573379729644e-06, + "loss": 0.1184, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3756 + }, + { + "epoch": 0.36130211088137715, + "grad_norm": 3.579761079050834, + "learning_rate": 3.601272570123074e-06, + "loss": 0.1487, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3757 + }, + { + "epoch": 0.3613982785978747, + "grad_norm": 3.2137529335936708, + "learning_rate": 3.600587699741639e-06, + "loss": 0.1369, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3758 + }, + { + "epoch": 0.3614944463143723, + "grad_norm": 1.5805539095922934, + "learning_rate": 3.5999027268924237e-06, + "loss": 0.1238, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3759 + }, + { + "epoch": 0.36159061403086984, + "grad_norm": 3.7957996667468876, + "learning_rate": 3.5992176516392007e-06, + "loss": 0.149, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3760 + }, + { + "epoch": 0.3616867817473674, + "grad_norm": 4.405621854391282, + "learning_rate": 3.5985324740457527e-06, + "loss": 0.1595, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3761 + }, + { + "epoch": 0.36178294946386497, + "grad_norm": 5.809553587706243, + "learning_rate": 3.5978471941758715e-06, + "loss": 0.1977, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3762 + }, + { + "epoch": 0.36187911718036253, + "grad_norm": 4.768472741187805, + "learning_rate": 3.5971618120933587e-06, + "loss": 0.1486, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3763 + }, + { + "epoch": 0.36197528489686015, + "grad_norm": 3.884599745374599, + "learning_rate": 3.5964763278620246e-06, + "loss": 0.1522, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3764 + }, + { + "epoch": 0.3620714526133577, + "grad_norm": 2.181776187560919, + "learning_rate": 3.595790741545691e-06, + "loss": 0.1286, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3765 + }, + { + "epoch": 0.3621676203298553, + "grad_norm": 1.6272561662749563, + "learning_rate": 3.5951050532081864e-06, + "loss": 0.1404, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3766 + }, + { + "epoch": 0.36226378804635284, + "grad_norm": 3.796328608279665, + "learning_rate": 3.594419262913351e-06, + "loss": 0.1354, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3767 + }, + { + "epoch": 0.3623599557628504, + "grad_norm": 3.1446516951251486, + "learning_rate": 3.593733370725035e-06, + "loss": 0.1399, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3768 + }, + { + "epoch": 0.36245612347934797, + "grad_norm": 1.8476524423138534, + "learning_rate": 3.5930473767070943e-06, + "loss": 0.1421, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3769 + }, + { + "epoch": 0.36255229119584553, + "grad_norm": 1.3261087719430023, + "learning_rate": 3.5923612809233987e-06, + "loss": 0.1166, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3770 + }, + { + "epoch": 0.36264845891234315, + "grad_norm": 2.472341294494833, + "learning_rate": 3.5916750834378256e-06, + "loss": 0.1424, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3771 + }, + { + "epoch": 0.3627446266288407, + "grad_norm": 2.9026158495041012, + "learning_rate": 3.590988784314261e-06, + "loss": 0.1244, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3772 + }, + { + "epoch": 0.3628407943453383, + "grad_norm": 3.8122660320987163, + "learning_rate": 3.590302383616602e-06, + "loss": 0.1584, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3773 + }, + { + "epoch": 0.36293696206183584, + "grad_norm": 1.7802564252836697, + "learning_rate": 3.5896158814087546e-06, + "loss": 0.1673, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3774 + }, + { + "epoch": 0.3630331297783334, + "grad_norm": 2.618333364058984, + "learning_rate": 3.588929277754633e-06, + "loss": 0.1503, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3775 + }, + { + "epoch": 0.36312929749483097, + "grad_norm": 1.6441057736266937, + "learning_rate": 3.5882425727181625e-06, + "loss": 0.1533, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3776 + }, + { + "epoch": 0.36322546521132854, + "grad_norm": 1.6427705612630412, + "learning_rate": 3.5875557663632776e-06, + "loss": 0.1578, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3777 + }, + { + "epoch": 0.36332163292782615, + "grad_norm": 1.464063894441826, + "learning_rate": 3.5868688587539214e-06, + "loss": 0.1422, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3778 + }, + { + "epoch": 0.3634178006443237, + "grad_norm": 1.6173531167229072, + "learning_rate": 3.586181849954047e-06, + "loss": 0.1345, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3779 + }, + { + "epoch": 0.3635139683608213, + "grad_norm": 1.8318611936661453, + "learning_rate": 3.5854947400276164e-06, + "loss": 0.1577, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3780 + }, + { + "epoch": 0.36361013607731885, + "grad_norm": 2.8780490009113, + "learning_rate": 3.5848075290386023e-06, + "loss": 0.1467, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3781 + }, + { + "epoch": 0.3637063037938164, + "grad_norm": 1.9934197015877027, + "learning_rate": 3.584120217050986e-06, + "loss": 0.1319, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3782 + }, + { + "epoch": 0.363802471510314, + "grad_norm": 2.2176183625332024, + "learning_rate": 3.5834328041287573e-06, + "loss": 0.1084, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3783 + }, + { + "epoch": 0.36389863922681154, + "grad_norm": 1.3663555032947792, + "learning_rate": 3.5827452903359174e-06, + "loss": 0.1238, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3784 + }, + { + "epoch": 0.36399480694330916, + "grad_norm": 2.081017462850938, + "learning_rate": 3.5820576757364743e-06, + "loss": 0.1228, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3785 + }, + { + "epoch": 0.3640909746598067, + "grad_norm": 2.334370426798357, + "learning_rate": 3.581369960394448e-06, + "loss": 0.1557, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3786 + }, + { + "epoch": 0.3641871423763043, + "grad_norm": 1.980894351424639, + "learning_rate": 3.580682144373866e-06, + "loss": 0.1476, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3787 + }, + { + "epoch": 0.36428331009280185, + "grad_norm": 1.5593397215756053, + "learning_rate": 3.579994227738767e-06, + "loss": 0.1165, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3788 + }, + { + "epoch": 0.3643794778092994, + "grad_norm": 2.1726671988896102, + "learning_rate": 3.579306210553196e-06, + "loss": 0.1409, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3789 + }, + { + "epoch": 0.364475645525797, + "grad_norm": 2.549533273611075, + "learning_rate": 3.578618092881211e-06, + "loss": 0.1438, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3790 + }, + { + "epoch": 0.36457181324229454, + "grad_norm": 2.940951115417944, + "learning_rate": 3.5779298747868778e-06, + "loss": 0.1329, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3791 + }, + { + "epoch": 0.36466798095879216, + "grad_norm": 2.0467623536062747, + "learning_rate": 3.5772415563342703e-06, + "loss": 0.1184, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3792 + }, + { + "epoch": 0.3647641486752897, + "grad_norm": 1.8802421235727995, + "learning_rate": 3.5765531375874736e-06, + "loss": 0.132, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3793 + }, + { + "epoch": 0.3648603163917873, + "grad_norm": 1.4797252274213013, + "learning_rate": 3.5758646186105813e-06, + "loss": 0.1123, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3794 + }, + { + "epoch": 0.36495648410828485, + "grad_norm": 2.253712457889852, + "learning_rate": 3.5751759994676955e-06, + "loss": 0.1325, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3795 + }, + { + "epoch": 0.3650526518247824, + "grad_norm": 3.2347988461082178, + "learning_rate": 3.5744872802229296e-06, + "loss": 0.1233, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3796 + }, + { + "epoch": 0.36514881954128, + "grad_norm": 1.552794129252368, + "learning_rate": 3.573798460940405e-06, + "loss": 0.1212, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3797 + }, + { + "epoch": 0.36524498725777754, + "grad_norm": 2.136275117275785, + "learning_rate": 3.5731095416842526e-06, + "loss": 0.1196, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3798 + }, + { + "epoch": 0.36534115497427516, + "grad_norm": 1.6627705105508936, + "learning_rate": 3.5724205225186137e-06, + "loss": 0.1229, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3799 + }, + { + "epoch": 0.3654373226907727, + "grad_norm": 2.1556035162291134, + "learning_rate": 3.5717314035076355e-06, + "loss": 0.1263, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3800 + }, + { + "epoch": 0.3655334904072703, + "grad_norm": 1.764998778791862, + "learning_rate": 3.5710421847154797e-06, + "loss": 0.1372, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3801 + }, + { + "epoch": 0.36562965812376785, + "grad_norm": 1.655452208541358, + "learning_rate": 3.5703528662063123e-06, + "loss": 0.1296, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3802 + }, + { + "epoch": 0.3657258258402654, + "grad_norm": 1.5648684927811654, + "learning_rate": 3.569663448044313e-06, + "loss": 0.1293, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3803 + }, + { + "epoch": 0.365821993556763, + "grad_norm": 1.4507207607650596, + "learning_rate": 3.5689739302936656e-06, + "loss": 0.1224, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3804 + }, + { + "epoch": 0.36591816127326054, + "grad_norm": 2.9879791063763923, + "learning_rate": 3.568284313018569e-06, + "loss": 0.1535, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3805 + }, + { + "epoch": 0.36601432898975816, + "grad_norm": 1.8160579709982412, + "learning_rate": 3.5675945962832265e-06, + "loss": 0.1312, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3806 + }, + { + "epoch": 0.36611049670625573, + "grad_norm": 1.8340223094933041, + "learning_rate": 3.566904780151854e-06, + "loss": 0.1344, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3807 + }, + { + "epoch": 0.3662066644227533, + "grad_norm": 2.0815634355679062, + "learning_rate": 3.566214864688674e-06, + "loss": 0.1268, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3808 + }, + { + "epoch": 0.36630283213925086, + "grad_norm": 1.7216922548146552, + "learning_rate": 3.565524849957921e-06, + "loss": 0.115, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3809 + }, + { + "epoch": 0.3663989998557484, + "grad_norm": 2.7697710789583, + "learning_rate": 3.5648347360238367e-06, + "loss": 0.1476, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3810 + }, + { + "epoch": 0.366495167572246, + "grad_norm": 1.8642764542606438, + "learning_rate": 3.5641445229506715e-06, + "loss": 0.1552, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3811 + }, + { + "epoch": 0.36659133528874355, + "grad_norm": 1.8258461193464828, + "learning_rate": 3.5634542108026878e-06, + "loss": 0.1313, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3812 + }, + { + "epoch": 0.36668750300524117, + "grad_norm": 1.8728996400064726, + "learning_rate": 3.5627637996441542e-06, + "loss": 0.1175, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3813 + }, + { + "epoch": 0.36678367072173873, + "grad_norm": 1.8136065061664952, + "learning_rate": 3.5620732895393518e-06, + "loss": 0.1525, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3814 + }, + { + "epoch": 0.3668798384382363, + "grad_norm": 3.256652960025018, + "learning_rate": 3.5613826805525664e-06, + "loss": 0.1448, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3815 + }, + { + "epoch": 0.36697600615473386, + "grad_norm": 1.8532361414635412, + "learning_rate": 3.5606919727480984e-06, + "loss": 0.1734, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3816 + }, + { + "epoch": 0.3670721738712314, + "grad_norm": 1.5963166219130767, + "learning_rate": 3.560001166190252e-06, + "loss": 0.1525, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3817 + }, + { + "epoch": 0.367168341587729, + "grad_norm": 2.3619163186154206, + "learning_rate": 3.5593102609433456e-06, + "loss": 0.1273, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3818 + }, + { + "epoch": 0.36726450930422655, + "grad_norm": 2.7462141298878837, + "learning_rate": 3.5586192570717026e-06, + "loss": 0.1529, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3819 + }, + { + "epoch": 0.36736067702072417, + "grad_norm": 1.7101061450542525, + "learning_rate": 3.5579281546396582e-06, + "loss": 0.1405, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3820 + }, + { + "epoch": 0.36745684473722173, + "grad_norm": 2.2585971843317836, + "learning_rate": 3.5572369537115565e-06, + "loss": 0.1375, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3821 + }, + { + "epoch": 0.3675530124537193, + "grad_norm": 1.5513772758150794, + "learning_rate": 3.556545654351749e-06, + "loss": 0.1252, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3822 + }, + { + "epoch": 0.36764918017021686, + "grad_norm": 5.032535519465666, + "learning_rate": 3.555854256624598e-06, + "loss": 0.1413, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3823 + }, + { + "epoch": 0.3677453478867144, + "grad_norm": 3.0191530638875586, + "learning_rate": 3.555162760594475e-06, + "loss": 0.1439, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3824 + }, + { + "epoch": 0.367841515603212, + "grad_norm": 2.367894940798602, + "learning_rate": 3.5544711663257602e-06, + "loss": 0.1332, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3825 + }, + { + "epoch": 0.36793768331970955, + "grad_norm": 1.7920636399649406, + "learning_rate": 3.5537794738828423e-06, + "loss": 0.158, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3826 + }, + { + "epoch": 0.36803385103620717, + "grad_norm": 1.7604960228234707, + "learning_rate": 3.5530876833301203e-06, + "loss": 0.1517, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3827 + }, + { + "epoch": 0.36813001875270474, + "grad_norm": 2.156192114566903, + "learning_rate": 3.5523957947320015e-06, + "loss": 0.1288, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3828 + }, + { + "epoch": 0.3682261864692023, + "grad_norm": 3.199620322692651, + "learning_rate": 3.5517038081529026e-06, + "loss": 0.1474, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3829 + }, + { + "epoch": 0.36832235418569986, + "grad_norm": 1.9512850416327887, + "learning_rate": 3.5510117236572504e-06, + "loss": 0.1468, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3830 + }, + { + "epoch": 0.3684185219021974, + "grad_norm": 2.96579710714061, + "learning_rate": 3.550319541309479e-06, + "loss": 0.1415, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3831 + }, + { + "epoch": 0.368514689618695, + "grad_norm": 2.359850937681529, + "learning_rate": 3.549627261174032e-06, + "loss": 0.1231, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3832 + }, + { + "epoch": 0.36861085733519255, + "grad_norm": 1.809638530432946, + "learning_rate": 3.548934883315365e-06, + "loss": 0.1549, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3833 + }, + { + "epoch": 0.3687070250516902, + "grad_norm": 2.0353770903111252, + "learning_rate": 3.548242407797937e-06, + "loss": 0.1431, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3834 + }, + { + "epoch": 0.36880319276818774, + "grad_norm": 1.899397572121076, + "learning_rate": 3.547549834686222e-06, + "loss": 0.1277, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3835 + }, + { + "epoch": 0.3688993604846853, + "grad_norm": 1.7632536213961623, + "learning_rate": 3.546857164044699e-06, + "loss": 0.1442, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3836 + }, + { + "epoch": 0.36899552820118287, + "grad_norm": 2.2408562143350323, + "learning_rate": 3.546164395937858e-06, + "loss": 0.1445, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3837 + }, + { + "epoch": 0.36909169591768043, + "grad_norm": 1.5966356163738895, + "learning_rate": 3.5454715304301983e-06, + "loss": 0.1395, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3838 + }, + { + "epoch": 0.369187863634178, + "grad_norm": 1.5276807706779456, + "learning_rate": 3.5447785675862266e-06, + "loss": 0.1467, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3839 + }, + { + "epoch": 0.36928403135067556, + "grad_norm": 1.5030235537331722, + "learning_rate": 3.54408550747046e-06, + "loss": 0.1183, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3840 + }, + { + "epoch": 0.3693801990671732, + "grad_norm": 1.3583020016547154, + "learning_rate": 3.5433923501474255e-06, + "loss": 0.1091, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3841 + }, + { + "epoch": 0.36947636678367074, + "grad_norm": 3.043157683644244, + "learning_rate": 3.5426990956816563e-06, + "loss": 0.157, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3842 + }, + { + "epoch": 0.3695725345001683, + "grad_norm": 1.567528208781221, + "learning_rate": 3.5420057441376964e-06, + "loss": 0.1323, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3843 + }, + { + "epoch": 0.36966870221666587, + "grad_norm": 1.74958782565891, + "learning_rate": 3.5413122955801004e-06, + "loss": 0.1448, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3844 + }, + { + "epoch": 0.36976486993316343, + "grad_norm": 1.88360585187046, + "learning_rate": 3.540618750073428e-06, + "loss": 0.1452, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3845 + }, + { + "epoch": 0.369861037649661, + "grad_norm": 1.576351906837215, + "learning_rate": 3.5399251076822515e-06, + "loss": 0.1352, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3846 + }, + { + "epoch": 0.36995720536615856, + "grad_norm": 1.6691516607980925, + "learning_rate": 3.539231368471152e-06, + "loss": 0.1656, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3847 + }, + { + "epoch": 0.3700533730826562, + "grad_norm": 1.5110678869429726, + "learning_rate": 3.5385375325047167e-06, + "loss": 0.1405, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3848 + }, + { + "epoch": 0.37014954079915374, + "grad_norm": 1.5805567879719173, + "learning_rate": 3.5378435998475447e-06, + "loss": 0.1229, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3849 + }, + { + "epoch": 0.3702457085156513, + "grad_norm": 1.767249827663855, + "learning_rate": 3.5371495705642417e-06, + "loss": 0.1569, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3850 + }, + { + "epoch": 0.37034187623214887, + "grad_norm": 2.202357423366731, + "learning_rate": 3.5364554447194254e-06, + "loss": 0.1494, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3851 + }, + { + "epoch": 0.37043804394864643, + "grad_norm": 1.9615240810287866, + "learning_rate": 3.5357612223777206e-06, + "loss": 0.1401, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3852 + }, + { + "epoch": 0.370534211665144, + "grad_norm": 1.3957479966596154, + "learning_rate": 3.5350669036037606e-06, + "loss": 0.1214, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3853 + }, + { + "epoch": 0.37063037938164156, + "grad_norm": 1.362802179804993, + "learning_rate": 3.5343724884621888e-06, + "loss": 0.1043, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3854 + }, + { + "epoch": 0.3707265470981392, + "grad_norm": 1.7504752253573284, + "learning_rate": 3.533677977017658e-06, + "loss": 0.1362, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3855 + }, + { + "epoch": 0.37082271481463674, + "grad_norm": 1.4025890640173093, + "learning_rate": 3.532983369334827e-06, + "loss": 0.1199, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3856 + }, + { + "epoch": 0.3709188825311343, + "grad_norm": 2.3523554907831015, + "learning_rate": 3.5322886654783677e-06, + "loss": 0.129, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3857 + }, + { + "epoch": 0.3710150502476319, + "grad_norm": 1.591648139415688, + "learning_rate": 3.531593865512958e-06, + "loss": 0.1227, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3858 + }, + { + "epoch": 0.37111121796412944, + "grad_norm": 1.8258903250356469, + "learning_rate": 3.530898969503287e-06, + "loss": 0.1742, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3859 + }, + { + "epoch": 0.371207385680627, + "grad_norm": 1.513000229169696, + "learning_rate": 3.530203977514049e-06, + "loss": 0.1159, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3860 + }, + { + "epoch": 0.37130355339712456, + "grad_norm": 1.883459235545708, + "learning_rate": 3.529508889609952e-06, + "loss": 0.1163, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3861 + }, + { + "epoch": 0.3713997211136222, + "grad_norm": 2.28523049781746, + "learning_rate": 3.52881370585571e-06, + "loss": 0.1304, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3862 + }, + { + "epoch": 0.37149588883011975, + "grad_norm": 2.452355787633517, + "learning_rate": 3.528118426316046e-06, + "loss": 0.1322, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3863 + }, + { + "epoch": 0.3715920565466173, + "grad_norm": 1.6875633080527705, + "learning_rate": 3.527423051055692e-06, + "loss": 0.1243, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3864 + }, + { + "epoch": 0.3716882242631149, + "grad_norm": 1.840770465194262, + "learning_rate": 3.526727580139391e-06, + "loss": 0.1396, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3865 + }, + { + "epoch": 0.37178439197961244, + "grad_norm": 3.54573076442055, + "learning_rate": 3.5260320136318927e-06, + "loss": 0.1394, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3866 + }, + { + "epoch": 0.37188055969611, + "grad_norm": 2.7751398348790635, + "learning_rate": 3.5253363515979555e-06, + "loss": 0.1255, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3867 + }, + { + "epoch": 0.37197672741260757, + "grad_norm": 1.9815365432297194, + "learning_rate": 3.5246405941023476e-06, + "loss": 0.1131, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3868 + }, + { + "epoch": 0.3720728951291052, + "grad_norm": 3.514980084615635, + "learning_rate": 3.523944741209847e-06, + "loss": 0.1659, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3869 + }, + { + "epoch": 0.37216906284560275, + "grad_norm": 1.9335983985730496, + "learning_rate": 3.5232487929852386e-06, + "loss": 0.1423, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3870 + }, + { + "epoch": 0.3722652305621003, + "grad_norm": 3.9998154943420943, + "learning_rate": 3.5225527494933175e-06, + "loss": 0.14, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3871 + }, + { + "epoch": 0.3723613982785979, + "grad_norm": 2.01763027066919, + "learning_rate": 3.5218566107988872e-06, + "loss": 0.143, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3872 + }, + { + "epoch": 0.37245756599509544, + "grad_norm": 2.8641080299864923, + "learning_rate": 3.5211603769667603e-06, + "loss": 0.1605, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3873 + }, + { + "epoch": 0.372553733711593, + "grad_norm": 3.552533869839717, + "learning_rate": 3.520464048061758e-06, + "loss": 0.1206, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3874 + }, + { + "epoch": 0.37264990142809057, + "grad_norm": 1.7438787700499527, + "learning_rate": 3.51976762414871e-06, + "loss": 0.1467, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3875 + }, + { + "epoch": 0.3727460691445882, + "grad_norm": 1.8514401709760486, + "learning_rate": 3.519071105292456e-06, + "loss": 0.1314, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3876 + }, + { + "epoch": 0.37284223686108575, + "grad_norm": 1.5834399623479702, + "learning_rate": 3.518374491557844e-06, + "loss": 0.1635, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3877 + }, + { + "epoch": 0.3729384045775833, + "grad_norm": 1.2604428088095887, + "learning_rate": 3.5176777830097292e-06, + "loss": 0.1153, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3878 + }, + { + "epoch": 0.3730345722940809, + "grad_norm": 2.4880521290716624, + "learning_rate": 3.5169809797129782e-06, + "loss": 0.1101, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3879 + }, + { + "epoch": 0.37313074001057844, + "grad_norm": 3.110803237201221, + "learning_rate": 3.516284081732466e-06, + "loss": 0.135, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3880 + }, + { + "epoch": 0.373226907727076, + "grad_norm": 3.8851029344939874, + "learning_rate": 3.515587089133075e-06, + "loss": 0.1391, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3881 + }, + { + "epoch": 0.37332307544357357, + "grad_norm": 1.537545442750314, + "learning_rate": 3.5148900019796966e-06, + "loss": 0.1367, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3882 + }, + { + "epoch": 0.3734192431600712, + "grad_norm": 1.4434163881715352, + "learning_rate": 3.5141928203372326e-06, + "loss": 0.1326, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3883 + }, + { + "epoch": 0.37351541087656875, + "grad_norm": 1.5705212376587816, + "learning_rate": 3.513495544270592e-06, + "loss": 0.1269, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3884 + }, + { + "epoch": 0.3736115785930663, + "grad_norm": 2.2171176633679446, + "learning_rate": 3.512798173844693e-06, + "loss": 0.1594, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3885 + }, + { + "epoch": 0.3737077463095639, + "grad_norm": 1.4947246101606588, + "learning_rate": 3.5121007091244636e-06, + "loss": 0.1283, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3886 + }, + { + "epoch": 0.37380391402606145, + "grad_norm": 2.398931573070145, + "learning_rate": 3.511403150174838e-06, + "loss": 0.1447, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3887 + }, + { + "epoch": 0.373900081742559, + "grad_norm": 1.7756546404472755, + "learning_rate": 3.5107054970607624e-06, + "loss": 0.1308, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3888 + }, + { + "epoch": 0.3739962494590566, + "grad_norm": 1.9285561443394605, + "learning_rate": 3.51000774984719e-06, + "loss": 0.1314, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3889 + }, + { + "epoch": 0.3740924171755542, + "grad_norm": 1.6853415317094307, + "learning_rate": 3.5093099085990826e-06, + "loss": 0.1372, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3890 + }, + { + "epoch": 0.37418858489205176, + "grad_norm": 2.0926349942931886, + "learning_rate": 3.508611973381412e-06, + "loss": 0.1357, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3891 + }, + { + "epoch": 0.3742847526085493, + "grad_norm": 3.147646493233957, + "learning_rate": 3.507913944259157e-06, + "loss": 0.1505, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3892 + }, + { + "epoch": 0.3743809203250469, + "grad_norm": 2.1454363950505755, + "learning_rate": 3.507215821297306e-06, + "loss": 0.139, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3893 + }, + { + "epoch": 0.37447708804154445, + "grad_norm": 2.194157153062541, + "learning_rate": 3.5065176045608575e-06, + "loss": 0.1404, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3894 + }, + { + "epoch": 0.374573255758042, + "grad_norm": 1.5869224340326906, + "learning_rate": 3.505819294114815e-06, + "loss": 0.1297, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3895 + }, + { + "epoch": 0.3746694234745396, + "grad_norm": 4.255822391751172, + "learning_rate": 3.505120890024195e-06, + "loss": 0.1437, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3896 + }, + { + "epoch": 0.3747655911910372, + "grad_norm": 2.7792681554478627, + "learning_rate": 3.504422392354021e-06, + "loss": 0.1258, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3897 + }, + { + "epoch": 0.37486175890753476, + "grad_norm": 2.42887888808134, + "learning_rate": 3.503723801169324e-06, + "loss": 0.1364, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3898 + }, + { + "epoch": 0.3749579266240323, + "grad_norm": 2.4544103366615144, + "learning_rate": 3.5030251165351446e-06, + "loss": 0.1367, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3899 + }, + { + "epoch": 0.3750540943405299, + "grad_norm": 4.330701896979695, + "learning_rate": 3.5023263385165346e-06, + "loss": 0.1457, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3900 + }, + { + "epoch": 0.37515026205702745, + "grad_norm": 2.668073009061238, + "learning_rate": 3.5016274671785497e-06, + "loss": 0.1254, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3901 + }, + { + "epoch": 0.375246429773525, + "grad_norm": 3.0743365587374685, + "learning_rate": 3.500928502586258e-06, + "loss": 0.1865, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3902 + }, + { + "epoch": 0.3753425974900226, + "grad_norm": 1.779847993928931, + "learning_rate": 3.5002294448047347e-06, + "loss": 0.1528, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3903 + }, + { + "epoch": 0.3754387652065202, + "grad_norm": 2.3620135214514955, + "learning_rate": 3.499530293899064e-06, + "loss": 0.1013, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3904 + }, + { + "epoch": 0.37553493292301776, + "grad_norm": 4.127995761161169, + "learning_rate": 3.4988310499343385e-06, + "loss": 0.1588, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3905 + }, + { + "epoch": 0.3756311006395153, + "grad_norm": 3.4133180226645146, + "learning_rate": 3.4981317129756603e-06, + "loss": 0.1579, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3906 + }, + { + "epoch": 0.3757272683560129, + "grad_norm": 1.4777963195414092, + "learning_rate": 3.4974322830881398e-06, + "loss": 0.1184, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3907 + }, + { + "epoch": 0.37582343607251045, + "grad_norm": 1.5349866757160482, + "learning_rate": 3.496732760336895e-06, + "loss": 0.141, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3908 + }, + { + "epoch": 0.375919603789008, + "grad_norm": 1.554448872684915, + "learning_rate": 3.4960331447870546e-06, + "loss": 0.1184, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3909 + }, + { + "epoch": 0.3760157715055056, + "grad_norm": 2.139368842594886, + "learning_rate": 3.495333436503753e-06, + "loss": 0.143, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3910 + }, + { + "epoch": 0.3761119392220032, + "grad_norm": 1.6394847689704493, + "learning_rate": 3.4946336355521373e-06, + "loss": 0.1316, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3911 + }, + { + "epoch": 0.37620810693850076, + "grad_norm": 1.4183542720239823, + "learning_rate": 3.4939337419973584e-06, + "loss": 0.105, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3912 + }, + { + "epoch": 0.3763042746549983, + "grad_norm": 1.7192385996024349, + "learning_rate": 3.49323375590458e-06, + "loss": 0.1229, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3913 + }, + { + "epoch": 0.3764004423714959, + "grad_norm": 1.4616357378618756, + "learning_rate": 3.492533677338973e-06, + "loss": 0.1223, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3914 + }, + { + "epoch": 0.37649661008799346, + "grad_norm": 3.6748929100966383, + "learning_rate": 3.491833506365715e-06, + "loss": 0.1457, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3915 + }, + { + "epoch": 0.376592777804491, + "grad_norm": 2.1665144480098575, + "learning_rate": 3.491133243049995e-06, + "loss": 0.1165, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3916 + }, + { + "epoch": 0.3766889455209886, + "grad_norm": 2.24441285869291, + "learning_rate": 3.4904328874570103e-06, + "loss": 0.1403, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3917 + }, + { + "epoch": 0.3767851132374862, + "grad_norm": 2.4184409687508364, + "learning_rate": 3.4897324396519637e-06, + "loss": 0.1426, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3918 + }, + { + "epoch": 0.37688128095398377, + "grad_norm": 1.8389070160656975, + "learning_rate": 3.4890318997000716e-06, + "loss": 0.1293, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3919 + }, + { + "epoch": 0.37697744867048133, + "grad_norm": 2.8088997305986445, + "learning_rate": 3.4883312676665537e-06, + "loss": 0.1184, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3920 + }, + { + "epoch": 0.3770736163869789, + "grad_norm": 1.9364860230121044, + "learning_rate": 3.4876305436166423e-06, + "loss": 0.1313, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3921 + }, + { + "epoch": 0.37716978410347646, + "grad_norm": 2.195227130784086, + "learning_rate": 3.4869297276155765e-06, + "loss": 0.1601, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3922 + }, + { + "epoch": 0.377265951819974, + "grad_norm": 2.1361321005886182, + "learning_rate": 3.4862288197286036e-06, + "loss": 0.1163, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3923 + }, + { + "epoch": 0.3773621195364716, + "grad_norm": 2.17176409489351, + "learning_rate": 3.4855278200209815e-06, + "loss": 0.1209, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3924 + }, + { + "epoch": 0.3774582872529692, + "grad_norm": 2.6827175248233566, + "learning_rate": 3.4848267285579733e-06, + "loss": 0.1232, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3925 + }, + { + "epoch": 0.37755445496946677, + "grad_norm": 1.6695393026894392, + "learning_rate": 3.484125545404854e-06, + "loss": 0.1278, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3926 + }, + { + "epoch": 0.37765062268596433, + "grad_norm": 3.195069931530337, + "learning_rate": 3.4834242706269045e-06, + "loss": 0.185, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3927 + }, + { + "epoch": 0.3777467904024619, + "grad_norm": 1.4659303595160595, + "learning_rate": 3.4827229042894174e-06, + "loss": 0.1136, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3928 + }, + { + "epoch": 0.37784295811895946, + "grad_norm": 2.539969158521965, + "learning_rate": 3.4820214464576895e-06, + "loss": 0.1312, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3929 + }, + { + "epoch": 0.377939125835457, + "grad_norm": 1.894003226045691, + "learning_rate": 3.4813198971970297e-06, + "loss": 0.1383, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3930 + }, + { + "epoch": 0.3780352935519546, + "grad_norm": 1.8913429581548342, + "learning_rate": 3.4806182565727547e-06, + "loss": 0.1445, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3931 + }, + { + "epoch": 0.3781314612684522, + "grad_norm": 1.6250959524135782, + "learning_rate": 3.479916524650188e-06, + "loss": 0.1324, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3932 + }, + { + "epoch": 0.37822762898494977, + "grad_norm": 2.09760408365385, + "learning_rate": 3.4792147014946642e-06, + "loss": 0.1388, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3933 + }, + { + "epoch": 0.37832379670144733, + "grad_norm": 1.7206284593849979, + "learning_rate": 3.478512787171523e-06, + "loss": 0.1301, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3934 + }, + { + "epoch": 0.3784199644179449, + "grad_norm": 1.7037406506262802, + "learning_rate": 3.4778107817461158e-06, + "loss": 0.1607, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3935 + }, + { + "epoch": 0.37851613213444246, + "grad_norm": 1.568956940026115, + "learning_rate": 3.477108685283803e-06, + "loss": 0.1199, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3936 + }, + { + "epoch": 0.37861229985094, + "grad_norm": 1.7429937998834575, + "learning_rate": 3.476406497849948e-06, + "loss": 0.1553, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3937 + }, + { + "epoch": 0.3787084675674376, + "grad_norm": 1.7670742264197947, + "learning_rate": 3.475704219509929e-06, + "loss": 0.1748, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3938 + }, + { + "epoch": 0.3788046352839352, + "grad_norm": 1.5836367754136926, + "learning_rate": 3.47500185032913e-06, + "loss": 0.138, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3939 + }, + { + "epoch": 0.3789008030004328, + "grad_norm": 2.431437059857243, + "learning_rate": 3.4742993903729423e-06, + "loss": 0.1715, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3940 + }, + { + "epoch": 0.37899697071693034, + "grad_norm": 3.01891680646217, + "learning_rate": 3.473596839706768e-06, + "loss": 0.1233, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3941 + }, + { + "epoch": 0.3790931384334279, + "grad_norm": 1.49199788246084, + "learning_rate": 3.4728941983960156e-06, + "loss": 0.1256, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3942 + }, + { + "epoch": 0.37918930614992546, + "grad_norm": 2.230630055142937, + "learning_rate": 3.4721914665061036e-06, + "loss": 0.1517, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3943 + }, + { + "epoch": 0.37928547386642303, + "grad_norm": 3.527955601439248, + "learning_rate": 3.4714886441024576e-06, + "loss": 0.1349, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3944 + }, + { + "epoch": 0.3793816415829206, + "grad_norm": 2.277593596019612, + "learning_rate": 3.4707857312505137e-06, + "loss": 0.1338, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3945 + }, + { + "epoch": 0.3794778092994182, + "grad_norm": 3.0646903035077395, + "learning_rate": 3.4700827280157134e-06, + "loss": 0.1593, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3946 + }, + { + "epoch": 0.3795739770159158, + "grad_norm": 1.5899021842653667, + "learning_rate": 3.46937963446351e-06, + "loss": 0.1436, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3947 + }, + { + "epoch": 0.37967014473241334, + "grad_norm": 2.355504692566089, + "learning_rate": 3.4686764506593616e-06, + "loss": 0.1373, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3948 + }, + { + "epoch": 0.3797663124489109, + "grad_norm": 2.5327725387685582, + "learning_rate": 3.467973176668738e-06, + "loss": 0.1107, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3949 + }, + { + "epoch": 0.37986248016540847, + "grad_norm": 2.6194830683376438, + "learning_rate": 3.467269812557115e-06, + "loss": 0.1612, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3950 + }, + { + "epoch": 0.37995864788190603, + "grad_norm": 1.7923398260709127, + "learning_rate": 3.4665663583899788e-06, + "loss": 0.134, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3951 + }, + { + "epoch": 0.3800548155984036, + "grad_norm": 2.9850979827416073, + "learning_rate": 3.4658628142328215e-06, + "loss": 0.1402, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3952 + }, + { + "epoch": 0.3801509833149012, + "grad_norm": 2.384213312193729, + "learning_rate": 3.465159180151147e-06, + "loss": 0.1307, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3953 + }, + { + "epoch": 0.3802471510313988, + "grad_norm": 1.8629469849716849, + "learning_rate": 3.4644554562104638e-06, + "loss": 0.1032, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3954 + }, + { + "epoch": 0.38034331874789634, + "grad_norm": 1.9434454955046747, + "learning_rate": 3.4637516424762908e-06, + "loss": 0.1325, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3955 + }, + { + "epoch": 0.3804394864643939, + "grad_norm": 3.9526756836154435, + "learning_rate": 3.463047739014156e-06, + "loss": 0.1309, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3956 + }, + { + "epoch": 0.38053565418089147, + "grad_norm": 2.0045800084266263, + "learning_rate": 3.4623437458895947e-06, + "loss": 0.1587, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3957 + }, + { + "epoch": 0.38063182189738903, + "grad_norm": 3.3041448208285846, + "learning_rate": 3.461639663168149e-06, + "loss": 0.1166, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3958 + }, + { + "epoch": 0.3807279896138866, + "grad_norm": 1.8155628641446337, + "learning_rate": 3.4609354909153736e-06, + "loss": 0.1326, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3959 + }, + { + "epoch": 0.3808241573303842, + "grad_norm": 3.310168069401396, + "learning_rate": 3.460231229196826e-06, + "loss": 0.1344, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3960 + }, + { + "epoch": 0.3809203250468818, + "grad_norm": 4.199049541611783, + "learning_rate": 3.4595268780780773e-06, + "loss": 0.1488, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3961 + }, + { + "epoch": 0.38101649276337934, + "grad_norm": 3.4982929021081115, + "learning_rate": 3.4588224376247027e-06, + "loss": 0.1455, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3962 + }, + { + "epoch": 0.3811126604798769, + "grad_norm": 1.6880986575055477, + "learning_rate": 3.4581179079022885e-06, + "loss": 0.1324, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3963 + }, + { + "epoch": 0.38120882819637447, + "grad_norm": 1.3780800353298317, + "learning_rate": 3.4574132889764287e-06, + "loss": 0.1161, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3964 + }, + { + "epoch": 0.38130499591287204, + "grad_norm": 1.6005653652339848, + "learning_rate": 3.4567085809127247e-06, + "loss": 0.115, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3965 + }, + { + "epoch": 0.3814011636293696, + "grad_norm": 2.3033751475185418, + "learning_rate": 3.4560037837767867e-06, + "loss": 0.136, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3966 + }, + { + "epoch": 0.3814973313458672, + "grad_norm": 2.8299038518706556, + "learning_rate": 3.4552988976342344e-06, + "loss": 0.1363, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3967 + }, + { + "epoch": 0.3815934990623648, + "grad_norm": 3.2550862333699038, + "learning_rate": 3.4545939225506935e-06, + "loss": 0.1353, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3968 + }, + { + "epoch": 0.38168966677886235, + "grad_norm": 1.9028982492331603, + "learning_rate": 3.453888858591799e-06, + "loss": 0.1012, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3969 + }, + { + "epoch": 0.3817858344953599, + "grad_norm": 2.7515117359668744, + "learning_rate": 3.4531837058231952e-06, + "loss": 0.1569, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3970 + }, + { + "epoch": 0.3818820022118575, + "grad_norm": 1.5250283468377674, + "learning_rate": 3.4524784643105335e-06, + "loss": 0.1378, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3971 + }, + { + "epoch": 0.38197816992835504, + "grad_norm": 3.587538838109574, + "learning_rate": 3.451773134119474e-06, + "loss": 0.1286, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3972 + }, + { + "epoch": 0.3820743376448526, + "grad_norm": 4.4848295178858235, + "learning_rate": 3.4510677153156834e-06, + "loss": 0.1387, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3973 + }, + { + "epoch": 0.3821705053613502, + "grad_norm": 5.003433218247958, + "learning_rate": 3.4503622079648407e-06, + "loss": 0.1921, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3974 + }, + { + "epoch": 0.3822666730778478, + "grad_norm": 3.7149215308802837, + "learning_rate": 3.449656612132629e-06, + "loss": 0.1364, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3975 + }, + { + "epoch": 0.38236284079434535, + "grad_norm": 2.971244410655715, + "learning_rate": 3.4489509278847415e-06, + "loss": 0.1155, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3976 + }, + { + "epoch": 0.3824590085108429, + "grad_norm": 1.9602668003644752, + "learning_rate": 3.448245155286879e-06, + "loss": 0.1001, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3977 + }, + { + "epoch": 0.3825551762273405, + "grad_norm": 1.6725567834625004, + "learning_rate": 3.4475392944047514e-06, + "loss": 0.1373, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3978 + }, + { + "epoch": 0.38265134394383804, + "grad_norm": 4.821605185727039, + "learning_rate": 3.4468333453040763e-06, + "loss": 0.1364, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3979 + }, + { + "epoch": 0.3827475116603356, + "grad_norm": 6.039475490112973, + "learning_rate": 3.446127308050579e-06, + "loss": 0.178, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3980 + }, + { + "epoch": 0.3828436793768332, + "grad_norm": 4.76285919649256, + "learning_rate": 3.445421182709995e-06, + "loss": 0.1161, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3981 + }, + { + "epoch": 0.3829398470933308, + "grad_norm": 3.349114208624762, + "learning_rate": 3.4447149693480647e-06, + "loss": 0.1232, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3982 + }, + { + "epoch": 0.38303601480982835, + "grad_norm": 3.4264338837856396, + "learning_rate": 3.4440086680305394e-06, + "loss": 0.1155, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3983 + }, + { + "epoch": 0.3831321825263259, + "grad_norm": 2.0830295024037557, + "learning_rate": 3.443302278823178e-06, + "loss": 0.1453, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3984 + }, + { + "epoch": 0.3832283502428235, + "grad_norm": 3.5830462206782796, + "learning_rate": 3.4425958017917465e-06, + "loss": 0.1308, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3985 + }, + { + "epoch": 0.38332451795932104, + "grad_norm": 6.192295803552633, + "learning_rate": 3.441889237002021e-06, + "loss": 0.1527, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3986 + }, + { + "epoch": 0.3834206856758186, + "grad_norm": 4.785102148097483, + "learning_rate": 3.441182584519783e-06, + "loss": 0.1312, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3987 + }, + { + "epoch": 0.3835168533923162, + "grad_norm": 6.969064028346616, + "learning_rate": 3.4404758444108253e-06, + "loss": 0.1776, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3988 + }, + { + "epoch": 0.3836130211088138, + "grad_norm": 3.0779834354994255, + "learning_rate": 3.4397690167409475e-06, + "loss": 0.1321, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3989 + }, + { + "epoch": 0.38370918882531135, + "grad_norm": 2.474735049089316, + "learning_rate": 3.439062101575956e-06, + "loss": 0.1382, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3990 + }, + { + "epoch": 0.3838053565418089, + "grad_norm": 1.5003365878594332, + "learning_rate": 3.438355098981667e-06, + "loss": 0.1221, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3991 + }, + { + "epoch": 0.3839015242583065, + "grad_norm": 1.864305839277102, + "learning_rate": 3.437648009023905e-06, + "loss": 0.1434, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3992 + }, + { + "epoch": 0.38399769197480405, + "grad_norm": 3.9910472034187454, + "learning_rate": 3.4369408317685016e-06, + "loss": 0.1534, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3993 + }, + { + "epoch": 0.3840938596913016, + "grad_norm": 3.9557966172399137, + "learning_rate": 3.4362335672812964e-06, + "loss": 0.1369, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3994 + }, + { + "epoch": 0.38419002740779923, + "grad_norm": 2.5437853925862814, + "learning_rate": 3.4355262156281388e-06, + "loss": 0.1288, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3995 + }, + { + "epoch": 0.3842861951242968, + "grad_norm": 1.5105658573425405, + "learning_rate": 3.4348187768748847e-06, + "loss": 0.1304, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3996 + }, + { + "epoch": 0.38438236284079436, + "grad_norm": 1.6492101063947386, + "learning_rate": 3.4341112510873983e-06, + "loss": 0.1238, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3997 + }, + { + "epoch": 0.3844785305572919, + "grad_norm": 1.8659447310604926, + "learning_rate": 3.4334036383315527e-06, + "loss": 0.1631, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3998 + }, + { + "epoch": 0.3845746982737895, + "grad_norm": 2.340938265282556, + "learning_rate": 3.432695938673228e-06, + "loss": 0.1288, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 3999 + }, + { + "epoch": 0.38467086599028705, + "grad_norm": 2.398156206021967, + "learning_rate": 3.431988152178315e-06, + "loss": 0.1297, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4000 + }, + { + "epoch": 0.3847670337067846, + "grad_norm": 3.4390054751037242, + "learning_rate": 3.4312802789127075e-06, + "loss": 0.1532, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4001 + }, + { + "epoch": 0.38486320142328223, + "grad_norm": 1.3979013623488372, + "learning_rate": 3.4305723189423123e-06, + "loss": 0.1188, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4002 + }, + { + "epoch": 0.3849593691397798, + "grad_norm": 3.241804420955442, + "learning_rate": 3.4298642723330427e-06, + "loss": 0.1438, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4003 + }, + { + "epoch": 0.38505553685627736, + "grad_norm": 1.7336014304073573, + "learning_rate": 3.429156139150819e-06, + "loss": 0.169, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4004 + }, + { + "epoch": 0.3851517045727749, + "grad_norm": 2.6439449433843922, + "learning_rate": 3.4284479194615706e-06, + "loss": 0.1514, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4005 + }, + { + "epoch": 0.3852478722892725, + "grad_norm": 1.4933097214165725, + "learning_rate": 3.427739613331235e-06, + "loss": 0.1308, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4006 + }, + { + "epoch": 0.38534404000577005, + "grad_norm": 2.0616738799831174, + "learning_rate": 3.4270312208257573e-06, + "loss": 0.1274, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4007 + }, + { + "epoch": 0.3854402077222676, + "grad_norm": 2.6039598777058757, + "learning_rate": 3.4263227420110905e-06, + "loss": 0.1428, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4008 + }, + { + "epoch": 0.38553637543876523, + "grad_norm": 2.280181741452553, + "learning_rate": 3.425614176953197e-06, + "loss": 0.1519, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4009 + }, + { + "epoch": 0.3856325431552628, + "grad_norm": 1.2104269685582423, + "learning_rate": 3.4249055257180443e-06, + "loss": 0.1182, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4010 + }, + { + "epoch": 0.38572871087176036, + "grad_norm": 1.3631037155596124, + "learning_rate": 3.424196788371611e-06, + "loss": 0.1083, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4011 + }, + { + "epoch": 0.3858248785882579, + "grad_norm": 2.4671973173376665, + "learning_rate": 3.4234879649798836e-06, + "loss": 0.1498, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4012 + }, + { + "epoch": 0.3859210463047555, + "grad_norm": 1.3886876751791715, + "learning_rate": 3.4227790556088532e-06, + "loss": 0.1199, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4013 + }, + { + "epoch": 0.38601721402125305, + "grad_norm": 1.5858869621285003, + "learning_rate": 3.4220700603245225e-06, + "loss": 0.1213, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4014 + }, + { + "epoch": 0.3861133817377506, + "grad_norm": 1.669296267248191, + "learning_rate": 3.4213609791929016e-06, + "loss": 0.1187, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4015 + }, + { + "epoch": 0.38620954945424824, + "grad_norm": 1.683132181284683, + "learning_rate": 3.420651812280006e-06, + "loss": 0.1308, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4016 + }, + { + "epoch": 0.3863057171707458, + "grad_norm": 1.4407913620749824, + "learning_rate": 3.419942559651863e-06, + "loss": 0.1278, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4017 + }, + { + "epoch": 0.38640188488724336, + "grad_norm": 5.093628825038953, + "learning_rate": 3.4192332213745054e-06, + "loss": 0.159, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4018 + }, + { + "epoch": 0.3864980526037409, + "grad_norm": 2.805982342030978, + "learning_rate": 3.418523797513974e-06, + "loss": 0.1507, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4019 + }, + { + "epoch": 0.3865942203202385, + "grad_norm": 3.005479918937827, + "learning_rate": 3.4178142881363192e-06, + "loss": 0.213, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4020 + }, + { + "epoch": 0.38669038803673605, + "grad_norm": 3.1569165211222927, + "learning_rate": 3.4171046933075973e-06, + "loss": 0.1151, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4021 + }, + { + "epoch": 0.3867865557532336, + "grad_norm": 1.6055871355360785, + "learning_rate": 3.4163950130938737e-06, + "loss": 0.1418, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4022 + }, + { + "epoch": 0.38688272346973124, + "grad_norm": 3.553067009448617, + "learning_rate": 3.415685247561222e-06, + "loss": 0.1866, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4023 + }, + { + "epoch": 0.3869788911862288, + "grad_norm": 2.6290887624185064, + "learning_rate": 3.414975396775724e-06, + "loss": 0.1103, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4024 + }, + { + "epoch": 0.38707505890272637, + "grad_norm": 1.492257101647212, + "learning_rate": 3.414265460803467e-06, + "loss": 0.1032, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4025 + }, + { + "epoch": 0.38717122661922393, + "grad_norm": 2.6193340823646527, + "learning_rate": 3.4135554397105497e-06, + "loss": 0.1242, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4026 + }, + { + "epoch": 0.3872673943357215, + "grad_norm": 2.091782656660269, + "learning_rate": 3.4128453335630755e-06, + "loss": 0.1682, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4027 + }, + { + "epoch": 0.38736356205221906, + "grad_norm": 1.9945423776175242, + "learning_rate": 3.4121351424271593e-06, + "loss": 0.1291, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4028 + }, + { + "epoch": 0.3874597297687166, + "grad_norm": 2.2822603179809655, + "learning_rate": 3.411424866368919e-06, + "loss": 0.1842, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4029 + }, + { + "epoch": 0.38755589748521424, + "grad_norm": 1.6967609026766797, + "learning_rate": 3.410714505454486e-06, + "loss": 0.1541, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4030 + }, + { + "epoch": 0.3876520652017118, + "grad_norm": 1.8470896782423878, + "learning_rate": 3.410004059749996e-06, + "loss": 0.1422, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4031 + }, + { + "epoch": 0.38774823291820937, + "grad_norm": 1.5685446033621189, + "learning_rate": 3.409293529321593e-06, + "loss": 0.1668, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4032 + }, + { + "epoch": 0.38784440063470693, + "grad_norm": 1.6405857333499965, + "learning_rate": 3.4085829142354293e-06, + "loss": 0.1432, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4033 + }, + { + "epoch": 0.3879405683512045, + "grad_norm": 1.2613090811057692, + "learning_rate": 3.407872214557666e-06, + "loss": 0.1204, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4034 + }, + { + "epoch": 0.38803673606770206, + "grad_norm": 1.8950523104877892, + "learning_rate": 3.4071614303544704e-06, + "loss": 0.1321, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4035 + }, + { + "epoch": 0.3881329037841996, + "grad_norm": 2.1387247597165304, + "learning_rate": 3.4064505616920183e-06, + "loss": 0.149, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4036 + }, + { + "epoch": 0.38822907150069724, + "grad_norm": 1.2838074578623293, + "learning_rate": 3.4057396086364957e-06, + "loss": 0.1131, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4037 + }, + { + "epoch": 0.3883252392171948, + "grad_norm": 1.614562082555579, + "learning_rate": 3.405028571254091e-06, + "loss": 0.1504, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4038 + }, + { + "epoch": 0.38842140693369237, + "grad_norm": 1.626817130304867, + "learning_rate": 3.4043174496110066e-06, + "loss": 0.1282, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4039 + }, + { + "epoch": 0.38851757465018993, + "grad_norm": 1.9520491857096545, + "learning_rate": 3.4036062437734484e-06, + "loss": 0.1135, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4040 + }, + { + "epoch": 0.3886137423666875, + "grad_norm": 1.684774586997172, + "learning_rate": 3.402894953807632e-06, + "loss": 0.1411, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4041 + }, + { + "epoch": 0.38870991008318506, + "grad_norm": 1.9907852027386954, + "learning_rate": 3.4021835797797807e-06, + "loss": 0.1077, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4042 + }, + { + "epoch": 0.3888060777996826, + "grad_norm": 1.8206183878424589, + "learning_rate": 3.4014721217561247e-06, + "loss": 0.1206, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4043 + }, + { + "epoch": 0.38890224551618024, + "grad_norm": 2.1280674870995795, + "learning_rate": 3.400760579802903e-06, + "loss": 0.1373, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4044 + }, + { + "epoch": 0.3889984132326778, + "grad_norm": 1.9434026352564602, + "learning_rate": 3.4000489539863627e-06, + "loss": 0.134, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4045 + }, + { + "epoch": 0.3890945809491754, + "grad_norm": 1.8957530793434683, + "learning_rate": 3.399337244372758e-06, + "loss": 0.1582, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4046 + }, + { + "epoch": 0.38919074866567294, + "grad_norm": 1.5394189022666267, + "learning_rate": 3.3986254510283502e-06, + "loss": 0.1136, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4047 + }, + { + "epoch": 0.3892869163821705, + "grad_norm": 1.6927443253583776, + "learning_rate": 3.39791357401941e-06, + "loss": 0.1375, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4048 + }, + { + "epoch": 0.38938308409866806, + "grad_norm": 2.1730535826375523, + "learning_rate": 3.3972016134122154e-06, + "loss": 0.122, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4049 + }, + { + "epoch": 0.38947925181516563, + "grad_norm": 1.6751764619657172, + "learning_rate": 3.396489569273051e-06, + "loss": 0.1206, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4050 + }, + { + "epoch": 0.38957541953166325, + "grad_norm": 1.6863233040447665, + "learning_rate": 3.3957774416682116e-06, + "loss": 0.1296, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4051 + }, + { + "epoch": 0.3896715872481608, + "grad_norm": 2.680531366558751, + "learning_rate": 3.3950652306639965e-06, + "loss": 0.1395, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4052 + }, + { + "epoch": 0.3897677549646584, + "grad_norm": 1.5572827869490409, + "learning_rate": 3.3943529363267146e-06, + "loss": 0.1232, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4053 + }, + { + "epoch": 0.38986392268115594, + "grad_norm": 2.2436590231006823, + "learning_rate": 3.3936405587226847e-06, + "loss": 0.1368, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4054 + }, + { + "epoch": 0.3899600903976535, + "grad_norm": 1.573473891838622, + "learning_rate": 3.392928097918229e-06, + "loss": 0.1341, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4055 + }, + { + "epoch": 0.39005625811415107, + "grad_norm": 1.590626677686798, + "learning_rate": 3.39221555397968e-06, + "loss": 0.1278, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4056 + }, + { + "epoch": 0.39015242583064863, + "grad_norm": 1.4141361037368128, + "learning_rate": 3.391502926973378e-06, + "loss": 0.1064, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4057 + }, + { + "epoch": 0.39024859354714625, + "grad_norm": 1.6157051076096856, + "learning_rate": 3.3907902169656704e-06, + "loss": 0.1275, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4058 + }, + { + "epoch": 0.3903447612636438, + "grad_norm": 1.7744998253990412, + "learning_rate": 3.3900774240229125e-06, + "loss": 0.1474, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4059 + }, + { + "epoch": 0.3904409289801414, + "grad_norm": 1.6397196312602065, + "learning_rate": 3.3893645482114663e-06, + "loss": 0.1368, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4060 + }, + { + "epoch": 0.39053709669663894, + "grad_norm": 1.9801432695458283, + "learning_rate": 3.3886515895977036e-06, + "loss": 0.1486, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4061 + }, + { + "epoch": 0.3906332644131365, + "grad_norm": 1.3858665736798657, + "learning_rate": 3.387938548248003e-06, + "loss": 0.116, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4062 + }, + { + "epoch": 0.39072943212963407, + "grad_norm": 1.89193266850798, + "learning_rate": 3.3872254242287504e-06, + "loss": 0.1641, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4063 + }, + { + "epoch": 0.39082559984613163, + "grad_norm": 1.7653739379208442, + "learning_rate": 3.386512217606339e-06, + "loss": 0.1628, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4064 + }, + { + "epoch": 0.39092176756262925, + "grad_norm": 1.5563266432684622, + "learning_rate": 3.3857989284471713e-06, + "loss": 0.138, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4065 + }, + { + "epoch": 0.3910179352791268, + "grad_norm": 1.4464010767877802, + "learning_rate": 3.3850855568176567e-06, + "loss": 0.1259, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4066 + }, + { + "epoch": 0.3911141029956244, + "grad_norm": 1.5440252340045155, + "learning_rate": 3.38437210278421e-06, + "loss": 0.1335, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4067 + }, + { + "epoch": 0.39121027071212194, + "grad_norm": 1.7224887692810804, + "learning_rate": 3.383658566413258e-06, + "loss": 0.1259, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4068 + }, + { + "epoch": 0.3913064384286195, + "grad_norm": 1.8890841122124329, + "learning_rate": 3.3829449477712323e-06, + "loss": 0.1259, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4069 + }, + { + "epoch": 0.39140260614511707, + "grad_norm": 2.6706197340028686, + "learning_rate": 3.382231246924572e-06, + "loss": 0.1286, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4070 + }, + { + "epoch": 0.39149877386161464, + "grad_norm": 3.166556246947509, + "learning_rate": 3.381517463939726e-06, + "loss": 0.154, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4071 + }, + { + "epoch": 0.39159494157811225, + "grad_norm": 3.8846139861411046, + "learning_rate": 3.3808035988831483e-06, + "loss": 0.1523, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4072 + }, + { + "epoch": 0.3916911092946098, + "grad_norm": 1.7910318109960603, + "learning_rate": 3.380089651821302e-06, + "loss": 0.1551, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4073 + }, + { + "epoch": 0.3917872770111074, + "grad_norm": 1.759573204069387, + "learning_rate": 3.379375622820658e-06, + "loss": 0.1271, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4074 + }, + { + "epoch": 0.39188344472760495, + "grad_norm": 2.768439849565482, + "learning_rate": 3.378661511947694e-06, + "loss": 0.1408, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4075 + }, + { + "epoch": 0.3919796124441025, + "grad_norm": 1.4996030453340987, + "learning_rate": 3.3779473192688954e-06, + "loss": 0.1236, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4076 + }, + { + "epoch": 0.3920757801606001, + "grad_norm": 2.551371095224532, + "learning_rate": 3.377233044850756e-06, + "loss": 0.14, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4077 + }, + { + "epoch": 0.39217194787709764, + "grad_norm": 2.236600214759873, + "learning_rate": 3.376518688759777e-06, + "loss": 0.1528, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4078 + }, + { + "epoch": 0.39226811559359526, + "grad_norm": 1.6736817891688813, + "learning_rate": 3.3758042510624668e-06, + "loss": 0.1393, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4079 + }, + { + "epoch": 0.3923642833100928, + "grad_norm": 2.364893571582755, + "learning_rate": 3.3750897318253407e-06, + "loss": 0.1544, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4080 + }, + { + "epoch": 0.3924604510265904, + "grad_norm": 2.444077498359661, + "learning_rate": 3.3743751311149232e-06, + "loss": 0.1624, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4081 + }, + { + "epoch": 0.39255661874308795, + "grad_norm": 4.63865251062283, + "learning_rate": 3.3736604489977465e-06, + "loss": 0.1834, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4082 + }, + { + "epoch": 0.3926527864595855, + "grad_norm": 2.4531438387186832, + "learning_rate": 3.3729456855403477e-06, + "loss": 0.1328, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4083 + }, + { + "epoch": 0.3927489541760831, + "grad_norm": 1.4840183436350336, + "learning_rate": 3.372230840809274e-06, + "loss": 0.1044, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4084 + }, + { + "epoch": 0.39284512189258064, + "grad_norm": 2.4872327082044077, + "learning_rate": 3.3715159148710795e-06, + "loss": 0.1368, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4085 + }, + { + "epoch": 0.39294128960907826, + "grad_norm": 2.2692835736115904, + "learning_rate": 3.370800907792325e-06, + "loss": 0.1331, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4086 + }, + { + "epoch": 0.3930374573255758, + "grad_norm": 2.155804183543574, + "learning_rate": 3.3700858196395823e-06, + "loss": 0.1508, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4087 + }, + { + "epoch": 0.3931336250420734, + "grad_norm": 1.6107301997672747, + "learning_rate": 3.369370650479425e-06, + "loss": 0.1428, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4088 + }, + { + "epoch": 0.39322979275857095, + "grad_norm": 1.8707246980610792, + "learning_rate": 3.3686554003784387e-06, + "loss": 0.1247, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4089 + }, + { + "epoch": 0.3933259604750685, + "grad_norm": 2.313192967879629, + "learning_rate": 3.367940069403216e-06, + "loss": 0.1388, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4090 + }, + { + "epoch": 0.3934221281915661, + "grad_norm": 2.193554667687966, + "learning_rate": 3.3672246576203543e-06, + "loss": 0.1387, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4091 + }, + { + "epoch": 0.39351829590806364, + "grad_norm": 1.922629868140291, + "learning_rate": 3.3665091650964614e-06, + "loss": 0.1306, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4092 + }, + { + "epoch": 0.39361446362456126, + "grad_norm": 1.7251785361804781, + "learning_rate": 3.365793591898152e-06, + "loss": 0.1469, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4093 + }, + { + "epoch": 0.3937106313410588, + "grad_norm": 1.5770075616783095, + "learning_rate": 3.3650779380920475e-06, + "loss": 0.1396, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4094 + }, + { + "epoch": 0.3938067990575564, + "grad_norm": 1.8754107423483188, + "learning_rate": 3.364362203744777e-06, + "loss": 0.1211, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4095 + }, + { + "epoch": 0.39390296677405395, + "grad_norm": 1.5998606686995043, + "learning_rate": 3.363646388922978e-06, + "loss": 0.1139, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4096 + }, + { + "epoch": 0.3939991344905515, + "grad_norm": 2.9855321837312907, + "learning_rate": 3.3629304936932948e-06, + "loss": 0.1428, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4097 + }, + { + "epoch": 0.3940953022070491, + "grad_norm": 1.6709846993393014, + "learning_rate": 3.3622145181223786e-06, + "loss": 0.1454, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4098 + }, + { + "epoch": 0.39419146992354664, + "grad_norm": 1.7839470865762228, + "learning_rate": 3.3614984622768896e-06, + "loss": 0.1301, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4099 + }, + { + "epoch": 0.39428763764004426, + "grad_norm": 2.4010118992258107, + "learning_rate": 3.3607823262234936e-06, + "loss": 0.1448, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4100 + }, + { + "epoch": 0.39438380535654183, + "grad_norm": 2.2160526225220556, + "learning_rate": 3.3600661100288664e-06, + "loss": 0.1426, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4101 + }, + { + "epoch": 0.3944799730730394, + "grad_norm": 1.7493258062088255, + "learning_rate": 3.3593498137596877e-06, + "loss": 0.1565, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4102 + }, + { + "epoch": 0.39457614078953696, + "grad_norm": 1.7840964836402406, + "learning_rate": 3.3586334374826474e-06, + "loss": 0.1301, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4103 + }, + { + "epoch": 0.3946723085060345, + "grad_norm": 1.972488115868015, + "learning_rate": 3.3579169812644434e-06, + "loss": 0.1397, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4104 + }, + { + "epoch": 0.3947684762225321, + "grad_norm": 1.5318361920232229, + "learning_rate": 3.3572004451717783e-06, + "loss": 0.1134, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4105 + }, + { + "epoch": 0.39486464393902965, + "grad_norm": 1.8240468947664665, + "learning_rate": 3.356483829271365e-06, + "loss": 0.1197, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4106 + }, + { + "epoch": 0.39496081165552727, + "grad_norm": 2.657426432839009, + "learning_rate": 3.355767133629921e-06, + "loss": 0.1211, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4107 + }, + { + "epoch": 0.39505697937202483, + "grad_norm": 2.363597499016477, + "learning_rate": 3.3550503583141726e-06, + "loss": 0.1347, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4108 + }, + { + "epoch": 0.3951531470885224, + "grad_norm": 1.4155739034546497, + "learning_rate": 3.3543335033908543e-06, + "loss": 0.1475, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4109 + }, + { + "epoch": 0.39524931480501996, + "grad_norm": 1.6383560426929609, + "learning_rate": 3.3536165689267073e-06, + "loss": 0.1475, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4110 + }, + { + "epoch": 0.3953454825215175, + "grad_norm": 2.132983646228702, + "learning_rate": 3.3528995549884802e-06, + "loss": 0.1627, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4111 + }, + { + "epoch": 0.3954416502380151, + "grad_norm": 2.046300639934102, + "learning_rate": 3.352182461642929e-06, + "loss": 0.1447, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4112 + }, + { + "epoch": 0.39553781795451265, + "grad_norm": 2.2377644530313003, + "learning_rate": 3.3514652889568168e-06, + "loss": 0.1293, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4113 + }, + { + "epoch": 0.39563398567101027, + "grad_norm": 1.560587809145121, + "learning_rate": 3.3507480369969134e-06, + "loss": 0.164, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4114 + }, + { + "epoch": 0.39573015338750783, + "grad_norm": 1.549454260126186, + "learning_rate": 3.3500307058299995e-06, + "loss": 0.1379, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4115 + }, + { + "epoch": 0.3958263211040054, + "grad_norm": 1.8781341181893416, + "learning_rate": 3.349313295522859e-06, + "loss": 0.1269, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4116 + }, + { + "epoch": 0.39592248882050296, + "grad_norm": 2.1503124126722764, + "learning_rate": 3.3485958061422844e-06, + "loss": 0.1388, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4117 + }, + { + "epoch": 0.3960186565370005, + "grad_norm": 3.979512095012615, + "learning_rate": 3.3478782377550777e-06, + "loss": 0.1574, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4118 + }, + { + "epoch": 0.3961148242534981, + "grad_norm": 2.2775008260303347, + "learning_rate": 3.3471605904280446e-06, + "loss": 0.1624, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4119 + }, + { + "epoch": 0.39621099196999565, + "grad_norm": 2.3333678331677645, + "learning_rate": 3.3464428642280004e-06, + "loss": 0.1328, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4120 + }, + { + "epoch": 0.39630715968649327, + "grad_norm": 2.341477366068207, + "learning_rate": 3.345725059221769e-06, + "loss": 0.162, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4121 + }, + { + "epoch": 0.39640332740299083, + "grad_norm": 2.271869340124124, + "learning_rate": 3.3450071754761782e-06, + "loss": 0.1188, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4122 + }, + { + "epoch": 0.3964994951194884, + "grad_norm": 2.8230111310723176, + "learning_rate": 3.344289213058066e-06, + "loss": 0.1323, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4123 + }, + { + "epoch": 0.39659566283598596, + "grad_norm": 3.5090828927701843, + "learning_rate": 3.343571172034276e-06, + "loss": 0.1539, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4124 + }, + { + "epoch": 0.3966918305524835, + "grad_norm": 1.3965657799271212, + "learning_rate": 3.3428530524716603e-06, + "loss": 0.1419, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4125 + }, + { + "epoch": 0.3967879982689811, + "grad_norm": 2.1783645070632844, + "learning_rate": 3.342134854437078e-06, + "loss": 0.1357, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4126 + }, + { + "epoch": 0.39688416598547865, + "grad_norm": 3.7006993559485313, + "learning_rate": 3.3414165779973946e-06, + "loss": 0.114, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4127 + }, + { + "epoch": 0.3969803337019763, + "grad_norm": 1.8929084268775216, + "learning_rate": 3.340698223219484e-06, + "loss": 0.1043, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4128 + }, + { + "epoch": 0.39707650141847384, + "grad_norm": 1.4786074042097697, + "learning_rate": 3.3399797901702273e-06, + "loss": 0.1207, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4129 + }, + { + "epoch": 0.3971726691349714, + "grad_norm": 1.9725049784459532, + "learning_rate": 3.3392612789165124e-06, + "loss": 0.1348, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4130 + }, + { + "epoch": 0.39726883685146896, + "grad_norm": 3.2793748639692906, + "learning_rate": 3.338542689525235e-06, + "loss": 0.1259, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4131 + }, + { + "epoch": 0.39736500456796653, + "grad_norm": 2.1705922429901405, + "learning_rate": 3.3378240220632972e-06, + "loss": 0.1453, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4132 + }, + { + "epoch": 0.3974611722844641, + "grad_norm": 2.492439026415919, + "learning_rate": 3.3371052765976096e-06, + "loss": 0.143, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4133 + }, + { + "epoch": 0.39755734000096166, + "grad_norm": 1.746985835731032, + "learning_rate": 3.3363864531950884e-06, + "loss": 0.1336, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4134 + }, + { + "epoch": 0.3976535077174593, + "grad_norm": 3.1670087031361147, + "learning_rate": 3.33566755192266e-06, + "loss": 0.1442, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4135 + }, + { + "epoch": 0.39774967543395684, + "grad_norm": 1.87859187885045, + "learning_rate": 3.3349485728472536e-06, + "loss": 0.1338, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4136 + }, + { + "epoch": 0.3978458431504544, + "grad_norm": 3.6114274342104005, + "learning_rate": 3.33422951603581e-06, + "loss": 0.1409, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4137 + }, + { + "epoch": 0.39794201086695197, + "grad_norm": 2.489334915346787, + "learning_rate": 3.3335103815552744e-06, + "loss": 0.1285, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4138 + }, + { + "epoch": 0.39803817858344953, + "grad_norm": 2.55960116574276, + "learning_rate": 3.3327911694726013e-06, + "loss": 0.1355, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4139 + }, + { + "epoch": 0.3981343462999471, + "grad_norm": 1.8941374923729675, + "learning_rate": 3.3320718798547503e-06, + "loss": 0.1496, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4140 + }, + { + "epoch": 0.39823051401644466, + "grad_norm": 1.4661938416181715, + "learning_rate": 3.3313525127686897e-06, + "loss": 0.1112, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4141 + }, + { + "epoch": 0.3983266817329423, + "grad_norm": 2.6648974045748193, + "learning_rate": 3.330633068281394e-06, + "loss": 0.1644, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4142 + }, + { + "epoch": 0.39842284944943984, + "grad_norm": 2.9448420019295445, + "learning_rate": 3.3299135464598464e-06, + "loss": 0.1688, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4143 + }, + { + "epoch": 0.3985190171659374, + "grad_norm": 1.9652617352657524, + "learning_rate": 3.329193947371036e-06, + "loss": 0.12, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4144 + }, + { + "epoch": 0.39861518488243497, + "grad_norm": 1.604254831053422, + "learning_rate": 3.328474271081959e-06, + "loss": 0.1002, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4145 + }, + { + "epoch": 0.39871135259893253, + "grad_norm": 2.098327897847917, + "learning_rate": 3.3277545176596203e-06, + "loss": 0.1197, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4146 + }, + { + "epoch": 0.3988075203154301, + "grad_norm": 2.5939073261356143, + "learning_rate": 3.32703468717103e-06, + "loss": 0.1133, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4147 + }, + { + "epoch": 0.39890368803192766, + "grad_norm": 1.30698986180651, + "learning_rate": 3.3263147796832068e-06, + "loss": 0.106, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4148 + }, + { + "epoch": 0.3989998557484253, + "grad_norm": 1.3986044722100763, + "learning_rate": 3.3255947952631764e-06, + "loss": 0.1275, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4149 + }, + { + "epoch": 0.39909602346492284, + "grad_norm": 1.6136374187535019, + "learning_rate": 3.32487473397797e-06, + "loss": 0.1309, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4150 + }, + { + "epoch": 0.3991921911814204, + "grad_norm": 5.708936078784523, + "learning_rate": 3.3241545958946286e-06, + "loss": 0.1591, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4151 + }, + { + "epoch": 0.39928835889791797, + "grad_norm": 1.5115045767695163, + "learning_rate": 3.3234343810801995e-06, + "loss": 0.1109, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4152 + }, + { + "epoch": 0.39938452661441554, + "grad_norm": 1.643550770222992, + "learning_rate": 3.3227140896017353e-06, + "loss": 0.1299, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4153 + }, + { + "epoch": 0.3994806943309131, + "grad_norm": 2.3308646784543776, + "learning_rate": 3.3219937215262977e-06, + "loss": 0.1414, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4154 + }, + { + "epoch": 0.39957686204741066, + "grad_norm": 2.481059429142308, + "learning_rate": 3.321273276920955e-06, + "loss": 0.1466, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4155 + }, + { + "epoch": 0.3996730297639083, + "grad_norm": 1.4321298091157366, + "learning_rate": 3.320552755852783e-06, + "loss": 0.1146, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4156 + }, + { + "epoch": 0.39976919748040585, + "grad_norm": 1.9922735072878652, + "learning_rate": 3.319832158388864e-06, + "loss": 0.1523, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4157 + }, + { + "epoch": 0.3998653651969034, + "grad_norm": 2.174565539917981, + "learning_rate": 3.319111484596287e-06, + "loss": 0.148, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4158 + }, + { + "epoch": 0.399961532913401, + "grad_norm": 1.7542975255643078, + "learning_rate": 3.318390734542149e-06, + "loss": 0.1447, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4159 + }, + { + "epoch": 0.40005770062989854, + "grad_norm": 1.7684314656246527, + "learning_rate": 3.3176699082935546e-06, + "loss": 0.1181, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4160 + }, + { + "epoch": 0.4001538683463961, + "grad_norm": 2.0519180502671053, + "learning_rate": 3.3169490059176142e-06, + "loss": 0.1121, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4161 + }, + { + "epoch": 0.40025003606289367, + "grad_norm": 1.7267446786248435, + "learning_rate": 3.3162280274814453e-06, + "loss": 0.1327, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4162 + }, + { + "epoch": 0.4003462037793913, + "grad_norm": 1.879264859336858, + "learning_rate": 3.315506973052174e-06, + "loss": 0.1442, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4163 + }, + { + "epoch": 0.40044237149588885, + "grad_norm": 2.629120781202945, + "learning_rate": 3.3147858426969316e-06, + "loss": 0.1244, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4164 + }, + { + "epoch": 0.4005385392123864, + "grad_norm": 2.276645839914186, + "learning_rate": 3.3140646364828577e-06, + "loss": 0.1303, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4165 + }, + { + "epoch": 0.400634706928884, + "grad_norm": 4.343533956008309, + "learning_rate": 3.313343354477099e-06, + "loss": 0.1673, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4166 + }, + { + "epoch": 0.40073087464538154, + "grad_norm": 2.5032892560504103, + "learning_rate": 3.312621996746808e-06, + "loss": 0.1894, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4167 + }, + { + "epoch": 0.4008270423618791, + "grad_norm": 2.556251793083037, + "learning_rate": 3.3119005633591462e-06, + "loss": 0.1403, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4168 + }, + { + "epoch": 0.40092321007837667, + "grad_norm": 2.252005702712235, + "learning_rate": 3.3111790543812806e-06, + "loss": 0.147, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4169 + }, + { + "epoch": 0.4010193777948743, + "grad_norm": 4.7436658025541005, + "learning_rate": 3.310457469880385e-06, + "loss": 0.1264, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4170 + }, + { + "epoch": 0.40111554551137185, + "grad_norm": 4.142397537372533, + "learning_rate": 3.3097358099236416e-06, + "loss": 0.1876, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4171 + }, + { + "epoch": 0.4012117132278694, + "grad_norm": 3.2524074161198793, + "learning_rate": 3.3090140745782393e-06, + "loss": 0.1503, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4172 + }, + { + "epoch": 0.401307880944367, + "grad_norm": 1.5177550215906168, + "learning_rate": 3.3082922639113724e-06, + "loss": 0.1425, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4173 + }, + { + "epoch": 0.40140404866086454, + "grad_norm": 1.8360655200742428, + "learning_rate": 3.3075703779902454e-06, + "loss": 0.1362, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4174 + }, + { + "epoch": 0.4015002163773621, + "grad_norm": 1.5010308489421211, + "learning_rate": 3.306848416882066e-06, + "loss": 0.1317, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4175 + }, + { + "epoch": 0.40159638409385967, + "grad_norm": 2.4503190648561524, + "learning_rate": 3.3061263806540513e-06, + "loss": 0.1687, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4176 + }, + { + "epoch": 0.4016925518103573, + "grad_norm": 1.9405557171682755, + "learning_rate": 3.3054042693734258e-06, + "loss": 0.1279, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4177 + }, + { + "epoch": 0.40178871952685485, + "grad_norm": 1.3500988433155372, + "learning_rate": 3.304682083107419e-06, + "loss": 0.1265, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4178 + }, + { + "epoch": 0.4018848872433524, + "grad_norm": 2.2123858578284494, + "learning_rate": 3.303959821923269e-06, + "loss": 0.1304, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4179 + }, + { + "epoch": 0.40198105495985, + "grad_norm": 1.6260870623451724, + "learning_rate": 3.303237485888221e-06, + "loss": 0.115, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4180 + }, + { + "epoch": 0.40207722267634755, + "grad_norm": 1.8824104920412188, + "learning_rate": 3.3025150750695243e-06, + "loss": 0.1257, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4181 + }, + { + "epoch": 0.4021733903928451, + "grad_norm": 1.8010107836493539, + "learning_rate": 3.3017925895344405e-06, + "loss": 0.1304, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4182 + }, + { + "epoch": 0.4022695581093427, + "grad_norm": 1.9687391313835814, + "learning_rate": 3.3010700293502317e-06, + "loss": 0.1374, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4183 + }, + { + "epoch": 0.4023657258258403, + "grad_norm": 1.571256044712099, + "learning_rate": 3.3003473945841725e-06, + "loss": 0.1297, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4184 + }, + { + "epoch": 0.40246189354233786, + "grad_norm": 2.2753119352720446, + "learning_rate": 3.2996246853035417e-06, + "loss": 0.132, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4185 + }, + { + "epoch": 0.4025580612588354, + "grad_norm": 2.0600545101440946, + "learning_rate": 3.2989019015756253e-06, + "loss": 0.1356, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4186 + }, + { + "epoch": 0.402654228975333, + "grad_norm": 2.1815333802089927, + "learning_rate": 3.2981790434677167e-06, + "loss": 0.1403, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4187 + }, + { + "epoch": 0.40275039669183055, + "grad_norm": 1.949370979256937, + "learning_rate": 3.297456111047116e-06, + "loss": 0.1421, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4188 + }, + { + "epoch": 0.4028465644083281, + "grad_norm": 2.6734923524723047, + "learning_rate": 3.29673310438113e-06, + "loss": 0.1649, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4189 + }, + { + "epoch": 0.4029427321248257, + "grad_norm": 2.6948492306873093, + "learning_rate": 3.296010023537072e-06, + "loss": 0.148, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4190 + }, + { + "epoch": 0.4030388998413233, + "grad_norm": 2.625130940680087, + "learning_rate": 3.2952868685822647e-06, + "loss": 0.1624, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4191 + }, + { + "epoch": 0.40313506755782086, + "grad_norm": 2.19600311645824, + "learning_rate": 3.294563639584034e-06, + "loss": 0.1114, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4192 + }, + { + "epoch": 0.4032312352743184, + "grad_norm": 1.8416919285367666, + "learning_rate": 3.2938403366097154e-06, + "loss": 0.1311, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4193 + }, + { + "epoch": 0.403327402990816, + "grad_norm": 1.3536094580200846, + "learning_rate": 3.293116959726651e-06, + "loss": 0.1009, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4194 + }, + { + "epoch": 0.40342357070731355, + "grad_norm": 1.5486413139297417, + "learning_rate": 3.2923935090021875e-06, + "loss": 0.1334, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4195 + }, + { + "epoch": 0.4035197384238111, + "grad_norm": 2.6923253903119937, + "learning_rate": 3.291669984503682e-06, + "loss": 0.1226, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4196 + }, + { + "epoch": 0.4036159061403087, + "grad_norm": 2.7263370644113047, + "learning_rate": 3.290946386298495e-06, + "loss": 0.1754, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4197 + }, + { + "epoch": 0.4037120738568063, + "grad_norm": 1.6017345456158363, + "learning_rate": 3.290222714453997e-06, + "loss": 0.1426, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4198 + }, + { + "epoch": 0.40380824157330386, + "grad_norm": 1.5404039121665614, + "learning_rate": 3.289498969037563e-06, + "loss": 0.1326, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4199 + }, + { + "epoch": 0.4039044092898014, + "grad_norm": 2.873724159163823, + "learning_rate": 3.2887751501165755e-06, + "loss": 0.1532, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4200 + }, + { + "epoch": 0.404000577006299, + "grad_norm": 2.7454820097651873, + "learning_rate": 3.2880512577584244e-06, + "loss": 0.1345, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4201 + }, + { + "epoch": 0.40409674472279655, + "grad_norm": 2.874312449544724, + "learning_rate": 3.2873272920305065e-06, + "loss": 0.1881, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4202 + }, + { + "epoch": 0.4041929124392941, + "grad_norm": 2.1009909340140793, + "learning_rate": 3.2866032530002247e-06, + "loss": 0.1687, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4203 + }, + { + "epoch": 0.4042890801557917, + "grad_norm": 1.96237676624004, + "learning_rate": 3.2858791407349894e-06, + "loss": 0.1423, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4204 + }, + { + "epoch": 0.4043852478722893, + "grad_norm": 3.7155317360279008, + "learning_rate": 3.2851549553022168e-06, + "loss": 0.1486, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4205 + }, + { + "epoch": 0.40448141558878686, + "grad_norm": 2.461453777692559, + "learning_rate": 3.2844306967693306e-06, + "loss": 0.144, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4206 + }, + { + "epoch": 0.4045775833052844, + "grad_norm": 1.5495799880296102, + "learning_rate": 3.283706365203762e-06, + "loss": 0.1248, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4207 + }, + { + "epoch": 0.404673751021782, + "grad_norm": 2.2945591568020474, + "learning_rate": 3.282981960672948e-06, + "loss": 0.158, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4208 + }, + { + "epoch": 0.40476991873827955, + "grad_norm": 1.1904471398908514, + "learning_rate": 3.282257483244332e-06, + "loss": 0.1131, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4209 + }, + { + "epoch": 0.4048660864547771, + "grad_norm": 1.7366053006213993, + "learning_rate": 3.2815329329853656e-06, + "loss": 0.1487, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4210 + }, + { + "epoch": 0.4049622541712747, + "grad_norm": 1.5314906921313052, + "learning_rate": 3.2808083099635063e-06, + "loss": 0.1103, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4211 + }, + { + "epoch": 0.4050584218877723, + "grad_norm": 2.337030693459673, + "learning_rate": 3.280083614246218e-06, + "loss": 0.143, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4212 + }, + { + "epoch": 0.40515458960426987, + "grad_norm": 1.6134645095927482, + "learning_rate": 3.279358845900973e-06, + "loss": 0.1472, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4213 + }, + { + "epoch": 0.40525075732076743, + "grad_norm": 1.554158474170206, + "learning_rate": 3.278634004995248e-06, + "loss": 0.1337, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4214 + }, + { + "epoch": 0.405346925037265, + "grad_norm": 2.5728215935290972, + "learning_rate": 3.2779090915965285e-06, + "loss": 0.1484, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4215 + }, + { + "epoch": 0.40544309275376256, + "grad_norm": 2.241455885018332, + "learning_rate": 3.2771841057723064e-06, + "loss": 0.1314, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4216 + }, + { + "epoch": 0.4055392604702601, + "grad_norm": 1.797131752710931, + "learning_rate": 3.276459047590079e-06, + "loss": 0.1537, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4217 + }, + { + "epoch": 0.4056354281867577, + "grad_norm": 1.554281951587333, + "learning_rate": 3.275733917117351e-06, + "loss": 0.1156, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4218 + }, + { + "epoch": 0.4057315959032553, + "grad_norm": 1.6025562857886904, + "learning_rate": 3.275008714421635e-06, + "loss": 0.1422, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4219 + }, + { + "epoch": 0.40582776361975287, + "grad_norm": 3.4283616401451935, + "learning_rate": 3.2742834395704486e-06, + "loss": 0.1371, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4220 + }, + { + "epoch": 0.40592393133625043, + "grad_norm": 2.6436861800508233, + "learning_rate": 3.273558092631318e-06, + "loss": 0.1275, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4221 + }, + { + "epoch": 0.406020099052748, + "grad_norm": 1.6108484138262453, + "learning_rate": 3.2728326736717743e-06, + "loss": 0.1159, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4222 + }, + { + "epoch": 0.40611626676924556, + "grad_norm": 2.0552151464569723, + "learning_rate": 3.2721071827593553e-06, + "loss": 0.1514, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4223 + }, + { + "epoch": 0.4062124344857431, + "grad_norm": 1.460534318050376, + "learning_rate": 3.2713816199616078e-06, + "loss": 0.1237, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4224 + }, + { + "epoch": 0.4063086022022407, + "grad_norm": 4.21313395226526, + "learning_rate": 3.2706559853460818e-06, + "loss": 0.1325, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4225 + }, + { + "epoch": 0.4064047699187383, + "grad_norm": 2.5162546586026817, + "learning_rate": 3.269930278980337e-06, + "loss": 0.1723, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4226 + }, + { + "epoch": 0.40650093763523587, + "grad_norm": 2.653940671952781, + "learning_rate": 3.2692045009319397e-06, + "loss": 0.1266, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4227 + }, + { + "epoch": 0.40659710535173343, + "grad_norm": 2.337489583303464, + "learning_rate": 3.2684786512684598e-06, + "loss": 0.1329, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4228 + }, + { + "epoch": 0.406693273068231, + "grad_norm": 1.8963750456526198, + "learning_rate": 3.2677527300574772e-06, + "loss": 0.1138, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4229 + }, + { + "epoch": 0.40678944078472856, + "grad_norm": 4.13289328277335, + "learning_rate": 3.2670267373665778e-06, + "loss": 0.1282, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4230 + }, + { + "epoch": 0.4068856085012261, + "grad_norm": 3.6454643936014564, + "learning_rate": 3.2663006732633516e-06, + "loss": 0.1194, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4231 + }, + { + "epoch": 0.4069817762177237, + "grad_norm": 1.8205339125274713, + "learning_rate": 3.265574537815398e-06, + "loss": 0.1477, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4232 + }, + { + "epoch": 0.4070779439342213, + "grad_norm": 1.9204683318424867, + "learning_rate": 3.2648483310903235e-06, + "loss": 0.1431, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4233 + }, + { + "epoch": 0.4071741116507189, + "grad_norm": 2.1938681200484056, + "learning_rate": 3.264122053155738e-06, + "loss": 0.1577, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4234 + }, + { + "epoch": 0.40727027936721644, + "grad_norm": 1.848201522002737, + "learning_rate": 3.2633957040792624e-06, + "loss": 0.1237, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4235 + }, + { + "epoch": 0.407366447083714, + "grad_norm": 2.638234019615287, + "learning_rate": 3.262669283928519e-06, + "loss": 0.1581, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4236 + }, + { + "epoch": 0.40746261480021156, + "grad_norm": 2.130976573054674, + "learning_rate": 3.261942792771142e-06, + "loss": 0.1052, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4237 + }, + { + "epoch": 0.40755878251670913, + "grad_norm": 1.6904296886090266, + "learning_rate": 3.261216230674768e-06, + "loss": 0.1225, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4238 + }, + { + "epoch": 0.4076549502332067, + "grad_norm": 1.4948940359393008, + "learning_rate": 3.2604895977070428e-06, + "loss": 0.1174, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4239 + }, + { + "epoch": 0.4077511179497043, + "grad_norm": 1.8928448452209437, + "learning_rate": 3.2597628939356174e-06, + "loss": 0.1302, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4240 + }, + { + "epoch": 0.4078472856662019, + "grad_norm": 2.1850705110947413, + "learning_rate": 3.2590361194281513e-06, + "loss": 0.139, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4241 + }, + { + "epoch": 0.40794345338269944, + "grad_norm": 4.023214773086458, + "learning_rate": 3.258309274252307e-06, + "loss": 0.1427, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4242 + }, + { + "epoch": 0.408039621099197, + "grad_norm": 3.1056379302424846, + "learning_rate": 3.2575823584757578e-06, + "loss": 0.1138, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4243 + }, + { + "epoch": 0.40813578881569457, + "grad_norm": 4.459099923359694, + "learning_rate": 3.2568553721661807e-06, + "loss": 0.1316, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4244 + }, + { + "epoch": 0.40823195653219213, + "grad_norm": 2.6653702375863815, + "learning_rate": 3.2561283153912606e-06, + "loss": 0.1538, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4245 + }, + { + "epoch": 0.4083281242486897, + "grad_norm": 2.1350445467045827, + "learning_rate": 3.255401188218687e-06, + "loss": 0.1622, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4246 + }, + { + "epoch": 0.4084242919651873, + "grad_norm": 1.5447010382108581, + "learning_rate": 3.2546739907161603e-06, + "loss": 0.1183, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4247 + }, + { + "epoch": 0.4085204596816849, + "grad_norm": 2.3538683342152398, + "learning_rate": 3.2539467229513816e-06, + "loss": 0.1425, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4248 + }, + { + "epoch": 0.40861662739818244, + "grad_norm": 3.3207882838324423, + "learning_rate": 3.253219384992064e-06, + "loss": 0.1625, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4249 + }, + { + "epoch": 0.40871279511468, + "grad_norm": 2.03709823710016, + "learning_rate": 3.252491976905923e-06, + "loss": 0.1468, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4250 + }, + { + "epoch": 0.40880896283117757, + "grad_norm": 3.277142208065997, + "learning_rate": 3.2517644987606827e-06, + "loss": 0.1286, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4251 + }, + { + "epoch": 0.40890513054767513, + "grad_norm": 2.0187391937921855, + "learning_rate": 3.2510369506240747e-06, + "loss": 0.1196, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4252 + }, + { + "epoch": 0.4090012982641727, + "grad_norm": 2.1363838773253745, + "learning_rate": 3.2503093325638334e-06, + "loss": 0.1385, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4253 + }, + { + "epoch": 0.4090974659806703, + "grad_norm": 1.77508393860351, + "learning_rate": 3.2495816446477046e-06, + "loss": 0.1326, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4254 + }, + { + "epoch": 0.4091936336971679, + "grad_norm": 1.7608672076187746, + "learning_rate": 3.248853886943436e-06, + "loss": 0.1362, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4255 + }, + { + "epoch": 0.40928980141366544, + "grad_norm": 3.420141868127544, + "learning_rate": 3.248126059518785e-06, + "loss": 0.1358, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4256 + }, + { + "epoch": 0.409385969130163, + "grad_norm": 1.4901749230112702, + "learning_rate": 3.2473981624415133e-06, + "loss": 0.0999, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4257 + }, + { + "epoch": 0.40948213684666057, + "grad_norm": 2.7363512832198853, + "learning_rate": 3.2466701957793916e-06, + "loss": 0.1503, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4258 + }, + { + "epoch": 0.40957830456315814, + "grad_norm": 1.5791327674716205, + "learning_rate": 3.2459421596001943e-06, + "loss": 0.1231, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4259 + }, + { + "epoch": 0.4096744722796557, + "grad_norm": 1.5859996249107355, + "learning_rate": 3.2452140539717047e-06, + "loss": 0.1224, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4260 + }, + { + "epoch": 0.4097706399961533, + "grad_norm": 1.965936782631608, + "learning_rate": 3.2444858789617104e-06, + "loss": 0.1546, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4261 + }, + { + "epoch": 0.4098668077126509, + "grad_norm": 1.5141889668454498, + "learning_rate": 3.2437576346380077e-06, + "loss": 0.1278, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4262 + }, + { + "epoch": 0.40996297542914845, + "grad_norm": 1.6605767758908871, + "learning_rate": 3.2430293210683976e-06, + "loss": 0.1618, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4263 + }, + { + "epoch": 0.410059143145646, + "grad_norm": 1.8837142973034844, + "learning_rate": 3.2423009383206876e-06, + "loss": 0.1583, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4264 + }, + { + "epoch": 0.4101553108621436, + "grad_norm": 1.624695023658855, + "learning_rate": 3.2415724864626925e-06, + "loss": 0.1169, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4265 + }, + { + "epoch": 0.41025147857864114, + "grad_norm": 1.643477794890842, + "learning_rate": 3.240843965562234e-06, + "loss": 0.1257, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4266 + }, + { + "epoch": 0.4103476462951387, + "grad_norm": 1.3726424359077674, + "learning_rate": 3.2401153756871378e-06, + "loss": 0.117, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4267 + }, + { + "epoch": 0.4104438140116363, + "grad_norm": 2.968315385415517, + "learning_rate": 3.239386716905239e-06, + "loss": 0.1335, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4268 + }, + { + "epoch": 0.4105399817281339, + "grad_norm": 4.616427773035442, + "learning_rate": 3.2386579892843777e-06, + "loss": 0.1512, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4269 + }, + { + "epoch": 0.41063614944463145, + "grad_norm": 2.1744138181731474, + "learning_rate": 3.2379291928924e-06, + "loss": 0.1122, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4270 + }, + { + "epoch": 0.410732317161129, + "grad_norm": 1.8824690399495814, + "learning_rate": 3.2372003277971594e-06, + "loss": 0.1331, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4271 + }, + { + "epoch": 0.4108284848776266, + "grad_norm": 1.6167611712550225, + "learning_rate": 3.236471394066515e-06, + "loss": 0.1393, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4272 + }, + { + "epoch": 0.41092465259412414, + "grad_norm": 2.7118529032704153, + "learning_rate": 3.2357423917683312e-06, + "loss": 0.1313, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4273 + }, + { + "epoch": 0.4110208203106217, + "grad_norm": 6.093165342904416, + "learning_rate": 3.2350133209704816e-06, + "loss": 0.1463, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4274 + }, + { + "epoch": 0.4111169880271193, + "grad_norm": 2.467863882593607, + "learning_rate": 3.2342841817408453e-06, + "loss": 0.1345, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4275 + }, + { + "epoch": 0.4112131557436169, + "grad_norm": 3.532677783714886, + "learning_rate": 3.233554974147306e-06, + "loss": 0.1522, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4276 + }, + { + "epoch": 0.41130932346011445, + "grad_norm": 2.3182616606408994, + "learning_rate": 3.2328256982577555e-06, + "loss": 0.1062, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4277 + }, + { + "epoch": 0.411405491176612, + "grad_norm": 2.23270848688876, + "learning_rate": 3.2320963541400913e-06, + "loss": 0.1283, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4278 + }, + { + "epoch": 0.4115016588931096, + "grad_norm": 3.7258020508745546, + "learning_rate": 3.2313669418622168e-06, + "loss": 0.1351, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4279 + }, + { + "epoch": 0.41159782660960714, + "grad_norm": 2.7364481466036525, + "learning_rate": 3.2306374614920434e-06, + "loss": 0.1227, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4280 + }, + { + "epoch": 0.4116939943261047, + "grad_norm": 4.032399027476922, + "learning_rate": 3.229907913097487e-06, + "loss": 0.1325, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4281 + }, + { + "epoch": 0.4117901620426023, + "grad_norm": 1.7529026927328548, + "learning_rate": 3.2291782967464706e-06, + "loss": 0.127, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4282 + }, + { + "epoch": 0.4118863297590999, + "grad_norm": 2.1045709444493146, + "learning_rate": 3.2284486125069243e-06, + "loss": 0.1737, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4283 + }, + { + "epoch": 0.41198249747559745, + "grad_norm": 1.8938703572188957, + "learning_rate": 3.2277188604467826e-06, + "loss": 0.1463, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4284 + }, + { + "epoch": 0.412078665192095, + "grad_norm": 3.320819411862361, + "learning_rate": 3.2269890406339875e-06, + "loss": 0.1429, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4285 + }, + { + "epoch": 0.4121748329085926, + "grad_norm": 1.8656924396710826, + "learning_rate": 3.2262591531364895e-06, + "loss": 0.1234, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4286 + }, + { + "epoch": 0.41227100062509014, + "grad_norm": 2.4452974242864003, + "learning_rate": 3.2255291980222402e-06, + "loss": 0.173, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4287 + }, + { + "epoch": 0.4123671683415877, + "grad_norm": 2.058146184913063, + "learning_rate": 3.2247991753592018e-06, + "loss": 0.1718, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4288 + }, + { + "epoch": 0.41246333605808533, + "grad_norm": 1.8217206626741818, + "learning_rate": 3.2240690852153418e-06, + "loss": 0.1365, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4289 + }, + { + "epoch": 0.4125595037745829, + "grad_norm": 2.432367520332849, + "learning_rate": 3.2233389276586325e-06, + "loss": 0.147, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4290 + }, + { + "epoch": 0.41265567149108046, + "grad_norm": 1.7285399269601578, + "learning_rate": 3.2226087027570546e-06, + "loss": 0.1239, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4291 + }, + { + "epoch": 0.412751839207578, + "grad_norm": 1.5463863934753086, + "learning_rate": 3.2218784105785932e-06, + "loss": 0.1291, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4292 + }, + { + "epoch": 0.4128480069240756, + "grad_norm": 1.2872641749115312, + "learning_rate": 3.2211480511912413e-06, + "loss": 0.1255, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4293 + }, + { + "epoch": 0.41294417464057315, + "grad_norm": 2.4580994980504243, + "learning_rate": 3.2204176246629978e-06, + "loss": 0.1417, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4294 + }, + { + "epoch": 0.4130403423570707, + "grad_norm": 2.13359431251544, + "learning_rate": 3.2196871310618655e-06, + "loss": 0.1359, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4295 + }, + { + "epoch": 0.41313651007356833, + "grad_norm": 2.7446577217451464, + "learning_rate": 3.2189565704558573e-06, + "loss": 0.1336, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4296 + }, + { + "epoch": 0.4132326777900659, + "grad_norm": 1.6598575731515097, + "learning_rate": 3.2182259429129904e-06, + "loss": 0.1534, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4297 + }, + { + "epoch": 0.41332884550656346, + "grad_norm": 1.9851040906583366, + "learning_rate": 3.2174952485012866e-06, + "loss": 0.131, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4298 + }, + { + "epoch": 0.413425013223061, + "grad_norm": 3.351542476214577, + "learning_rate": 3.216764487288777e-06, + "loss": 0.1352, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4299 + }, + { + "epoch": 0.4135211809395586, + "grad_norm": 3.284384780217176, + "learning_rate": 3.2160336593434977e-06, + "loss": 0.1602, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4300 + }, + { + "epoch": 0.41361734865605615, + "grad_norm": 3.4905705320790963, + "learning_rate": 3.2153027647334893e-06, + "loss": 0.1454, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4301 + }, + { + "epoch": 0.4137135163725537, + "grad_norm": 2.7596614302177533, + "learning_rate": 3.2145718035268024e-06, + "loss": 0.1392, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4302 + }, + { + "epoch": 0.41380968408905133, + "grad_norm": 1.7593182597302137, + "learning_rate": 3.213840775791489e-06, + "loss": 0.1159, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4303 + }, + { + "epoch": 0.4139058518055489, + "grad_norm": 2.8375668742330444, + "learning_rate": 3.213109681595612e-06, + "loss": 0.141, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4304 + }, + { + "epoch": 0.41400201952204646, + "grad_norm": 5.168747910225692, + "learning_rate": 3.2123785210072373e-06, + "loss": 0.1611, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4305 + }, + { + "epoch": 0.414098187238544, + "grad_norm": 4.189841833587993, + "learning_rate": 3.211647294094437e-06, + "loss": 0.1424, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4306 + }, + { + "epoch": 0.4141943549550416, + "grad_norm": 5.124215170545894, + "learning_rate": 3.210916000925292e-06, + "loss": 0.1433, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4307 + }, + { + "epoch": 0.41429052267153915, + "grad_norm": 4.360275875937034, + "learning_rate": 3.210184641567887e-06, + "loss": 0.139, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4308 + }, + { + "epoch": 0.4143866903880367, + "grad_norm": 1.7439836194543021, + "learning_rate": 3.209453216090314e-06, + "loss": 0.1282, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4309 + }, + { + "epoch": 0.41448285810453434, + "grad_norm": 1.6381375372248184, + "learning_rate": 3.20872172456067e-06, + "loss": 0.1471, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4310 + }, + { + "epoch": 0.4145790258210319, + "grad_norm": 3.0577247299142396, + "learning_rate": 3.2079901670470603e-06, + "loss": 0.1634, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4311 + }, + { + "epoch": 0.41467519353752946, + "grad_norm": 3.5659491402237276, + "learning_rate": 3.2072585436175927e-06, + "loss": 0.1385, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4312 + }, + { + "epoch": 0.414771361254027, + "grad_norm": 4.6635941273392225, + "learning_rate": 3.2065268543403856e-06, + "loss": 0.1363, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4313 + }, + { + "epoch": 0.4148675289705246, + "grad_norm": 2.310122108098694, + "learning_rate": 3.20579509928356e-06, + "loss": 0.1242, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4314 + }, + { + "epoch": 0.41496369668702215, + "grad_norm": 1.5876323584175274, + "learning_rate": 3.2050632785152446e-06, + "loss": 0.1378, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4315 + }, + { + "epoch": 0.4150598644035197, + "grad_norm": 1.8308206826568294, + "learning_rate": 3.2043313921035747e-06, + "loss": 0.1599, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4316 + }, + { + "epoch": 0.41515603212001734, + "grad_norm": 2.0584539860319957, + "learning_rate": 3.2035994401166896e-06, + "loss": 0.136, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4317 + }, + { + "epoch": 0.4152521998365149, + "grad_norm": 3.0798679062622485, + "learning_rate": 3.2028674226227374e-06, + "loss": 0.1192, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4318 + }, + { + "epoch": 0.41534836755301247, + "grad_norm": 3.3908823569302387, + "learning_rate": 3.2021353396898704e-06, + "loss": 0.13, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4319 + }, + { + "epoch": 0.41544453526951003, + "grad_norm": 2.594849757295579, + "learning_rate": 3.201403191386247e-06, + "loss": 0.1304, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4320 + }, + { + "epoch": 0.4155407029860076, + "grad_norm": 1.6293995380818131, + "learning_rate": 3.2006709777800333e-06, + "loss": 0.1274, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4321 + }, + { + "epoch": 0.41563687070250516, + "grad_norm": 2.036305681061889, + "learning_rate": 3.1999386989394e-06, + "loss": 0.1099, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4322 + }, + { + "epoch": 0.4157330384190027, + "grad_norm": 2.734250783552418, + "learning_rate": 3.1992063549325237e-06, + "loss": 0.1483, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4323 + }, + { + "epoch": 0.41582920613550034, + "grad_norm": 2.841420369495549, + "learning_rate": 3.1984739458275882e-06, + "loss": 0.1409, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4324 + }, + { + "epoch": 0.4159253738519979, + "grad_norm": 1.5411973333102056, + "learning_rate": 3.1977414716927837e-06, + "loss": 0.1277, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4325 + }, + { + "epoch": 0.41602154156849547, + "grad_norm": 1.3988869592345679, + "learning_rate": 3.1970089325963034e-06, + "loss": 0.1215, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4326 + }, + { + "epoch": 0.41611770928499303, + "grad_norm": 2.361813556682572, + "learning_rate": 3.19627632860635e-06, + "loss": 0.122, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4327 + }, + { + "epoch": 0.4162138770014906, + "grad_norm": 2.144330347378796, + "learning_rate": 3.195543659791132e-06, + "loss": 0.1207, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4328 + }, + { + "epoch": 0.41631004471798816, + "grad_norm": 1.9253974949004078, + "learning_rate": 3.1948109262188614e-06, + "loss": 0.114, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4329 + }, + { + "epoch": 0.4164062124344857, + "grad_norm": 2.0810002711280027, + "learning_rate": 3.1940781279577584e-06, + "loss": 0.1543, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4330 + }, + { + "epoch": 0.41650238015098334, + "grad_norm": 2.315017184377141, + "learning_rate": 3.193345265076048e-06, + "loss": 0.1377, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4331 + }, + { + "epoch": 0.4165985478674809, + "grad_norm": 2.291856809379826, + "learning_rate": 3.1926123376419628e-06, + "loss": 0.1146, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4332 + }, + { + "epoch": 0.41669471558397847, + "grad_norm": 1.6246433984453563, + "learning_rate": 3.1918793457237395e-06, + "loss": 0.1211, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4333 + }, + { + "epoch": 0.41679088330047603, + "grad_norm": 1.8684440853401263, + "learning_rate": 3.1911462893896215e-06, + "loss": 0.1116, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4334 + }, + { + "epoch": 0.4168870510169736, + "grad_norm": 1.7608909619120208, + "learning_rate": 3.190413168707859e-06, + "loss": 0.1081, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4335 + }, + { + "epoch": 0.41698321873347116, + "grad_norm": 1.6727668895858487, + "learning_rate": 3.189679983746708e-06, + "loss": 0.1482, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4336 + }, + { + "epoch": 0.4170793864499687, + "grad_norm": 2.0565251000870934, + "learning_rate": 3.1889467345744298e-06, + "loss": 0.1406, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4337 + }, + { + "epoch": 0.41717555416646634, + "grad_norm": 1.7312713517725917, + "learning_rate": 3.18821342125929e-06, + "loss": 0.1192, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4338 + }, + { + "epoch": 0.4172717218829639, + "grad_norm": 1.9323241247034413, + "learning_rate": 3.1874800438695654e-06, + "loss": 0.1706, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4339 + }, + { + "epoch": 0.4173678895994615, + "grad_norm": 1.8278874988444824, + "learning_rate": 3.1867466024735327e-06, + "loss": 0.133, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4340 + }, + { + "epoch": 0.41746405731595904, + "grad_norm": 1.6058469152857953, + "learning_rate": 3.186013097139479e-06, + "loss": 0.1369, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4341 + }, + { + "epoch": 0.4175602250324566, + "grad_norm": 3.4464916521000952, + "learning_rate": 3.1852795279356946e-06, + "loss": 0.1615, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4342 + }, + { + "epoch": 0.41765639274895416, + "grad_norm": 2.8230409674518095, + "learning_rate": 3.1845458949304776e-06, + "loss": 0.1261, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4343 + }, + { + "epoch": 0.4177525604654517, + "grad_norm": 2.6114055587815215, + "learning_rate": 3.1838121981921307e-06, + "loss": 0.1509, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4344 + }, + { + "epoch": 0.41784872818194935, + "grad_norm": 1.663095484447808, + "learning_rate": 3.183078437788964e-06, + "loss": 0.12, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4345 + }, + { + "epoch": 0.4179448958984469, + "grad_norm": 2.543260402488203, + "learning_rate": 3.1823446137892915e-06, + "loss": 0.1088, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4346 + }, + { + "epoch": 0.4180410636149445, + "grad_norm": 2.928354854431933, + "learning_rate": 3.1816107262614358e-06, + "loss": 0.137, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4347 + }, + { + "epoch": 0.41813723133144204, + "grad_norm": 4.770019824946205, + "learning_rate": 3.1808767752737225e-06, + "loss": 0.1491, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4348 + }, + { + "epoch": 0.4182333990479396, + "grad_norm": 2.039331048969681, + "learning_rate": 3.1801427608944845e-06, + "loss": 0.127, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4349 + }, + { + "epoch": 0.41832956676443717, + "grad_norm": 2.9556353562885485, + "learning_rate": 3.179408683192061e-06, + "loss": 0.1319, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4350 + }, + { + "epoch": 0.41842573448093473, + "grad_norm": 1.3819732733763044, + "learning_rate": 3.178674542234797e-06, + "loss": 0.1182, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4351 + }, + { + "epoch": 0.41852190219743235, + "grad_norm": 1.6779537301610967, + "learning_rate": 3.177940338091043e-06, + "loss": 0.1417, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4352 + }, + { + "epoch": 0.4186180699139299, + "grad_norm": 2.6399630988873706, + "learning_rate": 3.1772060708291547e-06, + "loss": 0.1535, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4353 + }, + { + "epoch": 0.4187142376304275, + "grad_norm": 6.440714162496347, + "learning_rate": 3.1764717405174948e-06, + "loss": 0.1774, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4354 + }, + { + "epoch": 0.41881040534692504, + "grad_norm": 6.530082323496014, + "learning_rate": 3.1757373472244324e-06, + "loss": 0.1704, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4355 + }, + { + "epoch": 0.4189065730634226, + "grad_norm": 3.6732009464042608, + "learning_rate": 3.17500289101834e-06, + "loss": 0.128, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4356 + }, + { + "epoch": 0.41900274077992017, + "grad_norm": 3.9175642505534314, + "learning_rate": 3.1742683719675983e-06, + "loss": 0.155, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4357 + }, + { + "epoch": 0.41909890849641773, + "grad_norm": 1.9282975265790483, + "learning_rate": 3.1735337901405932e-06, + "loss": 0.1333, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4358 + }, + { + "epoch": 0.41919507621291535, + "grad_norm": 1.8895390513970596, + "learning_rate": 3.1727991456057168e-06, + "loss": 0.1245, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4359 + }, + { + "epoch": 0.4192912439294129, + "grad_norm": 1.3870641726396045, + "learning_rate": 3.1720644384313647e-06, + "loss": 0.1069, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4360 + }, + { + "epoch": 0.4193874116459105, + "grad_norm": 2.231160075388256, + "learning_rate": 3.171329668685942e-06, + "loss": 0.119, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4361 + }, + { + "epoch": 0.41948357936240804, + "grad_norm": 2.7430552731182845, + "learning_rate": 3.1705948364378577e-06, + "loss": 0.1376, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4362 + }, + { + "epoch": 0.4195797470789056, + "grad_norm": 3.2149989967854684, + "learning_rate": 3.169859941755525e-06, + "loss": 0.1458, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4363 + }, + { + "epoch": 0.41967591479540317, + "grad_norm": 1.6922914520958439, + "learning_rate": 3.1691249847073672e-06, + "loss": 0.1519, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4364 + }, + { + "epoch": 0.41977208251190073, + "grad_norm": 1.586615946551502, + "learning_rate": 3.168389965361809e-06, + "loss": 0.1393, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4365 + }, + { + "epoch": 0.41986825022839835, + "grad_norm": 1.899548971467034, + "learning_rate": 3.1676548837872833e-06, + "loss": 0.1477, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4366 + }, + { + "epoch": 0.4199644179448959, + "grad_norm": 1.6243917697436285, + "learning_rate": 3.166919740052228e-06, + "loss": 0.1259, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4367 + }, + { + "epoch": 0.4200605856613935, + "grad_norm": 2.405174827310574, + "learning_rate": 3.1661845342250874e-06, + "loss": 0.1184, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4368 + }, + { + "epoch": 0.42015675337789105, + "grad_norm": 1.7616193345797622, + "learning_rate": 3.165449266374312e-06, + "loss": 0.1164, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4369 + }, + { + "epoch": 0.4202529210943886, + "grad_norm": 1.3009300451026973, + "learning_rate": 3.164713936568355e-06, + "loss": 0.1131, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4370 + }, + { + "epoch": 0.4203490888108862, + "grad_norm": 1.379340699894823, + "learning_rate": 3.16397854487568e-06, + "loss": 0.1377, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4371 + }, + { + "epoch": 0.42044525652738374, + "grad_norm": 2.0999713369184176, + "learning_rate": 3.163243091364752e-06, + "loss": 0.114, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4372 + }, + { + "epoch": 0.42054142424388136, + "grad_norm": 2.38774909995553, + "learning_rate": 3.162507576104046e-06, + "loss": 0.1585, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4373 + }, + { + "epoch": 0.4206375919603789, + "grad_norm": 1.6658249036137338, + "learning_rate": 3.1617719991620384e-06, + "loss": 0.1515, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4374 + }, + { + "epoch": 0.4207337596768765, + "grad_norm": 1.669288569425239, + "learning_rate": 3.1610363606072147e-06, + "loss": 0.1387, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4375 + }, + { + "epoch": 0.42082992739337405, + "grad_norm": 2.6436494164631905, + "learning_rate": 3.1603006605080642e-06, + "loss": 0.1362, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4376 + }, + { + "epoch": 0.4209260951098716, + "grad_norm": 2.6558473523462185, + "learning_rate": 3.1595648989330827e-06, + "loss": 0.1351, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4377 + }, + { + "epoch": 0.4210222628263692, + "grad_norm": 2.3295814855064254, + "learning_rate": 3.1588290759507736e-06, + "loss": 0.1044, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4378 + }, + { + "epoch": 0.42111843054286674, + "grad_norm": 2.1646219903700934, + "learning_rate": 3.158093191629641e-06, + "loss": 0.1414, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4379 + }, + { + "epoch": 0.42121459825936436, + "grad_norm": 1.6477815345799793, + "learning_rate": 3.1573572460381992e-06, + "loss": 0.1295, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4380 + }, + { + "epoch": 0.4213107659758619, + "grad_norm": 1.955306140285481, + "learning_rate": 3.156621239244967e-06, + "loss": 0.1389, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4381 + }, + { + "epoch": 0.4214069336923595, + "grad_norm": 1.8768975986783851, + "learning_rate": 3.1558851713184685e-06, + "loss": 0.1245, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4382 + }, + { + "epoch": 0.42150310140885705, + "grad_norm": 1.8734645843395537, + "learning_rate": 3.1551490423272337e-06, + "loss": 0.1317, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4383 + }, + { + "epoch": 0.4215992691253546, + "grad_norm": 1.5677022325660226, + "learning_rate": 3.154412852339798e-06, + "loss": 0.126, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4384 + }, + { + "epoch": 0.4216954368418522, + "grad_norm": 1.6156488391239314, + "learning_rate": 3.1536766014247033e-06, + "loss": 0.1331, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4385 + }, + { + "epoch": 0.42179160455834974, + "grad_norm": 3.778070392710542, + "learning_rate": 3.152940289650496e-06, + "loss": 0.1503, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4386 + }, + { + "epoch": 0.42188777227484736, + "grad_norm": 1.5617217651741706, + "learning_rate": 3.15220391708573e-06, + "loss": 0.119, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4387 + }, + { + "epoch": 0.4219839399913449, + "grad_norm": 1.4320119151625919, + "learning_rate": 3.151467483798961e-06, + "loss": 0.1151, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4388 + }, + { + "epoch": 0.4220801077078425, + "grad_norm": 2.676217921741684, + "learning_rate": 3.150730989858756e-06, + "loss": 0.1476, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4389 + }, + { + "epoch": 0.42217627542434005, + "grad_norm": 2.520996041451931, + "learning_rate": 3.1499944353336824e-06, + "loss": 0.1445, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4390 + }, + { + "epoch": 0.4222724431408376, + "grad_norm": 1.8416411248887645, + "learning_rate": 3.149257820292317e-06, + "loss": 0.1284, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4391 + }, + { + "epoch": 0.4223686108573352, + "grad_norm": 1.4191371205059364, + "learning_rate": 3.1485211448032397e-06, + "loss": 0.1274, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4392 + }, + { + "epoch": 0.42246477857383274, + "grad_norm": 1.6674310702671336, + "learning_rate": 3.1477844089350375e-06, + "loss": 0.1293, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4393 + }, + { + "epoch": 0.42256094629033036, + "grad_norm": 1.8249589946220033, + "learning_rate": 3.147047612756302e-06, + "loss": 0.1282, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4394 + }, + { + "epoch": 0.4226571140068279, + "grad_norm": 1.930375560526496, + "learning_rate": 3.146310756335632e-06, + "loss": 0.1184, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4395 + }, + { + "epoch": 0.4227532817233255, + "grad_norm": 2.778292346465222, + "learning_rate": 3.1455738397416304e-06, + "loss": 0.1597, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4396 + }, + { + "epoch": 0.42284944943982306, + "grad_norm": 1.669800488061598, + "learning_rate": 3.144836863042906e-06, + "loss": 0.1534, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4397 + }, + { + "epoch": 0.4229456171563206, + "grad_norm": 2.2239355913004033, + "learning_rate": 3.1440998263080736e-06, + "loss": 0.1232, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4398 + }, + { + "epoch": 0.4230417848728182, + "grad_norm": 2.026549090123563, + "learning_rate": 3.1433627296057527e-06, + "loss": 0.1405, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4399 + }, + { + "epoch": 0.42313795258931575, + "grad_norm": 1.4250769235473097, + "learning_rate": 3.1426255730045703e-06, + "loss": 0.1231, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4400 + }, + { + "epoch": 0.42323412030581337, + "grad_norm": 2.523602849273706, + "learning_rate": 3.141888356573157e-06, + "loss": 0.176, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4401 + }, + { + "epoch": 0.42333028802231093, + "grad_norm": 1.6119509487603179, + "learning_rate": 3.14115108038015e-06, + "loss": 0.1178, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4402 + }, + { + "epoch": 0.4234264557388085, + "grad_norm": 1.5459266122740052, + "learning_rate": 3.140413744494191e-06, + "loss": 0.1196, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4403 + }, + { + "epoch": 0.42352262345530606, + "grad_norm": 2.045643068859521, + "learning_rate": 3.1396763489839295e-06, + "loss": 0.124, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4404 + }, + { + "epoch": 0.4236187911718036, + "grad_norm": 1.2613407361173663, + "learning_rate": 3.138938893918017e-06, + "loss": 0.1171, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4405 + }, + { + "epoch": 0.4237149588883012, + "grad_norm": 1.4940640301217105, + "learning_rate": 3.1382013793651143e-06, + "loss": 0.1199, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4406 + }, + { + "epoch": 0.42381112660479875, + "grad_norm": 1.5569071737363156, + "learning_rate": 3.1374638053938855e-06, + "loss": 0.1149, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4407 + }, + { + "epoch": 0.42390729432129637, + "grad_norm": 2.849756054655189, + "learning_rate": 3.1367261720730007e-06, + "loss": 0.1361, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4408 + }, + { + "epoch": 0.42400346203779393, + "grad_norm": 1.8458956828975495, + "learning_rate": 3.1359884794711358e-06, + "loss": 0.1182, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4409 + }, + { + "epoch": 0.4240996297542915, + "grad_norm": 1.5451956591635054, + "learning_rate": 3.1352507276569723e-06, + "loss": 0.1202, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4410 + }, + { + "epoch": 0.42419579747078906, + "grad_norm": 1.9146195340349417, + "learning_rate": 3.134512916699196e-06, + "loss": 0.1113, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4411 + }, + { + "epoch": 0.4242919651872866, + "grad_norm": 2.5000926476473317, + "learning_rate": 3.1337750466665e-06, + "loss": 0.148, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4412 + }, + { + "epoch": 0.4243881329037842, + "grad_norm": 2.3512286109665945, + "learning_rate": 3.133037117627582e-06, + "loss": 0.16, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4413 + }, + { + "epoch": 0.42448430062028175, + "grad_norm": 1.8505307488516274, + "learning_rate": 3.1322991296511453e-06, + "loss": 0.118, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4414 + }, + { + "epoch": 0.42458046833677937, + "grad_norm": 1.609559098307385, + "learning_rate": 3.131561082805898e-06, + "loss": 0.0963, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4415 + }, + { + "epoch": 0.42467663605327693, + "grad_norm": 1.386562420067834, + "learning_rate": 3.1308229771605546e-06, + "loss": 0.0961, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4416 + }, + { + "epoch": 0.4247728037697745, + "grad_norm": 2.0192554812786705, + "learning_rate": 3.1300848127838357e-06, + "loss": 0.1199, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4417 + }, + { + "epoch": 0.42486897148627206, + "grad_norm": 10.012830274967369, + "learning_rate": 3.1293465897444646e-06, + "loss": 0.1212, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4418 + }, + { + "epoch": 0.4249651392027696, + "grad_norm": 2.330473329287184, + "learning_rate": 3.128608308111174e-06, + "loss": 0.1819, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4419 + }, + { + "epoch": 0.4250613069192672, + "grad_norm": 1.9824655853106246, + "learning_rate": 3.127869967952698e-06, + "loss": 0.1295, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4420 + }, + { + "epoch": 0.42515747463576475, + "grad_norm": 2.2106917881799597, + "learning_rate": 3.1271315693377804e-06, + "loss": 0.1194, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4421 + }, + { + "epoch": 0.4252536423522624, + "grad_norm": 1.8385839384850142, + "learning_rate": 3.1263931123351653e-06, + "loss": 0.1349, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4422 + }, + { + "epoch": 0.42534981006875994, + "grad_norm": 1.885697815214579, + "learning_rate": 3.1256545970136084e-06, + "loss": 0.1546, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4423 + }, + { + "epoch": 0.4254459777852575, + "grad_norm": 1.7827414229793872, + "learning_rate": 3.1249160234418646e-06, + "loss": 0.1178, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4424 + }, + { + "epoch": 0.42554214550175506, + "grad_norm": 3.2411134564557136, + "learning_rate": 3.1241773916886987e-06, + "loss": 0.1434, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4425 + }, + { + "epoch": 0.42563831321825263, + "grad_norm": 2.440130916563199, + "learning_rate": 3.123438701822879e-06, + "loss": 0.1486, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4426 + }, + { + "epoch": 0.4257344809347502, + "grad_norm": 2.061947755894546, + "learning_rate": 3.1226999539131798e-06, + "loss": 0.1658, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4427 + }, + { + "epoch": 0.42583064865124776, + "grad_norm": 3.5379288528100536, + "learning_rate": 3.1219611480283794e-06, + "loss": 0.1636, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4428 + }, + { + "epoch": 0.4259268163677454, + "grad_norm": 3.286222742892855, + "learning_rate": 3.121222284237265e-06, + "loss": 0.1301, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4429 + }, + { + "epoch": 0.42602298408424294, + "grad_norm": 2.9119023718369226, + "learning_rate": 3.120483362608625e-06, + "loss": 0.124, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4430 + }, + { + "epoch": 0.4261191518007405, + "grad_norm": 2.2226170112122228, + "learning_rate": 3.119744383211256e-06, + "loss": 0.1319, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4431 + }, + { + "epoch": 0.42621531951723807, + "grad_norm": 1.3929563441325519, + "learning_rate": 3.1190053461139584e-06, + "loss": 0.1324, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4432 + }, + { + "epoch": 0.42631148723373563, + "grad_norm": 2.6515089867653425, + "learning_rate": 3.118266251385539e-06, + "loss": 0.1297, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4433 + }, + { + "epoch": 0.4264076549502332, + "grad_norm": 2.43863190183227, + "learning_rate": 3.1175270990948096e-06, + "loss": 0.1478, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4434 + }, + { + "epoch": 0.42650382266673076, + "grad_norm": 3.0513882394589094, + "learning_rate": 3.116787889310587e-06, + "loss": 0.1459, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4435 + }, + { + "epoch": 0.4265999903832284, + "grad_norm": 1.7494379034652767, + "learning_rate": 3.116048622101694e-06, + "loss": 0.112, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4436 + }, + { + "epoch": 0.42669615809972594, + "grad_norm": 1.759245483355571, + "learning_rate": 3.1153092975369585e-06, + "loss": 0.1809, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4437 + }, + { + "epoch": 0.4267923258162235, + "grad_norm": 1.4730870887335266, + "learning_rate": 3.1145699156852133e-06, + "loss": 0.1338, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4438 + }, + { + "epoch": 0.42688849353272107, + "grad_norm": 3.0968985630728567, + "learning_rate": 3.1138304766152968e-06, + "loss": 0.1507, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4439 + }, + { + "epoch": 0.42698466124921863, + "grad_norm": 4.485620828328783, + "learning_rate": 3.1130909803960533e-06, + "loss": 0.1479, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4440 + }, + { + "epoch": 0.4270808289657162, + "grad_norm": 2.4047683790173915, + "learning_rate": 3.112351427096332e-06, + "loss": 0.1277, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4441 + }, + { + "epoch": 0.42717699668221376, + "grad_norm": 5.0537735284340854, + "learning_rate": 3.111611816784987e-06, + "loss": 0.1299, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4442 + }, + { + "epoch": 0.4272731643987114, + "grad_norm": 2.3554649902807445, + "learning_rate": 3.1108721495308786e-06, + "loss": 0.1118, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4443 + }, + { + "epoch": 0.42736933211520894, + "grad_norm": 1.716421684165657, + "learning_rate": 3.1101324254028716e-06, + "loss": 0.1051, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4444 + }, + { + "epoch": 0.4274654998317065, + "grad_norm": 1.655772558797147, + "learning_rate": 3.1093926444698363e-06, + "loss": 0.1386, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4445 + }, + { + "epoch": 0.42756166754820407, + "grad_norm": 2.135849736037642, + "learning_rate": 3.108652806800648e-06, + "loss": 0.144, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4446 + }, + { + "epoch": 0.42765783526470164, + "grad_norm": 1.7331867544274888, + "learning_rate": 3.1079129124641878e-06, + "loss": 0.1349, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4447 + }, + { + "epoch": 0.4277540029811992, + "grad_norm": 3.790746272438669, + "learning_rate": 3.107172961529343e-06, + "loss": 0.1323, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4448 + }, + { + "epoch": 0.42785017069769676, + "grad_norm": 4.048842097326632, + "learning_rate": 3.1064329540650033e-06, + "loss": 0.1604, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4449 + }, + { + "epoch": 0.4279463384141944, + "grad_norm": 2.8110716218967866, + "learning_rate": 3.1056928901400667e-06, + "loss": 0.131, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4450 + }, + { + "epoch": 0.42804250613069195, + "grad_norm": 2.0475364173702078, + "learning_rate": 3.1049527698234355e-06, + "loss": 0.156, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4451 + }, + { + "epoch": 0.4281386738471895, + "grad_norm": 3.716432365924084, + "learning_rate": 3.1042125931840168e-06, + "loss": 0.1485, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4452 + }, + { + "epoch": 0.4282348415636871, + "grad_norm": 2.6657517573624787, + "learning_rate": 3.1034723602907213e-06, + "loss": 0.1283, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4453 + }, + { + "epoch": 0.42833100928018464, + "grad_norm": 3.3636517088861715, + "learning_rate": 3.1027320712124693e-06, + "loss": 0.1553, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4454 + }, + { + "epoch": 0.4284271769966822, + "grad_norm": 2.4562524428147436, + "learning_rate": 3.101991726018182e-06, + "loss": 0.1428, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4455 + }, + { + "epoch": 0.42852334471317977, + "grad_norm": 1.6747966407039008, + "learning_rate": 3.101251324776788e-06, + "loss": 0.1161, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4456 + }, + { + "epoch": 0.42861951242967733, + "grad_norm": 3.442717721491139, + "learning_rate": 3.1005108675572214e-06, + "loss": 0.1354, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4457 + }, + { + "epoch": 0.42871568014617495, + "grad_norm": 1.9603139363231232, + "learning_rate": 3.0997703544284197e-06, + "loss": 0.1311, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4458 + }, + { + "epoch": 0.4288118478626725, + "grad_norm": 3.0165433253912277, + "learning_rate": 3.099029785459328e-06, + "loss": 0.155, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4459 + }, + { + "epoch": 0.4289080155791701, + "grad_norm": 1.981158684763557, + "learning_rate": 3.0982891607188948e-06, + "loss": 0.1292, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4460 + }, + { + "epoch": 0.42900418329566764, + "grad_norm": 1.5538389755238546, + "learning_rate": 3.097548480276074e-06, + "loss": 0.1487, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4461 + }, + { + "epoch": 0.4291003510121652, + "grad_norm": 1.3439279047513852, + "learning_rate": 3.0968077441998255e-06, + "loss": 0.1269, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4462 + }, + { + "epoch": 0.42919651872866277, + "grad_norm": 3.2802863896921357, + "learning_rate": 3.0960669525591136e-06, + "loss": 0.1175, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4463 + }, + { + "epoch": 0.42929268644516033, + "grad_norm": 2.227354881074669, + "learning_rate": 3.095326105422908e-06, + "loss": 0.1491, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4464 + }, + { + "epoch": 0.42938885416165795, + "grad_norm": 1.522608364586999, + "learning_rate": 3.0945852028601834e-06, + "loss": 0.1364, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4465 + }, + { + "epoch": 0.4294850218781555, + "grad_norm": 1.447084945624809, + "learning_rate": 3.093844244939921e-06, + "loss": 0.1211, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4466 + }, + { + "epoch": 0.4295811895946531, + "grad_norm": 1.393825555511878, + "learning_rate": 3.093103231731105e-06, + "loss": 0.1288, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4467 + }, + { + "epoch": 0.42967735731115064, + "grad_norm": 1.501030307515629, + "learning_rate": 3.0923621633027257e-06, + "loss": 0.1416, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4468 + }, + { + "epoch": 0.4297735250276482, + "grad_norm": 2.655109630083745, + "learning_rate": 3.0916210397237794e-06, + "loss": 0.1656, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4469 + }, + { + "epoch": 0.42986969274414577, + "grad_norm": 1.360518600657614, + "learning_rate": 3.090879861063266e-06, + "loss": 0.1115, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4470 + }, + { + "epoch": 0.42996586046064333, + "grad_norm": 1.6059889769770028, + "learning_rate": 3.090138627390193e-06, + "loss": 0.15, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4471 + }, + { + "epoch": 0.43006202817714095, + "grad_norm": 2.1002034545709463, + "learning_rate": 3.089397338773569e-06, + "loss": 0.1441, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4472 + }, + { + "epoch": 0.4301581958936385, + "grad_norm": 1.9322057209187886, + "learning_rate": 3.088655995282411e-06, + "loss": 0.1464, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4473 + }, + { + "epoch": 0.4302543636101361, + "grad_norm": 2.926022309785817, + "learning_rate": 3.08791459698574e-06, + "loss": 0.1506, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4474 + }, + { + "epoch": 0.43035053132663365, + "grad_norm": 1.4546831919636234, + "learning_rate": 3.0871731439525825e-06, + "loss": 0.1425, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4475 + }, + { + "epoch": 0.4304466990431312, + "grad_norm": 1.4626545166402953, + "learning_rate": 3.0864316362519705e-06, + "loss": 0.1221, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4476 + }, + { + "epoch": 0.4305428667596288, + "grad_norm": 1.579249500947801, + "learning_rate": 3.085690073952939e-06, + "loss": 0.1229, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4477 + }, + { + "epoch": 0.43063903447612634, + "grad_norm": 1.2368931183027698, + "learning_rate": 3.0849484571245297e-06, + "loss": 0.0911, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4478 + }, + { + "epoch": 0.43073520219262396, + "grad_norm": 1.6246472361826674, + "learning_rate": 3.084206785835791e-06, + "loss": 0.127, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4479 + }, + { + "epoch": 0.4308313699091215, + "grad_norm": 2.6678975462366252, + "learning_rate": 3.0834650601557724e-06, + "loss": 0.1189, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4480 + }, + { + "epoch": 0.4309275376256191, + "grad_norm": 1.7273182455390452, + "learning_rate": 3.082723280153532e-06, + "loss": 0.1362, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4481 + }, + { + "epoch": 0.43102370534211665, + "grad_norm": 1.6653544199297403, + "learning_rate": 3.0819814458981304e-06, + "loss": 0.1371, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4482 + }, + { + "epoch": 0.4311198730586142, + "grad_norm": 1.4025961279149488, + "learning_rate": 3.081239557458636e-06, + "loss": 0.1056, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4483 + }, + { + "epoch": 0.4312160407751118, + "grad_norm": 1.9165350912793755, + "learning_rate": 3.08049761490412e-06, + "loss": 0.1324, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4484 + }, + { + "epoch": 0.43131220849160934, + "grad_norm": 1.461946700924558, + "learning_rate": 3.0797556183036582e-06, + "loss": 0.1085, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4485 + }, + { + "epoch": 0.43140837620810696, + "grad_norm": 2.2353477334373246, + "learning_rate": 3.079013567726334e-06, + "loss": 0.1706, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4486 + }, + { + "epoch": 0.4315045439246045, + "grad_norm": 1.6278147031816097, + "learning_rate": 3.0782714632412343e-06, + "loss": 0.105, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4487 + }, + { + "epoch": 0.4316007116411021, + "grad_norm": 1.7633889457964242, + "learning_rate": 3.07752930491745e-06, + "loss": 0.1553, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4488 + }, + { + "epoch": 0.43169687935759965, + "grad_norm": 1.9256814529259387, + "learning_rate": 3.0767870928240785e-06, + "loss": 0.1416, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4489 + }, + { + "epoch": 0.4317930470740972, + "grad_norm": 2.618143029022194, + "learning_rate": 3.0760448270302225e-06, + "loss": 0.1115, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4490 + }, + { + "epoch": 0.4318892147905948, + "grad_norm": 2.1606668333923587, + "learning_rate": 3.075302507604988e-06, + "loss": 0.1554, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4491 + }, + { + "epoch": 0.43198538250709234, + "grad_norm": 2.1595382878783367, + "learning_rate": 3.074560134617488e-06, + "loss": 0.1069, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4492 + }, + { + "epoch": 0.43208155022358996, + "grad_norm": 1.5743080526639934, + "learning_rate": 3.0738177081368394e-06, + "loss": 0.135, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4493 + }, + { + "epoch": 0.4321777179400875, + "grad_norm": 2.310359767625047, + "learning_rate": 3.0730752282321635e-06, + "loss": 0.1248, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4494 + }, + { + "epoch": 0.4322738856565851, + "grad_norm": 2.4660155095875136, + "learning_rate": 3.0723326949725874e-06, + "loss": 0.1124, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4495 + }, + { + "epoch": 0.43237005337308265, + "grad_norm": 1.258216201685617, + "learning_rate": 3.071590108427244e-06, + "loss": 0.1088, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4496 + }, + { + "epoch": 0.4324662210895802, + "grad_norm": 1.820423877035841, + "learning_rate": 3.0708474686652683e-06, + "loss": 0.1257, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4497 + }, + { + "epoch": 0.4325623888060778, + "grad_norm": 2.3559322494037263, + "learning_rate": 3.0701047757558046e-06, + "loss": 0.1553, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4498 + }, + { + "epoch": 0.43265855652257534, + "grad_norm": 2.3581115207603247, + "learning_rate": 3.069362029767997e-06, + "loss": 0.1396, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4499 + }, + { + "epoch": 0.43275472423907296, + "grad_norm": 1.778817303031886, + "learning_rate": 3.068619230770999e-06, + "loss": 0.1378, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4500 + }, + { + "epoch": 0.4328508919555705, + "grad_norm": 3.0905111630647477, + "learning_rate": 3.067876378833967e-06, + "loss": 0.1542, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4501 + }, + { + "epoch": 0.4329470596720681, + "grad_norm": 2.1977959405843133, + "learning_rate": 3.0671334740260627e-06, + "loss": 0.1321, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4502 + }, + { + "epoch": 0.43304322738856565, + "grad_norm": 1.6573767685859406, + "learning_rate": 3.0663905164164517e-06, + "loss": 0.1156, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4503 + }, + { + "epoch": 0.4331393951050632, + "grad_norm": 2.9140930162723646, + "learning_rate": 3.0656475060743065e-06, + "loss": 0.1356, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4504 + }, + { + "epoch": 0.4332355628215608, + "grad_norm": 1.586321771747133, + "learning_rate": 3.0649044430688017e-06, + "loss": 0.1305, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4505 + }, + { + "epoch": 0.43333173053805835, + "grad_norm": 3.309390472706263, + "learning_rate": 3.06416132746912e-06, + "loss": 0.1337, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4506 + }, + { + "epoch": 0.43342789825455597, + "grad_norm": 1.8652474650627715, + "learning_rate": 3.0634181593444486e-06, + "loss": 0.1313, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4507 + }, + { + "epoch": 0.43352406597105353, + "grad_norm": 1.562370559460585, + "learning_rate": 3.062674938763976e-06, + "loss": 0.1375, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4508 + }, + { + "epoch": 0.4336202336875511, + "grad_norm": 1.5452758341278465, + "learning_rate": 3.0619316657968994e-06, + "loss": 0.1334, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4509 + }, + { + "epoch": 0.43371640140404866, + "grad_norm": 2.170883888297069, + "learning_rate": 3.0611883405124203e-06, + "loss": 0.1484, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4510 + }, + { + "epoch": 0.4338125691205462, + "grad_norm": 1.6958415370963236, + "learning_rate": 3.060444962979743e-06, + "loss": 0.1147, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4511 + }, + { + "epoch": 0.4339087368370438, + "grad_norm": 1.537484594857061, + "learning_rate": 3.0597015332680792e-06, + "loss": 0.1247, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4512 + }, + { + "epoch": 0.43400490455354135, + "grad_norm": 1.6466044538092695, + "learning_rate": 3.058958051446643e-06, + "loss": 0.1303, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4513 + }, + { + "epoch": 0.43410107227003897, + "grad_norm": 2.4771968157826456, + "learning_rate": 3.0582145175846546e-06, + "loss": 0.1503, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4514 + }, + { + "epoch": 0.43419723998653653, + "grad_norm": 2.1043480888641763, + "learning_rate": 3.057470931751341e-06, + "loss": 0.1517, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4515 + }, + { + "epoch": 0.4342934077030341, + "grad_norm": 2.1951321317278594, + "learning_rate": 3.0567272940159306e-06, + "loss": 0.1561, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4516 + }, + { + "epoch": 0.43438957541953166, + "grad_norm": 1.776207541561505, + "learning_rate": 3.055983604447658e-06, + "loss": 0.1382, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4517 + }, + { + "epoch": 0.4344857431360292, + "grad_norm": 1.4785771317151917, + "learning_rate": 3.055239863115763e-06, + "loss": 0.1316, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4518 + }, + { + "epoch": 0.4345819108525268, + "grad_norm": 1.4329689912714139, + "learning_rate": 3.0544960700894917e-06, + "loss": 0.1262, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4519 + }, + { + "epoch": 0.43467807856902435, + "grad_norm": 2.8702137430889905, + "learning_rate": 3.0537522254380902e-06, + "loss": 0.1357, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4520 + }, + { + "epoch": 0.43477424628552197, + "grad_norm": 1.5329034707464089, + "learning_rate": 3.053008329230815e-06, + "loss": 0.107, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4521 + }, + { + "epoch": 0.43487041400201953, + "grad_norm": 1.7247662210190116, + "learning_rate": 3.052264381536923e-06, + "loss": 0.1427, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4522 + }, + { + "epoch": 0.4349665817185171, + "grad_norm": 1.4958488201699829, + "learning_rate": 3.0515203824256793e-06, + "loss": 0.1226, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4523 + }, + { + "epoch": 0.43506274943501466, + "grad_norm": 2.206902520447862, + "learning_rate": 3.050776331966352e-06, + "loss": 0.1512, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4524 + }, + { + "epoch": 0.4351589171515122, + "grad_norm": 1.8657023739266878, + "learning_rate": 3.050032230228214e-06, + "loss": 0.1393, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4525 + }, + { + "epoch": 0.4352550848680098, + "grad_norm": 1.9639295501889669, + "learning_rate": 3.0492880772805433e-06, + "loss": 0.1281, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4526 + }, + { + "epoch": 0.43535125258450735, + "grad_norm": 1.762362859291837, + "learning_rate": 3.048543873192622e-06, + "loss": 0.13, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4527 + }, + { + "epoch": 0.435447420301005, + "grad_norm": 2.333050101131083, + "learning_rate": 3.047799618033739e-06, + "loss": 0.1207, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4528 + }, + { + "epoch": 0.43554358801750254, + "grad_norm": 1.8504785207619685, + "learning_rate": 3.0470553118731853e-06, + "loss": 0.1163, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4529 + }, + { + "epoch": 0.4356397557340001, + "grad_norm": 1.4277227304129656, + "learning_rate": 3.0463109547802585e-06, + "loss": 0.1127, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4530 + }, + { + "epoch": 0.43573592345049766, + "grad_norm": 1.8975480063175962, + "learning_rate": 3.0455665468242597e-06, + "loss": 0.1457, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4531 + }, + { + "epoch": 0.43583209116699523, + "grad_norm": 3.1256789888048804, + "learning_rate": 3.0448220880744963e-06, + "loss": 0.1218, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4532 + }, + { + "epoch": 0.4359282588834928, + "grad_norm": 4.2129417243534855, + "learning_rate": 3.0440775786002787e-06, + "loss": 0.142, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4533 + }, + { + "epoch": 0.43602442659999036, + "grad_norm": 4.419269129851718, + "learning_rate": 3.043333018470923e-06, + "loss": 0.1718, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4534 + }, + { + "epoch": 0.436120594316488, + "grad_norm": 2.100348894578219, + "learning_rate": 3.0425884077557498e-06, + "loss": 0.1204, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4535 + }, + { + "epoch": 0.43621676203298554, + "grad_norm": 1.9238413189722083, + "learning_rate": 3.041843746524085e-06, + "loss": 0.1007, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4536 + }, + { + "epoch": 0.4363129297494831, + "grad_norm": 1.736273166837243, + "learning_rate": 3.0410990348452572e-06, + "loss": 0.1437, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4537 + }, + { + "epoch": 0.43640909746598067, + "grad_norm": 1.8838888219483363, + "learning_rate": 3.040354272788603e-06, + "loss": 0.1246, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4538 + }, + { + "epoch": 0.43650526518247823, + "grad_norm": 2.1674922123498055, + "learning_rate": 3.0396094604234607e-06, + "loss": 0.1404, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4539 + }, + { + "epoch": 0.4366014328989758, + "grad_norm": 3.1327658523556847, + "learning_rate": 3.0388645978191745e-06, + "loss": 0.1382, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4540 + }, + { + "epoch": 0.43669760061547336, + "grad_norm": 1.962674327752294, + "learning_rate": 3.0381196850450934e-06, + "loss": 0.1133, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4541 + }, + { + "epoch": 0.436793768331971, + "grad_norm": 1.6026419803627938, + "learning_rate": 3.0373747221705703e-06, + "loss": 0.1278, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4542 + }, + { + "epoch": 0.43688993604846854, + "grad_norm": 1.415388248024053, + "learning_rate": 3.0366297092649646e-06, + "loss": 0.1083, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4543 + }, + { + "epoch": 0.4369861037649661, + "grad_norm": 3.07988618383198, + "learning_rate": 3.035884646397637e-06, + "loss": 0.1321, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4544 + }, + { + "epoch": 0.43708227148146367, + "grad_norm": 2.4340391508078354, + "learning_rate": 3.0351395336379565e-06, + "loss": 0.1245, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4545 + }, + { + "epoch": 0.43717843919796123, + "grad_norm": 4.064889126321989, + "learning_rate": 3.0343943710552953e-06, + "loss": 0.1401, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4546 + }, + { + "epoch": 0.4372746069144588, + "grad_norm": 2.4912422939023333, + "learning_rate": 3.0336491587190296e-06, + "loss": 0.1208, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4547 + }, + { + "epoch": 0.43737077463095636, + "grad_norm": 2.150360776201604, + "learning_rate": 3.0329038966985404e-06, + "loss": 0.1473, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4548 + }, + { + "epoch": 0.437466942347454, + "grad_norm": 2.152687291630453, + "learning_rate": 3.0321585850632145e-06, + "loss": 0.1244, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4549 + }, + { + "epoch": 0.43756311006395154, + "grad_norm": 1.7641587813179815, + "learning_rate": 3.0314132238824416e-06, + "loss": 0.1072, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4550 + }, + { + "epoch": 0.4376592777804491, + "grad_norm": 1.5946489494003966, + "learning_rate": 3.0306678132256183e-06, + "loss": 0.1253, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4551 + }, + { + "epoch": 0.43775544549694667, + "grad_norm": 2.2082892525307836, + "learning_rate": 3.029922353162143e-06, + "loss": 0.1349, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4552 + }, + { + "epoch": 0.43785161321344424, + "grad_norm": 1.7591270516293178, + "learning_rate": 3.02917684376142e-06, + "loss": 0.1226, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4553 + }, + { + "epoch": 0.4379477809299418, + "grad_norm": 1.7813615389394875, + "learning_rate": 3.0284312850928598e-06, + "loss": 0.1362, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4554 + }, + { + "epoch": 0.43804394864643936, + "grad_norm": 2.559798967401432, + "learning_rate": 3.027685677225874e-06, + "loss": 0.1151, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4555 + }, + { + "epoch": 0.438140116362937, + "grad_norm": 2.04247992678009, + "learning_rate": 3.0269400202298823e-06, + "loss": 0.1591, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4556 + }, + { + "epoch": 0.43823628407943455, + "grad_norm": 3.3844963096492138, + "learning_rate": 3.0261943141743074e-06, + "loss": 0.1366, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4557 + }, + { + "epoch": 0.4383324517959321, + "grad_norm": 1.4188120681684724, + "learning_rate": 3.0254485591285753e-06, + "loss": 0.1101, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4558 + }, + { + "epoch": 0.4384286195124297, + "grad_norm": 1.6209528546172696, + "learning_rate": 3.0247027551621187e-06, + "loss": 0.1437, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4559 + }, + { + "epoch": 0.43852478722892724, + "grad_norm": 2.6166859074196847, + "learning_rate": 3.0239569023443756e-06, + "loss": 0.1398, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4560 + }, + { + "epoch": 0.4386209549454248, + "grad_norm": 2.3612395008410796, + "learning_rate": 3.023211000744784e-06, + "loss": 0.1245, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4561 + }, + { + "epoch": 0.43871712266192237, + "grad_norm": 2.880330064946174, + "learning_rate": 3.0224650504327914e-06, + "loss": 0.1527, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4562 + }, + { + "epoch": 0.43881329037842, + "grad_norm": 2.453120663700398, + "learning_rate": 3.0217190514778473e-06, + "loss": 0.1374, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4563 + }, + { + "epoch": 0.43890945809491755, + "grad_norm": 1.8620785583327033, + "learning_rate": 3.020973003949406e-06, + "loss": 0.1415, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4564 + }, + { + "epoch": 0.4390056258114151, + "grad_norm": 1.9240998843919535, + "learning_rate": 3.0202269079169273e-06, + "loss": 0.1288, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4565 + }, + { + "epoch": 0.4391017935279127, + "grad_norm": 1.6864286994353272, + "learning_rate": 3.0194807634498747e-06, + "loss": 0.1526, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4566 + }, + { + "epoch": 0.43919796124441024, + "grad_norm": 1.9137247529332502, + "learning_rate": 3.0187345706177155e-06, + "loss": 0.1108, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4567 + }, + { + "epoch": 0.4392941289609078, + "grad_norm": 1.6268829723221299, + "learning_rate": 3.017988329489923e-06, + "loss": 0.1057, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4568 + }, + { + "epoch": 0.43939029667740537, + "grad_norm": 2.036328196323848, + "learning_rate": 3.0172420401359748e-06, + "loss": 0.1534, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4569 + }, + { + "epoch": 0.439486464393903, + "grad_norm": 2.2275036840558675, + "learning_rate": 3.016495702625351e-06, + "loss": 0.1295, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4570 + }, + { + "epoch": 0.43958263211040055, + "grad_norm": 2.5179906122946107, + "learning_rate": 3.0157493170275394e-06, + "loss": 0.1461, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4571 + }, + { + "epoch": 0.4396787998268981, + "grad_norm": 1.477705858253644, + "learning_rate": 3.0150028834120293e-06, + "loss": 0.1337, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4572 + }, + { + "epoch": 0.4397749675433957, + "grad_norm": 2.617913038758477, + "learning_rate": 3.014256401848316e-06, + "loss": 0.1319, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4573 + }, + { + "epoch": 0.43987113525989324, + "grad_norm": 1.8203554053775501, + "learning_rate": 3.0135098724058997e-06, + "loss": 0.1293, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4574 + }, + { + "epoch": 0.4399673029763908, + "grad_norm": 2.259260045753822, + "learning_rate": 3.012763295154284e-06, + "loss": 0.1369, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4575 + }, + { + "epoch": 0.44006347069288837, + "grad_norm": 1.7168159498389348, + "learning_rate": 3.012016670162977e-06, + "loss": 0.165, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4576 + }, + { + "epoch": 0.440159638409386, + "grad_norm": 1.7078560004610648, + "learning_rate": 3.011269997501492e-06, + "loss": 0.1241, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4577 + }, + { + "epoch": 0.44025580612588355, + "grad_norm": 2.3304570261967608, + "learning_rate": 3.010523277239346e-06, + "loss": 0.1382, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4578 + }, + { + "epoch": 0.4403519738423811, + "grad_norm": 2.3731998445666753, + "learning_rate": 3.009776509446061e-06, + "loss": 0.1326, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4579 + }, + { + "epoch": 0.4404481415588787, + "grad_norm": 1.825727353513804, + "learning_rate": 3.0090296941911633e-06, + "loss": 0.1357, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4580 + }, + { + "epoch": 0.44054430927537624, + "grad_norm": 1.290945275994053, + "learning_rate": 3.0082828315441824e-06, + "loss": 0.1227, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4581 + }, + { + "epoch": 0.4406404769918738, + "grad_norm": 1.7956589121732047, + "learning_rate": 3.0075359215746553e-06, + "loss": 0.1066, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4582 + }, + { + "epoch": 0.4407366447083714, + "grad_norm": 1.9003331778705066, + "learning_rate": 3.006788964352119e-06, + "loss": 0.1274, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4583 + }, + { + "epoch": 0.440832812424869, + "grad_norm": 2.31516147983518, + "learning_rate": 3.00604195994612e-06, + "loss": 0.1807, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4584 + }, + { + "epoch": 0.44092898014136656, + "grad_norm": 2.0844522230675127, + "learning_rate": 3.0052949084262033e-06, + "loss": 0.1245, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4585 + }, + { + "epoch": 0.4410251478578641, + "grad_norm": 1.4412655355729762, + "learning_rate": 3.0045478098619245e-06, + "loss": 0.1233, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4586 + }, + { + "epoch": 0.4411213155743617, + "grad_norm": 2.1640755660605917, + "learning_rate": 3.0038006643228384e-06, + "loss": 0.1194, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4587 + }, + { + "epoch": 0.44121748329085925, + "grad_norm": 2.060256975343296, + "learning_rate": 3.0030534718785074e-06, + "loss": 0.1307, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4588 + }, + { + "epoch": 0.4413136510073568, + "grad_norm": 1.9970197709059698, + "learning_rate": 3.002306232598497e-06, + "loss": 0.1356, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4589 + }, + { + "epoch": 0.4414098187238544, + "grad_norm": 2.0666712739664903, + "learning_rate": 3.001558946552377e-06, + "loss": 0.1776, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4590 + }, + { + "epoch": 0.441505986440352, + "grad_norm": 1.5164009978039714, + "learning_rate": 3.0008116138097226e-06, + "loss": 0.1257, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4591 + }, + { + "epoch": 0.44160215415684956, + "grad_norm": 1.945305708175589, + "learning_rate": 3.0000642344401115e-06, + "loss": 0.1391, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4592 + }, + { + "epoch": 0.4416983218733471, + "grad_norm": 2.1976508631807157, + "learning_rate": 2.999316808513127e-06, + "loss": 0.108, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4593 + }, + { + "epoch": 0.4417944895898447, + "grad_norm": 3.120689867240909, + "learning_rate": 2.9985693360983576e-06, + "loss": 0.1698, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4594 + }, + { + "epoch": 0.44189065730634225, + "grad_norm": 2.260213614579008, + "learning_rate": 2.997821817265394e-06, + "loss": 0.1222, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4595 + }, + { + "epoch": 0.4419868250228398, + "grad_norm": 1.6848118075383618, + "learning_rate": 2.9970742520838324e-06, + "loss": 0.1194, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4596 + }, + { + "epoch": 0.4420829927393374, + "grad_norm": 2.9830365923506093, + "learning_rate": 2.996326640623273e-06, + "loss": 0.1406, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4597 + }, + { + "epoch": 0.442179160455835, + "grad_norm": 3.5643074751556054, + "learning_rate": 2.9955789829533216e-06, + "loss": 0.1156, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4598 + }, + { + "epoch": 0.44227532817233256, + "grad_norm": 2.4556568361068485, + "learning_rate": 2.9948312791435862e-06, + "loss": 0.141, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4599 + }, + { + "epoch": 0.4423714958888301, + "grad_norm": 1.7704520546827411, + "learning_rate": 2.9940835292636806e-06, + "loss": 0.1548, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4600 + }, + { + "epoch": 0.4424676636053277, + "grad_norm": 1.7378827475938066, + "learning_rate": 2.993335733383222e-06, + "loss": 0.0995, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4601 + }, + { + "epoch": 0.44256383132182525, + "grad_norm": 3.6204727996513144, + "learning_rate": 2.992587891571833e-06, + "loss": 0.143, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4602 + }, + { + "epoch": 0.4426599990383228, + "grad_norm": 1.7995807704019522, + "learning_rate": 2.9918400038991384e-06, + "loss": 0.1339, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4603 + }, + { + "epoch": 0.4427561667548204, + "grad_norm": 1.9912494522768762, + "learning_rate": 2.9910920704347696e-06, + "loss": 0.1112, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4604 + }, + { + "epoch": 0.442852334471318, + "grad_norm": 1.6929091130564204, + "learning_rate": 2.990344091248362e-06, + "loss": 0.1354, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4605 + }, + { + "epoch": 0.44294850218781556, + "grad_norm": 2.933356910726805, + "learning_rate": 2.9895960664095523e-06, + "loss": 0.1244, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4606 + }, + { + "epoch": 0.4430446699043131, + "grad_norm": 1.500919187964214, + "learning_rate": 2.988847995987986e-06, + "loss": 0.1255, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4607 + }, + { + "epoch": 0.4431408376208107, + "grad_norm": 1.5987597751143432, + "learning_rate": 2.9880998800533095e-06, + "loss": 0.1341, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4608 + }, + { + "epoch": 0.44323700533730825, + "grad_norm": 3.787857761346214, + "learning_rate": 2.9873517186751746e-06, + "loss": 0.1111, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4609 + }, + { + "epoch": 0.4433331730538058, + "grad_norm": 1.7558210513792516, + "learning_rate": 2.986603511923237e-06, + "loss": 0.1312, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4610 + }, + { + "epoch": 0.4434293407703034, + "grad_norm": 1.5556945444822128, + "learning_rate": 2.9858552598671577e-06, + "loss": 0.1061, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4611 + }, + { + "epoch": 0.443525508486801, + "grad_norm": 1.9810648749582183, + "learning_rate": 2.9851069625765995e-06, + "loss": 0.1261, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4612 + }, + { + "epoch": 0.44362167620329856, + "grad_norm": 2.406926308292192, + "learning_rate": 2.984358620121233e-06, + "loss": 0.103, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4613 + }, + { + "epoch": 0.44371784391979613, + "grad_norm": 3.622967785884757, + "learning_rate": 2.9836102325707282e-06, + "loss": 0.1702, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4614 + }, + { + "epoch": 0.4438140116362937, + "grad_norm": 2.2520923684550334, + "learning_rate": 2.9828617999947647e-06, + "loss": 0.1492, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4615 + }, + { + "epoch": 0.44391017935279126, + "grad_norm": 2.9338692920758978, + "learning_rate": 2.9821133224630226e-06, + "loss": 0.1422, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4616 + }, + { + "epoch": 0.4440063470692888, + "grad_norm": 2.2287654136331407, + "learning_rate": 2.9813648000451877e-06, + "loss": 0.1276, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4617 + }, + { + "epoch": 0.4441025147857864, + "grad_norm": 1.87257227170585, + "learning_rate": 2.980616232810949e-06, + "loss": 0.1195, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4618 + }, + { + "epoch": 0.444198682502284, + "grad_norm": 1.9827263689862316, + "learning_rate": 2.97986762083e-06, + "loss": 0.1493, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4619 + }, + { + "epoch": 0.44429485021878157, + "grad_norm": 4.408586545713174, + "learning_rate": 2.9791189641720385e-06, + "loss": 0.1374, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4620 + }, + { + "epoch": 0.44439101793527913, + "grad_norm": 2.3770311745380397, + "learning_rate": 2.9783702629067675e-06, + "loss": 0.1488, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4621 + }, + { + "epoch": 0.4444871856517767, + "grad_norm": 2.002874057421743, + "learning_rate": 2.9776215171038925e-06, + "loss": 0.1212, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4622 + }, + { + "epoch": 0.44458335336827426, + "grad_norm": 2.718279706713074, + "learning_rate": 2.976872726833123e-06, + "loss": 0.138, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4623 + }, + { + "epoch": 0.4446795210847718, + "grad_norm": 1.6030748053946118, + "learning_rate": 2.9761238921641753e-06, + "loss": 0.1376, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4624 + }, + { + "epoch": 0.4447756888012694, + "grad_norm": 2.9260597589709816, + "learning_rate": 2.975375013166767e-06, + "loss": 0.153, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4625 + }, + { + "epoch": 0.444871856517767, + "grad_norm": 2.929297029682053, + "learning_rate": 2.9746260899106205e-06, + "loss": 0.1325, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4626 + }, + { + "epoch": 0.44496802423426457, + "grad_norm": 3.372795200805037, + "learning_rate": 2.9738771224654635e-06, + "loss": 0.1436, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4627 + }, + { + "epoch": 0.44506419195076213, + "grad_norm": 1.7414464264237326, + "learning_rate": 2.973128110901026e-06, + "loss": 0.1463, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4628 + }, + { + "epoch": 0.4451603596672597, + "grad_norm": 1.8856182315210794, + "learning_rate": 2.9723790552870434e-06, + "loss": 0.1747, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4629 + }, + { + "epoch": 0.44525652738375726, + "grad_norm": 1.499124859015934, + "learning_rate": 2.9716299556932554e-06, + "loss": 0.1523, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4630 + }, + { + "epoch": 0.4453526951002548, + "grad_norm": 1.6732802701994272, + "learning_rate": 2.9708808121894047e-06, + "loss": 0.1317, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4631 + }, + { + "epoch": 0.4454488628167524, + "grad_norm": 1.9309256362590288, + "learning_rate": 2.970131624845239e-06, + "loss": 0.1131, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4632 + }, + { + "epoch": 0.44554503053325, + "grad_norm": 1.291851042825265, + "learning_rate": 2.969382393730509e-06, + "loss": 0.1417, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4633 + }, + { + "epoch": 0.44564119824974757, + "grad_norm": 1.3837809406898494, + "learning_rate": 2.968633118914971e-06, + "loss": 0.1233, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4634 + }, + { + "epoch": 0.44573736596624514, + "grad_norm": 2.5186463263461363, + "learning_rate": 2.967883800468384e-06, + "loss": 0.1387, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4635 + }, + { + "epoch": 0.4458335336827427, + "grad_norm": 2.1633358672772243, + "learning_rate": 2.967134438460513e-06, + "loss": 0.1444, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4636 + }, + { + "epoch": 0.44592970139924026, + "grad_norm": 3.9374201384446543, + "learning_rate": 2.966385032961123e-06, + "loss": 0.162, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4637 + }, + { + "epoch": 0.4460258691157378, + "grad_norm": 1.5256612485732655, + "learning_rate": 2.965635584039989e-06, + "loss": 0.1167, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4638 + }, + { + "epoch": 0.4461220368322354, + "grad_norm": 1.33887432991447, + "learning_rate": 2.9648860917668835e-06, + "loss": 0.1206, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4639 + }, + { + "epoch": 0.446218204548733, + "grad_norm": 1.4495643775193916, + "learning_rate": 2.9641365562115886e-06, + "loss": 0.1311, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4640 + }, + { + "epoch": 0.4463143722652306, + "grad_norm": 1.5235063556768376, + "learning_rate": 2.9633869774438884e-06, + "loss": 0.1282, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4641 + }, + { + "epoch": 0.44641053998172814, + "grad_norm": 1.438530512796499, + "learning_rate": 2.9626373555335684e-06, + "loss": 0.0963, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4642 + }, + { + "epoch": 0.4465067076982257, + "grad_norm": 2.4350597457522576, + "learning_rate": 2.9618876905504228e-06, + "loss": 0.1195, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4643 + }, + { + "epoch": 0.44660287541472327, + "grad_norm": 1.7286002558418432, + "learning_rate": 2.9611379825642466e-06, + "loss": 0.1333, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4644 + }, + { + "epoch": 0.44669904313122083, + "grad_norm": 1.8917464264288941, + "learning_rate": 2.9603882316448397e-06, + "loss": 0.1086, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4645 + }, + { + "epoch": 0.4467952108477184, + "grad_norm": 1.4604273642956425, + "learning_rate": 2.959638437862006e-06, + "loss": 0.1192, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4646 + }, + { + "epoch": 0.446891378564216, + "grad_norm": 1.951020089096933, + "learning_rate": 2.958888601285554e-06, + "loss": 0.11, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4647 + }, + { + "epoch": 0.4469875462807136, + "grad_norm": 1.5249349613476373, + "learning_rate": 2.958138721985294e-06, + "loss": 0.1028, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4648 + }, + { + "epoch": 0.44708371399721114, + "grad_norm": 1.8086645214491168, + "learning_rate": 2.957388800031044e-06, + "loss": 0.13, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4649 + }, + { + "epoch": 0.4471798817137087, + "grad_norm": 2.509994418319259, + "learning_rate": 2.956638835492622e-06, + "loss": 0.1175, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4650 + }, + { + "epoch": 0.44727604943020627, + "grad_norm": 3.0777602141302127, + "learning_rate": 2.955888828439853e-06, + "loss": 0.1045, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4651 + }, + { + "epoch": 0.44737221714670383, + "grad_norm": 2.10169366811575, + "learning_rate": 2.955138778942564e-06, + "loss": 0.1678, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4652 + }, + { + "epoch": 0.4474683848632014, + "grad_norm": 1.9318622988919294, + "learning_rate": 2.954388687070587e-06, + "loss": 0.1283, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4653 + }, + { + "epoch": 0.447564552579699, + "grad_norm": 1.9107953657545926, + "learning_rate": 2.9536385528937566e-06, + "loss": 0.139, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4654 + }, + { + "epoch": 0.4476607202961966, + "grad_norm": 1.8439821949485449, + "learning_rate": 2.952888376481915e-06, + "loss": 0.1214, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4655 + }, + { + "epoch": 0.44775688801269414, + "grad_norm": 2.0292446533395636, + "learning_rate": 2.9521381579049026e-06, + "loss": 0.1527, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4656 + }, + { + "epoch": 0.4478530557291917, + "grad_norm": 2.848358846179711, + "learning_rate": 2.951387897232569e-06, + "loss": 0.1452, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4657 + }, + { + "epoch": 0.44794922344568927, + "grad_norm": 4.3341024349494655, + "learning_rate": 2.950637594534765e-06, + "loss": 0.1615, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4658 + }, + { + "epoch": 0.44804539116218683, + "grad_norm": 1.641515354008128, + "learning_rate": 2.949887249881345e-06, + "loss": 0.1246, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4659 + }, + { + "epoch": 0.4481415588786844, + "grad_norm": 1.5776991905371571, + "learning_rate": 2.949136863342169e-06, + "loss": 0.149, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4660 + }, + { + "epoch": 0.448237726595182, + "grad_norm": 2.4617076820485884, + "learning_rate": 2.948386434987101e-06, + "loss": 0.1474, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4661 + }, + { + "epoch": 0.4483338943116796, + "grad_norm": 2.300064742539839, + "learning_rate": 2.9476359648860066e-06, + "loss": 0.1086, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4662 + }, + { + "epoch": 0.44843006202817715, + "grad_norm": 1.3754260530716076, + "learning_rate": 2.9468854531087567e-06, + "loss": 0.1405, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4663 + }, + { + "epoch": 0.4485262297446747, + "grad_norm": 1.5935767984565234, + "learning_rate": 2.9461348997252263e-06, + "loss": 0.1343, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4664 + }, + { + "epoch": 0.4486223974611723, + "grad_norm": 1.6127744617991921, + "learning_rate": 2.945384304805294e-06, + "loss": 0.1515, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4665 + }, + { + "epoch": 0.44871856517766984, + "grad_norm": 1.653601319966831, + "learning_rate": 2.9446336684188433e-06, + "loss": 0.1306, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4666 + }, + { + "epoch": 0.4488147328941674, + "grad_norm": 1.2815799973011746, + "learning_rate": 2.943882990635759e-06, + "loss": 0.1214, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4667 + }, + { + "epoch": 0.448910900610665, + "grad_norm": 1.641224039127705, + "learning_rate": 2.9431322715259313e-06, + "loss": 0.1197, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4668 + }, + { + "epoch": 0.4490070683271626, + "grad_norm": 1.5700511341998191, + "learning_rate": 2.9423815111592557e-06, + "loss": 0.1381, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4669 + }, + { + "epoch": 0.44910323604366015, + "grad_norm": 1.6658116963460647, + "learning_rate": 2.9416307096056284e-06, + "loss": 0.127, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4670 + }, + { + "epoch": 0.4491994037601577, + "grad_norm": 2.424640589887683, + "learning_rate": 2.940879866934952e-06, + "loss": 0.1388, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4671 + }, + { + "epoch": 0.4492955714766553, + "grad_norm": 1.9712920814290684, + "learning_rate": 2.9401289832171325e-06, + "loss": 0.1401, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4672 + }, + { + "epoch": 0.44939173919315284, + "grad_norm": 2.109162276035765, + "learning_rate": 2.939378058522078e-06, + "loss": 0.1286, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4673 + }, + { + "epoch": 0.4494879069096504, + "grad_norm": 1.8395712490788945, + "learning_rate": 2.938627092919703e-06, + "loss": 0.1651, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4674 + }, + { + "epoch": 0.449584074626148, + "grad_norm": 1.4755253710307332, + "learning_rate": 2.937876086479924e-06, + "loss": 0.1113, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4675 + }, + { + "epoch": 0.4496802423426456, + "grad_norm": 1.7749341007632353, + "learning_rate": 2.9371250392726613e-06, + "loss": 0.1127, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4676 + }, + { + "epoch": 0.44977641005914315, + "grad_norm": 1.7939838423483843, + "learning_rate": 2.9363739513678397e-06, + "loss": 0.155, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4677 + }, + { + "epoch": 0.4498725777756407, + "grad_norm": 1.5034385511837915, + "learning_rate": 2.9356228228353882e-06, + "loss": 0.1103, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4678 + }, + { + "epoch": 0.4499687454921383, + "grad_norm": 2.078631303578714, + "learning_rate": 2.9348716537452383e-06, + "loss": 0.1334, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4679 + }, + { + "epoch": 0.45006491320863584, + "grad_norm": 1.913002993784918, + "learning_rate": 2.9341204441673267e-06, + "loss": 0.0959, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4680 + }, + { + "epoch": 0.4501610809251334, + "grad_norm": 1.685786046904764, + "learning_rate": 2.9333691941715915e-06, + "loss": 0.1395, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4681 + }, + { + "epoch": 0.450257248641631, + "grad_norm": 2.8714383324794683, + "learning_rate": 2.932617903827978e-06, + "loss": 0.1245, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4682 + }, + { + "epoch": 0.4503534163581286, + "grad_norm": 3.0906630136857434, + "learning_rate": 2.931866573206432e-06, + "loss": 0.1259, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4683 + }, + { + "epoch": 0.45044958407462615, + "grad_norm": 3.5215389103153907, + "learning_rate": 2.9311152023769056e-06, + "loss": 0.1502, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4684 + }, + { + "epoch": 0.4505457517911237, + "grad_norm": 1.5733933700343963, + "learning_rate": 2.9303637914093532e-06, + "loss": 0.1193, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4685 + }, + { + "epoch": 0.4506419195076213, + "grad_norm": 1.6230673456902753, + "learning_rate": 2.929612340373733e-06, + "loss": 0.1245, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4686 + }, + { + "epoch": 0.45073808722411884, + "grad_norm": 2.075377024879417, + "learning_rate": 2.928860849340007e-06, + "loss": 0.1087, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4687 + }, + { + "epoch": 0.4508342549406164, + "grad_norm": 1.7465293072222026, + "learning_rate": 2.9281093183781406e-06, + "loss": 0.1457, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4688 + }, + { + "epoch": 0.450930422657114, + "grad_norm": 1.722013177812104, + "learning_rate": 2.9273577475581056e-06, + "loss": 0.1417, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4689 + }, + { + "epoch": 0.4510265903736116, + "grad_norm": 2.1508703391026125, + "learning_rate": 2.9266061369498732e-06, + "loss": 0.1219, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4690 + }, + { + "epoch": 0.45112275809010915, + "grad_norm": 2.3174963758315132, + "learning_rate": 2.9258544866234206e-06, + "loss": 0.1385, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4691 + }, + { + "epoch": 0.4512189258066067, + "grad_norm": 1.566780356116064, + "learning_rate": 2.9251027966487304e-06, + "loss": 0.1335, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4692 + }, + { + "epoch": 0.4513150935231043, + "grad_norm": 1.985029877851998, + "learning_rate": 2.9243510670957854e-06, + "loss": 0.1517, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4693 + }, + { + "epoch": 0.45141126123960185, + "grad_norm": 2.965584277448096, + "learning_rate": 2.923599298034574e-06, + "loss": 0.1225, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4694 + }, + { + "epoch": 0.4515074289560994, + "grad_norm": 1.4972972839343044, + "learning_rate": 2.9228474895350872e-06, + "loss": 0.1285, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4695 + }, + { + "epoch": 0.45160359667259703, + "grad_norm": 1.8757965250221453, + "learning_rate": 2.922095641667322e-06, + "loss": 0.1407, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4696 + }, + { + "epoch": 0.4516997643890946, + "grad_norm": 1.5932475572021771, + "learning_rate": 2.9213437545012775e-06, + "loss": 0.105, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4697 + }, + { + "epoch": 0.45179593210559216, + "grad_norm": 1.5516776093152378, + "learning_rate": 2.9205918281069545e-06, + "loss": 0.1258, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4698 + }, + { + "epoch": 0.4518920998220897, + "grad_norm": 1.9445106559828205, + "learning_rate": 2.9198398625543623e-06, + "loss": 0.1193, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4699 + }, + { + "epoch": 0.4519882675385873, + "grad_norm": 2.9917802389162365, + "learning_rate": 2.9190878579135077e-06, + "loss": 0.1271, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4700 + }, + { + "epoch": 0.45208443525508485, + "grad_norm": 2.076743758723371, + "learning_rate": 2.918335814254408e-06, + "loss": 0.1218, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4701 + }, + { + "epoch": 0.4521806029715824, + "grad_norm": 2.962109089065507, + "learning_rate": 2.9175837316470775e-06, + "loss": 0.1436, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4702 + }, + { + "epoch": 0.45227677068808003, + "grad_norm": 2.8657982809688987, + "learning_rate": 2.916831610161539e-06, + "loss": 0.1485, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4703 + }, + { + "epoch": 0.4523729384045776, + "grad_norm": 1.4464517003312845, + "learning_rate": 2.9160794498678164e-06, + "loss": 0.1056, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4704 + }, + { + "epoch": 0.45246910612107516, + "grad_norm": 3.224859191677756, + "learning_rate": 2.9153272508359375e-06, + "loss": 0.1132, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4705 + }, + { + "epoch": 0.4525652738375727, + "grad_norm": 5.531392690667879, + "learning_rate": 2.9145750131359356e-06, + "loss": 0.1402, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4706 + }, + { + "epoch": 0.4526614415540703, + "grad_norm": 3.8949154309853604, + "learning_rate": 2.9138227368378446e-06, + "loss": 0.1443, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4707 + }, + { + "epoch": 0.45275760927056785, + "grad_norm": 3.5677019380264134, + "learning_rate": 2.913070422011705e-06, + "loss": 0.1434, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4708 + }, + { + "epoch": 0.4528537769870654, + "grad_norm": 1.869603587707582, + "learning_rate": 2.9123180687275576e-06, + "loss": 0.1451, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4709 + }, + { + "epoch": 0.45294994470356303, + "grad_norm": 2.929297559397855, + "learning_rate": 2.9115656770554494e-06, + "loss": 0.1564, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4710 + }, + { + "epoch": 0.4530461124200606, + "grad_norm": 2.1236530201110204, + "learning_rate": 2.9108132470654316e-06, + "loss": 0.139, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4711 + }, + { + "epoch": 0.45314228013655816, + "grad_norm": 3.3652419250206917, + "learning_rate": 2.9100607788275547e-06, + "loss": 0.1229, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4712 + }, + { + "epoch": 0.4532384478530557, + "grad_norm": 4.075288285930219, + "learning_rate": 2.9093082724118776e-06, + "loss": 0.0945, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4713 + }, + { + "epoch": 0.4533346155695533, + "grad_norm": 2.514986867685762, + "learning_rate": 2.908555727888461e-06, + "loss": 0.1321, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4714 + }, + { + "epoch": 0.45343078328605085, + "grad_norm": 1.6090881223418763, + "learning_rate": 2.907803145327367e-06, + "loss": 0.1161, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4715 + }, + { + "epoch": 0.4535269510025484, + "grad_norm": 1.9856875076963167, + "learning_rate": 2.9070505247986654e-06, + "loss": 0.1353, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4716 + }, + { + "epoch": 0.45362311871904604, + "grad_norm": 1.5386153816053125, + "learning_rate": 2.9062978663724257e-06, + "loss": 0.0961, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4717 + }, + { + "epoch": 0.4537192864355436, + "grad_norm": 2.4510380513395797, + "learning_rate": 2.905545170118723e-06, + "loss": 0.1193, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4718 + }, + { + "epoch": 0.45381545415204116, + "grad_norm": 4.3216804285197306, + "learning_rate": 2.904792436107635e-06, + "loss": 0.1437, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4719 + }, + { + "epoch": 0.45391162186853873, + "grad_norm": 3.0985098056140483, + "learning_rate": 2.904039664409244e-06, + "loss": 0.1541, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4720 + }, + { + "epoch": 0.4540077895850363, + "grad_norm": 1.72588033177283, + "learning_rate": 2.9032868550936345e-06, + "loss": 0.1228, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4721 + }, + { + "epoch": 0.45410395730153386, + "grad_norm": 1.4024593687687368, + "learning_rate": 2.9025340082308955e-06, + "loss": 0.1003, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4722 + }, + { + "epoch": 0.4542001250180314, + "grad_norm": 2.226401319510764, + "learning_rate": 2.901781123891119e-06, + "loss": 0.127, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4723 + }, + { + "epoch": 0.45429629273452904, + "grad_norm": 1.9243757287146055, + "learning_rate": 2.9010282021444008e-06, + "loss": 0.1533, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4724 + }, + { + "epoch": 0.4543924604510266, + "grad_norm": 2.1938059710466637, + "learning_rate": 2.9002752430608403e-06, + "loss": 0.1149, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4725 + }, + { + "epoch": 0.45448862816752417, + "grad_norm": 1.8878680853465273, + "learning_rate": 2.8995222467105395e-06, + "loss": 0.1303, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4726 + }, + { + "epoch": 0.45458479588402173, + "grad_norm": 2.257472962872771, + "learning_rate": 2.8987692131636045e-06, + "loss": 0.119, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4727 + }, + { + "epoch": 0.4546809636005193, + "grad_norm": 1.41704604978301, + "learning_rate": 2.8980161424901453e-06, + "loss": 0.1144, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4728 + }, + { + "epoch": 0.45477713131701686, + "grad_norm": 1.663860828227663, + "learning_rate": 2.897263034760275e-06, + "loss": 0.1145, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4729 + }, + { + "epoch": 0.4548732990335144, + "grad_norm": 2.0582951123683286, + "learning_rate": 2.8965098900441086e-06, + "loss": 0.1288, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4730 + }, + { + "epoch": 0.45496946675001204, + "grad_norm": 1.6764067303433283, + "learning_rate": 2.8957567084117677e-06, + "loss": 0.1323, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4731 + }, + { + "epoch": 0.4550656344665096, + "grad_norm": 2.536523295128465, + "learning_rate": 2.895003489933375e-06, + "loss": 0.1321, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4732 + }, + { + "epoch": 0.45516180218300717, + "grad_norm": 2.0245460430146025, + "learning_rate": 2.8942502346790566e-06, + "loss": 0.1525, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4733 + }, + { + "epoch": 0.45525796989950473, + "grad_norm": 1.6256736987020166, + "learning_rate": 2.8934969427189442e-06, + "loss": 0.1184, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4734 + }, + { + "epoch": 0.4553541376160023, + "grad_norm": 1.5831210620242597, + "learning_rate": 2.8927436141231695e-06, + "loss": 0.1087, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4735 + }, + { + "epoch": 0.45545030533249986, + "grad_norm": 1.4070234095783056, + "learning_rate": 2.8919902489618713e-06, + "loss": 0.1118, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4736 + }, + { + "epoch": 0.4555464730489974, + "grad_norm": 1.615155615482962, + "learning_rate": 2.8912368473051876e-06, + "loss": 0.1215, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4737 + }, + { + "epoch": 0.45564264076549504, + "grad_norm": 2.184602512637066, + "learning_rate": 2.8904834092232643e-06, + "loss": 0.1476, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4738 + }, + { + "epoch": 0.4557388084819926, + "grad_norm": 1.7794314374373112, + "learning_rate": 2.8897299347862486e-06, + "loss": 0.1317, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4739 + }, + { + "epoch": 0.45583497619849017, + "grad_norm": 1.5213587062662979, + "learning_rate": 2.888976424064289e-06, + "loss": 0.1283, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4740 + }, + { + "epoch": 0.45593114391498774, + "grad_norm": 1.919091751390636, + "learning_rate": 2.8882228771275424e-06, + "loss": 0.1178, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4741 + }, + { + "epoch": 0.4560273116314853, + "grad_norm": 2.6893800651051345, + "learning_rate": 2.8874692940461634e-06, + "loss": 0.1656, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4742 + }, + { + "epoch": 0.45612347934798286, + "grad_norm": 2.2471362036537457, + "learning_rate": 2.8867156748903148e-06, + "loss": 0.1652, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4743 + }, + { + "epoch": 0.4562196470644804, + "grad_norm": 1.4371815068029525, + "learning_rate": 2.8859620197301584e-06, + "loss": 0.1138, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4744 + }, + { + "epoch": 0.45631581478097805, + "grad_norm": 1.6031826788263106, + "learning_rate": 2.8852083286358647e-06, + "loss": 0.1433, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4745 + }, + { + "epoch": 0.4564119824974756, + "grad_norm": 1.7747177761501858, + "learning_rate": 2.8844546016776014e-06, + "loss": 0.1021, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4746 + }, + { + "epoch": 0.4565081502139732, + "grad_norm": 1.8345140227566419, + "learning_rate": 2.8837008389255443e-06, + "loss": 0.1288, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4747 + }, + { + "epoch": 0.45660431793047074, + "grad_norm": 1.690500181567897, + "learning_rate": 2.88294704044987e-06, + "loss": 0.151, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4748 + }, + { + "epoch": 0.4567004856469683, + "grad_norm": 1.9099336893890242, + "learning_rate": 2.88219320632076e-06, + "loss": 0.1262, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4749 + }, + { + "epoch": 0.45679665336346587, + "grad_norm": 2.0856236653079008, + "learning_rate": 2.8814393366083975e-06, + "loss": 0.1479, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4750 + }, + { + "epoch": 0.45689282107996343, + "grad_norm": 1.869482977867498, + "learning_rate": 2.8806854313829707e-06, + "loss": 0.0941, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4751 + }, + { + "epoch": 0.45698898879646105, + "grad_norm": 2.931973182009861, + "learning_rate": 2.879931490714669e-06, + "loss": 0.1151, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4752 + }, + { + "epoch": 0.4570851565129586, + "grad_norm": 1.8598222237229134, + "learning_rate": 2.8791775146736888e-06, + "loss": 0.1423, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4753 + }, + { + "epoch": 0.4571813242294562, + "grad_norm": 1.3294987950762946, + "learning_rate": 2.8784235033302243e-06, + "loss": 0.1059, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4754 + }, + { + "epoch": 0.45727749194595374, + "grad_norm": 2.7067546486941927, + "learning_rate": 2.8776694567544782e-06, + "loss": 0.1179, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4755 + }, + { + "epoch": 0.4573736596624513, + "grad_norm": 1.3568187908668765, + "learning_rate": 2.876915375016654e-06, + "loss": 0.0949, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4756 + }, + { + "epoch": 0.45746982737894887, + "grad_norm": 2.0653647137575004, + "learning_rate": 2.876161258186958e-06, + "loss": 0.1449, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4757 + }, + { + "epoch": 0.45756599509544643, + "grad_norm": 3.0193638796244637, + "learning_rate": 2.875407106335601e-06, + "loss": 0.1775, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4758 + }, + { + "epoch": 0.45766216281194405, + "grad_norm": 1.6805669236510763, + "learning_rate": 2.874652919532797e-06, + "loss": 0.1359, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4759 + }, + { + "epoch": 0.4577583305284416, + "grad_norm": 2.2750610633077115, + "learning_rate": 2.8738986978487625e-06, + "loss": 0.1292, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4760 + }, + { + "epoch": 0.4578544982449392, + "grad_norm": 1.9763792737802865, + "learning_rate": 2.8731444413537187e-06, + "loss": 0.1524, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4761 + }, + { + "epoch": 0.45795066596143674, + "grad_norm": 2.499730412711037, + "learning_rate": 2.872390150117887e-06, + "loss": 0.1472, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4762 + }, + { + "epoch": 0.4580468336779343, + "grad_norm": 2.4504840349966317, + "learning_rate": 2.871635824211495e-06, + "loss": 0.1455, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4763 + }, + { + "epoch": 0.45814300139443187, + "grad_norm": 1.9957187709536846, + "learning_rate": 2.8708814637047734e-06, + "loss": 0.111, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4764 + }, + { + "epoch": 0.45823916911092943, + "grad_norm": 2.3300457793800575, + "learning_rate": 2.870127068667954e-06, + "loss": 0.1429, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4765 + }, + { + "epoch": 0.45833533682742705, + "grad_norm": 1.462765304555601, + "learning_rate": 2.8693726391712734e-06, + "loss": 0.1447, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4766 + }, + { + "epoch": 0.4584315045439246, + "grad_norm": 2.527930719415992, + "learning_rate": 2.868618175284972e-06, + "loss": 0.1087, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4767 + }, + { + "epoch": 0.4585276722604222, + "grad_norm": 2.678170194846237, + "learning_rate": 2.8678636770792907e-06, + "loss": 0.1506, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4768 + }, + { + "epoch": 0.45862383997691974, + "grad_norm": 2.8228089211460765, + "learning_rate": 2.8671091446244765e-06, + "loss": 0.1312, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4769 + }, + { + "epoch": 0.4587200076934173, + "grad_norm": 1.9681831176719167, + "learning_rate": 2.866354577990779e-06, + "loss": 0.1349, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4770 + }, + { + "epoch": 0.4588161754099149, + "grad_norm": 1.9699859464934484, + "learning_rate": 2.8655999772484495e-06, + "loss": 0.1588, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4771 + }, + { + "epoch": 0.45891234312641244, + "grad_norm": 1.8712099437795797, + "learning_rate": 2.8648453424677435e-06, + "loss": 0.1661, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4772 + }, + { + "epoch": 0.45900851084291006, + "grad_norm": 2.3385879090556276, + "learning_rate": 2.8640906737189207e-06, + "loss": 0.1295, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4773 + }, + { + "epoch": 0.4591046785594076, + "grad_norm": 2.0010946555140743, + "learning_rate": 2.8633359710722412e-06, + "loss": 0.1292, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4774 + }, + { + "epoch": 0.4592008462759052, + "grad_norm": 4.164835983504363, + "learning_rate": 2.8625812345979716e-06, + "loss": 0.1177, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4775 + }, + { + "epoch": 0.45929701399240275, + "grad_norm": 1.959252840657313, + "learning_rate": 2.8618264643663783e-06, + "loss": 0.1352, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4776 + }, + { + "epoch": 0.4593931817089003, + "grad_norm": 1.9261468895215974, + "learning_rate": 2.8610716604477333e-06, + "loss": 0.1346, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4777 + }, + { + "epoch": 0.4594893494253979, + "grad_norm": 1.7922178711413124, + "learning_rate": 2.860316822912312e-06, + "loss": 0.1041, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4778 + }, + { + "epoch": 0.45958551714189544, + "grad_norm": 1.2140925146142854, + "learning_rate": 2.8595619518303894e-06, + "loss": 0.1062, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4779 + }, + { + "epoch": 0.45968168485839306, + "grad_norm": 1.738729747536717, + "learning_rate": 2.8588070472722486e-06, + "loss": 0.1033, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4780 + }, + { + "epoch": 0.4597778525748906, + "grad_norm": 2.8012210155928865, + "learning_rate": 2.8580521093081726e-06, + "loss": 0.1456, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4781 + }, + { + "epoch": 0.4598740202913882, + "grad_norm": 2.6083936017333333, + "learning_rate": 2.8572971380084476e-06, + "loss": 0.1261, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4782 + }, + { + "epoch": 0.45997018800788575, + "grad_norm": 1.4498700289825066, + "learning_rate": 2.856542133443363e-06, + "loss": 0.1165, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4783 + }, + { + "epoch": 0.4600663557243833, + "grad_norm": 2.2960409885385573, + "learning_rate": 2.8557870956832135e-06, + "loss": 0.1498, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4784 + }, + { + "epoch": 0.4601625234408809, + "grad_norm": 2.189128953685603, + "learning_rate": 2.8550320247982937e-06, + "loss": 0.1246, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4785 + }, + { + "epoch": 0.46025869115737844, + "grad_norm": 2.1590396926747335, + "learning_rate": 2.8542769208589038e-06, + "loss": 0.1088, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4786 + }, + { + "epoch": 0.46035485887387606, + "grad_norm": 1.7208180919483627, + "learning_rate": 2.853521783935346e-06, + "loss": 0.1188, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4787 + }, + { + "epoch": 0.4604510265903736, + "grad_norm": 2.252894626334138, + "learning_rate": 2.8527666140979253e-06, + "loss": 0.1462, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4788 + }, + { + "epoch": 0.4605471943068712, + "grad_norm": 1.6002394438869614, + "learning_rate": 2.8520114114169507e-06, + "loss": 0.1271, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4789 + }, + { + "epoch": 0.46064336202336875, + "grad_norm": 1.8269647238881763, + "learning_rate": 2.8512561759627322e-06, + "loss": 0.1458, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4790 + }, + { + "epoch": 0.4607395297398663, + "grad_norm": 1.6567705308439353, + "learning_rate": 2.8505009078055856e-06, + "loss": 0.1112, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4791 + }, + { + "epoch": 0.4608356974563639, + "grad_norm": 1.6307960809314765, + "learning_rate": 2.8497456070158285e-06, + "loss": 0.124, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4792 + }, + { + "epoch": 0.46093186517286144, + "grad_norm": 1.9046717112223142, + "learning_rate": 2.8489902736637807e-06, + "loss": 0.1388, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4793 + }, + { + "epoch": 0.46102803288935906, + "grad_norm": 1.8097698589244497, + "learning_rate": 2.8482349078197667e-06, + "loss": 0.1515, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4794 + }, + { + "epoch": 0.4611242006058566, + "grad_norm": 1.6209694386593922, + "learning_rate": 2.847479509554113e-06, + "loss": 0.1405, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4795 + }, + { + "epoch": 0.4612203683223542, + "grad_norm": 1.4035917279006715, + "learning_rate": 2.8467240789371493e-06, + "loss": 0.1048, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4796 + }, + { + "epoch": 0.46131653603885175, + "grad_norm": 1.5605976250607825, + "learning_rate": 2.8459686160392075e-06, + "loss": 0.1144, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4797 + }, + { + "epoch": 0.4614127037553493, + "grad_norm": 2.140308092270191, + "learning_rate": 2.8452131209306237e-06, + "loss": 0.1274, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4798 + }, + { + "epoch": 0.4615088714718469, + "grad_norm": 2.1980962114727087, + "learning_rate": 2.8444575936817377e-06, + "loss": 0.1027, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4799 + }, + { + "epoch": 0.46160503918834445, + "grad_norm": 2.527500219757651, + "learning_rate": 2.8437020343628896e-06, + "loss": 0.1551, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4800 + }, + { + "epoch": 0.46170120690484207, + "grad_norm": 3.266641608215607, + "learning_rate": 2.8429464430444255e-06, + "loss": 0.1545, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4801 + }, + { + "epoch": 0.46179737462133963, + "grad_norm": 1.716795290702543, + "learning_rate": 2.842190819796691e-06, + "loss": 0.1358, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4802 + }, + { + "epoch": 0.4618935423378372, + "grad_norm": 1.9342797059751018, + "learning_rate": 2.8414351646900397e-06, + "loss": 0.12, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4803 + }, + { + "epoch": 0.46198971005433476, + "grad_norm": 1.8760970989934773, + "learning_rate": 2.840679477794822e-06, + "loss": 0.1269, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4804 + }, + { + "epoch": 0.4620858777708323, + "grad_norm": 2.640928388401656, + "learning_rate": 2.839923759181396e-06, + "loss": 0.136, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4805 + }, + { + "epoch": 0.4621820454873299, + "grad_norm": 3.234018948037753, + "learning_rate": 2.8391680089201224e-06, + "loss": 0.1567, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4806 + }, + { + "epoch": 0.46227821320382745, + "grad_norm": 1.6512621814125485, + "learning_rate": 2.8384122270813615e-06, + "loss": 0.1441, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4807 + }, + { + "epoch": 0.46237438092032507, + "grad_norm": 2.4341529258407273, + "learning_rate": 2.8376564137354797e-06, + "loss": 0.1177, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4808 + }, + { + "epoch": 0.46247054863682263, + "grad_norm": 2.335508233880386, + "learning_rate": 2.8369005689528454e-06, + "loss": 0.1381, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4809 + }, + { + "epoch": 0.4625667163533202, + "grad_norm": 1.4581172684402939, + "learning_rate": 2.8361446928038298e-06, + "loss": 0.138, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4810 + }, + { + "epoch": 0.46266288406981776, + "grad_norm": 1.3729579585547367, + "learning_rate": 2.835388785358807e-06, + "loss": 0.1147, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4811 + }, + { + "epoch": 0.4627590517863153, + "grad_norm": 2.1690890429180008, + "learning_rate": 2.8346328466881544e-06, + "loss": 0.16, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4812 + }, + { + "epoch": 0.4628552195028129, + "grad_norm": 1.7723010114314863, + "learning_rate": 2.833876876862251e-06, + "loss": 0.1281, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4813 + }, + { + "epoch": 0.46295138721931045, + "grad_norm": 1.9439802126067436, + "learning_rate": 2.833120875951481e-06, + "loss": 0.1229, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4814 + }, + { + "epoch": 0.46304755493580807, + "grad_norm": 1.507300353436094, + "learning_rate": 2.832364844026229e-06, + "loss": 0.0958, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4815 + }, + { + "epoch": 0.46314372265230563, + "grad_norm": 4.089246757080969, + "learning_rate": 2.831608781156885e-06, + "loss": 0.1425, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4816 + }, + { + "epoch": 0.4632398903688032, + "grad_norm": 3.3758007448083958, + "learning_rate": 2.83085268741384e-06, + "loss": 0.1467, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4817 + }, + { + "epoch": 0.46333605808530076, + "grad_norm": 2.4397926926315803, + "learning_rate": 2.830096562867487e-06, + "loss": 0.1233, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4818 + }, + { + "epoch": 0.4634322258017983, + "grad_norm": 3.361930729403923, + "learning_rate": 2.8293404075882253e-06, + "loss": 0.1485, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4819 + }, + { + "epoch": 0.4635283935182959, + "grad_norm": 2.4559926515814294, + "learning_rate": 2.8285842216464544e-06, + "loss": 0.13, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4820 + }, + { + "epoch": 0.46362456123479345, + "grad_norm": 2.0515172193794795, + "learning_rate": 2.8278280051125767e-06, + "loss": 0.1361, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4821 + }, + { + "epoch": 0.4637207289512911, + "grad_norm": 3.8391983462973163, + "learning_rate": 2.827071758056999e-06, + "loss": 0.15, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4822 + }, + { + "epoch": 0.46381689666778864, + "grad_norm": 4.202379962775832, + "learning_rate": 2.82631548055013e-06, + "loss": 0.1573, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4823 + }, + { + "epoch": 0.4639130643842862, + "grad_norm": 4.670522773658363, + "learning_rate": 2.82555917266238e-06, + "loss": 0.1288, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4824 + }, + { + "epoch": 0.46400923210078376, + "grad_norm": 5.838120111439052, + "learning_rate": 2.824802834464164e-06, + "loss": 0.1483, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4825 + }, + { + "epoch": 0.4641053998172813, + "grad_norm": 2.170039759560659, + "learning_rate": 2.8240464660259003e-06, + "loss": 0.1387, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4826 + }, + { + "epoch": 0.4642015675337789, + "grad_norm": 2.231239752231562, + "learning_rate": 2.8232900674180076e-06, + "loss": 0.1339, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4827 + }, + { + "epoch": 0.46429773525027646, + "grad_norm": 1.6462559758189859, + "learning_rate": 2.8225336387109087e-06, + "loss": 0.1288, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4828 + }, + { + "epoch": 0.4643939029667741, + "grad_norm": 1.5628693613284603, + "learning_rate": 2.8217771799750304e-06, + "loss": 0.1366, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4829 + }, + { + "epoch": 0.46449007068327164, + "grad_norm": 1.200575983030578, + "learning_rate": 2.8210206912808e-06, + "loss": 0.0922, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4830 + }, + { + "epoch": 0.4645862383997692, + "grad_norm": 5.548990734491997, + "learning_rate": 2.820264172698649e-06, + "loss": 0.1356, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4831 + }, + { + "epoch": 0.46468240611626677, + "grad_norm": 4.388904341821171, + "learning_rate": 2.8195076242990124e-06, + "loss": 0.1325, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4832 + }, + { + "epoch": 0.46477857383276433, + "grad_norm": 3.458181863959494, + "learning_rate": 2.818751046152325e-06, + "loss": 0.1174, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4833 + }, + { + "epoch": 0.4648747415492619, + "grad_norm": 2.257355661262846, + "learning_rate": 2.8179944383290277e-06, + "loss": 0.1178, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4834 + }, + { + "epoch": 0.46497090926575946, + "grad_norm": 2.419847010383347, + "learning_rate": 2.817237800899562e-06, + "loss": 0.1228, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4835 + }, + { + "epoch": 0.4650670769822571, + "grad_norm": 3.517109153679133, + "learning_rate": 2.8164811339343736e-06, + "loss": 0.1376, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4836 + }, + { + "epoch": 0.46516324469875464, + "grad_norm": 3.2377054666448526, + "learning_rate": 2.8157244375039105e-06, + "loss": 0.1037, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4837 + }, + { + "epoch": 0.4652594124152522, + "grad_norm": 2.26015734849712, + "learning_rate": 2.814967711678622e-06, + "loss": 0.1158, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4838 + }, + { + "epoch": 0.46535558013174977, + "grad_norm": 1.8255348669781422, + "learning_rate": 2.8142109565289628e-06, + "loss": 0.1219, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4839 + }, + { + "epoch": 0.46545174784824733, + "grad_norm": 2.112597907282783, + "learning_rate": 2.813454172125389e-06, + "loss": 0.1274, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4840 + }, + { + "epoch": 0.4655479155647449, + "grad_norm": 2.6979246387971245, + "learning_rate": 2.8126973585383578e-06, + "loss": 0.1306, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4841 + }, + { + "epoch": 0.46564408328124246, + "grad_norm": 2.5434103478366685, + "learning_rate": 2.811940515838331e-06, + "loss": 0.1692, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4842 + }, + { + "epoch": 0.4657402509977401, + "grad_norm": 1.9300353995620851, + "learning_rate": 2.8111836440957747e-06, + "loss": 0.1595, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4843 + }, + { + "epoch": 0.46583641871423764, + "grad_norm": 2.2330435895999967, + "learning_rate": 2.8104267433811533e-06, + "loss": 0.1104, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4844 + }, + { + "epoch": 0.4659325864307352, + "grad_norm": 2.4221544794832885, + "learning_rate": 2.809669813764938e-06, + "loss": 0.1534, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4845 + }, + { + "epoch": 0.46602875414723277, + "grad_norm": 2.6471386363938825, + "learning_rate": 2.8089128553175997e-06, + "loss": 0.1423, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4846 + }, + { + "epoch": 0.46612492186373033, + "grad_norm": 2.7826710860289103, + "learning_rate": 2.8081558681096154e-06, + "loss": 0.169, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4847 + }, + { + "epoch": 0.4662210895802279, + "grad_norm": 1.4843103225444794, + "learning_rate": 2.80739885221146e-06, + "loss": 0.1249, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4848 + }, + { + "epoch": 0.46631725729672546, + "grad_norm": 1.9136797338061817, + "learning_rate": 2.8066418076936167e-06, + "loss": 0.1334, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4849 + }, + { + "epoch": 0.4664134250132231, + "grad_norm": 3.0405619178350163, + "learning_rate": 2.805884734626566e-06, + "loss": 0.1571, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4850 + }, + { + "epoch": 0.46650959272972065, + "grad_norm": 1.9456266451301063, + "learning_rate": 2.8051276330807953e-06, + "loss": 0.1522, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4851 + }, + { + "epoch": 0.4666057604462182, + "grad_norm": 3.2126917296413353, + "learning_rate": 2.804370503126791e-06, + "loss": 0.1492, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4852 + }, + { + "epoch": 0.4667019281627158, + "grad_norm": 1.4907394006492456, + "learning_rate": 2.8036133448350456e-06, + "loss": 0.1252, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4853 + }, + { + "epoch": 0.46679809587921334, + "grad_norm": 1.3373609885487756, + "learning_rate": 2.802856158276052e-06, + "loss": 0.1331, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4854 + }, + { + "epoch": 0.4668942635957109, + "grad_norm": 2.118461856044651, + "learning_rate": 2.802098943520307e-06, + "loss": 0.1206, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4855 + }, + { + "epoch": 0.46699043131220846, + "grad_norm": 1.2607590181089656, + "learning_rate": 2.8013417006383078e-06, + "loss": 0.1128, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4856 + }, + { + "epoch": 0.4670865990287061, + "grad_norm": 4.075099063798292, + "learning_rate": 2.800584429700558e-06, + "loss": 0.1766, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4857 + }, + { + "epoch": 0.46718276674520365, + "grad_norm": 1.8167183358568828, + "learning_rate": 2.79982713077756e-06, + "loss": 0.1228, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4858 + }, + { + "epoch": 0.4672789344617012, + "grad_norm": 3.5554426028290727, + "learning_rate": 2.799069803939821e-06, + "loss": 0.1505, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4859 + }, + { + "epoch": 0.4673751021781988, + "grad_norm": 1.5517068868066948, + "learning_rate": 2.79831244925785e-06, + "loss": 0.1219, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4860 + }, + { + "epoch": 0.46747126989469634, + "grad_norm": 2.08286004778087, + "learning_rate": 2.7975550668021596e-06, + "loss": 0.1077, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4861 + }, + { + "epoch": 0.4675674376111939, + "grad_norm": 1.725335819372062, + "learning_rate": 2.796797656643263e-06, + "loss": 0.1231, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4862 + }, + { + "epoch": 0.46766360532769147, + "grad_norm": 2.1353615269354984, + "learning_rate": 2.7960402188516785e-06, + "loss": 0.1333, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4863 + }, + { + "epoch": 0.4677597730441891, + "grad_norm": 3.540539343107429, + "learning_rate": 2.7952827534979247e-06, + "loss": 0.155, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4864 + }, + { + "epoch": 0.46785594076068665, + "grad_norm": 1.348745109217925, + "learning_rate": 2.7945252606525244e-06, + "loss": 0.1116, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4865 + }, + { + "epoch": 0.4679521084771842, + "grad_norm": 1.6522487995723973, + "learning_rate": 2.793767740386002e-06, + "loss": 0.1289, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4866 + }, + { + "epoch": 0.4680482761936818, + "grad_norm": 1.903814435367904, + "learning_rate": 2.793010192768884e-06, + "loss": 0.1316, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4867 + }, + { + "epoch": 0.46814444391017934, + "grad_norm": 3.6100353145018826, + "learning_rate": 2.792252617871702e-06, + "loss": 0.1413, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4868 + }, + { + "epoch": 0.4682406116266769, + "grad_norm": 2.2775079286066386, + "learning_rate": 2.791495015764986e-06, + "loss": 0.1034, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4869 + }, + { + "epoch": 0.46833677934317447, + "grad_norm": 2.0093393089306826, + "learning_rate": 2.790737386519273e-06, + "loss": 0.12, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4870 + }, + { + "epoch": 0.4684329470596721, + "grad_norm": 1.5036901826471598, + "learning_rate": 2.789979730205099e-06, + "loss": 0.109, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4871 + }, + { + "epoch": 0.46852911477616965, + "grad_norm": 1.757429499355293, + "learning_rate": 2.7892220468930044e-06, + "loss": 0.1146, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4872 + }, + { + "epoch": 0.4686252824926672, + "grad_norm": 1.7574080131758958, + "learning_rate": 2.788464336653532e-06, + "loss": 0.1373, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4873 + }, + { + "epoch": 0.4687214502091648, + "grad_norm": 3.245941333096877, + "learning_rate": 2.787706599557226e-06, + "loss": 0.188, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4874 + }, + { + "epoch": 0.46881761792566234, + "grad_norm": 3.1282047549657745, + "learning_rate": 2.7869488356746344e-06, + "loss": 0.1318, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4875 + }, + { + "epoch": 0.4689137856421599, + "grad_norm": 3.156367488461506, + "learning_rate": 2.7861910450763068e-06, + "loss": 0.1543, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4876 + }, + { + "epoch": 0.46900995335865747, + "grad_norm": 1.7344730356747808, + "learning_rate": 2.7854332278327956e-06, + "loss": 0.1006, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4877 + }, + { + "epoch": 0.4691061210751551, + "grad_norm": 1.777638558750368, + "learning_rate": 2.784675384014656e-06, + "loss": 0.1311, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4878 + }, + { + "epoch": 0.46920228879165266, + "grad_norm": 2.7291778936932958, + "learning_rate": 2.7839175136924457e-06, + "loss": 0.1438, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4879 + }, + { + "epoch": 0.4692984565081502, + "grad_norm": 3.283888232533703, + "learning_rate": 2.783159616936723e-06, + "loss": 0.1735, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4880 + }, + { + "epoch": 0.4693946242246478, + "grad_norm": 2.856686820635486, + "learning_rate": 2.7824016938180525e-06, + "loss": 0.1394, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4881 + }, + { + "epoch": 0.46949079194114535, + "grad_norm": 1.5110378856695994, + "learning_rate": 2.7816437444069973e-06, + "loss": 0.1149, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4882 + }, + { + "epoch": 0.4695869596576429, + "grad_norm": 2.1123961331048147, + "learning_rate": 2.7808857687741245e-06, + "loss": 0.1161, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4883 + }, + { + "epoch": 0.4696831273741405, + "grad_norm": 1.6580274549380205, + "learning_rate": 2.7801277669900045e-06, + "loss": 0.1284, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4884 + }, + { + "epoch": 0.4697792950906381, + "grad_norm": 1.5548470282458604, + "learning_rate": 2.7793697391252093e-06, + "loss": 0.1094, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4885 + }, + { + "epoch": 0.46987546280713566, + "grad_norm": 2.265127266934149, + "learning_rate": 2.778611685250313e-06, + "loss": 0.1325, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4886 + }, + { + "epoch": 0.4699716305236332, + "grad_norm": 1.4346100224105869, + "learning_rate": 2.7778536054358927e-06, + "loss": 0.1137, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4887 + }, + { + "epoch": 0.4700677982401308, + "grad_norm": 2.4896642317015383, + "learning_rate": 2.7770954997525277e-06, + "loss": 0.1355, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4888 + }, + { + "epoch": 0.47016396595662835, + "grad_norm": 1.6920333746767966, + "learning_rate": 2.7763373682708e-06, + "loss": 0.1229, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4889 + }, + { + "epoch": 0.4702601336731259, + "grad_norm": 1.8022354956703768, + "learning_rate": 2.775579211061294e-06, + "loss": 0.1335, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4890 + }, + { + "epoch": 0.4703563013896235, + "grad_norm": 1.6555094730966766, + "learning_rate": 2.7748210281945954e-06, + "loss": 0.1473, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4891 + }, + { + "epoch": 0.4704524691061211, + "grad_norm": 2.0239490803599285, + "learning_rate": 2.7740628197412933e-06, + "loss": 0.1305, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4892 + }, + { + "epoch": 0.47054863682261866, + "grad_norm": 1.6278411300316475, + "learning_rate": 2.7733045857719805e-06, + "loss": 0.1294, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4893 + }, + { + "epoch": 0.4706448045391162, + "grad_norm": 1.8025203868261788, + "learning_rate": 2.7725463263572483e-06, + "loss": 0.1428, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4894 + }, + { + "epoch": 0.4707409722556138, + "grad_norm": 1.3488352731006217, + "learning_rate": 2.771788041567694e-06, + "loss": 0.1082, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4895 + }, + { + "epoch": 0.47083713997211135, + "grad_norm": 1.8249683775077687, + "learning_rate": 2.7710297314739164e-06, + "loss": 0.1556, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4896 + }, + { + "epoch": 0.4709333076886089, + "grad_norm": 2.686757754730412, + "learning_rate": 2.7702713961465168e-06, + "loss": 0.1554, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4897 + }, + { + "epoch": 0.4710294754051065, + "grad_norm": 1.7347078693868967, + "learning_rate": 2.7695130356560955e-06, + "loss": 0.1284, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4898 + }, + { + "epoch": 0.4711256431216041, + "grad_norm": 2.0939591397080974, + "learning_rate": 2.7687546500732617e-06, + "loss": 0.1445, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4899 + }, + { + "epoch": 0.47122181083810166, + "grad_norm": 1.913889042417052, + "learning_rate": 2.76799623946862e-06, + "loss": 0.1345, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4900 + }, + { + "epoch": 0.4713179785545992, + "grad_norm": 1.268966007601755, + "learning_rate": 2.767237803912783e-06, + "loss": 0.1025, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4901 + }, + { + "epoch": 0.4714141462710968, + "grad_norm": 1.4601810981000278, + "learning_rate": 2.766479343476361e-06, + "loss": 0.1103, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4902 + }, + { + "epoch": 0.47151031398759435, + "grad_norm": 1.9434561587343375, + "learning_rate": 2.76572085822997e-06, + "loss": 0.148, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4903 + }, + { + "epoch": 0.4716064817040919, + "grad_norm": 2.9722577530047656, + "learning_rate": 2.764962348244228e-06, + "loss": 0.1371, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4904 + }, + { + "epoch": 0.4717026494205895, + "grad_norm": 3.902487457713469, + "learning_rate": 2.764203813589752e-06, + "loss": 0.1504, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4905 + }, + { + "epoch": 0.4717988171370871, + "grad_norm": 2.1657962110557296, + "learning_rate": 2.763445254337166e-06, + "loss": 0.1449, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4906 + }, + { + "epoch": 0.47189498485358466, + "grad_norm": 2.7578283435756843, + "learning_rate": 2.7626866705570927e-06, + "loss": 0.1366, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4907 + }, + { + "epoch": 0.47199115257008223, + "grad_norm": 2.041298019999595, + "learning_rate": 2.7619280623201583e-06, + "loss": 0.1227, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4908 + }, + { + "epoch": 0.4720873202865798, + "grad_norm": 3.5703138274013733, + "learning_rate": 2.761169429696992e-06, + "loss": 0.131, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4909 + }, + { + "epoch": 0.47218348800307736, + "grad_norm": 2.778008053949123, + "learning_rate": 2.7604107727582253e-06, + "loss": 0.1478, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4910 + }, + { + "epoch": 0.4722796557195749, + "grad_norm": 1.5467448393824805, + "learning_rate": 2.759652091574489e-06, + "loss": 0.0974, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4911 + }, + { + "epoch": 0.4723758234360725, + "grad_norm": 1.4103829513280353, + "learning_rate": 2.7588933862164198e-06, + "loss": 0.0987, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4912 + }, + { + "epoch": 0.4724719911525701, + "grad_norm": 1.5261445830851397, + "learning_rate": 2.758134656754656e-06, + "loss": 0.1316, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4913 + }, + { + "epoch": 0.47256815886906767, + "grad_norm": 2.4645760758440716, + "learning_rate": 2.7573759032598367e-06, + "loss": 0.159, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4914 + }, + { + "epoch": 0.47266432658556523, + "grad_norm": 2.479171164476364, + "learning_rate": 2.7566171258026033e-06, + "loss": 0.1426, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4915 + }, + { + "epoch": 0.4727604943020628, + "grad_norm": 2.5507616271729856, + "learning_rate": 2.7558583244536007e-06, + "loss": 0.1517, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4916 + }, + { + "epoch": 0.47285666201856036, + "grad_norm": 2.442159723746032, + "learning_rate": 2.7550994992834758e-06, + "loss": 0.1212, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4917 + }, + { + "epoch": 0.4729528297350579, + "grad_norm": 1.6357957547861715, + "learning_rate": 2.754340650362877e-06, + "loss": 0.1367, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4918 + }, + { + "epoch": 0.4730489974515555, + "grad_norm": 1.795123542437674, + "learning_rate": 2.7535817777624546e-06, + "loss": 0.1401, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4919 + }, + { + "epoch": 0.4731451651680531, + "grad_norm": 3.1770112297710265, + "learning_rate": 2.7528228815528622e-06, + "loss": 0.1055, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4920 + }, + { + "epoch": 0.47324133288455067, + "grad_norm": 1.1952262678382766, + "learning_rate": 2.7520639618047566e-06, + "loss": 0.0974, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4921 + }, + { + "epoch": 0.47333750060104823, + "grad_norm": 1.8875776725529216, + "learning_rate": 2.751305018588793e-06, + "loss": 0.1079, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4922 + }, + { + "epoch": 0.4734336683175458, + "grad_norm": 1.8021364044280381, + "learning_rate": 2.750546051975632e-06, + "loss": 0.1203, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4923 + }, + { + "epoch": 0.47352983603404336, + "grad_norm": 1.9471124928426589, + "learning_rate": 2.749787062035937e-06, + "loss": 0.124, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4924 + }, + { + "epoch": 0.4736260037505409, + "grad_norm": 1.5579283961939354, + "learning_rate": 2.7490280488403693e-06, + "loss": 0.1214, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4925 + }, + { + "epoch": 0.4737221714670385, + "grad_norm": 1.7240151717944359, + "learning_rate": 2.7482690124595974e-06, + "loss": 0.1412, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4926 + }, + { + "epoch": 0.4738183391835361, + "grad_norm": 2.032344863126599, + "learning_rate": 2.747509952964289e-06, + "loss": 0.1356, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4927 + }, + { + "epoch": 0.47391450690003367, + "grad_norm": 1.666361040425772, + "learning_rate": 2.746750870425114e-06, + "loss": 0.1204, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4928 + }, + { + "epoch": 0.47401067461653124, + "grad_norm": 2.6597621358161354, + "learning_rate": 2.745991764912746e-06, + "loss": 0.1344, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4929 + }, + { + "epoch": 0.4741068423330288, + "grad_norm": 2.245957723069067, + "learning_rate": 2.7452326364978595e-06, + "loss": 0.1187, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4930 + }, + { + "epoch": 0.47420301004952636, + "grad_norm": 3.5681679605826337, + "learning_rate": 2.7444734852511317e-06, + "loss": 0.1498, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4931 + }, + { + "epoch": 0.4742991777660239, + "grad_norm": 1.8641692130182974, + "learning_rate": 2.743714311243241e-06, + "loss": 0.1357, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4932 + }, + { + "epoch": 0.4743953454825215, + "grad_norm": 2.5715877415615402, + "learning_rate": 2.7429551145448695e-06, + "loss": 0.1539, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4933 + }, + { + "epoch": 0.4744915131990191, + "grad_norm": 2.2782299267095505, + "learning_rate": 2.7421958952266997e-06, + "loss": 0.0999, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4934 + }, + { + "epoch": 0.4745876809155167, + "grad_norm": 2.0056173766708723, + "learning_rate": 2.7414366533594177e-06, + "loss": 0.1237, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4935 + }, + { + "epoch": 0.47468384863201424, + "grad_norm": 1.4359211780879584, + "learning_rate": 2.7406773890137104e-06, + "loss": 0.14, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4936 + }, + { + "epoch": 0.4747800163485118, + "grad_norm": 1.9400731512547307, + "learning_rate": 2.7399181022602683e-06, + "loss": 0.1301, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4937 + }, + { + "epoch": 0.47487618406500937, + "grad_norm": 1.5994263381069724, + "learning_rate": 2.7391587931697826e-06, + "loss": 0.1306, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4938 + }, + { + "epoch": 0.47497235178150693, + "grad_norm": 1.5859371940373763, + "learning_rate": 2.7383994618129466e-06, + "loss": 0.1366, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4939 + }, + { + "epoch": 0.4750685194980045, + "grad_norm": 2.106312444178519, + "learning_rate": 2.7376401082604563e-06, + "loss": 0.1628, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4940 + }, + { + "epoch": 0.4751646872145021, + "grad_norm": 1.2617366502743985, + "learning_rate": 2.7368807325830115e-06, + "loss": 0.1109, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4941 + }, + { + "epoch": 0.4752608549309997, + "grad_norm": 2.5094836785138934, + "learning_rate": 2.7361213348513092e-06, + "loss": 0.1379, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4942 + }, + { + "epoch": 0.47535702264749724, + "grad_norm": 1.9732665080662848, + "learning_rate": 2.7353619151360544e-06, + "loss": 0.148, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4943 + }, + { + "epoch": 0.4754531903639948, + "grad_norm": 1.8377119932734083, + "learning_rate": 2.7346024735079483e-06, + "loss": 0.119, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4944 + }, + { + "epoch": 0.47554935808049237, + "grad_norm": 2.099979429948944, + "learning_rate": 2.733843010037699e-06, + "loss": 0.1488, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4945 + }, + { + "epoch": 0.47564552579698993, + "grad_norm": 2.1351925681055524, + "learning_rate": 2.7330835247960145e-06, + "loss": 0.1471, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4946 + }, + { + "epoch": 0.4757416935134875, + "grad_norm": 1.2551196290007565, + "learning_rate": 2.732324017853605e-06, + "loss": 0.1238, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4947 + }, + { + "epoch": 0.4758378612299851, + "grad_norm": 1.5107061655417642, + "learning_rate": 2.731564489281181e-06, + "loss": 0.1177, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4948 + }, + { + "epoch": 0.4759340289464827, + "grad_norm": 1.4993486646887504, + "learning_rate": 2.730804939149459e-06, + "loss": 0.1507, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4949 + }, + { + "epoch": 0.47603019666298024, + "grad_norm": 3.336457742021535, + "learning_rate": 2.7300453675291534e-06, + "loss": 0.1419, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4950 + }, + { + "epoch": 0.4761263643794778, + "grad_norm": 2.6588270185180978, + "learning_rate": 2.729285774490984e-06, + "loss": 0.1131, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4951 + }, + { + "epoch": 0.47622253209597537, + "grad_norm": 2.7285576349108656, + "learning_rate": 2.72852616010567e-06, + "loss": 0.1551, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4952 + }, + { + "epoch": 0.47631869981247293, + "grad_norm": 1.9041595358618364, + "learning_rate": 2.727766524443934e-06, + "loss": 0.1252, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4953 + }, + { + "epoch": 0.4764148675289705, + "grad_norm": 2.874095921335191, + "learning_rate": 2.7270068675765e-06, + "loss": 0.1529, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4954 + }, + { + "epoch": 0.4765110352454681, + "grad_norm": 1.3829957188050648, + "learning_rate": 2.726247189574095e-06, + "loss": 0.1259, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4955 + }, + { + "epoch": 0.4766072029619657, + "grad_norm": 4.0258625289794745, + "learning_rate": 2.7254874905074457e-06, + "loss": 0.1692, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4956 + }, + { + "epoch": 0.47670337067846325, + "grad_norm": 2.419359491707287, + "learning_rate": 2.7247277704472835e-06, + "loss": 0.139, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4957 + }, + { + "epoch": 0.4767995383949608, + "grad_norm": 3.5263825294793874, + "learning_rate": 2.72396802946434e-06, + "loss": 0.1461, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4958 + }, + { + "epoch": 0.4768957061114584, + "grad_norm": 2.9470533914484247, + "learning_rate": 2.723208267629348e-06, + "loss": 0.1313, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4959 + }, + { + "epoch": 0.47699187382795594, + "grad_norm": 1.5133533384435358, + "learning_rate": 2.722448485013046e-06, + "loss": 0.1354, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4960 + }, + { + "epoch": 0.4770880415444535, + "grad_norm": 1.4468528022367932, + "learning_rate": 2.72168868168617e-06, + "loss": 0.1109, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4961 + }, + { + "epoch": 0.4771842092609511, + "grad_norm": 3.1069346191933924, + "learning_rate": 2.7209288577194592e-06, + "loss": 0.1485, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4962 + }, + { + "epoch": 0.4772803769774487, + "grad_norm": 1.604751799609428, + "learning_rate": 2.720169013183658e-06, + "loss": 0.1123, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4963 + }, + { + "epoch": 0.47737654469394625, + "grad_norm": 2.159303298347465, + "learning_rate": 2.7194091481495076e-06, + "loss": 0.1559, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4964 + }, + { + "epoch": 0.4774727124104438, + "grad_norm": 1.351638726083811, + "learning_rate": 2.718649262687754e-06, + "loss": 0.1051, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4965 + }, + { + "epoch": 0.4775688801269414, + "grad_norm": 2.5808734275007117, + "learning_rate": 2.717889356869146e-06, + "loss": 0.1181, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4966 + }, + { + "epoch": 0.47766504784343894, + "grad_norm": 1.506145843533422, + "learning_rate": 2.717129430764431e-06, + "loss": 0.1255, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4967 + }, + { + "epoch": 0.4777612155599365, + "grad_norm": 1.939250485656713, + "learning_rate": 2.7163694844443617e-06, + "loss": 0.1459, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4968 + }, + { + "epoch": 0.4778573832764341, + "grad_norm": 3.2924164530975286, + "learning_rate": 2.715609517979691e-06, + "loss": 0.1248, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4969 + }, + { + "epoch": 0.4779535509929317, + "grad_norm": 4.90942258170056, + "learning_rate": 2.7148495314411725e-06, + "loss": 0.1599, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4970 + }, + { + "epoch": 0.47804971870942925, + "grad_norm": 3.2112847020811643, + "learning_rate": 2.7140895248995656e-06, + "loss": 0.128, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4971 + }, + { + "epoch": 0.4781458864259268, + "grad_norm": 4.159567914833032, + "learning_rate": 2.7133294984256264e-06, + "loss": 0.1303, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4972 + }, + { + "epoch": 0.4782420541424244, + "grad_norm": 1.9519721556444365, + "learning_rate": 2.712569452090117e-06, + "loss": 0.1556, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4973 + }, + { + "epoch": 0.47833822185892194, + "grad_norm": 1.4456786977004974, + "learning_rate": 2.7118093859638e-06, + "loss": 0.1264, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4974 + }, + { + "epoch": 0.4784343895754195, + "grad_norm": 1.9351243489659569, + "learning_rate": 2.7110493001174387e-06, + "loss": 0.1208, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4975 + }, + { + "epoch": 0.4785305572919171, + "grad_norm": 1.5526250984891028, + "learning_rate": 2.7102891946217998e-06, + "loss": 0.1329, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4976 + }, + { + "epoch": 0.4786267250084147, + "grad_norm": 2.4229716962401877, + "learning_rate": 2.7095290695476516e-06, + "loss": 0.1258, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4977 + }, + { + "epoch": 0.47872289272491225, + "grad_norm": 2.561553337191645, + "learning_rate": 2.708768924965763e-06, + "loss": 0.1529, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4978 + }, + { + "epoch": 0.4788190604414098, + "grad_norm": 1.490892866373188, + "learning_rate": 2.7080087609469064e-06, + "loss": 0.1133, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4979 + }, + { + "epoch": 0.4789152281579074, + "grad_norm": 1.9365631284518776, + "learning_rate": 2.707248577561854e-06, + "loss": 0.1489, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4980 + }, + { + "epoch": 0.47901139587440494, + "grad_norm": 2.074439197350928, + "learning_rate": 2.7064883748813825e-06, + "loss": 0.1272, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4981 + }, + { + "epoch": 0.4791075635909025, + "grad_norm": 1.2976909379284023, + "learning_rate": 2.7057281529762676e-06, + "loss": 0.1195, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4982 + }, + { + "epoch": 0.4792037313074001, + "grad_norm": 4.15883427207237, + "learning_rate": 2.704967911917289e-06, + "loss": 0.1428, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4983 + }, + { + "epoch": 0.4792998990238977, + "grad_norm": 1.666191496371792, + "learning_rate": 2.7042076517752264e-06, + "loss": 0.1094, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4984 + }, + { + "epoch": 0.47939606674039525, + "grad_norm": 2.044774080953479, + "learning_rate": 2.703447372620863e-06, + "loss": 0.145, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4985 + }, + { + "epoch": 0.4794922344568928, + "grad_norm": 1.490208263158419, + "learning_rate": 2.7026870745249824e-06, + "loss": 0.1302, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4986 + }, + { + "epoch": 0.4795884021733904, + "grad_norm": 2.0098237557431524, + "learning_rate": 2.7019267575583704e-06, + "loss": 0.1146, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4987 + }, + { + "epoch": 0.47968456988988795, + "grad_norm": 3.010219674105935, + "learning_rate": 2.7011664217918154e-06, + "loss": 0.1449, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4988 + }, + { + "epoch": 0.4797807376063855, + "grad_norm": 2.784287903119644, + "learning_rate": 2.700406067296105e-06, + "loss": 0.1305, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4989 + }, + { + "epoch": 0.47987690532288313, + "grad_norm": 3.4265254234006046, + "learning_rate": 2.6996456941420325e-06, + "loss": 0.1162, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4990 + }, + { + "epoch": 0.4799730730393807, + "grad_norm": 2.104319911710938, + "learning_rate": 2.6988853024003903e-06, + "loss": 0.1564, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4991 + }, + { + "epoch": 0.48006924075587826, + "grad_norm": 1.6663043133711817, + "learning_rate": 2.6981248921419713e-06, + "loss": 0.1338, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4992 + }, + { + "epoch": 0.4801654084723758, + "grad_norm": 1.4529776653086846, + "learning_rate": 2.6973644634375736e-06, + "loss": 0.1285, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4993 + }, + { + "epoch": 0.4802615761888734, + "grad_norm": 1.9472278626269406, + "learning_rate": 2.696604016357994e-06, + "loss": 0.1408, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4994 + }, + { + "epoch": 0.48035774390537095, + "grad_norm": 1.6748736850217778, + "learning_rate": 2.695843550974034e-06, + "loss": 0.1133, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4995 + }, + { + "epoch": 0.4804539116218685, + "grad_norm": 3.2340274695855804, + "learning_rate": 2.6950830673564932e-06, + "loss": 0.1171, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4996 + }, + { + "epoch": 0.48055007933836613, + "grad_norm": 1.4545661557147884, + "learning_rate": 2.6943225655761757e-06, + "loss": 0.1208, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4997 + }, + { + "epoch": 0.4806462470548637, + "grad_norm": 1.8150967768749426, + "learning_rate": 2.693562045703886e-06, + "loss": 0.1204, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4998 + }, + { + "epoch": 0.48074241477136126, + "grad_norm": 2.9646269757843102, + "learning_rate": 2.692801507810431e-06, + "loss": 0.1151, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 4999 + }, + { + "epoch": 0.4808385824878588, + "grad_norm": 3.305705519658127, + "learning_rate": 2.6920409519666173e-06, + "loss": 0.1315, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5000 + }, + { + "epoch": 0.4809347502043564, + "grad_norm": 2.565203814794851, + "learning_rate": 2.6912803782432566e-06, + "loss": 0.1436, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5001 + }, + { + "epoch": 0.48103091792085395, + "grad_norm": 1.6145037461710325, + "learning_rate": 2.690519786711161e-06, + "loss": 0.1427, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5002 + }, + { + "epoch": 0.4811270856373515, + "grad_norm": 1.354291005867259, + "learning_rate": 2.6897591774411412e-06, + "loss": 0.1056, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5003 + }, + { + "epoch": 0.48122325335384913, + "grad_norm": 1.6657566532981851, + "learning_rate": 2.688998550504014e-06, + "loss": 0.1346, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5004 + }, + { + "epoch": 0.4813194210703467, + "grad_norm": 2.109814045054282, + "learning_rate": 2.6882379059705953e-06, + "loss": 0.1337, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5005 + }, + { + "epoch": 0.48141558878684426, + "grad_norm": 2.017906422748668, + "learning_rate": 2.6874772439117037e-06, + "loss": 0.1514, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5006 + }, + { + "epoch": 0.4815117565033418, + "grad_norm": 2.3938868801582234, + "learning_rate": 2.6867165643981576e-06, + "loss": 0.1083, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5007 + }, + { + "epoch": 0.4816079242198394, + "grad_norm": 2.13374810474849, + "learning_rate": 2.68595586750078e-06, + "loss": 0.1427, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5008 + }, + { + "epoch": 0.48170409193633695, + "grad_norm": 1.8305265653736496, + "learning_rate": 2.6851951532903924e-06, + "loss": 0.1261, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5009 + }, + { + "epoch": 0.4818002596528345, + "grad_norm": 1.869725654843498, + "learning_rate": 2.684434421837821e-06, + "loss": 0.1399, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5010 + }, + { + "epoch": 0.48189642736933214, + "grad_norm": 1.4233863162095322, + "learning_rate": 2.6836736732138906e-06, + "loss": 0.1146, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5011 + }, + { + "epoch": 0.4819925950858297, + "grad_norm": 2.207300235201485, + "learning_rate": 2.6829129074894306e-06, + "loss": 0.1417, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5012 + }, + { + "epoch": 0.48208876280232726, + "grad_norm": 2.516296883786387, + "learning_rate": 2.682152124735268e-06, + "loss": 0.1543, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5013 + }, + { + "epoch": 0.48218493051882483, + "grad_norm": 1.5691838552005741, + "learning_rate": 2.6813913250222368e-06, + "loss": 0.1197, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5014 + }, + { + "epoch": 0.4822810982353224, + "grad_norm": 1.5414629975116054, + "learning_rate": 2.6806305084211663e-06, + "loss": 0.1249, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5015 + }, + { + "epoch": 0.48237726595181996, + "grad_norm": 1.4653207256729088, + "learning_rate": 2.679869675002894e-06, + "loss": 0.1243, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5016 + }, + { + "epoch": 0.4824734336683175, + "grad_norm": 1.8839687535750418, + "learning_rate": 2.679108824838253e-06, + "loss": 0.1203, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5017 + }, + { + "epoch": 0.48256960138481514, + "grad_norm": 1.8647778650671212, + "learning_rate": 2.678347957998081e-06, + "loss": 0.1417, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5018 + }, + { + "epoch": 0.4826657691013127, + "grad_norm": 1.5727440420017125, + "learning_rate": 2.6775870745532183e-06, + "loss": 0.1269, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5019 + }, + { + "epoch": 0.48276193681781027, + "grad_norm": 1.4526151933959708, + "learning_rate": 2.6768261745745037e-06, + "loss": 0.1117, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5020 + }, + { + "epoch": 0.48285810453430783, + "grad_norm": 1.3401418435674264, + "learning_rate": 2.67606525813278e-06, + "loss": 0.101, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5021 + }, + { + "epoch": 0.4829542722508054, + "grad_norm": 2.2547648290577125, + "learning_rate": 2.6753043252988903e-06, + "loss": 0.167, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5022 + }, + { + "epoch": 0.48305043996730296, + "grad_norm": 1.5135039374117034, + "learning_rate": 2.6745433761436794e-06, + "loss": 0.1121, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5023 + }, + { + "epoch": 0.4831466076838005, + "grad_norm": 1.9358762962883307, + "learning_rate": 2.673782410737995e-06, + "loss": 0.1274, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5024 + }, + { + "epoch": 0.48324277540029814, + "grad_norm": 2.028375986720193, + "learning_rate": 2.673021429152683e-06, + "loss": 0.1179, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5025 + }, + { + "epoch": 0.4833389431167957, + "grad_norm": 1.662851006235321, + "learning_rate": 2.672260431458594e-06, + "loss": 0.1377, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5026 + }, + { + "epoch": 0.48343511083329327, + "grad_norm": 1.6268297674155177, + "learning_rate": 2.6714994177265794e-06, + "loss": 0.1432, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5027 + }, + { + "epoch": 0.48353127854979083, + "grad_norm": 1.997028321617164, + "learning_rate": 2.6707383880274918e-06, + "loss": 0.1445, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5028 + }, + { + "epoch": 0.4836274462662884, + "grad_norm": 2.307210718904671, + "learning_rate": 2.669977342432184e-06, + "loss": 0.108, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5029 + }, + { + "epoch": 0.48372361398278596, + "grad_norm": 1.7436357980987962, + "learning_rate": 2.6692162810115123e-06, + "loss": 0.1262, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5030 + }, + { + "epoch": 0.4838197816992835, + "grad_norm": 1.5845270822145283, + "learning_rate": 2.668455203836334e-06, + "loss": 0.1164, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5031 + }, + { + "epoch": 0.48391594941578114, + "grad_norm": 1.835555149029603, + "learning_rate": 2.667694110977506e-06, + "loss": 0.1375, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5032 + }, + { + "epoch": 0.4840121171322787, + "grad_norm": 2.3768146502599174, + "learning_rate": 2.6669330025058903e-06, + "loss": 0.1153, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5033 + }, + { + "epoch": 0.48410828484877627, + "grad_norm": 1.6840633142013688, + "learning_rate": 2.666171878492346e-06, + "loss": 0.1054, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5034 + }, + { + "epoch": 0.48420445256527384, + "grad_norm": 1.6293084251403662, + "learning_rate": 2.6654107390077377e-06, + "loss": 0.1477, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5035 + }, + { + "epoch": 0.4843006202817714, + "grad_norm": 2.0463826958009568, + "learning_rate": 2.6646495841229288e-06, + "loss": 0.1229, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5036 + }, + { + "epoch": 0.48439678799826896, + "grad_norm": 1.6757542347583256, + "learning_rate": 2.6638884139087844e-06, + "loss": 0.1353, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5037 + }, + { + "epoch": 0.4844929557147665, + "grad_norm": 1.447841014369048, + "learning_rate": 2.6631272284361733e-06, + "loss": 0.1177, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5038 + }, + { + "epoch": 0.48458912343126415, + "grad_norm": 1.5086239228012457, + "learning_rate": 2.6623660277759616e-06, + "loss": 0.1182, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5039 + }, + { + "epoch": 0.4846852911477617, + "grad_norm": 1.8848311871317898, + "learning_rate": 2.6616048119990214e-06, + "loss": 0.1178, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5040 + }, + { + "epoch": 0.4847814588642593, + "grad_norm": 1.344004690964011, + "learning_rate": 2.6608435811762223e-06, + "loss": 0.1187, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5041 + }, + { + "epoch": 0.48487762658075684, + "grad_norm": 1.8211897136249875, + "learning_rate": 2.6600823353784384e-06, + "loss": 0.1328, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5042 + }, + { + "epoch": 0.4849737942972544, + "grad_norm": 1.696358605210304, + "learning_rate": 2.6593210746765423e-06, + "loss": 0.129, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5043 + }, + { + "epoch": 0.48506996201375197, + "grad_norm": 2.4668738018206082, + "learning_rate": 2.6585597991414115e-06, + "loss": 0.1149, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5044 + }, + { + "epoch": 0.48516612973024953, + "grad_norm": 1.9248830010388915, + "learning_rate": 2.6577985088439212e-06, + "loss": 0.1655, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5045 + }, + { + "epoch": 0.48526229744674715, + "grad_norm": 1.5738129037387472, + "learning_rate": 2.6570372038549507e-06, + "loss": 0.1357, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5046 + }, + { + "epoch": 0.4853584651632447, + "grad_norm": 2.8049775942378274, + "learning_rate": 2.656275884245379e-06, + "loss": 0.111, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5047 + }, + { + "epoch": 0.4854546328797423, + "grad_norm": 2.2814330702357735, + "learning_rate": 2.6555145500860864e-06, + "loss": 0.1353, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5048 + }, + { + "epoch": 0.48555080059623984, + "grad_norm": 2.1651621629215443, + "learning_rate": 2.654753201447956e-06, + "loss": 0.12, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5049 + }, + { + "epoch": 0.4856469683127374, + "grad_norm": 2.4791515150317385, + "learning_rate": 2.6539918384018724e-06, + "loss": 0.154, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5050 + }, + { + "epoch": 0.48574313602923497, + "grad_norm": 1.4366826034550242, + "learning_rate": 2.653230461018719e-06, + "loss": 0.1187, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5051 + }, + { + "epoch": 0.48583930374573253, + "grad_norm": 3.0799810342592346, + "learning_rate": 2.652469069369384e-06, + "loss": 0.1259, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5052 + }, + { + "epoch": 0.48593547146223015, + "grad_norm": 2.516194746464343, + "learning_rate": 2.6517076635247525e-06, + "loss": 0.1216, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5053 + }, + { + "epoch": 0.4860316391787277, + "grad_norm": 3.5169191222392695, + "learning_rate": 2.6509462435557155e-06, + "loss": 0.1276, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5054 + }, + { + "epoch": 0.4861278068952253, + "grad_norm": 2.4354385083250123, + "learning_rate": 2.6501848095331627e-06, + "loss": 0.1089, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5055 + }, + { + "epoch": 0.48622397461172284, + "grad_norm": 1.3200275045069023, + "learning_rate": 2.6494233615279865e-06, + "loss": 0.0911, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5056 + }, + { + "epoch": 0.4863201423282204, + "grad_norm": 2.414769546567168, + "learning_rate": 2.648661899611078e-06, + "loss": 0.1079, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5057 + }, + { + "epoch": 0.48641631004471797, + "grad_norm": 2.1164189887996856, + "learning_rate": 2.6479004238533336e-06, + "loss": 0.1078, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5058 + }, + { + "epoch": 0.48651247776121553, + "grad_norm": 2.8298006039229153, + "learning_rate": 2.647138934325647e-06, + "loss": 0.148, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5059 + }, + { + "epoch": 0.48660864547771315, + "grad_norm": 1.7786070111183834, + "learning_rate": 2.6463774310989154e-06, + "loss": 0.1023, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5060 + }, + { + "epoch": 0.4867048131942107, + "grad_norm": 1.885600895020101, + "learning_rate": 2.6456159142440387e-06, + "loss": 0.115, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5061 + }, + { + "epoch": 0.4868009809107083, + "grad_norm": 2.303427156778499, + "learning_rate": 2.644854383831914e-06, + "loss": 0.1466, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5062 + }, + { + "epoch": 0.48689714862720584, + "grad_norm": 2.627170967227888, + "learning_rate": 2.6440928399334424e-06, + "loss": 0.1368, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5063 + }, + { + "epoch": 0.4869933163437034, + "grad_norm": 1.6843456247900581, + "learning_rate": 2.6433312826195266e-06, + "loss": 0.1109, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5064 + }, + { + "epoch": 0.487089484060201, + "grad_norm": 1.8560300370666027, + "learning_rate": 2.642569711961069e-06, + "loss": 0.1179, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5065 + }, + { + "epoch": 0.48718565177669854, + "grad_norm": 1.8420286644181634, + "learning_rate": 2.641808128028974e-06, + "loss": 0.131, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5066 + }, + { + "epoch": 0.48728181949319616, + "grad_norm": 2.0396554999376333, + "learning_rate": 2.6410465308941473e-06, + "loss": 0.1332, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5067 + }, + { + "epoch": 0.4873779872096937, + "grad_norm": 1.9884799995992002, + "learning_rate": 2.6402849206274954e-06, + "loss": 0.1493, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5068 + }, + { + "epoch": 0.4874741549261913, + "grad_norm": 1.8423649062041432, + "learning_rate": 2.6395232972999276e-06, + "loss": 0.1059, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5069 + }, + { + "epoch": 0.48757032264268885, + "grad_norm": 2.3227758773490006, + "learning_rate": 2.6387616609823506e-06, + "loss": 0.1125, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5070 + }, + { + "epoch": 0.4876664903591864, + "grad_norm": 1.9116352713781313, + "learning_rate": 2.638000011745677e-06, + "loss": 0.0959, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5071 + }, + { + "epoch": 0.487762658075684, + "grad_norm": 1.4938380749672224, + "learning_rate": 2.637238349660819e-06, + "loss": 0.1001, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5072 + }, + { + "epoch": 0.48785882579218154, + "grad_norm": 2.032919194690925, + "learning_rate": 2.6364766747986877e-06, + "loss": 0.1453, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5073 + }, + { + "epoch": 0.48795499350867916, + "grad_norm": 1.6300629545350176, + "learning_rate": 2.635714987230197e-06, + "loss": 0.1104, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5074 + }, + { + "epoch": 0.4880511612251767, + "grad_norm": 1.8359722342668259, + "learning_rate": 2.6349532870262646e-06, + "loss": 0.1362, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5075 + }, + { + "epoch": 0.4881473289416743, + "grad_norm": 1.8445636725430838, + "learning_rate": 2.634191574257804e-06, + "loss": 0.1204, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5076 + }, + { + "epoch": 0.48824349665817185, + "grad_norm": 1.997572691452484, + "learning_rate": 2.6334298489957346e-06, + "loss": 0.1484, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5077 + }, + { + "epoch": 0.4883396643746694, + "grad_norm": 3.3843614060867555, + "learning_rate": 2.632668111310975e-06, + "loss": 0.1289, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5078 + }, + { + "epoch": 0.488435832091167, + "grad_norm": 1.4893315284918593, + "learning_rate": 2.631906361274444e-06, + "loss": 0.1053, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5079 + }, + { + "epoch": 0.48853199980766454, + "grad_norm": 2.1413037496637686, + "learning_rate": 2.6311445989570633e-06, + "loss": 0.1215, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5080 + }, + { + "epoch": 0.48862816752416216, + "grad_norm": 1.5220313078681793, + "learning_rate": 2.630382824429756e-06, + "loss": 0.1352, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5081 + }, + { + "epoch": 0.4887243352406597, + "grad_norm": 2.1015175034255282, + "learning_rate": 2.6296210377634437e-06, + "loss": 0.1268, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5082 + }, + { + "epoch": 0.4888205029571573, + "grad_norm": 1.951231090238333, + "learning_rate": 2.6288592390290524e-06, + "loss": 0.1409, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5083 + }, + { + "epoch": 0.48891667067365485, + "grad_norm": 1.607306310234325, + "learning_rate": 2.628097428297506e-06, + "loss": 0.1354, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5084 + }, + { + "epoch": 0.4890128383901524, + "grad_norm": 1.85615759992429, + "learning_rate": 2.627335605639733e-06, + "loss": 0.1563, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5085 + }, + { + "epoch": 0.48910900610665, + "grad_norm": 1.828533623093188, + "learning_rate": 2.6265737711266605e-06, + "loss": 0.1077, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5086 + }, + { + "epoch": 0.48920517382314754, + "grad_norm": 2.373128956478658, + "learning_rate": 2.625811924829217e-06, + "loss": 0.1484, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5087 + }, + { + "epoch": 0.48930134153964516, + "grad_norm": 2.5680614261595167, + "learning_rate": 2.6250500668183325e-06, + "loss": 0.1576, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5088 + }, + { + "epoch": 0.4893975092561427, + "grad_norm": 1.9310344698170472, + "learning_rate": 2.624288197164939e-06, + "loss": 0.1643, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5089 + }, + { + "epoch": 0.4894936769726403, + "grad_norm": 1.2674319778788639, + "learning_rate": 2.6235263159399684e-06, + "loss": 0.1057, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5090 + }, + { + "epoch": 0.48958984468913785, + "grad_norm": 1.884496961733338, + "learning_rate": 2.622764423214353e-06, + "loss": 0.1507, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5091 + }, + { + "epoch": 0.4896860124056354, + "grad_norm": 2.577132817786271, + "learning_rate": 2.622002519059029e-06, + "loss": 0.1251, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5092 + }, + { + "epoch": 0.489782180122133, + "grad_norm": 1.6137143009300172, + "learning_rate": 2.6212406035449293e-06, + "loss": 0.1327, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5093 + }, + { + "epoch": 0.48987834783863055, + "grad_norm": 1.5103938435428654, + "learning_rate": 2.620478676742993e-06, + "loss": 0.1294, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5094 + }, + { + "epoch": 0.48997451555512817, + "grad_norm": 1.8905113440198034, + "learning_rate": 2.619716738724155e-06, + "loss": 0.1506, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5095 + }, + { + "epoch": 0.49007068327162573, + "grad_norm": 1.8520194485110044, + "learning_rate": 2.6189547895593565e-06, + "loss": 0.1266, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5096 + }, + { + "epoch": 0.4901668509881233, + "grad_norm": 1.963214315843824, + "learning_rate": 2.618192829319535e-06, + "loss": 0.1439, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5097 + }, + { + "epoch": 0.49026301870462086, + "grad_norm": 2.786571017246776, + "learning_rate": 2.617430858075632e-06, + "loss": 0.1316, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5098 + }, + { + "epoch": 0.4903591864211184, + "grad_norm": 2.6700313301190852, + "learning_rate": 2.6166688758985886e-06, + "loss": 0.1273, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5099 + }, + { + "epoch": 0.490455354137616, + "grad_norm": 2.9357665243156443, + "learning_rate": 2.615906882859349e-06, + "loss": 0.1304, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5100 + }, + { + "epoch": 0.49055152185411355, + "grad_norm": 1.7959685887965189, + "learning_rate": 2.615144879028854e-06, + "loss": 0.1001, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5101 + }, + { + "epoch": 0.49064768957061117, + "grad_norm": 1.5768676029897128, + "learning_rate": 2.614382864478051e-06, + "loss": 0.1223, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5102 + }, + { + "epoch": 0.49074385728710873, + "grad_norm": 1.61425090458716, + "learning_rate": 2.613620839277885e-06, + "loss": 0.1247, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5103 + }, + { + "epoch": 0.4908400250036063, + "grad_norm": 1.303674340885811, + "learning_rate": 2.612858803499302e-06, + "loss": 0.1041, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5104 + }, + { + "epoch": 0.49093619272010386, + "grad_norm": 2.9926806224009566, + "learning_rate": 2.61209675721325e-06, + "loss": 0.1199, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5105 + }, + { + "epoch": 0.4910323604366014, + "grad_norm": 1.6404293439155584, + "learning_rate": 2.611334700490678e-06, + "loss": 0.1414, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5106 + }, + { + "epoch": 0.491128528153099, + "grad_norm": 2.5489980306308144, + "learning_rate": 2.610572633402535e-06, + "loss": 0.1359, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5107 + }, + { + "epoch": 0.49122469586959655, + "grad_norm": 2.030688758322875, + "learning_rate": 2.6098105560197724e-06, + "loss": 0.1556, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5108 + }, + { + "epoch": 0.49132086358609417, + "grad_norm": 2.141307702755295, + "learning_rate": 2.6090484684133406e-06, + "loss": 0.1282, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5109 + }, + { + "epoch": 0.49141703130259173, + "grad_norm": 1.3271711412369085, + "learning_rate": 2.6082863706541923e-06, + "loss": 0.1318, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5110 + }, + { + "epoch": 0.4915131990190893, + "grad_norm": 2.029032944493551, + "learning_rate": 2.607524262813282e-06, + "loss": 0.1205, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5111 + }, + { + "epoch": 0.49160936673558686, + "grad_norm": 1.7908543940344084, + "learning_rate": 2.6067621449615633e-06, + "loss": 0.1393, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5112 + }, + { + "epoch": 0.4917055344520844, + "grad_norm": 2.448122253713913, + "learning_rate": 2.606000017169991e-06, + "loss": 0.165, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5113 + }, + { + "epoch": 0.491801702168582, + "grad_norm": 2.1739828027054506, + "learning_rate": 2.6052378795095224e-06, + "loss": 0.1201, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5114 + }, + { + "epoch": 0.49189786988507955, + "grad_norm": 1.6779175463087497, + "learning_rate": 2.6044757320511136e-06, + "loss": 0.1295, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5115 + }, + { + "epoch": 0.49199403760157717, + "grad_norm": 1.9215063996400508, + "learning_rate": 2.6037135748657232e-06, + "loss": 0.1102, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5116 + }, + { + "epoch": 0.49209020531807474, + "grad_norm": 1.9513981762532653, + "learning_rate": 2.6029514080243105e-06, + "loss": 0.1393, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5117 + }, + { + "epoch": 0.4921863730345723, + "grad_norm": 1.5996852522634446, + "learning_rate": 2.602189231597835e-06, + "loss": 0.1174, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5118 + }, + { + "epoch": 0.49228254075106986, + "grad_norm": 1.8609233237268097, + "learning_rate": 2.6014270456572564e-06, + "loss": 0.1125, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5119 + }, + { + "epoch": 0.4923787084675674, + "grad_norm": 2.4646290131284654, + "learning_rate": 2.6006648502735384e-06, + "loss": 0.1516, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5120 + }, + { + "epoch": 0.492474876184065, + "grad_norm": 1.549617798988004, + "learning_rate": 2.5999026455176418e-06, + "loss": 0.1296, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5121 + }, + { + "epoch": 0.49257104390056256, + "grad_norm": 1.8069447343482512, + "learning_rate": 2.599140431460531e-06, + "loss": 0.1317, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5122 + }, + { + "epoch": 0.4926672116170602, + "grad_norm": 1.4759819894529846, + "learning_rate": 2.5983782081731697e-06, + "loss": 0.1147, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5123 + }, + { + "epoch": 0.49276337933355774, + "grad_norm": 2.2018886092198136, + "learning_rate": 2.597615975726523e-06, + "loss": 0.1471, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5124 + }, + { + "epoch": 0.4928595470500553, + "grad_norm": 2.157958465666622, + "learning_rate": 2.596853734191558e-06, + "loss": 0.1521, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5125 + }, + { + "epoch": 0.49295571476655287, + "grad_norm": 1.6387225447638645, + "learning_rate": 2.5960914836392394e-06, + "loss": 0.1274, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5126 + }, + { + "epoch": 0.49305188248305043, + "grad_norm": 1.986598146129342, + "learning_rate": 2.5953292241405364e-06, + "loss": 0.1488, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5127 + }, + { + "epoch": 0.493148050199548, + "grad_norm": 1.9196825647783127, + "learning_rate": 2.5945669557664176e-06, + "loss": 0.1221, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5128 + }, + { + "epoch": 0.49324421791604556, + "grad_norm": 1.9340194645233515, + "learning_rate": 2.5938046785878517e-06, + "loss": 0.0998, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5129 + }, + { + "epoch": 0.4933403856325432, + "grad_norm": 2.6552679125296637, + "learning_rate": 2.5930423926758087e-06, + "loss": 0.1075, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5130 + }, + { + "epoch": 0.49343655334904074, + "grad_norm": 1.4847842499900157, + "learning_rate": 2.5922800981012596e-06, + "loss": 0.1269, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5131 + }, + { + "epoch": 0.4935327210655383, + "grad_norm": 1.482480514076068, + "learning_rate": 2.5915177949351765e-06, + "loss": 0.1119, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5132 + }, + { + "epoch": 0.49362888878203587, + "grad_norm": 1.515114761618753, + "learning_rate": 2.5907554832485316e-06, + "loss": 0.1171, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5133 + }, + { + "epoch": 0.49372505649853343, + "grad_norm": 2.6966418800227947, + "learning_rate": 2.589993163112299e-06, + "loss": 0.1344, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5134 + }, + { + "epoch": 0.493821224215031, + "grad_norm": 2.502580201939163, + "learning_rate": 2.5892308345974517e-06, + "loss": 0.1439, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5135 + }, + { + "epoch": 0.49391739193152856, + "grad_norm": 3.62395731466622, + "learning_rate": 2.588468497774965e-06, + "loss": 0.1467, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5136 + }, + { + "epoch": 0.4940135596480262, + "grad_norm": 1.8267826998067012, + "learning_rate": 2.5877061527158154e-06, + "loss": 0.1152, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5137 + }, + { + "epoch": 0.49410972736452374, + "grad_norm": 1.9507457283159415, + "learning_rate": 2.586943799490978e-06, + "loss": 0.1302, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5138 + }, + { + "epoch": 0.4942058950810213, + "grad_norm": 1.6310206841443997, + "learning_rate": 2.586181438171431e-06, + "loss": 0.1302, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5139 + }, + { + "epoch": 0.49430206279751887, + "grad_norm": 1.4701647562285882, + "learning_rate": 2.585419068828152e-06, + "loss": 0.1371, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5140 + }, + { + "epoch": 0.49439823051401643, + "grad_norm": 2.3420589010416366, + "learning_rate": 2.5846566915321198e-06, + "loss": 0.1217, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5141 + }, + { + "epoch": 0.494494398230514, + "grad_norm": 1.869745175994293, + "learning_rate": 2.5838943063543137e-06, + "loss": 0.1245, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5142 + }, + { + "epoch": 0.49459056594701156, + "grad_norm": 1.6718001856029319, + "learning_rate": 2.583131913365714e-06, + "loss": 0.1257, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5143 + }, + { + "epoch": 0.4946867336635092, + "grad_norm": 2.9013975700363006, + "learning_rate": 2.582369512637302e-06, + "loss": 0.1618, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5144 + }, + { + "epoch": 0.49478290138000675, + "grad_norm": 1.7610274199100815, + "learning_rate": 2.581607104240059e-06, + "loss": 0.1273, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5145 + }, + { + "epoch": 0.4948790690965043, + "grad_norm": 1.97380932448146, + "learning_rate": 2.580844688244967e-06, + "loss": 0.1309, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5146 + }, + { + "epoch": 0.4949752368130019, + "grad_norm": 1.4346146283578842, + "learning_rate": 2.580082264723009e-06, + "loss": 0.1072, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5147 + }, + { + "epoch": 0.49507140452949944, + "grad_norm": 1.5585979412208035, + "learning_rate": 2.57931983374517e-06, + "loss": 0.1257, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5148 + }, + { + "epoch": 0.495167572245997, + "grad_norm": 1.4808758251217886, + "learning_rate": 2.5785573953824323e-06, + "loss": 0.1248, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5149 + }, + { + "epoch": 0.49526373996249456, + "grad_norm": 1.3349818684173862, + "learning_rate": 2.5777949497057838e-06, + "loss": 0.1106, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5150 + }, + { + "epoch": 0.4953599076789922, + "grad_norm": 2.0995478040476288, + "learning_rate": 2.577032496786207e-06, + "loss": 0.1302, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5151 + }, + { + "epoch": 0.49545607539548975, + "grad_norm": 1.7955984840684247, + "learning_rate": 2.5762700366946915e-06, + "loss": 0.1078, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5152 + }, + { + "epoch": 0.4955522431119873, + "grad_norm": 2.5105326472076883, + "learning_rate": 2.5755075695022223e-06, + "loss": 0.1622, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5153 + }, + { + "epoch": 0.4956484108284849, + "grad_norm": 2.03645157330713, + "learning_rate": 2.574745095279789e-06, + "loss": 0.1526, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5154 + }, + { + "epoch": 0.49574457854498244, + "grad_norm": 1.354303903535523, + "learning_rate": 2.5739826140983783e-06, + "loss": 0.1081, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5155 + }, + { + "epoch": 0.49584074626148, + "grad_norm": 1.644629574057748, + "learning_rate": 2.573220126028981e-06, + "loss": 0.109, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5156 + }, + { + "epoch": 0.49593691397797757, + "grad_norm": 1.8140153082439074, + "learning_rate": 2.5724576311425845e-06, + "loss": 0.1072, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5157 + }, + { + "epoch": 0.4960330816944752, + "grad_norm": 1.7453062424901793, + "learning_rate": 2.571695129510182e-06, + "loss": 0.1327, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5158 + }, + { + "epoch": 0.49612924941097275, + "grad_norm": 1.8602223093707313, + "learning_rate": 2.570932621202763e-06, + "loss": 0.1458, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5159 + }, + { + "epoch": 0.4962254171274703, + "grad_norm": 2.2938499240544394, + "learning_rate": 2.5701701062913194e-06, + "loss": 0.1148, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5160 + }, + { + "epoch": 0.4963215848439679, + "grad_norm": 2.4051078443164533, + "learning_rate": 2.5694075848468435e-06, + "loss": 0.1491, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5161 + }, + { + "epoch": 0.49641775256046544, + "grad_norm": 2.4015504714958515, + "learning_rate": 2.5686450569403284e-06, + "loss": 0.1213, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5162 + }, + { + "epoch": 0.496513920276963, + "grad_norm": 1.2488370023197248, + "learning_rate": 2.5678825226427666e-06, + "loss": 0.1025, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5163 + }, + { + "epoch": 0.49661008799346057, + "grad_norm": 2.2803178914700317, + "learning_rate": 2.5671199820251537e-06, + "loss": 0.1372, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5164 + }, + { + "epoch": 0.4967062557099582, + "grad_norm": 1.7746391618249113, + "learning_rate": 2.5663574351584824e-06, + "loss": 0.1399, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5165 + }, + { + "epoch": 0.49680242342645575, + "grad_norm": 1.6853602624861472, + "learning_rate": 2.5655948821137492e-06, + "loss": 0.1146, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5166 + }, + { + "epoch": 0.4968985911429533, + "grad_norm": 1.6673469589682994, + "learning_rate": 2.564832322961951e-06, + "loss": 0.1268, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5167 + }, + { + "epoch": 0.4969947588594509, + "grad_norm": 1.6355433469374228, + "learning_rate": 2.564069757774082e-06, + "loss": 0.1281, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5168 + }, + { + "epoch": 0.49709092657594844, + "grad_norm": 1.4505265069823365, + "learning_rate": 2.56330718662114e-06, + "loss": 0.0995, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5169 + }, + { + "epoch": 0.497187094292446, + "grad_norm": 1.933833008790377, + "learning_rate": 2.562544609574123e-06, + "loss": 0.1045, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5170 + }, + { + "epoch": 0.49728326200894357, + "grad_norm": 2.4228742894498634, + "learning_rate": 2.5617820267040284e-06, + "loss": 0.1353, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5171 + }, + { + "epoch": 0.4973794297254412, + "grad_norm": 1.3566934922956304, + "learning_rate": 2.561019438081855e-06, + "loss": 0.1117, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5172 + }, + { + "epoch": 0.49747559744193875, + "grad_norm": 2.731755371235353, + "learning_rate": 2.5602568437786017e-06, + "loss": 0.1334, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5173 + }, + { + "epoch": 0.4975717651584363, + "grad_norm": 3.147325091553124, + "learning_rate": 2.5594942438652685e-06, + "loss": 0.1427, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5174 + }, + { + "epoch": 0.4976679328749339, + "grad_norm": 2.220226198444713, + "learning_rate": 2.5587316384128557e-06, + "loss": 0.1486, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5175 + }, + { + "epoch": 0.49776410059143145, + "grad_norm": 1.6824917235577928, + "learning_rate": 2.557969027492364e-06, + "loss": 0.1253, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5176 + }, + { + "epoch": 0.497860268307929, + "grad_norm": 1.7889444292399215, + "learning_rate": 2.557206411174794e-06, + "loss": 0.1341, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5177 + }, + { + "epoch": 0.4979564360244266, + "grad_norm": 1.64303681011938, + "learning_rate": 2.5564437895311482e-06, + "loss": 0.1187, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5178 + }, + { + "epoch": 0.4980526037409242, + "grad_norm": 1.9227294345411894, + "learning_rate": 2.555681162632429e-06, + "loss": 0.1187, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5179 + }, + { + "epoch": 0.49814877145742176, + "grad_norm": 1.4477140842973633, + "learning_rate": 2.554918530549637e-06, + "loss": 0.1241, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5180 + }, + { + "epoch": 0.4982449391739193, + "grad_norm": 1.6843783236307523, + "learning_rate": 2.554155893353778e-06, + "loss": 0.1082, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5181 + }, + { + "epoch": 0.4983411068904169, + "grad_norm": 2.012913136498125, + "learning_rate": 2.553393251115854e-06, + "loss": 0.0998, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5182 + }, + { + "epoch": 0.49843727460691445, + "grad_norm": 1.9080511831886677, + "learning_rate": 2.5526306039068687e-06, + "loss": 0.1569, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5183 + }, + { + "epoch": 0.498533442323412, + "grad_norm": 1.5118896702029248, + "learning_rate": 2.5518679517978288e-06, + "loss": 0.121, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5184 + }, + { + "epoch": 0.4986296100399096, + "grad_norm": 2.1418697046655133, + "learning_rate": 2.5511052948597375e-06, + "loss": 0.1244, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5185 + }, + { + "epoch": 0.4987257777564072, + "grad_norm": 1.9950557299479557, + "learning_rate": 2.550342633163601e-06, + "loss": 0.11, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5186 + }, + { + "epoch": 0.49882194547290476, + "grad_norm": 3.017478368332957, + "learning_rate": 2.5495799667804253e-06, + "loss": 0.157, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5187 + }, + { + "epoch": 0.4989181131894023, + "grad_norm": 5.656762063826434, + "learning_rate": 2.548817295781216e-06, + "loss": 0.1385, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5188 + }, + { + "epoch": 0.4990142809058999, + "grad_norm": 1.5303067460205309, + "learning_rate": 2.548054620236981e-06, + "loss": 0.1175, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5189 + }, + { + "epoch": 0.49911044862239745, + "grad_norm": 1.6853222839476087, + "learning_rate": 2.5472919402187273e-06, + "loss": 0.1314, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5190 + }, + { + "epoch": 0.499206616338895, + "grad_norm": 1.335956778718757, + "learning_rate": 2.546529255797462e-06, + "loss": 0.1168, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5191 + }, + { + "epoch": 0.4993027840553926, + "grad_norm": 1.5393394096440398, + "learning_rate": 2.5457665670441937e-06, + "loss": 0.1397, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5192 + }, + { + "epoch": 0.4993989517718902, + "grad_norm": 1.926606094815778, + "learning_rate": 2.54500387402993e-06, + "loss": 0.1506, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5193 + }, + { + "epoch": 0.49949511948838776, + "grad_norm": 2.72804910550538, + "learning_rate": 2.544241176825681e-06, + "loss": 0.149, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5194 + }, + { + "epoch": 0.4995912872048853, + "grad_norm": 1.7547452649243793, + "learning_rate": 2.543478475502454e-06, + "loss": 0.1367, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5195 + }, + { + "epoch": 0.4996874549213829, + "grad_norm": 2.5875975000583216, + "learning_rate": 2.542715770131261e-06, + "loss": 0.1353, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5196 + }, + { + "epoch": 0.49978362263788045, + "grad_norm": 2.1368500723648456, + "learning_rate": 2.54195306078311e-06, + "loss": 0.1118, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5197 + }, + { + "epoch": 0.499879790354378, + "grad_norm": 1.6507555741704099, + "learning_rate": 2.541190347529013e-06, + "loss": 0.146, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5198 + }, + { + "epoch": 0.4999759580708756, + "grad_norm": 2.445285716568898, + "learning_rate": 2.540427630439979e-06, + "loss": 0.1228, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5199 + }, + { + "epoch": 0.5000721257873731, + "grad_norm": 2.2420901088551153, + "learning_rate": 2.53966490958702e-06, + "loss": 0.1235, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5200 + }, + { + "epoch": 0.5001682935038707, + "grad_norm": 1.5419838298423687, + "learning_rate": 2.538902185041148e-06, + "loss": 0.1391, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5201 + }, + { + "epoch": 0.5002644612203683, + "grad_norm": 2.940743519212923, + "learning_rate": 2.5381394568733743e-06, + "loss": 0.1369, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5202 + }, + { + "epoch": 0.500360628936866, + "grad_norm": 1.968070664982708, + "learning_rate": 2.53737672515471e-06, + "loss": 0.1401, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5203 + }, + { + "epoch": 0.5004567966533635, + "grad_norm": 1.3536594043501744, + "learning_rate": 2.53661398995617e-06, + "loss": 0.1144, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5204 + }, + { + "epoch": 0.5005529643698611, + "grad_norm": 1.6164969117654753, + "learning_rate": 2.5358512513487637e-06, + "loss": 0.1194, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5205 + }, + { + "epoch": 0.5006491320863586, + "grad_norm": 1.2326390029745704, + "learning_rate": 2.5350885094035077e-06, + "loss": 0.1195, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5206 + }, + { + "epoch": 0.5007452998028562, + "grad_norm": 1.5121768569949288, + "learning_rate": 2.534325764191413e-06, + "loss": 0.1544, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5207 + }, + { + "epoch": 0.5008414675193538, + "grad_norm": 1.9010793631605223, + "learning_rate": 2.533563015783494e-06, + "loss": 0.1496, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5208 + }, + { + "epoch": 0.5009376352358513, + "grad_norm": 1.8367176450595855, + "learning_rate": 2.5328002642507648e-06, + "loss": 0.1511, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5209 + }, + { + "epoch": 0.5010338029523489, + "grad_norm": 1.3562232768898697, + "learning_rate": 2.5320375096642395e-06, + "loss": 0.114, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5210 + }, + { + "epoch": 0.5011299706688465, + "grad_norm": 1.4958578644246134, + "learning_rate": 2.531274752094933e-06, + "loss": 0.1317, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5211 + }, + { + "epoch": 0.501226138385344, + "grad_norm": 1.786890376539387, + "learning_rate": 2.5305119916138606e-06, + "loss": 0.1283, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5212 + }, + { + "epoch": 0.5013223061018416, + "grad_norm": 1.767203853811976, + "learning_rate": 2.529749228292036e-06, + "loss": 0.1187, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5213 + }, + { + "epoch": 0.5014184738183391, + "grad_norm": 1.5926671804587096, + "learning_rate": 2.528986462200475e-06, + "loss": 0.1489, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5214 + }, + { + "epoch": 0.5015146415348367, + "grad_norm": 4.296121878764755, + "learning_rate": 2.528223693410195e-06, + "loss": 0.1345, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5215 + }, + { + "epoch": 0.5016108092513343, + "grad_norm": 2.7169419567777826, + "learning_rate": 2.5274609219922093e-06, + "loss": 0.1198, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5216 + }, + { + "epoch": 0.501706976967832, + "grad_norm": 4.435842769155739, + "learning_rate": 2.5266981480175356e-06, + "loss": 0.1416, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5217 + }, + { + "epoch": 0.5018031446843295, + "grad_norm": 2.5420387887217317, + "learning_rate": 2.52593537155719e-06, + "loss": 0.1433, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5218 + }, + { + "epoch": 0.5018993124008271, + "grad_norm": 2.9918095680623127, + "learning_rate": 2.525172592682189e-06, + "loss": 0.1174, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5219 + }, + { + "epoch": 0.5019954801173246, + "grad_norm": 1.6412327181834467, + "learning_rate": 2.5244098114635503e-06, + "loss": 0.141, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5220 + }, + { + "epoch": 0.5020916478338222, + "grad_norm": 1.5587883553634498, + "learning_rate": 2.523647027972289e-06, + "loss": 0.1116, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5221 + }, + { + "epoch": 0.5021878155503198, + "grad_norm": 3.5404786199000577, + "learning_rate": 2.5228842422794237e-06, + "loss": 0.1378, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5222 + }, + { + "epoch": 0.5022839832668173, + "grad_norm": 2.6358647027493167, + "learning_rate": 2.5221214544559723e-06, + "loss": 0.1407, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5223 + }, + { + "epoch": 0.5023801509833149, + "grad_norm": 3.151666335337771, + "learning_rate": 2.5213586645729514e-06, + "loss": 0.122, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5224 + }, + { + "epoch": 0.5024763186998125, + "grad_norm": 1.691480258538943, + "learning_rate": 2.520595872701379e-06, + "loss": 0.0813, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5225 + }, + { + "epoch": 0.50257248641631, + "grad_norm": 2.751134044076561, + "learning_rate": 2.5198330789122743e-06, + "loss": 0.131, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5226 + }, + { + "epoch": 0.5026686541328076, + "grad_norm": 1.397020492560991, + "learning_rate": 2.5190702832766546e-06, + "loss": 0.1006, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5227 + }, + { + "epoch": 0.5027648218493052, + "grad_norm": 1.75525969168319, + "learning_rate": 2.518307485865538e-06, + "loss": 0.1103, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5228 + }, + { + "epoch": 0.5028609895658027, + "grad_norm": 2.2219187494195265, + "learning_rate": 2.517544686749944e-06, + "loss": 0.1095, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5229 + }, + { + "epoch": 0.5029571572823003, + "grad_norm": 1.9681940164507636, + "learning_rate": 2.516781886000891e-06, + "loss": 0.1374, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5230 + }, + { + "epoch": 0.503053324998798, + "grad_norm": 2.218826004373951, + "learning_rate": 2.5160190836893967e-06, + "loss": 0.1245, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5231 + }, + { + "epoch": 0.5031494927152955, + "grad_norm": 1.481330114840164, + "learning_rate": 2.5152562798864816e-06, + "loss": 0.1172, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5232 + }, + { + "epoch": 0.5032456604317931, + "grad_norm": 2.226883531807528, + "learning_rate": 2.5144934746631638e-06, + "loss": 0.1217, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5233 + }, + { + "epoch": 0.5033418281482906, + "grad_norm": 2.457911289095105, + "learning_rate": 2.5137306680904644e-06, + "loss": 0.1457, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5234 + }, + { + "epoch": 0.5034379958647882, + "grad_norm": 3.2149369053822108, + "learning_rate": 2.5129678602394005e-06, + "loss": 0.1229, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5235 + }, + { + "epoch": 0.5035341635812858, + "grad_norm": 1.8652251201350833, + "learning_rate": 2.5122050511809924e-06, + "loss": 0.128, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5236 + }, + { + "epoch": 0.5036303312977833, + "grad_norm": 1.6549485689447903, + "learning_rate": 2.511442240986261e-06, + "loss": 0.1074, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5237 + }, + { + "epoch": 0.5037264990142809, + "grad_norm": 2.780019291350299, + "learning_rate": 2.510679429726224e-06, + "loss": 0.1462, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5238 + }, + { + "epoch": 0.5038226667307785, + "grad_norm": 2.008125427754807, + "learning_rate": 2.509916617471903e-06, + "loss": 0.1633, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5239 + }, + { + "epoch": 0.503918834447276, + "grad_norm": 2.419441188232453, + "learning_rate": 2.5091538042943183e-06, + "loss": 0.1557, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5240 + }, + { + "epoch": 0.5040150021637736, + "grad_norm": 2.509907067264759, + "learning_rate": 2.5083909902644874e-06, + "loss": 0.117, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5241 + }, + { + "epoch": 0.5041111698802712, + "grad_norm": 2.0343244205408433, + "learning_rate": 2.507628175453432e-06, + "loss": 0.1246, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5242 + }, + { + "epoch": 0.5042073375967687, + "grad_norm": 1.7092650942305738, + "learning_rate": 2.5068653599321736e-06, + "loss": 0.1444, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5243 + }, + { + "epoch": 0.5043035053132663, + "grad_norm": 2.7168246424706575, + "learning_rate": 2.50610254377173e-06, + "loss": 0.1369, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5244 + }, + { + "epoch": 0.504399673029764, + "grad_norm": 3.279889506648198, + "learning_rate": 2.505339727043123e-06, + "loss": 0.1498, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5245 + }, + { + "epoch": 0.5044958407462615, + "grad_norm": 2.1464713518995224, + "learning_rate": 2.504576909817372e-06, + "loss": 0.1445, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5246 + }, + { + "epoch": 0.5045920084627591, + "grad_norm": 1.9345779421097928, + "learning_rate": 2.5038140921654987e-06, + "loss": 0.1219, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5247 + }, + { + "epoch": 0.5046881761792567, + "grad_norm": 1.6420839450785065, + "learning_rate": 2.503051274158522e-06, + "loss": 0.1143, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5248 + }, + { + "epoch": 0.5047843438957542, + "grad_norm": 2.087382743064419, + "learning_rate": 2.5022884558674637e-06, + "loss": 0.1502, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5249 + }, + { + "epoch": 0.5048805116122518, + "grad_norm": 2.02760406715179, + "learning_rate": 2.5015256373633435e-06, + "loss": 0.1332, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5250 + }, + { + "epoch": 0.5049766793287493, + "grad_norm": 2.2479128462395788, + "learning_rate": 2.5007628187171822e-06, + "loss": 0.1348, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5251 + }, + { + "epoch": 0.5050728470452469, + "grad_norm": 1.4749523377484355, + "learning_rate": 2.5e-06, + "loss": 0.1236, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5252 + }, + { + "epoch": 0.5051690147617445, + "grad_norm": 2.49983998558118, + "learning_rate": 2.499237181282818e-06, + "loss": 0.1562, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5253 + }, + { + "epoch": 0.505265182478242, + "grad_norm": 1.3006486844578795, + "learning_rate": 2.498474362636657e-06, + "loss": 0.1182, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5254 + }, + { + "epoch": 0.5053613501947396, + "grad_norm": 2.5945696238782965, + "learning_rate": 2.497711544132538e-06, + "loss": 0.1227, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5255 + }, + { + "epoch": 0.5054575179112372, + "grad_norm": 1.4445975069431254, + "learning_rate": 2.496948725841479e-06, + "loss": 0.1259, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5256 + }, + { + "epoch": 0.5055536856277347, + "grad_norm": 1.3632355989332252, + "learning_rate": 2.4961859078345025e-06, + "loss": 0.1266, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5257 + }, + { + "epoch": 0.5056498533442323, + "grad_norm": 1.4119151612034642, + "learning_rate": 2.4954230901826283e-06, + "loss": 0.103, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5258 + }, + { + "epoch": 0.50574602106073, + "grad_norm": 1.4267176069931735, + "learning_rate": 2.494660272956878e-06, + "loss": 0.1254, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5259 + }, + { + "epoch": 0.5058421887772275, + "grad_norm": 1.6188441430511793, + "learning_rate": 2.4938974562282708e-06, + "loss": 0.1367, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5260 + }, + { + "epoch": 0.5059383564937251, + "grad_norm": 1.8316363984079047, + "learning_rate": 2.4931346400678276e-06, + "loss": 0.1482, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5261 + }, + { + "epoch": 0.5060345242102227, + "grad_norm": 1.4978188306399653, + "learning_rate": 2.4923718245465683e-06, + "loss": 0.1138, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5262 + }, + { + "epoch": 0.5061306919267202, + "grad_norm": 2.172090312280819, + "learning_rate": 2.4916090097355134e-06, + "loss": 0.1372, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5263 + }, + { + "epoch": 0.5062268596432178, + "grad_norm": 1.550628622219081, + "learning_rate": 2.490846195705683e-06, + "loss": 0.1258, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5264 + }, + { + "epoch": 0.5063230273597153, + "grad_norm": 2.118944268193833, + "learning_rate": 2.490083382528097e-06, + "loss": 0.1197, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5265 + }, + { + "epoch": 0.5064191950762129, + "grad_norm": 2.119926561195137, + "learning_rate": 2.489320570273777e-06, + "loss": 0.1246, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5266 + }, + { + "epoch": 0.5065153627927105, + "grad_norm": 2.114624810829005, + "learning_rate": 2.48855775901374e-06, + "loss": 0.1194, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5267 + }, + { + "epoch": 0.506611530509208, + "grad_norm": 1.6758249106143206, + "learning_rate": 2.487794948819008e-06, + "loss": 0.1448, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5268 + }, + { + "epoch": 0.5067076982257056, + "grad_norm": 1.504264818151111, + "learning_rate": 2.4870321397606003e-06, + "loss": 0.1231, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5269 + }, + { + "epoch": 0.5068038659422032, + "grad_norm": 1.917221911519794, + "learning_rate": 2.4862693319095364e-06, + "loss": 0.1516, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5270 + }, + { + "epoch": 0.5069000336587007, + "grad_norm": 3.2455207790484955, + "learning_rate": 2.485506525336836e-06, + "loss": 0.1425, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5271 + }, + { + "epoch": 0.5069962013751983, + "grad_norm": 1.5592982263362762, + "learning_rate": 2.4847437201135197e-06, + "loss": 0.1335, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5272 + }, + { + "epoch": 0.507092369091696, + "grad_norm": 2.2982544120273083, + "learning_rate": 2.4839809163106045e-06, + "loss": 0.1333, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5273 + }, + { + "epoch": 0.5071885368081935, + "grad_norm": 2.9147001539889175, + "learning_rate": 2.4832181139991103e-06, + "loss": 0.1297, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5274 + }, + { + "epoch": 0.5072847045246911, + "grad_norm": 1.6775948703886947, + "learning_rate": 2.4824553132500564e-06, + "loss": 0.1421, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5275 + }, + { + "epoch": 0.5073808722411887, + "grad_norm": 2.4379386086955575, + "learning_rate": 2.4816925141344623e-06, + "loss": 0.1353, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5276 + }, + { + "epoch": 0.5074770399576862, + "grad_norm": 1.6200175497611522, + "learning_rate": 2.480929716723346e-06, + "loss": 0.1301, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5277 + }, + { + "epoch": 0.5075732076741838, + "grad_norm": 2.6964354056606954, + "learning_rate": 2.4801669210877265e-06, + "loss": 0.1224, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5278 + }, + { + "epoch": 0.5076693753906814, + "grad_norm": 2.585016230342389, + "learning_rate": 2.4794041272986216e-06, + "loss": 0.1433, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5279 + }, + { + "epoch": 0.5077655431071789, + "grad_norm": 1.495555308542192, + "learning_rate": 2.4786413354270494e-06, + "loss": 0.1157, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5280 + }, + { + "epoch": 0.5078617108236765, + "grad_norm": 1.4896501757150984, + "learning_rate": 2.4778785455440285e-06, + "loss": 0.1241, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5281 + }, + { + "epoch": 0.507957878540174, + "grad_norm": 2.8223656951438723, + "learning_rate": 2.4771157577205767e-06, + "loss": 0.1311, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5282 + }, + { + "epoch": 0.5080540462566716, + "grad_norm": 1.404732610185222, + "learning_rate": 2.4763529720277126e-06, + "loss": 0.1094, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5283 + }, + { + "epoch": 0.5081502139731692, + "grad_norm": 2.3236195412039424, + "learning_rate": 2.4755901885364514e-06, + "loss": 0.115, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5284 + }, + { + "epoch": 0.5082463816896667, + "grad_norm": 1.5127233244359102, + "learning_rate": 2.4748274073178114e-06, + "loss": 0.1214, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5285 + }, + { + "epoch": 0.5083425494061643, + "grad_norm": 1.351679185734134, + "learning_rate": 2.4740646284428104e-06, + "loss": 0.1096, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5286 + }, + { + "epoch": 0.508438717122662, + "grad_norm": 2.139544109573584, + "learning_rate": 2.4733018519824648e-06, + "loss": 0.1333, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5287 + }, + { + "epoch": 0.5085348848391595, + "grad_norm": 2.1603918575614007, + "learning_rate": 2.472539078007791e-06, + "loss": 0.1367, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5288 + }, + { + "epoch": 0.5086310525556571, + "grad_norm": 1.8347584184005934, + "learning_rate": 2.4717763065898064e-06, + "loss": 0.124, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5289 + }, + { + "epoch": 0.5087272202721547, + "grad_norm": 1.5407694203828215, + "learning_rate": 2.4710135377995254e-06, + "loss": 0.1108, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5290 + }, + { + "epoch": 0.5088233879886522, + "grad_norm": 2.995500946868377, + "learning_rate": 2.4702507717079647e-06, + "loss": 0.1272, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5291 + }, + { + "epoch": 0.5089195557051498, + "grad_norm": 1.9688861284706418, + "learning_rate": 2.46948800838614e-06, + "loss": 0.1509, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5292 + }, + { + "epoch": 0.5090157234216474, + "grad_norm": 1.6902466559135738, + "learning_rate": 2.4687252479050674e-06, + "loss": 0.1336, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5293 + }, + { + "epoch": 0.5091118911381449, + "grad_norm": 2.396641865501658, + "learning_rate": 2.4679624903357613e-06, + "loss": 0.1224, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5294 + }, + { + "epoch": 0.5092080588546425, + "grad_norm": 2.068428840141718, + "learning_rate": 2.4671997357492356e-06, + "loss": 0.129, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5295 + }, + { + "epoch": 0.50930422657114, + "grad_norm": 1.54513197736724, + "learning_rate": 2.466436984216507e-06, + "loss": 0.105, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5296 + }, + { + "epoch": 0.5094003942876376, + "grad_norm": 2.1005813903685104, + "learning_rate": 2.4656742358085876e-06, + "loss": 0.1372, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5297 + }, + { + "epoch": 0.5094965620041352, + "grad_norm": 2.5109087759770454, + "learning_rate": 2.464911490596493e-06, + "loss": 0.143, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5298 + }, + { + "epoch": 0.5095927297206327, + "grad_norm": 1.5871171598811022, + "learning_rate": 2.4641487486512363e-06, + "loss": 0.1276, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5299 + }, + { + "epoch": 0.5096888974371303, + "grad_norm": 1.9608171755219446, + "learning_rate": 2.4633860100438317e-06, + "loss": 0.1341, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5300 + }, + { + "epoch": 0.509785065153628, + "grad_norm": 2.11334497841398, + "learning_rate": 2.4626232748452905e-06, + "loss": 0.117, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5301 + }, + { + "epoch": 0.5098812328701255, + "grad_norm": 2.3478639775876005, + "learning_rate": 2.461860543126627e-06, + "loss": 0.1274, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5302 + }, + { + "epoch": 0.5099774005866231, + "grad_norm": 2.4771707027838796, + "learning_rate": 2.4610978149588522e-06, + "loss": 0.1185, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5303 + }, + { + "epoch": 0.5100735683031207, + "grad_norm": 1.392939880687417, + "learning_rate": 2.4603350904129802e-06, + "loss": 0.1048, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5304 + }, + { + "epoch": 0.5101697360196182, + "grad_norm": 1.9065989457328172, + "learning_rate": 2.459572369560021e-06, + "loss": 0.1648, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5305 + }, + { + "epoch": 0.5102659037361158, + "grad_norm": 1.375058258560896, + "learning_rate": 2.458809652470989e-06, + "loss": 0.0989, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5306 + }, + { + "epoch": 0.5103620714526134, + "grad_norm": 2.744516181948021, + "learning_rate": 2.4580469392168905e-06, + "loss": 0.1406, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5307 + }, + { + "epoch": 0.5104582391691109, + "grad_norm": 2.5747821940525846, + "learning_rate": 2.45728422986874e-06, + "loss": 0.1527, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5308 + }, + { + "epoch": 0.5105544068856085, + "grad_norm": 1.403591362781087, + "learning_rate": 2.4565215244975466e-06, + "loss": 0.1056, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5309 + }, + { + "epoch": 0.510650574602106, + "grad_norm": 1.5675661516819517, + "learning_rate": 2.4557588231743203e-06, + "loss": 0.1158, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5310 + }, + { + "epoch": 0.5107467423186036, + "grad_norm": 1.9076749514994664, + "learning_rate": 2.454996125970071e-06, + "loss": 0.1213, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5311 + }, + { + "epoch": 0.5108429100351012, + "grad_norm": 2.260544729331781, + "learning_rate": 2.4542334329558075e-06, + "loss": 0.1368, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5312 + }, + { + "epoch": 0.5109390777515987, + "grad_norm": 2.889071336396741, + "learning_rate": 2.4534707442025388e-06, + "loss": 0.1187, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5313 + }, + { + "epoch": 0.5110352454680963, + "grad_norm": 1.5709348487189054, + "learning_rate": 2.452708059781273e-06, + "loss": 0.125, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5314 + }, + { + "epoch": 0.511131413184594, + "grad_norm": 1.7364996308081235, + "learning_rate": 2.451945379763019e-06, + "loss": 0.1451, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5315 + }, + { + "epoch": 0.5112275809010915, + "grad_norm": 2.0008994474209936, + "learning_rate": 2.4511827042187834e-06, + "loss": 0.1256, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5316 + }, + { + "epoch": 0.5113237486175891, + "grad_norm": 2.374056814876595, + "learning_rate": 2.4504200332195755e-06, + "loss": 0.1269, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5317 + }, + { + "epoch": 0.5114199163340867, + "grad_norm": 2.654463914346041, + "learning_rate": 2.4496573668364e-06, + "loss": 0.1412, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5318 + }, + { + "epoch": 0.5115160840505842, + "grad_norm": 1.974984012355256, + "learning_rate": 2.4488947051402633e-06, + "loss": 0.1355, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5319 + }, + { + "epoch": 0.5116122517670818, + "grad_norm": 1.9234083564467606, + "learning_rate": 2.4481320482021716e-06, + "loss": 0.1441, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5320 + }, + { + "epoch": 0.5117084194835794, + "grad_norm": 1.62211338011847, + "learning_rate": 2.4473693960931313e-06, + "loss": 0.1151, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5321 + }, + { + "epoch": 0.5118045872000769, + "grad_norm": 2.763501038797438, + "learning_rate": 2.4466067488841477e-06, + "loss": 0.1561, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5322 + }, + { + "epoch": 0.5119007549165745, + "grad_norm": 3.2653588558516775, + "learning_rate": 2.445844106646223e-06, + "loss": 0.1335, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5323 + }, + { + "epoch": 0.5119969226330721, + "grad_norm": 1.9564453172181353, + "learning_rate": 2.4450814694503637e-06, + "loss": 0.1431, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5324 + }, + { + "epoch": 0.5120930903495696, + "grad_norm": 1.7406657812496116, + "learning_rate": 2.4443188373675717e-06, + "loss": 0.1235, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5325 + }, + { + "epoch": 0.5121892580660672, + "grad_norm": 1.5767495589457468, + "learning_rate": 2.443556210468852e-06, + "loss": 0.1488, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5326 + }, + { + "epoch": 0.5122854257825648, + "grad_norm": 1.3159188465667662, + "learning_rate": 2.442793588825206e-06, + "loss": 0.101, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5327 + }, + { + "epoch": 0.5123815934990623, + "grad_norm": 1.8307073619869332, + "learning_rate": 2.4420309725076364e-06, + "loss": 0.1252, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5328 + }, + { + "epoch": 0.51247776121556, + "grad_norm": 1.5100333432670878, + "learning_rate": 2.4412683615871447e-06, + "loss": 0.1289, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5329 + }, + { + "epoch": 0.5125739289320576, + "grad_norm": 2.490627852417918, + "learning_rate": 2.440505756134732e-06, + "loss": 0.1431, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5330 + }, + { + "epoch": 0.5126700966485551, + "grad_norm": 3.409311982486758, + "learning_rate": 2.4397431562213987e-06, + "loss": 0.1529, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5331 + }, + { + "epoch": 0.5127662643650527, + "grad_norm": 2.4145604639703326, + "learning_rate": 2.438980561918145e-06, + "loss": 0.1233, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5332 + }, + { + "epoch": 0.5128624320815502, + "grad_norm": 1.8582784699760915, + "learning_rate": 2.4382179732959716e-06, + "loss": 0.1303, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5333 + }, + { + "epoch": 0.5129585997980478, + "grad_norm": 1.9602757742211827, + "learning_rate": 2.437455390425878e-06, + "loss": 0.1824, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5334 + }, + { + "epoch": 0.5130547675145454, + "grad_norm": 1.8270819525047663, + "learning_rate": 2.436692813378861e-06, + "loss": 0.1353, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5335 + }, + { + "epoch": 0.5131509352310429, + "grad_norm": 1.9460175843441774, + "learning_rate": 2.435930242225919e-06, + "loss": 0.1383, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5336 + }, + { + "epoch": 0.5132471029475405, + "grad_norm": 3.1214251688032264, + "learning_rate": 2.43516767703805e-06, + "loss": 0.1296, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5337 + }, + { + "epoch": 0.5133432706640381, + "grad_norm": 5.430780157021572, + "learning_rate": 2.4344051178862503e-06, + "loss": 0.1435, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5338 + }, + { + "epoch": 0.5134394383805356, + "grad_norm": 3.857629622390735, + "learning_rate": 2.433642564841519e-06, + "loss": 0.1332, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5339 + }, + { + "epoch": 0.5135356060970332, + "grad_norm": 2.877682634422052, + "learning_rate": 2.4328800179748475e-06, + "loss": 0.1714, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5340 + }, + { + "epoch": 0.5136317738135308, + "grad_norm": 2.2804674194467043, + "learning_rate": 2.4321174773572338e-06, + "loss": 0.1287, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5341 + }, + { + "epoch": 0.5137279415300283, + "grad_norm": 1.6288341815777234, + "learning_rate": 2.431354943059672e-06, + "loss": 0.122, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5342 + }, + { + "epoch": 0.513824109246526, + "grad_norm": 1.515576225757068, + "learning_rate": 2.430592415153157e-06, + "loss": 0.1227, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5343 + }, + { + "epoch": 0.5139202769630236, + "grad_norm": 2.432948041353812, + "learning_rate": 2.429829893708681e-06, + "loss": 0.1258, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5344 + }, + { + "epoch": 0.5140164446795211, + "grad_norm": 1.6148712211104492, + "learning_rate": 2.4290673787972378e-06, + "loss": 0.1363, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5345 + }, + { + "epoch": 0.5141126123960187, + "grad_norm": 2.8072949247890753, + "learning_rate": 2.428304870489819e-06, + "loss": 0.1281, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5346 + }, + { + "epoch": 0.5142087801125163, + "grad_norm": 3.052003316433369, + "learning_rate": 2.427542368857416e-06, + "loss": 0.1524, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5347 + }, + { + "epoch": 0.5143049478290138, + "grad_norm": 1.387225318938805, + "learning_rate": 2.4267798739710203e-06, + "loss": 0.1294, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5348 + }, + { + "epoch": 0.5144011155455114, + "grad_norm": 2.0367575757786347, + "learning_rate": 2.426017385901622e-06, + "loss": 0.1155, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5349 + }, + { + "epoch": 0.5144972832620089, + "grad_norm": 2.3880176559803497, + "learning_rate": 2.4252549047202124e-06, + "loss": 0.1274, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5350 + }, + { + "epoch": 0.5145934509785065, + "grad_norm": 2.00334305372523, + "learning_rate": 2.4244924304977785e-06, + "loss": 0.137, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5351 + }, + { + "epoch": 0.5146896186950041, + "grad_norm": 1.567384558101629, + "learning_rate": 2.4237299633053098e-06, + "loss": 0.1134, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5352 + }, + { + "epoch": 0.5147857864115016, + "grad_norm": 2.0191109555191926, + "learning_rate": 2.4229675032137935e-06, + "loss": 0.1219, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5353 + }, + { + "epoch": 0.5148819541279992, + "grad_norm": 1.5188147265326868, + "learning_rate": 2.4222050502942175e-06, + "loss": 0.094, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5354 + }, + { + "epoch": 0.5149781218444968, + "grad_norm": 1.5263877888171593, + "learning_rate": 2.4214426046175677e-06, + "loss": 0.1094, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5355 + }, + { + "epoch": 0.5150742895609943, + "grad_norm": 1.5264544080199969, + "learning_rate": 2.4206801662548314e-06, + "loss": 0.1045, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5356 + }, + { + "epoch": 0.515170457277492, + "grad_norm": 2.8076754979869065, + "learning_rate": 2.4199177352769916e-06, + "loss": 0.1188, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5357 + }, + { + "epoch": 0.5152666249939896, + "grad_norm": 3.4841256548047768, + "learning_rate": 2.4191553117550336e-06, + "loss": 0.125, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5358 + }, + { + "epoch": 0.5153627927104871, + "grad_norm": 1.7562949513957042, + "learning_rate": 2.418392895759942e-06, + "loss": 0.1198, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5359 + }, + { + "epoch": 0.5154589604269847, + "grad_norm": 2.5482006219621973, + "learning_rate": 2.4176304873626983e-06, + "loss": 0.1734, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5360 + }, + { + "epoch": 0.5155551281434823, + "grad_norm": 2.096645521469848, + "learning_rate": 2.416868086634286e-06, + "loss": 0.1264, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5361 + }, + { + "epoch": 0.5156512958599798, + "grad_norm": 2.063744304823552, + "learning_rate": 2.416105693645687e-06, + "loss": 0.1313, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5362 + }, + { + "epoch": 0.5157474635764774, + "grad_norm": 1.5859631732453285, + "learning_rate": 2.415343308467881e-06, + "loss": 0.1317, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5363 + }, + { + "epoch": 0.515843631292975, + "grad_norm": 1.7266607197023054, + "learning_rate": 2.4145809311718487e-06, + "loss": 0.1231, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5364 + }, + { + "epoch": 0.5159397990094725, + "grad_norm": 1.5766782085968771, + "learning_rate": 2.4138185618285693e-06, + "loss": 0.1109, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5365 + }, + { + "epoch": 0.5160359667259701, + "grad_norm": 1.7891593188599242, + "learning_rate": 2.4130562005090225e-06, + "loss": 0.1297, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5366 + }, + { + "epoch": 0.5161321344424676, + "grad_norm": 1.3841649740291582, + "learning_rate": 2.4122938472841863e-06, + "loss": 0.1078, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5367 + }, + { + "epoch": 0.5162283021589652, + "grad_norm": 1.9210749933196838, + "learning_rate": 2.411531502225036e-06, + "loss": 0.15, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5368 + }, + { + "epoch": 0.5163244698754628, + "grad_norm": 2.093216842791654, + "learning_rate": 2.410769165402549e-06, + "loss": 0.1373, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5369 + }, + { + "epoch": 0.5164206375919603, + "grad_norm": 1.8723605125471545, + "learning_rate": 2.410006836887702e-06, + "loss": 0.1126, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5370 + }, + { + "epoch": 0.516516805308458, + "grad_norm": 1.313536232960904, + "learning_rate": 2.4092445167514684e-06, + "loss": 0.11, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5371 + }, + { + "epoch": 0.5166129730249556, + "grad_norm": 1.3826510224331396, + "learning_rate": 2.408482205064824e-06, + "loss": 0.1065, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5372 + }, + { + "epoch": 0.5167091407414531, + "grad_norm": 2.075855012674867, + "learning_rate": 2.4077199018987417e-06, + "loss": 0.1211, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5373 + }, + { + "epoch": 0.5168053084579507, + "grad_norm": 1.6425456324342484, + "learning_rate": 2.406957607324192e-06, + "loss": 0.1332, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5374 + }, + { + "epoch": 0.5169014761744483, + "grad_norm": 2.2115322589020736, + "learning_rate": 2.406195321412149e-06, + "loss": 0.115, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5375 + }, + { + "epoch": 0.5169976438909458, + "grad_norm": 2.302479800655095, + "learning_rate": 2.405433044233583e-06, + "loss": 0.1295, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5376 + }, + { + "epoch": 0.5170938116074434, + "grad_norm": 2.047713933562242, + "learning_rate": 2.4046707758594636e-06, + "loss": 0.1184, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5377 + }, + { + "epoch": 0.517189979323941, + "grad_norm": 2.0583121808564733, + "learning_rate": 2.403908516360761e-06, + "loss": 0.1519, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5378 + }, + { + "epoch": 0.5172861470404385, + "grad_norm": 2.5468608465217772, + "learning_rate": 2.4031462658084433e-06, + "loss": 0.1422, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5379 + }, + { + "epoch": 0.5173823147569361, + "grad_norm": 1.4418059696913152, + "learning_rate": 2.4023840242734774e-06, + "loss": 0.133, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5380 + }, + { + "epoch": 0.5174784824734336, + "grad_norm": 1.5845813060772318, + "learning_rate": 2.4016217918268308e-06, + "loss": 0.1441, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5381 + }, + { + "epoch": 0.5175746501899312, + "grad_norm": 1.9521114567522355, + "learning_rate": 2.4008595685394694e-06, + "loss": 0.1216, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5382 + }, + { + "epoch": 0.5176708179064288, + "grad_norm": 1.617719634190436, + "learning_rate": 2.400097354482358e-06, + "loss": 0.1412, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5383 + }, + { + "epoch": 0.5177669856229263, + "grad_norm": 1.5520694526697698, + "learning_rate": 2.399335149726463e-06, + "loss": 0.123, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5384 + }, + { + "epoch": 0.517863153339424, + "grad_norm": 2.434731611011092, + "learning_rate": 2.3985729543427444e-06, + "loss": 0.1239, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5385 + }, + { + "epoch": 0.5179593210559216, + "grad_norm": 1.722836955431545, + "learning_rate": 2.3978107684021664e-06, + "loss": 0.1398, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5386 + }, + { + "epoch": 0.5180554887724191, + "grad_norm": 2.4928215383463774, + "learning_rate": 2.3970485919756903e-06, + "loss": 0.1235, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5387 + }, + { + "epoch": 0.5181516564889167, + "grad_norm": 2.0842328313338223, + "learning_rate": 2.396286425134277e-06, + "loss": 0.1323, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5388 + }, + { + "epoch": 0.5182478242054143, + "grad_norm": 2.142824255571586, + "learning_rate": 2.395524267948887e-06, + "loss": 0.1174, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5389 + }, + { + "epoch": 0.5183439919219118, + "grad_norm": 3.883654704845559, + "learning_rate": 2.394762120490479e-06, + "loss": 0.1512, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5390 + }, + { + "epoch": 0.5184401596384094, + "grad_norm": 2.6576727411352556, + "learning_rate": 2.3939999828300097e-06, + "loss": 0.1205, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5391 + }, + { + "epoch": 0.518536327354907, + "grad_norm": 3.034632716554147, + "learning_rate": 2.3932378550384375e-06, + "loss": 0.138, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5392 + }, + { + "epoch": 0.5186324950714045, + "grad_norm": 2.453485672274251, + "learning_rate": 2.392475737186719e-06, + "loss": 0.1233, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5393 + }, + { + "epoch": 0.5187286627879021, + "grad_norm": 1.6367488761586293, + "learning_rate": 2.391713629345808e-06, + "loss": 0.1378, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5394 + }, + { + "epoch": 0.5188248305043996, + "grad_norm": 2.1675803855270663, + "learning_rate": 2.3909515315866606e-06, + "loss": 0.1319, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5395 + }, + { + "epoch": 0.5189209982208972, + "grad_norm": 1.837701322376468, + "learning_rate": 2.3901894439802292e-06, + "loss": 0.1365, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5396 + }, + { + "epoch": 0.5190171659373948, + "grad_norm": 1.5717860410568723, + "learning_rate": 2.3894273665974656e-06, + "loss": 0.1246, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5397 + }, + { + "epoch": 0.5191133336538923, + "grad_norm": 2.5483188780751527, + "learning_rate": 2.3886652995093227e-06, + "loss": 0.1364, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5398 + }, + { + "epoch": 0.51920950137039, + "grad_norm": 1.4114510306688097, + "learning_rate": 2.38790324278675e-06, + "loss": 0.1354, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5399 + }, + { + "epoch": 0.5193056690868876, + "grad_norm": 2.7306347909719144, + "learning_rate": 2.3871411965006985e-06, + "loss": 0.1271, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5400 + }, + { + "epoch": 0.5194018368033851, + "grad_norm": 2.071103844292229, + "learning_rate": 2.3863791607221163e-06, + "loss": 0.1184, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5401 + }, + { + "epoch": 0.5194980045198827, + "grad_norm": 3.1545274109066286, + "learning_rate": 2.3856171355219494e-06, + "loss": 0.143, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5402 + }, + { + "epoch": 0.5195941722363803, + "grad_norm": 1.5246293018679087, + "learning_rate": 2.3848551209711466e-06, + "loss": 0.1415, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5403 + }, + { + "epoch": 0.5196903399528778, + "grad_norm": 2.218136872036601, + "learning_rate": 2.3840931171406525e-06, + "loss": 0.1115, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5404 + }, + { + "epoch": 0.5197865076693754, + "grad_norm": 2.1542224035090993, + "learning_rate": 2.383331124101412e-06, + "loss": 0.1276, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5405 + }, + { + "epoch": 0.519882675385873, + "grad_norm": 3.1127693127088123, + "learning_rate": 2.3825691419243696e-06, + "loss": 0.1321, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5406 + }, + { + "epoch": 0.5199788431023705, + "grad_norm": 1.4047447440293532, + "learning_rate": 2.3818071706804662e-06, + "loss": 0.1166, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5407 + }, + { + "epoch": 0.5200750108188681, + "grad_norm": 1.741843801359934, + "learning_rate": 2.3810452104406444e-06, + "loss": 0.1218, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5408 + }, + { + "epoch": 0.5201711785353657, + "grad_norm": 1.9458422928137293, + "learning_rate": 2.3802832612758457e-06, + "loss": 0.1582, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5409 + }, + { + "epoch": 0.5202673462518632, + "grad_norm": 3.9418991124863423, + "learning_rate": 2.3795213232570082e-06, + "loss": 0.1381, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5410 + }, + { + "epoch": 0.5203635139683608, + "grad_norm": 2.6287745450682447, + "learning_rate": 2.378759396455071e-06, + "loss": 0.1274, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5411 + }, + { + "epoch": 0.5204596816848583, + "grad_norm": 4.747748434280221, + "learning_rate": 2.3779974809409724e-06, + "loss": 0.1528, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5412 + }, + { + "epoch": 0.520555849401356, + "grad_norm": 1.8562201360620134, + "learning_rate": 2.3772355767856474e-06, + "loss": 0.1157, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5413 + }, + { + "epoch": 0.5206520171178536, + "grad_norm": 1.7903772218399525, + "learning_rate": 2.3764736840600325e-06, + "loss": 0.1159, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5414 + }, + { + "epoch": 0.5207481848343511, + "grad_norm": 1.538903979277867, + "learning_rate": 2.375711802835061e-06, + "loss": 0.1224, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5415 + }, + { + "epoch": 0.5208443525508487, + "grad_norm": 2.331618773796589, + "learning_rate": 2.3749499331816675e-06, + "loss": 0.1164, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5416 + }, + { + "epoch": 0.5209405202673463, + "grad_norm": 2.624488847739242, + "learning_rate": 2.374188075170783e-06, + "loss": 0.1531, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5417 + }, + { + "epoch": 0.5210366879838438, + "grad_norm": 5.17028570426077, + "learning_rate": 2.373426228873341e-06, + "loss": 0.1651, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5418 + }, + { + "epoch": 0.5211328557003414, + "grad_norm": 3.300551402664136, + "learning_rate": 2.3726643943602676e-06, + "loss": 0.1609, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5419 + }, + { + "epoch": 0.521229023416839, + "grad_norm": 2.053284454529226, + "learning_rate": 2.3719025717024946e-06, + "loss": 0.1586, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5420 + }, + { + "epoch": 0.5213251911333365, + "grad_norm": 1.6612594509456209, + "learning_rate": 2.3711407609709485e-06, + "loss": 0.1655, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5421 + }, + { + "epoch": 0.5214213588498341, + "grad_norm": 1.5347664480593717, + "learning_rate": 2.3703789622365567e-06, + "loss": 0.1151, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5422 + }, + { + "epoch": 0.5215175265663317, + "grad_norm": 2.2058025349706694, + "learning_rate": 2.3696171755702453e-06, + "loss": 0.179, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5423 + }, + { + "epoch": 0.5216136942828292, + "grad_norm": 2.142476870084941, + "learning_rate": 2.3688554010429376e-06, + "loss": 0.1123, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5424 + }, + { + "epoch": 0.5217098619993268, + "grad_norm": 2.5477697244063915, + "learning_rate": 2.3680936387255564e-06, + "loss": 0.1415, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5425 + }, + { + "epoch": 0.5218060297158243, + "grad_norm": 4.404848655050807, + "learning_rate": 2.3673318886890264e-06, + "loss": 0.1343, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5426 + }, + { + "epoch": 0.521902197432322, + "grad_norm": 1.380367458234812, + "learning_rate": 2.366570151004266e-06, + "loss": 0.1052, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5427 + }, + { + "epoch": 0.5219983651488196, + "grad_norm": 1.8430961130329, + "learning_rate": 2.365808425742196e-06, + "loss": 0.1316, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5428 + }, + { + "epoch": 0.5220945328653172, + "grad_norm": 1.9703775216420927, + "learning_rate": 2.3650467129737366e-06, + "loss": 0.1347, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5429 + }, + { + "epoch": 0.5221907005818147, + "grad_norm": 2.6519151013651996, + "learning_rate": 2.3642850127698033e-06, + "loss": 0.1415, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5430 + }, + { + "epoch": 0.5222868682983123, + "grad_norm": 1.7783060667705508, + "learning_rate": 2.363523325201313e-06, + "loss": 0.103, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5431 + }, + { + "epoch": 0.5223830360148098, + "grad_norm": 1.5537951326162032, + "learning_rate": 2.3627616503391813e-06, + "loss": 0.1098, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5432 + }, + { + "epoch": 0.5224792037313074, + "grad_norm": 1.6257172235618902, + "learning_rate": 2.3619999882543224e-06, + "loss": 0.1255, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5433 + }, + { + "epoch": 0.522575371447805, + "grad_norm": 2.6503111678277604, + "learning_rate": 2.3612383390176503e-06, + "loss": 0.1331, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5434 + }, + { + "epoch": 0.5226715391643025, + "grad_norm": 1.831694558894391, + "learning_rate": 2.360476702700074e-06, + "loss": 0.1265, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5435 + }, + { + "epoch": 0.5227677068808001, + "grad_norm": 1.7398069519962371, + "learning_rate": 2.359715079372505e-06, + "loss": 0.1254, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5436 + }, + { + "epoch": 0.5228638745972977, + "grad_norm": 1.5684780156881855, + "learning_rate": 2.3589534691058536e-06, + "loss": 0.1152, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5437 + }, + { + "epoch": 0.5229600423137952, + "grad_norm": 1.7856261634627169, + "learning_rate": 2.3581918719710263e-06, + "loss": 0.1351, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5438 + }, + { + "epoch": 0.5230562100302928, + "grad_norm": 2.20664383531522, + "learning_rate": 2.357430288038932e-06, + "loss": 0.1399, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5439 + }, + { + "epoch": 0.5231523777467904, + "grad_norm": 2.9900855985999257, + "learning_rate": 2.3566687173804747e-06, + "loss": 0.1355, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5440 + }, + { + "epoch": 0.523248545463288, + "grad_norm": 1.8998727955223922, + "learning_rate": 2.355907160066558e-06, + "loss": 0.1145, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5441 + }, + { + "epoch": 0.5233447131797856, + "grad_norm": 1.5147561927545852, + "learning_rate": 2.3551456161680865e-06, + "loss": 0.0958, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5442 + }, + { + "epoch": 0.5234408808962832, + "grad_norm": 2.386460045660579, + "learning_rate": 2.354384085755962e-06, + "loss": 0.1462, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5443 + }, + { + "epoch": 0.5235370486127807, + "grad_norm": 2.566084065199529, + "learning_rate": 2.3536225689010846e-06, + "loss": 0.1472, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5444 + }, + { + "epoch": 0.5236332163292783, + "grad_norm": 1.456723729142031, + "learning_rate": 2.3528610656743535e-06, + "loss": 0.1002, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5445 + }, + { + "epoch": 0.5237293840457758, + "grad_norm": 2.4066099260895615, + "learning_rate": 2.352099576146668e-06, + "loss": 0.1281, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5446 + }, + { + "epoch": 0.5238255517622734, + "grad_norm": 2.269057311203068, + "learning_rate": 2.3513381003889228e-06, + "loss": 0.1526, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5447 + }, + { + "epoch": 0.523921719478771, + "grad_norm": 1.9177748064481264, + "learning_rate": 2.3505766384720148e-06, + "loss": 0.1574, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5448 + }, + { + "epoch": 0.5240178871952685, + "grad_norm": 1.5990503807636183, + "learning_rate": 2.3498151904668377e-06, + "loss": 0.1225, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5449 + }, + { + "epoch": 0.5241140549117661, + "grad_norm": 1.8000496989430654, + "learning_rate": 2.349053756444285e-06, + "loss": 0.1212, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5450 + }, + { + "epoch": 0.5242102226282637, + "grad_norm": 2.1573442158382754, + "learning_rate": 2.3482923364752488e-06, + "loss": 0.118, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5451 + }, + { + "epoch": 0.5243063903447612, + "grad_norm": 2.4099338122455616, + "learning_rate": 2.3475309306306174e-06, + "loss": 0.135, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5452 + }, + { + "epoch": 0.5244025580612588, + "grad_norm": 2.1420045959481855, + "learning_rate": 2.3467695389812813e-06, + "loss": 0.1606, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5453 + }, + { + "epoch": 0.5244987257777564, + "grad_norm": 1.3026466969782853, + "learning_rate": 2.346008161598128e-06, + "loss": 0.1159, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5454 + }, + { + "epoch": 0.524594893494254, + "grad_norm": 1.6080547365364024, + "learning_rate": 2.3452467985520437e-06, + "loss": 0.1152, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5455 + }, + { + "epoch": 0.5246910612107516, + "grad_norm": 2.439822276516174, + "learning_rate": 2.344485449913914e-06, + "loss": 0.1219, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5456 + }, + { + "epoch": 0.5247872289272492, + "grad_norm": 1.5179449399143932, + "learning_rate": 2.3437241157546225e-06, + "loss": 0.1426, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5457 + }, + { + "epoch": 0.5248833966437467, + "grad_norm": 1.5571394376608165, + "learning_rate": 2.34296279614505e-06, + "loss": 0.1239, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5458 + }, + { + "epoch": 0.5249795643602443, + "grad_norm": 1.567865722005663, + "learning_rate": 2.3422014911560796e-06, + "loss": 0.1297, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5459 + }, + { + "epoch": 0.5250757320767419, + "grad_norm": 2.5841020169760696, + "learning_rate": 2.341440200858589e-06, + "loss": 0.1111, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5460 + }, + { + "epoch": 0.5251718997932394, + "grad_norm": 1.9459091512182933, + "learning_rate": 2.3406789253234577e-06, + "loss": 0.1607, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5461 + }, + { + "epoch": 0.525268067509737, + "grad_norm": 1.8470013401275673, + "learning_rate": 2.339917664621563e-06, + "loss": 0.1268, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5462 + }, + { + "epoch": 0.5253642352262345, + "grad_norm": 1.644341978829923, + "learning_rate": 2.3391564188237786e-06, + "loss": 0.1305, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5463 + }, + { + "epoch": 0.5254604029427321, + "grad_norm": 2.1451387864930247, + "learning_rate": 2.33839518800098e-06, + "loss": 0.099, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5464 + }, + { + "epoch": 0.5255565706592297, + "grad_norm": 1.451736541438732, + "learning_rate": 2.3376339722240392e-06, + "loss": 0.1121, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5465 + }, + { + "epoch": 0.5256527383757272, + "grad_norm": 1.49510080777434, + "learning_rate": 2.3368727715638275e-06, + "loss": 0.1312, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5466 + }, + { + "epoch": 0.5257489060922248, + "grad_norm": 1.9690416743206445, + "learning_rate": 2.3361115860912156e-06, + "loss": 0.1602, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5467 + }, + { + "epoch": 0.5258450738087224, + "grad_norm": 1.3925122285758345, + "learning_rate": 2.3353504158770725e-06, + "loss": 0.1181, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5468 + }, + { + "epoch": 0.52594124152522, + "grad_norm": 1.5339917282441533, + "learning_rate": 2.334589260992263e-06, + "loss": 0.106, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5469 + }, + { + "epoch": 0.5260374092417176, + "grad_norm": 1.6863906228139682, + "learning_rate": 2.333828121507654e-06, + "loss": 0.1062, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5470 + }, + { + "epoch": 0.5261335769582152, + "grad_norm": 1.6669482649773277, + "learning_rate": 2.3330669974941105e-06, + "loss": 0.1459, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5471 + }, + { + "epoch": 0.5262297446747127, + "grad_norm": 1.850999023467371, + "learning_rate": 2.332305889022494e-06, + "loss": 0.0849, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5472 + }, + { + "epoch": 0.5263259123912103, + "grad_norm": 2.8663322318446074, + "learning_rate": 2.3315447961636666e-06, + "loss": 0.1455, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5473 + }, + { + "epoch": 0.5264220801077079, + "grad_norm": 2.2385343473566652, + "learning_rate": 2.3307837189884885e-06, + "loss": 0.1162, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5474 + }, + { + "epoch": 0.5265182478242054, + "grad_norm": 1.646071166922654, + "learning_rate": 2.3300226575678162e-06, + "loss": 0.1374, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5475 + }, + { + "epoch": 0.526614415540703, + "grad_norm": 2.8189034406335987, + "learning_rate": 2.329261611972509e-06, + "loss": 0.152, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5476 + }, + { + "epoch": 0.5267105832572005, + "grad_norm": 3.1822416258701622, + "learning_rate": 2.328500582273421e-06, + "loss": 0.1337, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5477 + }, + { + "epoch": 0.5268067509736981, + "grad_norm": 1.7459504338918306, + "learning_rate": 2.3277395685414062e-06, + "loss": 0.1171, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5478 + }, + { + "epoch": 0.5269029186901957, + "grad_norm": 2.590352381150059, + "learning_rate": 2.3269785708473183e-06, + "loss": 0.1154, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5479 + }, + { + "epoch": 0.5269990864066932, + "grad_norm": 1.485037315853856, + "learning_rate": 2.3262175892620064e-06, + "loss": 0.1163, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5480 + }, + { + "epoch": 0.5270952541231908, + "grad_norm": 1.3867757420284397, + "learning_rate": 2.325456623856321e-06, + "loss": 0.0919, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5481 + }, + { + "epoch": 0.5271914218396884, + "grad_norm": 1.9746998555265063, + "learning_rate": 2.32469567470111e-06, + "loss": 0.1087, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5482 + }, + { + "epoch": 0.527287589556186, + "grad_norm": 1.9001192853524627, + "learning_rate": 2.3239347418672203e-06, + "loss": 0.1532, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5483 + }, + { + "epoch": 0.5273837572726836, + "grad_norm": 2.198356656170353, + "learning_rate": 2.3231738254254963e-06, + "loss": 0.1751, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5484 + }, + { + "epoch": 0.5274799249891812, + "grad_norm": 1.7408127305239915, + "learning_rate": 2.322412925446783e-06, + "loss": 0.1432, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5485 + }, + { + "epoch": 0.5275760927056787, + "grad_norm": 1.676952099615272, + "learning_rate": 2.3216520420019194e-06, + "loss": 0.1278, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5486 + }, + { + "epoch": 0.5276722604221763, + "grad_norm": 2.0451766358096757, + "learning_rate": 2.320891175161748e-06, + "loss": 0.1023, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5487 + }, + { + "epoch": 0.5277684281386739, + "grad_norm": 1.6958078118963462, + "learning_rate": 2.3201303249971068e-06, + "loss": 0.1098, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5488 + }, + { + "epoch": 0.5278645958551714, + "grad_norm": 3.678835610800995, + "learning_rate": 2.319369491578834e-06, + "loss": 0.1482, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5489 + }, + { + "epoch": 0.527960763571669, + "grad_norm": 1.9138627565015929, + "learning_rate": 2.318608674977765e-06, + "loss": 0.1401, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5490 + }, + { + "epoch": 0.5280569312881666, + "grad_norm": 1.5589859924350156, + "learning_rate": 2.3178478752647323e-06, + "loss": 0.1101, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5491 + }, + { + "epoch": 0.5281530990046641, + "grad_norm": 1.8749073645292071, + "learning_rate": 2.3170870925105703e-06, + "loss": 0.1038, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5492 + }, + { + "epoch": 0.5282492667211617, + "grad_norm": 1.2531117341232187, + "learning_rate": 2.31632632678611e-06, + "loss": 0.0902, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5493 + }, + { + "epoch": 0.5283454344376592, + "grad_norm": 1.8977190148022376, + "learning_rate": 2.3155655781621795e-06, + "loss": 0.1387, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5494 + }, + { + "epoch": 0.5284416021541568, + "grad_norm": 1.3460060482958316, + "learning_rate": 2.3148048467096076e-06, + "loss": 0.1095, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5495 + }, + { + "epoch": 0.5285377698706544, + "grad_norm": 1.7670583334841157, + "learning_rate": 2.3140441324992215e-06, + "loss": 0.1289, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5496 + }, + { + "epoch": 0.528633937587152, + "grad_norm": 1.8838467815241489, + "learning_rate": 2.3132834356018433e-06, + "loss": 0.1346, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5497 + }, + { + "epoch": 0.5287301053036496, + "grad_norm": 2.0863611365028323, + "learning_rate": 2.3125227560882975e-06, + "loss": 0.1208, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5498 + }, + { + "epoch": 0.5288262730201472, + "grad_norm": 2.1008326872457403, + "learning_rate": 2.311762094029405e-06, + "loss": 0.1312, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5499 + }, + { + "epoch": 0.5289224407366447, + "grad_norm": 1.9562266474143437, + "learning_rate": 2.311001449495986e-06, + "loss": 0.1247, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5500 + }, + { + "epoch": 0.5290186084531423, + "grad_norm": 1.8918734189945015, + "learning_rate": 2.3102408225588588e-06, + "loss": 0.1596, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5501 + }, + { + "epoch": 0.5291147761696399, + "grad_norm": 2.015065820855323, + "learning_rate": 2.3094802132888405e-06, + "loss": 0.1248, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5502 + }, + { + "epoch": 0.5292109438861374, + "grad_norm": 3.484163700828407, + "learning_rate": 2.308719621756744e-06, + "loss": 0.139, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5503 + }, + { + "epoch": 0.529307111602635, + "grad_norm": 2.8287041012042393, + "learning_rate": 2.307959048033383e-06, + "loss": 0.1288, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5504 + }, + { + "epoch": 0.5294032793191326, + "grad_norm": 2.185747330807255, + "learning_rate": 2.30719849218957e-06, + "loss": 0.1257, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5505 + }, + { + "epoch": 0.5294994470356301, + "grad_norm": 2.0190444229432405, + "learning_rate": 2.306437954296115e-06, + "loss": 0.1156, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5506 + }, + { + "epoch": 0.5295956147521277, + "grad_norm": 1.6395438395231632, + "learning_rate": 2.305677434423825e-06, + "loss": 0.1342, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5507 + }, + { + "epoch": 0.5296917824686252, + "grad_norm": 1.9687427750047495, + "learning_rate": 2.3049169326435076e-06, + "loss": 0.111, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5508 + }, + { + "epoch": 0.5297879501851228, + "grad_norm": 1.6602740426399025, + "learning_rate": 2.3041564490259673e-06, + "loss": 0.1114, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5509 + }, + { + "epoch": 0.5298841179016204, + "grad_norm": 2.68657104736888, + "learning_rate": 2.3033959836420063e-06, + "loss": 0.1535, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5510 + }, + { + "epoch": 0.529980285618118, + "grad_norm": 1.6143417299256535, + "learning_rate": 2.302635536562427e-06, + "loss": 0.0997, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5511 + }, + { + "epoch": 0.5300764533346156, + "grad_norm": 2.5321266520785457, + "learning_rate": 2.3018751078580287e-06, + "loss": 0.1218, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5512 + }, + { + "epoch": 0.5301726210511132, + "grad_norm": 1.5236276629695475, + "learning_rate": 2.3011146975996114e-06, + "loss": 0.1263, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5513 + }, + { + "epoch": 0.5302687887676107, + "grad_norm": 2.115327882678653, + "learning_rate": 2.300354305857968e-06, + "loss": 0.1304, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5514 + }, + { + "epoch": 0.5303649564841083, + "grad_norm": 1.686210021364552, + "learning_rate": 2.2995939327038954e-06, + "loss": 0.1271, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5515 + }, + { + "epoch": 0.5304611242006059, + "grad_norm": 1.7522009385663082, + "learning_rate": 2.2988335782081854e-06, + "loss": 0.1293, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5516 + }, + { + "epoch": 0.5305572919171034, + "grad_norm": 2.629513764541275, + "learning_rate": 2.29807324244163e-06, + "loss": 0.1121, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5517 + }, + { + "epoch": 0.530653459633601, + "grad_norm": 2.1600336924820867, + "learning_rate": 2.297312925475019e-06, + "loss": 0.1107, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5518 + }, + { + "epoch": 0.5307496273500986, + "grad_norm": 2.195117876005232, + "learning_rate": 2.296552627379138e-06, + "loss": 0.1225, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5519 + }, + { + "epoch": 0.5308457950665961, + "grad_norm": 2.0876843737896733, + "learning_rate": 2.2957923482247745e-06, + "loss": 0.1477, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5520 + }, + { + "epoch": 0.5309419627830937, + "grad_norm": 1.9498043082917325, + "learning_rate": 2.295032088082712e-06, + "loss": 0.1154, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5521 + }, + { + "epoch": 0.5310381304995913, + "grad_norm": 1.612261852634365, + "learning_rate": 2.294271847023733e-06, + "loss": 0.1323, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5522 + }, + { + "epoch": 0.5311342982160888, + "grad_norm": 1.7596955566563621, + "learning_rate": 2.2935116251186184e-06, + "loss": 0.133, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5523 + }, + { + "epoch": 0.5312304659325864, + "grad_norm": 3.0294974846288523, + "learning_rate": 2.292751422438147e-06, + "loss": 0.1275, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5524 + }, + { + "epoch": 0.5313266336490841, + "grad_norm": 3.2658048896573613, + "learning_rate": 2.2919912390530945e-06, + "loss": 0.1396, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5525 + }, + { + "epoch": 0.5314228013655816, + "grad_norm": 1.9883006008004414, + "learning_rate": 2.291231075034238e-06, + "loss": 0.1372, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5526 + }, + { + "epoch": 0.5315189690820792, + "grad_norm": 1.4242147928993683, + "learning_rate": 2.290470930452349e-06, + "loss": 0.1297, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5527 + }, + { + "epoch": 0.5316151367985767, + "grad_norm": 1.9240617152691109, + "learning_rate": 2.2897108053782e-06, + "loss": 0.1071, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5528 + }, + { + "epoch": 0.5317113045150743, + "grad_norm": 1.9805368925358062, + "learning_rate": 2.2889506998825626e-06, + "loss": 0.127, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5529 + }, + { + "epoch": 0.5318074722315719, + "grad_norm": 2.342048973199261, + "learning_rate": 2.2881906140362013e-06, + "loss": 0.1226, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5530 + }, + { + "epoch": 0.5319036399480694, + "grad_norm": 2.5807292283714527, + "learning_rate": 2.2874305479098835e-06, + "loss": 0.1499, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5531 + }, + { + "epoch": 0.531999807664567, + "grad_norm": 1.7613323331232167, + "learning_rate": 2.286670501574374e-06, + "loss": 0.1323, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5532 + }, + { + "epoch": 0.5320959753810646, + "grad_norm": 1.5714676794333098, + "learning_rate": 2.2859104751004352e-06, + "loss": 0.1207, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5533 + }, + { + "epoch": 0.5321921430975621, + "grad_norm": 2.149658172489002, + "learning_rate": 2.2851504685588275e-06, + "loss": 0.1287, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5534 + }, + { + "epoch": 0.5322883108140597, + "grad_norm": 2.4754692975886403, + "learning_rate": 2.2843904820203107e-06, + "loss": 0.1398, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5535 + }, + { + "epoch": 0.5323844785305573, + "grad_norm": 1.5146397403770337, + "learning_rate": 2.283630515555639e-06, + "loss": 0.119, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5536 + }, + { + "epoch": 0.5324806462470548, + "grad_norm": 1.4332129403624876, + "learning_rate": 2.2828705692355697e-06, + "loss": 0.1164, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5537 + }, + { + "epoch": 0.5325768139635524, + "grad_norm": 2.3472856811381404, + "learning_rate": 2.2821106431308546e-06, + "loss": 0.1515, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5538 + }, + { + "epoch": 0.5326729816800501, + "grad_norm": 1.9736314152855916, + "learning_rate": 2.281350737312246e-06, + "loss": 0.132, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5539 + }, + { + "epoch": 0.5327691493965476, + "grad_norm": 1.9674009602340403, + "learning_rate": 2.280590851850493e-06, + "loss": 0.1328, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5540 + }, + { + "epoch": 0.5328653171130452, + "grad_norm": 1.7690345533742249, + "learning_rate": 2.279830986816343e-06, + "loss": 0.1367, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5541 + }, + { + "epoch": 0.5329614848295428, + "grad_norm": 2.2831611838016177, + "learning_rate": 2.279071142280541e-06, + "loss": 0.123, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5542 + }, + { + "epoch": 0.5330576525460403, + "grad_norm": 1.8265633291502739, + "learning_rate": 2.2783113183138313e-06, + "loss": 0.1497, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5543 + }, + { + "epoch": 0.5331538202625379, + "grad_norm": 1.6195131440254509, + "learning_rate": 2.2775515149869544e-06, + "loss": 0.1292, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5544 + }, + { + "epoch": 0.5332499879790354, + "grad_norm": 1.225223589086288, + "learning_rate": 2.276791732370652e-06, + "loss": 0.1026, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5545 + }, + { + "epoch": 0.533346155695533, + "grad_norm": 1.6194393373084104, + "learning_rate": 2.2760319705356616e-06, + "loss": 0.1342, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5546 + }, + { + "epoch": 0.5334423234120306, + "grad_norm": 2.0417497172895573, + "learning_rate": 2.2752722295527174e-06, + "loss": 0.131, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5547 + }, + { + "epoch": 0.5335384911285281, + "grad_norm": 1.830915702328337, + "learning_rate": 2.274512509492555e-06, + "loss": 0.1562, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5548 + }, + { + "epoch": 0.5336346588450257, + "grad_norm": 1.6655469148153579, + "learning_rate": 2.273752810425906e-06, + "loss": 0.1341, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5549 + }, + { + "epoch": 0.5337308265615233, + "grad_norm": 1.7510309461421898, + "learning_rate": 2.2729931324235e-06, + "loss": 0.1191, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5550 + }, + { + "epoch": 0.5338269942780208, + "grad_norm": 1.9211706596043505, + "learning_rate": 2.272233475556066e-06, + "loss": 0.1121, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5551 + }, + { + "epoch": 0.5339231619945184, + "grad_norm": 1.7034545428929107, + "learning_rate": 2.271473839894331e-06, + "loss": 0.1245, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5552 + }, + { + "epoch": 0.5340193297110161, + "grad_norm": 1.4054342600367074, + "learning_rate": 2.2707142255090168e-06, + "loss": 0.0984, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5553 + }, + { + "epoch": 0.5341154974275136, + "grad_norm": 1.8793572115480268, + "learning_rate": 2.269954632470847e-06, + "loss": 0.129, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5554 + }, + { + "epoch": 0.5342116651440112, + "grad_norm": 2.078018953473931, + "learning_rate": 2.269195060850542e-06, + "loss": 0.1385, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5555 + }, + { + "epoch": 0.5343078328605088, + "grad_norm": 1.48901207932617, + "learning_rate": 2.26843551071882e-06, + "loss": 0.107, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5556 + }, + { + "epoch": 0.5344040005770063, + "grad_norm": 1.6665541797065306, + "learning_rate": 2.2676759821463968e-06, + "loss": 0.1237, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5557 + }, + { + "epoch": 0.5345001682935039, + "grad_norm": 1.8809272155360188, + "learning_rate": 2.2669164752039863e-06, + "loss": 0.1088, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5558 + }, + { + "epoch": 0.5345963360100014, + "grad_norm": 2.1651811893430377, + "learning_rate": 2.2661569899623016e-06, + "loss": 0.1005, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5559 + }, + { + "epoch": 0.534692503726499, + "grad_norm": 2.2858333963220576, + "learning_rate": 2.265397526492052e-06, + "loss": 0.1301, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5560 + }, + { + "epoch": 0.5347886714429966, + "grad_norm": 2.8872544550252046, + "learning_rate": 2.2646380848639465e-06, + "loss": 0.1467, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5561 + }, + { + "epoch": 0.5348848391594941, + "grad_norm": 1.8015955664283287, + "learning_rate": 2.2638786651486908e-06, + "loss": 0.094, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5562 + }, + { + "epoch": 0.5349810068759917, + "grad_norm": 2.501622626961992, + "learning_rate": 2.2631192674169898e-06, + "loss": 0.1295, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5563 + }, + { + "epoch": 0.5350771745924893, + "grad_norm": 1.8054613003768598, + "learning_rate": 2.262359891739544e-06, + "loss": 0.1202, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5564 + }, + { + "epoch": 0.5351733423089868, + "grad_norm": 2.94638452439972, + "learning_rate": 2.2616005381870542e-06, + "loss": 0.1225, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5565 + }, + { + "epoch": 0.5352695100254844, + "grad_norm": 2.6685000258672122, + "learning_rate": 2.2608412068302183e-06, + "loss": 0.1226, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5566 + }, + { + "epoch": 0.5353656777419821, + "grad_norm": 1.7351254241499428, + "learning_rate": 2.260081897739732e-06, + "loss": 0.1136, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5567 + }, + { + "epoch": 0.5354618454584796, + "grad_norm": 1.843473554845069, + "learning_rate": 2.2593226109862896e-06, + "loss": 0.1277, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5568 + }, + { + "epoch": 0.5355580131749772, + "grad_norm": 1.9293059054019994, + "learning_rate": 2.2585633466405836e-06, + "loss": 0.128, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5569 + }, + { + "epoch": 0.5356541808914748, + "grad_norm": 2.1943732264737723, + "learning_rate": 2.257804104773301e-06, + "loss": 0.1129, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5570 + }, + { + "epoch": 0.5357503486079723, + "grad_norm": 2.5394317893061533, + "learning_rate": 2.2570448854551314e-06, + "loss": 0.1276, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5571 + }, + { + "epoch": 0.5358465163244699, + "grad_norm": 1.8734171613504083, + "learning_rate": 2.256285688756759e-06, + "loss": 0.1123, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5572 + }, + { + "epoch": 0.5359426840409675, + "grad_norm": 1.5924670266948988, + "learning_rate": 2.255526514748869e-06, + "loss": 0.102, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5573 + }, + { + "epoch": 0.536038851757465, + "grad_norm": 1.7371250521919757, + "learning_rate": 2.2547673635021413e-06, + "loss": 0.1095, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5574 + }, + { + "epoch": 0.5361350194739626, + "grad_norm": 1.6952522839503796, + "learning_rate": 2.2540082350872544e-06, + "loss": 0.1008, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5575 + }, + { + "epoch": 0.5362311871904601, + "grad_norm": 1.8185550901531429, + "learning_rate": 2.253249129574887e-06, + "loss": 0.1129, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5576 + }, + { + "epoch": 0.5363273549069577, + "grad_norm": 2.40996739585308, + "learning_rate": 2.252490047035712e-06, + "loss": 0.1215, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5577 + }, + { + "epoch": 0.5364235226234553, + "grad_norm": 1.9455581683809662, + "learning_rate": 2.251730987540403e-06, + "loss": 0.1459, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5578 + }, + { + "epoch": 0.5365196903399528, + "grad_norm": 1.699345964501949, + "learning_rate": 2.2509719511596307e-06, + "loss": 0.1169, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5579 + }, + { + "epoch": 0.5366158580564504, + "grad_norm": 3.4680462295436216, + "learning_rate": 2.2502129379640644e-06, + "loss": 0.1487, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5580 + }, + { + "epoch": 0.5367120257729481, + "grad_norm": 1.8401582408277835, + "learning_rate": 2.2494539480243683e-06, + "loss": 0.1256, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5581 + }, + { + "epoch": 0.5368081934894456, + "grad_norm": 1.4206939243119536, + "learning_rate": 2.248694981411208e-06, + "loss": 0.1034, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5582 + }, + { + "epoch": 0.5369043612059432, + "grad_norm": 1.7213679908852892, + "learning_rate": 2.2479360381952442e-06, + "loss": 0.1201, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5583 + }, + { + "epoch": 0.5370005289224408, + "grad_norm": 1.596511093101517, + "learning_rate": 2.2471771184471373e-06, + "loss": 0.1024, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5584 + }, + { + "epoch": 0.5370966966389383, + "grad_norm": 1.3931669846448413, + "learning_rate": 2.2464182222375466e-06, + "loss": 0.1044, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5585 + }, + { + "epoch": 0.5371928643554359, + "grad_norm": 1.9149106589463714, + "learning_rate": 2.2456593496371244e-06, + "loss": 0.1467, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5586 + }, + { + "epoch": 0.5372890320719335, + "grad_norm": 1.8872984299446214, + "learning_rate": 2.244900500716525e-06, + "loss": 0.1217, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5587 + }, + { + "epoch": 0.537385199788431, + "grad_norm": 2.6658454335392703, + "learning_rate": 2.2441416755463997e-06, + "loss": 0.1221, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5588 + }, + { + "epoch": 0.5374813675049286, + "grad_norm": 1.5904558572022962, + "learning_rate": 2.2433828741973975e-06, + "loss": 0.1095, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5589 + }, + { + "epoch": 0.5375775352214262, + "grad_norm": 2.6297684047633023, + "learning_rate": 2.242624096740164e-06, + "loss": 0.1323, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5590 + }, + { + "epoch": 0.5376737029379237, + "grad_norm": 2.025283201107527, + "learning_rate": 2.2418653432453446e-06, + "loss": 0.1541, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5591 + }, + { + "epoch": 0.5377698706544213, + "grad_norm": 2.8219709500199603, + "learning_rate": 2.2411066137835806e-06, + "loss": 0.1546, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5592 + }, + { + "epoch": 0.5378660383709188, + "grad_norm": 1.6783504367469289, + "learning_rate": 2.2403479084255118e-06, + "loss": 0.1499, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5593 + }, + { + "epoch": 0.5379622060874164, + "grad_norm": 1.937359105581157, + "learning_rate": 2.239589227241776e-06, + "loss": 0.1397, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5594 + }, + { + "epoch": 0.5380583738039141, + "grad_norm": 1.727817721622581, + "learning_rate": 2.238830570303008e-06, + "loss": 0.1532, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5595 + }, + { + "epoch": 0.5381545415204116, + "grad_norm": 1.4470627027498353, + "learning_rate": 2.2380719376798417e-06, + "loss": 0.1171, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5596 + }, + { + "epoch": 0.5382507092369092, + "grad_norm": 1.615536853469649, + "learning_rate": 2.237313329442908e-06, + "loss": 0.1012, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5597 + }, + { + "epoch": 0.5383468769534068, + "grad_norm": 1.7243265778341736, + "learning_rate": 2.236554745662835e-06, + "loss": 0.1075, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5598 + }, + { + "epoch": 0.5384430446699043, + "grad_norm": 1.4355237010358588, + "learning_rate": 2.2357961864102483e-06, + "loss": 0.1354, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5599 + }, + { + "epoch": 0.5385392123864019, + "grad_norm": 2.6904881828062313, + "learning_rate": 2.235037651755773e-06, + "loss": 0.1573, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5600 + }, + { + "epoch": 0.5386353801028995, + "grad_norm": 1.615519331447449, + "learning_rate": 2.23427914177003e-06, + "loss": 0.114, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5601 + }, + { + "epoch": 0.538731547819397, + "grad_norm": 1.4174335872355777, + "learning_rate": 2.2335206565236403e-06, + "loss": 0.1197, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5602 + }, + { + "epoch": 0.5388277155358946, + "grad_norm": 1.7080582836514917, + "learning_rate": 2.2327621960872187e-06, + "loss": 0.1054, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5603 + }, + { + "epoch": 0.5389238832523922, + "grad_norm": 1.4630252808490185, + "learning_rate": 2.2320037605313807e-06, + "loss": 0.1458, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5604 + }, + { + "epoch": 0.5390200509688897, + "grad_norm": 1.2522427420629183, + "learning_rate": 2.231245349926739e-06, + "loss": 0.1176, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5605 + }, + { + "epoch": 0.5391162186853873, + "grad_norm": 1.9108422763640178, + "learning_rate": 2.230486964343905e-06, + "loss": 0.1393, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5606 + }, + { + "epoch": 0.5392123864018848, + "grad_norm": 1.7420361755729699, + "learning_rate": 2.2297286038534845e-06, + "loss": 0.1364, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5607 + }, + { + "epoch": 0.5393085541183824, + "grad_norm": 1.4952982747648254, + "learning_rate": 2.228970268526084e-06, + "loss": 0.1252, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5608 + }, + { + "epoch": 0.5394047218348801, + "grad_norm": 2.1204635143414343, + "learning_rate": 2.2282119584323066e-06, + "loss": 0.1559, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5609 + }, + { + "epoch": 0.5395008895513776, + "grad_norm": 1.9248242742746557, + "learning_rate": 2.2274536736427526e-06, + "loss": 0.1496, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5610 + }, + { + "epoch": 0.5395970572678752, + "grad_norm": 1.4649005436811513, + "learning_rate": 2.2266954142280207e-06, + "loss": 0.101, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5611 + }, + { + "epoch": 0.5396932249843728, + "grad_norm": 1.8739068397825627, + "learning_rate": 2.2259371802587067e-06, + "loss": 0.1217, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5612 + }, + { + "epoch": 0.5397893927008703, + "grad_norm": 1.3851958728276448, + "learning_rate": 2.225178971805406e-06, + "loss": 0.114, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5613 + }, + { + "epoch": 0.5398855604173679, + "grad_norm": 1.8040146137623654, + "learning_rate": 2.224420788938707e-06, + "loss": 0.1016, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5614 + }, + { + "epoch": 0.5399817281338655, + "grad_norm": 1.784675243819849, + "learning_rate": 2.2236626317292006e-06, + "loss": 0.1283, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5615 + }, + { + "epoch": 0.540077895850363, + "grad_norm": 1.9796873421779007, + "learning_rate": 2.2229045002474727e-06, + "loss": 0.1404, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5616 + }, + { + "epoch": 0.5401740635668606, + "grad_norm": 3.5589703090453377, + "learning_rate": 2.2221463945641077e-06, + "loss": 0.1076, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5617 + }, + { + "epoch": 0.5402702312833582, + "grad_norm": 1.5022091898047756, + "learning_rate": 2.2213883147496875e-06, + "loss": 0.1278, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5618 + }, + { + "epoch": 0.5403663989998557, + "grad_norm": 1.4649822155727084, + "learning_rate": 2.220630260874792e-06, + "loss": 0.1097, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5619 + }, + { + "epoch": 0.5404625667163533, + "grad_norm": 1.7570255190649222, + "learning_rate": 2.2198722330099964e-06, + "loss": 0.1137, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5620 + }, + { + "epoch": 0.5405587344328509, + "grad_norm": 1.7149586646972554, + "learning_rate": 2.2191142312258763e-06, + "loss": 0.1143, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5621 + }, + { + "epoch": 0.5406549021493484, + "grad_norm": 3.71968544444927, + "learning_rate": 2.218356255593003e-06, + "loss": 0.116, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5622 + }, + { + "epoch": 0.5407510698658461, + "grad_norm": 2.9965679557157694, + "learning_rate": 2.2175983061819483e-06, + "loss": 0.1191, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5623 + }, + { + "epoch": 0.5408472375823437, + "grad_norm": 3.5694325508297493, + "learning_rate": 2.216840383063277e-06, + "loss": 0.1452, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5624 + }, + { + "epoch": 0.5409434052988412, + "grad_norm": 2.5822281209268816, + "learning_rate": 2.216082486307555e-06, + "loss": 0.1318, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5625 + }, + { + "epoch": 0.5410395730153388, + "grad_norm": 1.9287617335709148, + "learning_rate": 2.2153246159853448e-06, + "loss": 0.1234, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5626 + }, + { + "epoch": 0.5411357407318363, + "grad_norm": 2.107243164913681, + "learning_rate": 2.2145667721672053e-06, + "loss": 0.129, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5627 + }, + { + "epoch": 0.5412319084483339, + "grad_norm": 3.494212391952088, + "learning_rate": 2.2138089549236936e-06, + "loss": 0.1511, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5628 + }, + { + "epoch": 0.5413280761648315, + "grad_norm": 1.7268262330039679, + "learning_rate": 2.213051164325366e-06, + "loss": 0.1561, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5629 + }, + { + "epoch": 0.541424243881329, + "grad_norm": 1.599290126139059, + "learning_rate": 2.2122934004427752e-06, + "loss": 0.1224, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5630 + }, + { + "epoch": 0.5415204115978266, + "grad_norm": 2.149507674885668, + "learning_rate": 2.211535663346469e-06, + "loss": 0.1065, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5631 + }, + { + "epoch": 0.5416165793143242, + "grad_norm": 3.3345190515708785, + "learning_rate": 2.2107779531069964e-06, + "loss": 0.1455, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5632 + }, + { + "epoch": 0.5417127470308217, + "grad_norm": 1.5725752202179497, + "learning_rate": 2.2100202697949016e-06, + "loss": 0.11, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5633 + }, + { + "epoch": 0.5418089147473193, + "grad_norm": 1.5261600994927536, + "learning_rate": 2.2092626134807276e-06, + "loss": 0.1182, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5634 + }, + { + "epoch": 0.5419050824638169, + "grad_norm": 1.4979506394816724, + "learning_rate": 2.208504984235014e-06, + "loss": 0.1232, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5635 + }, + { + "epoch": 0.5420012501803144, + "grad_norm": 1.4634238216038475, + "learning_rate": 2.2077473821283e-06, + "loss": 0.103, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5636 + }, + { + "epoch": 0.5420974178968121, + "grad_norm": 3.2033714064055894, + "learning_rate": 2.2069898072311165e-06, + "loss": 0.1172, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5637 + }, + { + "epoch": 0.5421935856133097, + "grad_norm": 1.5531423134303497, + "learning_rate": 2.206232259613999e-06, + "loss": 0.0944, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5638 + }, + { + "epoch": 0.5422897533298072, + "grad_norm": 2.5910525157125353, + "learning_rate": 2.2054747393474765e-06, + "loss": 0.1258, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5639 + }, + { + "epoch": 0.5423859210463048, + "grad_norm": 1.395095759176386, + "learning_rate": 2.2047172465020757e-06, + "loss": 0.1061, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5640 + }, + { + "epoch": 0.5424820887628024, + "grad_norm": 2.2287181482133183, + "learning_rate": 2.2039597811483223e-06, + "loss": 0.1342, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5641 + }, + { + "epoch": 0.5425782564792999, + "grad_norm": 1.8674308431539925, + "learning_rate": 2.2032023433567377e-06, + "loss": 0.1386, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5642 + }, + { + "epoch": 0.5426744241957975, + "grad_norm": 2.5948417274017905, + "learning_rate": 2.2024449331978416e-06, + "loss": 0.1226, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5643 + }, + { + "epoch": 0.542770591912295, + "grad_norm": 1.651931667674511, + "learning_rate": 2.2016875507421508e-06, + "loss": 0.1455, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5644 + }, + { + "epoch": 0.5428667596287926, + "grad_norm": 2.878154008364251, + "learning_rate": 2.20093019606018e-06, + "loss": 0.1285, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5645 + }, + { + "epoch": 0.5429629273452902, + "grad_norm": 2.833153059004314, + "learning_rate": 2.2001728692224405e-06, + "loss": 0.1364, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5646 + }, + { + "epoch": 0.5430590950617877, + "grad_norm": 1.364705391969794, + "learning_rate": 2.1994155702994435e-06, + "loss": 0.0973, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5647 + }, + { + "epoch": 0.5431552627782853, + "grad_norm": 2.0268306548966986, + "learning_rate": 2.1986582993616926e-06, + "loss": 0.1196, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5648 + }, + { + "epoch": 0.5432514304947829, + "grad_norm": 1.7174334152053121, + "learning_rate": 2.1979010564796944e-06, + "loss": 0.1028, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5649 + }, + { + "epoch": 0.5433475982112804, + "grad_norm": 1.5717081538544582, + "learning_rate": 2.1971438417239487e-06, + "loss": 0.1093, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5650 + }, + { + "epoch": 0.5434437659277781, + "grad_norm": 1.8661826279163116, + "learning_rate": 2.196386655164955e-06, + "loss": 0.134, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5651 + }, + { + "epoch": 0.5435399336442757, + "grad_norm": 1.7549525842766944, + "learning_rate": 2.195629496873209e-06, + "loss": 0.0993, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5652 + }, + { + "epoch": 0.5436361013607732, + "grad_norm": 1.7549791344155559, + "learning_rate": 2.1948723669192064e-06, + "loss": 0.1214, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5653 + }, + { + "epoch": 0.5437322690772708, + "grad_norm": 1.858653018037219, + "learning_rate": 2.1941152653734345e-06, + "loss": 0.1366, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5654 + }, + { + "epoch": 0.5438284367937684, + "grad_norm": 2.4276423687655044, + "learning_rate": 2.193358192306384e-06, + "loss": 0.138, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5655 + }, + { + "epoch": 0.5439246045102659, + "grad_norm": 1.967048054496831, + "learning_rate": 2.1926011477885403e-06, + "loss": 0.1605, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5656 + }, + { + "epoch": 0.5440207722267635, + "grad_norm": 2.004893456510903, + "learning_rate": 2.1918441318903854e-06, + "loss": 0.1257, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5657 + }, + { + "epoch": 0.544116939943261, + "grad_norm": 3.1380612761772473, + "learning_rate": 2.1910871446824007e-06, + "loss": 0.1049, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5658 + }, + { + "epoch": 0.5442131076597586, + "grad_norm": 1.4752396675567476, + "learning_rate": 2.190330186235063e-06, + "loss": 0.1057, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5659 + }, + { + "epoch": 0.5443092753762562, + "grad_norm": 2.7807381780298757, + "learning_rate": 2.1895732566188475e-06, + "loss": 0.1464, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5660 + }, + { + "epoch": 0.5444054430927537, + "grad_norm": 1.6231937960323535, + "learning_rate": 2.1888163559042266e-06, + "loss": 0.1239, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5661 + }, + { + "epoch": 0.5445016108092513, + "grad_norm": 2.5078551810667284, + "learning_rate": 2.1880594841616688e-06, + "loss": 0.1438, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5662 + }, + { + "epoch": 0.5445977785257489, + "grad_norm": 1.9711013020766395, + "learning_rate": 2.1873026414616426e-06, + "loss": 0.144, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5663 + }, + { + "epoch": 0.5446939462422464, + "grad_norm": 2.5732184299579144, + "learning_rate": 2.186545827874613e-06, + "loss": 0.1683, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5664 + }, + { + "epoch": 0.5447901139587441, + "grad_norm": 1.570794477215902, + "learning_rate": 2.185789043471038e-06, + "loss": 0.1163, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5665 + }, + { + "epoch": 0.5448862816752417, + "grad_norm": 1.5335532243789376, + "learning_rate": 2.1850322883213783e-06, + "loss": 0.117, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5666 + }, + { + "epoch": 0.5449824493917392, + "grad_norm": 1.7571081349556785, + "learning_rate": 2.1842755624960903e-06, + "loss": 0.1283, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5667 + }, + { + "epoch": 0.5450786171082368, + "grad_norm": 1.4725689405595332, + "learning_rate": 2.183518866065627e-06, + "loss": 0.0893, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5668 + }, + { + "epoch": 0.5451747848247344, + "grad_norm": 1.7767209293177386, + "learning_rate": 2.1827621991004393e-06, + "loss": 0.1071, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5669 + }, + { + "epoch": 0.5452709525412319, + "grad_norm": 1.6312337458034247, + "learning_rate": 2.1820055616709735e-06, + "loss": 0.1396, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5670 + }, + { + "epoch": 0.5453671202577295, + "grad_norm": 1.4001683327515668, + "learning_rate": 2.1812489538476763e-06, + "loss": 0.1177, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5671 + }, + { + "epoch": 0.545463287974227, + "grad_norm": 1.435300903154208, + "learning_rate": 2.1804923757009885e-06, + "loss": 0.1167, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5672 + }, + { + "epoch": 0.5455594556907246, + "grad_norm": 1.5975225473051322, + "learning_rate": 2.1797358273013516e-06, + "loss": 0.1305, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5673 + }, + { + "epoch": 0.5456556234072222, + "grad_norm": 2.0243666138465097, + "learning_rate": 2.1789793087192006e-06, + "loss": 0.1178, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5674 + }, + { + "epoch": 0.5457517911237197, + "grad_norm": 1.616718362305318, + "learning_rate": 2.1782228200249705e-06, + "loss": 0.1055, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5675 + }, + { + "epoch": 0.5458479588402173, + "grad_norm": 1.456001597887874, + "learning_rate": 2.177466361289092e-06, + "loss": 0.1065, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5676 + }, + { + "epoch": 0.5459441265567149, + "grad_norm": 1.9729010888217775, + "learning_rate": 2.1767099325819937e-06, + "loss": 0.1432, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5677 + }, + { + "epoch": 0.5460402942732124, + "grad_norm": 2.2628812046419196, + "learning_rate": 2.1759535339741005e-06, + "loss": 0.1306, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5678 + }, + { + "epoch": 0.5461364619897101, + "grad_norm": 1.637150159294141, + "learning_rate": 2.175197165535836e-06, + "loss": 0.1258, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5679 + }, + { + "epoch": 0.5462326297062077, + "grad_norm": 1.7727476702117781, + "learning_rate": 2.1744408273376204e-06, + "loss": 0.1344, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5680 + }, + { + "epoch": 0.5463287974227052, + "grad_norm": 1.705868862226348, + "learning_rate": 2.173684519449872e-06, + "loss": 0.1289, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5681 + }, + { + "epoch": 0.5464249651392028, + "grad_norm": 1.7717135131373312, + "learning_rate": 2.172928241943002e-06, + "loss": 0.1511, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5682 + }, + { + "epoch": 0.5465211328557004, + "grad_norm": 2.416379465265941, + "learning_rate": 2.172171994887424e-06, + "loss": 0.144, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5683 + }, + { + "epoch": 0.5466173005721979, + "grad_norm": 1.4430992774248277, + "learning_rate": 2.1714157783535464e-06, + "loss": 0.1215, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5684 + }, + { + "epoch": 0.5467134682886955, + "grad_norm": 1.8702523961713198, + "learning_rate": 2.1706595924117747e-06, + "loss": 0.1097, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5685 + }, + { + "epoch": 0.5468096360051931, + "grad_norm": 1.4927855946319306, + "learning_rate": 2.169903437132514e-06, + "loss": 0.083, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5686 + }, + { + "epoch": 0.5469058037216906, + "grad_norm": 1.593541670394683, + "learning_rate": 2.1691473125861614e-06, + "loss": 0.1356, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5687 + }, + { + "epoch": 0.5470019714381882, + "grad_norm": 2.240015457907656, + "learning_rate": 2.1683912188431154e-06, + "loss": 0.1248, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5688 + }, + { + "epoch": 0.5470981391546857, + "grad_norm": 2.044002453693109, + "learning_rate": 2.1676351559737715e-06, + "loss": 0.1376, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5689 + }, + { + "epoch": 0.5471943068711833, + "grad_norm": 1.7860530952687232, + "learning_rate": 2.1668791240485198e-06, + "loss": 0.1071, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5690 + }, + { + "epoch": 0.5472904745876809, + "grad_norm": 1.6630645557634185, + "learning_rate": 2.166123123137749e-06, + "loss": 0.138, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5691 + }, + { + "epoch": 0.5473866423041784, + "grad_norm": 1.4560109072974206, + "learning_rate": 2.165367153311847e-06, + "loss": 0.102, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5692 + }, + { + "epoch": 0.5474828100206761, + "grad_norm": 1.5118742523502993, + "learning_rate": 2.1646112146411936e-06, + "loss": 0.1236, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5693 + }, + { + "epoch": 0.5475789777371737, + "grad_norm": 3.1149689259455937, + "learning_rate": 2.1638553071961706e-06, + "loss": 0.1224, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5694 + }, + { + "epoch": 0.5476751454536712, + "grad_norm": 1.9364181281689028, + "learning_rate": 2.163099431047155e-06, + "loss": 0.1291, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5695 + }, + { + "epoch": 0.5477713131701688, + "grad_norm": 2.0883072964306004, + "learning_rate": 2.1623435862645207e-06, + "loss": 0.1116, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5696 + }, + { + "epoch": 0.5478674808866664, + "grad_norm": 1.8458365745749015, + "learning_rate": 2.1615877729186397e-06, + "loss": 0.1205, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5697 + }, + { + "epoch": 0.5479636486031639, + "grad_norm": 2.120375517806959, + "learning_rate": 2.160831991079879e-06, + "loss": 0.1432, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5698 + }, + { + "epoch": 0.5480598163196615, + "grad_norm": 2.1931550409650917, + "learning_rate": 2.1600762408186045e-06, + "loss": 0.1004, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5699 + }, + { + "epoch": 0.5481559840361591, + "grad_norm": 1.9973435577225196, + "learning_rate": 2.159320522205179e-06, + "loss": 0.1046, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5700 + }, + { + "epoch": 0.5482521517526566, + "grad_norm": 2.851255422589299, + "learning_rate": 2.1585648353099615e-06, + "loss": 0.1543, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5701 + }, + { + "epoch": 0.5483483194691542, + "grad_norm": 1.4002401482324336, + "learning_rate": 2.1578091802033093e-06, + "loss": 0.1047, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5702 + }, + { + "epoch": 0.5484444871856518, + "grad_norm": 1.9499209708696341, + "learning_rate": 2.1570535569555757e-06, + "loss": 0.1041, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5703 + }, + { + "epoch": 0.5485406549021493, + "grad_norm": 1.936168687950606, + "learning_rate": 2.1562979656371112e-06, + "loss": 0.1254, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5704 + }, + { + "epoch": 0.5486368226186469, + "grad_norm": 4.5106581573657545, + "learning_rate": 2.1555424063182627e-06, + "loss": 0.126, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5705 + }, + { + "epoch": 0.5487329903351444, + "grad_norm": 1.6126395865049492, + "learning_rate": 2.1547868790693767e-06, + "loss": 0.1262, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5706 + }, + { + "epoch": 0.5488291580516421, + "grad_norm": 3.6876497670060093, + "learning_rate": 2.154031383960793e-06, + "loss": 0.1361, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5707 + }, + { + "epoch": 0.5489253257681397, + "grad_norm": 2.65210133211378, + "learning_rate": 2.153275921062851e-06, + "loss": 0.1249, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5708 + }, + { + "epoch": 0.5490214934846372, + "grad_norm": 2.1963348989186047, + "learning_rate": 2.152520490445888e-06, + "loss": 0.1258, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5709 + }, + { + "epoch": 0.5491176612011348, + "grad_norm": 1.6182508126885278, + "learning_rate": 2.1517650921802337e-06, + "loss": 0.1324, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5710 + }, + { + "epoch": 0.5492138289176324, + "grad_norm": 1.5819866049933897, + "learning_rate": 2.1510097263362197e-06, + "loss": 0.1178, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5711 + }, + { + "epoch": 0.5493099966341299, + "grad_norm": 1.7061912205712844, + "learning_rate": 2.150254392984172e-06, + "loss": 0.1417, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5712 + }, + { + "epoch": 0.5494061643506275, + "grad_norm": 2.092921822727725, + "learning_rate": 2.149499092194415e-06, + "loss": 0.1369, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5713 + }, + { + "epoch": 0.5495023320671251, + "grad_norm": 1.8921845927994116, + "learning_rate": 2.148743824037269e-06, + "loss": 0.1144, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5714 + }, + { + "epoch": 0.5495984997836226, + "grad_norm": 1.7249394409794248, + "learning_rate": 2.147988588583051e-06, + "loss": 0.1389, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5715 + }, + { + "epoch": 0.5496946675001202, + "grad_norm": 1.8268130817270003, + "learning_rate": 2.1472333859020755e-06, + "loss": 0.136, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5716 + }, + { + "epoch": 0.5497908352166178, + "grad_norm": 1.874253803148908, + "learning_rate": 2.1464782160646543e-06, + "loss": 0.1442, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5717 + }, + { + "epoch": 0.5498870029331153, + "grad_norm": 1.4828182290893803, + "learning_rate": 2.1457230791410962e-06, + "loss": 0.1216, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5718 + }, + { + "epoch": 0.5499831706496129, + "grad_norm": 1.5315711986777456, + "learning_rate": 2.1449679752017063e-06, + "loss": 0.128, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5719 + }, + { + "epoch": 0.5500793383661104, + "grad_norm": 1.270067239483041, + "learning_rate": 2.1442129043167877e-06, + "loss": 0.0935, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5720 + }, + { + "epoch": 0.5501755060826081, + "grad_norm": 1.6914922028156836, + "learning_rate": 2.1434578665566376e-06, + "loss": 0.1338, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5721 + }, + { + "epoch": 0.5502716737991057, + "grad_norm": 2.6612296124883117, + "learning_rate": 2.1427028619915536e-06, + "loss": 0.1356, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5722 + }, + { + "epoch": 0.5503678415156033, + "grad_norm": 1.9028025311660615, + "learning_rate": 2.141947890691828e-06, + "loss": 0.1406, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5723 + }, + { + "epoch": 0.5504640092321008, + "grad_norm": 1.5030186742303686, + "learning_rate": 2.141192952727752e-06, + "loss": 0.1221, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5724 + }, + { + "epoch": 0.5505601769485984, + "grad_norm": 1.7183025126418066, + "learning_rate": 2.140438048169611e-06, + "loss": 0.1459, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5725 + }, + { + "epoch": 0.5506563446650959, + "grad_norm": 1.5748226587217857, + "learning_rate": 2.1396831770876894e-06, + "loss": 0.134, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5726 + }, + { + "epoch": 0.5507525123815935, + "grad_norm": 1.4688357498439766, + "learning_rate": 2.138928339552267e-06, + "loss": 0.0998, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5727 + }, + { + "epoch": 0.5508486800980911, + "grad_norm": 1.6113699162222643, + "learning_rate": 2.1381735356336225e-06, + "loss": 0.1461, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5728 + }, + { + "epoch": 0.5509448478145886, + "grad_norm": 1.5244626366863525, + "learning_rate": 2.137418765402029e-06, + "loss": 0.1243, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5729 + }, + { + "epoch": 0.5510410155310862, + "grad_norm": 1.713195732631716, + "learning_rate": 2.1366640289277588e-06, + "loss": 0.1268, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5730 + }, + { + "epoch": 0.5511371832475838, + "grad_norm": 1.7176718263822675, + "learning_rate": 2.1359093262810806e-06, + "loss": 0.1462, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5731 + }, + { + "epoch": 0.5512333509640813, + "grad_norm": 1.2338556954570414, + "learning_rate": 2.135154657532257e-06, + "loss": 0.0979, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5732 + }, + { + "epoch": 0.5513295186805789, + "grad_norm": 1.651653165449402, + "learning_rate": 2.134400022751551e-06, + "loss": 0.1206, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5733 + }, + { + "epoch": 0.5514256863970765, + "grad_norm": 3.6858360136117923, + "learning_rate": 2.1336454220092217e-06, + "loss": 0.1566, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5734 + }, + { + "epoch": 0.5515218541135741, + "grad_norm": 1.4880017673917876, + "learning_rate": 2.1328908553755235e-06, + "loss": 0.1157, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5735 + }, + { + "epoch": 0.5516180218300717, + "grad_norm": 5.900657015267538, + "learning_rate": 2.1321363229207097e-06, + "loss": 0.1658, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5736 + }, + { + "epoch": 0.5517141895465693, + "grad_norm": 3.4331673105917857, + "learning_rate": 2.1313818247150294e-06, + "loss": 0.1323, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5737 + }, + { + "epoch": 0.5518103572630668, + "grad_norm": 1.8120878594590888, + "learning_rate": 2.1306273608287274e-06, + "loss": 0.1281, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5738 + }, + { + "epoch": 0.5519065249795644, + "grad_norm": 2.130519999486254, + "learning_rate": 2.129872931332047e-06, + "loss": 0.1357, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5739 + }, + { + "epoch": 0.552002692696062, + "grad_norm": 1.845877020910345, + "learning_rate": 2.1291185362952274e-06, + "loss": 0.1309, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5740 + }, + { + "epoch": 0.5520988604125595, + "grad_norm": 2.27598222374991, + "learning_rate": 2.1283641757885055e-06, + "loss": 0.1285, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5741 + }, + { + "epoch": 0.5521950281290571, + "grad_norm": 1.5003920132345896, + "learning_rate": 2.1276098498821147e-06, + "loss": 0.0999, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5742 + }, + { + "epoch": 0.5522911958455546, + "grad_norm": 1.9696514011374848, + "learning_rate": 2.1268555586462826e-06, + "loss": 0.107, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5743 + }, + { + "epoch": 0.5523873635620522, + "grad_norm": 2.20365179150907, + "learning_rate": 2.126101302151238e-06, + "loss": 0.1267, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5744 + }, + { + "epoch": 0.5524835312785498, + "grad_norm": 1.8918149843846666, + "learning_rate": 2.1253470804672034e-06, + "loss": 0.1232, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5745 + }, + { + "epoch": 0.5525796989950473, + "grad_norm": 2.819441761654778, + "learning_rate": 2.124592893664399e-06, + "loss": 0.1192, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5746 + }, + { + "epoch": 0.5526758667115449, + "grad_norm": 1.6578576559956189, + "learning_rate": 2.1238387418130425e-06, + "loss": 0.1276, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5747 + }, + { + "epoch": 0.5527720344280425, + "grad_norm": 1.4933218276341733, + "learning_rate": 2.1230846249833474e-06, + "loss": 0.1153, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5748 + }, + { + "epoch": 0.5528682021445401, + "grad_norm": 1.4785845047107193, + "learning_rate": 2.1223305432455226e-06, + "loss": 0.1167, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5749 + }, + { + "epoch": 0.5529643698610377, + "grad_norm": 2.3674827628441206, + "learning_rate": 2.1215764966697765e-06, + "loss": 0.1216, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5750 + }, + { + "epoch": 0.5530605375775353, + "grad_norm": 1.6464573118602197, + "learning_rate": 2.120822485326312e-06, + "loss": 0.1322, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5751 + }, + { + "epoch": 0.5531567052940328, + "grad_norm": 2.0598379813625076, + "learning_rate": 2.1200685092853305e-06, + "loss": 0.1226, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5752 + }, + { + "epoch": 0.5532528730105304, + "grad_norm": 1.600846114980301, + "learning_rate": 2.1193145686170306e-06, + "loss": 0.1165, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5753 + }, + { + "epoch": 0.553349040727028, + "grad_norm": 1.7653639437450936, + "learning_rate": 2.1185606633916033e-06, + "loss": 0.1437, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5754 + }, + { + "epoch": 0.5534452084435255, + "grad_norm": 1.7909586183309327, + "learning_rate": 2.1178067936792406e-06, + "loss": 0.1235, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5755 + }, + { + "epoch": 0.5535413761600231, + "grad_norm": 2.381674733570635, + "learning_rate": 2.1170529595501307e-06, + "loss": 0.1368, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5756 + }, + { + "epoch": 0.5536375438765206, + "grad_norm": 1.599935128912152, + "learning_rate": 2.1162991610744565e-06, + "loss": 0.1238, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5757 + }, + { + "epoch": 0.5537337115930182, + "grad_norm": 1.6839323028515547, + "learning_rate": 2.115545398322399e-06, + "loss": 0.12, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5758 + }, + { + "epoch": 0.5538298793095158, + "grad_norm": 2.955376878737507, + "learning_rate": 2.1147916713641365e-06, + "loss": 0.1394, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5759 + }, + { + "epoch": 0.5539260470260133, + "grad_norm": 1.7838278127552993, + "learning_rate": 2.114037980269842e-06, + "loss": 0.1653, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5760 + }, + { + "epoch": 0.5540222147425109, + "grad_norm": 1.4994630164523386, + "learning_rate": 2.1132843251096865e-06, + "loss": 0.1122, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5761 + }, + { + "epoch": 0.5541183824590085, + "grad_norm": 1.4358235055512578, + "learning_rate": 2.112530705953837e-06, + "loss": 0.1302, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5762 + }, + { + "epoch": 0.5542145501755061, + "grad_norm": 1.3344842434544444, + "learning_rate": 2.111777122872458e-06, + "loss": 0.1138, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5763 + }, + { + "epoch": 0.5543107178920037, + "grad_norm": 2.565779623889512, + "learning_rate": 2.111023575935711e-06, + "loss": 0.1025, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5764 + }, + { + "epoch": 0.5544068856085013, + "grad_norm": 3.3897229487584686, + "learning_rate": 2.1102700652137527e-06, + "loss": 0.1315, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5765 + }, + { + "epoch": 0.5545030533249988, + "grad_norm": 3.2016935557871897, + "learning_rate": 2.109516590776736e-06, + "loss": 0.1322, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5766 + }, + { + "epoch": 0.5545992210414964, + "grad_norm": 1.4760459253906997, + "learning_rate": 2.108763152694813e-06, + "loss": 0.1357, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5767 + }, + { + "epoch": 0.554695388757994, + "grad_norm": 1.52601565837024, + "learning_rate": 2.10800975103813e-06, + "loss": 0.1304, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5768 + }, + { + "epoch": 0.5547915564744915, + "grad_norm": 1.885860869176047, + "learning_rate": 2.107256385876831e-06, + "loss": 0.1018, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5769 + }, + { + "epoch": 0.5548877241909891, + "grad_norm": 1.4791605059105084, + "learning_rate": 2.106503057281057e-06, + "loss": 0.1189, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5770 + }, + { + "epoch": 0.5549838919074866, + "grad_norm": 2.0274008260134235, + "learning_rate": 2.1057497653209438e-06, + "loss": 0.1292, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5771 + }, + { + "epoch": 0.5550800596239842, + "grad_norm": 4.103264791747469, + "learning_rate": 2.1049965100666255e-06, + "loss": 0.1387, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5772 + }, + { + "epoch": 0.5551762273404818, + "grad_norm": 1.819302496366258, + "learning_rate": 2.1042432915882328e-06, + "loss": 0.1524, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5773 + }, + { + "epoch": 0.5552723950569793, + "grad_norm": 1.5415624587482868, + "learning_rate": 2.103490109955892e-06, + "loss": 0.1014, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5774 + }, + { + "epoch": 0.5553685627734769, + "grad_norm": 2.780647731206259, + "learning_rate": 2.1027369652397255e-06, + "loss": 0.1398, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5775 + }, + { + "epoch": 0.5554647304899745, + "grad_norm": 2.2921865066803973, + "learning_rate": 2.1019838575098555e-06, + "loss": 0.1459, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5776 + }, + { + "epoch": 0.5555608982064721, + "grad_norm": 2.8179829680376476, + "learning_rate": 2.1012307868363963e-06, + "loss": 0.1321, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5777 + }, + { + "epoch": 0.5556570659229697, + "grad_norm": 3.1661345842671813, + "learning_rate": 2.1004777532894614e-06, + "loss": 0.1229, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5778 + }, + { + "epoch": 0.5557532336394673, + "grad_norm": 1.9946775051559968, + "learning_rate": 2.09972475693916e-06, + "loss": 0.1578, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5779 + }, + { + "epoch": 0.5558494013559648, + "grad_norm": 1.7698285461011107, + "learning_rate": 2.0989717978555992e-06, + "loss": 0.1193, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5780 + }, + { + "epoch": 0.5559455690724624, + "grad_norm": 1.9438768403687927, + "learning_rate": 2.0982188761088822e-06, + "loss": 0.1235, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5781 + }, + { + "epoch": 0.55604173678896, + "grad_norm": 1.9757451598017455, + "learning_rate": 2.0974659917691057e-06, + "loss": 0.1426, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5782 + }, + { + "epoch": 0.5561379045054575, + "grad_norm": 2.3120454534403976, + "learning_rate": 2.0967131449063668e-06, + "loss": 0.1355, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5783 + }, + { + "epoch": 0.5562340722219551, + "grad_norm": 3.123545075459751, + "learning_rate": 2.095960335590757e-06, + "loss": 0.1525, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5784 + }, + { + "epoch": 0.5563302399384527, + "grad_norm": 1.8916790431172852, + "learning_rate": 2.0952075638923656e-06, + "loss": 0.1457, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5785 + }, + { + "epoch": 0.5564264076549502, + "grad_norm": 1.5084372186560755, + "learning_rate": 2.094454829881278e-06, + "loss": 0.1349, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5786 + }, + { + "epoch": 0.5565225753714478, + "grad_norm": 1.4402744992758107, + "learning_rate": 2.093702133627575e-06, + "loss": 0.1202, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5787 + }, + { + "epoch": 0.5566187430879453, + "grad_norm": 2.9700612043030006, + "learning_rate": 2.0929494752013354e-06, + "loss": 0.1636, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5788 + }, + { + "epoch": 0.5567149108044429, + "grad_norm": 1.9842382969941812, + "learning_rate": 2.0921968546726334e-06, + "loss": 0.1194, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5789 + }, + { + "epoch": 0.5568110785209405, + "grad_norm": 2.2173900370807913, + "learning_rate": 2.0914442721115395e-06, + "loss": 0.1292, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5790 + }, + { + "epoch": 0.5569072462374381, + "grad_norm": 1.8320242138388123, + "learning_rate": 2.0906917275881224e-06, + "loss": 0.1319, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5791 + }, + { + "epoch": 0.5570034139539357, + "grad_norm": 1.7652829754189883, + "learning_rate": 2.089939221172446e-06, + "loss": 0.1241, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5792 + }, + { + "epoch": 0.5570995816704333, + "grad_norm": 2.5457610397174175, + "learning_rate": 2.08918675293457e-06, + "loss": 0.1398, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5793 + }, + { + "epoch": 0.5571957493869308, + "grad_norm": 1.8768706260510355, + "learning_rate": 2.088434322944551e-06, + "loss": 0.1392, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5794 + }, + { + "epoch": 0.5572919171034284, + "grad_norm": 2.9186650476624734, + "learning_rate": 2.087681931272443e-06, + "loss": 0.1382, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5795 + }, + { + "epoch": 0.557388084819926, + "grad_norm": 1.7252475708546877, + "learning_rate": 2.086929577988296e-06, + "loss": 0.1128, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5796 + }, + { + "epoch": 0.5574842525364235, + "grad_norm": 1.4381015586396666, + "learning_rate": 2.0861772631621554e-06, + "loss": 0.1411, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5797 + }, + { + "epoch": 0.5575804202529211, + "grad_norm": 1.49402443222118, + "learning_rate": 2.0854249868640653e-06, + "loss": 0.1434, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5798 + }, + { + "epoch": 0.5576765879694187, + "grad_norm": 1.5085939679556488, + "learning_rate": 2.084672749164063e-06, + "loss": 0.1307, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5799 + }, + { + "epoch": 0.5577727556859162, + "grad_norm": 1.820508455670805, + "learning_rate": 2.0839205501321844e-06, + "loss": 0.1449, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5800 + }, + { + "epoch": 0.5578689234024138, + "grad_norm": 1.658753152549817, + "learning_rate": 2.083168389838461e-06, + "loss": 0.1292, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5801 + }, + { + "epoch": 0.5579650911189113, + "grad_norm": 1.8284018235104085, + "learning_rate": 2.0824162683529225e-06, + "loss": 0.1078, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5802 + }, + { + "epoch": 0.5580612588354089, + "grad_norm": 1.2932012045244756, + "learning_rate": 2.0816641857455926e-06, + "loss": 0.1179, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5803 + }, + { + "epoch": 0.5581574265519065, + "grad_norm": 2.2027362805866004, + "learning_rate": 2.0809121420864927e-06, + "loss": 0.1371, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5804 + }, + { + "epoch": 0.5582535942684042, + "grad_norm": 1.9437090543022923, + "learning_rate": 2.0801601374456386e-06, + "loss": 0.1361, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5805 + }, + { + "epoch": 0.5583497619849017, + "grad_norm": 1.8466868889882262, + "learning_rate": 2.079408171893046e-06, + "loss": 0.1511, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5806 + }, + { + "epoch": 0.5584459297013993, + "grad_norm": 2.174904377017924, + "learning_rate": 2.0786562454987238e-06, + "loss": 0.1432, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5807 + }, + { + "epoch": 0.5585420974178968, + "grad_norm": 2.4711470196900023, + "learning_rate": 2.077904358332678e-06, + "loss": 0.1155, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5808 + }, + { + "epoch": 0.5586382651343944, + "grad_norm": 1.7722391112919793, + "learning_rate": 2.0771525104649136e-06, + "loss": 0.1432, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5809 + }, + { + "epoch": 0.558734432850892, + "grad_norm": 1.9079721079510636, + "learning_rate": 2.0764007019654274e-06, + "loss": 0.1088, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5810 + }, + { + "epoch": 0.5588306005673895, + "grad_norm": 2.114658604381323, + "learning_rate": 2.075648932904216e-06, + "loss": 0.1347, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5811 + }, + { + "epoch": 0.5589267682838871, + "grad_norm": 1.8336743142973508, + "learning_rate": 2.07489720335127e-06, + "loss": 0.1492, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5812 + }, + { + "epoch": 0.5590229360003847, + "grad_norm": 1.889611570712052, + "learning_rate": 2.0741455133765794e-06, + "loss": 0.1128, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5813 + }, + { + "epoch": 0.5591191037168822, + "grad_norm": 1.9156228455877489, + "learning_rate": 2.073393863050127e-06, + "loss": 0.1137, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5814 + }, + { + "epoch": 0.5592152714333798, + "grad_norm": 1.9592231828316857, + "learning_rate": 2.0726422524418956e-06, + "loss": 0.1266, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5815 + }, + { + "epoch": 0.5593114391498774, + "grad_norm": 1.451945223010719, + "learning_rate": 2.07189068162186e-06, + "loss": 0.1219, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5816 + }, + { + "epoch": 0.5594076068663749, + "grad_norm": 2.065015655489417, + "learning_rate": 2.071139150659994e-06, + "loss": 0.1347, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5817 + }, + { + "epoch": 0.5595037745828725, + "grad_norm": 1.9851923332285955, + "learning_rate": 2.0703876596262677e-06, + "loss": 0.1455, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5818 + }, + { + "epoch": 0.5595999422993702, + "grad_norm": 3.0707523379286346, + "learning_rate": 2.069636208590648e-06, + "loss": 0.1056, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5819 + }, + { + "epoch": 0.5596961100158677, + "grad_norm": 2.2127442274590083, + "learning_rate": 2.0688847976230952e-06, + "loss": 0.1064, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5820 + }, + { + "epoch": 0.5597922777323653, + "grad_norm": 1.4078085361913077, + "learning_rate": 2.0681334267935687e-06, + "loss": 0.1149, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5821 + }, + { + "epoch": 0.5598884454488628, + "grad_norm": 2.330047165816258, + "learning_rate": 2.067382096172023e-06, + "loss": 0.1436, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5822 + }, + { + "epoch": 0.5599846131653604, + "grad_norm": 1.7282346748194573, + "learning_rate": 2.0666308058284093e-06, + "loss": 0.1049, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5823 + }, + { + "epoch": 0.560080780881858, + "grad_norm": 2.239999343479268, + "learning_rate": 2.0658795558326745e-06, + "loss": 0.1125, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5824 + }, + { + "epoch": 0.5601769485983555, + "grad_norm": 2.7677964679811273, + "learning_rate": 2.065128346254762e-06, + "loss": 0.1256, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5825 + }, + { + "epoch": 0.5602731163148531, + "grad_norm": 2.4833278580364166, + "learning_rate": 2.064377177164613e-06, + "loss": 0.1179, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5826 + }, + { + "epoch": 0.5603692840313507, + "grad_norm": 1.6886459633153452, + "learning_rate": 2.063626048632161e-06, + "loss": 0.1164, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5827 + }, + { + "epoch": 0.5604654517478482, + "grad_norm": 2.4890766092472907, + "learning_rate": 2.0628749607273396e-06, + "loss": 0.1373, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5828 + }, + { + "epoch": 0.5605616194643458, + "grad_norm": 1.9172309165760875, + "learning_rate": 2.062123913520077e-06, + "loss": 0.1188, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5829 + }, + { + "epoch": 0.5606577871808434, + "grad_norm": 1.56063246114378, + "learning_rate": 2.0613729070802974e-06, + "loss": 0.111, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5830 + }, + { + "epoch": 0.5607539548973409, + "grad_norm": 1.961906591626432, + "learning_rate": 2.060621941477922e-06, + "loss": 0.1115, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5831 + }, + { + "epoch": 0.5608501226138385, + "grad_norm": 2.0869516230573897, + "learning_rate": 2.0598710167828688e-06, + "loss": 0.1043, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5832 + }, + { + "epoch": 0.5609462903303362, + "grad_norm": 1.8115468529974676, + "learning_rate": 2.0591201330650487e-06, + "loss": 0.1345, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5833 + }, + { + "epoch": 0.5610424580468337, + "grad_norm": 2.078228868079396, + "learning_rate": 2.058369290394372e-06, + "loss": 0.1223, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5834 + }, + { + "epoch": 0.5611386257633313, + "grad_norm": 2.199508295879761, + "learning_rate": 2.057618488840745e-06, + "loss": 0.1507, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5835 + }, + { + "epoch": 0.5612347934798289, + "grad_norm": 1.9232718051240434, + "learning_rate": 2.0568677284740695e-06, + "loss": 0.1447, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5836 + }, + { + "epoch": 0.5613309611963264, + "grad_norm": 2.7335098220732985, + "learning_rate": 2.0561170093642422e-06, + "loss": 0.1298, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5837 + }, + { + "epoch": 0.561427128912824, + "grad_norm": 1.860492487092207, + "learning_rate": 2.055366331581158e-06, + "loss": 0.138, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5838 + }, + { + "epoch": 0.5615232966293215, + "grad_norm": 1.6645445843577948, + "learning_rate": 2.0546156951947067e-06, + "loss": 0.1139, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5839 + }, + { + "epoch": 0.5616194643458191, + "grad_norm": 1.7654079019228732, + "learning_rate": 2.0538651002747745e-06, + "loss": 0.1289, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5840 + }, + { + "epoch": 0.5617156320623167, + "grad_norm": 1.678961068163869, + "learning_rate": 2.0531145468912437e-06, + "loss": 0.1414, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5841 + }, + { + "epoch": 0.5618117997788142, + "grad_norm": 2.008352488345648, + "learning_rate": 2.052364035113994e-06, + "loss": 0.1507, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5842 + }, + { + "epoch": 0.5619079674953118, + "grad_norm": 1.5362962688610498, + "learning_rate": 2.0516135650129003e-06, + "loss": 0.1272, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5843 + }, + { + "epoch": 0.5620041352118094, + "grad_norm": 1.5560311049550721, + "learning_rate": 2.0508631366578314e-06, + "loss": 0.1326, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5844 + }, + { + "epoch": 0.5621003029283069, + "grad_norm": 2.211203014343386, + "learning_rate": 2.0501127501186553e-06, + "loss": 0.127, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5845 + }, + { + "epoch": 0.5621964706448045, + "grad_norm": 1.6880977769897558, + "learning_rate": 2.049362405465236e-06, + "loss": 0.1651, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5846 + }, + { + "epoch": 0.5622926383613022, + "grad_norm": 1.526647442684656, + "learning_rate": 2.048612102767431e-06, + "loss": 0.1317, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5847 + }, + { + "epoch": 0.5623888060777997, + "grad_norm": 1.8770499136184466, + "learning_rate": 2.0478618420950987e-06, + "loss": 0.1303, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5848 + }, + { + "epoch": 0.5624849737942973, + "grad_norm": 1.3123881782923286, + "learning_rate": 2.0471116235180866e-06, + "loss": 0.1092, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5849 + }, + { + "epoch": 0.5625811415107949, + "grad_norm": 1.5663741220186582, + "learning_rate": 2.046361447106244e-06, + "loss": 0.1344, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5850 + }, + { + "epoch": 0.5626773092272924, + "grad_norm": 1.4764779926670653, + "learning_rate": 2.045611312929414e-06, + "loss": 0.1302, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5851 + }, + { + "epoch": 0.56277347694379, + "grad_norm": 1.5303550155082648, + "learning_rate": 2.0448612210574364e-06, + "loss": 0.1215, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5852 + }, + { + "epoch": 0.5628696446602875, + "grad_norm": 1.458594331482552, + "learning_rate": 2.0441111715601476e-06, + "loss": 0.1132, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5853 + }, + { + "epoch": 0.5629658123767851, + "grad_norm": 1.4556499308174506, + "learning_rate": 2.0433611645073786e-06, + "loss": 0.1209, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5854 + }, + { + "epoch": 0.5630619800932827, + "grad_norm": 1.5189784556539307, + "learning_rate": 2.0426111999689568e-06, + "loss": 0.1382, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5855 + }, + { + "epoch": 0.5631581478097802, + "grad_norm": 1.6169587492108641, + "learning_rate": 2.0418612780147064e-06, + "loss": 0.1063, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5856 + }, + { + "epoch": 0.5632543155262778, + "grad_norm": 2.192754618270214, + "learning_rate": 2.0411113987144468e-06, + "loss": 0.1074, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5857 + }, + { + "epoch": 0.5633504832427754, + "grad_norm": 2.569910799481601, + "learning_rate": 2.040361562137994e-06, + "loss": 0.1394, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5858 + }, + { + "epoch": 0.5634466509592729, + "grad_norm": 1.6397279225036927, + "learning_rate": 2.0396117683551603e-06, + "loss": 0.1145, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5859 + }, + { + "epoch": 0.5635428186757705, + "grad_norm": 1.4894380587998803, + "learning_rate": 2.0388620174357542e-06, + "loss": 0.1219, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5860 + }, + { + "epoch": 0.5636389863922682, + "grad_norm": 2.645510860551646, + "learning_rate": 2.038112309449578e-06, + "loss": 0.1021, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5861 + }, + { + "epoch": 0.5637351541087657, + "grad_norm": 1.7547591260421367, + "learning_rate": 2.037362644466432e-06, + "loss": 0.1261, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5862 + }, + { + "epoch": 0.5638313218252633, + "grad_norm": 2.755173109707093, + "learning_rate": 2.0366130225561124e-06, + "loss": 0.1595, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5863 + }, + { + "epoch": 0.5639274895417609, + "grad_norm": 1.862600458049239, + "learning_rate": 2.0358634437884114e-06, + "loss": 0.1096, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5864 + }, + { + "epoch": 0.5640236572582584, + "grad_norm": 1.7647300809226447, + "learning_rate": 2.0351139082331177e-06, + "loss": 0.1151, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5865 + }, + { + "epoch": 0.564119824974756, + "grad_norm": 1.7331137016636415, + "learning_rate": 2.0343644159600125e-06, + "loss": 0.1202, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5866 + }, + { + "epoch": 0.5642159926912536, + "grad_norm": 2.193090951081002, + "learning_rate": 2.0336149670388776e-06, + "loss": 0.111, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5867 + }, + { + "epoch": 0.5643121604077511, + "grad_norm": 2.9885193820686666, + "learning_rate": 2.0328655615394884e-06, + "loss": 0.1345, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5868 + }, + { + "epoch": 0.5644083281242487, + "grad_norm": 2.1493653300862303, + "learning_rate": 2.0321161995316165e-06, + "loss": 0.1253, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5869 + }, + { + "epoch": 0.5645044958407462, + "grad_norm": 2.5995746941142928, + "learning_rate": 2.0313668810850296e-06, + "loss": 0.1331, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5870 + }, + { + "epoch": 0.5646006635572438, + "grad_norm": 1.8229080168149665, + "learning_rate": 2.0306176062694922e-06, + "loss": 0.1332, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5871 + }, + { + "epoch": 0.5646968312737414, + "grad_norm": 1.8107073855876141, + "learning_rate": 2.0298683751547622e-06, + "loss": 0.1405, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5872 + }, + { + "epoch": 0.5647929989902389, + "grad_norm": 1.8970068413164494, + "learning_rate": 2.0291191878105966e-06, + "loss": 0.1258, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5873 + }, + { + "epoch": 0.5648891667067365, + "grad_norm": 1.8845216921348382, + "learning_rate": 2.0283700443067455e-06, + "loss": 0.1084, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5874 + }, + { + "epoch": 0.5649853344232342, + "grad_norm": 2.0432188277686008, + "learning_rate": 2.027620944712957e-06, + "loss": 0.1496, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5875 + }, + { + "epoch": 0.5650815021397317, + "grad_norm": 2.099807588833167, + "learning_rate": 2.0268718890989754e-06, + "loss": 0.128, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5876 + }, + { + "epoch": 0.5651776698562293, + "grad_norm": 2.1468954844181507, + "learning_rate": 2.0261228775345378e-06, + "loss": 0.1167, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5877 + }, + { + "epoch": 0.5652738375727269, + "grad_norm": 2.404787570397084, + "learning_rate": 2.0253739100893803e-06, + "loss": 0.1369, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5878 + }, + { + "epoch": 0.5653700052892244, + "grad_norm": 1.978744464866825, + "learning_rate": 2.024624986833234e-06, + "loss": 0.1212, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5879 + }, + { + "epoch": 0.565466173005722, + "grad_norm": 1.7222143523541757, + "learning_rate": 2.023876107835825e-06, + "loss": 0.1022, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5880 + }, + { + "epoch": 0.5655623407222196, + "grad_norm": 1.534627630452587, + "learning_rate": 2.023127273166877e-06, + "loss": 0.1151, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5881 + }, + { + "epoch": 0.5656585084387171, + "grad_norm": 1.8390497759829905, + "learning_rate": 2.022378482896109e-06, + "loss": 0.1221, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5882 + }, + { + "epoch": 0.5657546761552147, + "grad_norm": 1.9397909258227792, + "learning_rate": 2.021629737093234e-06, + "loss": 0.1488, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5883 + }, + { + "epoch": 0.5658508438717123, + "grad_norm": 2.622242726998541, + "learning_rate": 2.020881035827962e-06, + "loss": 0.162, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5884 + }, + { + "epoch": 0.5659470115882098, + "grad_norm": 1.6330358407488486, + "learning_rate": 2.0201323791700005e-06, + "loss": 0.1386, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5885 + }, + { + "epoch": 0.5660431793047074, + "grad_norm": 1.5906137598681862, + "learning_rate": 2.019383767189052e-06, + "loss": 0.1301, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5886 + }, + { + "epoch": 0.5661393470212049, + "grad_norm": 2.213964383956641, + "learning_rate": 2.0186351999548127e-06, + "loss": 0.1295, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5887 + }, + { + "epoch": 0.5662355147377025, + "grad_norm": 1.7062266547608091, + "learning_rate": 2.017886677536978e-06, + "loss": 0.0891, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5888 + }, + { + "epoch": 0.5663316824542002, + "grad_norm": 1.8219096419592342, + "learning_rate": 2.0171382000052357e-06, + "loss": 0.1375, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5889 + }, + { + "epoch": 0.5664278501706977, + "grad_norm": 1.5550394732504604, + "learning_rate": 2.016389767429272e-06, + "loss": 0.1186, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5890 + }, + { + "epoch": 0.5665240178871953, + "grad_norm": 1.57546603333915, + "learning_rate": 2.015641379878768e-06, + "loss": 0.1341, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5891 + }, + { + "epoch": 0.5666201856036929, + "grad_norm": 1.802878894823701, + "learning_rate": 2.0148930374234004e-06, + "loss": 0.1095, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5892 + }, + { + "epoch": 0.5667163533201904, + "grad_norm": 2.0439009960334853, + "learning_rate": 2.0141447401328436e-06, + "loss": 0.1555, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5893 + }, + { + "epoch": 0.566812521036688, + "grad_norm": 2.589880291073085, + "learning_rate": 2.0133964880767637e-06, + "loss": 0.1217, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5894 + }, + { + "epoch": 0.5669086887531856, + "grad_norm": 1.747238869306854, + "learning_rate": 2.012648281324826e-06, + "loss": 0.132, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5895 + }, + { + "epoch": 0.5670048564696831, + "grad_norm": 1.8821012309850353, + "learning_rate": 2.011900119946691e-06, + "loss": 0.1583, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5896 + }, + { + "epoch": 0.5671010241861807, + "grad_norm": 2.24589075530779, + "learning_rate": 2.011152004012014e-06, + "loss": 0.1228, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5897 + }, + { + "epoch": 0.5671971919026783, + "grad_norm": 1.9327375937089042, + "learning_rate": 2.0104039335904476e-06, + "loss": 0.1215, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5898 + }, + { + "epoch": 0.5672933596191758, + "grad_norm": 2.1124372530032627, + "learning_rate": 2.0096559087516394e-06, + "loss": 0.1196, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5899 + }, + { + "epoch": 0.5673895273356734, + "grad_norm": 2.7361800988134086, + "learning_rate": 2.008907929565231e-06, + "loss": 0.1328, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5900 + }, + { + "epoch": 0.567485695052171, + "grad_norm": 1.4585149861778424, + "learning_rate": 2.008159996100862e-06, + "loss": 0.1053, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5901 + }, + { + "epoch": 0.5675818627686685, + "grad_norm": 2.481052016306471, + "learning_rate": 2.007412108428168e-06, + "loss": 0.1062, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5902 + }, + { + "epoch": 0.5676780304851662, + "grad_norm": 1.704545065599077, + "learning_rate": 2.0066642666167784e-06, + "loss": 0.1027, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5903 + }, + { + "epoch": 0.5677741982016637, + "grad_norm": 2.0856338782386663, + "learning_rate": 2.0059164707363206e-06, + "loss": 0.1317, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5904 + }, + { + "epoch": 0.5678703659181613, + "grad_norm": 2.4732012883453356, + "learning_rate": 2.0051687208564146e-06, + "loss": 0.1719, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5905 + }, + { + "epoch": 0.5679665336346589, + "grad_norm": 1.7896876152747923, + "learning_rate": 2.0044210170466793e-06, + "loss": 0.134, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5906 + }, + { + "epoch": 0.5680627013511564, + "grad_norm": 2.1534825427009983, + "learning_rate": 2.0036733593767272e-06, + "loss": 0.1152, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5907 + }, + { + "epoch": 0.568158869067654, + "grad_norm": 1.306660940066163, + "learning_rate": 2.002925747916168e-06, + "loss": 0.0975, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5908 + }, + { + "epoch": 0.5682550367841516, + "grad_norm": 2.4948663833472393, + "learning_rate": 2.0021781827346066e-06, + "loss": 0.1278, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5909 + }, + { + "epoch": 0.5683512045006491, + "grad_norm": 2.664767441589237, + "learning_rate": 2.0014306639016437e-06, + "loss": 0.1567, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5910 + }, + { + "epoch": 0.5684473722171467, + "grad_norm": 1.4678643854853963, + "learning_rate": 2.0006831914868736e-06, + "loss": 0.0939, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5911 + }, + { + "epoch": 0.5685435399336443, + "grad_norm": 2.191886329891479, + "learning_rate": 1.9999357655598894e-06, + "loss": 0.1482, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5912 + }, + { + "epoch": 0.5686397076501418, + "grad_norm": 2.200605216510936, + "learning_rate": 1.999188386190278e-06, + "loss": 0.1101, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5913 + }, + { + "epoch": 0.5687358753666394, + "grad_norm": 1.791868026712539, + "learning_rate": 1.998441053447623e-06, + "loss": 0.1038, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5914 + }, + { + "epoch": 0.568832043083137, + "grad_norm": 1.5514450372681787, + "learning_rate": 1.997693767401503e-06, + "loss": 0.1248, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5915 + }, + { + "epoch": 0.5689282107996345, + "grad_norm": 2.355902353848791, + "learning_rate": 1.9969465281214934e-06, + "loss": 0.1043, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5916 + }, + { + "epoch": 0.5690243785161322, + "grad_norm": 1.5779422615172725, + "learning_rate": 1.996199335677162e-06, + "loss": 0.102, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5917 + }, + { + "epoch": 0.5691205462326298, + "grad_norm": 1.992699356940318, + "learning_rate": 1.9954521901380763e-06, + "loss": 0.1246, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5918 + }, + { + "epoch": 0.5692167139491273, + "grad_norm": 1.9785150153152509, + "learning_rate": 1.994705091573797e-06, + "loss": 0.1194, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5919 + }, + { + "epoch": 0.5693128816656249, + "grad_norm": 3.34666534398094, + "learning_rate": 1.993958040053881e-06, + "loss": 0.1469, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5920 + }, + { + "epoch": 0.5694090493821224, + "grad_norm": 1.4435809484843398, + "learning_rate": 1.9932110356478817e-06, + "loss": 0.1153, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5921 + }, + { + "epoch": 0.56950521709862, + "grad_norm": 2.0944676918385965, + "learning_rate": 1.9924640784253455e-06, + "loss": 0.1252, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5922 + }, + { + "epoch": 0.5696013848151176, + "grad_norm": 1.6651695428071975, + "learning_rate": 1.991717168455818e-06, + "loss": 0.119, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5923 + }, + { + "epoch": 0.5696975525316151, + "grad_norm": 1.455683512073344, + "learning_rate": 1.9909703058088375e-06, + "loss": 0.1125, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5924 + }, + { + "epoch": 0.5697937202481127, + "grad_norm": 1.5201088665254652, + "learning_rate": 1.990223490553939e-06, + "loss": 0.1142, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5925 + }, + { + "epoch": 0.5698898879646103, + "grad_norm": 1.4873769655644649, + "learning_rate": 1.989476722760654e-06, + "loss": 0.1073, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5926 + }, + { + "epoch": 0.5699860556811078, + "grad_norm": 2.551183667044335, + "learning_rate": 1.988730002498509e-06, + "loss": 0.1019, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5927 + }, + { + "epoch": 0.5700822233976054, + "grad_norm": 1.6016390599892867, + "learning_rate": 1.987983329837024e-06, + "loss": 0.0933, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5928 + }, + { + "epoch": 0.570178391114103, + "grad_norm": 1.7108860381166069, + "learning_rate": 1.987236704845717e-06, + "loss": 0.1263, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5929 + }, + { + "epoch": 0.5702745588306005, + "grad_norm": 3.092314675877493, + "learning_rate": 1.9864901275941007e-06, + "loss": 0.1188, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5930 + }, + { + "epoch": 0.5703707265470982, + "grad_norm": 1.5372328206906178, + "learning_rate": 1.9857435981516844e-06, + "loss": 0.1362, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5931 + }, + { + "epoch": 0.5704668942635958, + "grad_norm": 1.2336788943749968, + "learning_rate": 1.9849971165879723e-06, + "loss": 0.0811, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5932 + }, + { + "epoch": 0.5705630619800933, + "grad_norm": 2.298928304196955, + "learning_rate": 1.984250682972462e-06, + "loss": 0.1292, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5933 + }, + { + "epoch": 0.5706592296965909, + "grad_norm": 1.7176865328356175, + "learning_rate": 1.98350429737465e-06, + "loss": 0.1231, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5934 + }, + { + "epoch": 0.5707553974130884, + "grad_norm": 2.042741606289572, + "learning_rate": 1.982757959864026e-06, + "loss": 0.1149, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5935 + }, + { + "epoch": 0.570851565129586, + "grad_norm": 1.7228932311720286, + "learning_rate": 1.9820116705100778e-06, + "loss": 0.1157, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5936 + }, + { + "epoch": 0.5709477328460836, + "grad_norm": 2.385020773847988, + "learning_rate": 1.981265429382285e-06, + "loss": 0.1441, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5937 + }, + { + "epoch": 0.5710439005625811, + "grad_norm": 1.7661861847285878, + "learning_rate": 1.9805192365501265e-06, + "loss": 0.1385, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5938 + }, + { + "epoch": 0.5711400682790787, + "grad_norm": 2.073208428276442, + "learning_rate": 1.979773092083074e-06, + "loss": 0.1329, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5939 + }, + { + "epoch": 0.5712362359955763, + "grad_norm": 1.561285097424611, + "learning_rate": 1.9790269960505947e-06, + "loss": 0.1233, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5940 + }, + { + "epoch": 0.5713324037120738, + "grad_norm": 1.7196920079151072, + "learning_rate": 1.978280948522153e-06, + "loss": 0.1459, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5941 + }, + { + "epoch": 0.5714285714285714, + "grad_norm": 2.1156731246233647, + "learning_rate": 1.977534949567209e-06, + "loss": 0.1166, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5942 + }, + { + "epoch": 0.571524739145069, + "grad_norm": 2.106503833605767, + "learning_rate": 1.976788999255216e-06, + "loss": 0.1053, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5943 + }, + { + "epoch": 0.5716209068615665, + "grad_norm": 2.2893493045358198, + "learning_rate": 1.9760430976556257e-06, + "loss": 0.134, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5944 + }, + { + "epoch": 0.5717170745780641, + "grad_norm": 1.8056889902693425, + "learning_rate": 1.9752972448378817e-06, + "loss": 0.1302, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5945 + }, + { + "epoch": 0.5718132422945618, + "grad_norm": 1.7034836887058735, + "learning_rate": 1.974551440871425e-06, + "loss": 0.1271, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5946 + }, + { + "epoch": 0.5719094100110593, + "grad_norm": 2.448942497020956, + "learning_rate": 1.9738056858256935e-06, + "loss": 0.1293, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5947 + }, + { + "epoch": 0.5720055777275569, + "grad_norm": 2.0473137122415377, + "learning_rate": 1.9730599797701177e-06, + "loss": 0.144, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5948 + }, + { + "epoch": 0.5721017454440545, + "grad_norm": 2.1661434368690133, + "learning_rate": 1.972314322774127e-06, + "loss": 0.1348, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5949 + }, + { + "epoch": 0.572197913160552, + "grad_norm": 3.0769495230286648, + "learning_rate": 1.9715687149071415e-06, + "loss": 0.1304, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5950 + }, + { + "epoch": 0.5722940808770496, + "grad_norm": 2.003206257815481, + "learning_rate": 1.9708231562385804e-06, + "loss": 0.1595, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5951 + }, + { + "epoch": 0.5723902485935471, + "grad_norm": 1.6602864556181303, + "learning_rate": 1.970077646837858e-06, + "loss": 0.115, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5952 + }, + { + "epoch": 0.5724864163100447, + "grad_norm": 1.4146431296242699, + "learning_rate": 1.9693321867743825e-06, + "loss": 0.1226, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5953 + }, + { + "epoch": 0.5725825840265423, + "grad_norm": 1.6966250481188274, + "learning_rate": 1.9685867761175584e-06, + "loss": 0.1034, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5954 + }, + { + "epoch": 0.5726787517430398, + "grad_norm": 1.9774026916423997, + "learning_rate": 1.967841414936786e-06, + "loss": 0.131, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5955 + }, + { + "epoch": 0.5727749194595374, + "grad_norm": 1.370751173656593, + "learning_rate": 1.9670961033014604e-06, + "loss": 0.1007, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5956 + }, + { + "epoch": 0.572871087176035, + "grad_norm": 2.739378273159036, + "learning_rate": 1.9663508412809712e-06, + "loss": 0.1508, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5957 + }, + { + "epoch": 0.5729672548925325, + "grad_norm": 1.8916716011524697, + "learning_rate": 1.965605628944705e-06, + "loss": 0.1405, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5958 + }, + { + "epoch": 0.5730634226090301, + "grad_norm": 1.6665568905978896, + "learning_rate": 1.9648604663620435e-06, + "loss": 0.1064, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5959 + }, + { + "epoch": 0.5731595903255278, + "grad_norm": 1.3619739250568201, + "learning_rate": 1.9641153536023646e-06, + "loss": 0.1183, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5960 + }, + { + "epoch": 0.5732557580420253, + "grad_norm": 1.8741446342732322, + "learning_rate": 1.963370290735037e-06, + "loss": 0.1332, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5961 + }, + { + "epoch": 0.5733519257585229, + "grad_norm": 1.769545738925739, + "learning_rate": 1.9626252778294305e-06, + "loss": 0.135, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5962 + }, + { + "epoch": 0.5734480934750205, + "grad_norm": 1.9179688083008255, + "learning_rate": 1.9618803149549074e-06, + "loss": 0.1242, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5963 + }, + { + "epoch": 0.573544261191518, + "grad_norm": 2.3571637721617598, + "learning_rate": 1.961135402180826e-06, + "loss": 0.1147, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5964 + }, + { + "epoch": 0.5736404289080156, + "grad_norm": 1.87364750272239, + "learning_rate": 1.9603905395765397e-06, + "loss": 0.1273, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5965 + }, + { + "epoch": 0.5737365966245132, + "grad_norm": 2.590848756142062, + "learning_rate": 1.959645727211398e-06, + "loss": 0.1285, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5966 + }, + { + "epoch": 0.5738327643410107, + "grad_norm": 1.7273119102332324, + "learning_rate": 1.958900965154743e-06, + "loss": 0.1265, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5967 + }, + { + "epoch": 0.5739289320575083, + "grad_norm": 1.8743816070803658, + "learning_rate": 1.958156253475916e-06, + "loss": 0.1192, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5968 + }, + { + "epoch": 0.5740250997740058, + "grad_norm": 1.8157965687706896, + "learning_rate": 1.957411592244251e-06, + "loss": 0.1321, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5969 + }, + { + "epoch": 0.5741212674905034, + "grad_norm": 1.6147642976039323, + "learning_rate": 1.9566669815290778e-06, + "loss": 0.117, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5970 + }, + { + "epoch": 0.574217435207001, + "grad_norm": 1.4816581483832814, + "learning_rate": 1.9559224213997217e-06, + "loss": 0.1097, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5971 + }, + { + "epoch": 0.5743136029234985, + "grad_norm": 1.596596936787941, + "learning_rate": 1.955177911925504e-06, + "loss": 0.114, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5972 + }, + { + "epoch": 0.5744097706399961, + "grad_norm": 2.19222851024057, + "learning_rate": 1.954433453175741e-06, + "loss": 0.127, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5973 + }, + { + "epoch": 0.5745059383564938, + "grad_norm": 2.1799558452475125, + "learning_rate": 1.9536890452197424e-06, + "loss": 0.1188, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5974 + }, + { + "epoch": 0.5746021060729913, + "grad_norm": 3.417424530739289, + "learning_rate": 1.952944688126815e-06, + "loss": 0.1359, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5975 + }, + { + "epoch": 0.5746982737894889, + "grad_norm": 1.6790333932008452, + "learning_rate": 1.9522003819662614e-06, + "loss": 0.1321, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5976 + }, + { + "epoch": 0.5747944415059865, + "grad_norm": 1.9936308080926501, + "learning_rate": 1.951456126807379e-06, + "loss": 0.115, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5977 + }, + { + "epoch": 0.574890609222484, + "grad_norm": 3.2702242108591366, + "learning_rate": 1.950711922719458e-06, + "loss": 0.1568, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5978 + }, + { + "epoch": 0.5749867769389816, + "grad_norm": 1.5334100807407232, + "learning_rate": 1.949967769771787e-06, + "loss": 0.1205, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5979 + }, + { + "epoch": 0.5750829446554792, + "grad_norm": 2.2511988282420923, + "learning_rate": 1.9492236680336486e-06, + "loss": 0.1548, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5980 + }, + { + "epoch": 0.5751791123719767, + "grad_norm": 2.5338656506255837, + "learning_rate": 1.948479617574321e-06, + "loss": 0.1233, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5981 + }, + { + "epoch": 0.5752752800884743, + "grad_norm": 1.3487908292420185, + "learning_rate": 1.947735618463077e-06, + "loss": 0.0963, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5982 + }, + { + "epoch": 0.5753714478049718, + "grad_norm": 1.5695504190180902, + "learning_rate": 1.9469916707691866e-06, + "loss": 0.1084, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5983 + }, + { + "epoch": 0.5754676155214694, + "grad_norm": 1.4958751621660071, + "learning_rate": 1.9462477745619106e-06, + "loss": 0.1245, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5984 + }, + { + "epoch": 0.575563783237967, + "grad_norm": 4.1398818801560875, + "learning_rate": 1.9455039299105096e-06, + "loss": 0.1618, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5985 + }, + { + "epoch": 0.5756599509544645, + "grad_norm": 1.4922197579144196, + "learning_rate": 1.9447601368842374e-06, + "loss": 0.1209, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5986 + }, + { + "epoch": 0.5757561186709621, + "grad_norm": 2.584770976127221, + "learning_rate": 1.9440163955523424e-06, + "loss": 0.1273, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5987 + }, + { + "epoch": 0.5758522863874598, + "grad_norm": 2.3955355007548254, + "learning_rate": 1.9432727059840707e-06, + "loss": 0.1304, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5988 + }, + { + "epoch": 0.5759484541039573, + "grad_norm": 1.712568226900916, + "learning_rate": 1.94252906824866e-06, + "loss": 0.1161, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5989 + }, + { + "epoch": 0.5760446218204549, + "grad_norm": 4.329743365026642, + "learning_rate": 1.941785482415346e-06, + "loss": 0.1118, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5990 + }, + { + "epoch": 0.5761407895369525, + "grad_norm": 1.4006881643108815, + "learning_rate": 1.9410419485533584e-06, + "loss": 0.0983, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5991 + }, + { + "epoch": 0.57623695725345, + "grad_norm": 3.3102048523591003, + "learning_rate": 1.940298466731922e-06, + "loss": 0.1351, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5992 + }, + { + "epoch": 0.5763331249699476, + "grad_norm": 2.387300139368271, + "learning_rate": 1.9395550370202575e-06, + "loss": 0.1598, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5993 + }, + { + "epoch": 0.5764292926864452, + "grad_norm": 3.9182590050671924, + "learning_rate": 1.938811659487581e-06, + "loss": 0.1416, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5994 + }, + { + "epoch": 0.5765254604029427, + "grad_norm": 1.356770024556609, + "learning_rate": 1.9380683342031014e-06, + "loss": 0.1031, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5995 + }, + { + "epoch": 0.5766216281194403, + "grad_norm": 2.4249829151924662, + "learning_rate": 1.9373250612360247e-06, + "loss": 0.1112, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5996 + }, + { + "epoch": 0.5767177958359379, + "grad_norm": 1.5269656774372893, + "learning_rate": 1.9365818406555523e-06, + "loss": 0.1259, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5997 + }, + { + "epoch": 0.5768139635524354, + "grad_norm": 2.285342502628105, + "learning_rate": 1.93583867253088e-06, + "loss": 0.119, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5998 + }, + { + "epoch": 0.576910131268933, + "grad_norm": 2.5796096363730365, + "learning_rate": 1.9350955569311987e-06, + "loss": 0.1581, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 5999 + }, + { + "epoch": 0.5770062989854305, + "grad_norm": 2.171925641049162, + "learning_rate": 1.934352493925695e-06, + "loss": 0.098, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6000 + }, + { + "epoch": 0.5771024667019281, + "grad_norm": 1.933776600223355, + "learning_rate": 1.9336094835835496e-06, + "loss": 0.1068, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6001 + }, + { + "epoch": 0.5771986344184258, + "grad_norm": 1.8668093776682066, + "learning_rate": 1.932866525973938e-06, + "loss": 0.1345, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6002 + }, + { + "epoch": 0.5772948021349233, + "grad_norm": 2.042430829199948, + "learning_rate": 1.932123621166034e-06, + "loss": 0.1519, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6003 + }, + { + "epoch": 0.5773909698514209, + "grad_norm": 1.5323012718878382, + "learning_rate": 1.931380769229001e-06, + "loss": 0.1145, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6004 + }, + { + "epoch": 0.5774871375679185, + "grad_norm": 2.052972625861713, + "learning_rate": 1.9306379702320037e-06, + "loss": 0.119, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6005 + }, + { + "epoch": 0.577583305284416, + "grad_norm": 1.6756513142472633, + "learning_rate": 1.929895224244197e-06, + "loss": 0.1276, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6006 + }, + { + "epoch": 0.5776794730009136, + "grad_norm": 2.3643379514957954, + "learning_rate": 1.929152531334732e-06, + "loss": 0.1224, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6007 + }, + { + "epoch": 0.5777756407174112, + "grad_norm": 1.422860848740781, + "learning_rate": 1.928409891572757e-06, + "loss": 0.1202, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6008 + }, + { + "epoch": 0.5778718084339087, + "grad_norm": 1.6703873851525073, + "learning_rate": 1.9276673050274126e-06, + "loss": 0.125, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6009 + }, + { + "epoch": 0.5779679761504063, + "grad_norm": 2.200218043857278, + "learning_rate": 1.926924771767837e-06, + "loss": 0.1233, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6010 + }, + { + "epoch": 0.5780641438669039, + "grad_norm": 1.6238119346828963, + "learning_rate": 1.926182291863162e-06, + "loss": 0.1195, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6011 + }, + { + "epoch": 0.5781603115834014, + "grad_norm": 1.5569236538323448, + "learning_rate": 1.9254398653825128e-06, + "loss": 0.1166, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6012 + }, + { + "epoch": 0.578256479299899, + "grad_norm": 1.564102010428871, + "learning_rate": 1.9246974923950127e-06, + "loss": 0.1266, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6013 + }, + { + "epoch": 0.5783526470163965, + "grad_norm": 1.6528717899014622, + "learning_rate": 1.9239551729697783e-06, + "loss": 0.1171, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6014 + }, + { + "epoch": 0.5784488147328941, + "grad_norm": 1.597037003546464, + "learning_rate": 1.923212907175922e-06, + "loss": 0.1126, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6015 + }, + { + "epoch": 0.5785449824493918, + "grad_norm": 1.761851834568154, + "learning_rate": 1.9224706950825517e-06, + "loss": 0.1153, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6016 + }, + { + "epoch": 0.5786411501658894, + "grad_norm": 1.826543116426395, + "learning_rate": 1.921728536758767e-06, + "loss": 0.1097, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6017 + }, + { + "epoch": 0.5787373178823869, + "grad_norm": 2.5984380295087024, + "learning_rate": 1.9209864322736664e-06, + "loss": 0.1122, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6018 + }, + { + "epoch": 0.5788334855988845, + "grad_norm": 1.328489767530535, + "learning_rate": 1.9202443816963426e-06, + "loss": 0.1012, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6019 + }, + { + "epoch": 0.578929653315382, + "grad_norm": 1.9678552153029525, + "learning_rate": 1.9195023850958812e-06, + "loss": 0.1254, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6020 + }, + { + "epoch": 0.5790258210318796, + "grad_norm": 1.4906216387311104, + "learning_rate": 1.918760442541364e-06, + "loss": 0.1043, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6021 + }, + { + "epoch": 0.5791219887483772, + "grad_norm": 1.613693687027241, + "learning_rate": 1.91801855410187e-06, + "loss": 0.1135, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6022 + }, + { + "epoch": 0.5792181564648747, + "grad_norm": 1.7110037887858316, + "learning_rate": 1.9172767198464694e-06, + "loss": 0.1258, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6023 + }, + { + "epoch": 0.5793143241813723, + "grad_norm": 1.7534698714806847, + "learning_rate": 1.9165349398442284e-06, + "loss": 0.1342, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6024 + }, + { + "epoch": 0.5794104918978699, + "grad_norm": 1.799974009447017, + "learning_rate": 1.91579321416421e-06, + "loss": 0.1477, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6025 + }, + { + "epoch": 0.5795066596143674, + "grad_norm": 1.94913430182072, + "learning_rate": 1.9150515428754702e-06, + "loss": 0.1517, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6026 + }, + { + "epoch": 0.579602827330865, + "grad_norm": 1.769973703524498, + "learning_rate": 1.9143099260470627e-06, + "loss": 0.134, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6027 + }, + { + "epoch": 0.5796989950473626, + "grad_norm": 1.8177854224859606, + "learning_rate": 1.9135683637480308e-06, + "loss": 0.1179, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6028 + }, + { + "epoch": 0.5797951627638601, + "grad_norm": 3.0724836892783016, + "learning_rate": 1.9128268560474183e-06, + "loss": 0.1128, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6029 + }, + { + "epoch": 0.5798913304803578, + "grad_norm": 1.8911349221453075, + "learning_rate": 1.9120854030142606e-06, + "loss": 0.1334, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6030 + }, + { + "epoch": 0.5799874981968554, + "grad_norm": 2.0775090594896324, + "learning_rate": 1.91134400471759e-06, + "loss": 0.1255, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6031 + }, + { + "epoch": 0.5800836659133529, + "grad_norm": 1.5473793268762217, + "learning_rate": 1.9106026612264316e-06, + "loss": 0.1394, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6032 + }, + { + "epoch": 0.5801798336298505, + "grad_norm": 2.3278679116085743, + "learning_rate": 1.9098613726098084e-06, + "loss": 0.1584, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6033 + }, + { + "epoch": 0.580276001346348, + "grad_norm": 1.9223136560079948, + "learning_rate": 1.9091201389367342e-06, + "loss": 0.1146, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6034 + }, + { + "epoch": 0.5803721690628456, + "grad_norm": 1.3047810372120587, + "learning_rate": 1.908378960276221e-06, + "loss": 0.1058, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6035 + }, + { + "epoch": 0.5804683367793432, + "grad_norm": 1.4466935515723869, + "learning_rate": 1.907637836697275e-06, + "loss": 0.1078, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6036 + }, + { + "epoch": 0.5805645044958407, + "grad_norm": 1.5103924526168702, + "learning_rate": 1.9068967682688957e-06, + "loss": 0.1196, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6037 + }, + { + "epoch": 0.5806606722123383, + "grad_norm": 1.255145244085066, + "learning_rate": 1.9061557550600796e-06, + "loss": 0.1006, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6038 + }, + { + "epoch": 0.5807568399288359, + "grad_norm": 1.1836030376670374, + "learning_rate": 1.9054147971398176e-06, + "loss": 0.0947, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6039 + }, + { + "epoch": 0.5808530076453334, + "grad_norm": 1.4290309499539033, + "learning_rate": 1.9046738945770932e-06, + "loss": 0.1019, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6040 + }, + { + "epoch": 0.580949175361831, + "grad_norm": 1.8048675456542178, + "learning_rate": 1.9039330474408874e-06, + "loss": 0.1348, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6041 + }, + { + "epoch": 0.5810453430783286, + "grad_norm": 2.8232088685693006, + "learning_rate": 1.9031922558001753e-06, + "loss": 0.0996, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6042 + }, + { + "epoch": 0.5811415107948261, + "grad_norm": 1.938028948657388, + "learning_rate": 1.9024515197239262e-06, + "loss": 0.1357, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6043 + }, + { + "epoch": 0.5812376785113238, + "grad_norm": 1.8482940681594662, + "learning_rate": 1.9017108392811065e-06, + "loss": 0.134, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6044 + }, + { + "epoch": 0.5813338462278214, + "grad_norm": 2.238710826367023, + "learning_rate": 1.9009702145406728e-06, + "loss": 0.1218, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6045 + }, + { + "epoch": 0.5814300139443189, + "grad_norm": 2.1628791969590546, + "learning_rate": 1.9002296455715807e-06, + "loss": 0.1349, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6046 + }, + { + "epoch": 0.5815261816608165, + "grad_norm": 1.748769256030077, + "learning_rate": 1.8994891324427794e-06, + "loss": 0.1139, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6047 + }, + { + "epoch": 0.581622349377314, + "grad_norm": 1.4882046366163582, + "learning_rate": 1.8987486752232122e-06, + "loss": 0.1053, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6048 + }, + { + "epoch": 0.5817185170938116, + "grad_norm": 1.9438419737889057, + "learning_rate": 1.8980082739818185e-06, + "loss": 0.1364, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6049 + }, + { + "epoch": 0.5818146848103092, + "grad_norm": 1.580817110048954, + "learning_rate": 1.897267928787532e-06, + "loss": 0.1115, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6050 + }, + { + "epoch": 0.5819108525268067, + "grad_norm": 1.7336744711798813, + "learning_rate": 1.8965276397092793e-06, + "loss": 0.1019, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6051 + }, + { + "epoch": 0.5820070202433043, + "grad_norm": 1.885110732615454, + "learning_rate": 1.8957874068159843e-06, + "loss": 0.1125, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6052 + }, + { + "epoch": 0.5821031879598019, + "grad_norm": 1.5806069854519322, + "learning_rate": 1.8950472301765652e-06, + "loss": 0.1016, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6053 + }, + { + "epoch": 0.5821993556762994, + "grad_norm": 1.5892855137199136, + "learning_rate": 1.8943071098599333e-06, + "loss": 0.1227, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6054 + }, + { + "epoch": 0.582295523392797, + "grad_norm": 1.8403145992298275, + "learning_rate": 1.8935670459349973e-06, + "loss": 0.1515, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6055 + }, + { + "epoch": 0.5823916911092946, + "grad_norm": 1.7008945313206532, + "learning_rate": 1.8928270384706585e-06, + "loss": 0.1336, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6056 + }, + { + "epoch": 0.5824878588257921, + "grad_norm": 2.100103746611638, + "learning_rate": 1.892087087535813e-06, + "loss": 0.1299, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6057 + }, + { + "epoch": 0.5825840265422898, + "grad_norm": 1.6686264267274369, + "learning_rate": 1.891347193199353e-06, + "loss": 0.1382, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6058 + }, + { + "epoch": 0.5826801942587874, + "grad_norm": 1.8449240236494842, + "learning_rate": 1.8906073555301645e-06, + "loss": 0.1404, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6059 + }, + { + "epoch": 0.5827763619752849, + "grad_norm": 2.61536393556035, + "learning_rate": 1.889867574597129e-06, + "loss": 0.1331, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6060 + }, + { + "epoch": 0.5828725296917825, + "grad_norm": 3.103767639600033, + "learning_rate": 1.8891278504691225e-06, + "loss": 0.1079, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6061 + }, + { + "epoch": 0.5829686974082801, + "grad_norm": 2.7581459469340177, + "learning_rate": 1.8883881832150136e-06, + "loss": 0.1095, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6062 + }, + { + "epoch": 0.5830648651247776, + "grad_norm": 1.7725671737109971, + "learning_rate": 1.8876485729036687e-06, + "loss": 0.1642, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6063 + }, + { + "epoch": 0.5831610328412752, + "grad_norm": 2.8010882654733025, + "learning_rate": 1.8869090196039469e-06, + "loss": 0.14, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6064 + }, + { + "epoch": 0.5832572005577727, + "grad_norm": 1.4770871003920958, + "learning_rate": 1.8861695233847036e-06, + "loss": 0.1251, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6065 + }, + { + "epoch": 0.5833533682742703, + "grad_norm": 1.6811021659196337, + "learning_rate": 1.8854300843147878e-06, + "loss": 0.1158, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6066 + }, + { + "epoch": 0.5834495359907679, + "grad_norm": 2.0337715998073036, + "learning_rate": 1.8846907024630425e-06, + "loss": 0.1287, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6067 + }, + { + "epoch": 0.5835457037072654, + "grad_norm": 1.3944522384469045, + "learning_rate": 1.8839513778983068e-06, + "loss": 0.1183, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6068 + }, + { + "epoch": 0.583641871423763, + "grad_norm": 2.3211866515681314, + "learning_rate": 1.8832121106894142e-06, + "loss": 0.1345, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6069 + }, + { + "epoch": 0.5837380391402606, + "grad_norm": 2.558466066110284, + "learning_rate": 1.8824729009051912e-06, + "loss": 0.1327, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6070 + }, + { + "epoch": 0.5838342068567581, + "grad_norm": 1.549412852970969, + "learning_rate": 1.8817337486144614e-06, + "loss": 0.1245, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6071 + }, + { + "epoch": 0.5839303745732558, + "grad_norm": 2.200377145875984, + "learning_rate": 1.8809946538860427e-06, + "loss": 0.1103, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6072 + }, + { + "epoch": 0.5840265422897534, + "grad_norm": 2.0671982504950273, + "learning_rate": 1.8802556167887452e-06, + "loss": 0.1087, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6073 + }, + { + "epoch": 0.5841227100062509, + "grad_norm": 2.0203998103663974, + "learning_rate": 1.8795166373913757e-06, + "loss": 0.1317, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6074 + }, + { + "epoch": 0.5842188777227485, + "grad_norm": 1.4360696434671651, + "learning_rate": 1.8787777157627354e-06, + "loss": 0.1045, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6075 + }, + { + "epoch": 0.5843150454392461, + "grad_norm": 2.3199777065490115, + "learning_rate": 1.8780388519716203e-06, + "loss": 0.1267, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6076 + }, + { + "epoch": 0.5844112131557436, + "grad_norm": 1.6066713821083507, + "learning_rate": 1.8773000460868206e-06, + "loss": 0.1303, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6077 + }, + { + "epoch": 0.5845073808722412, + "grad_norm": 1.775895659665781, + "learning_rate": 1.8765612981771222e-06, + "loss": 0.1347, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6078 + }, + { + "epoch": 0.5846035485887388, + "grad_norm": 1.7187737192306463, + "learning_rate": 1.8758226083113024e-06, + "loss": 0.1294, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6079 + }, + { + "epoch": 0.5846997163052363, + "grad_norm": 1.5317550170376169, + "learning_rate": 1.875083976558136e-06, + "loss": 0.1242, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6080 + }, + { + "epoch": 0.5847958840217339, + "grad_norm": 1.5000719101496676, + "learning_rate": 1.8743454029863925e-06, + "loss": 0.1163, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6081 + }, + { + "epoch": 0.5848920517382314, + "grad_norm": 1.3671100252558623, + "learning_rate": 1.8736068876648345e-06, + "loss": 0.1139, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6082 + }, + { + "epoch": 0.584988219454729, + "grad_norm": 3.0226775784851267, + "learning_rate": 1.872868430662221e-06, + "loss": 0.1047, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6083 + }, + { + "epoch": 0.5850843871712266, + "grad_norm": 3.24077752290661, + "learning_rate": 1.8721300320473023e-06, + "loss": 0.1095, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6084 + }, + { + "epoch": 0.5851805548877241, + "grad_norm": 1.8600678076249877, + "learning_rate": 1.8713916918888266e-06, + "loss": 0.1273, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6085 + }, + { + "epoch": 0.5852767226042218, + "grad_norm": 1.8314062811121692, + "learning_rate": 1.870653410255536e-06, + "loss": 0.1289, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6086 + }, + { + "epoch": 0.5853728903207194, + "grad_norm": 1.6037587589508617, + "learning_rate": 1.8699151872161654e-06, + "loss": 0.1108, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6087 + }, + { + "epoch": 0.5854690580372169, + "grad_norm": 1.7817577400681226, + "learning_rate": 1.8691770228394458e-06, + "loss": 0.1073, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6088 + }, + { + "epoch": 0.5855652257537145, + "grad_norm": 2.4737954393625263, + "learning_rate": 1.8684389171941033e-06, + "loss": 0.1243, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6089 + }, + { + "epoch": 0.5856613934702121, + "grad_norm": 1.670522177093719, + "learning_rate": 1.867700870348856e-06, + "loss": 0.1316, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6090 + }, + { + "epoch": 0.5857575611867096, + "grad_norm": 1.6479924268580486, + "learning_rate": 1.8669628823724187e-06, + "loss": 0.1141, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6091 + }, + { + "epoch": 0.5858537289032072, + "grad_norm": 1.8083860983605904, + "learning_rate": 1.8662249533335004e-06, + "loss": 0.1243, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6092 + }, + { + "epoch": 0.5859498966197048, + "grad_norm": 1.978068510915584, + "learning_rate": 1.8654870833008043e-06, + "loss": 0.1249, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6093 + }, + { + "epoch": 0.5860460643362023, + "grad_norm": 2.647142603890483, + "learning_rate": 1.864749272343028e-06, + "loss": 0.1056, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6094 + }, + { + "epoch": 0.5861422320526999, + "grad_norm": 4.038632526898906, + "learning_rate": 1.864011520528865e-06, + "loss": 0.1176, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6095 + }, + { + "epoch": 0.5862383997691974, + "grad_norm": 2.6005924316943196, + "learning_rate": 1.863273827927e-06, + "loss": 0.1329, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6096 + }, + { + "epoch": 0.586334567485695, + "grad_norm": 1.5224920741727073, + "learning_rate": 1.862536194606115e-06, + "loss": 0.1132, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6097 + }, + { + "epoch": 0.5864307352021926, + "grad_norm": 1.38630450770151, + "learning_rate": 1.861798620634886e-06, + "loss": 0.0997, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6098 + }, + { + "epoch": 0.5865269029186901, + "grad_norm": 2.4166845579374407, + "learning_rate": 1.861061106081983e-06, + "loss": 0.1316, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6099 + }, + { + "epoch": 0.5866230706351878, + "grad_norm": 1.3355702012144075, + "learning_rate": 1.860323651016072e-06, + "loss": 0.1164, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6100 + }, + { + "epoch": 0.5867192383516854, + "grad_norm": 1.6256835206002385, + "learning_rate": 1.8595862555058095e-06, + "loss": 0.112, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6101 + }, + { + "epoch": 0.5868154060681829, + "grad_norm": 2.2969920173952834, + "learning_rate": 1.8588489196198507e-06, + "loss": 0.1312, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6102 + }, + { + "epoch": 0.5869115737846805, + "grad_norm": 2.8628003124320904, + "learning_rate": 1.8581116434268437e-06, + "loss": 0.1357, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6103 + }, + { + "epoch": 0.5870077415011781, + "grad_norm": 1.6341016020268044, + "learning_rate": 1.85737442699543e-06, + "loss": 0.1148, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6104 + }, + { + "epoch": 0.5871039092176756, + "grad_norm": 1.648984322501255, + "learning_rate": 1.856637270394247e-06, + "loss": 0.1284, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6105 + }, + { + "epoch": 0.5872000769341732, + "grad_norm": 2.486621934167934, + "learning_rate": 1.8559001736919274e-06, + "loss": 0.1005, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6106 + }, + { + "epoch": 0.5872962446506708, + "grad_norm": 1.5396949944510285, + "learning_rate": 1.8551631369570949e-06, + "loss": 0.1212, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6107 + }, + { + "epoch": 0.5873924123671683, + "grad_norm": 2.8249513369382533, + "learning_rate": 1.8544261602583703e-06, + "loss": 0.1425, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6108 + }, + { + "epoch": 0.5874885800836659, + "grad_norm": 2.5900554904129187, + "learning_rate": 1.8536892436643684e-06, + "loss": 0.1158, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6109 + }, + { + "epoch": 0.5875847478001635, + "grad_norm": 2.3366432356171747, + "learning_rate": 1.852952387243698e-06, + "loss": 0.1314, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6110 + }, + { + "epoch": 0.587680915516661, + "grad_norm": 2.0953124394853204, + "learning_rate": 1.852215591064964e-06, + "loss": 0.1306, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6111 + }, + { + "epoch": 0.5877770832331586, + "grad_norm": 1.8169304023092416, + "learning_rate": 1.8514788551967616e-06, + "loss": 0.1319, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6112 + }, + { + "epoch": 0.5878732509496561, + "grad_norm": 1.8258956908165132, + "learning_rate": 1.850742179707684e-06, + "loss": 0.131, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6113 + }, + { + "epoch": 0.5879694186661538, + "grad_norm": 1.7704243206047765, + "learning_rate": 1.8500055646663182e-06, + "loss": 0.1229, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6114 + }, + { + "epoch": 0.5880655863826514, + "grad_norm": 1.357288892271915, + "learning_rate": 1.8492690101412447e-06, + "loss": 0.1208, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6115 + }, + { + "epoch": 0.588161754099149, + "grad_norm": 2.332408863517162, + "learning_rate": 1.8485325162010392e-06, + "loss": 0.105, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6116 + }, + { + "epoch": 0.5882579218156465, + "grad_norm": 2.0927170402015918, + "learning_rate": 1.8477960829142716e-06, + "loss": 0.1315, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6117 + }, + { + "epoch": 0.5883540895321441, + "grad_norm": 1.87626595853187, + "learning_rate": 1.8470597103495042e-06, + "loss": 0.1141, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6118 + }, + { + "epoch": 0.5884502572486416, + "grad_norm": 3.0234764394907483, + "learning_rate": 1.8463233985752975e-06, + "loss": 0.127, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6119 + }, + { + "epoch": 0.5885464249651392, + "grad_norm": 1.3385246468976333, + "learning_rate": 1.8455871476602023e-06, + "loss": 0.1025, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6120 + }, + { + "epoch": 0.5886425926816368, + "grad_norm": 1.3661870289817069, + "learning_rate": 1.8448509576727667e-06, + "loss": 0.1202, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6121 + }, + { + "epoch": 0.5887387603981343, + "grad_norm": 1.5209327202940754, + "learning_rate": 1.8441148286815317e-06, + "loss": 0.1325, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6122 + }, + { + "epoch": 0.5888349281146319, + "grad_norm": 1.5496451847710075, + "learning_rate": 1.843378760755034e-06, + "loss": 0.118, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6123 + }, + { + "epoch": 0.5889310958311295, + "grad_norm": 1.315416972797568, + "learning_rate": 1.842642753961802e-06, + "loss": 0.0984, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6124 + }, + { + "epoch": 0.589027263547627, + "grad_norm": 1.3161301524639424, + "learning_rate": 1.84190680837036e-06, + "loss": 0.0938, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6125 + }, + { + "epoch": 0.5891234312641246, + "grad_norm": 1.893883283084692, + "learning_rate": 1.8411709240492277e-06, + "loss": 0.1288, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6126 + }, + { + "epoch": 0.5892195989806222, + "grad_norm": 1.479280978839481, + "learning_rate": 1.8404351010669172e-06, + "loss": 0.0986, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6127 + }, + { + "epoch": 0.5893157666971198, + "grad_norm": 2.316285469210968, + "learning_rate": 1.8396993394919372e-06, + "loss": 0.1386, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6128 + }, + { + "epoch": 0.5894119344136174, + "grad_norm": 1.8095550992319036, + "learning_rate": 1.8389636393927868e-06, + "loss": 0.1494, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6129 + }, + { + "epoch": 0.589508102130115, + "grad_norm": 1.3872904160491282, + "learning_rate": 1.8382280008379627e-06, + "loss": 0.0982, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6130 + }, + { + "epoch": 0.5896042698466125, + "grad_norm": 1.732540634306584, + "learning_rate": 1.8374924238959552e-06, + "loss": 0.125, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6131 + }, + { + "epoch": 0.5897004375631101, + "grad_norm": 1.5698377594306427, + "learning_rate": 1.8367569086352483e-06, + "loss": 0.1097, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6132 + }, + { + "epoch": 0.5897966052796076, + "grad_norm": 1.4197564058055074, + "learning_rate": 1.836021455124321e-06, + "loss": 0.094, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6133 + }, + { + "epoch": 0.5898927729961052, + "grad_norm": 2.0704966358306645, + "learning_rate": 1.8352860634316458e-06, + "loss": 0.1457, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6134 + }, + { + "epoch": 0.5899889407126028, + "grad_norm": 1.517910171356509, + "learning_rate": 1.834550733625689e-06, + "loss": 0.1057, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6135 + }, + { + "epoch": 0.5900851084291003, + "grad_norm": 1.8696332442620072, + "learning_rate": 1.833815465774913e-06, + "loss": 0.1152, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6136 + }, + { + "epoch": 0.5901812761455979, + "grad_norm": 2.464176476651395, + "learning_rate": 1.8330802599477724e-06, + "loss": 0.1282, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6137 + }, + { + "epoch": 0.5902774438620955, + "grad_norm": 1.929888937389442, + "learning_rate": 1.832345116212717e-06, + "loss": 0.126, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6138 + }, + { + "epoch": 0.590373611578593, + "grad_norm": 2.588160778912093, + "learning_rate": 1.831610034638192e-06, + "loss": 0.1391, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6139 + }, + { + "epoch": 0.5904697792950906, + "grad_norm": 2.251110064861184, + "learning_rate": 1.8308750152926338e-06, + "loss": 0.1343, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6140 + }, + { + "epoch": 0.5905659470115882, + "grad_norm": 1.7065783366958098, + "learning_rate": 1.8301400582444754e-06, + "loss": 0.1302, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6141 + }, + { + "epoch": 0.5906621147280858, + "grad_norm": 1.6075197904844505, + "learning_rate": 1.8294051635621432e-06, + "loss": 0.1324, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6142 + }, + { + "epoch": 0.5907582824445834, + "grad_norm": 1.6185991589195385, + "learning_rate": 1.828670331314058e-06, + "loss": 0.1177, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6143 + }, + { + "epoch": 0.590854450161081, + "grad_norm": 1.9645030147497489, + "learning_rate": 1.8279355615686353e-06, + "loss": 0.1268, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6144 + }, + { + "epoch": 0.5909506178775785, + "grad_norm": 1.6864623461738897, + "learning_rate": 1.827200854394285e-06, + "loss": 0.118, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6145 + }, + { + "epoch": 0.5910467855940761, + "grad_norm": 2.3421780795488703, + "learning_rate": 1.8264662098594076e-06, + "loss": 0.1206, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6146 + }, + { + "epoch": 0.5911429533105736, + "grad_norm": 1.5723072269907326, + "learning_rate": 1.8257316280324023e-06, + "loss": 0.1075, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6147 + }, + { + "epoch": 0.5912391210270712, + "grad_norm": 3.342896280547941, + "learning_rate": 1.8249971089816604e-06, + "loss": 0.1254, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6148 + }, + { + "epoch": 0.5913352887435688, + "grad_norm": 2.2679141380634067, + "learning_rate": 1.824262652775568e-06, + "loss": 0.1209, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6149 + }, + { + "epoch": 0.5914314564600663, + "grad_norm": 1.6380239584249334, + "learning_rate": 1.8235282594825054e-06, + "loss": 0.122, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6150 + }, + { + "epoch": 0.5915276241765639, + "grad_norm": 1.7471200641772227, + "learning_rate": 1.8227939291708464e-06, + "loss": 0.1204, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6151 + }, + { + "epoch": 0.5916237918930615, + "grad_norm": 2.4159453369372694, + "learning_rate": 1.8220596619089576e-06, + "loss": 0.1095, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6152 + }, + { + "epoch": 0.591719959609559, + "grad_norm": 1.8714182924298428, + "learning_rate": 1.8213254577652038e-06, + "loss": 0.1093, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6153 + }, + { + "epoch": 0.5918161273260566, + "grad_norm": 1.4647061788628435, + "learning_rate": 1.8205913168079392e-06, + "loss": 0.1191, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6154 + }, + { + "epoch": 0.5919122950425542, + "grad_norm": 2.098920050451038, + "learning_rate": 1.8198572391055159e-06, + "loss": 0.1043, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6155 + }, + { + "epoch": 0.5920084627590518, + "grad_norm": 2.1483332034012133, + "learning_rate": 1.8191232247262791e-06, + "loss": 0.1339, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6156 + }, + { + "epoch": 0.5921046304755494, + "grad_norm": 2.9322360638017364, + "learning_rate": 1.8183892737385655e-06, + "loss": 0.1263, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6157 + }, + { + "epoch": 0.592200798192047, + "grad_norm": 1.9887188180734918, + "learning_rate": 1.817655386210709e-06, + "loss": 0.1057, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6158 + }, + { + "epoch": 0.5922969659085445, + "grad_norm": 1.6125546054371243, + "learning_rate": 1.8169215622110363e-06, + "loss": 0.1132, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6159 + }, + { + "epoch": 0.5923931336250421, + "grad_norm": 2.0511664685689093, + "learning_rate": 1.8161878018078693e-06, + "loss": 0.1294, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6160 + }, + { + "epoch": 0.5924893013415397, + "grad_norm": 2.136086708830045, + "learning_rate": 1.8154541050695229e-06, + "loss": 0.1297, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6161 + }, + { + "epoch": 0.5925854690580372, + "grad_norm": 1.9028851800335438, + "learning_rate": 1.8147204720643066e-06, + "loss": 0.1228, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6162 + }, + { + "epoch": 0.5926816367745348, + "grad_norm": 2.8859566581328626, + "learning_rate": 1.8139869028605222e-06, + "loss": 0.1318, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6163 + }, + { + "epoch": 0.5927778044910323, + "grad_norm": 2.771222894464747, + "learning_rate": 1.8132533975264681e-06, + "loss": 0.1014, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6164 + }, + { + "epoch": 0.5928739722075299, + "grad_norm": 1.7005368810723749, + "learning_rate": 1.8125199561304356e-06, + "loss": 0.1341, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6165 + }, + { + "epoch": 0.5929701399240275, + "grad_norm": 3.1277039127967576, + "learning_rate": 1.8117865787407104e-06, + "loss": 0.1487, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6166 + }, + { + "epoch": 0.593066307640525, + "grad_norm": 1.3816818603878636, + "learning_rate": 1.811053265425572e-06, + "loss": 0.1239, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6167 + }, + { + "epoch": 0.5931624753570226, + "grad_norm": 2.193696021403627, + "learning_rate": 1.8103200162532927e-06, + "loss": 0.1277, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6168 + }, + { + "epoch": 0.5932586430735202, + "grad_norm": 2.6061828556563156, + "learning_rate": 1.8095868312921416e-06, + "loss": 0.1251, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6169 + }, + { + "epoch": 0.5933548107900178, + "grad_norm": 3.700914622426145, + "learning_rate": 1.8088537106103793e-06, + "loss": 0.1203, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6170 + }, + { + "epoch": 0.5934509785065154, + "grad_norm": 2.1979988246887237, + "learning_rate": 1.8081206542762613e-06, + "loss": 0.1465, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6171 + }, + { + "epoch": 0.593547146223013, + "grad_norm": 2.2599094391556855, + "learning_rate": 1.8073876623580378e-06, + "loss": 0.1468, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6172 + }, + { + "epoch": 0.5936433139395105, + "grad_norm": 1.4497379630960976, + "learning_rate": 1.806654734923953e-06, + "loss": 0.0989, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6173 + }, + { + "epoch": 0.5937394816560081, + "grad_norm": 1.6939421875428786, + "learning_rate": 1.8059218720422429e-06, + "loss": 0.1308, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6174 + }, + { + "epoch": 0.5938356493725057, + "grad_norm": 2.1316436403358545, + "learning_rate": 1.8051890737811395e-06, + "loss": 0.1179, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6175 + }, + { + "epoch": 0.5939318170890032, + "grad_norm": 2.6152029722163705, + "learning_rate": 1.8044563402088686e-06, + "loss": 0.1391, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6176 + }, + { + "epoch": 0.5940279848055008, + "grad_norm": 2.625822744362387, + "learning_rate": 1.80372367139365e-06, + "loss": 0.1282, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6177 + }, + { + "epoch": 0.5941241525219983, + "grad_norm": 1.3985773027079893, + "learning_rate": 1.8029910674036972e-06, + "loss": 0.1369, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6178 + }, + { + "epoch": 0.5942203202384959, + "grad_norm": 1.6970426491943575, + "learning_rate": 1.802258528307218e-06, + "loss": 0.1408, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6179 + }, + { + "epoch": 0.5943164879549935, + "grad_norm": 2.9668040623334417, + "learning_rate": 1.8015260541724128e-06, + "loss": 0.1113, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6180 + }, + { + "epoch": 0.594412655671491, + "grad_norm": 1.7413953635540649, + "learning_rate": 1.800793645067477e-06, + "loss": 0.1182, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6181 + }, + { + "epoch": 0.5945088233879886, + "grad_norm": 1.5897333627179888, + "learning_rate": 1.8000613010606008e-06, + "loss": 0.1253, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6182 + }, + { + "epoch": 0.5946049911044862, + "grad_norm": 2.5136544105843583, + "learning_rate": 1.7993290222199674e-06, + "loss": 0.1191, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6183 + }, + { + "epoch": 0.5947011588209838, + "grad_norm": 2.8419303667445486, + "learning_rate": 1.798596808613754e-06, + "loss": 0.1175, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6184 + }, + { + "epoch": 0.5947973265374814, + "grad_norm": 4.683066576206845, + "learning_rate": 1.7978646603101304e-06, + "loss": 0.1406, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6185 + }, + { + "epoch": 0.594893494253979, + "grad_norm": 2.160912023814149, + "learning_rate": 1.7971325773772636e-06, + "loss": 0.1357, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6186 + }, + { + "epoch": 0.5949896619704765, + "grad_norm": 3.606184958402218, + "learning_rate": 1.7964005598833108e-06, + "loss": 0.1456, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6187 + }, + { + "epoch": 0.5950858296869741, + "grad_norm": 2.016275710635794, + "learning_rate": 1.7956686078964257e-06, + "loss": 0.1399, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6188 + }, + { + "epoch": 0.5951819974034717, + "grad_norm": 1.6018935880105094, + "learning_rate": 1.7949367214847555e-06, + "loss": 0.1286, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6189 + }, + { + "epoch": 0.5952781651199692, + "grad_norm": 1.6026984506789779, + "learning_rate": 1.7942049007164411e-06, + "loss": 0.133, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6190 + }, + { + "epoch": 0.5953743328364668, + "grad_norm": 3.8994473899038224, + "learning_rate": 1.7934731456596154e-06, + "loss": 0.1429, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6191 + }, + { + "epoch": 0.5954705005529644, + "grad_norm": 2.1078089566851475, + "learning_rate": 1.7927414563824077e-06, + "loss": 0.1316, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6192 + }, + { + "epoch": 0.5955666682694619, + "grad_norm": 2.717287579527914, + "learning_rate": 1.7920098329529407e-06, + "loss": 0.1272, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6193 + }, + { + "epoch": 0.5956628359859595, + "grad_norm": 1.9663037348681274, + "learning_rate": 1.79127827543933e-06, + "loss": 0.1055, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6194 + }, + { + "epoch": 0.595759003702457, + "grad_norm": 1.6719149768228891, + "learning_rate": 1.7905467839096872e-06, + "loss": 0.0979, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6195 + }, + { + "epoch": 0.5958551714189546, + "grad_norm": 2.243900519348414, + "learning_rate": 1.7898153584321137e-06, + "loss": 0.1387, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6196 + }, + { + "epoch": 0.5959513391354522, + "grad_norm": 2.753094361450469, + "learning_rate": 1.7890839990747085e-06, + "loss": 0.1134, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6197 + }, + { + "epoch": 0.5960475068519498, + "grad_norm": 1.975350811925, + "learning_rate": 1.7883527059055633e-06, + "loss": 0.1191, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6198 + }, + { + "epoch": 0.5961436745684474, + "grad_norm": 1.5653187019055388, + "learning_rate": 1.7876214789927635e-06, + "loss": 0.1111, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6199 + }, + { + "epoch": 0.596239842284945, + "grad_norm": 1.6553569723073713, + "learning_rate": 1.7868903184043888e-06, + "loss": 0.1127, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6200 + }, + { + "epoch": 0.5963360100014425, + "grad_norm": 1.6896632743599975, + "learning_rate": 1.7861592242085116e-06, + "loss": 0.1262, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6201 + }, + { + "epoch": 0.5964321777179401, + "grad_norm": 2.057855077560267, + "learning_rate": 1.7854281964731984e-06, + "loss": 0.1499, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6202 + }, + { + "epoch": 0.5965283454344377, + "grad_norm": 2.419421482375996, + "learning_rate": 1.784697235266511e-06, + "loss": 0.1282, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6203 + }, + { + "epoch": 0.5966245131509352, + "grad_norm": 1.5105004672627442, + "learning_rate": 1.7839663406565031e-06, + "loss": 0.1209, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6204 + }, + { + "epoch": 0.5967206808674328, + "grad_norm": 1.9091530524868427, + "learning_rate": 1.7832355127112233e-06, + "loss": 0.1304, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6205 + }, + { + "epoch": 0.5968168485839304, + "grad_norm": 3.2473284194344365, + "learning_rate": 1.7825047514987134e-06, + "loss": 0.1352, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6206 + }, + { + "epoch": 0.5969130163004279, + "grad_norm": 1.6044792960919116, + "learning_rate": 1.7817740570870113e-06, + "loss": 0.1256, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6207 + }, + { + "epoch": 0.5970091840169255, + "grad_norm": 1.576926560632416, + "learning_rate": 1.7810434295441434e-06, + "loss": 0.1198, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6208 + }, + { + "epoch": 0.597105351733423, + "grad_norm": 1.4419994821178796, + "learning_rate": 1.780312868938135e-06, + "loss": 0.11, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6209 + }, + { + "epoch": 0.5972015194499206, + "grad_norm": 1.4627644635131969, + "learning_rate": 1.7795823753370035e-06, + "loss": 0.1194, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6210 + }, + { + "epoch": 0.5972976871664182, + "grad_norm": 1.6927325111091567, + "learning_rate": 1.7788519488087591e-06, + "loss": 0.1381, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6211 + }, + { + "epoch": 0.5973938548829159, + "grad_norm": 1.4975488803609076, + "learning_rate": 1.778121589421408e-06, + "loss": 0.1149, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6212 + }, + { + "epoch": 0.5974900225994134, + "grad_norm": 1.4640793882896168, + "learning_rate": 1.7773912972429466e-06, + "loss": 0.1104, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6213 + }, + { + "epoch": 0.597586190315911, + "grad_norm": 1.3777032343075113, + "learning_rate": 1.7766610723413686e-06, + "loss": 0.1098, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6214 + }, + { + "epoch": 0.5976823580324085, + "grad_norm": 2.898857136298951, + "learning_rate": 1.775930914784659e-06, + "loss": 0.1216, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6215 + }, + { + "epoch": 0.5977785257489061, + "grad_norm": 1.9363492358255783, + "learning_rate": 1.7752008246407986e-06, + "loss": 0.1122, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6216 + }, + { + "epoch": 0.5978746934654037, + "grad_norm": 1.6205879756523676, + "learning_rate": 1.7744708019777602e-06, + "loss": 0.0939, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6217 + }, + { + "epoch": 0.5979708611819012, + "grad_norm": 1.9944126056599865, + "learning_rate": 1.7737408468635115e-06, + "loss": 0.1301, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6218 + }, + { + "epoch": 0.5980670288983988, + "grad_norm": 1.975982560648385, + "learning_rate": 1.7730109593660127e-06, + "loss": 0.127, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6219 + }, + { + "epoch": 0.5981631966148964, + "grad_norm": 1.6135976369725937, + "learning_rate": 1.772281139553218e-06, + "loss": 0.1151, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6220 + }, + { + "epoch": 0.5982593643313939, + "grad_norm": 1.6764441748030652, + "learning_rate": 1.7715513874930766e-06, + "loss": 0.127, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6221 + }, + { + "epoch": 0.5983555320478915, + "grad_norm": 2.0895182059481527, + "learning_rate": 1.7708217032535296e-06, + "loss": 0.1639, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6222 + }, + { + "epoch": 0.5984516997643891, + "grad_norm": 1.9291556743463418, + "learning_rate": 1.7700920869025143e-06, + "loss": 0.1298, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6223 + }, + { + "epoch": 0.5985478674808866, + "grad_norm": 2.004130655626548, + "learning_rate": 1.7693625385079576e-06, + "loss": 0.1336, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6224 + }, + { + "epoch": 0.5986440351973842, + "grad_norm": 2.9865466528410507, + "learning_rate": 1.7686330581377838e-06, + "loss": 0.1263, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6225 + }, + { + "epoch": 0.5987402029138819, + "grad_norm": 1.6656381791608226, + "learning_rate": 1.7679036458599098e-06, + "loss": 0.1096, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6226 + }, + { + "epoch": 0.5988363706303794, + "grad_norm": 1.851737337157359, + "learning_rate": 1.7671743017422449e-06, + "loss": 0.1371, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6227 + }, + { + "epoch": 0.598932538346877, + "grad_norm": 2.4130970961291975, + "learning_rate": 1.7664450258526944e-06, + "loss": 0.1306, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6228 + }, + { + "epoch": 0.5990287060633745, + "grad_norm": 1.788074703147678, + "learning_rate": 1.7657158182591557e-06, + "loss": 0.1234, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6229 + }, + { + "epoch": 0.5991248737798721, + "grad_norm": 1.678428016090452, + "learning_rate": 1.764986679029519e-06, + "loss": 0.1097, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6230 + }, + { + "epoch": 0.5992210414963697, + "grad_norm": 1.793847788551433, + "learning_rate": 1.7642576082316696e-06, + "loss": 0.1262, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6231 + }, + { + "epoch": 0.5993172092128672, + "grad_norm": 1.6204286115689217, + "learning_rate": 1.763528605933486e-06, + "loss": 0.128, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6232 + }, + { + "epoch": 0.5994133769293648, + "grad_norm": 1.7569550303979387, + "learning_rate": 1.7627996722028415e-06, + "loss": 0.1052, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6233 + }, + { + "epoch": 0.5995095446458624, + "grad_norm": 1.6597541840236756, + "learning_rate": 1.7620708071076004e-06, + "loss": 0.1339, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6234 + }, + { + "epoch": 0.5996057123623599, + "grad_norm": 2.3872166729830524, + "learning_rate": 1.7613420107156227e-06, + "loss": 0.1408, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6235 + }, + { + "epoch": 0.5997018800788575, + "grad_norm": 1.8227479402415923, + "learning_rate": 1.7606132830947615e-06, + "loss": 0.1314, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6236 + }, + { + "epoch": 0.5997980477953551, + "grad_norm": 2.000261903292222, + "learning_rate": 1.7598846243128626e-06, + "loss": 0.1292, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6237 + }, + { + "epoch": 0.5998942155118526, + "grad_norm": 1.4266188574992806, + "learning_rate": 1.7591560344377667e-06, + "loss": 0.0969, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6238 + }, + { + "epoch": 0.5999903832283502, + "grad_norm": 1.6976493216081558, + "learning_rate": 1.7584275135373077e-06, + "loss": 0.1354, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6239 + }, + { + "epoch": 0.6000865509448479, + "grad_norm": 1.419216345051924, + "learning_rate": 1.7576990616793139e-06, + "loss": 0.124, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6240 + }, + { + "epoch": 0.6001827186613454, + "grad_norm": 3.14745870779377, + "learning_rate": 1.7569706789316037e-06, + "loss": 0.1479, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6241 + }, + { + "epoch": 0.600278886377843, + "grad_norm": 2.2214617640815306, + "learning_rate": 1.7562423653619931e-06, + "loss": 0.1357, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6242 + }, + { + "epoch": 0.6003750540943406, + "grad_norm": 2.8088965626033082, + "learning_rate": 1.7555141210382897e-06, + "loss": 0.1456, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6243 + }, + { + "epoch": 0.6004712218108381, + "grad_norm": 2.2464084237952906, + "learning_rate": 1.7547859460282957e-06, + "loss": 0.1134, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6244 + }, + { + "epoch": 0.6005673895273357, + "grad_norm": 1.6598696821615766, + "learning_rate": 1.7540578403998055e-06, + "loss": 0.1236, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6245 + }, + { + "epoch": 0.6006635572438332, + "grad_norm": 1.4935746448653227, + "learning_rate": 1.7533298042206096e-06, + "loss": 0.1158, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6246 + }, + { + "epoch": 0.6007597249603308, + "grad_norm": 2.7541438844804764, + "learning_rate": 1.7526018375584875e-06, + "loss": 0.1335, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6247 + }, + { + "epoch": 0.6008558926768284, + "grad_norm": 3.0250902964836466, + "learning_rate": 1.7518739404812158e-06, + "loss": 0.122, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6248 + }, + { + "epoch": 0.6009520603933259, + "grad_norm": 2.7336438978295066, + "learning_rate": 1.7511461130565644e-06, + "loss": 0.1189, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6249 + }, + { + "epoch": 0.6010482281098235, + "grad_norm": 2.488619868016798, + "learning_rate": 1.7504183553522962e-06, + "loss": 0.1233, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6250 + }, + { + "epoch": 0.6011443958263211, + "grad_norm": 1.647552828400378, + "learning_rate": 1.749690667436167e-06, + "loss": 0.1269, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6251 + }, + { + "epoch": 0.6012405635428186, + "grad_norm": 2.08815231989441, + "learning_rate": 1.7489630493759264e-06, + "loss": 0.1356, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6252 + }, + { + "epoch": 0.6013367312593162, + "grad_norm": 1.7954198597865496, + "learning_rate": 1.7482355012393177e-06, + "loss": 0.143, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6253 + }, + { + "epoch": 0.6014328989758139, + "grad_norm": 2.5407387790598883, + "learning_rate": 1.7475080230940778e-06, + "loss": 0.1532, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6254 + }, + { + "epoch": 0.6015290666923114, + "grad_norm": 2.3224563500237463, + "learning_rate": 1.7467806150079367e-06, + "loss": 0.1194, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6255 + }, + { + "epoch": 0.601625234408809, + "grad_norm": 1.968507129978152, + "learning_rate": 1.7460532770486185e-06, + "loss": 0.133, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6256 + }, + { + "epoch": 0.6017214021253066, + "grad_norm": 1.4960749776478204, + "learning_rate": 1.7453260092838414e-06, + "loss": 0.1147, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6257 + }, + { + "epoch": 0.6018175698418041, + "grad_norm": 1.5643629528398846, + "learning_rate": 1.7445988117813134e-06, + "loss": 0.1359, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6258 + }, + { + "epoch": 0.6019137375583017, + "grad_norm": 1.5588574265721447, + "learning_rate": 1.7438716846087405e-06, + "loss": 0.1042, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6259 + }, + { + "epoch": 0.6020099052747993, + "grad_norm": 1.7770693993667224, + "learning_rate": 1.74314462783382e-06, + "loss": 0.1185, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6260 + }, + { + "epoch": 0.6021060729912968, + "grad_norm": 1.510333639542056, + "learning_rate": 1.7424176415242429e-06, + "loss": 0.1189, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6261 + }, + { + "epoch": 0.6022022407077944, + "grad_norm": 1.8055861803102746, + "learning_rate": 1.741690725747693e-06, + "loss": 0.1364, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6262 + }, + { + "epoch": 0.6022984084242919, + "grad_norm": 1.7226084810961382, + "learning_rate": 1.7409638805718501e-06, + "loss": 0.1181, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6263 + }, + { + "epoch": 0.6023945761407895, + "grad_norm": 1.4428104672636746, + "learning_rate": 1.740237106064383e-06, + "loss": 0.1075, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6264 + }, + { + "epoch": 0.6024907438572871, + "grad_norm": 2.927197685731081, + "learning_rate": 1.7395104022929576e-06, + "loss": 0.1232, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6265 + }, + { + "epoch": 0.6025869115737846, + "grad_norm": 1.5984645109421312, + "learning_rate": 1.738783769325233e-06, + "loss": 0.1259, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6266 + }, + { + "epoch": 0.6026830792902822, + "grad_norm": 2.197393489608533, + "learning_rate": 1.7380572072288588e-06, + "loss": 0.1496, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6267 + }, + { + "epoch": 0.6027792470067799, + "grad_norm": 1.9695585892250478, + "learning_rate": 1.7373307160714814e-06, + "loss": 0.1266, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6268 + }, + { + "epoch": 0.6028754147232774, + "grad_norm": 1.4137249495098165, + "learning_rate": 1.7366042959207388e-06, + "loss": 0.1049, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6269 + }, + { + "epoch": 0.602971582439775, + "grad_norm": 2.2702861918066595, + "learning_rate": 1.7358779468442621e-06, + "loss": 0.1186, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6270 + }, + { + "epoch": 0.6030677501562726, + "grad_norm": 1.5517852839012405, + "learning_rate": 1.7351516689096771e-06, + "loss": 0.1427, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6271 + }, + { + "epoch": 0.6031639178727701, + "grad_norm": 2.021451847457112, + "learning_rate": 1.7344254621846018e-06, + "loss": 0.1635, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6272 + }, + { + "epoch": 0.6032600855892677, + "grad_norm": 1.4396538970208486, + "learning_rate": 1.7336993267366486e-06, + "loss": 0.1172, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6273 + }, + { + "epoch": 0.6033562533057653, + "grad_norm": 1.615625723674667, + "learning_rate": 1.7329732626334237e-06, + "loss": 0.1345, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6274 + }, + { + "epoch": 0.6034524210222628, + "grad_norm": 1.6908602306702827, + "learning_rate": 1.7322472699425236e-06, + "loss": 0.1158, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6275 + }, + { + "epoch": 0.6035485887387604, + "grad_norm": 1.6880689365314268, + "learning_rate": 1.7315213487315409e-06, + "loss": 0.1436, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6276 + }, + { + "epoch": 0.603644756455258, + "grad_norm": 1.8042708068970352, + "learning_rate": 1.7307954990680609e-06, + "loss": 0.127, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6277 + }, + { + "epoch": 0.6037409241717555, + "grad_norm": 1.4640574543821365, + "learning_rate": 1.730069721019663e-06, + "loss": 0.1203, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6278 + }, + { + "epoch": 0.6038370918882531, + "grad_norm": 1.3512908858594772, + "learning_rate": 1.7293440146539195e-06, + "loss": 0.1217, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6279 + }, + { + "epoch": 0.6039332596047506, + "grad_norm": 2.0661716004263524, + "learning_rate": 1.7286183800383937e-06, + "loss": 0.1442, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6280 + }, + { + "epoch": 0.6040294273212482, + "grad_norm": 1.571061460645406, + "learning_rate": 1.7278928172406456e-06, + "loss": 0.1245, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6281 + }, + { + "epoch": 0.6041255950377459, + "grad_norm": 1.5757660419708268, + "learning_rate": 1.7271673263282266e-06, + "loss": 0.1403, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6282 + }, + { + "epoch": 0.6042217627542434, + "grad_norm": 1.4812217600314352, + "learning_rate": 1.7264419073686827e-06, + "loss": 0.1355, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6283 + }, + { + "epoch": 0.604317930470741, + "grad_norm": 2.1311870404121382, + "learning_rate": 1.7257165604295514e-06, + "loss": 0.1397, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6284 + }, + { + "epoch": 0.6044140981872386, + "grad_norm": 1.913276524954476, + "learning_rate": 1.7249912855783658e-06, + "loss": 0.1307, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6285 + }, + { + "epoch": 0.6045102659037361, + "grad_norm": 1.3690888553177185, + "learning_rate": 1.72426608288265e-06, + "loss": 0.0889, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6286 + }, + { + "epoch": 0.6046064336202337, + "grad_norm": 2.0620161247790603, + "learning_rate": 1.723540952409922e-06, + "loss": 0.1021, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6287 + }, + { + "epoch": 0.6047026013367313, + "grad_norm": 1.7988717219919854, + "learning_rate": 1.7228158942276942e-06, + "loss": 0.1214, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6288 + }, + { + "epoch": 0.6047987690532288, + "grad_norm": 1.406745096321977, + "learning_rate": 1.7220909084034715e-06, + "loss": 0.095, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6289 + }, + { + "epoch": 0.6048949367697264, + "grad_norm": 1.2194409187269524, + "learning_rate": 1.7213659950047529e-06, + "loss": 0.1066, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6290 + }, + { + "epoch": 0.604991104486224, + "grad_norm": 1.600186505822981, + "learning_rate": 1.7206411540990282e-06, + "loss": 0.0963, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6291 + }, + { + "epoch": 0.6050872722027215, + "grad_norm": 1.9513578145598782, + "learning_rate": 1.7199163857537825e-06, + "loss": 0.1295, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6292 + }, + { + "epoch": 0.6051834399192191, + "grad_norm": 1.6654748987649508, + "learning_rate": 1.7191916900364948e-06, + "loss": 0.1072, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6293 + }, + { + "epoch": 0.6052796076357166, + "grad_norm": 1.9625672878996452, + "learning_rate": 1.718467067014635e-06, + "loss": 0.1102, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6294 + }, + { + "epoch": 0.6053757753522142, + "grad_norm": 1.871566862388607, + "learning_rate": 1.7177425167556683e-06, + "loss": 0.121, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6295 + }, + { + "epoch": 0.6054719430687119, + "grad_norm": 2.423832433623372, + "learning_rate": 1.7170180393270533e-06, + "loss": 0.1207, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6296 + }, + { + "epoch": 0.6055681107852094, + "grad_norm": 1.9083909501631853, + "learning_rate": 1.716293634796239e-06, + "loss": 0.1379, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6297 + }, + { + "epoch": 0.605664278501707, + "grad_norm": 1.866842141419023, + "learning_rate": 1.7155693032306698e-06, + "loss": 0.1171, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6298 + }, + { + "epoch": 0.6057604462182046, + "grad_norm": 2.7587278378652695, + "learning_rate": 1.714845044697784e-06, + "loss": 0.125, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6299 + }, + { + "epoch": 0.6058566139347021, + "grad_norm": 3.235936414587901, + "learning_rate": 1.714120859265011e-06, + "loss": 0.1307, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6300 + }, + { + "epoch": 0.6059527816511997, + "grad_norm": 3.356488338257829, + "learning_rate": 1.7133967469997753e-06, + "loss": 0.1459, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6301 + }, + { + "epoch": 0.6060489493676973, + "grad_norm": 1.9099563953074519, + "learning_rate": 1.7126727079694937e-06, + "loss": 0.1184, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6302 + }, + { + "epoch": 0.6061451170841948, + "grad_norm": 1.3827602212519918, + "learning_rate": 1.7119487422415764e-06, + "loss": 0.096, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6303 + }, + { + "epoch": 0.6062412848006924, + "grad_norm": 2.550800380841578, + "learning_rate": 1.7112248498834256e-06, + "loss": 0.1468, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6304 + }, + { + "epoch": 0.60633745251719, + "grad_norm": 1.7227113422391986, + "learning_rate": 1.7105010309624381e-06, + "loss": 0.1037, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6305 + }, + { + "epoch": 0.6064336202336875, + "grad_norm": 1.7556833336237971, + "learning_rate": 1.7097772855460038e-06, + "loss": 0.1269, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6306 + }, + { + "epoch": 0.6065297879501851, + "grad_norm": 1.6512505388512904, + "learning_rate": 1.7090536137015062e-06, + "loss": 0.1101, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6307 + }, + { + "epoch": 0.6066259556666826, + "grad_norm": 3.4716447308430256, + "learning_rate": 1.7083300154963195e-06, + "loss": 0.1628, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6308 + }, + { + "epoch": 0.6067221233831802, + "grad_norm": 2.0762794255293544, + "learning_rate": 1.7076064909978133e-06, + "loss": 0.1431, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6309 + }, + { + "epoch": 0.6068182910996779, + "grad_norm": 2.765162021226757, + "learning_rate": 1.70688304027335e-06, + "loss": 0.128, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6310 + }, + { + "epoch": 0.6069144588161755, + "grad_norm": 1.8046367086408823, + "learning_rate": 1.706159663390285e-06, + "loss": 0.1293, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6311 + }, + { + "epoch": 0.607010626532673, + "grad_norm": 1.4971579615002168, + "learning_rate": 1.705436360415966e-06, + "loss": 0.1078, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6312 + }, + { + "epoch": 0.6071067942491706, + "grad_norm": 2.005404350642382, + "learning_rate": 1.7047131314177364e-06, + "loss": 0.1336, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6313 + }, + { + "epoch": 0.6072029619656681, + "grad_norm": 2.0501715369204523, + "learning_rate": 1.7039899764629287e-06, + "loss": 0.1546, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6314 + }, + { + "epoch": 0.6072991296821657, + "grad_norm": 1.8915629210989888, + "learning_rate": 1.7032668956188708e-06, + "loss": 0.1331, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6315 + }, + { + "epoch": 0.6073952973986633, + "grad_norm": 2.466528978133306, + "learning_rate": 1.702543888952885e-06, + "loss": 0.1154, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6316 + }, + { + "epoch": 0.6074914651151608, + "grad_norm": 1.2842106939113147, + "learning_rate": 1.7018209565322841e-06, + "loss": 0.1006, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6317 + }, + { + "epoch": 0.6075876328316584, + "grad_norm": 1.7834795178308085, + "learning_rate": 1.7010980984243756e-06, + "loss": 0.1407, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6318 + }, + { + "epoch": 0.607683800548156, + "grad_norm": 1.628421236372203, + "learning_rate": 1.7003753146964594e-06, + "loss": 0.1252, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6319 + }, + { + "epoch": 0.6077799682646535, + "grad_norm": 1.9434415279850163, + "learning_rate": 1.6996526054158283e-06, + "loss": 0.132, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6320 + }, + { + "epoch": 0.6078761359811511, + "grad_norm": 2.2573725586657254, + "learning_rate": 1.698929970649769e-06, + "loss": 0.0972, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6321 + }, + { + "epoch": 0.6079723036976487, + "grad_norm": 2.626653375882977, + "learning_rate": 1.6982074104655605e-06, + "loss": 0.1236, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6322 + }, + { + "epoch": 0.6080684714141462, + "grad_norm": 1.6451254909962847, + "learning_rate": 1.6974849249304754e-06, + "loss": 0.1297, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6323 + }, + { + "epoch": 0.6081646391306439, + "grad_norm": 1.823236334605173, + "learning_rate": 1.6967625141117806e-06, + "loss": 0.1283, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6324 + }, + { + "epoch": 0.6082608068471415, + "grad_norm": 1.5612772581531178, + "learning_rate": 1.6960401780767317e-06, + "loss": 0.0939, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6325 + }, + { + "epoch": 0.608356974563639, + "grad_norm": 2.949227745089865, + "learning_rate": 1.6953179168925816e-06, + "loss": 0.1283, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6326 + }, + { + "epoch": 0.6084531422801366, + "grad_norm": 1.9880083189837312, + "learning_rate": 1.6945957306265749e-06, + "loss": 0.1298, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6327 + }, + { + "epoch": 0.6085493099966341, + "grad_norm": 1.9993139360226249, + "learning_rate": 1.6938736193459487e-06, + "loss": 0.1429, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6328 + }, + { + "epoch": 0.6086454777131317, + "grad_norm": 1.538450452272398, + "learning_rate": 1.6931515831179343e-06, + "loss": 0.1016, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6329 + }, + { + "epoch": 0.6087416454296293, + "grad_norm": 2.1685076562415215, + "learning_rate": 1.6924296220097559e-06, + "loss": 0.125, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6330 + }, + { + "epoch": 0.6088378131461268, + "grad_norm": 1.5062622887330943, + "learning_rate": 1.691707736088628e-06, + "loss": 0.1027, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6331 + }, + { + "epoch": 0.6089339808626244, + "grad_norm": 1.8464508303059595, + "learning_rate": 1.6909859254217615e-06, + "loss": 0.114, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6332 + }, + { + "epoch": 0.609030148579122, + "grad_norm": 1.46618893078713, + "learning_rate": 1.6902641900763592e-06, + "loss": 0.1085, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6333 + }, + { + "epoch": 0.6091263162956195, + "grad_norm": 1.9872184124287438, + "learning_rate": 1.6895425301196157e-06, + "loss": 0.126, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6334 + }, + { + "epoch": 0.6092224840121171, + "grad_norm": 1.7687066204161368, + "learning_rate": 1.6888209456187204e-06, + "loss": 0.1177, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6335 + }, + { + "epoch": 0.6093186517286147, + "grad_norm": 1.627924340002884, + "learning_rate": 1.6880994366408548e-06, + "loss": 0.1225, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6336 + }, + { + "epoch": 0.6094148194451122, + "grad_norm": 1.7697984972334513, + "learning_rate": 1.6873780032531928e-06, + "loss": 0.1369, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6337 + }, + { + "epoch": 0.6095109871616099, + "grad_norm": 2.0248876754190746, + "learning_rate": 1.6866566455229017e-06, + "loss": 0.1359, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6338 + }, + { + "epoch": 0.6096071548781075, + "grad_norm": 1.7346524991476384, + "learning_rate": 1.6859353635171427e-06, + "loss": 0.1329, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6339 + }, + { + "epoch": 0.609703322594605, + "grad_norm": 2.3277257163873655, + "learning_rate": 1.685214157303069e-06, + "loss": 0.0951, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6340 + }, + { + "epoch": 0.6097994903111026, + "grad_norm": 2.1685678975185017, + "learning_rate": 1.6844930269478274e-06, + "loss": 0.1326, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6341 + }, + { + "epoch": 0.6098956580276002, + "grad_norm": 2.000513219529627, + "learning_rate": 1.6837719725185557e-06, + "loss": 0.1197, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6342 + }, + { + "epoch": 0.6099918257440977, + "grad_norm": 1.7272929292849923, + "learning_rate": 1.683050994082387e-06, + "loss": 0.1105, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6343 + }, + { + "epoch": 0.6100879934605953, + "grad_norm": 1.7013216336678816, + "learning_rate": 1.6823300917064462e-06, + "loss": 0.1081, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6344 + }, + { + "epoch": 0.6101841611770928, + "grad_norm": 2.2119250536693666, + "learning_rate": 1.6816092654578511e-06, + "loss": 0.1377, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6345 + }, + { + "epoch": 0.6102803288935904, + "grad_norm": 3.4141560219874973, + "learning_rate": 1.6808885154037141e-06, + "loss": 0.1449, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6346 + }, + { + "epoch": 0.610376496610088, + "grad_norm": 3.1771957270412265, + "learning_rate": 1.6801678416111372e-06, + "loss": 0.1174, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6347 + }, + { + "epoch": 0.6104726643265855, + "grad_norm": 1.332389602766002, + "learning_rate": 1.6794472441472175e-06, + "loss": 0.1025, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6348 + }, + { + "epoch": 0.6105688320430831, + "grad_norm": 2.463961266877368, + "learning_rate": 1.6787267230790455e-06, + "loss": 0.1202, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6349 + }, + { + "epoch": 0.6106649997595807, + "grad_norm": 2.129111267953623, + "learning_rate": 1.6780062784737027e-06, + "loss": 0.1433, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6350 + }, + { + "epoch": 0.6107611674760782, + "grad_norm": 1.4337430874828967, + "learning_rate": 1.6772859103982653e-06, + "loss": 0.0978, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6351 + }, + { + "epoch": 0.6108573351925759, + "grad_norm": 2.4292544879095126, + "learning_rate": 1.6765656189198013e-06, + "loss": 0.1167, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6352 + }, + { + "epoch": 0.6109535029090735, + "grad_norm": 2.2645346541214755, + "learning_rate": 1.6758454041053718e-06, + "loss": 0.1291, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6353 + }, + { + "epoch": 0.611049670625571, + "grad_norm": 3.4173789214921593, + "learning_rate": 1.6751252660220305e-06, + "loss": 0.1326, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6354 + }, + { + "epoch": 0.6111458383420686, + "grad_norm": 4.414522142118542, + "learning_rate": 1.6744052047368245e-06, + "loss": 0.1357, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6355 + }, + { + "epoch": 0.6112420060585662, + "grad_norm": 1.6557416192272518, + "learning_rate": 1.6736852203167936e-06, + "loss": 0.1178, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6356 + }, + { + "epoch": 0.6113381737750637, + "grad_norm": 1.644478039409907, + "learning_rate": 1.6729653128289703e-06, + "loss": 0.1024, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6357 + }, + { + "epoch": 0.6114343414915613, + "grad_norm": 1.7591374981124965, + "learning_rate": 1.672245482340381e-06, + "loss": 0.1227, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6358 + }, + { + "epoch": 0.6115305092080588, + "grad_norm": 1.4810078650669323, + "learning_rate": 1.6715257289180414e-06, + "loss": 0.0946, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6359 + }, + { + "epoch": 0.6116266769245564, + "grad_norm": 3.7330181088838073, + "learning_rate": 1.6708060526289648e-06, + "loss": 0.1571, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6360 + }, + { + "epoch": 0.611722844641054, + "grad_norm": 2.1222970140540296, + "learning_rate": 1.670086453540154e-06, + "loss": 0.1163, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6361 + }, + { + "epoch": 0.6118190123575515, + "grad_norm": 2.180943690352887, + "learning_rate": 1.6693669317186063e-06, + "loss": 0.1245, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6362 + }, + { + "epoch": 0.6119151800740491, + "grad_norm": 2.347464338133043, + "learning_rate": 1.6686474872313116e-06, + "loss": 0.1325, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6363 + }, + { + "epoch": 0.6120113477905467, + "grad_norm": 1.856970620700905, + "learning_rate": 1.6679281201452508e-06, + "loss": 0.1207, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6364 + }, + { + "epoch": 0.6121075155070442, + "grad_norm": 1.9684919675977788, + "learning_rate": 1.6672088305273993e-06, + "loss": 0.1144, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6365 + }, + { + "epoch": 0.6122036832235419, + "grad_norm": 1.6571374379976322, + "learning_rate": 1.666489618444726e-06, + "loss": 0.1219, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6366 + }, + { + "epoch": 0.6122998509400395, + "grad_norm": 1.833282663010361, + "learning_rate": 1.6657704839641905e-06, + "loss": 0.1362, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6367 + }, + { + "epoch": 0.612396018656537, + "grad_norm": 1.6421439094103487, + "learning_rate": 1.6650514271527468e-06, + "loss": 0.1198, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6368 + }, + { + "epoch": 0.6124921863730346, + "grad_norm": 1.5028627200968094, + "learning_rate": 1.6643324480773416e-06, + "loss": 0.1095, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6369 + }, + { + "epoch": 0.6125883540895322, + "grad_norm": 1.7437953037919864, + "learning_rate": 1.6636135468049122e-06, + "loss": 0.1199, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6370 + }, + { + "epoch": 0.6126845218060297, + "grad_norm": 1.9384952163786047, + "learning_rate": 1.6628947234023913e-06, + "loss": 0.1064, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6371 + }, + { + "epoch": 0.6127806895225273, + "grad_norm": 3.86033190738514, + "learning_rate": 1.6621759779367034e-06, + "loss": 0.1613, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6372 + }, + { + "epoch": 0.6128768572390249, + "grad_norm": 1.7338927864327884, + "learning_rate": 1.6614573104747656e-06, + "loss": 0.1063, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6373 + }, + { + "epoch": 0.6129730249555224, + "grad_norm": 3.097348681042175, + "learning_rate": 1.6607387210834889e-06, + "loss": 0.1467, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6374 + }, + { + "epoch": 0.61306919267202, + "grad_norm": 1.7363193749250063, + "learning_rate": 1.6600202098297737e-06, + "loss": 0.1039, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6375 + }, + { + "epoch": 0.6131653603885175, + "grad_norm": 1.6527888031784213, + "learning_rate": 1.659301776780517e-06, + "loss": 0.1328, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6376 + }, + { + "epoch": 0.6132615281050151, + "grad_norm": 2.0358190077151543, + "learning_rate": 1.6585834220026064e-06, + "loss": 0.124, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6377 + }, + { + "epoch": 0.6133576958215127, + "grad_norm": 1.5269757561463388, + "learning_rate": 1.657865145562923e-06, + "loss": 0.1136, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6378 + }, + { + "epoch": 0.6134538635380102, + "grad_norm": 1.5030154370255822, + "learning_rate": 1.6571469475283403e-06, + "loss": 0.1194, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6379 + }, + { + "epoch": 0.6135500312545079, + "grad_norm": 1.5705789244392474, + "learning_rate": 1.6564288279657253e-06, + "loss": 0.1181, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6380 + }, + { + "epoch": 0.6136461989710055, + "grad_norm": 1.2812037855626563, + "learning_rate": 1.655710786941935e-06, + "loss": 0.0987, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6381 + }, + { + "epoch": 0.613742366687503, + "grad_norm": 1.7477717629179896, + "learning_rate": 1.6549928245238222e-06, + "loss": 0.1174, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6382 + }, + { + "epoch": 0.6138385344040006, + "grad_norm": 1.3640716241787276, + "learning_rate": 1.6542749407782322e-06, + "loss": 0.1019, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6383 + }, + { + "epoch": 0.6139347021204982, + "grad_norm": 1.8428759313744898, + "learning_rate": 1.6535571357719998e-06, + "loss": 0.1226, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6384 + }, + { + "epoch": 0.6140308698369957, + "grad_norm": 1.6075835840607993, + "learning_rate": 1.6528394095719558e-06, + "loss": 0.1206, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6385 + }, + { + "epoch": 0.6141270375534933, + "grad_norm": 1.810893998080676, + "learning_rate": 1.6521217622449238e-06, + "loss": 0.122, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6386 + }, + { + "epoch": 0.6142232052699909, + "grad_norm": 1.9026830817515539, + "learning_rate": 1.6514041938577158e-06, + "loss": 0.1212, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6387 + }, + { + "epoch": 0.6143193729864884, + "grad_norm": 2.130506066252795, + "learning_rate": 1.6506867044771417e-06, + "loss": 0.1309, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6388 + }, + { + "epoch": 0.614415540702986, + "grad_norm": 2.9082875402669512, + "learning_rate": 1.6499692941700007e-06, + "loss": 0.1339, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6389 + }, + { + "epoch": 0.6145117084194835, + "grad_norm": 2.238053545202923, + "learning_rate": 1.6492519630030861e-06, + "loss": 0.0942, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6390 + }, + { + "epoch": 0.6146078761359811, + "grad_norm": 1.5005215311452713, + "learning_rate": 1.648534711043185e-06, + "loss": 0.1049, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6391 + }, + { + "epoch": 0.6147040438524787, + "grad_norm": 1.7222008762773324, + "learning_rate": 1.647817538357072e-06, + "loss": 0.118, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6392 + }, + { + "epoch": 0.6148002115689762, + "grad_norm": 4.042470478448548, + "learning_rate": 1.6471004450115208e-06, + "loss": 0.1395, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6393 + }, + { + "epoch": 0.6148963792854739, + "grad_norm": 2.4543983661403295, + "learning_rate": 1.6463834310732935e-06, + "loss": 0.1009, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6394 + }, + { + "epoch": 0.6149925470019715, + "grad_norm": 1.8674164796521517, + "learning_rate": 1.6456664966091463e-06, + "loss": 0.1234, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6395 + }, + { + "epoch": 0.615088714718469, + "grad_norm": 1.3874372445161451, + "learning_rate": 1.6449496416858285e-06, + "loss": 0.0961, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6396 + }, + { + "epoch": 0.6151848824349666, + "grad_norm": 1.8687355141604933, + "learning_rate": 1.6442328663700806e-06, + "loss": 0.1249, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6397 + }, + { + "epoch": 0.6152810501514642, + "grad_norm": 1.5202252112060155, + "learning_rate": 1.6435161707286362e-06, + "loss": 0.0898, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6398 + }, + { + "epoch": 0.6153772178679617, + "grad_norm": 2.5997616987071037, + "learning_rate": 1.6427995548282225e-06, + "loss": 0.1529, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6399 + }, + { + "epoch": 0.6154733855844593, + "grad_norm": 2.3466033245713547, + "learning_rate": 1.6420830187355572e-06, + "loss": 0.0984, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6400 + }, + { + "epoch": 0.6155695533009569, + "grad_norm": 1.7201988283467786, + "learning_rate": 1.6413665625173526e-06, + "loss": 0.1173, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6401 + }, + { + "epoch": 0.6156657210174544, + "grad_norm": 1.4277829213352036, + "learning_rate": 1.6406501862403132e-06, + "loss": 0.0958, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6402 + }, + { + "epoch": 0.615761888733952, + "grad_norm": 2.8519547215473353, + "learning_rate": 1.6399338899711353e-06, + "loss": 0.1326, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6403 + }, + { + "epoch": 0.6158580564504496, + "grad_norm": 3.380300153335186, + "learning_rate": 1.639217673776507e-06, + "loss": 0.1349, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6404 + }, + { + "epoch": 0.6159542241669471, + "grad_norm": 2.4854165931105463, + "learning_rate": 1.6385015377231112e-06, + "loss": 0.1429, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6405 + }, + { + "epoch": 0.6160503918834447, + "grad_norm": 2.629612833820735, + "learning_rate": 1.6377854818776218e-06, + "loss": 0.112, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6406 + }, + { + "epoch": 0.6161465595999422, + "grad_norm": 2.1043691055598885, + "learning_rate": 1.6370695063067054e-06, + "loss": 0.1194, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6407 + }, + { + "epoch": 0.6162427273164399, + "grad_norm": 2.249619593651315, + "learning_rate": 1.636353611077023e-06, + "loss": 0.1441, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6408 + }, + { + "epoch": 0.6163388950329375, + "grad_norm": 1.6967924823548732, + "learning_rate": 1.6356377962552239e-06, + "loss": 0.1201, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6409 + }, + { + "epoch": 0.616435062749435, + "grad_norm": 2.201598846963732, + "learning_rate": 1.6349220619079533e-06, + "loss": 0.1337, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6410 + }, + { + "epoch": 0.6165312304659326, + "grad_norm": 2.080414141609389, + "learning_rate": 1.6342064081018486e-06, + "loss": 0.1395, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6411 + }, + { + "epoch": 0.6166273981824302, + "grad_norm": 2.0081749339558237, + "learning_rate": 1.6334908349035388e-06, + "loss": 0.1083, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6412 + }, + { + "epoch": 0.6167235658989277, + "grad_norm": 2.404955105457695, + "learning_rate": 1.6327753423796463e-06, + "loss": 0.1606, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6413 + }, + { + "epoch": 0.6168197336154253, + "grad_norm": 1.875720998673636, + "learning_rate": 1.6320599305967852e-06, + "loss": 0.1182, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6414 + }, + { + "epoch": 0.6169159013319229, + "grad_norm": 2.0097574822406385, + "learning_rate": 1.6313445996215615e-06, + "loss": 0.1248, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6415 + }, + { + "epoch": 0.6170120690484204, + "grad_norm": 2.452674806072063, + "learning_rate": 1.6306293495205758e-06, + "loss": 0.1604, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6416 + }, + { + "epoch": 0.617108236764918, + "grad_norm": 1.3462679986589194, + "learning_rate": 1.6299141803604185e-06, + "loss": 0.1092, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6417 + }, + { + "epoch": 0.6172044044814156, + "grad_norm": 2.2990189132692076, + "learning_rate": 1.6291990922076744e-06, + "loss": 0.133, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6418 + }, + { + "epoch": 0.6173005721979131, + "grad_norm": 2.0108042605968817, + "learning_rate": 1.628484085128922e-06, + "loss": 0.118, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6419 + }, + { + "epoch": 0.6173967399144107, + "grad_norm": 2.1909245105660253, + "learning_rate": 1.6277691591907272e-06, + "loss": 0.119, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6420 + }, + { + "epoch": 0.6174929076309082, + "grad_norm": 2.012235595103658, + "learning_rate": 1.6270543144596534e-06, + "loss": 0.1099, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6421 + }, + { + "epoch": 0.6175890753474059, + "grad_norm": 1.7524033664860836, + "learning_rate": 1.6263395510022546e-06, + "loss": 0.1224, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6422 + }, + { + "epoch": 0.6176852430639035, + "grad_norm": 2.2647600254232407, + "learning_rate": 1.6256248688850767e-06, + "loss": 0.1206, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6423 + }, + { + "epoch": 0.617781410780401, + "grad_norm": 1.5897183315355023, + "learning_rate": 1.6249102681746593e-06, + "loss": 0.1224, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6424 + }, + { + "epoch": 0.6178775784968986, + "grad_norm": 2.0515314307593386, + "learning_rate": 1.6241957489375343e-06, + "loss": 0.1656, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6425 + }, + { + "epoch": 0.6179737462133962, + "grad_norm": 1.377755089412923, + "learning_rate": 1.6234813112402237e-06, + "loss": 0.1076, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6426 + }, + { + "epoch": 0.6180699139298937, + "grad_norm": 1.7467254228984102, + "learning_rate": 1.6227669551492443e-06, + "loss": 0.1233, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6427 + }, + { + "epoch": 0.6181660816463913, + "grad_norm": 2.5223679780450126, + "learning_rate": 1.6220526807311048e-06, + "loss": 0.0982, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6428 + }, + { + "epoch": 0.6182622493628889, + "grad_norm": 1.2404396993225049, + "learning_rate": 1.6213384880523065e-06, + "loss": 0.0995, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6429 + }, + { + "epoch": 0.6183584170793864, + "grad_norm": 1.302017386477045, + "learning_rate": 1.6206243771793434e-06, + "loss": 0.1023, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6430 + }, + { + "epoch": 0.618454584795884, + "grad_norm": 1.8003699528135302, + "learning_rate": 1.6199103481786987e-06, + "loss": 0.1258, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6431 + }, + { + "epoch": 0.6185507525123816, + "grad_norm": 1.7024276033716281, + "learning_rate": 1.6191964011168523e-06, + "loss": 0.1516, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6432 + }, + { + "epoch": 0.6186469202288791, + "grad_norm": 3.0917835389295814, + "learning_rate": 1.618482536060275e-06, + "loss": 0.115, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6433 + }, + { + "epoch": 0.6187430879453767, + "grad_norm": 1.6712579039737157, + "learning_rate": 1.6177687530754283e-06, + "loss": 0.1342, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6434 + }, + { + "epoch": 0.6188392556618743, + "grad_norm": 1.8577973302900561, + "learning_rate": 1.617055052228768e-06, + "loss": 0.1109, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6435 + }, + { + "epoch": 0.6189354233783719, + "grad_norm": 2.914673956531093, + "learning_rate": 1.6163414335867428e-06, + "loss": 0.1626, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6436 + }, + { + "epoch": 0.6190315910948695, + "grad_norm": 2.0487263699392484, + "learning_rate": 1.6156278972157907e-06, + "loss": 0.1271, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6437 + }, + { + "epoch": 0.6191277588113671, + "grad_norm": 1.6402265986189604, + "learning_rate": 1.6149144431823444e-06, + "loss": 0.1093, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6438 + }, + { + "epoch": 0.6192239265278646, + "grad_norm": 2.7480556623111987, + "learning_rate": 1.6142010715528289e-06, + "loss": 0.1383, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6439 + }, + { + "epoch": 0.6193200942443622, + "grad_norm": 3.593460521397513, + "learning_rate": 1.613487782393661e-06, + "loss": 0.1419, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6440 + }, + { + "epoch": 0.6194162619608597, + "grad_norm": 3.0857302638201944, + "learning_rate": 1.6127745757712498e-06, + "loss": 0.1112, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6441 + }, + { + "epoch": 0.6195124296773573, + "grad_norm": 1.5229730827997792, + "learning_rate": 1.6120614517519978e-06, + "loss": 0.0989, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6442 + }, + { + "epoch": 0.6196085973938549, + "grad_norm": 14.182938890225874, + "learning_rate": 1.611348410402297e-06, + "loss": 0.1038, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6443 + }, + { + "epoch": 0.6197047651103524, + "grad_norm": 3.2685262807098168, + "learning_rate": 1.6106354517885346e-06, + "loss": 0.1579, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6444 + }, + { + "epoch": 0.61980093282685, + "grad_norm": 2.0572416899760486, + "learning_rate": 1.6099225759770886e-06, + "loss": 0.1465, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6445 + }, + { + "epoch": 0.6198971005433476, + "grad_norm": 1.811668885068153, + "learning_rate": 1.6092097830343307e-06, + "loss": 0.131, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6446 + }, + { + "epoch": 0.6199932682598451, + "grad_norm": 1.7495603068331982, + "learning_rate": 1.608497073026623e-06, + "loss": 0.1463, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6447 + }, + { + "epoch": 0.6200894359763427, + "grad_norm": 1.9490365426923721, + "learning_rate": 1.6077844460203207e-06, + "loss": 0.1069, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6448 + }, + { + "epoch": 0.6201856036928403, + "grad_norm": 1.9822491461783844, + "learning_rate": 1.607071902081772e-06, + "loss": 0.1145, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6449 + }, + { + "epoch": 0.6202817714093379, + "grad_norm": 4.077334671406232, + "learning_rate": 1.6063594412773162e-06, + "loss": 0.1375, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6450 + }, + { + "epoch": 0.6203779391258355, + "grad_norm": 2.706157120937798, + "learning_rate": 1.6056470636732852e-06, + "loss": 0.1252, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6451 + }, + { + "epoch": 0.6204741068423331, + "grad_norm": 2.4469289527610267, + "learning_rate": 1.604934769336004e-06, + "loss": 0.1122, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6452 + }, + { + "epoch": 0.6205702745588306, + "grad_norm": 1.820413605448271, + "learning_rate": 1.60422255833179e-06, + "loss": 0.1091, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6453 + }, + { + "epoch": 0.6206664422753282, + "grad_norm": 1.4543224975649764, + "learning_rate": 1.6035104307269494e-06, + "loss": 0.1043, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6454 + }, + { + "epoch": 0.6207626099918258, + "grad_norm": 2.536306181826394, + "learning_rate": 1.6027983865877852e-06, + "loss": 0.1225, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6455 + }, + { + "epoch": 0.6208587777083233, + "grad_norm": 2.6307396670038674, + "learning_rate": 1.6020864259805902e-06, + "loss": 0.1316, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6456 + }, + { + "epoch": 0.6209549454248209, + "grad_norm": 2.0230433181770464, + "learning_rate": 1.6013745489716502e-06, + "loss": 0.1077, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6457 + }, + { + "epoch": 0.6210511131413184, + "grad_norm": 2.6036740564010636, + "learning_rate": 1.6006627556272436e-06, + "loss": 0.1109, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6458 + }, + { + "epoch": 0.621147280857816, + "grad_norm": 1.5660776192303092, + "learning_rate": 1.5999510460136383e-06, + "loss": 0.1127, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6459 + }, + { + "epoch": 0.6212434485743136, + "grad_norm": 1.3295694428979408, + "learning_rate": 1.599239420197098e-06, + "loss": 0.0991, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6460 + }, + { + "epoch": 0.6213396162908111, + "grad_norm": 1.6378605676255065, + "learning_rate": 1.5985278782438762e-06, + "loss": 0.1039, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6461 + }, + { + "epoch": 0.6214357840073087, + "grad_norm": 1.7944993726678018, + "learning_rate": 1.5978164202202201e-06, + "loss": 0.1247, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6462 + }, + { + "epoch": 0.6215319517238063, + "grad_norm": 1.79590631391049, + "learning_rate": 1.5971050461923688e-06, + "loss": 0.1339, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6463 + }, + { + "epoch": 0.6216281194403039, + "grad_norm": 1.7143156794286214, + "learning_rate": 1.5963937562265524e-06, + "loss": 0.1277, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6464 + }, + { + "epoch": 0.6217242871568015, + "grad_norm": 2.1280757543751783, + "learning_rate": 1.5956825503889938e-06, + "loss": 0.1, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6465 + }, + { + "epoch": 0.6218204548732991, + "grad_norm": 2.245196219278461, + "learning_rate": 1.5949714287459096e-06, + "loss": 0.1423, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6466 + }, + { + "epoch": 0.6219166225897966, + "grad_norm": 1.574735424644923, + "learning_rate": 1.5942603913635052e-06, + "loss": 0.1083, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6467 + }, + { + "epoch": 0.6220127903062942, + "grad_norm": 1.7216279619680865, + "learning_rate": 1.5935494383079814e-06, + "loss": 0.1304, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6468 + }, + { + "epoch": 0.6221089580227918, + "grad_norm": 1.7191101853313178, + "learning_rate": 1.5928385696455298e-06, + "loss": 0.1378, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6469 + }, + { + "epoch": 0.6222051257392893, + "grad_norm": 2.677797387658106, + "learning_rate": 1.592127785442335e-06, + "loss": 0.1098, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6470 + }, + { + "epoch": 0.6223012934557869, + "grad_norm": 2.155958688243279, + "learning_rate": 1.5914170857645717e-06, + "loss": 0.1168, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6471 + }, + { + "epoch": 0.6223974611722844, + "grad_norm": 1.453194932421198, + "learning_rate": 1.5907064706784082e-06, + "loss": 0.105, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6472 + }, + { + "epoch": 0.622493628888782, + "grad_norm": 1.6867122649624777, + "learning_rate": 1.5899959402500049e-06, + "loss": 0.1072, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6473 + }, + { + "epoch": 0.6225897966052796, + "grad_norm": 1.558334942437, + "learning_rate": 1.589285494545514e-06, + "loss": 0.125, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6474 + }, + { + "epoch": 0.6226859643217771, + "grad_norm": 1.7275932769137248, + "learning_rate": 1.5885751336310817e-06, + "loss": 0.105, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6475 + }, + { + "epoch": 0.6227821320382747, + "grad_norm": 2.8731775348308117, + "learning_rate": 1.5878648575728422e-06, + "loss": 0.1239, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6476 + }, + { + "epoch": 0.6228782997547723, + "grad_norm": 1.5494224082749988, + "learning_rate": 1.587154666436925e-06, + "loss": 0.1237, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6477 + }, + { + "epoch": 0.6229744674712699, + "grad_norm": 1.4343560500022896, + "learning_rate": 1.5864445602894509e-06, + "loss": 0.1241, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6478 + }, + { + "epoch": 0.6230706351877675, + "grad_norm": 2.084984429159079, + "learning_rate": 1.5857345391965328e-06, + "loss": 0.1253, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6479 + }, + { + "epoch": 0.6231668029042651, + "grad_norm": 1.7934133346971828, + "learning_rate": 1.5850246032242766e-06, + "loss": 0.1186, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6480 + }, + { + "epoch": 0.6232629706207626, + "grad_norm": 1.6654918913692505, + "learning_rate": 1.584314752438778e-06, + "loss": 0.121, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6481 + }, + { + "epoch": 0.6233591383372602, + "grad_norm": 1.9797187429959098, + "learning_rate": 1.5836049869061265e-06, + "loss": 0.1269, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6482 + }, + { + "epoch": 0.6234553060537578, + "grad_norm": 2.016638618807007, + "learning_rate": 1.5828953066924035e-06, + "loss": 0.1447, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6483 + }, + { + "epoch": 0.6235514737702553, + "grad_norm": 1.8822827801163617, + "learning_rate": 1.5821857118636814e-06, + "loss": 0.1231, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6484 + }, + { + "epoch": 0.6236476414867529, + "grad_norm": 1.5438725926770875, + "learning_rate": 1.5814762024860261e-06, + "loss": 0.1176, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6485 + }, + { + "epoch": 0.6237438092032505, + "grad_norm": 1.4669550082862166, + "learning_rate": 1.5807667786254957e-06, + "loss": 0.1208, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6486 + }, + { + "epoch": 0.623839976919748, + "grad_norm": 1.7943476307652315, + "learning_rate": 1.5800574403481379e-06, + "loss": 0.1077, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6487 + }, + { + "epoch": 0.6239361446362456, + "grad_norm": 1.778570124410202, + "learning_rate": 1.5793481877199946e-06, + "loss": 0.1066, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6488 + }, + { + "epoch": 0.6240323123527431, + "grad_norm": 1.9687008215441846, + "learning_rate": 1.5786390208070995e-06, + "loss": 0.1221, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6489 + }, + { + "epoch": 0.6241284800692407, + "grad_norm": 1.9268636592786443, + "learning_rate": 1.5779299396754777e-06, + "loss": 0.1489, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6490 + }, + { + "epoch": 0.6242246477857383, + "grad_norm": 1.5424171393125323, + "learning_rate": 1.5772209443911472e-06, + "loss": 0.1096, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6491 + }, + { + "epoch": 0.624320815502236, + "grad_norm": 2.4568785756392963, + "learning_rate": 1.5765120350201179e-06, + "loss": 0.1163, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6492 + }, + { + "epoch": 0.6244169832187335, + "grad_norm": 1.4273407471204462, + "learning_rate": 1.5758032116283895e-06, + "loss": 0.0906, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6493 + }, + { + "epoch": 0.6245131509352311, + "grad_norm": 1.6508532446696376, + "learning_rate": 1.5750944742819563e-06, + "loss": 0.1305, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6494 + }, + { + "epoch": 0.6246093186517286, + "grad_norm": 1.5483096408855024, + "learning_rate": 1.574385823046804e-06, + "loss": 0.1092, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6495 + }, + { + "epoch": 0.6247054863682262, + "grad_norm": 1.8680033806408671, + "learning_rate": 1.5736772579889102e-06, + "loss": 0.098, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6496 + }, + { + "epoch": 0.6248016540847238, + "grad_norm": 1.50782238009105, + "learning_rate": 1.572968779174243e-06, + "loss": 0.1007, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6497 + }, + { + "epoch": 0.6248978218012213, + "grad_norm": 2.411289206133889, + "learning_rate": 1.5722603866687658e-06, + "loss": 0.095, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6498 + }, + { + "epoch": 0.6249939895177189, + "grad_norm": 1.9821046684464034, + "learning_rate": 1.5715520805384302e-06, + "loss": 0.1273, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6499 + }, + { + "epoch": 0.6250901572342165, + "grad_norm": 1.9201832173704116, + "learning_rate": 1.5708438608491816e-06, + "loss": 0.1218, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6500 + }, + { + "epoch": 0.625186324950714, + "grad_norm": 2.1627457078889725, + "learning_rate": 1.5701357276669577e-06, + "loss": 0.1208, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6501 + }, + { + "epoch": 0.6252824926672116, + "grad_norm": 2.7581686784770536, + "learning_rate": 1.5694276810576875e-06, + "loss": 0.1319, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6502 + }, + { + "epoch": 0.6253786603837092, + "grad_norm": 1.8352138290034947, + "learning_rate": 1.5687197210872935e-06, + "loss": 0.1082, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6503 + }, + { + "epoch": 0.6254748281002067, + "grad_norm": 3.388337108420569, + "learning_rate": 1.5680118478216865e-06, + "loss": 0.1251, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6504 + }, + { + "epoch": 0.6255709958167043, + "grad_norm": 2.614558949032349, + "learning_rate": 1.567304061326772e-06, + "loss": 0.111, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6505 + }, + { + "epoch": 0.625667163533202, + "grad_norm": 1.9439691258999283, + "learning_rate": 1.5665963616684477e-06, + "loss": 0.1155, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6506 + }, + { + "epoch": 0.6257633312496995, + "grad_norm": 1.5176249635185144, + "learning_rate": 1.5658887489126017e-06, + "loss": 0.1056, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6507 + }, + { + "epoch": 0.6258594989661971, + "grad_norm": 1.9969845641126436, + "learning_rate": 1.5651812231251157e-06, + "loss": 0.1446, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6508 + }, + { + "epoch": 0.6259556666826946, + "grad_norm": 1.7204841318871489, + "learning_rate": 1.564473784371862e-06, + "loss": 0.1141, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6509 + }, + { + "epoch": 0.6260518343991922, + "grad_norm": 2.1066841576675643, + "learning_rate": 1.563766432718704e-06, + "loss": 0.1336, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6510 + }, + { + "epoch": 0.6261480021156898, + "grad_norm": 2.111656436472363, + "learning_rate": 1.5630591682314994e-06, + "loss": 0.1428, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6511 + }, + { + "epoch": 0.6262441698321873, + "grad_norm": 1.688659715768575, + "learning_rate": 1.5623519909760953e-06, + "loss": 0.1131, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6512 + }, + { + "epoch": 0.6263403375486849, + "grad_norm": 2.0845803123971014, + "learning_rate": 1.5616449010183335e-06, + "loss": 0.1127, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6513 + }, + { + "epoch": 0.6264365052651825, + "grad_norm": 1.6549670850798508, + "learning_rate": 1.560937898424045e-06, + "loss": 0.1323, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6514 + }, + { + "epoch": 0.62653267298168, + "grad_norm": 1.6964212382838741, + "learning_rate": 1.5602309832590533e-06, + "loss": 0.1004, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6515 + }, + { + "epoch": 0.6266288406981776, + "grad_norm": 2.3744030796149156, + "learning_rate": 1.5595241555891753e-06, + "loss": 0.129, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6516 + }, + { + "epoch": 0.6267250084146752, + "grad_norm": 1.4893653579474273, + "learning_rate": 1.5588174154802175e-06, + "loss": 0.1179, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6517 + }, + { + "epoch": 0.6268211761311727, + "grad_norm": 1.592882860590189, + "learning_rate": 1.5581107629979798e-06, + "loss": 0.114, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6518 + }, + { + "epoch": 0.6269173438476703, + "grad_norm": 1.7234702581602208, + "learning_rate": 1.5574041982082535e-06, + "loss": 0.139, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6519 + }, + { + "epoch": 0.627013511564168, + "grad_norm": 1.9569375902986903, + "learning_rate": 1.556697721176823e-06, + "loss": 0.1301, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6520 + }, + { + "epoch": 0.6271096792806655, + "grad_norm": 2.161158666413731, + "learning_rate": 1.5559913319694614e-06, + "loss": 0.1124, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6521 + }, + { + "epoch": 0.6272058469971631, + "grad_norm": 1.4109519348783564, + "learning_rate": 1.555285030651936e-06, + "loss": 0.0982, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6522 + }, + { + "epoch": 0.6273020147136606, + "grad_norm": 1.8352280292666927, + "learning_rate": 1.5545788172900057e-06, + "loss": 0.1192, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6523 + }, + { + "epoch": 0.6273981824301582, + "grad_norm": 2.2277096723792478, + "learning_rate": 1.5538726919494206e-06, + "loss": 0.1149, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6524 + }, + { + "epoch": 0.6274943501466558, + "grad_norm": 1.97636986516575, + "learning_rate": 1.5531666546959237e-06, + "loss": 0.132, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6525 + }, + { + "epoch": 0.6275905178631533, + "grad_norm": 1.8360953268200642, + "learning_rate": 1.5524607055952495e-06, + "loss": 0.1073, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6526 + }, + { + "epoch": 0.6276866855796509, + "grad_norm": 1.4771183009565434, + "learning_rate": 1.5517548447131217e-06, + "loss": 0.1142, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6527 + }, + { + "epoch": 0.6277828532961485, + "grad_norm": 1.5896218595740974, + "learning_rate": 1.5510490721152594e-06, + "loss": 0.1229, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6528 + }, + { + "epoch": 0.627879021012646, + "grad_norm": 2.028513220144089, + "learning_rate": 1.5503433878673712e-06, + "loss": 0.1186, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6529 + }, + { + "epoch": 0.6279751887291436, + "grad_norm": 3.4662409781182664, + "learning_rate": 1.5496377920351595e-06, + "loss": 0.1371, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6530 + }, + { + "epoch": 0.6280713564456412, + "grad_norm": 1.4987671135900724, + "learning_rate": 1.548932284684317e-06, + "loss": 0.1018, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6531 + }, + { + "epoch": 0.6281675241621387, + "grad_norm": 2.7806444455594246, + "learning_rate": 1.548226865880527e-06, + "loss": 0.1225, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6532 + }, + { + "epoch": 0.6282636918786363, + "grad_norm": 1.4078203954086692, + "learning_rate": 1.5475215356894673e-06, + "loss": 0.1028, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6533 + }, + { + "epoch": 0.628359859595134, + "grad_norm": 2.0803467946712737, + "learning_rate": 1.546816294176805e-06, + "loss": 0.1351, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6534 + }, + { + "epoch": 0.6284560273116315, + "grad_norm": 1.5660529796138014, + "learning_rate": 1.546111141408201e-06, + "loss": 0.114, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6535 + }, + { + "epoch": 0.6285521950281291, + "grad_norm": 1.7032803232027853, + "learning_rate": 1.545406077449307e-06, + "loss": 0.1104, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6536 + }, + { + "epoch": 0.6286483627446267, + "grad_norm": 2.617691769251172, + "learning_rate": 1.5447011023657666e-06, + "loss": 0.1344, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6537 + }, + { + "epoch": 0.6287445304611242, + "grad_norm": 1.6582741296709103, + "learning_rate": 1.5439962162232135e-06, + "loss": 0.1068, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6538 + }, + { + "epoch": 0.6288406981776218, + "grad_norm": 1.657318187576876, + "learning_rate": 1.5432914190872757e-06, + "loss": 0.1061, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6539 + }, + { + "epoch": 0.6289368658941193, + "grad_norm": 1.7508638716829865, + "learning_rate": 1.5425867110235717e-06, + "loss": 0.1009, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6540 + }, + { + "epoch": 0.6290330336106169, + "grad_norm": 2.2684747535208283, + "learning_rate": 1.5418820920977119e-06, + "loss": 0.122, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6541 + }, + { + "epoch": 0.6291292013271145, + "grad_norm": 1.4809178594729295, + "learning_rate": 1.5411775623752986e-06, + "loss": 0.1019, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6542 + }, + { + "epoch": 0.629225369043612, + "grad_norm": 1.4918656335110816, + "learning_rate": 1.5404731219219243e-06, + "loss": 0.1187, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6543 + }, + { + "epoch": 0.6293215367601096, + "grad_norm": 2.4686645673044603, + "learning_rate": 1.5397687708031747e-06, + "loss": 0.1458, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6544 + }, + { + "epoch": 0.6294177044766072, + "grad_norm": 1.5220832724342992, + "learning_rate": 1.5390645090846274e-06, + "loss": 0.0948, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6545 + }, + { + "epoch": 0.6295138721931047, + "grad_norm": 1.5650788258845145, + "learning_rate": 1.5383603368318512e-06, + "loss": 0.1105, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6546 + }, + { + "epoch": 0.6296100399096023, + "grad_norm": 1.7069126614190333, + "learning_rate": 1.5376562541104061e-06, + "loss": 0.147, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6547 + }, + { + "epoch": 0.6297062076261, + "grad_norm": 2.4414488045131746, + "learning_rate": 1.5369522609858444e-06, + "loss": 0.1076, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6548 + }, + { + "epoch": 0.6298023753425975, + "grad_norm": 2.2031826305429667, + "learning_rate": 1.5362483575237096e-06, + "loss": 0.1257, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6549 + }, + { + "epoch": 0.6298985430590951, + "grad_norm": 2.4224720004808353, + "learning_rate": 1.5355445437895373e-06, + "loss": 0.1475, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6550 + }, + { + "epoch": 0.6299947107755927, + "grad_norm": 2.1118215765299246, + "learning_rate": 1.5348408198488537e-06, + "loss": 0.1256, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6551 + }, + { + "epoch": 0.6300908784920902, + "grad_norm": 1.8253935081672292, + "learning_rate": 1.5341371857671782e-06, + "loss": 0.1387, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6552 + }, + { + "epoch": 0.6301870462085878, + "grad_norm": 3.0238011927498354, + "learning_rate": 1.5334336416100227e-06, + "loss": 0.1323, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6553 + }, + { + "epoch": 0.6302832139250854, + "grad_norm": 2.6823798808007826, + "learning_rate": 1.5327301874428857e-06, + "loss": 0.1375, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6554 + }, + { + "epoch": 0.6303793816415829, + "grad_norm": 2.1695659179744533, + "learning_rate": 1.532026823331263e-06, + "loss": 0.1126, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6555 + }, + { + "epoch": 0.6304755493580805, + "grad_norm": 1.5838345214568432, + "learning_rate": 1.531323549340639e-06, + "loss": 0.1273, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6556 + }, + { + "epoch": 0.630571717074578, + "grad_norm": 1.385521186283169, + "learning_rate": 1.5306203655364906e-06, + "loss": 0.1145, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6557 + }, + { + "epoch": 0.6306678847910756, + "grad_norm": 1.8718816071919333, + "learning_rate": 1.5299172719842864e-06, + "loss": 0.1161, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6558 + }, + { + "epoch": 0.6307640525075732, + "grad_norm": 1.3864998256632939, + "learning_rate": 1.5292142687494874e-06, + "loss": 0.0847, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6559 + }, + { + "epoch": 0.6308602202240707, + "grad_norm": 1.6994462269580712, + "learning_rate": 1.5285113558975429e-06, + "loss": 0.0976, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6560 + }, + { + "epoch": 0.6309563879405683, + "grad_norm": 2.1051217181273696, + "learning_rate": 1.5278085334938972e-06, + "loss": 0.1397, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6561 + }, + { + "epoch": 0.631052555657066, + "grad_norm": 1.9067335885039616, + "learning_rate": 1.527105801603985e-06, + "loss": 0.1216, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6562 + }, + { + "epoch": 0.6311487233735635, + "grad_norm": 2.2330172239299304, + "learning_rate": 1.5264031602932328e-06, + "loss": 0.1339, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6563 + }, + { + "epoch": 0.6312448910900611, + "grad_norm": 1.9708272170260221, + "learning_rate": 1.5257006096270583e-06, + "loss": 0.1458, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6564 + }, + { + "epoch": 0.6313410588065587, + "grad_norm": 1.9178132857789805, + "learning_rate": 1.524998149670871e-06, + "loss": 0.129, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6565 + }, + { + "epoch": 0.6314372265230562, + "grad_norm": 2.4043147921159798, + "learning_rate": 1.5242957804900716e-06, + "loss": 0.1471, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6566 + }, + { + "epoch": 0.6315333942395538, + "grad_norm": 1.7971175065734075, + "learning_rate": 1.5235935021500526e-06, + "loss": 0.1092, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6567 + }, + { + "epoch": 0.6316295619560514, + "grad_norm": 2.535174895091243, + "learning_rate": 1.5228913147161982e-06, + "loss": 0.1222, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6568 + }, + { + "epoch": 0.6317257296725489, + "grad_norm": 1.668492063373277, + "learning_rate": 1.5221892182538838e-06, + "loss": 0.1227, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6569 + }, + { + "epoch": 0.6318218973890465, + "grad_norm": 1.7545851871302929, + "learning_rate": 1.521487212828478e-06, + "loss": 0.1369, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6570 + }, + { + "epoch": 0.631918065105544, + "grad_norm": 2.355855788121652, + "learning_rate": 1.5207852985053373e-06, + "loss": 0.1094, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6571 + }, + { + "epoch": 0.6320142328220416, + "grad_norm": 2.7264640242639873, + "learning_rate": 1.520083475349813e-06, + "loss": 0.1311, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6572 + }, + { + "epoch": 0.6321104005385392, + "grad_norm": 1.982452714022485, + "learning_rate": 1.5193817434272461e-06, + "loss": 0.1415, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6573 + }, + { + "epoch": 0.6322065682550367, + "grad_norm": 1.4477409381446358, + "learning_rate": 1.5186801028029705e-06, + "loss": 0.1036, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6574 + }, + { + "epoch": 0.6323027359715343, + "grad_norm": 1.5201822300039392, + "learning_rate": 1.5179785535423109e-06, + "loss": 0.1161, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6575 + }, + { + "epoch": 0.632398903688032, + "grad_norm": 2.152542477987091, + "learning_rate": 1.5172770957105843e-06, + "loss": 0.1489, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6576 + }, + { + "epoch": 0.6324950714045295, + "grad_norm": 2.086431456521234, + "learning_rate": 1.516575729373096e-06, + "loss": 0.1453, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6577 + }, + { + "epoch": 0.6325912391210271, + "grad_norm": 1.91977161255986, + "learning_rate": 1.5158744545951468e-06, + "loss": 0.1561, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6578 + }, + { + "epoch": 0.6326874068375247, + "grad_norm": 2.20403805014026, + "learning_rate": 1.515173271442027e-06, + "loss": 0.1287, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6579 + }, + { + "epoch": 0.6327835745540222, + "grad_norm": 1.4903304112239275, + "learning_rate": 1.5144721799790194e-06, + "loss": 0.1211, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6580 + }, + { + "epoch": 0.6328797422705198, + "grad_norm": 1.585210424010809, + "learning_rate": 1.513771180271397e-06, + "loss": 0.1337, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6581 + }, + { + "epoch": 0.6329759099870174, + "grad_norm": 1.813738345875415, + "learning_rate": 1.513070272384424e-06, + "loss": 0.1043, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6582 + }, + { + "epoch": 0.6330720777035149, + "grad_norm": 1.7142892252093296, + "learning_rate": 1.5123694563833585e-06, + "loss": 0.1291, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6583 + }, + { + "epoch": 0.6331682454200125, + "grad_norm": 1.934361977660971, + "learning_rate": 1.5116687323334467e-06, + "loss": 0.1208, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6584 + }, + { + "epoch": 0.63326441313651, + "grad_norm": 1.61403665060045, + "learning_rate": 1.510968100299929e-06, + "loss": 0.1261, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6585 + }, + { + "epoch": 0.6333605808530076, + "grad_norm": 1.6043473768726604, + "learning_rate": 1.510267560348036e-06, + "loss": 0.1194, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6586 + }, + { + "epoch": 0.6334567485695052, + "grad_norm": 2.4236827948472506, + "learning_rate": 1.5095671125429911e-06, + "loss": 0.1308, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6587 + }, + { + "epoch": 0.6335529162860027, + "grad_norm": 1.8918047036974992, + "learning_rate": 1.5088667569500054e-06, + "loss": 0.1118, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6588 + }, + { + "epoch": 0.6336490840025003, + "grad_norm": 1.3476926278882762, + "learning_rate": 1.5081664936342857e-06, + "loss": 0.0987, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6589 + }, + { + "epoch": 0.633745251718998, + "grad_norm": 1.4558831037046627, + "learning_rate": 1.507466322661028e-06, + "loss": 0.1361, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6590 + }, + { + "epoch": 0.6338414194354955, + "grad_norm": 1.7967938742682317, + "learning_rate": 1.5067662440954202e-06, + "loss": 0.0887, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6591 + }, + { + "epoch": 0.6339375871519931, + "grad_norm": 3.6132603519633366, + "learning_rate": 1.506066258002642e-06, + "loss": 0.1063, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6592 + }, + { + "epoch": 0.6340337548684907, + "grad_norm": 1.943098268841509, + "learning_rate": 1.5053663644478644e-06, + "loss": 0.1326, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6593 + }, + { + "epoch": 0.6341299225849882, + "grad_norm": 1.4913414498159825, + "learning_rate": 1.5046665634962477e-06, + "loss": 0.1089, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6594 + }, + { + "epoch": 0.6342260903014858, + "grad_norm": 1.473603394282168, + "learning_rate": 1.5039668552129463e-06, + "loss": 0.1018, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6595 + }, + { + "epoch": 0.6343222580179834, + "grad_norm": 2.1260388220892406, + "learning_rate": 1.5032672396631056e-06, + "loss": 0.1399, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6596 + }, + { + "epoch": 0.6344184257344809, + "grad_norm": 1.8214059142983168, + "learning_rate": 1.5025677169118608e-06, + "loss": 0.1301, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6597 + }, + { + "epoch": 0.6345145934509785, + "grad_norm": 1.7165750649759164, + "learning_rate": 1.5018682870243404e-06, + "loss": 0.1351, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6598 + }, + { + "epoch": 0.6346107611674761, + "grad_norm": 1.8263401374634474, + "learning_rate": 1.5011689500656624e-06, + "loss": 0.1417, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6599 + }, + { + "epoch": 0.6347069288839736, + "grad_norm": 2.0608836790784077, + "learning_rate": 1.5004697061009372e-06, + "loss": 0.1052, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6600 + }, + { + "epoch": 0.6348030966004712, + "grad_norm": 1.418833774709434, + "learning_rate": 1.499770555195266e-06, + "loss": 0.1055, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6601 + }, + { + "epoch": 0.6348992643169687, + "grad_norm": 1.9368775593253171, + "learning_rate": 1.4990714974137424e-06, + "loss": 0.1284, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6602 + }, + { + "epoch": 0.6349954320334663, + "grad_norm": 1.4398555806865379, + "learning_rate": 1.4983725328214505e-06, + "loss": 0.1016, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6603 + }, + { + "epoch": 0.635091599749964, + "grad_norm": 1.6765998667209387, + "learning_rate": 1.4976736614834664e-06, + "loss": 0.1185, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6604 + }, + { + "epoch": 0.6351877674664616, + "grad_norm": 2.3702182225083352, + "learning_rate": 1.4969748834648556e-06, + "loss": 0.1507, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6605 + }, + { + "epoch": 0.6352839351829591, + "grad_norm": 1.5144038143573717, + "learning_rate": 1.4962761988306768e-06, + "loss": 0.124, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6606 + }, + { + "epoch": 0.6353801028994567, + "grad_norm": 1.8253461000607754, + "learning_rate": 1.4955776076459798e-06, + "loss": 0.1327, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6607 + }, + { + "epoch": 0.6354762706159542, + "grad_norm": 2.8160753679437462, + "learning_rate": 1.4948791099758052e-06, + "loss": 0.1254, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6608 + }, + { + "epoch": 0.6355724383324518, + "grad_norm": 2.442577636909435, + "learning_rate": 1.4941807058851865e-06, + "loss": 0.1417, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6609 + }, + { + "epoch": 0.6356686060489494, + "grad_norm": 1.557743424854138, + "learning_rate": 1.4934823954391442e-06, + "loss": 0.1078, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6610 + }, + { + "epoch": 0.6357647737654469, + "grad_norm": 1.5077396626280761, + "learning_rate": 1.4927841787026948e-06, + "loss": 0.1056, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6611 + }, + { + "epoch": 0.6358609414819445, + "grad_norm": 2.0461978556325873, + "learning_rate": 1.4920860557408434e-06, + "loss": 0.1455, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6612 + }, + { + "epoch": 0.6359571091984421, + "grad_norm": 1.9725371900704884, + "learning_rate": 1.4913880266185888e-06, + "loss": 0.1335, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6613 + }, + { + "epoch": 0.6360532769149396, + "grad_norm": 1.70396398595429, + "learning_rate": 1.4906900914009173e-06, + "loss": 0.1324, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6614 + }, + { + "epoch": 0.6361494446314372, + "grad_norm": 2.2104200721401526, + "learning_rate": 1.4899922501528102e-06, + "loss": 0.1152, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6615 + }, + { + "epoch": 0.6362456123479348, + "grad_norm": 3.0225434302745215, + "learning_rate": 1.489294502939238e-06, + "loss": 0.1469, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6616 + }, + { + "epoch": 0.6363417800644323, + "grad_norm": 1.6666344707776548, + "learning_rate": 1.4885968498251625e-06, + "loss": 0.1368, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6617 + }, + { + "epoch": 0.63643794778093, + "grad_norm": 1.5509459377940131, + "learning_rate": 1.4878992908755375e-06, + "loss": 0.1316, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6618 + }, + { + "epoch": 0.6365341154974276, + "grad_norm": 2.4357140421598062, + "learning_rate": 1.4872018261553073e-06, + "loss": 0.1019, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6619 + }, + { + "epoch": 0.6366302832139251, + "grad_norm": 3.48765492864367, + "learning_rate": 1.486504455729408e-06, + "loss": 0.1206, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6620 + }, + { + "epoch": 0.6367264509304227, + "grad_norm": 4.515707226530097, + "learning_rate": 1.4858071796627683e-06, + "loss": 0.1469, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6621 + }, + { + "epoch": 0.6368226186469202, + "grad_norm": 2.1235953403875723, + "learning_rate": 1.485109998020304e-06, + "loss": 0.1494, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6622 + }, + { + "epoch": 0.6369187863634178, + "grad_norm": 1.4538938384246451, + "learning_rate": 1.484412910866926e-06, + "loss": 0.0995, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6623 + }, + { + "epoch": 0.6370149540799154, + "grad_norm": 1.6189632743298186, + "learning_rate": 1.4837159182675343e-06, + "loss": 0.1004, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6624 + }, + { + "epoch": 0.6371111217964129, + "grad_norm": 2.3002623198811363, + "learning_rate": 1.4830190202870216e-06, + "loss": 0.1133, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6625 + }, + { + "epoch": 0.6372072895129105, + "grad_norm": 1.5636462473348633, + "learning_rate": 1.4823222169902718e-06, + "loss": 0.1423, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6626 + }, + { + "epoch": 0.6373034572294081, + "grad_norm": 2.144185274571059, + "learning_rate": 1.4816255084421575e-06, + "loss": 0.13, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6627 + }, + { + "epoch": 0.6373996249459056, + "grad_norm": 1.7319174801326143, + "learning_rate": 1.4809288947075445e-06, + "loss": 0.1205, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6628 + }, + { + "epoch": 0.6374957926624032, + "grad_norm": 2.02505218964439, + "learning_rate": 1.4802323758512904e-06, + "loss": 0.1208, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6629 + }, + { + "epoch": 0.6375919603789008, + "grad_norm": 1.7190655480582449, + "learning_rate": 1.479535951938243e-06, + "loss": 0.1312, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6630 + }, + { + "epoch": 0.6376881280953983, + "grad_norm": 2.4455509006038234, + "learning_rate": 1.4788396230332403e-06, + "loss": 0.1088, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6631 + }, + { + "epoch": 0.637784295811896, + "grad_norm": 1.907566205247044, + "learning_rate": 1.4781433892011132e-06, + "loss": 0.1201, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6632 + }, + { + "epoch": 0.6378804635283936, + "grad_norm": 1.8292942558422738, + "learning_rate": 1.4774472505066834e-06, + "loss": 0.1391, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6633 + }, + { + "epoch": 0.6379766312448911, + "grad_norm": 1.7148016248452491, + "learning_rate": 1.4767512070147622e-06, + "loss": 0.146, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6634 + }, + { + "epoch": 0.6380727989613887, + "grad_norm": 1.5660938130349924, + "learning_rate": 1.4760552587901537e-06, + "loss": 0.1348, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6635 + }, + { + "epoch": 0.6381689666778863, + "grad_norm": 2.5019386426681707, + "learning_rate": 1.4753594058976526e-06, + "loss": 0.1137, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6636 + }, + { + "epoch": 0.6382651343943838, + "grad_norm": 1.589277931931, + "learning_rate": 1.474663648402046e-06, + "loss": 0.1445, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6637 + }, + { + "epoch": 0.6383613021108814, + "grad_norm": 1.6580103831029867, + "learning_rate": 1.4739679863681086e-06, + "loss": 0.1414, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6638 + }, + { + "epoch": 0.6384574698273789, + "grad_norm": 2.770701933867892, + "learning_rate": 1.4732724198606096e-06, + "loss": 0.1467, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6639 + }, + { + "epoch": 0.6385536375438765, + "grad_norm": 2.898641022058118, + "learning_rate": 1.4725769489443082e-06, + "loss": 0.1089, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6640 + }, + { + "epoch": 0.6386498052603741, + "grad_norm": 1.6096288417260485, + "learning_rate": 1.4718815736839548e-06, + "loss": 0.1377, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6641 + }, + { + "epoch": 0.6387459729768716, + "grad_norm": 1.9642620143228262, + "learning_rate": 1.4711862941442906e-06, + "loss": 0.1162, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6642 + }, + { + "epoch": 0.6388421406933692, + "grad_norm": 1.4834464925838482, + "learning_rate": 1.4704911103900488e-06, + "loss": 0.1175, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6643 + }, + { + "epoch": 0.6389383084098668, + "grad_norm": 1.9346188222922105, + "learning_rate": 1.4697960224859513e-06, + "loss": 0.1017, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6644 + }, + { + "epoch": 0.6390344761263643, + "grad_norm": 2.3138693214065067, + "learning_rate": 1.469101030496714e-06, + "loss": 0.1232, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6645 + }, + { + "epoch": 0.639130643842862, + "grad_norm": 2.08419889241118, + "learning_rate": 1.4684061344870427e-06, + "loss": 0.1117, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6646 + }, + { + "epoch": 0.6392268115593596, + "grad_norm": 2.5534416631061694, + "learning_rate": 1.467711334521633e-06, + "loss": 0.1092, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6647 + }, + { + "epoch": 0.6393229792758571, + "grad_norm": 1.4670083398681841, + "learning_rate": 1.4670166306651734e-06, + "loss": 0.1153, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6648 + }, + { + "epoch": 0.6394191469923547, + "grad_norm": 1.769536090750199, + "learning_rate": 1.4663220229823438e-06, + "loss": 0.1171, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6649 + }, + { + "epoch": 0.6395153147088523, + "grad_norm": 1.2654880530908796, + "learning_rate": 1.465627511537812e-06, + "loss": 0.0952, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6650 + }, + { + "epoch": 0.6396114824253498, + "grad_norm": 3.292112306131439, + "learning_rate": 1.4649330963962398e-06, + "loss": 0.1495, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6651 + }, + { + "epoch": 0.6397076501418474, + "grad_norm": 2.4263998744881077, + "learning_rate": 1.46423877762228e-06, + "loss": 0.1214, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6652 + }, + { + "epoch": 0.639803817858345, + "grad_norm": 1.5871461713112278, + "learning_rate": 1.4635445552805746e-06, + "loss": 0.1109, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6653 + }, + { + "epoch": 0.6398999855748425, + "grad_norm": 3.344225363778209, + "learning_rate": 1.4628504294357593e-06, + "loss": 0.1394, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6654 + }, + { + "epoch": 0.6399961532913401, + "grad_norm": 2.9243208305986816, + "learning_rate": 1.4621564001524568e-06, + "loss": 0.1327, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6655 + }, + { + "epoch": 0.6400923210078376, + "grad_norm": 1.7365153743967594, + "learning_rate": 1.4614624674952843e-06, + "loss": 0.152, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6656 + }, + { + "epoch": 0.6401884887243352, + "grad_norm": 1.6722863325149366, + "learning_rate": 1.460768631528849e-06, + "loss": 0.0885, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6657 + }, + { + "epoch": 0.6402846564408328, + "grad_norm": 1.7370611969489294, + "learning_rate": 1.4600748923177483e-06, + "loss": 0.1186, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6658 + }, + { + "epoch": 0.6403808241573303, + "grad_norm": 2.177957709868046, + "learning_rate": 1.4593812499265725e-06, + "loss": 0.119, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6659 + }, + { + "epoch": 0.640476991873828, + "grad_norm": 3.503830118821987, + "learning_rate": 1.4586877044199015e-06, + "loss": 0.1345, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6660 + }, + { + "epoch": 0.6405731595903256, + "grad_norm": 2.5124401671654115, + "learning_rate": 1.4579942558623044e-06, + "loss": 0.1458, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6661 + }, + { + "epoch": 0.6406693273068231, + "grad_norm": 1.523987174295846, + "learning_rate": 1.457300904318345e-06, + "loss": 0.1101, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6662 + }, + { + "epoch": 0.6407654950233207, + "grad_norm": 2.4603521605130547, + "learning_rate": 1.4566076498525756e-06, + "loss": 0.1173, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6663 + }, + { + "epoch": 0.6408616627398183, + "grad_norm": 1.624754316446531, + "learning_rate": 1.45591449252954e-06, + "loss": 0.1, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6664 + }, + { + "epoch": 0.6409578304563158, + "grad_norm": 1.747772404316802, + "learning_rate": 1.4552214324137744e-06, + "loss": 0.1314, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6665 + }, + { + "epoch": 0.6410539981728134, + "grad_norm": 1.5890359072518254, + "learning_rate": 1.4545284695698027e-06, + "loss": 0.1088, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6666 + }, + { + "epoch": 0.641150165889311, + "grad_norm": 2.532171283735963, + "learning_rate": 1.4538356040621427e-06, + "loss": 0.1498, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6667 + }, + { + "epoch": 0.6412463336058085, + "grad_norm": 2.0480956483980273, + "learning_rate": 1.4531428359553017e-06, + "loss": 0.164, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6668 + }, + { + "epoch": 0.6413425013223061, + "grad_norm": 1.7073814758423713, + "learning_rate": 1.4524501653137787e-06, + "loss": 0.107, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6669 + }, + { + "epoch": 0.6414386690388036, + "grad_norm": 2.465134131240172, + "learning_rate": 1.4517575922020632e-06, + "loss": 0.1536, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6670 + }, + { + "epoch": 0.6415348367553012, + "grad_norm": 1.6786370870289025, + "learning_rate": 1.4510651166846369e-06, + "loss": 0.1266, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6671 + }, + { + "epoch": 0.6416310044717988, + "grad_norm": 2.270521285365425, + "learning_rate": 1.4503727388259686e-06, + "loss": 0.1283, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6672 + }, + { + "epoch": 0.6417271721882963, + "grad_norm": 1.858184566947154, + "learning_rate": 1.449680458690522e-06, + "loss": 0.1042, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6673 + }, + { + "epoch": 0.641823339904794, + "grad_norm": 1.745555808055098, + "learning_rate": 1.44898827634275e-06, + "loss": 0.1287, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6674 + }, + { + "epoch": 0.6419195076212916, + "grad_norm": 1.8669217381241765, + "learning_rate": 1.4482961918470976e-06, + "loss": 0.1309, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6675 + }, + { + "epoch": 0.6420156753377891, + "grad_norm": 1.7993857146328593, + "learning_rate": 1.4476042052679987e-06, + "loss": 0.1148, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6676 + }, + { + "epoch": 0.6421118430542867, + "grad_norm": 1.8932770086502255, + "learning_rate": 1.4469123166698807e-06, + "loss": 0.1409, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6677 + }, + { + "epoch": 0.6422080107707843, + "grad_norm": 1.5026178193215327, + "learning_rate": 1.4462205261171586e-06, + "loss": 0.1097, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6678 + }, + { + "epoch": 0.6423041784872818, + "grad_norm": 1.3167125997881528, + "learning_rate": 1.4455288336742404e-06, + "loss": 0.0848, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6679 + }, + { + "epoch": 0.6424003462037794, + "grad_norm": 1.8262111813553588, + "learning_rate": 1.4448372394055249e-06, + "loss": 0.1221, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6680 + }, + { + "epoch": 0.642496513920277, + "grad_norm": 1.807300371362058, + "learning_rate": 1.444145743375402e-06, + "loss": 0.1193, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6681 + }, + { + "epoch": 0.6425926816367745, + "grad_norm": 1.4797794038141785, + "learning_rate": 1.443454345648252e-06, + "loss": 0.1142, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6682 + }, + { + "epoch": 0.6426888493532721, + "grad_norm": 1.6570192708557145, + "learning_rate": 1.442763046288444e-06, + "loss": 0.1127, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6683 + }, + { + "epoch": 0.6427850170697696, + "grad_norm": 2.5975452411659763, + "learning_rate": 1.4420718453603415e-06, + "loss": 0.1185, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6684 + }, + { + "epoch": 0.6428811847862672, + "grad_norm": 3.25106361189865, + "learning_rate": 1.4413807429282972e-06, + "loss": 0.1282, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6685 + }, + { + "epoch": 0.6429773525027648, + "grad_norm": 1.6788481816635956, + "learning_rate": 1.4406897390566555e-06, + "loss": 0.1326, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6686 + }, + { + "epoch": 0.6430735202192623, + "grad_norm": 3.0129813551338827, + "learning_rate": 1.4399988338097481e-06, + "loss": 0.1498, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6687 + }, + { + "epoch": 0.64316968793576, + "grad_norm": 1.8315914055153906, + "learning_rate": 1.4393080272519022e-06, + "loss": 0.1248, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6688 + }, + { + "epoch": 0.6432658556522576, + "grad_norm": 2.2378137569430896, + "learning_rate": 1.4386173194474345e-06, + "loss": 0.1159, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6689 + }, + { + "epoch": 0.6433620233687551, + "grad_norm": 1.662913815877615, + "learning_rate": 1.4379267104606497e-06, + "loss": 0.0971, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6690 + }, + { + "epoch": 0.6434581910852527, + "grad_norm": 2.3512045279611407, + "learning_rate": 1.4372362003558466e-06, + "loss": 0.1649, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6691 + }, + { + "epoch": 0.6435543588017503, + "grad_norm": 1.9559438407311718, + "learning_rate": 1.436545789197313e-06, + "loss": 0.1318, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6692 + }, + { + "epoch": 0.6436505265182478, + "grad_norm": 1.3168300277616865, + "learning_rate": 1.43585547704933e-06, + "loss": 0.1004, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6693 + }, + { + "epoch": 0.6437466942347454, + "grad_norm": 2.977156165529203, + "learning_rate": 1.4351652639761652e-06, + "loss": 0.1206, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6694 + }, + { + "epoch": 0.643842861951243, + "grad_norm": 2.9157678250863697, + "learning_rate": 1.43447515004208e-06, + "loss": 0.1304, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6695 + }, + { + "epoch": 0.6439390296677405, + "grad_norm": 1.6676613446812574, + "learning_rate": 1.4337851353113264e-06, + "loss": 0.1346, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6696 + }, + { + "epoch": 0.6440351973842381, + "grad_norm": 3.5141306934605194, + "learning_rate": 1.4330952198481468e-06, + "loss": 0.153, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6697 + }, + { + "epoch": 0.6441313651007357, + "grad_norm": 2.291361333586769, + "learning_rate": 1.4324054037167737e-06, + "loss": 0.1208, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6698 + }, + { + "epoch": 0.6442275328172332, + "grad_norm": 1.4424247212399202, + "learning_rate": 1.4317156869814324e-06, + "loss": 0.1082, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6699 + }, + { + "epoch": 0.6443237005337308, + "grad_norm": 1.6806632585341634, + "learning_rate": 1.431026069706335e-06, + "loss": 0.1295, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6700 + }, + { + "epoch": 0.6444198682502283, + "grad_norm": 3.1680707313620924, + "learning_rate": 1.4303365519556882e-06, + "loss": 0.1447, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6701 + }, + { + "epoch": 0.644516035966726, + "grad_norm": 1.6580158635453006, + "learning_rate": 1.4296471337936879e-06, + "loss": 0.1189, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6702 + }, + { + "epoch": 0.6446122036832236, + "grad_norm": 2.2407516333980713, + "learning_rate": 1.4289578152845207e-06, + "loss": 0.1461, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6703 + }, + { + "epoch": 0.6447083713997211, + "grad_norm": 1.4278614696878007, + "learning_rate": 1.4282685964923643e-06, + "loss": 0.1177, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6704 + }, + { + "epoch": 0.6448045391162187, + "grad_norm": 1.8842612767156017, + "learning_rate": 1.4275794774813878e-06, + "loss": 0.1259, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6705 + }, + { + "epoch": 0.6449007068327163, + "grad_norm": 1.340814827088058, + "learning_rate": 1.426890458315748e-06, + "loss": 0.0885, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6706 + }, + { + "epoch": 0.6449968745492138, + "grad_norm": 2.0569801718164697, + "learning_rate": 1.4262015390595957e-06, + "loss": 0.1193, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6707 + }, + { + "epoch": 0.6450930422657114, + "grad_norm": 1.6932925114152564, + "learning_rate": 1.425512719777071e-06, + "loss": 0.1125, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6708 + }, + { + "epoch": 0.645189209982209, + "grad_norm": 1.599646893793893, + "learning_rate": 1.4248240005323049e-06, + "loss": 0.1282, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6709 + }, + { + "epoch": 0.6452853776987065, + "grad_norm": 2.334098356716466, + "learning_rate": 1.4241353813894204e-06, + "loss": 0.1387, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6710 + }, + { + "epoch": 0.6453815454152041, + "grad_norm": 1.3303575239899996, + "learning_rate": 1.4234468624125274e-06, + "loss": 0.1073, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6711 + }, + { + "epoch": 0.6454777131317017, + "grad_norm": 1.8038404476675818, + "learning_rate": 1.42275844366573e-06, + "loss": 0.1168, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6712 + }, + { + "epoch": 0.6455738808481992, + "grad_norm": 1.6010570990032884, + "learning_rate": 1.4220701252131224e-06, + "loss": 0.1017, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6713 + }, + { + "epoch": 0.6456700485646968, + "grad_norm": 1.4946452667603631, + "learning_rate": 1.4213819071187885e-06, + "loss": 0.1187, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6714 + }, + { + "epoch": 0.6457662162811943, + "grad_norm": 3.085881285812195, + "learning_rate": 1.4206937894468033e-06, + "loss": 0.1071, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6715 + }, + { + "epoch": 0.645862383997692, + "grad_norm": 1.9479877199133429, + "learning_rate": 1.4200057722612338e-06, + "loss": 0.1573, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6716 + }, + { + "epoch": 0.6459585517141896, + "grad_norm": 1.7455527384000138, + "learning_rate": 1.4193178556261341e-06, + "loss": 0.1167, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6717 + }, + { + "epoch": 0.6460547194306872, + "grad_norm": 2.3004132542854854, + "learning_rate": 1.4186300396055522e-06, + "loss": 0.1124, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6718 + }, + { + "epoch": 0.6461508871471847, + "grad_norm": 2.170758840785249, + "learning_rate": 1.4179423242635257e-06, + "loss": 0.137, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6719 + }, + { + "epoch": 0.6462470548636823, + "grad_norm": 2.230624005915373, + "learning_rate": 1.4172547096640837e-06, + "loss": 0.1117, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6720 + }, + { + "epoch": 0.6463432225801798, + "grad_norm": 2.9432554680444887, + "learning_rate": 1.4165671958712429e-06, + "loss": 0.1191, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6721 + }, + { + "epoch": 0.6464393902966774, + "grad_norm": 1.8676241487591017, + "learning_rate": 1.4158797829490144e-06, + "loss": 0.1641, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6722 + }, + { + "epoch": 0.646535558013175, + "grad_norm": 1.6183185411791543, + "learning_rate": 1.4151924709613987e-06, + "loss": 0.1145, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6723 + }, + { + "epoch": 0.6466317257296725, + "grad_norm": 2.0230430623640125, + "learning_rate": 1.4145052599723846e-06, + "loss": 0.1201, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6724 + }, + { + "epoch": 0.6467278934461701, + "grad_norm": 2.086208913497744, + "learning_rate": 1.4138181500459542e-06, + "loss": 0.1331, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6725 + }, + { + "epoch": 0.6468240611626677, + "grad_norm": 2.775833128986797, + "learning_rate": 1.4131311412460797e-06, + "loss": 0.1131, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6726 + }, + { + "epoch": 0.6469202288791652, + "grad_norm": 1.97748492343183, + "learning_rate": 1.4124442336367243e-06, + "loss": 0.142, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6727 + }, + { + "epoch": 0.6470163965956628, + "grad_norm": 1.7870242486104606, + "learning_rate": 1.4117574272818388e-06, + "loss": 0.1221, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6728 + }, + { + "epoch": 0.6471125643121604, + "grad_norm": 2.1377511304151646, + "learning_rate": 1.411070722245368e-06, + "loss": 0.1373, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6729 + }, + { + "epoch": 0.647208732028658, + "grad_norm": 1.4876045838040166, + "learning_rate": 1.4103841185912464e-06, + "loss": 0.1217, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6730 + }, + { + "epoch": 0.6473048997451556, + "grad_norm": 1.6832294662442817, + "learning_rate": 1.409697616383398e-06, + "loss": 0.1233, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6731 + }, + { + "epoch": 0.6474010674616532, + "grad_norm": 2.149558569949942, + "learning_rate": 1.409011215685739e-06, + "loss": 0.1388, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6732 + }, + { + "epoch": 0.6474972351781507, + "grad_norm": 1.6987986762567173, + "learning_rate": 1.4083249165621754e-06, + "loss": 0.1243, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6733 + }, + { + "epoch": 0.6475934028946483, + "grad_norm": 2.0516484321903663, + "learning_rate": 1.4076387190766017e-06, + "loss": 0.1065, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6734 + }, + { + "epoch": 0.6476895706111458, + "grad_norm": 1.5588558429371084, + "learning_rate": 1.406952623292906e-06, + "loss": 0.1238, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6735 + }, + { + "epoch": 0.6477857383276434, + "grad_norm": 1.5424178210157091, + "learning_rate": 1.4062666292749657e-06, + "loss": 0.1062, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6736 + }, + { + "epoch": 0.647881906044141, + "grad_norm": 1.913852090090297, + "learning_rate": 1.4055807370866488e-06, + "loss": 0.1185, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6737 + }, + { + "epoch": 0.6479780737606385, + "grad_norm": 3.4327377577650853, + "learning_rate": 1.4048949467918144e-06, + "loss": 0.1378, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6738 + }, + { + "epoch": 0.6480742414771361, + "grad_norm": 2.0998801708270065, + "learning_rate": 1.4042092584543099e-06, + "loss": 0.1206, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6739 + }, + { + "epoch": 0.6481704091936337, + "grad_norm": 1.6792396653469472, + "learning_rate": 1.4035236721379758e-06, + "loss": 0.103, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6740 + }, + { + "epoch": 0.6482665769101312, + "grad_norm": 1.5409417236846352, + "learning_rate": 1.4028381879066421e-06, + "loss": 0.1199, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6741 + }, + { + "epoch": 0.6483627446266288, + "grad_norm": 3.4214772335139108, + "learning_rate": 1.4021528058241289e-06, + "loss": 0.1143, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6742 + }, + { + "epoch": 0.6484589123431264, + "grad_norm": 1.369404929598484, + "learning_rate": 1.4014675259542473e-06, + "loss": 0.1, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6743 + }, + { + "epoch": 0.648555080059624, + "grad_norm": 2.0540171185469758, + "learning_rate": 1.4007823483608002e-06, + "loss": 0.1453, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6744 + }, + { + "epoch": 0.6486512477761216, + "grad_norm": 2.1652750293341105, + "learning_rate": 1.4000972731075771e-06, + "loss": 0.122, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6745 + }, + { + "epoch": 0.6487474154926192, + "grad_norm": 2.413932081508726, + "learning_rate": 1.3994123002583614e-06, + "loss": 0.122, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6746 + }, + { + "epoch": 0.6488435832091167, + "grad_norm": 2.204157673673614, + "learning_rate": 1.3987274298769266e-06, + "loss": 0.1055, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6747 + }, + { + "epoch": 0.6489397509256143, + "grad_norm": 3.050997503655112, + "learning_rate": 1.3980426620270351e-06, + "loss": 0.1294, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6748 + }, + { + "epoch": 0.6490359186421119, + "grad_norm": 1.742425135080102, + "learning_rate": 1.3973579967724424e-06, + "loss": 0.126, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6749 + }, + { + "epoch": 0.6491320863586094, + "grad_norm": 1.6053403550389012, + "learning_rate": 1.3966734341768906e-06, + "loss": 0.1114, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6750 + }, + { + "epoch": 0.649228254075107, + "grad_norm": 1.4667817922809774, + "learning_rate": 1.395988974304115e-06, + "loss": 0.1132, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6751 + }, + { + "epoch": 0.6493244217916045, + "grad_norm": 1.685144060996279, + "learning_rate": 1.3953046172178413e-06, + "loss": 0.131, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6752 + }, + { + "epoch": 0.6494205895081021, + "grad_norm": 2.37962566226485, + "learning_rate": 1.3946203629817856e-06, + "loss": 0.1306, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6753 + }, + { + "epoch": 0.6495167572245997, + "grad_norm": 1.8591256316394817, + "learning_rate": 1.3939362116596522e-06, + "loss": 0.1431, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6754 + }, + { + "epoch": 0.6496129249410972, + "grad_norm": 1.443993110237622, + "learning_rate": 1.393252163315138e-06, + "loss": 0.0961, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6755 + }, + { + "epoch": 0.6497090926575948, + "grad_norm": 1.4709787139030648, + "learning_rate": 1.3925682180119314e-06, + "loss": 0.1028, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6756 + }, + { + "epoch": 0.6498052603740924, + "grad_norm": 1.5877511646108098, + "learning_rate": 1.3918843758137074e-06, + "loss": 0.1072, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6757 + }, + { + "epoch": 0.64990142809059, + "grad_norm": 2.225935964000772, + "learning_rate": 1.3912006367841347e-06, + "loss": 0.1496, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6758 + }, + { + "epoch": 0.6499975958070876, + "grad_norm": 1.8307840331615994, + "learning_rate": 1.390517000986871e-06, + "loss": 0.1145, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6759 + }, + { + "epoch": 0.6500937635235852, + "grad_norm": 1.2623161756288794, + "learning_rate": 1.3898334684855647e-06, + "loss": 0.086, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6760 + }, + { + "epoch": 0.6501899312400827, + "grad_norm": 1.6202119444387324, + "learning_rate": 1.389150039343856e-06, + "loss": 0.1334, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6761 + }, + { + "epoch": 0.6502860989565803, + "grad_norm": 2.07231049945913, + "learning_rate": 1.3884667136253716e-06, + "loss": 0.1056, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6762 + }, + { + "epoch": 0.6503822666730779, + "grad_norm": 1.6922340178246627, + "learning_rate": 1.3877834913937327e-06, + "loss": 0.1257, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6763 + }, + { + "epoch": 0.6504784343895754, + "grad_norm": 1.6390159362517254, + "learning_rate": 1.3871003727125481e-06, + "loss": 0.1108, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6764 + }, + { + "epoch": 0.650574602106073, + "grad_norm": 1.8639027894158884, + "learning_rate": 1.3864173576454194e-06, + "loss": 0.1328, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6765 + }, + { + "epoch": 0.6506707698225705, + "grad_norm": 1.7224950392628373, + "learning_rate": 1.3857344462559369e-06, + "loss": 0.0913, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6766 + }, + { + "epoch": 0.6507669375390681, + "grad_norm": 2.181717159316905, + "learning_rate": 1.38505163860768e-06, + "loss": 0.1054, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6767 + }, + { + "epoch": 0.6508631052555657, + "grad_norm": 1.9273065710224766, + "learning_rate": 1.3843689347642217e-06, + "loss": 0.1127, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6768 + }, + { + "epoch": 0.6509592729720632, + "grad_norm": 1.928664716351155, + "learning_rate": 1.3836863347891227e-06, + "loss": 0.1287, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6769 + }, + { + "epoch": 0.6510554406885608, + "grad_norm": 3.4103684435720014, + "learning_rate": 1.3830038387459354e-06, + "loss": 0.1214, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6770 + }, + { + "epoch": 0.6511516084050584, + "grad_norm": 3.1880650177825465, + "learning_rate": 1.382321446698202e-06, + "loss": 0.1148, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6771 + }, + { + "epoch": 0.651247776121556, + "grad_norm": 1.9032722331860443, + "learning_rate": 1.3816391587094564e-06, + "loss": 0.1285, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6772 + }, + { + "epoch": 0.6513439438380536, + "grad_norm": 1.4498266797194475, + "learning_rate": 1.380956974843219e-06, + "loss": 0.0947, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6773 + }, + { + "epoch": 0.6514401115545512, + "grad_norm": 2.4584698146590624, + "learning_rate": 1.3802748951630041e-06, + "loss": 0.1087, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6774 + }, + { + "epoch": 0.6515362792710487, + "grad_norm": 1.6258442758488936, + "learning_rate": 1.3795929197323154e-06, + "loss": 0.1307, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6775 + }, + { + "epoch": 0.6516324469875463, + "grad_norm": 1.7150586591078039, + "learning_rate": 1.378911048614647e-06, + "loss": 0.1107, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6776 + }, + { + "epoch": 0.6517286147040439, + "grad_norm": 2.9138759927692823, + "learning_rate": 1.3782292818734833e-06, + "loss": 0.1506, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6777 + }, + { + "epoch": 0.6518247824205414, + "grad_norm": 3.4942002200833295, + "learning_rate": 1.3775476195722972e-06, + "loss": 0.1298, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6778 + }, + { + "epoch": 0.651920950137039, + "grad_norm": 2.300519747208115, + "learning_rate": 1.3768660617745544e-06, + "loss": 0.1157, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6779 + }, + { + "epoch": 0.6520171178535366, + "grad_norm": 1.3914627588838258, + "learning_rate": 1.376184608543709e-06, + "loss": 0.1058, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6780 + }, + { + "epoch": 0.6521132855700341, + "grad_norm": 3.9600323788207747, + "learning_rate": 1.3755032599432075e-06, + "loss": 0.1305, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6781 + }, + { + "epoch": 0.6522094532865317, + "grad_norm": 2.262011210598816, + "learning_rate": 1.3748220160364844e-06, + "loss": 0.1295, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6782 + }, + { + "epoch": 0.6523056210030292, + "grad_norm": 2.065231565066262, + "learning_rate": 1.374140876886967e-06, + "loss": 0.1187, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6783 + }, + { + "epoch": 0.6524017887195268, + "grad_norm": 1.5009671717716033, + "learning_rate": 1.3734598425580686e-06, + "loss": 0.105, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6784 + }, + { + "epoch": 0.6524979564360244, + "grad_norm": 2.47948921742541, + "learning_rate": 1.372778913113197e-06, + "loss": 0.1351, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6785 + }, + { + "epoch": 0.652594124152522, + "grad_norm": 1.7510769964870692, + "learning_rate": 1.372098088615749e-06, + "loss": 0.1103, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6786 + }, + { + "epoch": 0.6526902918690196, + "grad_norm": 1.6451559796517752, + "learning_rate": 1.3714173691291098e-06, + "loss": 0.101, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6787 + }, + { + "epoch": 0.6527864595855172, + "grad_norm": 3.558143541589186, + "learning_rate": 1.370736754716657e-06, + "loss": 0.1424, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6788 + }, + { + "epoch": 0.6528826273020147, + "grad_norm": 2.9703113780468695, + "learning_rate": 1.3700562454417588e-06, + "loss": 0.1271, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6789 + }, + { + "epoch": 0.6529787950185123, + "grad_norm": 1.8883946672243528, + "learning_rate": 1.3693758413677708e-06, + "loss": 0.1294, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6790 + }, + { + "epoch": 0.6530749627350099, + "grad_norm": 1.6389457476844347, + "learning_rate": 1.3686955425580408e-06, + "loss": 0.1239, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6791 + }, + { + "epoch": 0.6531711304515074, + "grad_norm": 2.0884390366005556, + "learning_rate": 1.3680153490759074e-06, + "loss": 0.1588, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6792 + }, + { + "epoch": 0.653267298168005, + "grad_norm": 3.5794621184334368, + "learning_rate": 1.3673352609846978e-06, + "loss": 0.1117, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6793 + }, + { + "epoch": 0.6533634658845026, + "grad_norm": 2.2834697934489925, + "learning_rate": 1.3666552783477313e-06, + "loss": 0.1408, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6794 + }, + { + "epoch": 0.6534596336010001, + "grad_norm": 3.957350691326502, + "learning_rate": 1.3659754012283145e-06, + "loss": 0.122, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6795 + }, + { + "epoch": 0.6535558013174977, + "grad_norm": 2.616233140090723, + "learning_rate": 1.3652956296897463e-06, + "loss": 0.1289, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6796 + }, + { + "epoch": 0.6536519690339953, + "grad_norm": 2.048387663032591, + "learning_rate": 1.3646159637953155e-06, + "loss": 0.1302, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6797 + }, + { + "epoch": 0.6537481367504928, + "grad_norm": 1.6490746009361883, + "learning_rate": 1.3639364036083012e-06, + "loss": 0.1169, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6798 + }, + { + "epoch": 0.6538443044669904, + "grad_norm": 1.7356874156413564, + "learning_rate": 1.363256949191972e-06, + "loss": 0.119, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6799 + }, + { + "epoch": 0.653940472183488, + "grad_norm": 2.2228018927303923, + "learning_rate": 1.3625776006095882e-06, + "loss": 0.1117, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6800 + }, + { + "epoch": 0.6540366398999856, + "grad_norm": 1.711903869249077, + "learning_rate": 1.3618983579243968e-06, + "loss": 0.1215, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6801 + }, + { + "epoch": 0.6541328076164832, + "grad_norm": 1.5074807951214495, + "learning_rate": 1.3612192211996386e-06, + "loss": 0.0991, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6802 + }, + { + "epoch": 0.6542289753329807, + "grad_norm": 1.611581664730429, + "learning_rate": 1.3605401904985427e-06, + "loss": 0.1173, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6803 + }, + { + "epoch": 0.6543251430494783, + "grad_norm": 1.9835576420517824, + "learning_rate": 1.3598612658843292e-06, + "loss": 0.1097, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6804 + }, + { + "epoch": 0.6544213107659759, + "grad_norm": 1.3917740179959026, + "learning_rate": 1.3591824474202084e-06, + "loss": 0.1236, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6805 + }, + { + "epoch": 0.6545174784824734, + "grad_norm": 2.0136437535335956, + "learning_rate": 1.3585037351693783e-06, + "loss": 0.1377, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6806 + }, + { + "epoch": 0.654613646198971, + "grad_norm": 1.8648271078687495, + "learning_rate": 1.3578251291950301e-06, + "loss": 0.1167, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6807 + }, + { + "epoch": 0.6547098139154686, + "grad_norm": 1.7970780555990122, + "learning_rate": 1.3571466295603438e-06, + "loss": 0.1132, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6808 + }, + { + "epoch": 0.6548059816319661, + "grad_norm": 1.5111866725139274, + "learning_rate": 1.3564682363284898e-06, + "loss": 0.0929, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6809 + }, + { + "epoch": 0.6549021493484637, + "grad_norm": 1.3888393311999823, + "learning_rate": 1.3557899495626282e-06, + "loss": 0.1151, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6810 + }, + { + "epoch": 0.6549983170649613, + "grad_norm": 1.7198978505307438, + "learning_rate": 1.35511176932591e-06, + "loss": 0.119, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6811 + }, + { + "epoch": 0.6550944847814588, + "grad_norm": 1.7571989178123286, + "learning_rate": 1.354433695681474e-06, + "loss": 0.1146, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6812 + }, + { + "epoch": 0.6551906524979564, + "grad_norm": 2.2106090009803823, + "learning_rate": 1.3537557286924524e-06, + "loss": 0.1098, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6813 + }, + { + "epoch": 0.6552868202144541, + "grad_norm": 1.5288488639298299, + "learning_rate": 1.3530778684219647e-06, + "loss": 0.1049, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6814 + }, + { + "epoch": 0.6553829879309516, + "grad_norm": 2.032034588619795, + "learning_rate": 1.3524001149331224e-06, + "loss": 0.1086, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6815 + }, + { + "epoch": 0.6554791556474492, + "grad_norm": 2.2173010686384917, + "learning_rate": 1.3517224682890268e-06, + "loss": 0.1249, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6816 + }, + { + "epoch": 0.6555753233639467, + "grad_norm": 2.500833796774715, + "learning_rate": 1.3510449285527671e-06, + "loss": 0.1289, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6817 + }, + { + "epoch": 0.6556714910804443, + "grad_norm": 2.531766887766077, + "learning_rate": 1.3503674957874247e-06, + "loss": 0.138, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6818 + }, + { + "epoch": 0.6557676587969419, + "grad_norm": 2.251870835419117, + "learning_rate": 1.3496901700560705e-06, + "loss": 0.1366, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6819 + }, + { + "epoch": 0.6558638265134394, + "grad_norm": 1.7036946403964182, + "learning_rate": 1.3490129514217665e-06, + "loss": 0.0964, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6820 + }, + { + "epoch": 0.655959994229937, + "grad_norm": 1.800542466633058, + "learning_rate": 1.3483358399475617e-06, + "loss": 0.1355, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6821 + }, + { + "epoch": 0.6560561619464346, + "grad_norm": 1.503327280431659, + "learning_rate": 1.3476588356964984e-06, + "loss": 0.0983, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6822 + }, + { + "epoch": 0.6561523296629321, + "grad_norm": 1.7851371614768996, + "learning_rate": 1.346981938731608e-06, + "loss": 0.1182, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6823 + }, + { + "epoch": 0.6562484973794297, + "grad_norm": 1.8558366565784008, + "learning_rate": 1.3463051491159095e-06, + "loss": 0.1107, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6824 + }, + { + "epoch": 0.6563446650959273, + "grad_norm": 1.4086611209614734, + "learning_rate": 1.3456284669124159e-06, + "loss": 0.0957, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6825 + }, + { + "epoch": 0.6564408328124248, + "grad_norm": 3.0103950500179266, + "learning_rate": 1.3449518921841268e-06, + "loss": 0.1187, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6826 + }, + { + "epoch": 0.6565370005289224, + "grad_norm": 2.1660089265269464, + "learning_rate": 1.3442754249940338e-06, + "loss": 0.1014, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6827 + }, + { + "epoch": 0.6566331682454201, + "grad_norm": 2.5655002018721804, + "learning_rate": 1.343599065405119e-06, + "loss": 0.1319, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6828 + }, + { + "epoch": 0.6567293359619176, + "grad_norm": 1.9654310653138067, + "learning_rate": 1.3429228134803515e-06, + "loss": 0.1454, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6829 + }, + { + "epoch": 0.6568255036784152, + "grad_norm": 1.7746642690984729, + "learning_rate": 1.342246669282693e-06, + "loss": 0.1085, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6830 + }, + { + "epoch": 0.6569216713949128, + "grad_norm": 2.0945723566170726, + "learning_rate": 1.341570632875094e-06, + "loss": 0.0943, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6831 + }, + { + "epoch": 0.6570178391114103, + "grad_norm": 1.9224863609604588, + "learning_rate": 1.340894704320496e-06, + "loss": 0.1203, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6832 + }, + { + "epoch": 0.6571140068279079, + "grad_norm": 2.177555609804203, + "learning_rate": 1.340218883681831e-06, + "loss": 0.1621, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6833 + }, + { + "epoch": 0.6572101745444054, + "grad_norm": 1.6490136317301047, + "learning_rate": 1.339543171022017e-06, + "loss": 0.1268, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6834 + }, + { + "epoch": 0.657306342260903, + "grad_norm": 1.8272038982875654, + "learning_rate": 1.3388675664039658e-06, + "loss": 0.1397, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6835 + }, + { + "epoch": 0.6574025099774006, + "grad_norm": 2.4860369478893714, + "learning_rate": 1.3381920698905788e-06, + "loss": 0.1161, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6836 + }, + { + "epoch": 0.6574986776938981, + "grad_norm": 1.9212600778080806, + "learning_rate": 1.3375166815447463e-06, + "loss": 0.1064, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6837 + }, + { + "epoch": 0.6575948454103957, + "grad_norm": 2.663877324617684, + "learning_rate": 1.3368414014293485e-06, + "loss": 0.1198, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6838 + }, + { + "epoch": 0.6576910131268933, + "grad_norm": 2.7487194510738893, + "learning_rate": 1.3361662296072572e-06, + "loss": 0.1017, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6839 + }, + { + "epoch": 0.6577871808433908, + "grad_norm": 2.435369259594466, + "learning_rate": 1.3354911661413305e-06, + "loss": 0.0912, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6840 + }, + { + "epoch": 0.6578833485598884, + "grad_norm": 1.6166215055301838, + "learning_rate": 1.3348162110944202e-06, + "loss": 0.113, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6841 + }, + { + "epoch": 0.6579795162763861, + "grad_norm": 2.4620012381191296, + "learning_rate": 1.3341413645293661e-06, + "loss": 0.145, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6842 + }, + { + "epoch": 0.6580756839928836, + "grad_norm": 1.7221811818546302, + "learning_rate": 1.3334666265089985e-06, + "loss": 0.111, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6843 + }, + { + "epoch": 0.6581718517093812, + "grad_norm": 2.9232813185526534, + "learning_rate": 1.3327919970961379e-06, + "loss": 0.1647, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6844 + }, + { + "epoch": 0.6582680194258788, + "grad_norm": 1.6521785202526997, + "learning_rate": 1.3321174763535926e-06, + "loss": 0.1308, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6845 + }, + { + "epoch": 0.6583641871423763, + "grad_norm": 1.7549271972739133, + "learning_rate": 1.3314430643441635e-06, + "loss": 0.1167, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6846 + }, + { + "epoch": 0.6584603548588739, + "grad_norm": 1.874900854404889, + "learning_rate": 1.3307687611306397e-06, + "loss": 0.1286, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6847 + }, + { + "epoch": 0.6585565225753715, + "grad_norm": 2.0157676556687196, + "learning_rate": 1.3300945667758015e-06, + "loss": 0.1335, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6848 + }, + { + "epoch": 0.658652690291869, + "grad_norm": 1.6215258248541295, + "learning_rate": 1.329420481342417e-06, + "loss": 0.1306, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6849 + }, + { + "epoch": 0.6587488580083666, + "grad_norm": 2.239115322768753, + "learning_rate": 1.328746504893248e-06, + "loss": 0.1131, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6850 + }, + { + "epoch": 0.6588450257248641, + "grad_norm": 1.6951352283998846, + "learning_rate": 1.3280726374910404e-06, + "loss": 0.1278, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6851 + }, + { + "epoch": 0.6589411934413617, + "grad_norm": 2.6672418167695793, + "learning_rate": 1.3273988791985348e-06, + "loss": 0.1258, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6852 + }, + { + "epoch": 0.6590373611578593, + "grad_norm": 1.7789144944060107, + "learning_rate": 1.3267252300784606e-06, + "loss": 0.1252, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6853 + }, + { + "epoch": 0.6591335288743568, + "grad_norm": 1.4474378334694675, + "learning_rate": 1.3260516901935347e-06, + "loss": 0.0943, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6854 + }, + { + "epoch": 0.6592296965908544, + "grad_norm": 1.577321858723162, + "learning_rate": 1.3253782596064663e-06, + "loss": 0.1147, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6855 + }, + { + "epoch": 0.6593258643073521, + "grad_norm": 1.8680671539382787, + "learning_rate": 1.3247049383799545e-06, + "loss": 0.1569, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6856 + }, + { + "epoch": 0.6594220320238496, + "grad_norm": 1.6231093704608837, + "learning_rate": 1.324031726576686e-06, + "loss": 0.1203, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6857 + }, + { + "epoch": 0.6595181997403472, + "grad_norm": 1.2819217746814786, + "learning_rate": 1.3233586242593388e-06, + "loss": 0.0977, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6858 + }, + { + "epoch": 0.6596143674568448, + "grad_norm": 1.7981689608947595, + "learning_rate": 1.3226856314905817e-06, + "loss": 0.1137, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6859 + }, + { + "epoch": 0.6597105351733423, + "grad_norm": 1.9161894307498601, + "learning_rate": 1.3220127483330714e-06, + "loss": 0.0873, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6860 + }, + { + "epoch": 0.6598067028898399, + "grad_norm": 2.0741672980616235, + "learning_rate": 1.321339974849456e-06, + "loss": 0.1205, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6861 + }, + { + "epoch": 0.6599028706063375, + "grad_norm": 1.6214624746723443, + "learning_rate": 1.3206673111023716e-06, + "loss": 0.1088, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6862 + }, + { + "epoch": 0.659999038322835, + "grad_norm": 1.6854552866509742, + "learning_rate": 1.3199947571544452e-06, + "loss": 0.1452, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6863 + }, + { + "epoch": 0.6600952060393326, + "grad_norm": 1.3824748016259207, + "learning_rate": 1.3193223130682937e-06, + "loss": 0.1333, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6864 + }, + { + "epoch": 0.6601913737558301, + "grad_norm": 1.6344198538246284, + "learning_rate": 1.3186499789065232e-06, + "loss": 0.1031, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6865 + }, + { + "epoch": 0.6602875414723277, + "grad_norm": 2.576653809492517, + "learning_rate": 1.3179777547317307e-06, + "loss": 0.1053, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6866 + }, + { + "epoch": 0.6603837091888253, + "grad_norm": 1.5383424746100167, + "learning_rate": 1.3173056406065027e-06, + "loss": 0.1095, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6867 + }, + { + "epoch": 0.6604798769053228, + "grad_norm": 2.051665370725031, + "learning_rate": 1.3166336365934123e-06, + "loss": 0.1298, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6868 + }, + { + "epoch": 0.6605760446218204, + "grad_norm": 1.6978810395601094, + "learning_rate": 1.315961742755027e-06, + "loss": 0.1275, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6869 + }, + { + "epoch": 0.6606722123383181, + "grad_norm": 2.6571720935156415, + "learning_rate": 1.3152899591539015e-06, + "loss": 0.1147, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6870 + }, + { + "epoch": 0.6607683800548156, + "grad_norm": 2.114666359980105, + "learning_rate": 1.3146182858525804e-06, + "loss": 0.1643, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6871 + }, + { + "epoch": 0.6608645477713132, + "grad_norm": 1.4059621173141228, + "learning_rate": 1.3139467229135999e-06, + "loss": 0.1002, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6872 + }, + { + "epoch": 0.6609607154878108, + "grad_norm": 2.2282638798084036, + "learning_rate": 1.313275270399482e-06, + "loss": 0.1358, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6873 + }, + { + "epoch": 0.6610568832043083, + "grad_norm": 1.4388344205698584, + "learning_rate": 1.3126039283727421e-06, + "loss": 0.0978, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6874 + }, + { + "epoch": 0.6611530509208059, + "grad_norm": 2.090388498055773, + "learning_rate": 1.3119326968958838e-06, + "loss": 0.1187, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6875 + }, + { + "epoch": 0.6612492186373035, + "grad_norm": 1.7096435901552194, + "learning_rate": 1.3112615760314004e-06, + "loss": 0.1094, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6876 + }, + { + "epoch": 0.661345386353801, + "grad_norm": 1.83684243145062, + "learning_rate": 1.3105905658417756e-06, + "loss": 0.109, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6877 + }, + { + "epoch": 0.6614415540702986, + "grad_norm": 1.6247904490911873, + "learning_rate": 1.309919666389483e-06, + "loss": 0.1218, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6878 + }, + { + "epoch": 0.6615377217867962, + "grad_norm": 1.6815997392886397, + "learning_rate": 1.309248877736984e-06, + "loss": 0.1089, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6879 + }, + { + "epoch": 0.6616338895032937, + "grad_norm": 3.081589597511126, + "learning_rate": 1.3085781999467303e-06, + "loss": 0.1482, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6880 + }, + { + "epoch": 0.6617300572197913, + "grad_norm": 1.641318257407346, + "learning_rate": 1.3079076330811653e-06, + "loss": 0.1303, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6881 + }, + { + "epoch": 0.6618262249362888, + "grad_norm": 2.502090524181716, + "learning_rate": 1.3072371772027198e-06, + "loss": 0.1694, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6882 + }, + { + "epoch": 0.6619223926527864, + "grad_norm": 1.8121106010875778, + "learning_rate": 1.3065668323738167e-06, + "loss": 0.1139, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6883 + }, + { + "epoch": 0.6620185603692841, + "grad_norm": 2.7961703009404575, + "learning_rate": 1.3058965986568647e-06, + "loss": 0.1158, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6884 + }, + { + "epoch": 0.6621147280857816, + "grad_norm": 1.7411776635382397, + "learning_rate": 1.3052264761142653e-06, + "loss": 0.1365, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6885 + }, + { + "epoch": 0.6622108958022792, + "grad_norm": 2.58320883066329, + "learning_rate": 1.3045564648084102e-06, + "loss": 0.1114, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6886 + }, + { + "epoch": 0.6623070635187768, + "grad_norm": 1.5602155409029808, + "learning_rate": 1.3038865648016768e-06, + "loss": 0.1216, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6887 + }, + { + "epoch": 0.6624032312352743, + "grad_norm": 2.1440925941906097, + "learning_rate": 1.3032167761564357e-06, + "loss": 0.1559, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6888 + }, + { + "epoch": 0.6624993989517719, + "grad_norm": 2.3367535021213315, + "learning_rate": 1.3025470989350475e-06, + "loss": 0.1007, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6889 + }, + { + "epoch": 0.6625955666682695, + "grad_norm": 1.9304008778350967, + "learning_rate": 1.301877533199859e-06, + "loss": 0.1268, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6890 + }, + { + "epoch": 0.662691734384767, + "grad_norm": 1.4854906151919989, + "learning_rate": 1.3012080790132092e-06, + "loss": 0.12, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6891 + }, + { + "epoch": 0.6627879021012646, + "grad_norm": 3.3064238253820264, + "learning_rate": 1.300538736437426e-06, + "loss": 0.114, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6892 + }, + { + "epoch": 0.6628840698177622, + "grad_norm": 1.6437234602151412, + "learning_rate": 1.299869505534828e-06, + "loss": 0.0936, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6893 + }, + { + "epoch": 0.6629802375342597, + "grad_norm": 1.6708692354004842, + "learning_rate": 1.2992003863677217e-06, + "loss": 0.1201, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6894 + }, + { + "epoch": 0.6630764052507573, + "grad_norm": 1.6855775596705715, + "learning_rate": 1.298531378998405e-06, + "loss": 0.1314, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6895 + }, + { + "epoch": 0.6631725729672548, + "grad_norm": 1.8213411153835533, + "learning_rate": 1.2978624834891629e-06, + "loss": 0.1074, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6896 + }, + { + "epoch": 0.6632687406837524, + "grad_norm": 1.9757629136501909, + "learning_rate": 1.2971936999022715e-06, + "loss": 0.1131, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6897 + }, + { + "epoch": 0.6633649084002501, + "grad_norm": 1.6079102734034645, + "learning_rate": 1.2965250282999974e-06, + "loss": 0.1164, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6898 + }, + { + "epoch": 0.6634610761167477, + "grad_norm": 1.7319123421998504, + "learning_rate": 1.2958564687445951e-06, + "loss": 0.1165, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6899 + }, + { + "epoch": 0.6635572438332452, + "grad_norm": 1.565903818515133, + "learning_rate": 1.2951880212983106e-06, + "loss": 0.1286, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6900 + }, + { + "epoch": 0.6636534115497428, + "grad_norm": 1.4427033266929115, + "learning_rate": 1.2945196860233763e-06, + "loss": 0.1027, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6901 + }, + { + "epoch": 0.6637495792662403, + "grad_norm": 2.9841692517653753, + "learning_rate": 1.293851462982017e-06, + "loss": 0.1255, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6902 + }, + { + "epoch": 0.6638457469827379, + "grad_norm": 2.1158043505530353, + "learning_rate": 1.2931833522364462e-06, + "loss": 0.1355, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6903 + }, + { + "epoch": 0.6639419146992355, + "grad_norm": 1.5233440485068575, + "learning_rate": 1.2925153538488666e-06, + "loss": 0.1175, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6904 + }, + { + "epoch": 0.664038082415733, + "grad_norm": 1.7880346120115467, + "learning_rate": 1.291847467881471e-06, + "loss": 0.1507, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6905 + }, + { + "epoch": 0.6641342501322306, + "grad_norm": 2.0575419825378503, + "learning_rate": 1.2911796943964422e-06, + "loss": 0.1229, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6906 + }, + { + "epoch": 0.6642304178487282, + "grad_norm": 1.9172363789821392, + "learning_rate": 1.29051203345595e-06, + "loss": 0.107, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6907 + }, + { + "epoch": 0.6643265855652257, + "grad_norm": 1.662519123080471, + "learning_rate": 1.2898444851221564e-06, + "loss": 0.1152, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6908 + }, + { + "epoch": 0.6644227532817233, + "grad_norm": 1.5055286851124634, + "learning_rate": 1.2891770494572122e-06, + "loss": 0.1355, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6909 + }, + { + "epoch": 0.6645189209982209, + "grad_norm": 1.3985850548550862, + "learning_rate": 1.288509726523257e-06, + "loss": 0.1061, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6910 + }, + { + "epoch": 0.6646150887147184, + "grad_norm": 1.3384283979860758, + "learning_rate": 1.2878425163824212e-06, + "loss": 0.0966, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6911 + }, + { + "epoch": 0.6647112564312161, + "grad_norm": 1.5191108888239107, + "learning_rate": 1.2871754190968244e-06, + "loss": 0.0968, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6912 + }, + { + "epoch": 0.6648074241477137, + "grad_norm": 1.5614273261505247, + "learning_rate": 1.2865084347285728e-06, + "loss": 0.1227, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6913 + }, + { + "epoch": 0.6649035918642112, + "grad_norm": 1.5179531728358384, + "learning_rate": 1.2858415633397664e-06, + "loss": 0.1114, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6914 + }, + { + "epoch": 0.6649997595807088, + "grad_norm": 1.7186779818290432, + "learning_rate": 1.2851748049924923e-06, + "loss": 0.1148, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6915 + }, + { + "epoch": 0.6650959272972063, + "grad_norm": 1.891497469086515, + "learning_rate": 1.2845081597488288e-06, + "loss": 0.1399, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6916 + }, + { + "epoch": 0.6651920950137039, + "grad_norm": 1.5559692940730478, + "learning_rate": 1.2838416276708402e-06, + "loss": 0.1226, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6917 + }, + { + "epoch": 0.6652882627302015, + "grad_norm": 1.343862144082373, + "learning_rate": 1.2831752088205834e-06, + "loss": 0.084, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6918 + }, + { + "epoch": 0.665384430446699, + "grad_norm": 1.927326919415326, + "learning_rate": 1.282508903260104e-06, + "loss": 0.0968, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6919 + }, + { + "epoch": 0.6654805981631966, + "grad_norm": 2.273828464375612, + "learning_rate": 1.2818427110514382e-06, + "loss": 0.1489, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6920 + }, + { + "epoch": 0.6655767658796942, + "grad_norm": 1.7644590440057557, + "learning_rate": 1.2811766322566076e-06, + "loss": 0.1295, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6921 + }, + { + "epoch": 0.6656729335961917, + "grad_norm": 1.6336524287906213, + "learning_rate": 1.2805106669376282e-06, + "loss": 0.1188, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6922 + }, + { + "epoch": 0.6657691013126893, + "grad_norm": 2.8859701933525383, + "learning_rate": 1.279844815156503e-06, + "loss": 0.1051, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6923 + }, + { + "epoch": 0.6658652690291869, + "grad_norm": 1.5190815521651193, + "learning_rate": 1.2791790769752232e-06, + "loss": 0.0973, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6924 + }, + { + "epoch": 0.6659614367456844, + "grad_norm": 1.8370917718103534, + "learning_rate": 1.278513452455772e-06, + "loss": 0.0993, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6925 + }, + { + "epoch": 0.6660576044621821, + "grad_norm": 1.7286444954632707, + "learning_rate": 1.2778479416601203e-06, + "loss": 0.1257, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6926 + }, + { + "epoch": 0.6661537721786797, + "grad_norm": 1.286588108949383, + "learning_rate": 1.2771825446502297e-06, + "loss": 0.091, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6927 + }, + { + "epoch": 0.6662499398951772, + "grad_norm": 1.7756658685015807, + "learning_rate": 1.276517261488051e-06, + "loss": 0.1267, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6928 + }, + { + "epoch": 0.6663461076116748, + "grad_norm": 2.1128488155001173, + "learning_rate": 1.2758520922355228e-06, + "loss": 0.1501, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6929 + }, + { + "epoch": 0.6664422753281724, + "grad_norm": 1.8690338907991122, + "learning_rate": 1.2751870369545738e-06, + "loss": 0.1362, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6930 + }, + { + "epoch": 0.6665384430446699, + "grad_norm": 1.7912716226828478, + "learning_rate": 1.2745220957071239e-06, + "loss": 0.1306, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6931 + }, + { + "epoch": 0.6666346107611675, + "grad_norm": 2.4021261609526796, + "learning_rate": 1.27385726855508e-06, + "loss": 0.1314, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6932 + }, + { + "epoch": 0.666730778477665, + "grad_norm": 2.946956834640489, + "learning_rate": 1.2731925555603398e-06, + "loss": 0.1241, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6933 + }, + { + "epoch": 0.6668269461941626, + "grad_norm": 2.634134849671879, + "learning_rate": 1.272527956784791e-06, + "loss": 0.149, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6934 + }, + { + "epoch": 0.6669231139106602, + "grad_norm": 1.8530025950580438, + "learning_rate": 1.2718634722903073e-06, + "loss": 0.1351, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6935 + }, + { + "epoch": 0.6670192816271577, + "grad_norm": 1.463806319491486, + "learning_rate": 1.271199102138755e-06, + "loss": 0.1207, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6936 + }, + { + "epoch": 0.6671154493436553, + "grad_norm": 3.1078712863469424, + "learning_rate": 1.2705348463919892e-06, + "loss": 0.1438, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6937 + }, + { + "epoch": 0.6672116170601529, + "grad_norm": 2.571561197374492, + "learning_rate": 1.2698707051118537e-06, + "loss": 0.1363, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6938 + }, + { + "epoch": 0.6673077847766504, + "grad_norm": 1.3316162509968446, + "learning_rate": 1.269206678360182e-06, + "loss": 0.1005, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6939 + }, + { + "epoch": 0.6674039524931481, + "grad_norm": 1.8607727464257886, + "learning_rate": 1.2685427661987975e-06, + "loss": 0.108, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6940 + }, + { + "epoch": 0.6675001202096457, + "grad_norm": 2.1418138719689273, + "learning_rate": 1.267878968689511e-06, + "loss": 0.1052, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6941 + }, + { + "epoch": 0.6675962879261432, + "grad_norm": 1.8922947008947733, + "learning_rate": 1.2672152858941244e-06, + "loss": 0.1204, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6942 + }, + { + "epoch": 0.6676924556426408, + "grad_norm": 1.3876996157019432, + "learning_rate": 1.2665517178744285e-06, + "loss": 0.0964, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6943 + }, + { + "epoch": 0.6677886233591384, + "grad_norm": 1.8402640353481297, + "learning_rate": 1.2658882646922036e-06, + "loss": 0.1247, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6944 + }, + { + "epoch": 0.6678847910756359, + "grad_norm": 2.058965346264291, + "learning_rate": 1.2652249264092192e-06, + "loss": 0.1312, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6945 + }, + { + "epoch": 0.6679809587921335, + "grad_norm": 1.6395517401295137, + "learning_rate": 1.2645617030872328e-06, + "loss": 0.122, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6946 + }, + { + "epoch": 0.668077126508631, + "grad_norm": 1.8761695498407656, + "learning_rate": 1.2638985947879933e-06, + "loss": 0.1243, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6947 + }, + { + "epoch": 0.6681732942251286, + "grad_norm": 1.731416046431772, + "learning_rate": 1.2632356015732373e-06, + "loss": 0.0962, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6948 + }, + { + "epoch": 0.6682694619416262, + "grad_norm": 2.911079047748316, + "learning_rate": 1.262572723504692e-06, + "loss": 0.1317, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6949 + }, + { + "epoch": 0.6683656296581237, + "grad_norm": 1.6008265386466716, + "learning_rate": 1.2619099606440735e-06, + "loss": 0.1146, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6950 + }, + { + "epoch": 0.6684617973746213, + "grad_norm": 2.277818362955969, + "learning_rate": 1.2612473130530856e-06, + "loss": 0.136, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6951 + }, + { + "epoch": 0.6685579650911189, + "grad_norm": 2.2251328419330982, + "learning_rate": 1.2605847807934229e-06, + "loss": 0.1118, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6952 + }, + { + "epoch": 0.6686541328076164, + "grad_norm": 1.6568402857564117, + "learning_rate": 1.2599223639267704e-06, + "loss": 0.1085, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6953 + }, + { + "epoch": 0.6687503005241141, + "grad_norm": 1.662998936078724, + "learning_rate": 1.2592600625147994e-06, + "loss": 0.1033, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6954 + }, + { + "epoch": 0.6688464682406117, + "grad_norm": 3.773596152072716, + "learning_rate": 1.2585978766191726e-06, + "loss": 0.1213, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6955 + }, + { + "epoch": 0.6689426359571092, + "grad_norm": 1.593598558366215, + "learning_rate": 1.2579358063015418e-06, + "loss": 0.0899, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6956 + }, + { + "epoch": 0.6690388036736068, + "grad_norm": 1.9899428089190228, + "learning_rate": 1.2572738516235462e-06, + "loss": 0.1476, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6957 + }, + { + "epoch": 0.6691349713901044, + "grad_norm": 1.6454095653573049, + "learning_rate": 1.2566120126468167e-06, + "loss": 0.1007, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6958 + }, + { + "epoch": 0.6692311391066019, + "grad_norm": 1.7501472916493115, + "learning_rate": 1.2559502894329722e-06, + "loss": 0.1127, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6959 + }, + { + "epoch": 0.6693273068230995, + "grad_norm": 1.614489705958973, + "learning_rate": 1.2552886820436208e-06, + "loss": 0.1167, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6960 + }, + { + "epoch": 0.669423474539597, + "grad_norm": 2.202460213771169, + "learning_rate": 1.25462719054036e-06, + "loss": 0.1269, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6961 + }, + { + "epoch": 0.6695196422560946, + "grad_norm": 2.0978671268927536, + "learning_rate": 1.2539658149847781e-06, + "loss": 0.1081, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6962 + }, + { + "epoch": 0.6696158099725922, + "grad_norm": 2.1892291173891496, + "learning_rate": 1.2533045554384482e-06, + "loss": 0.134, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6963 + }, + { + "epoch": 0.6697119776890897, + "grad_norm": 2.2484523720149707, + "learning_rate": 1.2526434119629368e-06, + "loss": 0.1186, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6964 + }, + { + "epoch": 0.6698081454055873, + "grad_norm": 2.493182922738268, + "learning_rate": 1.2519823846197982e-06, + "loss": 0.1111, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6965 + }, + { + "epoch": 0.6699043131220849, + "grad_norm": 1.6584767789910533, + "learning_rate": 1.2513214734705758e-06, + "loss": 0.1108, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6966 + }, + { + "epoch": 0.6700004808385824, + "grad_norm": 2.5135213090994504, + "learning_rate": 1.2506606785768021e-06, + "loss": 0.0947, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6967 + }, + { + "epoch": 0.6700966485550801, + "grad_norm": 1.7039316179352473, + "learning_rate": 1.2500000000000007e-06, + "loss": 0.1337, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6968 + }, + { + "epoch": 0.6701928162715777, + "grad_norm": 2.8032583600999224, + "learning_rate": 1.2493394378016797e-06, + "loss": 0.1197, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6969 + }, + { + "epoch": 0.6702889839880752, + "grad_norm": 1.7622575600012058, + "learning_rate": 1.2486789920433406e-06, + "loss": 0.1191, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6970 + }, + { + "epoch": 0.6703851517045728, + "grad_norm": 3.266181333474943, + "learning_rate": 1.248018662786473e-06, + "loss": 0.1127, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6971 + }, + { + "epoch": 0.6704813194210704, + "grad_norm": 2.1749349112230876, + "learning_rate": 1.2473584500925548e-06, + "loss": 0.1221, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6972 + }, + { + "epoch": 0.6705774871375679, + "grad_norm": 1.5271603930730189, + "learning_rate": 1.2466983540230554e-06, + "loss": 0.1046, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6973 + }, + { + "epoch": 0.6706736548540655, + "grad_norm": 1.9841969012593346, + "learning_rate": 1.2460383746394289e-06, + "loss": 0.1384, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6974 + }, + { + "epoch": 0.6707698225705631, + "grad_norm": 2.2990414050360473, + "learning_rate": 1.2453785120031227e-06, + "loss": 0.1271, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6975 + }, + { + "epoch": 0.6708659902870606, + "grad_norm": 1.7500367082295163, + "learning_rate": 1.2447187661755717e-06, + "loss": 0.1125, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6976 + }, + { + "epoch": 0.6709621580035582, + "grad_norm": 1.7612577575278903, + "learning_rate": 1.2440591372182001e-06, + "loss": 0.1034, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6977 + }, + { + "epoch": 0.6710583257200557, + "grad_norm": 1.9122934164626395, + "learning_rate": 1.243399625192421e-06, + "loss": 0.102, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6978 + }, + { + "epoch": 0.6711544934365533, + "grad_norm": 2.6064713247262987, + "learning_rate": 1.242740230159638e-06, + "loss": 0.1348, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6979 + }, + { + "epoch": 0.6712506611530509, + "grad_norm": 1.6947507112187485, + "learning_rate": 1.2420809521812406e-06, + "loss": 0.1142, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6980 + }, + { + "epoch": 0.6713468288695484, + "grad_norm": 2.475878922435755, + "learning_rate": 1.2414217913186102e-06, + "loss": 0.1464, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6981 + }, + { + "epoch": 0.6714429965860461, + "grad_norm": 1.4595135014230456, + "learning_rate": 1.2407627476331168e-06, + "loss": 0.1119, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6982 + }, + { + "epoch": 0.6715391643025437, + "grad_norm": 1.6461119425220803, + "learning_rate": 1.2401038211861202e-06, + "loss": 0.1126, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6983 + }, + { + "epoch": 0.6716353320190412, + "grad_norm": 2.7480099299047795, + "learning_rate": 1.2394450120389658e-06, + "loss": 0.1078, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6984 + }, + { + "epoch": 0.6717314997355388, + "grad_norm": 3.226457404021211, + "learning_rate": 1.2387863202529923e-06, + "loss": 0.1226, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6985 + }, + { + "epoch": 0.6718276674520364, + "grad_norm": 1.7380895228659594, + "learning_rate": 1.2381277458895263e-06, + "loss": 0.1156, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6986 + }, + { + "epoch": 0.6719238351685339, + "grad_norm": 1.800130695979802, + "learning_rate": 1.2374692890098812e-06, + "loss": 0.127, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6987 + }, + { + "epoch": 0.6720200028850315, + "grad_norm": 1.5618919680988477, + "learning_rate": 1.2368109496753616e-06, + "loss": 0.111, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6988 + }, + { + "epoch": 0.6721161706015291, + "grad_norm": 1.9243655744149333, + "learning_rate": 1.2361527279472614e-06, + "loss": 0.1237, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6989 + }, + { + "epoch": 0.6722123383180266, + "grad_norm": 1.781829797378526, + "learning_rate": 1.2354946238868631e-06, + "loss": 0.1224, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6990 + }, + { + "epoch": 0.6723085060345242, + "grad_norm": 1.5519859977804566, + "learning_rate": 1.2348366375554368e-06, + "loss": 0.1107, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6991 + }, + { + "epoch": 0.6724046737510218, + "grad_norm": 1.6570200011290575, + "learning_rate": 1.2341787690142436e-06, + "loss": 0.132, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6992 + }, + { + "epoch": 0.6725008414675193, + "grad_norm": 1.853503733521308, + "learning_rate": 1.2335210183245328e-06, + "loss": 0.1531, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6993 + }, + { + "epoch": 0.6725970091840169, + "grad_norm": 1.9141963322926638, + "learning_rate": 1.232863385547543e-06, + "loss": 0.1308, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6994 + }, + { + "epoch": 0.6726931769005144, + "grad_norm": 2.549617099295251, + "learning_rate": 1.2322058707445012e-06, + "loss": 0.1243, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6995 + }, + { + "epoch": 0.6727893446170121, + "grad_norm": 2.251156144078593, + "learning_rate": 1.2315484739766253e-06, + "loss": 0.1159, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6996 + }, + { + "epoch": 0.6728855123335097, + "grad_norm": 1.8836981310477294, + "learning_rate": 1.2308911953051187e-06, + "loss": 0.151, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6997 + }, + { + "epoch": 0.6729816800500072, + "grad_norm": 1.5624891217101822, + "learning_rate": 1.2302340347911767e-06, + "loss": 0.1104, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6998 + }, + { + "epoch": 0.6730778477665048, + "grad_norm": 2.953919151585124, + "learning_rate": 1.229576992495983e-06, + "loss": 0.1149, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 6999 + }, + { + "epoch": 0.6731740154830024, + "grad_norm": 1.5074391143994332, + "learning_rate": 1.2289200684807098e-06, + "loss": 0.1153, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 7000 + }, + { + "epoch": 0.6732701831994999, + "grad_norm": 1.7139602121659103, + "learning_rate": 1.2282632628065197e-06, + "loss": 0.1237, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 7001 + }, + { + "epoch": 0.6733663509159975, + "grad_norm": 1.5826426776817553, + "learning_rate": 1.2276065755345612e-06, + "loss": 0.1004, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 7002 + }, + { + "epoch": 0.6734625186324951, + "grad_norm": 1.8303875649758605, + "learning_rate": 1.2269500067259748e-06, + "loss": 0.1259, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 7003 + }, + { + "epoch": 0.6735586863489926, + "grad_norm": 2.8578919487388212, + "learning_rate": 1.2262935564418888e-06, + "loss": 0.1154, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 7004 + }, + { + "epoch": 0.6736548540654902, + "grad_norm": 1.5009115377148667, + "learning_rate": 1.22563722474342e-06, + "loss": 0.1193, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 7005 + }, + { + "epoch": 0.6737510217819878, + "grad_norm": 2.1533030951051604, + "learning_rate": 1.2249810116916758e-06, + "loss": 0.113, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 7006 + }, + { + "epoch": 0.6738471894984853, + "grad_norm": 1.6699935077387067, + "learning_rate": 1.2243249173477514e-06, + "loss": 0.1261, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 7007 + }, + { + "epoch": 0.6739433572149829, + "grad_norm": 3.89658733087064, + "learning_rate": 1.2236689417727297e-06, + "loss": 0.1401, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 7008 + }, + { + "epoch": 0.6740395249314804, + "grad_norm": 2.1830946247410314, + "learning_rate": 1.2230130850276845e-06, + "loss": 0.1426, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 7009 + }, + { + "epoch": 0.6741356926479781, + "grad_norm": 1.6975126469562942, + "learning_rate": 1.2223573471736783e-06, + "loss": 0.1196, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 7010 + }, + { + "epoch": 0.6742318603644757, + "grad_norm": 1.5692289903360328, + "learning_rate": 1.2217017282717616e-06, + "loss": 0.101, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 7011 + }, + { + "epoch": 0.6743280280809733, + "grad_norm": 1.7406547755013653, + "learning_rate": 1.2210462283829754e-06, + "loss": 0.1182, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 7012 + }, + { + "epoch": 0.6744241957974708, + "grad_norm": 2.3437325888931317, + "learning_rate": 1.2203908475683472e-06, + "loss": 0.1313, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 7013 + }, + { + "epoch": 0.6745203635139684, + "grad_norm": 1.828328704990515, + "learning_rate": 1.2197355858888952e-06, + "loss": 0.1028, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 7014 + }, + { + "epoch": 0.6746165312304659, + "grad_norm": 2.3644441944993453, + "learning_rate": 1.219080443405626e-06, + "loss": 0.1187, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 7015 + }, + { + "epoch": 0.6747126989469635, + "grad_norm": 1.5874129621967672, + "learning_rate": 1.2184254201795364e-06, + "loss": 0.0998, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 7016 + }, + { + "epoch": 0.6748088666634611, + "grad_norm": 1.6687376510626295, + "learning_rate": 1.2177705162716092e-06, + "loss": 0.1419, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 42.78, + "memory/max_mem_allocated(gib)": 42.78, + "step": 7017 + }, + { + "epoch": 0.6749050343799586, + "grad_norm": 1.6613249594366932, + "learning_rate": 1.2171157317428184e-06, + "loss": 0.13, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7018 + }, + { + "epoch": 0.6750012020964562, + "grad_norm": 1.6220456983092684, + "learning_rate": 1.2164610666541262e-06, + "loss": 0.1094, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7019 + }, + { + "epoch": 0.6750973698129538, + "grad_norm": 1.9937847614858957, + "learning_rate": 1.2158065210664848e-06, + "loss": 0.1172, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7020 + }, + { + "epoch": 0.6751935375294513, + "grad_norm": 2.7473941775159805, + "learning_rate": 1.2151520950408325e-06, + "loss": 0.1275, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7021 + }, + { + "epoch": 0.6752897052459489, + "grad_norm": 1.8092058461756322, + "learning_rate": 1.2144977886380987e-06, + "loss": 0.1185, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7022 + }, + { + "epoch": 0.6753858729624465, + "grad_norm": 1.505109942416298, + "learning_rate": 1.2138436019192019e-06, + "loss": 0.1123, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7023 + }, + { + "epoch": 0.6754820406789441, + "grad_norm": 2.1067576585422265, + "learning_rate": 1.213189534945049e-06, + "loss": 0.1109, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7024 + }, + { + "epoch": 0.6755782083954417, + "grad_norm": 1.6772541694905883, + "learning_rate": 1.2125355877765336e-06, + "loss": 0.1127, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7025 + }, + { + "epoch": 0.6756743761119393, + "grad_norm": 1.7392907132498792, + "learning_rate": 1.2118817604745412e-06, + "loss": 0.1231, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7026 + }, + { + "epoch": 0.6757705438284368, + "grad_norm": 1.6916096556647144, + "learning_rate": 1.2112280530999448e-06, + "loss": 0.1244, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7027 + }, + { + "epoch": 0.6758667115449344, + "grad_norm": 1.7634393166857536, + "learning_rate": 1.2105744657136065e-06, + "loss": 0.1154, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7028 + }, + { + "epoch": 0.675962879261432, + "grad_norm": 1.7414260008967744, + "learning_rate": 1.2099209983763776e-06, + "loss": 0.1048, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7029 + }, + { + "epoch": 0.6760590469779295, + "grad_norm": 1.5177210209735426, + "learning_rate": 1.2092676511490967e-06, + "loss": 0.1011, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7030 + }, + { + "epoch": 0.6761552146944271, + "grad_norm": 1.824517354355413, + "learning_rate": 1.2086144240925925e-06, + "loss": 0.1553, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7031 + }, + { + "epoch": 0.6762513824109246, + "grad_norm": 2.0778860719591834, + "learning_rate": 1.2079613172676824e-06, + "loss": 0.1387, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7032 + }, + { + "epoch": 0.6763475501274222, + "grad_norm": 1.6478739745961952, + "learning_rate": 1.2073083307351727e-06, + "loss": 0.1095, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7033 + }, + { + "epoch": 0.6764437178439198, + "grad_norm": 2.8077518360198392, + "learning_rate": 1.2066554645558578e-06, + "loss": 0.1516, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7034 + }, + { + "epoch": 0.6765398855604173, + "grad_norm": 1.9578398061325444, + "learning_rate": 1.206002718790523e-06, + "loss": 0.1275, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7035 + }, + { + "epoch": 0.6766360532769149, + "grad_norm": 3.012769594712718, + "learning_rate": 1.2053500934999382e-06, + "loss": 0.1292, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7036 + }, + { + "epoch": 0.6767322209934125, + "grad_norm": 2.053075891878837, + "learning_rate": 1.204697588744866e-06, + "loss": 0.093, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7037 + }, + { + "epoch": 0.6768283887099101, + "grad_norm": 1.591293006071952, + "learning_rate": 1.2040452045860559e-06, + "loss": 0.1166, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7038 + }, + { + "epoch": 0.6769245564264077, + "grad_norm": 2.6169570319900695, + "learning_rate": 1.203392941084247e-06, + "loss": 0.1161, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7039 + }, + { + "epoch": 0.6770207241429053, + "grad_norm": 1.6638344404956642, + "learning_rate": 1.2027407983001683e-06, + "loss": 0.0984, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7040 + }, + { + "epoch": 0.6771168918594028, + "grad_norm": 2.781388458905471, + "learning_rate": 1.2020887762945333e-06, + "loss": 0.1351, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7041 + }, + { + "epoch": 0.6772130595759004, + "grad_norm": 1.75332818881102, + "learning_rate": 1.2014368751280486e-06, + "loss": 0.1154, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7042 + }, + { + "epoch": 0.677309227292398, + "grad_norm": 1.6802598785036065, + "learning_rate": 1.200785094861408e-06, + "loss": 0.1104, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7043 + }, + { + "epoch": 0.6774053950088955, + "grad_norm": 1.6308745426878708, + "learning_rate": 1.2001334355552938e-06, + "loss": 0.1289, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7044 + }, + { + "epoch": 0.6775015627253931, + "grad_norm": 1.5245373454177464, + "learning_rate": 1.1994818972703774e-06, + "loss": 0.1122, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7045 + }, + { + "epoch": 0.6775977304418906, + "grad_norm": 1.9074561509670367, + "learning_rate": 1.1988304800673197e-06, + "loss": 0.1212, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7046 + }, + { + "epoch": 0.6776938981583882, + "grad_norm": 1.8307554551716676, + "learning_rate": 1.1981791840067677e-06, + "loss": 0.1201, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7047 + }, + { + "epoch": 0.6777900658748858, + "grad_norm": 2.122668717076095, + "learning_rate": 1.19752800914936e-06, + "loss": 0.12, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7048 + }, + { + "epoch": 0.6778862335913833, + "grad_norm": 1.84535535250402, + "learning_rate": 1.1968769555557223e-06, + "loss": 0.1432, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7049 + }, + { + "epoch": 0.6779824013078809, + "grad_norm": 1.9957392022476932, + "learning_rate": 1.196226023286471e-06, + "loss": 0.1431, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7050 + }, + { + "epoch": 0.6780785690243785, + "grad_norm": 1.4889129279115432, + "learning_rate": 1.1955752124022071e-06, + "loss": 0.0933, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7051 + }, + { + "epoch": 0.6781747367408761, + "grad_norm": 1.955420233785818, + "learning_rate": 1.1949245229635245e-06, + "loss": 0.1358, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7052 + }, + { + "epoch": 0.6782709044573737, + "grad_norm": 1.7273851904029822, + "learning_rate": 1.194273955031005e-06, + "loss": 0.1236, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7053 + }, + { + "epoch": 0.6783670721738713, + "grad_norm": 1.4220909648804738, + "learning_rate": 1.1936235086652163e-06, + "loss": 0.0969, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7054 + }, + { + "epoch": 0.6784632398903688, + "grad_norm": 1.5991515551632585, + "learning_rate": 1.1929731839267179e-06, + "loss": 0.0912, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7055 + }, + { + "epoch": 0.6785594076068664, + "grad_norm": 1.889299923485189, + "learning_rate": 1.1923229808760565e-06, + "loss": 0.1011, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7056 + }, + { + "epoch": 0.678655575323364, + "grad_norm": 2.2905807440262747, + "learning_rate": 1.1916728995737687e-06, + "loss": 0.1197, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7057 + }, + { + "epoch": 0.6787517430398615, + "grad_norm": 1.7010074194791207, + "learning_rate": 1.1910229400803774e-06, + "loss": 0.1295, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7058 + }, + { + "epoch": 0.6788479107563591, + "grad_norm": 1.836328609925756, + "learning_rate": 1.1903731024563967e-06, + "loss": 0.1098, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7059 + }, + { + "epoch": 0.6789440784728566, + "grad_norm": 1.3705841570676458, + "learning_rate": 1.189723386762328e-06, + "loss": 0.1051, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7060 + }, + { + "epoch": 0.6790402461893542, + "grad_norm": 1.6637998455174638, + "learning_rate": 1.1890737930586614e-06, + "loss": 0.1185, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7061 + }, + { + "epoch": 0.6791364139058518, + "grad_norm": 1.861852900026274, + "learning_rate": 1.1884243214058761e-06, + "loss": 0.1284, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7062 + }, + { + "epoch": 0.6792325816223493, + "grad_norm": 3.0480549021367573, + "learning_rate": 1.1877749718644408e-06, + "loss": 0.1074, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7063 + }, + { + "epoch": 0.6793287493388469, + "grad_norm": 1.5787784927013344, + "learning_rate": 1.1871257444948098e-06, + "loss": 0.1041, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7064 + }, + { + "epoch": 0.6794249170553445, + "grad_norm": 1.9589327478768692, + "learning_rate": 1.1864766393574287e-06, + "loss": 0.1481, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7065 + }, + { + "epoch": 0.6795210847718421, + "grad_norm": 2.459643999891069, + "learning_rate": 1.1858276565127314e-06, + "loss": 0.1378, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7066 + }, + { + "epoch": 0.6796172524883397, + "grad_norm": 1.4108759139459568, + "learning_rate": 1.1851787960211397e-06, + "loss": 0.0889, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7067 + }, + { + "epoch": 0.6797134202048373, + "grad_norm": 1.7885501953659766, + "learning_rate": 1.1845300579430654e-06, + "loss": 0.129, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7068 + }, + { + "epoch": 0.6798095879213348, + "grad_norm": 1.7082146768209394, + "learning_rate": 1.1838814423389057e-06, + "loss": 0.136, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7069 + }, + { + "epoch": 0.6799057556378324, + "grad_norm": 1.7942640244649795, + "learning_rate": 1.18323294926905e-06, + "loss": 0.0885, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7070 + }, + { + "epoch": 0.68000192335433, + "grad_norm": 2.1145288687269583, + "learning_rate": 1.182584578793874e-06, + "loss": 0.1236, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7071 + }, + { + "epoch": 0.6800980910708275, + "grad_norm": 2.5595542435240324, + "learning_rate": 1.181936330973744e-06, + "loss": 0.1178, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7072 + }, + { + "epoch": 0.6801942587873251, + "grad_norm": 1.8078769151995109, + "learning_rate": 1.1812882058690123e-06, + "loss": 0.0971, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7073 + }, + { + "epoch": 0.6802904265038227, + "grad_norm": 2.352790211452149, + "learning_rate": 1.1806402035400231e-06, + "loss": 0.1185, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7074 + }, + { + "epoch": 0.6803865942203202, + "grad_norm": 2.035577922832257, + "learning_rate": 1.1799923240471046e-06, + "loss": 0.1237, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7075 + }, + { + "epoch": 0.6804827619368178, + "grad_norm": 2.421338946387048, + "learning_rate": 1.1793445674505778e-06, + "loss": 0.1123, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7076 + }, + { + "epoch": 0.6805789296533153, + "grad_norm": 1.8748486428165254, + "learning_rate": 1.1786969338107501e-06, + "loss": 0.1081, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7077 + }, + { + "epoch": 0.6806750973698129, + "grad_norm": 2.486663521191269, + "learning_rate": 1.1780494231879183e-06, + "loss": 0.1261, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7078 + }, + { + "epoch": 0.6807712650863105, + "grad_norm": 1.652712465430662, + "learning_rate": 1.1774020356423684e-06, + "loss": 0.1084, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7079 + }, + { + "epoch": 0.6808674328028081, + "grad_norm": 1.584210871810826, + "learning_rate": 1.1767547712343722e-06, + "loss": 0.1121, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7080 + }, + { + "epoch": 0.6809636005193057, + "grad_norm": 2.1120714900410507, + "learning_rate": 1.176107630024192e-06, + "loss": 0.125, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7081 + }, + { + "epoch": 0.6810597682358033, + "grad_norm": 1.6558919787269708, + "learning_rate": 1.1754606120720796e-06, + "loss": 0.1237, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7082 + }, + { + "epoch": 0.6811559359523008, + "grad_norm": 3.8904530954956043, + "learning_rate": 1.1748137174382742e-06, + "loss": 0.1268, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7083 + }, + { + "epoch": 0.6812521036687984, + "grad_norm": 2.790806323049722, + "learning_rate": 1.1741669461830022e-06, + "loss": 0.1026, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7084 + }, + { + "epoch": 0.681348271385296, + "grad_norm": 2.940168415258008, + "learning_rate": 1.1735202983664803e-06, + "loss": 0.1238, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7085 + }, + { + "epoch": 0.6814444391017935, + "grad_norm": 2.757876808609232, + "learning_rate": 1.1728737740489146e-06, + "loss": 0.1139, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7086 + }, + { + "epoch": 0.6815406068182911, + "grad_norm": 2.191546649313582, + "learning_rate": 1.172227373290496e-06, + "loss": 0.1072, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7087 + }, + { + "epoch": 0.6816367745347887, + "grad_norm": 2.64945882480835, + "learning_rate": 1.1715810961514073e-06, + "loss": 0.099, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7088 + }, + { + "epoch": 0.6817329422512862, + "grad_norm": 1.4977828789712526, + "learning_rate": 1.170934942691819e-06, + "loss": 0.1051, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7089 + }, + { + "epoch": 0.6818291099677838, + "grad_norm": 1.6368378660859106, + "learning_rate": 1.1702889129718895e-06, + "loss": 0.1198, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7090 + }, + { + "epoch": 0.6819252776842814, + "grad_norm": 1.779580298110717, + "learning_rate": 1.1696430070517667e-06, + "loss": 0.1426, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7091 + }, + { + "epoch": 0.6820214454007789, + "grad_norm": 2.1607379555072312, + "learning_rate": 1.1689972249915847e-06, + "loss": 0.101, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7092 + }, + { + "epoch": 0.6821176131172765, + "grad_norm": 4.363691002336462, + "learning_rate": 1.1683515668514684e-06, + "loss": 0.1529, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7093 + }, + { + "epoch": 0.6822137808337742, + "grad_norm": 3.0267709091496733, + "learning_rate": 1.1677060326915305e-06, + "loss": 0.1407, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7094 + }, + { + "epoch": 0.6823099485502717, + "grad_norm": 2.7259123166781314, + "learning_rate": 1.167060622571872e-06, + "loss": 0.1341, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7095 + }, + { + "epoch": 0.6824061162667693, + "grad_norm": 1.9905636578973975, + "learning_rate": 1.166415336552583e-06, + "loss": 0.1156, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7096 + }, + { + "epoch": 0.6825022839832668, + "grad_norm": 1.877265682839297, + "learning_rate": 1.16577017469374e-06, + "loss": 0.1175, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7097 + }, + { + "epoch": 0.6825984516997644, + "grad_norm": 1.6448242022522064, + "learning_rate": 1.16512513705541e-06, + "loss": 0.1309, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7098 + }, + { + "epoch": 0.682694619416262, + "grad_norm": 1.8520205612262557, + "learning_rate": 1.1644802236976483e-06, + "loss": 0.1211, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7099 + }, + { + "epoch": 0.6827907871327595, + "grad_norm": 1.6651891361901516, + "learning_rate": 1.1638354346804974e-06, + "loss": 0.1146, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7100 + }, + { + "epoch": 0.6828869548492571, + "grad_norm": 3.4623024230754544, + "learning_rate": 1.1631907700639894e-06, + "loss": 0.1309, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7101 + }, + { + "epoch": 0.6829831225657547, + "grad_norm": 2.011765329475174, + "learning_rate": 1.162546229908145e-06, + "loss": 0.1351, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7102 + }, + { + "epoch": 0.6830792902822522, + "grad_norm": 3.4233651865433923, + "learning_rate": 1.1619018142729713e-06, + "loss": 0.1434, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7103 + }, + { + "epoch": 0.6831754579987498, + "grad_norm": 1.7809064153683745, + "learning_rate": 1.1612575232184657e-06, + "loss": 0.136, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7104 + }, + { + "epoch": 0.6832716257152474, + "grad_norm": 1.6426280334143342, + "learning_rate": 1.1606133568046134e-06, + "loss": 0.1059, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7105 + }, + { + "epoch": 0.6833677934317449, + "grad_norm": 1.4758939108703724, + "learning_rate": 1.1599693150913888e-06, + "loss": 0.1099, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7106 + }, + { + "epoch": 0.6834639611482425, + "grad_norm": 2.491436362068095, + "learning_rate": 1.1593253981387542e-06, + "loss": 0.122, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7107 + }, + { + "epoch": 0.6835601288647402, + "grad_norm": 1.7100874569372237, + "learning_rate": 1.1586816060066585e-06, + "loss": 0.1204, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7108 + }, + { + "epoch": 0.6836562965812377, + "grad_norm": 1.6132053626605116, + "learning_rate": 1.1580379387550413e-06, + "loss": 0.1269, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7109 + }, + { + "epoch": 0.6837524642977353, + "grad_norm": 1.6851232592268934, + "learning_rate": 1.15739439644383e-06, + "loss": 0.1227, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7110 + }, + { + "epoch": 0.6838486320142328, + "grad_norm": 2.9147563994670325, + "learning_rate": 1.1567509791329402e-06, + "loss": 0.1118, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7111 + }, + { + "epoch": 0.6839447997307304, + "grad_norm": 3.163644051664732, + "learning_rate": 1.1561076868822756e-06, + "loss": 0.1507, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7112 + }, + { + "epoch": 0.684040967447228, + "grad_norm": 2.2928093467540083, + "learning_rate": 1.1554645197517298e-06, + "loss": 0.1082, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7113 + }, + { + "epoch": 0.6841371351637255, + "grad_norm": 2.0817661478149248, + "learning_rate": 1.154821477801181e-06, + "loss": 0.1093, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7114 + }, + { + "epoch": 0.6842333028802231, + "grad_norm": 1.315629791078607, + "learning_rate": 1.1541785610904995e-06, + "loss": 0.1043, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7115 + }, + { + "epoch": 0.6843294705967207, + "grad_norm": 1.5239792089320994, + "learning_rate": 1.1535357696795437e-06, + "loss": 0.0973, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7116 + }, + { + "epoch": 0.6844256383132182, + "grad_norm": 2.1510922944269293, + "learning_rate": 1.1528931036281576e-06, + "loss": 0.1257, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7117 + }, + { + "epoch": 0.6845218060297158, + "grad_norm": 1.6057555380237378, + "learning_rate": 1.1522505629961755e-06, + "loss": 0.1136, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7118 + }, + { + "epoch": 0.6846179737462134, + "grad_norm": 1.7323700139541012, + "learning_rate": 1.151608147843421e-06, + "loss": 0.1069, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7119 + }, + { + "epoch": 0.6847141414627109, + "grad_norm": 2.606207470537724, + "learning_rate": 1.1509658582297025e-06, + "loss": 0.1271, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7120 + }, + { + "epoch": 0.6848103091792085, + "grad_norm": 1.5631365000293975, + "learning_rate": 1.1503236942148207e-06, + "loss": 0.1092, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7121 + }, + { + "epoch": 0.6849064768957062, + "grad_norm": 1.8905257608890813, + "learning_rate": 1.1496816558585622e-06, + "loss": 0.1602, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7122 + }, + { + "epoch": 0.6850026446122037, + "grad_norm": 1.5943227281113455, + "learning_rate": 1.1490397432207027e-06, + "loss": 0.1116, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7123 + }, + { + "epoch": 0.6850988123287013, + "grad_norm": 2.8481322660178168, + "learning_rate": 1.148397956361007e-06, + "loss": 0.1271, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7124 + }, + { + "epoch": 0.6851949800451989, + "grad_norm": 1.7340303964210648, + "learning_rate": 1.1477562953392255e-06, + "loss": 0.1229, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7125 + }, + { + "epoch": 0.6852911477616964, + "grad_norm": 1.5938539923156145, + "learning_rate": 1.1471147602150991e-06, + "loss": 0.1075, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7126 + }, + { + "epoch": 0.685387315478194, + "grad_norm": 2.2406458224274273, + "learning_rate": 1.1464733510483575e-06, + "loss": 0.1014, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7127 + }, + { + "epoch": 0.6854834831946915, + "grad_norm": 1.6894379700337676, + "learning_rate": 1.1458320678987166e-06, + "loss": 0.1101, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7128 + }, + { + "epoch": 0.6855796509111891, + "grad_norm": 1.7217404532279534, + "learning_rate": 1.1451909108258822e-06, + "loss": 0.1104, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7129 + }, + { + "epoch": 0.6856758186276867, + "grad_norm": 2.054474835468591, + "learning_rate": 1.1445498798895485e-06, + "loss": 0.1195, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7130 + }, + { + "epoch": 0.6857719863441842, + "grad_norm": 2.2684129025584627, + "learning_rate": 1.1439089751493959e-06, + "loss": 0.1114, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7131 + }, + { + "epoch": 0.6858681540606818, + "grad_norm": 2.151598177776365, + "learning_rate": 1.1432681966650947e-06, + "loss": 0.1157, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7132 + }, + { + "epoch": 0.6859643217771794, + "grad_norm": 2.9793143391556827, + "learning_rate": 1.1426275444963033e-06, + "loss": 0.1283, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7133 + }, + { + "epoch": 0.6860604894936769, + "grad_norm": 1.864872904231807, + "learning_rate": 1.1419870187026688e-06, + "loss": 0.1102, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7134 + }, + { + "epoch": 0.6861566572101745, + "grad_norm": 1.9840732458773587, + "learning_rate": 1.1413466193438261e-06, + "loss": 0.1008, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7135 + }, + { + "epoch": 0.6862528249266722, + "grad_norm": 2.132559446843805, + "learning_rate": 1.1407063464793966e-06, + "loss": 0.1225, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7136 + }, + { + "epoch": 0.6863489926431697, + "grad_norm": 2.0493826273867732, + "learning_rate": 1.1400662001689927e-06, + "loss": 0.1379, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7137 + }, + { + "epoch": 0.6864451603596673, + "grad_norm": 2.3292321668103257, + "learning_rate": 1.1394261804722132e-06, + "loss": 0.1451, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7138 + }, + { + "epoch": 0.6865413280761649, + "grad_norm": 1.7262784935356077, + "learning_rate": 1.1387862874486464e-06, + "loss": 0.1047, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7139 + }, + { + "epoch": 0.6866374957926624, + "grad_norm": 1.892823585507582, + "learning_rate": 1.1381465211578673e-06, + "loss": 0.1329, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7140 + }, + { + "epoch": 0.68673366350916, + "grad_norm": 2.3393574855888244, + "learning_rate": 1.1375068816594418e-06, + "loss": 0.0876, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7141 + }, + { + "epoch": 0.6868298312256575, + "grad_norm": 1.6036630359238608, + "learning_rate": 1.1368673690129198e-06, + "loss": 0.1125, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7142 + }, + { + "epoch": 0.6869259989421551, + "grad_norm": 1.5761685559649112, + "learning_rate": 1.1362279832778423e-06, + "loss": 0.1131, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7143 + }, + { + "epoch": 0.6870221666586527, + "grad_norm": 1.698125741271953, + "learning_rate": 1.1355887245137383e-06, + "loss": 0.1066, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7144 + }, + { + "epoch": 0.6871183343751502, + "grad_norm": 2.208006043452628, + "learning_rate": 1.1349495927801246e-06, + "loss": 0.1237, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7145 + }, + { + "epoch": 0.6872145020916478, + "grad_norm": 1.6761536652345106, + "learning_rate": 1.134310588136506e-06, + "loss": 0.1107, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7146 + }, + { + "epoch": 0.6873106698081454, + "grad_norm": 2.292572843776995, + "learning_rate": 1.1336717106423768e-06, + "loss": 0.1223, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7147 + }, + { + "epoch": 0.6874068375246429, + "grad_norm": 1.5935494376527202, + "learning_rate": 1.133032960357216e-06, + "loss": 0.1144, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7148 + }, + { + "epoch": 0.6875030052411405, + "grad_norm": 1.775173122700183, + "learning_rate": 1.1323943373404942e-06, + "loss": 0.1159, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7149 + }, + { + "epoch": 0.6875991729576382, + "grad_norm": 1.645954894956722, + "learning_rate": 1.1317558416516696e-06, + "loss": 0.1017, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7150 + }, + { + "epoch": 0.6876953406741357, + "grad_norm": 2.5190649009236523, + "learning_rate": 1.1311174733501867e-06, + "loss": 0.1036, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7151 + }, + { + "epoch": 0.6877915083906333, + "grad_norm": 1.9117539899517244, + "learning_rate": 1.1304792324954796e-06, + "loss": 0.1093, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7152 + }, + { + "epoch": 0.6878876761071309, + "grad_norm": 2.6189236630373873, + "learning_rate": 1.1298411191469718e-06, + "loss": 0.0967, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7153 + }, + { + "epoch": 0.6879838438236284, + "grad_norm": 1.938237750185941, + "learning_rate": 1.1292031333640717e-06, + "loss": 0.1342, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7154 + }, + { + "epoch": 0.688080011540126, + "grad_norm": 1.711029276756783, + "learning_rate": 1.1285652752061774e-06, + "loss": 0.1065, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7155 + }, + { + "epoch": 0.6881761792566236, + "grad_norm": 1.5216860331512245, + "learning_rate": 1.1279275447326763e-06, + "loss": 0.1142, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7156 + }, + { + "epoch": 0.6882723469731211, + "grad_norm": 1.6943950307957911, + "learning_rate": 1.1272899420029425e-06, + "loss": 0.1376, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7157 + }, + { + "epoch": 0.6883685146896187, + "grad_norm": 1.6663694125468023, + "learning_rate": 1.12665246707634e-06, + "loss": 0.1288, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7158 + }, + { + "epoch": 0.6884646824061162, + "grad_norm": 1.6555679159804968, + "learning_rate": 1.1260151200122167e-06, + "loss": 0.1283, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7159 + }, + { + "epoch": 0.6885608501226138, + "grad_norm": 3.457907871950081, + "learning_rate": 1.1253779008699131e-06, + "loss": 0.1528, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7160 + }, + { + "epoch": 0.6886570178391114, + "grad_norm": 1.941054971765797, + "learning_rate": 1.1247408097087556e-06, + "loss": 0.1156, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7161 + }, + { + "epoch": 0.6887531855556089, + "grad_norm": 1.4046291197672596, + "learning_rate": 1.1241038465880598e-06, + "loss": 0.1129, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7162 + }, + { + "epoch": 0.6888493532721065, + "grad_norm": 1.9471288648434721, + "learning_rate": 1.1234670115671286e-06, + "loss": 0.0989, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7163 + }, + { + "epoch": 0.6889455209886042, + "grad_norm": 1.9350191580736766, + "learning_rate": 1.1228303047052524e-06, + "loss": 0.1056, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7164 + }, + { + "epoch": 0.6890416887051017, + "grad_norm": 1.8741780368416991, + "learning_rate": 1.1221937260617107e-06, + "loss": 0.1476, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7165 + }, + { + "epoch": 0.6891378564215993, + "grad_norm": 1.6479973148068536, + "learning_rate": 1.121557275695771e-06, + "loss": 0.1199, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7166 + }, + { + "epoch": 0.6892340241380969, + "grad_norm": 2.1662639635516796, + "learning_rate": 1.1209209536666882e-06, + "loss": 0.1401, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7167 + }, + { + "epoch": 0.6893301918545944, + "grad_norm": 2.1518239317576415, + "learning_rate": 1.120284760033706e-06, + "loss": 0.1276, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7168 + }, + { + "epoch": 0.689426359571092, + "grad_norm": 1.5337963105801302, + "learning_rate": 1.1196486948560567e-06, + "loss": 0.1131, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7169 + }, + { + "epoch": 0.6895225272875896, + "grad_norm": 2.9315587686168816, + "learning_rate": 1.119012758192958e-06, + "loss": 0.1375, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7170 + }, + { + "epoch": 0.6896186950040871, + "grad_norm": 1.892888366971099, + "learning_rate": 1.118376950103618e-06, + "loss": 0.1366, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7171 + }, + { + "epoch": 0.6897148627205847, + "grad_norm": 1.4596057465855528, + "learning_rate": 1.1177412706472322e-06, + "loss": 0.0962, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7172 + }, + { + "epoch": 0.6898110304370823, + "grad_norm": 1.5463731990879648, + "learning_rate": 1.1171057198829844e-06, + "loss": 0.1221, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7173 + }, + { + "epoch": 0.6899071981535798, + "grad_norm": 1.5218854543987599, + "learning_rate": 1.116470297870046e-06, + "loss": 0.1274, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7174 + }, + { + "epoch": 0.6900033658700774, + "grad_norm": 1.6213960025331433, + "learning_rate": 1.1158350046675773e-06, + "loss": 0.1108, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7175 + }, + { + "epoch": 0.6900995335865749, + "grad_norm": 1.5408656222276746, + "learning_rate": 1.1151998403347245e-06, + "loss": 0.1304, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7176 + }, + { + "epoch": 0.6901957013030725, + "grad_norm": 1.6753071059688287, + "learning_rate": 1.1145648049306238e-06, + "loss": 0.124, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7177 + }, + { + "epoch": 0.6902918690195702, + "grad_norm": 3.2081152845517638, + "learning_rate": 1.1139298985143985e-06, + "loss": 0.1113, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7178 + }, + { + "epoch": 0.6903880367360677, + "grad_norm": 1.578612363175678, + "learning_rate": 1.1132951211451607e-06, + "loss": 0.1361, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7179 + }, + { + "epoch": 0.6904842044525653, + "grad_norm": 1.5644478321094926, + "learning_rate": 1.1126604728820102e-06, + "loss": 0.0985, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7180 + }, + { + "epoch": 0.6905803721690629, + "grad_norm": 2.0230630947377164, + "learning_rate": 1.1120259537840333e-06, + "loss": 0.1194, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7181 + }, + { + "epoch": 0.6906765398855604, + "grad_norm": 1.5851645249565207, + "learning_rate": 1.1113915639103062e-06, + "loss": 0.1227, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7182 + }, + { + "epoch": 0.690772707602058, + "grad_norm": 1.3727465737613835, + "learning_rate": 1.1107573033198935e-06, + "loss": 0.0905, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7183 + }, + { + "epoch": 0.6908688753185556, + "grad_norm": 1.4718421560937682, + "learning_rate": 1.1101231720718442e-06, + "loss": 0.106, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7184 + }, + { + "epoch": 0.6909650430350531, + "grad_norm": 3.101762503653935, + "learning_rate": 1.1094891702251992e-06, + "loss": 0.1309, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7185 + }, + { + "epoch": 0.6910612107515507, + "grad_norm": 1.6462585163705787, + "learning_rate": 1.1088552978389864e-06, + "loss": 0.1308, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7186 + }, + { + "epoch": 0.6911573784680483, + "grad_norm": 2.402701756061776, + "learning_rate": 1.1082215549722192e-06, + "loss": 0.1531, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7187 + }, + { + "epoch": 0.6912535461845458, + "grad_norm": 1.7168700008528939, + "learning_rate": 1.1075879416839022e-06, + "loss": 0.1283, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7188 + }, + { + "epoch": 0.6913497139010434, + "grad_norm": 1.838957783142317, + "learning_rate": 1.106954458033026e-06, + "loss": 0.1158, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7189 + }, + { + "epoch": 0.691445881617541, + "grad_norm": 1.752272581628915, + "learning_rate": 1.1063211040785701e-06, + "loss": 0.1141, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7190 + }, + { + "epoch": 0.6915420493340385, + "grad_norm": 1.6881463732808455, + "learning_rate": 1.1056878798795023e-06, + "loss": 0.1226, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7191 + }, + { + "epoch": 0.6916382170505362, + "grad_norm": 1.6934734987180051, + "learning_rate": 1.1050547854947757e-06, + "loss": 0.122, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7192 + }, + { + "epoch": 0.6917343847670337, + "grad_norm": 1.7673111947886058, + "learning_rate": 1.104421820983334e-06, + "loss": 0.1062, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7193 + }, + { + "epoch": 0.6918305524835313, + "grad_norm": 1.9016510050227833, + "learning_rate": 1.1037889864041082e-06, + "loss": 0.1107, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7194 + }, + { + "epoch": 0.6919267202000289, + "grad_norm": 1.6782987173952695, + "learning_rate": 1.103156281816017e-06, + "loss": 0.1169, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7195 + }, + { + "epoch": 0.6920228879165264, + "grad_norm": 3.1233716731837586, + "learning_rate": 1.1025237072779663e-06, + "loss": 0.1435, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7196 + }, + { + "epoch": 0.692119055633024, + "grad_norm": 2.530933908642601, + "learning_rate": 1.101891262848852e-06, + "loss": 0.143, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7197 + }, + { + "epoch": 0.6922152233495216, + "grad_norm": 1.4146333609644544, + "learning_rate": 1.1012589485875547e-06, + "loss": 0.0831, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7198 + }, + { + "epoch": 0.6923113910660191, + "grad_norm": 1.528989117989982, + "learning_rate": 1.100626764552945e-06, + "loss": 0.1016, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7199 + }, + { + "epoch": 0.6924075587825167, + "grad_norm": 1.510223389483619, + "learning_rate": 1.0999947108038816e-06, + "loss": 0.1121, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7200 + }, + { + "epoch": 0.6925037264990143, + "grad_norm": 1.6191636126419946, + "learning_rate": 1.0993627873992102e-06, + "loss": 0.1071, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7201 + }, + { + "epoch": 0.6925998942155118, + "grad_norm": 1.4089356114216707, + "learning_rate": 1.0987309943977647e-06, + "loss": 0.1066, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7202 + }, + { + "epoch": 0.6926960619320094, + "grad_norm": 1.6827489961465545, + "learning_rate": 1.0980993318583677e-06, + "loss": 0.1169, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7203 + }, + { + "epoch": 0.692792229648507, + "grad_norm": 3.08118960807214, + "learning_rate": 1.0974677998398267e-06, + "loss": 0.1224, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7204 + }, + { + "epoch": 0.6928883973650045, + "grad_norm": 1.6391046974688475, + "learning_rate": 1.0968363984009402e-06, + "loss": 0.0929, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7205 + }, + { + "epoch": 0.6929845650815022, + "grad_norm": 1.5260903225105633, + "learning_rate": 1.0962051276004935e-06, + "loss": 0.0982, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7206 + }, + { + "epoch": 0.6930807327979998, + "grad_norm": 1.584596423427407, + "learning_rate": 1.0955739874972593e-06, + "loss": 0.1042, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7207 + }, + { + "epoch": 0.6931769005144973, + "grad_norm": 4.1523773147425205, + "learning_rate": 1.0949429781500002e-06, + "loss": 0.1267, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7208 + }, + { + "epoch": 0.6932730682309949, + "grad_norm": 2.1044010318135418, + "learning_rate": 1.094312099617462e-06, + "loss": 0.145, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7209 + }, + { + "epoch": 0.6933692359474924, + "grad_norm": 1.9510588919223573, + "learning_rate": 1.093681351958383e-06, + "loss": 0.134, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7210 + }, + { + "epoch": 0.69346540366399, + "grad_norm": 2.385966557459153, + "learning_rate": 1.0930507352314872e-06, + "loss": 0.092, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7211 + }, + { + "epoch": 0.6935615713804876, + "grad_norm": 1.9002863384175364, + "learning_rate": 1.0924202494954872e-06, + "loss": 0.1254, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7212 + }, + { + "epoch": 0.6936577390969851, + "grad_norm": 2.337362649239392, + "learning_rate": 1.0917898948090832e-06, + "loss": 0.1368, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7213 + }, + { + "epoch": 0.6937539068134827, + "grad_norm": 2.196363947998904, + "learning_rate": 1.091159671230962e-06, + "loss": 0.1024, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7214 + }, + { + "epoch": 0.6938500745299803, + "grad_norm": 1.9354468060402872, + "learning_rate": 1.0905295788197993e-06, + "loss": 0.1337, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7215 + }, + { + "epoch": 0.6939462422464778, + "grad_norm": 1.8931437326840543, + "learning_rate": 1.0898996176342595e-06, + "loss": 0.1272, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7216 + }, + { + "epoch": 0.6940424099629754, + "grad_norm": 1.795901787882049, + "learning_rate": 1.0892697877329921e-06, + "loss": 0.1174, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7217 + }, + { + "epoch": 0.694138577679473, + "grad_norm": 2.201401588396205, + "learning_rate": 1.0886400891746373e-06, + "loss": 0.1183, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7218 + }, + { + "epoch": 0.6942347453959705, + "grad_norm": 1.6383835575862966, + "learning_rate": 1.0880105220178223e-06, + "loss": 0.1213, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7219 + }, + { + "epoch": 0.6943309131124682, + "grad_norm": 1.3974427838113528, + "learning_rate": 1.0873810863211595e-06, + "loss": 0.1023, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7220 + }, + { + "epoch": 0.6944270808289658, + "grad_norm": 1.8425232229706918, + "learning_rate": 1.0867517821432525e-06, + "loss": 0.1394, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7221 + }, + { + "epoch": 0.6945232485454633, + "grad_norm": 1.7530618908741615, + "learning_rate": 1.0861226095426909e-06, + "loss": 0.126, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7222 + }, + { + "epoch": 0.6946194162619609, + "grad_norm": 1.4593789413837124, + "learning_rate": 1.0854935685780528e-06, + "loss": 0.0933, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7223 + }, + { + "epoch": 0.6947155839784585, + "grad_norm": 1.5588476419916129, + "learning_rate": 1.0848646593079028e-06, + "loss": 0.1077, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7224 + }, + { + "epoch": 0.694811751694956, + "grad_norm": 2.1356355798626074, + "learning_rate": 1.0842358817907963e-06, + "loss": 0.1235, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7225 + }, + { + "epoch": 0.6949079194114536, + "grad_norm": 2.2664373793165793, + "learning_rate": 1.0836072360852717e-06, + "loss": 0.1323, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7226 + }, + { + "epoch": 0.6950040871279511, + "grad_norm": 2.069564985287881, + "learning_rate": 1.0829787222498586e-06, + "loss": 0.107, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7227 + }, + { + "epoch": 0.6951002548444487, + "grad_norm": 1.5233394723511717, + "learning_rate": 1.0823503403430736e-06, + "loss": 0.1278, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7228 + }, + { + "epoch": 0.6951964225609463, + "grad_norm": 1.636135390982245, + "learning_rate": 1.0817220904234202e-06, + "loss": 0.1162, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7229 + }, + { + "epoch": 0.6952925902774438, + "grad_norm": 2.156177210241705, + "learning_rate": 1.081093972549391e-06, + "loss": 0.115, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7230 + }, + { + "epoch": 0.6953887579939414, + "grad_norm": 1.4757738255329673, + "learning_rate": 1.0804659867794661e-06, + "loss": 0.0977, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7231 + }, + { + "epoch": 0.695484925710439, + "grad_norm": 1.9440797416900872, + "learning_rate": 1.079838133172111e-06, + "loss": 0.1189, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7232 + }, + { + "epoch": 0.6955810934269365, + "grad_norm": 1.4898466437487423, + "learning_rate": 1.0792104117857812e-06, + "loss": 0.1045, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7233 + }, + { + "epoch": 0.6956772611434342, + "grad_norm": 2.0775933671381046, + "learning_rate": 1.0785828226789197e-06, + "loss": 0.0953, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7234 + }, + { + "epoch": 0.6957734288599318, + "grad_norm": 1.4715842303691566, + "learning_rate": 1.0779553659099566e-06, + "loss": 0.112, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7235 + }, + { + "epoch": 0.6958695965764293, + "grad_norm": 1.5846401311106257, + "learning_rate": 1.0773280415373107e-06, + "loss": 0.1171, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7236 + }, + { + "epoch": 0.6959657642929269, + "grad_norm": 2.53519628462064, + "learning_rate": 1.0767008496193863e-06, + "loss": 0.1317, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7237 + }, + { + "epoch": 0.6960619320094245, + "grad_norm": 1.4872316489981143, + "learning_rate": 1.076073790214577e-06, + "loss": 0.1009, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7238 + }, + { + "epoch": 0.696158099725922, + "grad_norm": 2.807687974699844, + "learning_rate": 1.0754468633812639e-06, + "loss": 0.1443, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7239 + }, + { + "epoch": 0.6962542674424196, + "grad_norm": 2.1542832280655673, + "learning_rate": 1.074820069177816e-06, + "loss": 0.1057, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7240 + }, + { + "epoch": 0.6963504351589171, + "grad_norm": 2.03419065295962, + "learning_rate": 1.0741934076625895e-06, + "loss": 0.1243, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7241 + }, + { + "epoch": 0.6964466028754147, + "grad_norm": 1.517977100440103, + "learning_rate": 1.0735668788939288e-06, + "loss": 0.0859, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7242 + }, + { + "epoch": 0.6965427705919123, + "grad_norm": 1.892139564824706, + "learning_rate": 1.0729404829301644e-06, + "loss": 0.1574, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7243 + }, + { + "epoch": 0.6966389383084098, + "grad_norm": 1.8653803378734315, + "learning_rate": 1.0723142198296155e-06, + "loss": 0.1266, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7244 + }, + { + "epoch": 0.6967351060249074, + "grad_norm": 1.6367665545603438, + "learning_rate": 1.0716880896505898e-06, + "loss": 0.1239, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7245 + }, + { + "epoch": 0.696831273741405, + "grad_norm": 1.8863799506545758, + "learning_rate": 1.0710620924513812e-06, + "loss": 0.1239, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7246 + }, + { + "epoch": 0.6969274414579025, + "grad_norm": 1.7208890588865136, + "learning_rate": 1.070436228290273e-06, + "loss": 0.1224, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7247 + }, + { + "epoch": 0.6970236091744002, + "grad_norm": 1.9015016575023551, + "learning_rate": 1.069810497225533e-06, + "loss": 0.1232, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7248 + }, + { + "epoch": 0.6971197768908978, + "grad_norm": 2.0293935433453814, + "learning_rate": 1.0691848993154192e-06, + "loss": 0.1303, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7249 + }, + { + "epoch": 0.6972159446073953, + "grad_norm": 3.437090267248964, + "learning_rate": 1.0685594346181776e-06, + "loss": 0.1372, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7250 + }, + { + "epoch": 0.6973121123238929, + "grad_norm": 1.8505637091021319, + "learning_rate": 1.067934103192039e-06, + "loss": 0.1402, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7251 + }, + { + "epoch": 0.6974082800403905, + "grad_norm": 1.6139476456622426, + "learning_rate": 1.0673089050952242e-06, + "loss": 0.1181, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7252 + }, + { + "epoch": 0.697504447756888, + "grad_norm": 1.6821354431821591, + "learning_rate": 1.066683840385942e-06, + "loss": 0.1232, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7253 + }, + { + "epoch": 0.6976006154733856, + "grad_norm": 1.5918380132919063, + "learning_rate": 1.0660589091223854e-06, + "loss": 0.1363, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7254 + }, + { + "epoch": 0.6976967831898832, + "grad_norm": 1.7555126841113937, + "learning_rate": 1.0654341113627389e-06, + "loss": 0.1019, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7255 + }, + { + "epoch": 0.6977929509063807, + "grad_norm": 1.6671877083547701, + "learning_rate": 1.0648094471651723e-06, + "loss": 0.1303, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7256 + }, + { + "epoch": 0.6978891186228783, + "grad_norm": 2.069633063396951, + "learning_rate": 1.0641849165878437e-06, + "loss": 0.1333, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7257 + }, + { + "epoch": 0.6979852863393758, + "grad_norm": 1.0972869173907123, + "learning_rate": 1.0635605196888988e-06, + "loss": 0.0725, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7258 + }, + { + "epoch": 0.6980814540558734, + "grad_norm": 1.7576451252074243, + "learning_rate": 1.0629362565264715e-06, + "loss": 0.1338, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7259 + }, + { + "epoch": 0.698177621772371, + "grad_norm": 1.6531186134013915, + "learning_rate": 1.0623121271586806e-06, + "loss": 0.0925, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7260 + }, + { + "epoch": 0.6982737894888685, + "grad_norm": 1.6680287175819615, + "learning_rate": 1.0616881316436353e-06, + "loss": 0.1263, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7261 + }, + { + "epoch": 0.6983699572053662, + "grad_norm": 1.7299351918193044, + "learning_rate": 1.0610642700394312e-06, + "loss": 0.11, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7262 + }, + { + "epoch": 0.6984661249218638, + "grad_norm": 1.6576380434909843, + "learning_rate": 1.0604405424041513e-06, + "loss": 0.1361, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7263 + }, + { + "epoch": 0.6985622926383613, + "grad_norm": 1.7699182793212245, + "learning_rate": 1.0598169487958678e-06, + "loss": 0.1317, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7264 + }, + { + "epoch": 0.6986584603548589, + "grad_norm": 1.4281657818225961, + "learning_rate": 1.0591934892726372e-06, + "loss": 0.1056, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7265 + }, + { + "epoch": 0.6987546280713565, + "grad_norm": 2.360410189809088, + "learning_rate": 1.0585701638925055e-06, + "loss": 0.1057, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7266 + }, + { + "epoch": 0.698850795787854, + "grad_norm": 1.5360626389186025, + "learning_rate": 1.0579469727135069e-06, + "loss": 0.1075, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7267 + }, + { + "epoch": 0.6989469635043516, + "grad_norm": 1.6017758550748986, + "learning_rate": 1.0573239157936619e-06, + "loss": 0.115, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7268 + }, + { + "epoch": 0.6990431312208492, + "grad_norm": 1.65313538197768, + "learning_rate": 1.0567009931909785e-06, + "loss": 0.1187, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7269 + }, + { + "epoch": 0.6991392989373467, + "grad_norm": 1.5512711120351028, + "learning_rate": 1.0560782049634539e-06, + "loss": 0.1069, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7270 + }, + { + "epoch": 0.6992354666538443, + "grad_norm": 3.329335424845729, + "learning_rate": 1.0554555511690692e-06, + "loss": 0.1473, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7271 + }, + { + "epoch": 0.6993316343703418, + "grad_norm": 2.355707779787302, + "learning_rate": 1.0548330318657968e-06, + "loss": 0.1304, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7272 + }, + { + "epoch": 0.6994278020868394, + "grad_norm": 1.513139406742527, + "learning_rate": 1.054210647111594e-06, + "loss": 0.1055, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7273 + }, + { + "epoch": 0.699523969803337, + "grad_norm": 1.4134380301019671, + "learning_rate": 1.0535883969644074e-06, + "loss": 0.1189, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7274 + }, + { + "epoch": 0.6996201375198345, + "grad_norm": 4.211045664729162, + "learning_rate": 1.0529662814821706e-06, + "loss": 0.1205, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7275 + }, + { + "epoch": 0.6997163052363322, + "grad_norm": 1.4233801033349405, + "learning_rate": 1.052344300722803e-06, + "loss": 0.094, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7276 + }, + { + "epoch": 0.6998124729528298, + "grad_norm": 1.811747285111792, + "learning_rate": 1.051722454744213e-06, + "loss": 0.1161, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7277 + }, + { + "epoch": 0.6999086406693273, + "grad_norm": 1.5327850311034157, + "learning_rate": 1.0511007436042966e-06, + "loss": 0.1099, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7278 + }, + { + "epoch": 0.7000048083858249, + "grad_norm": 1.5031796587374697, + "learning_rate": 1.050479167360937e-06, + "loss": 0.1166, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7279 + }, + { + "epoch": 0.7001009761023225, + "grad_norm": 1.7418439773517742, + "learning_rate": 1.049857726072005e-06, + "loss": 0.101, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7280 + }, + { + "epoch": 0.70019714381882, + "grad_norm": 1.415023661628211, + "learning_rate": 1.0492364197953569e-06, + "loss": 0.0955, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7281 + }, + { + "epoch": 0.7002933115353176, + "grad_norm": 1.8806673246355103, + "learning_rate": 1.0486152485888396e-06, + "loss": 0.1144, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7282 + }, + { + "epoch": 0.7003894792518152, + "grad_norm": 1.832261472063168, + "learning_rate": 1.047994212510286e-06, + "loss": 0.1122, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7283 + }, + { + "epoch": 0.7004856469683127, + "grad_norm": 2.6534029959316743, + "learning_rate": 1.0473733116175147e-06, + "loss": 0.0968, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7284 + }, + { + "epoch": 0.7005818146848103, + "grad_norm": 2.2003082504039595, + "learning_rate": 1.0467525459683342e-06, + "loss": 0.125, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7285 + }, + { + "epoch": 0.7006779824013079, + "grad_norm": 3.2523956653661195, + "learning_rate": 1.0461319156205396e-06, + "loss": 0.1704, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7286 + }, + { + "epoch": 0.7007741501178054, + "grad_norm": 1.861234617239586, + "learning_rate": 1.0455114206319144e-06, + "loss": 0.117, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7287 + }, + { + "epoch": 0.700870317834303, + "grad_norm": 1.7904691113662874, + "learning_rate": 1.0448910610602262e-06, + "loss": 0.1331, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7288 + }, + { + "epoch": 0.7009664855508005, + "grad_norm": 1.6073261219953805, + "learning_rate": 1.0442708369632334e-06, + "loss": 0.1221, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7289 + }, + { + "epoch": 0.7010626532672982, + "grad_norm": 1.893880545181937, + "learning_rate": 1.0436507483986805e-06, + "loss": 0.1429, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7290 + }, + { + "epoch": 0.7011588209837958, + "grad_norm": 2.272242631366385, + "learning_rate": 1.0430307954242994e-06, + "loss": 0.1366, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7291 + }, + { + "epoch": 0.7012549887002933, + "grad_norm": 1.3168482559280117, + "learning_rate": 1.0424109780978103e-06, + "loss": 0.0948, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7292 + }, + { + "epoch": 0.7013511564167909, + "grad_norm": 2.998580229337103, + "learning_rate": 1.0417912964769184e-06, + "loss": 0.107, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7293 + }, + { + "epoch": 0.7014473241332885, + "grad_norm": 1.5735977529024334, + "learning_rate": 1.0411717506193184e-06, + "loss": 0.1254, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7294 + }, + { + "epoch": 0.701543491849786, + "grad_norm": 2.3969264387793823, + "learning_rate": 1.0405523405826917e-06, + "loss": 0.1201, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7295 + }, + { + "epoch": 0.7016396595662836, + "grad_norm": 1.481559040852026, + "learning_rate": 1.0399330664247077e-06, + "loss": 0.1033, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7296 + }, + { + "epoch": 0.7017358272827812, + "grad_norm": 1.7310680311364883, + "learning_rate": 1.0393139282030216e-06, + "loss": 0.1225, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7297 + }, + { + "epoch": 0.7018319949992787, + "grad_norm": 1.5652075316070766, + "learning_rate": 1.0386949259752784e-06, + "loss": 0.1232, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7298 + }, + { + "epoch": 0.7019281627157763, + "grad_norm": 1.7012234033998357, + "learning_rate": 1.0380760597991071e-06, + "loss": 0.1325, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7299 + }, + { + "epoch": 0.7020243304322739, + "grad_norm": 1.741034568456163, + "learning_rate": 1.037457329732127e-06, + "loss": 0.1294, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7300 + }, + { + "epoch": 0.7021204981487714, + "grad_norm": 2.0387493607596876, + "learning_rate": 1.0368387358319428e-06, + "loss": 0.1469, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7301 + }, + { + "epoch": 0.702216665865269, + "grad_norm": 1.6253522503160565, + "learning_rate": 1.0362202781561482e-06, + "loss": 0.1296, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7302 + }, + { + "epoch": 0.7023128335817665, + "grad_norm": 1.4335229753871626, + "learning_rate": 1.0356019567623237e-06, + "loss": 0.1048, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7303 + }, + { + "epoch": 0.7024090012982642, + "grad_norm": 3.2696066258456282, + "learning_rate": 1.034983771708035e-06, + "loss": 0.1286, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7304 + }, + { + "epoch": 0.7025051690147618, + "grad_norm": 2.4812791165962502, + "learning_rate": 1.0343657230508377e-06, + "loss": 0.1279, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7305 + }, + { + "epoch": 0.7026013367312594, + "grad_norm": 1.3415005112443827, + "learning_rate": 1.0337478108482742e-06, + "loss": 0.114, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7306 + }, + { + "epoch": 0.7026975044477569, + "grad_norm": 1.5134922527762351, + "learning_rate": 1.0331300351578737e-06, + "loss": 0.125, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7307 + }, + { + "epoch": 0.7027936721642545, + "grad_norm": 1.7239200874475396, + "learning_rate": 1.0325123960371527e-06, + "loss": 0.1204, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7308 + }, + { + "epoch": 0.702889839880752, + "grad_norm": 1.7255147465326062, + "learning_rate": 1.031894893543616e-06, + "loss": 0.0995, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7309 + }, + { + "epoch": 0.7029860075972496, + "grad_norm": 1.7573824035716796, + "learning_rate": 1.031277527734753e-06, + "loss": 0.1075, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7310 + }, + { + "epoch": 0.7030821753137472, + "grad_norm": 2.0349112989371236, + "learning_rate": 1.0306602986680434e-06, + "loss": 0.1358, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7311 + }, + { + "epoch": 0.7031783430302447, + "grad_norm": 1.6814042111068224, + "learning_rate": 1.0300432064009527e-06, + "loss": 0.1312, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7312 + }, + { + "epoch": 0.7032745107467423, + "grad_norm": 1.5849191635401945, + "learning_rate": 1.0294262509909347e-06, + "loss": 0.0941, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7313 + }, + { + "epoch": 0.7033706784632399, + "grad_norm": 1.2375289170874448, + "learning_rate": 1.0288094324954278e-06, + "loss": 0.084, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7314 + }, + { + "epoch": 0.7034668461797374, + "grad_norm": 1.492534045851404, + "learning_rate": 1.028192750971861e-06, + "loss": 0.1108, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7315 + }, + { + "epoch": 0.703563013896235, + "grad_norm": 2.1633321520273188, + "learning_rate": 1.0275762064776493e-06, + "loss": 0.1121, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7316 + }, + { + "epoch": 0.7036591816127326, + "grad_norm": 1.8450827686338052, + "learning_rate": 1.0269597990701932e-06, + "loss": 0.1232, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7317 + }, + { + "epoch": 0.7037553493292302, + "grad_norm": 1.911287018667959, + "learning_rate": 1.026343528806883e-06, + "loss": 0.1344, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7318 + }, + { + "epoch": 0.7038515170457278, + "grad_norm": 1.4432279877453205, + "learning_rate": 1.025727395745095e-06, + "loss": 0.0901, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7319 + }, + { + "epoch": 0.7039476847622254, + "grad_norm": 1.8128089758292265, + "learning_rate": 1.0251113999421936e-06, + "loss": 0.13, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7320 + }, + { + "epoch": 0.7040438524787229, + "grad_norm": 1.5318039900669114, + "learning_rate": 1.0244955414555283e-06, + "loss": 0.1119, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7321 + }, + { + "epoch": 0.7041400201952205, + "grad_norm": 2.071937454774063, + "learning_rate": 1.023879820342438e-06, + "loss": 0.1311, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7322 + }, + { + "epoch": 0.704236187911718, + "grad_norm": 1.5832089238692693, + "learning_rate": 1.023264236660248e-06, + "loss": 0.1192, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7323 + }, + { + "epoch": 0.7043323556282156, + "grad_norm": 2.3068132964283525, + "learning_rate": 1.0226487904662708e-06, + "loss": 0.115, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7324 + }, + { + "epoch": 0.7044285233447132, + "grad_norm": 1.666247458428939, + "learning_rate": 1.0220334818178062e-06, + "loss": 0.1104, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7325 + }, + { + "epoch": 0.7045246910612107, + "grad_norm": 1.6462343409674247, + "learning_rate": 1.0214183107721423e-06, + "loss": 0.1292, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7326 + }, + { + "epoch": 0.7046208587777083, + "grad_norm": 1.6085136516345642, + "learning_rate": 1.020803277386551e-06, + "loss": 0.1117, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7327 + }, + { + "epoch": 0.7047170264942059, + "grad_norm": 1.7499162601491176, + "learning_rate": 1.020188381718295e-06, + "loss": 0.1096, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7328 + }, + { + "epoch": 0.7048131942107034, + "grad_norm": 1.6702270339914875, + "learning_rate": 1.0195736238246222e-06, + "loss": 0.1159, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7329 + }, + { + "epoch": 0.704909361927201, + "grad_norm": 1.562127470752638, + "learning_rate": 1.018959003762769e-06, + "loss": 0.1314, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7330 + }, + { + "epoch": 0.7050055296436986, + "grad_norm": 1.7379554842842413, + "learning_rate": 1.0183445215899585e-06, + "loss": 0.1266, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7331 + }, + { + "epoch": 0.7051016973601962, + "grad_norm": 1.7649467645535653, + "learning_rate": 1.0177301773633993e-06, + "loss": 0.1169, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7332 + }, + { + "epoch": 0.7051978650766938, + "grad_norm": 1.6683241686623622, + "learning_rate": 1.0171159711402892e-06, + "loss": 0.1205, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7333 + }, + { + "epoch": 0.7052940327931914, + "grad_norm": 1.6553917757360284, + "learning_rate": 1.0165019029778128e-06, + "loss": 0.1123, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7334 + }, + { + "epoch": 0.7053902005096889, + "grad_norm": 1.577147717825637, + "learning_rate": 1.0158879729331413e-06, + "loss": 0.1126, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7335 + }, + { + "epoch": 0.7054863682261865, + "grad_norm": 1.7716353356379415, + "learning_rate": 1.0152741810634333e-06, + "loss": 0.1275, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7336 + }, + { + "epoch": 0.705582535942684, + "grad_norm": 1.5276409575262877, + "learning_rate": 1.0146605274258355e-06, + "loss": 0.0943, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7337 + }, + { + "epoch": 0.7056787036591816, + "grad_norm": 2.5390656473279636, + "learning_rate": 1.0140470120774792e-06, + "loss": 0.1583, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7338 + }, + { + "epoch": 0.7057748713756792, + "grad_norm": 1.5163159591078097, + "learning_rate": 1.013433635075485e-06, + "loss": 0.1131, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7339 + }, + { + "epoch": 0.7058710390921767, + "grad_norm": 1.8976176678678416, + "learning_rate": 1.0128203964769602e-06, + "loss": 0.1207, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7340 + }, + { + "epoch": 0.7059672068086743, + "grad_norm": 1.830386994240346, + "learning_rate": 1.0122072963389985e-06, + "loss": 0.1278, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7341 + }, + { + "epoch": 0.7060633745251719, + "grad_norm": 2.0112693740281595, + "learning_rate": 1.0115943347186826e-06, + "loss": 0.1041, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7342 + }, + { + "epoch": 0.7061595422416694, + "grad_norm": 2.9402318207528957, + "learning_rate": 1.0109815116730792e-06, + "loss": 0.1286, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7343 + }, + { + "epoch": 0.706255709958167, + "grad_norm": 2.288277419072399, + "learning_rate": 1.0103688272592446e-06, + "loss": 0.1091, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7344 + }, + { + "epoch": 0.7063518776746646, + "grad_norm": 1.962783590482693, + "learning_rate": 1.0097562815342215e-06, + "loss": 0.1189, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7345 + }, + { + "epoch": 0.7064480453911622, + "grad_norm": 1.981421782979641, + "learning_rate": 1.0091438745550403e-06, + "loss": 0.1392, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7346 + }, + { + "epoch": 0.7065442131076598, + "grad_norm": 1.7585223235588725, + "learning_rate": 1.0085316063787163e-06, + "loss": 0.1063, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7347 + }, + { + "epoch": 0.7066403808241574, + "grad_norm": 2.213137588988389, + "learning_rate": 1.007919477062254e-06, + "loss": 0.1036, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7348 + }, + { + "epoch": 0.7067365485406549, + "grad_norm": 1.9303765826712564, + "learning_rate": 1.0073074866626442e-06, + "loss": 0.1136, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7349 + }, + { + "epoch": 0.7068327162571525, + "grad_norm": 1.7640460296073677, + "learning_rate": 1.0066956352368662e-06, + "loss": 0.1166, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7350 + }, + { + "epoch": 0.7069288839736501, + "grad_norm": 1.6207624127256612, + "learning_rate": 1.0060839228418833e-06, + "loss": 0.1131, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7351 + }, + { + "epoch": 0.7070250516901476, + "grad_norm": 2.794331086428961, + "learning_rate": 1.0054723495346484e-06, + "loss": 0.1479, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7352 + }, + { + "epoch": 0.7071212194066452, + "grad_norm": 1.819854202057236, + "learning_rate": 1.0048609153721004e-06, + "loss": 0.117, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7353 + }, + { + "epoch": 0.7072173871231427, + "grad_norm": 1.901802645138846, + "learning_rate": 1.0042496204111666e-06, + "loss": 0.1024, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7354 + }, + { + "epoch": 0.7073135548396403, + "grad_norm": 2.5721412931667365, + "learning_rate": 1.0036384647087588e-06, + "loss": 0.1051, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7355 + }, + { + "epoch": 0.7074097225561379, + "grad_norm": 1.8394376098096583, + "learning_rate": 1.0030274483217779e-06, + "loss": 0.1111, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7356 + }, + { + "epoch": 0.7075058902726354, + "grad_norm": 2.0658436033425454, + "learning_rate": 1.002416571307111e-06, + "loss": 0.1222, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7357 + }, + { + "epoch": 0.707602057989133, + "grad_norm": 2.0012949280595946, + "learning_rate": 1.0018058337216327e-06, + "loss": 0.1176, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7358 + }, + { + "epoch": 0.7076982257056306, + "grad_norm": 1.9042228216794312, + "learning_rate": 1.0011952356222054e-06, + "loss": 0.1135, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7359 + }, + { + "epoch": 0.7077943934221282, + "grad_norm": 1.9472957825528194, + "learning_rate": 1.0005847770656757e-06, + "loss": 0.1077, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7360 + }, + { + "epoch": 0.7078905611386258, + "grad_norm": 1.6600775658094318, + "learning_rate": 9.999744581088794e-07, + "loss": 0.1077, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7361 + }, + { + "epoch": 0.7079867288551234, + "grad_norm": 3.7943643409608745, + "learning_rate": 9.993642788086394e-07, + "loss": 0.1268, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7362 + }, + { + "epoch": 0.7080828965716209, + "grad_norm": 1.5185614386269617, + "learning_rate": 9.98754239221765e-07, + "loss": 0.1092, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7363 + }, + { + "epoch": 0.7081790642881185, + "grad_norm": 1.6506878147579993, + "learning_rate": 9.981443394050525e-07, + "loss": 0.1122, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7364 + }, + { + "epoch": 0.7082752320046161, + "grad_norm": 1.5565815022196567, + "learning_rate": 9.975345794152862e-07, + "loss": 0.1237, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7365 + }, + { + "epoch": 0.7083713997211136, + "grad_norm": 2.3704197711043586, + "learning_rate": 9.969249593092347e-07, + "loss": 0.1278, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7366 + }, + { + "epoch": 0.7084675674376112, + "grad_norm": 1.9914165728596116, + "learning_rate": 9.96315479143656e-07, + "loss": 0.1141, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7367 + }, + { + "epoch": 0.7085637351541088, + "grad_norm": 2.2112273802556692, + "learning_rate": 9.957061389752948e-07, + "loss": 0.137, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7368 + }, + { + "epoch": 0.7086599028706063, + "grad_norm": 1.6767346971899244, + "learning_rate": 9.950969388608819e-07, + "loss": 0.0935, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7369 + }, + { + "epoch": 0.7087560705871039, + "grad_norm": 1.8051441559718213, + "learning_rate": 9.944878788571368e-07, + "loss": 0.1242, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7370 + }, + { + "epoch": 0.7088522383036014, + "grad_norm": 2.678470063627865, + "learning_rate": 9.938789590207626e-07, + "loss": 0.1134, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7371 + }, + { + "epoch": 0.708948406020099, + "grad_norm": 2.4129196468994984, + "learning_rate": 9.932701794084526e-07, + "loss": 0.1374, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7372 + }, + { + "epoch": 0.7090445737365966, + "grad_norm": 2.469092343141365, + "learning_rate": 9.926615400768857e-07, + "loss": 0.0988, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7373 + }, + { + "epoch": 0.7091407414530942, + "grad_norm": 2.2621720525395737, + "learning_rate": 9.92053041082728e-07, + "loss": 0.1073, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7374 + }, + { + "epoch": 0.7092369091695918, + "grad_norm": 2.0043487102123283, + "learning_rate": 9.914446824826326e-07, + "loss": 0.1174, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7375 + }, + { + "epoch": 0.7093330768860894, + "grad_norm": 1.554274043330838, + "learning_rate": 9.9083646433324e-07, + "loss": 0.1057, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7376 + }, + { + "epoch": 0.7094292446025869, + "grad_norm": 3.2491674527865606, + "learning_rate": 9.902283866911752e-07, + "loss": 0.1191, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7377 + }, + { + "epoch": 0.7095254123190845, + "grad_norm": 1.5285001342441502, + "learning_rate": 9.896204496130532e-07, + "loss": 0.1186, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7378 + }, + { + "epoch": 0.7096215800355821, + "grad_norm": 2.361449908470779, + "learning_rate": 9.890126531554742e-07, + "loss": 0.1256, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7379 + }, + { + "epoch": 0.7097177477520796, + "grad_norm": 1.7356963625210409, + "learning_rate": 9.884049973750268e-07, + "loss": 0.113, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7380 + }, + { + "epoch": 0.7098139154685772, + "grad_norm": 1.9440603452496648, + "learning_rate": 9.877974823282836e-07, + "loss": 0.1102, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7381 + }, + { + "epoch": 0.7099100831850748, + "grad_norm": 2.178053768614883, + "learning_rate": 9.87190108071807e-07, + "loss": 0.1285, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7382 + }, + { + "epoch": 0.7100062509015723, + "grad_norm": 1.5346545555286646, + "learning_rate": 9.86582874662146e-07, + "loss": 0.1017, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7383 + }, + { + "epoch": 0.7101024186180699, + "grad_norm": 1.7990374347935787, + "learning_rate": 9.85975782155834e-07, + "loss": 0.1365, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7384 + }, + { + "epoch": 0.7101985863345674, + "grad_norm": 1.7154111473343312, + "learning_rate": 9.853688306093935e-07, + "loss": 0.0866, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7385 + }, + { + "epoch": 0.710294754051065, + "grad_norm": 2.2359106883053403, + "learning_rate": 9.847620200793343e-07, + "loss": 0.1071, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7386 + }, + { + "epoch": 0.7103909217675626, + "grad_norm": 1.8954491441344035, + "learning_rate": 9.841553506221523e-07, + "loss": 0.1242, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7387 + }, + { + "epoch": 0.7104870894840603, + "grad_norm": 2.5001824433298507, + "learning_rate": 9.835488222943285e-07, + "loss": 0.0958, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7388 + }, + { + "epoch": 0.7105832572005578, + "grad_norm": 2.194740319751896, + "learning_rate": 9.829424351523332e-07, + "loss": 0.1207, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7389 + }, + { + "epoch": 0.7106794249170554, + "grad_norm": 2.063051881652778, + "learning_rate": 9.82336189252623e-07, + "loss": 0.1014, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7390 + }, + { + "epoch": 0.7107755926335529, + "grad_norm": 2.2450286950580494, + "learning_rate": 9.817300846516406e-07, + "loss": 0.104, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7391 + }, + { + "epoch": 0.7108717603500505, + "grad_norm": 1.746501053495291, + "learning_rate": 9.811241214058168e-07, + "loss": 0.1155, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7392 + }, + { + "epoch": 0.7109679280665481, + "grad_norm": 2.0046847592750416, + "learning_rate": 9.805182995715685e-07, + "loss": 0.1167, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7393 + }, + { + "epoch": 0.7110640957830456, + "grad_norm": 1.6078902674426985, + "learning_rate": 9.79912619205298e-07, + "loss": 0.1111, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7394 + }, + { + "epoch": 0.7111602634995432, + "grad_norm": 3.393671456023387, + "learning_rate": 9.79307080363397e-07, + "loss": 0.1401, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7395 + }, + { + "epoch": 0.7112564312160408, + "grad_norm": 1.7067734818681155, + "learning_rate": 9.787016831022423e-07, + "loss": 0.104, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7396 + }, + { + "epoch": 0.7113525989325383, + "grad_norm": 2.8522969862307566, + "learning_rate": 9.780964274781984e-07, + "loss": 0.1424, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7397 + }, + { + "epoch": 0.7114487666490359, + "grad_norm": 1.6150528102206192, + "learning_rate": 9.77491313547617e-07, + "loss": 0.0993, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7398 + }, + { + "epoch": 0.7115449343655335, + "grad_norm": 1.7622886214843527, + "learning_rate": 9.768863413668345e-07, + "loss": 0.1126, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7399 + }, + { + "epoch": 0.711641102082031, + "grad_norm": 2.022079636817565, + "learning_rate": 9.762815109921762e-07, + "loss": 0.1283, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7400 + }, + { + "epoch": 0.7117372697985286, + "grad_norm": 2.2883145165559444, + "learning_rate": 9.756768224799532e-07, + "loss": 0.1353, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7401 + }, + { + "epoch": 0.7118334375150263, + "grad_norm": 2.697823922260809, + "learning_rate": 9.750722758864637e-07, + "loss": 0.1435, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7402 + }, + { + "epoch": 0.7119296052315238, + "grad_norm": 2.041054395084865, + "learning_rate": 9.744678712679931e-07, + "loss": 0.1267, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7403 + }, + { + "epoch": 0.7120257729480214, + "grad_norm": 1.9390790309081543, + "learning_rate": 9.738636086808138e-07, + "loss": 0.1169, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7404 + }, + { + "epoch": 0.712121940664519, + "grad_norm": 1.6621358528666639, + "learning_rate": 9.732594881811827e-07, + "loss": 0.1157, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7405 + }, + { + "epoch": 0.7122181083810165, + "grad_norm": 2.009480235833876, + "learning_rate": 9.726555098253456e-07, + "loss": 0.1375, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7406 + }, + { + "epoch": 0.7123142760975141, + "grad_norm": 1.5038081279310447, + "learning_rate": 9.720516736695348e-07, + "loss": 0.1144, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7407 + }, + { + "epoch": 0.7124104438140116, + "grad_norm": 3.7873067641159297, + "learning_rate": 9.714479797699695e-07, + "loss": 0.152, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7408 + }, + { + "epoch": 0.7125066115305092, + "grad_norm": 1.5127912000576058, + "learning_rate": 9.708444281828546e-07, + "loss": 0.1105, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7409 + }, + { + "epoch": 0.7126027792470068, + "grad_norm": 1.6068377093161732, + "learning_rate": 9.702410189643838e-07, + "loss": 0.118, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7410 + }, + { + "epoch": 0.7126989469635043, + "grad_norm": 1.6311818595911123, + "learning_rate": 9.696377521707343e-07, + "loss": 0.107, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7411 + }, + { + "epoch": 0.7127951146800019, + "grad_norm": 1.6406543515918643, + "learning_rate": 9.690346278580726e-07, + "loss": 0.106, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7412 + }, + { + "epoch": 0.7128912823964995, + "grad_norm": 1.8534730312967074, + "learning_rate": 9.684316460825524e-07, + "loss": 0.1252, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7413 + }, + { + "epoch": 0.712987450112997, + "grad_norm": 1.7767001738495498, + "learning_rate": 9.67828806900311e-07, + "loss": 0.1066, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7414 + }, + { + "epoch": 0.7130836178294946, + "grad_norm": 2.4291210981728937, + "learning_rate": 9.672261103674754e-07, + "loss": 0.1223, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7415 + }, + { + "epoch": 0.7131797855459923, + "grad_norm": 2.436858277891396, + "learning_rate": 9.666235565401594e-07, + "loss": 0.1315, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7416 + }, + { + "epoch": 0.7132759532624898, + "grad_norm": 1.4927224790636116, + "learning_rate": 9.660211454744605e-07, + "loss": 0.1088, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7417 + }, + { + "epoch": 0.7133721209789874, + "grad_norm": 2.8047913332343795, + "learning_rate": 9.654188772264656e-07, + "loss": 0.1211, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7418 + }, + { + "epoch": 0.713468288695485, + "grad_norm": 1.8855645765001954, + "learning_rate": 9.648167518522479e-07, + "loss": 0.104, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7419 + }, + { + "epoch": 0.7135644564119825, + "grad_norm": 1.4847329456292542, + "learning_rate": 9.642147694078664e-07, + "loss": 0.117, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7420 + }, + { + "epoch": 0.7136606241284801, + "grad_norm": 2.1776327932416457, + "learning_rate": 9.636129299493685e-07, + "loss": 0.1318, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7421 + }, + { + "epoch": 0.7137567918449776, + "grad_norm": 2.078307828090661, + "learning_rate": 9.630112335327858e-07, + "loss": 0.0949, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7422 + }, + { + "epoch": 0.7138529595614752, + "grad_norm": 1.6466742567954102, + "learning_rate": 9.62409680214138e-07, + "loss": 0.0978, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7423 + }, + { + "epoch": 0.7139491272779728, + "grad_norm": 1.4093573270679376, + "learning_rate": 9.61808270049432e-07, + "loss": 0.0966, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7424 + }, + { + "epoch": 0.7140452949944703, + "grad_norm": 2.1010713788947046, + "learning_rate": 9.612070030946605e-07, + "loss": 0.134, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7425 + }, + { + "epoch": 0.7141414627109679, + "grad_norm": 2.2545077021017965, + "learning_rate": 9.60605879405804e-07, + "loss": 0.1519, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7426 + }, + { + "epoch": 0.7142376304274655, + "grad_norm": 1.9682285086392013, + "learning_rate": 9.600048990388271e-07, + "loss": 0.1036, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7427 + }, + { + "epoch": 0.714333798143963, + "grad_norm": 2.5844484563584476, + "learning_rate": 9.59404062049684e-07, + "loss": 0.1309, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7428 + }, + { + "epoch": 0.7144299658604606, + "grad_norm": 1.8160488759906503, + "learning_rate": 9.588033684943136e-07, + "loss": 0.1468, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7429 + }, + { + "epoch": 0.7145261335769583, + "grad_norm": 2.1053334604720115, + "learning_rate": 9.582028184286423e-07, + "loss": 0.1387, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7430 + }, + { + "epoch": 0.7146223012934558, + "grad_norm": 1.8658482725270011, + "learning_rate": 9.576024119085833e-07, + "loss": 0.132, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7431 + }, + { + "epoch": 0.7147184690099534, + "grad_norm": 1.7827845516659448, + "learning_rate": 9.57002148990037e-07, + "loss": 0.1062, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7432 + }, + { + "epoch": 0.714814636726451, + "grad_norm": 1.902488217812814, + "learning_rate": 9.564020297288876e-07, + "loss": 0.0941, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7433 + }, + { + "epoch": 0.7149108044429485, + "grad_norm": 1.4012245162869283, + "learning_rate": 9.558020541810087e-07, + "loss": 0.1027, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7434 + }, + { + "epoch": 0.7150069721594461, + "grad_norm": 1.4847809792710565, + "learning_rate": 9.552022224022597e-07, + "loss": 0.1162, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7435 + }, + { + "epoch": 0.7151031398759436, + "grad_norm": 1.7105995756352512, + "learning_rate": 9.546025344484868e-07, + "loss": 0.1221, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7436 + }, + { + "epoch": 0.7151993075924412, + "grad_norm": 1.7396439400191934, + "learning_rate": 9.540029903755225e-07, + "loss": 0.1254, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7437 + }, + { + "epoch": 0.7152954753089388, + "grad_norm": 2.4164003863108503, + "learning_rate": 9.534035902391869e-07, + "loss": 0.1468, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7438 + }, + { + "epoch": 0.7153916430254363, + "grad_norm": 1.824555410995207, + "learning_rate": 9.528043340952842e-07, + "loss": 0.1109, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7439 + }, + { + "epoch": 0.7154878107419339, + "grad_norm": 4.097836362519906, + "learning_rate": 9.522052219996072e-07, + "loss": 0.1356, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7440 + }, + { + "epoch": 0.7155839784584315, + "grad_norm": 2.035527784319124, + "learning_rate": 9.516062540079357e-07, + "loss": 0.1258, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7441 + }, + { + "epoch": 0.715680146174929, + "grad_norm": 2.9713160444915983, + "learning_rate": 9.510074301760347e-07, + "loss": 0.1252, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7442 + }, + { + "epoch": 0.7157763138914266, + "grad_norm": 1.342394774312928, + "learning_rate": 9.504087505596573e-07, + "loss": 0.0969, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7443 + }, + { + "epoch": 0.7158724816079243, + "grad_norm": 2.844441090010212, + "learning_rate": 9.498102152145408e-07, + "loss": 0.1278, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7444 + }, + { + "epoch": 0.7159686493244218, + "grad_norm": 1.482361060647663, + "learning_rate": 9.49211824196411e-07, + "loss": 0.0964, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7445 + }, + { + "epoch": 0.7160648170409194, + "grad_norm": 1.4420953467190087, + "learning_rate": 9.486135775609808e-07, + "loss": 0.1111, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7446 + }, + { + "epoch": 0.716160984757417, + "grad_norm": 1.709423515252947, + "learning_rate": 9.480154753639473e-07, + "loss": 0.1328, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7447 + }, + { + "epoch": 0.7162571524739145, + "grad_norm": 1.8268633676237953, + "learning_rate": 9.474175176609956e-07, + "loss": 0.1075, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7448 + }, + { + "epoch": 0.7163533201904121, + "grad_norm": 1.6984981321247143, + "learning_rate": 9.468197045077976e-07, + "loss": 0.1205, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7449 + }, + { + "epoch": 0.7164494879069097, + "grad_norm": 1.7497029094644934, + "learning_rate": 9.462220359600125e-07, + "loss": 0.1249, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7450 + }, + { + "epoch": 0.7165456556234072, + "grad_norm": 1.744506790214154, + "learning_rate": 9.456245120732826e-07, + "loss": 0.1233, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7451 + }, + { + "epoch": 0.7166418233399048, + "grad_norm": 1.735728099471334, + "learning_rate": 9.450271329032404e-07, + "loss": 0.1188, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7452 + }, + { + "epoch": 0.7167379910564023, + "grad_norm": 1.5449223881106775, + "learning_rate": 9.444298985055031e-07, + "loss": 0.1129, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7453 + }, + { + "epoch": 0.7168341587728999, + "grad_norm": 1.7647949469975577, + "learning_rate": 9.438328089356763e-07, + "loss": 0.109, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7454 + }, + { + "epoch": 0.7169303264893975, + "grad_norm": 4.659806609108781, + "learning_rate": 9.432358642493486e-07, + "loss": 0.1023, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7455 + }, + { + "epoch": 0.717026494205895, + "grad_norm": 1.724514779901776, + "learning_rate": 9.426390645020981e-07, + "loss": 0.1395, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7456 + }, + { + "epoch": 0.7171226619223926, + "grad_norm": 1.404554724128024, + "learning_rate": 9.420424097494885e-07, + "loss": 0.0933, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7457 + }, + { + "epoch": 0.7172188296388903, + "grad_norm": 1.4405274960431822, + "learning_rate": 9.414459000470702e-07, + "loss": 0.0976, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7458 + }, + { + "epoch": 0.7173149973553878, + "grad_norm": 1.5659656802926332, + "learning_rate": 9.4084953545038e-07, + "loss": 0.1244, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7459 + }, + { + "epoch": 0.7174111650718854, + "grad_norm": 1.5685481253604776, + "learning_rate": 9.402533160149415e-07, + "loss": 0.1158, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7460 + }, + { + "epoch": 0.717507332788383, + "grad_norm": 1.5652482324444212, + "learning_rate": 9.396572417962633e-07, + "loss": 0.1067, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7461 + }, + { + "epoch": 0.7176035005048805, + "grad_norm": 1.6881486830516486, + "learning_rate": 9.390613128498419e-07, + "loss": 0.1349, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7462 + }, + { + "epoch": 0.7176996682213781, + "grad_norm": 2.17493484931879, + "learning_rate": 9.3846552923116e-07, + "loss": 0.1015, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7463 + }, + { + "epoch": 0.7177958359378757, + "grad_norm": 2.0107114599325175, + "learning_rate": 9.378698909956868e-07, + "loss": 0.1336, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7464 + }, + { + "epoch": 0.7178920036543732, + "grad_norm": 2.005367662648405, + "learning_rate": 9.372743981988782e-07, + "loss": 0.121, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7465 + }, + { + "epoch": 0.7179881713708708, + "grad_norm": 1.857127259378153, + "learning_rate": 9.366790508961767e-07, + "loss": 0.1354, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7466 + }, + { + "epoch": 0.7180843390873684, + "grad_norm": 2.2373965864028738, + "learning_rate": 9.360838491430093e-07, + "loss": 0.1481, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7467 + }, + { + "epoch": 0.7181805068038659, + "grad_norm": 2.1699570005621776, + "learning_rate": 9.354887929947915e-07, + "loss": 0.1383, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7468 + }, + { + "epoch": 0.7182766745203635, + "grad_norm": 1.9133646588012536, + "learning_rate": 9.348938825069251e-07, + "loss": 0.1311, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7469 + }, + { + "epoch": 0.718372842236861, + "grad_norm": 1.5462985923730712, + "learning_rate": 9.342991177347977e-07, + "loss": 0.1018, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7470 + }, + { + "epoch": 0.7184690099533586, + "grad_norm": 2.722029482822811, + "learning_rate": 9.337044987337843e-07, + "loss": 0.117, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7471 + }, + { + "epoch": 0.7185651776698563, + "grad_norm": 2.289914985831192, + "learning_rate": 9.331100255592437e-07, + "loss": 0.1184, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7472 + }, + { + "epoch": 0.7186613453863538, + "grad_norm": 2.7317336222014545, + "learning_rate": 9.325156982665246e-07, + "loss": 0.1076, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7473 + }, + { + "epoch": 0.7187575131028514, + "grad_norm": 1.518391011557911, + "learning_rate": 9.319215169109599e-07, + "loss": 0.107, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7474 + }, + { + "epoch": 0.718853680819349, + "grad_norm": 1.7701406392041306, + "learning_rate": 9.313274815478698e-07, + "loss": 0.11, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7475 + }, + { + "epoch": 0.7189498485358465, + "grad_norm": 3.065942423724246, + "learning_rate": 9.307335922325605e-07, + "loss": 0.1371, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7476 + }, + { + "epoch": 0.7190460162523441, + "grad_norm": 1.8744694087992728, + "learning_rate": 9.301398490203256e-07, + "loss": 0.0993, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7477 + }, + { + "epoch": 0.7191421839688417, + "grad_norm": 1.7542376114135123, + "learning_rate": 9.295462519664428e-07, + "loss": 0.1051, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7478 + }, + { + "epoch": 0.7192383516853392, + "grad_norm": 1.8346083002612128, + "learning_rate": 9.289528011261781e-07, + "loss": 0.1085, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7479 + }, + { + "epoch": 0.7193345194018368, + "grad_norm": 1.570061147344857, + "learning_rate": 9.283594965547846e-07, + "loss": 0.0944, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7480 + }, + { + "epoch": 0.7194306871183344, + "grad_norm": 2.2575325807064157, + "learning_rate": 9.277663383074989e-07, + "loss": 0.1198, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7481 + }, + { + "epoch": 0.7195268548348319, + "grad_norm": 2.309577256204627, + "learning_rate": 9.271733264395464e-07, + "loss": 0.0905, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7482 + }, + { + "epoch": 0.7196230225513295, + "grad_norm": 1.4492558270911573, + "learning_rate": 9.265804610061388e-07, + "loss": 0.0907, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7483 + }, + { + "epoch": 0.719719190267827, + "grad_norm": 1.4786453401190303, + "learning_rate": 9.259877420624722e-07, + "loss": 0.0955, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7484 + }, + { + "epoch": 0.7198153579843246, + "grad_norm": 2.933843672716565, + "learning_rate": 9.253951696637311e-07, + "loss": 0.1296, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7485 + }, + { + "epoch": 0.7199115257008223, + "grad_norm": 2.1242350991737986, + "learning_rate": 9.248027438650856e-07, + "loss": 0.1071, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7486 + }, + { + "epoch": 0.7200076934173198, + "grad_norm": 1.8686834658010287, + "learning_rate": 9.242104647216921e-07, + "loss": 0.1196, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7487 + }, + { + "epoch": 0.7201038611338174, + "grad_norm": 2.11463816039694, + "learning_rate": 9.236183322886946e-07, + "loss": 0.1126, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7488 + }, + { + "epoch": 0.720200028850315, + "grad_norm": 2.788024539471091, + "learning_rate": 9.230263466212203e-07, + "loss": 0.1189, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7489 + }, + { + "epoch": 0.7202961965668125, + "grad_norm": 2.040735623640108, + "learning_rate": 9.224345077743857e-07, + "loss": 0.1129, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7490 + }, + { + "epoch": 0.7203923642833101, + "grad_norm": 1.6221156539744594, + "learning_rate": 9.218428158032925e-07, + "loss": 0.0972, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7491 + }, + { + "epoch": 0.7204885319998077, + "grad_norm": 1.970181419121404, + "learning_rate": 9.212512707630289e-07, + "loss": 0.1185, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7492 + }, + { + "epoch": 0.7205846997163052, + "grad_norm": 1.7280185288150123, + "learning_rate": 9.206598727086696e-07, + "loss": 0.0959, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7493 + }, + { + "epoch": 0.7206808674328028, + "grad_norm": 2.0842666523334725, + "learning_rate": 9.200686216952762e-07, + "loss": 0.144, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7494 + }, + { + "epoch": 0.7207770351493004, + "grad_norm": 1.9781655097914994, + "learning_rate": 9.194775177778939e-07, + "loss": 0.1421, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7495 + }, + { + "epoch": 0.7208732028657979, + "grad_norm": 2.1634552701048984, + "learning_rate": 9.188865610115572e-07, + "loss": 0.1193, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7496 + }, + { + "epoch": 0.7209693705822955, + "grad_norm": 3.114400618080373, + "learning_rate": 9.182957514512855e-07, + "loss": 0.1804, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7497 + }, + { + "epoch": 0.721065538298793, + "grad_norm": 1.9809237681278598, + "learning_rate": 9.177050891520852e-07, + "loss": 0.0903, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7498 + }, + { + "epoch": 0.7211617060152906, + "grad_norm": 1.8489100889783858, + "learning_rate": 9.171145741689494e-07, + "loss": 0.1354, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7499 + }, + { + "epoch": 0.7212578737317883, + "grad_norm": 2.3555780858357385, + "learning_rate": 9.165242065568547e-07, + "loss": 0.1264, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7500 + }, + { + "epoch": 0.7213540414482859, + "grad_norm": 2.3754259285135153, + "learning_rate": 9.15933986370767e-07, + "loss": 0.1264, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7501 + }, + { + "epoch": 0.7214502091647834, + "grad_norm": 1.965607583748575, + "learning_rate": 9.153439136656376e-07, + "loss": 0.142, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7502 + }, + { + "epoch": 0.721546376881281, + "grad_norm": 2.272845256377289, + "learning_rate": 9.147539884964038e-07, + "loss": 0.1318, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7503 + }, + { + "epoch": 0.7216425445977785, + "grad_norm": 1.785028774668347, + "learning_rate": 9.141642109179891e-07, + "loss": 0.1244, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7504 + }, + { + "epoch": 0.7217387123142761, + "grad_norm": 1.6168119539311563, + "learning_rate": 9.135745809853041e-07, + "loss": 0.1062, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7505 + }, + { + "epoch": 0.7218348800307737, + "grad_norm": 2.376606034201681, + "learning_rate": 9.12985098753244e-07, + "loss": 0.1296, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7506 + }, + { + "epoch": 0.7219310477472712, + "grad_norm": 1.556535253183367, + "learning_rate": 9.123957642766917e-07, + "loss": 0.1247, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7507 + }, + { + "epoch": 0.7220272154637688, + "grad_norm": 1.6485134440167224, + "learning_rate": 9.118065776105159e-07, + "loss": 0.1358, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7508 + }, + { + "epoch": 0.7221233831802664, + "grad_norm": 1.349515712667395, + "learning_rate": 9.112175388095715e-07, + "loss": 0.083, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7509 + }, + { + "epoch": 0.7222195508967639, + "grad_norm": 1.548056314538719, + "learning_rate": 9.106286479287002e-07, + "loss": 0.1098, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7510 + }, + { + "epoch": 0.7223157186132615, + "grad_norm": 1.6437570667434847, + "learning_rate": 9.100399050227282e-07, + "loss": 0.0966, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7511 + }, + { + "epoch": 0.7224118863297591, + "grad_norm": 1.943838678306608, + "learning_rate": 9.094513101464697e-07, + "loss": 0.1196, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7512 + }, + { + "epoch": 0.7225080540462566, + "grad_norm": 1.7260407027160403, + "learning_rate": 9.088628633547256e-07, + "loss": 0.1221, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7513 + }, + { + "epoch": 0.7226042217627543, + "grad_norm": 1.8185509089612075, + "learning_rate": 9.082745647022798e-07, + "loss": 0.1277, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7514 + }, + { + "epoch": 0.7227003894792519, + "grad_norm": 1.7448787773750998, + "learning_rate": 9.076864142439056e-07, + "loss": 0.1278, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7515 + }, + { + "epoch": 0.7227965571957494, + "grad_norm": 1.4659788515114014, + "learning_rate": 9.070984120343626e-07, + "loss": 0.1015, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7516 + }, + { + "epoch": 0.722892724912247, + "grad_norm": 1.9830575604627583, + "learning_rate": 9.065105581283934e-07, + "loss": 0.1281, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7517 + }, + { + "epoch": 0.7229888926287446, + "grad_norm": 1.7760710557950796, + "learning_rate": 9.059228525807296e-07, + "loss": 0.12, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7518 + }, + { + "epoch": 0.7230850603452421, + "grad_norm": 1.6693345917087776, + "learning_rate": 9.053352954460884e-07, + "loss": 0.1542, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7519 + }, + { + "epoch": 0.7231812280617397, + "grad_norm": 2.1460789821519004, + "learning_rate": 9.047478867791732e-07, + "loss": 0.1552, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7520 + }, + { + "epoch": 0.7232773957782372, + "grad_norm": 1.4760865313401417, + "learning_rate": 9.041606266346731e-07, + "loss": 0.1068, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7521 + }, + { + "epoch": 0.7233735634947348, + "grad_norm": 1.81699455858322, + "learning_rate": 9.035735150672645e-07, + "loss": 0.0945, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7522 + }, + { + "epoch": 0.7234697312112324, + "grad_norm": 1.9168948661986651, + "learning_rate": 9.029865521316075e-07, + "loss": 0.0971, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7523 + }, + { + "epoch": 0.7235658989277299, + "grad_norm": 1.7824596196540936, + "learning_rate": 9.02399737882351e-07, + "loss": 0.1193, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7524 + }, + { + "epoch": 0.7236620666442275, + "grad_norm": 1.7353470116004575, + "learning_rate": 9.018130723741286e-07, + "loss": 0.1223, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7525 + }, + { + "epoch": 0.7237582343607251, + "grad_norm": 2.276631525555348, + "learning_rate": 9.012265556615609e-07, + "loss": 0.0985, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7526 + }, + { + "epoch": 0.7238544020772226, + "grad_norm": 1.6082896592376918, + "learning_rate": 9.006401877992549e-07, + "loss": 0.119, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7527 + }, + { + "epoch": 0.7239505697937203, + "grad_norm": 1.7878903288075205, + "learning_rate": 9.000539688418017e-07, + "loss": 0.1029, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7528 + }, + { + "epoch": 0.7240467375102179, + "grad_norm": 1.9302411321315407, + "learning_rate": 8.994678988437802e-07, + "loss": 0.1073, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7529 + }, + { + "epoch": 0.7241429052267154, + "grad_norm": 1.7474167963355556, + "learning_rate": 8.988819778597557e-07, + "loss": 0.1107, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7530 + }, + { + "epoch": 0.724239072943213, + "grad_norm": 1.7793305744082268, + "learning_rate": 8.982962059442787e-07, + "loss": 0.1096, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7531 + }, + { + "epoch": 0.7243352406597106, + "grad_norm": 2.124330030175782, + "learning_rate": 8.977105831518865e-07, + "loss": 0.1435, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7532 + }, + { + "epoch": 0.7244314083762081, + "grad_norm": 1.3378316456550536, + "learning_rate": 8.971251095371025e-07, + "loss": 0.1009, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7533 + }, + { + "epoch": 0.7245275760927057, + "grad_norm": 1.4130632793529891, + "learning_rate": 8.96539785154435e-07, + "loss": 0.0874, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7534 + }, + { + "epoch": 0.7246237438092032, + "grad_norm": 1.371327222931858, + "learning_rate": 8.959546100583796e-07, + "loss": 0.0966, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7535 + }, + { + "epoch": 0.7247199115257008, + "grad_norm": 1.7670461874505905, + "learning_rate": 8.953695843034179e-07, + "loss": 0.1291, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7536 + }, + { + "epoch": 0.7248160792421984, + "grad_norm": 1.8880206608217258, + "learning_rate": 8.947847079440178e-07, + "loss": 0.1321, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7537 + }, + { + "epoch": 0.7249122469586959, + "grad_norm": 1.5756312845406908, + "learning_rate": 8.94199981034633e-07, + "loss": 0.1152, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7538 + }, + { + "epoch": 0.7250084146751935, + "grad_norm": 1.7448350458987343, + "learning_rate": 8.936154036297024e-07, + "loss": 0.1198, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7539 + }, + { + "epoch": 0.7251045823916911, + "grad_norm": 1.5661554770031774, + "learning_rate": 8.930309757836517e-07, + "loss": 0.0925, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7540 + }, + { + "epoch": 0.7252007501081886, + "grad_norm": 1.6704063378829022, + "learning_rate": 8.924466975508936e-07, + "loss": 0.1222, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7541 + }, + { + "epoch": 0.7252969178246863, + "grad_norm": 1.6897394652797175, + "learning_rate": 8.918625689858254e-07, + "loss": 0.1069, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7542 + }, + { + "epoch": 0.7253930855411839, + "grad_norm": 2.1249803330032835, + "learning_rate": 8.912785901428322e-07, + "loss": 0.1317, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7543 + }, + { + "epoch": 0.7254892532576814, + "grad_norm": 1.629621286200865, + "learning_rate": 8.906947610762826e-07, + "loss": 0.1225, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7544 + }, + { + "epoch": 0.725585420974179, + "grad_norm": 1.420341833042425, + "learning_rate": 8.901110818405328e-07, + "loss": 0.1019, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7545 + }, + { + "epoch": 0.7256815886906766, + "grad_norm": 2.4628383550114132, + "learning_rate": 8.895275524899269e-07, + "loss": 0.1159, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7546 + }, + { + "epoch": 0.7257777564071741, + "grad_norm": 1.9923500620587677, + "learning_rate": 8.889441730787907e-07, + "loss": 0.1102, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7547 + }, + { + "epoch": 0.7258739241236717, + "grad_norm": 1.7435524150840318, + "learning_rate": 8.883609436614394e-07, + "loss": 0.1154, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7548 + }, + { + "epoch": 0.7259700918401693, + "grad_norm": 1.7367060595256318, + "learning_rate": 8.877778642921731e-07, + "loss": 0.0985, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7549 + }, + { + "epoch": 0.7260662595566668, + "grad_norm": 1.9838819270796124, + "learning_rate": 8.871949350252795e-07, + "loss": 0.1457, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7550 + }, + { + "epoch": 0.7261624272731644, + "grad_norm": 1.6161227282813597, + "learning_rate": 8.866121559150287e-07, + "loss": 0.1218, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7551 + }, + { + "epoch": 0.7262585949896619, + "grad_norm": 1.8319118490578241, + "learning_rate": 8.860295270156804e-07, + "loss": 0.1273, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7552 + }, + { + "epoch": 0.7263547627061595, + "grad_norm": 1.5231960587542606, + "learning_rate": 8.854470483814784e-07, + "loss": 0.1056, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7553 + }, + { + "epoch": 0.7264509304226571, + "grad_norm": 1.9016380672541031, + "learning_rate": 8.848647200666536e-07, + "loss": 0.1296, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7554 + }, + { + "epoch": 0.7265470981391546, + "grad_norm": 1.526451031680835, + "learning_rate": 8.842825421254231e-07, + "loss": 0.0956, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7555 + }, + { + "epoch": 0.7266432658556523, + "grad_norm": 1.6580236329169593, + "learning_rate": 8.837005146119873e-07, + "loss": 0.1153, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7556 + }, + { + "epoch": 0.7267394335721499, + "grad_norm": 1.659344341788776, + "learning_rate": 8.831186375805356e-07, + "loss": 0.1272, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7557 + }, + { + "epoch": 0.7268356012886474, + "grad_norm": 1.9707443926146677, + "learning_rate": 8.825369110852427e-07, + "loss": 0.1327, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7558 + }, + { + "epoch": 0.726931769005145, + "grad_norm": 1.485318674633483, + "learning_rate": 8.819553351802685e-07, + "loss": 0.092, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7559 + }, + { + "epoch": 0.7270279367216426, + "grad_norm": 1.8528161513157897, + "learning_rate": 8.813739099197597e-07, + "loss": 0.1365, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7560 + }, + { + "epoch": 0.7271241044381401, + "grad_norm": 1.8609767457324788, + "learning_rate": 8.807926353578491e-07, + "loss": 0.1415, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7561 + }, + { + "epoch": 0.7272202721546377, + "grad_norm": 1.6415519300973453, + "learning_rate": 8.802115115486534e-07, + "loss": 0.1279, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7562 + }, + { + "epoch": 0.7273164398711353, + "grad_norm": 1.6495483814028218, + "learning_rate": 8.796305385462781e-07, + "loss": 0.1009, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7563 + }, + { + "epoch": 0.7274126075876328, + "grad_norm": 1.8655510764961851, + "learning_rate": 8.790497164048126e-07, + "loss": 0.0997, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7564 + }, + { + "epoch": 0.7275087753041304, + "grad_norm": 1.9462320695356274, + "learning_rate": 8.784690451783337e-07, + "loss": 0.0987, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7565 + }, + { + "epoch": 0.727604943020628, + "grad_norm": 1.350778056485612, + "learning_rate": 8.778885249209044e-07, + "loss": 0.0919, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7566 + }, + { + "epoch": 0.7277011107371255, + "grad_norm": 1.616119545193482, + "learning_rate": 8.773081556865706e-07, + "loss": 0.1128, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7567 + }, + { + "epoch": 0.7277972784536231, + "grad_norm": 1.7196094140728115, + "learning_rate": 8.767279375293672e-07, + "loss": 0.1191, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7568 + }, + { + "epoch": 0.7278934461701206, + "grad_norm": 2.2289821646048447, + "learning_rate": 8.761478705033147e-07, + "loss": 0.1334, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7569 + }, + { + "epoch": 0.7279896138866183, + "grad_norm": 1.6582732924973187, + "learning_rate": 8.755679546624182e-07, + "loss": 0.1129, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7570 + }, + { + "epoch": 0.7280857816031159, + "grad_norm": 2.149498647520849, + "learning_rate": 8.749881900606699e-07, + "loss": 0.111, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7571 + }, + { + "epoch": 0.7281819493196134, + "grad_norm": 1.8907537158830714, + "learning_rate": 8.744085767520485e-07, + "loss": 0.0855, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7572 + }, + { + "epoch": 0.728278117036111, + "grad_norm": 2.0082568645682732, + "learning_rate": 8.738291147905157e-07, + "loss": 0.1389, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7573 + }, + { + "epoch": 0.7283742847526086, + "grad_norm": 2.1937756217997393, + "learning_rate": 8.732498042300216e-07, + "loss": 0.1208, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7574 + }, + { + "epoch": 0.7284704524691061, + "grad_norm": 2.0499637087937757, + "learning_rate": 8.72670645124502e-07, + "loss": 0.1167, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7575 + }, + { + "epoch": 0.7285666201856037, + "grad_norm": 1.9850421449512485, + "learning_rate": 8.720916375278782e-07, + "loss": 0.1505, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7576 + }, + { + "epoch": 0.7286627879021013, + "grad_norm": 1.7342373040082448, + "learning_rate": 8.715127814940583e-07, + "loss": 0.1293, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7577 + }, + { + "epoch": 0.7287589556185988, + "grad_norm": 1.8107383859406625, + "learning_rate": 8.709340770769334e-07, + "loss": 0.087, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7578 + }, + { + "epoch": 0.7288551233350964, + "grad_norm": 1.3721875624475381, + "learning_rate": 8.703555243303835e-07, + "loss": 0.0881, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7579 + }, + { + "epoch": 0.728951291051594, + "grad_norm": 2.7448652535323594, + "learning_rate": 8.697771233082744e-07, + "loss": 0.1111, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7580 + }, + { + "epoch": 0.7290474587680915, + "grad_norm": 1.747400987155786, + "learning_rate": 8.691988740644552e-07, + "loss": 0.139, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7581 + }, + { + "epoch": 0.7291436264845891, + "grad_norm": 2.1169140481401407, + "learning_rate": 8.686207766527632e-07, + "loss": 0.1219, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7582 + }, + { + "epoch": 0.7292397942010866, + "grad_norm": 3.202104989159659, + "learning_rate": 8.680428311270219e-07, + "loss": 0.1115, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7583 + }, + { + "epoch": 0.7293359619175843, + "grad_norm": 3.094302451049353, + "learning_rate": 8.674650375410379e-07, + "loss": 0.1092, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7584 + }, + { + "epoch": 0.7294321296340819, + "grad_norm": 2.0964209178614346, + "learning_rate": 8.66887395948606e-07, + "loss": 0.1149, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7585 + }, + { + "epoch": 0.7295282973505794, + "grad_norm": 1.9065199478144033, + "learning_rate": 8.663099064035066e-07, + "loss": 0.1265, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7586 + }, + { + "epoch": 0.729624465067077, + "grad_norm": 1.5740173772656205, + "learning_rate": 8.657325689595053e-07, + "loss": 0.1114, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7587 + }, + { + "epoch": 0.7297206327835746, + "grad_norm": 2.272183330478291, + "learning_rate": 8.651553836703541e-07, + "loss": 0.1201, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7588 + }, + { + "epoch": 0.7298168005000721, + "grad_norm": 2.085497569887045, + "learning_rate": 8.645783505897909e-07, + "loss": 0.12, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7589 + }, + { + "epoch": 0.7299129682165697, + "grad_norm": 1.889981554938263, + "learning_rate": 8.640014697715382e-07, + "loss": 0.1112, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7590 + }, + { + "epoch": 0.7300091359330673, + "grad_norm": 2.4702420543606745, + "learning_rate": 8.634247412693053e-07, + "loss": 0.1186, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7591 + }, + { + "epoch": 0.7301053036495648, + "grad_norm": 1.948657245472517, + "learning_rate": 8.628481651367876e-07, + "loss": 0.1366, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7592 + }, + { + "epoch": 0.7302014713660624, + "grad_norm": 1.8367955930214415, + "learning_rate": 8.622717414276657e-07, + "loss": 0.1006, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7593 + }, + { + "epoch": 0.73029763908256, + "grad_norm": 1.9180015551915266, + "learning_rate": 8.616954701956074e-07, + "loss": 0.1085, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7594 + }, + { + "epoch": 0.7303938067990575, + "grad_norm": 2.9530186019413507, + "learning_rate": 8.611193514942634e-07, + "loss": 0.1542, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7595 + }, + { + "epoch": 0.7304899745155551, + "grad_norm": 2.003379142444378, + "learning_rate": 8.605433853772727e-07, + "loss": 0.1103, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7596 + }, + { + "epoch": 0.7305861422320526, + "grad_norm": 2.29910335014625, + "learning_rate": 8.599675718982595e-07, + "loss": 0.1234, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7597 + }, + { + "epoch": 0.7306823099485503, + "grad_norm": 2.6510946621125155, + "learning_rate": 8.593919111108332e-07, + "loss": 0.1075, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7598 + }, + { + "epoch": 0.7307784776650479, + "grad_norm": 2.0516490995223453, + "learning_rate": 8.5881640306859e-07, + "loss": 0.1058, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7599 + }, + { + "epoch": 0.7308746453815455, + "grad_norm": 1.7250568435524412, + "learning_rate": 8.582410478251119e-07, + "loss": 0.1286, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7600 + }, + { + "epoch": 0.730970813098043, + "grad_norm": 2.822510262131833, + "learning_rate": 8.576658454339642e-07, + "loss": 0.1067, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7601 + }, + { + "epoch": 0.7310669808145406, + "grad_norm": 2.772168263017534, + "learning_rate": 8.57090795948701e-07, + "loss": 0.1331, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7602 + }, + { + "epoch": 0.7311631485310381, + "grad_norm": 3.7966234514628576, + "learning_rate": 8.565158994228609e-07, + "loss": 0.1457, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7603 + }, + { + "epoch": 0.7312593162475357, + "grad_norm": 1.651258247412231, + "learning_rate": 8.559411559099682e-07, + "loss": 0.1244, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7604 + }, + { + "epoch": 0.7313554839640333, + "grad_norm": 2.971143473853943, + "learning_rate": 8.553665654635343e-07, + "loss": 0.1161, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7605 + }, + { + "epoch": 0.7314516516805308, + "grad_norm": 2.6254615830928416, + "learning_rate": 8.54792128137053e-07, + "loss": 0.1054, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7606 + }, + { + "epoch": 0.7315478193970284, + "grad_norm": 2.1370832107655238, + "learning_rate": 8.542178439840073e-07, + "loss": 0.1133, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7607 + }, + { + "epoch": 0.731643987113526, + "grad_norm": 1.5653197526425733, + "learning_rate": 8.536437130578648e-07, + "loss": 0.1158, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7608 + }, + { + "epoch": 0.7317401548300235, + "grad_norm": 1.8589364212501518, + "learning_rate": 8.530697354120782e-07, + "loss": 0.1238, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7609 + }, + { + "epoch": 0.7318363225465211, + "grad_norm": 1.6775144681829484, + "learning_rate": 8.524959111000872e-07, + "loss": 0.1289, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7610 + }, + { + "epoch": 0.7319324902630187, + "grad_norm": 1.5197611186893372, + "learning_rate": 8.519222401753152e-07, + "loss": 0.113, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7611 + }, + { + "epoch": 0.7320286579795163, + "grad_norm": 2.1382815082073563, + "learning_rate": 8.513487226911732e-07, + "loss": 0.1151, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7612 + }, + { + "epoch": 0.7321248256960139, + "grad_norm": 1.5541078678246252, + "learning_rate": 8.507753587010584e-07, + "loss": 0.1186, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7613 + }, + { + "epoch": 0.7322209934125115, + "grad_norm": 1.7566607345745127, + "learning_rate": 8.502021482583503e-07, + "loss": 0.0982, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7614 + }, + { + "epoch": 0.732317161129009, + "grad_norm": 2.396856634816477, + "learning_rate": 8.496290914164177e-07, + "loss": 0.1335, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7615 + }, + { + "epoch": 0.7324133288455066, + "grad_norm": 1.5079550518648595, + "learning_rate": 8.490561882286136e-07, + "loss": 0.1126, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7616 + }, + { + "epoch": 0.7325094965620041, + "grad_norm": 1.7263242054909378, + "learning_rate": 8.484834387482777e-07, + "loss": 0.1112, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7617 + }, + { + "epoch": 0.7326056642785017, + "grad_norm": 1.5735200304319694, + "learning_rate": 8.479108430287331e-07, + "loss": 0.0987, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7618 + }, + { + "epoch": 0.7327018319949993, + "grad_norm": 2.1627900201665877, + "learning_rate": 8.473384011232907e-07, + "loss": 0.1444, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7619 + }, + { + "epoch": 0.7327979997114968, + "grad_norm": 2.1293738734648486, + "learning_rate": 8.467661130852464e-07, + "loss": 0.1325, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7620 + }, + { + "epoch": 0.7328941674279944, + "grad_norm": 1.7646878413557319, + "learning_rate": 8.461939789678822e-07, + "loss": 0.1274, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7621 + }, + { + "epoch": 0.732990335144492, + "grad_norm": 1.6126133282494675, + "learning_rate": 8.456219988244657e-07, + "loss": 0.1104, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7622 + }, + { + "epoch": 0.7330865028609895, + "grad_norm": 3.0185303073196246, + "learning_rate": 8.450501727082486e-07, + "loss": 0.1429, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7623 + }, + { + "epoch": 0.7331826705774871, + "grad_norm": 1.6902464027647834, + "learning_rate": 8.444785006724698e-07, + "loss": 0.1191, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7624 + }, + { + "epoch": 0.7332788382939847, + "grad_norm": 1.6553401625115167, + "learning_rate": 8.439069827703541e-07, + "loss": 0.1112, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7625 + }, + { + "epoch": 0.7333750060104823, + "grad_norm": 1.7318471603212273, + "learning_rate": 8.433356190551112e-07, + "loss": 0.1271, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7626 + }, + { + "epoch": 0.7334711737269799, + "grad_norm": 1.625139294347558, + "learning_rate": 8.427644095799367e-07, + "loss": 0.1216, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7627 + }, + { + "epoch": 0.7335673414434775, + "grad_norm": 2.646967582204547, + "learning_rate": 8.421933543980126e-07, + "loss": 0.145, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7628 + }, + { + "epoch": 0.733663509159975, + "grad_norm": 2.2227536916078745, + "learning_rate": 8.416224535625039e-07, + "loss": 0.1254, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7629 + }, + { + "epoch": 0.7337596768764726, + "grad_norm": 2.5510885364137836, + "learning_rate": 8.410517071265642e-07, + "loss": 0.1328, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7630 + }, + { + "epoch": 0.7338558445929702, + "grad_norm": 2.297971063754673, + "learning_rate": 8.404811151433311e-07, + "loss": 0.1444, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7631 + }, + { + "epoch": 0.7339520123094677, + "grad_norm": 1.589403096756606, + "learning_rate": 8.399106776659291e-07, + "loss": 0.1066, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7632 + }, + { + "epoch": 0.7340481800259653, + "grad_norm": 1.5694325143141872, + "learning_rate": 8.393403947474677e-07, + "loss": 0.132, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7633 + }, + { + "epoch": 0.7341443477424628, + "grad_norm": 1.8742593695072574, + "learning_rate": 8.387702664410402e-07, + "loss": 0.0726, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7634 + }, + { + "epoch": 0.7342405154589604, + "grad_norm": 1.8395352625396546, + "learning_rate": 8.382002927997283e-07, + "loss": 0.119, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7635 + }, + { + "epoch": 0.734336683175458, + "grad_norm": 2.1453682097432165, + "learning_rate": 8.376304738765978e-07, + "loss": 0.1283, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7636 + }, + { + "epoch": 0.7344328508919555, + "grad_norm": 1.9315923202150496, + "learning_rate": 8.370608097247005e-07, + "loss": 0.0956, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7637 + }, + { + "epoch": 0.7345290186084531, + "grad_norm": 1.450079325284581, + "learning_rate": 8.36491300397074e-07, + "loss": 0.1031, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7638 + }, + { + "epoch": 0.7346251863249507, + "grad_norm": 2.122320987058908, + "learning_rate": 8.359219459467415e-07, + "loss": 0.1166, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7639 + }, + { + "epoch": 0.7347213540414483, + "grad_norm": 1.5954862629221918, + "learning_rate": 8.353527464267105e-07, + "loss": 0.1243, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7640 + }, + { + "epoch": 0.7348175217579459, + "grad_norm": 1.7381074233962646, + "learning_rate": 8.347837018899754e-07, + "loss": 0.1238, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7641 + }, + { + "epoch": 0.7349136894744435, + "grad_norm": 2.639330002699744, + "learning_rate": 8.342148123895158e-07, + "loss": 0.1142, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7642 + }, + { + "epoch": 0.735009857190941, + "grad_norm": 2.8729850624918107, + "learning_rate": 8.336460779782982e-07, + "loss": 0.1443, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7643 + }, + { + "epoch": 0.7351060249074386, + "grad_norm": 1.4893462510754851, + "learning_rate": 8.330774987092713e-07, + "loss": 0.0933, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7644 + }, + { + "epoch": 0.7352021926239362, + "grad_norm": 1.8806879485900914, + "learning_rate": 8.325090746353723e-07, + "loss": 0.0992, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7645 + }, + { + "epoch": 0.7352983603404337, + "grad_norm": 2.2538149601902373, + "learning_rate": 8.319408058095238e-07, + "loss": 0.1183, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7646 + }, + { + "epoch": 0.7353945280569313, + "grad_norm": 1.5055312123102287, + "learning_rate": 8.313726922846319e-07, + "loss": 0.093, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7647 + }, + { + "epoch": 0.7354906957734288, + "grad_norm": 1.428669677779863, + "learning_rate": 8.308047341135899e-07, + "loss": 0.0938, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7648 + }, + { + "epoch": 0.7355868634899264, + "grad_norm": 1.7344846835977747, + "learning_rate": 8.302369313492767e-07, + "loss": 0.1189, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7649 + }, + { + "epoch": 0.735683031206424, + "grad_norm": 2.185848466132369, + "learning_rate": 8.296692840445569e-07, + "loss": 0.1113, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7650 + }, + { + "epoch": 0.7357791989229215, + "grad_norm": 1.5870270460372573, + "learning_rate": 8.291017922522787e-07, + "loss": 0.1088, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7651 + }, + { + "epoch": 0.7358753666394191, + "grad_norm": 2.548548767892577, + "learning_rate": 8.285344560252778e-07, + "loss": 0.121, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7652 + }, + { + "epoch": 0.7359715343559167, + "grad_norm": 1.6217202274732965, + "learning_rate": 8.279672754163745e-07, + "loss": 0.1045, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7653 + }, + { + "epoch": 0.7360677020724143, + "grad_norm": 1.7884579230048494, + "learning_rate": 8.274002504783754e-07, + "loss": 0.1248, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7654 + }, + { + "epoch": 0.7361638697889119, + "grad_norm": 2.0011001561388047, + "learning_rate": 8.268333812640717e-07, + "loss": 0.0922, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7655 + }, + { + "epoch": 0.7362600375054095, + "grad_norm": 1.599158788012404, + "learning_rate": 8.262666678262415e-07, + "loss": 0.1074, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7656 + }, + { + "epoch": 0.736356205221907, + "grad_norm": 1.7389661412815118, + "learning_rate": 8.257001102176459e-07, + "loss": 0.1166, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7657 + }, + { + "epoch": 0.7364523729384046, + "grad_norm": 1.8147455768846696, + "learning_rate": 8.251337084910335e-07, + "loss": 0.1155, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7658 + }, + { + "epoch": 0.7365485406549022, + "grad_norm": 2.0309688484982953, + "learning_rate": 8.245674626991382e-07, + "loss": 0.0956, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7659 + }, + { + "epoch": 0.7366447083713997, + "grad_norm": 2.0291954518876296, + "learning_rate": 8.240013728946786e-07, + "loss": 0.1232, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7660 + }, + { + "epoch": 0.7367408760878973, + "grad_norm": 3.1604564003115447, + "learning_rate": 8.234354391303606e-07, + "loss": 0.1335, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7661 + }, + { + "epoch": 0.7368370438043949, + "grad_norm": 2.02973198380807, + "learning_rate": 8.228696614588721e-07, + "loss": 0.1146, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7662 + }, + { + "epoch": 0.7369332115208924, + "grad_norm": 1.997388958706783, + "learning_rate": 8.223040399328899e-07, + "loss": 0.1009, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7663 + }, + { + "epoch": 0.73702937923739, + "grad_norm": 1.9538309126005746, + "learning_rate": 8.217385746050743e-07, + "loss": 0.12, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7664 + }, + { + "epoch": 0.7371255469538875, + "grad_norm": 1.6662183718839672, + "learning_rate": 8.211732655280724e-07, + "loss": 0.1216, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7665 + }, + { + "epoch": 0.7372217146703851, + "grad_norm": 2.883958300238821, + "learning_rate": 8.206081127545157e-07, + "loss": 0.1477, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7666 + }, + { + "epoch": 0.7373178823868827, + "grad_norm": 2.526939016279424, + "learning_rate": 8.20043116337022e-07, + "loss": 0.1407, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7667 + }, + { + "epoch": 0.7374140501033803, + "grad_norm": 2.2494766947442675, + "learning_rate": 8.19478276328193e-07, + "loss": 0.1543, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7668 + }, + { + "epoch": 0.7375102178198779, + "grad_norm": 1.4014115540583056, + "learning_rate": 8.189135927806172e-07, + "loss": 0.1004, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7669 + }, + { + "epoch": 0.7376063855363755, + "grad_norm": 1.6502819750357425, + "learning_rate": 8.183490657468687e-07, + "loss": 0.1137, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7670 + }, + { + "epoch": 0.737702553252873, + "grad_norm": 1.658843518060389, + "learning_rate": 8.17784695279506e-07, + "loss": 0.1129, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7671 + }, + { + "epoch": 0.7377987209693706, + "grad_norm": 1.7377494483476936, + "learning_rate": 8.172204814310741e-07, + "loss": 0.1129, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7672 + }, + { + "epoch": 0.7378948886858682, + "grad_norm": 2.192703459551686, + "learning_rate": 8.166564242541034e-07, + "loss": 0.1097, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7673 + }, + { + "epoch": 0.7379910564023657, + "grad_norm": 2.2642649233411167, + "learning_rate": 8.160925238011072e-07, + "loss": 0.1098, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7674 + }, + { + "epoch": 0.7380872241188633, + "grad_norm": 2.1232082825668073, + "learning_rate": 8.155287801245878e-07, + "loss": 0.1235, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7675 + }, + { + "epoch": 0.7381833918353609, + "grad_norm": 2.0443182561830753, + "learning_rate": 8.149651932770308e-07, + "loss": 0.1017, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7676 + }, + { + "epoch": 0.7382795595518584, + "grad_norm": 1.9359185765878228, + "learning_rate": 8.144017633109086e-07, + "loss": 0.138, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7677 + }, + { + "epoch": 0.738375727268356, + "grad_norm": 1.5537944571618807, + "learning_rate": 8.138384902786767e-07, + "loss": 0.1038, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7678 + }, + { + "epoch": 0.7384718949848535, + "grad_norm": 1.803098515703623, + "learning_rate": 8.132753742327778e-07, + "loss": 0.1201, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7679 + }, + { + "epoch": 0.7385680627013511, + "grad_norm": 1.635344878926269, + "learning_rate": 8.127124152256408e-07, + "loss": 0.1288, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7680 + }, + { + "epoch": 0.7386642304178487, + "grad_norm": 1.9323240584078263, + "learning_rate": 8.121496133096768e-07, + "loss": 0.1087, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7681 + }, + { + "epoch": 0.7387603981343464, + "grad_norm": 2.2824354681293286, + "learning_rate": 8.115869685372851e-07, + "loss": 0.1036, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7682 + }, + { + "epoch": 0.7388565658508439, + "grad_norm": 2.253666461768071, + "learning_rate": 8.110244809608494e-07, + "loss": 0.1405, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7683 + }, + { + "epoch": 0.7389527335673415, + "grad_norm": 1.5570791396382258, + "learning_rate": 8.104621506327401e-07, + "loss": 0.0996, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7684 + }, + { + "epoch": 0.739048901283839, + "grad_norm": 1.6328064670118247, + "learning_rate": 8.098999776053099e-07, + "loss": 0.1259, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7685 + }, + { + "epoch": 0.7391450690003366, + "grad_norm": 2.32452445652063, + "learning_rate": 8.093379619308991e-07, + "loss": 0.1136, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7686 + }, + { + "epoch": 0.7392412367168342, + "grad_norm": 3.26504905890128, + "learning_rate": 8.087761036618336e-07, + "loss": 0.1192, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7687 + }, + { + "epoch": 0.7393374044333317, + "grad_norm": 2.551493239174388, + "learning_rate": 8.082144028504233e-07, + "loss": 0.1655, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7688 + }, + { + "epoch": 0.7394335721498293, + "grad_norm": 3.133179450225458, + "learning_rate": 8.076528595489652e-07, + "loss": 0.1468, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7689 + }, + { + "epoch": 0.7395297398663269, + "grad_norm": 2.9757819271075285, + "learning_rate": 8.070914738097391e-07, + "loss": 0.1516, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7690 + }, + { + "epoch": 0.7396259075828244, + "grad_norm": 2.0345565146645685, + "learning_rate": 8.065302456850124e-07, + "loss": 0.123, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7691 + }, + { + "epoch": 0.739722075299322, + "grad_norm": 1.6146091354530792, + "learning_rate": 8.059691752270366e-07, + "loss": 0.1295, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7692 + }, + { + "epoch": 0.7398182430158196, + "grad_norm": 1.5325564218066645, + "learning_rate": 8.054082624880491e-07, + "loss": 0.0938, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7693 + }, + { + "epoch": 0.7399144107323171, + "grad_norm": 1.5362494187865638, + "learning_rate": 8.048475075202727e-07, + "loss": 0.1165, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7694 + }, + { + "epoch": 0.7400105784488147, + "grad_norm": 1.7068473093800534, + "learning_rate": 8.042869103759157e-07, + "loss": 0.1236, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7695 + }, + { + "epoch": 0.7401067461653124, + "grad_norm": 2.0579057673063827, + "learning_rate": 8.037264711071699e-07, + "loss": 0.1154, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7696 + }, + { + "epoch": 0.7402029138818099, + "grad_norm": 1.5921036763302077, + "learning_rate": 8.031661897662143e-07, + "loss": 0.0926, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7697 + }, + { + "epoch": 0.7402990815983075, + "grad_norm": 1.2257713551543548, + "learning_rate": 8.026060664052132e-07, + "loss": 0.0849, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7698 + }, + { + "epoch": 0.740395249314805, + "grad_norm": 2.0962071026408062, + "learning_rate": 8.020461010763151e-07, + "loss": 0.0934, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7699 + }, + { + "epoch": 0.7404914170313026, + "grad_norm": 1.5317392616226753, + "learning_rate": 8.014862938316542e-07, + "loss": 0.1013, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7700 + }, + { + "epoch": 0.7405875847478002, + "grad_norm": 2.7773974407777207, + "learning_rate": 8.009266447233513e-07, + "loss": 0.1166, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7701 + }, + { + "epoch": 0.7406837524642977, + "grad_norm": 1.8698122561111399, + "learning_rate": 8.003671538035099e-07, + "loss": 0.1159, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7702 + }, + { + "epoch": 0.7407799201807953, + "grad_norm": 1.6011668798044014, + "learning_rate": 7.998078211242202e-07, + "loss": 0.1157, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7703 + }, + { + "epoch": 0.7408760878972929, + "grad_norm": 1.8028630415870122, + "learning_rate": 7.992486467375585e-07, + "loss": 0.1145, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7704 + }, + { + "epoch": 0.7409722556137904, + "grad_norm": 1.7639277563335962, + "learning_rate": 7.986896306955849e-07, + "loss": 0.1035, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7705 + }, + { + "epoch": 0.741068423330288, + "grad_norm": 1.7285246019013956, + "learning_rate": 7.981307730503462e-07, + "loss": 0.1056, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7706 + }, + { + "epoch": 0.7411645910467856, + "grad_norm": 2.966822904456659, + "learning_rate": 7.975720738538725e-07, + "loss": 0.0984, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7707 + }, + { + "epoch": 0.7412607587632831, + "grad_norm": 2.8101916733610546, + "learning_rate": 7.970135331581805e-07, + "loss": 0.1403, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7708 + }, + { + "epoch": 0.7413569264797807, + "grad_norm": 1.7894954243268626, + "learning_rate": 7.964551510152721e-07, + "loss": 0.0819, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7709 + }, + { + "epoch": 0.7414530941962784, + "grad_norm": 2.2527531699448993, + "learning_rate": 7.95896927477135e-07, + "loss": 0.1304, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7710 + }, + { + "epoch": 0.7415492619127759, + "grad_norm": 1.945349641388568, + "learning_rate": 7.953388625957397e-07, + "loss": 0.1367, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7711 + }, + { + "epoch": 0.7416454296292735, + "grad_norm": 2.2598115430376913, + "learning_rate": 7.947809564230446e-07, + "loss": 0.1377, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7712 + }, + { + "epoch": 0.741741597345771, + "grad_norm": 2.082972572976923, + "learning_rate": 7.942232090109928e-07, + "loss": 0.1152, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7713 + }, + { + "epoch": 0.7418377650622686, + "grad_norm": 2.0907289117466097, + "learning_rate": 7.936656204115109e-07, + "loss": 0.1291, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7714 + }, + { + "epoch": 0.7419339327787662, + "grad_norm": 1.808928305276578, + "learning_rate": 7.931081906765125e-07, + "loss": 0.0899, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7715 + }, + { + "epoch": 0.7420301004952637, + "grad_norm": 1.7989361418780894, + "learning_rate": 7.925509198578959e-07, + "loss": 0.1248, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7716 + }, + { + "epoch": 0.7421262682117613, + "grad_norm": 1.7256419533567426, + "learning_rate": 7.919938080075457e-07, + "loss": 0.1147, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7717 + }, + { + "epoch": 0.7422224359282589, + "grad_norm": 1.5175910688236212, + "learning_rate": 7.914368551773286e-07, + "loss": 0.0952, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7718 + }, + { + "epoch": 0.7423186036447564, + "grad_norm": 2.263908296205851, + "learning_rate": 7.90880061419099e-07, + "loss": 0.1583, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7719 + }, + { + "epoch": 0.742414771361254, + "grad_norm": 14.412222916163314, + "learning_rate": 7.903234267846965e-07, + "loss": 0.1337, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7720 + }, + { + "epoch": 0.7425109390777516, + "grad_norm": 1.4733817603791715, + "learning_rate": 7.897669513259451e-07, + "loss": 0.0925, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7721 + }, + { + "epoch": 0.7426071067942491, + "grad_norm": 1.619679409909652, + "learning_rate": 7.892106350946544e-07, + "loss": 0.1176, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7722 + }, + { + "epoch": 0.7427032745107467, + "grad_norm": 1.6026022753428202, + "learning_rate": 7.886544781426195e-07, + "loss": 0.1069, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7723 + }, + { + "epoch": 0.7427994422272444, + "grad_norm": 1.5919265061237324, + "learning_rate": 7.880984805216186e-07, + "loss": 0.0957, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7724 + }, + { + "epoch": 0.7428956099437419, + "grad_norm": 1.4477281550510008, + "learning_rate": 7.875426422834176e-07, + "loss": 0.0964, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7725 + }, + { + "epoch": 0.7429917776602395, + "grad_norm": 1.6213052009434934, + "learning_rate": 7.869869634797664e-07, + "loss": 0.0999, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7726 + }, + { + "epoch": 0.7430879453767371, + "grad_norm": 1.5690423050029432, + "learning_rate": 7.864314441624005e-07, + "loss": 0.1192, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7727 + }, + { + "epoch": 0.7431841130932346, + "grad_norm": 1.557732184849515, + "learning_rate": 7.858760843830402e-07, + "loss": 0.104, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7728 + }, + { + "epoch": 0.7432802808097322, + "grad_norm": 2.2087218326083424, + "learning_rate": 7.853208841933916e-07, + "loss": 0.1045, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7729 + }, + { + "epoch": 0.7433764485262297, + "grad_norm": 1.3985944560995043, + "learning_rate": 7.847658436451441e-07, + "loss": 0.1064, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7730 + }, + { + "epoch": 0.7434726162427273, + "grad_norm": 2.393296414696044, + "learning_rate": 7.842109627899741e-07, + "loss": 0.121, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7731 + }, + { + "epoch": 0.7435687839592249, + "grad_norm": 1.5918334131489913, + "learning_rate": 7.836562416795427e-07, + "loss": 0.0924, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7732 + }, + { + "epoch": 0.7436649516757224, + "grad_norm": 2.201677849588727, + "learning_rate": 7.831016803654959e-07, + "loss": 0.1265, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7733 + }, + { + "epoch": 0.74376111939222, + "grad_norm": 3.2615402212073907, + "learning_rate": 7.825472788994657e-07, + "loss": 0.1351, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7734 + }, + { + "epoch": 0.7438572871087176, + "grad_norm": 2.6700329096234796, + "learning_rate": 7.819930373330669e-07, + "loss": 0.1097, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7735 + }, + { + "epoch": 0.7439534548252151, + "grad_norm": 2.008991214337775, + "learning_rate": 7.814389557179017e-07, + "loss": 0.1191, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7736 + }, + { + "epoch": 0.7440496225417127, + "grad_norm": 1.8186809275545959, + "learning_rate": 7.808850341055565e-07, + "loss": 0.0822, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7737 + }, + { + "epoch": 0.7441457902582104, + "grad_norm": 1.927969607277778, + "learning_rate": 7.803312725476031e-07, + "loss": 0.109, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7738 + }, + { + "epoch": 0.7442419579747079, + "grad_norm": 2.1798996987394363, + "learning_rate": 7.797776710955984e-07, + "loss": 0.1021, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7739 + }, + { + "epoch": 0.7443381256912055, + "grad_norm": 2.2605453545851333, + "learning_rate": 7.792242298010847e-07, + "loss": 0.1162, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7740 + }, + { + "epoch": 0.7444342934077031, + "grad_norm": 1.5875331059572706, + "learning_rate": 7.786709487155874e-07, + "loss": 0.1107, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7741 + }, + { + "epoch": 0.7445304611242006, + "grad_norm": 2.3897987250199364, + "learning_rate": 7.781178278906196e-07, + "loss": 0.1127, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7742 + }, + { + "epoch": 0.7446266288406982, + "grad_norm": 1.596926649675277, + "learning_rate": 7.775648673776787e-07, + "loss": 0.0964, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7743 + }, + { + "epoch": 0.7447227965571958, + "grad_norm": 2.013678983075342, + "learning_rate": 7.770120672282458e-07, + "loss": 0.1041, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7744 + }, + { + "epoch": 0.7448189642736933, + "grad_norm": 1.7349023217779327, + "learning_rate": 7.764594274937884e-07, + "loss": 0.1214, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7745 + }, + { + "epoch": 0.7449151319901909, + "grad_norm": 1.721494811248571, + "learning_rate": 7.7590694822576e-07, + "loss": 0.1114, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7746 + }, + { + "epoch": 0.7450112997066884, + "grad_norm": 1.883180525593293, + "learning_rate": 7.753546294755965e-07, + "loss": 0.0928, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7747 + }, + { + "epoch": 0.745107467423186, + "grad_norm": 1.6640238984855513, + "learning_rate": 7.748024712947205e-07, + "loss": 0.1201, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7748 + }, + { + "epoch": 0.7452036351396836, + "grad_norm": 1.7132378911442248, + "learning_rate": 7.742504737345399e-07, + "loss": 0.1012, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7749 + }, + { + "epoch": 0.7452998028561811, + "grad_norm": 2.33871172285456, + "learning_rate": 7.736986368464471e-07, + "loss": 0.114, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7750 + }, + { + "epoch": 0.7453959705726787, + "grad_norm": 2.6515727590932725, + "learning_rate": 7.731469606818207e-07, + "loss": 0.1149, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7751 + }, + { + "epoch": 0.7454921382891764, + "grad_norm": 2.1234007999636817, + "learning_rate": 7.725954452920212e-07, + "loss": 0.116, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7752 + }, + { + "epoch": 0.7455883060056739, + "grad_norm": 2.274521681886221, + "learning_rate": 7.720440907283971e-07, + "loss": 0.1229, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7753 + }, + { + "epoch": 0.7456844737221715, + "grad_norm": 2.618618345840384, + "learning_rate": 7.714928970422817e-07, + "loss": 0.125, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7754 + }, + { + "epoch": 0.7457806414386691, + "grad_norm": 2.5156526247915894, + "learning_rate": 7.70941864284992e-07, + "loss": 0.1404, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7755 + }, + { + "epoch": 0.7458768091551666, + "grad_norm": 1.8653533759214074, + "learning_rate": 7.703909925078307e-07, + "loss": 0.1205, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7756 + }, + { + "epoch": 0.7459729768716642, + "grad_norm": 2.834588101432768, + "learning_rate": 7.698402817620865e-07, + "loss": 0.141, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7757 + }, + { + "epoch": 0.7460691445881618, + "grad_norm": 1.5965029893829341, + "learning_rate": 7.692897320990305e-07, + "loss": 0.107, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7758 + }, + { + "epoch": 0.7461653123046593, + "grad_norm": 1.7247320420915158, + "learning_rate": 7.687393435699214e-07, + "loss": 0.1383, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7759 + }, + { + "epoch": 0.7462614800211569, + "grad_norm": 1.605484520610913, + "learning_rate": 7.681891162260016e-07, + "loss": 0.1158, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7760 + }, + { + "epoch": 0.7463576477376545, + "grad_norm": 2.2024007948057007, + "learning_rate": 7.676390501184988e-07, + "loss": 0.1319, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7761 + }, + { + "epoch": 0.746453815454152, + "grad_norm": 2.1300612582776117, + "learning_rate": 7.670891452986265e-07, + "loss": 0.1005, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7762 + }, + { + "epoch": 0.7465499831706496, + "grad_norm": 1.8494067250065362, + "learning_rate": 7.665394018175812e-07, + "loss": 0.1038, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7763 + }, + { + "epoch": 0.7466461508871471, + "grad_norm": 1.666258994899431, + "learning_rate": 7.659898197265456e-07, + "loss": 0.1194, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7764 + }, + { + "epoch": 0.7467423186036447, + "grad_norm": 1.755777844350999, + "learning_rate": 7.654403990766878e-07, + "loss": 0.1273, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7765 + }, + { + "epoch": 0.7468384863201424, + "grad_norm": 1.855176922261975, + "learning_rate": 7.648911399191606e-07, + "loss": 0.125, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7766 + }, + { + "epoch": 0.74693465403664, + "grad_norm": 2.0837818229390814, + "learning_rate": 7.643420423051009e-07, + "loss": 0.1275, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7767 + }, + { + "epoch": 0.7470308217531375, + "grad_norm": 1.6509674387709985, + "learning_rate": 7.637931062856324e-07, + "loss": 0.1058, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7768 + }, + { + "epoch": 0.7471269894696351, + "grad_norm": 1.7684287386392719, + "learning_rate": 7.632443319118613e-07, + "loss": 0.0968, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7769 + }, + { + "epoch": 0.7472231571861326, + "grad_norm": 1.7596895168075462, + "learning_rate": 7.626957192348803e-07, + "loss": 0.1068, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7770 + }, + { + "epoch": 0.7473193249026302, + "grad_norm": 1.982111767251216, + "learning_rate": 7.621472683057668e-07, + "loss": 0.1239, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7771 + }, + { + "epoch": 0.7474154926191278, + "grad_norm": 1.4615192981975267, + "learning_rate": 7.615989791755834e-07, + "loss": 0.1042, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7772 + }, + { + "epoch": 0.7475116603356253, + "grad_norm": 2.0625782728994824, + "learning_rate": 7.61050851895378e-07, + "loss": 0.1241, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7773 + }, + { + "epoch": 0.7476078280521229, + "grad_norm": 1.7222554725987098, + "learning_rate": 7.60502886516181e-07, + "loss": 0.1256, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7774 + }, + { + "epoch": 0.7477039957686205, + "grad_norm": 1.9115318344232863, + "learning_rate": 7.599550830890107e-07, + "loss": 0.126, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7775 + }, + { + "epoch": 0.747800163485118, + "grad_norm": 1.7347991993915204, + "learning_rate": 7.594074416648689e-07, + "loss": 0.0933, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7776 + }, + { + "epoch": 0.7478963312016156, + "grad_norm": 1.623244003025575, + "learning_rate": 7.588599622947432e-07, + "loss": 0.1168, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7777 + }, + { + "epoch": 0.7479924989181131, + "grad_norm": 1.8789259982906645, + "learning_rate": 7.583126450296042e-07, + "loss": 0.0877, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7778 + }, + { + "epoch": 0.7480886666346107, + "grad_norm": 1.903253951775036, + "learning_rate": 7.57765489920409e-07, + "loss": 0.0995, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7779 + }, + { + "epoch": 0.7481848343511084, + "grad_norm": 2.5901404058572783, + "learning_rate": 7.572184970181005e-07, + "loss": 0.1183, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7780 + }, + { + "epoch": 0.748281002067606, + "grad_norm": 1.723308474961602, + "learning_rate": 7.566716663736035e-07, + "loss": 0.1186, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7781 + }, + { + "epoch": 0.7483771697841035, + "grad_norm": 1.5720018450913364, + "learning_rate": 7.561249980378302e-07, + "loss": 0.1291, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7782 + }, + { + "epoch": 0.7484733375006011, + "grad_norm": 1.5941669734727615, + "learning_rate": 7.555784920616772e-07, + "loss": 0.1056, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7783 + }, + { + "epoch": 0.7485695052170986, + "grad_norm": 3.7956439375497735, + "learning_rate": 7.550321484960252e-07, + "loss": 0.1214, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7784 + }, + { + "epoch": 0.7486656729335962, + "grad_norm": 3.155411784535938, + "learning_rate": 7.544859673917415e-07, + "loss": 0.1259, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7785 + }, + { + "epoch": 0.7487618406500938, + "grad_norm": 2.1852931635816835, + "learning_rate": 7.539399487996754e-07, + "loss": 0.1286, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7786 + }, + { + "epoch": 0.7488580083665913, + "grad_norm": 2.2341050542909886, + "learning_rate": 7.533940927706637e-07, + "loss": 0.1069, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7787 + }, + { + "epoch": 0.7489541760830889, + "grad_norm": 1.6007064525032526, + "learning_rate": 7.528483993555269e-07, + "loss": 0.0976, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7788 + }, + { + "epoch": 0.7490503437995865, + "grad_norm": 1.6741812428659806, + "learning_rate": 7.523028686050707e-07, + "loss": 0.0974, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7789 + }, + { + "epoch": 0.749146511516084, + "grad_norm": 1.5692629840368393, + "learning_rate": 7.517575005700864e-07, + "loss": 0.1036, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7790 + }, + { + "epoch": 0.7492426792325816, + "grad_norm": 1.9382904746878753, + "learning_rate": 7.512122953013474e-07, + "loss": 0.1115, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7791 + }, + { + "epoch": 0.7493388469490792, + "grad_norm": 1.4802734526608081, + "learning_rate": 7.506672528496148e-07, + "loss": 0.0937, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7792 + }, + { + "epoch": 0.7494350146655767, + "grad_norm": 2.6544283161443527, + "learning_rate": 7.501223732656337e-07, + "loss": 0.0913, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7793 + }, + { + "epoch": 0.7495311823820744, + "grad_norm": 1.5799331758148045, + "learning_rate": 7.495776566001337e-07, + "loss": 0.094, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7794 + }, + { + "epoch": 0.749627350098572, + "grad_norm": 1.9209383028222904, + "learning_rate": 7.490331029038294e-07, + "loss": 0.1432, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7795 + }, + { + "epoch": 0.7497235178150695, + "grad_norm": 1.6368208105548496, + "learning_rate": 7.484887122274215e-07, + "loss": 0.0986, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7796 + }, + { + "epoch": 0.7498196855315671, + "grad_norm": 3.4333218415592217, + "learning_rate": 7.479444846215919e-07, + "loss": 0.1417, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7797 + }, + { + "epoch": 0.7499158532480646, + "grad_norm": 1.856098910139636, + "learning_rate": 7.474004201370114e-07, + "loss": 0.1199, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7798 + }, + { + "epoch": 0.7500120209645622, + "grad_norm": 1.7360637381515331, + "learning_rate": 7.468565188243332e-07, + "loss": 0.1013, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7799 + }, + { + "epoch": 0.7501081886810598, + "grad_norm": 2.419820002840275, + "learning_rate": 7.463127807341966e-07, + "loss": 0.1419, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7800 + }, + { + "epoch": 0.7502043563975573, + "grad_norm": 1.4994682798770378, + "learning_rate": 7.457692059172255e-07, + "loss": 0.1011, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7801 + }, + { + "epoch": 0.7503005241140549, + "grad_norm": 2.0748465943310004, + "learning_rate": 7.452257944240269e-07, + "loss": 0.1115, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7802 + }, + { + "epoch": 0.7503966918305525, + "grad_norm": 3.1596844523764007, + "learning_rate": 7.446825463051946e-07, + "loss": 0.1238, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7803 + }, + { + "epoch": 0.75049285954705, + "grad_norm": 1.7359643037173902, + "learning_rate": 7.441394616113062e-07, + "loss": 0.1119, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7804 + }, + { + "epoch": 0.7505890272635476, + "grad_norm": 2.2609113365346776, + "learning_rate": 7.435965403929251e-07, + "loss": 0.1171, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7805 + }, + { + "epoch": 0.7506851949800452, + "grad_norm": 2.0714505919091284, + "learning_rate": 7.430537827005982e-07, + "loss": 0.1048, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7806 + }, + { + "epoch": 0.7507813626965427, + "grad_norm": 1.7925181056154864, + "learning_rate": 7.425111885848588e-07, + "loss": 0.1027, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7807 + }, + { + "epoch": 0.7508775304130404, + "grad_norm": 1.6496836602343012, + "learning_rate": 7.419687580962223e-07, + "loss": 0.1032, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7808 + }, + { + "epoch": 0.750973698129538, + "grad_norm": 1.6274258836636737, + "learning_rate": 7.414264912851912e-07, + "loss": 0.1183, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7809 + }, + { + "epoch": 0.7510698658460355, + "grad_norm": 2.085676102611759, + "learning_rate": 7.408843882022531e-07, + "loss": 0.1288, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7810 + }, + { + "epoch": 0.7511660335625331, + "grad_norm": 2.7599033414593963, + "learning_rate": 7.403424488978772e-07, + "loss": 0.1345, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7811 + }, + { + "epoch": 0.7512622012790307, + "grad_norm": 1.6449974614538194, + "learning_rate": 7.39800673422521e-07, + "loss": 0.1042, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7812 + }, + { + "epoch": 0.7513583689955282, + "grad_norm": 1.994676065824945, + "learning_rate": 7.392590618266257e-07, + "loss": 0.1011, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7813 + }, + { + "epoch": 0.7514545367120258, + "grad_norm": 2.1962081833961657, + "learning_rate": 7.387176141606153e-07, + "loss": 0.1024, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7814 + }, + { + "epoch": 0.7515507044285233, + "grad_norm": 1.899330415764988, + "learning_rate": 7.38176330474901e-07, + "loss": 0.111, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7815 + }, + { + "epoch": 0.7516468721450209, + "grad_norm": 2.1537645666554446, + "learning_rate": 7.376352108198776e-07, + "loss": 0.1191, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7816 + }, + { + "epoch": 0.7517430398615185, + "grad_norm": 1.972416367098333, + "learning_rate": 7.370942552459248e-07, + "loss": 0.1207, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7817 + }, + { + "epoch": 0.751839207578016, + "grad_norm": 1.6773073754381898, + "learning_rate": 7.365534638034083e-07, + "loss": 0.1157, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7818 + }, + { + "epoch": 0.7519353752945136, + "grad_norm": 2.5061282976886514, + "learning_rate": 7.360128365426755e-07, + "loss": 0.1387, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7819 + }, + { + "epoch": 0.7520315430110112, + "grad_norm": 1.9222159805757277, + "learning_rate": 7.354723735140609e-07, + "loss": 0.131, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7820 + }, + { + "epoch": 0.7521277107275087, + "grad_norm": 2.091958852165004, + "learning_rate": 7.349320747678834e-07, + "loss": 0.1325, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7821 + }, + { + "epoch": 0.7522238784440064, + "grad_norm": 1.61804770013066, + "learning_rate": 7.343919403544461e-07, + "loss": 0.1088, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7822 + }, + { + "epoch": 0.752320046160504, + "grad_norm": 1.733454692788088, + "learning_rate": 7.338519703240371e-07, + "loss": 0.1237, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7823 + }, + { + "epoch": 0.7524162138770015, + "grad_norm": 1.701500197831738, + "learning_rate": 7.3331216472693e-07, + "loss": 0.1117, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7824 + }, + { + "epoch": 0.7525123815934991, + "grad_norm": 1.6914784666610931, + "learning_rate": 7.327725236133803e-07, + "loss": 0.1087, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7825 + }, + { + "epoch": 0.7526085493099967, + "grad_norm": 1.713487905346853, + "learning_rate": 7.322330470336314e-07, + "loss": 0.1155, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7826 + }, + { + "epoch": 0.7527047170264942, + "grad_norm": 1.4980756314462307, + "learning_rate": 7.316937350379097e-07, + "loss": 0.1043, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7827 + }, + { + "epoch": 0.7528008847429918, + "grad_norm": 1.9791845853140029, + "learning_rate": 7.311545876764267e-07, + "loss": 0.1114, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7828 + }, + { + "epoch": 0.7528970524594893, + "grad_norm": 2.31147047925794, + "learning_rate": 7.306156049993792e-07, + "loss": 0.1214, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7829 + }, + { + "epoch": 0.7529932201759869, + "grad_norm": 1.7412037516947494, + "learning_rate": 7.300767870569466e-07, + "loss": 0.1143, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7830 + }, + { + "epoch": 0.7530893878924845, + "grad_norm": 2.3595736885018326, + "learning_rate": 7.295381338992952e-07, + "loss": 0.1314, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7831 + }, + { + "epoch": 0.753185555608982, + "grad_norm": 2.1168698102962775, + "learning_rate": 7.289996455765749e-07, + "loss": 0.1176, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7832 + }, + { + "epoch": 0.7532817233254796, + "grad_norm": 1.7335672326838796, + "learning_rate": 7.284613221389209e-07, + "loss": 0.0937, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7833 + }, + { + "epoch": 0.7533778910419772, + "grad_norm": 1.7211178321406606, + "learning_rate": 7.279231636364517e-07, + "loss": 0.1043, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7834 + }, + { + "epoch": 0.7534740587584747, + "grad_norm": 2.053483850880783, + "learning_rate": 7.27385170119273e-07, + "loss": 0.1076, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7835 + }, + { + "epoch": 0.7535702264749724, + "grad_norm": 1.622110597658867, + "learning_rate": 7.268473416374719e-07, + "loss": 0.113, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7836 + }, + { + "epoch": 0.75366639419147, + "grad_norm": 2.8752466073011425, + "learning_rate": 7.263096782411219e-07, + "loss": 0.1134, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7837 + }, + { + "epoch": 0.7537625619079675, + "grad_norm": 1.6700158385434647, + "learning_rate": 7.257721799802814e-07, + "loss": 0.1107, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 43.53, + "memory/max_mem_allocated(gib)": 43.53, + "step": 7838 + }, + { + "epoch": 0.7538587296244651, + "grad_norm": 2.418843055400903, + "learning_rate": 7.25234846904993e-07, + "loss": 0.1111, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7839 + }, + { + "epoch": 0.7539548973409627, + "grad_norm": 2.3887309152830247, + "learning_rate": 7.246976790652843e-07, + "loss": 0.1192, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7840 + }, + { + "epoch": 0.7540510650574602, + "grad_norm": 1.3028602038036607, + "learning_rate": 7.241606765111661e-07, + "loss": 0.0814, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7841 + }, + { + "epoch": 0.7541472327739578, + "grad_norm": 1.6921721943266563, + "learning_rate": 7.236238392926354e-07, + "loss": 0.108, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7842 + }, + { + "epoch": 0.7542434004904554, + "grad_norm": 1.7944594569606218, + "learning_rate": 7.23087167459674e-07, + "loss": 0.109, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7843 + }, + { + "epoch": 0.7543395682069529, + "grad_norm": 1.7913299335935784, + "learning_rate": 7.225506610622457e-07, + "loss": 0.1012, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7844 + }, + { + "epoch": 0.7544357359234505, + "grad_norm": 1.6877156270905542, + "learning_rate": 7.220143201503019e-07, + "loss": 0.1158, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7845 + }, + { + "epoch": 0.754531903639948, + "grad_norm": 2.237490190386191, + "learning_rate": 7.214781447737782e-07, + "loss": 0.1117, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7846 + }, + { + "epoch": 0.7546280713564456, + "grad_norm": 2.24898084479382, + "learning_rate": 7.209421349825923e-07, + "loss": 0.1207, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7847 + }, + { + "epoch": 0.7547242390729432, + "grad_norm": 2.1207653144118503, + "learning_rate": 7.204062908266491e-07, + "loss": 0.1387, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7848 + }, + { + "epoch": 0.7548204067894407, + "grad_norm": 2.4745854561000518, + "learning_rate": 7.198706123558368e-07, + "loss": 0.0868, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7849 + }, + { + "epoch": 0.7549165745059384, + "grad_norm": 2.5722961153374246, + "learning_rate": 7.193350996200294e-07, + "loss": 0.1242, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7850 + }, + { + "epoch": 0.755012742222436, + "grad_norm": 1.3871676120581116, + "learning_rate": 7.187997526690838e-07, + "loss": 0.088, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7851 + }, + { + "epoch": 0.7551089099389335, + "grad_norm": 1.794389858962848, + "learning_rate": 7.182645715528436e-07, + "loss": 0.1231, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7852 + }, + { + "epoch": 0.7552050776554311, + "grad_norm": 2.0661792934971723, + "learning_rate": 7.177295563211337e-07, + "loss": 0.1283, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7853 + }, + { + "epoch": 0.7553012453719287, + "grad_norm": 1.8305405995140682, + "learning_rate": 7.171947070237664e-07, + "loss": 0.1082, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7854 + }, + { + "epoch": 0.7553974130884262, + "grad_norm": 2.0767273473410306, + "learning_rate": 7.166600237105378e-07, + "loss": 0.1144, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7855 + }, + { + "epoch": 0.7554935808049238, + "grad_norm": 1.497138780209301, + "learning_rate": 7.161255064312284e-07, + "loss": 0.1047, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7856 + }, + { + "epoch": 0.7555897485214214, + "grad_norm": 1.7175569119750915, + "learning_rate": 7.155911552356038e-07, + "loss": 0.1128, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7857 + }, + { + "epoch": 0.7556859162379189, + "grad_norm": 2.378198035504387, + "learning_rate": 7.150569701734122e-07, + "loss": 0.1202, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7858 + }, + { + "epoch": 0.7557820839544165, + "grad_norm": 2.3062597597850614, + "learning_rate": 7.145229512943886e-07, + "loss": 0.1029, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7859 + }, + { + "epoch": 0.755878251670914, + "grad_norm": 2.1647951474094302, + "learning_rate": 7.139890986482515e-07, + "loss": 0.1104, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7860 + }, + { + "epoch": 0.7559744193874116, + "grad_norm": 1.6094806974090323, + "learning_rate": 7.13455412284704e-07, + "loss": 0.1013, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7861 + }, + { + "epoch": 0.7560705871039092, + "grad_norm": 1.908459718059, + "learning_rate": 7.129218922534343e-07, + "loss": 0.105, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7862 + }, + { + "epoch": 0.7561667548204067, + "grad_norm": 1.9865051462993608, + "learning_rate": 7.123885386041144e-07, + "loss": 0.1179, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7863 + }, + { + "epoch": 0.7562629225369044, + "grad_norm": 1.6976152803359883, + "learning_rate": 7.118553513864002e-07, + "loss": 0.1158, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7864 + }, + { + "epoch": 0.756359090253402, + "grad_norm": 1.6177854108754022, + "learning_rate": 7.113223306499336e-07, + "loss": 0.1043, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7865 + }, + { + "epoch": 0.7564552579698995, + "grad_norm": 1.5090350283192566, + "learning_rate": 7.107894764443401e-07, + "loss": 0.0841, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7866 + }, + { + "epoch": 0.7565514256863971, + "grad_norm": 2.5589747760460058, + "learning_rate": 7.102567888192302e-07, + "loss": 0.16, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7867 + }, + { + "epoch": 0.7566475934028947, + "grad_norm": 2.0802080812356345, + "learning_rate": 7.09724267824199e-07, + "loss": 0.0923, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7868 + }, + { + "epoch": 0.7567437611193922, + "grad_norm": 2.0716538692257664, + "learning_rate": 7.091919135088244e-07, + "loss": 0.1137, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7869 + }, + { + "epoch": 0.7568399288358898, + "grad_norm": 2.9454733986322186, + "learning_rate": 7.086597259226708e-07, + "loss": 0.1356, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7870 + }, + { + "epoch": 0.7569360965523874, + "grad_norm": 1.6022169000031552, + "learning_rate": 7.081277051152865e-07, + "loss": 0.1108, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7871 + }, + { + "epoch": 0.7570322642688849, + "grad_norm": 2.174074310538776, + "learning_rate": 7.075958511362038e-07, + "loss": 0.1547, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7872 + }, + { + "epoch": 0.7571284319853825, + "grad_norm": 1.7662896071832235, + "learning_rate": 7.070641640349407e-07, + "loss": 0.1129, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7873 + }, + { + "epoch": 0.75722459970188, + "grad_norm": 1.7805423105457558, + "learning_rate": 7.065326438609973e-07, + "loss": 0.1118, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7874 + }, + { + "epoch": 0.7573207674183776, + "grad_norm": 2.0794903823164623, + "learning_rate": 7.060012906638603e-07, + "loss": 0.1156, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7875 + }, + { + "epoch": 0.7574169351348752, + "grad_norm": 1.6374308312812167, + "learning_rate": 7.05470104493001e-07, + "loss": 0.098, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7876 + }, + { + "epoch": 0.7575131028513727, + "grad_norm": 1.9639295786861841, + "learning_rate": 7.049390853978725e-07, + "loss": 0.1142, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7877 + }, + { + "epoch": 0.7576092705678704, + "grad_norm": 2.2047472427433874, + "learning_rate": 7.044082334279151e-07, + "loss": 0.1469, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7878 + }, + { + "epoch": 0.757705438284368, + "grad_norm": 1.5820983477987818, + "learning_rate": 7.038775486325528e-07, + "loss": 0.088, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7879 + }, + { + "epoch": 0.7578016060008655, + "grad_norm": 1.4954970884091474, + "learning_rate": 7.033470310611945e-07, + "loss": 0.1059, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7880 + }, + { + "epoch": 0.7578977737173631, + "grad_norm": 1.8220300541149004, + "learning_rate": 7.028166807632311e-07, + "loss": 0.122, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7881 + }, + { + "epoch": 0.7579939414338607, + "grad_norm": 2.1487446447760203, + "learning_rate": 7.022864977880409e-07, + "loss": 0.1238, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7882 + }, + { + "epoch": 0.7580901091503582, + "grad_norm": 1.6104116326808726, + "learning_rate": 7.017564821849848e-07, + "loss": 0.1146, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7883 + }, + { + "epoch": 0.7581862768668558, + "grad_norm": 1.8372553375582001, + "learning_rate": 7.012266340034091e-07, + "loss": 0.1236, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7884 + }, + { + "epoch": 0.7582824445833534, + "grad_norm": 1.7841945325095097, + "learning_rate": 7.00696953292645e-07, + "loss": 0.1116, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7885 + }, + { + "epoch": 0.7583786122998509, + "grad_norm": 1.7830426474185275, + "learning_rate": 7.001674401020056e-07, + "loss": 0.134, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7886 + }, + { + "epoch": 0.7584747800163485, + "grad_norm": 2.0189746184336284, + "learning_rate": 6.996380944807907e-07, + "loss": 0.1257, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7887 + }, + { + "epoch": 0.7585709477328461, + "grad_norm": 2.153891923823962, + "learning_rate": 6.991089164782839e-07, + "loss": 0.139, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7888 + }, + { + "epoch": 0.7586671154493436, + "grad_norm": 2.127737375627144, + "learning_rate": 6.985799061437532e-07, + "loss": 0.1445, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7889 + }, + { + "epoch": 0.7587632831658412, + "grad_norm": 1.8717059972154138, + "learning_rate": 6.980510635264512e-07, + "loss": 0.1081, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7890 + }, + { + "epoch": 0.7588594508823387, + "grad_norm": 1.721377490898008, + "learning_rate": 6.97522388675615e-07, + "loss": 0.1308, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7891 + }, + { + "epoch": 0.7589556185988364, + "grad_norm": 2.053126968307888, + "learning_rate": 6.969938816404639e-07, + "loss": 0.1118, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7892 + }, + { + "epoch": 0.759051786315334, + "grad_norm": 2.2684348863714026, + "learning_rate": 6.964655424702049e-07, + "loss": 0.1096, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7893 + }, + { + "epoch": 0.7591479540318316, + "grad_norm": 2.017550087447738, + "learning_rate": 6.959373712140275e-07, + "loss": 0.1188, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7894 + }, + { + "epoch": 0.7592441217483291, + "grad_norm": 1.9090860170247126, + "learning_rate": 6.95409367921106e-07, + "loss": 0.1386, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7895 + }, + { + "epoch": 0.7593402894648267, + "grad_norm": 2.0195808364975747, + "learning_rate": 6.948815326405994e-07, + "loss": 0.1357, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7896 + }, + { + "epoch": 0.7594364571813242, + "grad_norm": 1.8914394828692218, + "learning_rate": 6.943538654216494e-07, + "loss": 0.0917, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7897 + }, + { + "epoch": 0.7595326248978218, + "grad_norm": 2.2103357593586446, + "learning_rate": 6.93826366313384e-07, + "loss": 0.1083, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7898 + }, + { + "epoch": 0.7596287926143194, + "grad_norm": 1.770998934565579, + "learning_rate": 6.932990353649149e-07, + "loss": 0.1157, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7899 + }, + { + "epoch": 0.7597249603308169, + "grad_norm": 3.57758355699479, + "learning_rate": 6.927718726253379e-07, + "loss": 0.1326, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7900 + }, + { + "epoch": 0.7598211280473145, + "grad_norm": 1.4825396516370424, + "learning_rate": 6.922448781437335e-07, + "loss": 0.0887, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7901 + }, + { + "epoch": 0.7599172957638121, + "grad_norm": 1.5236349269026754, + "learning_rate": 6.917180519691672e-07, + "loss": 0.119, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7902 + }, + { + "epoch": 0.7600134634803096, + "grad_norm": 1.7581253989508248, + "learning_rate": 6.911913941506862e-07, + "loss": 0.0929, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7903 + }, + { + "epoch": 0.7601096311968072, + "grad_norm": 2.489477974055202, + "learning_rate": 6.906649047373246e-07, + "loss": 0.1337, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7904 + }, + { + "epoch": 0.7602057989133048, + "grad_norm": 1.5125262075625017, + "learning_rate": 6.901385837781002e-07, + "loss": 0.1071, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7905 + }, + { + "epoch": 0.7603019666298024, + "grad_norm": 1.8347325268742398, + "learning_rate": 6.896124313220148e-07, + "loss": 0.106, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7906 + }, + { + "epoch": 0.7603981343463, + "grad_norm": 1.5984349812327947, + "learning_rate": 6.890864474180556e-07, + "loss": 0.1232, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7907 + }, + { + "epoch": 0.7604943020627976, + "grad_norm": 1.793332866504405, + "learning_rate": 6.885606321151914e-07, + "loss": 0.1155, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7908 + }, + { + "epoch": 0.7605904697792951, + "grad_norm": 1.7300228642110647, + "learning_rate": 6.88034985462378e-07, + "loss": 0.1331, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7909 + }, + { + "epoch": 0.7606866374957927, + "grad_norm": 1.5889977397814437, + "learning_rate": 6.875095075085553e-07, + "loss": 0.1224, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7910 + }, + { + "epoch": 0.7607828052122902, + "grad_norm": 3.18927429430884, + "learning_rate": 6.869841983026451e-07, + "loss": 0.1025, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7911 + }, + { + "epoch": 0.7608789729287878, + "grad_norm": 1.7810916796151544, + "learning_rate": 6.864590578935562e-07, + "loss": 0.0999, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7912 + }, + { + "epoch": 0.7609751406452854, + "grad_norm": 2.1637357161228565, + "learning_rate": 6.859340863301811e-07, + "loss": 0.1404, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7913 + }, + { + "epoch": 0.7610713083617829, + "grad_norm": 2.2516246942049367, + "learning_rate": 6.854092836613948e-07, + "loss": 0.1042, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7914 + }, + { + "epoch": 0.7611674760782805, + "grad_norm": 2.1147367860253903, + "learning_rate": 6.848846499360584e-07, + "loss": 0.1057, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7915 + }, + { + "epoch": 0.7612636437947781, + "grad_norm": 1.6099121837459998, + "learning_rate": 6.843601852030171e-07, + "loss": 0.1224, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7916 + }, + { + "epoch": 0.7613598115112756, + "grad_norm": 2.691283977968577, + "learning_rate": 6.838358895111e-07, + "loss": 0.1119, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7917 + }, + { + "epoch": 0.7614559792277732, + "grad_norm": 1.9629264628627665, + "learning_rate": 6.833117629091201e-07, + "loss": 0.1453, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7918 + }, + { + "epoch": 0.7615521469442708, + "grad_norm": 1.613419629071163, + "learning_rate": 6.827878054458761e-07, + "loss": 0.1137, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7919 + }, + { + "epoch": 0.7616483146607684, + "grad_norm": 1.776451359580936, + "learning_rate": 6.822640171701486e-07, + "loss": 0.1313, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7920 + }, + { + "epoch": 0.761744482377266, + "grad_norm": 1.5416560469922505, + "learning_rate": 6.817403981307041e-07, + "loss": 0.096, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7921 + }, + { + "epoch": 0.7618406500937636, + "grad_norm": 1.6440928930000172, + "learning_rate": 6.812169483762929e-07, + "loss": 0.1176, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7922 + }, + { + "epoch": 0.7619368178102611, + "grad_norm": 2.0660531411349825, + "learning_rate": 6.806936679556503e-07, + "loss": 0.0933, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7923 + }, + { + "epoch": 0.7620329855267587, + "grad_norm": 1.64423021046385, + "learning_rate": 6.801705569174952e-07, + "loss": 0.114, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7924 + }, + { + "epoch": 0.7621291532432563, + "grad_norm": 1.8213679917448786, + "learning_rate": 6.796476153105294e-07, + "loss": 0.122, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7925 + }, + { + "epoch": 0.7622253209597538, + "grad_norm": 1.8122351284811962, + "learning_rate": 6.791248431834413e-07, + "loss": 0.1415, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7926 + }, + { + "epoch": 0.7623214886762514, + "grad_norm": 2.189707989581864, + "learning_rate": 6.786022405849019e-07, + "loss": 0.11, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7927 + }, + { + "epoch": 0.7624176563927489, + "grad_norm": 1.2534998843839382, + "learning_rate": 6.780798075635675e-07, + "loss": 0.0813, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7928 + }, + { + "epoch": 0.7625138241092465, + "grad_norm": 1.5709224376256075, + "learning_rate": 6.775575441680776e-07, + "loss": 0.1071, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7929 + }, + { + "epoch": 0.7626099918257441, + "grad_norm": 1.6029848896984331, + "learning_rate": 6.770354504470575e-07, + "loss": 0.1054, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7930 + }, + { + "epoch": 0.7627061595422416, + "grad_norm": 1.660469713489541, + "learning_rate": 6.765135264491137e-07, + "loss": 0.0984, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7931 + }, + { + "epoch": 0.7628023272587392, + "grad_norm": 1.727065923560094, + "learning_rate": 6.759917722228402e-07, + "loss": 0.128, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7932 + }, + { + "epoch": 0.7628984949752368, + "grad_norm": 1.8966649809987557, + "learning_rate": 6.754701878168129e-07, + "loss": 0.1499, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7933 + }, + { + "epoch": 0.7629946626917344, + "grad_norm": 1.8585071158615254, + "learning_rate": 6.749487732795934e-07, + "loss": 0.1202, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7934 + }, + { + "epoch": 0.763090830408232, + "grad_norm": 2.226868616996141, + "learning_rate": 6.744275286597265e-07, + "loss": 0.1172, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7935 + }, + { + "epoch": 0.7631869981247296, + "grad_norm": 1.8494624051340083, + "learning_rate": 6.739064540057425e-07, + "loss": 0.1001, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7936 + }, + { + "epoch": 0.7632831658412271, + "grad_norm": 1.7913007472527562, + "learning_rate": 6.733855493661531e-07, + "loss": 0.1305, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7937 + }, + { + "epoch": 0.7633793335577247, + "grad_norm": 2.5543940037313115, + "learning_rate": 6.728648147894574e-07, + "loss": 0.1104, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7938 + }, + { + "epoch": 0.7634755012742223, + "grad_norm": 1.5130128850296936, + "learning_rate": 6.723442503241362e-07, + "loss": 0.0929, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7939 + }, + { + "epoch": 0.7635716689907198, + "grad_norm": 1.3867409449471129, + "learning_rate": 6.718238560186572e-07, + "loss": 0.0878, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7940 + }, + { + "epoch": 0.7636678367072174, + "grad_norm": 2.2350004567777777, + "learning_rate": 6.713036319214686e-07, + "loss": 0.1374, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7941 + }, + { + "epoch": 0.763764004423715, + "grad_norm": 1.4895454391787932, + "learning_rate": 6.707835780810054e-07, + "loss": 0.118, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7942 + }, + { + "epoch": 0.7638601721402125, + "grad_norm": 1.8900387740064326, + "learning_rate": 6.70263694545687e-07, + "loss": 0.1426, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7943 + }, + { + "epoch": 0.7639563398567101, + "grad_norm": 2.3867736452686126, + "learning_rate": 6.697439813639145e-07, + "loss": 0.1057, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7944 + }, + { + "epoch": 0.7640525075732076, + "grad_norm": 2.0359472065334363, + "learning_rate": 6.692244385840754e-07, + "loss": 0.1132, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7945 + }, + { + "epoch": 0.7641486752897052, + "grad_norm": 1.7610236663930612, + "learning_rate": 6.687050662545405e-07, + "loss": 0.1368, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7946 + }, + { + "epoch": 0.7642448430062028, + "grad_norm": 2.237558242414135, + "learning_rate": 6.681858644236655e-07, + "loss": 0.1313, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7947 + }, + { + "epoch": 0.7643410107227004, + "grad_norm": 1.8128602760539556, + "learning_rate": 6.676668331397884e-07, + "loss": 0.1421, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7948 + }, + { + "epoch": 0.764437178439198, + "grad_norm": 2.5282036425248986, + "learning_rate": 6.671479724512328e-07, + "loss": 0.1146, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7949 + }, + { + "epoch": 0.7645333461556956, + "grad_norm": 1.8146742674502405, + "learning_rate": 6.666292824063062e-07, + "loss": 0.1279, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7950 + }, + { + "epoch": 0.7646295138721931, + "grad_norm": 1.8718035405022464, + "learning_rate": 6.661107630532998e-07, + "loss": 0.1172, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7951 + }, + { + "epoch": 0.7647256815886907, + "grad_norm": 1.8848120166658657, + "learning_rate": 6.655924144404907e-07, + "loss": 0.1279, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7952 + }, + { + "epoch": 0.7648218493051883, + "grad_norm": 1.99193177851735, + "learning_rate": 6.650742366161365e-07, + "loss": 0.1358, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7953 + }, + { + "epoch": 0.7649180170216858, + "grad_norm": 1.8571055601816333, + "learning_rate": 6.645562296284819e-07, + "loss": 0.1378, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7954 + }, + { + "epoch": 0.7650141847381834, + "grad_norm": 1.9846050430932414, + "learning_rate": 6.640383935257547e-07, + "loss": 0.1282, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7955 + }, + { + "epoch": 0.765110352454681, + "grad_norm": 1.9430384089055472, + "learning_rate": 6.635207283561671e-07, + "loss": 0.1454, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7956 + }, + { + "epoch": 0.7652065201711785, + "grad_norm": 1.8498146197967622, + "learning_rate": 6.630032341679149e-07, + "loss": 0.142, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7957 + }, + { + "epoch": 0.7653026878876761, + "grad_norm": 1.5204952184631655, + "learning_rate": 6.624859110091791e-07, + "loss": 0.122, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7958 + }, + { + "epoch": 0.7653988556041736, + "grad_norm": 1.6681122658309866, + "learning_rate": 6.619687589281226e-07, + "loss": 0.1079, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7959 + }, + { + "epoch": 0.7654950233206712, + "grad_norm": 1.880128437374582, + "learning_rate": 6.614517779728943e-07, + "loss": 0.134, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7960 + }, + { + "epoch": 0.7655911910371688, + "grad_norm": 1.8161667994951383, + "learning_rate": 6.609349681916266e-07, + "loss": 0.1317, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7961 + }, + { + "epoch": 0.7656873587536664, + "grad_norm": 1.795710417038641, + "learning_rate": 6.604183296324359e-07, + "loss": 0.105, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7962 + }, + { + "epoch": 0.765783526470164, + "grad_norm": 3.5030975577245496, + "learning_rate": 6.599018623434225e-07, + "loss": 0.1278, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7963 + }, + { + "epoch": 0.7658796941866616, + "grad_norm": 1.587216438116183, + "learning_rate": 6.593855663726723e-07, + "loss": 0.1349, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7964 + }, + { + "epoch": 0.7659758619031591, + "grad_norm": 2.28068914397954, + "learning_rate": 6.588694417682517e-07, + "loss": 0.1382, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7965 + }, + { + "epoch": 0.7660720296196567, + "grad_norm": 3.0972256870055124, + "learning_rate": 6.583534885782145e-07, + "loss": 0.1034, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7966 + }, + { + "epoch": 0.7661681973361543, + "grad_norm": 1.3631090973238922, + "learning_rate": 6.578377068505973e-07, + "loss": 0.1094, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7967 + }, + { + "epoch": 0.7662643650526518, + "grad_norm": 2.0390525236420642, + "learning_rate": 6.573220966334207e-07, + "loss": 0.1118, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7968 + }, + { + "epoch": 0.7663605327691494, + "grad_norm": 1.8657113923671693, + "learning_rate": 6.568066579746901e-07, + "loss": 0.1033, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7969 + }, + { + "epoch": 0.766456700485647, + "grad_norm": 1.7789198662524413, + "learning_rate": 6.562913909223931e-07, + "loss": 0.1269, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7970 + }, + { + "epoch": 0.7665528682021445, + "grad_norm": 2.751977906329972, + "learning_rate": 6.557762955245031e-07, + "loss": 0.1156, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7971 + }, + { + "epoch": 0.7666490359186421, + "grad_norm": 2.176896090303873, + "learning_rate": 6.552613718289771e-07, + "loss": 0.1049, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7972 + }, + { + "epoch": 0.7667452036351396, + "grad_norm": 2.5963341544840657, + "learning_rate": 6.547466198837562e-07, + "loss": 0.1356, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7973 + }, + { + "epoch": 0.7668413713516372, + "grad_norm": 6.9632728621136275, + "learning_rate": 6.542320397367641e-07, + "loss": 0.1306, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7974 + }, + { + "epoch": 0.7669375390681348, + "grad_norm": 1.7084044856248908, + "learning_rate": 6.537176314359104e-07, + "loss": 0.0947, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7975 + }, + { + "epoch": 0.7670337067846325, + "grad_norm": 1.902990311306431, + "learning_rate": 6.532033950290887e-07, + "loss": 0.1156, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7976 + }, + { + "epoch": 0.76712987450113, + "grad_norm": 1.8890605805929637, + "learning_rate": 6.526893305641741e-07, + "loss": 0.1198, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7977 + }, + { + "epoch": 0.7672260422176276, + "grad_norm": 1.7213368874136639, + "learning_rate": 6.521754380890283e-07, + "loss": 0.0988, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7978 + }, + { + "epoch": 0.7673222099341251, + "grad_norm": 1.673654916629355, + "learning_rate": 6.516617176514964e-07, + "loss": 0.1192, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7979 + }, + { + "epoch": 0.7674183776506227, + "grad_norm": 1.5118037821402492, + "learning_rate": 6.511481692994077e-07, + "loss": 0.1246, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7980 + }, + { + "epoch": 0.7675145453671203, + "grad_norm": 1.8114422473756817, + "learning_rate": 6.506347930805731e-07, + "loss": 0.1178, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7981 + }, + { + "epoch": 0.7676107130836178, + "grad_norm": 1.8298066263707482, + "learning_rate": 6.501215890427908e-07, + "loss": 0.1138, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7982 + }, + { + "epoch": 0.7677068808001154, + "grad_norm": 2.1034076941335513, + "learning_rate": 6.496085572338415e-07, + "loss": 0.1115, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7983 + }, + { + "epoch": 0.767803048516613, + "grad_norm": 1.7227840540962958, + "learning_rate": 6.490956977014892e-07, + "loss": 0.1244, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7984 + }, + { + "epoch": 0.7678992162331105, + "grad_norm": 1.541638239348358, + "learning_rate": 6.485830104934831e-07, + "loss": 0.1154, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7985 + }, + { + "epoch": 0.7679953839496081, + "grad_norm": 1.6294539974100388, + "learning_rate": 6.480704956575564e-07, + "loss": 0.1282, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7986 + }, + { + "epoch": 0.7680915516661057, + "grad_norm": 1.7270949435226282, + "learning_rate": 6.475581532414244e-07, + "loss": 0.0992, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7987 + }, + { + "epoch": 0.7681877193826032, + "grad_norm": 2.618832333761298, + "learning_rate": 6.470459832927881e-07, + "loss": 0.1198, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7988 + }, + { + "epoch": 0.7682838870991008, + "grad_norm": 2.055101684725659, + "learning_rate": 6.465339858593317e-07, + "loss": 0.1307, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7989 + }, + { + "epoch": 0.7683800548155985, + "grad_norm": 1.7488377529007235, + "learning_rate": 6.46022160988724e-07, + "loss": 0.0921, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7990 + }, + { + "epoch": 0.768476222532096, + "grad_norm": 1.6598021017688989, + "learning_rate": 6.455105087286173e-07, + "loss": 0.1041, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7991 + }, + { + "epoch": 0.7685723902485936, + "grad_norm": 1.7100368746281465, + "learning_rate": 6.449990291266486e-07, + "loss": 0.1199, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7992 + }, + { + "epoch": 0.7686685579650911, + "grad_norm": 1.5906834469470825, + "learning_rate": 6.444877222304363e-07, + "loss": 0.0946, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7993 + }, + { + "epoch": 0.7687647256815887, + "grad_norm": 1.69903611815834, + "learning_rate": 6.439765880875856e-07, + "loss": 0.1032, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7994 + }, + { + "epoch": 0.7688608933980863, + "grad_norm": 2.05713848810957, + "learning_rate": 6.434656267456843e-07, + "loss": 0.1288, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7995 + }, + { + "epoch": 0.7689570611145838, + "grad_norm": 1.708493275586489, + "learning_rate": 6.429548382523043e-07, + "loss": 0.12, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7996 + }, + { + "epoch": 0.7690532288310814, + "grad_norm": 1.8617556655023373, + "learning_rate": 6.424442226550021e-07, + "loss": 0.1425, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7997 + }, + { + "epoch": 0.769149396547579, + "grad_norm": 1.6070109258340983, + "learning_rate": 6.419337800013162e-07, + "loss": 0.0966, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7998 + }, + { + "epoch": 0.7692455642640765, + "grad_norm": 1.7773652510069895, + "learning_rate": 6.414235103387709e-07, + "loss": 0.1096, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 7999 + }, + { + "epoch": 0.7693417319805741, + "grad_norm": 2.0556186710322026, + "learning_rate": 6.409134137148737e-07, + "loss": 0.1098, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8000 + }, + { + "epoch": 0.7694378996970717, + "grad_norm": 2.140857831293011, + "learning_rate": 6.404034901771161e-07, + "loss": 0.1141, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8001 + }, + { + "epoch": 0.7695340674135692, + "grad_norm": 2.6208564957627223, + "learning_rate": 6.398937397729732e-07, + "loss": 0.1106, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8002 + }, + { + "epoch": 0.7696302351300668, + "grad_norm": 1.7453279301424534, + "learning_rate": 6.39384162549905e-07, + "loss": 0.1082, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8003 + }, + { + "epoch": 0.7697264028465645, + "grad_norm": 1.7604777202934154, + "learning_rate": 6.388747585553532e-07, + "loss": 0.1202, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8004 + }, + { + "epoch": 0.769822570563062, + "grad_norm": 2.1421736892882643, + "learning_rate": 6.383655278367451e-07, + "loss": 0.1102, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8005 + }, + { + "epoch": 0.7699187382795596, + "grad_norm": 2.7344830236274333, + "learning_rate": 6.378564704414921e-07, + "loss": 0.1115, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8006 + }, + { + "epoch": 0.7700149059960572, + "grad_norm": 1.9630182204531066, + "learning_rate": 6.373475864169892e-07, + "loss": 0.1317, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8007 + }, + { + "epoch": 0.7701110737125547, + "grad_norm": 1.725298893288333, + "learning_rate": 6.368388758106134e-07, + "loss": 0.1198, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8008 + }, + { + "epoch": 0.7702072414290523, + "grad_norm": 1.9884001162557323, + "learning_rate": 6.363303386697281e-07, + "loss": 0.1076, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8009 + }, + { + "epoch": 0.7703034091455498, + "grad_norm": 2.2607121137835002, + "learning_rate": 6.3582197504168e-07, + "loss": 0.1188, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8010 + }, + { + "epoch": 0.7703995768620474, + "grad_norm": 1.9455590340455629, + "learning_rate": 6.353137849737978e-07, + "loss": 0.144, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8011 + }, + { + "epoch": 0.770495744578545, + "grad_norm": 2.182026098430555, + "learning_rate": 6.348057685133963e-07, + "loss": 0.1343, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8012 + }, + { + "epoch": 0.7705919122950425, + "grad_norm": 1.519378952512769, + "learning_rate": 6.342979257077728e-07, + "loss": 0.1183, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8013 + }, + { + "epoch": 0.7706880800115401, + "grad_norm": 2.6597576657276787, + "learning_rate": 6.337902566042101e-07, + "loss": 0.141, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8014 + }, + { + "epoch": 0.7707842477280377, + "grad_norm": 1.4801083726650064, + "learning_rate": 6.332827612499718e-07, + "loss": 0.1076, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8015 + }, + { + "epoch": 0.7708804154445352, + "grad_norm": 1.843234661374037, + "learning_rate": 6.32775439692308e-07, + "loss": 0.1441, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8016 + }, + { + "epoch": 0.7709765831610328, + "grad_norm": 9.516178356906742, + "learning_rate": 6.32268291978452e-07, + "loss": 0.0979, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8017 + }, + { + "epoch": 0.7710727508775305, + "grad_norm": 2.1725373993748764, + "learning_rate": 6.317613181556201e-07, + "loss": 0.1111, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8018 + }, + { + "epoch": 0.771168918594028, + "grad_norm": 1.7171443582690815, + "learning_rate": 6.312545182710133e-07, + "loss": 0.1018, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8019 + }, + { + "epoch": 0.7712650863105256, + "grad_norm": 1.593352542799364, + "learning_rate": 6.307478923718171e-07, + "loss": 0.1265, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8020 + }, + { + "epoch": 0.7713612540270232, + "grad_norm": 2.0008999752660954, + "learning_rate": 6.30241440505198e-07, + "loss": 0.1246, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8021 + }, + { + "epoch": 0.7714574217435207, + "grad_norm": 1.9265621665057175, + "learning_rate": 6.297351627183088e-07, + "loss": 0.1262, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8022 + }, + { + "epoch": 0.7715535894600183, + "grad_norm": 1.8928483571006056, + "learning_rate": 6.292290590582853e-07, + "loss": 0.1258, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8023 + }, + { + "epoch": 0.7716497571765158, + "grad_norm": 2.0195272635077117, + "learning_rate": 6.28723129572247e-07, + "loss": 0.1183, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8024 + }, + { + "epoch": 0.7717459248930134, + "grad_norm": 1.7775752308725912, + "learning_rate": 6.282173743072989e-07, + "loss": 0.1356, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8025 + }, + { + "epoch": 0.771842092609511, + "grad_norm": 1.7121869835501176, + "learning_rate": 6.277117933105259e-07, + "loss": 0.1234, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8026 + }, + { + "epoch": 0.7719382603260085, + "grad_norm": 2.1384077139644413, + "learning_rate": 6.272063866290001e-07, + "loss": 0.1343, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8027 + }, + { + "epoch": 0.7720344280425061, + "grad_norm": 2.0640263736732813, + "learning_rate": 6.267011543097762e-07, + "loss": 0.1078, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8028 + }, + { + "epoch": 0.7721305957590037, + "grad_norm": 2.025069872036181, + "learning_rate": 6.261960963998928e-07, + "loss": 0.1064, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8029 + }, + { + "epoch": 0.7722267634755012, + "grad_norm": 1.8045980771195969, + "learning_rate": 6.256912129463719e-07, + "loss": 0.1251, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8030 + }, + { + "epoch": 0.7723229311919988, + "grad_norm": 1.4880951325318763, + "learning_rate": 6.251865039962207e-07, + "loss": 0.1116, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8031 + }, + { + "epoch": 0.7724190989084965, + "grad_norm": 1.7368015739723148, + "learning_rate": 6.246819695964274e-07, + "loss": 0.1212, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8032 + }, + { + "epoch": 0.772515266624994, + "grad_norm": 1.7234695472491965, + "learning_rate": 6.241776097939664e-07, + "loss": 0.1108, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8033 + }, + { + "epoch": 0.7726114343414916, + "grad_norm": 1.4857166740594145, + "learning_rate": 6.236734246357948e-07, + "loss": 0.1055, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8034 + }, + { + "epoch": 0.7727076020579892, + "grad_norm": 2.542266232320951, + "learning_rate": 6.231694141688535e-07, + "loss": 0.0996, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8035 + }, + { + "epoch": 0.7728037697744867, + "grad_norm": 4.413820912293766, + "learning_rate": 6.226655784400684e-07, + "loss": 0.1301, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8036 + }, + { + "epoch": 0.7728999374909843, + "grad_norm": 1.8371116201117164, + "learning_rate": 6.221619174963461e-07, + "loss": 0.1317, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8037 + }, + { + "epoch": 0.7729961052074819, + "grad_norm": 1.6083684401039164, + "learning_rate": 6.216584313845803e-07, + "loss": 0.1113, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8038 + }, + { + "epoch": 0.7730922729239794, + "grad_norm": 3.206675255627114, + "learning_rate": 6.211551201516461e-07, + "loss": 0.1273, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8039 + }, + { + "epoch": 0.773188440640477, + "grad_norm": 1.465337458631977, + "learning_rate": 6.206519838444044e-07, + "loss": 0.085, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8040 + }, + { + "epoch": 0.7732846083569745, + "grad_norm": 1.6107078040051122, + "learning_rate": 6.20149022509697e-07, + "loss": 0.1142, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8041 + }, + { + "epoch": 0.7733807760734721, + "grad_norm": 2.1900667588566813, + "learning_rate": 6.19646236194352e-07, + "loss": 0.1193, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8042 + }, + { + "epoch": 0.7734769437899697, + "grad_norm": 1.862918629526177, + "learning_rate": 6.191436249451804e-07, + "loss": 0.1143, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8043 + }, + { + "epoch": 0.7735731115064672, + "grad_norm": 1.7091779291831064, + "learning_rate": 6.186411888089758e-07, + "loss": 0.1136, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8044 + }, + { + "epoch": 0.7736692792229648, + "grad_norm": 1.5988317871053486, + "learning_rate": 6.181389278325167e-07, + "loss": 0.1285, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8045 + }, + { + "epoch": 0.7737654469394625, + "grad_norm": 2.106854317274304, + "learning_rate": 6.176368420625653e-07, + "loss": 0.1265, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8046 + }, + { + "epoch": 0.77386161465596, + "grad_norm": 1.7362296728570503, + "learning_rate": 6.171349315458669e-07, + "loss": 0.1193, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8047 + }, + { + "epoch": 0.7739577823724576, + "grad_norm": 1.7863825274965017, + "learning_rate": 6.166331963291519e-07, + "loss": 0.1007, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8048 + }, + { + "epoch": 0.7740539500889552, + "grad_norm": 1.9137569157970866, + "learning_rate": 6.161316364591314e-07, + "loss": 0.1188, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8049 + }, + { + "epoch": 0.7741501178054527, + "grad_norm": 2.2823755382073583, + "learning_rate": 6.156302519825031e-07, + "loss": 0.1168, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8050 + }, + { + "epoch": 0.7742462855219503, + "grad_norm": 1.7785596422679704, + "learning_rate": 6.151290429459466e-07, + "loss": 0.1222, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8051 + }, + { + "epoch": 0.7743424532384479, + "grad_norm": 1.6460181040411008, + "learning_rate": 6.146280093961268e-07, + "loss": 0.1102, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8052 + }, + { + "epoch": 0.7744386209549454, + "grad_norm": 2.2710265733285393, + "learning_rate": 6.141271513796915e-07, + "loss": 0.1103, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8053 + }, + { + "epoch": 0.774534788671443, + "grad_norm": 2.340989769290086, + "learning_rate": 6.136264689432705e-07, + "loss": 0.1189, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8054 + }, + { + "epoch": 0.7746309563879406, + "grad_norm": 1.876559009218358, + "learning_rate": 6.131259621334795e-07, + "loss": 0.1243, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8055 + }, + { + "epoch": 0.7747271241044381, + "grad_norm": 2.489746065471813, + "learning_rate": 6.126256309969172e-07, + "loss": 0.1273, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8056 + }, + { + "epoch": 0.7748232918209357, + "grad_norm": 1.8025357740388201, + "learning_rate": 6.121254755801659e-07, + "loss": 0.1217, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8057 + }, + { + "epoch": 0.7749194595374332, + "grad_norm": 1.8608161346428271, + "learning_rate": 6.116254959297913e-07, + "loss": 0.1274, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8058 + }, + { + "epoch": 0.7750156272539308, + "grad_norm": 1.6854802216125369, + "learning_rate": 6.111256920923436e-07, + "loss": 0.122, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8059 + }, + { + "epoch": 0.7751117949704285, + "grad_norm": 1.7129231615569525, + "learning_rate": 6.106260641143547e-07, + "loss": 0.093, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8060 + }, + { + "epoch": 0.775207962686926, + "grad_norm": 2.129786605050442, + "learning_rate": 6.101266120423416e-07, + "loss": 0.1451, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8061 + }, + { + "epoch": 0.7753041304034236, + "grad_norm": 2.5128014153015146, + "learning_rate": 6.096273359228052e-07, + "loss": 0.12, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8062 + }, + { + "epoch": 0.7754002981199212, + "grad_norm": 2.4705065106352273, + "learning_rate": 6.091282358022293e-07, + "loss": 0.1243, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8063 + }, + { + "epoch": 0.7754964658364187, + "grad_norm": 3.0194053582116935, + "learning_rate": 6.086293117270822e-07, + "loss": 0.1176, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8064 + }, + { + "epoch": 0.7755926335529163, + "grad_norm": 1.8119672733193317, + "learning_rate": 6.081305637438137e-07, + "loss": 0.1307, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8065 + }, + { + "epoch": 0.7756888012694139, + "grad_norm": 3.062985076509886, + "learning_rate": 6.076319918988593e-07, + "loss": 0.1207, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8066 + }, + { + "epoch": 0.7757849689859114, + "grad_norm": 2.0566501966792137, + "learning_rate": 6.071335962386374e-07, + "loss": 0.1158, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8067 + }, + { + "epoch": 0.775881136702409, + "grad_norm": 2.4881584691733067, + "learning_rate": 6.066353768095504e-07, + "loss": 0.1451, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8068 + }, + { + "epoch": 0.7759773044189066, + "grad_norm": 1.5103622031718733, + "learning_rate": 6.061373336579835e-07, + "loss": 0.0968, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8069 + }, + { + "epoch": 0.7760734721354041, + "grad_norm": 1.7271025855141664, + "learning_rate": 6.056394668303065e-07, + "loss": 0.1279, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8070 + }, + { + "epoch": 0.7761696398519017, + "grad_norm": 1.6861091932206285, + "learning_rate": 6.051417763728712e-07, + "loss": 0.122, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8071 + }, + { + "epoch": 0.7762658075683992, + "grad_norm": 2.052165150459034, + "learning_rate": 6.046442623320145e-07, + "loss": 0.1132, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8072 + }, + { + "epoch": 0.7763619752848968, + "grad_norm": 1.9962296618201352, + "learning_rate": 6.041469247540571e-07, + "loss": 0.123, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8073 + }, + { + "epoch": 0.7764581430013945, + "grad_norm": 1.6731327744777995, + "learning_rate": 6.036497636853009e-07, + "loss": 0.1294, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8074 + }, + { + "epoch": 0.776554310717892, + "grad_norm": 1.7194773651215733, + "learning_rate": 6.031527791720338e-07, + "loss": 0.111, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8075 + }, + { + "epoch": 0.7766504784343896, + "grad_norm": 1.6581303230293318, + "learning_rate": 6.02655971260527e-07, + "loss": 0.0982, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8076 + }, + { + "epoch": 0.7767466461508872, + "grad_norm": 1.5237063117218521, + "learning_rate": 6.021593399970338e-07, + "loss": 0.0921, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8077 + }, + { + "epoch": 0.7768428138673847, + "grad_norm": 1.5239579575373232, + "learning_rate": 6.016628854277922e-07, + "loss": 0.1069, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8078 + }, + { + "epoch": 0.7769389815838823, + "grad_norm": 2.905420705330668, + "learning_rate": 6.011666075990236e-07, + "loss": 0.1005, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8079 + }, + { + "epoch": 0.7770351493003799, + "grad_norm": 1.457090729933418, + "learning_rate": 6.006705065569329e-07, + "loss": 0.1105, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8080 + }, + { + "epoch": 0.7771313170168774, + "grad_norm": 1.759896315011617, + "learning_rate": 6.001745823477093e-07, + "loss": 0.1004, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8081 + }, + { + "epoch": 0.777227484733375, + "grad_norm": 1.4622187971615963, + "learning_rate": 5.996788350175228e-07, + "loss": 0.0892, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8082 + }, + { + "epoch": 0.7773236524498726, + "grad_norm": 1.7293969955482917, + "learning_rate": 5.991832646125301e-07, + "loss": 0.1218, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8083 + }, + { + "epoch": 0.7774198201663701, + "grad_norm": 1.4117336356831467, + "learning_rate": 5.986878711788702e-07, + "loss": 0.0998, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8084 + }, + { + "epoch": 0.7775159878828677, + "grad_norm": 1.4059431622228677, + "learning_rate": 5.981926547626652e-07, + "loss": 0.0951, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8085 + }, + { + "epoch": 0.7776121555993653, + "grad_norm": 2.0655576260501007, + "learning_rate": 5.976976154100214e-07, + "loss": 0.1221, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8086 + }, + { + "epoch": 0.7777083233158628, + "grad_norm": 1.7941693276331463, + "learning_rate": 5.972027531670291e-07, + "loss": 0.1211, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8087 + }, + { + "epoch": 0.7778044910323605, + "grad_norm": 2.1569737871540737, + "learning_rate": 5.967080680797599e-07, + "loss": 0.1089, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8088 + }, + { + "epoch": 0.777900658748858, + "grad_norm": 1.893028717299564, + "learning_rate": 5.962135601942709e-07, + "loss": 0.1123, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8089 + }, + { + "epoch": 0.7779968264653556, + "grad_norm": 2.5575101382134897, + "learning_rate": 5.957192295566022e-07, + "loss": 0.1157, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8090 + }, + { + "epoch": 0.7780929941818532, + "grad_norm": 1.7343209111619973, + "learning_rate": 5.952250762127776e-07, + "loss": 0.1161, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8091 + }, + { + "epoch": 0.7781891618983507, + "grad_norm": 1.7576121065911443, + "learning_rate": 5.947311002088047e-07, + "loss": 0.1127, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8092 + }, + { + "epoch": 0.7782853296148483, + "grad_norm": 2.167301783343402, + "learning_rate": 5.942373015906725e-07, + "loss": 0.1108, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8093 + }, + { + "epoch": 0.7783814973313459, + "grad_norm": 2.0166877871474655, + "learning_rate": 5.937436804043558e-07, + "loss": 0.1378, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8094 + }, + { + "epoch": 0.7784776650478434, + "grad_norm": 1.7880426200907071, + "learning_rate": 5.932502366958124e-07, + "loss": 0.1123, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8095 + }, + { + "epoch": 0.778573832764341, + "grad_norm": 1.7768859514740007, + "learning_rate": 5.927569705109828e-07, + "loss": 0.1053, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8096 + }, + { + "epoch": 0.7786700004808386, + "grad_norm": 2.230449815723676, + "learning_rate": 5.922638818957919e-07, + "loss": 0.1351, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8097 + }, + { + "epoch": 0.7787661681973361, + "grad_norm": 1.7592226518359209, + "learning_rate": 5.917709708961481e-07, + "loss": 0.1106, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8098 + }, + { + "epoch": 0.7788623359138337, + "grad_norm": 1.9154714094675842, + "learning_rate": 5.912782375579412e-07, + "loss": 0.1126, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8099 + }, + { + "epoch": 0.7789585036303313, + "grad_norm": 3.6192780001422107, + "learning_rate": 5.907856819270471e-07, + "loss": 0.149, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8100 + }, + { + "epoch": 0.7790546713468288, + "grad_norm": 1.9764216553847973, + "learning_rate": 5.902933040493242e-07, + "loss": 0.1292, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8101 + }, + { + "epoch": 0.7791508390633265, + "grad_norm": 1.8612349200767908, + "learning_rate": 5.898011039706136e-07, + "loss": 0.1043, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8102 + }, + { + "epoch": 0.7792470067798241, + "grad_norm": 1.5552163939100396, + "learning_rate": 5.89309081736742e-07, + "loss": 0.1119, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8103 + }, + { + "epoch": 0.7793431744963216, + "grad_norm": 2.783687001521008, + "learning_rate": 5.888172373935161e-07, + "loss": 0.097, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8104 + }, + { + "epoch": 0.7794393422128192, + "grad_norm": 2.13141203247026, + "learning_rate": 5.883255709867288e-07, + "loss": 0.1046, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8105 + }, + { + "epoch": 0.7795355099293168, + "grad_norm": 1.780421911074693, + "learning_rate": 5.878340825621556e-07, + "loss": 0.1229, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8106 + }, + { + "epoch": 0.7796316776458143, + "grad_norm": 1.552327009857856, + "learning_rate": 5.873427721655561e-07, + "loss": 0.1067, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8107 + }, + { + "epoch": 0.7797278453623119, + "grad_norm": 1.68660868342336, + "learning_rate": 5.868516398426715e-07, + "loss": 0.13, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8108 + }, + { + "epoch": 0.7798240130788094, + "grad_norm": 2.0331327081750454, + "learning_rate": 5.863606856392281e-07, + "loss": 0.1157, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8109 + }, + { + "epoch": 0.779920180795307, + "grad_norm": 1.5746991185573658, + "learning_rate": 5.85869909600936e-07, + "loss": 0.1037, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8110 + }, + { + "epoch": 0.7800163485118046, + "grad_norm": 2.0036118440095323, + "learning_rate": 5.85379311773486e-07, + "loss": 0.1151, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8111 + }, + { + "epoch": 0.7801125162283021, + "grad_norm": 1.3786598105786558, + "learning_rate": 5.848888922025553e-07, + "loss": 0.0873, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8112 + }, + { + "epoch": 0.7802086839447997, + "grad_norm": 1.9161804595498313, + "learning_rate": 5.843986509338028e-07, + "loss": 0.1116, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8113 + }, + { + "epoch": 0.7803048516612973, + "grad_norm": 1.8476495551435121, + "learning_rate": 5.83908588012872e-07, + "loss": 0.119, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8114 + }, + { + "epoch": 0.7804010193777948, + "grad_norm": 2.8100575971868254, + "learning_rate": 5.834187034853895e-07, + "loss": 0.1313, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8115 + }, + { + "epoch": 0.7804971870942925, + "grad_norm": 1.7791733076487446, + "learning_rate": 5.829289973969632e-07, + "loss": 0.0926, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8116 + }, + { + "epoch": 0.7805933548107901, + "grad_norm": 2.226207907769019, + "learning_rate": 5.82439469793187e-07, + "loss": 0.0978, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8117 + }, + { + "epoch": 0.7806895225272876, + "grad_norm": 2.307952077871234, + "learning_rate": 5.819501207196376e-07, + "loss": 0.1255, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8118 + }, + { + "epoch": 0.7807856902437852, + "grad_norm": 1.7422823836693733, + "learning_rate": 5.814609502218743e-07, + "loss": 0.1181, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8119 + }, + { + "epoch": 0.7808818579602828, + "grad_norm": 1.8276363700893081, + "learning_rate": 5.809719583454415e-07, + "loss": 0.1185, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8120 + }, + { + "epoch": 0.7809780256767803, + "grad_norm": 2.3204132853520485, + "learning_rate": 5.804831451358636e-07, + "loss": 0.1358, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8121 + }, + { + "epoch": 0.7810741933932779, + "grad_norm": 1.8762835264761029, + "learning_rate": 5.799945106386518e-07, + "loss": 0.1038, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8122 + }, + { + "epoch": 0.7811703611097754, + "grad_norm": 2.22121361673916, + "learning_rate": 5.79506054899299e-07, + "loss": 0.1177, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8123 + }, + { + "epoch": 0.781266528826273, + "grad_norm": 2.527935339319615, + "learning_rate": 5.790177779632816e-07, + "loss": 0.1134, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8124 + }, + { + "epoch": 0.7813626965427706, + "grad_norm": 1.754441758530694, + "learning_rate": 5.785296798760601e-07, + "loss": 0.1138, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8125 + }, + { + "epoch": 0.7814588642592681, + "grad_norm": 1.769927426737361, + "learning_rate": 5.780417606830782e-07, + "loss": 0.1079, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8126 + }, + { + "epoch": 0.7815550319757657, + "grad_norm": 1.8240605617799814, + "learning_rate": 5.775540204297609e-07, + "loss": 0.1208, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8127 + }, + { + "epoch": 0.7816511996922633, + "grad_norm": 2.3527299934931745, + "learning_rate": 5.770664591615191e-07, + "loss": 0.1348, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8128 + }, + { + "epoch": 0.7817473674087608, + "grad_norm": 1.341615091621408, + "learning_rate": 5.765790769237464e-07, + "loss": 0.0914, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8129 + }, + { + "epoch": 0.7818435351252585, + "grad_norm": 2.215369613040944, + "learning_rate": 5.76091873761819e-07, + "loss": 0.1325, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8130 + }, + { + "epoch": 0.7819397028417561, + "grad_norm": 2.032668749284878, + "learning_rate": 5.756048497210978e-07, + "loss": 0.122, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8131 + }, + { + "epoch": 0.7820358705582536, + "grad_norm": 1.89363832743848, + "learning_rate": 5.751180048469243e-07, + "loss": 0.1369, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8132 + }, + { + "epoch": 0.7821320382747512, + "grad_norm": 2.1788581053008107, + "learning_rate": 5.746313391846264e-07, + "loss": 0.1476, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8133 + }, + { + "epoch": 0.7822282059912488, + "grad_norm": 1.900252622481249, + "learning_rate": 5.741448527795137e-07, + "loss": 0.1103, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8134 + }, + { + "epoch": 0.7823243737077463, + "grad_norm": 1.814800276250086, + "learning_rate": 5.736585456768798e-07, + "loss": 0.1146, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8135 + }, + { + "epoch": 0.7824205414242439, + "grad_norm": 1.7371303909486173, + "learning_rate": 5.731724179220008e-07, + "loss": 0.122, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8136 + }, + { + "epoch": 0.7825167091407415, + "grad_norm": 1.8140151897436838, + "learning_rate": 5.726864695601373e-07, + "loss": 0.0947, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8137 + }, + { + "epoch": 0.782612876857239, + "grad_norm": 1.6609712853399168, + "learning_rate": 5.72200700636531e-07, + "loss": 0.1069, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8138 + }, + { + "epoch": 0.7827090445737366, + "grad_norm": 2.368738215169156, + "learning_rate": 5.717151111964095e-07, + "loss": 0.1084, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8139 + }, + { + "epoch": 0.7828052122902341, + "grad_norm": 1.4428883561242651, + "learning_rate": 5.712297012849826e-07, + "loss": 0.0947, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8140 + }, + { + "epoch": 0.7829013800067317, + "grad_norm": 1.4735725636194703, + "learning_rate": 5.707444709474424e-07, + "loss": 0.0891, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8141 + }, + { + "epoch": 0.7829975477232293, + "grad_norm": 2.3214849224641196, + "learning_rate": 5.702594202289658e-07, + "loss": 0.1137, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8142 + }, + { + "epoch": 0.7830937154397268, + "grad_norm": 2.263422921893314, + "learning_rate": 5.697745491747131e-07, + "loss": 0.1327, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8143 + }, + { + "epoch": 0.7831898831562245, + "grad_norm": 1.6897162762685851, + "learning_rate": 5.692898578298253e-07, + "loss": 0.1191, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8144 + }, + { + "epoch": 0.7832860508727221, + "grad_norm": 2.0779917104117196, + "learning_rate": 5.688053462394297e-07, + "loss": 0.0999, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8145 + }, + { + "epoch": 0.7833822185892196, + "grad_norm": 1.8004318169141933, + "learning_rate": 5.683210144486357e-07, + "loss": 0.0965, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8146 + }, + { + "epoch": 0.7834783863057172, + "grad_norm": 1.942212213877798, + "learning_rate": 5.678368625025354e-07, + "loss": 0.1109, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8147 + }, + { + "epoch": 0.7835745540222148, + "grad_norm": 1.4880575913731842, + "learning_rate": 5.673528904462061e-07, + "loss": 0.1084, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8148 + }, + { + "epoch": 0.7836707217387123, + "grad_norm": 1.579021582476529, + "learning_rate": 5.668690983247053e-07, + "loss": 0.1052, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8149 + }, + { + "epoch": 0.7837668894552099, + "grad_norm": 2.0287478439718285, + "learning_rate": 5.663854861830759e-07, + "loss": 0.1154, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8150 + }, + { + "epoch": 0.7838630571717075, + "grad_norm": 1.641856508614473, + "learning_rate": 5.659020540663434e-07, + "loss": 0.089, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8151 + }, + { + "epoch": 0.783959224888205, + "grad_norm": 1.665744549425302, + "learning_rate": 5.654188020195173e-07, + "loss": 0.1195, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8152 + }, + { + "epoch": 0.7840553926047026, + "grad_norm": 1.8828931193300078, + "learning_rate": 5.64935730087589e-07, + "loss": 0.0921, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8153 + }, + { + "epoch": 0.7841515603212001, + "grad_norm": 1.4358184371932277, + "learning_rate": 5.644528383155351e-07, + "loss": 0.0935, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8154 + }, + { + "epoch": 0.7842477280376977, + "grad_norm": 1.2415122940496894, + "learning_rate": 5.639701267483125e-07, + "loss": 0.067, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8155 + }, + { + "epoch": 0.7843438957541953, + "grad_norm": 1.5097218622326842, + "learning_rate": 5.634875954308638e-07, + "loss": 0.1015, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8156 + }, + { + "epoch": 0.7844400634706928, + "grad_norm": 1.6486020062686184, + "learning_rate": 5.630052444081138e-07, + "loss": 0.1067, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8157 + }, + { + "epoch": 0.7845362311871905, + "grad_norm": 1.9226453188442143, + "learning_rate": 5.625230737249709e-07, + "loss": 0.1238, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8158 + }, + { + "epoch": 0.7846323989036881, + "grad_norm": 2.183686923883589, + "learning_rate": 5.620410834263274e-07, + "loss": 0.1291, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8159 + }, + { + "epoch": 0.7847285666201856, + "grad_norm": 1.70172250762146, + "learning_rate": 5.615592735570563e-07, + "loss": 0.1067, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8160 + }, + { + "epoch": 0.7848247343366832, + "grad_norm": 1.7437907446335312, + "learning_rate": 5.610776441620161e-07, + "loss": 0.1094, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8161 + }, + { + "epoch": 0.7849209020531808, + "grad_norm": 1.5998292437009054, + "learning_rate": 5.605961952860478e-07, + "loss": 0.0929, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8162 + }, + { + "epoch": 0.7850170697696783, + "grad_norm": 1.9126731394945882, + "learning_rate": 5.601149269739758e-07, + "loss": 0.1275, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8163 + }, + { + "epoch": 0.7851132374861759, + "grad_norm": 1.9230061698101226, + "learning_rate": 5.596338392706077e-07, + "loss": 0.1414, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8164 + }, + { + "epoch": 0.7852094052026735, + "grad_norm": 1.7494215878255261, + "learning_rate": 5.591529322207345e-07, + "loss": 0.1069, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8165 + }, + { + "epoch": 0.785305572919171, + "grad_norm": 1.9006329083843643, + "learning_rate": 5.586722058691286e-07, + "loss": 0.1327, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8166 + }, + { + "epoch": 0.7854017406356686, + "grad_norm": 2.210867564850402, + "learning_rate": 5.581916602605478e-07, + "loss": 0.1076, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8167 + }, + { + "epoch": 0.7854979083521662, + "grad_norm": 1.8327810700924227, + "learning_rate": 5.57711295439732e-07, + "loss": 0.1194, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8168 + }, + { + "epoch": 0.7855940760686637, + "grad_norm": 1.7741964152324439, + "learning_rate": 5.572311114514047e-07, + "loss": 0.0967, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8169 + }, + { + "epoch": 0.7856902437851613, + "grad_norm": 1.5585513291846596, + "learning_rate": 5.567511083402733e-07, + "loss": 0.0945, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8170 + }, + { + "epoch": 0.7857864115016588, + "grad_norm": 2.4767798336687243, + "learning_rate": 5.562712861510255e-07, + "loss": 0.1461, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8171 + }, + { + "epoch": 0.7858825792181565, + "grad_norm": 2.351354849215282, + "learning_rate": 5.557916449283351e-07, + "loss": 0.1084, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8172 + }, + { + "epoch": 0.7859787469346541, + "grad_norm": 1.6618901393821903, + "learning_rate": 5.553121847168589e-07, + "loss": 0.1195, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8173 + }, + { + "epoch": 0.7860749146511516, + "grad_norm": 1.5793431360117705, + "learning_rate": 5.548329055612342e-07, + "loss": 0.1154, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8174 + }, + { + "epoch": 0.7861710823676492, + "grad_norm": 2.7783801107195725, + "learning_rate": 5.54353807506084e-07, + "loss": 0.1202, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8175 + }, + { + "epoch": 0.7862672500841468, + "grad_norm": 1.8349480038894195, + "learning_rate": 5.538748905960145e-07, + "loss": 0.0958, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8176 + }, + { + "epoch": 0.7863634178006443, + "grad_norm": 1.6718374132852911, + "learning_rate": 5.533961548756128e-07, + "loss": 0.086, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8177 + }, + { + "epoch": 0.7864595855171419, + "grad_norm": 2.1060048693751168, + "learning_rate": 5.52917600389451e-07, + "loss": 0.0952, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8178 + }, + { + "epoch": 0.7865557532336395, + "grad_norm": 1.7228892935143467, + "learning_rate": 5.524392271820841e-07, + "loss": 0.1149, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8179 + }, + { + "epoch": 0.786651920950137, + "grad_norm": 1.5184825868968523, + "learning_rate": 5.519610352980501e-07, + "loss": 0.0926, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8180 + }, + { + "epoch": 0.7867480886666346, + "grad_norm": 2.3043102411280225, + "learning_rate": 5.514830247818697e-07, + "loss": 0.1111, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8181 + }, + { + "epoch": 0.7868442563831322, + "grad_norm": 2.713073158649014, + "learning_rate": 5.510051956780476e-07, + "loss": 0.1482, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8182 + }, + { + "epoch": 0.7869404240996297, + "grad_norm": 1.6664302542175902, + "learning_rate": 5.505275480310701e-07, + "loss": 0.0939, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8183 + }, + { + "epoch": 0.7870365918161273, + "grad_norm": 2.053578799052997, + "learning_rate": 5.500500818854079e-07, + "loss": 0.1111, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8184 + }, + { + "epoch": 0.7871327595326248, + "grad_norm": 2.4991753887855523, + "learning_rate": 5.495727972855145e-07, + "loss": 0.1038, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8185 + }, + { + "epoch": 0.7872289272491225, + "grad_norm": 1.8310896847248708, + "learning_rate": 5.490956942758263e-07, + "loss": 0.1194, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8186 + }, + { + "epoch": 0.7873250949656201, + "grad_norm": 1.5581846215456603, + "learning_rate": 5.486187729007639e-07, + "loss": 0.1036, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8187 + }, + { + "epoch": 0.7874212626821177, + "grad_norm": 2.3204845659126674, + "learning_rate": 5.481420332047285e-07, + "loss": 0.1226, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8188 + }, + { + "epoch": 0.7875174303986152, + "grad_norm": 1.633269238600668, + "learning_rate": 5.476654752321065e-07, + "loss": 0.1307, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8189 + }, + { + "epoch": 0.7876135981151128, + "grad_norm": 3.700352400349776, + "learning_rate": 5.471890990272666e-07, + "loss": 0.158, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8190 + }, + { + "epoch": 0.7877097658316103, + "grad_norm": 1.9739153164475773, + "learning_rate": 5.46712904634561e-07, + "loss": 0.1125, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8191 + }, + { + "epoch": 0.7878059335481079, + "grad_norm": 1.593820877973687, + "learning_rate": 5.462368920983249e-07, + "loss": 0.105, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8192 + }, + { + "epoch": 0.7879021012646055, + "grad_norm": 1.9565928366691636, + "learning_rate": 5.457610614628766e-07, + "loss": 0.108, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8193 + }, + { + "epoch": 0.787998268981103, + "grad_norm": 2.5446894952111094, + "learning_rate": 5.452854127725163e-07, + "loss": 0.1209, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8194 + }, + { + "epoch": 0.7880944366976006, + "grad_norm": 1.8595581313071492, + "learning_rate": 5.448099460715289e-07, + "loss": 0.1315, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8195 + }, + { + "epoch": 0.7881906044140982, + "grad_norm": 2.6078361435864554, + "learning_rate": 5.443346614041814e-07, + "loss": 0.1122, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8196 + }, + { + "epoch": 0.7882867721305957, + "grad_norm": 1.9630534289973791, + "learning_rate": 5.438595588147241e-07, + "loss": 0.1068, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8197 + }, + { + "epoch": 0.7883829398470933, + "grad_norm": 1.5915650774813512, + "learning_rate": 5.433846383473907e-07, + "loss": 0.1363, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8198 + }, + { + "epoch": 0.7884791075635909, + "grad_norm": 2.3644363010026637, + "learning_rate": 5.429099000463983e-07, + "loss": 0.1228, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8199 + }, + { + "epoch": 0.7885752752800885, + "grad_norm": 1.3360537700191135, + "learning_rate": 5.424353439559446e-07, + "loss": 0.0836, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8200 + }, + { + "epoch": 0.7886714429965861, + "grad_norm": 1.6142266006522508, + "learning_rate": 5.419609701202131e-07, + "loss": 0.0847, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8201 + }, + { + "epoch": 0.7887676107130837, + "grad_norm": 1.9308621822312677, + "learning_rate": 5.414867785833691e-07, + "loss": 0.1354, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8202 + }, + { + "epoch": 0.7888637784295812, + "grad_norm": 1.7156368369138062, + "learning_rate": 5.41012769389562e-07, + "loss": 0.1141, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8203 + }, + { + "epoch": 0.7889599461460788, + "grad_norm": 1.8994734118628271, + "learning_rate": 5.405389425829219e-07, + "loss": 0.1287, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8204 + }, + { + "epoch": 0.7890561138625763, + "grad_norm": 1.45710709544667, + "learning_rate": 5.400652982075644e-07, + "loss": 0.0977, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8205 + }, + { + "epoch": 0.7891522815790739, + "grad_norm": 1.9809434206253431, + "learning_rate": 5.395918363075867e-07, + "loss": 0.0984, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8206 + }, + { + "epoch": 0.7892484492955715, + "grad_norm": 2.201369223533341, + "learning_rate": 5.391185569270702e-07, + "loss": 0.1339, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8207 + }, + { + "epoch": 0.789344617012069, + "grad_norm": 2.0159320337882995, + "learning_rate": 5.386454601100774e-07, + "loss": 0.1084, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8208 + }, + { + "epoch": 0.7894407847285666, + "grad_norm": 2.5865497346093163, + "learning_rate": 5.381725459006553e-07, + "loss": 0.1284, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8209 + }, + { + "epoch": 0.7895369524450642, + "grad_norm": 1.8264600239251623, + "learning_rate": 5.376998143428344e-07, + "loss": 0.1189, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8210 + }, + { + "epoch": 0.7896331201615617, + "grad_norm": 2.410878954889446, + "learning_rate": 5.37227265480626e-07, + "loss": 0.1138, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8211 + }, + { + "epoch": 0.7897292878780593, + "grad_norm": 1.6766360467079622, + "learning_rate": 5.367548993580263e-07, + "loss": 0.108, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8212 + }, + { + "epoch": 0.7898254555945569, + "grad_norm": 1.8687794653927632, + "learning_rate": 5.362827160190137e-07, + "loss": 0.1197, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8213 + }, + { + "epoch": 0.7899216233110545, + "grad_norm": 1.7986267730401304, + "learning_rate": 5.358107155075503e-07, + "loss": 0.1247, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8214 + }, + { + "epoch": 0.7900177910275521, + "grad_norm": 1.4150478600422427, + "learning_rate": 5.353388978675809e-07, + "loss": 0.1042, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8215 + }, + { + "epoch": 0.7901139587440497, + "grad_norm": 1.5193543697483416, + "learning_rate": 5.348672631430319e-07, + "loss": 0.1072, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8216 + }, + { + "epoch": 0.7902101264605472, + "grad_norm": 2.3026001079962883, + "learning_rate": 5.343958113778141e-07, + "loss": 0.1234, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8217 + }, + { + "epoch": 0.7903062941770448, + "grad_norm": 1.6221670170581084, + "learning_rate": 5.339245426158218e-07, + "loss": 0.1085, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8218 + }, + { + "epoch": 0.7904024618935424, + "grad_norm": 1.656174100795375, + "learning_rate": 5.334534569009306e-07, + "loss": 0.1292, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8219 + }, + { + "epoch": 0.7904986296100399, + "grad_norm": 1.448029073800559, + "learning_rate": 5.32982554277e-07, + "loss": 0.1097, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8220 + }, + { + "epoch": 0.7905947973265375, + "grad_norm": 1.9318006907940117, + "learning_rate": 5.325118347878735e-07, + "loss": 0.112, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8221 + }, + { + "epoch": 0.790690965043035, + "grad_norm": 1.4077418171845737, + "learning_rate": 5.320412984773749e-07, + "loss": 0.0765, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8222 + }, + { + "epoch": 0.7907871327595326, + "grad_norm": 1.525475858922743, + "learning_rate": 5.315709453893128e-07, + "loss": 0.1011, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8223 + }, + { + "epoch": 0.7908833004760302, + "grad_norm": 2.178425341422641, + "learning_rate": 5.311007755674786e-07, + "loss": 0.1041, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8224 + }, + { + "epoch": 0.7909794681925277, + "grad_norm": 1.5013319848091184, + "learning_rate": 5.306307890556464e-07, + "loss": 0.1089, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8225 + }, + { + "epoch": 0.7910756359090253, + "grad_norm": 1.6333761990701736, + "learning_rate": 5.301609858975731e-07, + "loss": 0.0939, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8226 + }, + { + "epoch": 0.7911718036255229, + "grad_norm": 1.5732400003396028, + "learning_rate": 5.296913661369999e-07, + "loss": 0.0992, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8227 + }, + { + "epoch": 0.7912679713420205, + "grad_norm": 1.7917008162289414, + "learning_rate": 5.292219298176477e-07, + "loss": 0.1121, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8228 + }, + { + "epoch": 0.7913641390585181, + "grad_norm": 1.8882134698846007, + "learning_rate": 5.287526769832232e-07, + "loss": 0.1139, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8229 + }, + { + "epoch": 0.7914603067750157, + "grad_norm": 1.6670361392638853, + "learning_rate": 5.282836076774154e-07, + "loss": 0.1182, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8230 + }, + { + "epoch": 0.7915564744915132, + "grad_norm": 1.9768139453895763, + "learning_rate": 5.278147219438953e-07, + "loss": 0.1511, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8231 + }, + { + "epoch": 0.7916526422080108, + "grad_norm": 1.5631082587791734, + "learning_rate": 5.273460198263192e-07, + "loss": 0.1058, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8232 + }, + { + "epoch": 0.7917488099245084, + "grad_norm": 1.641193663377553, + "learning_rate": 5.268775013683222e-07, + "loss": 0.1133, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8233 + }, + { + "epoch": 0.7918449776410059, + "grad_norm": 2.1007375994344497, + "learning_rate": 5.264091666135259e-07, + "loss": 0.1287, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8234 + }, + { + "epoch": 0.7919411453575035, + "grad_norm": 1.5942933786242477, + "learning_rate": 5.259410156055336e-07, + "loss": 0.1096, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8235 + }, + { + "epoch": 0.792037313074001, + "grad_norm": 1.3851886935350912, + "learning_rate": 5.254730483879311e-07, + "loss": 0.0814, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8236 + }, + { + "epoch": 0.7921334807904986, + "grad_norm": 1.91033034805867, + "learning_rate": 5.250052650042886e-07, + "loss": 0.1255, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8237 + }, + { + "epoch": 0.7922296485069962, + "grad_norm": 1.6982341447394957, + "learning_rate": 5.245376654981562e-07, + "loss": 0.0961, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8238 + }, + { + "epoch": 0.7923258162234937, + "grad_norm": 2.038236117504251, + "learning_rate": 5.240702499130698e-07, + "loss": 0.1151, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8239 + }, + { + "epoch": 0.7924219839399913, + "grad_norm": 2.0643365219021153, + "learning_rate": 5.236030182925475e-07, + "loss": 0.1141, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8240 + }, + { + "epoch": 0.7925181516564889, + "grad_norm": 2.1066450536691868, + "learning_rate": 5.231359706800887e-07, + "loss": 0.1563, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8241 + }, + { + "epoch": 0.7926143193729865, + "grad_norm": 1.7768725300774448, + "learning_rate": 5.226691071191773e-07, + "loss": 0.1141, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8242 + }, + { + "epoch": 0.7927104870894841, + "grad_norm": 1.984409689177098, + "learning_rate": 5.222024276532803e-07, + "loss": 0.1313, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8243 + }, + { + "epoch": 0.7928066548059817, + "grad_norm": 1.6851034188503713, + "learning_rate": 5.217359323258459e-07, + "loss": 0.1181, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8244 + }, + { + "epoch": 0.7929028225224792, + "grad_norm": 1.7115094231280141, + "learning_rate": 5.212696211803061e-07, + "loss": 0.1029, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8245 + }, + { + "epoch": 0.7929989902389768, + "grad_norm": 1.682669715840196, + "learning_rate": 5.208034942600765e-07, + "loss": 0.1155, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8246 + }, + { + "epoch": 0.7930951579554744, + "grad_norm": 1.891756634800181, + "learning_rate": 5.203375516085541e-07, + "loss": 0.1262, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8247 + }, + { + "epoch": 0.7931913256719719, + "grad_norm": 1.608375698918847, + "learning_rate": 5.1987179326912e-07, + "loss": 0.1025, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8248 + }, + { + "epoch": 0.7932874933884695, + "grad_norm": 1.9598547470246328, + "learning_rate": 5.194062192851381e-07, + "loss": 0.1293, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8249 + }, + { + "epoch": 0.793383661104967, + "grad_norm": 2.691606974967217, + "learning_rate": 5.189408296999535e-07, + "loss": 0.1108, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8250 + }, + { + "epoch": 0.7934798288214646, + "grad_norm": 1.7880307809113987, + "learning_rate": 5.184756245568953e-07, + "loss": 0.1397, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8251 + }, + { + "epoch": 0.7935759965379622, + "grad_norm": 1.8169488232719282, + "learning_rate": 5.18010603899276e-07, + "loss": 0.119, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8252 + }, + { + "epoch": 0.7936721642544597, + "grad_norm": 3.900787872628502, + "learning_rate": 5.175457677703901e-07, + "loss": 0.1171, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8253 + }, + { + "epoch": 0.7937683319709573, + "grad_norm": 1.6815126812550576, + "learning_rate": 5.170811162135153e-07, + "loss": 0.1238, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8254 + }, + { + "epoch": 0.7938644996874549, + "grad_norm": 1.7413078569490044, + "learning_rate": 5.166166492719124e-07, + "loss": 0.1126, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8255 + }, + { + "epoch": 0.7939606674039525, + "grad_norm": 2.067006368289671, + "learning_rate": 5.161523669888235e-07, + "loss": 0.0951, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8256 + }, + { + "epoch": 0.7940568351204501, + "grad_norm": 1.8313135159849498, + "learning_rate": 5.156882694074749e-07, + "loss": 0.1487, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8257 + }, + { + "epoch": 0.7941530028369477, + "grad_norm": 1.3171484614070614, + "learning_rate": 5.152243565710754e-07, + "loss": 0.0989, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8258 + }, + { + "epoch": 0.7942491705534452, + "grad_norm": 2.3057735939994797, + "learning_rate": 5.147606285228172e-07, + "loss": 0.1285, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8259 + }, + { + "epoch": 0.7943453382699428, + "grad_norm": 1.6816061638013564, + "learning_rate": 5.142970853058743e-07, + "loss": 0.1061, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8260 + }, + { + "epoch": 0.7944415059864404, + "grad_norm": 1.691141453808281, + "learning_rate": 5.138337269634033e-07, + "loss": 0.0936, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8261 + }, + { + "epoch": 0.7945376737029379, + "grad_norm": 1.728980411029731, + "learning_rate": 5.133705535385447e-07, + "loss": 0.1096, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8262 + }, + { + "epoch": 0.7946338414194355, + "grad_norm": 1.830799855668586, + "learning_rate": 5.129075650744212e-07, + "loss": 0.0903, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8263 + }, + { + "epoch": 0.7947300091359331, + "grad_norm": 2.3179539371429176, + "learning_rate": 5.124447616141382e-07, + "loss": 0.1207, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8264 + }, + { + "epoch": 0.7948261768524306, + "grad_norm": 1.795915849842931, + "learning_rate": 5.11982143200784e-07, + "loss": 0.1031, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8265 + }, + { + "epoch": 0.7949223445689282, + "grad_norm": 1.604625389012718, + "learning_rate": 5.115197098774302e-07, + "loss": 0.1056, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8266 + }, + { + "epoch": 0.7950185122854257, + "grad_norm": 1.7919426927037545, + "learning_rate": 5.110574616871297e-07, + "loss": 0.1266, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8267 + }, + { + "epoch": 0.7951146800019233, + "grad_norm": 1.7685281664879993, + "learning_rate": 5.105953986729196e-07, + "loss": 0.1185, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8268 + }, + { + "epoch": 0.7952108477184209, + "grad_norm": 1.8158741399504976, + "learning_rate": 5.101335208778191e-07, + "loss": 0.1048, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8269 + }, + { + "epoch": 0.7953070154349186, + "grad_norm": 1.6833918618699384, + "learning_rate": 5.096718283448313e-07, + "loss": 0.0998, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8270 + }, + { + "epoch": 0.7954031831514161, + "grad_norm": 1.8039025276680007, + "learning_rate": 5.092103211169391e-07, + "loss": 0.1078, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8271 + }, + { + "epoch": 0.7954993508679137, + "grad_norm": 2.233896555686281, + "learning_rate": 5.087489992371114e-07, + "loss": 0.0922, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8272 + }, + { + "epoch": 0.7955955185844112, + "grad_norm": 2.0674806463833084, + "learning_rate": 5.08287862748299e-07, + "loss": 0.0978, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8273 + }, + { + "epoch": 0.7956916863009088, + "grad_norm": 1.9496537382036092, + "learning_rate": 5.078269116934334e-07, + "loss": 0.1151, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8274 + }, + { + "epoch": 0.7957878540174064, + "grad_norm": 1.6929890824546854, + "learning_rate": 5.073661461154317e-07, + "loss": 0.091, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8275 + }, + { + "epoch": 0.7958840217339039, + "grad_norm": 2.1200481686826755, + "learning_rate": 5.06905566057192e-07, + "loss": 0.1279, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8276 + }, + { + "epoch": 0.7959801894504015, + "grad_norm": 2.395968332661139, + "learning_rate": 5.064451715615962e-07, + "loss": 0.1173, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8277 + }, + { + "epoch": 0.7960763571668991, + "grad_norm": 1.9669106782552521, + "learning_rate": 5.059849626715072e-07, + "loss": 0.1315, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8278 + }, + { + "epoch": 0.7961725248833966, + "grad_norm": 1.75483049905707, + "learning_rate": 5.055249394297724e-07, + "loss": 0.1192, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8279 + }, + { + "epoch": 0.7962686925998942, + "grad_norm": 2.3700371427256437, + "learning_rate": 5.050651018792213e-07, + "loss": 0.1044, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8280 + }, + { + "epoch": 0.7963648603163918, + "grad_norm": 1.7229917201115041, + "learning_rate": 5.046054500626657e-07, + "loss": 0.1192, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8281 + }, + { + "epoch": 0.7964610280328893, + "grad_norm": 1.6360866813500063, + "learning_rate": 5.041459840229007e-07, + "loss": 0.0988, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8282 + }, + { + "epoch": 0.7965571957493869, + "grad_norm": 2.138402920246977, + "learning_rate": 5.036867038027046e-07, + "loss": 0.0962, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8283 + }, + { + "epoch": 0.7966533634658846, + "grad_norm": 2.5294850920064387, + "learning_rate": 5.032276094448363e-07, + "loss": 0.1182, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8284 + }, + { + "epoch": 0.7967495311823821, + "grad_norm": 1.934312370694879, + "learning_rate": 5.027687009920393e-07, + "loss": 0.1332, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8285 + }, + { + "epoch": 0.7968456988988797, + "grad_norm": 2.0873141583037422, + "learning_rate": 5.023099784870394e-07, + "loss": 0.1311, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8286 + }, + { + "epoch": 0.7969418666153772, + "grad_norm": 1.8773961464247402, + "learning_rate": 5.018514419725451e-07, + "loss": 0.1296, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8287 + }, + { + "epoch": 0.7970380343318748, + "grad_norm": 1.793594266448337, + "learning_rate": 5.013930914912477e-07, + "loss": 0.1215, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8288 + }, + { + "epoch": 0.7971342020483724, + "grad_norm": 2.418651001176164, + "learning_rate": 5.009349270858197e-07, + "loss": 0.1285, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8289 + }, + { + "epoch": 0.7972303697648699, + "grad_norm": 1.9613136583583748, + "learning_rate": 5.004769487989183e-07, + "loss": 0.117, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8290 + }, + { + "epoch": 0.7973265374813675, + "grad_norm": 1.661397959501782, + "learning_rate": 5.000191566731824e-07, + "loss": 0.1265, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8291 + }, + { + "epoch": 0.7974227051978651, + "grad_norm": 2.2118686675841968, + "learning_rate": 4.995615507512339e-07, + "loss": 0.1144, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8292 + }, + { + "epoch": 0.7975188729143626, + "grad_norm": 2.058027446847352, + "learning_rate": 4.99104131075677e-07, + "loss": 0.1011, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8293 + }, + { + "epoch": 0.7976150406308602, + "grad_norm": 2.0350640995139937, + "learning_rate": 4.986468976890993e-07, + "loss": 0.0911, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8294 + }, + { + "epoch": 0.7977112083473578, + "grad_norm": 2.077299570727592, + "learning_rate": 4.981898506340696e-07, + "loss": 0.1115, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8295 + }, + { + "epoch": 0.7978073760638553, + "grad_norm": 1.956698940734242, + "learning_rate": 4.977329899531405e-07, + "loss": 0.121, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8296 + }, + { + "epoch": 0.7979035437803529, + "grad_norm": 1.8342835800927038, + "learning_rate": 4.972763156888472e-07, + "loss": 0.1202, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8297 + }, + { + "epoch": 0.7979997114968506, + "grad_norm": 1.1450982518596398, + "learning_rate": 4.968198278837074e-07, + "loss": 0.07, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8298 + }, + { + "epoch": 0.7980958792133481, + "grad_norm": 1.818033305216275, + "learning_rate": 4.963635265802219e-07, + "loss": 0.1309, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8299 + }, + { + "epoch": 0.7981920469298457, + "grad_norm": 2.0441459690277606, + "learning_rate": 4.959074118208726e-07, + "loss": 0.1141, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8300 + }, + { + "epoch": 0.7982882146463433, + "grad_norm": 1.6945544270962751, + "learning_rate": 4.954514836481253e-07, + "loss": 0.0953, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8301 + }, + { + "epoch": 0.7983843823628408, + "grad_norm": 1.9541016951225518, + "learning_rate": 4.949957421044283e-07, + "loss": 0.0988, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8302 + }, + { + "epoch": 0.7984805500793384, + "grad_norm": 1.6825644205187493, + "learning_rate": 4.945401872322131e-07, + "loss": 0.0969, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8303 + }, + { + "epoch": 0.7985767177958359, + "grad_norm": 1.504557076969137, + "learning_rate": 4.94084819073892e-07, + "loss": 0.094, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8304 + }, + { + "epoch": 0.7986728855123335, + "grad_norm": 2.8061978468872053, + "learning_rate": 4.936296376718616e-07, + "loss": 0.1346, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8305 + }, + { + "epoch": 0.7987690532288311, + "grad_norm": 1.5577886529995655, + "learning_rate": 4.931746430685003e-07, + "loss": 0.0928, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8306 + }, + { + "epoch": 0.7988652209453286, + "grad_norm": 1.7881375593615179, + "learning_rate": 4.927198353061705e-07, + "loss": 0.1171, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8307 + }, + { + "epoch": 0.7989613886618262, + "grad_norm": 1.5259558357983865, + "learning_rate": 4.922652144272141e-07, + "loss": 0.0938, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8308 + }, + { + "epoch": 0.7990575563783238, + "grad_norm": 2.882974666788037, + "learning_rate": 4.918107804739589e-07, + "loss": 0.1062, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8309 + }, + { + "epoch": 0.7991537240948213, + "grad_norm": 1.9527053844902755, + "learning_rate": 4.913565334887135e-07, + "loss": 0.1127, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8310 + }, + { + "epoch": 0.7992498918113189, + "grad_norm": 2.3646798924131005, + "learning_rate": 4.909024735137705e-07, + "loss": 0.1166, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8311 + }, + { + "epoch": 0.7993460595278166, + "grad_norm": 1.4790734701779755, + "learning_rate": 4.904486005914027e-07, + "loss": 0.0845, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8312 + }, + { + "epoch": 0.7994422272443141, + "grad_norm": 1.9675289313080346, + "learning_rate": 4.899949147638675e-07, + "loss": 0.1199, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8313 + }, + { + "epoch": 0.7995383949608117, + "grad_norm": 1.663553569339374, + "learning_rate": 4.895414160734046e-07, + "loss": 0.11, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8314 + }, + { + "epoch": 0.7996345626773093, + "grad_norm": 2.2176690882552053, + "learning_rate": 4.890881045622359e-07, + "loss": 0.1051, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8315 + }, + { + "epoch": 0.7997307303938068, + "grad_norm": 1.962595581640065, + "learning_rate": 4.886349802725662e-07, + "loss": 0.1333, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8316 + }, + { + "epoch": 0.7998268981103044, + "grad_norm": 1.789969289459987, + "learning_rate": 4.88182043246582e-07, + "loss": 0.1203, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8317 + }, + { + "epoch": 0.799923065826802, + "grad_norm": 2.032469421126099, + "learning_rate": 4.87729293526453e-07, + "loss": 0.1078, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8318 + }, + { + "epoch": 0.8000192335432995, + "grad_norm": 2.305944365447675, + "learning_rate": 4.872767311543319e-07, + "loss": 0.1321, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8319 + }, + { + "epoch": 0.8001154012597971, + "grad_norm": 1.8868059610946986, + "learning_rate": 4.868243561723535e-07, + "loss": 0.1164, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8320 + }, + { + "epoch": 0.8002115689762946, + "grad_norm": 1.645421461019952, + "learning_rate": 4.86372168622635e-07, + "loss": 0.0998, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8321 + }, + { + "epoch": 0.8003077366927922, + "grad_norm": 2.3726408947213264, + "learning_rate": 4.85920168547277e-07, + "loss": 0.1201, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8322 + }, + { + "epoch": 0.8004039044092898, + "grad_norm": 1.959532181679734, + "learning_rate": 4.854683559883607e-07, + "loss": 0.1411, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8323 + }, + { + "epoch": 0.8005000721257873, + "grad_norm": 1.74809243190127, + "learning_rate": 4.85016730987952e-07, + "loss": 0.1086, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8324 + }, + { + "epoch": 0.8005962398422849, + "grad_norm": 1.9390278711116358, + "learning_rate": 4.845652935880979e-07, + "loss": 0.1183, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8325 + }, + { + "epoch": 0.8006924075587826, + "grad_norm": 2.160040918021564, + "learning_rate": 4.84114043830829e-07, + "loss": 0.1054, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8326 + }, + { + "epoch": 0.8007885752752801, + "grad_norm": 2.018082803643542, + "learning_rate": 4.836629817581581e-07, + "loss": 0.1438, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8327 + }, + { + "epoch": 0.8008847429917777, + "grad_norm": 1.8592861055791499, + "learning_rate": 4.832121074120794e-07, + "loss": 0.1095, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8328 + }, + { + "epoch": 0.8009809107082753, + "grad_norm": 1.6508476661887082, + "learning_rate": 4.82761420834571e-07, + "loss": 0.121, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8329 + }, + { + "epoch": 0.8010770784247728, + "grad_norm": 2.237000363357904, + "learning_rate": 4.823109220675934e-07, + "loss": 0.1258, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8330 + }, + { + "epoch": 0.8011732461412704, + "grad_norm": 1.8960971038769587, + "learning_rate": 4.818606111530888e-07, + "loss": 0.1322, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8331 + }, + { + "epoch": 0.801269413857768, + "grad_norm": 1.7917114707735082, + "learning_rate": 4.814104881329829e-07, + "loss": 0.0759, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8332 + }, + { + "epoch": 0.8013655815742655, + "grad_norm": 1.9508527106226068, + "learning_rate": 4.809605530491837e-07, + "loss": 0.1331, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8333 + }, + { + "epoch": 0.8014617492907631, + "grad_norm": 1.750720897522112, + "learning_rate": 4.805108059435801e-07, + "loss": 0.1033, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8334 + }, + { + "epoch": 0.8015579170072606, + "grad_norm": 1.5578851789323471, + "learning_rate": 4.800612468580456e-07, + "loss": 0.1082, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8335 + }, + { + "epoch": 0.8016540847237582, + "grad_norm": 1.8180681420745075, + "learning_rate": 4.796118758344354e-07, + "loss": 0.138, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8336 + }, + { + "epoch": 0.8017502524402558, + "grad_norm": 1.8369228497250698, + "learning_rate": 4.791626929145879e-07, + "loss": 0.11, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8337 + }, + { + "epoch": 0.8018464201567533, + "grad_norm": 2.6434591823837668, + "learning_rate": 4.787136981403218e-07, + "loss": 0.142, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8338 + }, + { + "epoch": 0.8019425878732509, + "grad_norm": 1.7581464044717872, + "learning_rate": 4.782648915534408e-07, + "loss": 0.1256, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8339 + }, + { + "epoch": 0.8020387555897486, + "grad_norm": 1.7823128531794854, + "learning_rate": 4.7781627319573e-07, + "loss": 0.095, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8340 + }, + { + "epoch": 0.8021349233062461, + "grad_norm": 1.9969088490710818, + "learning_rate": 4.773678431089565e-07, + "loss": 0.1423, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8341 + }, + { + "epoch": 0.8022310910227437, + "grad_norm": 1.8113945802664633, + "learning_rate": 4.769196013348703e-07, + "loss": 0.1252, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8342 + }, + { + "epoch": 0.8023272587392413, + "grad_norm": 1.7935217458233221, + "learning_rate": 4.764715479152046e-07, + "loss": 0.1399, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8343 + }, + { + "epoch": 0.8024234264557388, + "grad_norm": 1.7204699356958955, + "learning_rate": 4.7602368289167477e-07, + "loss": 0.1229, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8344 + }, + { + "epoch": 0.8025195941722364, + "grad_norm": 1.8237446250585228, + "learning_rate": 4.7557600630597693e-07, + "loss": 0.1152, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8345 + }, + { + "epoch": 0.802615761888734, + "grad_norm": 1.8325403219594838, + "learning_rate": 4.7512851819979196e-07, + "loss": 0.1015, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8346 + }, + { + "epoch": 0.8027119296052315, + "grad_norm": 2.4228363570979745, + "learning_rate": 4.7468121861478174e-07, + "loss": 0.1076, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8347 + }, + { + "epoch": 0.8028080973217291, + "grad_norm": 1.8852011963946778, + "learning_rate": 4.742341075925916e-07, + "loss": 0.1287, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8348 + }, + { + "epoch": 0.8029042650382266, + "grad_norm": 2.044206967881725, + "learning_rate": 4.7378718517484853e-07, + "loss": 0.1135, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8349 + }, + { + "epoch": 0.8030004327547242, + "grad_norm": 1.694663693189138, + "learning_rate": 4.733404514031631e-07, + "loss": 0.1271, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8350 + }, + { + "epoch": 0.8030966004712218, + "grad_norm": 1.9340375003006485, + "learning_rate": 4.7289390631912626e-07, + "loss": 0.1381, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8351 + }, + { + "epoch": 0.8031927681877193, + "grad_norm": 2.842512047461323, + "learning_rate": 4.7244754996431273e-07, + "loss": 0.1469, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8352 + }, + { + "epoch": 0.8032889359042169, + "grad_norm": 1.8680974514011794, + "learning_rate": 4.720013823802799e-07, + "loss": 0.0976, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8353 + }, + { + "epoch": 0.8033851036207146, + "grad_norm": 1.803061542908901, + "learning_rate": 4.715554036085673e-07, + "loss": 0.1217, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8354 + }, + { + "epoch": 0.8034812713372121, + "grad_norm": 1.4931997168531859, + "learning_rate": 4.7110961369069757e-07, + "loss": 0.0991, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8355 + }, + { + "epoch": 0.8035774390537097, + "grad_norm": 2.7077095126232407, + "learning_rate": 4.70664012668173e-07, + "loss": 0.0944, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8356 + }, + { + "epoch": 0.8036736067702073, + "grad_norm": 1.9452131135278286, + "learning_rate": 4.7021860058248157e-07, + "loss": 0.1239, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8357 + }, + { + "epoch": 0.8037697744867048, + "grad_norm": 1.7100154483278516, + "learning_rate": 4.6977337747509233e-07, + "loss": 0.1395, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8358 + }, + { + "epoch": 0.8038659422032024, + "grad_norm": 1.628582692279275, + "learning_rate": 4.6932834338745654e-07, + "loss": 0.1185, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8359 + }, + { + "epoch": 0.8039621099197, + "grad_norm": 1.8441165001306343, + "learning_rate": 4.688834983610083e-07, + "loss": 0.1247, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8360 + }, + { + "epoch": 0.8040582776361975, + "grad_norm": 1.5199479506094906, + "learning_rate": 4.684388424371644e-07, + "loss": 0.1095, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8361 + }, + { + "epoch": 0.8041544453526951, + "grad_norm": 1.906341173671682, + "learning_rate": 4.6799437565732243e-07, + "loss": 0.122, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8362 + }, + { + "epoch": 0.8042506130691927, + "grad_norm": 1.8222196228931955, + "learning_rate": 4.6755009806286415e-07, + "loss": 0.1289, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8363 + }, + { + "epoch": 0.8043467807856902, + "grad_norm": 2.423761259559869, + "learning_rate": 4.6710600969515267e-07, + "loss": 0.1286, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8364 + }, + { + "epoch": 0.8044429485021878, + "grad_norm": 1.7663243610230595, + "learning_rate": 4.6666211059553436e-07, + "loss": 0.1293, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8365 + }, + { + "epoch": 0.8045391162186853, + "grad_norm": 1.63893687570856, + "learning_rate": 4.6621840080533804e-07, + "loss": 0.1333, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8366 + }, + { + "epoch": 0.8046352839351829, + "grad_norm": 1.550609137244736, + "learning_rate": 4.6577488036587264e-07, + "loss": 0.1095, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8367 + }, + { + "epoch": 0.8047314516516806, + "grad_norm": 1.7045019230936922, + "learning_rate": 4.653315493184321e-07, + "loss": 0.1243, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8368 + }, + { + "epoch": 0.8048276193681781, + "grad_norm": 1.8369676370412522, + "learning_rate": 4.6488840770429145e-07, + "loss": 0.1151, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8369 + }, + { + "epoch": 0.8049237870846757, + "grad_norm": 1.8133147454237515, + "learning_rate": 4.644454555647096e-07, + "loss": 0.1015, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8370 + }, + { + "epoch": 0.8050199548011733, + "grad_norm": 1.2733664529806712, + "learning_rate": 4.6400269294092496e-07, + "loss": 0.0958, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8371 + }, + { + "epoch": 0.8051161225176708, + "grad_norm": 2.013146438768265, + "learning_rate": 4.635601198741607e-07, + "loss": 0.1034, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8372 + }, + { + "epoch": 0.8052122902341684, + "grad_norm": 1.8328867636363972, + "learning_rate": 4.6311773640562245e-07, + "loss": 0.1036, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8373 + }, + { + "epoch": 0.805308457950666, + "grad_norm": 1.8508141218476617, + "learning_rate": 4.626755425764956e-07, + "loss": 0.0896, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8374 + }, + { + "epoch": 0.8054046256671635, + "grad_norm": 1.7985618242680252, + "learning_rate": 4.6223353842795053e-07, + "loss": 0.1125, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8375 + }, + { + "epoch": 0.8055007933836611, + "grad_norm": 1.5896016225412517, + "learning_rate": 4.617917240011394e-07, + "loss": 0.0897, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8376 + }, + { + "epoch": 0.8055969611001587, + "grad_norm": 1.5788878741964592, + "learning_rate": 4.6135009933719553e-07, + "loss": 0.1158, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8377 + }, + { + "epoch": 0.8056931288166562, + "grad_norm": 1.7596118728194892, + "learning_rate": 4.6090866447723677e-07, + "loss": 0.1177, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8378 + }, + { + "epoch": 0.8057892965331538, + "grad_norm": 2.2427145366770307, + "learning_rate": 4.604674194623604e-07, + "loss": 0.1326, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8379 + }, + { + "epoch": 0.8058854642496514, + "grad_norm": 2.0806397094724787, + "learning_rate": 4.6002636433364836e-07, + "loss": 0.1446, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8380 + }, + { + "epoch": 0.8059816319661489, + "grad_norm": 1.7359945402556054, + "learning_rate": 4.595854991321638e-07, + "loss": 0.1142, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8381 + }, + { + "epoch": 0.8060777996826466, + "grad_norm": 1.911734266383534, + "learning_rate": 4.5914482389895267e-07, + "loss": 0.1143, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8382 + }, + { + "epoch": 0.8061739673991442, + "grad_norm": 1.921431740743921, + "learning_rate": 4.5870433867504366e-07, + "loss": 0.1023, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8383 + }, + { + "epoch": 0.8062701351156417, + "grad_norm": 1.6964309734926424, + "learning_rate": 4.5826404350144597e-07, + "loss": 0.1103, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8384 + }, + { + "epoch": 0.8063663028321393, + "grad_norm": 1.8192757231522831, + "learning_rate": 4.57823938419153e-07, + "loss": 0.1242, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8385 + }, + { + "epoch": 0.8064624705486368, + "grad_norm": 2.763008304100484, + "learning_rate": 4.573840234691396e-07, + "loss": 0.1151, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8386 + }, + { + "epoch": 0.8065586382651344, + "grad_norm": 1.4706870198331068, + "learning_rate": 4.569442986923631e-07, + "loss": 0.1042, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8387 + }, + { + "epoch": 0.806654805981632, + "grad_norm": 1.6802858917076062, + "learning_rate": 4.565047641297632e-07, + "loss": 0.1173, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8388 + }, + { + "epoch": 0.8067509736981295, + "grad_norm": 1.4741236628977141, + "learning_rate": 4.560654198222622e-07, + "loss": 0.1069, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8389 + }, + { + "epoch": 0.8068471414146271, + "grad_norm": 1.3563806393884728, + "learning_rate": 4.5562626581076354e-07, + "loss": 0.0929, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8390 + }, + { + "epoch": 0.8069433091311247, + "grad_norm": 3.3391219795419818, + "learning_rate": 4.551873021361536e-07, + "loss": 0.1372, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8391 + }, + { + "epoch": 0.8070394768476222, + "grad_norm": 1.7473685981430245, + "learning_rate": 4.5474852883930163e-07, + "loss": 0.1107, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8392 + }, + { + "epoch": 0.8071356445641198, + "grad_norm": 1.3866015739657536, + "learning_rate": 4.543099459610586e-07, + "loss": 0.0902, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8393 + }, + { + "epoch": 0.8072318122806174, + "grad_norm": 1.8072187426594746, + "learning_rate": 4.538715535422583e-07, + "loss": 0.1383, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8394 + }, + { + "epoch": 0.8073279799971149, + "grad_norm": 1.5566749788485024, + "learning_rate": 4.534333516237149e-07, + "loss": 0.1198, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8395 + }, + { + "epoch": 0.8074241477136126, + "grad_norm": 2.1532300068088905, + "learning_rate": 4.52995340246227e-07, + "loss": 0.1281, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8396 + }, + { + "epoch": 0.8075203154301102, + "grad_norm": 2.253154728514516, + "learning_rate": 4.5255751945057465e-07, + "loss": 0.1286, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8397 + }, + { + "epoch": 0.8076164831466077, + "grad_norm": 1.370941452282033, + "learning_rate": 4.5211988927752026e-07, + "loss": 0.0877, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8398 + }, + { + "epoch": 0.8077126508631053, + "grad_norm": 1.5594023243468214, + "learning_rate": 4.516824497678085e-07, + "loss": 0.1007, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8399 + }, + { + "epoch": 0.8078088185796028, + "grad_norm": 1.5606871923404073, + "learning_rate": 4.512452009621665e-07, + "loss": 0.0974, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8400 + }, + { + "epoch": 0.8079049862961004, + "grad_norm": 1.5741487011709432, + "learning_rate": 4.5080814290130254e-07, + "loss": 0.0974, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8401 + }, + { + "epoch": 0.808001154012598, + "grad_norm": 2.4662864848415254, + "learning_rate": 4.503712756259082e-07, + "loss": 0.0974, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8402 + }, + { + "epoch": 0.8080973217290955, + "grad_norm": 2.09197099290383, + "learning_rate": 4.499345991766577e-07, + "loss": 0.1313, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8403 + }, + { + "epoch": 0.8081934894455931, + "grad_norm": 1.551759322904464, + "learning_rate": 4.494981135942056e-07, + "loss": 0.1256, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8404 + }, + { + "epoch": 0.8082896571620907, + "grad_norm": 1.746214905241482, + "learning_rate": 4.490618189191909e-07, + "loss": 0.0885, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8405 + }, + { + "epoch": 0.8083858248785882, + "grad_norm": 1.7695980292832363, + "learning_rate": 4.4862571519223406e-07, + "loss": 0.1065, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8406 + }, + { + "epoch": 0.8084819925950858, + "grad_norm": 1.4499430612987716, + "learning_rate": 4.4818980245393635e-07, + "loss": 0.0897, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8407 + }, + { + "epoch": 0.8085781603115834, + "grad_norm": 2.081456441619079, + "learning_rate": 4.4775408074488326e-07, + "loss": 0.1276, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8408 + }, + { + "epoch": 0.8086743280280809, + "grad_norm": 1.8361547173396622, + "learning_rate": 4.473185501056415e-07, + "loss": 0.124, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8409 + }, + { + "epoch": 0.8087704957445786, + "grad_norm": 1.4254644694619272, + "learning_rate": 4.468832105767604e-07, + "loss": 0.0973, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8410 + }, + { + "epoch": 0.8088666634610762, + "grad_norm": 1.498330953631033, + "learning_rate": 4.4644806219877183e-07, + "loss": 0.1068, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8411 + }, + { + "epoch": 0.8089628311775737, + "grad_norm": 2.2750182574534623, + "learning_rate": 4.4601310501218795e-07, + "loss": 0.1503, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8412 + }, + { + "epoch": 0.8090589988940713, + "grad_norm": 2.4106271658376914, + "learning_rate": 4.4557833905750523e-07, + "loss": 0.1477, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8413 + }, + { + "epoch": 0.8091551666105689, + "grad_norm": 2.975595537974761, + "learning_rate": 4.451437643752013e-07, + "loss": 0.1192, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8414 + }, + { + "epoch": 0.8092513343270664, + "grad_norm": 2.968219143104629, + "learning_rate": 4.447093810057368e-07, + "loss": 0.1244, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8415 + }, + { + "epoch": 0.809347502043564, + "grad_norm": 1.9094746343647717, + "learning_rate": 4.4427518898955383e-07, + "loss": 0.1209, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8416 + }, + { + "epoch": 0.8094436697600615, + "grad_norm": 1.92681440431072, + "learning_rate": 4.4384118836707735e-07, + "loss": 0.1127, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8417 + }, + { + "epoch": 0.8095398374765591, + "grad_norm": 1.727645249723718, + "learning_rate": 4.434073791787127e-07, + "loss": 0.1191, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8418 + }, + { + "epoch": 0.8096360051930567, + "grad_norm": 1.3526945446057839, + "learning_rate": 4.4297376146484966e-07, + "loss": 0.0866, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8419 + }, + { + "epoch": 0.8097321729095542, + "grad_norm": 2.0692849640349626, + "learning_rate": 4.4254033526585917e-07, + "loss": 0.1187, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8420 + }, + { + "epoch": 0.8098283406260518, + "grad_norm": 1.7714675657345718, + "learning_rate": 4.4210710062209424e-07, + "loss": 0.1105, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8421 + }, + { + "epoch": 0.8099245083425494, + "grad_norm": 1.7019110900885583, + "learning_rate": 4.4167405757389093e-07, + "loss": 0.1012, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8422 + }, + { + "epoch": 0.8100206760590469, + "grad_norm": 1.925553193619664, + "learning_rate": 4.412412061615654e-07, + "loss": 0.1292, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8423 + }, + { + "epoch": 0.8101168437755446, + "grad_norm": 1.6843591219832148, + "learning_rate": 4.4080854642541833e-07, + "loss": 0.1042, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8424 + }, + { + "epoch": 0.8102130114920422, + "grad_norm": 2.9090715573036015, + "learning_rate": 4.403760784057312e-07, + "loss": 0.0928, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8425 + }, + { + "epoch": 0.8103091792085397, + "grad_norm": 2.1959829748244446, + "learning_rate": 4.399438021427679e-07, + "loss": 0.1596, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8426 + }, + { + "epoch": 0.8104053469250373, + "grad_norm": 1.5193056700054346, + "learning_rate": 4.395117176767749e-07, + "loss": 0.0949, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8427 + }, + { + "epoch": 0.8105015146415349, + "grad_norm": 1.9716870093022592, + "learning_rate": 4.390798250479808e-07, + "loss": 0.1038, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8428 + }, + { + "epoch": 0.8105976823580324, + "grad_norm": 2.101038322878728, + "learning_rate": 4.386481242965951e-07, + "loss": 0.1087, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8429 + }, + { + "epoch": 0.81069385007453, + "grad_norm": 1.5542408819763727, + "learning_rate": 4.382166154628106e-07, + "loss": 0.1182, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8430 + }, + { + "epoch": 0.8107900177910276, + "grad_norm": 1.5442518101277782, + "learning_rate": 4.37785298586802e-07, + "loss": 0.0968, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8431 + }, + { + "epoch": 0.8108861855075251, + "grad_norm": 1.8601088780104578, + "learning_rate": 4.373541737087264e-07, + "loss": 0.146, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8432 + }, + { + "epoch": 0.8109823532240227, + "grad_norm": 2.0492869878503024, + "learning_rate": 4.369232408687224e-07, + "loss": 0.1256, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8433 + }, + { + "epoch": 0.8110785209405202, + "grad_norm": 1.6478928654001317, + "learning_rate": 4.36492500106912e-07, + "loss": 0.1176, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8434 + }, + { + "epoch": 0.8111746886570178, + "grad_norm": 1.7477720661474032, + "learning_rate": 4.360619514633968e-07, + "loss": 0.1093, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8435 + }, + { + "epoch": 0.8112708563735154, + "grad_norm": 2.1741501337634377, + "learning_rate": 4.3563159497826276e-07, + "loss": 0.1252, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8436 + }, + { + "epoch": 0.8113670240900129, + "grad_norm": 1.7452826255350737, + "learning_rate": 4.3520143069157803e-07, + "loss": 0.1095, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8437 + }, + { + "epoch": 0.8114631918065106, + "grad_norm": 1.9363341408905137, + "learning_rate": 4.34771458643391e-07, + "loss": 0.109, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8438 + }, + { + "epoch": 0.8115593595230082, + "grad_norm": 2.561509470024795, + "learning_rate": 4.343416788737334e-07, + "loss": 0.1167, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8439 + }, + { + "epoch": 0.8116555272395057, + "grad_norm": 3.13648483098639, + "learning_rate": 4.3391209142261996e-07, + "loss": 0.1489, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8440 + }, + { + "epoch": 0.8117516949560033, + "grad_norm": 1.5790160142446852, + "learning_rate": 4.334826963300451e-07, + "loss": 0.0891, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8441 + }, + { + "epoch": 0.8118478626725009, + "grad_norm": 1.8454841280534786, + "learning_rate": 4.330534936359873e-07, + "loss": 0.1422, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8442 + }, + { + "epoch": 0.8119440303889984, + "grad_norm": 1.6613817987622583, + "learning_rate": 4.3262448338040665e-07, + "loss": 0.1042, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8443 + }, + { + "epoch": 0.812040198105496, + "grad_norm": 1.6502476500942678, + "learning_rate": 4.3219566560324497e-07, + "loss": 0.088, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8444 + }, + { + "epoch": 0.8121363658219936, + "grad_norm": 1.7774687227094526, + "learning_rate": 4.317670403444271e-07, + "loss": 0.1184, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8445 + }, + { + "epoch": 0.8122325335384911, + "grad_norm": 1.599791598615498, + "learning_rate": 4.3133860764385804e-07, + "loss": 0.1216, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8446 + }, + { + "epoch": 0.8123287012549887, + "grad_norm": 1.8219581993174307, + "learning_rate": 4.309103675414267e-07, + "loss": 0.0995, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8447 + }, + { + "epoch": 0.8124248689714862, + "grad_norm": 2.3690935452829343, + "learning_rate": 4.3048232007700363e-07, + "loss": 0.1126, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8448 + }, + { + "epoch": 0.8125210366879838, + "grad_norm": 1.4061389099910568, + "learning_rate": 4.300544652904406e-07, + "loss": 0.0906, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8449 + }, + { + "epoch": 0.8126172044044814, + "grad_norm": 1.8721398070451039, + "learning_rate": 4.2962680322157335e-07, + "loss": 0.1151, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8450 + }, + { + "epoch": 0.8127133721209789, + "grad_norm": 2.472886907652462, + "learning_rate": 4.291993339102171e-07, + "loss": 0.1052, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8451 + }, + { + "epoch": 0.8128095398374766, + "grad_norm": 1.88559267438891, + "learning_rate": 4.287720573961707e-07, + "loss": 0.1004, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8452 + }, + { + "epoch": 0.8129057075539742, + "grad_norm": 1.8524569832476059, + "learning_rate": 4.2834497371921496e-07, + "loss": 0.1316, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8453 + }, + { + "epoch": 0.8130018752704717, + "grad_norm": 1.5773190426472856, + "learning_rate": 4.279180829191126e-07, + "loss": 0.1105, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8454 + }, + { + "epoch": 0.8130980429869693, + "grad_norm": 1.5252788491214213, + "learning_rate": 4.2749138503560854e-07, + "loss": 0.0954, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8455 + }, + { + "epoch": 0.8131942107034669, + "grad_norm": 1.824114589911637, + "learning_rate": 4.2706488010842957e-07, + "loss": 0.121, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8456 + }, + { + "epoch": 0.8132903784199644, + "grad_norm": 2.7990626870557485, + "learning_rate": 4.266385681772839e-07, + "loss": 0.1293, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8457 + }, + { + "epoch": 0.813386546136462, + "grad_norm": 1.787554330152776, + "learning_rate": 4.2621244928186254e-07, + "loss": 0.0995, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8458 + }, + { + "epoch": 0.8134827138529596, + "grad_norm": 1.8806863763426243, + "learning_rate": 4.2578652346183856e-07, + "loss": 0.1046, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8459 + }, + { + "epoch": 0.8135788815694571, + "grad_norm": 1.5076274697509953, + "learning_rate": 4.2536079075686687e-07, + "loss": 0.0953, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8460 + }, + { + "epoch": 0.8136750492859547, + "grad_norm": 1.8572734414202232, + "learning_rate": 4.2493525120658437e-07, + "loss": 0.1421, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8461 + }, + { + "epoch": 0.8137712170024523, + "grad_norm": 1.7724617362066366, + "learning_rate": 4.245099048506102e-07, + "loss": 0.1228, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8462 + }, + { + "epoch": 0.8138673847189498, + "grad_norm": 2.03158099885635, + "learning_rate": 4.240847517285446e-07, + "loss": 0.1221, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8463 + }, + { + "epoch": 0.8139635524354474, + "grad_norm": 1.657856656799265, + "learning_rate": 4.2365979187997094e-07, + "loss": 0.116, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8464 + }, + { + "epoch": 0.8140597201519449, + "grad_norm": 2.3534391549036053, + "learning_rate": 4.232350253444539e-07, + "loss": 0.1335, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8465 + }, + { + "epoch": 0.8141558878684426, + "grad_norm": 2.00317831797635, + "learning_rate": 4.228104521615406e-07, + "loss": 0.1204, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8466 + }, + { + "epoch": 0.8142520555849402, + "grad_norm": 1.6644674203149414, + "learning_rate": 4.2238607237076094e-07, + "loss": 0.1026, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8467 + }, + { + "epoch": 0.8143482233014377, + "grad_norm": 2.067730682711396, + "learning_rate": 4.2196188601162427e-07, + "loss": 0.0881, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8468 + }, + { + "epoch": 0.8144443910179353, + "grad_norm": 1.8983063823023496, + "learning_rate": 4.2153789312362386e-07, + "loss": 0.134, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8469 + }, + { + "epoch": 0.8145405587344329, + "grad_norm": 1.917355077252027, + "learning_rate": 4.2111409374623604e-07, + "loss": 0.1315, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8470 + }, + { + "epoch": 0.8146367264509304, + "grad_norm": 1.6615801962811156, + "learning_rate": 4.206904879189158e-07, + "loss": 0.1122, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8471 + }, + { + "epoch": 0.814732894167428, + "grad_norm": 1.7068582318239127, + "learning_rate": 4.202670756811028e-07, + "loss": 0.106, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8472 + }, + { + "epoch": 0.8148290618839256, + "grad_norm": 1.994868578294155, + "learning_rate": 4.198438570722188e-07, + "loss": 0.0927, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8473 + }, + { + "epoch": 0.8149252296004231, + "grad_norm": 2.591875667938927, + "learning_rate": 4.1942083213166506e-07, + "loss": 0.11, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8474 + }, + { + "epoch": 0.8150213973169207, + "grad_norm": 2.4566984304506807, + "learning_rate": 4.189980008988273e-07, + "loss": 0.1389, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8475 + }, + { + "epoch": 0.8151175650334183, + "grad_norm": 1.4560736490390533, + "learning_rate": 4.185753634130718e-07, + "loss": 0.1035, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8476 + }, + { + "epoch": 0.8152137327499158, + "grad_norm": 1.929965636578634, + "learning_rate": 4.181529197137477e-07, + "loss": 0.1123, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8477 + }, + { + "epoch": 0.8153099004664134, + "grad_norm": 1.797928750440933, + "learning_rate": 4.1773066984018645e-07, + "loss": 0.1271, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8478 + }, + { + "epoch": 0.815406068182911, + "grad_norm": 2.4645055094399755, + "learning_rate": 4.1730861383169897e-07, + "loss": 0.1047, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8479 + }, + { + "epoch": 0.8155022358994086, + "grad_norm": 1.8490312632566497, + "learning_rate": 4.168867517275807e-07, + "loss": 0.0991, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8480 + }, + { + "epoch": 0.8155984036159062, + "grad_norm": 1.8451224790763192, + "learning_rate": 4.16465083567108e-07, + "loss": 0.1171, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8481 + }, + { + "epoch": 0.8156945713324038, + "grad_norm": 1.549480240130278, + "learning_rate": 4.160436093895398e-07, + "loss": 0.0988, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8482 + }, + { + "epoch": 0.8157907390489013, + "grad_norm": 1.8036349880619624, + "learning_rate": 4.1562232923411603e-07, + "loss": 0.1269, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8483 + }, + { + "epoch": 0.8158869067653989, + "grad_norm": 1.9098990132645357, + "learning_rate": 4.152012431400601e-07, + "loss": 0.1098, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8484 + }, + { + "epoch": 0.8159830744818964, + "grad_norm": 1.9167442946429132, + "learning_rate": 4.147803511465748e-07, + "loss": 0.1257, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8485 + }, + { + "epoch": 0.816079242198394, + "grad_norm": 1.5803547362341006, + "learning_rate": 4.143596532928468e-07, + "loss": 0.1031, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8486 + }, + { + "epoch": 0.8161754099148916, + "grad_norm": 1.9340983768429911, + "learning_rate": 4.139391496180448e-07, + "loss": 0.1266, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8487 + }, + { + "epoch": 0.8162715776313891, + "grad_norm": 2.7613816718498683, + "learning_rate": 4.1351884016131826e-07, + "loss": 0.1081, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8488 + }, + { + "epoch": 0.8163677453478867, + "grad_norm": 1.5808920129632462, + "learning_rate": 4.1309872496179934e-07, + "loss": 0.087, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8489 + }, + { + "epoch": 0.8164639130643843, + "grad_norm": 2.192337264440761, + "learning_rate": 4.1267880405860273e-07, + "loss": 0.108, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8490 + }, + { + "epoch": 0.8165600807808818, + "grad_norm": 2.12341461489623, + "learning_rate": 4.1225907749082304e-07, + "loss": 0.1053, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8491 + }, + { + "epoch": 0.8166562484973794, + "grad_norm": 1.8502012911239736, + "learning_rate": 4.118395452975382e-07, + "loss": 0.1068, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8492 + }, + { + "epoch": 0.816752416213877, + "grad_norm": 2.0622031718027545, + "learning_rate": 4.1142020751780836e-07, + "loss": 0.1331, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8493 + }, + { + "epoch": 0.8168485839303746, + "grad_norm": 1.88182302287473, + "learning_rate": 4.110010641906745e-07, + "loss": 0.1358, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8494 + }, + { + "epoch": 0.8169447516468722, + "grad_norm": 1.866403707710691, + "learning_rate": 4.105821153551609e-07, + "loss": 0.0941, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8495 + }, + { + "epoch": 0.8170409193633698, + "grad_norm": 2.2849676934529533, + "learning_rate": 4.101633610502717e-07, + "loss": 0.1072, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8496 + }, + { + "epoch": 0.8171370870798673, + "grad_norm": 2.393860559718729, + "learning_rate": 4.097448013149949e-07, + "loss": 0.1089, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8497 + }, + { + "epoch": 0.8172332547963649, + "grad_norm": 1.8190086254739353, + "learning_rate": 4.09326436188299e-07, + "loss": 0.1378, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8498 + }, + { + "epoch": 0.8173294225128624, + "grad_norm": 1.742590223476741, + "learning_rate": 4.0890826570913533e-07, + "loss": 0.0996, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8499 + }, + { + "epoch": 0.81742559022936, + "grad_norm": 3.067397704194101, + "learning_rate": 4.0849028991643726e-07, + "loss": 0.1046, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8500 + }, + { + "epoch": 0.8175217579458576, + "grad_norm": 3.2589124942979715, + "learning_rate": 4.080725088491183e-07, + "loss": 0.1094, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8501 + }, + { + "epoch": 0.8176179256623551, + "grad_norm": 1.6791012950439905, + "learning_rate": 4.076549225460757e-07, + "loss": 0.078, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8502 + }, + { + "epoch": 0.8177140933788527, + "grad_norm": 1.7999269397343454, + "learning_rate": 4.0723753104618874e-07, + "loss": 0.1078, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8503 + }, + { + "epoch": 0.8178102610953503, + "grad_norm": 1.4923028907929767, + "learning_rate": 4.0682033438831593e-07, + "loss": 0.0958, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8504 + }, + { + "epoch": 0.8179064288118478, + "grad_norm": 1.828562419945277, + "learning_rate": 4.064033326113004e-07, + "loss": 0.1143, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8505 + }, + { + "epoch": 0.8180025965283454, + "grad_norm": 1.652956946296037, + "learning_rate": 4.059865257539666e-07, + "loss": 0.1144, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8506 + }, + { + "epoch": 0.818098764244843, + "grad_norm": 1.6056203479456677, + "learning_rate": 4.0556991385511973e-07, + "loss": 0.1073, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8507 + }, + { + "epoch": 0.8181949319613406, + "grad_norm": 1.7244601288505854, + "learning_rate": 4.0515349695354726e-07, + "loss": 0.1191, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8508 + }, + { + "epoch": 0.8182910996778382, + "grad_norm": 2.8423024305803875, + "learning_rate": 4.047372750880196e-07, + "loss": 0.1248, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8509 + }, + { + "epoch": 0.8183872673943358, + "grad_norm": 1.7658994319377712, + "learning_rate": 4.0432124829728774e-07, + "loss": 0.1186, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8510 + }, + { + "epoch": 0.8184834351108333, + "grad_norm": 1.6180965999508843, + "learning_rate": 4.0390541662008483e-07, + "loss": 0.0989, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8511 + }, + { + "epoch": 0.8185796028273309, + "grad_norm": 1.6653111341588023, + "learning_rate": 4.034897800951268e-07, + "loss": 0.1268, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8512 + }, + { + "epoch": 0.8186757705438285, + "grad_norm": 1.3478153209712898, + "learning_rate": 4.030743387611091e-07, + "loss": 0.0832, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8513 + }, + { + "epoch": 0.818771938260326, + "grad_norm": 1.5830478734199003, + "learning_rate": 4.026590926567117e-07, + "loss": 0.0986, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8514 + }, + { + "epoch": 0.8188681059768236, + "grad_norm": 1.7395776405661096, + "learning_rate": 4.0224404182059443e-07, + "loss": 0.0982, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8515 + }, + { + "epoch": 0.8189642736933211, + "grad_norm": 3.1552467273688505, + "learning_rate": 4.0182918629140004e-07, + "loss": 0.1597, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8516 + }, + { + "epoch": 0.8190604414098187, + "grad_norm": 1.626301751622677, + "learning_rate": 4.014145261077526e-07, + "loss": 0.1083, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8517 + }, + { + "epoch": 0.8191566091263163, + "grad_norm": 3.8496155503965355, + "learning_rate": 4.01000061308259e-07, + "loss": 0.1447, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8518 + }, + { + "epoch": 0.8192527768428138, + "grad_norm": 1.4396690035486164, + "learning_rate": 4.0058579193150537e-07, + "loss": 0.0935, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8519 + }, + { + "epoch": 0.8193489445593114, + "grad_norm": 1.4728405699359504, + "learning_rate": 4.0017171801606245e-07, + "loss": 0.0976, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8520 + }, + { + "epoch": 0.819445112275809, + "grad_norm": 1.9873867996149168, + "learning_rate": 3.997578396004817e-07, + "loss": 0.1096, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8521 + }, + { + "epoch": 0.8195412799923066, + "grad_norm": 1.662527122911105, + "learning_rate": 3.9934415672329585e-07, + "loss": 0.106, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8522 + }, + { + "epoch": 0.8196374477088042, + "grad_norm": 3.603661730926341, + "learning_rate": 3.989306694230208e-07, + "loss": 0.1378, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8523 + }, + { + "epoch": 0.8197336154253018, + "grad_norm": 1.3579933117566707, + "learning_rate": 3.9851737773815206e-07, + "loss": 0.0841, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8524 + }, + { + "epoch": 0.8198297831417993, + "grad_norm": 1.944848105847872, + "learning_rate": 3.9810428170716924e-07, + "loss": 0.105, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8525 + }, + { + "epoch": 0.8199259508582969, + "grad_norm": 2.012083064363883, + "learning_rate": 3.9769138136853206e-07, + "loss": 0.145, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8526 + }, + { + "epoch": 0.8200221185747945, + "grad_norm": 1.8831353260967887, + "learning_rate": 3.9727867676068343e-07, + "loss": 0.1283, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8527 + }, + { + "epoch": 0.820118286291292, + "grad_norm": 1.876517216413104, + "learning_rate": 3.9686616792204677e-07, + "loss": 0.1103, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8528 + }, + { + "epoch": 0.8202144540077896, + "grad_norm": 2.016892729599391, + "learning_rate": 3.964538548910288e-07, + "loss": 0.0889, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8529 + }, + { + "epoch": 0.8203106217242871, + "grad_norm": 1.8485597699913787, + "learning_rate": 3.9604173770601524e-07, + "loss": 0.124, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8530 + }, + { + "epoch": 0.8204067894407847, + "grad_norm": 2.316451856327135, + "learning_rate": 3.956298164053765e-07, + "loss": 0.1009, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8531 + }, + { + "epoch": 0.8205029571572823, + "grad_norm": 1.6907030662539242, + "learning_rate": 3.952180910274633e-07, + "loss": 0.1246, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8532 + }, + { + "epoch": 0.8205991248737798, + "grad_norm": 1.621343007149578, + "learning_rate": 3.9480656161060863e-07, + "loss": 0.0893, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8533 + }, + { + "epoch": 0.8206952925902774, + "grad_norm": 1.6928949955931587, + "learning_rate": 3.9439522819312736e-07, + "loss": 0.109, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8534 + }, + { + "epoch": 0.820791460306775, + "grad_norm": 1.9793738145101183, + "learning_rate": 3.939840908133147e-07, + "loss": 0.1035, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8535 + }, + { + "epoch": 0.8208876280232726, + "grad_norm": 1.7448018002947403, + "learning_rate": 3.9357314950944943e-07, + "loss": 0.1141, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8536 + }, + { + "epoch": 0.8209837957397702, + "grad_norm": 1.5929731638672344, + "learning_rate": 3.931624043197918e-07, + "loss": 0.0994, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8537 + }, + { + "epoch": 0.8210799634562678, + "grad_norm": 1.9689281689607319, + "learning_rate": 3.927518552825821e-07, + "loss": 0.1153, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8538 + }, + { + "epoch": 0.8211761311727653, + "grad_norm": 1.829792839940679, + "learning_rate": 3.923415024360441e-07, + "loss": 0.1138, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8539 + }, + { + "epoch": 0.8212722988892629, + "grad_norm": 1.467179047604819, + "learning_rate": 3.9193134581838375e-07, + "loss": 0.115, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8540 + }, + { + "epoch": 0.8213684666057605, + "grad_norm": 2.2700767249378995, + "learning_rate": 3.915213854677863e-07, + "loss": 0.1263, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8541 + }, + { + "epoch": 0.821464634322258, + "grad_norm": 2.0107401527308935, + "learning_rate": 3.9111162142242085e-07, + "loss": 0.1333, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8542 + }, + { + "epoch": 0.8215608020387556, + "grad_norm": 2.474605242330871, + "learning_rate": 3.907020537204373e-07, + "loss": 0.1207, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8543 + }, + { + "epoch": 0.8216569697552532, + "grad_norm": 2.2106459662077613, + "learning_rate": 3.902926823999681e-07, + "loss": 0.1542, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8544 + }, + { + "epoch": 0.8217531374717507, + "grad_norm": 2.905386345809643, + "learning_rate": 3.898835074991264e-07, + "loss": 0.1238, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8545 + }, + { + "epoch": 0.8218493051882483, + "grad_norm": 1.8920746723280177, + "learning_rate": 3.8947452905600844e-07, + "loss": 0.1153, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8546 + }, + { + "epoch": 0.8219454729047458, + "grad_norm": 1.7050944512955297, + "learning_rate": 3.8906574710869006e-07, + "loss": 0.1093, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8547 + }, + { + "epoch": 0.8220416406212434, + "grad_norm": 1.7733013282668126, + "learning_rate": 3.8865716169523004e-07, + "loss": 0.1386, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8548 + }, + { + "epoch": 0.822137808337741, + "grad_norm": 2.0540014607859733, + "learning_rate": 3.8824877285366965e-07, + "loss": 0.1322, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8549 + }, + { + "epoch": 0.8222339760542386, + "grad_norm": 1.9507707295454109, + "learning_rate": 3.8784058062203034e-07, + "loss": 0.0991, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8550 + }, + { + "epoch": 0.8223301437707362, + "grad_norm": 1.6636516590085186, + "learning_rate": 3.874325850383168e-07, + "loss": 0.0951, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8551 + }, + { + "epoch": 0.8224263114872338, + "grad_norm": 2.002178009421854, + "learning_rate": 3.8702478614051353e-07, + "loss": 0.1032, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8552 + }, + { + "epoch": 0.8225224792037313, + "grad_norm": 1.6575153878047941, + "learning_rate": 3.8661718396658805e-07, + "loss": 0.1087, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8553 + }, + { + "epoch": 0.8226186469202289, + "grad_norm": 1.578695237096981, + "learning_rate": 3.8620977855448937e-07, + "loss": 0.1076, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8554 + }, + { + "epoch": 0.8227148146367265, + "grad_norm": 1.523423138135197, + "learning_rate": 3.8580256994214805e-07, + "loss": 0.1031, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8555 + }, + { + "epoch": 0.822810982353224, + "grad_norm": 1.8042282756851966, + "learning_rate": 3.853955581674765e-07, + "loss": 0.1131, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8556 + }, + { + "epoch": 0.8229071500697216, + "grad_norm": 1.9354306642826085, + "learning_rate": 3.849887432683691e-07, + "loss": 0.0996, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8557 + }, + { + "epoch": 0.8230033177862192, + "grad_norm": 1.6627121752064233, + "learning_rate": 3.845821252827003e-07, + "loss": 0.1019, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8558 + }, + { + "epoch": 0.8230994855027167, + "grad_norm": 1.940020932757057, + "learning_rate": 3.8417570424832795e-07, + "loss": 0.1315, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8559 + }, + { + "epoch": 0.8231956532192143, + "grad_norm": 1.64250080883025, + "learning_rate": 3.8376948020309083e-07, + "loss": 0.1078, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8560 + }, + { + "epoch": 0.8232918209357118, + "grad_norm": 1.6646060150953579, + "learning_rate": 3.833634531848099e-07, + "loss": 0.1315, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8561 + }, + { + "epoch": 0.8233879886522094, + "grad_norm": 1.9571695072639441, + "learning_rate": 3.829576232312876e-07, + "loss": 0.1254, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8562 + }, + { + "epoch": 0.823484156368707, + "grad_norm": 1.3606306639260213, + "learning_rate": 3.8255199038030685e-07, + "loss": 0.0816, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8563 + }, + { + "epoch": 0.8235803240852047, + "grad_norm": 1.7860930988283343, + "learning_rate": 3.8214655466963377e-07, + "loss": 0.1334, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8564 + }, + { + "epoch": 0.8236764918017022, + "grad_norm": 1.4701169455077392, + "learning_rate": 3.8174131613701566e-07, + "loss": 0.0923, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8565 + }, + { + "epoch": 0.8237726595181998, + "grad_norm": 1.81322732829077, + "learning_rate": 3.81336274820181e-07, + "loss": 0.1233, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8566 + }, + { + "epoch": 0.8238688272346973, + "grad_norm": 2.0567250481823325, + "learning_rate": 3.8093143075684126e-07, + "loss": 0.1254, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8567 + }, + { + "epoch": 0.8239649949511949, + "grad_norm": 3.3801905051577403, + "learning_rate": 3.805267839846874e-07, + "loss": 0.1431, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8568 + }, + { + "epoch": 0.8240611626676925, + "grad_norm": 1.745549828755204, + "learning_rate": 3.8012233454139317e-07, + "loss": 0.1321, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8569 + }, + { + "epoch": 0.82415733038419, + "grad_norm": 2.5614819874559966, + "learning_rate": 3.797180824646152e-07, + "loss": 0.1292, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8570 + }, + { + "epoch": 0.8242534981006876, + "grad_norm": 2.1102418771712483, + "learning_rate": 3.793140277919888e-07, + "loss": 0.1316, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8571 + }, + { + "epoch": 0.8243496658171852, + "grad_norm": 1.8146666027608538, + "learning_rate": 3.789101705611337e-07, + "loss": 0.1089, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8572 + }, + { + "epoch": 0.8244458335336827, + "grad_norm": 1.5733744625033212, + "learning_rate": 3.785065108096495e-07, + "loss": 0.0946, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8573 + }, + { + "epoch": 0.8245420012501803, + "grad_norm": 1.9932859277335968, + "learning_rate": 3.7810304857511914e-07, + "loss": 0.138, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8574 + }, + { + "epoch": 0.8246381689666779, + "grad_norm": 2.236761342579564, + "learning_rate": 3.7769978389510467e-07, + "loss": 0.1241, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8575 + }, + { + "epoch": 0.8247343366831754, + "grad_norm": 1.604367302610455, + "learning_rate": 3.7729671680715175e-07, + "loss": 0.1128, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8576 + }, + { + "epoch": 0.824830504399673, + "grad_norm": 1.7438430541668488, + "learning_rate": 3.768938473487874e-07, + "loss": 0.112, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8577 + }, + { + "epoch": 0.8249266721161707, + "grad_norm": 1.624645806289028, + "learning_rate": 3.764911755575193e-07, + "loss": 0.116, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8578 + }, + { + "epoch": 0.8250228398326682, + "grad_norm": 1.5371609301877653, + "learning_rate": 3.760887014708381e-07, + "loss": 0.1074, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8579 + }, + { + "epoch": 0.8251190075491658, + "grad_norm": 1.5548863904639734, + "learning_rate": 3.756864251262143e-07, + "loss": 0.1071, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8580 + }, + { + "epoch": 0.8252151752656633, + "grad_norm": 2.978921435355839, + "learning_rate": 3.752843465611017e-07, + "loss": 0.1472, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8581 + }, + { + "epoch": 0.8253113429821609, + "grad_norm": 2.0500998180556054, + "learning_rate": 3.7488246581293434e-07, + "loss": 0.1265, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8582 + }, + { + "epoch": 0.8254075106986585, + "grad_norm": 2.796228429725281, + "learning_rate": 3.744807829191288e-07, + "loss": 0.1209, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8583 + }, + { + "epoch": 0.825503678415156, + "grad_norm": 2.021998249438582, + "learning_rate": 3.7407929791708264e-07, + "loss": 0.1163, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8584 + }, + { + "epoch": 0.8255998461316536, + "grad_norm": 2.4245540999805066, + "learning_rate": 3.736780108441762e-07, + "loss": 0.1162, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8585 + }, + { + "epoch": 0.8256960138481512, + "grad_norm": 1.8547542538595274, + "learning_rate": 3.732769217377691e-07, + "loss": 0.0903, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8586 + }, + { + "epoch": 0.8257921815646487, + "grad_norm": 1.676660757062024, + "learning_rate": 3.728760306352042e-07, + "loss": 0.109, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8587 + }, + { + "epoch": 0.8258883492811463, + "grad_norm": 2.179034309500725, + "learning_rate": 3.7247533757380604e-07, + "loss": 0.1144, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8588 + }, + { + "epoch": 0.8259845169976439, + "grad_norm": 2.7266903278717254, + "learning_rate": 3.720748425908796e-07, + "loss": 0.1234, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8589 + }, + { + "epoch": 0.8260806847141414, + "grad_norm": 1.9084554776066218, + "learning_rate": 3.716745457237134e-07, + "loss": 0.1068, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8590 + }, + { + "epoch": 0.826176852430639, + "grad_norm": 1.3209607063594448, + "learning_rate": 3.7127444700957444e-07, + "loss": 0.0638, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8591 + }, + { + "epoch": 0.8262730201471367, + "grad_norm": 2.270766592519279, + "learning_rate": 3.7087454648571385e-07, + "loss": 0.1243, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8592 + }, + { + "epoch": 0.8263691878636342, + "grad_norm": 2.6063464970987633, + "learning_rate": 3.704748441893638e-07, + "loss": 0.123, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8593 + }, + { + "epoch": 0.8264653555801318, + "grad_norm": 2.970267783273562, + "learning_rate": 3.70075340157737e-07, + "loss": 0.1429, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8594 + }, + { + "epoch": 0.8265615232966294, + "grad_norm": 2.405269835370807, + "learning_rate": 3.6967603442802896e-07, + "loss": 0.1165, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8595 + }, + { + "epoch": 0.8266576910131269, + "grad_norm": 5.310181617986196, + "learning_rate": 3.6927692703741636e-07, + "loss": 0.119, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8596 + }, + { + "epoch": 0.8267538587296245, + "grad_norm": 1.7688262769947287, + "learning_rate": 3.688780180230564e-07, + "loss": 0.1128, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8597 + }, + { + "epoch": 0.826850026446122, + "grad_norm": 1.8654803296627738, + "learning_rate": 3.684793074220888e-07, + "loss": 0.1123, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8598 + }, + { + "epoch": 0.8269461941626196, + "grad_norm": 2.190046760103761, + "learning_rate": 3.68080795271635e-07, + "loss": 0.1003, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8599 + }, + { + "epoch": 0.8270423618791172, + "grad_norm": 2.6397197553415057, + "learning_rate": 3.6768248160879786e-07, + "loss": 0.1249, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8600 + }, + { + "epoch": 0.8271385295956147, + "grad_norm": 2.1934179205291593, + "learning_rate": 3.6728436647066074e-07, + "loss": 0.1065, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8601 + }, + { + "epoch": 0.8272346973121123, + "grad_norm": 2.420116055477453, + "learning_rate": 3.6688644989428924e-07, + "loss": 0.1587, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8602 + }, + { + "epoch": 0.8273308650286099, + "grad_norm": 1.9561621921186527, + "learning_rate": 3.664887319167318e-07, + "loss": 0.091, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8603 + }, + { + "epoch": 0.8274270327451074, + "grad_norm": 1.5453002503123028, + "learning_rate": 3.6609121257501517e-07, + "loss": 0.1045, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8604 + }, + { + "epoch": 0.827523200461605, + "grad_norm": 2.0191546901449704, + "learning_rate": 3.656938919061509e-07, + "loss": 0.1418, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8605 + }, + { + "epoch": 0.8276193681781027, + "grad_norm": 2.622073765673442, + "learning_rate": 3.652967699471299e-07, + "loss": 0.1018, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8606 + }, + { + "epoch": 0.8277155358946002, + "grad_norm": 1.983528607817827, + "learning_rate": 3.648998467349263e-07, + "loss": 0.1069, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8607 + }, + { + "epoch": 0.8278117036110978, + "grad_norm": 2.237536088514574, + "learning_rate": 3.645031223064935e-07, + "loss": 0.152, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8608 + }, + { + "epoch": 0.8279078713275954, + "grad_norm": 1.4637041152987909, + "learning_rate": 3.6410659669876834e-07, + "loss": 0.1005, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8609 + }, + { + "epoch": 0.8280040390440929, + "grad_norm": 2.0375585965921057, + "learning_rate": 3.637102699486683e-07, + "loss": 0.1065, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8610 + }, + { + "epoch": 0.8281002067605905, + "grad_norm": 1.758128819067022, + "learning_rate": 3.633141420930927e-07, + "loss": 0.0947, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8611 + }, + { + "epoch": 0.828196374477088, + "grad_norm": 1.8912265993994828, + "learning_rate": 3.629182131689218e-07, + "loss": 0.089, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8612 + }, + { + "epoch": 0.8282925421935856, + "grad_norm": 1.76889880500205, + "learning_rate": 3.625224832130186e-07, + "loss": 0.0995, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8613 + }, + { + "epoch": 0.8283887099100832, + "grad_norm": 2.893676438823575, + "learning_rate": 3.621269522622256e-07, + "loss": 0.1192, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8614 + }, + { + "epoch": 0.8284848776265807, + "grad_norm": 1.6051240759632657, + "learning_rate": 3.61731620353368e-07, + "loss": 0.1092, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8615 + }, + { + "epoch": 0.8285810453430783, + "grad_norm": 2.39384338223014, + "learning_rate": 3.6133648752325253e-07, + "loss": 0.129, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8616 + }, + { + "epoch": 0.8286772130595759, + "grad_norm": 1.6427716744098795, + "learning_rate": 3.6094155380866693e-07, + "loss": 0.0949, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8617 + }, + { + "epoch": 0.8287733807760734, + "grad_norm": 1.868417032917458, + "learning_rate": 3.6054681924638154e-07, + "loss": 0.1104, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8618 + }, + { + "epoch": 0.828869548492571, + "grad_norm": 2.42525645162002, + "learning_rate": 3.601522838731461e-07, + "loss": 0.1202, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8619 + }, + { + "epoch": 0.8289657162090687, + "grad_norm": 1.6660490557438477, + "learning_rate": 3.597579477256932e-07, + "loss": 0.1039, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8620 + }, + { + "epoch": 0.8290618839255662, + "grad_norm": 2.746265914926082, + "learning_rate": 3.593638108407371e-07, + "loss": 0.1012, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8621 + }, + { + "epoch": 0.8291580516420638, + "grad_norm": 1.8378649122448816, + "learning_rate": 3.5896987325497265e-07, + "loss": 0.0868, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8622 + }, + { + "epoch": 0.8292542193585614, + "grad_norm": 1.7420472451691769, + "learning_rate": 3.5857613500507646e-07, + "loss": 0.1005, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8623 + }, + { + "epoch": 0.8293503870750589, + "grad_norm": 2.0935637854207605, + "learning_rate": 3.5818259612770746e-07, + "loss": 0.1062, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8624 + }, + { + "epoch": 0.8294465547915565, + "grad_norm": 1.467225111884045, + "learning_rate": 3.577892566595043e-07, + "loss": 0.0907, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8625 + }, + { + "epoch": 0.829542722508054, + "grad_norm": 1.5671116541735564, + "learning_rate": 3.573961166370879e-07, + "loss": 0.1009, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8626 + }, + { + "epoch": 0.8296388902245516, + "grad_norm": 1.8986667856634474, + "learning_rate": 3.570031760970613e-07, + "loss": 0.1031, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8627 + }, + { + "epoch": 0.8297350579410492, + "grad_norm": 2.226849326891585, + "learning_rate": 3.566104350760083e-07, + "loss": 0.1198, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8628 + }, + { + "epoch": 0.8298312256575467, + "grad_norm": 1.807622870598008, + "learning_rate": 3.562178936104943e-07, + "loss": 0.1202, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8629 + }, + { + "epoch": 0.8299273933740443, + "grad_norm": 1.9905102683688711, + "learning_rate": 3.5582555173706513e-07, + "loss": 0.1101, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8630 + }, + { + "epoch": 0.8300235610905419, + "grad_norm": 1.68419905351468, + "learning_rate": 3.5543340949224984e-07, + "loss": 0.1054, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8631 + }, + { + "epoch": 0.8301197288070394, + "grad_norm": 1.9316411612114242, + "learning_rate": 3.5504146691255736e-07, + "loss": 0.1253, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8632 + }, + { + "epoch": 0.830215896523537, + "grad_norm": 1.719481793861274, + "learning_rate": 3.5464972403447956e-07, + "loss": 0.0985, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8633 + }, + { + "epoch": 0.8303120642400347, + "grad_norm": 2.0371610194400973, + "learning_rate": 3.5425818089448756e-07, + "loss": 0.1191, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8634 + }, + { + "epoch": 0.8304082319565322, + "grad_norm": 1.5484176158641705, + "learning_rate": 3.538668375290355e-07, + "loss": 0.0906, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8635 + }, + { + "epoch": 0.8305043996730298, + "grad_norm": 1.9787677792202754, + "learning_rate": 3.53475693974559e-07, + "loss": 0.1349, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8636 + }, + { + "epoch": 0.8306005673895274, + "grad_norm": 2.3005455714259764, + "learning_rate": 3.5308475026747475e-07, + "loss": 0.1217, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8637 + }, + { + "epoch": 0.8306967351060249, + "grad_norm": 2.1958989013519123, + "learning_rate": 3.5269400644418e-07, + "loss": 0.1025, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8638 + }, + { + "epoch": 0.8307929028225225, + "grad_norm": 1.7862342302464613, + "learning_rate": 3.52303462541054e-07, + "loss": 0.1037, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8639 + }, + { + "epoch": 0.8308890705390201, + "grad_norm": 2.153451933619362, + "learning_rate": 3.51913118594458e-07, + "loss": 0.1426, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8640 + }, + { + "epoch": 0.8309852382555176, + "grad_norm": 1.8316133798947558, + "learning_rate": 3.5152297464073474e-07, + "loss": 0.1112, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8641 + }, + { + "epoch": 0.8310814059720152, + "grad_norm": 2.1134945303572796, + "learning_rate": 3.5113303071620614e-07, + "loss": 0.1245, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8642 + }, + { + "epoch": 0.8311775736885127, + "grad_norm": 1.9631833917081234, + "learning_rate": 3.507432868571778e-07, + "loss": 0.1441, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8643 + }, + { + "epoch": 0.8312737414050103, + "grad_norm": 1.8272503157554973, + "learning_rate": 3.5035374309993627e-07, + "loss": 0.1165, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8644 + }, + { + "epoch": 0.8313699091215079, + "grad_norm": 1.7786218418277246, + "learning_rate": 3.499643994807486e-07, + "loss": 0.1001, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8645 + }, + { + "epoch": 0.8314660768380054, + "grad_norm": 1.7943718218429776, + "learning_rate": 3.4957525603586505e-07, + "loss": 0.1181, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8646 + }, + { + "epoch": 0.831562244554503, + "grad_norm": 1.6174509576900669, + "learning_rate": 3.491863128015141e-07, + "loss": 0.1206, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8647 + }, + { + "epoch": 0.8316584122710007, + "grad_norm": 2.103885963774156, + "learning_rate": 3.487975698139084e-07, + "loss": 0.123, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8648 + }, + { + "epoch": 0.8317545799874982, + "grad_norm": 2.1067194336069943, + "learning_rate": 3.484090271092411e-07, + "loss": 0.0883, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8649 + }, + { + "epoch": 0.8318507477039958, + "grad_norm": 2.2915141436205, + "learning_rate": 3.480206847236861e-07, + "loss": 0.1492, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8650 + }, + { + "epoch": 0.8319469154204934, + "grad_norm": 2.9202956450234376, + "learning_rate": 3.4763254269339965e-07, + "loss": 0.1299, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8651 + }, + { + "epoch": 0.8320430831369909, + "grad_norm": 1.6141868430101378, + "learning_rate": 3.4724460105451943e-07, + "loss": 0.1163, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8652 + }, + { + "epoch": 0.8321392508534885, + "grad_norm": 1.7610457173189034, + "learning_rate": 3.468568598431624e-07, + "loss": 0.1245, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8653 + }, + { + "epoch": 0.8322354185699861, + "grad_norm": 1.8700629815912637, + "learning_rate": 3.4646931909542935e-07, + "loss": 0.1186, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8654 + }, + { + "epoch": 0.8323315862864836, + "grad_norm": 2.926143090073436, + "learning_rate": 3.4608197884740094e-07, + "loss": 0.108, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8655 + }, + { + "epoch": 0.8324277540029812, + "grad_norm": 2.263490173846276, + "learning_rate": 3.4569483913514003e-07, + "loss": 0.1622, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8656 + }, + { + "epoch": 0.8325239217194788, + "grad_norm": 2.574273463188956, + "learning_rate": 3.4530789999469045e-07, + "loss": 0.0989, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8657 + }, + { + "epoch": 0.8326200894359763, + "grad_norm": 2.0355655533951316, + "learning_rate": 3.4492116146207677e-07, + "loss": 0.0833, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8658 + }, + { + "epoch": 0.8327162571524739, + "grad_norm": 1.9148755968303808, + "learning_rate": 3.4453462357330574e-07, + "loss": 0.1254, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8659 + }, + { + "epoch": 0.8328124248689714, + "grad_norm": 1.4538891096793471, + "learning_rate": 3.4414828636436525e-07, + "loss": 0.0894, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8660 + }, + { + "epoch": 0.832908592585469, + "grad_norm": 1.9895566103977413, + "learning_rate": 3.43762149871224e-07, + "loss": 0.0955, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8661 + }, + { + "epoch": 0.8330047603019667, + "grad_norm": 1.9175757550428665, + "learning_rate": 3.4337621412983276e-07, + "loss": 0.1142, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8662 + }, + { + "epoch": 0.8331009280184642, + "grad_norm": 2.078282547397813, + "learning_rate": 3.429904791761235e-07, + "loss": 0.1206, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8663 + }, + { + "epoch": 0.8331970957349618, + "grad_norm": 2.3138256205010013, + "learning_rate": 3.426049450460084e-07, + "loss": 0.1268, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8664 + }, + { + "epoch": 0.8332932634514594, + "grad_norm": 1.8830097529776728, + "learning_rate": 3.4221961177538203e-07, + "loss": 0.1215, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8665 + }, + { + "epoch": 0.8333894311679569, + "grad_norm": 1.6536229190719844, + "learning_rate": 3.418344794001199e-07, + "loss": 0.1178, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8666 + }, + { + "epoch": 0.8334855988844545, + "grad_norm": 2.2863626513919524, + "learning_rate": 3.4144954795608005e-07, + "loss": 0.1166, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8667 + }, + { + "epoch": 0.8335817666009521, + "grad_norm": 2.0377545674200688, + "learning_rate": 3.4106481747909895e-07, + "loss": 0.1251, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8668 + }, + { + "epoch": 0.8336779343174496, + "grad_norm": 1.6484220264726774, + "learning_rate": 3.4068028800499695e-07, + "loss": 0.1027, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8669 + }, + { + "epoch": 0.8337741020339472, + "grad_norm": 2.3873547087613822, + "learning_rate": 3.402959595695754e-07, + "loss": 0.1231, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8670 + }, + { + "epoch": 0.8338702697504448, + "grad_norm": 1.6558599121144775, + "learning_rate": 3.3991183220861506e-07, + "loss": 0.0819, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8671 + }, + { + "epoch": 0.8339664374669423, + "grad_norm": 1.6331916137826767, + "learning_rate": 3.3952790595787986e-07, + "loss": 0.0978, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8672 + }, + { + "epoch": 0.8340626051834399, + "grad_norm": 3.663597838656154, + "learning_rate": 3.391441808531143e-07, + "loss": 0.1494, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8673 + }, + { + "epoch": 0.8341587728999375, + "grad_norm": 2.341247039034231, + "learning_rate": 3.3876065693004503e-07, + "loss": 0.121, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8674 + }, + { + "epoch": 0.834254940616435, + "grad_norm": 3.2181325714004814, + "learning_rate": 3.383773342243779e-07, + "loss": 0.1402, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8675 + }, + { + "epoch": 0.8343511083329327, + "grad_norm": 1.8956031015073722, + "learning_rate": 3.3799421277180224e-07, + "loss": 0.1305, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8676 + }, + { + "epoch": 0.8344472760494303, + "grad_norm": 1.4827926084426364, + "learning_rate": 3.376112926079872e-07, + "loss": 0.1005, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8677 + }, + { + "epoch": 0.8345434437659278, + "grad_norm": 1.7918673364054551, + "learning_rate": 3.3722857376858406e-07, + "loss": 0.1121, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8678 + }, + { + "epoch": 0.8346396114824254, + "grad_norm": 1.4695951571179435, + "learning_rate": 3.368460562892248e-07, + "loss": 0.0811, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8679 + }, + { + "epoch": 0.834735779198923, + "grad_norm": 2.3024952442806015, + "learning_rate": 3.364637402055235e-07, + "loss": 0.1314, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8680 + }, + { + "epoch": 0.8348319469154205, + "grad_norm": 1.7387136808344517, + "learning_rate": 3.360816255530738e-07, + "loss": 0.1211, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8681 + }, + { + "epoch": 0.8349281146319181, + "grad_norm": 1.4921587554584443, + "learning_rate": 3.356997123674524e-07, + "loss": 0.1051, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8682 + }, + { + "epoch": 0.8350242823484156, + "grad_norm": 2.1983067698310093, + "learning_rate": 3.353180006842161e-07, + "loss": 0.1222, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8683 + }, + { + "epoch": 0.8351204500649132, + "grad_norm": 2.4643190586198087, + "learning_rate": 3.3493649053890325e-07, + "loss": 0.1128, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8684 + }, + { + "epoch": 0.8352166177814108, + "grad_norm": 2.0273669083457104, + "learning_rate": 3.3455518196703453e-07, + "loss": 0.1145, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8685 + }, + { + "epoch": 0.8353127854979083, + "grad_norm": 1.5290037624683968, + "learning_rate": 3.341740750041095e-07, + "loss": 0.095, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8686 + }, + { + "epoch": 0.8354089532144059, + "grad_norm": 1.8042013797842937, + "learning_rate": 3.337931696856106e-07, + "loss": 0.1, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8687 + }, + { + "epoch": 0.8355051209309035, + "grad_norm": 1.48323449602307, + "learning_rate": 3.334124660470017e-07, + "loss": 0.107, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8688 + }, + { + "epoch": 0.835601288647401, + "grad_norm": 2.235520748707677, + "learning_rate": 3.3303196412372673e-07, + "loss": 0.1315, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8689 + }, + { + "epoch": 0.8356974563638987, + "grad_norm": 1.53871024595946, + "learning_rate": 3.32651663951212e-07, + "loss": 0.1107, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8690 + }, + { + "epoch": 0.8357936240803963, + "grad_norm": 1.8154279691357909, + "learning_rate": 3.322715655648648e-07, + "loss": 0.1118, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8691 + }, + { + "epoch": 0.8358897917968938, + "grad_norm": 2.323581744066344, + "learning_rate": 3.318916690000723e-07, + "loss": 0.1283, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8692 + }, + { + "epoch": 0.8359859595133914, + "grad_norm": 2.2814786869229646, + "learning_rate": 3.3151197429220434e-07, + "loss": 0.0996, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8693 + }, + { + "epoch": 0.836082127229889, + "grad_norm": 2.2119564343747604, + "learning_rate": 3.31132481476612e-07, + "loss": 0.1192, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8694 + }, + { + "epoch": 0.8361782949463865, + "grad_norm": 1.6942650933177095, + "learning_rate": 3.307531905886266e-07, + "loss": 0.0946, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8695 + }, + { + "epoch": 0.8362744626628841, + "grad_norm": 1.8439479052969157, + "learning_rate": 3.3037410166356144e-07, + "loss": 0.1139, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8696 + }, + { + "epoch": 0.8363706303793816, + "grad_norm": 1.822723655475342, + "learning_rate": 3.2999521473671137e-07, + "loss": 0.1113, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8697 + }, + { + "epoch": 0.8364667980958792, + "grad_norm": 2.555291598743087, + "learning_rate": 3.296165298433507e-07, + "loss": 0.131, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8698 + }, + { + "epoch": 0.8365629658123768, + "grad_norm": 1.8878350245689612, + "learning_rate": 3.292380470187365e-07, + "loss": 0.1358, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8699 + }, + { + "epoch": 0.8366591335288743, + "grad_norm": 2.173294209483265, + "learning_rate": 3.288597662981069e-07, + "loss": 0.1184, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8700 + }, + { + "epoch": 0.8367553012453719, + "grad_norm": 1.964311320886639, + "learning_rate": 3.284816877166802e-07, + "loss": 0.1343, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8701 + }, + { + "epoch": 0.8368514689618695, + "grad_norm": 2.410952266555907, + "learning_rate": 3.2810381130965686e-07, + "loss": 0.1462, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8702 + }, + { + "epoch": 0.836947636678367, + "grad_norm": 1.4808415169991256, + "learning_rate": 3.2772613711221895e-07, + "loss": 0.0887, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8703 + }, + { + "epoch": 0.8370438043948647, + "grad_norm": 1.6447127773700878, + "learning_rate": 3.2734866515952754e-07, + "loss": 0.1062, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8704 + }, + { + "epoch": 0.8371399721113623, + "grad_norm": 2.0491405234101507, + "learning_rate": 3.2697139548672737e-07, + "loss": 0.1017, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8705 + }, + { + "epoch": 0.8372361398278598, + "grad_norm": 2.11872672641623, + "learning_rate": 3.2659432812894297e-07, + "loss": 0.0865, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8706 + }, + { + "epoch": 0.8373323075443574, + "grad_norm": 1.7906400622268934, + "learning_rate": 3.2621746312128053e-07, + "loss": 0.1012, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8707 + }, + { + "epoch": 0.837428475260855, + "grad_norm": 1.5842258623575263, + "learning_rate": 3.258408004988278e-07, + "loss": 0.0989, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8708 + }, + { + "epoch": 0.8375246429773525, + "grad_norm": 1.5301566437821765, + "learning_rate": 3.2546434029665177e-07, + "loss": 0.1071, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8709 + }, + { + "epoch": 0.8376208106938501, + "grad_norm": 1.830917523284949, + "learning_rate": 3.250880825498026e-07, + "loss": 0.1149, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8710 + }, + { + "epoch": 0.8377169784103476, + "grad_norm": 2.1547618406071964, + "learning_rate": 3.247120272933113e-07, + "loss": 0.1296, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8711 + }, + { + "epoch": 0.8378131461268452, + "grad_norm": 1.5383814933207756, + "learning_rate": 3.2433617456218894e-07, + "loss": 0.1132, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8712 + }, + { + "epoch": 0.8379093138433428, + "grad_norm": 1.9644529888173534, + "learning_rate": 3.2396052439142953e-07, + "loss": 0.114, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8713 + }, + { + "epoch": 0.8380054815598403, + "grad_norm": 1.5163084643026545, + "learning_rate": 3.2358507681600595e-07, + "loss": 0.0991, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8714 + }, + { + "epoch": 0.8381016492763379, + "grad_norm": 1.4853266955488358, + "learning_rate": 3.2320983187087403e-07, + "loss": 0.0824, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8715 + }, + { + "epoch": 0.8381978169928355, + "grad_norm": 4.679242262125745, + "learning_rate": 3.2283478959096975e-07, + "loss": 0.1585, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8716 + }, + { + "epoch": 0.838293984709333, + "grad_norm": 2.0784091403604186, + "learning_rate": 3.2245995001121103e-07, + "loss": 0.1273, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8717 + }, + { + "epoch": 0.8383901524258307, + "grad_norm": 2.1303242127386386, + "learning_rate": 3.220853131664964e-07, + "loss": 0.1228, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8718 + }, + { + "epoch": 0.8384863201423283, + "grad_norm": 1.7119253630779498, + "learning_rate": 3.217108790917059e-07, + "loss": 0.1189, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8719 + }, + { + "epoch": 0.8385824878588258, + "grad_norm": 1.7217883681712256, + "learning_rate": 3.2133664782169947e-07, + "loss": 0.1214, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8720 + }, + { + "epoch": 0.8386786555753234, + "grad_norm": 1.8778743316078574, + "learning_rate": 3.2096261939131954e-07, + "loss": 0.1275, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8721 + }, + { + "epoch": 0.838774823291821, + "grad_norm": 1.6308100016573486, + "learning_rate": 3.2058879383538937e-07, + "loss": 0.1096, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8722 + }, + { + "epoch": 0.8388709910083185, + "grad_norm": 1.680065812127848, + "learning_rate": 3.2021517118871306e-07, + "loss": 0.1072, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8723 + }, + { + "epoch": 0.8389671587248161, + "grad_norm": 2.3613450450378832, + "learning_rate": 3.1984175148607603e-07, + "loss": 0.1139, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8724 + }, + { + "epoch": 0.8390633264413137, + "grad_norm": 1.695475861189231, + "learning_rate": 3.1946853476224475e-07, + "loss": 0.1015, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8725 + }, + { + "epoch": 0.8391594941578112, + "grad_norm": 2.6656139822297082, + "learning_rate": 3.190955210519664e-07, + "loss": 0.1451, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8726 + }, + { + "epoch": 0.8392556618743088, + "grad_norm": 1.4540324141215937, + "learning_rate": 3.1872271038996954e-07, + "loss": 0.1067, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8727 + }, + { + "epoch": 0.8393518295908063, + "grad_norm": 2.00707169341144, + "learning_rate": 3.1835010281096426e-07, + "loss": 0.1553, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8728 + }, + { + "epoch": 0.8394479973073039, + "grad_norm": 1.700265911267109, + "learning_rate": 3.17977698349641e-07, + "loss": 0.108, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8729 + }, + { + "epoch": 0.8395441650238015, + "grad_norm": 1.6657650705703553, + "learning_rate": 3.1760549704067253e-07, + "loss": 0.1025, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8730 + }, + { + "epoch": 0.839640332740299, + "grad_norm": 1.8483334986500475, + "learning_rate": 3.1723349891871065e-07, + "loss": 0.1164, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8731 + }, + { + "epoch": 0.8397365004567967, + "grad_norm": 2.6250385095357265, + "learning_rate": 3.1686170401838965e-07, + "loss": 0.1395, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8732 + }, + { + "epoch": 0.8398326681732943, + "grad_norm": 2.1342150966533997, + "learning_rate": 3.164901123743258e-07, + "loss": 0.118, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8733 + }, + { + "epoch": 0.8399288358897918, + "grad_norm": 2.0486212670818493, + "learning_rate": 3.1611872402111386e-07, + "loss": 0.1402, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8734 + }, + { + "epoch": 0.8400250036062894, + "grad_norm": 1.828109213520149, + "learning_rate": 3.1574753899333176e-07, + "loss": 0.1296, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8735 + }, + { + "epoch": 0.840121171322787, + "grad_norm": 1.7885431316917209, + "learning_rate": 3.153765573255377e-07, + "loss": 0.0795, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8736 + }, + { + "epoch": 0.8402173390392845, + "grad_norm": 2.657572129119921, + "learning_rate": 3.150057790522718e-07, + "loss": 0.1332, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8737 + }, + { + "epoch": 0.8403135067557821, + "grad_norm": 1.5248685705289435, + "learning_rate": 3.146352042080533e-07, + "loss": 0.102, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8738 + }, + { + "epoch": 0.8404096744722797, + "grad_norm": 1.4950568992813436, + "learning_rate": 3.142648328273848e-07, + "loss": 0.1095, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8739 + }, + { + "epoch": 0.8405058421887772, + "grad_norm": 1.9554619048320694, + "learning_rate": 3.138946649447483e-07, + "loss": 0.1132, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8740 + }, + { + "epoch": 0.8406020099052748, + "grad_norm": 1.4750981341594511, + "learning_rate": 3.135247005946082e-07, + "loss": 0.0874, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8741 + }, + { + "epoch": 0.8406981776217723, + "grad_norm": 1.9786801201955742, + "learning_rate": 3.131549398114081e-07, + "loss": 0.1058, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8742 + }, + { + "epoch": 0.8407943453382699, + "grad_norm": 1.6634777254849338, + "learning_rate": 3.1278538262957456e-07, + "loss": 0.0945, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8743 + }, + { + "epoch": 0.8408905130547675, + "grad_norm": 2.3733597443526073, + "learning_rate": 3.1241602908351404e-07, + "loss": 0.1336, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8744 + }, + { + "epoch": 0.840986680771265, + "grad_norm": 1.617515703696758, + "learning_rate": 3.1204687920761476e-07, + "loss": 0.1194, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8745 + }, + { + "epoch": 0.8410828484877627, + "grad_norm": 1.7943406597293778, + "learning_rate": 3.1167793303624514e-07, + "loss": 0.0867, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8746 + }, + { + "epoch": 0.8411790162042603, + "grad_norm": 1.596537851573537, + "learning_rate": 3.11309190603756e-07, + "loss": 0.1098, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8747 + }, + { + "epoch": 0.8412751839207578, + "grad_norm": 1.538620801273578, + "learning_rate": 3.1094065194447714e-07, + "loss": 0.0849, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8748 + }, + { + "epoch": 0.8413713516372554, + "grad_norm": 2.0999136960822935, + "learning_rate": 3.105723170927208e-07, + "loss": 0.1015, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8749 + }, + { + "epoch": 0.841467519353753, + "grad_norm": 1.9480119055372556, + "learning_rate": 3.102041860827804e-07, + "loss": 0.0986, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8750 + }, + { + "epoch": 0.8415636870702505, + "grad_norm": 1.9552999769083426, + "learning_rate": 3.098362589489298e-07, + "loss": 0.129, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8751 + }, + { + "epoch": 0.8416598547867481, + "grad_norm": 2.83195546243321, + "learning_rate": 3.0946853572542375e-07, + "loss": 0.1292, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8752 + }, + { + "epoch": 0.8417560225032457, + "grad_norm": 1.9690122493553204, + "learning_rate": 3.091010164464994e-07, + "loss": 0.1058, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8753 + }, + { + "epoch": 0.8418521902197432, + "grad_norm": 1.6962424977027108, + "learning_rate": 3.087337011463723e-07, + "loss": 0.0913, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8754 + }, + { + "epoch": 0.8419483579362408, + "grad_norm": 1.6105311492105296, + "learning_rate": 3.0836658985924114e-07, + "loss": 0.1113, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8755 + }, + { + "epoch": 0.8420445256527384, + "grad_norm": 1.7719448021051967, + "learning_rate": 3.079996826192849e-07, + "loss": 0.0868, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8756 + }, + { + "epoch": 0.8421406933692359, + "grad_norm": 1.8773030585062696, + "learning_rate": 3.076329794606642e-07, + "loss": 0.131, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8757 + }, + { + "epoch": 0.8422368610857335, + "grad_norm": 2.0280675821132137, + "learning_rate": 3.072664804175202e-07, + "loss": 0.0957, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8758 + }, + { + "epoch": 0.842333028802231, + "grad_norm": 1.6949648219591262, + "learning_rate": 3.0690018552397396e-07, + "loss": 0.1002, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8759 + }, + { + "epoch": 0.8424291965187287, + "grad_norm": 2.307111900716697, + "learning_rate": 3.0653409481412906e-07, + "loss": 0.1446, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8760 + }, + { + "epoch": 0.8425253642352263, + "grad_norm": 1.9146423423453025, + "learning_rate": 3.061682083220696e-07, + "loss": 0.11, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8761 + }, + { + "epoch": 0.8426215319517238, + "grad_norm": 1.6106343691340999, + "learning_rate": 3.058025260818609e-07, + "loss": 0.1214, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8762 + }, + { + "epoch": 0.8427176996682214, + "grad_norm": 2.3067055769213676, + "learning_rate": 3.0543704812754844e-07, + "loss": 0.1279, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8763 + }, + { + "epoch": 0.842813867384719, + "grad_norm": 1.7999966386518214, + "learning_rate": 3.0507177449316026e-07, + "loss": 0.1294, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8764 + }, + { + "epoch": 0.8429100351012165, + "grad_norm": 1.7232883758020439, + "learning_rate": 3.0470670521270315e-07, + "loss": 0.109, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8765 + }, + { + "epoch": 0.8430062028177141, + "grad_norm": 1.8874150165602666, + "learning_rate": 3.0434184032016674e-07, + "loss": 0.0966, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8766 + }, + { + "epoch": 0.8431023705342117, + "grad_norm": 1.8317584905968423, + "learning_rate": 3.0397717984952114e-07, + "loss": 0.1116, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8767 + }, + { + "epoch": 0.8431985382507092, + "grad_norm": 1.766837303869611, + "learning_rate": 3.036127238347164e-07, + "loss": 0.1205, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8768 + }, + { + "epoch": 0.8432947059672068, + "grad_norm": 1.8871576431283292, + "learning_rate": 3.0324847230968486e-07, + "loss": 0.1307, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8769 + }, + { + "epoch": 0.8433908736837044, + "grad_norm": 2.7107437991414107, + "learning_rate": 3.0288442530834014e-07, + "loss": 0.1601, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8770 + }, + { + "epoch": 0.8434870414002019, + "grad_norm": 1.6374204868354518, + "learning_rate": 3.025205828645747e-07, + "loss": 0.1127, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8771 + }, + { + "epoch": 0.8435832091166995, + "grad_norm": 1.7153639240980156, + "learning_rate": 3.0215694501226386e-07, + "loss": 0.1042, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8772 + }, + { + "epoch": 0.843679376833197, + "grad_norm": 2.461012793336172, + "learning_rate": 3.0179351178526326e-07, + "loss": 0.1446, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8773 + }, + { + "epoch": 0.8437755445496947, + "grad_norm": 2.358068479658741, + "learning_rate": 3.014302832174096e-07, + "loss": 0.0804, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8774 + }, + { + "epoch": 0.8438717122661923, + "grad_norm": 1.6506081413689508, + "learning_rate": 3.0106725934252095e-07, + "loss": 0.0757, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8775 + }, + { + "epoch": 0.8439678799826899, + "grad_norm": 1.5027589025623072, + "learning_rate": 3.007044401943951e-07, + "loss": 0.0972, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8776 + }, + { + "epoch": 0.8440640476991874, + "grad_norm": 2.0126653105315064, + "learning_rate": 3.0034182580681164e-07, + "loss": 0.0992, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8777 + }, + { + "epoch": 0.844160215415685, + "grad_norm": 1.821931102079555, + "learning_rate": 2.9997941621353136e-07, + "loss": 0.1105, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8778 + }, + { + "epoch": 0.8442563831321825, + "grad_norm": 1.9110616658357686, + "learning_rate": 2.996172114482954e-07, + "loss": 0.1214, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8779 + }, + { + "epoch": 0.8443525508486801, + "grad_norm": 1.776526790951115, + "learning_rate": 2.9925521154482577e-07, + "loss": 0.1093, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8780 + }, + { + "epoch": 0.8444487185651777, + "grad_norm": 1.8290019539468654, + "learning_rate": 2.988934165368265e-07, + "loss": 0.1036, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8781 + }, + { + "epoch": 0.8445448862816752, + "grad_norm": 1.4948167534497152, + "learning_rate": 2.985318264579809e-07, + "loss": 0.0921, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8782 + }, + { + "epoch": 0.8446410539981728, + "grad_norm": 2.179433421354925, + "learning_rate": 2.9817044134195396e-07, + "loss": 0.1502, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8783 + }, + { + "epoch": 0.8447372217146704, + "grad_norm": 2.149333155766359, + "learning_rate": 2.9780926122239206e-07, + "loss": 0.1231, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8784 + }, + { + "epoch": 0.8448333894311679, + "grad_norm": 1.7862786423267378, + "learning_rate": 2.974482861329217e-07, + "loss": 0.1205, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8785 + }, + { + "epoch": 0.8449295571476655, + "grad_norm": 1.8783490430551355, + "learning_rate": 2.970875161071518e-07, + "loss": 0.1122, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8786 + }, + { + "epoch": 0.845025724864163, + "grad_norm": 2.2647417522611524, + "learning_rate": 2.967269511786694e-07, + "loss": 0.1022, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8787 + }, + { + "epoch": 0.8451218925806607, + "grad_norm": 1.892882666026898, + "learning_rate": 2.963665913810451e-07, + "loss": 0.098, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8788 + }, + { + "epoch": 0.8452180602971583, + "grad_norm": 1.801044445337494, + "learning_rate": 2.960064367478291e-07, + "loss": 0.1195, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8789 + }, + { + "epoch": 0.8453142280136559, + "grad_norm": 1.7988203933566884, + "learning_rate": 2.956464873125528e-07, + "loss": 0.1052, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8790 + }, + { + "epoch": 0.8454103957301534, + "grad_norm": 3.029658292724317, + "learning_rate": 2.9528674310872836e-07, + "loss": 0.1038, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8791 + }, + { + "epoch": 0.845506563446651, + "grad_norm": 1.6005920330331138, + "learning_rate": 2.9492720416985004e-07, + "loss": 0.1126, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8792 + }, + { + "epoch": 0.8456027311631485, + "grad_norm": 1.796953227856243, + "learning_rate": 2.9456787052939027e-07, + "loss": 0.129, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8793 + }, + { + "epoch": 0.8456988988796461, + "grad_norm": 2.2742345536865356, + "learning_rate": 2.942087422208051e-07, + "loss": 0.11, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8794 + }, + { + "epoch": 0.8457950665961437, + "grad_norm": 3.047117520665539, + "learning_rate": 2.9384981927752964e-07, + "loss": 0.1041, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8795 + }, + { + "epoch": 0.8458912343126412, + "grad_norm": 1.928816395617967, + "learning_rate": 2.934911017329814e-07, + "loss": 0.1121, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8796 + }, + { + "epoch": 0.8459874020291388, + "grad_norm": 2.184217391833393, + "learning_rate": 2.931325896205581e-07, + "loss": 0.1048, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8797 + }, + { + "epoch": 0.8460835697456364, + "grad_norm": 1.5982315252024542, + "learning_rate": 2.927742829736371e-07, + "loss": 0.1126, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8798 + }, + { + "epoch": 0.8461797374621339, + "grad_norm": 3.2810994957661697, + "learning_rate": 2.924161818255783e-07, + "loss": 0.1063, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8799 + }, + { + "epoch": 0.8462759051786315, + "grad_norm": 1.6688711615381226, + "learning_rate": 2.9205828620972267e-07, + "loss": 0.1177, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8800 + }, + { + "epoch": 0.8463720728951291, + "grad_norm": 1.8686720799337835, + "learning_rate": 2.917005961593902e-07, + "loss": 0.1019, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8801 + }, + { + "epoch": 0.8464682406116267, + "grad_norm": 2.7456668439772773, + "learning_rate": 2.9134311170788324e-07, + "loss": 0.1184, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8802 + }, + { + "epoch": 0.8465644083281243, + "grad_norm": 1.681727984156908, + "learning_rate": 2.9098583288848484e-07, + "loss": 0.12, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8803 + }, + { + "epoch": 0.8466605760446219, + "grad_norm": 3.46452959055623, + "learning_rate": 2.9062875973445814e-07, + "loss": 0.1209, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8804 + }, + { + "epoch": 0.8467567437611194, + "grad_norm": 1.7518241703193778, + "learning_rate": 2.902718922790479e-07, + "loss": 0.1088, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8805 + }, + { + "epoch": 0.846852911477617, + "grad_norm": 1.6157837623219644, + "learning_rate": 2.899152305554795e-07, + "loss": 0.0942, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8806 + }, + { + "epoch": 0.8469490791941146, + "grad_norm": 7.410148602618143, + "learning_rate": 2.8955877459695913e-07, + "loss": 0.1346, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8807 + }, + { + "epoch": 0.8470452469106121, + "grad_norm": 1.7710289013524836, + "learning_rate": 2.892025244366736e-07, + "loss": 0.1191, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8808 + }, + { + "epoch": 0.8471414146271097, + "grad_norm": 2.337430706571143, + "learning_rate": 2.888464801077917e-07, + "loss": 0.1165, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8809 + }, + { + "epoch": 0.8472375823436072, + "grad_norm": 1.8791838309694724, + "learning_rate": 2.88490641643461e-07, + "loss": 0.1126, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8810 + }, + { + "epoch": 0.8473337500601048, + "grad_norm": 1.9114484076258842, + "learning_rate": 2.8813500907681174e-07, + "loss": 0.1057, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8811 + }, + { + "epoch": 0.8474299177766024, + "grad_norm": 3.091929065719641, + "learning_rate": 2.877795824409538e-07, + "loss": 0.1391, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8812 + }, + { + "epoch": 0.8475260854930999, + "grad_norm": 1.54985176897117, + "learning_rate": 2.874243617689787e-07, + "loss": 0.1039, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8813 + }, + { + "epoch": 0.8476222532095975, + "grad_norm": 1.7979975787237026, + "learning_rate": 2.8706934709395893e-07, + "loss": 0.1124, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8814 + }, + { + "epoch": 0.8477184209260951, + "grad_norm": 2.070986472087709, + "learning_rate": 2.867145384489464e-07, + "loss": 0.1151, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8815 + }, + { + "epoch": 0.8478145886425927, + "grad_norm": 3.250514886652391, + "learning_rate": 2.8635993586697555e-07, + "loss": 0.1279, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8816 + }, + { + "epoch": 0.8479107563590903, + "grad_norm": 1.6501229623418696, + "learning_rate": 2.8600553938106013e-07, + "loss": 0.1018, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8817 + }, + { + "epoch": 0.8480069240755879, + "grad_norm": 1.6377471947002593, + "learning_rate": 2.8565134902419607e-07, + "loss": 0.0969, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8818 + }, + { + "epoch": 0.8481030917920854, + "grad_norm": 1.5964175636412954, + "learning_rate": 2.8529736482935946e-07, + "loss": 0.0989, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8819 + }, + { + "epoch": 0.848199259508583, + "grad_norm": 2.0967858698329143, + "learning_rate": 2.8494358682950725e-07, + "loss": 0.0967, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8820 + }, + { + "epoch": 0.8482954272250806, + "grad_norm": 1.8188972157229735, + "learning_rate": 2.8459001505757673e-07, + "loss": 0.1161, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8821 + }, + { + "epoch": 0.8483915949415781, + "grad_norm": 1.7208503248539064, + "learning_rate": 2.8423664954648626e-07, + "loss": 0.1076, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8822 + }, + { + "epoch": 0.8484877626580757, + "grad_norm": 1.6946487273448438, + "learning_rate": 2.8388349032913597e-07, + "loss": 0.1164, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8823 + }, + { + "epoch": 0.8485839303745732, + "grad_norm": 1.9619879370586417, + "learning_rate": 2.8353053743840534e-07, + "loss": 0.1162, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8824 + }, + { + "epoch": 0.8486800980910708, + "grad_norm": 2.1245087982167417, + "learning_rate": 2.8317779090715593e-07, + "loss": 0.0925, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8825 + }, + { + "epoch": 0.8487762658075684, + "grad_norm": 1.4631332063860083, + "learning_rate": 2.828252507682283e-07, + "loss": 0.0893, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8826 + }, + { + "epoch": 0.8488724335240659, + "grad_norm": 2.7878446457429975, + "learning_rate": 2.8247291705444575e-07, + "loss": 0.1474, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8827 + }, + { + "epoch": 0.8489686012405635, + "grad_norm": 1.689678849103247, + "learning_rate": 2.821207897986114e-07, + "loss": 0.1286, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8828 + }, + { + "epoch": 0.8490647689570611, + "grad_norm": 1.560457604375745, + "learning_rate": 2.817688690335091e-07, + "loss": 0.0933, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8829 + }, + { + "epoch": 0.8491609366735587, + "grad_norm": 1.6654081472033548, + "learning_rate": 2.814171547919042e-07, + "loss": 0.0922, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8830 + }, + { + "epoch": 0.8492571043900563, + "grad_norm": 1.483057294939043, + "learning_rate": 2.810656471065415e-07, + "loss": 0.1003, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8831 + }, + { + "epoch": 0.8493532721065539, + "grad_norm": 2.24462386799487, + "learning_rate": 2.8071434601014774e-07, + "loss": 0.1119, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8832 + }, + { + "epoch": 0.8494494398230514, + "grad_norm": 2.195888559742795, + "learning_rate": 2.8036325153543043e-07, + "loss": 0.1082, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8833 + }, + { + "epoch": 0.849545607539549, + "grad_norm": 1.7223919243077532, + "learning_rate": 2.8001236371507673e-07, + "loss": 0.1086, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8834 + }, + { + "epoch": 0.8496417752560466, + "grad_norm": 1.89746360779488, + "learning_rate": 2.7966168258175527e-07, + "loss": 0.1344, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8835 + }, + { + "epoch": 0.8497379429725441, + "grad_norm": 2.628414181095116, + "learning_rate": 2.793112081681157e-07, + "loss": 0.1032, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8836 + }, + { + "epoch": 0.8498341106890417, + "grad_norm": 1.8514597400016763, + "learning_rate": 2.789609405067889e-07, + "loss": 0.1209, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8837 + }, + { + "epoch": 0.8499302784055393, + "grad_norm": 2.717959122645738, + "learning_rate": 2.7861087963038436e-07, + "loss": 0.1055, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8838 + }, + { + "epoch": 0.8500264461220368, + "grad_norm": 3.319769497445513, + "learning_rate": 2.782610255714943e-07, + "loss": 0.1306, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8839 + }, + { + "epoch": 0.8501226138385344, + "grad_norm": 2.5936157507479725, + "learning_rate": 2.779113783626916e-07, + "loss": 0.1121, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8840 + }, + { + "epoch": 0.8502187815550319, + "grad_norm": 1.9816980637788781, + "learning_rate": 2.775619380365288e-07, + "loss": 0.0999, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8841 + }, + { + "epoch": 0.8503149492715295, + "grad_norm": 1.7105394058023768, + "learning_rate": 2.7721270462554044e-07, + "loss": 0.1186, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8842 + }, + { + "epoch": 0.8504111169880271, + "grad_norm": 2.417031171909955, + "learning_rate": 2.768636781622402e-07, + "loss": 0.1303, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8843 + }, + { + "epoch": 0.8505072847045247, + "grad_norm": 1.961580981418705, + "learning_rate": 2.765148586791239e-07, + "loss": 0.0889, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8844 + }, + { + "epoch": 0.8506034524210223, + "grad_norm": 1.7306786840323165, + "learning_rate": 2.761662462086676e-07, + "loss": 0.1055, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8845 + }, + { + "epoch": 0.8506996201375199, + "grad_norm": 2.454269316886389, + "learning_rate": 2.7581784078332785e-07, + "loss": 0.1084, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8846 + }, + { + "epoch": 0.8507957878540174, + "grad_norm": 2.0222362836103445, + "learning_rate": 2.754696424355424e-07, + "loss": 0.1116, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8847 + }, + { + "epoch": 0.850891955570515, + "grad_norm": 1.80857064785159, + "learning_rate": 2.751216511977303e-07, + "loss": 0.1011, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8848 + }, + { + "epoch": 0.8509881232870126, + "grad_norm": 1.764422170767639, + "learning_rate": 2.74773867102289e-07, + "loss": 0.0962, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8849 + }, + { + "epoch": 0.8510842910035101, + "grad_norm": 1.6566564530292918, + "learning_rate": 2.744262901815989e-07, + "loss": 0.1187, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8850 + }, + { + "epoch": 0.8511804587200077, + "grad_norm": 1.8141447631577488, + "learning_rate": 2.7407892046802025e-07, + "loss": 0.1155, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8851 + }, + { + "epoch": 0.8512766264365053, + "grad_norm": 1.682137987643451, + "learning_rate": 2.737317579938942e-07, + "loss": 0.1127, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8852 + }, + { + "epoch": 0.8513727941530028, + "grad_norm": 2.006932824628336, + "learning_rate": 2.733848027915431e-07, + "loss": 0.1272, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8853 + }, + { + "epoch": 0.8514689618695004, + "grad_norm": 2.6008138104409535, + "learning_rate": 2.7303805489326845e-07, + "loss": 0.1213, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8854 + }, + { + "epoch": 0.851565129585998, + "grad_norm": 1.769394772502956, + "learning_rate": 2.726915143313538e-07, + "loss": 0.1204, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8855 + }, + { + "epoch": 0.8516612973024955, + "grad_norm": 1.8919471066182119, + "learning_rate": 2.7234518113806316e-07, + "loss": 0.139, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8856 + }, + { + "epoch": 0.8517574650189931, + "grad_norm": 1.779008676474255, + "learning_rate": 2.719990553456411e-07, + "loss": 0.1106, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8857 + }, + { + "epoch": 0.8518536327354908, + "grad_norm": 1.7698359017156144, + "learning_rate": 2.716531369863129e-07, + "loss": 0.1139, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8858 + }, + { + "epoch": 0.8519498004519883, + "grad_norm": 2.4682307823774328, + "learning_rate": 2.7130742609228487e-07, + "loss": 0.1104, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8859 + }, + { + "epoch": 0.8520459681684859, + "grad_norm": 1.7343426824039465, + "learning_rate": 2.70961922695743e-07, + "loss": 0.1094, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8860 + }, + { + "epoch": 0.8521421358849834, + "grad_norm": 1.8677560752783127, + "learning_rate": 2.706166268288549e-07, + "loss": 0.1107, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8861 + }, + { + "epoch": 0.852238303601481, + "grad_norm": 1.8392508165536325, + "learning_rate": 2.7027153852376866e-07, + "loss": 0.1011, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8862 + }, + { + "epoch": 0.8523344713179786, + "grad_norm": 1.7798181881087731, + "learning_rate": 2.699266578126125e-07, + "loss": 0.1268, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8863 + }, + { + "epoch": 0.8524306390344761, + "grad_norm": 1.936668035550351, + "learning_rate": 2.695819847274972e-07, + "loss": 0.0977, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8864 + }, + { + "epoch": 0.8525268067509737, + "grad_norm": 1.8955573987747443, + "learning_rate": 2.6923751930051087e-07, + "loss": 0.11, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8865 + }, + { + "epoch": 0.8526229744674713, + "grad_norm": 1.893679831113533, + "learning_rate": 2.688932615637252e-07, + "loss": 0.1243, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8866 + }, + { + "epoch": 0.8527191421839688, + "grad_norm": 1.9748130086385152, + "learning_rate": 2.6854921154919197e-07, + "loss": 0.0988, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8867 + }, + { + "epoch": 0.8528153099004664, + "grad_norm": 2.403542410508288, + "learning_rate": 2.682053692889422e-07, + "loss": 0.1366, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8868 + }, + { + "epoch": 0.852911477616964, + "grad_norm": 1.712275709558499, + "learning_rate": 2.6786173481498896e-07, + "loss": 0.1133, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8869 + }, + { + "epoch": 0.8530076453334615, + "grad_norm": 2.7891036292294, + "learning_rate": 2.6751830815932606e-07, + "loss": 0.1218, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8870 + }, + { + "epoch": 0.8531038130499591, + "grad_norm": 2.678782202465501, + "learning_rate": 2.6717508935392685e-07, + "loss": 0.144, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8871 + }, + { + "epoch": 0.8531999807664568, + "grad_norm": 1.5983037372521232, + "learning_rate": 2.668320784307457e-07, + "loss": 0.087, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8872 + }, + { + "epoch": 0.8532961484829543, + "grad_norm": 1.538799415313728, + "learning_rate": 2.6648927542171856e-07, + "loss": 0.0981, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8873 + }, + { + "epoch": 0.8533923161994519, + "grad_norm": 1.9222401804581322, + "learning_rate": 2.661466803587612e-07, + "loss": 0.1254, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8874 + }, + { + "epoch": 0.8534884839159494, + "grad_norm": 2.4996489376186526, + "learning_rate": 2.6580429327377007e-07, + "loss": 0.138, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8875 + }, + { + "epoch": 0.853584651632447, + "grad_norm": 2.7613883290423047, + "learning_rate": 2.65462114198623e-07, + "loss": 0.0994, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8876 + }, + { + "epoch": 0.8536808193489446, + "grad_norm": 2.83724668308933, + "learning_rate": 2.651201431651765e-07, + "loss": 0.1377, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8877 + }, + { + "epoch": 0.8537769870654421, + "grad_norm": 2.269992696986782, + "learning_rate": 2.6477838020526977e-07, + "loss": 0.1106, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8878 + }, + { + "epoch": 0.8538731547819397, + "grad_norm": 2.1984859298103423, + "learning_rate": 2.644368253507218e-07, + "loss": 0.1273, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8879 + }, + { + "epoch": 0.8539693224984373, + "grad_norm": 1.6116986128262791, + "learning_rate": 2.6409547863333246e-07, + "loss": 0.1028, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8880 + }, + { + "epoch": 0.8540654902149348, + "grad_norm": 2.593440454199798, + "learning_rate": 2.637543400848824e-07, + "loss": 0.1414, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8881 + }, + { + "epoch": 0.8541616579314324, + "grad_norm": 2.127050091183305, + "learning_rate": 2.634134097371319e-07, + "loss": 0.1344, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8882 + }, + { + "epoch": 0.85425782564793, + "grad_norm": 1.734814225834132, + "learning_rate": 2.630726876218226e-07, + "loss": 0.0965, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8883 + }, + { + "epoch": 0.8543539933644275, + "grad_norm": 1.2832184846859243, + "learning_rate": 2.627321737706767e-07, + "loss": 0.0811, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8884 + }, + { + "epoch": 0.8544501610809251, + "grad_norm": 1.5975378309020354, + "learning_rate": 2.6239186821539726e-07, + "loss": 0.0968, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8885 + }, + { + "epoch": 0.8545463287974228, + "grad_norm": 1.773269489693768, + "learning_rate": 2.620517709876677e-07, + "loss": 0.1264, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8886 + }, + { + "epoch": 0.8546424965139203, + "grad_norm": 1.8150536915517832, + "learning_rate": 2.617118821191522e-07, + "loss": 0.1256, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8887 + }, + { + "epoch": 0.8547386642304179, + "grad_norm": 1.868164869692377, + "learning_rate": 2.613722016414944e-07, + "loss": 0.1231, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8888 + }, + { + "epoch": 0.8548348319469155, + "grad_norm": 1.4131956693761494, + "learning_rate": 2.610327295863205e-07, + "loss": 0.098, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8889 + }, + { + "epoch": 0.854930999663413, + "grad_norm": 1.7909626101394331, + "learning_rate": 2.606934659852356e-07, + "loss": 0.1371, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8890 + }, + { + "epoch": 0.8550271673799106, + "grad_norm": 2.52462952649565, + "learning_rate": 2.603544108698264e-07, + "loss": 0.1239, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8891 + }, + { + "epoch": 0.8551233350964081, + "grad_norm": 1.8555718083692945, + "learning_rate": 2.6001556427166064e-07, + "loss": 0.1187, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8892 + }, + { + "epoch": 0.8552195028129057, + "grad_norm": 1.8719578661593579, + "learning_rate": 2.596769262222845e-07, + "loss": 0.0955, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8893 + }, + { + "epoch": 0.8553156705294033, + "grad_norm": 1.6069087901735104, + "learning_rate": 2.593384967532267e-07, + "loss": 0.118, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8894 + }, + { + "epoch": 0.8554118382459008, + "grad_norm": 1.7765423845440018, + "learning_rate": 2.5900027589599587e-07, + "loss": 0.0981, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8895 + }, + { + "epoch": 0.8555080059623984, + "grad_norm": 2.158250064449395, + "learning_rate": 2.586622636820818e-07, + "loss": 0.1143, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8896 + }, + { + "epoch": 0.855604173678896, + "grad_norm": 1.782313101759291, + "learning_rate": 2.5832446014295436e-07, + "loss": 0.0947, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8897 + }, + { + "epoch": 0.8557003413953935, + "grad_norm": 1.7217912534592437, + "learning_rate": 2.5798686531006314e-07, + "loss": 0.1044, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8898 + }, + { + "epoch": 0.8557965091118911, + "grad_norm": 1.8799849187064592, + "learning_rate": 2.5764947921483985e-07, + "loss": 0.1187, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8899 + }, + { + "epoch": 0.8558926768283888, + "grad_norm": 4.487057505100274, + "learning_rate": 2.573123018886961e-07, + "loss": 0.1431, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8900 + }, + { + "epoch": 0.8559888445448863, + "grad_norm": 2.0411234219931815, + "learning_rate": 2.5697533336302374e-07, + "loss": 0.1191, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8901 + }, + { + "epoch": 0.8560850122613839, + "grad_norm": 2.0287911969540353, + "learning_rate": 2.566385736691954e-07, + "loss": 0.1328, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8902 + }, + { + "epoch": 0.8561811799778815, + "grad_norm": 2.1800701042090003, + "learning_rate": 2.563020228385646e-07, + "loss": 0.1196, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8903 + }, + { + "epoch": 0.856277347694379, + "grad_norm": 2.052037927559915, + "learning_rate": 2.5596568090246546e-07, + "loss": 0.0817, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8904 + }, + { + "epoch": 0.8563735154108766, + "grad_norm": 1.5350796828875666, + "learning_rate": 2.5562954789221164e-07, + "loss": 0.1029, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8905 + }, + { + "epoch": 0.8564696831273741, + "grad_norm": 1.8281592526303736, + "learning_rate": 2.552936238390982e-07, + "loss": 0.1279, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8906 + }, + { + "epoch": 0.8565658508438717, + "grad_norm": 2.0698622700335085, + "learning_rate": 2.5495790877440086e-07, + "loss": 0.1361, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8907 + }, + { + "epoch": 0.8566620185603693, + "grad_norm": 1.8076303865124912, + "learning_rate": 2.546224027293759e-07, + "loss": 0.1302, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8908 + }, + { + "epoch": 0.8567581862768668, + "grad_norm": 1.6454925117765191, + "learning_rate": 2.542871057352597e-07, + "loss": 0.0984, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8909 + }, + { + "epoch": 0.8568543539933644, + "grad_norm": 1.9956427968639354, + "learning_rate": 2.539520178232688e-07, + "loss": 0.114, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8910 + }, + { + "epoch": 0.856950521709862, + "grad_norm": 3.267752716409462, + "learning_rate": 2.536171390246012e-07, + "loss": 0.1188, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8911 + }, + { + "epoch": 0.8570466894263595, + "grad_norm": 2.3130370750033085, + "learning_rate": 2.5328246937043526e-07, + "loss": 0.1115, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8912 + }, + { + "epoch": 0.8571428571428571, + "grad_norm": 1.3499502144691522, + "learning_rate": 2.529480088919292e-07, + "loss": 0.0827, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8913 + }, + { + "epoch": 0.8572390248593547, + "grad_norm": 1.82783736899163, + "learning_rate": 2.5261375762022257e-07, + "loss": 0.1108, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8914 + }, + { + "epoch": 0.8573351925758523, + "grad_norm": 2.191874915785301, + "learning_rate": 2.522797155864354e-07, + "loss": 0.1225, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8915 + }, + { + "epoch": 0.8574313602923499, + "grad_norm": 2.010152370451512, + "learning_rate": 2.5194588282166724e-07, + "loss": 0.1168, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8916 + }, + { + "epoch": 0.8575275280088475, + "grad_norm": 2.1773345763019925, + "learning_rate": 2.5161225935699893e-07, + "loss": 0.1156, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8917 + }, + { + "epoch": 0.857623695725345, + "grad_norm": 2.0528133781838998, + "learning_rate": 2.512788452234921e-07, + "loss": 0.1205, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8918 + }, + { + "epoch": 0.8577198634418426, + "grad_norm": 1.7055719754362895, + "learning_rate": 2.5094564045218845e-07, + "loss": 0.1, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8919 + }, + { + "epoch": 0.8578160311583402, + "grad_norm": 2.146820145813568, + "learning_rate": 2.5061264507411057e-07, + "loss": 0.1075, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8920 + }, + { + "epoch": 0.8579121988748377, + "grad_norm": 2.0362700617966727, + "learning_rate": 2.502798591202604e-07, + "loss": 0.1189, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8921 + }, + { + "epoch": 0.8580083665913353, + "grad_norm": 1.797131745758757, + "learning_rate": 2.499472826216218e-07, + "loss": 0.1112, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8922 + }, + { + "epoch": 0.8581045343078328, + "grad_norm": 3.0023095134143554, + "learning_rate": 2.4961491560915847e-07, + "loss": 0.105, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8923 + }, + { + "epoch": 0.8582007020243304, + "grad_norm": 1.7695011552475135, + "learning_rate": 2.4928275811381494e-07, + "loss": 0.0924, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8924 + }, + { + "epoch": 0.858296869740828, + "grad_norm": 1.6563104214902915, + "learning_rate": 2.4895081016651555e-07, + "loss": 0.0997, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8925 + }, + { + "epoch": 0.8583930374573255, + "grad_norm": 1.7996508692429403, + "learning_rate": 2.4861907179816654e-07, + "loss": 0.1212, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8926 + }, + { + "epoch": 0.8584892051738231, + "grad_norm": 2.1266998150581795, + "learning_rate": 2.4828754303965225e-07, + "loss": 0.1231, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8927 + }, + { + "epoch": 0.8585853728903207, + "grad_norm": 2.9094095523582224, + "learning_rate": 2.4795622392184e-07, + "loss": 0.1064, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8928 + }, + { + "epoch": 0.8586815406068183, + "grad_norm": 1.5629138411600507, + "learning_rate": 2.476251144755759e-07, + "loss": 0.0913, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8929 + }, + { + "epoch": 0.8587777083233159, + "grad_norm": 1.658031529422498, + "learning_rate": 2.4729421473168784e-07, + "loss": 0.0902, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8930 + }, + { + "epoch": 0.8588738760398135, + "grad_norm": 2.2489126604588194, + "learning_rate": 2.4696352472098286e-07, + "loss": 0.134, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8931 + }, + { + "epoch": 0.858970043756311, + "grad_norm": 2.1152549312359894, + "learning_rate": 2.466330444742493e-07, + "loss": 0.1164, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8932 + }, + { + "epoch": 0.8590662114728086, + "grad_norm": 1.725234470483113, + "learning_rate": 2.463027740222562e-07, + "loss": 0.1204, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8933 + }, + { + "epoch": 0.8591623791893062, + "grad_norm": 1.8530316769302086, + "learning_rate": 2.459727133957518e-07, + "loss": 0.11, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8934 + }, + { + "epoch": 0.8592585469058037, + "grad_norm": 1.559933197961372, + "learning_rate": 2.456428626254664e-07, + "loss": 0.0933, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8935 + }, + { + "epoch": 0.8593547146223013, + "grad_norm": 1.6964394707014157, + "learning_rate": 2.4531322174210976e-07, + "loss": 0.0882, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8936 + }, + { + "epoch": 0.8594508823387988, + "grad_norm": 1.6211135566813972, + "learning_rate": 2.4498379077637277e-07, + "loss": 0.1053, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8937 + }, + { + "epoch": 0.8595470500552964, + "grad_norm": 1.5586028243982044, + "learning_rate": 2.446545697589256e-07, + "loss": 0.1124, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8938 + }, + { + "epoch": 0.859643217771794, + "grad_norm": 2.1717194850645987, + "learning_rate": 2.443255587204202e-07, + "loss": 0.0937, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8939 + }, + { + "epoch": 0.8597393854882915, + "grad_norm": 1.9481847896940037, + "learning_rate": 2.4399675769148784e-07, + "loss": 0.1392, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8940 + }, + { + "epoch": 0.8598355532047891, + "grad_norm": 1.5239837366750615, + "learning_rate": 2.4366816670274155e-07, + "loss": 0.0786, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8941 + }, + { + "epoch": 0.8599317209212867, + "grad_norm": 1.689105545762822, + "learning_rate": 2.433397857847739e-07, + "loss": 0.0994, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8942 + }, + { + "epoch": 0.8600278886377843, + "grad_norm": 1.7495689777335208, + "learning_rate": 2.43011614968158e-07, + "loss": 0.119, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8943 + }, + { + "epoch": 0.8601240563542819, + "grad_norm": 2.203427639552308, + "learning_rate": 2.4268365428344737e-07, + "loss": 0.1352, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8944 + }, + { + "epoch": 0.8602202240707795, + "grad_norm": 1.814578510228293, + "learning_rate": 2.4235590376117586e-07, + "loss": 0.1138, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8945 + }, + { + "epoch": 0.860316391787277, + "grad_norm": 1.8591332207233062, + "learning_rate": 2.4202836343185794e-07, + "loss": 0.1202, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8946 + }, + { + "epoch": 0.8604125595037746, + "grad_norm": 1.6113762191954004, + "learning_rate": 2.4170103332598893e-07, + "loss": 0.1065, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8947 + }, + { + "epoch": 0.8605087272202722, + "grad_norm": 1.9686085511229598, + "learning_rate": 2.4137391347404475e-07, + "loss": 0.0986, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8948 + }, + { + "epoch": 0.8606048949367697, + "grad_norm": 1.4736467629868133, + "learning_rate": 2.4104700390647966e-07, + "loss": 0.0835, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8949 + }, + { + "epoch": 0.8607010626532673, + "grad_norm": 2.445610201422776, + "learning_rate": 2.407203046537307e-07, + "loss": 0.1146, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8950 + }, + { + "epoch": 0.8607972303697649, + "grad_norm": 2.5925791912501084, + "learning_rate": 2.4039381574621434e-07, + "loss": 0.127, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8951 + }, + { + "epoch": 0.8608933980862624, + "grad_norm": 1.7577322579141836, + "learning_rate": 2.4006753721432794e-07, + "loss": 0.1074, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8952 + }, + { + "epoch": 0.86098956580276, + "grad_norm": 2.555798931755068, + "learning_rate": 2.397414690884484e-07, + "loss": 0.1394, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8953 + }, + { + "epoch": 0.8610857335192575, + "grad_norm": 2.257377877932413, + "learning_rate": 2.3941561139893433e-07, + "loss": 0.1548, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8954 + }, + { + "epoch": 0.8611819012357551, + "grad_norm": 1.7788281630231362, + "learning_rate": 2.3908996417612296e-07, + "loss": 0.1146, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8955 + }, + { + "epoch": 0.8612780689522527, + "grad_norm": 2.2615510609281904, + "learning_rate": 2.387645274503336e-07, + "loss": 0.1581, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8956 + }, + { + "epoch": 0.8613742366687503, + "grad_norm": 1.770639903378201, + "learning_rate": 2.384393012518654e-07, + "loss": 0.1276, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8957 + }, + { + "epoch": 0.8614704043852479, + "grad_norm": 2.2207482970316486, + "learning_rate": 2.381142856109975e-07, + "loss": 0.1331, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8958 + }, + { + "epoch": 0.8615665721017455, + "grad_norm": 2.017655750765417, + "learning_rate": 2.3778948055798974e-07, + "loss": 0.1375, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8959 + }, + { + "epoch": 0.861662739818243, + "grad_norm": 1.3962898570593103, + "learning_rate": 2.37464886123083e-07, + "loss": 0.0927, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8960 + }, + { + "epoch": 0.8617589075347406, + "grad_norm": 2.4900199532091722, + "learning_rate": 2.3714050233649727e-07, + "loss": 0.102, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8961 + }, + { + "epoch": 0.8618550752512382, + "grad_norm": 1.7550524007669093, + "learning_rate": 2.368163292284334e-07, + "loss": 0.1118, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8962 + }, + { + "epoch": 0.8619512429677357, + "grad_norm": 1.8209887897306682, + "learning_rate": 2.3649236682907363e-07, + "loss": 0.1153, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8963 + }, + { + "epoch": 0.8620474106842333, + "grad_norm": 1.4777292408355927, + "learning_rate": 2.3616861516857936e-07, + "loss": 0.0989, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8964 + }, + { + "epoch": 0.8621435784007309, + "grad_norm": 1.5546009543993127, + "learning_rate": 2.3584507427709236e-07, + "loss": 0.0882, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8965 + }, + { + "epoch": 0.8622397461172284, + "grad_norm": 1.7157113367020087, + "learning_rate": 2.3552174418473572e-07, + "loss": 0.104, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8966 + }, + { + "epoch": 0.862335913833726, + "grad_norm": 1.8317773707628797, + "learning_rate": 2.351986249216126e-07, + "loss": 0.1209, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8967 + }, + { + "epoch": 0.8624320815502236, + "grad_norm": 2.182059162129141, + "learning_rate": 2.3487571651780534e-07, + "loss": 0.1194, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8968 + }, + { + "epoch": 0.8625282492667211, + "grad_norm": 1.7616234925057697, + "learning_rate": 2.345530190033782e-07, + "loss": 0.1226, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8969 + }, + { + "epoch": 0.8626244169832187, + "grad_norm": 2.2293215794883836, + "learning_rate": 2.3423053240837518e-07, + "loss": 0.1273, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8970 + }, + { + "epoch": 0.8627205846997164, + "grad_norm": 1.3915157230056656, + "learning_rate": 2.339082567628212e-07, + "loss": 0.0901, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8971 + }, + { + "epoch": 0.8628167524162139, + "grad_norm": 1.8353208913559365, + "learning_rate": 2.3358619209672e-07, + "loss": 0.1307, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8972 + }, + { + "epoch": 0.8629129201327115, + "grad_norm": 1.516049860347971, + "learning_rate": 2.3326433844005736e-07, + "loss": 0.1059, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8973 + }, + { + "epoch": 0.863009087849209, + "grad_norm": 2.2068075928936324, + "learning_rate": 2.3294269582279844e-07, + "loss": 0.1301, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8974 + }, + { + "epoch": 0.8631052555657066, + "grad_norm": 1.8515997366610144, + "learning_rate": 2.326212642748893e-07, + "loss": 0.1452, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8975 + }, + { + "epoch": 0.8632014232822042, + "grad_norm": 2.7412479046695037, + "learning_rate": 2.3230004382625653e-07, + "loss": 0.1443, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8976 + }, + { + "epoch": 0.8632975909987017, + "grad_norm": 2.0425817435143596, + "learning_rate": 2.319790345068057e-07, + "loss": 0.1365, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8977 + }, + { + "epoch": 0.8633937587151993, + "grad_norm": 1.6708252639894852, + "learning_rate": 2.3165823634642399e-07, + "loss": 0.0994, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8978 + }, + { + "epoch": 0.8634899264316969, + "grad_norm": 1.8477075513302441, + "learning_rate": 2.3133764937497887e-07, + "loss": 0.0992, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8979 + }, + { + "epoch": 0.8635860941481944, + "grad_norm": 2.4255584774797705, + "learning_rate": 2.3101727362231762e-07, + "loss": 0.1069, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8980 + }, + { + "epoch": 0.863682261864692, + "grad_norm": 1.5986990182310241, + "learning_rate": 2.3069710911826858e-07, + "loss": 0.1103, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8981 + }, + { + "epoch": 0.8637784295811896, + "grad_norm": 1.6819612342631765, + "learning_rate": 2.3037715589263988e-07, + "loss": 0.1196, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8982 + }, + { + "epoch": 0.8638745972976871, + "grad_norm": 2.1526921923836904, + "learning_rate": 2.300574139752193e-07, + "loss": 0.1297, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8983 + }, + { + "epoch": 0.8639707650141847, + "grad_norm": 2.1984309023251716, + "learning_rate": 2.2973788339577613e-07, + "loss": 0.0932, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8984 + }, + { + "epoch": 0.8640669327306824, + "grad_norm": 1.6926807338175662, + "learning_rate": 2.294185641840596e-07, + "loss": 0.0768, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8985 + }, + { + "epoch": 0.8641631004471799, + "grad_norm": 1.6580881029358843, + "learning_rate": 2.2909945636979957e-07, + "loss": 0.1104, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8986 + }, + { + "epoch": 0.8642592681636775, + "grad_norm": 1.8324514587618008, + "learning_rate": 2.2878055998270532e-07, + "loss": 0.1092, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8987 + }, + { + "epoch": 0.864355435880175, + "grad_norm": 2.0592780760409224, + "learning_rate": 2.284618750524678e-07, + "loss": 0.1092, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8988 + }, + { + "epoch": 0.8644516035966726, + "grad_norm": 1.649372259656697, + "learning_rate": 2.2814340160875636e-07, + "loss": 0.1328, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8989 + }, + { + "epoch": 0.8645477713131702, + "grad_norm": 2.1030218985369378, + "learning_rate": 2.2782513968122227e-07, + "loss": 0.1428, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8990 + }, + { + "epoch": 0.8646439390296677, + "grad_norm": 2.928681931752096, + "learning_rate": 2.2750708929949656e-07, + "loss": 0.1194, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8991 + }, + { + "epoch": 0.8647401067461653, + "grad_norm": 2.7022388248667166, + "learning_rate": 2.271892504931905e-07, + "loss": 0.0943, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8992 + }, + { + "epoch": 0.8648362744626629, + "grad_norm": 2.271398893374928, + "learning_rate": 2.2687162329189687e-07, + "loss": 0.1198, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8993 + }, + { + "epoch": 0.8649324421791604, + "grad_norm": 1.8418235837622543, + "learning_rate": 2.2655420772518587e-07, + "loss": 0.1334, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8994 + }, + { + "epoch": 0.865028609895658, + "grad_norm": 2.1702428401295877, + "learning_rate": 2.2623700382261082e-07, + "loss": 0.1582, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8995 + }, + { + "epoch": 0.8651247776121556, + "grad_norm": 2.624271564512738, + "learning_rate": 2.2592001161370392e-07, + "loss": 0.1228, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8996 + }, + { + "epoch": 0.8652209453286531, + "grad_norm": 2.396623599171867, + "learning_rate": 2.2560323112797877e-07, + "loss": 0.1289, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8997 + }, + { + "epoch": 0.8653171130451507, + "grad_norm": 2.3382051085148463, + "learning_rate": 2.2528666239492735e-07, + "loss": 0.1223, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8998 + }, + { + "epoch": 0.8654132807616484, + "grad_norm": 1.7451817125526465, + "learning_rate": 2.2497030544402389e-07, + "loss": 0.1052, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 8999 + }, + { + "epoch": 0.8655094484781459, + "grad_norm": 1.9356703814201666, + "learning_rate": 2.2465416030472227e-07, + "loss": 0.1436, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9000 + }, + { + "epoch": 0.8656056161946435, + "grad_norm": 2.9763450899625505, + "learning_rate": 2.2433822700645564e-07, + "loss": 0.1455, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9001 + }, + { + "epoch": 0.865701783911141, + "grad_norm": 1.54179145093044, + "learning_rate": 2.2402250557863854e-07, + "loss": 0.1087, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9002 + }, + { + "epoch": 0.8657979516276386, + "grad_norm": 1.914391453757746, + "learning_rate": 2.2370699605066577e-07, + "loss": 0.1308, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9003 + }, + { + "epoch": 0.8658941193441362, + "grad_norm": 2.384098678694859, + "learning_rate": 2.233916984519127e-07, + "loss": 0.1173, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9004 + }, + { + "epoch": 0.8659902870606337, + "grad_norm": 1.6743741875526972, + "learning_rate": 2.2307661281173314e-07, + "loss": 0.1271, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9005 + }, + { + "epoch": 0.8660864547771313, + "grad_norm": 1.460388144883019, + "learning_rate": 2.227617391594633e-07, + "loss": 0.0978, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9006 + }, + { + "epoch": 0.8661826224936289, + "grad_norm": 1.717806989534934, + "learning_rate": 2.2244707752441863e-07, + "loss": 0.1209, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9007 + }, + { + "epoch": 0.8662787902101264, + "grad_norm": 3.225642136001742, + "learning_rate": 2.2213262793589485e-07, + "loss": 0.1058, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9008 + }, + { + "epoch": 0.866374957926624, + "grad_norm": 1.5584719453624412, + "learning_rate": 2.21818390423168e-07, + "loss": 0.1077, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9009 + }, + { + "epoch": 0.8664711256431216, + "grad_norm": 1.9975931502761564, + "learning_rate": 2.2150436501549549e-07, + "loss": 0.1099, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9010 + }, + { + "epoch": 0.8665672933596191, + "grad_norm": 1.6438087480854398, + "learning_rate": 2.2119055174211256e-07, + "loss": 0.0973, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9011 + }, + { + "epoch": 0.8666634610761167, + "grad_norm": 2.318594494575886, + "learning_rate": 2.208769506322367e-07, + "loss": 0.1202, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9012 + }, + { + "epoch": 0.8667596287926144, + "grad_norm": 2.5630186850907672, + "learning_rate": 2.2056356171506504e-07, + "loss": 0.1435, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9013 + }, + { + "epoch": 0.8668557965091119, + "grad_norm": 1.7011231810901946, + "learning_rate": 2.2025038501977485e-07, + "loss": 0.0954, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9014 + }, + { + "epoch": 0.8669519642256095, + "grad_norm": 1.4760425709517075, + "learning_rate": 2.1993742057552393e-07, + "loss": 0.1013, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9015 + }, + { + "epoch": 0.8670481319421071, + "grad_norm": 1.6953549547550304, + "learning_rate": 2.196246684114506e-07, + "loss": 0.1139, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9016 + }, + { + "epoch": 0.8671442996586046, + "grad_norm": 1.8347972420911978, + "learning_rate": 2.1931212855667184e-07, + "loss": 0.103, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9017 + }, + { + "epoch": 0.8672404673751022, + "grad_norm": 1.5976497148996083, + "learning_rate": 2.1899980104028662e-07, + "loss": 0.1126, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9018 + }, + { + "epoch": 0.8673366350915998, + "grad_norm": 1.4845265019990757, + "learning_rate": 2.1868768589137335e-07, + "loss": 0.0946, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9019 + }, + { + "epoch": 0.8674328028080973, + "grad_norm": 2.036066571051807, + "learning_rate": 2.18375783138991e-07, + "loss": 0.1178, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9020 + }, + { + "epoch": 0.8675289705245949, + "grad_norm": 1.6449721696537811, + "learning_rate": 2.1806409281217884e-07, + "loss": 0.1142, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9021 + }, + { + "epoch": 0.8676251382410924, + "grad_norm": 1.4305080428960923, + "learning_rate": 2.177526149399556e-07, + "loss": 0.0933, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9022 + }, + { + "epoch": 0.86772130595759, + "grad_norm": 1.5686027727915315, + "learning_rate": 2.1744134955132084e-07, + "loss": 0.1033, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9023 + }, + { + "epoch": 0.8678174736740876, + "grad_norm": 1.763904869788997, + "learning_rate": 2.1713029667525422e-07, + "loss": 0.117, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9024 + }, + { + "epoch": 0.8679136413905851, + "grad_norm": 1.7659949389151166, + "learning_rate": 2.1681945634071555e-07, + "loss": 0.0925, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9025 + }, + { + "epoch": 0.8680098091070827, + "grad_norm": 2.650906749535828, + "learning_rate": 2.1650882857664535e-07, + "loss": 0.0958, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9026 + }, + { + "epoch": 0.8681059768235804, + "grad_norm": 1.9248768190647993, + "learning_rate": 2.1619841341196406e-07, + "loss": 0.1002, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9027 + }, + { + "epoch": 0.8682021445400779, + "grad_norm": 1.575694578722179, + "learning_rate": 2.1588821087557137e-07, + "loss": 0.1005, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9028 + }, + { + "epoch": 0.8682983122565755, + "grad_norm": 1.74473235280544, + "learning_rate": 2.1557822099634834e-07, + "loss": 0.1222, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9029 + }, + { + "epoch": 0.8683944799730731, + "grad_norm": 2.28844399999497, + "learning_rate": 2.152684438031566e-07, + "loss": 0.14, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9030 + }, + { + "epoch": 0.8684906476895706, + "grad_norm": 1.7387083736724092, + "learning_rate": 2.149588793248361e-07, + "loss": 0.1017, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9031 + }, + { + "epoch": 0.8685868154060682, + "grad_norm": 1.900759759873319, + "learning_rate": 2.1464952759020857e-07, + "loss": 0.1008, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9032 + }, + { + "epoch": 0.8686829831225658, + "grad_norm": 1.7667024384146888, + "learning_rate": 2.143403886280762e-07, + "loss": 0.1239, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9033 + }, + { + "epoch": 0.8687791508390633, + "grad_norm": 2.5530889763159634, + "learning_rate": 2.140314624672199e-07, + "loss": 0.1086, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9034 + }, + { + "epoch": 0.8688753185555609, + "grad_norm": 2.229046417756623, + "learning_rate": 2.137227491364016e-07, + "loss": 0.1098, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9035 + }, + { + "epoch": 0.8689714862720584, + "grad_norm": 1.8781022666273397, + "learning_rate": 2.1341424866436366e-07, + "loss": 0.0958, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9036 + }, + { + "epoch": 0.869067653988556, + "grad_norm": 1.8082625518049513, + "learning_rate": 2.1310596107982835e-07, + "loss": 0.1177, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9037 + }, + { + "epoch": 0.8691638217050536, + "grad_norm": 1.943652316374687, + "learning_rate": 2.1279788641149822e-07, + "loss": 0.1237, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9038 + }, + { + "epoch": 0.8692599894215511, + "grad_norm": 1.7227723701584925, + "learning_rate": 2.124900246880554e-07, + "loss": 0.0908, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9039 + }, + { + "epoch": 0.8693561571380487, + "grad_norm": 1.4794980887031204, + "learning_rate": 2.1218237593816305e-07, + "loss": 0.1065, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9040 + }, + { + "epoch": 0.8694523248545464, + "grad_norm": 1.7063575706984773, + "learning_rate": 2.1187494019046378e-07, + "loss": 0.0916, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9041 + }, + { + "epoch": 0.8695484925710439, + "grad_norm": 1.9852155591344904, + "learning_rate": 2.1156771747358113e-07, + "loss": 0.1461, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9042 + }, + { + "epoch": 0.8696446602875415, + "grad_norm": 1.7705505491275189, + "learning_rate": 2.1126070781611802e-07, + "loss": 0.1059, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9043 + }, + { + "epoch": 0.8697408280040391, + "grad_norm": 2.859385649773007, + "learning_rate": 2.1095391124665882e-07, + "loss": 0.1102, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9044 + }, + { + "epoch": 0.8698369957205366, + "grad_norm": 1.810204054020983, + "learning_rate": 2.1064732779376568e-07, + "loss": 0.1119, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9045 + }, + { + "epoch": 0.8699331634370342, + "grad_norm": 1.709962401580981, + "learning_rate": 2.1034095748598327e-07, + "loss": 0.1137, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9046 + }, + { + "epoch": 0.8700293311535318, + "grad_norm": 1.563444943330771, + "learning_rate": 2.1003480035183515e-07, + "loss": 0.0925, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9047 + }, + { + "epoch": 0.8701254988700293, + "grad_norm": 2.9871972856077855, + "learning_rate": 2.0972885641982605e-07, + "loss": 0.124, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9048 + }, + { + "epoch": 0.8702216665865269, + "grad_norm": 2.3935494528489736, + "learning_rate": 2.094231257184398e-07, + "loss": 0.1269, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9049 + }, + { + "epoch": 0.8703178343030245, + "grad_norm": 1.8553058837455172, + "learning_rate": 2.0911760827614035e-07, + "loss": 0.1196, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9050 + }, + { + "epoch": 0.870414002019522, + "grad_norm": 1.831689196654731, + "learning_rate": 2.0881230412137272e-07, + "loss": 0.1004, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9051 + }, + { + "epoch": 0.8705101697360196, + "grad_norm": 2.2224204536767393, + "learning_rate": 2.0850721328256162e-07, + "loss": 0.1324, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9052 + }, + { + "epoch": 0.8706063374525171, + "grad_norm": 1.5911157877789723, + "learning_rate": 2.082023357881116e-07, + "loss": 0.0959, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9053 + }, + { + "epoch": 0.8707025051690147, + "grad_norm": 2.310804792831662, + "learning_rate": 2.0789767166640772e-07, + "loss": 0.1035, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9054 + }, + { + "epoch": 0.8707986728855124, + "grad_norm": 2.1441544439731923, + "learning_rate": 2.0759322094581536e-07, + "loss": 0.1048, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9055 + }, + { + "epoch": 0.87089484060201, + "grad_norm": 1.6446953351970357, + "learning_rate": 2.0728898365467903e-07, + "loss": 0.1083, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9056 + }, + { + "epoch": 0.8709910083185075, + "grad_norm": 2.002829595779606, + "learning_rate": 2.069849598213247e-07, + "loss": 0.1139, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9057 + }, + { + "epoch": 0.8710871760350051, + "grad_norm": 1.7601376663993098, + "learning_rate": 2.0668114947405727e-07, + "loss": 0.118, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9058 + }, + { + "epoch": 0.8711833437515026, + "grad_norm": 1.9247691142323256, + "learning_rate": 2.0637755264116293e-07, + "loss": 0.1221, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9059 + }, + { + "epoch": 0.8712795114680002, + "grad_norm": 1.8157737811462824, + "learning_rate": 2.0607416935090746e-07, + "loss": 0.089, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9060 + }, + { + "epoch": 0.8713756791844978, + "grad_norm": 2.147858892539168, + "learning_rate": 2.05770999631536e-07, + "loss": 0.0861, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9061 + }, + { + "epoch": 0.8714718469009953, + "grad_norm": 1.837276654251589, + "learning_rate": 2.0546804351127464e-07, + "loss": 0.113, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9062 + }, + { + "epoch": 0.8715680146174929, + "grad_norm": 2.0416605137048127, + "learning_rate": 2.0516530101832993e-07, + "loss": 0.121, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9063 + }, + { + "epoch": 0.8716641823339905, + "grad_norm": 1.5706314974479176, + "learning_rate": 2.0486277218088796e-07, + "loss": 0.0995, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9064 + }, + { + "epoch": 0.871760350050488, + "grad_norm": 1.7902622266688888, + "learning_rate": 2.0456045702711452e-07, + "loss": 0.1227, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9065 + }, + { + "epoch": 0.8718565177669856, + "grad_norm": 2.360310398558114, + "learning_rate": 2.0425835558515627e-07, + "loss": 0.1177, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9066 + }, + { + "epoch": 0.8719526854834831, + "grad_norm": 2.0554910566376696, + "learning_rate": 2.0395646788313987e-07, + "loss": 0.1061, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9067 + }, + { + "epoch": 0.8720488531999807, + "grad_norm": 2.280677227927948, + "learning_rate": 2.0365479394917149e-07, + "loss": 0.1116, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9068 + }, + { + "epoch": 0.8721450209164784, + "grad_norm": 2.262377712376838, + "learning_rate": 2.0335333381133805e-07, + "loss": 0.1286, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9069 + }, + { + "epoch": 0.872241188632976, + "grad_norm": 1.5759024370065386, + "learning_rate": 2.0305208749770656e-07, + "loss": 0.1164, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9070 + }, + { + "epoch": 0.8723373563494735, + "grad_norm": 1.8757527364289592, + "learning_rate": 2.0275105503632347e-07, + "loss": 0.0931, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9071 + }, + { + "epoch": 0.8724335240659711, + "grad_norm": 1.7557623868256687, + "learning_rate": 2.024502364552164e-07, + "loss": 0.1187, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9072 + }, + { + "epoch": 0.8725296917824686, + "grad_norm": 1.7225319062322486, + "learning_rate": 2.021496317823915e-07, + "loss": 0.1058, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9073 + }, + { + "epoch": 0.8726258594989662, + "grad_norm": 1.5290275361834569, + "learning_rate": 2.0184924104583615e-07, + "loss": 0.1035, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9074 + }, + { + "epoch": 0.8727220272154638, + "grad_norm": 2.0664810674851504, + "learning_rate": 2.0154906427351794e-07, + "loss": 0.1264, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9075 + }, + { + "epoch": 0.8728181949319613, + "grad_norm": 1.5242202181189157, + "learning_rate": 2.01249101493384e-07, + "loss": 0.0895, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9076 + }, + { + "epoch": 0.8729143626484589, + "grad_norm": 1.8585445313922144, + "learning_rate": 2.0094935273336196e-07, + "loss": 0.0987, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9077 + }, + { + "epoch": 0.8730105303649565, + "grad_norm": 1.7309044277154946, + "learning_rate": 2.006498180213587e-07, + "loss": 0.1321, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9078 + }, + { + "epoch": 0.873106698081454, + "grad_norm": 2.146487994681904, + "learning_rate": 2.0035049738526213e-07, + "loss": 0.1078, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9079 + }, + { + "epoch": 0.8732028657979516, + "grad_norm": 1.7836274718493645, + "learning_rate": 2.0005139085293945e-07, + "loss": 0.1136, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9080 + }, + { + "epoch": 0.8732990335144492, + "grad_norm": 1.8814763186089936, + "learning_rate": 1.9975249845223892e-07, + "loss": 0.0962, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9081 + }, + { + "epoch": 0.8733952012309467, + "grad_norm": 2.1403612904139595, + "learning_rate": 1.9945382021098774e-07, + "loss": 0.0976, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9082 + }, + { + "epoch": 0.8734913689474444, + "grad_norm": 1.6746107645058013, + "learning_rate": 1.9915535615699423e-07, + "loss": 0.1036, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9083 + }, + { + "epoch": 0.873587536663942, + "grad_norm": 1.6375126908035367, + "learning_rate": 1.9885710631804555e-07, + "loss": 0.1079, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9084 + }, + { + "epoch": 0.8736837043804395, + "grad_norm": 2.056776201666079, + "learning_rate": 1.9855907072190984e-07, + "loss": 0.0961, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9085 + }, + { + "epoch": 0.8737798720969371, + "grad_norm": 2.493240251932319, + "learning_rate": 1.9826124939633511e-07, + "loss": 0.1135, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9086 + }, + { + "epoch": 0.8738760398134346, + "grad_norm": 2.114303852019986, + "learning_rate": 1.9796364236904925e-07, + "loss": 0.1098, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9087 + }, + { + "epoch": 0.8739722075299322, + "grad_norm": 1.7713721823504205, + "learning_rate": 1.9766624966776088e-07, + "loss": 0.1212, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9088 + }, + { + "epoch": 0.8740683752464298, + "grad_norm": 2.0597882669138805, + "learning_rate": 1.9736907132015732e-07, + "loss": 0.1093, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9089 + }, + { + "epoch": 0.8741645429629273, + "grad_norm": 2.0111414350811807, + "learning_rate": 1.9707210735390696e-07, + "loss": 0.1224, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9090 + }, + { + "epoch": 0.8742607106794249, + "grad_norm": 1.5460086165813574, + "learning_rate": 1.9677535779665803e-07, + "loss": 0.1089, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9091 + }, + { + "epoch": 0.8743568783959225, + "grad_norm": 1.623342527073277, + "learning_rate": 1.9647882267603863e-07, + "loss": 0.1033, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9092 + }, + { + "epoch": 0.87445304611242, + "grad_norm": 1.7369936975103901, + "learning_rate": 1.9618250201965699e-07, + "loss": 0.1127, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9093 + }, + { + "epoch": 0.8745492138289176, + "grad_norm": 1.6649265510249551, + "learning_rate": 1.9588639585510217e-07, + "loss": 0.0925, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9094 + }, + { + "epoch": 0.8746453815454152, + "grad_norm": 2.1280830616472772, + "learning_rate": 1.9559050420994124e-07, + "loss": 0.126, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9095 + }, + { + "epoch": 0.8747415492619127, + "grad_norm": 1.8252659576975618, + "learning_rate": 1.9529482711172305e-07, + "loss": 0.1093, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9096 + }, + { + "epoch": 0.8748377169784104, + "grad_norm": 1.6727683021033892, + "learning_rate": 1.9499936458797664e-07, + "loss": 0.1047, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9097 + }, + { + "epoch": 0.874933884694908, + "grad_norm": 1.9415721899180052, + "learning_rate": 1.947041166662092e-07, + "loss": 0.1246, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9098 + }, + { + "epoch": 0.8750300524114055, + "grad_norm": 1.5627246863810857, + "learning_rate": 1.9440908337390958e-07, + "loss": 0.0951, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9099 + }, + { + "epoch": 0.8751262201279031, + "grad_norm": 1.5829554675099111, + "learning_rate": 1.941142647385469e-07, + "loss": 0.1034, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9100 + }, + { + "epoch": 0.8752223878444007, + "grad_norm": 2.3072571135904156, + "learning_rate": 1.9381966078756865e-07, + "loss": 0.1265, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9101 + }, + { + "epoch": 0.8753185555608982, + "grad_norm": 1.8521219229153665, + "learning_rate": 1.9352527154840345e-07, + "loss": 0.1248, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9102 + }, + { + "epoch": 0.8754147232773958, + "grad_norm": 3.3138018287088964, + "learning_rate": 1.932310970484602e-07, + "loss": 0.1327, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9103 + }, + { + "epoch": 0.8755108909938933, + "grad_norm": 1.9781044193656123, + "learning_rate": 1.9293713731512675e-07, + "loss": 0.1228, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9104 + }, + { + "epoch": 0.8756070587103909, + "grad_norm": 1.938616406449085, + "learning_rate": 1.9264339237577257e-07, + "loss": 0.1283, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9105 + }, + { + "epoch": 0.8757032264268885, + "grad_norm": 2.270338801425473, + "learning_rate": 1.9234986225774522e-07, + "loss": 0.107, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9106 + }, + { + "epoch": 0.875799394143386, + "grad_norm": 1.622235774732057, + "learning_rate": 1.920565469883734e-07, + "loss": 0.0895, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9107 + }, + { + "epoch": 0.8758955618598836, + "grad_norm": 1.9911396901783842, + "learning_rate": 1.9176344659496555e-07, + "loss": 0.1306, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9108 + }, + { + "epoch": 0.8759917295763812, + "grad_norm": 2.0343695055506834, + "learning_rate": 1.9147056110481011e-07, + "loss": 0.1033, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9109 + }, + { + "epoch": 0.8760878972928787, + "grad_norm": 1.547925608492485, + "learning_rate": 1.9117789054517583e-07, + "loss": 0.1117, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9110 + }, + { + "epoch": 0.8761840650093764, + "grad_norm": 1.6002847275945866, + "learning_rate": 1.9088543494331118e-07, + "loss": 0.0991, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9111 + }, + { + "epoch": 0.876280232725874, + "grad_norm": 1.5251009114935756, + "learning_rate": 1.9059319432644412e-07, + "loss": 0.0931, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9112 + }, + { + "epoch": 0.8763764004423715, + "grad_norm": 2.814897127309192, + "learning_rate": 1.9030116872178317e-07, + "loss": 0.0926, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9113 + }, + { + "epoch": 0.8764725681588691, + "grad_norm": 2.261106163222266, + "learning_rate": 1.9000935815651712e-07, + "loss": 0.1163, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9114 + }, + { + "epoch": 0.8765687358753667, + "grad_norm": 1.8226901291081492, + "learning_rate": 1.8971776265781395e-07, + "loss": 0.1231, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9115 + }, + { + "epoch": 0.8766649035918642, + "grad_norm": 2.0277353844930532, + "learning_rate": 1.894263822528225e-07, + "loss": 0.127, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9116 + }, + { + "epoch": 0.8767610713083618, + "grad_norm": 2.00338304883155, + "learning_rate": 1.8913521696867055e-07, + "loss": 0.1073, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9117 + }, + { + "epoch": 0.8768572390248593, + "grad_norm": 1.809427179821194, + "learning_rate": 1.8884426683246637e-07, + "loss": 0.1247, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9118 + }, + { + "epoch": 0.8769534067413569, + "grad_norm": 1.6169513597010183, + "learning_rate": 1.885535318712986e-07, + "loss": 0.0934, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9119 + }, + { + "epoch": 0.8770495744578545, + "grad_norm": 1.721073825251679, + "learning_rate": 1.882630121122353e-07, + "loss": 0.1178, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9120 + }, + { + "epoch": 0.877145742174352, + "grad_norm": 1.61880970399572, + "learning_rate": 1.8797270758232484e-07, + "loss": 0.123, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9121 + }, + { + "epoch": 0.8772419098908496, + "grad_norm": 2.4415775894345204, + "learning_rate": 1.876826183085956e-07, + "loss": 0.1323, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9122 + }, + { + "epoch": 0.8773380776073472, + "grad_norm": 2.2570294384691394, + "learning_rate": 1.8739274431805482e-07, + "loss": 0.1416, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9123 + }, + { + "epoch": 0.8774342453238447, + "grad_norm": 1.7440341939723858, + "learning_rate": 1.8710308563769124e-07, + "loss": 0.1135, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9124 + }, + { + "epoch": 0.8775304130403424, + "grad_norm": 1.8251968546161306, + "learning_rate": 1.8681364229447274e-07, + "loss": 0.1282, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9125 + }, + { + "epoch": 0.87762658075684, + "grad_norm": 1.6310133968249942, + "learning_rate": 1.865244143153472e-07, + "loss": 0.1145, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9126 + }, + { + "epoch": 0.8777227484733375, + "grad_norm": 1.6492379552414949, + "learning_rate": 1.862354017272433e-07, + "loss": 0.1112, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9127 + }, + { + "epoch": 0.8778189161898351, + "grad_norm": 1.861638416410547, + "learning_rate": 1.8594660455706764e-07, + "loss": 0.1108, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9128 + }, + { + "epoch": 0.8779150839063327, + "grad_norm": 1.8005756403023703, + "learning_rate": 1.856580228317087e-07, + "loss": 0.1157, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9129 + }, + { + "epoch": 0.8780112516228302, + "grad_norm": 2.3673538265474803, + "learning_rate": 1.8536965657803497e-07, + "loss": 0.1249, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9130 + }, + { + "epoch": 0.8781074193393278, + "grad_norm": 1.9288837057804387, + "learning_rate": 1.8508150582289274e-07, + "loss": 0.1262, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9131 + }, + { + "epoch": 0.8782035870558254, + "grad_norm": 2.2017432785716333, + "learning_rate": 1.8479357059311032e-07, + "loss": 0.1, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9132 + }, + { + "epoch": 0.8782997547723229, + "grad_norm": 1.455949564276466, + "learning_rate": 1.8450585091549567e-07, + "loss": 0.1089, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9133 + }, + { + "epoch": 0.8783959224888205, + "grad_norm": 1.794767522529451, + "learning_rate": 1.842183468168357e-07, + "loss": 0.1177, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9134 + }, + { + "epoch": 0.878492090205318, + "grad_norm": 1.6692364523470877, + "learning_rate": 1.8393105832389791e-07, + "loss": 0.0919, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9135 + }, + { + "epoch": 0.8785882579218156, + "grad_norm": 1.9004670004799726, + "learning_rate": 1.8364398546342978e-07, + "loss": 0.0931, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9136 + }, + { + "epoch": 0.8786844256383132, + "grad_norm": 2.198063764803135, + "learning_rate": 1.833571282621585e-07, + "loss": 0.1263, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9137 + }, + { + "epoch": 0.8787805933548107, + "grad_norm": 2.1585417573042887, + "learning_rate": 1.830704867467914e-07, + "loss": 0.1335, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9138 + }, + { + "epoch": 0.8788767610713084, + "grad_norm": 2.061540235885432, + "learning_rate": 1.8278406094401624e-07, + "loss": 0.1169, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9139 + }, + { + "epoch": 0.878972928787806, + "grad_norm": 1.9417430471691393, + "learning_rate": 1.8249785088049894e-07, + "loss": 0.0974, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9140 + }, + { + "epoch": 0.8790690965043035, + "grad_norm": 2.118696355119074, + "learning_rate": 1.8221185658288675e-07, + "loss": 0.1484, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9141 + }, + { + "epoch": 0.8791652642208011, + "grad_norm": 1.7709875650222708, + "learning_rate": 1.8192607807780675e-07, + "loss": 0.1142, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9142 + }, + { + "epoch": 0.8792614319372987, + "grad_norm": 1.3591368842862486, + "learning_rate": 1.8164051539186573e-07, + "loss": 0.0925, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9143 + }, + { + "epoch": 0.8793575996537962, + "grad_norm": 1.673706725502949, + "learning_rate": 1.813551685516507e-07, + "loss": 0.1102, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9144 + }, + { + "epoch": 0.8794537673702938, + "grad_norm": 1.6011656102089482, + "learning_rate": 1.8107003758372738e-07, + "loss": 0.0999, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9145 + }, + { + "epoch": 0.8795499350867914, + "grad_norm": 2.462606752752686, + "learning_rate": 1.8078512251464285e-07, + "loss": 0.1091, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9146 + }, + { + "epoch": 0.8796461028032889, + "grad_norm": 1.6280684110639108, + "learning_rate": 1.8050042337092344e-07, + "loss": 0.0972, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9147 + }, + { + "epoch": 0.8797422705197865, + "grad_norm": 1.9049993305964816, + "learning_rate": 1.802159401790754e-07, + "loss": 0.0927, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9148 + }, + { + "epoch": 0.879838438236284, + "grad_norm": 2.0140784156505838, + "learning_rate": 1.799316729655848e-07, + "loss": 0.1251, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9149 + }, + { + "epoch": 0.8799346059527816, + "grad_norm": 1.625864668769258, + "learning_rate": 1.796476217569182e-07, + "loss": 0.0977, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9150 + }, + { + "epoch": 0.8800307736692792, + "grad_norm": 1.7571726418658382, + "learning_rate": 1.7936378657952058e-07, + "loss": 0.1072, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9151 + }, + { + "epoch": 0.8801269413857767, + "grad_norm": 2.0326732316076295, + "learning_rate": 1.790801674598186e-07, + "loss": 0.0835, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9152 + }, + { + "epoch": 0.8802231091022744, + "grad_norm": 1.9580085498709598, + "learning_rate": 1.787967644242175e-07, + "loss": 0.116, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9153 + }, + { + "epoch": 0.880319276818772, + "grad_norm": 1.4793175240244838, + "learning_rate": 1.7851357749910342e-07, + "loss": 0.0894, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9154 + }, + { + "epoch": 0.8804154445352695, + "grad_norm": 1.634350205229791, + "learning_rate": 1.782306067108419e-07, + "loss": 0.0998, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9155 + }, + { + "epoch": 0.8805116122517671, + "grad_norm": 1.77100290322794, + "learning_rate": 1.7794785208577748e-07, + "loss": 0.1069, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9156 + }, + { + "epoch": 0.8806077799682647, + "grad_norm": 1.74000678906938, + "learning_rate": 1.7766531365023598e-07, + "loss": 0.1401, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9157 + }, + { + "epoch": 0.8807039476847622, + "grad_norm": 1.79514949101054, + "learning_rate": 1.7738299143052224e-07, + "loss": 0.1005, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9158 + }, + { + "epoch": 0.8808001154012598, + "grad_norm": 1.6871826140862727, + "learning_rate": 1.771008854529216e-07, + "loss": 0.1222, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9159 + }, + { + "epoch": 0.8808962831177574, + "grad_norm": 1.8996986621378356, + "learning_rate": 1.7681899574369916e-07, + "loss": 0.134, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9160 + }, + { + "epoch": 0.8809924508342549, + "grad_norm": 2.075694711799688, + "learning_rate": 1.7653732232909892e-07, + "loss": 0.1165, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9161 + }, + { + "epoch": 0.8810886185507525, + "grad_norm": 1.7179511336578137, + "learning_rate": 1.7625586523534578e-07, + "loss": 0.1055, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9162 + }, + { + "epoch": 0.88118478626725, + "grad_norm": 1.8659279060678662, + "learning_rate": 1.7597462448864454e-07, + "loss": 0.114, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9163 + }, + { + "epoch": 0.8812809539837476, + "grad_norm": 1.6582200909579738, + "learning_rate": 1.756936001151785e-07, + "loss": 0.1063, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9164 + }, + { + "epoch": 0.8813771217002452, + "grad_norm": 1.9849961190286871, + "learning_rate": 1.7541279214111277e-07, + "loss": 0.0947, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9165 + }, + { + "epoch": 0.8814732894167427, + "grad_norm": 2.661137268700411, + "learning_rate": 1.7513220059259068e-07, + "loss": 0.1073, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9166 + }, + { + "epoch": 0.8815694571332404, + "grad_norm": 1.9167028770394825, + "learning_rate": 1.7485182549573708e-07, + "loss": 0.1103, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9167 + }, + { + "epoch": 0.881665624849738, + "grad_norm": 1.8724237178969587, + "learning_rate": 1.745716668766545e-07, + "loss": 0.1293, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9168 + }, + { + "epoch": 0.8817617925662355, + "grad_norm": 1.6300617791086736, + "learning_rate": 1.7429172476142702e-07, + "loss": 0.112, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9169 + }, + { + "epoch": 0.8818579602827331, + "grad_norm": 1.7101911714354576, + "learning_rate": 1.7401199917611827e-07, + "loss": 0.0928, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9170 + }, + { + "epoch": 0.8819541279992307, + "grad_norm": 1.7873198111327913, + "learning_rate": 1.7373249014677102e-07, + "loss": 0.1049, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9171 + }, + { + "epoch": 0.8820502957157282, + "grad_norm": 1.7662159492110685, + "learning_rate": 1.7345319769940888e-07, + "loss": 0.0938, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9172 + }, + { + "epoch": 0.8821464634322258, + "grad_norm": 1.3112643302516231, + "learning_rate": 1.7317412186003414e-07, + "loss": 0.0878, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9173 + }, + { + "epoch": 0.8822426311487234, + "grad_norm": 3.4244056321405703, + "learning_rate": 1.7289526265462985e-07, + "loss": 0.1108, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9174 + }, + { + "epoch": 0.8823387988652209, + "grad_norm": 2.6833025617100454, + "learning_rate": 1.7261662010915837e-07, + "loss": 0.1143, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9175 + }, + { + "epoch": 0.8824349665817185, + "grad_norm": 1.740480165706791, + "learning_rate": 1.723381942495625e-07, + "loss": 0.1152, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9176 + }, + { + "epoch": 0.8825311342982161, + "grad_norm": 2.862192637636188, + "learning_rate": 1.7205998510176404e-07, + "loss": 0.164, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9177 + }, + { + "epoch": 0.8826273020147136, + "grad_norm": 1.770184672030684, + "learning_rate": 1.7178199269166584e-07, + "loss": 0.0982, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9178 + }, + { + "epoch": 0.8827234697312112, + "grad_norm": 2.5928327627449312, + "learning_rate": 1.7150421704514865e-07, + "loss": 0.1324, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9179 + }, + { + "epoch": 0.8828196374477087, + "grad_norm": 1.8449675141173154, + "learning_rate": 1.7122665818807478e-07, + "loss": 0.1228, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9180 + }, + { + "epoch": 0.8829158051642064, + "grad_norm": 1.5108984261474858, + "learning_rate": 1.7094931614628551e-07, + "loss": 0.0946, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9181 + }, + { + "epoch": 0.883011972880704, + "grad_norm": 1.5567576748035394, + "learning_rate": 1.7067219094560244e-07, + "loss": 0.1096, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9182 + }, + { + "epoch": 0.8831081405972016, + "grad_norm": 1.6244860176208717, + "learning_rate": 1.703952826118266e-07, + "loss": 0.1126, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9183 + }, + { + "epoch": 0.8832043083136991, + "grad_norm": 1.8529877704271578, + "learning_rate": 1.701185911707387e-07, + "loss": 0.1099, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9184 + }, + { + "epoch": 0.8833004760301967, + "grad_norm": 1.7858760551605042, + "learning_rate": 1.6984211664809957e-07, + "loss": 0.1059, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9185 + }, + { + "epoch": 0.8833966437466942, + "grad_norm": 1.70404012911986, + "learning_rate": 1.6956585906965e-07, + "loss": 0.0846, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9186 + }, + { + "epoch": 0.8834928114631918, + "grad_norm": 1.6187518415650004, + "learning_rate": 1.6928981846110997e-07, + "loss": 0.1168, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9187 + }, + { + "epoch": 0.8835889791796894, + "grad_norm": 1.9148152202909774, + "learning_rate": 1.6901399484818005e-07, + "loss": 0.1005, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9188 + }, + { + "epoch": 0.8836851468961869, + "grad_norm": 1.7066271874978625, + "learning_rate": 1.687383882565402e-07, + "loss": 0.1086, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9189 + }, + { + "epoch": 0.8837813146126845, + "grad_norm": 2.047114582245536, + "learning_rate": 1.684629987118494e-07, + "loss": 0.0966, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9190 + }, + { + "epoch": 0.8838774823291821, + "grad_norm": 1.7460075925671468, + "learning_rate": 1.6818782623974794e-07, + "loss": 0.1191, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9191 + }, + { + "epoch": 0.8839736500456796, + "grad_norm": 1.5592441229013894, + "learning_rate": 1.679128708658548e-07, + "loss": 0.1152, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9192 + }, + { + "epoch": 0.8840698177621772, + "grad_norm": 1.7217821635507722, + "learning_rate": 1.6763813261576917e-07, + "loss": 0.1043, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9193 + }, + { + "epoch": 0.8841659854786748, + "grad_norm": 1.5458660848849937, + "learning_rate": 1.6736361151507063e-07, + "loss": 0.0974, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9194 + }, + { + "epoch": 0.8842621531951724, + "grad_norm": 1.7842126653468655, + "learning_rate": 1.6708930758931653e-07, + "loss": 0.1273, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9195 + }, + { + "epoch": 0.88435832091167, + "grad_norm": 2.247651062573933, + "learning_rate": 1.668152208640461e-07, + "loss": 0.1129, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9196 + }, + { + "epoch": 0.8844544886281676, + "grad_norm": 1.968637296460011, + "learning_rate": 1.665413513647779e-07, + "loss": 0.1275, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9197 + }, + { + "epoch": 0.8845506563446651, + "grad_norm": 1.675300656304699, + "learning_rate": 1.6626769911700925e-07, + "loss": 0.1015, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9198 + }, + { + "epoch": 0.8846468240611627, + "grad_norm": 2.104229215867704, + "learning_rate": 1.659942641462181e-07, + "loss": 0.1098, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9199 + }, + { + "epoch": 0.8847429917776602, + "grad_norm": 2.861198436574686, + "learning_rate": 1.6572104647786247e-07, + "loss": 0.1183, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9200 + }, + { + "epoch": 0.8848391594941578, + "grad_norm": 1.5262828953371268, + "learning_rate": 1.6544804613737892e-07, + "loss": 0.0917, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9201 + }, + { + "epoch": 0.8849353272106554, + "grad_norm": 2.5121728159254055, + "learning_rate": 1.6517526315018518e-07, + "loss": 0.1299, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9202 + }, + { + "epoch": 0.8850314949271529, + "grad_norm": 1.9356914146260167, + "learning_rate": 1.6490269754167759e-07, + "loss": 0.1241, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9203 + }, + { + "epoch": 0.8851276626436505, + "grad_norm": 1.5848082831611372, + "learning_rate": 1.6463034933723336e-07, + "loss": 0.09, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9204 + }, + { + "epoch": 0.8852238303601481, + "grad_norm": 1.9886697854679949, + "learning_rate": 1.6435821856220834e-07, + "loss": 0.1199, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9205 + }, + { + "epoch": 0.8853199980766456, + "grad_norm": 1.800936782539668, + "learning_rate": 1.6408630524193948e-07, + "loss": 0.1051, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9206 + }, + { + "epoch": 0.8854161657931432, + "grad_norm": 2.7349644987586594, + "learning_rate": 1.6381460940174148e-07, + "loss": 0.1361, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9207 + }, + { + "epoch": 0.8855123335096408, + "grad_norm": 3.5690319806462156, + "learning_rate": 1.6354313106691083e-07, + "loss": 0.1608, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9208 + }, + { + "epoch": 0.8856085012261384, + "grad_norm": 1.624690647311773, + "learning_rate": 1.6327187026272255e-07, + "loss": 0.1084, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9209 + }, + { + "epoch": 0.885704668942636, + "grad_norm": 1.8114292221571207, + "learning_rate": 1.6300082701443204e-07, + "loss": 0.1104, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9210 + }, + { + "epoch": 0.8858008366591336, + "grad_norm": 1.642405941084726, + "learning_rate": 1.6273000134727406e-07, + "loss": 0.0878, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9211 + }, + { + "epoch": 0.8858970043756311, + "grad_norm": 1.482426868375074, + "learning_rate": 1.6245939328646322e-07, + "loss": 0.0874, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9212 + }, + { + "epoch": 0.8859931720921287, + "grad_norm": 1.9857127335795248, + "learning_rate": 1.6218900285719375e-07, + "loss": 0.1266, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9213 + }, + { + "epoch": 0.8860893398086263, + "grad_norm": 2.8846693240661616, + "learning_rate": 1.6191883008463972e-07, + "loss": 0.1495, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9214 + }, + { + "epoch": 0.8861855075251238, + "grad_norm": 1.9601466066692192, + "learning_rate": 1.6164887499395543e-07, + "loss": 0.0999, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9215 + }, + { + "epoch": 0.8862816752416214, + "grad_norm": 1.8058498511380643, + "learning_rate": 1.6137913761027384e-07, + "loss": 0.1299, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9216 + }, + { + "epoch": 0.8863778429581189, + "grad_norm": 2.1745144111785613, + "learning_rate": 1.6110961795870906e-07, + "loss": 0.0996, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9217 + }, + { + "epoch": 0.8864740106746165, + "grad_norm": 1.5913047501442663, + "learning_rate": 1.6084031606435347e-07, + "loss": 0.0764, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9218 + }, + { + "epoch": 0.8865701783911141, + "grad_norm": 1.9763202350663527, + "learning_rate": 1.6057123195227952e-07, + "loss": 0.1258, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9219 + }, + { + "epoch": 0.8866663461076116, + "grad_norm": 1.874584780796543, + "learning_rate": 1.6030236564754054e-07, + "loss": 0.0897, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9220 + }, + { + "epoch": 0.8867625138241092, + "grad_norm": 1.8152645893750499, + "learning_rate": 1.6003371717516813e-07, + "loss": 0.0948, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9221 + }, + { + "epoch": 0.8868586815406068, + "grad_norm": 1.5166455282342337, + "learning_rate": 1.5976528656017453e-07, + "loss": 0.1068, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9222 + }, + { + "epoch": 0.8869548492571044, + "grad_norm": 2.043678237873053, + "learning_rate": 1.594970738275517e-07, + "loss": 0.1241, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9223 + }, + { + "epoch": 0.887051016973602, + "grad_norm": 3.0498063571564713, + "learning_rate": 1.592290790022702e-07, + "loss": 0.1233, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9224 + }, + { + "epoch": 0.8871471846900996, + "grad_norm": 1.766831937198869, + "learning_rate": 1.5896130210928117e-07, + "loss": 0.1133, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9225 + }, + { + "epoch": 0.8872433524065971, + "grad_norm": 1.6750168245167865, + "learning_rate": 1.5869374317351584e-07, + "loss": 0.1228, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9226 + }, + { + "epoch": 0.8873395201230947, + "grad_norm": 2.3057983568040976, + "learning_rate": 1.5842640221988504e-07, + "loss": 0.1274, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9227 + }, + { + "epoch": 0.8874356878395923, + "grad_norm": 1.7811651018329242, + "learning_rate": 1.5815927927327778e-07, + "loss": 0.1137, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9228 + }, + { + "epoch": 0.8875318555560898, + "grad_norm": 1.6652056641007729, + "learning_rate": 1.5789237435856504e-07, + "loss": 0.1055, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9229 + }, + { + "epoch": 0.8876280232725874, + "grad_norm": 1.4623252810008176, + "learning_rate": 1.5762568750059604e-07, + "loss": 0.1043, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9230 + }, + { + "epoch": 0.887724190989085, + "grad_norm": 2.3358403286093505, + "learning_rate": 1.5735921872419956e-07, + "loss": 0.0964, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9231 + }, + { + "epoch": 0.8878203587055825, + "grad_norm": 1.9362809035990214, + "learning_rate": 1.5709296805418523e-07, + "loss": 0.1096, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9232 + }, + { + "epoch": 0.8879165264220801, + "grad_norm": 2.19953526659108, + "learning_rate": 1.568269355153415e-07, + "loss": 0.103, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9233 + }, + { + "epoch": 0.8880126941385776, + "grad_norm": 2.3114714254818347, + "learning_rate": 1.565611211324372e-07, + "loss": 0.1087, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9234 + }, + { + "epoch": 0.8881088618550752, + "grad_norm": 2.180223763115088, + "learning_rate": 1.5629552493021948e-07, + "loss": 0.1123, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9235 + }, + { + "epoch": 0.8882050295715728, + "grad_norm": 1.7940052789713496, + "learning_rate": 1.5603014693341662e-07, + "loss": 0.122, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9236 + }, + { + "epoch": 0.8883011972880704, + "grad_norm": 1.6413014567834874, + "learning_rate": 1.5576498716673605e-07, + "loss": 0.127, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9237 + }, + { + "epoch": 0.888397365004568, + "grad_norm": 1.7464808198152297, + "learning_rate": 1.5550004565486472e-07, + "loss": 0.1116, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9238 + }, + { + "epoch": 0.8884935327210656, + "grad_norm": 1.852402231510663, + "learning_rate": 1.552353224224698e-07, + "loss": 0.1013, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9239 + }, + { + "epoch": 0.8885897004375631, + "grad_norm": 1.8279229921393618, + "learning_rate": 1.5497081749419745e-07, + "loss": 0.1029, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9240 + }, + { + "epoch": 0.8886858681540607, + "grad_norm": 1.590419038698774, + "learning_rate": 1.547065308946738e-07, + "loss": 0.1156, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9241 + }, + { + "epoch": 0.8887820358705583, + "grad_norm": 1.7525997625599916, + "learning_rate": 1.5444246264850442e-07, + "loss": 0.1084, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9242 + }, + { + "epoch": 0.8888782035870558, + "grad_norm": 1.552150242711813, + "learning_rate": 1.5417861278027552e-07, + "loss": 0.087, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9243 + }, + { + "epoch": 0.8889743713035534, + "grad_norm": 2.013256594906341, + "learning_rate": 1.5391498131455156e-07, + "loss": 0.0991, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9244 + }, + { + "epoch": 0.889070539020051, + "grad_norm": 1.535574457206513, + "learning_rate": 1.5365156827587823e-07, + "loss": 0.0835, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9245 + }, + { + "epoch": 0.8891667067365485, + "grad_norm": 1.9086043395817742, + "learning_rate": 1.5338837368877895e-07, + "loss": 0.1222, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9246 + }, + { + "epoch": 0.8892628744530461, + "grad_norm": 1.9420663190513778, + "learning_rate": 1.531253975777583e-07, + "loss": 0.1081, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9247 + }, + { + "epoch": 0.8893590421695436, + "grad_norm": 1.7480570304780092, + "learning_rate": 1.5286263996730027e-07, + "loss": 0.1391, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9248 + }, + { + "epoch": 0.8894552098860412, + "grad_norm": 1.7188748367745215, + "learning_rate": 1.526001008818681e-07, + "loss": 0.1337, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9249 + }, + { + "epoch": 0.8895513776025388, + "grad_norm": 3.124812196176488, + "learning_rate": 1.52337780345905e-07, + "loss": 0.136, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9250 + }, + { + "epoch": 0.8896475453190364, + "grad_norm": 1.6610927854637763, + "learning_rate": 1.5207567838383424e-07, + "loss": 0.0971, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9251 + }, + { + "epoch": 0.889743713035534, + "grad_norm": 2.2456579783523645, + "learning_rate": 1.5181379502005738e-07, + "loss": 0.1067, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9252 + }, + { + "epoch": 0.8898398807520316, + "grad_norm": 2.0742756927009887, + "learning_rate": 1.5155213027895688e-07, + "loss": 0.1486, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9253 + }, + { + "epoch": 0.8899360484685291, + "grad_norm": 1.628230244722497, + "learning_rate": 1.5129068418489434e-07, + "loss": 0.1179, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9254 + }, + { + "epoch": 0.8900322161850267, + "grad_norm": 2.1514103383632786, + "learning_rate": 1.5102945676221142e-07, + "loss": 0.1174, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9255 + }, + { + "epoch": 0.8901283839015243, + "grad_norm": 1.7001112063199015, + "learning_rate": 1.507684480352292e-07, + "loss": 0.1071, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9256 + }, + { + "epoch": 0.8902245516180218, + "grad_norm": 2.152695841830525, + "learning_rate": 1.5050765802824775e-07, + "loss": 0.0978, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9257 + }, + { + "epoch": 0.8903207193345194, + "grad_norm": 2.5734844482122203, + "learning_rate": 1.5024708676554756e-07, + "loss": 0.1172, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9258 + }, + { + "epoch": 0.890416887051017, + "grad_norm": 1.7030315907736975, + "learning_rate": 1.4998673427138842e-07, + "loss": 0.1093, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9259 + }, + { + "epoch": 0.8905130547675145, + "grad_norm": 1.8085997560988072, + "learning_rate": 1.497266005700107e-07, + "loss": 0.0994, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9260 + }, + { + "epoch": 0.8906092224840121, + "grad_norm": 1.9539882869977108, + "learning_rate": 1.494666856856325e-07, + "loss": 0.1152, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9261 + }, + { + "epoch": 0.8907053902005097, + "grad_norm": 2.011285368312101, + "learning_rate": 1.4920698964245306e-07, + "loss": 0.1111, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9262 + }, + { + "epoch": 0.8908015579170072, + "grad_norm": 1.8489778456670514, + "learning_rate": 1.4894751246465111e-07, + "loss": 0.1069, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9263 + }, + { + "epoch": 0.8908977256335048, + "grad_norm": 1.932376163929489, + "learning_rate": 1.4868825417638427e-07, + "loss": 0.1348, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9264 + }, + { + "epoch": 0.8909938933500025, + "grad_norm": 2.5838649182327504, + "learning_rate": 1.4842921480179019e-07, + "loss": 0.121, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9265 + }, + { + "epoch": 0.8910900610665, + "grad_norm": 1.6215645852300296, + "learning_rate": 1.4817039436498621e-07, + "loss": 0.1114, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9266 + }, + { + "epoch": 0.8911862287829976, + "grad_norm": 2.2904214068093833, + "learning_rate": 1.4791179289006973e-07, + "loss": 0.1483, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9267 + }, + { + "epoch": 0.8912823964994951, + "grad_norm": 1.8306059401137924, + "learning_rate": 1.476534104011168e-07, + "loss": 0.1222, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9268 + }, + { + "epoch": 0.8913785642159927, + "grad_norm": 1.3909994120408085, + "learning_rate": 1.4739524692218316e-07, + "loss": 0.1014, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9269 + }, + { + "epoch": 0.8914747319324903, + "grad_norm": 1.5942213332397044, + "learning_rate": 1.471373024773054e-07, + "loss": 0.1013, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9270 + }, + { + "epoch": 0.8915708996489878, + "grad_norm": 1.7621261637099344, + "learning_rate": 1.4687957709049822e-07, + "loss": 0.0941, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9271 + }, + { + "epoch": 0.8916670673654854, + "grad_norm": 2.4581822519724414, + "learning_rate": 1.4662207078575685e-07, + "loss": 0.1002, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9272 + }, + { + "epoch": 0.891763235081983, + "grad_norm": 1.8571382599727653, + "learning_rate": 1.463647835870563e-07, + "loss": 0.1038, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9273 + }, + { + "epoch": 0.8918594027984805, + "grad_norm": 1.8716480687797263, + "learning_rate": 1.461077155183499e-07, + "loss": 0.1341, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9274 + }, + { + "epoch": 0.8919555705149781, + "grad_norm": 1.871059505003169, + "learning_rate": 1.4585086660357155e-07, + "loss": 0.107, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9275 + }, + { + "epoch": 0.8920517382314757, + "grad_norm": 1.7290237548216323, + "learning_rate": 1.4559423686663494e-07, + "loss": 0.116, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9276 + }, + { + "epoch": 0.8921479059479732, + "grad_norm": 2.3590639761651873, + "learning_rate": 1.4533782633143285e-07, + "loss": 0.0895, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9277 + }, + { + "epoch": 0.8922440736644708, + "grad_norm": 1.8113344858463145, + "learning_rate": 1.4508163502183786e-07, + "loss": 0.081, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9278 + }, + { + "epoch": 0.8923402413809685, + "grad_norm": 1.8327415607238038, + "learning_rate": 1.448256629617023e-07, + "loss": 0.0967, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9279 + }, + { + "epoch": 0.892436409097466, + "grad_norm": 1.8038279274504376, + "learning_rate": 1.4456991017485737e-07, + "loss": 0.1253, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9280 + }, + { + "epoch": 0.8925325768139636, + "grad_norm": 1.9580145784973855, + "learning_rate": 1.443143766851146e-07, + "loss": 0.1201, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9281 + }, + { + "epoch": 0.8926287445304611, + "grad_norm": 2.4356000834073246, + "learning_rate": 1.4405906251626496e-07, + "loss": 0.0924, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9282 + }, + { + "epoch": 0.8927249122469587, + "grad_norm": 3.6281649977221493, + "learning_rate": 1.4380396769207883e-07, + "loss": 0.1374, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9283 + }, + { + "epoch": 0.8928210799634563, + "grad_norm": 1.429114849151928, + "learning_rate": 1.435490922363067e-07, + "loss": 0.0828, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9284 + }, + { + "epoch": 0.8929172476799538, + "grad_norm": 1.755514059279063, + "learning_rate": 1.4329443617267736e-07, + "loss": 0.1201, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9285 + }, + { + "epoch": 0.8930134153964514, + "grad_norm": 1.7530653485747936, + "learning_rate": 1.4303999952490045e-07, + "loss": 0.1229, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9286 + }, + { + "epoch": 0.893109583112949, + "grad_norm": 2.0593132597578796, + "learning_rate": 1.427857823166648e-07, + "loss": 0.1053, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9287 + }, + { + "epoch": 0.8932057508294465, + "grad_norm": 1.8722159972807066, + "learning_rate": 1.425317845716384e-07, + "loss": 0.1206, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9288 + }, + { + "epoch": 0.8933019185459441, + "grad_norm": 1.8677533186806037, + "learning_rate": 1.422780063134696e-07, + "loss": 0.0994, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9289 + }, + { + "epoch": 0.8933980862624417, + "grad_norm": 1.9592179530920466, + "learning_rate": 1.4202444756578588e-07, + "loss": 0.1155, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9290 + }, + { + "epoch": 0.8934942539789392, + "grad_norm": 2.3055060601190323, + "learning_rate": 1.417711083521936e-07, + "loss": 0.134, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9291 + }, + { + "epoch": 0.8935904216954368, + "grad_norm": 1.6507059890568807, + "learning_rate": 1.4151798869628003e-07, + "loss": 0.0694, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9292 + }, + { + "epoch": 0.8936865894119345, + "grad_norm": 1.835856825096212, + "learning_rate": 1.4126508862161076e-07, + "loss": 0.1272, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9293 + }, + { + "epoch": 0.893782757128432, + "grad_norm": 1.57244379302634, + "learning_rate": 1.4101240815173223e-07, + "loss": 0.1051, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9294 + }, + { + "epoch": 0.8938789248449296, + "grad_norm": 1.8361635877424907, + "learning_rate": 1.4075994731016895e-07, + "loss": 0.1184, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9295 + }, + { + "epoch": 0.8939750925614272, + "grad_norm": 1.4469000376959038, + "learning_rate": 1.4050770612042603e-07, + "loss": 0.0938, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9296 + }, + { + "epoch": 0.8940712602779247, + "grad_norm": 1.626752973538991, + "learning_rate": 1.4025568460598826e-07, + "loss": 0.1019, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9297 + }, + { + "epoch": 0.8941674279944223, + "grad_norm": 1.5721516913168472, + "learning_rate": 1.4000388279031857e-07, + "loss": 0.1058, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9298 + }, + { + "epoch": 0.8942635957109198, + "grad_norm": 1.5959349873193531, + "learning_rate": 1.3975230069686096e-07, + "loss": 0.0898, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9299 + }, + { + "epoch": 0.8943597634274174, + "grad_norm": 2.2922159710828, + "learning_rate": 1.3950093834903865e-07, + "loss": 0.1326, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9300 + }, + { + "epoch": 0.894455931143915, + "grad_norm": 1.70214957222686, + "learning_rate": 1.3924979577025405e-07, + "loss": 0.1165, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9301 + }, + { + "epoch": 0.8945520988604125, + "grad_norm": 2.803203836280369, + "learning_rate": 1.3899887298388899e-07, + "loss": 0.1443, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9302 + }, + { + "epoch": 0.8946482665769101, + "grad_norm": 1.868378923350435, + "learning_rate": 1.3874817001330504e-07, + "loss": 0.1214, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9303 + }, + { + "epoch": 0.8947444342934077, + "grad_norm": 1.4581001838414287, + "learning_rate": 1.3849768688184357e-07, + "loss": 0.0925, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9304 + }, + { + "epoch": 0.8948406020099052, + "grad_norm": 1.774532148685867, + "learning_rate": 1.3824742361282533e-07, + "loss": 0.1189, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9305 + }, + { + "epoch": 0.8949367697264028, + "grad_norm": 2.9873359200377005, + "learning_rate": 1.3799738022955034e-07, + "loss": 0.1422, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9306 + }, + { + "epoch": 0.8950329374429005, + "grad_norm": 1.7633804147364067, + "learning_rate": 1.3774755675529854e-07, + "loss": 0.1033, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9307 + }, + { + "epoch": 0.895129105159398, + "grad_norm": 1.7300488127367597, + "learning_rate": 1.3749795321332887e-07, + "loss": 0.1026, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9308 + }, + { + "epoch": 0.8952252728758956, + "grad_norm": 1.9718561928162277, + "learning_rate": 1.372485696268805e-07, + "loss": 0.1058, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9309 + }, + { + "epoch": 0.8953214405923932, + "grad_norm": 1.382312256121986, + "learning_rate": 1.3699940601917123e-07, + "loss": 0.0816, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9310 + }, + { + "epoch": 0.8954176083088907, + "grad_norm": 1.8603163852420848, + "learning_rate": 1.3675046241339918e-07, + "loss": 0.1122, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9311 + }, + { + "epoch": 0.8955137760253883, + "grad_norm": 1.4551499877111231, + "learning_rate": 1.365017388327422e-07, + "loss": 0.0885, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9312 + }, + { + "epoch": 0.8956099437418858, + "grad_norm": 1.6037928379415076, + "learning_rate": 1.362532353003565e-07, + "loss": 0.1108, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9313 + }, + { + "epoch": 0.8957061114583834, + "grad_norm": 2.24689855534029, + "learning_rate": 1.360049518393783e-07, + "loss": 0.0904, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9314 + }, + { + "epoch": 0.895802279174881, + "grad_norm": 1.6128977799724562, + "learning_rate": 1.3575688847292378e-07, + "loss": 0.0991, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9315 + }, + { + "epoch": 0.8958984468913785, + "grad_norm": 2.54401443617787, + "learning_rate": 1.3550904522408842e-07, + "loss": 0.1279, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9316 + }, + { + "epoch": 0.8959946146078761, + "grad_norm": 1.7194886872086443, + "learning_rate": 1.3526142211594706e-07, + "loss": 0.1248, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9317 + }, + { + "epoch": 0.8960907823243737, + "grad_norm": 1.7736335318377694, + "learning_rate": 1.3501401917155433e-07, + "loss": 0.122, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9318 + }, + { + "epoch": 0.8961869500408712, + "grad_norm": 2.038388184844838, + "learning_rate": 1.3476683641394372e-07, + "loss": 0.1078, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9319 + }, + { + "epoch": 0.8962831177573688, + "grad_norm": 2.1038136108491323, + "learning_rate": 1.3451987386612852e-07, + "loss": 0.1119, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9320 + }, + { + "epoch": 0.8963792854738665, + "grad_norm": 1.910110378605448, + "learning_rate": 1.34273131551102e-07, + "loss": 0.0938, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9321 + }, + { + "epoch": 0.896475453190364, + "grad_norm": 1.8230885919944706, + "learning_rate": 1.3402660949183661e-07, + "loss": 0.1315, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9322 + }, + { + "epoch": 0.8965716209068616, + "grad_norm": 1.69731824489623, + "learning_rate": 1.3378030771128424e-07, + "loss": 0.0971, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9323 + }, + { + "epoch": 0.8966677886233592, + "grad_norm": 1.884046813761118, + "learning_rate": 1.3353422623237606e-07, + "loss": 0.1272, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9324 + }, + { + "epoch": 0.8967639563398567, + "grad_norm": 1.7169174996555665, + "learning_rate": 1.3328836507802256e-07, + "loss": 0.0856, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9325 + }, + { + "epoch": 0.8968601240563543, + "grad_norm": 1.5007535241287886, + "learning_rate": 1.3304272427111493e-07, + "loss": 0.0988, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9326 + }, + { + "epoch": 0.8969562917728519, + "grad_norm": 1.6522328401523312, + "learning_rate": 1.327973038345229e-07, + "loss": 0.1103, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9327 + }, + { + "epoch": 0.8970524594893494, + "grad_norm": 1.5443406762045593, + "learning_rate": 1.3255210379109485e-07, + "loss": 0.095, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9328 + }, + { + "epoch": 0.897148627205847, + "grad_norm": 2.1263605243430117, + "learning_rate": 1.323071241636606e-07, + "loss": 0.1275, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9329 + }, + { + "epoch": 0.8972447949223445, + "grad_norm": 2.3744331671897703, + "learning_rate": 1.3206236497502829e-07, + "loss": 0.1543, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9330 + }, + { + "epoch": 0.8973409626388421, + "grad_norm": 1.9344190006088957, + "learning_rate": 1.3181782624798523e-07, + "loss": 0.1001, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9331 + }, + { + "epoch": 0.8974371303553397, + "grad_norm": 1.7428321291501179, + "learning_rate": 1.3157350800529877e-07, + "loss": 0.1226, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9332 + }, + { + "epoch": 0.8975332980718372, + "grad_norm": 1.5472463226833002, + "learning_rate": 1.3132941026971602e-07, + "loss": 0.1122, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9333 + }, + { + "epoch": 0.8976294657883348, + "grad_norm": 1.53272015649029, + "learning_rate": 1.3108553306396265e-07, + "loss": 0.0963, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9334 + }, + { + "epoch": 0.8977256335048325, + "grad_norm": 1.9836750782965569, + "learning_rate": 1.3084187641074497e-07, + "loss": 0.1012, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9335 + }, + { + "epoch": 0.89782180122133, + "grad_norm": 1.6948575281925589, + "learning_rate": 1.3059844033274733e-07, + "loss": 0.0913, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9336 + }, + { + "epoch": 0.8979179689378276, + "grad_norm": 1.9165464885399714, + "learning_rate": 1.3035522485263464e-07, + "loss": 0.0874, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9337 + }, + { + "epoch": 0.8980141366543252, + "grad_norm": 1.7460154227138247, + "learning_rate": 1.30112229993051e-07, + "loss": 0.1146, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9338 + }, + { + "epoch": 0.8981103043708227, + "grad_norm": 1.7490096608200743, + "learning_rate": 1.2986945577661975e-07, + "loss": 0.1155, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9339 + }, + { + "epoch": 0.8982064720873203, + "grad_norm": 2.172130592848484, + "learning_rate": 1.2962690222594444e-07, + "loss": 0.1169, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9340 + }, + { + "epoch": 0.8983026398038179, + "grad_norm": 2.117511741972471, + "learning_rate": 1.2938456936360672e-07, + "loss": 0.1018, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9341 + }, + { + "epoch": 0.8983988075203154, + "grad_norm": 1.6168449123141782, + "learning_rate": 1.2914245721216857e-07, + "loss": 0.0998, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9342 + }, + { + "epoch": 0.898494975236813, + "grad_norm": 2.0182724144767854, + "learning_rate": 1.2890056579417138e-07, + "loss": 0.1085, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9343 + }, + { + "epoch": 0.8985911429533106, + "grad_norm": 2.048946329020221, + "learning_rate": 1.286588951321363e-07, + "loss": 0.11, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9344 + }, + { + "epoch": 0.8986873106698081, + "grad_norm": 2.240515526200922, + "learning_rate": 1.2841744524856285e-07, + "loss": 0.1234, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9345 + }, + { + "epoch": 0.8987834783863057, + "grad_norm": 2.2766239639177863, + "learning_rate": 1.2817621616593167e-07, + "loss": 0.1226, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9346 + }, + { + "epoch": 0.8988796461028032, + "grad_norm": 1.6870974921533723, + "learning_rate": 1.2793520790670117e-07, + "loss": 0.0943, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9347 + }, + { + "epoch": 0.8989758138193008, + "grad_norm": 2.7786720754552774, + "learning_rate": 1.276944204933095e-07, + "loss": 0.1168, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9348 + }, + { + "epoch": 0.8990719815357985, + "grad_norm": 1.9841679098706348, + "learning_rate": 1.274538539481754e-07, + "loss": 0.1139, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9349 + }, + { + "epoch": 0.899168149252296, + "grad_norm": 1.7698941178740222, + "learning_rate": 1.2721350829369595e-07, + "loss": 0.1064, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9350 + }, + { + "epoch": 0.8992643169687936, + "grad_norm": 1.7799637063061826, + "learning_rate": 1.2697338355224854e-07, + "loss": 0.122, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9351 + }, + { + "epoch": 0.8993604846852912, + "grad_norm": 2.0930489772185177, + "learning_rate": 1.267334797461886e-07, + "loss": 0.1505, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9352 + }, + { + "epoch": 0.8994566524017887, + "grad_norm": 1.6506008699692658, + "learning_rate": 1.264937968978522e-07, + "loss": 0.0907, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9353 + }, + { + "epoch": 0.8995528201182863, + "grad_norm": 1.8132142884476121, + "learning_rate": 1.2625433502955447e-07, + "loss": 0.1392, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9354 + }, + { + "epoch": 0.8996489878347839, + "grad_norm": 1.494636211378008, + "learning_rate": 1.2601509416358986e-07, + "loss": 0.0918, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9355 + }, + { + "epoch": 0.8997451555512814, + "grad_norm": 1.80823390199759, + "learning_rate": 1.2577607432223278e-07, + "loss": 0.12, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9356 + }, + { + "epoch": 0.899841323267779, + "grad_norm": 2.331192330869194, + "learning_rate": 1.2553727552773648e-07, + "loss": 0.1176, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9357 + }, + { + "epoch": 0.8999374909842766, + "grad_norm": 1.678114921887167, + "learning_rate": 1.252986978023335e-07, + "loss": 0.0878, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9358 + }, + { + "epoch": 0.9000336587007741, + "grad_norm": 1.7624485683299818, + "learning_rate": 1.2506034116823634e-07, + "loss": 0.1212, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9359 + }, + { + "epoch": 0.9001298264172717, + "grad_norm": 2.1510453069344577, + "learning_rate": 1.2482220564763669e-07, + "loss": 0.1249, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9360 + }, + { + "epoch": 0.9002259941337692, + "grad_norm": 2.16418042871089, + "learning_rate": 1.2458429126270544e-07, + "loss": 0.1003, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9361 + }, + { + "epoch": 0.9003221618502668, + "grad_norm": 2.010268089605386, + "learning_rate": 1.243465980355932e-07, + "loss": 0.1288, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9362 + }, + { + "epoch": 0.9004183295667645, + "grad_norm": 2.6006603233475025, + "learning_rate": 1.2410912598843e-07, + "loss": 0.1232, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9363 + }, + { + "epoch": 0.900514497283262, + "grad_norm": 1.8460024033864537, + "learning_rate": 1.238718751433249e-07, + "loss": 0.1121, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9364 + }, + { + "epoch": 0.9006106649997596, + "grad_norm": 1.4007335078897993, + "learning_rate": 1.2363484552236654e-07, + "loss": 0.0974, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9365 + }, + { + "epoch": 0.9007068327162572, + "grad_norm": 1.7818344613309167, + "learning_rate": 1.2339803714762316e-07, + "loss": 0.1106, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9366 + }, + { + "epoch": 0.9008030004327547, + "grad_norm": 1.7109077003950846, + "learning_rate": 1.231614500411424e-07, + "loss": 0.1046, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9367 + }, + { + "epoch": 0.9008991681492523, + "grad_norm": 1.7887520231798424, + "learning_rate": 1.2292508422495158e-07, + "loss": 0.1057, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9368 + }, + { + "epoch": 0.9009953358657499, + "grad_norm": 1.6443037865371624, + "learning_rate": 1.2268893972105595e-07, + "loss": 0.1136, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9369 + }, + { + "epoch": 0.9010915035822474, + "grad_norm": 1.3535550987316638, + "learning_rate": 1.2245301655144203e-07, + "loss": 0.0692, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9370 + }, + { + "epoch": 0.901187671298745, + "grad_norm": 1.5396090559723326, + "learning_rate": 1.222173147380748e-07, + "loss": 0.1134, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9371 + }, + { + "epoch": 0.9012838390152426, + "grad_norm": 1.794088481711432, + "learning_rate": 1.219818343028986e-07, + "loss": 0.1053, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9372 + }, + { + "epoch": 0.9013800067317401, + "grad_norm": 2.6545716410583426, + "learning_rate": 1.2174657526783733e-07, + "loss": 0.1319, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9373 + }, + { + "epoch": 0.9014761744482377, + "grad_norm": 2.6533507189828796, + "learning_rate": 1.2151153765479456e-07, + "loss": 0.1266, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9374 + }, + { + "epoch": 0.9015723421647353, + "grad_norm": 2.4805654168185116, + "learning_rate": 1.212767214856525e-07, + "loss": 0.1479, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9375 + }, + { + "epoch": 0.9016685098812328, + "grad_norm": 1.7197157383539883, + "learning_rate": 1.210421267822734e-07, + "loss": 0.111, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9376 + }, + { + "epoch": 0.9017646775977305, + "grad_norm": 1.7124062438818248, + "learning_rate": 1.2080775356649866e-07, + "loss": 0.1142, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9377 + }, + { + "epoch": 0.901860845314228, + "grad_norm": 2.04747059249426, + "learning_rate": 1.2057360186014916e-07, + "loss": 0.1076, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9378 + }, + { + "epoch": 0.9019570130307256, + "grad_norm": 2.2336553961685044, + "learning_rate": 1.2033967168502525e-07, + "loss": 0.1069, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9379 + }, + { + "epoch": 0.9020531807472232, + "grad_norm": 1.9645337191930072, + "learning_rate": 1.2010596306290588e-07, + "loss": 0.1514, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9380 + }, + { + "epoch": 0.9021493484637207, + "grad_norm": 2.1142767351699807, + "learning_rate": 1.1987247601555034e-07, + "loss": 0.0958, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9381 + }, + { + "epoch": 0.9022455161802183, + "grad_norm": 1.9665307929576068, + "learning_rate": 1.1963921056469706e-07, + "loss": 0.1162, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9382 + }, + { + "epoch": 0.9023416838967159, + "grad_norm": 2.1906835522777945, + "learning_rate": 1.194061667320634e-07, + "loss": 0.1119, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9383 + }, + { + "epoch": 0.9024378516132134, + "grad_norm": 1.6821929187436222, + "learning_rate": 1.191733445393467e-07, + "loss": 0.1087, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9384 + }, + { + "epoch": 0.902534019329711, + "grad_norm": 1.472271010039445, + "learning_rate": 1.1894074400822353e-07, + "loss": 0.091, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9385 + }, + { + "epoch": 0.9026301870462086, + "grad_norm": 1.950518495126589, + "learning_rate": 1.1870836516034878e-07, + "loss": 0.1112, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9386 + }, + { + "epoch": 0.9027263547627061, + "grad_norm": 1.6045694467476304, + "learning_rate": 1.184762080173582e-07, + "loss": 0.1007, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9387 + }, + { + "epoch": 0.9028225224792037, + "grad_norm": 1.9137874757849709, + "learning_rate": 1.1824427260086618e-07, + "loss": 0.1292, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9388 + }, + { + "epoch": 0.9029186901957013, + "grad_norm": 1.5540804965841917, + "learning_rate": 1.1801255893246683e-07, + "loss": 0.096, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9389 + }, + { + "epoch": 0.9030148579121988, + "grad_norm": 1.489073055173217, + "learning_rate": 1.1778106703373316e-07, + "loss": 0.0923, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9390 + }, + { + "epoch": 0.9031110256286965, + "grad_norm": 2.219246493527863, + "learning_rate": 1.175497969262171e-07, + "loss": 0.0939, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9391 + }, + { + "epoch": 0.9032071933451941, + "grad_norm": 2.0333187947792903, + "learning_rate": 1.1731874863145143e-07, + "loss": 0.1323, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9392 + }, + { + "epoch": 0.9033033610616916, + "grad_norm": 1.8087036286333722, + "learning_rate": 1.1708792217094672e-07, + "loss": 0.1157, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9393 + }, + { + "epoch": 0.9033995287781892, + "grad_norm": 1.9173582709459285, + "learning_rate": 1.1685731756619439e-07, + "loss": 0.1083, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9394 + }, + { + "epoch": 0.9034956964946868, + "grad_norm": 1.9672288650188836, + "learning_rate": 1.1662693483866339e-07, + "loss": 0.1322, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9395 + }, + { + "epoch": 0.9035918642111843, + "grad_norm": 2.045313876752119, + "learning_rate": 1.1639677400980321e-07, + "loss": 0.1246, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9396 + }, + { + "epoch": 0.9036880319276819, + "grad_norm": 2.5284693750478335, + "learning_rate": 1.1616683510104338e-07, + "loss": 0.1082, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9397 + }, + { + "epoch": 0.9037841996441794, + "grad_norm": 2.190726099066811, + "learning_rate": 1.1593711813379066e-07, + "loss": 0.12, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9398 + }, + { + "epoch": 0.903880367360677, + "grad_norm": 1.5005350046720982, + "learning_rate": 1.1570762312943295e-07, + "loss": 0.0975, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9399 + }, + { + "epoch": 0.9039765350771746, + "grad_norm": 1.6644999654206878, + "learning_rate": 1.154783501093365e-07, + "loss": 0.1192, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9400 + }, + { + "epoch": 0.9040727027936721, + "grad_norm": 1.721783517780511, + "learning_rate": 1.1524929909484784e-07, + "loss": 0.0968, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9401 + }, + { + "epoch": 0.9041688705101697, + "grad_norm": 1.899615935992443, + "learning_rate": 1.1502047010729212e-07, + "loss": 0.1005, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9402 + }, + { + "epoch": 0.9042650382266673, + "grad_norm": 1.5411905082019064, + "learning_rate": 1.1479186316797342e-07, + "loss": 0.1179, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9403 + }, + { + "epoch": 0.9043612059431648, + "grad_norm": 2.2296507345094394, + "learning_rate": 1.145634782981761e-07, + "loss": 0.1297, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9404 + }, + { + "epoch": 0.9044573736596625, + "grad_norm": 2.4867949305606705, + "learning_rate": 1.1433531551916344e-07, + "loss": 0.0994, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9405 + }, + { + "epoch": 0.9045535413761601, + "grad_norm": 1.7298124494162137, + "learning_rate": 1.1410737485217788e-07, + "loss": 0.1176, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9406 + }, + { + "epoch": 0.9046497090926576, + "grad_norm": 1.73679326326659, + "learning_rate": 1.1387965631844189e-07, + "loss": 0.1123, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9407 + }, + { + "epoch": 0.9047458768091552, + "grad_norm": 2.209699330159749, + "learning_rate": 1.1365215993915573e-07, + "loss": 0.1209, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9408 + }, + { + "epoch": 0.9048420445256528, + "grad_norm": 1.7449313616570639, + "learning_rate": 1.1342488573550054e-07, + "loss": 0.1259, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9409 + }, + { + "epoch": 0.9049382122421503, + "grad_norm": 1.8951675102990988, + "learning_rate": 1.1319783372863601e-07, + "loss": 0.1113, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9410 + }, + { + "epoch": 0.9050343799586479, + "grad_norm": 2.0698442442938716, + "learning_rate": 1.1297100393970139e-07, + "loss": 0.0998, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9411 + }, + { + "epoch": 0.9051305476751454, + "grad_norm": 1.9081955685848846, + "learning_rate": 1.1274439638981532e-07, + "loss": 0.1322, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9412 + }, + { + "epoch": 0.905226715391643, + "grad_norm": 1.9532907799515447, + "learning_rate": 1.1251801110007565e-07, + "loss": 0.1275, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9413 + }, + { + "epoch": 0.9053228831081406, + "grad_norm": 2.2150518261973824, + "learning_rate": 1.1229184809155885e-07, + "loss": 0.1338, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9414 + }, + { + "epoch": 0.9054190508246381, + "grad_norm": 2.254243417449479, + "learning_rate": 1.1206590738532169e-07, + "loss": 0.1057, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9415 + }, + { + "epoch": 0.9055152185411357, + "grad_norm": 1.7834405156406203, + "learning_rate": 1.1184018900240012e-07, + "loss": 0.1136, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9416 + }, + { + "epoch": 0.9056113862576333, + "grad_norm": 1.582789417110845, + "learning_rate": 1.11614692963809e-07, + "loss": 0.0958, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9417 + }, + { + "epoch": 0.9057075539741308, + "grad_norm": 1.926697860958739, + "learning_rate": 1.1138941929054264e-07, + "loss": 0.1127, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9418 + }, + { + "epoch": 0.9058037216906285, + "grad_norm": 2.8610780572714165, + "learning_rate": 1.1116436800357456e-07, + "loss": 0.132, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9419 + }, + { + "epoch": 0.9058998894071261, + "grad_norm": 1.7989024772776907, + "learning_rate": 1.1093953912385769e-07, + "loss": 0.1383, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9420 + }, + { + "epoch": 0.9059960571236236, + "grad_norm": 2.3433101288382425, + "learning_rate": 1.107149326723242e-07, + "loss": 0.1189, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9421 + }, + { + "epoch": 0.9060922248401212, + "grad_norm": 1.571249343384735, + "learning_rate": 1.1049054866988568e-07, + "loss": 0.0889, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9422 + }, + { + "epoch": 0.9061883925566188, + "grad_norm": 2.0439052619031233, + "learning_rate": 1.1026638713743265e-07, + "loss": 0.0901, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9423 + }, + { + "epoch": 0.9062845602731163, + "grad_norm": 1.8440663410978446, + "learning_rate": 1.1004244809583591e-07, + "loss": 0.129, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9424 + }, + { + "epoch": 0.9063807279896139, + "grad_norm": 1.7270164486391315, + "learning_rate": 1.0981873156594381e-07, + "loss": 0.1112, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9425 + }, + { + "epoch": 0.9064768957061115, + "grad_norm": 1.6052426330142016, + "learning_rate": 1.0959523756858547e-07, + "loss": 0.0855, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9426 + }, + { + "epoch": 0.906573063422609, + "grad_norm": 1.5955266323477995, + "learning_rate": 1.0937196612456902e-07, + "loss": 0.097, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9427 + }, + { + "epoch": 0.9066692311391066, + "grad_norm": 1.9428653262366438, + "learning_rate": 1.091489172546814e-07, + "loss": 0.122, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9428 + }, + { + "epoch": 0.9067653988556041, + "grad_norm": 1.6587965886056373, + "learning_rate": 1.0892609097968882e-07, + "loss": 0.1119, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9429 + }, + { + "epoch": 0.9068615665721017, + "grad_norm": 1.6924676834126988, + "learning_rate": 1.0870348732033769e-07, + "loss": 0.1123, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9430 + }, + { + "epoch": 0.9069577342885993, + "grad_norm": 2.7341965868616636, + "learning_rate": 1.0848110629735231e-07, + "loss": 0.1056, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9431 + }, + { + "epoch": 0.9070539020050968, + "grad_norm": 1.6723971684550285, + "learning_rate": 1.0825894793143721e-07, + "loss": 0.1277, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9432 + }, + { + "epoch": 0.9071500697215945, + "grad_norm": 1.9621095516713443, + "learning_rate": 1.0803701224327611e-07, + "loss": 0.1384, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9433 + }, + { + "epoch": 0.9072462374380921, + "grad_norm": 1.6351044406972934, + "learning_rate": 1.0781529925353168e-07, + "loss": 0.105, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9434 + }, + { + "epoch": 0.9073424051545896, + "grad_norm": 1.8507206916538799, + "learning_rate": 1.0759380898284655e-07, + "loss": 0.1088, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9435 + }, + { + "epoch": 0.9074385728710872, + "grad_norm": 1.6441908969377668, + "learning_rate": 1.0737254145184145e-07, + "loss": 0.1074, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9436 + }, + { + "epoch": 0.9075347405875848, + "grad_norm": 1.9220394585921141, + "learning_rate": 1.0715149668111713e-07, + "loss": 0.1278, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9437 + }, + { + "epoch": 0.9076309083040823, + "grad_norm": 1.7685287076443617, + "learning_rate": 1.0693067469125323e-07, + "loss": 0.1167, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9438 + }, + { + "epoch": 0.9077270760205799, + "grad_norm": 1.580342671220384, + "learning_rate": 1.0671007550280943e-07, + "loss": 0.0982, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9439 + }, + { + "epoch": 0.9078232437370775, + "grad_norm": 1.452449265414379, + "learning_rate": 1.0648969913632401e-07, + "loss": 0.0773, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9440 + }, + { + "epoch": 0.907919411453575, + "grad_norm": 1.9249158660631034, + "learning_rate": 1.0626954561231473e-07, + "loss": 0.1212, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9441 + }, + { + "epoch": 0.9080155791700726, + "grad_norm": 1.8544308340385682, + "learning_rate": 1.0604961495127797e-07, + "loss": 0.1141, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9442 + }, + { + "epoch": 0.9081117468865701, + "grad_norm": 2.2259929601849895, + "learning_rate": 1.0582990717369012e-07, + "loss": 0.106, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9443 + }, + { + "epoch": 0.9082079146030677, + "grad_norm": 1.877187865650888, + "learning_rate": 1.0561042230000679e-07, + "loss": 0.117, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9444 + }, + { + "epoch": 0.9083040823195653, + "grad_norm": 2.1185517881669877, + "learning_rate": 1.0539116035066243e-07, + "loss": 0.1117, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9445 + }, + { + "epoch": 0.9084002500360628, + "grad_norm": 1.834193984519045, + "learning_rate": 1.051721213460713e-07, + "loss": 0.1238, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9446 + }, + { + "epoch": 0.9084964177525605, + "grad_norm": 1.7848527256768145, + "learning_rate": 1.0495330530662595e-07, + "loss": 0.1112, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9447 + }, + { + "epoch": 0.9085925854690581, + "grad_norm": 2.6503193979206605, + "learning_rate": 1.0473471225269898e-07, + "loss": 0.1259, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9448 + }, + { + "epoch": 0.9086887531855556, + "grad_norm": 1.817394186211026, + "learning_rate": 1.0451634220464246e-07, + "loss": 0.1202, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9449 + }, + { + "epoch": 0.9087849209020532, + "grad_norm": 1.7500852848027895, + "learning_rate": 1.042981951827865e-07, + "loss": 0.1075, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9450 + }, + { + "epoch": 0.9088810886185508, + "grad_norm": 1.4884611792197426, + "learning_rate": 1.0408027120744179e-07, + "loss": 0.0916, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9451 + }, + { + "epoch": 0.9089772563350483, + "grad_norm": 2.0375691524961956, + "learning_rate": 1.0386257029889767e-07, + "loss": 0.1237, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9452 + }, + { + "epoch": 0.9090734240515459, + "grad_norm": 1.5906947140665288, + "learning_rate": 1.0364509247742238e-07, + "loss": 0.1066, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9453 + }, + { + "epoch": 0.9091695917680435, + "grad_norm": 2.1915059030819157, + "learning_rate": 1.034278377632636e-07, + "loss": 0.1222, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9454 + }, + { + "epoch": 0.909265759484541, + "grad_norm": 1.8939570891186859, + "learning_rate": 1.0321080617664846e-07, + "loss": 0.113, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9455 + }, + { + "epoch": 0.9093619272010386, + "grad_norm": 2.2580647433881404, + "learning_rate": 1.0299399773778362e-07, + "loss": 0.1488, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9456 + }, + { + "epoch": 0.9094580949175362, + "grad_norm": 2.204829388616577, + "learning_rate": 1.027774124668543e-07, + "loss": 0.1097, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9457 + }, + { + "epoch": 0.9095542626340337, + "grad_norm": 1.9599761595598781, + "learning_rate": 1.0256105038402492e-07, + "loss": 0.1055, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9458 + }, + { + "epoch": 0.9096504303505313, + "grad_norm": 1.268449328204468, + "learning_rate": 1.0234491150943964e-07, + "loss": 0.0824, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9459 + }, + { + "epoch": 0.9097465980670288, + "grad_norm": 1.8302456022380207, + "learning_rate": 1.0212899586322183e-07, + "loss": 0.1116, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9460 + }, + { + "epoch": 0.9098427657835265, + "grad_norm": 2.0629418363018805, + "learning_rate": 1.0191330346547317e-07, + "loss": 0.1447, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9461 + }, + { + "epoch": 0.9099389335000241, + "grad_norm": 1.7296599275162545, + "learning_rate": 1.0169783433627568e-07, + "loss": 0.1165, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9462 + }, + { + "epoch": 0.9100351012165216, + "grad_norm": 2.6439951987443178, + "learning_rate": 1.0148258849569048e-07, + "loss": 0.1433, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9463 + }, + { + "epoch": 0.9101312689330192, + "grad_norm": 2.0656808214195026, + "learning_rate": 1.0126756596375687e-07, + "loss": 0.1244, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9464 + }, + { + "epoch": 0.9102274366495168, + "grad_norm": 2.6269104707903557, + "learning_rate": 1.0105276676049408e-07, + "loss": 0.1194, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9465 + }, + { + "epoch": 0.9103236043660143, + "grad_norm": 1.6066465271435428, + "learning_rate": 1.0083819090590086e-07, + "loss": 0.114, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9466 + }, + { + "epoch": 0.9104197720825119, + "grad_norm": 2.850997171306238, + "learning_rate": 1.0062383841995483e-07, + "loss": 0.1065, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9467 + }, + { + "epoch": 0.9105159397990095, + "grad_norm": 1.4209003048958977, + "learning_rate": 1.0040970932261279e-07, + "loss": 0.0944, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9468 + }, + { + "epoch": 0.910612107515507, + "grad_norm": 1.7083340727526721, + "learning_rate": 1.0019580363381077e-07, + "loss": 0.112, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9469 + }, + { + "epoch": 0.9107082752320046, + "grad_norm": 2.014142981008924, + "learning_rate": 9.998212137346363e-08, + "loss": 0.1434, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9470 + }, + { + "epoch": 0.9108044429485022, + "grad_norm": 1.8128742872981083, + "learning_rate": 9.976866256146606e-08, + "loss": 0.1364, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9471 + }, + { + "epoch": 0.9109006106649997, + "grad_norm": 1.7301769818650925, + "learning_rate": 9.955542721769156e-08, + "loss": 0.1156, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9472 + }, + { + "epoch": 0.9109967783814973, + "grad_norm": 1.8112882426704047, + "learning_rate": 9.934241536199291e-08, + "loss": 0.1169, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9473 + }, + { + "epoch": 0.9110929460979948, + "grad_norm": 1.6155364872027917, + "learning_rate": 9.912962701420281e-08, + "loss": 0.1088, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9474 + }, + { + "epoch": 0.9111891138144925, + "grad_norm": 1.7981805726924431, + "learning_rate": 9.891706219413128e-08, + "loss": 0.1275, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9475 + }, + { + "epoch": 0.9112852815309901, + "grad_norm": 1.7628388698107895, + "learning_rate": 9.870472092156941e-08, + "loss": 0.112, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9476 + }, + { + "epoch": 0.9113814492474877, + "grad_norm": 1.9133253743129932, + "learning_rate": 9.849260321628667e-08, + "loss": 0.1439, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9477 + }, + { + "epoch": 0.9114776169639852, + "grad_norm": 1.6758407431522995, + "learning_rate": 9.828070909803145e-08, + "loss": 0.1214, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9478 + }, + { + "epoch": 0.9115737846804828, + "grad_norm": 2.6320723121035656, + "learning_rate": 9.806903858653238e-08, + "loss": 0.1125, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9479 + }, + { + "epoch": 0.9116699523969803, + "grad_norm": 2.408012314553719, + "learning_rate": 9.785759170149622e-08, + "loss": 0.1416, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9480 + }, + { + "epoch": 0.9117661201134779, + "grad_norm": 1.6580603779195904, + "learning_rate": 9.764636846260916e-08, + "loss": 0.1062, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9481 + }, + { + "epoch": 0.9118622878299755, + "grad_norm": 1.63681213925593, + "learning_rate": 9.743536888953658e-08, + "loss": 0.0934, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9482 + }, + { + "epoch": 0.911958455546473, + "grad_norm": 2.160678266670368, + "learning_rate": 9.722459300192333e-08, + "loss": 0.0955, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9483 + }, + { + "epoch": 0.9120546232629706, + "grad_norm": 2.678165102082058, + "learning_rate": 9.701404081939313e-08, + "loss": 0.115, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9484 + }, + { + "epoch": 0.9121507909794682, + "grad_norm": 1.6792099905959679, + "learning_rate": 9.680371236154896e-08, + "loss": 0.1107, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9485 + }, + { + "epoch": 0.9122469586959657, + "grad_norm": 1.8733777388727109, + "learning_rate": 9.659360764797343e-08, + "loss": 0.1061, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9486 + }, + { + "epoch": 0.9123431264124633, + "grad_norm": 2.0566361622266314, + "learning_rate": 9.638372669822732e-08, + "loss": 0.1211, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9487 + }, + { + "epoch": 0.9124392941289609, + "grad_norm": 2.142186300558693, + "learning_rate": 9.617406953185138e-08, + "loss": 0.1273, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9488 + }, + { + "epoch": 0.9125354618454585, + "grad_norm": 2.0001466683623694, + "learning_rate": 9.596463616836499e-08, + "loss": 0.1279, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9489 + }, + { + "epoch": 0.9126316295619561, + "grad_norm": 1.8915538501386278, + "learning_rate": 9.575542662726756e-08, + "loss": 0.1205, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9490 + }, + { + "epoch": 0.9127277972784537, + "grad_norm": 2.26324348568221, + "learning_rate": 9.554644092803656e-08, + "loss": 0.1201, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9491 + }, + { + "epoch": 0.9128239649949512, + "grad_norm": 1.7730157552321202, + "learning_rate": 9.533767909012919e-08, + "loss": 0.1136, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9492 + }, + { + "epoch": 0.9129201327114488, + "grad_norm": 1.3581449350693657, + "learning_rate": 9.512914113298188e-08, + "loss": 0.089, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9493 + }, + { + "epoch": 0.9130163004279463, + "grad_norm": 2.2518688628960337, + "learning_rate": 9.492082707601047e-08, + "loss": 0.1666, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9494 + }, + { + "epoch": 0.9131124681444439, + "grad_norm": 1.6169523102902197, + "learning_rate": 9.471273693860889e-08, + "loss": 0.1229, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9495 + }, + { + "epoch": 0.9132086358609415, + "grad_norm": 2.2680504124196585, + "learning_rate": 9.450487074015108e-08, + "loss": 0.1312, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9496 + }, + { + "epoch": 0.913304803577439, + "grad_norm": 1.8629400677965409, + "learning_rate": 9.429722849999045e-08, + "loss": 0.1085, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9497 + }, + { + "epoch": 0.9134009712939366, + "grad_norm": 2.213938954377453, + "learning_rate": 9.40898102374585e-08, + "loss": 0.115, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9498 + }, + { + "epoch": 0.9134971390104342, + "grad_norm": 1.7423530684137354, + "learning_rate": 9.38826159718667e-08, + "loss": 0.1284, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9499 + }, + { + "epoch": 0.9135933067269317, + "grad_norm": 2.611271888273708, + "learning_rate": 9.36756457225052e-08, + "loss": 0.1346, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9500 + }, + { + "epoch": 0.9136894744434293, + "grad_norm": 2.155941752539458, + "learning_rate": 9.346889950864385e-08, + "loss": 0.0976, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9501 + }, + { + "epoch": 0.9137856421599269, + "grad_norm": 1.590897988763299, + "learning_rate": 9.326237734953142e-08, + "loss": 0.1081, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9502 + }, + { + "epoch": 0.9138818098764245, + "grad_norm": 1.9730691841289882, + "learning_rate": 9.305607926439503e-08, + "loss": 0.1233, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9503 + }, + { + "epoch": 0.9139779775929221, + "grad_norm": 1.8260832635960642, + "learning_rate": 9.285000527244181e-08, + "loss": 0.1225, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9504 + }, + { + "epoch": 0.9140741453094197, + "grad_norm": 1.7556423043700602, + "learning_rate": 9.264415539285837e-08, + "loss": 0.1323, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9505 + }, + { + "epoch": 0.9141703130259172, + "grad_norm": 1.7110698741164303, + "learning_rate": 9.243852964480937e-08, + "loss": 0.117, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9506 + }, + { + "epoch": 0.9142664807424148, + "grad_norm": 1.6825681168110271, + "learning_rate": 9.223312804743922e-08, + "loss": 0.1127, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9507 + }, + { + "epoch": 0.9143626484589124, + "grad_norm": 1.6839782941745816, + "learning_rate": 9.202795061987208e-08, + "loss": 0.121, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9508 + }, + { + "epoch": 0.9144588161754099, + "grad_norm": 1.7388322414019726, + "learning_rate": 9.182299738120931e-08, + "loss": 0.1249, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9509 + }, + { + "epoch": 0.9145549838919075, + "grad_norm": 1.9145130676302144, + "learning_rate": 9.161826835053344e-08, + "loss": 0.1207, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9510 + }, + { + "epoch": 0.914651151608405, + "grad_norm": 1.246591425231309, + "learning_rate": 9.141376354690506e-08, + "loss": 0.0701, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9511 + }, + { + "epoch": 0.9147473193249026, + "grad_norm": 1.6761762767025226, + "learning_rate": 9.120948298936422e-08, + "loss": 0.1118, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9512 + }, + { + "epoch": 0.9148434870414002, + "grad_norm": 1.8023364878556614, + "learning_rate": 9.100542669692985e-08, + "loss": 0.1107, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9513 + }, + { + "epoch": 0.9149396547578977, + "grad_norm": 1.7894425459689272, + "learning_rate": 9.080159468860094e-08, + "loss": 0.1206, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9514 + }, + { + "epoch": 0.9150358224743953, + "grad_norm": 2.1920325099442595, + "learning_rate": 9.059798698335343e-08, + "loss": 0.1127, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9515 + }, + { + "epoch": 0.9151319901908929, + "grad_norm": 1.7327037485101264, + "learning_rate": 9.03946036001449e-08, + "loss": 0.1171, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9516 + }, + { + "epoch": 0.9152281579073905, + "grad_norm": 2.8151482242353025, + "learning_rate": 9.019144455791024e-08, + "loss": 0.0915, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9517 + }, + { + "epoch": 0.9153243256238881, + "grad_norm": 2.045604893755879, + "learning_rate": 8.998850987556457e-08, + "loss": 0.1086, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9518 + }, + { + "epoch": 0.9154204933403857, + "grad_norm": 1.480975461243748, + "learning_rate": 8.978579957200167e-08, + "loss": 0.1074, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9519 + }, + { + "epoch": 0.9155166610568832, + "grad_norm": 1.8577199540120444, + "learning_rate": 8.958331366609424e-08, + "loss": 0.1075, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9520 + }, + { + "epoch": 0.9156128287733808, + "grad_norm": 1.7696140613397537, + "learning_rate": 8.938105217669413e-08, + "loss": 0.1253, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9521 + }, + { + "epoch": 0.9157089964898784, + "grad_norm": 1.8453356250423139, + "learning_rate": 8.917901512263238e-08, + "loss": 0.102, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9522 + }, + { + "epoch": 0.9158051642063759, + "grad_norm": 2.778161277706855, + "learning_rate": 8.89772025227198e-08, + "loss": 0.1277, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9523 + }, + { + "epoch": 0.9159013319228735, + "grad_norm": 1.4802996276580382, + "learning_rate": 8.87756143957455e-08, + "loss": 0.0889, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9524 + }, + { + "epoch": 0.915997499639371, + "grad_norm": 2.3382880251511957, + "learning_rate": 8.857425076047754e-08, + "loss": 0.121, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9525 + }, + { + "epoch": 0.9160936673558686, + "grad_norm": 1.6505512295594615, + "learning_rate": 8.83731116356637e-08, + "loss": 0.0989, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9526 + }, + { + "epoch": 0.9161898350723662, + "grad_norm": 1.313497046767053, + "learning_rate": 8.817219704003066e-08, + "loss": 0.0855, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9527 + }, + { + "epoch": 0.9162860027888637, + "grad_norm": 2.2908813015190037, + "learning_rate": 8.797150699228374e-08, + "loss": 0.1215, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9528 + }, + { + "epoch": 0.9163821705053613, + "grad_norm": 2.3171831066988777, + "learning_rate": 8.777104151110827e-08, + "loss": 0.1401, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9529 + }, + { + "epoch": 0.9164783382218589, + "grad_norm": 1.5939070696014863, + "learning_rate": 8.757080061516793e-08, + "loss": 0.0942, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9530 + }, + { + "epoch": 0.9165745059383565, + "grad_norm": 1.8031888476425313, + "learning_rate": 8.73707843231053e-08, + "loss": 0.1111, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9531 + }, + { + "epoch": 0.9166706736548541, + "grad_norm": 1.8860633897789294, + "learning_rate": 8.7170992653543e-08, + "loss": 0.1189, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9532 + }, + { + "epoch": 0.9167668413713517, + "grad_norm": 1.7027550624573402, + "learning_rate": 8.697142562508199e-08, + "loss": 0.1157, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9533 + }, + { + "epoch": 0.9168630090878492, + "grad_norm": 1.6655641799035024, + "learning_rate": 8.677208325630265e-08, + "loss": 0.1107, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9534 + }, + { + "epoch": 0.9169591768043468, + "grad_norm": 2.0264650977849743, + "learning_rate": 8.657296556576406e-08, + "loss": 0.1083, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9535 + }, + { + "epoch": 0.9170553445208444, + "grad_norm": 2.086973361158293, + "learning_rate": 8.637407257200498e-08, + "loss": 0.0984, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9536 + }, + { + "epoch": 0.9171515122373419, + "grad_norm": 1.7632767286910465, + "learning_rate": 8.617540429354226e-08, + "loss": 0.104, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9537 + }, + { + "epoch": 0.9172476799538395, + "grad_norm": 1.5128589692947447, + "learning_rate": 8.597696074887335e-08, + "loss": 0.0922, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9538 + }, + { + "epoch": 0.917343847670337, + "grad_norm": 2.146976199444127, + "learning_rate": 8.577874195647318e-08, + "loss": 0.1078, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9539 + }, + { + "epoch": 0.9174400153868346, + "grad_norm": 2.1391328405921763, + "learning_rate": 8.55807479347967e-08, + "loss": 0.1211, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9540 + }, + { + "epoch": 0.9175361831033322, + "grad_norm": 2.088056895852012, + "learning_rate": 8.53829787022778e-08, + "loss": 0.1272, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9541 + }, + { + "epoch": 0.9176323508198297, + "grad_norm": 1.6964467370558078, + "learning_rate": 8.518543427732951e-08, + "loss": 0.1196, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9542 + }, + { + "epoch": 0.9177285185363273, + "grad_norm": 1.9309987721084512, + "learning_rate": 8.498811467834322e-08, + "loss": 0.0944, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9543 + }, + { + "epoch": 0.9178246862528249, + "grad_norm": 1.611769976476202, + "learning_rate": 8.479101992369038e-08, + "loss": 0.1146, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9544 + }, + { + "epoch": 0.9179208539693225, + "grad_norm": 2.7779349129952693, + "learning_rate": 8.4594150031721e-08, + "loss": 0.1208, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9545 + }, + { + "epoch": 0.9180170216858201, + "grad_norm": 1.4520646516457558, + "learning_rate": 8.439750502076405e-08, + "loss": 0.0911, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9546 + }, + { + "epoch": 0.9181131894023177, + "grad_norm": 2.119006471924981, + "learning_rate": 8.420108490912793e-08, + "loss": 0.1129, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9547 + }, + { + "epoch": 0.9182093571188152, + "grad_norm": 1.9113800500731433, + "learning_rate": 8.400488971509968e-08, + "loss": 0.1133, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9548 + }, + { + "epoch": 0.9183055248353128, + "grad_norm": 1.9632471994689764, + "learning_rate": 8.38089194569458e-08, + "loss": 0.0919, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9549 + }, + { + "epoch": 0.9184016925518104, + "grad_norm": 1.6231724105719372, + "learning_rate": 8.36131741529117e-08, + "loss": 0.1122, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9550 + }, + { + "epoch": 0.9184978602683079, + "grad_norm": 1.8740775470426176, + "learning_rate": 8.341765382122142e-08, + "loss": 0.1225, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9551 + }, + { + "epoch": 0.9185940279848055, + "grad_norm": 2.4472105984953876, + "learning_rate": 8.322235848007898e-08, + "loss": 0.1259, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9552 + }, + { + "epoch": 0.9186901957013031, + "grad_norm": 1.9455953132018962, + "learning_rate": 8.302728814766708e-08, + "loss": 0.1037, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9553 + }, + { + "epoch": 0.9187863634178006, + "grad_norm": 1.943182825593709, + "learning_rate": 8.283244284214648e-08, + "loss": 0.1288, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9554 + }, + { + "epoch": 0.9188825311342982, + "grad_norm": 1.3456938085421146, + "learning_rate": 8.26378225816582e-08, + "loss": 0.0839, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9555 + }, + { + "epoch": 0.9189786988507957, + "grad_norm": 1.9870431438252956, + "learning_rate": 8.244342738432192e-08, + "loss": 0.1128, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9556 + }, + { + "epoch": 0.9190748665672933, + "grad_norm": 1.7258883044847928, + "learning_rate": 8.22492572682368e-08, + "loss": 0.086, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9557 + }, + { + "epoch": 0.9191710342837909, + "grad_norm": 2.449224816051306, + "learning_rate": 8.205531225148e-08, + "loss": 0.0743, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9558 + }, + { + "epoch": 0.9192672020002886, + "grad_norm": 2.4778957022727024, + "learning_rate": 8.186159235210877e-08, + "loss": 0.0844, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9559 + }, + { + "epoch": 0.9193633697167861, + "grad_norm": 2.0821037262682327, + "learning_rate": 8.166809758815897e-08, + "loss": 0.1044, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9560 + }, + { + "epoch": 0.9194595374332837, + "grad_norm": 1.8842515332613363, + "learning_rate": 8.147482797764478e-08, + "loss": 0.131, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9561 + }, + { + "epoch": 0.9195557051497812, + "grad_norm": 2.738152029494304, + "learning_rate": 8.128178353856097e-08, + "loss": 0.1207, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9562 + }, + { + "epoch": 0.9196518728662788, + "grad_norm": 1.7456890090560704, + "learning_rate": 8.108896428888041e-08, + "loss": 0.1181, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9563 + }, + { + "epoch": 0.9197480405827764, + "grad_norm": 1.9239990058577052, + "learning_rate": 8.089637024655483e-08, + "loss": 0.1215, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9564 + }, + { + "epoch": 0.9198442082992739, + "grad_norm": 25.17624312196556, + "learning_rate": 8.070400142951546e-08, + "loss": 0.0921, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9565 + }, + { + "epoch": 0.9199403760157715, + "grad_norm": 1.65009239709398, + "learning_rate": 8.051185785567211e-08, + "loss": 0.1073, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9566 + }, + { + "epoch": 0.9200365437322691, + "grad_norm": 2.000006156775916, + "learning_rate": 8.031993954291384e-08, + "loss": 0.1254, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9567 + }, + { + "epoch": 0.9201327114487666, + "grad_norm": 1.9394887292944716, + "learning_rate": 8.012824650910938e-08, + "loss": 0.1062, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9568 + }, + { + "epoch": 0.9202288791652642, + "grad_norm": 1.7772188684725463, + "learning_rate": 7.993677877210531e-08, + "loss": 0.1139, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9569 + }, + { + "epoch": 0.9203250468817618, + "grad_norm": 2.2965950100745522, + "learning_rate": 7.974553634972848e-08, + "loss": 0.115, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9570 + }, + { + "epoch": 0.9204212145982593, + "grad_norm": 1.6532233919372226, + "learning_rate": 7.955451925978325e-08, + "loss": 0.0983, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9571 + }, + { + "epoch": 0.9205173823147569, + "grad_norm": 1.8333718794975007, + "learning_rate": 7.936372752005401e-08, + "loss": 0.1268, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9572 + }, + { + "epoch": 0.9206135500312546, + "grad_norm": 1.5785636583513873, + "learning_rate": 7.917316114830432e-08, + "loss": 0.0988, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9573 + }, + { + "epoch": 0.9207097177477521, + "grad_norm": 2.0455149326408715, + "learning_rate": 7.898282016227638e-08, + "loss": 0.112, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9574 + }, + { + "epoch": 0.9208058854642497, + "grad_norm": 1.455050093183869, + "learning_rate": 7.879270457969185e-08, + "loss": 0.104, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9575 + }, + { + "epoch": 0.9209020531807472, + "grad_norm": 2.235437932625088, + "learning_rate": 7.860281441825018e-08, + "loss": 0.1014, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9576 + }, + { + "epoch": 0.9209982208972448, + "grad_norm": 2.2889713247222905, + "learning_rate": 7.841314969563085e-08, + "loss": 0.1318, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9577 + }, + { + "epoch": 0.9210943886137424, + "grad_norm": 2.3094028379892166, + "learning_rate": 7.822371042949279e-08, + "loss": 0.0887, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9578 + }, + { + "epoch": 0.9211905563302399, + "grad_norm": 2.156279572263675, + "learning_rate": 7.80344966374727e-08, + "loss": 0.1048, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9579 + }, + { + "epoch": 0.9212867240467375, + "grad_norm": 1.5415732832657314, + "learning_rate": 7.784550833718707e-08, + "loss": 0.0988, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9580 + }, + { + "epoch": 0.9213828917632351, + "grad_norm": 2.2876573700397307, + "learning_rate": 7.765674554623182e-08, + "loss": 0.1502, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9581 + }, + { + "epoch": 0.9214790594797326, + "grad_norm": 1.7180387157059096, + "learning_rate": 7.746820828218037e-08, + "loss": 0.1244, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9582 + }, + { + "epoch": 0.9215752271962302, + "grad_norm": 1.4151674540871173, + "learning_rate": 7.727989656258649e-08, + "loss": 0.0953, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9583 + }, + { + "epoch": 0.9216713949127278, + "grad_norm": 1.7903698621686104, + "learning_rate": 7.709181040498253e-08, + "loss": 0.1222, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9584 + }, + { + "epoch": 0.9217675626292253, + "grad_norm": 1.5470098332690763, + "learning_rate": 7.690394982687977e-08, + "loss": 0.1094, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9585 + }, + { + "epoch": 0.9218637303457229, + "grad_norm": 1.9941821738538457, + "learning_rate": 7.671631484576891e-08, + "loss": 0.1041, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9586 + }, + { + "epoch": 0.9219598980622206, + "grad_norm": 1.5971096470634183, + "learning_rate": 7.652890547911878e-08, + "loss": 0.0907, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9587 + }, + { + "epoch": 0.9220560657787181, + "grad_norm": 1.9530043074436652, + "learning_rate": 7.634172174437793e-08, + "loss": 0.1112, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9588 + }, + { + "epoch": 0.9221522334952157, + "grad_norm": 1.9795063022708066, + "learning_rate": 7.615476365897351e-08, + "loss": 0.1145, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9589 + }, + { + "epoch": 0.9222484012117133, + "grad_norm": 1.7906570074277768, + "learning_rate": 7.596803124031243e-08, + "loss": 0.1066, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9590 + }, + { + "epoch": 0.9223445689282108, + "grad_norm": 1.7555652274275986, + "learning_rate": 7.578152450577914e-08, + "loss": 0.1013, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9591 + }, + { + "epoch": 0.9224407366447084, + "grad_norm": 1.747082472134528, + "learning_rate": 7.559524347273861e-08, + "loss": 0.138, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9592 + }, + { + "epoch": 0.922536904361206, + "grad_norm": 1.784167594501891, + "learning_rate": 7.540918815853366e-08, + "loss": 0.1326, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9593 + }, + { + "epoch": 0.9226330720777035, + "grad_norm": 2.602312991881418, + "learning_rate": 7.522335858048707e-08, + "loss": 0.1064, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9594 + }, + { + "epoch": 0.9227292397942011, + "grad_norm": 1.4395743311492397, + "learning_rate": 7.503775475589975e-08, + "loss": 0.0999, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9595 + }, + { + "epoch": 0.9228254075106986, + "grad_norm": 1.8043122834014786, + "learning_rate": 7.485237670205176e-08, + "loss": 0.0973, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9596 + }, + { + "epoch": 0.9229215752271962, + "grad_norm": 1.9620895485084286, + "learning_rate": 7.466722443620261e-08, + "loss": 0.1009, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9597 + }, + { + "epoch": 0.9230177429436938, + "grad_norm": 1.7286416228903567, + "learning_rate": 7.448229797559048e-08, + "loss": 0.1123, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9598 + }, + { + "epoch": 0.9231139106601913, + "grad_norm": 1.5355124921652652, + "learning_rate": 7.429759733743241e-08, + "loss": 0.0992, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9599 + }, + { + "epoch": 0.9232100783766889, + "grad_norm": 2.3617660181478084, + "learning_rate": 7.411312253892466e-08, + "loss": 0.1027, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9600 + }, + { + "epoch": 0.9233062460931866, + "grad_norm": 2.272575541035341, + "learning_rate": 7.39288735972421e-08, + "loss": 0.1142, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9601 + }, + { + "epoch": 0.9234024138096841, + "grad_norm": 2.5428460420760715, + "learning_rate": 7.374485052953934e-08, + "loss": 0.1006, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9602 + }, + { + "epoch": 0.9234985815261817, + "grad_norm": 2.1503095898512328, + "learning_rate": 7.356105335294905e-08, + "loss": 0.107, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9603 + }, + { + "epoch": 0.9235947492426793, + "grad_norm": 2.3480202324129347, + "learning_rate": 7.337748208458312e-08, + "loss": 0.1205, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9604 + }, + { + "epoch": 0.9236909169591768, + "grad_norm": 2.3241780550042077, + "learning_rate": 7.319413674153287e-08, + "loss": 0.0965, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9605 + }, + { + "epoch": 0.9237870846756744, + "grad_norm": 1.89811831816546, + "learning_rate": 7.301101734086824e-08, + "loss": 0.1093, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9606 + }, + { + "epoch": 0.923883252392172, + "grad_norm": 1.9737315757452751, + "learning_rate": 7.282812389963784e-08, + "loss": 0.1368, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9607 + }, + { + "epoch": 0.9239794201086695, + "grad_norm": 1.8973148700963955, + "learning_rate": 7.264545643486997e-08, + "loss": 0.0969, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9608 + }, + { + "epoch": 0.9240755878251671, + "grad_norm": 2.8162914724132806, + "learning_rate": 7.246301496357133e-08, + "loss": 0.1226, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9609 + }, + { + "epoch": 0.9241717555416646, + "grad_norm": 1.8385499885519674, + "learning_rate": 7.228079950272748e-08, + "loss": 0.1286, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9610 + }, + { + "epoch": 0.9242679232581622, + "grad_norm": 1.9794131089742812, + "learning_rate": 7.209881006930347e-08, + "loss": 0.1316, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9611 + }, + { + "epoch": 0.9243640909746598, + "grad_norm": 2.0600398005591662, + "learning_rate": 7.191704668024296e-08, + "loss": 0.1092, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9612 + }, + { + "epoch": 0.9244602586911573, + "grad_norm": 1.7335108111804292, + "learning_rate": 7.173550935246853e-08, + "loss": 0.1275, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9613 + }, + { + "epoch": 0.9245564264076549, + "grad_norm": 1.563865913691849, + "learning_rate": 7.155419810288222e-08, + "loss": 0.103, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9614 + }, + { + "epoch": 0.9246525941241526, + "grad_norm": 1.8047600514775275, + "learning_rate": 7.137311294836413e-08, + "loss": 0.1046, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9615 + }, + { + "epoch": 0.9247487618406501, + "grad_norm": 2.2015739590592056, + "learning_rate": 7.119225390577383e-08, + "loss": 0.0953, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9616 + }, + { + "epoch": 0.9248449295571477, + "grad_norm": 1.6396092865629912, + "learning_rate": 7.10116209919498e-08, + "loss": 0.1033, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9617 + }, + { + "epoch": 0.9249410972736453, + "grad_norm": 1.9003701885513784, + "learning_rate": 7.083121422370998e-08, + "loss": 0.1386, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9618 + }, + { + "epoch": 0.9250372649901428, + "grad_norm": 1.640631755412438, + "learning_rate": 7.065103361785008e-08, + "loss": 0.1039, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9619 + }, + { + "epoch": 0.9251334327066404, + "grad_norm": 1.659477120469492, + "learning_rate": 7.047107919114588e-08, + "loss": 0.1004, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9620 + }, + { + "epoch": 0.925229600423138, + "grad_norm": 2.0340139934837267, + "learning_rate": 7.029135096035116e-08, + "loss": 0.1237, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9621 + }, + { + "epoch": 0.9253257681396355, + "grad_norm": 2.102213462498955, + "learning_rate": 7.011184894219952e-08, + "loss": 0.1434, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9622 + }, + { + "epoch": 0.9254219358561331, + "grad_norm": 1.7826224183908064, + "learning_rate": 6.993257315340313e-08, + "loss": 0.1026, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9623 + }, + { + "epoch": 0.9255181035726306, + "grad_norm": 1.5463036922662565, + "learning_rate": 6.975352361065307e-08, + "loss": 0.1053, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9624 + }, + { + "epoch": 0.9256142712891282, + "grad_norm": 2.6677873264584866, + "learning_rate": 6.95747003306188e-08, + "loss": 0.1177, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9625 + }, + { + "epoch": 0.9257104390056258, + "grad_norm": 1.6559698039668305, + "learning_rate": 6.939610332994978e-08, + "loss": 0.1128, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9626 + }, + { + "epoch": 0.9258066067221233, + "grad_norm": 1.5146171925084235, + "learning_rate": 6.92177326252741e-08, + "loss": 0.1092, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9627 + }, + { + "epoch": 0.9259027744386209, + "grad_norm": 1.431518156046813, + "learning_rate": 6.90395882331979e-08, + "loss": 0.086, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9628 + }, + { + "epoch": 0.9259989421551186, + "grad_norm": 1.4577750733250616, + "learning_rate": 6.88616701703071e-08, + "loss": 0.0924, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9629 + }, + { + "epoch": 0.9260951098716161, + "grad_norm": 1.813201646400531, + "learning_rate": 6.868397845316676e-08, + "loss": 0.1157, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9630 + }, + { + "epoch": 0.9261912775881137, + "grad_norm": 1.9512908501736679, + "learning_rate": 6.850651309832035e-08, + "loss": 0.1316, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9631 + }, + { + "epoch": 0.9262874453046113, + "grad_norm": 1.8623277462951968, + "learning_rate": 6.832927412229017e-08, + "loss": 0.0696, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9632 + }, + { + "epoch": 0.9263836130211088, + "grad_norm": 1.748327696087302, + "learning_rate": 6.815226154157779e-08, + "loss": 0.1162, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9633 + }, + { + "epoch": 0.9264797807376064, + "grad_norm": 1.9390874412323875, + "learning_rate": 6.797547537266358e-08, + "loss": 0.1119, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9634 + }, + { + "epoch": 0.926575948454104, + "grad_norm": 1.842840114665454, + "learning_rate": 6.779891563200664e-08, + "loss": 0.1224, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9635 + }, + { + "epoch": 0.9266721161706015, + "grad_norm": 1.6512792134572394, + "learning_rate": 6.762258233604546e-08, + "loss": 0.0933, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9636 + }, + { + "epoch": 0.9267682838870991, + "grad_norm": 2.156221013063638, + "learning_rate": 6.74464755011972e-08, + "loss": 0.1169, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9637 + }, + { + "epoch": 0.9268644516035967, + "grad_norm": 1.7823379170402351, + "learning_rate": 6.727059514385765e-08, + "loss": 0.1393, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9638 + }, + { + "epoch": 0.9269606193200942, + "grad_norm": 1.8142551719605975, + "learning_rate": 6.709494128040145e-08, + "loss": 0.1246, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9639 + }, + { + "epoch": 0.9270567870365918, + "grad_norm": 2.2312025099626847, + "learning_rate": 6.691951392718332e-08, + "loss": 0.1211, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9640 + }, + { + "epoch": 0.9271529547530893, + "grad_norm": 2.26951527391741, + "learning_rate": 6.674431310053519e-08, + "loss": 0.1388, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9641 + }, + { + "epoch": 0.9272491224695869, + "grad_norm": 2.7769553256939994, + "learning_rate": 6.656933881676986e-08, + "loss": 0.1002, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9642 + }, + { + "epoch": 0.9273452901860846, + "grad_norm": 1.6427324236383225, + "learning_rate": 6.63945910921765e-08, + "loss": 0.0918, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9643 + }, + { + "epoch": 0.9274414579025821, + "grad_norm": 1.4096029197319877, + "learning_rate": 6.622006994302544e-08, + "loss": 0.0989, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9644 + }, + { + "epoch": 0.9275376256190797, + "grad_norm": 1.9044420972244605, + "learning_rate": 6.604577538556506e-08, + "loss": 0.1221, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9645 + }, + { + "epoch": 0.9276337933355773, + "grad_norm": 1.8278435607302934, + "learning_rate": 6.587170743602239e-08, + "loss": 0.101, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9646 + }, + { + "epoch": 0.9277299610520748, + "grad_norm": 2.0617615911689993, + "learning_rate": 6.569786611060391e-08, + "loss": 0.0858, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9647 + }, + { + "epoch": 0.9278261287685724, + "grad_norm": 2.2887632138016185, + "learning_rate": 6.5524251425495e-08, + "loss": 0.1177, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9648 + }, + { + "epoch": 0.92792229648507, + "grad_norm": 1.4318556833951388, + "learning_rate": 6.535086339685887e-08, + "loss": 0.0947, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9649 + }, + { + "epoch": 0.9280184642015675, + "grad_norm": 2.005519059278128, + "learning_rate": 6.517770204083895e-08, + "loss": 0.1236, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9650 + }, + { + "epoch": 0.9281146319180651, + "grad_norm": 2.3182252301581596, + "learning_rate": 6.500476737355682e-08, + "loss": 0.1044, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9651 + }, + { + "epoch": 0.9282107996345627, + "grad_norm": 1.6744556458892714, + "learning_rate": 6.483205941111347e-08, + "loss": 0.1259, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9652 + }, + { + "epoch": 0.9283069673510602, + "grad_norm": 1.9038141403248574, + "learning_rate": 6.46595781695883e-08, + "loss": 0.1105, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9653 + }, + { + "epoch": 0.9284031350675578, + "grad_norm": 1.4719075003503725, + "learning_rate": 6.448732366503979e-08, + "loss": 0.0997, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9654 + }, + { + "epoch": 0.9284993027840553, + "grad_norm": 1.527111476752081, + "learning_rate": 6.431529591350543e-08, + "loss": 0.0984, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9655 + }, + { + "epoch": 0.9285954705005529, + "grad_norm": 1.6023403115214148, + "learning_rate": 6.414349493100131e-08, + "loss": 0.103, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9656 + }, + { + "epoch": 0.9286916382170506, + "grad_norm": 2.477512522823719, + "learning_rate": 6.397192073352294e-08, + "loss": 0.1455, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9657 + }, + { + "epoch": 0.9287878059335481, + "grad_norm": 2.413365746494497, + "learning_rate": 6.380057333704393e-08, + "loss": 0.1133, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9658 + }, + { + "epoch": 0.9288839736500457, + "grad_norm": 1.7366403903744574, + "learning_rate": 6.362945275751736e-08, + "loss": 0.1233, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9659 + }, + { + "epoch": 0.9289801413665433, + "grad_norm": 1.7939582233724647, + "learning_rate": 6.345855901087522e-08, + "loss": 0.121, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9660 + }, + { + "epoch": 0.9290763090830408, + "grad_norm": 2.0376651726894623, + "learning_rate": 6.328789211302754e-08, + "loss": 0.0879, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9661 + }, + { + "epoch": 0.9291724767995384, + "grad_norm": 2.6626437735520296, + "learning_rate": 6.311745207986469e-08, + "loss": 0.1074, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9662 + }, + { + "epoch": 0.929268644516036, + "grad_norm": 2.4553785945370366, + "learning_rate": 6.294723892725452e-08, + "loss": 0.1119, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9663 + }, + { + "epoch": 0.9293648122325335, + "grad_norm": 1.4567139218665068, + "learning_rate": 6.277725267104489e-08, + "loss": 0.0878, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9664 + }, + { + "epoch": 0.9294609799490311, + "grad_norm": 2.0366438084724585, + "learning_rate": 6.260749332706179e-08, + "loss": 0.1425, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9665 + }, + { + "epoch": 0.9295571476655287, + "grad_norm": 1.769414416825991, + "learning_rate": 6.243796091111004e-08, + "loss": 0.0924, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9666 + }, + { + "epoch": 0.9296533153820262, + "grad_norm": 1.7743748944350746, + "learning_rate": 6.226865543897343e-08, + "loss": 0.13, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9667 + }, + { + "epoch": 0.9297494830985238, + "grad_norm": 1.6309516878139956, + "learning_rate": 6.209957692641544e-08, + "loss": 0.0953, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9668 + }, + { + "epoch": 0.9298456508150214, + "grad_norm": 1.6274091115791172, + "learning_rate": 6.193072538917738e-08, + "loss": 0.1011, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9669 + }, + { + "epoch": 0.9299418185315189, + "grad_norm": 1.948875769230118, + "learning_rate": 6.176210084297974e-08, + "loss": 0.1277, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9670 + }, + { + "epoch": 0.9300379862480166, + "grad_norm": 1.970157384165116, + "learning_rate": 6.159370330352216e-08, + "loss": 0.1355, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9671 + }, + { + "epoch": 0.9301341539645142, + "grad_norm": 1.720791217287212, + "learning_rate": 6.142553278648239e-08, + "loss": 0.108, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9672 + }, + { + "epoch": 0.9302303216810117, + "grad_norm": 1.662034327614408, + "learning_rate": 6.125758930751818e-08, + "loss": 0.1192, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9673 + }, + { + "epoch": 0.9303264893975093, + "grad_norm": 1.855650641457014, + "learning_rate": 6.108987288226536e-08, + "loss": 0.1024, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9674 + }, + { + "epoch": 0.9304226571140068, + "grad_norm": 1.7081811911723919, + "learning_rate": 6.092238352633867e-08, + "loss": 0.1051, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9675 + }, + { + "epoch": 0.9305188248305044, + "grad_norm": 1.9165440656001034, + "learning_rate": 6.0755121255332e-08, + "loss": 0.1114, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9676 + }, + { + "epoch": 0.930614992547002, + "grad_norm": 2.0398453682094364, + "learning_rate": 6.058808608481792e-08, + "loss": 0.1136, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9677 + }, + { + "epoch": 0.9307111602634995, + "grad_norm": 1.8704037914033391, + "learning_rate": 6.042127803034759e-08, + "loss": 0.097, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9678 + }, + { + "epoch": 0.9308073279799971, + "grad_norm": 2.069390261874944, + "learning_rate": 6.025469710745163e-08, + "loss": 0.1101, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9679 + }, + { + "epoch": 0.9309034956964947, + "grad_norm": 1.9126609464081863, + "learning_rate": 6.008834333163876e-08, + "loss": 0.1333, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9680 + }, + { + "epoch": 0.9309996634129922, + "grad_norm": 1.684273168110399, + "learning_rate": 5.992221671839799e-08, + "loss": 0.1069, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9681 + }, + { + "epoch": 0.9310958311294898, + "grad_norm": 1.7542213447970805, + "learning_rate": 5.975631728319498e-08, + "loss": 0.1193, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9682 + }, + { + "epoch": 0.9311919988459874, + "grad_norm": 1.776743630785876, + "learning_rate": 5.959064504147571e-08, + "loss": 0.0972, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9683 + }, + { + "epoch": 0.9312881665624849, + "grad_norm": 2.0324299485813624, + "learning_rate": 5.942520000866508e-08, + "loss": 0.1008, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9684 + }, + { + "epoch": 0.9313843342789826, + "grad_norm": 1.6694479522234569, + "learning_rate": 5.92599822001666e-08, + "loss": 0.0811, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9685 + }, + { + "epoch": 0.9314805019954802, + "grad_norm": 1.5817862031664631, + "learning_rate": 5.909499163136184e-08, + "loss": 0.1017, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9686 + }, + { + "epoch": 0.9315766697119777, + "grad_norm": 1.438129971882124, + "learning_rate": 5.893022831761297e-08, + "loss": 0.0891, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9687 + }, + { + "epoch": 0.9316728374284753, + "grad_norm": 1.8949292491018401, + "learning_rate": 5.876569227425855e-08, + "loss": 0.1203, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9688 + }, + { + "epoch": 0.9317690051449729, + "grad_norm": 1.9094968670606058, + "learning_rate": 5.860138351661826e-08, + "loss": 0.1156, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9689 + }, + { + "epoch": 0.9318651728614704, + "grad_norm": 1.897265575409585, + "learning_rate": 5.84373020599896e-08, + "loss": 0.1104, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9690 + }, + { + "epoch": 0.931961340577968, + "grad_norm": 1.5447325445868998, + "learning_rate": 5.8273447919648673e-08, + "loss": 0.11, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9691 + }, + { + "epoch": 0.9320575082944655, + "grad_norm": 1.5862805239638695, + "learning_rate": 5.8109821110851064e-08, + "loss": 0.0992, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9692 + }, + { + "epoch": 0.9321536760109631, + "grad_norm": 1.3806047031597712, + "learning_rate": 5.794642164883096e-08, + "loss": 0.0819, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9693 + }, + { + "epoch": 0.9322498437274607, + "grad_norm": 1.8621322356832923, + "learning_rate": 5.778324954880094e-08, + "loss": 0.1356, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9694 + }, + { + "epoch": 0.9323460114439582, + "grad_norm": 1.425293256809764, + "learning_rate": 5.762030482595271e-08, + "loss": 0.0759, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9695 + }, + { + "epoch": 0.9324421791604558, + "grad_norm": 2.481030918550776, + "learning_rate": 5.745758749545749e-08, + "loss": 0.1069, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9696 + }, + { + "epoch": 0.9325383468769534, + "grad_norm": 2.706173960653659, + "learning_rate": 5.7295097572464265e-08, + "loss": 0.1098, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9697 + }, + { + "epoch": 0.9326345145934509, + "grad_norm": 3.127261350603557, + "learning_rate": 5.7132835072101486e-08, + "loss": 0.114, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9698 + }, + { + "epoch": 0.9327306823099486, + "grad_norm": 1.8828688350941922, + "learning_rate": 5.6970800009475966e-08, + "loss": 0.1202, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9699 + }, + { + "epoch": 0.9328268500264462, + "grad_norm": 3.442740045464127, + "learning_rate": 5.680899239967369e-08, + "loss": 0.1418, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9700 + }, + { + "epoch": 0.9329230177429437, + "grad_norm": 1.9966498788906974, + "learning_rate": 5.6647412257759825e-08, + "loss": 0.1128, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9701 + }, + { + "epoch": 0.9330191854594413, + "grad_norm": 1.6309779479134028, + "learning_rate": 5.648605959877734e-08, + "loss": 0.117, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9702 + }, + { + "epoch": 0.9331153531759389, + "grad_norm": 2.3482422047662928, + "learning_rate": 5.632493443774922e-08, + "loss": 0.1127, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9703 + }, + { + "epoch": 0.9332115208924364, + "grad_norm": 1.6818290172101988, + "learning_rate": 5.616403678967625e-08, + "loss": 0.1147, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9704 + }, + { + "epoch": 0.933307688608934, + "grad_norm": 1.7284211046436835, + "learning_rate": 5.600336666953837e-08, + "loss": 0.0957, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9705 + }, + { + "epoch": 0.9334038563254315, + "grad_norm": 1.9979026391668437, + "learning_rate": 5.584292409229475e-08, + "loss": 0.0909, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9706 + }, + { + "epoch": 0.9335000240419291, + "grad_norm": 2.286566444929855, + "learning_rate": 5.568270907288287e-08, + "loss": 0.14, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9707 + }, + { + "epoch": 0.9335961917584267, + "grad_norm": 1.8404922710231553, + "learning_rate": 5.5522721626219135e-08, + "loss": 0.1127, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9708 + }, + { + "epoch": 0.9336923594749242, + "grad_norm": 1.9033125323744717, + "learning_rate": 5.536296176719913e-08, + "loss": 0.1043, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9709 + }, + { + "epoch": 0.9337885271914218, + "grad_norm": 1.3567504250550297, + "learning_rate": 5.5203429510696515e-08, + "loss": 0.0914, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9710 + }, + { + "epoch": 0.9338846949079194, + "grad_norm": 1.8666220211871511, + "learning_rate": 5.504412487156413e-08, + "loss": 0.1228, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9711 + }, + { + "epoch": 0.9339808626244169, + "grad_norm": 1.686474536292126, + "learning_rate": 5.4885047864634275e-08, + "loss": 0.1122, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9712 + }, + { + "epoch": 0.9340770303409146, + "grad_norm": 2.4532148279976056, + "learning_rate": 5.472619850471678e-08, + "loss": 0.1139, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9713 + }, + { + "epoch": 0.9341731980574122, + "grad_norm": 1.798844848027301, + "learning_rate": 5.456757680660174e-08, + "loss": 0.1021, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9714 + }, + { + "epoch": 0.9342693657739097, + "grad_norm": 1.714417589067142, + "learning_rate": 5.44091827850568e-08, + "loss": 0.1164, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9715 + }, + { + "epoch": 0.9343655334904073, + "grad_norm": 2.010220672945334, + "learning_rate": 5.4251016454828495e-08, + "loss": 0.0952, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9716 + }, + { + "epoch": 0.9344617012069049, + "grad_norm": 1.9371649211749289, + "learning_rate": 5.4093077830643384e-08, + "loss": 0.1191, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9717 + }, + { + "epoch": 0.9345578689234024, + "grad_norm": 1.7435716669699888, + "learning_rate": 5.393536692720525e-08, + "loss": 0.0925, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9718 + }, + { + "epoch": 0.9346540366399, + "grad_norm": 1.774260114213117, + "learning_rate": 5.37778837591979e-08, + "loss": 0.0959, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9719 + }, + { + "epoch": 0.9347502043563976, + "grad_norm": 1.9058646554002547, + "learning_rate": 5.3620628341283234e-08, + "loss": 0.0843, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9720 + }, + { + "epoch": 0.9348463720728951, + "grad_norm": 1.843493564466743, + "learning_rate": 5.346360068810258e-08, + "loss": 0.1128, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9721 + }, + { + "epoch": 0.9349425397893927, + "grad_norm": 1.8730514291399358, + "learning_rate": 5.3306800814275084e-08, + "loss": 0.1112, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9722 + }, + { + "epoch": 0.9350387075058902, + "grad_norm": 3.9011831990433117, + "learning_rate": 5.3150228734399624e-08, + "loss": 0.1323, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9723 + }, + { + "epoch": 0.9351348752223878, + "grad_norm": 2.5040878452309414, + "learning_rate": 5.2993884463053425e-08, + "loss": 0.1157, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9724 + }, + { + "epoch": 0.9352310429388854, + "grad_norm": 2.283716114718228, + "learning_rate": 5.2837768014792066e-08, + "loss": 0.1056, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9725 + }, + { + "epoch": 0.9353272106553829, + "grad_norm": 1.9607211900729595, + "learning_rate": 5.268187940415115e-08, + "loss": 0.1444, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9726 + }, + { + "epoch": 0.9354233783718806, + "grad_norm": 1.9137031105273643, + "learning_rate": 5.2526218645644055e-08, + "loss": 0.1407, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9727 + }, + { + "epoch": 0.9355195460883782, + "grad_norm": 1.4105346819465063, + "learning_rate": 5.2370785753763364e-08, + "loss": 0.0846, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9728 + }, + { + "epoch": 0.9356157138048757, + "grad_norm": 1.9975955666493737, + "learning_rate": 5.221558074297972e-08, + "loss": 0.099, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9729 + }, + { + "epoch": 0.9357118815213733, + "grad_norm": 2.8574111087109304, + "learning_rate": 5.206060362774379e-08, + "loss": 0.1112, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9730 + }, + { + "epoch": 0.9358080492378709, + "grad_norm": 1.921859482099466, + "learning_rate": 5.1905854422484306e-08, + "loss": 0.0937, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9731 + }, + { + "epoch": 0.9359042169543684, + "grad_norm": 1.5854876107221156, + "learning_rate": 5.175133314160891e-08, + "loss": 0.0996, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9732 + }, + { + "epoch": 0.936000384670866, + "grad_norm": 2.2433921050600056, + "learning_rate": 5.159703979950359e-08, + "loss": 0.1037, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9733 + }, + { + "epoch": 0.9360965523873636, + "grad_norm": 1.8402215187739484, + "learning_rate": 5.144297441053353e-08, + "loss": 0.1243, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9734 + }, + { + "epoch": 0.9361927201038611, + "grad_norm": 1.9204109841892403, + "learning_rate": 5.128913698904281e-08, + "loss": 0.1261, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9735 + }, + { + "epoch": 0.9362888878203587, + "grad_norm": 1.5160338621451852, + "learning_rate": 5.113552754935414e-08, + "loss": 0.0801, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9736 + }, + { + "epoch": 0.9363850555368562, + "grad_norm": 1.672800932442337, + "learning_rate": 5.098214610576912e-08, + "loss": 0.098, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9737 + }, + { + "epoch": 0.9364812232533538, + "grad_norm": 2.039804687189585, + "learning_rate": 5.082899267256775e-08, + "loss": 0.1107, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9738 + }, + { + "epoch": 0.9365773909698514, + "grad_norm": 1.9589502558301393, + "learning_rate": 5.0676067264009146e-08, + "loss": 0.1239, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9739 + }, + { + "epoch": 0.9366735586863489, + "grad_norm": 1.6913459624311877, + "learning_rate": 5.052336989433082e-08, + "loss": 0.1107, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9740 + }, + { + "epoch": 0.9367697264028466, + "grad_norm": 1.7919865657383829, + "learning_rate": 5.0370900577749734e-08, + "loss": 0.1322, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9741 + }, + { + "epoch": 0.9368658941193442, + "grad_norm": 2.049168501085168, + "learning_rate": 5.021865932846093e-08, + "loss": 0.1363, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9742 + }, + { + "epoch": 0.9369620618358417, + "grad_norm": 1.8952560851955975, + "learning_rate": 5.0066646160638886e-08, + "loss": 0.1119, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9743 + }, + { + "epoch": 0.9370582295523393, + "grad_norm": 1.9538436549951206, + "learning_rate": 4.9914861088435904e-08, + "loss": 0.1178, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9744 + }, + { + "epoch": 0.9371543972688369, + "grad_norm": 2.2559708705390853, + "learning_rate": 4.9763304125983735e-08, + "loss": 0.1491, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9745 + }, + { + "epoch": 0.9372505649853344, + "grad_norm": 1.881926299438498, + "learning_rate": 4.9611975287393034e-08, + "loss": 0.1178, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9746 + }, + { + "epoch": 0.937346732701832, + "grad_norm": 1.4768305576015475, + "learning_rate": 4.946087458675281e-08, + "loss": 0.0966, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9747 + }, + { + "epoch": 0.9374429004183296, + "grad_norm": 1.8828153391354954, + "learning_rate": 4.931000203813069e-08, + "loss": 0.089, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9748 + }, + { + "epoch": 0.9375390681348271, + "grad_norm": 1.751006812461315, + "learning_rate": 4.9159357655574055e-08, + "loss": 0.1017, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9749 + }, + { + "epoch": 0.9376352358513247, + "grad_norm": 1.7323844343588817, + "learning_rate": 4.9008941453107527e-08, + "loss": 0.1156, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9750 + }, + { + "epoch": 0.9377314035678223, + "grad_norm": 1.9988451615702674, + "learning_rate": 4.8858753444735455e-08, + "loss": 0.1213, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9751 + }, + { + "epoch": 0.9378275712843198, + "grad_norm": 1.6499640242216451, + "learning_rate": 4.870879364444109e-08, + "loss": 0.0916, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9752 + }, + { + "epoch": 0.9379237390008174, + "grad_norm": 2.041207176314007, + "learning_rate": 4.855906206618577e-08, + "loss": 0.1144, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9753 + }, + { + "epoch": 0.9380199067173149, + "grad_norm": 1.8602251466293023, + "learning_rate": 4.840955872391001e-08, + "loss": 0.1218, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9754 + }, + { + "epoch": 0.9381160744338126, + "grad_norm": 2.4349499625226647, + "learning_rate": 4.826028363153323e-08, + "loss": 0.1236, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9755 + }, + { + "epoch": 0.9382122421503102, + "grad_norm": 2.0944868764638547, + "learning_rate": 4.811123680295293e-08, + "loss": 0.1223, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9756 + }, + { + "epoch": 0.9383084098668077, + "grad_norm": 1.5938457596587514, + "learning_rate": 4.796241825204634e-08, + "loss": 0.1, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9757 + }, + { + "epoch": 0.9384045775833053, + "grad_norm": 1.6532935427160176, + "learning_rate": 4.781382799266821e-08, + "loss": 0.11, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9758 + }, + { + "epoch": 0.9385007452998029, + "grad_norm": 2.0800736563990796, + "learning_rate": 4.766546603865302e-08, + "loss": 0.1172, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9759 + }, + { + "epoch": 0.9385969130163004, + "grad_norm": 2.1590305105092384, + "learning_rate": 4.75173324038139e-08, + "loss": 0.1445, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9760 + }, + { + "epoch": 0.938693080732798, + "grad_norm": 1.7643151398404415, + "learning_rate": 4.7369427101942314e-08, + "loss": 0.1354, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9761 + }, + { + "epoch": 0.9387892484492956, + "grad_norm": 1.9795557281748877, + "learning_rate": 4.7221750146808357e-08, + "loss": 0.1436, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9762 + }, + { + "epoch": 0.9388854161657931, + "grad_norm": 2.0263437381503384, + "learning_rate": 4.707430155216158e-08, + "loss": 0.1491, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9763 + }, + { + "epoch": 0.9389815838822907, + "grad_norm": 2.016745553376035, + "learning_rate": 4.692708133172991e-08, + "loss": 0.1126, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9764 + }, + { + "epoch": 0.9390777515987883, + "grad_norm": 1.8475723847190584, + "learning_rate": 4.678008949921986e-08, + "loss": 0.1149, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9765 + }, + { + "epoch": 0.9391739193152858, + "grad_norm": 1.917285997800477, + "learning_rate": 4.663332606831661e-08, + "loss": 0.1115, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9766 + }, + { + "epoch": 0.9392700870317834, + "grad_norm": 1.6853973317465678, + "learning_rate": 4.648679105268422e-08, + "loss": 0.1012, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9767 + }, + { + "epoch": 0.939366254748281, + "grad_norm": 2.4073646862759124, + "learning_rate": 4.6340484465965396e-08, + "loss": 0.1028, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9768 + }, + { + "epoch": 0.9394624224647786, + "grad_norm": 1.7934304295835861, + "learning_rate": 4.619440632178229e-08, + "loss": 0.1031, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9769 + }, + { + "epoch": 0.9395585901812762, + "grad_norm": 1.6982631447358327, + "learning_rate": 4.604855663373459e-08, + "loss": 0.1118, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9770 + }, + { + "epoch": 0.9396547578977738, + "grad_norm": 1.7082836147470466, + "learning_rate": 4.590293541540197e-08, + "loss": 0.0993, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9771 + }, + { + "epoch": 0.9397509256142713, + "grad_norm": 2.1188474534366715, + "learning_rate": 4.575754268034138e-08, + "loss": 0.1038, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9772 + }, + { + "epoch": 0.9398470933307689, + "grad_norm": 1.6975419061195185, + "learning_rate": 4.561237844208977e-08, + "loss": 0.1137, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9773 + }, + { + "epoch": 0.9399432610472664, + "grad_norm": 1.9162051201445611, + "learning_rate": 4.546744271416187e-08, + "loss": 0.1089, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9774 + }, + { + "epoch": 0.940039428763764, + "grad_norm": 1.9569119337884187, + "learning_rate": 4.532273551005217e-08, + "loss": 0.1166, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9775 + }, + { + "epoch": 0.9401355964802616, + "grad_norm": 1.5929997544283385, + "learning_rate": 4.5178256843233235e-08, + "loss": 0.0927, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9776 + }, + { + "epoch": 0.9402317641967591, + "grad_norm": 1.8588954755913063, + "learning_rate": 4.5034006727156245e-08, + "loss": 0.1247, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9777 + }, + { + "epoch": 0.9403279319132567, + "grad_norm": 1.501731408334551, + "learning_rate": 4.488998517525128e-08, + "loss": 0.0906, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9778 + }, + { + "epoch": 0.9404240996297543, + "grad_norm": 1.8143682461081503, + "learning_rate": 4.474619220092707e-08, + "loss": 0.1188, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9779 + }, + { + "epoch": 0.9405202673462518, + "grad_norm": 2.1296666030095217, + "learning_rate": 4.4602627817571245e-08, + "loss": 0.1269, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9780 + }, + { + "epoch": 0.9406164350627494, + "grad_norm": 1.7853037215693177, + "learning_rate": 4.445929203855004e-08, + "loss": 0.1191, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9781 + }, + { + "epoch": 0.940712602779247, + "grad_norm": 1.7604721200837945, + "learning_rate": 4.43161848772089e-08, + "loss": 0.1024, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9782 + }, + { + "epoch": 0.9408087704957446, + "grad_norm": 2.607830340382243, + "learning_rate": 4.4173306346870507e-08, + "loss": 0.1414, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9783 + }, + { + "epoch": 0.9409049382122422, + "grad_norm": 1.9705208687813727, + "learning_rate": 4.40306564608381e-08, + "loss": 0.1069, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9784 + }, + { + "epoch": 0.9410011059287398, + "grad_norm": 1.7302274670713382, + "learning_rate": 4.388823523239216e-08, + "loss": 0.1206, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9785 + }, + { + "epoch": 0.9410972736452373, + "grad_norm": 2.184461481398552, + "learning_rate": 4.3746042674792934e-08, + "loss": 0.1117, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9786 + }, + { + "epoch": 0.9411934413617349, + "grad_norm": 1.7248799938695152, + "learning_rate": 4.360407880127898e-08, + "loss": 0.0844, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9787 + }, + { + "epoch": 0.9412896090782324, + "grad_norm": 1.785466147143816, + "learning_rate": 4.346234362506724e-08, + "loss": 0.1446, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9788 + }, + { + "epoch": 0.94138577679473, + "grad_norm": 3.732271192321167, + "learning_rate": 4.332083715935381e-08, + "loss": 0.1098, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9789 + }, + { + "epoch": 0.9414819445112276, + "grad_norm": 2.291722769884264, + "learning_rate": 4.317955941731344e-08, + "loss": 0.1555, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9790 + }, + { + "epoch": 0.9415781122277251, + "grad_norm": 2.178102983638949, + "learning_rate": 4.303851041209922e-08, + "loss": 0.1221, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9791 + }, + { + "epoch": 0.9416742799442227, + "grad_norm": 1.794708016816308, + "learning_rate": 4.2897690156843144e-08, + "loss": 0.1278, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9792 + }, + { + "epoch": 0.9417704476607203, + "grad_norm": 2.1821731569665754, + "learning_rate": 4.275709866465666e-08, + "loss": 0.1387, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9793 + }, + { + "epoch": 0.9418666153772178, + "grad_norm": 1.7086458061310823, + "learning_rate": 4.26167359486282e-08, + "loss": 0.1157, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9794 + }, + { + "epoch": 0.9419627830937154, + "grad_norm": 1.6816834719678793, + "learning_rate": 4.247660202182674e-08, + "loss": 0.1074, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9795 + }, + { + "epoch": 0.942058950810213, + "grad_norm": 2.1392211901737106, + "learning_rate": 4.233669689729852e-08, + "loss": 0.1139, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9796 + }, + { + "epoch": 0.9421551185267106, + "grad_norm": 2.1661244749449775, + "learning_rate": 4.2197020588069515e-08, + "loss": 0.0813, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9797 + }, + { + "epoch": 0.9422512862432082, + "grad_norm": 1.855153921578492, + "learning_rate": 4.2057573107144034e-08, + "loss": 0.1125, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9798 + }, + { + "epoch": 0.9423474539597058, + "grad_norm": 1.928841509019493, + "learning_rate": 4.191835446750503e-08, + "loss": 0.1148, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9799 + }, + { + "epoch": 0.9424436216762033, + "grad_norm": 1.7649107496998053, + "learning_rate": 4.1779364682113796e-08, + "loss": 0.118, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9800 + }, + { + "epoch": 0.9425397893927009, + "grad_norm": 1.5088288058951902, + "learning_rate": 4.164060376391082e-08, + "loss": 0.0914, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9801 + }, + { + "epoch": 0.9426359571091985, + "grad_norm": 1.7393369554774838, + "learning_rate": 4.1502071725815216e-08, + "loss": 0.1173, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9802 + }, + { + "epoch": 0.942732124825696, + "grad_norm": 1.9347603448989084, + "learning_rate": 4.1363768580724714e-08, + "loss": 0.1302, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9803 + }, + { + "epoch": 0.9428282925421936, + "grad_norm": 1.5478077019164986, + "learning_rate": 4.122569434151569e-08, + "loss": 0.1256, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9804 + }, + { + "epoch": 0.9429244602586911, + "grad_norm": 1.6257148220380067, + "learning_rate": 4.10878490210434e-08, + "loss": 0.1258, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9805 + }, + { + "epoch": 0.9430206279751887, + "grad_norm": 1.9095310076073282, + "learning_rate": 4.0950232632141205e-08, + "loss": 0.133, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9806 + }, + { + "epoch": 0.9431167956916863, + "grad_norm": 1.98787116178471, + "learning_rate": 4.081284518762163e-08, + "loss": 0.1365, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9807 + }, + { + "epoch": 0.9432129634081838, + "grad_norm": 2.506047572411601, + "learning_rate": 4.067568670027638e-08, + "loss": 0.122, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9808 + }, + { + "epoch": 0.9433091311246814, + "grad_norm": 1.9915783916265353, + "learning_rate": 4.05387571828747e-08, + "loss": 0.1293, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9809 + }, + { + "epoch": 0.943405298841179, + "grad_norm": 1.613335815055456, + "learning_rate": 4.0402056648165544e-08, + "loss": 0.0953, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9810 + }, + { + "epoch": 0.9435014665576766, + "grad_norm": 1.6407776865548938, + "learning_rate": 4.0265585108875684e-08, + "loss": 0.1024, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9811 + }, + { + "epoch": 0.9435976342741742, + "grad_norm": 2.1626546122624997, + "learning_rate": 4.012934257771134e-08, + "loss": 0.1335, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9812 + }, + { + "epoch": 0.9436938019906718, + "grad_norm": 2.219652572551751, + "learning_rate": 3.999332906735709e-08, + "loss": 0.1211, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9813 + }, + { + "epoch": 0.9437899697071693, + "grad_norm": 1.8420855839440626, + "learning_rate": 3.985754459047586e-08, + "loss": 0.1291, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9814 + }, + { + "epoch": 0.9438861374236669, + "grad_norm": 2.0840340109970255, + "learning_rate": 3.972198915970976e-08, + "loss": 0.1178, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9815 + }, + { + "epoch": 0.9439823051401645, + "grad_norm": 2.0021168892497627, + "learning_rate": 3.958666278767953e-08, + "loss": 0.1179, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9816 + }, + { + "epoch": 0.944078472856662, + "grad_norm": 1.692218757688774, + "learning_rate": 3.9451565486983976e-08, + "loss": 0.1037, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9817 + }, + { + "epoch": 0.9441746405731596, + "grad_norm": 2.4630375396115998, + "learning_rate": 3.931669727020138e-08, + "loss": 0.1317, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9818 + }, + { + "epoch": 0.9442708082896571, + "grad_norm": 2.181065006851409, + "learning_rate": 3.918205814988835e-08, + "loss": 0.1222, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9819 + }, + { + "epoch": 0.9443669760061547, + "grad_norm": 3.0211033064214, + "learning_rate": 3.904764813858014e-08, + "loss": 0.1042, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9820 + }, + { + "epoch": 0.9444631437226523, + "grad_norm": 1.924936707377635, + "learning_rate": 3.8913467248790635e-08, + "loss": 0.0902, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9821 + }, + { + "epoch": 0.9445593114391498, + "grad_norm": 1.47145809743358, + "learning_rate": 3.8779515493012345e-08, + "loss": 0.1006, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9822 + }, + { + "epoch": 0.9446554791556474, + "grad_norm": 1.7824649982737493, + "learning_rate": 3.8645792883716946e-08, + "loss": 0.1114, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9823 + }, + { + "epoch": 0.944751646872145, + "grad_norm": 1.886310430750337, + "learning_rate": 3.851229943335394e-08, + "loss": 0.0951, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9824 + }, + { + "epoch": 0.9448478145886426, + "grad_norm": 1.841330033011552, + "learning_rate": 3.837903515435226e-08, + "loss": 0.1037, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9825 + }, + { + "epoch": 0.9449439823051402, + "grad_norm": 1.8653310563884076, + "learning_rate": 3.824600005911894e-08, + "loss": 0.1142, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9826 + }, + { + "epoch": 0.9450401500216378, + "grad_norm": 1.6961658842873317, + "learning_rate": 3.811319416003989e-08, + "loss": 0.1101, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9827 + }, + { + "epoch": 0.9451363177381353, + "grad_norm": 1.672172492213186, + "learning_rate": 3.798061746947995e-08, + "loss": 0.1093, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9828 + }, + { + "epoch": 0.9452324854546329, + "grad_norm": 2.071318228929593, + "learning_rate": 3.784826999978231e-08, + "loss": 0.1309, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9829 + }, + { + "epoch": 0.9453286531711305, + "grad_norm": 1.918744451595897, + "learning_rate": 3.771615176326876e-08, + "loss": 0.1102, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9830 + }, + { + "epoch": 0.945424820887628, + "grad_norm": 3.040858184552248, + "learning_rate": 3.758426277223976e-08, + "loss": 0.1383, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9831 + }, + { + "epoch": 0.9455209886041256, + "grad_norm": 2.299717559667111, + "learning_rate": 3.745260303897491e-08, + "loss": 0.1163, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9832 + }, + { + "epoch": 0.9456171563206232, + "grad_norm": 1.8834330330150373, + "learning_rate": 3.73211725757322e-08, + "loss": 0.124, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9833 + }, + { + "epoch": 0.9457133240371207, + "grad_norm": 2.3912465586251868, + "learning_rate": 3.718997139474767e-08, + "loss": 0.1131, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9834 + }, + { + "epoch": 0.9458094917536183, + "grad_norm": 1.68047894245222, + "learning_rate": 3.705899950823655e-08, + "loss": 0.1036, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9835 + }, + { + "epoch": 0.9459056594701158, + "grad_norm": 1.505311770925357, + "learning_rate": 3.692825692839325e-08, + "loss": 0.0916, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9836 + }, + { + "epoch": 0.9460018271866134, + "grad_norm": 1.5165519449836566, + "learning_rate": 3.67977436673897e-08, + "loss": 0.0833, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9837 + }, + { + "epoch": 0.946097994903111, + "grad_norm": 2.3898954701281947, + "learning_rate": 3.6667459737377284e-08, + "loss": 0.1054, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9838 + }, + { + "epoch": 0.9461941626196086, + "grad_norm": 1.4104203526989956, + "learning_rate": 3.653740515048576e-08, + "loss": 0.0855, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9839 + }, + { + "epoch": 0.9462903303361062, + "grad_norm": 1.6177379808984653, + "learning_rate": 3.640757991882349e-08, + "loss": 0.1088, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9840 + }, + { + "epoch": 0.9463864980526038, + "grad_norm": 1.6179726317812257, + "learning_rate": 3.627798405447774e-08, + "loss": 0.0876, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9841 + }, + { + "epoch": 0.9464826657691013, + "grad_norm": 1.7115866459022695, + "learning_rate": 3.614861756951416e-08, + "loss": 0.1105, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9842 + }, + { + "epoch": 0.9465788334855989, + "grad_norm": 2.1408751519216356, + "learning_rate": 3.601948047597698e-08, + "loss": 0.115, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9843 + }, + { + "epoch": 0.9466750012020965, + "grad_norm": 1.7273290116425117, + "learning_rate": 3.5890572785889645e-08, + "loss": 0.1198, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9844 + }, + { + "epoch": 0.946771168918594, + "grad_norm": 2.2283426379238014, + "learning_rate": 3.576189451125339e-08, + "loss": 0.1222, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9845 + }, + { + "epoch": 0.9468673366350916, + "grad_norm": 1.9058605266706705, + "learning_rate": 3.5633445664048636e-08, + "loss": 0.1137, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9846 + }, + { + "epoch": 0.9469635043515892, + "grad_norm": 1.6829059395539934, + "learning_rate": 3.550522625623415e-08, + "loss": 0.1227, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9847 + }, + { + "epoch": 0.9470596720680867, + "grad_norm": 1.773040330553651, + "learning_rate": 3.5377236299748154e-08, + "loss": 0.0991, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9848 + }, + { + "epoch": 0.9471558397845843, + "grad_norm": 1.9335764850064039, + "learning_rate": 3.5249475806506394e-08, + "loss": 0.1008, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9849 + }, + { + "epoch": 0.9472520075010818, + "grad_norm": 2.08438022883405, + "learning_rate": 3.512194478840353e-08, + "loss": 0.1256, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9850 + }, + { + "epoch": 0.9473481752175794, + "grad_norm": 2.7873794385418154, + "learning_rate": 3.499464325731339e-08, + "loss": 0.1003, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9851 + }, + { + "epoch": 0.947444342934077, + "grad_norm": 1.7137422465915377, + "learning_rate": 3.486757122508816e-08, + "loss": 0.1115, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9852 + }, + { + "epoch": 0.9475405106505747, + "grad_norm": 1.7615966196548007, + "learning_rate": 3.4740728703558377e-08, + "loss": 0.1079, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9853 + }, + { + "epoch": 0.9476366783670722, + "grad_norm": 1.9290774184856534, + "learning_rate": 3.461411570453377e-08, + "loss": 0.089, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9854 + }, + { + "epoch": 0.9477328460835698, + "grad_norm": 1.7598630316086534, + "learning_rate": 3.4487732239801564e-08, + "loss": 0.0949, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9855 + }, + { + "epoch": 0.9478290138000673, + "grad_norm": 3.9979956652098045, + "learning_rate": 3.43615783211293e-08, + "loss": 0.1178, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9856 + }, + { + "epoch": 0.9479251815165649, + "grad_norm": 2.5770197503737573, + "learning_rate": 3.423565396026202e-08, + "loss": 0.1298, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9857 + }, + { + "epoch": 0.9480213492330625, + "grad_norm": 1.9960587335805944, + "learning_rate": 3.41099591689234e-08, + "loss": 0.1406, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9858 + }, + { + "epoch": 0.94811751694956, + "grad_norm": 9.639292211709297, + "learning_rate": 3.3984493958816034e-08, + "loss": 0.1052, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9859 + }, + { + "epoch": 0.9482136846660576, + "grad_norm": 1.633188949649865, + "learning_rate": 3.385925834162113e-08, + "loss": 0.1028, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9860 + }, + { + "epoch": 0.9483098523825552, + "grad_norm": 1.7075386858452501, + "learning_rate": 3.37342523289988e-08, + "loss": 0.1248, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9861 + }, + { + "epoch": 0.9484060200990527, + "grad_norm": 1.8407010174412362, + "learning_rate": 3.360947593258668e-08, + "loss": 0.1131, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9862 + }, + { + "epoch": 0.9485021878155503, + "grad_norm": 2.2172628961201992, + "learning_rate": 3.348492916400242e-08, + "loss": 0.1306, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9863 + }, + { + "epoch": 0.9485983555320479, + "grad_norm": 1.6282131527352273, + "learning_rate": 3.3360612034841475e-08, + "loss": 0.0937, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9864 + }, + { + "epoch": 0.9486945232485454, + "grad_norm": 1.7205330290809806, + "learning_rate": 3.323652455667847e-08, + "loss": 0.1068, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9865 + }, + { + "epoch": 0.948790690965043, + "grad_norm": 2.2496421823249486, + "learning_rate": 3.3112666741065836e-08, + "loss": 0.1003, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9866 + }, + { + "epoch": 0.9488868586815407, + "grad_norm": 1.6533507402862395, + "learning_rate": 3.298903859953517e-08, + "loss": 0.0749, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9867 + }, + { + "epoch": 0.9489830263980382, + "grad_norm": 1.55608932780515, + "learning_rate": 3.2865640143596446e-08, + "loss": 0.1118, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9868 + }, + { + "epoch": 0.9490791941145358, + "grad_norm": 2.1073643686389043, + "learning_rate": 3.2742471384738793e-08, + "loss": 0.1077, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9869 + }, + { + "epoch": 0.9491753618310333, + "grad_norm": 2.0285670909332545, + "learning_rate": 3.261953233442944e-08, + "loss": 0.1202, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9870 + }, + { + "epoch": 0.9492715295475309, + "grad_norm": 1.6088569258992784, + "learning_rate": 3.249682300411422e-08, + "loss": 0.0967, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9871 + }, + { + "epoch": 0.9493676972640285, + "grad_norm": 1.9247825958500915, + "learning_rate": 3.237434340521789e-08, + "loss": 0.1042, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9872 + }, + { + "epoch": 0.949463864980526, + "grad_norm": 2.0121004742594604, + "learning_rate": 3.2252093549143546e-08, + "loss": 0.1223, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9873 + }, + { + "epoch": 0.9495600326970236, + "grad_norm": 1.7789844288150312, + "learning_rate": 3.2130073447272926e-08, + "loss": 0.1237, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9874 + }, + { + "epoch": 0.9496562004135212, + "grad_norm": 1.3409526306357646, + "learning_rate": 3.200828311096638e-08, + "loss": 0.0752, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9875 + }, + { + "epoch": 0.9497523681300187, + "grad_norm": 2.6960816943686354, + "learning_rate": 3.188672255156316e-08, + "loss": 0.146, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9876 + }, + { + "epoch": 0.9498485358465163, + "grad_norm": 2.3134728623627314, + "learning_rate": 3.1765391780380906e-08, + "loss": 0.1183, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9877 + }, + { + "epoch": 0.9499447035630139, + "grad_norm": 1.9864821232325085, + "learning_rate": 3.164429080871556e-08, + "loss": 0.1321, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9878 + }, + { + "epoch": 0.9500408712795114, + "grad_norm": 2.575908772879704, + "learning_rate": 3.152341964784228e-08, + "loss": 0.1193, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9879 + }, + { + "epoch": 0.950137038996009, + "grad_norm": 1.6006861703348565, + "learning_rate": 3.1402778309014284e-08, + "loss": 0.1159, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9880 + }, + { + "epoch": 0.9502332067125067, + "grad_norm": 1.8007017996578671, + "learning_rate": 3.128236680346342e-08, + "loss": 0.1005, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9881 + }, + { + "epoch": 0.9503293744290042, + "grad_norm": 3.6437284615326795, + "learning_rate": 3.116218514240099e-08, + "loss": 0.1108, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9882 + }, + { + "epoch": 0.9504255421455018, + "grad_norm": 1.8264174364115986, + "learning_rate": 3.1042233337015834e-08, + "loss": 0.1205, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9883 + }, + { + "epoch": 0.9505217098619994, + "grad_norm": 1.7069946535030274, + "learning_rate": 3.092251139847568e-08, + "loss": 0.126, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9884 + }, + { + "epoch": 0.9506178775784969, + "grad_norm": 1.4220869135687948, + "learning_rate": 3.0803019337926895e-08, + "loss": 0.0865, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9885 + }, + { + "epoch": 0.9507140452949945, + "grad_norm": 1.9386602907114148, + "learning_rate": 3.0683757166495024e-08, + "loss": 0.1146, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9886 + }, + { + "epoch": 0.950810213011492, + "grad_norm": 2.0844040614571355, + "learning_rate": 3.056472489528367e-08, + "loss": 0.1255, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9887 + }, + { + "epoch": 0.9509063807279896, + "grad_norm": 2.2024581634413343, + "learning_rate": 3.0445922535374263e-08, + "loss": 0.1107, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9888 + }, + { + "epoch": 0.9510025484444872, + "grad_norm": 2.1016309178905406, + "learning_rate": 3.0327350097828504e-08, + "loss": 0.1201, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9889 + }, + { + "epoch": 0.9510987161609847, + "grad_norm": 2.6210109008063203, + "learning_rate": 3.020900759368561e-08, + "loss": 0.1237, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9890 + }, + { + "epoch": 0.9511948838774823, + "grad_norm": 2.245967150470824, + "learning_rate": 3.009089503396345e-08, + "loss": 0.1228, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9891 + }, + { + "epoch": 0.9512910515939799, + "grad_norm": 1.682884950992591, + "learning_rate": 2.99730124296585e-08, + "loss": 0.0999, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9892 + }, + { + "epoch": 0.9513872193104774, + "grad_norm": 2.5969061952146033, + "learning_rate": 2.9855359791746144e-08, + "loss": 0.1516, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9893 + }, + { + "epoch": 0.951483387026975, + "grad_norm": 2.175118456590099, + "learning_rate": 2.9737937131180394e-08, + "loss": 0.1223, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9894 + }, + { + "epoch": 0.9515795547434727, + "grad_norm": 1.926942161937013, + "learning_rate": 2.9620744458893068e-08, + "loss": 0.1063, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9895 + }, + { + "epoch": 0.9516757224599702, + "grad_norm": 1.8961291659576098, + "learning_rate": 2.9503781785795715e-08, + "loss": 0.1018, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9896 + }, + { + "epoch": 0.9517718901764678, + "grad_norm": 2.031847957002783, + "learning_rate": 2.93870491227774e-08, + "loss": 0.1226, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9897 + }, + { + "epoch": 0.9518680578929654, + "grad_norm": 1.5664325829046744, + "learning_rate": 2.927054648070665e-08, + "loss": 0.0902, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9898 + }, + { + "epoch": 0.9519642256094629, + "grad_norm": 1.9037862728708355, + "learning_rate": 2.9154273870430074e-08, + "loss": 0.1164, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9899 + }, + { + "epoch": 0.9520603933259605, + "grad_norm": 1.6848508770949044, + "learning_rate": 2.903823130277289e-08, + "loss": 0.106, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9900 + }, + { + "epoch": 0.952156561042458, + "grad_norm": 2.6438032822443653, + "learning_rate": 2.8922418788538963e-08, + "loss": 0.119, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9901 + }, + { + "epoch": 0.9522527287589556, + "grad_norm": 1.4695428706549265, + "learning_rate": 2.880683633851078e-08, + "loss": 0.0934, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9902 + }, + { + "epoch": 0.9523488964754532, + "grad_norm": 2.1968500237167676, + "learning_rate": 2.869148396344945e-08, + "loss": 0.1145, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9903 + }, + { + "epoch": 0.9524450641919507, + "grad_norm": 1.9467929831642945, + "learning_rate": 2.857636167409472e-08, + "loss": 0.1246, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9904 + }, + { + "epoch": 0.9525412319084483, + "grad_norm": 1.6576518839693941, + "learning_rate": 2.8461469481164682e-08, + "loss": 0.1105, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9905 + }, + { + "epoch": 0.9526373996249459, + "grad_norm": 1.7196556560684417, + "learning_rate": 2.834680739535578e-08, + "loss": 0.0939, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9906 + }, + { + "epoch": 0.9527335673414434, + "grad_norm": 1.6295972177965994, + "learning_rate": 2.8232375427343916e-08, + "loss": 0.0768, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9907 + }, + { + "epoch": 0.952829735057941, + "grad_norm": 1.9797430772719575, + "learning_rate": 2.8118173587782516e-08, + "loss": 0.1501, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9908 + }, + { + "epoch": 0.9529259027744387, + "grad_norm": 1.9573404465441628, + "learning_rate": 2.8004201887304737e-08, + "loss": 0.1344, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9909 + }, + { + "epoch": 0.9530220704909362, + "grad_norm": 1.4994648448932295, + "learning_rate": 2.7890460336520987e-08, + "loss": 0.124, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9910 + }, + { + "epoch": 0.9531182382074338, + "grad_norm": 2.193455058408386, + "learning_rate": 2.7776948946021398e-08, + "loss": 0.1243, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9911 + }, + { + "epoch": 0.9532144059239314, + "grad_norm": 1.9673432670152111, + "learning_rate": 2.766366772637391e-08, + "loss": 0.1001, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9912 + }, + { + "epoch": 0.9533105736404289, + "grad_norm": 1.7182283612854314, + "learning_rate": 2.7550616688125642e-08, + "loss": 0.1134, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9913 + }, + { + "epoch": 0.9534067413569265, + "grad_norm": 2.145901055111322, + "learning_rate": 2.7437795841801508e-08, + "loss": 0.1288, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9914 + }, + { + "epoch": 0.953502909073424, + "grad_norm": 1.957467957007798, + "learning_rate": 2.732520519790588e-08, + "loss": 0.1385, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9915 + }, + { + "epoch": 0.9535990767899216, + "grad_norm": 2.164464346092608, + "learning_rate": 2.7212844766920932e-08, + "loss": 0.11, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9916 + }, + { + "epoch": 0.9536952445064192, + "grad_norm": 1.6417791261080372, + "learning_rate": 2.710071455930802e-08, + "loss": 0.0954, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9917 + }, + { + "epoch": 0.9537914122229167, + "grad_norm": 2.7473316082333863, + "learning_rate": 2.6988814585506563e-08, + "loss": 0.0997, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9918 + }, + { + "epoch": 0.9538875799394143, + "grad_norm": 1.4853931872436246, + "learning_rate": 2.6877144855934624e-08, + "loss": 0.0959, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9919 + }, + { + "epoch": 0.9539837476559119, + "grad_norm": 2.496278430211396, + "learning_rate": 2.676570538098944e-08, + "loss": 0.1142, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9920 + }, + { + "epoch": 0.9540799153724094, + "grad_norm": 1.6881653015893856, + "learning_rate": 2.665449617104604e-08, + "loss": 0.1037, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9921 + }, + { + "epoch": 0.954176083088907, + "grad_norm": 1.8045031172729908, + "learning_rate": 2.6543517236458096e-08, + "loss": 0.1308, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9922 + }, + { + "epoch": 0.9542722508054047, + "grad_norm": 1.8019680955975903, + "learning_rate": 2.6432768587558444e-08, + "loss": 0.1288, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9923 + }, + { + "epoch": 0.9543684185219022, + "grad_norm": 1.9627301155197672, + "learning_rate": 2.632225023465801e-08, + "loss": 0.1179, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9924 + }, + { + "epoch": 0.9544645862383998, + "grad_norm": 1.5787506578605843, + "learning_rate": 2.6211962188046335e-08, + "loss": 0.1064, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9925 + }, + { + "epoch": 0.9545607539548974, + "grad_norm": 2.2578956630338696, + "learning_rate": 2.6101904457991324e-08, + "loss": 0.1325, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9926 + }, + { + "epoch": 0.9546569216713949, + "grad_norm": 1.7282085768266935, + "learning_rate": 2.5992077054739772e-08, + "loss": 0.0949, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9927 + }, + { + "epoch": 0.9547530893878925, + "grad_norm": 2.1516004956831556, + "learning_rate": 2.5882479988517394e-08, + "loss": 0.1337, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9928 + }, + { + "epoch": 0.9548492571043901, + "grad_norm": 1.897176371463825, + "learning_rate": 2.577311326952714e-08, + "loss": 0.1109, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9929 + }, + { + "epoch": 0.9549454248208876, + "grad_norm": 1.3613732719148104, + "learning_rate": 2.566397690795197e-08, + "loss": 0.0817, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9930 + }, + { + "epoch": 0.9550415925373852, + "grad_norm": 2.827210595669454, + "learning_rate": 2.5555070913952374e-08, + "loss": 0.1504, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9931 + }, + { + "epoch": 0.9551377602538828, + "grad_norm": 2.226258344851551, + "learning_rate": 2.544639529766829e-08, + "loss": 0.1362, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9932 + }, + { + "epoch": 0.9552339279703803, + "grad_norm": 1.9894922095291332, + "learning_rate": 2.5337950069217467e-08, + "loss": 0.1092, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9933 + }, + { + "epoch": 0.9553300956868779, + "grad_norm": 1.5807305454843674, + "learning_rate": 2.5229735238696263e-08, + "loss": 0.106, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9934 + }, + { + "epoch": 0.9554262634033754, + "grad_norm": 2.067418553385924, + "learning_rate": 2.512175081617996e-08, + "loss": 0.1001, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9935 + }, + { + "epoch": 0.955522431119873, + "grad_norm": 1.5171429509826158, + "learning_rate": 2.5013996811722175e-08, + "loss": 0.1112, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9936 + }, + { + "epoch": 0.9556185988363707, + "grad_norm": 1.542331030804248, + "learning_rate": 2.4906473235355443e-08, + "loss": 0.096, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9937 + }, + { + "epoch": 0.9557147665528682, + "grad_norm": 1.5918035529397063, + "learning_rate": 2.4799180097089815e-08, + "loss": 0.1149, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9938 + }, + { + "epoch": 0.9558109342693658, + "grad_norm": 1.862011416085569, + "learning_rate": 2.469211740691535e-08, + "loss": 0.1146, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9939 + }, + { + "epoch": 0.9559071019858634, + "grad_norm": 1.7600556117627142, + "learning_rate": 2.4585285174799357e-08, + "loss": 0.1178, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9940 + }, + { + "epoch": 0.9560032697023609, + "grad_norm": 1.6734916531918194, + "learning_rate": 2.447868341068832e-08, + "loss": 0.0935, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9941 + }, + { + "epoch": 0.9560994374188585, + "grad_norm": 1.8943014312832565, + "learning_rate": 2.4372312124507082e-08, + "loss": 0.1183, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9942 + }, + { + "epoch": 0.9561956051353561, + "grad_norm": 1.705511074800575, + "learning_rate": 2.4266171326159382e-08, + "loss": 0.1062, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9943 + }, + { + "epoch": 0.9562917728518536, + "grad_norm": 1.7276287129754218, + "learning_rate": 2.416026102552732e-08, + "loss": 0.1035, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9944 + }, + { + "epoch": 0.9563879405683512, + "grad_norm": 1.6317268706373984, + "learning_rate": 2.4054581232470785e-08, + "loss": 0.1259, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9945 + }, + { + "epoch": 0.9564841082848488, + "grad_norm": 1.7314004020628, + "learning_rate": 2.3949131956829408e-08, + "loss": 0.1138, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9946 + }, + { + "epoch": 0.9565802760013463, + "grad_norm": 2.122986027843465, + "learning_rate": 2.384391320842061e-08, + "loss": 0.108, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9947 + }, + { + "epoch": 0.9566764437178439, + "grad_norm": 2.0268827232930864, + "learning_rate": 2.3738924997040726e-08, + "loss": 0.1247, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9948 + }, + { + "epoch": 0.9567726114343414, + "grad_norm": 1.268864795483377, + "learning_rate": 2.3634167332464153e-08, + "loss": 0.0856, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9949 + }, + { + "epoch": 0.956868779150839, + "grad_norm": 1.9501481635415954, + "learning_rate": 2.352964022444476e-08, + "loss": 0.1106, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9950 + }, + { + "epoch": 0.9569649468673367, + "grad_norm": 1.7506975317272135, + "learning_rate": 2.3425343682713376e-08, + "loss": 0.0902, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9951 + }, + { + "epoch": 0.9570611145838342, + "grad_norm": 1.7544696938275184, + "learning_rate": 2.332127771698084e-08, + "loss": 0.1123, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9952 + }, + { + "epoch": 0.9571572823003318, + "grad_norm": 1.8033546761346024, + "learning_rate": 2.321744233693607e-08, + "loss": 0.1176, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9953 + }, + { + "epoch": 0.9572534500168294, + "grad_norm": 2.851468705590334, + "learning_rate": 2.3113837552246333e-08, + "loss": 0.1599, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9954 + }, + { + "epoch": 0.9573496177333269, + "grad_norm": 2.21135753055992, + "learning_rate": 2.3010463372557245e-08, + "loss": 0.1368, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9955 + }, + { + "epoch": 0.9574457854498245, + "grad_norm": 1.7263749875318084, + "learning_rate": 2.2907319807493612e-08, + "loss": 0.1276, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9956 + }, + { + "epoch": 0.9575419531663221, + "grad_norm": 1.592750763841115, + "learning_rate": 2.28044068666583e-08, + "loss": 0.0888, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9957 + }, + { + "epoch": 0.9576381208828196, + "grad_norm": 2.5899487874111005, + "learning_rate": 2.2701724559632542e-08, + "loss": 0.1309, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9958 + }, + { + "epoch": 0.9577342885993172, + "grad_norm": 1.8556402397468712, + "learning_rate": 2.2599272895976466e-08, + "loss": 0.1086, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9959 + }, + { + "epoch": 0.9578304563158148, + "grad_norm": 2.1975352671875097, + "learning_rate": 2.2497051885228825e-08, + "loss": 0.109, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9960 + }, + { + "epoch": 0.9579266240323123, + "grad_norm": 1.7443748270378983, + "learning_rate": 2.2395061536906735e-08, + "loss": 0.1173, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9961 + }, + { + "epoch": 0.9580227917488099, + "grad_norm": 1.558378259691869, + "learning_rate": 2.2293301860505102e-08, + "loss": 0.1177, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9962 + }, + { + "epoch": 0.9581189594653075, + "grad_norm": 2.1195488839472407, + "learning_rate": 2.219177286549884e-08, + "loss": 0.1118, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9963 + }, + { + "epoch": 0.958215127181805, + "grad_norm": 1.6005828897498653, + "learning_rate": 2.2090474561340113e-08, + "loss": 0.1097, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9964 + }, + { + "epoch": 0.9583112948983027, + "grad_norm": 1.8038289772028955, + "learning_rate": 2.198940695745999e-08, + "loss": 0.1114, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9965 + }, + { + "epoch": 0.9584074626148003, + "grad_norm": 2.031198256132762, + "learning_rate": 2.1888570063268723e-08, + "loss": 0.1166, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9966 + }, + { + "epoch": 0.9585036303312978, + "grad_norm": 2.55511059066574, + "learning_rate": 2.1787963888154072e-08, + "loss": 0.1342, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9967 + }, + { + "epoch": 0.9585997980477954, + "grad_norm": 1.5501825067591604, + "learning_rate": 2.168758844148272e-08, + "loss": 0.0965, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9968 + }, + { + "epoch": 0.958695965764293, + "grad_norm": 1.8045253744969754, + "learning_rate": 2.1587443732599966e-08, + "loss": 0.098, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9969 + }, + { + "epoch": 0.9587921334807905, + "grad_norm": 1.5614123264535633, + "learning_rate": 2.148752977082974e-08, + "loss": 0.095, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9970 + }, + { + "epoch": 0.9588883011972881, + "grad_norm": 1.512663904046806, + "learning_rate": 2.1387846565474047e-08, + "loss": 0.1076, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9971 + }, + { + "epoch": 0.9589844689137856, + "grad_norm": 1.985898110261801, + "learning_rate": 2.1288394125813792e-08, + "loss": 0.0861, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9972 + }, + { + "epoch": 0.9590806366302832, + "grad_norm": 1.9683020102702107, + "learning_rate": 2.118917246110852e-08, + "loss": 0.1297, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9973 + }, + { + "epoch": 0.9591768043467808, + "grad_norm": 1.9266181782746281, + "learning_rate": 2.1090181580595558e-08, + "loss": 0.1422, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9974 + }, + { + "epoch": 0.9592729720632783, + "grad_norm": 1.6777935067308143, + "learning_rate": 2.0991421493491425e-08, + "loss": 0.1174, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9975 + }, + { + "epoch": 0.9593691397797759, + "grad_norm": 2.2474602373513957, + "learning_rate": 2.089289220899099e-08, + "loss": 0.0998, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9976 + }, + { + "epoch": 0.9594653074962735, + "grad_norm": 1.8582499163250272, + "learning_rate": 2.0794593736267744e-08, + "loss": 0.1056, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9977 + }, + { + "epoch": 0.959561475212771, + "grad_norm": 1.8291233684633743, + "learning_rate": 2.0696526084473535e-08, + "loss": 0.0779, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9978 + }, + { + "epoch": 0.9596576429292687, + "grad_norm": 1.5033593270080032, + "learning_rate": 2.0598689262738557e-08, + "loss": 0.1032, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9979 + }, + { + "epoch": 0.9597538106457663, + "grad_norm": 1.8251286192537086, + "learning_rate": 2.050108328017164e-08, + "loss": 0.0995, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9980 + }, + { + "epoch": 0.9598499783622638, + "grad_norm": 1.7809160522052632, + "learning_rate": 2.040370814586051e-08, + "loss": 0.1127, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9981 + }, + { + "epoch": 0.9599461460787614, + "grad_norm": 2.081955215043933, + "learning_rate": 2.0306563868870698e-08, + "loss": 0.1247, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9982 + }, + { + "epoch": 0.960042313795259, + "grad_norm": 1.5073898360446432, + "learning_rate": 2.020965045824691e-08, + "loss": 0.0901, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9983 + }, + { + "epoch": 0.9601384815117565, + "grad_norm": 2.107705762850501, + "learning_rate": 2.011296792301165e-08, + "loss": 0.1062, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9984 + }, + { + "epoch": 0.9602346492282541, + "grad_norm": 1.8280497918887533, + "learning_rate": 2.0016516272166887e-08, + "loss": 0.1008, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9985 + }, + { + "epoch": 0.9603308169447516, + "grad_norm": 2.028383298637877, + "learning_rate": 1.99202955146921e-08, + "loss": 0.1245, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9986 + }, + { + "epoch": 0.9604269846612492, + "grad_norm": 1.729723943922753, + "learning_rate": 1.982430565954596e-08, + "loss": 0.0974, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9987 + }, + { + "epoch": 0.9605231523777468, + "grad_norm": 2.223653420293686, + "learning_rate": 1.972854671566493e-08, + "loss": 0.1191, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9988 + }, + { + "epoch": 0.9606193200942443, + "grad_norm": 4.03548345115448, + "learning_rate": 1.9633018691964935e-08, + "loss": 0.1223, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9989 + }, + { + "epoch": 0.9607154878107419, + "grad_norm": 2.7396577080714084, + "learning_rate": 1.9537721597339966e-08, + "loss": 0.1326, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9990 + }, + { + "epoch": 0.9608116555272395, + "grad_norm": 3.0249324106076863, + "learning_rate": 1.9442655440662095e-08, + "loss": 0.1314, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9991 + }, + { + "epoch": 0.960907823243737, + "grad_norm": 1.9978380608679251, + "learning_rate": 1.93478202307823e-08, + "loss": 0.1267, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9992 + }, + { + "epoch": 0.9610039909602347, + "grad_norm": 2.841214665393591, + "learning_rate": 1.9253215976530183e-08, + "loss": 0.1143, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9993 + }, + { + "epoch": 0.9611001586767323, + "grad_norm": 1.4533307949877914, + "learning_rate": 1.915884268671342e-08, + "loss": 0.1085, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9994 + }, + { + "epoch": 0.9611963263932298, + "grad_norm": 2.3973085393711426, + "learning_rate": 1.9064700370118593e-08, + "loss": 0.1117, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9995 + }, + { + "epoch": 0.9612924941097274, + "grad_norm": 1.8465780067970368, + "learning_rate": 1.897078903551064e-08, + "loss": 0.1232, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9996 + }, + { + "epoch": 0.961388661826225, + "grad_norm": 1.5063710807626345, + "learning_rate": 1.887710869163284e-08, + "loss": 0.0892, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9997 + }, + { + "epoch": 0.9614848295427225, + "grad_norm": 1.6763428507032143, + "learning_rate": 1.8783659347207383e-08, + "loss": 0.118, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9998 + }, + { + "epoch": 0.9615809972592201, + "grad_norm": 1.5844151726848466, + "learning_rate": 1.8690441010934258e-08, + "loss": 0.1145, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 9999 + }, + { + "epoch": 0.9616771649757176, + "grad_norm": 2.194566579280202, + "learning_rate": 1.8597453691492628e-08, + "loss": 0.1181, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10000 + }, + { + "epoch": 0.9617733326922152, + "grad_norm": 1.758477767552263, + "learning_rate": 1.850469739753974e-08, + "loss": 0.079, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10001 + }, + { + "epoch": 0.9618695004087128, + "grad_norm": 1.594079985220966, + "learning_rate": 1.841217213771146e-08, + "loss": 0.1018, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10002 + }, + { + "epoch": 0.9619656681252103, + "grad_norm": 1.5073279276152969, + "learning_rate": 1.8319877920622288e-08, + "loss": 0.089, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10003 + }, + { + "epoch": 0.9620618358417079, + "grad_norm": 2.585841449324789, + "learning_rate": 1.822781475486507e-08, + "loss": 0.115, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10004 + }, + { + "epoch": 0.9621580035582055, + "grad_norm": 2.1149104379918375, + "learning_rate": 1.813598264901101e-08, + "loss": 0.1065, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10005 + }, + { + "epoch": 0.962254171274703, + "grad_norm": 1.6721806003108197, + "learning_rate": 1.804438161160993e-08, + "loss": 0.092, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10006 + }, + { + "epoch": 0.9623503389912007, + "grad_norm": 2.2974748639929636, + "learning_rate": 1.7953011651190012e-08, + "loss": 0.0916, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10007 + }, + { + "epoch": 0.9624465067076983, + "grad_norm": 1.9978376568785428, + "learning_rate": 1.7861872776258617e-08, + "loss": 0.1206, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10008 + }, + { + "epoch": 0.9625426744241958, + "grad_norm": 1.5121995129865846, + "learning_rate": 1.7770964995300343e-08, + "loss": 0.1076, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10009 + }, + { + "epoch": 0.9626388421406934, + "grad_norm": 1.779705409520184, + "learning_rate": 1.768028831677926e-08, + "loss": 0.1094, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10010 + }, + { + "epoch": 0.962735009857191, + "grad_norm": 2.001474725945376, + "learning_rate": 1.7589842749137776e-08, + "loss": 0.0959, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10011 + }, + { + "epoch": 0.9628311775736885, + "grad_norm": 2.017375767411723, + "learning_rate": 1.749962830079638e-08, + "loss": 0.1575, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10012 + }, + { + "epoch": 0.9629273452901861, + "grad_norm": 1.9301432724728234, + "learning_rate": 1.7409644980154462e-08, + "loss": 0.124, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10013 + }, + { + "epoch": 0.9630235130066837, + "grad_norm": 1.967374846977026, + "learning_rate": 1.7319892795589487e-08, + "loss": 0.098, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10014 + }, + { + "epoch": 0.9631196807231812, + "grad_norm": 2.055593478584145, + "learning_rate": 1.723037175545783e-08, + "loss": 0.1292, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10015 + }, + { + "epoch": 0.9632158484396788, + "grad_norm": 2.0110011121012112, + "learning_rate": 1.714108186809421e-08, + "loss": 0.1024, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10016 + }, + { + "epoch": 0.9633120161561763, + "grad_norm": 1.7423213955087524, + "learning_rate": 1.7052023141811702e-08, + "loss": 0.113, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10017 + }, + { + "epoch": 0.9634081838726739, + "grad_norm": 1.568569232784416, + "learning_rate": 1.6963195584901728e-08, + "loss": 0.1207, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10018 + }, + { + "epoch": 0.9635043515891715, + "grad_norm": 1.447580467308973, + "learning_rate": 1.687459920563461e-08, + "loss": 0.1136, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10019 + }, + { + "epoch": 0.963600519305669, + "grad_norm": 1.8735479338718413, + "learning_rate": 1.678623401225876e-08, + "loss": 0.106, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10020 + }, + { + "epoch": 0.9636966870221667, + "grad_norm": 1.8311706979366065, + "learning_rate": 1.6698100013001205e-08, + "loss": 0.1161, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10021 + }, + { + "epoch": 0.9637928547386643, + "grad_norm": 1.85936919275616, + "learning_rate": 1.6610197216067604e-08, + "loss": 0.1209, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10022 + }, + { + "epoch": 0.9638890224551618, + "grad_norm": 1.8400122365315172, + "learning_rate": 1.652252562964196e-08, + "loss": 0.113, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10023 + }, + { + "epoch": 0.9639851901716594, + "grad_norm": 1.7806305262005762, + "learning_rate": 1.643508526188692e-08, + "loss": 0.114, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10024 + }, + { + "epoch": 0.964081357888157, + "grad_norm": 1.9375792870554207, + "learning_rate": 1.6347876120942907e-08, + "loss": 0.1282, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10025 + }, + { + "epoch": 0.9641775256046545, + "grad_norm": 2.036683108205046, + "learning_rate": 1.6260898214929544e-08, + "loss": 0.1357, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10026 + }, + { + "epoch": 0.9642736933211521, + "grad_norm": 1.7695117930808706, + "learning_rate": 1.6174151551945073e-08, + "loss": 0.1039, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10027 + }, + { + "epoch": 0.9643698610376497, + "grad_norm": 2.3904306462566196, + "learning_rate": 1.608763614006553e-08, + "loss": 0.1244, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10028 + }, + { + "epoch": 0.9644660287541472, + "grad_norm": 2.0283262238899953, + "learning_rate": 1.600135198734559e-08, + "loss": 0.0966, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10029 + }, + { + "epoch": 0.9645621964706448, + "grad_norm": 1.5783372524221597, + "learning_rate": 1.5915299101818826e-08, + "loss": 0.1179, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10030 + }, + { + "epoch": 0.9646583641871423, + "grad_norm": 1.8005804795218023, + "learning_rate": 1.5829477491496882e-08, + "loss": 0.0984, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10031 + }, + { + "epoch": 0.9647545319036399, + "grad_norm": 2.85408381178131, + "learning_rate": 1.574388716437003e-08, + "loss": 0.1474, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10032 + }, + { + "epoch": 0.9648506996201375, + "grad_norm": 2.2955021537524205, + "learning_rate": 1.565852812840718e-08, + "loss": 0.1424, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10033 + }, + { + "epoch": 0.964946867336635, + "grad_norm": 1.8350230581335851, + "learning_rate": 1.5573400391555027e-08, + "loss": 0.1143, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10034 + }, + { + "epoch": 0.9650430350531327, + "grad_norm": 1.6594328485968992, + "learning_rate": 1.5488503961739454e-08, + "loss": 0.1126, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10035 + }, + { + "epoch": 0.9651392027696303, + "grad_norm": 1.6877834235710545, + "learning_rate": 1.5403838846864694e-08, + "loss": 0.1062, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10036 + }, + { + "epoch": 0.9652353704861278, + "grad_norm": 1.6881759426608014, + "learning_rate": 1.5319405054813054e-08, + "loss": 0.1134, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10037 + }, + { + "epoch": 0.9653315382026254, + "grad_norm": 2.2746030313617163, + "learning_rate": 1.523520259344574e-08, + "loss": 0.1139, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10038 + }, + { + "epoch": 0.965427705919123, + "grad_norm": 1.7633954612141, + "learning_rate": 1.5151231470602045e-08, + "loss": 0.1222, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10039 + }, + { + "epoch": 0.9655238736356205, + "grad_norm": 1.8506981179912707, + "learning_rate": 1.5067491694100156e-08, + "loss": 0.1339, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10040 + }, + { + "epoch": 0.9656200413521181, + "grad_norm": 2.228283210083374, + "learning_rate": 1.4983983271736337e-08, + "loss": 0.0829, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10041 + }, + { + "epoch": 0.9657162090686157, + "grad_norm": 1.9399201666400336, + "learning_rate": 1.4900706211285198e-08, + "loss": 0.1085, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10042 + }, + { + "epoch": 0.9658123767851132, + "grad_norm": 1.9709988807619907, + "learning_rate": 1.4817660520500543e-08, + "loss": 0.1163, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10043 + }, + { + "epoch": 0.9659085445016108, + "grad_norm": 1.6175148290073842, + "learning_rate": 1.4734846207113962e-08, + "loss": 0.0938, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10044 + }, + { + "epoch": 0.9660047122181084, + "grad_norm": 1.6094570109532835, + "learning_rate": 1.4652263278835954e-08, + "loss": 0.0987, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10045 + }, + { + "epoch": 0.9661008799346059, + "grad_norm": 1.9054099334277537, + "learning_rate": 1.4569911743354537e-08, + "loss": 0.1242, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10046 + }, + { + "epoch": 0.9661970476511035, + "grad_norm": 2.038348367026533, + "learning_rate": 1.4487791608337465e-08, + "loss": 0.1248, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10047 + }, + { + "epoch": 0.966293215367601, + "grad_norm": 2.2087727780097928, + "learning_rate": 1.4405902881430289e-08, + "loss": 0.114, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10048 + }, + { + "epoch": 0.9663893830840987, + "grad_norm": 2.243657748902123, + "learning_rate": 1.4324245570256634e-08, + "loss": 0.1334, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10049 + }, + { + "epoch": 0.9664855508005963, + "grad_norm": 1.5257538226075877, + "learning_rate": 1.4242819682419584e-08, + "loss": 0.1029, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10050 + }, + { + "epoch": 0.9665817185170938, + "grad_norm": 1.8118268426468278, + "learning_rate": 1.4161625225499743e-08, + "loss": 0.1139, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10051 + }, + { + "epoch": 0.9666778862335914, + "grad_norm": 1.6972873827379367, + "learning_rate": 1.4080662207056895e-08, + "loss": 0.1258, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10052 + }, + { + "epoch": 0.966774053950089, + "grad_norm": 1.6001366040384917, + "learning_rate": 1.3999930634628622e-08, + "loss": 0.0995, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10053 + }, + { + "epoch": 0.9668702216665865, + "grad_norm": 1.5023000722531306, + "learning_rate": 1.3919430515731414e-08, + "loss": 0.1068, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10054 + }, + { + "epoch": 0.9669663893830841, + "grad_norm": 2.2453309261937546, + "learning_rate": 1.3839161857859829e-08, + "loss": 0.094, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10055 + }, + { + "epoch": 0.9670625570995817, + "grad_norm": 1.479004013943654, + "learning_rate": 1.3759124668487057e-08, + "loss": 0.082, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10056 + }, + { + "epoch": 0.9671587248160792, + "grad_norm": 1.556173680361669, + "learning_rate": 1.3679318955065468e-08, + "loss": 0.0912, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10057 + }, + { + "epoch": 0.9672548925325768, + "grad_norm": 1.9283174743507525, + "learning_rate": 1.35997447250244e-08, + "loss": 0.1296, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10058 + }, + { + "epoch": 0.9673510602490744, + "grad_norm": 1.8037614201838772, + "learning_rate": 1.3520401985772646e-08, + "loss": 0.1084, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10059 + }, + { + "epoch": 0.9674472279655719, + "grad_norm": 1.8853470998200572, + "learning_rate": 1.3441290744697632e-08, + "loss": 0.1036, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10060 + }, + { + "epoch": 0.9675433956820695, + "grad_norm": 1.644677394453107, + "learning_rate": 1.33624110091643e-08, + "loss": 0.1078, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10061 + }, + { + "epoch": 0.967639563398567, + "grad_norm": 1.6046911645992108, + "learning_rate": 1.3283762786517051e-08, + "loss": 0.1097, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10062 + }, + { + "epoch": 0.9677357311150647, + "grad_norm": 1.523027440655364, + "learning_rate": 1.3205346084077808e-08, + "loss": 0.1136, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10063 + }, + { + "epoch": 0.9678318988315623, + "grad_norm": 1.8746090605344832, + "learning_rate": 1.3127160909147674e-08, + "loss": 0.1202, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10064 + }, + { + "epoch": 0.9679280665480599, + "grad_norm": 1.5775252340189645, + "learning_rate": 1.3049207269005825e-08, + "loss": 0.0911, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10065 + }, + { + "epoch": 0.9680242342645574, + "grad_norm": 2.191092018478297, + "learning_rate": 1.297148517091007e-08, + "loss": 0.1261, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10066 + }, + { + "epoch": 0.968120401981055, + "grad_norm": 2.099880050658832, + "learning_rate": 1.2893994622096285e-08, + "loss": 0.1043, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10067 + }, + { + "epoch": 0.9682165696975525, + "grad_norm": 2.4701845127910516, + "learning_rate": 1.2816735629779253e-08, + "loss": 0.1365, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10068 + }, + { + "epoch": 0.9683127374140501, + "grad_norm": 2.8490797493489715, + "learning_rate": 1.2739708201152112e-08, + "loss": 0.1107, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10069 + }, + { + "epoch": 0.9684089051305477, + "grad_norm": 1.8007134066639907, + "learning_rate": 1.2662912343386069e-08, + "loss": 0.1087, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10070 + }, + { + "epoch": 0.9685050728470452, + "grad_norm": 1.752093463937681, + "learning_rate": 1.2586348063630959e-08, + "loss": 0.1188, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10071 + }, + { + "epoch": 0.9686012405635428, + "grad_norm": 1.89438237557001, + "learning_rate": 1.2510015369015527e-08, + "loss": 0.1557, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10072 + }, + { + "epoch": 0.9686974082800404, + "grad_norm": 2.163298299228873, + "learning_rate": 1.2433914266646307e-08, + "loss": 0.1581, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10073 + }, + { + "epoch": 0.9687935759965379, + "grad_norm": 2.044789705365836, + "learning_rate": 1.2358044763608745e-08, + "loss": 0.1186, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10074 + }, + { + "epoch": 0.9688897437130355, + "grad_norm": 1.733462445728443, + "learning_rate": 1.2282406866966078e-08, + "loss": 0.1203, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10075 + }, + { + "epoch": 0.968985911429533, + "grad_norm": 1.9561070240070786, + "learning_rate": 1.220700058376073e-08, + "loss": 0.1261, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10076 + }, + { + "epoch": 0.9690820791460307, + "grad_norm": 1.7910691436332986, + "learning_rate": 1.2131825921012919e-08, + "loss": 0.1344, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10077 + }, + { + "epoch": 0.9691782468625283, + "grad_norm": 2.1052363600181097, + "learning_rate": 1.205688288572232e-08, + "loss": 0.146, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10078 + }, + { + "epoch": 0.9692744145790259, + "grad_norm": 1.8305934360628975, + "learning_rate": 1.1982171484865579e-08, + "loss": 0.1133, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10079 + }, + { + "epoch": 0.9693705822955234, + "grad_norm": 1.775331418675757, + "learning_rate": 1.1907691725398795e-08, + "loss": 0.0981, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10080 + }, + { + "epoch": 0.969466750012021, + "grad_norm": 1.816208277934731, + "learning_rate": 1.1833443614256423e-08, + "loss": 0.1304, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10081 + }, + { + "epoch": 0.9695629177285185, + "grad_norm": 2.1426582643348704, + "learning_rate": 1.1759427158350988e-08, + "loss": 0.121, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10082 + }, + { + "epoch": 0.9696590854450161, + "grad_norm": 2.5704097933653993, + "learning_rate": 1.1685642364573646e-08, + "loss": 0.1398, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10083 + }, + { + "epoch": 0.9697552531615137, + "grad_norm": 1.5724696124647746, + "learning_rate": 1.16120892397939e-08, + "loss": 0.0945, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10084 + }, + { + "epoch": 0.9698514208780112, + "grad_norm": 1.6214553846506627, + "learning_rate": 1.1538767790859885e-08, + "loss": 0.107, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10085 + }, + { + "epoch": 0.9699475885945088, + "grad_norm": 2.213563038194973, + "learning_rate": 1.1465678024598081e-08, + "loss": 0.095, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10086 + }, + { + "epoch": 0.9700437563110064, + "grad_norm": 1.674192173896842, + "learning_rate": 1.1392819947813328e-08, + "loss": 0.1181, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10087 + }, + { + "epoch": 0.9701399240275039, + "grad_norm": 1.5523222232880614, + "learning_rate": 1.132019356728853e-08, + "loss": 0.0937, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10088 + }, + { + "epoch": 0.9702360917440015, + "grad_norm": 1.6541593899990015, + "learning_rate": 1.1247798889785777e-08, + "loss": 0.1081, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10089 + }, + { + "epoch": 0.9703322594604991, + "grad_norm": 2.415256659529282, + "learning_rate": 1.1175635922045235e-08, + "loss": 0.1319, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10090 + }, + { + "epoch": 0.9704284271769967, + "grad_norm": 1.5573485998646532, + "learning_rate": 1.110370467078542e-08, + "loss": 0.1015, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10091 + }, + { + "epoch": 0.9705245948934943, + "grad_norm": 1.6735630532935475, + "learning_rate": 1.1032005142703195e-08, + "loss": 0.1132, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10092 + }, + { + "epoch": 0.9706207626099919, + "grad_norm": 1.7660056359924161, + "learning_rate": 1.0960537344474331e-08, + "loss": 0.1116, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10093 + }, + { + "epoch": 0.9707169303264894, + "grad_norm": 1.4816073215802423, + "learning_rate": 1.0889301282752119e-08, + "loss": 0.0908, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10094 + }, + { + "epoch": 0.970813098042987, + "grad_norm": 1.540110445011988, + "learning_rate": 1.0818296964169305e-08, + "loss": 0.096, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10095 + }, + { + "epoch": 0.9709092657594846, + "grad_norm": 2.4463080050832837, + "learning_rate": 1.0747524395336439e-08, + "loss": 0.1019, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10096 + }, + { + "epoch": 0.9710054334759821, + "grad_norm": 1.6260257566455478, + "learning_rate": 1.0676983582842693e-08, + "loss": 0.0964, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10097 + }, + { + "epoch": 0.9711016011924797, + "grad_norm": 1.6653549948788784, + "learning_rate": 1.060667453325559e-08, + "loss": 0.1121, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10098 + }, + { + "epoch": 0.9711977689089772, + "grad_norm": 1.7667968910637368, + "learning_rate": 1.0536597253121006e-08, + "loss": 0.1238, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10099 + }, + { + "epoch": 0.9712939366254748, + "grad_norm": 1.7722654031284346, + "learning_rate": 1.0466751748963443e-08, + "loss": 0.1187, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10100 + }, + { + "epoch": 0.9713901043419724, + "grad_norm": 1.8499085151827752, + "learning_rate": 1.039713802728548e-08, + "loss": 0.1078, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10101 + }, + { + "epoch": 0.9714862720584699, + "grad_norm": 3.5282675838831508, + "learning_rate": 1.0327756094568874e-08, + "loss": 0.1314, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10102 + }, + { + "epoch": 0.9715824397749675, + "grad_norm": 2.0507571761290264, + "learning_rate": 1.0258605957272627e-08, + "loss": 0.097, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10103 + }, + { + "epoch": 0.9716786074914651, + "grad_norm": 1.8916763991527288, + "learning_rate": 1.0189687621835198e-08, + "loss": 0.1352, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10104 + }, + { + "epoch": 0.9717747752079627, + "grad_norm": 1.8834523713626121, + "learning_rate": 1.0121001094673121e-08, + "loss": 0.0876, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10105 + }, + { + "epoch": 0.9718709429244603, + "grad_norm": 2.2009992008142993, + "learning_rate": 1.0052546382181006e-08, + "loss": 0.1108, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10106 + }, + { + "epoch": 0.9719671106409579, + "grad_norm": 2.5698090309997013, + "learning_rate": 9.98432349073264e-09, + "loss": 0.1257, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10107 + }, + { + "epoch": 0.9720632783574554, + "grad_norm": 5.57061086171051, + "learning_rate": 9.916332426679054e-09, + "loss": 0.1191, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10108 + }, + { + "epoch": 0.972159446073953, + "grad_norm": 1.624764327944915, + "learning_rate": 9.848573196351019e-09, + "loss": 0.107, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10109 + }, + { + "epoch": 0.9722556137904506, + "grad_norm": 2.7526205687177674, + "learning_rate": 9.7810458060571e-09, + "loss": 0.105, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10110 + }, + { + "epoch": 0.9723517815069481, + "grad_norm": 3.0543855466440877, + "learning_rate": 9.713750262083932e-09, + "loss": 0.1263, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10111 + }, + { + "epoch": 0.9724479492234457, + "grad_norm": 2.1907377535938837, + "learning_rate": 9.646686570697062e-09, + "loss": 0.1491, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10112 + }, + { + "epoch": 0.9725441169399432, + "grad_norm": 2.296961826065514, + "learning_rate": 9.579854738140382e-09, + "loss": 0.135, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10113 + }, + { + "epoch": 0.9726402846564408, + "grad_norm": 1.5226828054704873, + "learning_rate": 9.513254770636138e-09, + "loss": 0.093, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10114 + }, + { + "epoch": 0.9727364523729384, + "grad_norm": 1.8078295687109407, + "learning_rate": 9.446886674384925e-09, + "loss": 0.13, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10115 + }, + { + "epoch": 0.9728326200894359, + "grad_norm": 2.5714930374076377, + "learning_rate": 9.38075045556569e-09, + "loss": 0.1349, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10116 + }, + { + "epoch": 0.9729287878059335, + "grad_norm": 1.9264836476212388, + "learning_rate": 9.314846120336007e-09, + "loss": 0.133, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10117 + }, + { + "epoch": 0.9730249555224311, + "grad_norm": 2.5337199481405985, + "learning_rate": 9.249173674831802e-09, + "loss": 0.124, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10118 + }, + { + "epoch": 0.9731211232389287, + "grad_norm": 2.923192751174087, + "learning_rate": 9.183733125167072e-09, + "loss": 0.1088, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10119 + }, + { + "epoch": 0.9732172909554263, + "grad_norm": 2.392049443566221, + "learning_rate": 9.118524477434999e-09, + "loss": 0.1056, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10120 + }, + { + "epoch": 0.9733134586719239, + "grad_norm": 1.6144174018667783, + "learning_rate": 9.053547737706281e-09, + "loss": 0.1166, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10121 + }, + { + "epoch": 0.9734096263884214, + "grad_norm": 1.5281342644297693, + "learning_rate": 8.988802912030803e-09, + "loss": 0.077, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10122 + }, + { + "epoch": 0.973505794104919, + "grad_norm": 1.5825944295213639, + "learning_rate": 8.924290006435966e-09, + "loss": 0.1027, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10123 + }, + { + "epoch": 0.9736019618214166, + "grad_norm": 1.8436447689920272, + "learning_rate": 8.860009026928629e-09, + "loss": 0.1022, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10124 + }, + { + "epoch": 0.9736981295379141, + "grad_norm": 1.844149558276086, + "learning_rate": 8.795959979493174e-09, + "loss": 0.1168, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10125 + }, + { + "epoch": 0.9737942972544117, + "grad_norm": 1.4508825885192875, + "learning_rate": 8.732142870092886e-09, + "loss": 0.1095, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10126 + }, + { + "epoch": 0.9738904649709093, + "grad_norm": 1.915403573165048, + "learning_rate": 8.668557704669123e-09, + "loss": 0.128, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10127 + }, + { + "epoch": 0.9739866326874068, + "grad_norm": 1.7993316156262764, + "learning_rate": 8.605204489142426e-09, + "loss": 0.1064, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10128 + }, + { + "epoch": 0.9740828004039044, + "grad_norm": 2.2311617844555562, + "learning_rate": 8.54208322941058e-09, + "loss": 0.145, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10129 + }, + { + "epoch": 0.974178968120402, + "grad_norm": 1.8615236891099338, + "learning_rate": 8.479193931350549e-09, + "loss": 0.1164, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10130 + }, + { + "epoch": 0.9742751358368995, + "grad_norm": 2.005994804261882, + "learning_rate": 8.416536600817372e-09, + "loss": 0.1371, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10131 + }, + { + "epoch": 0.9743713035533971, + "grad_norm": 1.5353551220039645, + "learning_rate": 8.354111243644713e-09, + "loss": 0.0925, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10132 + }, + { + "epoch": 0.9744674712698947, + "grad_norm": 1.787078922236061, + "learning_rate": 8.291917865644594e-09, + "loss": 0.0843, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10133 + }, + { + "epoch": 0.9745636389863923, + "grad_norm": 2.0832001160646447, + "learning_rate": 8.22995647260738e-09, + "loss": 0.1247, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10134 + }, + { + "epoch": 0.9746598067028899, + "grad_norm": 1.6748329974257432, + "learning_rate": 8.168227070301793e-09, + "loss": 0.1338, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10135 + }, + { + "epoch": 0.9747559744193874, + "grad_norm": 1.65243078639926, + "learning_rate": 8.106729664475178e-09, + "loss": 0.0901, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10136 + }, + { + "epoch": 0.974852142135885, + "grad_norm": 2.150405102783579, + "learning_rate": 8.045464260852676e-09, + "loss": 0.0904, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10137 + }, + { + "epoch": 0.9749483098523826, + "grad_norm": 2.016034693928103, + "learning_rate": 7.984430865138892e-09, + "loss": 0.1607, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10138 + }, + { + "epoch": 0.9750444775688801, + "grad_norm": 1.831084554528756, + "learning_rate": 7.92362948301567e-09, + "loss": 0.1178, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10139 + }, + { + "epoch": 0.9751406452853777, + "grad_norm": 2.040153806087827, + "learning_rate": 7.863060120144316e-09, + "loss": 0.1116, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10140 + }, + { + "epoch": 0.9752368130018753, + "grad_norm": 2.4486330027363556, + "learning_rate": 7.802722782163375e-09, + "loss": 0.1243, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10141 + }, + { + "epoch": 0.9753329807183728, + "grad_norm": 1.6698073742968151, + "learning_rate": 7.74261747469085e-09, + "loss": 0.1024, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10142 + }, + { + "epoch": 0.9754291484348704, + "grad_norm": 2.0741413324368647, + "learning_rate": 7.682744203322546e-09, + "loss": 0.0976, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10143 + }, + { + "epoch": 0.975525316151368, + "grad_norm": 1.5475368583733746, + "learning_rate": 7.62310297363289e-09, + "loss": 0.0979, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10144 + }, + { + "epoch": 0.9756214838678655, + "grad_norm": 1.956817903195199, + "learning_rate": 7.563693791174665e-09, + "loss": 0.1385, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10145 + }, + { + "epoch": 0.9757176515843631, + "grad_norm": 1.5077237149537916, + "learning_rate": 7.504516661479e-09, + "loss": 0.1108, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10146 + }, + { + "epoch": 0.9758138193008608, + "grad_norm": 2.7187713467041075, + "learning_rate": 7.4455715900556554e-09, + "loss": 0.0909, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10147 + }, + { + "epoch": 0.9759099870173583, + "grad_norm": 2.839088618960733, + "learning_rate": 7.386858582392187e-09, + "loss": 0.1413, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10148 + }, + { + "epoch": 0.9760061547338559, + "grad_norm": 1.922960361835494, + "learning_rate": 7.328377643955053e-09, + "loss": 0.1242, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10149 + }, + { + "epoch": 0.9761023224503534, + "grad_norm": 1.676518939709684, + "learning_rate": 7.270128780189345e-09, + "loss": 0.1149, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10150 + }, + { + "epoch": 0.976198490166851, + "grad_norm": 2.3151610931779487, + "learning_rate": 7.212111996517668e-09, + "loss": 0.1207, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10151 + }, + { + "epoch": 0.9762946578833486, + "grad_norm": 1.6123684398664337, + "learning_rate": 7.154327298342089e-09, + "loss": 0.1018, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10152 + }, + { + "epoch": 0.9763908255998461, + "grad_norm": 1.9162145963701576, + "learning_rate": 7.0967746910419185e-09, + "loss": 0.1426, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10153 + }, + { + "epoch": 0.9764869933163437, + "grad_norm": 2.833286555763556, + "learning_rate": 7.039454179976202e-09, + "loss": 0.087, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10154 + }, + { + "epoch": 0.9765831610328413, + "grad_norm": 1.6228433811687937, + "learning_rate": 6.982365770480948e-09, + "loss": 0.1021, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10155 + }, + { + "epoch": 0.9766793287493388, + "grad_norm": 3.476011170801557, + "learning_rate": 6.92550946787135e-09, + "loss": 0.1549, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10156 + }, + { + "epoch": 0.9767754964658364, + "grad_norm": 1.8489691997658748, + "learning_rate": 6.868885277441506e-09, + "loss": 0.096, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10157 + }, + { + "epoch": 0.976871664182334, + "grad_norm": 1.5968929777568925, + "learning_rate": 6.812493204462478e-09, + "loss": 0.1232, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10158 + }, + { + "epoch": 0.9769678318988315, + "grad_norm": 1.6660242985147535, + "learning_rate": 6.7563332541850655e-09, + "loss": 0.0876, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10159 + }, + { + "epoch": 0.9770639996153291, + "grad_norm": 1.7226407762449178, + "learning_rate": 6.700405431837587e-09, + "loss": 0.1219, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10160 + }, + { + "epoch": 0.9771601673318268, + "grad_norm": 1.8594940882854265, + "learning_rate": 6.644709742627264e-09, + "loss": 0.13, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10161 + }, + { + "epoch": 0.9772563350483243, + "grad_norm": 1.6397565227397897, + "learning_rate": 6.589246191739674e-09, + "loss": 0.1038, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10162 + }, + { + "epoch": 0.9773525027648219, + "grad_norm": 1.8583979593794304, + "learning_rate": 6.534014784338183e-09, + "loss": 0.1181, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10163 + }, + { + "epoch": 0.9774486704813194, + "grad_norm": 1.6378957092724742, + "learning_rate": 6.4790155255653466e-09, + "loss": 0.0962, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10164 + }, + { + "epoch": 0.977544838197817, + "grad_norm": 2.1467879262539507, + "learning_rate": 6.42424842054179e-09, + "loss": 0.108, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10165 + }, + { + "epoch": 0.9776410059143146, + "grad_norm": 1.6413259306581, + "learning_rate": 6.369713474366213e-09, + "loss": 0.1058, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10166 + }, + { + "epoch": 0.9777371736308121, + "grad_norm": 1.68221924723652, + "learning_rate": 6.315410692115942e-09, + "loss": 0.0797, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10167 + }, + { + "epoch": 0.9778333413473097, + "grad_norm": 1.697152785163296, + "learning_rate": 6.2613400788472115e-09, + "loss": 0.1114, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10168 + }, + { + "epoch": 0.9779295090638073, + "grad_norm": 2.1373497145277094, + "learning_rate": 6.207501639593494e-09, + "loss": 0.1294, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10169 + }, + { + "epoch": 0.9780256767803048, + "grad_norm": 2.5135809312759045, + "learning_rate": 6.153895379368003e-09, + "loss": 0.1247, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10170 + }, + { + "epoch": 0.9781218444968024, + "grad_norm": 2.6415609172110583, + "learning_rate": 6.100521303160911e-09, + "loss": 0.1219, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10171 + }, + { + "epoch": 0.9782180122133, + "grad_norm": 1.7003716655121857, + "learning_rate": 6.047379415941856e-09, + "loss": 0.09, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10172 + }, + { + "epoch": 0.9783141799297975, + "grad_norm": 1.7557431767725362, + "learning_rate": 5.994469722658547e-09, + "loss": 0.1086, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10173 + }, + { + "epoch": 0.9784103476462951, + "grad_norm": 1.9300880880524862, + "learning_rate": 5.941792228237042e-09, + "loss": 0.129, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10174 + }, + { + "epoch": 0.9785065153627928, + "grad_norm": 1.893350689606379, + "learning_rate": 5.889346937581475e-09, + "loss": 0.1457, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10175 + }, + { + "epoch": 0.9786026830792903, + "grad_norm": 1.8788920177288027, + "learning_rate": 5.837133855574884e-09, + "loss": 0.1236, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10176 + }, + { + "epoch": 0.9786988507957879, + "grad_norm": 1.79400435653193, + "learning_rate": 5.785152987078379e-09, + "loss": 0.1464, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10177 + }, + { + "epoch": 0.9787950185122855, + "grad_norm": 1.8638326811349453, + "learning_rate": 5.7334043369314254e-09, + "loss": 0.1107, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10178 + }, + { + "epoch": 0.978891186228783, + "grad_norm": 2.118655107331415, + "learning_rate": 5.681887909952388e-09, + "loss": 0.125, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10179 + }, + { + "epoch": 0.9789873539452806, + "grad_norm": 1.6873390479674897, + "learning_rate": 5.6306037109371544e-09, + "loss": 0.112, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10180 + }, + { + "epoch": 0.9790835216617781, + "grad_norm": 2.526707005024809, + "learning_rate": 5.5795517446605164e-09, + "loss": 0.0969, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10181 + }, + { + "epoch": 0.9791796893782757, + "grad_norm": 2.1140042550008613, + "learning_rate": 5.528732015875615e-09, + "loss": 0.1131, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10182 + }, + { + "epoch": 0.9792758570947733, + "grad_norm": 1.794231063741747, + "learning_rate": 5.478144529313944e-09, + "loss": 0.1017, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10183 + }, + { + "epoch": 0.9793720248112708, + "grad_norm": 1.5178070390327545, + "learning_rate": 5.427789289685348e-09, + "loss": 0.0941, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10184 + }, + { + "epoch": 0.9794681925277684, + "grad_norm": 1.6454697076125124, + "learning_rate": 5.377666301677742e-09, + "loss": 0.0945, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10185 + }, + { + "epoch": 0.979564360244266, + "grad_norm": 2.0694791093069953, + "learning_rate": 5.3277755699582264e-09, + "loss": 0.1046, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10186 + }, + { + "epoch": 0.9796605279607635, + "grad_norm": 1.4228825223632378, + "learning_rate": 5.27811709917142e-09, + "loss": 0.0749, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10187 + }, + { + "epoch": 0.9797566956772611, + "grad_norm": 2.6816884052001666, + "learning_rate": 5.228690893940569e-09, + "loss": 0.1385, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10188 + }, + { + "epoch": 0.9798528633937588, + "grad_norm": 1.6826219786123495, + "learning_rate": 5.1794969588678245e-09, + "loss": 0.1094, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10189 + }, + { + "epoch": 0.9799490311102563, + "grad_norm": 1.518792973525403, + "learning_rate": 5.130535298532857e-09, + "loss": 0.1049, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10190 + }, + { + "epoch": 0.9800451988267539, + "grad_norm": 2.2681293472344137, + "learning_rate": 5.081805917494243e-09, + "loss": 0.1255, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10191 + }, + { + "epoch": 0.9801413665432515, + "grad_norm": 1.4605683961730542, + "learning_rate": 5.033308820289185e-09, + "loss": 0.0908, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10192 + }, + { + "epoch": 0.980237534259749, + "grad_norm": 1.8477165048222035, + "learning_rate": 4.985044011432128e-09, + "loss": 0.1032, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10193 + }, + { + "epoch": 0.9803337019762466, + "grad_norm": 1.7488888708885741, + "learning_rate": 4.937011495417532e-09, + "loss": 0.0961, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10194 + }, + { + "epoch": 0.9804298696927441, + "grad_norm": 1.8342319005597758, + "learning_rate": 4.889211276716544e-09, + "loss": 0.1259, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10195 + }, + { + "epoch": 0.9805260374092417, + "grad_norm": 1.6609781324320392, + "learning_rate": 4.841643359780324e-09, + "loss": 0.1283, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10196 + }, + { + "epoch": 0.9806222051257393, + "grad_norm": 1.7356297031585581, + "learning_rate": 4.7943077490369974e-09, + "loss": 0.1243, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10197 + }, + { + "epoch": 0.9807183728422368, + "grad_norm": 2.035895505504051, + "learning_rate": 4.747204448893594e-09, + "loss": 0.12, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10198 + }, + { + "epoch": 0.9808145405587344, + "grad_norm": 1.922065352188388, + "learning_rate": 4.700333463735774e-09, + "loss": 0.1271, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10199 + }, + { + "epoch": 0.980910708275232, + "grad_norm": 2.092353464451518, + "learning_rate": 4.653694797927544e-09, + "loss": 0.1005, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10200 + }, + { + "epoch": 0.9810068759917295, + "grad_norm": 3.0155970631059175, + "learning_rate": 4.607288455810987e-09, + "loss": 0.1669, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10201 + }, + { + "epoch": 0.9811030437082271, + "grad_norm": 1.5304566539962334, + "learning_rate": 4.56111444170626e-09, + "loss": 0.1137, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10202 + }, + { + "epoch": 0.9811992114247248, + "grad_norm": 1.5537627230481998, + "learning_rate": 4.515172759912978e-09, + "loss": 0.1124, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10203 + }, + { + "epoch": 0.9812953791412223, + "grad_norm": 1.7908982952063413, + "learning_rate": 4.4694634147077195e-09, + "loss": 0.1161, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10204 + }, + { + "epoch": 0.9813915468577199, + "grad_norm": 1.8095038565839088, + "learning_rate": 4.423986410346526e-09, + "loss": 0.1095, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10205 + }, + { + "epoch": 0.9814877145742175, + "grad_norm": 1.7010853806301391, + "learning_rate": 4.378741751063509e-09, + "loss": 0.1081, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10206 + }, + { + "epoch": 0.981583882290715, + "grad_norm": 1.7228108004210534, + "learning_rate": 4.333729441071133e-09, + "loss": 0.1173, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10207 + }, + { + "epoch": 0.9816800500072126, + "grad_norm": 1.945238265109328, + "learning_rate": 4.288949484559934e-09, + "loss": 0.1261, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10208 + }, + { + "epoch": 0.9817762177237102, + "grad_norm": 3.0150315740066755, + "learning_rate": 4.244401885698801e-09, + "loss": 0.1328, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10209 + }, + { + "epoch": 0.9818723854402077, + "grad_norm": 2.2719129559149125, + "learning_rate": 4.200086648635804e-09, + "loss": 0.108, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10210 + }, + { + "epoch": 0.9819685531567053, + "grad_norm": 1.3320831472223391, + "learning_rate": 4.156003777496531e-09, + "loss": 0.0824, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10211 + }, + { + "epoch": 0.9820647208732028, + "grad_norm": 2.991658897190282, + "learning_rate": 4.112153276385477e-09, + "loss": 0.1173, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10212 + }, + { + "epoch": 0.9821608885897004, + "grad_norm": 1.5886096023347247, + "learning_rate": 4.068535149384656e-09, + "loss": 0.0946, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10213 + }, + { + "epoch": 0.982257056306198, + "grad_norm": 1.923045759052226, + "learning_rate": 4.025149400555817e-09, + "loss": 0.1263, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10214 + }, + { + "epoch": 0.9823532240226955, + "grad_norm": 1.7007683872561676, + "learning_rate": 3.981996033937674e-09, + "loss": 0.1143, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10215 + }, + { + "epoch": 0.9824493917391931, + "grad_norm": 1.6720282739020684, + "learning_rate": 3.939075053548125e-09, + "loss": 0.1034, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10216 + }, + { + "epoch": 0.9825455594556908, + "grad_norm": 1.5435869035487133, + "learning_rate": 3.896386463383139e-09, + "loss": 0.0959, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10217 + }, + { + "epoch": 0.9826417271721883, + "grad_norm": 2.1201918814201886, + "learning_rate": 3.853930267417316e-09, + "loss": 0.1101, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10218 + }, + { + "epoch": 0.9827378948886859, + "grad_norm": 1.7282882155835073, + "learning_rate": 3.811706469603604e-09, + "loss": 0.1121, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10219 + }, + { + "epoch": 0.9828340626051835, + "grad_norm": 1.965908603103737, + "learning_rate": 3.769715073872749e-09, + "loss": 0.1188, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10220 + }, + { + "epoch": 0.982930230321681, + "grad_norm": 1.598073989046231, + "learning_rate": 3.727956084134398e-09, + "loss": 0.0999, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10221 + }, + { + "epoch": 0.9830263980381786, + "grad_norm": 2.374003951246323, + "learning_rate": 3.6864295042765563e-09, + "loss": 0.1262, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10222 + }, + { + "epoch": 0.9831225657546762, + "grad_norm": 1.9533830130469632, + "learning_rate": 3.6451353381652952e-09, + "loss": 0.1123, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10223 + }, + { + "epoch": 0.9832187334711737, + "grad_norm": 1.7195273322369804, + "learning_rate": 3.6040735896455957e-09, + "loss": 0.1211, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10224 + }, + { + "epoch": 0.9833149011876713, + "grad_norm": 2.0477739872410843, + "learning_rate": 3.5632442625399554e-09, + "loss": 0.1201, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10225 + }, + { + "epoch": 0.9834110689041689, + "grad_norm": 1.8032281215285522, + "learning_rate": 3.522647360649778e-09, + "loss": 0.0865, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10226 + }, + { + "epoch": 0.9835072366206664, + "grad_norm": 2.098460685022426, + "learning_rate": 3.4822828877548175e-09, + "loss": 0.1348, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10227 + }, + { + "epoch": 0.983603404337164, + "grad_norm": 1.5012209733041673, + "learning_rate": 3.442150847613457e-09, + "loss": 0.1056, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10228 + }, + { + "epoch": 0.9836995720536615, + "grad_norm": 1.631658149640939, + "learning_rate": 3.4022512439615963e-09, + "loss": 0.0998, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10229 + }, + { + "epoch": 0.9837957397701591, + "grad_norm": 1.8678135823780109, + "learning_rate": 3.362584080514042e-09, + "loss": 0.1127, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10230 + }, + { + "epoch": 0.9838919074866568, + "grad_norm": 2.5149570789743887, + "learning_rate": 3.3231493609642284e-09, + "loss": 0.1066, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10231 + }, + { + "epoch": 0.9839880752031543, + "grad_norm": 1.783215308832597, + "learning_rate": 3.283947088983663e-09, + "loss": 0.096, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10232 + }, + { + "epoch": 0.9840842429196519, + "grad_norm": 2.273083436788094, + "learning_rate": 3.2449772682216495e-09, + "loss": 0.0905, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10233 + }, + { + "epoch": 0.9841804106361495, + "grad_norm": 3.0269817898727016, + "learning_rate": 3.206239902306951e-09, + "loss": 0.1159, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10234 + }, + { + "epoch": 0.984276578352647, + "grad_norm": 1.9067349414412182, + "learning_rate": 3.1677349948461277e-09, + "loss": 0.123, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10235 + }, + { + "epoch": 0.9843727460691446, + "grad_norm": 1.864457399547299, + "learning_rate": 3.129462549423534e-09, + "loss": 0.111, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10236 + }, + { + "epoch": 0.9844689137856422, + "grad_norm": 2.4248097764102097, + "learning_rate": 3.0914225696029865e-09, + "loss": 0.1133, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10237 + }, + { + "epoch": 0.9845650815021397, + "grad_norm": 1.7347962159864938, + "learning_rate": 3.0536150589258183e-09, + "loss": 0.1307, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10238 + }, + { + "epoch": 0.9846612492186373, + "grad_norm": 1.6146337277209792, + "learning_rate": 3.0160400209119922e-09, + "loss": 0.0941, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10239 + }, + { + "epoch": 0.9847574169351349, + "grad_norm": 2.17999487392423, + "learning_rate": 2.978697459060098e-09, + "loss": 0.1165, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10240 + }, + { + "epoch": 0.9848535846516324, + "grad_norm": 1.706431700772482, + "learning_rate": 2.9415873768467994e-09, + "loss": 0.097, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10241 + }, + { + "epoch": 0.98494975236813, + "grad_norm": 1.7089826985572272, + "learning_rate": 2.9047097777271106e-09, + "loss": 0.1152, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10242 + }, + { + "epoch": 0.9850459200846275, + "grad_norm": 1.8510366761900148, + "learning_rate": 2.8680646651341184e-09, + "loss": 0.1186, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10243 + }, + { + "epoch": 0.9851420878011251, + "grad_norm": 1.9327059581667962, + "learning_rate": 2.8316520424800933e-09, + "loss": 0.1269, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10244 + }, + { + "epoch": 0.9852382555176228, + "grad_norm": 1.619596354256581, + "learning_rate": 2.7954719131548235e-09, + "loss": 0.0901, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10245 + }, + { + "epoch": 0.9853344232341203, + "grad_norm": 2.4919268003846646, + "learning_rate": 2.7595242805267263e-09, + "loss": 0.1112, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10246 + }, + { + "epoch": 0.9854305909506179, + "grad_norm": 2.0243071432370012, + "learning_rate": 2.723809147942846e-09, + "loss": 0.1188, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10247 + }, + { + "epoch": 0.9855267586671155, + "grad_norm": 1.749641132995684, + "learning_rate": 2.6883265187283014e-09, + "loss": 0.101, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10248 + }, + { + "epoch": 0.985622926383613, + "grad_norm": 1.8907361445217445, + "learning_rate": 2.6530763961865603e-09, + "loss": 0.126, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10249 + }, + { + "epoch": 0.9857190941001106, + "grad_norm": 2.0955405588618445, + "learning_rate": 2.6180587835997195e-09, + "loss": 0.1203, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10250 + }, + { + "epoch": 0.9858152618166082, + "grad_norm": 2.1481212886792367, + "learning_rate": 2.583273684227672e-09, + "loss": 0.1381, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10251 + }, + { + "epoch": 0.9859114295331057, + "grad_norm": 1.6466275549225733, + "learning_rate": 2.548721101309215e-09, + "loss": 0.1195, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10252 + }, + { + "epoch": 0.9860075972496033, + "grad_norm": 1.6614801945315318, + "learning_rate": 2.5144010380612206e-09, + "loss": 0.1207, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10253 + }, + { + "epoch": 0.9861037649661009, + "grad_norm": 1.570285522918103, + "learning_rate": 2.4803134976791876e-09, + "loss": 0.0994, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10254 + }, + { + "epoch": 0.9861999326825984, + "grad_norm": 2.3095797377496674, + "learning_rate": 2.446458483336411e-09, + "loss": 0.1505, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10255 + }, + { + "epoch": 0.986296100399096, + "grad_norm": 1.7566390767873579, + "learning_rate": 2.412835998185092e-09, + "loss": 0.1296, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10256 + }, + { + "epoch": 0.9863922681155936, + "grad_norm": 2.457823057237324, + "learning_rate": 2.3794460453555046e-09, + "loss": 0.1317, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10257 + }, + { + "epoch": 0.9864884358320911, + "grad_norm": 2.0609474598732636, + "learning_rate": 2.34628862795655e-09, + "loss": 0.093, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10258 + }, + { + "epoch": 0.9865846035485888, + "grad_norm": 1.8319665353909238, + "learning_rate": 2.3133637490752037e-09, + "loss": 0.1353, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10259 + }, + { + "epoch": 0.9866807712650864, + "grad_norm": 1.8266531361707659, + "learning_rate": 2.280671411776514e-09, + "loss": 0.1309, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10260 + }, + { + "epoch": 0.9867769389815839, + "grad_norm": 1.697000681016517, + "learning_rate": 2.2482116191044345e-09, + "loss": 0.1016, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10261 + }, + { + "epoch": 0.9868731066980815, + "grad_norm": 1.6794733354486209, + "learning_rate": 2.21598437408127e-09, + "loss": 0.1082, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10262 + }, + { + "epoch": 0.986969274414579, + "grad_norm": 1.8597534317041078, + "learning_rate": 2.1839896797073988e-09, + "loss": 0.0965, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10263 + }, + { + "epoch": 0.9870654421310766, + "grad_norm": 2.0989705155236367, + "learning_rate": 2.1522275389615487e-09, + "loss": 0.1108, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10264 + }, + { + "epoch": 0.9871616098475742, + "grad_norm": 1.6212210331133237, + "learning_rate": 2.120697954800799e-09, + "loss": 0.1035, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10265 + }, + { + "epoch": 0.9872577775640717, + "grad_norm": 2.064453526870436, + "learning_rate": 2.089400930160579e-09, + "loss": 0.1297, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10266 + }, + { + "epoch": 0.9873539452805693, + "grad_norm": 1.4789062874979728, + "learning_rate": 2.058336467954947e-09, + "loss": 0.0847, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10267 + }, + { + "epoch": 0.9874501129970669, + "grad_norm": 2.622011683557671, + "learning_rate": 2.027504571076033e-09, + "loss": 0.1329, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10268 + }, + { + "epoch": 0.9875462807135644, + "grad_norm": 1.9677230126894751, + "learning_rate": 1.996905242394043e-09, + "loss": 0.1184, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10269 + }, + { + "epoch": 0.987642448430062, + "grad_norm": 1.8712499461823813, + "learning_rate": 1.9665384847583622e-09, + "loss": 0.1261, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10270 + }, + { + "epoch": 0.9877386161465596, + "grad_norm": 1.48208410063533, + "learning_rate": 1.9364043009961755e-09, + "loss": 0.0823, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10271 + }, + { + "epoch": 0.9878347838630571, + "grad_norm": 2.4263098340338307, + "learning_rate": 1.9065026939127374e-09, + "loss": 0.1294, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10272 + }, + { + "epoch": 0.9879309515795548, + "grad_norm": 2.134850346855154, + "learning_rate": 1.876833666291933e-09, + "loss": 0.1301, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10273 + }, + { + "epoch": 0.9880271192960524, + "grad_norm": 2.182820840788696, + "learning_rate": 1.8473972208962743e-09, + "loss": 0.1221, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10274 + }, + { + "epoch": 0.9881232870125499, + "grad_norm": 1.9359553673402916, + "learning_rate": 1.8181933604666246e-09, + "loss": 0.1257, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10275 + }, + { + "epoch": 0.9882194547290475, + "grad_norm": 1.8399879603252047, + "learning_rate": 1.7892220877213651e-09, + "loss": 0.1102, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10276 + }, + { + "epoch": 0.988315622445545, + "grad_norm": 1.802154897258278, + "learning_rate": 1.7604834053580599e-09, + "loss": 0.1016, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10277 + }, + { + "epoch": 0.9884117901620426, + "grad_norm": 1.979187944408902, + "learning_rate": 1.7319773160526243e-09, + "loss": 0.1344, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10278 + }, + { + "epoch": 0.9885079578785402, + "grad_norm": 1.901157040609816, + "learning_rate": 1.7037038224584913e-09, + "loss": 0.1153, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10279 + }, + { + "epoch": 0.9886041255950377, + "grad_norm": 1.576727681911159, + "learning_rate": 1.6756629272085545e-09, + "loss": 0.1143, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10280 + }, + { + "epoch": 0.9887002933115353, + "grad_norm": 1.8322612636460502, + "learning_rate": 1.647854632913226e-09, + "loss": 0.1026, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10281 + }, + { + "epoch": 0.9887964610280329, + "grad_norm": 2.3101527519704015, + "learning_rate": 1.6202789421615461e-09, + "loss": 0.1222, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10282 + }, + { + "epoch": 0.9888926287445304, + "grad_norm": 2.447231064404267, + "learning_rate": 1.5929358575206277e-09, + "loss": 0.0944, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10283 + }, + { + "epoch": 0.988988796461028, + "grad_norm": 1.553570070283577, + "learning_rate": 1.5658253815367674e-09, + "loss": 0.1046, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10284 + }, + { + "epoch": 0.9890849641775256, + "grad_norm": 1.9500741014664633, + "learning_rate": 1.5389475167337798e-09, + "loss": 0.131, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10285 + }, + { + "epoch": 0.9891811318940231, + "grad_norm": 1.4882592439476856, + "learning_rate": 1.5123022656141073e-09, + "loss": 0.0988, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10286 + }, + { + "epoch": 0.9892772996105208, + "grad_norm": 1.91935631700026, + "learning_rate": 1.485889630658266e-09, + "loss": 0.1223, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10287 + }, + { + "epoch": 0.9893734673270184, + "grad_norm": 1.9040560775435933, + "learning_rate": 1.4597096143253996e-09, + "loss": 0.1243, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10288 + }, + { + "epoch": 0.9894696350435159, + "grad_norm": 1.617615390739929, + "learning_rate": 1.433762219053003e-09, + "loss": 0.1121, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10289 + }, + { + "epoch": 0.9895658027600135, + "grad_norm": 2.008439427775605, + "learning_rate": 1.4080474472569216e-09, + "loss": 0.1349, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10290 + }, + { + "epoch": 0.989661970476511, + "grad_norm": 1.6798812879603282, + "learning_rate": 1.3825653013313512e-09, + "loss": 0.1173, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10291 + }, + { + "epoch": 0.9897581381930086, + "grad_norm": 2.5559470418567893, + "learning_rate": 1.3573157836485605e-09, + "loss": 0.1271, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10292 + }, + { + "epoch": 0.9898543059095062, + "grad_norm": 2.0708384429079154, + "learning_rate": 1.3322988965594474e-09, + "loss": 0.1134, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10293 + }, + { + "epoch": 0.9899504736260037, + "grad_norm": 1.7480808686892584, + "learning_rate": 1.3075146423932595e-09, + "loss": 0.0891, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10294 + }, + { + "epoch": 0.9900466413425013, + "grad_norm": 2.1298873316638103, + "learning_rate": 1.28296302345704e-09, + "loss": 0.1167, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10295 + }, + { + "epoch": 0.9901428090589989, + "grad_norm": 1.4993828129891604, + "learning_rate": 1.2586440420372936e-09, + "loss": 0.1043, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10296 + }, + { + "epoch": 0.9902389767754964, + "grad_norm": 1.8267886524011527, + "learning_rate": 1.2345577003974874e-09, + "loss": 0.0856, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10297 + }, + { + "epoch": 0.990335144491994, + "grad_norm": 2.1391468945171823, + "learning_rate": 1.2107040007805493e-09, + "loss": 0.1046, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10298 + }, + { + "epoch": 0.9904313122084916, + "grad_norm": 2.1820087320856114, + "learning_rate": 1.187082945407203e-09, + "loss": 0.1153, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10299 + }, + { + "epoch": 0.9905274799249891, + "grad_norm": 1.856892437279283, + "learning_rate": 1.1636945364768005e-09, + "loss": 0.0933, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10300 + }, + { + "epoch": 0.9906236476414868, + "grad_norm": 1.5557259588319856, + "learning_rate": 1.1405387761664888e-09, + "loss": 0.1007, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10301 + }, + { + "epoch": 0.9907198153579844, + "grad_norm": 2.3034660549029984, + "learning_rate": 1.1176156666325989e-09, + "loss": 0.1384, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10302 + }, + { + "epoch": 0.9908159830744819, + "grad_norm": 2.181243149565534, + "learning_rate": 1.094925210008979e-09, + "loss": 0.1018, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10303 + }, + { + "epoch": 0.9909121507909795, + "grad_norm": 1.7801085886703165, + "learning_rate": 1.0724674084083841e-09, + "loss": 0.1045, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10304 + }, + { + "epoch": 0.9910083185074771, + "grad_norm": 2.1546901022586282, + "learning_rate": 1.0502422639216414e-09, + "loss": 0.1176, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10305 + }, + { + "epoch": 0.9911044862239746, + "grad_norm": 1.4397335572724288, + "learning_rate": 1.0282497786179292e-09, + "loss": 0.0945, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10306 + }, + { + "epoch": 0.9912006539404722, + "grad_norm": 2.1387640932242302, + "learning_rate": 1.0064899545444983e-09, + "loss": 0.1269, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10307 + }, + { + "epoch": 0.9912968216569698, + "grad_norm": 1.6399649396527065, + "learning_rate": 9.849627937280616e-10, + "loss": 0.0974, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10308 + }, + { + "epoch": 0.9913929893734673, + "grad_norm": 2.0415586144541127, + "learning_rate": 9.636682981720158e-10, + "loss": 0.1235, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10309 + }, + { + "epoch": 0.9914891570899649, + "grad_norm": 1.57826582650163, + "learning_rate": 9.426064698594973e-10, + "loss": 0.1117, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10310 + }, + { + "epoch": 0.9915853248064624, + "grad_norm": 1.6916540879054767, + "learning_rate": 9.217773107511596e-10, + "loss": 0.1065, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10311 + }, + { + "epoch": 0.99168149252296, + "grad_norm": 1.723630354786572, + "learning_rate": 9.011808227865626e-10, + "loss": 0.107, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10312 + }, + { + "epoch": 0.9917776602394576, + "grad_norm": 1.655018921552952, + "learning_rate": 8.808170078827837e-10, + "loss": 0.12, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10313 + }, + { + "epoch": 0.9918738279559551, + "grad_norm": 1.621678681777036, + "learning_rate": 8.606858679360841e-10, + "loss": 0.0967, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10314 + }, + { + "epoch": 0.9919699956724528, + "grad_norm": 1.6480724401113007, + "learning_rate": 8.4078740482052e-10, + "loss": 0.0956, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10315 + }, + { + "epoch": 0.9920661633889504, + "grad_norm": 1.8994817795848267, + "learning_rate": 8.211216203890538e-10, + "loss": 0.1322, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10316 + }, + { + "epoch": 0.9921623311054479, + "grad_norm": 1.6776715112910348, + "learning_rate": 8.016885164721655e-10, + "loss": 0.1021, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10317 + }, + { + "epoch": 0.9922584988219455, + "grad_norm": 1.7650239599182755, + "learning_rate": 7.824880948795188e-10, + "loss": 0.103, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10318 + }, + { + "epoch": 0.9923546665384431, + "grad_norm": 1.7137647866534742, + "learning_rate": 7.635203573985728e-10, + "loss": 0.1242, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10319 + }, + { + "epoch": 0.9924508342549406, + "grad_norm": 1.553182543428376, + "learning_rate": 7.447853057954146e-10, + "loss": 0.1001, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10320 + }, + { + "epoch": 0.9925470019714382, + "grad_norm": 1.8755582783605058, + "learning_rate": 7.262829418142047e-10, + "loss": 0.1239, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10321 + }, + { + "epoch": 0.9926431696879358, + "grad_norm": 2.048543494110024, + "learning_rate": 7.080132671774542e-10, + "loss": 0.122, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10322 + }, + { + "epoch": 0.9927393374044333, + "grad_norm": 1.6850059531168462, + "learning_rate": 6.899762835860246e-10, + "loss": 0.1051, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10323 + }, + { + "epoch": 0.9928355051209309, + "grad_norm": 1.7740166010971306, + "learning_rate": 6.721719927196835e-10, + "loss": 0.1154, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10324 + }, + { + "epoch": 0.9929316728374284, + "grad_norm": 1.9161338351872184, + "learning_rate": 6.546003962357161e-10, + "loss": 0.0939, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10325 + }, + { + "epoch": 0.993027840553926, + "grad_norm": 2.0996552055427933, + "learning_rate": 6.372614957700363e-10, + "loss": 0.1038, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10326 + }, + { + "epoch": 0.9931240082704236, + "grad_norm": 1.7195685231253945, + "learning_rate": 6.201552929371857e-10, + "loss": 0.1127, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10327 + }, + { + "epoch": 0.9932201759869211, + "grad_norm": 1.5306093032795258, + "learning_rate": 6.032817893297793e-10, + "loss": 0.101, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10328 + }, + { + "epoch": 0.9933163437034188, + "grad_norm": 2.241019655565526, + "learning_rate": 5.866409865185052e-10, + "loss": 0.1457, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10329 + }, + { + "epoch": 0.9934125114199164, + "grad_norm": 1.8344048902174952, + "learning_rate": 5.702328860529571e-10, + "loss": 0.1223, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10330 + }, + { + "epoch": 0.9935086791364139, + "grad_norm": 1.8215080296165052, + "learning_rate": 5.540574894608019e-10, + "loss": 0.1158, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10331 + }, + { + "epoch": 0.9936048468529115, + "grad_norm": 1.9979639288563669, + "learning_rate": 5.381147982475022e-10, + "loss": 0.1348, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10332 + }, + { + "epoch": 0.9937010145694091, + "grad_norm": 1.5968313778921783, + "learning_rate": 5.224048138979809e-10, + "loss": 0.1125, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10333 + }, + { + "epoch": 0.9937971822859066, + "grad_norm": 2.067143160524875, + "learning_rate": 5.069275378746796e-10, + "loss": 0.1191, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10334 + }, + { + "epoch": 0.9938933500024042, + "grad_norm": 1.4688588105672442, + "learning_rate": 4.916829716183901e-10, + "loss": 0.0869, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10335 + }, + { + "epoch": 0.9939895177189018, + "grad_norm": 1.781987050530134, + "learning_rate": 4.766711165488103e-10, + "loss": 0.0971, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10336 + }, + { + "epoch": 0.9940856854353993, + "grad_norm": 1.8360331746789054, + "learning_rate": 4.6189197406315556e-10, + "loss": 0.1377, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10337 + }, + { + "epoch": 0.9941818531518969, + "grad_norm": 1.6898675658492612, + "learning_rate": 4.4734554553782505e-10, + "loss": 0.1339, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10338 + }, + { + "epoch": 0.9942780208683945, + "grad_norm": 2.1474212484477055, + "learning_rate": 4.3303183232673575e-10, + "loss": 0.1315, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10339 + }, + { + "epoch": 0.994374188584892, + "grad_norm": 1.7376795751369492, + "learning_rate": 4.1895083576271035e-10, + "loss": 0.1078, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10340 + }, + { + "epoch": 0.9944703563013896, + "grad_norm": 1.5569635702580351, + "learning_rate": 4.051025571566447e-10, + "loss": 0.0796, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10341 + }, + { + "epoch": 0.9945665240178871, + "grad_norm": 2.3614732269067042, + "learning_rate": 3.914869977980629e-10, + "loss": 0.143, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10342 + }, + { + "epoch": 0.9946626917343848, + "grad_norm": 3.272069040234644, + "learning_rate": 3.78104158954562e-10, + "loss": 0.1309, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10343 + }, + { + "epoch": 0.9947588594508824, + "grad_norm": 1.900321159134599, + "learning_rate": 3.6495404187181224e-10, + "loss": 0.1055, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10344 + }, + { + "epoch": 0.99485502716738, + "grad_norm": 1.9359107963391593, + "learning_rate": 3.5203664777438973e-10, + "loss": 0.1157, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10345 + }, + { + "epoch": 0.9949511948838775, + "grad_norm": 1.973660492370003, + "learning_rate": 3.39351977865221e-10, + "loss": 0.1453, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10346 + }, + { + "epoch": 0.9950473626003751, + "grad_norm": 1.8730138339757578, + "learning_rate": 3.2690003332475074e-10, + "loss": 0.1228, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10347 + }, + { + "epoch": 0.9951435303168726, + "grad_norm": 1.5782699626047216, + "learning_rate": 3.146808153123293e-10, + "loss": 0.1033, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10348 + }, + { + "epoch": 0.9952396980333702, + "grad_norm": 1.5317032214221555, + "learning_rate": 3.026943249659353e-10, + "loss": 0.104, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10349 + }, + { + "epoch": 0.9953358657498678, + "grad_norm": 2.3782866852037836, + "learning_rate": 2.909405634013429e-10, + "loss": 0.1359, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10350 + }, + { + "epoch": 0.9954320334663653, + "grad_norm": 2.5750289048525996, + "learning_rate": 2.79419531713232e-10, + "loss": 0.1134, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10351 + }, + { + "epoch": 0.9955282011828629, + "grad_norm": 1.531403217492074, + "learning_rate": 2.681312309735229e-10, + "loss": 0.1044, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10352 + }, + { + "epoch": 0.9956243688993605, + "grad_norm": 1.721753375206467, + "learning_rate": 2.5707566223359683e-10, + "loss": 0.1252, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10353 + }, + { + "epoch": 0.995720536615858, + "grad_norm": 2.1010641994390733, + "learning_rate": 2.4625282652290803e-10, + "loss": 0.1472, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10354 + }, + { + "epoch": 0.9958167043323556, + "grad_norm": 2.0764386447147523, + "learning_rate": 2.35662724848984e-10, + "loss": 0.1175, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10355 + }, + { + "epoch": 0.9959128720488531, + "grad_norm": 1.5454375538365648, + "learning_rate": 2.2530535819742515e-10, + "loss": 0.0933, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10356 + }, + { + "epoch": 0.9960090397653508, + "grad_norm": 1.9776220965131521, + "learning_rate": 2.1518072753301532e-10, + "loss": 0.0971, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10357 + }, + { + "epoch": 0.9961052074818484, + "grad_norm": 2.0685198649964547, + "learning_rate": 2.0528883379833387e-10, + "loss": 0.1181, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10358 + }, + { + "epoch": 0.996201375198346, + "grad_norm": 1.7913441456905106, + "learning_rate": 1.956296779140332e-10, + "loss": 0.134, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10359 + }, + { + "epoch": 0.9962975429148435, + "grad_norm": 1.4948369341271344, + "learning_rate": 1.8620326077967155e-10, + "loss": 0.0901, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10360 + }, + { + "epoch": 0.9963937106313411, + "grad_norm": 1.5798713737786532, + "learning_rate": 1.770095832728802e-10, + "loss": 0.0908, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10361 + }, + { + "epoch": 0.9964898783478386, + "grad_norm": 1.8517105130798064, + "learning_rate": 1.6804864624936356e-10, + "loss": 0.1258, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10362 + }, + { + "epoch": 0.9965860460643362, + "grad_norm": 1.4203580943586571, + "learning_rate": 1.593204505437318e-10, + "loss": 0.0883, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10363 + }, + { + "epoch": 0.9966822137808338, + "grad_norm": 1.6255529331151657, + "learning_rate": 1.508249969683906e-10, + "loss": 0.0932, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10364 + }, + { + "epoch": 0.9967783814973313, + "grad_norm": 1.4919142241190402, + "learning_rate": 1.4256228631437385e-10, + "loss": 0.102, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10365 + }, + { + "epoch": 0.9968745492138289, + "grad_norm": 1.7801814120668775, + "learning_rate": 1.3453231935078858e-10, + "loss": 0.0963, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10366 + }, + { + "epoch": 0.9969707169303265, + "grad_norm": 1.9107752168226384, + "learning_rate": 1.2673509682564754e-10, + "loss": 0.0987, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10367 + }, + { + "epoch": 0.997066884646824, + "grad_norm": 2.045908716401278, + "learning_rate": 1.191706194644815e-10, + "loss": 0.1158, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10368 + }, + { + "epoch": 0.9971630523633216, + "grad_norm": 1.7262410843164735, + "learning_rate": 1.1183888797172693e-10, + "loss": 0.111, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10369 + }, + { + "epoch": 0.9972592200798192, + "grad_norm": 2.0759271289133125, + "learning_rate": 1.04739903030171e-10, + "loss": 0.1171, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10370 + }, + { + "epoch": 0.9973553877963168, + "grad_norm": 1.4804252142533616, + "learning_rate": 9.787366530039644e-11, + "loss": 0.0848, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10371 + }, + { + "epoch": 0.9974515555128144, + "grad_norm": 1.693003844707916, + "learning_rate": 9.124017542189168e-11, + "loss": 0.1076, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10372 + }, + { + "epoch": 0.997547723229312, + "grad_norm": 1.5509843412336939, + "learning_rate": 8.483943401221828e-11, + "loss": 0.1074, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10373 + }, + { + "epoch": 0.9976438909458095, + "grad_norm": 2.024868720269295, + "learning_rate": 7.867144166728846e-11, + "loss": 0.1338, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10374 + }, + { + "epoch": 0.9977400586623071, + "grad_norm": 2.2562190361724404, + "learning_rate": 7.273619896136508e-11, + "loss": 0.1361, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10375 + }, + { + "epoch": 0.9978362263788046, + "grad_norm": 1.7840884087034738, + "learning_rate": 6.703370644706165e-11, + "loss": 0.0922, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10376 + }, + { + "epoch": 0.9979323940953022, + "grad_norm": 1.6133239953053464, + "learning_rate": 6.156396465506475e-11, + "loss": 0.1172, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10377 + }, + { + "epoch": 0.9980285618117998, + "grad_norm": 1.3837640549660213, + "learning_rate": 5.632697409496679e-11, + "loss": 0.0847, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10378 + }, + { + "epoch": 0.9981247295282973, + "grad_norm": 1.8386797994782382, + "learning_rate": 5.1322735254155656e-11, + "loss": 0.1053, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10379 + }, + { + "epoch": 0.9982208972447949, + "grad_norm": 1.5175627185741967, + "learning_rate": 4.6551248598647457e-11, + "loss": 0.1022, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10380 + }, + { + "epoch": 0.9983170649612925, + "grad_norm": 1.7902332815508188, + "learning_rate": 4.2012514572531416e-11, + "loss": 0.1041, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10381 + }, + { + "epoch": 0.99841323267779, + "grad_norm": 1.7640970925509434, + "learning_rate": 3.7706533598524944e-11, + "loss": 0.1134, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10382 + }, + { + "epoch": 0.9985094003942876, + "grad_norm": 1.7943922043368137, + "learning_rate": 3.3633306077696105e-11, + "loss": 0.1198, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10383 + }, + { + "epoch": 0.9986055681107852, + "grad_norm": 1.674528621180003, + "learning_rate": 2.979283238863095e-11, + "loss": 0.0881, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10384 + }, + { + "epoch": 0.9987017358272828, + "grad_norm": 1.931143532778631, + "learning_rate": 2.6185112889653976e-11, + "loss": 0.1214, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10385 + }, + { + "epoch": 0.9987979035437804, + "grad_norm": 2.043487942658006, + "learning_rate": 2.2810147916052517e-11, + "loss": 0.1235, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10386 + }, + { + "epoch": 0.998894071260278, + "grad_norm": 2.25320420414659, + "learning_rate": 1.9667937782297253e-11, + "loss": 0.124, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10387 + }, + { + "epoch": 0.9989902389767755, + "grad_norm": 2.8317937081863644, + "learning_rate": 1.6758482781209505e-11, + "loss": 0.1244, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10388 + }, + { + "epoch": 0.9990864066932731, + "grad_norm": 2.728123492480274, + "learning_rate": 1.4081783183128584e-11, + "loss": 0.1073, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10389 + }, + { + "epoch": 0.9991825744097707, + "grad_norm": 1.7603954607657681, + "learning_rate": 1.1637839237577109e-11, + "loss": 0.1101, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10390 + }, + { + "epoch": 0.9992787421262682, + "grad_norm": 1.9238704944998937, + "learning_rate": 9.426651171873247e-12, + "loss": 0.1295, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10391 + }, + { + "epoch": 0.9993749098427658, + "grad_norm": 1.7822760391466883, + "learning_rate": 7.448219192240923e-12, + "loss": 0.1028, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10392 + }, + { + "epoch": 0.9994710775592633, + "grad_norm": 1.6993437326616545, + "learning_rate": 5.702543482699607e-12, + "loss": 0.1093, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10393 + }, + { + "epoch": 0.9995672452757609, + "grad_norm": 1.7346984099329896, + "learning_rate": 4.189624205896969e-12, + "loss": 0.1314, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10394 + }, + { + "epoch": 0.9996634129922585, + "grad_norm": 2.1123939315149918, + "learning_rate": 2.909461502276223e-12, + "loss": 0.095, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10395 + }, + { + "epoch": 0.999759580708756, + "grad_norm": 1.8219599124675487, + "learning_rate": 1.862055491463899e-12, + "loss": 0.1053, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10396 + }, + { + "epoch": 0.9998557484252536, + "grad_norm": 1.9427353310910211, + "learning_rate": 1.0474062708820677e-12, + "loss": 0.1301, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10397 + }, + { + "epoch": 0.9999519161417512, + "grad_norm": 2.068876305137086, + "learning_rate": 4.655139163034505e-13, + "loss": 0.1301, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10398 + }, + { + "epoch": 1.0, + "grad_norm": 1.4204671300448284, + "learning_rate": 1.163784818514202e-13, + "loss": 0.0652, + "memory/device_mem_reserved(gib)": 134.04, + "memory/max_mem_active(gib)": 79.48, + "memory/max_mem_allocated(gib)": 79.48, + "step": 10399 + } + ], + "logging_steps": 1, + "max_steps": 10399, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 5.898032976022733e+16, + "train_batch_size": 5, + "trial_name": null, + "trial_params": null +}