{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 5.0, "eval_steps": 500, "global_step": 635, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.039473684210526314, "grad_norm": 2.7101221158705315, "learning_rate": 2.5e-06, "loss": 0.2752, "loss_nan_ranks": 0, "loss_rank_avg": 0.08712838590145111, "step": 5, "valid_targets_mean": 1098.3, "valid_targets_min": 183 }, { "epoch": 0.07894736842105263, "grad_norm": 0.6720046606073936, "learning_rate": 5.625e-06, "loss": 0.2439, "loss_nan_ranks": 0, "loss_rank_avg": 0.07804884016513824, "step": 10, "valid_targets_mean": 1631.9, "valid_targets_min": 151 }, { "epoch": 0.11842105263157894, "grad_norm": 0.6437435853139324, "learning_rate": 8.750000000000001e-06, "loss": 0.2391, "loss_nan_ranks": 0, "loss_rank_avg": 0.07761339098215103, "step": 15, "valid_targets_mean": 1372.2, "valid_targets_min": 154 }, { "epoch": 0.15789473684210525, "grad_norm": 0.4983040471881677, "learning_rate": 1.1875e-05, "loss": 0.2225, "loss_nan_ranks": 0, "loss_rank_avg": 0.07829396426677704, "step": 20, "valid_targets_mean": 1649.2, "valid_targets_min": 180 }, { "epoch": 0.19736842105263158, "grad_norm": 0.5111850966892129, "learning_rate": 1.5000000000000002e-05, "loss": 0.217, "loss_nan_ranks": 0, "loss_rank_avg": 0.06428208202123642, "step": 25, "valid_targets_mean": 1005.1, "valid_targets_min": 143 }, { "epoch": 0.23684210526315788, "grad_norm": 0.4558264344788375, "learning_rate": 1.8125e-05, "loss": 0.205, "loss_nan_ranks": 0, "loss_rank_avg": 0.06763358414173126, "step": 30, "valid_targets_mean": 1172.6, "valid_targets_min": 241 }, { "epoch": 0.27631578947368424, "grad_norm": 0.40445710122340867, "learning_rate": 2.125e-05, "loss": 0.2243, "loss_nan_ranks": 0, "loss_rank_avg": 0.06543241441249847, "step": 35, "valid_targets_mean": 1309.4, "valid_targets_min": 196 }, { "epoch": 0.3157894736842105, "grad_norm": 0.4705113895066734, "learning_rate": 2.4375000000000003e-05, "loss": 0.203, "loss_nan_ranks": 0, "loss_rank_avg": 0.0628533810377121, "step": 40, "valid_targets_mean": 1237.4, "valid_targets_min": 158 }, { "epoch": 0.35526315789473684, "grad_norm": 0.4809178508506929, "learning_rate": 2.75e-05, "loss": 0.1997, "loss_nan_ranks": 0, "loss_rank_avg": 0.12881824374198914, "step": 45, "valid_targets_mean": 2262.9, "valid_targets_min": 188 }, { "epoch": 0.39473684210526316, "grad_norm": 0.435491062747422, "learning_rate": 3.0625000000000006e-05, "loss": 0.1805, "loss_nan_ranks": 0, "loss_rank_avg": 0.04893481358885765, "step": 50, "valid_targets_mean": 1406.6, "valid_targets_min": 183 }, { "epoch": 0.4342105263157895, "grad_norm": 0.4451767610433573, "learning_rate": 3.375e-05, "loss": 0.1679, "loss_nan_ranks": 0, "loss_rank_avg": 0.03604075312614441, "step": 55, "valid_targets_mean": 1128.7, "valid_targets_min": 174 }, { "epoch": 0.47368421052631576, "grad_norm": 0.6219574923141601, "learning_rate": 3.6875e-05, "loss": 0.1687, "loss_nan_ranks": 0, "loss_rank_avg": 0.053396932780742645, "step": 60, "valid_targets_mean": 1284.9, "valid_targets_min": 164 }, { "epoch": 0.5131578947368421, "grad_norm": 0.5221002170864094, "learning_rate": 4e-05, "loss": 0.1636, "loss_nan_ranks": 0, "loss_rank_avg": 0.04181867092847824, "step": 65, "valid_targets_mean": 1188.7, "valid_targets_min": 148 }, { "epoch": 0.5526315789473685, "grad_norm": 0.6639765625439825, "learning_rate": 3.9992432713989036e-05, "loss": 0.1421, "loss_nan_ranks": 0, "loss_rank_avg": 0.04784128814935684, "step": 70, "valid_targets_mean": 1187.2, "valid_targets_min": 177 }, { "epoch": 0.5921052631578947, "grad_norm": 0.46259251697351567, "learning_rate": 3.9969736582337885e-05, "loss": 0.1484, "loss_nan_ranks": 0, "loss_rank_avg": 0.034555964171886444, "step": 75, "valid_targets_mean": 1227.6, "valid_targets_min": 157 }, { "epoch": 0.631578947368421, "grad_norm": 0.5546889551420198, "learning_rate": 3.9931928779858504e-05, "loss": 0.1488, "loss_nan_ranks": 0, "loss_rank_avg": 0.05153804272413254, "step": 80, "valid_targets_mean": 1633.4, "valid_targets_min": 178 }, { "epoch": 0.6710526315789473, "grad_norm": 0.4873817524684509, "learning_rate": 3.987903791679637e-05, "loss": 0.1313, "loss_nan_ranks": 0, "loss_rank_avg": 0.04136563092470169, "step": 85, "valid_targets_mean": 1061.1, "valid_targets_min": 182 }, { "epoch": 0.7105263157894737, "grad_norm": 0.47560875777139394, "learning_rate": 3.981110401718031e-05, "loss": 0.1181, "loss_nan_ranks": 0, "loss_rank_avg": 0.03862156718969345, "step": 90, "valid_targets_mean": 1340.9, "valid_targets_min": 162 }, { "epoch": 0.75, "grad_norm": 0.5472764400469118, "learning_rate": 3.972817848853514e-05, "loss": 0.1243, "loss_nan_ranks": 0, "loss_rank_avg": 0.04823871701955795, "step": 95, "valid_targets_mean": 1437.3, "valid_targets_min": 172 }, { "epoch": 0.7894736842105263, "grad_norm": 0.49828554280744025, "learning_rate": 3.963032408298016e-05, "loss": 0.109, "loss_nan_ranks": 0, "loss_rank_avg": 0.04446114972233772, "step": 100, "valid_targets_mean": 1533.0, "valid_targets_min": 163 }, { "epoch": 0.8289473684210527, "grad_norm": 0.5878611883389838, "learning_rate": 3.951761484974277e-05, "loss": 0.1074, "loss_nan_ranks": 0, "loss_rank_avg": 0.02875318005681038, "step": 105, "valid_targets_mean": 977.9, "valid_targets_min": 149 }, { "epoch": 0.868421052631579, "grad_norm": 0.4384622912476601, "learning_rate": 3.939013607912339e-05, "loss": 0.1009, "loss_nan_ranks": 0, "loss_rank_avg": 0.027352716773748398, "step": 110, "valid_targets_mean": 1062.1, "valid_targets_min": 174 }, { "epoch": 0.9078947368421053, "grad_norm": 0.7358865371320961, "learning_rate": 3.924798423795378e-05, "loss": 0.0958, "loss_nan_ranks": 0, "loss_rank_avg": 0.03918096423149109, "step": 115, "valid_targets_mean": 1641.5, "valid_targets_min": 160 }, { "epoch": 0.9473684210526315, "grad_norm": 0.480301320941627, "learning_rate": 3.9091266896597836e-05, "loss": 0.0892, "loss_nan_ranks": 0, "loss_rank_avg": 0.029040634632110596, "step": 120, "valid_targets_mean": 1399.7, "valid_targets_min": 174 }, { "epoch": 0.9868421052631579, "grad_norm": 0.5014218049820629, "learning_rate": 3.892010264755007e-05, "loss": 0.0853, "loss_nan_ranks": 0, "loss_rank_avg": 0.029846064746379852, "step": 125, "valid_targets_mean": 1298.8, "valid_targets_min": 209 }, { "epoch": 1.0236842105263158, "grad_norm": 0.560533198633975, "learning_rate": 3.87346210156932e-05, "loss": 0.0855, "loss_nan_ranks": 0, "loss_rank_avg": 0.01870962604880333, "step": 130, "valid_targets_mean": 1233.2, "valid_targets_min": 155 }, { "epoch": 1.063157894736842, "grad_norm": 0.44081672680392586, "learning_rate": 3.853496236028305e-05, "loss": 0.0716, "loss_nan_ranks": 0, "loss_rank_avg": 0.019279945641756058, "step": 135, "valid_targets_mean": 1207.5, "valid_targets_min": 144 }, { "epoch": 1.1026315789473684, "grad_norm": 0.43522763684111954, "learning_rate": 3.8321277768734616e-05, "loss": 0.0644, "loss_nan_ranks": 0, "loss_rank_avg": 0.020765936002135277, "step": 140, "valid_targets_mean": 1457.6, "valid_targets_min": 272 }, { "epoch": 1.1421052631578947, "grad_norm": 0.5456262017486112, "learning_rate": 3.809372894228994e-05, "loss": 0.0602, "loss_nan_ranks": 0, "loss_rank_avg": 0.02137976698577404, "step": 145, "valid_targets_mean": 1659.1, "valid_targets_min": 233 }, { "epoch": 1.181578947368421, "grad_norm": 0.43506375549791015, "learning_rate": 3.7852488073654134e-05, "loss": 0.0614, "loss_nan_ranks": 0, "loss_rank_avg": 0.02248087152838707, "step": 150, "valid_targets_mean": 1701.4, "valid_targets_min": 177 }, { "epoch": 1.2210526315789474, "grad_norm": 0.48815812479013726, "learning_rate": 3.759773771669225e-05, "loss": 0.0567, "loss_nan_ranks": 0, "loss_rank_avg": 0.015485924668610096, "step": 155, "valid_targets_mean": 1169.9, "valid_targets_min": 195 }, { "epoch": 1.2605263157894737, "grad_norm": 0.4827611028800932, "learning_rate": 3.7329670648285544e-05, "loss": 0.0559, "loss_nan_ranks": 0, "loss_rank_avg": 0.02176966890692711, "step": 160, "valid_targets_mean": 1400.1, "valid_targets_min": 173 }, { "epoch": 1.3, "grad_norm": 0.42087863742760584, "learning_rate": 3.7048489722451686e-05, "loss": 0.0551, "loss_nan_ranks": 0, "loss_rank_avg": 0.0193997323513031, "step": 165, "valid_targets_mean": 1503.6, "valid_targets_min": 194 }, { "epoch": 1.3394736842105264, "grad_norm": 0.4500851785253807, "learning_rate": 3.675440771683934e-05, "loss": 0.0557, "loss_nan_ranks": 0, "loss_rank_avg": 0.019145645201206207, "step": 170, "valid_targets_mean": 1133.2, "valid_targets_min": 140 }, { "epoch": 1.3789473684210527, "grad_norm": 0.41186540634394436, "learning_rate": 3.644764717171323e-05, "loss": 0.0553, "loss_nan_ranks": 0, "loss_rank_avg": 0.01412944309413433, "step": 175, "valid_targets_mean": 1097.2, "valid_targets_min": 159 }, { "epoch": 1.418421052631579, "grad_norm": 0.7927653690959717, "learning_rate": 3.6128440221551524e-05, "loss": 0.0515, "loss_nan_ranks": 0, "loss_rank_avg": 0.010866289958357811, "step": 180, "valid_targets_mean": 1333.7, "valid_targets_min": 217 }, { "epoch": 1.4578947368421051, "grad_norm": 0.38462617335464633, "learning_rate": 3.579702841938309e-05, "loss": 0.0463, "loss_nan_ranks": 0, "loss_rank_avg": 0.011463098227977753, "step": 185, "valid_targets_mean": 1433.3, "valid_targets_min": 175 }, { "epoch": 1.4973684210526317, "grad_norm": 0.40969922530373465, "learning_rate": 3.5453662553997364e-05, "loss": 0.0486, "loss_nan_ranks": 0, "loss_rank_avg": 0.01705111190676689, "step": 190, "valid_targets_mean": 1453.7, "valid_targets_min": 187 }, { "epoch": 1.5368421052631578, "grad_norm": 0.38525298770844896, "learning_rate": 3.509860246016533e-05, "loss": 0.043, "loss_nan_ranks": 0, "loss_rank_avg": 0.012572815641760826, "step": 195, "valid_targets_mean": 1171.6, "valid_targets_min": 142 }, { "epoch": 1.5763157894736843, "grad_norm": 0.4478523955298112, "learning_rate": 3.473211682201509e-05, "loss": 0.0445, "loss_nan_ranks": 0, "loss_rank_avg": 0.015072207897901535, "step": 200, "valid_targets_mean": 1104.8, "valid_targets_min": 176 }, { "epoch": 1.6157894736842104, "grad_norm": 0.4582852792295964, "learning_rate": 3.435448296971093e-05, "loss": 0.0438, "loss_nan_ranks": 0, "loss_rank_avg": 0.015221774578094482, "step": 205, "valid_targets_mean": 1156.8, "valid_targets_min": 149 }, { "epoch": 1.655263157894737, "grad_norm": 0.34843178352442494, "learning_rate": 3.3965986669589636e-05, "loss": 0.0409, "loss_nan_ranks": 0, "loss_rank_avg": 0.012481506913900375, "step": 210, "valid_targets_mean": 1381.1, "valid_targets_min": 165 }, { "epoch": 1.694736842105263, "grad_norm": 0.37618823095235254, "learning_rate": 3.3566921907912915e-05, "loss": 0.0405, "loss_nan_ranks": 0, "loss_rank_avg": 0.01590465009212494, "step": 215, "valid_targets_mean": 1294.9, "valid_targets_min": 210 }, { "epoch": 1.7342105263157894, "grad_norm": 0.3454738553849686, "learning_rate": 3.3157590668399636e-05, "loss": 0.0378, "loss_nan_ranks": 0, "loss_rank_avg": 0.009255459532141685, "step": 220, "valid_targets_mean": 1063.1, "valid_targets_min": 136 }, { "epoch": 1.7736842105263158, "grad_norm": 0.4372141910904212, "learning_rate": 3.2738302703706045e-05, "loss": 0.038, "loss_nan_ranks": 0, "loss_rank_avg": 0.013019935227930546, "step": 225, "valid_targets_mean": 1120.8, "valid_targets_min": 184 }, { "epoch": 1.813157894736842, "grad_norm": 0.38544120425992434, "learning_rate": 3.230937530102713e-05, "loss": 0.0349, "loss_nan_ranks": 0, "loss_rank_avg": 0.012557389214634895, "step": 230, "valid_targets_mean": 1151.8, "valid_targets_min": 164 }, { "epoch": 1.8526315789473684, "grad_norm": 0.461305283488143, "learning_rate": 3.187113304199629e-05, "loss": 0.0367, "loss_nan_ranks": 0, "loss_rank_avg": 0.010037990286946297, "step": 235, "valid_targets_mean": 902.8, "valid_targets_min": 199 }, { "epoch": 1.8921052631578947, "grad_norm": 0.48219383992125125, "learning_rate": 3.142390755706515e-05, "loss": 0.0303, "loss_nan_ranks": 0, "loss_rank_avg": 0.010203387588262558, "step": 240, "valid_targets_mean": 1120.5, "valid_targets_min": 165 }, { "epoch": 1.931578947368421, "grad_norm": 0.36216222935440284, "learning_rate": 3.096803727454929e-05, "loss": 0.0335, "loss_nan_ranks": 0, "loss_rank_avg": 0.010378671810030937, "step": 245, "valid_targets_mean": 1385.8, "valid_targets_min": 153 }, { "epoch": 1.9710526315789474, "grad_norm": 0.370142901468045, "learning_rate": 3.0503867164529876e-05, "loss": 0.0281, "loss_nan_ranks": 0, "loss_rank_avg": 0.008373407647013664, "step": 250, "valid_targets_mean": 1199.4, "valid_targets_min": 164 }, { "epoch": 2.0078947368421054, "grad_norm": 0.33231922527568125, "learning_rate": 3.0031748477804937e-05, "loss": 0.0283, "loss_nan_ranks": 0, "loss_rank_avg": 0.009517144411802292, "step": 255, "valid_targets_mean": 1587.1, "valid_targets_min": 178 }, { "epoch": 2.0473684210526315, "grad_norm": 0.3392722141606185, "learning_rate": 2.9552038480087834e-05, "loss": 0.0196, "loss_nan_ranks": 0, "loss_rank_avg": 0.006903958506882191, "step": 260, "valid_targets_mean": 1733.2, "valid_targets_min": 152 }, { "epoch": 2.086842105263158, "grad_norm": 0.36612732190517466, "learning_rate": 2.9065100181654066e-05, "loss": 0.0215, "loss_nan_ranks": 0, "loss_rank_avg": 0.007419587578624487, "step": 265, "valid_targets_mean": 1234.6, "valid_targets_min": 153 }, { "epoch": 2.126315789473684, "grad_norm": 0.2787988117626485, "learning_rate": 2.8571302062641035e-05, "loss": 0.0179, "loss_nan_ranks": 0, "loss_rank_avg": 0.0044573224149644375, "step": 270, "valid_targets_mean": 1761.1, "valid_targets_min": 190 }, { "epoch": 2.1657894736842107, "grad_norm": 0.3467826395125668, "learning_rate": 2.8071017794208563e-05, "loss": 0.0204, "loss_nan_ranks": 0, "loss_rank_avg": 0.007707627955824137, "step": 275, "valid_targets_mean": 1301.9, "valid_targets_min": 205 }, { "epoch": 2.205263157894737, "grad_norm": 0.33762572722407624, "learning_rate": 2.7564625955771248e-05, "loss": 0.0173, "loss_nan_ranks": 0, "loss_rank_avg": 0.0057760318741202354, "step": 280, "valid_targets_mean": 1499.3, "valid_targets_min": 166 }, { "epoch": 2.2447368421052634, "grad_norm": 0.3099319055576973, "learning_rate": 2.7052509748516605e-05, "loss": 0.0199, "loss_nan_ranks": 0, "loss_rank_avg": 0.004758172202855349, "step": 285, "valid_targets_mean": 1326.5, "valid_targets_min": 183 }, { "epoch": 2.2842105263157895, "grad_norm": 0.3831315209740773, "learning_rate": 2.6535056705425738e-05, "loss": 0.0197, "loss_nan_ranks": 0, "loss_rank_avg": 0.00792803056538105, "step": 290, "valid_targets_mean": 1181.6, "valid_targets_min": 147 }, { "epoch": 2.3236842105263156, "grad_norm": 0.358374636729335, "learning_rate": 2.6012658398016087e-05, "loss": 0.0186, "loss_nan_ranks": 0, "loss_rank_avg": 0.011393646709620953, "step": 295, "valid_targets_mean": 1791.4, "valid_targets_min": 136 }, { "epoch": 2.363157894736842, "grad_norm": 0.35819376020142746, "learning_rate": 2.548571014002803e-05, "loss": 0.0176, "loss_nan_ranks": 0, "loss_rank_avg": 0.005979396402835846, "step": 300, "valid_targets_mean": 1508.6, "valid_targets_min": 194 }, { "epoch": 2.4026315789473682, "grad_norm": 0.2711736038743071, "learning_rate": 2.4954610688279685e-05, "loss": 0.0154, "loss_nan_ranks": 0, "loss_rank_avg": 0.00425342470407486, "step": 305, "valid_targets_mean": 1517.4, "valid_targets_min": 198 }, { "epoch": 2.442105263157895, "grad_norm": 0.31787661132541684, "learning_rate": 2.441976194091622e-05, "loss": 0.0149, "loss_nan_ranks": 0, "loss_rank_avg": 0.0034432020038366318, "step": 310, "valid_targets_mean": 2022.6, "valid_targets_min": 178 }, { "epoch": 2.481578947368421, "grad_norm": 0.2333114782334447, "learning_rate": 2.388156863328202e-05, "loss": 0.0149, "loss_nan_ranks": 0, "loss_rank_avg": 0.004565492272377014, "step": 315, "valid_targets_mean": 1516.2, "valid_targets_min": 194 }, { "epoch": 2.5210526315789474, "grad_norm": 0.27252193158318655, "learning_rate": 2.33404380316459e-05, "loss": 0.0154, "loss_nan_ranks": 0, "loss_rank_avg": 0.004314662888646126, "step": 320, "valid_targets_mean": 1531.6, "valid_targets_min": 185 }, { "epoch": 2.5605263157894735, "grad_norm": 0.2646921009616989, "learning_rate": 2.2796779625011044e-05, "loss": 0.0159, "loss_nan_ranks": 0, "loss_rank_avg": 0.004364798776805401, "step": 325, "valid_targets_mean": 1456.7, "valid_targets_min": 199 }, { "epoch": 2.6, "grad_norm": 0.3213395146333426, "learning_rate": 2.225100481524297e-05, "loss": 0.0137, "loss_nan_ranks": 0, "loss_rank_avg": 0.0039319987408816814, "step": 330, "valid_targets_mean": 1025.5, "valid_targets_min": 160 }, { "epoch": 2.639473684210526, "grad_norm": 0.2780173897620716, "learning_rate": 2.1703526605749996e-05, "loss": 0.0139, "loss_nan_ranks": 0, "loss_rank_avg": 0.006375589407980442, "step": 335, "valid_targets_mean": 1586.2, "valid_targets_min": 181 }, { "epoch": 2.6789473684210527, "grad_norm": 0.2906655206831874, "learning_rate": 2.115475928895172e-05, "loss": 0.0115, "loss_nan_ranks": 0, "loss_rank_avg": 0.003931432031095028, "step": 340, "valid_targets_mean": 1379.0, "valid_targets_min": 184 }, { "epoch": 2.718421052631579, "grad_norm": 0.27995864195600473, "learning_rate": 2.060511813277211e-05, "loss": 0.0139, "loss_nan_ranks": 0, "loss_rank_avg": 0.006246473640203476, "step": 345, "valid_targets_mean": 1619.8, "valid_targets_min": 201 }, { "epoch": 2.7578947368421054, "grad_norm": 0.2654587147347259, "learning_rate": 2.0055019066394395e-05, "loss": 0.0113, "loss_nan_ranks": 0, "loss_rank_avg": 0.004931578412652016, "step": 350, "valid_targets_mean": 1317.7, "valid_targets_min": 186 }, { "epoch": 2.7973684210526315, "grad_norm": 0.27487846891107415, "learning_rate": 1.950487836551553e-05, "loss": 0.0112, "loss_nan_ranks": 0, "loss_rank_avg": 0.004259842913597822, "step": 355, "valid_targets_mean": 1522.6, "valid_targets_min": 163 }, { "epoch": 2.836842105263158, "grad_norm": 0.24813669980000164, "learning_rate": 1.89551123373385e-05, "loss": 0.0109, "loss_nan_ranks": 0, "loss_rank_avg": 0.00526574207469821, "step": 360, "valid_targets_mean": 1681.4, "valid_targets_min": 155 }, { "epoch": 2.876315789473684, "grad_norm": 0.2148527283547528, "learning_rate": 1.8406137005540742e-05, "loss": 0.0111, "loss_nan_ranks": 0, "loss_rank_avg": 0.003941260743886232, "step": 365, "valid_targets_mean": 2068.9, "valid_targets_min": 133 }, { "epoch": 2.9157894736842103, "grad_norm": 0.23960759956448818, "learning_rate": 1.7858367795457123e-05, "loss": 0.0109, "loss_nan_ranks": 0, "loss_rank_avg": 0.003958859946578741, "step": 370, "valid_targets_mean": 1485.9, "valid_targets_min": 160 }, { "epoch": 2.955263157894737, "grad_norm": 0.254849836848687, "learning_rate": 1.731221921971571e-05, "loss": 0.0094, "loss_nan_ranks": 0, "loss_rank_avg": 0.0032655491959303617, "step": 375, "valid_targets_mean": 1554.8, "valid_targets_min": 160 }, { "epoch": 2.9947368421052634, "grad_norm": 0.27719641584695914, "learning_rate": 1.6768104564564218e-05, "loss": 0.0111, "loss_nan_ranks": 0, "loss_rank_avg": 0.003281503450125456, "step": 380, "valid_targets_mean": 1154.4, "valid_targets_min": 164 }, { "epoch": 3.031578947368421, "grad_norm": 0.22370233055486194, "learning_rate": 1.6226435577124472e-05, "loss": 0.0058, "loss_nan_ranks": 0, "loss_rank_avg": 0.0013954448513686657, "step": 385, "valid_targets_mean": 1573.0, "valid_targets_min": 171 }, { "epoch": 3.0710526315789473, "grad_norm": 0.24453665685903728, "learning_rate": 1.56876221538116e-05, "loss": 0.0061, "loss_nan_ranks": 0, "loss_rank_avg": 0.0025098701007664204, "step": 390, "valid_targets_mean": 1707.7, "valid_targets_min": 176 }, { "epoch": 3.110526315789474, "grad_norm": 0.2101409857018548, "learning_rate": 1.5152072030153675e-05, "loss": 0.0053, "loss_nan_ranks": 0, "loss_rank_avg": 0.0012396189849823713, "step": 395, "valid_targets_mean": 1477.7, "valid_targets_min": 161 }, { "epoch": 3.15, "grad_norm": 0.25820663865679544, "learning_rate": 1.4620190472246591e-05, "loss": 0.0055, "loss_nan_ranks": 0, "loss_rank_avg": 0.0017856699414551258, "step": 400, "valid_targets_mean": 1216.4, "valid_targets_min": 185 }, { "epoch": 3.1894736842105265, "grad_norm": 0.23638813763071392, "learning_rate": 1.4092379970077614e-05, "loss": 0.0058, "loss_nan_ranks": 0, "loss_rank_avg": 0.0028168372809886932, "step": 405, "valid_targets_mean": 1182.0, "valid_targets_min": 170 }, { "epoch": 3.2289473684210526, "grad_norm": 0.21220592177098743, "learning_rate": 1.356903993294969e-05, "loss": 0.005, "loss_nan_ranks": 0, "loss_rank_avg": 0.0019877608865499496, "step": 410, "valid_targets_mean": 1676.9, "valid_targets_min": 188 }, { "epoch": 3.268421052631579, "grad_norm": 0.19615170429824216, "learning_rate": 1.3050566387237016e-05, "loss": 0.0055, "loss_nan_ranks": 0, "loss_rank_avg": 0.0015852284850552678, "step": 415, "valid_targets_mean": 1484.3, "valid_targets_min": 169 }, { "epoch": 3.307894736842105, "grad_norm": 0.1854613719614594, "learning_rate": 1.2537351676700546e-05, "loss": 0.0044, "loss_nan_ranks": 0, "loss_rank_avg": 0.0010197525843977928, "step": 420, "valid_targets_mean": 1258.9, "valid_targets_min": 145 }, { "epoch": 3.3473684210526318, "grad_norm": 0.17882896036015392, "learning_rate": 1.2029784165590255e-05, "loss": 0.0048, "loss_nan_ranks": 0, "loss_rank_avg": 0.0011634776601567864, "step": 425, "valid_targets_mean": 1308.6, "valid_targets_min": 382 }, { "epoch": 3.386842105263158, "grad_norm": 0.18364115316539406, "learning_rate": 1.1528247944758763e-05, "loss": 0.0045, "loss_nan_ranks": 0, "loss_rank_avg": 0.0011106678284704685, "step": 430, "valid_targets_mean": 1281.0, "valid_targets_min": 173 }, { "epoch": 3.4263157894736844, "grad_norm": 0.19097308753983788, "learning_rate": 1.1033122541008878e-05, "loss": 0.0044, "loss_nan_ranks": 0, "loss_rank_avg": 0.0017777649918571115, "step": 435, "valid_targets_mean": 1274.7, "valid_targets_min": 202 }, { "epoch": 3.4657894736842105, "grad_norm": 0.15926746517762233, "learning_rate": 1.0544782629894747e-05, "loss": 0.0046, "loss_nan_ranks": 0, "loss_rank_avg": 0.0015381370903924108, "step": 440, "valid_targets_mean": 1386.2, "valid_targets_min": 195 }, { "epoch": 3.5052631578947366, "grad_norm": 0.174810684332455, "learning_rate": 1.0063597752194161e-05, "loss": 0.0038, "loss_nan_ranks": 0, "loss_rank_avg": 0.0009160505142062902, "step": 445, "valid_targets_mean": 964.0, "valid_targets_min": 155 }, { "epoch": 3.544736842105263, "grad_norm": 0.192078064373805, "learning_rate": 9.589932034266496e-06, "loss": 0.0045, "loss_nan_ranks": 0, "loss_rank_avg": 0.0019689835608005524, "step": 450, "valid_targets_mean": 1525.8, "valid_targets_min": 177 }, { "epoch": 3.5842105263157897, "grad_norm": 0.2033514016874955, "learning_rate": 9.124143912507863e-06, "loss": 0.0043, "loss_nan_ranks": 0, "loss_rank_avg": 0.0015643007354810834, "step": 455, "valid_targets_mean": 1371.5, "valid_targets_min": 154 }, { "epoch": 3.623684210526316, "grad_norm": 0.24485830656740784, "learning_rate": 8.666585862112053e-06, "loss": 0.0041, "loss_nan_ranks": 0, "loss_rank_avg": 0.001371587160974741, "step": 460, "valid_targets_mean": 1083.1, "valid_targets_min": 169 }, { "epoch": 3.663157894736842, "grad_norm": 0.1677702191763626, "learning_rate": 8.217604130342458e-06, "loss": 0.0035, "loss_nan_ranks": 0, "loss_rank_avg": 0.0021031424403190613, "step": 465, "valid_targets_mean": 1427.4, "valid_targets_min": 205 }, { "epoch": 3.7026315789473685, "grad_norm": 0.1839699022121503, "learning_rate": 7.777538474516882e-06, "loss": 0.0042, "loss_nan_ranks": 0, "loss_rank_avg": 0.0016583388205617666, "step": 470, "valid_targets_mean": 1526.5, "valid_targets_min": 233 }, { "epoch": 3.7421052631578946, "grad_norm": 0.1479206464184152, "learning_rate": 7.346721904903445e-06, "loss": 0.0035, "loss_nan_ranks": 0, "loss_rank_avg": 0.0009570215479470789, "step": 475, "valid_targets_mean": 1203.3, "valid_targets_min": 176 }, { "epoch": 3.781578947368421, "grad_norm": 0.1532494123801064, "learning_rate": 6.9254804327222025e-06, "loss": 0.0031, "loss_nan_ranks": 0, "loss_rank_avg": 0.0022052465938031673, "step": 480, "valid_targets_mean": 1251.1, "valid_targets_min": 186 }, { "epoch": 3.8210526315789473, "grad_norm": 0.17242621333687375, "learning_rate": 6.51413282344312e-06, "loss": 0.0036, "loss_nan_ranks": 0, "loss_rank_avg": 0.0014414711622521281, "step": 485, "valid_targets_mean": 1203.7, "valid_targets_min": 144 }, { "epoch": 3.860526315789474, "grad_norm": 0.14297707465232612, "learning_rate": 6.112990355567135e-06, "loss": 0.0037, "loss_nan_ranks": 0, "loss_rank_avg": 0.0018792328191921115, "step": 490, "valid_targets_mean": 1380.6, "valid_targets_min": 171 }, { "epoch": 3.9, "grad_norm": 0.11572932414812029, "learning_rate": 5.722356585072802e-06, "loss": 0.0036, "loss_nan_ranks": 0, "loss_rank_avg": 0.0013993874890729785, "step": 495, "valid_targets_mean": 1705.8, "valid_targets_min": 159 }, { "epoch": 3.9394736842105265, "grad_norm": 0.14113528192306735, "learning_rate": 5.342527115706808e-06, "loss": 0.0033, "loss_nan_ranks": 0, "loss_rank_avg": 0.0014047269942238927, "step": 500, "valid_targets_mean": 1542.8, "valid_targets_min": 176 }, { "epoch": 3.9789473684210526, "grad_norm": 0.09132107373911931, "learning_rate": 4.97378937529217e-06, "loss": 0.0027, "loss_nan_ranks": 0, "loss_rank_avg": 0.0010275390231981874, "step": 505, "valid_targets_mean": 1654.9, "valid_targets_min": 137 }, { "epoch": 4.015789473684211, "grad_norm": 0.09296060370058007, "learning_rate": 4.616422398223348e-06, "loss": 0.0028, "loss_nan_ranks": 0, "loss_rank_avg": 0.0001814200368244201, "step": 510, "valid_targets_mean": 1199.0, "valid_targets_min": 151 }, { "epoch": 4.0552631578947365, "grad_norm": 0.10547268704764956, "learning_rate": 4.270696614312988e-06, "loss": 0.0016, "loss_nan_ranks": 0, "loss_rank_avg": 0.0008387513225898147, "step": 515, "valid_targets_mean": 1132.9, "valid_targets_min": 161 }, { "epoch": 4.094736842105263, "grad_norm": 0.10463920170231994, "learning_rate": 3.936873644149908e-06, "loss": 0.0017, "loss_nan_ranks": 0, "loss_rank_avg": 0.0004102217499166727, "step": 520, "valid_targets_mean": 1242.0, "valid_targets_min": 150 }, { "epoch": 4.13421052631579, "grad_norm": 0.09778803531588182, "learning_rate": 3.615206101123334e-06, "loss": 0.0016, "loss_nan_ranks": 0, "loss_rank_avg": 0.0003775907098315656, "step": 525, "valid_targets_mean": 1185.2, "valid_targets_min": 136 }, { "epoch": 4.173684210526316, "grad_norm": 0.12290327439880455, "learning_rate": 3.3059374002631195e-06, "loss": 0.0018, "loss_nan_ranks": 0, "loss_rank_avg": 0.0005829845904372633, "step": 530, "valid_targets_mean": 1468.1, "valid_targets_min": 171 }, { "epoch": 4.213157894736842, "grad_norm": 0.09619221701294631, "learning_rate": 3.0093015740406306e-06, "loss": 0.0013, "loss_nan_ranks": 0, "loss_rank_avg": 0.00027496751863509417, "step": 535, "valid_targets_mean": 1326.9, "valid_targets_min": 224 }, { "epoch": 4.252631578947368, "grad_norm": 0.06768085034659244, "learning_rate": 2.7255230952696796e-06, "loss": 0.0015, "loss_nan_ranks": 0, "loss_rank_avg": 0.0006069997325539589, "step": 540, "valid_targets_mean": 1782.5, "valid_targets_min": 199 }, { "epoch": 4.292105263157895, "grad_norm": 0.05864181286947488, "learning_rate": 2.4548167072415276e-06, "loss": 0.0015, "loss_nan_ranks": 0, "loss_rank_avg": 0.0004936070181429386, "step": 545, "valid_targets_mean": 1289.1, "valid_targets_min": 182 }, { "epoch": 4.331578947368421, "grad_norm": 0.07397829236902932, "learning_rate": 2.1973872612224946e-06, "loss": 0.0013, "loss_nan_ranks": 0, "loss_rank_avg": 0.0002663229242898524, "step": 550, "valid_targets_mean": 1558.1, "valid_targets_min": 136 }, { "epoch": 4.371052631578947, "grad_norm": 0.07889017533468562, "learning_rate": 1.953429561437148e-06, "loss": 0.0012, "loss_nan_ranks": 0, "loss_rank_avg": 0.0002549627679400146, "step": 555, "valid_targets_mean": 1230.8, "valid_targets_min": 188 }, { "epoch": 4.410526315789474, "grad_norm": 0.050627641307866365, "learning_rate": 1.7231282176543751e-06, "loss": 0.001, "loss_nan_ranks": 0, "loss_rank_avg": 0.0004842726921197027, "step": 560, "valid_targets_mean": 1473.0, "valid_targets_min": 172 }, { "epoch": 4.45, "grad_norm": 0.06959584902781396, "learning_rate": 1.506657505487885e-06, "loss": 0.0012, "loss_nan_ranks": 0, "loss_rank_avg": 0.0006876724073663354, "step": 565, "valid_targets_mean": 1243.2, "valid_targets_min": 128 }, { "epoch": 4.489473684210527, "grad_norm": 0.05547652617295266, "learning_rate": 1.3041812345168748e-06, "loss": 0.0011, "loss_nan_ranks": 0, "loss_rank_avg": 0.00027280105859972537, "step": 570, "valid_targets_mean": 1170.8, "valid_targets_min": 163 }, { "epoch": 4.528947368421052, "grad_norm": 0.068233896171716, "learning_rate": 1.115852624326632e-06, "loss": 0.0012, "loss_nan_ranks": 0, "loss_rank_avg": 0.00038275844417512417, "step": 575, "valid_targets_mean": 1655.1, "valid_targets_min": 168 }, { "epoch": 4.568421052631579, "grad_norm": 0.06546491879189355, "learning_rate": 9.418141885628906e-07, "loss": 0.0011, "loss_nan_ranks": 0, "loss_rank_avg": 0.0003734099445864558, "step": 580, "valid_targets_mean": 1131.7, "valid_targets_min": 158 }, { "epoch": 4.6078947368421055, "grad_norm": 0.08620881353352064, "learning_rate": 7.821976270876885e-07, "loss": 0.0014, "loss_nan_ranks": 0, "loss_rank_avg": 0.0008482255507260561, "step": 585, "valid_targets_mean": 1263.7, "valid_targets_min": 162 }, { "epoch": 4.647368421052631, "grad_norm": 0.055532001906231546, "learning_rate": 6.371237263182961e-07, "loss": 0.0014, "loss_nan_ranks": 0, "loss_rank_avg": 0.0002104710874846205, "step": 590, "valid_targets_mean": 1241.9, "valid_targets_min": 180 }, { "epoch": 4.686842105263158, "grad_norm": 0.06595782866237919, "learning_rate": 5.067022678247014e-07, "loss": 0.0011, "loss_nan_ranks": 0, "loss_rank_avg": 0.00038550980389118195, "step": 595, "valid_targets_mean": 1458.9, "valid_targets_min": 380 }, { "epoch": 4.726315789473684, "grad_norm": 0.09191680898141588, "learning_rate": 3.910319452547451e-07, "loss": 0.0014, "loss_nan_ranks": 0, "loss_rank_avg": 0.0007334852707572281, "step": 600, "valid_targets_mean": 1157.8, "valid_targets_min": 163 }, { "epoch": 4.765789473684211, "grad_norm": 0.05461989130375346, "learning_rate": 2.902002896498091e-07, "loss": 0.0011, "loss_nan_ranks": 0, "loss_rank_avg": 0.00043108745012432337, "step": 605, "valid_targets_mean": 1706.4, "valid_targets_min": 157 }, { "epoch": 4.8052631578947365, "grad_norm": 0.065858631443395, "learning_rate": 2.04283603207589e-07, "loss": 0.001, "loss_nan_ranks": 0, "loss_rank_avg": 0.0001452796277590096, "step": 610, "valid_targets_mean": 1308.6, "valid_targets_min": 296 }, { "epoch": 4.844736842105263, "grad_norm": 0.05898942373450508, "learning_rate": 1.3334690154202855e-07, "loss": 0.0013, "loss_nan_ranks": 0, "loss_rank_avg": 0.0003439486026763916, "step": 615, "valid_targets_mean": 894.8, "valid_targets_min": 162 }, { "epoch": 4.88421052631579, "grad_norm": 0.04586729554260685, "learning_rate": 7.744386448414132e-08, "loss": 0.0012, "loss_nan_ranks": 0, "loss_rank_avg": 0.0005539880366995931, "step": 620, "valid_targets_mean": 1505.0, "valid_targets_min": 159 }, { "epoch": 4.923684210526316, "grad_norm": 0.06977053244907701, "learning_rate": 3.661679546095975e-08, "loss": 0.0012, "loss_nan_ranks": 0, "loss_rank_avg": 0.00043664619443006814, "step": 625, "valid_targets_mean": 1268.6, "valid_targets_min": 178 }, { "epoch": 4.963157894736842, "grad_norm": 0.05601061550329874, "learning_rate": 1.0896589483313514e-08, "loss": 0.0011, "loss_nan_ranks": 0, "loss_rank_avg": 0.00028773670783266425, "step": 630, "valid_targets_mean": 1422.5, "valid_targets_min": 148 }, { "epoch": 5.0, "grad_norm": 0.07861432844707215, "learning_rate": 3.0270976669077854e-10, "loss": 0.0011, "loss_nan_ranks": 0, "loss_rank_avg": 0.00038876585313118994, "step": 635, "valid_targets_mean": 1438.5, "valid_targets_min": 148 }, { "epoch": 5.0, "loss_nan_ranks": 0, "loss_rank_avg": 0.00038876585313118994, "step": 635, "total_flos": 4.917242886430392e+17, "train_loss": 0.046322415347438396, "train_runtime": 4175.8531, "train_samples_per_second": 14.556, "train_steps_per_second": 0.152, "valid_targets_mean": 1438.5, "valid_targets_min": 148 } ], "logging_steps": 5, "max_steps": 635, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 300, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4.917242886430392e+17, "train_batch_size": 1, "trial_name": null, "trial_params": null }