{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0, "eval_steps": 500, "global_step": 916, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.010917030567685589, "grad_norm": 23.97355412514402, "learning_rate": 1.7391304347826088e-06, "loss": 0.8471972465515136, "loss_nan_ranks": 0, "loss_rank_avg": 0.8320431709289551, "step": 5, "valid_targets_mean": 6070.1, "valid_targets_min": 3127 }, { "epoch": 0.021834061135371178, "grad_norm": 16.44833961543885, "learning_rate": 3.91304347826087e-06, "loss": 0.7716878890991211, "loss_nan_ranks": 0, "loss_rank_avg": 0.6962907314300537, "step": 10, "valid_targets_mean": 5662.4, "valid_targets_min": 2362 }, { "epoch": 0.03275109170305677, "grad_norm": 4.498775399652974, "learning_rate": 6.086956521739132e-06, "loss": 0.6434809684753418, "loss_nan_ranks": 0, "loss_rank_avg": 0.6177502274513245, "step": 15, "valid_targets_mean": 6107.5, "valid_targets_min": 2135 }, { "epoch": 0.043668122270742356, "grad_norm": 1.4989001821141923, "learning_rate": 8.260869565217392e-06, "loss": 0.5608213901519775, "loss_nan_ranks": 0, "loss_rank_avg": 0.5441532731056213, "step": 20, "valid_targets_mean": 7056.8, "valid_targets_min": 4356 }, { "epoch": 0.05458515283842795, "grad_norm": 1.2886970573156626, "learning_rate": 1.0434782608695653e-05, "loss": 0.5343934535980225, "loss_nan_ranks": 0, "loss_rank_avg": 0.5189746618270874, "step": 25, "valid_targets_mean": 6624.3, "valid_targets_min": 3066 }, { "epoch": 0.06550218340611354, "grad_norm": 0.9408460707709296, "learning_rate": 1.2608695652173915e-05, "loss": 0.5223332405090332, "loss_nan_ranks": 0, "loss_rank_avg": 0.49547022581100464, "step": 30, "valid_targets_mean": 5644.4, "valid_targets_min": 3012 }, { "epoch": 0.07641921397379912, "grad_norm": 0.7267194441358712, "learning_rate": 1.4782608695652174e-05, "loss": 0.47780380249023435, "loss_nan_ranks": 0, "loss_rank_avg": 0.46327659487724304, "step": 35, "valid_targets_mean": 6334.2, "valid_targets_min": 3084 }, { "epoch": 0.08733624454148471, "grad_norm": 0.7191637810205865, "learning_rate": 1.6956521739130437e-05, "loss": 0.461358642578125, "loss_nan_ranks": 0, "loss_rank_avg": 0.5052458643913269, "step": 40, "valid_targets_mean": 7072.8, "valid_targets_min": 2706 }, { "epoch": 0.0982532751091703, "grad_norm": 0.6358266987840093, "learning_rate": 1.9130434782608697e-05, "loss": 0.46322293281555177, "loss_nan_ranks": 0, "loss_rank_avg": 0.4820753037929535, "step": 45, "valid_targets_mean": 6938.0, "valid_targets_min": 3407 }, { "epoch": 0.1091703056768559, "grad_norm": 0.4867667146092501, "learning_rate": 2.1304347826086958e-05, "loss": 0.43762006759643557, "loss_nan_ranks": 0, "loss_rank_avg": 0.40962767601013184, "step": 50, "valid_targets_mean": 7073.7, "valid_targets_min": 1806 }, { "epoch": 0.12008733624454149, "grad_norm": 0.5410855438103472, "learning_rate": 2.3478260869565222e-05, "loss": 0.3973851203918457, "loss_nan_ranks": 0, "loss_rank_avg": 0.35147738456726074, "step": 55, "valid_targets_mean": 6016.2, "valid_targets_min": 3636 }, { "epoch": 0.13100436681222707, "grad_norm": 0.5600459106450499, "learning_rate": 2.565217391304348e-05, "loss": 0.41819043159484864, "loss_nan_ranks": 0, "loss_rank_avg": 0.4194186329841614, "step": 60, "valid_targets_mean": 5616.1, "valid_targets_min": 2217 }, { "epoch": 0.14192139737991266, "grad_norm": 0.49765725200725186, "learning_rate": 2.782608695652174e-05, "loss": 0.41376023292541503, "loss_nan_ranks": 0, "loss_rank_avg": 0.457670122385025, "step": 65, "valid_targets_mean": 7007.8, "valid_targets_min": 2414 }, { "epoch": 0.15283842794759825, "grad_norm": 0.48594372127143926, "learning_rate": 3.0000000000000004e-05, "loss": 0.3903087615966797, "loss_nan_ranks": 0, "loss_rank_avg": 0.3828747272491455, "step": 70, "valid_targets_mean": 6693.9, "valid_targets_min": 2094 }, { "epoch": 0.16375545851528384, "grad_norm": 0.5317483860685074, "learning_rate": 3.2173913043478265e-05, "loss": 0.3808147430419922, "loss_nan_ranks": 0, "loss_rank_avg": 0.343553364276886, "step": 75, "valid_targets_mean": 5179.4, "valid_targets_min": 2163 }, { "epoch": 0.17467248908296942, "grad_norm": 0.4362892418379526, "learning_rate": 3.4347826086956526e-05, "loss": 0.40294761657714845, "loss_nan_ranks": 0, "loss_rank_avg": 0.417472779750824, "step": 80, "valid_targets_mean": 7473.6, "valid_targets_min": 2654 }, { "epoch": 0.185589519650655, "grad_norm": 0.49451775346184396, "learning_rate": 3.6521739130434786e-05, "loss": 0.34819955825805665, "loss_nan_ranks": 0, "loss_rank_avg": 0.32027876377105713, "step": 85, "valid_targets_mean": 6283.4, "valid_targets_min": 2991 }, { "epoch": 0.1965065502183406, "grad_norm": 0.4911989992991641, "learning_rate": 3.869565217391305e-05, "loss": 0.36500887870788573, "loss_nan_ranks": 0, "loss_rank_avg": 0.3651946187019348, "step": 90, "valid_targets_mean": 5925.4, "valid_targets_min": 3286 }, { "epoch": 0.2074235807860262, "grad_norm": 0.4587833630947856, "learning_rate": 3.9999418562271446e-05, "loss": 0.3630575656890869, "loss_nan_ranks": 0, "loss_rank_avg": 0.329254150390625, "step": 95, "valid_targets_mean": 6210.7, "valid_targets_min": 2415 }, { "epoch": 0.2183406113537118, "grad_norm": 0.5039991989569258, "learning_rate": 3.999287777607105e-05, "loss": 0.3551477432250977, "loss_nan_ranks": 0, "loss_rank_avg": 0.40160879492759705, "step": 100, "valid_targets_mean": 7218.4, "valid_targets_min": 2438 }, { "epoch": 0.2292576419213974, "grad_norm": 0.5094824282802719, "learning_rate": 3.9979071791285046e-05, "loss": 0.3545681476593018, "loss_nan_ranks": 0, "loss_rank_avg": 0.3493620753288269, "step": 105, "valid_targets_mean": 6402.4, "valid_targets_min": 2544 }, { "epoch": 0.24017467248908297, "grad_norm": 0.5562548306695689, "learning_rate": 3.99580056248611e-05, "loss": 0.3572681427001953, "loss_nan_ranks": 0, "loss_rank_avg": 0.3697173595428467, "step": 110, "valid_targets_mean": 5979.5, "valid_targets_min": 2393 }, { "epoch": 0.25109170305676853, "grad_norm": 0.47399899957452146, "learning_rate": 3.992968693201943e-05, "loss": 0.3389609336853027, "loss_nan_ranks": 0, "loss_rank_avg": 0.3378722071647644, "step": 115, "valid_targets_mean": 6158.7, "valid_targets_min": 1549 }, { "epoch": 0.26200873362445415, "grad_norm": 0.6064655567982438, "learning_rate": 3.9894126003471045e-05, "loss": 0.31346755027770995, "loss_nan_ranks": 0, "loss_rank_avg": 0.32061848044395447, "step": 120, "valid_targets_mean": 5997.6, "valid_targets_min": 2586 }, { "epoch": 0.27292576419213976, "grad_norm": 0.4882802684865627, "learning_rate": 3.985133576167816e-05, "loss": 0.3278614044189453, "loss_nan_ranks": 0, "loss_rank_avg": 0.3414720296859741, "step": 125, "valid_targets_mean": 6426.0, "valid_targets_min": 2283 }, { "epoch": 0.2838427947598253, "grad_norm": 0.5433484811754706, "learning_rate": 3.9801331756158346e-05, "loss": 0.34977922439575193, "loss_nan_ranks": 0, "loss_rank_avg": 0.29019778966903687, "step": 130, "valid_targets_mean": 4994.9, "valid_targets_min": 2484 }, { "epoch": 0.29475982532751094, "grad_norm": 0.47284144257824057, "learning_rate": 3.974413215783398e-05, "loss": 0.3298785209655762, "loss_nan_ranks": 0, "loss_rank_avg": 0.3123893439769745, "step": 135, "valid_targets_mean": 7171.4, "valid_targets_min": 1408 }, { "epoch": 0.3056768558951965, "grad_norm": 0.46756385403664763, "learning_rate": 3.967975775242912e-05, "loss": 0.3594933032989502, "loss_nan_ranks": 0, "loss_rank_avg": 0.359480082988739, "step": 140, "valid_targets_mean": 5973.4, "valid_targets_min": 2763 }, { "epoch": 0.3165938864628821, "grad_norm": 0.44249741753609867, "learning_rate": 3.9608231932916236e-05, "loss": 0.336076545715332, "loss_nan_ranks": 0, "loss_rank_avg": 0.30599284172058105, "step": 145, "valid_targets_mean": 6560.2, "valid_targets_min": 3274 }, { "epoch": 0.32751091703056767, "grad_norm": 0.45316261029229854, "learning_rate": 3.952958069101541e-05, "loss": 0.3227931022644043, "loss_nan_ranks": 0, "loss_rank_avg": 0.30612146854400635, "step": 150, "valid_targets_mean": 5900.1, "valid_targets_min": 2675 }, { "epoch": 0.3384279475982533, "grad_norm": 0.4829929158249486, "learning_rate": 3.9443832607749214e-05, "loss": 0.326050329208374, "loss_nan_ranks": 0, "loss_rank_avg": 0.3084082007408142, "step": 155, "valid_targets_mean": 6959.6, "valid_targets_min": 2035 }, { "epoch": 0.34934497816593885, "grad_norm": 0.4876619431594521, "learning_rate": 3.9351018843056736e-05, "loss": 0.3491052627563477, "loss_nan_ranks": 0, "loss_rank_avg": 0.3449532389640808, "step": 160, "valid_targets_mean": 6381.8, "valid_targets_min": 3010 }, { "epoch": 0.36026200873362446, "grad_norm": 0.47150593505681493, "learning_rate": 3.925117312447032e-05, "loss": 0.3180710792541504, "loss_nan_ranks": 0, "loss_rank_avg": 0.37008243799209595, "step": 165, "valid_targets_mean": 7208.1, "valid_targets_min": 2038 }, { "epoch": 0.37117903930131, "grad_norm": 1.0836378185382227, "learning_rate": 3.914433173485938e-05, "loss": 0.30317959785461424, "loss_nan_ranks": 0, "loss_rank_avg": 0.351075679063797, "step": 170, "valid_targets_mean": 7132.2, "valid_targets_min": 2947 }, { "epoch": 0.38209606986899564, "grad_norm": 0.5578953481005375, "learning_rate": 3.903053349924556e-05, "loss": 0.30793395042419436, "loss_nan_ranks": 0, "loss_rank_avg": 0.32196080684661865, "step": 175, "valid_targets_mean": 5697.4, "valid_targets_min": 1736 }, { "epoch": 0.3930131004366812, "grad_norm": 0.4548426399215528, "learning_rate": 3.8909819770694174e-05, "loss": 0.31206459999084474, "loss_nan_ranks": 0, "loss_rank_avg": 0.34161555767059326, "step": 180, "valid_targets_mean": 7057.4, "valid_targets_min": 1929 }, { "epoch": 0.4039301310043668, "grad_norm": 0.49428781537079597, "learning_rate": 3.878223441528694e-05, "loss": 0.3230273246765137, "loss_nan_ranks": 0, "loss_rank_avg": 0.33783310651779175, "step": 185, "valid_targets_mean": 6174.7, "valid_targets_min": 3044 }, { "epoch": 0.4148471615720524, "grad_norm": 0.5459711907578669, "learning_rate": 3.864782379618146e-05, "loss": 0.31689255237579345, "loss_nan_ranks": 0, "loss_rank_avg": 0.3205436170101166, "step": 190, "valid_targets_mean": 5417.1, "valid_targets_min": 2076 }, { "epoch": 0.425764192139738, "grad_norm": 0.4382130184363095, "learning_rate": 3.8506636756763406e-05, "loss": 0.3044698476791382, "loss_nan_ranks": 0, "loss_rank_avg": 0.2877098321914673, "step": 195, "valid_targets_mean": 6759.7, "valid_targets_min": 2333 }, { "epoch": 0.4366812227074236, "grad_norm": 0.47473879379554257, "learning_rate": 3.835872460289733e-05, "loss": 0.3210113763809204, "loss_nan_ranks": 0, "loss_rank_avg": 0.3006540536880493, "step": 200, "valid_targets_mean": 5824.4, "valid_targets_min": 2457 }, { "epoch": 0.44759825327510916, "grad_norm": 0.5031626940984227, "learning_rate": 3.820414108428266e-05, "loss": 0.31898131370544436, "loss_nan_ranks": 0, "loss_rank_avg": 0.2764798700809479, "step": 205, "valid_targets_mean": 5396.9, "valid_targets_min": 2789 }, { "epoch": 0.4585152838427948, "grad_norm": 0.4533933035125517, "learning_rate": 3.804294237492161e-05, "loss": 0.31290626525878906, "loss_nan_ranks": 0, "loss_rank_avg": 0.3159083127975464, "step": 210, "valid_targets_mean": 6517.4, "valid_targets_min": 2785 }, { "epoch": 0.46943231441048033, "grad_norm": 0.464714989108791, "learning_rate": 3.7875187052706195e-05, "loss": 0.3238206386566162, "loss_nan_ranks": 0, "loss_rank_avg": 0.3460172712802887, "step": 215, "valid_targets_mean": 6189.4, "valid_targets_min": 3081 }, { "epoch": 0.48034934497816595, "grad_norm": 0.5287899971687461, "learning_rate": 3.7700936078131615e-05, "loss": 0.3185282707214355, "loss_nan_ranks": 0, "loss_rank_avg": 0.3045659065246582, "step": 220, "valid_targets_mean": 6245.6, "valid_targets_min": 2281 }, { "epoch": 0.4912663755458515, "grad_norm": 0.4416873588272455, "learning_rate": 3.7520252772143874e-05, "loss": 0.31445748805999757, "loss_nan_ranks": 0, "loss_rank_avg": 0.30497437715530396, "step": 225, "valid_targets_mean": 6309.1, "valid_targets_min": 2540 }, { "epoch": 0.5021834061135371, "grad_norm": 0.4441385829244796, "learning_rate": 3.7333202793129596e-05, "loss": 0.2954303741455078, "loss_nan_ranks": 0, "loss_rank_avg": 0.34913989901542664, "step": 230, "valid_targets_mean": 7467.4, "valid_targets_min": 2632 }, { "epoch": 0.5131004366812227, "grad_norm": 0.4526274660514342, "learning_rate": 3.713985411305653e-05, "loss": 0.3343640327453613, "loss_nan_ranks": 0, "loss_rank_avg": 0.2784854471683502, "step": 235, "valid_targets_mean": 6575.2, "valid_targets_min": 2319 }, { "epoch": 0.5240174672489083, "grad_norm": 0.44865561810882726, "learning_rate": 3.694027699277318e-05, "loss": 0.321301531791687, "loss_nan_ranks": 0, "loss_rank_avg": 0.3048713207244873, "step": 240, "valid_targets_mean": 7111.2, "valid_targets_min": 3207 }, { "epoch": 0.5349344978165939, "grad_norm": 0.4774215542318369, "learning_rate": 3.6734543956476844e-05, "loss": 0.30802216529846194, "loss_nan_ranks": 0, "loss_rank_avg": 0.28203749656677246, "step": 245, "valid_targets_mean": 5559.9, "valid_targets_min": 1897 }, { "epoch": 0.5458515283842795, "grad_norm": 0.4626170014257541, "learning_rate": 3.652272976535902e-05, "loss": 0.2966586112976074, "loss_nan_ranks": 0, "loss_rank_avg": 0.26324623823165894, "step": 250, "valid_targets_mean": 6001.4, "valid_targets_min": 1402 }, { "epoch": 0.5567685589519651, "grad_norm": 0.4513941545271317, "learning_rate": 3.630491139043805e-05, "loss": 0.31173756122589114, "loss_nan_ranks": 0, "loss_rank_avg": 0.30301469564437866, "step": 255, "valid_targets_mean": 6790.6, "valid_targets_min": 3157 }, { "epoch": 0.5676855895196506, "grad_norm": 0.48836875987610767, "learning_rate": 3.608116798458862e-05, "loss": 0.31902124881744387, "loss_nan_ranks": 0, "loss_rank_avg": 0.3262016177177429, "step": 260, "valid_targets_mean": 6513.8, "valid_targets_min": 2605 }, { "epoch": 0.5786026200873362, "grad_norm": 0.5204472402450062, "learning_rate": 3.585158085377847e-05, "loss": 0.3127915382385254, "loss_nan_ranks": 0, "loss_rank_avg": 0.31610095500946045, "step": 265, "valid_targets_mean": 6214.8, "valid_targets_min": 4124 }, { "epoch": 0.5895196506550219, "grad_norm": 0.4276793901088946, "learning_rate": 3.5616233427522706e-05, "loss": 0.31319239139556887, "loss_nan_ranks": 0, "loss_rank_avg": 0.3183065950870514, "step": 270, "valid_targets_mean": 7797.1, "valid_targets_min": 2919 }, { "epoch": 0.6004366812227074, "grad_norm": 0.46626133557890165, "learning_rate": 3.5375211228566335e-05, "loss": 0.29652061462402346, "loss_nan_ranks": 0, "loss_rank_avg": 0.29145967960357666, "step": 275, "valid_targets_mean": 6113.0, "valid_targets_min": 2369 }, { "epoch": 0.611353711790393, "grad_norm": 0.46963293139085044, "learning_rate": 3.5128601841806284e-05, "loss": 0.2907128810882568, "loss_nan_ranks": 0, "loss_rank_avg": 0.3028712868690491, "step": 280, "valid_targets_mean": 6179.6, "valid_targets_min": 2821 }, { "epoch": 0.6222707423580786, "grad_norm": 0.40444092526933895, "learning_rate": 3.487649488246392e-05, "loss": 0.33228726387023927, "loss_nan_ranks": 0, "loss_rank_avg": 0.33172208070755005, "step": 285, "valid_targets_mean": 9279.1, "valid_targets_min": 3331 }, { "epoch": 0.6331877729257642, "grad_norm": 0.49960335287367863, "learning_rate": 3.461898196351988e-05, "loss": 0.28729748725891113, "loss_nan_ranks": 0, "loss_rank_avg": 0.26969748735427856, "step": 290, "valid_targets_mean": 5421.1, "valid_targets_min": 1601 }, { "epoch": 0.6441048034934498, "grad_norm": 0.4671399513255991, "learning_rate": 3.435615666242286e-05, "loss": 0.30917346477508545, "loss_nan_ranks": 0, "loss_rank_avg": 0.31987130641937256, "step": 295, "valid_targets_mean": 7005.5, "valid_targets_min": 1902 }, { "epoch": 0.6550218340611353, "grad_norm": 0.5010427337859534, "learning_rate": 3.408811448708459e-05, "loss": 0.2833234786987305, "loss_nan_ranks": 0, "loss_rank_avg": 0.2702004611492157, "step": 300, "valid_targets_mean": 5127.1, "valid_targets_min": 2388 }, { "epoch": 0.665938864628821, "grad_norm": 0.46623409201577953, "learning_rate": 3.3814952841173264e-05, "loss": 0.30996222496032716, "loss_nan_ranks": 0, "loss_rank_avg": 0.31286269426345825, "step": 305, "valid_targets_mean": 6143.9, "valid_targets_min": 1949 }, { "epoch": 0.6768558951965066, "grad_norm": 0.45108845377737133, "learning_rate": 3.3536770988718085e-05, "loss": 0.30775420665740966, "loss_nan_ranks": 0, "loss_rank_avg": 0.295767605304718, "step": 310, "valid_targets_mean": 5889.1, "valid_targets_min": 2627 }, { "epoch": 0.6877729257641921, "grad_norm": 0.45843724673860725, "learning_rate": 3.3253670018037785e-05, "loss": 0.31847896575927737, "loss_nan_ranks": 0, "loss_rank_avg": 0.32040661573410034, "step": 315, "valid_targets_mean": 6830.5, "valid_targets_min": 2513 }, { "epoch": 0.6986899563318777, "grad_norm": 0.4636961184009317, "learning_rate": 3.296575280500621e-05, "loss": 0.312093448638916, "loss_nan_ranks": 0, "loss_rank_avg": 0.32836127281188965, "step": 320, "valid_targets_mean": 6474.9, "valid_targets_min": 2388 }, { "epoch": 0.7096069868995634, "grad_norm": 0.46616098960820607, "learning_rate": 3.267312397566831e-05, "loss": 0.31085991859436035, "loss_nan_ranks": 0, "loss_rank_avg": 0.3339681625366211, "step": 325, "valid_targets_mean": 6729.9, "valid_targets_min": 2334 }, { "epoch": 0.7205240174672489, "grad_norm": 0.5197078653795671, "learning_rate": 3.237588986822015e-05, "loss": 0.3105789661407471, "loss_nan_ranks": 0, "loss_rank_avg": 0.3157581686973572, "step": 330, "valid_targets_mean": 6074.1, "valid_targets_min": 1577 }, { "epoch": 0.7314410480349345, "grad_norm": 0.4909660489000409, "learning_rate": 3.207415849436676e-05, "loss": 0.29068455696105955, "loss_nan_ranks": 0, "loss_rank_avg": 0.2891122102737427, "step": 335, "valid_targets_mean": 5588.0, "valid_targets_min": 1408 }, { "epoch": 0.74235807860262, "grad_norm": 0.43015922928241795, "learning_rate": 3.176803950007183e-05, "loss": 0.29242496490478515, "loss_nan_ranks": 0, "loss_rank_avg": 0.27574315667152405, "step": 340, "valid_targets_mean": 6069.9, "valid_targets_min": 2853 }, { "epoch": 0.7532751091703057, "grad_norm": 0.6041983369675445, "learning_rate": 3.1457644125713515e-05, "loss": 0.29975247383117676, "loss_nan_ranks": 0, "loss_rank_avg": 0.30201852321624756, "step": 345, "valid_targets_mean": 6917.0, "valid_targets_min": 3361 }, { "epoch": 0.7641921397379913, "grad_norm": 0.4204388354299931, "learning_rate": 3.11430851656609e-05, "loss": 0.30193428993225097, "loss_nan_ranks": 0, "loss_rank_avg": 0.31914663314819336, "step": 350, "valid_targets_mean": 7636.1, "valid_targets_min": 1958 }, { "epoch": 0.7751091703056768, "grad_norm": 0.4504239970667579, "learning_rate": 3.082447692728569e-05, "loss": 0.31245744228363037, "loss_nan_ranks": 0, "loss_rank_avg": 0.31146177649497986, "step": 355, "valid_targets_mean": 8039.9, "valid_targets_min": 3663 }, { "epoch": 0.7860262008733624, "grad_norm": 0.46836524970683285, "learning_rate": 3.0501935189424183e-05, "loss": 0.29535226821899413, "loss_nan_ranks": 0, "loss_rank_avg": 0.2897907495498657, "step": 360, "valid_targets_mean": 5315.2, "valid_targets_min": 2710 }, { "epoch": 0.7969432314410481, "grad_norm": 0.4768673027798935, "learning_rate": 3.0175577160304414e-05, "loss": 0.30240421295166015, "loss_nan_ranks": 0, "loss_rank_avg": 0.32443946599960327, "step": 365, "valid_targets_mean": 5723.1, "valid_targets_min": 1703 }, { "epoch": 0.8078602620087336, "grad_norm": 0.5021057524759523, "learning_rate": 2.9845521434953978e-05, "loss": 0.28707261085510255, "loss_nan_ranks": 0, "loss_rank_avg": 0.3061946630477905, "step": 370, "valid_targets_mean": 5492.1, "valid_targets_min": 1687 }, { "epoch": 0.8187772925764192, "grad_norm": 0.4390211566204834, "learning_rate": 2.9511887952103856e-05, "loss": 0.2990086078643799, "loss_nan_ranks": 0, "loss_rank_avg": 0.2667640745639801, "step": 375, "valid_targets_mean": 5792.3, "valid_targets_min": 2004 }, { "epoch": 0.8296943231441049, "grad_norm": 0.42773163784932056, "learning_rate": 2.917479795060394e-05, "loss": 0.28385176658630373, "loss_nan_ranks": 0, "loss_rank_avg": 0.2780212461948395, "step": 380, "valid_targets_mean": 6261.9, "valid_targets_min": 2909 }, { "epoch": 0.8406113537117904, "grad_norm": 0.43838912776777467, "learning_rate": 2.8834373925366143e-05, "loss": 0.31748170852661134, "loss_nan_ranks": 0, "loss_rank_avg": 0.2924686372280121, "step": 385, "valid_targets_mean": 6542.3, "valid_targets_min": 2553 }, { "epoch": 0.851528384279476, "grad_norm": 0.42091706473471313, "learning_rate": 2.8490739582851064e-05, "loss": 0.3101039409637451, "loss_nan_ranks": 0, "loss_rank_avg": 0.30723536014556885, "step": 390, "valid_targets_mean": 7012.5, "valid_targets_min": 2030 }, { "epoch": 0.8624454148471615, "grad_norm": 0.4720814293496353, "learning_rate": 2.8144019796114347e-05, "loss": 0.2909966230392456, "loss_nan_ranks": 0, "loss_rank_avg": 0.3162909746170044, "step": 395, "valid_targets_mean": 5623.2, "valid_targets_min": 1638 }, { "epoch": 0.8733624454148472, "grad_norm": 0.474498487722906, "learning_rate": 2.7794340559429173e-05, "loss": 0.2948309421539307, "loss_nan_ranks": 0, "loss_rank_avg": 0.2760256230831146, "step": 400, "valid_targets_mean": 5053.5, "valid_targets_min": 2070 }, { "epoch": 0.8842794759825328, "grad_norm": 0.4086922100952553, "learning_rate": 2.7441828942501287e-05, "loss": 0.308381175994873, "loss_nan_ranks": 0, "loss_rank_avg": 0.3421776294708252, "step": 405, "valid_targets_mean": 7963.8, "valid_targets_min": 2735 }, { "epoch": 0.8951965065502183, "grad_norm": 0.49830232898771865, "learning_rate": 2.708661304429319e-05, "loss": 0.3140126705169678, "loss_nan_ranks": 0, "loss_rank_avg": 0.3323034644126892, "step": 410, "valid_targets_mean": 5652.9, "valid_targets_min": 2106 }, { "epoch": 0.9061135371179039, "grad_norm": 0.45151489153823055, "learning_rate": 2.672882194647437e-05, "loss": 0.2748565673828125, "loss_nan_ranks": 0, "loss_rank_avg": 0.2600163221359253, "step": 415, "valid_targets_mean": 5964.6, "valid_targets_min": 1781 }, { "epoch": 0.9170305676855895, "grad_norm": 0.5157292900089483, "learning_rate": 2.63685856665144e-05, "loss": 0.30159821510314944, "loss_nan_ranks": 0, "loss_rank_avg": 0.2724754214286804, "step": 420, "valid_targets_mean": 5100.9, "valid_targets_min": 1685 }, { "epoch": 0.9279475982532751, "grad_norm": 0.4154678575302912, "learning_rate": 2.600603511043596e-05, "loss": 0.2914860725402832, "loss_nan_ranks": 0, "loss_rank_avg": 0.2768551707267761, "step": 425, "valid_targets_mean": 7506.4, "valid_targets_min": 2830 }, { "epoch": 0.9388646288209607, "grad_norm": 0.43105933374082683, "learning_rate": 2.5641302025245e-05, "loss": 0.30306272506713866, "loss_nan_ranks": 0, "loss_rank_avg": 0.3191959857940674, "step": 430, "valid_targets_mean": 6967.1, "valid_targets_min": 3228 }, { "epoch": 0.9497816593886463, "grad_norm": 0.4440229733569268, "learning_rate": 2.527451895105527e-05, "loss": 0.30209167003631593, "loss_nan_ranks": 0, "loss_rank_avg": 0.3098873496055603, "step": 435, "valid_targets_mean": 6239.5, "valid_targets_min": 2650 }, { "epoch": 0.9606986899563319, "grad_norm": 0.43250142252814877, "learning_rate": 2.490581917292465e-05, "loss": 0.28155202865600587, "loss_nan_ranks": 0, "loss_rank_avg": 0.2818743586540222, "step": 440, "valid_targets_mean": 6028.2, "valid_targets_min": 2377 }, { "epoch": 0.9716157205240175, "grad_norm": 0.45575963071517345, "learning_rate": 2.4535336672420822e-05, "loss": 0.2749873161315918, "loss_nan_ranks": 0, "loss_rank_avg": 0.2577822804450989, "step": 445, "valid_targets_mean": 4985.2, "valid_targets_min": 1893 }, { "epoch": 0.982532751091703, "grad_norm": 0.42313422993586447, "learning_rate": 2.4163206078933746e-05, "loss": 0.3029885530471802, "loss_nan_ranks": 0, "loss_rank_avg": 0.3205031156539917, "step": 450, "valid_targets_mean": 6993.4, "valid_targets_min": 2421 }, { "epoch": 0.9934497816593887, "grad_norm": 0.7217873437448888, "learning_rate": 2.3789562620752794e-05, "loss": 0.2700057506561279, "loss_nan_ranks": 0, "loss_rank_avg": 0.25710558891296387, "step": 455, "valid_targets_mean": 4697.0, "valid_targets_min": 1542 }, { "epoch": 1.0043668122270741, "grad_norm": 0.4665925016494147, "learning_rate": 2.3414542075926264e-05, "loss": 0.26653170585632324, "loss_nan_ranks": 0, "loss_rank_avg": 0.23522576689720154, "step": 460, "valid_targets_mean": 5158.9, "valid_targets_min": 2722 }, { "epoch": 1.01528384279476, "grad_norm": 0.4457938137045032, "learning_rate": 2.303828072292102e-05, "loss": 0.2688295841217041, "loss_nan_ranks": 0, "loss_rank_avg": 0.27591580152511597, "step": 465, "valid_targets_mean": 7101.9, "valid_targets_min": 2103 }, { "epoch": 1.0262008733624455, "grad_norm": 0.46619185178517386, "learning_rate": 2.2660915291100405e-05, "loss": 0.2735262870788574, "loss_nan_ranks": 0, "loss_rank_avg": 0.269025057554245, "step": 470, "valid_targets_mean": 5832.4, "valid_targets_min": 1687 }, { "epoch": 1.037117903930131, "grad_norm": 0.4520987195861466, "learning_rate": 2.2282582911038233e-05, "loss": 0.2863640785217285, "loss_nan_ranks": 0, "loss_rank_avg": 0.271045446395874, "step": 475, "valid_targets_mean": 6123.1, "valid_targets_min": 3374 }, { "epoch": 1.0480349344978166, "grad_norm": 0.43499650101365894, "learning_rate": 2.1903421064686986e-05, "loss": 0.2872426986694336, "loss_nan_ranks": 0, "loss_rank_avg": 0.34755998849868774, "step": 480, "valid_targets_mean": 6883.9, "valid_targets_min": 1670 }, { "epoch": 1.0589519650655022, "grad_norm": 0.4146124676252627, "learning_rate": 2.1523567535418364e-05, "loss": 0.2680305004119873, "loss_nan_ranks": 0, "loss_rank_avg": 0.25058215856552124, "step": 485, "valid_targets_mean": 6466.3, "valid_targets_min": 3936 }, { "epoch": 1.0698689956331877, "grad_norm": 0.4635624815366992, "learning_rate": 2.1143160357954248e-05, "loss": 0.29656684398651123, "loss_nan_ranks": 0, "loss_rank_avg": 0.30588576197624207, "step": 490, "valid_targets_mean": 6585.0, "valid_targets_min": 2669 }, { "epoch": 1.0807860262008733, "grad_norm": 0.4369473108599916, "learning_rate": 2.0762337768206375e-05, "loss": 0.2733741521835327, "loss_nan_ranks": 0, "loss_rank_avg": 0.25768232345581055, "step": 495, "valid_targets_mean": 5832.3, "valid_targets_min": 1685 }, { "epoch": 1.091703056768559, "grad_norm": 0.42289930274223964, "learning_rate": 2.0381238153042893e-05, "loss": 0.29384002685546873, "loss_nan_ranks": 0, "loss_rank_avg": 0.2882288992404938, "step": 500, "valid_targets_mean": 6513.4, "valid_targets_min": 4253 }, { "epoch": 1.1026200873362446, "grad_norm": 0.45325772775592466, "learning_rate": 2e-05, "loss": 0.2764073133468628, "loss_nan_ranks": 0, "loss_rank_avg": 0.2837229371070862, "step": 505, "valid_targets_mean": 5923.6, "valid_targets_min": 2763 }, { "epoch": 1.1135371179039302, "grad_norm": 0.45930194362232246, "learning_rate": 1.961876184695711e-05, "loss": 0.26736135482788087, "loss_nan_ranks": 0, "loss_rank_avg": 0.2585921883583069, "step": 510, "valid_targets_mean": 5564.2, "valid_targets_min": 2591 }, { "epoch": 1.1244541484716157, "grad_norm": 0.4244964628962656, "learning_rate": 1.923766223179363e-05, "loss": 0.2759768009185791, "loss_nan_ranks": 0, "loss_rank_avg": 0.30197593569755554, "step": 515, "valid_targets_mean": 7331.6, "valid_targets_min": 3274 }, { "epoch": 1.1353711790393013, "grad_norm": 0.429677628258983, "learning_rate": 1.8856839642045755e-05, "loss": 0.287129545211792, "loss_nan_ranks": 0, "loss_rank_avg": 0.30640876293182373, "step": 520, "valid_targets_mean": 7008.9, "valid_targets_min": 2186 }, { "epoch": 1.1462882096069869, "grad_norm": 0.49262290067457826, "learning_rate": 1.8476432464581643e-05, "loss": 0.29739720821380616, "loss_nan_ranks": 0, "loss_rank_avg": 0.3235873579978943, "step": 525, "valid_targets_mean": 6338.9, "valid_targets_min": 3707 }, { "epoch": 1.1572052401746724, "grad_norm": 0.4795056995224667, "learning_rate": 1.8096578935313017e-05, "loss": 0.28720841407775877, "loss_nan_ranks": 0, "loss_rank_avg": 0.30534911155700684, "step": 530, "valid_targets_mean": 5817.9, "valid_targets_min": 2520 }, { "epoch": 1.1681222707423582, "grad_norm": 0.47857486223431067, "learning_rate": 1.7717417088961774e-05, "loss": 0.2760012149810791, "loss_nan_ranks": 0, "loss_rank_avg": 0.23776975274085999, "step": 535, "valid_targets_mean": 4775.9, "valid_targets_min": 2292 }, { "epoch": 1.1790393013100438, "grad_norm": 0.45146351896107023, "learning_rate": 1.7339084708899598e-05, "loss": 0.2970390796661377, "loss_nan_ranks": 0, "loss_rank_avg": 0.3195308744907379, "step": 540, "valid_targets_mean": 6972.8, "valid_targets_min": 2906 }, { "epoch": 1.1899563318777293, "grad_norm": 0.4097096575943617, "learning_rate": 1.6961719277078984e-05, "loss": 0.2921260356903076, "loss_nan_ranks": 0, "loss_rank_avg": 0.2827908992767334, "step": 545, "valid_targets_mean": 6858.3, "valid_targets_min": 3347 }, { "epoch": 1.2008733624454149, "grad_norm": 0.43593671863074857, "learning_rate": 1.658545792407374e-05, "loss": 0.2665548801422119, "loss_nan_ranks": 0, "loss_rank_avg": 0.28798261284828186, "step": 550, "valid_targets_mean": 6864.7, "valid_targets_min": 1860 }, { "epoch": 1.2117903930131004, "grad_norm": 0.44522925113185535, "learning_rate": 1.621043737924721e-05, "loss": 0.2860284328460693, "loss_nan_ranks": 0, "loss_rank_avg": 0.25376588106155396, "step": 555, "valid_targets_mean": 5837.0, "valid_targets_min": 1775 }, { "epoch": 1.222707423580786, "grad_norm": 0.3952671324511318, "learning_rate": 1.5836793921066257e-05, "loss": 0.2863175630569458, "loss_nan_ranks": 0, "loss_rank_avg": 0.3394501805305481, "step": 560, "valid_targets_mean": 8117.4, "valid_targets_min": 2995 }, { "epoch": 1.2336244541484715, "grad_norm": 0.49565654666088405, "learning_rate": 1.5464663327579184e-05, "loss": 0.2866960525512695, "loss_nan_ranks": 0, "loss_rank_avg": 0.29558897018432617, "step": 565, "valid_targets_mean": 6017.1, "valid_targets_min": 2565 }, { "epoch": 1.244541484716157, "grad_norm": 0.43450631186991545, "learning_rate": 1.5094180827075352e-05, "loss": 0.31006269454956054, "loss_nan_ranks": 0, "loss_rank_avg": 0.2496386170387268, "step": 570, "valid_targets_mean": 5905.9, "valid_targets_min": 2405 }, { "epoch": 1.2554585152838427, "grad_norm": 0.5028433691205966, "learning_rate": 1.4725481048944743e-05, "loss": 0.3058916568756104, "loss_nan_ranks": 0, "loss_rank_avg": 0.31864607334136963, "step": 575, "valid_targets_mean": 5089.2, "valid_targets_min": 1897 }, { "epoch": 1.2663755458515285, "grad_norm": 0.47742068386305536, "learning_rate": 1.4358697974755003e-05, "loss": 0.25602130889892577, "loss_nan_ranks": 0, "loss_rank_avg": 0.2617161273956299, "step": 580, "valid_targets_mean": 5747.3, "valid_targets_min": 1825 }, { "epoch": 1.277292576419214, "grad_norm": 0.440427806229025, "learning_rate": 1.3993964889564042e-05, "loss": 0.2923725605010986, "loss_nan_ranks": 0, "loss_rank_avg": 0.28836917877197266, "step": 585, "valid_targets_mean": 7605.1, "valid_targets_min": 2039 }, { "epoch": 1.2882096069868996, "grad_norm": 0.4336821253275736, "learning_rate": 1.3631414333485604e-05, "loss": 0.2683080196380615, "loss_nan_ranks": 0, "loss_rank_avg": 0.2734491229057312, "step": 590, "valid_targets_mean": 6418.4, "valid_targets_min": 1570 }, { "epoch": 1.2991266375545851, "grad_norm": 0.4322776110438146, "learning_rate": 1.3271178053525637e-05, "loss": 0.27221074104309084, "loss_nan_ranks": 0, "loss_rank_avg": 0.22799690067768097, "step": 595, "valid_targets_mean": 5739.1, "valid_targets_min": 3309 }, { "epoch": 1.3100436681222707, "grad_norm": 0.4661211489411781, "learning_rate": 1.2913386955706818e-05, "loss": 0.2867571830749512, "loss_nan_ranks": 0, "loss_rank_avg": 0.26590877771377563, "step": 600, "valid_targets_mean": 5675.8, "valid_targets_min": 2652 }, { "epoch": 1.3209606986899565, "grad_norm": 0.47773041383068804, "learning_rate": 1.2558171057498721e-05, "loss": 0.27502286434173584, "loss_nan_ranks": 0, "loss_rank_avg": 0.2569202184677124, "step": 605, "valid_targets_mean": 5278.8, "valid_targets_min": 2394 }, { "epoch": 1.3318777292576418, "grad_norm": 0.4270577800675276, "learning_rate": 1.220565944057083e-05, "loss": 0.2755491018295288, "loss_nan_ranks": 0, "loss_rank_avg": 0.30080080032348633, "step": 610, "valid_targets_mean": 7222.7, "valid_targets_min": 3406 }, { "epoch": 1.3427947598253276, "grad_norm": 0.4624647823485258, "learning_rate": 1.1855980203885655e-05, "loss": 0.27418065071105957, "loss_nan_ranks": 0, "loss_rank_avg": 0.25339916348457336, "step": 615, "valid_targets_mean": 5919.3, "valid_targets_min": 2536 }, { "epoch": 1.3537117903930131, "grad_norm": 0.44228891630532596, "learning_rate": 1.1509260417148946e-05, "loss": 0.2685377597808838, "loss_nan_ranks": 0, "loss_rank_avg": 0.22867035865783691, "step": 620, "valid_targets_mean": 6337.2, "valid_targets_min": 2942 }, { "epoch": 1.3646288209606987, "grad_norm": 0.4254300198079291, "learning_rate": 1.1165626074633855e-05, "loss": 0.24722394943237305, "loss_nan_ranks": 0, "loss_rank_avg": 0.2426937222480774, "step": 625, "valid_targets_mean": 6267.9, "valid_targets_min": 3086 }, { "epoch": 1.3755458515283843, "grad_norm": 0.4423757474549803, "learning_rate": 1.0825202049396068e-05, "loss": 0.3087230443954468, "loss_nan_ranks": 0, "loss_rank_avg": 0.3001897931098938, "step": 630, "valid_targets_mean": 7265.6, "valid_targets_min": 2503 }, { "epoch": 1.3864628820960698, "grad_norm": 0.42911160349166183, "learning_rate": 1.048811204789615e-05, "loss": 0.2663888931274414, "loss_nan_ranks": 0, "loss_rank_avg": 0.29229551553726196, "step": 635, "valid_targets_mean": 6481.4, "valid_targets_min": 2382 }, { "epoch": 1.3973799126637554, "grad_norm": 0.4465491677902793, "learning_rate": 1.0154478565046024e-05, "loss": 0.2666156768798828, "loss_nan_ranks": 0, "loss_rank_avg": 0.26371318101882935, "step": 640, "valid_targets_mean": 6268.8, "valid_targets_min": 3392 }, { "epoch": 1.408296943231441, "grad_norm": 0.40975289944754867, "learning_rate": 9.82442283969559e-06, "loss": 0.26854419708251953, "loss_nan_ranks": 0, "loss_rank_avg": 0.28493642807006836, "step": 645, "valid_targets_mean": 7381.6, "valid_targets_min": 3262 }, { "epoch": 1.4192139737991267, "grad_norm": 0.43758552780822313, "learning_rate": 9.498064810575827e-06, "loss": 0.27019357681274414, "loss_nan_ranks": 0, "loss_rank_avg": 0.26352784037590027, "step": 650, "valid_targets_mean": 5970.1, "valid_targets_min": 3286 }, { "epoch": 1.4301310043668123, "grad_norm": 0.4679112831431724, "learning_rate": 9.175523072714306e-06, "loss": 0.28339765071868894, "loss_nan_ranks": 0, "loss_rank_avg": 0.24877110123634338, "step": 655, "valid_targets_mean": 5000.4, "valid_targets_min": 2359 }, { "epoch": 1.4410480349344978, "grad_norm": 0.4125281624711557, "learning_rate": 8.856914834339108e-06, "loss": 0.2724790573120117, "loss_nan_ranks": 0, "loss_rank_avg": 0.24608677625656128, "step": 660, "valid_targets_mean": 5711.4, "valid_targets_min": 2617 }, { "epoch": 1.4519650655021834, "grad_norm": 0.39241827276959507, "learning_rate": 8.54235587428649e-06, "loss": 0.2661336898803711, "loss_nan_ranks": 0, "loss_rank_avg": 0.2594236731529236, "step": 665, "valid_targets_mean": 6546.2, "valid_targets_min": 3388 }, { "epoch": 1.462882096069869, "grad_norm": 0.4786957581674601, "learning_rate": 8.231960499928177e-06, "loss": 0.28855769634246825, "loss_nan_ranks": 0, "loss_rank_avg": 0.27102068066596985, "step": 670, "valid_targets_mean": 5566.9, "valid_targets_min": 1779 }, { "epoch": 1.4737991266375545, "grad_norm": 0.4351248782993878, "learning_rate": 7.925841505633245e-06, "loss": 0.2816459178924561, "loss_nan_ranks": 0, "loss_rank_avg": 0.31636539101600647, "step": 675, "valid_targets_mean": 6610.5, "valid_targets_min": 2329 }, { "epoch": 1.48471615720524, "grad_norm": 0.40261813997194623, "learning_rate": 7.624110131779858e-06, "loss": 0.2781404972076416, "loss_nan_ranks": 0, "loss_rank_avg": 0.24254852533340454, "step": 680, "valid_targets_mean": 6586.9, "valid_targets_min": 2475 }, { "epoch": 1.4956331877729259, "grad_norm": 0.45111341591466647, "learning_rate": 7.326876024331697e-06, "loss": 0.27823724746704104, "loss_nan_ranks": 0, "loss_rank_avg": 0.2785475254058838, "step": 685, "valid_targets_mean": 6843.3, "valid_targets_min": 2490 }, { "epoch": 1.5065502183406112, "grad_norm": 0.47989898976543627, "learning_rate": 7.034247194993799e-06, "loss": 0.2877494335174561, "loss_nan_ranks": 0, "loss_rank_avg": 0.2664431929588318, "step": 690, "valid_targets_mean": 7352.7, "valid_targets_min": 3487 }, { "epoch": 1.517467248908297, "grad_norm": 0.3793878920198028, "learning_rate": 6.746329981962216e-06, "loss": 0.30165538787841795, "loss_nan_ranks": 0, "loss_rank_avg": 0.2932560443878174, "step": 695, "valid_targets_mean": 7878.4, "valid_targets_min": 2039 }, { "epoch": 1.5283842794759825, "grad_norm": 0.4335070163306581, "learning_rate": 6.4632290112819244e-06, "loss": 0.2659743785858154, "loss_nan_ranks": 0, "loss_rank_avg": 0.2679065465927124, "step": 700, "valid_targets_mean": 5860.9, "valid_targets_min": 1837 }, { "epoch": 1.539301310043668, "grad_norm": 0.4108838574380644, "learning_rate": 6.185047158826745e-06, "loss": 0.2714824199676514, "loss_nan_ranks": 0, "loss_rank_avg": 0.2939089834690094, "step": 705, "valid_targets_mean": 6828.3, "valid_targets_min": 1506 }, { "epoch": 1.5502183406113537, "grad_norm": 0.4008351160991761, "learning_rate": 5.911885512915414e-06, "loss": 0.26355881690979005, "loss_nan_ranks": 0, "loss_rank_avg": 0.2693825662136078, "step": 710, "valid_targets_mean": 7229.0, "valid_targets_min": 3708 }, { "epoch": 1.5611353711790392, "grad_norm": 0.4520523192209753, "learning_rate": 5.643843337577142e-06, "loss": 0.2929125547409058, "loss_nan_ranks": 0, "loss_rank_avg": 0.37793591618537903, "step": 715, "valid_targets_mean": 7321.9, "valid_targets_min": 2200 }, { "epoch": 1.572052401746725, "grad_norm": 0.3712017517805635, "learning_rate": 5.381018036480122e-06, "loss": 0.26763505935668946, "loss_nan_ranks": 0, "loss_rank_avg": 0.22761614620685577, "step": 720, "valid_targets_mean": 6550.9, "valid_targets_min": 1700 }, { "epoch": 1.5829694323144103, "grad_norm": 0.4301318981081664, "learning_rate": 5.12350511753608e-06, "loss": 0.2855483293533325, "loss_nan_ranks": 0, "loss_rank_avg": 0.2551010847091675, "step": 725, "valid_targets_mean": 5807.2, "valid_targets_min": 2923 }, { "epoch": 1.5938864628820961, "grad_norm": 0.41490359370729274, "learning_rate": 4.871398158193716e-06, "loss": 0.28847355842590333, "loss_nan_ranks": 0, "loss_rank_avg": 0.27511632442474365, "step": 730, "valid_targets_mean": 6769.8, "valid_targets_min": 2809 }, { "epoch": 1.6048034934497817, "grad_norm": 0.4121468644866119, "learning_rate": 4.624788771433663e-06, "loss": 0.27226307392120364, "loss_nan_ranks": 0, "loss_rank_avg": 0.2759858965873718, "step": 735, "valid_targets_mean": 6813.2, "valid_targets_min": 2471 }, { "epoch": 1.6157205240174672, "grad_norm": 0.4381181843001764, "learning_rate": 4.383766572477299e-06, "loss": 0.28523242473602295, "loss_nan_ranks": 0, "loss_rank_avg": 0.3504903316497803, "step": 740, "valid_targets_mean": 8048.2, "valid_targets_min": 1949 }, { "epoch": 1.6266375545851528, "grad_norm": 0.42372212637255846, "learning_rate": 4.148419146221539e-06, "loss": 0.287901782989502, "loss_nan_ranks": 0, "loss_rank_avg": 0.2580595314502716, "step": 745, "valid_targets_mean": 6647.2, "valid_targets_min": 2654 }, { "epoch": 1.6375545851528384, "grad_norm": 0.4657499537869996, "learning_rate": 3.918832015411386e-06, "loss": 0.2840440273284912, "loss_nan_ranks": 0, "loss_rank_avg": 0.278178334236145, "step": 750, "valid_targets_mean": 6806.5, "valid_targets_min": 2223 }, { "epoch": 1.6484716157205241, "grad_norm": 0.4480572329984135, "learning_rate": 3.6950886095619565e-06, "loss": 0.29393668174743653, "loss_nan_ranks": 0, "loss_rank_avg": 0.26252424716949463, "step": 755, "valid_targets_mean": 6345.5, "valid_targets_min": 2342 }, { "epoch": 1.6593886462882095, "grad_norm": 0.42975475708025695, "learning_rate": 3.4772702346409816e-06, "loss": 0.27251133918762205, "loss_nan_ranks": 0, "loss_rank_avg": 0.3180385231971741, "step": 760, "valid_targets_mean": 7213.9, "valid_targets_min": 2377 }, { "epoch": 1.6703056768558953, "grad_norm": 0.43713096004853763, "learning_rate": 3.2654560435231587e-06, "loss": 0.2740299940109253, "loss_nan_ranks": 0, "loss_rank_avg": 0.27496880292892456, "step": 765, "valid_targets_mean": 6637.6, "valid_targets_min": 2710 }, { "epoch": 1.6812227074235808, "grad_norm": 0.43391330278676915, "learning_rate": 3.0597230072268204e-06, "loss": 0.2601801872253418, "loss_nan_ranks": 0, "loss_rank_avg": 0.2714815139770508, "step": 770, "valid_targets_mean": 6418.2, "valid_targets_min": 2034 }, { "epoch": 1.6921397379912664, "grad_norm": 0.3928691708213179, "learning_rate": 2.8601458869434797e-06, "loss": 0.2689314842224121, "loss_nan_ranks": 0, "loss_rank_avg": 0.2807304561138153, "step": 775, "valid_targets_mean": 7170.4, "valid_targets_min": 2090 }, { "epoch": 1.703056768558952, "grad_norm": 0.4651589323488624, "learning_rate": 2.6667972068704062e-06, "loss": 0.2841538906097412, "loss_nan_ranks": 0, "loss_rank_avg": 0.2884107828140259, "step": 780, "valid_targets_mean": 6856.2, "valid_targets_min": 2351 }, { "epoch": 1.7139737991266375, "grad_norm": 0.39666158424529135, "learning_rate": 2.4797472278561373e-06, "loss": 0.27666077613830564, "loss_nan_ranks": 0, "loss_rank_avg": 0.24769391119480133, "step": 785, "valid_targets_mean": 6501.9, "valid_targets_min": 1955 }, { "epoch": 1.7248908296943233, "grad_norm": 0.4306895111327366, "learning_rate": 2.2990639218683896e-06, "loss": 0.25469510555267333, "loss_nan_ranks": 0, "loss_rank_avg": 0.2645609974861145, "step": 790, "valid_targets_mean": 6159.2, "valid_targets_min": 2270 }, { "epoch": 1.7358078602620086, "grad_norm": 0.49308166187707725, "learning_rate": 2.1248129472938108e-06, "loss": 0.2721285581588745, "loss_nan_ranks": 0, "loss_rank_avg": 0.2587571144104004, "step": 795, "valid_targets_mean": 4831.8, "valid_targets_min": 2435 }, { "epoch": 1.7467248908296944, "grad_norm": 0.42216486791928226, "learning_rate": 1.9570576250783956e-06, "loss": 0.27977159023284914, "loss_nan_ranks": 0, "loss_rank_avg": 0.26291507482528687, "step": 800, "valid_targets_mean": 6199.1, "valid_targets_min": 2622 }, { "epoch": 1.75764192139738, "grad_norm": 0.4654268877720134, "learning_rate": 1.7958589157173477e-06, "loss": 0.2724721431732178, "loss_nan_ranks": 0, "loss_rank_avg": 0.259472131729126, "step": 805, "valid_targets_mean": 5427.6, "valid_targets_min": 2346 }, { "epoch": 1.7685589519650655, "grad_norm": 0.37814330494778786, "learning_rate": 1.6412753971026706e-06, "loss": 0.2536904335021973, "loss_nan_ranks": 0, "loss_rank_avg": 0.2878243327140808, "step": 810, "valid_targets_mean": 7845.9, "valid_targets_min": 3773 }, { "epoch": 1.779475982532751, "grad_norm": 0.3774653954339668, "learning_rate": 1.493363243236594e-06, "loss": 0.2921349287033081, "loss_nan_ranks": 0, "loss_rank_avg": 0.29652535915374756, "step": 815, "valid_targets_mean": 7847.5, "valid_targets_min": 3211 }, { "epoch": 1.7903930131004366, "grad_norm": 0.4249004972385194, "learning_rate": 1.3521762038185427e-06, "loss": 0.265821647644043, "loss_nan_ranks": 0, "loss_rank_avg": 0.2325274795293808, "step": 820, "valid_targets_mean": 5882.7, "valid_targets_min": 2332 }, { "epoch": 1.8013100436681224, "grad_norm": 0.44849592528154936, "learning_rate": 1.2177655847130642e-06, "loss": 0.2603058576583862, "loss_nan_ranks": 0, "loss_rank_avg": 0.2660709321498871, "step": 825, "valid_targets_mean": 6299.3, "valid_targets_min": 2513 }, { "epoch": 1.8122270742358078, "grad_norm": 0.4412332022589713, "learning_rate": 1.090180229305826e-06, "loss": 0.26507813930511476, "loss_nan_ranks": 0, "loss_rank_avg": 0.26395004987716675, "step": 830, "valid_targets_mean": 5745.1, "valid_targets_min": 3093 }, { "epoch": 1.8231441048034935, "grad_norm": 0.441183820760464, "learning_rate": 9.694665007544457e-07, "loss": 0.26718969345092775, "loss_nan_ranks": 0, "loss_rank_avg": 0.28944191336631775, "step": 835, "valid_targets_mean": 6187.9, "valid_targets_min": 1934 }, { "epoch": 1.8340611353711789, "grad_norm": 0.4769369701160225, "learning_rate": 8.5566826514063e-07, "loss": 0.27736477851867675, "loss_nan_ranks": 0, "loss_rank_avg": 0.24203220009803772, "step": 840, "valid_targets_mean": 5243.2, "valid_targets_min": 1786 }, { "epoch": 1.8449781659388647, "grad_norm": 0.40690149870472064, "learning_rate": 7.488268755296823e-07, "loss": 0.28839449882507323, "loss_nan_ranks": 0, "loss_rank_avg": 0.30097952485084534, "step": 845, "valid_targets_mean": 7257.2, "valid_targets_min": 2494 }, { "epoch": 1.8558951965065502, "grad_norm": 0.3987127764542192, "learning_rate": 6.489811569432647e-07, "loss": 0.2919907093048096, "loss_nan_ranks": 0, "loss_rank_avg": 0.27398210763931274, "step": 850, "valid_targets_mean": 7581.4, "valid_targets_min": 2821 }, { "epoch": 1.8668122270742358, "grad_norm": 0.4407039941358569, "learning_rate": 5.561673922507882e-07, "loss": 0.28237299919128417, "loss_nan_ranks": 0, "loss_rank_avg": 0.2721756398677826, "step": 855, "valid_targets_mean": 6709.9, "valid_targets_min": 1880 }, { "epoch": 1.8777292576419216, "grad_norm": 0.4360586080010118, "learning_rate": 4.7041930898459986e-07, "loss": 0.2742650270462036, "loss_nan_ranks": 0, "loss_rank_avg": 0.29414263367652893, "step": 860, "valid_targets_mean": 6460.5, "valid_targets_min": 2630 }, { "epoch": 1.888646288209607, "grad_norm": 0.5559141110463411, "learning_rate": 3.9176806708376647e-07, "loss": 0.27784156799316406, "loss_nan_ranks": 0, "loss_rank_avg": 0.29185181856155396, "step": 865, "valid_targets_mean": 7061.6, "valid_targets_min": 1785 }, { "epoch": 1.8995633187772927, "grad_norm": 0.43404638098301274, "learning_rate": 3.202422475708855e-07, "loss": 0.2740302085876465, "loss_nan_ranks": 0, "loss_rank_avg": 0.30367159843444824, "step": 870, "valid_targets_mean": 6377.9, "valid_targets_min": 1835 }, { "epoch": 1.910480349344978, "grad_norm": 0.4463157707741355, "learning_rate": 2.5586784216602876e-07, "loss": 0.26655850410461424, "loss_nan_ranks": 0, "loss_rank_avg": 0.26920467615127563, "step": 875, "valid_targets_mean": 6093.8, "valid_targets_min": 1974 }, { "epoch": 1.9213973799126638, "grad_norm": 0.45629272669606924, "learning_rate": 1.9866824384165984e-07, "loss": 0.2759099006652832, "loss_nan_ranks": 0, "loss_rank_avg": 0.2627640962600708, "step": 880, "valid_targets_mean": 5625.0, "valid_targets_min": 2321 }, { "epoch": 1.9323144104803494, "grad_norm": 0.4414494821171371, "learning_rate": 1.4866423832184285e-07, "loss": 0.29251482486724856, "loss_nan_ranks": 0, "loss_rank_avg": 0.35426586866378784, "step": 885, "valid_targets_mean": 6907.3, "valid_targets_min": 2846 }, { "epoch": 1.943231441048035, "grad_norm": 0.4453099247422022, "learning_rate": 1.0587399652895791e-07, "loss": 0.2867130279541016, "loss_nan_ranks": 0, "loss_rank_avg": 0.2867542803287506, "step": 890, "valid_targets_mean": 6339.5, "valid_targets_min": 2401 }, { "epoch": 1.9541484716157205, "grad_norm": 0.4786735695112415, "learning_rate": 7.031306798057192e-08, "loss": 0.2892791748046875, "loss_nan_ranks": 0, "loss_rank_avg": 0.38508570194244385, "step": 895, "valid_targets_mean": 6240.5, "valid_targets_min": 2163 }, { "epoch": 1.965065502183406, "grad_norm": 0.46985380653675574, "learning_rate": 4.199437513890514e-08, "loss": 0.26402432918548585, "loss_nan_ranks": 0, "loss_rank_avg": 0.2793194055557251, "step": 900, "valid_targets_mean": 5158.8, "valid_targets_min": 2004 }, { "epoch": 1.9759825327510918, "grad_norm": 0.556719131410453, "learning_rate": 2.0928208714954267e-08, "loss": 0.2528956174850464, "loss_nan_ranks": 0, "loss_rank_avg": 0.2757381498813629, "step": 905, "valid_targets_mean": 6110.4, "valid_targets_min": 2082 }, { "epoch": 1.9868995633187772, "grad_norm": 0.42994401194705534, "learning_rate": 7.122223928954786e-09, "loss": 0.27558696269989014, "loss_nan_ranks": 0, "loss_rank_avg": 0.27204233407974243, "step": 910, "valid_targets_mean": 6037.2, "valid_targets_min": 2161 }, { "epoch": 1.997816593886463, "grad_norm": 0.4320570802820311, "learning_rate": 5.814377285573436e-10, "loss": 0.27978763580322263, "loss_nan_ranks": 0, "loss_rank_avg": 0.30974888801574707, "step": 915, "valid_targets_mean": 6158.3, "valid_targets_min": 1983 }, { "epoch": 2.0, "loss_nan_ranks": 0, "loss_rank_avg": 0.23816466331481934, "step": 916, "total_flos": 504378422657024.0, "train_loss": 0.31247848031562486, "train_runtime": 13244.7102, "train_samples_per_second": 1.105, "train_steps_per_second": 0.069, "valid_targets_mean": 6961.6, "valid_targets_min": 2674 } ], "logging_steps": 5, "max_steps": 916, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 1500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 504378422657024.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }