{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 7177, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00013934369121438027, "grad_norm": 8.418368339538574, "learning_rate": 0.0, "loss": 0.7392, "step": 1 }, { "epoch": 0.00027868738242876054, "grad_norm": 10.15345287322998, "learning_rate": 1.953125e-08, "loss": 0.7479, "step": 2 }, { "epoch": 0.0004180310736431408, "grad_norm": 9.759095191955566, "learning_rate": 3.90625e-08, "loss": 0.7443, "step": 3 }, { "epoch": 0.0005573747648575211, "grad_norm": 9.811012268066406, "learning_rate": 5.859375000000001e-08, "loss": 0.7443, "step": 4 }, { "epoch": 0.0006967184560719013, "grad_norm": 10.611157417297363, "learning_rate": 7.8125e-08, "loss": 0.752, "step": 5 }, { "epoch": 0.0008360621472862816, "grad_norm": 9.444116592407227, "learning_rate": 9.765625e-08, "loss": 0.7434, "step": 6 }, { "epoch": 0.0009754058385006619, "grad_norm": 10.364922523498535, "learning_rate": 1.1718750000000002e-07, "loss": 0.7478, "step": 7 }, { "epoch": 0.0011147495297150422, "grad_norm": 10.462395668029785, "learning_rate": 1.3671875000000001e-07, "loss": 0.75, "step": 8 }, { "epoch": 0.0012540932209294225, "grad_norm": 10.068516731262207, "learning_rate": 1.5625e-07, "loss": 0.7463, "step": 9 }, { "epoch": 0.0013934369121438026, "grad_norm": 10.155673027038574, "learning_rate": 1.7578125e-07, "loss": 0.7487, "step": 10 }, { "epoch": 0.001532780603358183, "grad_norm": 9.941401481628418, "learning_rate": 1.953125e-07, "loss": 0.7471, "step": 11 }, { "epoch": 0.0016721242945725633, "grad_norm": 9.910322189331055, "learning_rate": 2.1484375e-07, "loss": 0.7451, "step": 12 }, { "epoch": 0.0018114679857869436, "grad_norm": 9.858898162841797, "learning_rate": 2.3437500000000003e-07, "loss": 0.7441, "step": 13 }, { "epoch": 0.0019508116770013237, "grad_norm": 10.202731132507324, "learning_rate": 2.5390625000000003e-07, "loss": 0.7469, "step": 14 }, { "epoch": 0.0020901553682157042, "grad_norm": 9.71875, "learning_rate": 2.7343750000000003e-07, "loss": 0.7432, "step": 15 }, { "epoch": 0.0022294990594300844, "grad_norm": 10.0158052444458, "learning_rate": 2.9296875000000003e-07, "loss": 0.7438, "step": 16 }, { "epoch": 0.0023688427506444645, "grad_norm": 9.542566299438477, "learning_rate": 3.125e-07, "loss": 0.7436, "step": 17 }, { "epoch": 0.002508186441858845, "grad_norm": 10.09278392791748, "learning_rate": 3.3203125e-07, "loss": 0.746, "step": 18 }, { "epoch": 0.002647530133073225, "grad_norm": 10.34857177734375, "learning_rate": 3.515625e-07, "loss": 0.7468, "step": 19 }, { "epoch": 0.0027868738242876052, "grad_norm": 9.3331298828125, "learning_rate": 3.7109375e-07, "loss": 0.7414, "step": 20 }, { "epoch": 0.0029262175155019858, "grad_norm": 10.089090347290039, "learning_rate": 3.90625e-07, "loss": 0.7424, "step": 21 }, { "epoch": 0.003065561206716366, "grad_norm": 9.910309791564941, "learning_rate": 4.1015625e-07, "loss": 0.7386, "step": 22 }, { "epoch": 0.003204904897930746, "grad_norm": 10.325440406799316, "learning_rate": 4.296875e-07, "loss": 0.7385, "step": 23 }, { "epoch": 0.0033442485891451265, "grad_norm": 9.854129791259766, "learning_rate": 4.4921875e-07, "loss": 0.7352, "step": 24 }, { "epoch": 0.0034835922803595066, "grad_norm": 9.582125663757324, "learning_rate": 4.6875000000000006e-07, "loss": 0.7336, "step": 25 }, { "epoch": 0.003622935971573887, "grad_norm": 9.579532623291016, "learning_rate": 4.8828125e-07, "loss": 0.7327, "step": 26 }, { "epoch": 0.0037622796627882673, "grad_norm": 9.505870819091797, "learning_rate": 5.078125000000001e-07, "loss": 0.7302, "step": 27 }, { "epoch": 0.0039016233540026474, "grad_norm": 10.382901191711426, "learning_rate": 5.2734375e-07, "loss": 0.7359, "step": 28 }, { "epoch": 0.0040409670452170275, "grad_norm": 9.743589401245117, "learning_rate": 5.468750000000001e-07, "loss": 0.731, "step": 29 }, { "epoch": 0.0041803107364314085, "grad_norm": 9.844606399536133, "learning_rate": 5.6640625e-07, "loss": 0.7204, "step": 30 }, { "epoch": 0.004319654427645789, "grad_norm": 9.851621627807617, "learning_rate": 5.859375000000001e-07, "loss": 0.7189, "step": 31 }, { "epoch": 0.004458998118860169, "grad_norm": 10.291962623596191, "learning_rate": 6.0546875e-07, "loss": 0.7255, "step": 32 }, { "epoch": 0.004598341810074549, "grad_norm": 9.620966911315918, "learning_rate": 6.25e-07, "loss": 0.6998, "step": 33 }, { "epoch": 0.004737685501288929, "grad_norm": 8.927258491516113, "learning_rate": 6.445312500000001e-07, "loss": 0.6968, "step": 34 }, { "epoch": 0.004877029192503309, "grad_norm": 9.370047569274902, "learning_rate": 6.640625e-07, "loss": 0.6948, "step": 35 }, { "epoch": 0.00501637288371769, "grad_norm": 9.377239227294922, "learning_rate": 6.835937500000001e-07, "loss": 0.6935, "step": 36 }, { "epoch": 0.00515571657493207, "grad_norm": 8.534720420837402, "learning_rate": 7.03125e-07, "loss": 0.6898, "step": 37 }, { "epoch": 0.00529506026614645, "grad_norm": 8.93862247467041, "learning_rate": 7.226562500000001e-07, "loss": 0.6877, "step": 38 }, { "epoch": 0.00543440395736083, "grad_norm": 9.523263931274414, "learning_rate": 7.421875e-07, "loss": 0.6863, "step": 39 }, { "epoch": 0.0055737476485752105, "grad_norm": 9.235346794128418, "learning_rate": 7.617187500000001e-07, "loss": 0.687, "step": 40 }, { "epoch": 0.005713091339789591, "grad_norm": 8.30202579498291, "learning_rate": 7.8125e-07, "loss": 0.676, "step": 41 }, { "epoch": 0.0058524350310039715, "grad_norm": 8.58492660522461, "learning_rate": 8.007812500000001e-07, "loss": 0.6597, "step": 42 }, { "epoch": 0.005991778722218352, "grad_norm": 7.814978122711182, "learning_rate": 8.203125e-07, "loss": 0.6592, "step": 43 }, { "epoch": 0.006131122413432732, "grad_norm": 8.537942886352539, "learning_rate": 8.398437500000001e-07, "loss": 0.6558, "step": 44 }, { "epoch": 0.006270466104647112, "grad_norm": 7.438014030456543, "learning_rate": 8.59375e-07, "loss": 0.6526, "step": 45 }, { "epoch": 0.006409809795861492, "grad_norm": 7.786512851715088, "learning_rate": 8.789062500000001e-07, "loss": 0.6111, "step": 46 }, { "epoch": 0.006549153487075873, "grad_norm": 6.062485694885254, "learning_rate": 8.984375e-07, "loss": 0.6093, "step": 47 }, { "epoch": 0.006688497178290253, "grad_norm": 6.930081844329834, "learning_rate": 9.179687500000001e-07, "loss": 0.5897, "step": 48 }, { "epoch": 0.006827840869504633, "grad_norm": 7.747795581817627, "learning_rate": 9.375000000000001e-07, "loss": 0.5869, "step": 49 }, { "epoch": 0.006967184560719013, "grad_norm": 6.641242027282715, "learning_rate": 9.570312500000002e-07, "loss": 0.5858, "step": 50 }, { "epoch": 0.007106528251933393, "grad_norm": 6.912637710571289, "learning_rate": 9.765625e-07, "loss": 0.5871, "step": 51 }, { "epoch": 0.007245871943147774, "grad_norm": 6.703718185424805, "learning_rate": 9.9609375e-07, "loss": 0.577, "step": 52 }, { "epoch": 0.0073852156343621545, "grad_norm": 7.151025772094727, "learning_rate": 1.0156250000000001e-06, "loss": 0.566, "step": 53 }, { "epoch": 0.007524559325576535, "grad_norm": 6.451506614685059, "learning_rate": 1.0351562500000002e-06, "loss": 0.5762, "step": 54 }, { "epoch": 0.007663903016790915, "grad_norm": 6.565618515014648, "learning_rate": 1.0546875e-06, "loss": 0.5698, "step": 55 }, { "epoch": 0.007803246708005295, "grad_norm": 5.815990447998047, "learning_rate": 1.07421875e-06, "loss": 0.5706, "step": 56 }, { "epoch": 0.007942590399219676, "grad_norm": 5.818483352661133, "learning_rate": 1.0937500000000001e-06, "loss": 0.5529, "step": 57 }, { "epoch": 0.008081934090434055, "grad_norm": 6.59625244140625, "learning_rate": 1.1132812500000002e-06, "loss": 0.5289, "step": 58 }, { "epoch": 0.008221277781648436, "grad_norm": 6.386470794677734, "learning_rate": 1.1328125e-06, "loss": 0.519, "step": 59 }, { "epoch": 0.008360621472862817, "grad_norm": 5.449082374572754, "learning_rate": 1.15234375e-06, "loss": 0.5155, "step": 60 }, { "epoch": 0.008499965164077196, "grad_norm": 5.186278343200684, "learning_rate": 1.1718750000000001e-06, "loss": 0.4965, "step": 61 }, { "epoch": 0.008639308855291577, "grad_norm": 4.745887756347656, "learning_rate": 1.1914062500000002e-06, "loss": 0.4804, "step": 62 }, { "epoch": 0.008778652546505956, "grad_norm": 3.5234646797180176, "learning_rate": 1.2109375e-06, "loss": 0.5002, "step": 63 }, { "epoch": 0.008917996237720337, "grad_norm": 4.352346897125244, "learning_rate": 1.23046875e-06, "loss": 0.443, "step": 64 }, { "epoch": 0.009057339928934717, "grad_norm": 3.998364210128784, "learning_rate": 1.25e-06, "loss": 0.4446, "step": 65 }, { "epoch": 0.009196683620149098, "grad_norm": 4.418766498565674, "learning_rate": 1.2695312500000002e-06, "loss": 0.4109, "step": 66 }, { "epoch": 0.009336027311363479, "grad_norm": 4.533385753631592, "learning_rate": 1.2890625000000002e-06, "loss": 0.4079, "step": 67 }, { "epoch": 0.009475371002577858, "grad_norm": 4.2911858558654785, "learning_rate": 1.30859375e-06, "loss": 0.3998, "step": 68 }, { "epoch": 0.009614714693792239, "grad_norm": 3.3334152698516846, "learning_rate": 1.328125e-06, "loss": 0.4377, "step": 69 }, { "epoch": 0.009754058385006618, "grad_norm": 4.104737281799316, "learning_rate": 1.3476562500000001e-06, "loss": 0.3895, "step": 70 }, { "epoch": 0.009893402076220999, "grad_norm": 2.776451349258423, "learning_rate": 1.3671875000000002e-06, "loss": 0.4407, "step": 71 }, { "epoch": 0.01003274576743538, "grad_norm": 3.7980704307556152, "learning_rate": 1.38671875e-06, "loss": 0.3873, "step": 72 }, { "epoch": 0.01017208945864976, "grad_norm": 1.6591250896453857, "learning_rate": 1.40625e-06, "loss": 0.4872, "step": 73 }, { "epoch": 0.01031143314986414, "grad_norm": 3.578198194503784, "learning_rate": 1.4257812500000001e-06, "loss": 0.391, "step": 74 }, { "epoch": 0.01045077684107852, "grad_norm": 3.7660505771636963, "learning_rate": 1.4453125000000002e-06, "loss": 0.3651, "step": 75 }, { "epoch": 0.0105901205322929, "grad_norm": 2.4438931941986084, "learning_rate": 1.46484375e-06, "loss": 0.4181, "step": 76 }, { "epoch": 0.010729464223507281, "grad_norm": 2.5131750106811523, "learning_rate": 1.484375e-06, "loss": 0.4045, "step": 77 }, { "epoch": 0.01086880791472166, "grad_norm": 2.6990439891815186, "learning_rate": 1.5039062500000001e-06, "loss": 0.3847, "step": 78 }, { "epoch": 0.011008151605936042, "grad_norm": 2.498847007751465, "learning_rate": 1.5234375000000002e-06, "loss": 0.3827, "step": 79 }, { "epoch": 0.011147495297150421, "grad_norm": 2.230093002319336, "learning_rate": 1.54296875e-06, "loss": 0.385, "step": 80 }, { "epoch": 0.011286838988364802, "grad_norm": 1.4546222686767578, "learning_rate": 1.5625e-06, "loss": 0.4237, "step": 81 }, { "epoch": 0.011426182679579183, "grad_norm": 1.6985565423965454, "learning_rate": 1.5820312500000001e-06, "loss": 0.3978, "step": 82 }, { "epoch": 0.011565526370793562, "grad_norm": 0.9925466179847717, "learning_rate": 1.6015625000000002e-06, "loss": 0.4332, "step": 83 }, { "epoch": 0.011704870062007943, "grad_norm": 1.967852234840393, "learning_rate": 1.6210937500000002e-06, "loss": 0.3563, "step": 84 }, { "epoch": 0.011844213753222322, "grad_norm": 1.8508011102676392, "learning_rate": 1.640625e-06, "loss": 0.3552, "step": 85 }, { "epoch": 0.011983557444436703, "grad_norm": 1.927220344543457, "learning_rate": 1.6601562500000001e-06, "loss": 0.3435, "step": 86 }, { "epoch": 0.012122901135651083, "grad_norm": 1.8627420663833618, "learning_rate": 1.6796875000000002e-06, "loss": 0.3378, "step": 87 }, { "epoch": 0.012262244826865464, "grad_norm": 1.2966957092285156, "learning_rate": 1.6992187500000002e-06, "loss": 0.3677, "step": 88 }, { "epoch": 0.012401588518079844, "grad_norm": 1.185817003250122, "learning_rate": 1.71875e-06, "loss": 0.3686, "step": 89 }, { "epoch": 0.012540932209294224, "grad_norm": 1.6611731052398682, "learning_rate": 1.7382812500000001e-06, "loss": 0.3187, "step": 90 }, { "epoch": 0.012680275900508605, "grad_norm": 1.1184557676315308, "learning_rate": 1.7578125000000002e-06, "loss": 0.3519, "step": 91 }, { "epoch": 0.012819619591722984, "grad_norm": 1.53602933883667, "learning_rate": 1.7773437500000002e-06, "loss": 0.3112, "step": 92 }, { "epoch": 0.012958963282937365, "grad_norm": 0.47965794801712036, "learning_rate": 1.796875e-06, "loss": 0.4405, "step": 93 }, { "epoch": 0.013098306974151746, "grad_norm": 1.1964378356933594, "learning_rate": 1.81640625e-06, "loss": 0.327, "step": 94 }, { "epoch": 0.013237650665366125, "grad_norm": 1.1732618808746338, "learning_rate": 1.8359375000000002e-06, "loss": 0.3289, "step": 95 }, { "epoch": 0.013376994356580506, "grad_norm": 1.518040418624878, "learning_rate": 1.8554687500000002e-06, "loss": 0.3006, "step": 96 }, { "epoch": 0.013516338047794885, "grad_norm": 0.9585156440734863, "learning_rate": 1.8750000000000003e-06, "loss": 0.3375, "step": 97 }, { "epoch": 0.013655681739009266, "grad_norm": 0.8591076731681824, "learning_rate": 1.89453125e-06, "loss": 0.3407, "step": 98 }, { "epoch": 0.013795025430223647, "grad_norm": 0.382030189037323, "learning_rate": 1.9140625000000004e-06, "loss": 0.3779, "step": 99 }, { "epoch": 0.013934369121438027, "grad_norm": 0.546159565448761, "learning_rate": 1.93359375e-06, "loss": 0.3512, "step": 100 }, { "epoch": 0.014073712812652408, "grad_norm": 1.0804990530014038, "learning_rate": 1.953125e-06, "loss": 0.2928, "step": 101 }, { "epoch": 0.014213056503866787, "grad_norm": 0.42944350838661194, "learning_rate": 1.97265625e-06, "loss": 0.4103, "step": 102 }, { "epoch": 0.014352400195081168, "grad_norm": 0.7951771020889282, "learning_rate": 1.9921875e-06, "loss": 0.3325, "step": 103 }, { "epoch": 0.014491743886295549, "grad_norm": 0.6192995309829712, "learning_rate": 2.01171875e-06, "loss": 0.3393, "step": 104 }, { "epoch": 0.014631087577509928, "grad_norm": 0.9277344346046448, "learning_rate": 2.0312500000000002e-06, "loss": 0.304, "step": 105 }, { "epoch": 0.014770431268724309, "grad_norm": 0.563682496547699, "learning_rate": 2.0507812500000003e-06, "loss": 0.3388, "step": 106 }, { "epoch": 0.014909774959938688, "grad_norm": 0.6028745174407959, "learning_rate": 2.0703125000000003e-06, "loss": 0.3338, "step": 107 }, { "epoch": 0.01504911865115307, "grad_norm": 1.1696522235870361, "learning_rate": 2.08984375e-06, "loss": 0.2618, "step": 108 }, { "epoch": 0.01518846234236745, "grad_norm": 0.6657910943031311, "learning_rate": 2.109375e-06, "loss": 0.3202, "step": 109 }, { "epoch": 0.01532780603358183, "grad_norm": 0.6566706895828247, "learning_rate": 2.12890625e-06, "loss": 0.3169, "step": 110 }, { "epoch": 0.01546714972479621, "grad_norm": 0.8761516213417053, "learning_rate": 2.1484375e-06, "loss": 0.2913, "step": 111 }, { "epoch": 0.01560649341601059, "grad_norm": 0.7856684327125549, "learning_rate": 2.16796875e-06, "loss": 0.319, "step": 112 }, { "epoch": 0.01574583710722497, "grad_norm": 0.6466297507286072, "learning_rate": 2.1875000000000002e-06, "loss": 0.3745, "step": 113 }, { "epoch": 0.01588518079843935, "grad_norm": 1.3271493911743164, "learning_rate": 2.2070312500000003e-06, "loss": 0.3161, "step": 114 }, { "epoch": 0.016024524489653733, "grad_norm": 1.0889081954956055, "learning_rate": 2.2265625000000003e-06, "loss": 0.2938, "step": 115 }, { "epoch": 0.01616386818086811, "grad_norm": 1.3634874820709229, "learning_rate": 2.2460937500000004e-06, "loss": 0.3705, "step": 116 }, { "epoch": 0.01630321187208249, "grad_norm": 1.1092658042907715, "learning_rate": 2.265625e-06, "loss": 0.2776, "step": 117 }, { "epoch": 0.016442555563296872, "grad_norm": 1.486769437789917, "learning_rate": 2.28515625e-06, "loss": 0.2686, "step": 118 }, { "epoch": 0.016581899254511253, "grad_norm": 1.491504430770874, "learning_rate": 2.3046875e-06, "loss": 0.3351, "step": 119 }, { "epoch": 0.016721242945725634, "grad_norm": 0.8927040696144104, "learning_rate": 2.32421875e-06, "loss": 0.3443, "step": 120 }, { "epoch": 0.01686058663694001, "grad_norm": 0.9576985239982605, "learning_rate": 2.3437500000000002e-06, "loss": 0.3447, "step": 121 }, { "epoch": 0.016999930328154392, "grad_norm": 1.0377744436264038, "learning_rate": 2.3632812500000003e-06, "loss": 0.2977, "step": 122 }, { "epoch": 0.017139274019368773, "grad_norm": 2.3870327472686768, "learning_rate": 2.3828125000000003e-06, "loss": 0.2733, "step": 123 }, { "epoch": 0.017278617710583154, "grad_norm": 0.8880771994590759, "learning_rate": 2.4023437500000004e-06, "loss": 0.3152, "step": 124 }, { "epoch": 0.017417961401797532, "grad_norm": 1.1455634832382202, "learning_rate": 2.421875e-06, "loss": 0.258, "step": 125 }, { "epoch": 0.017557305093011913, "grad_norm": 1.2236453294754028, "learning_rate": 2.44140625e-06, "loss": 0.2463, "step": 126 }, { "epoch": 0.017696648784226294, "grad_norm": 1.0131515264511108, "learning_rate": 2.4609375e-06, "loss": 0.297, "step": 127 }, { "epoch": 0.017835992475440675, "grad_norm": 1.5476044416427612, "learning_rate": 2.48046875e-06, "loss": 0.3097, "step": 128 }, { "epoch": 0.017975336166655056, "grad_norm": 1.3560242652893066, "learning_rate": 2.5e-06, "loss": 0.2818, "step": 129 }, { "epoch": 0.018114679857869433, "grad_norm": 1.1146399974822998, "learning_rate": 2.5195312500000003e-06, "loss": 0.2431, "step": 130 }, { "epoch": 0.018254023549083814, "grad_norm": 2.0772366523742676, "learning_rate": 2.5390625000000003e-06, "loss": 0.3047, "step": 131 }, { "epoch": 0.018393367240298195, "grad_norm": 1.7009674310684204, "learning_rate": 2.5585937500000004e-06, "loss": 0.3647, "step": 132 }, { "epoch": 0.018532710931512576, "grad_norm": 1.3443853855133057, "learning_rate": 2.5781250000000004e-06, "loss": 0.2075, "step": 133 }, { "epoch": 0.018672054622726957, "grad_norm": 3.157778024673462, "learning_rate": 2.59765625e-06, "loss": 0.2382, "step": 134 }, { "epoch": 0.018811398313941335, "grad_norm": 1.3062468767166138, "learning_rate": 2.6171875e-06, "loss": 0.2225, "step": 135 }, { "epoch": 0.018950742005155716, "grad_norm": 1.257946491241455, "learning_rate": 2.63671875e-06, "loss": 0.3554, "step": 136 }, { "epoch": 0.019090085696370097, "grad_norm": 1.588822841644287, "learning_rate": 2.65625e-06, "loss": 0.2822, "step": 137 }, { "epoch": 0.019229429387584478, "grad_norm": 0.9658819437026978, "learning_rate": 2.6757812500000002e-06, "loss": 0.2681, "step": 138 }, { "epoch": 0.01936877307879886, "grad_norm": 0.776848316192627, "learning_rate": 2.6953125000000003e-06, "loss": 0.2942, "step": 139 }, { "epoch": 0.019508116770013236, "grad_norm": 1.4799704551696777, "learning_rate": 2.7148437500000003e-06, "loss": 0.2371, "step": 140 }, { "epoch": 0.019647460461227617, "grad_norm": 0.7714605331420898, "learning_rate": 2.7343750000000004e-06, "loss": 0.313, "step": 141 }, { "epoch": 0.019786804152441998, "grad_norm": 2.5308949947357178, "learning_rate": 2.75390625e-06, "loss": 0.2566, "step": 142 }, { "epoch": 0.01992614784365638, "grad_norm": 1.5935903787612915, "learning_rate": 2.7734375e-06, "loss": 0.312, "step": 143 }, { "epoch": 0.02006549153487076, "grad_norm": 2.777427911758423, "learning_rate": 2.79296875e-06, "loss": 0.2821, "step": 144 }, { "epoch": 0.020204835226085138, "grad_norm": 3.2048614025115967, "learning_rate": 2.8125e-06, "loss": 0.2599, "step": 145 }, { "epoch": 0.02034417891729952, "grad_norm": 0.9811825752258301, "learning_rate": 2.8320312500000002e-06, "loss": 0.2286, "step": 146 }, { "epoch": 0.0204835226085139, "grad_norm": 3.4738259315490723, "learning_rate": 2.8515625000000003e-06, "loss": 0.3182, "step": 147 }, { "epoch": 0.02062286629972828, "grad_norm": 4.143899917602539, "learning_rate": 2.8710937500000003e-06, "loss": 0.2737, "step": 148 }, { "epoch": 0.02076220999094266, "grad_norm": 2.9675586223602295, "learning_rate": 2.8906250000000004e-06, "loss": 0.225, "step": 149 }, { "epoch": 0.02090155368215704, "grad_norm": 1.8224599361419678, "learning_rate": 2.9101562500000004e-06, "loss": 0.2352, "step": 150 }, { "epoch": 0.02104089737337142, "grad_norm": 3.0269761085510254, "learning_rate": 2.9296875e-06, "loss": 0.3094, "step": 151 }, { "epoch": 0.0211802410645858, "grad_norm": 1.1625525951385498, "learning_rate": 2.94921875e-06, "loss": 0.2841, "step": 152 }, { "epoch": 0.021319584755800182, "grad_norm": 1.7056978940963745, "learning_rate": 2.96875e-06, "loss": 0.1892, "step": 153 }, { "epoch": 0.021458928447014563, "grad_norm": 2.0785140991210938, "learning_rate": 2.9882812500000002e-06, "loss": 0.2606, "step": 154 }, { "epoch": 0.02159827213822894, "grad_norm": 0.9804434776306152, "learning_rate": 3.0078125000000003e-06, "loss": 0.2541, "step": 155 }, { "epoch": 0.02173761582944332, "grad_norm": 1.8790603876113892, "learning_rate": 3.0273437500000003e-06, "loss": 0.2416, "step": 156 }, { "epoch": 0.021876959520657702, "grad_norm": 1.7792613506317139, "learning_rate": 3.0468750000000004e-06, "loss": 0.228, "step": 157 }, { "epoch": 0.022016303211872083, "grad_norm": 1.3157331943511963, "learning_rate": 3.0664062500000004e-06, "loss": 0.29, "step": 158 }, { "epoch": 0.022155646903086464, "grad_norm": 1.4892604351043701, "learning_rate": 3.0859375e-06, "loss": 0.2059, "step": 159 }, { "epoch": 0.022294990594300842, "grad_norm": 3.0041415691375732, "learning_rate": 3.10546875e-06, "loss": 0.297, "step": 160 }, { "epoch": 0.022434334285515223, "grad_norm": 1.540193796157837, "learning_rate": 3.125e-06, "loss": 0.2917, "step": 161 }, { "epoch": 0.022573677976729604, "grad_norm": 2.6902782917022705, "learning_rate": 3.14453125e-06, "loss": 0.1926, "step": 162 }, { "epoch": 0.022713021667943985, "grad_norm": 1.0680965185165405, "learning_rate": 3.1640625000000003e-06, "loss": 0.2229, "step": 163 }, { "epoch": 0.022852365359158366, "grad_norm": 0.9713494777679443, "learning_rate": 3.1835937500000003e-06, "loss": 0.1639, "step": 164 }, { "epoch": 0.022991709050372743, "grad_norm": 1.2318549156188965, "learning_rate": 3.2031250000000004e-06, "loss": 0.1948, "step": 165 }, { "epoch": 0.023131052741587124, "grad_norm": 1.3571010828018188, "learning_rate": 3.2226562500000004e-06, "loss": 0.2323, "step": 166 }, { "epoch": 0.023270396432801505, "grad_norm": 1.749537706375122, "learning_rate": 3.2421875000000005e-06, "loss": 0.2045, "step": 167 }, { "epoch": 0.023409740124015886, "grad_norm": 1.2661300897598267, "learning_rate": 3.26171875e-06, "loss": 0.1608, "step": 168 }, { "epoch": 0.023549083815230267, "grad_norm": 1.8492060899734497, "learning_rate": 3.28125e-06, "loss": 0.1848, "step": 169 }, { "epoch": 0.023688427506444645, "grad_norm": 2.12788724899292, "learning_rate": 3.30078125e-06, "loss": 0.1651, "step": 170 }, { "epoch": 0.023827771197659026, "grad_norm": 4.169435024261475, "learning_rate": 3.3203125000000002e-06, "loss": 0.239, "step": 171 }, { "epoch": 0.023967114888873407, "grad_norm": 1.6760138273239136, "learning_rate": 3.3398437500000003e-06, "loss": 0.1691, "step": 172 }, { "epoch": 0.024106458580087788, "grad_norm": 0.8379729986190796, "learning_rate": 3.3593750000000003e-06, "loss": 0.1768, "step": 173 }, { "epoch": 0.024245802271302165, "grad_norm": 1.232727289199829, "learning_rate": 3.3789062500000004e-06, "loss": 0.1874, "step": 174 }, { "epoch": 0.024385145962516546, "grad_norm": 1.4350289106369019, "learning_rate": 3.3984375000000004e-06, "loss": 0.2138, "step": 175 }, { "epoch": 0.024524489653730927, "grad_norm": 1.8706411123275757, "learning_rate": 3.41796875e-06, "loss": 0.2054, "step": 176 }, { "epoch": 0.024663833344945308, "grad_norm": 1.4090348482131958, "learning_rate": 3.4375e-06, "loss": 0.153, "step": 177 }, { "epoch": 0.02480317703615969, "grad_norm": 2.3767008781433105, "learning_rate": 3.45703125e-06, "loss": 0.1692, "step": 178 }, { "epoch": 0.024942520727374067, "grad_norm": 2.910409927368164, "learning_rate": 3.4765625000000002e-06, "loss": 0.1732, "step": 179 }, { "epoch": 0.025081864418588447, "grad_norm": 1.5833803415298462, "learning_rate": 3.4960937500000003e-06, "loss": 0.1293, "step": 180 }, { "epoch": 0.02522120810980283, "grad_norm": 3.7404887676239014, "learning_rate": 3.5156250000000003e-06, "loss": 0.1932, "step": 181 }, { "epoch": 0.02536055180101721, "grad_norm": 3.7882049083709717, "learning_rate": 3.5351562500000004e-06, "loss": 0.2102, "step": 182 }, { "epoch": 0.02549989549223159, "grad_norm": 3.6663601398468018, "learning_rate": 3.5546875000000004e-06, "loss": 0.2677, "step": 183 }, { "epoch": 0.025639239183445968, "grad_norm": 5.748714923858643, "learning_rate": 3.5742187500000005e-06, "loss": 0.2434, "step": 184 }, { "epoch": 0.02577858287466035, "grad_norm": 2.2931840419769287, "learning_rate": 3.59375e-06, "loss": 0.1683, "step": 185 }, { "epoch": 0.02591792656587473, "grad_norm": 3.073819637298584, "learning_rate": 3.61328125e-06, "loss": 0.1696, "step": 186 }, { "epoch": 0.02605727025708911, "grad_norm": 3.3745152950286865, "learning_rate": 3.6328125e-06, "loss": 0.1844, "step": 187 }, { "epoch": 0.026196613948303492, "grad_norm": 1.0107594728469849, "learning_rate": 3.6523437500000003e-06, "loss": 0.1719, "step": 188 }, { "epoch": 0.02633595763951787, "grad_norm": 2.5899014472961426, "learning_rate": 3.6718750000000003e-06, "loss": 0.1489, "step": 189 }, { "epoch": 0.02647530133073225, "grad_norm": 3.486682176589966, "learning_rate": 3.6914062500000004e-06, "loss": 0.1945, "step": 190 }, { "epoch": 0.02661464502194663, "grad_norm": 4.100020408630371, "learning_rate": 3.7109375000000004e-06, "loss": 0.1454, "step": 191 }, { "epoch": 0.026753988713161012, "grad_norm": 7.739872932434082, "learning_rate": 3.7304687500000005e-06, "loss": 0.1966, "step": 192 }, { "epoch": 0.026893332404375393, "grad_norm": 3.0246500968933105, "learning_rate": 3.7500000000000005e-06, "loss": 0.2143, "step": 193 }, { "epoch": 0.02703267609558977, "grad_norm": 3.3592824935913086, "learning_rate": 3.76953125e-06, "loss": 0.1473, "step": 194 }, { "epoch": 0.02717201978680415, "grad_norm": 4.133052349090576, "learning_rate": 3.7890625e-06, "loss": 0.1896, "step": 195 }, { "epoch": 0.027311363478018533, "grad_norm": 2.7173526287078857, "learning_rate": 3.8085937500000002e-06, "loss": 0.1525, "step": 196 }, { "epoch": 0.027450707169232914, "grad_norm": 2.25923228263855, "learning_rate": 3.828125000000001e-06, "loss": 0.1727, "step": 197 }, { "epoch": 0.027590050860447295, "grad_norm": 9.056709289550781, "learning_rate": 3.84765625e-06, "loss": 0.1909, "step": 198 }, { "epoch": 0.027729394551661672, "grad_norm": 12.273653984069824, "learning_rate": 3.8671875e-06, "loss": 0.2539, "step": 199 }, { "epoch": 0.027868738242876053, "grad_norm": 9.685579299926758, "learning_rate": 3.88671875e-06, "loss": 0.2277, "step": 200 }, { "epoch": 0.028008081934090434, "grad_norm": 1.5767991542816162, "learning_rate": 3.90625e-06, "loss": 0.1492, "step": 201 }, { "epoch": 0.028147425625304815, "grad_norm": 1.1916193962097168, "learning_rate": 3.92578125e-06, "loss": 0.1146, "step": 202 }, { "epoch": 0.028286769316519196, "grad_norm": 1.9071476459503174, "learning_rate": 3.9453125e-06, "loss": 0.1319, "step": 203 }, { "epoch": 0.028426113007733574, "grad_norm": 2.7392489910125732, "learning_rate": 3.96484375e-06, "loss": 0.1604, "step": 204 }, { "epoch": 0.028565456698947955, "grad_norm": 1.1657382249832153, "learning_rate": 3.984375e-06, "loss": 0.1476, "step": 205 }, { "epoch": 0.028704800390162336, "grad_norm": 1.6959543228149414, "learning_rate": 4.00390625e-06, "loss": 0.1477, "step": 206 }, { "epoch": 0.028844144081376717, "grad_norm": 3.2479031085968018, "learning_rate": 4.0234375e-06, "loss": 0.1754, "step": 207 }, { "epoch": 0.028983487772591097, "grad_norm": 1.9703972339630127, "learning_rate": 4.0429687500000004e-06, "loss": 0.1548, "step": 208 }, { "epoch": 0.029122831463805475, "grad_norm": 1.369796872138977, "learning_rate": 4.0625000000000005e-06, "loss": 0.1207, "step": 209 }, { "epoch": 0.029262175155019856, "grad_norm": 3.970050573348999, "learning_rate": 4.0820312500000005e-06, "loss": 0.2022, "step": 210 }, { "epoch": 0.029401518846234237, "grad_norm": 1.9519346952438354, "learning_rate": 4.101562500000001e-06, "loss": 0.1508, "step": 211 }, { "epoch": 0.029540862537448618, "grad_norm": 1.921578049659729, "learning_rate": 4.121093750000001e-06, "loss": 0.1543, "step": 212 }, { "epoch": 0.029680206228663, "grad_norm": 1.6480791568756104, "learning_rate": 4.140625000000001e-06, "loss": 0.1539, "step": 213 }, { "epoch": 0.029819549919877376, "grad_norm": 1.3284991979599, "learning_rate": 4.160156250000001e-06, "loss": 0.1925, "step": 214 }, { "epoch": 0.029958893611091757, "grad_norm": 0.988721489906311, "learning_rate": 4.1796875e-06, "loss": 0.1615, "step": 215 }, { "epoch": 0.03009823730230614, "grad_norm": 1.903036117553711, "learning_rate": 4.19921875e-06, "loss": 0.1689, "step": 216 }, { "epoch": 0.03023758099352052, "grad_norm": 0.9211412072181702, "learning_rate": 4.21875e-06, "loss": 0.1311, "step": 217 }, { "epoch": 0.0303769246847349, "grad_norm": 2.2512800693511963, "learning_rate": 4.23828125e-06, "loss": 0.1472, "step": 218 }, { "epoch": 0.030516268375949278, "grad_norm": 1.5690715312957764, "learning_rate": 4.2578125e-06, "loss": 0.1292, "step": 219 }, { "epoch": 0.03065561206716366, "grad_norm": 1.0853551626205444, "learning_rate": 4.27734375e-06, "loss": 0.1428, "step": 220 }, { "epoch": 0.03079495575837804, "grad_norm": 2.465315103530884, "learning_rate": 4.296875e-06, "loss": 0.1529, "step": 221 }, { "epoch": 0.03093429944959242, "grad_norm": 3.591296434402466, "learning_rate": 4.31640625e-06, "loss": 0.2109, "step": 222 }, { "epoch": 0.031073643140806798, "grad_norm": 1.9189027547836304, "learning_rate": 4.3359375e-06, "loss": 0.1753, "step": 223 }, { "epoch": 0.03121298683202118, "grad_norm": 1.0441923141479492, "learning_rate": 4.35546875e-06, "loss": 0.1515, "step": 224 }, { "epoch": 0.031352330523235564, "grad_norm": 1.745715618133545, "learning_rate": 4.3750000000000005e-06, "loss": 0.1311, "step": 225 }, { "epoch": 0.03149167421444994, "grad_norm": 1.168297290802002, "learning_rate": 4.3945312500000005e-06, "loss": 0.1381, "step": 226 }, { "epoch": 0.03163101790566432, "grad_norm": 0.5969564318656921, "learning_rate": 4.4140625000000006e-06, "loss": 0.1283, "step": 227 }, { "epoch": 0.0317703615968787, "grad_norm": 5.004308223724365, "learning_rate": 4.433593750000001e-06, "loss": 0.1707, "step": 228 }, { "epoch": 0.03190970528809308, "grad_norm": 0.745837390422821, "learning_rate": 4.453125000000001e-06, "loss": 0.1604, "step": 229 }, { "epoch": 0.032049048979307465, "grad_norm": 3.120706081390381, "learning_rate": 4.472656250000001e-06, "loss": 0.172, "step": 230 }, { "epoch": 0.03218839267052184, "grad_norm": 0.46705833077430725, "learning_rate": 4.492187500000001e-06, "loss": 0.1244, "step": 231 }, { "epoch": 0.03232773636173622, "grad_norm": 1.1996073722839355, "learning_rate": 4.51171875e-06, "loss": 0.1635, "step": 232 }, { "epoch": 0.032467080052950605, "grad_norm": 1.5993646383285522, "learning_rate": 4.53125e-06, "loss": 0.1295, "step": 233 }, { "epoch": 0.03260642374416498, "grad_norm": 0.5964046716690063, "learning_rate": 4.55078125e-06, "loss": 0.1437, "step": 234 }, { "epoch": 0.032745767435379367, "grad_norm": 1.8821343183517456, "learning_rate": 4.5703125e-06, "loss": 0.1224, "step": 235 }, { "epoch": 0.032885111126593744, "grad_norm": 2.0376639366149902, "learning_rate": 4.58984375e-06, "loss": 0.124, "step": 236 }, { "epoch": 0.03302445481780812, "grad_norm": 1.057521104812622, "learning_rate": 4.609375e-06, "loss": 0.1005, "step": 237 }, { "epoch": 0.033163798509022506, "grad_norm": 0.706886887550354, "learning_rate": 4.62890625e-06, "loss": 0.1213, "step": 238 }, { "epoch": 0.033303142200236883, "grad_norm": 0.7897224426269531, "learning_rate": 4.6484375e-06, "loss": 0.0996, "step": 239 }, { "epoch": 0.03344248589145127, "grad_norm": 1.1942460536956787, "learning_rate": 4.66796875e-06, "loss": 0.1156, "step": 240 }, { "epoch": 0.033581829582665645, "grad_norm": 1.4859144687652588, "learning_rate": 4.6875000000000004e-06, "loss": 0.1255, "step": 241 }, { "epoch": 0.03372117327388002, "grad_norm": 0.65531325340271, "learning_rate": 4.7070312500000005e-06, "loss": 0.1035, "step": 242 }, { "epoch": 0.03386051696509441, "grad_norm": 2.3028523921966553, "learning_rate": 4.7265625000000005e-06, "loss": 0.1537, "step": 243 }, { "epoch": 0.033999860656308785, "grad_norm": 1.096729040145874, "learning_rate": 4.746093750000001e-06, "loss": 0.1372, "step": 244 }, { "epoch": 0.03413920434752317, "grad_norm": 0.9091118574142456, "learning_rate": 4.765625000000001e-06, "loss": 0.1415, "step": 245 }, { "epoch": 0.03427854803873755, "grad_norm": 0.4930439293384552, "learning_rate": 4.785156250000001e-06, "loss": 0.0888, "step": 246 }, { "epoch": 0.034417891729951924, "grad_norm": 0.499423086643219, "learning_rate": 4.804687500000001e-06, "loss": 0.1159, "step": 247 }, { "epoch": 0.03455723542116631, "grad_norm": 0.7473634481430054, "learning_rate": 4.824218750000001e-06, "loss": 0.1056, "step": 248 }, { "epoch": 0.034696579112380686, "grad_norm": 1.5958285331726074, "learning_rate": 4.84375e-06, "loss": 0.1319, "step": 249 }, { "epoch": 0.034835922803595064, "grad_norm": 2.1473584175109863, "learning_rate": 4.86328125e-06, "loss": 0.1129, "step": 250 }, { "epoch": 0.03497526649480945, "grad_norm": 0.681753396987915, "learning_rate": 4.8828125e-06, "loss": 0.1314, "step": 251 }, { "epoch": 0.035114610186023826, "grad_norm": 2.0187032222747803, "learning_rate": 4.90234375e-06, "loss": 0.1395, "step": 252 }, { "epoch": 0.03525395387723821, "grad_norm": 2.0100409984588623, "learning_rate": 4.921875e-06, "loss": 0.1282, "step": 253 }, { "epoch": 0.03539329756845259, "grad_norm": 1.1588890552520752, "learning_rate": 4.94140625e-06, "loss": 0.1093, "step": 254 }, { "epoch": 0.035532641259666965, "grad_norm": 1.460558533668518, "learning_rate": 4.9609375e-06, "loss": 0.1305, "step": 255 }, { "epoch": 0.03567198495088135, "grad_norm": 0.9396835565567017, "learning_rate": 4.98046875e-06, "loss": 0.138, "step": 256 }, { "epoch": 0.03581132864209573, "grad_norm": 2.849827289581299, "learning_rate": 5e-06, "loss": 0.1259, "step": 257 }, { "epoch": 0.03595067233331011, "grad_norm": 1.3623026609420776, "learning_rate": 5.0195312500000005e-06, "loss": 0.1118, "step": 258 }, { "epoch": 0.03609001602452449, "grad_norm": 2.8209924697875977, "learning_rate": 5.0390625000000005e-06, "loss": 0.1566, "step": 259 }, { "epoch": 0.03622935971573887, "grad_norm": 1.0809645652770996, "learning_rate": 5.0585937500000006e-06, "loss": 0.1214, "step": 260 }, { "epoch": 0.03636870340695325, "grad_norm": 0.6757979989051819, "learning_rate": 5.078125000000001e-06, "loss": 0.1211, "step": 261 }, { "epoch": 0.03650804709816763, "grad_norm": 3.156147003173828, "learning_rate": 5.097656250000001e-06, "loss": 0.1312, "step": 262 }, { "epoch": 0.03664739078938201, "grad_norm": 1.9846651554107666, "learning_rate": 5.117187500000001e-06, "loss": 0.1378, "step": 263 }, { "epoch": 0.03678673448059639, "grad_norm": 0.6380707025527954, "learning_rate": 5.136718750000001e-06, "loss": 0.1333, "step": 264 }, { "epoch": 0.03692607817181077, "grad_norm": 2.819024085998535, "learning_rate": 5.156250000000001e-06, "loss": 0.1436, "step": 265 }, { "epoch": 0.03706542186302515, "grad_norm": 1.1484800577163696, "learning_rate": 5.17578125e-06, "loss": 0.1224, "step": 266 }, { "epoch": 0.03720476555423953, "grad_norm": 1.493029236793518, "learning_rate": 5.1953125e-06, "loss": 0.1202, "step": 267 }, { "epoch": 0.037344109245453914, "grad_norm": 1.7791023254394531, "learning_rate": 5.21484375e-06, "loss": 0.1128, "step": 268 }, { "epoch": 0.03748345293666829, "grad_norm": 1.285009503364563, "learning_rate": 5.234375e-06, "loss": 0.1133, "step": 269 }, { "epoch": 0.03762279662788267, "grad_norm": 1.2068349123001099, "learning_rate": 5.25390625e-06, "loss": 0.1163, "step": 270 }, { "epoch": 0.037762140319097054, "grad_norm": 0.39156562089920044, "learning_rate": 5.2734375e-06, "loss": 0.104, "step": 271 }, { "epoch": 0.03790148401031143, "grad_norm": 4.177847385406494, "learning_rate": 5.29296875e-06, "loss": 0.1275, "step": 272 }, { "epoch": 0.038040827701525816, "grad_norm": 3.6713578701019287, "learning_rate": 5.3125e-06, "loss": 0.1327, "step": 273 }, { "epoch": 0.03818017139274019, "grad_norm": 0.8355847001075745, "learning_rate": 5.3320312500000004e-06, "loss": 0.1045, "step": 274 }, { "epoch": 0.03831951508395457, "grad_norm": 3.212862014770508, "learning_rate": 5.3515625000000005e-06, "loss": 0.1315, "step": 275 }, { "epoch": 0.038458858775168955, "grad_norm": 1.892137885093689, "learning_rate": 5.3710937500000005e-06, "loss": 0.1058, "step": 276 }, { "epoch": 0.03859820246638333, "grad_norm": 0.867725670337677, "learning_rate": 5.390625000000001e-06, "loss": 0.1208, "step": 277 }, { "epoch": 0.03873754615759772, "grad_norm": 4.204258918762207, "learning_rate": 5.410156250000001e-06, "loss": 0.1288, "step": 278 }, { "epoch": 0.038876889848812095, "grad_norm": 3.882392406463623, "learning_rate": 5.429687500000001e-06, "loss": 0.1455, "step": 279 }, { "epoch": 0.03901623354002647, "grad_norm": 1.2202495336532593, "learning_rate": 5.449218750000001e-06, "loss": 0.1163, "step": 280 }, { "epoch": 0.03915557723124086, "grad_norm": 1.3391743898391724, "learning_rate": 5.468750000000001e-06, "loss": 0.124, "step": 281 }, { "epoch": 0.039294920922455234, "grad_norm": 1.1069475412368774, "learning_rate": 5.488281250000001e-06, "loss": 0.0999, "step": 282 }, { "epoch": 0.03943426461366962, "grad_norm": 1.9380618333816528, "learning_rate": 5.5078125e-06, "loss": 0.1381, "step": 283 }, { "epoch": 0.039573608304883996, "grad_norm": 1.3193589448928833, "learning_rate": 5.52734375e-06, "loss": 0.1516, "step": 284 }, { "epoch": 0.039712951996098374, "grad_norm": 0.9794448614120483, "learning_rate": 5.546875e-06, "loss": 0.1155, "step": 285 }, { "epoch": 0.03985229568731276, "grad_norm": 0.6853723526000977, "learning_rate": 5.56640625e-06, "loss": 0.1032, "step": 286 }, { "epoch": 0.039991639378527136, "grad_norm": 1.158639907836914, "learning_rate": 5.5859375e-06, "loss": 0.1146, "step": 287 }, { "epoch": 0.04013098306974152, "grad_norm": 0.43171876668930054, "learning_rate": 5.60546875e-06, "loss": 0.0965, "step": 288 }, { "epoch": 0.0402703267609559, "grad_norm": 0.5822612643241882, "learning_rate": 5.625e-06, "loss": 0.1034, "step": 289 }, { "epoch": 0.040409670452170275, "grad_norm": 1.0245718955993652, "learning_rate": 5.64453125e-06, "loss": 0.1153, "step": 290 }, { "epoch": 0.04054901414338466, "grad_norm": 0.6434977650642395, "learning_rate": 5.6640625000000005e-06, "loss": 0.0934, "step": 291 }, { "epoch": 0.04068835783459904, "grad_norm": 0.9082045555114746, "learning_rate": 5.6835937500000005e-06, "loss": 0.1109, "step": 292 }, { "epoch": 0.04082770152581342, "grad_norm": 2.979217290878296, "learning_rate": 5.7031250000000006e-06, "loss": 0.1183, "step": 293 }, { "epoch": 0.0409670452170278, "grad_norm": 2.3223655223846436, "learning_rate": 5.722656250000001e-06, "loss": 0.0923, "step": 294 }, { "epoch": 0.04110638890824218, "grad_norm": 0.7602518200874329, "learning_rate": 5.742187500000001e-06, "loss": 0.1001, "step": 295 }, { "epoch": 0.04124573259945656, "grad_norm": 1.8402540683746338, "learning_rate": 5.761718750000001e-06, "loss": 0.1179, "step": 296 }, { "epoch": 0.04138507629067094, "grad_norm": 0.7053990364074707, "learning_rate": 5.781250000000001e-06, "loss": 0.0867, "step": 297 }, { "epoch": 0.04152441998188532, "grad_norm": 1.9451613426208496, "learning_rate": 5.800781250000001e-06, "loss": 0.0804, "step": 298 }, { "epoch": 0.0416637636730997, "grad_norm": 1.743586540222168, "learning_rate": 5.820312500000001e-06, "loss": 0.0909, "step": 299 }, { "epoch": 0.04180310736431408, "grad_norm": 0.3728269636631012, "learning_rate": 5.83984375e-06, "loss": 0.1065, "step": 300 }, { "epoch": 0.04194245105552846, "grad_norm": 2.2477612495422363, "learning_rate": 5.859375e-06, "loss": 0.1108, "step": 301 }, { "epoch": 0.04208179474674284, "grad_norm": 1.0013954639434814, "learning_rate": 5.87890625e-06, "loss": 0.0938, "step": 302 }, { "epoch": 0.042221138437957224, "grad_norm": 1.21222722530365, "learning_rate": 5.8984375e-06, "loss": 0.0967, "step": 303 }, { "epoch": 0.0423604821291716, "grad_norm": 0.809727132320404, "learning_rate": 5.91796875e-06, "loss": 0.1174, "step": 304 }, { "epoch": 0.04249982582038598, "grad_norm": 0.7303671836853027, "learning_rate": 5.9375e-06, "loss": 0.0903, "step": 305 }, { "epoch": 0.042639169511600364, "grad_norm": 1.0141801834106445, "learning_rate": 5.95703125e-06, "loss": 0.1119, "step": 306 }, { "epoch": 0.04277851320281474, "grad_norm": 0.6254080533981323, "learning_rate": 5.9765625000000004e-06, "loss": 0.0904, "step": 307 }, { "epoch": 0.042917856894029126, "grad_norm": 0.6163007616996765, "learning_rate": 5.9960937500000005e-06, "loss": 0.0963, "step": 308 }, { "epoch": 0.0430572005852435, "grad_norm": 0.45044270157814026, "learning_rate": 6.0156250000000005e-06, "loss": 0.0746, "step": 309 }, { "epoch": 0.04319654427645788, "grad_norm": 2.0810325145721436, "learning_rate": 6.035156250000001e-06, "loss": 0.1139, "step": 310 }, { "epoch": 0.043335887967672265, "grad_norm": 1.4634443521499634, "learning_rate": 6.054687500000001e-06, "loss": 0.0957, "step": 311 }, { "epoch": 0.04347523165888664, "grad_norm": 0.9293150305747986, "learning_rate": 6.074218750000001e-06, "loss": 0.1048, "step": 312 }, { "epoch": 0.04361457535010103, "grad_norm": 1.8220579624176025, "learning_rate": 6.093750000000001e-06, "loss": 0.1016, "step": 313 }, { "epoch": 0.043753919041315405, "grad_norm": 2.3090155124664307, "learning_rate": 6.113281250000001e-06, "loss": 0.1085, "step": 314 }, { "epoch": 0.04389326273252978, "grad_norm": 1.7273184061050415, "learning_rate": 6.132812500000001e-06, "loss": 0.0886, "step": 315 }, { "epoch": 0.04403260642374417, "grad_norm": 1.0151546001434326, "learning_rate": 6.152343750000001e-06, "loss": 0.0828, "step": 316 }, { "epoch": 0.044171950114958544, "grad_norm": 1.3930292129516602, "learning_rate": 6.171875e-06, "loss": 0.1015, "step": 317 }, { "epoch": 0.04431129380617293, "grad_norm": 1.4134658575057983, "learning_rate": 6.19140625e-06, "loss": 0.0983, "step": 318 }, { "epoch": 0.044450637497387306, "grad_norm": 0.6953068971633911, "learning_rate": 6.2109375e-06, "loss": 0.1028, "step": 319 }, { "epoch": 0.044589981188601684, "grad_norm": 4.692896366119385, "learning_rate": 6.23046875e-06, "loss": 0.1051, "step": 320 }, { "epoch": 0.04472932487981607, "grad_norm": 3.4860949516296387, "learning_rate": 6.25e-06, "loss": 0.102, "step": 321 }, { "epoch": 0.044868668571030446, "grad_norm": 2.90629506111145, "learning_rate": 6.26953125e-06, "loss": 0.0992, "step": 322 }, { "epoch": 0.04500801226224483, "grad_norm": 3.373619318008423, "learning_rate": 6.2890625e-06, "loss": 0.103, "step": 323 }, { "epoch": 0.04514735595345921, "grad_norm": 2.716407299041748, "learning_rate": 6.3085937500000005e-06, "loss": 0.1111, "step": 324 }, { "epoch": 0.045286699644673585, "grad_norm": 1.5577681064605713, "learning_rate": 6.3281250000000005e-06, "loss": 0.0951, "step": 325 }, { "epoch": 0.04542604333588797, "grad_norm": 1.1260861158370972, "learning_rate": 6.3476562500000006e-06, "loss": 0.1072, "step": 326 }, { "epoch": 0.04556538702710235, "grad_norm": 1.853028416633606, "learning_rate": 6.367187500000001e-06, "loss": 0.0998, "step": 327 }, { "epoch": 0.04570473071831673, "grad_norm": 1.216904878616333, "learning_rate": 6.386718750000001e-06, "loss": 0.0846, "step": 328 }, { "epoch": 0.04584407440953111, "grad_norm": 1.7740590572357178, "learning_rate": 6.406250000000001e-06, "loss": 0.1012, "step": 329 }, { "epoch": 0.045983418100745486, "grad_norm": 1.3835229873657227, "learning_rate": 6.425781250000001e-06, "loss": 0.0931, "step": 330 }, { "epoch": 0.04612276179195987, "grad_norm": 0.7738873362541199, "learning_rate": 6.445312500000001e-06, "loss": 0.0897, "step": 331 }, { "epoch": 0.04626210548317425, "grad_norm": 0.39759376645088196, "learning_rate": 6.464843750000001e-06, "loss": 0.0978, "step": 332 }, { "epoch": 0.04640144917438863, "grad_norm": 2.9466965198516846, "learning_rate": 6.484375000000001e-06, "loss": 0.0997, "step": 333 }, { "epoch": 0.04654079286560301, "grad_norm": 1.0792769193649292, "learning_rate": 6.50390625e-06, "loss": 0.1157, "step": 334 }, { "epoch": 0.04668013655681739, "grad_norm": 0.7943093776702881, "learning_rate": 6.5234375e-06, "loss": 0.0942, "step": 335 }, { "epoch": 0.04681948024803177, "grad_norm": 1.9633665084838867, "learning_rate": 6.54296875e-06, "loss": 0.1031, "step": 336 }, { "epoch": 0.04695882393924615, "grad_norm": 1.6843271255493164, "learning_rate": 6.5625e-06, "loss": 0.0934, "step": 337 }, { "epoch": 0.047098167630460534, "grad_norm": 1.7671217918395996, "learning_rate": 6.58203125e-06, "loss": 0.1138, "step": 338 }, { "epoch": 0.04723751132167491, "grad_norm": 0.5526740550994873, "learning_rate": 6.6015625e-06, "loss": 0.0671, "step": 339 }, { "epoch": 0.04737685501288929, "grad_norm": 1.5048056840896606, "learning_rate": 6.6210937500000004e-06, "loss": 0.0826, "step": 340 }, { "epoch": 0.047516198704103674, "grad_norm": 1.7511680126190186, "learning_rate": 6.6406250000000005e-06, "loss": 0.0994, "step": 341 }, { "epoch": 0.04765554239531805, "grad_norm": 0.3957027792930603, "learning_rate": 6.6601562500000005e-06, "loss": 0.0954, "step": 342 }, { "epoch": 0.047794886086532436, "grad_norm": 1.5948799848556519, "learning_rate": 6.679687500000001e-06, "loss": 0.105, "step": 343 }, { "epoch": 0.04793422977774681, "grad_norm": 0.3366754651069641, "learning_rate": 6.699218750000001e-06, "loss": 0.0866, "step": 344 }, { "epoch": 0.04807357346896119, "grad_norm": 0.41276153922080994, "learning_rate": 6.718750000000001e-06, "loss": 0.0756, "step": 345 }, { "epoch": 0.048212917160175575, "grad_norm": 0.4426693916320801, "learning_rate": 6.738281250000001e-06, "loss": 0.0852, "step": 346 }, { "epoch": 0.04835226085138995, "grad_norm": 1.6189439296722412, "learning_rate": 6.757812500000001e-06, "loss": 0.0991, "step": 347 }, { "epoch": 0.04849160454260433, "grad_norm": 0.8648014664649963, "learning_rate": 6.777343750000001e-06, "loss": 0.0894, "step": 348 }, { "epoch": 0.048630948233818715, "grad_norm": 1.311645746231079, "learning_rate": 6.796875000000001e-06, "loss": 0.1, "step": 349 }, { "epoch": 0.04877029192503309, "grad_norm": 0.630502462387085, "learning_rate": 6.816406250000001e-06, "loss": 0.0689, "step": 350 }, { "epoch": 0.04890963561624748, "grad_norm": 0.6845799088478088, "learning_rate": 6.8359375e-06, "loss": 0.0819, "step": 351 }, { "epoch": 0.049048979307461854, "grad_norm": 0.4520905315876007, "learning_rate": 6.85546875e-06, "loss": 0.0722, "step": 352 }, { "epoch": 0.04918832299867623, "grad_norm": 0.6726033687591553, "learning_rate": 6.875e-06, "loss": 0.0781, "step": 353 }, { "epoch": 0.049327666689890616, "grad_norm": 0.5412749648094177, "learning_rate": 6.89453125e-06, "loss": 0.0738, "step": 354 }, { "epoch": 0.049467010381104994, "grad_norm": 1.0463664531707764, "learning_rate": 6.9140625e-06, "loss": 0.0929, "step": 355 }, { "epoch": 0.04960635407231938, "grad_norm": 1.3673542737960815, "learning_rate": 6.93359375e-06, "loss": 0.0842, "step": 356 }, { "epoch": 0.049745697763533755, "grad_norm": 0.6364579200744629, "learning_rate": 6.9531250000000004e-06, "loss": 0.0873, "step": 357 }, { "epoch": 0.04988504145474813, "grad_norm": 0.5306549072265625, "learning_rate": 6.9726562500000005e-06, "loss": 0.0865, "step": 358 }, { "epoch": 0.05002438514596252, "grad_norm": 0.5433744192123413, "learning_rate": 6.9921875000000006e-06, "loss": 0.0904, "step": 359 }, { "epoch": 0.050163728837176895, "grad_norm": 0.7439037561416626, "learning_rate": 7.011718750000001e-06, "loss": 0.0942, "step": 360 }, { "epoch": 0.05030307252839128, "grad_norm": 0.4790016710758209, "learning_rate": 7.031250000000001e-06, "loss": 0.0887, "step": 361 }, { "epoch": 0.05044241621960566, "grad_norm": 1.1901979446411133, "learning_rate": 7.050781250000001e-06, "loss": 0.0912, "step": 362 }, { "epoch": 0.050581759910820034, "grad_norm": 0.907569169998169, "learning_rate": 7.070312500000001e-06, "loss": 0.0744, "step": 363 }, { "epoch": 0.05072110360203442, "grad_norm": 1.061004638671875, "learning_rate": 7.089843750000001e-06, "loss": 0.0905, "step": 364 }, { "epoch": 0.050860447293248796, "grad_norm": 1.104610562324524, "learning_rate": 7.109375000000001e-06, "loss": 0.0804, "step": 365 }, { "epoch": 0.05099979098446318, "grad_norm": 0.7802277207374573, "learning_rate": 7.128906250000001e-06, "loss": 0.0933, "step": 366 }, { "epoch": 0.05113913467567756, "grad_norm": 1.034757375717163, "learning_rate": 7.148437500000001e-06, "loss": 0.0907, "step": 367 }, { "epoch": 0.051278478366891936, "grad_norm": 1.0877426862716675, "learning_rate": 7.16796875e-06, "loss": 0.104, "step": 368 }, { "epoch": 0.05141782205810632, "grad_norm": 2.2016618251800537, "learning_rate": 7.1875e-06, "loss": 0.131, "step": 369 }, { "epoch": 0.0515571657493207, "grad_norm": 0.5993258357048035, "learning_rate": 7.20703125e-06, "loss": 0.078, "step": 370 }, { "epoch": 0.05169650944053508, "grad_norm": 0.9686576724052429, "learning_rate": 7.2265625e-06, "loss": 0.0988, "step": 371 }, { "epoch": 0.05183585313174946, "grad_norm": 0.39695701003074646, "learning_rate": 7.24609375e-06, "loss": 0.0683, "step": 372 }, { "epoch": 0.05197519682296384, "grad_norm": 1.2179231643676758, "learning_rate": 7.265625e-06, "loss": 0.1055, "step": 373 }, { "epoch": 0.05211454051417822, "grad_norm": 0.8560897707939148, "learning_rate": 7.2851562500000005e-06, "loss": 0.1044, "step": 374 }, { "epoch": 0.0522538842053926, "grad_norm": 0.8827965259552002, "learning_rate": 7.3046875000000005e-06, "loss": 0.0961, "step": 375 }, { "epoch": 0.052393227896606984, "grad_norm": 0.3205031752586365, "learning_rate": 7.3242187500000006e-06, "loss": 0.0921, "step": 376 }, { "epoch": 0.05253257158782136, "grad_norm": 0.4771062731742859, "learning_rate": 7.343750000000001e-06, "loss": 0.0748, "step": 377 }, { "epoch": 0.05267191527903574, "grad_norm": 0.46320006251335144, "learning_rate": 7.363281250000001e-06, "loss": 0.0706, "step": 378 }, { "epoch": 0.05281125897025012, "grad_norm": 1.0367357730865479, "learning_rate": 7.382812500000001e-06, "loss": 0.0944, "step": 379 }, { "epoch": 0.0529506026614645, "grad_norm": 0.42803052067756653, "learning_rate": 7.402343750000001e-06, "loss": 0.0722, "step": 380 }, { "epoch": 0.053089946352678885, "grad_norm": 0.8347897529602051, "learning_rate": 7.421875000000001e-06, "loss": 0.0784, "step": 381 }, { "epoch": 0.05322929004389326, "grad_norm": 1.1745778322219849, "learning_rate": 7.441406250000001e-06, "loss": 0.0928, "step": 382 }, { "epoch": 0.05336863373510764, "grad_norm": 1.0787360668182373, "learning_rate": 7.460937500000001e-06, "loss": 0.0768, "step": 383 }, { "epoch": 0.053507977426322025, "grad_norm": 0.7759567499160767, "learning_rate": 7.480468750000001e-06, "loss": 0.0963, "step": 384 }, { "epoch": 0.0536473211175364, "grad_norm": 0.258644163608551, "learning_rate": 7.500000000000001e-06, "loss": 0.0703, "step": 385 }, { "epoch": 0.053786664808750786, "grad_norm": 0.9315176606178284, "learning_rate": 7.51953125e-06, "loss": 0.0909, "step": 386 }, { "epoch": 0.053926008499965164, "grad_norm": 0.36836808919906616, "learning_rate": 7.5390625e-06, "loss": 0.0786, "step": 387 }, { "epoch": 0.05406535219117954, "grad_norm": 1.6158008575439453, "learning_rate": 7.55859375e-06, "loss": 0.1261, "step": 388 }, { "epoch": 0.054204695882393926, "grad_norm": 0.18771962821483612, "learning_rate": 7.578125e-06, "loss": 0.0738, "step": 389 }, { "epoch": 0.0543440395736083, "grad_norm": 0.686974823474884, "learning_rate": 7.5976562500000004e-06, "loss": 0.0754, "step": 390 }, { "epoch": 0.05448338326482269, "grad_norm": 0.38613879680633545, "learning_rate": 7.6171875000000005e-06, "loss": 0.0626, "step": 391 }, { "epoch": 0.054622726956037065, "grad_norm": 0.4756171405315399, "learning_rate": 7.63671875e-06, "loss": 0.0715, "step": 392 }, { "epoch": 0.05476207064725144, "grad_norm": 1.3220406770706177, "learning_rate": 7.656250000000001e-06, "loss": 0.0987, "step": 393 }, { "epoch": 0.05490141433846583, "grad_norm": 0.8586684465408325, "learning_rate": 7.67578125e-06, "loss": 0.085, "step": 394 }, { "epoch": 0.055040758029680205, "grad_norm": 0.8617177605628967, "learning_rate": 7.6953125e-06, "loss": 0.0794, "step": 395 }, { "epoch": 0.05518010172089459, "grad_norm": 0.39825716614723206, "learning_rate": 7.71484375e-06, "loss": 0.0553, "step": 396 }, { "epoch": 0.05531944541210897, "grad_norm": 0.8723248839378357, "learning_rate": 7.734375e-06, "loss": 0.0671, "step": 397 }, { "epoch": 0.055458789103323344, "grad_norm": 0.25883203744888306, "learning_rate": 7.753906250000001e-06, "loss": 0.0644, "step": 398 }, { "epoch": 0.05559813279453773, "grad_norm": 1.4946699142456055, "learning_rate": 7.7734375e-06, "loss": 0.0924, "step": 399 }, { "epoch": 0.055737476485752106, "grad_norm": 1.0306260585784912, "learning_rate": 7.792968750000001e-06, "loss": 0.0764, "step": 400 }, { "epoch": 0.05587682017696649, "grad_norm": 2.2745819091796875, "learning_rate": 7.8125e-06, "loss": 0.0882, "step": 401 }, { "epoch": 0.05601616386818087, "grad_norm": 0.24008168280124664, "learning_rate": 7.832031250000001e-06, "loss": 0.0705, "step": 402 }, { "epoch": 0.056155507559395246, "grad_norm": 0.8975988626480103, "learning_rate": 7.8515625e-06, "loss": 0.103, "step": 403 }, { "epoch": 0.05629485125060963, "grad_norm": 0.3926302492618561, "learning_rate": 7.871093750000001e-06, "loss": 0.0663, "step": 404 }, { "epoch": 0.05643419494182401, "grad_norm": 0.6575235724449158, "learning_rate": 7.890625e-06, "loss": 0.1037, "step": 405 }, { "epoch": 0.05657353863303839, "grad_norm": 1.7770209312438965, "learning_rate": 7.910156250000001e-06, "loss": 0.0787, "step": 406 }, { "epoch": 0.05671288232425277, "grad_norm": 1.4723460674285889, "learning_rate": 7.9296875e-06, "loss": 0.0833, "step": 407 }, { "epoch": 0.05685222601546715, "grad_norm": 0.39463406801223755, "learning_rate": 7.949218750000001e-06, "loss": 0.0787, "step": 408 }, { "epoch": 0.05699156970668153, "grad_norm": 0.6617352366447449, "learning_rate": 7.96875e-06, "loss": 0.0679, "step": 409 }, { "epoch": 0.05713091339789591, "grad_norm": 0.7348319888114929, "learning_rate": 7.988281250000001e-06, "loss": 0.0799, "step": 410 }, { "epoch": 0.057270257089110294, "grad_norm": 0.4457240104675293, "learning_rate": 8.0078125e-06, "loss": 0.0851, "step": 411 }, { "epoch": 0.05740960078032467, "grad_norm": 0.9478713274002075, "learning_rate": 8.02734375e-06, "loss": 0.0865, "step": 412 }, { "epoch": 0.05754894447153905, "grad_norm": 0.606083869934082, "learning_rate": 8.046875e-06, "loss": 0.0899, "step": 413 }, { "epoch": 0.05768828816275343, "grad_norm": 0.30984267592430115, "learning_rate": 8.06640625e-06, "loss": 0.0871, "step": 414 }, { "epoch": 0.05782763185396781, "grad_norm": 0.6757116913795471, "learning_rate": 8.085937500000001e-06, "loss": 0.0758, "step": 415 }, { "epoch": 0.057966975545182195, "grad_norm": 0.5953440070152283, "learning_rate": 8.10546875e-06, "loss": 0.0821, "step": 416 }, { "epoch": 0.05810631923639657, "grad_norm": 0.5044622421264648, "learning_rate": 8.125000000000001e-06, "loss": 0.0764, "step": 417 }, { "epoch": 0.05824566292761095, "grad_norm": 0.486581951379776, "learning_rate": 8.14453125e-06, "loss": 0.0945, "step": 418 }, { "epoch": 0.058385006618825334, "grad_norm": 0.4492497742176056, "learning_rate": 8.164062500000001e-06, "loss": 0.0676, "step": 419 }, { "epoch": 0.05852435031003971, "grad_norm": 0.5971747040748596, "learning_rate": 8.18359375e-06, "loss": 0.095, "step": 420 }, { "epoch": 0.058663694001254096, "grad_norm": 0.7309678792953491, "learning_rate": 8.203125000000001e-06, "loss": 0.082, "step": 421 }, { "epoch": 0.058803037692468474, "grad_norm": 0.5311971306800842, "learning_rate": 8.22265625e-06, "loss": 0.0765, "step": 422 }, { "epoch": 0.05894238138368285, "grad_norm": 1.0750926733016968, "learning_rate": 8.242187500000001e-06, "loss": 0.0828, "step": 423 }, { "epoch": 0.059081725074897236, "grad_norm": 0.6859942078590393, "learning_rate": 8.26171875e-06, "loss": 0.0802, "step": 424 }, { "epoch": 0.05922106876611161, "grad_norm": 0.4942722022533417, "learning_rate": 8.281250000000001e-06, "loss": 0.0842, "step": 425 }, { "epoch": 0.059360412457326, "grad_norm": 1.6463786363601685, "learning_rate": 8.30078125e-06, "loss": 0.1038, "step": 426 }, { "epoch": 0.059499756148540375, "grad_norm": 0.5965959429740906, "learning_rate": 8.320312500000001e-06, "loss": 0.0941, "step": 427 }, { "epoch": 0.05963909983975475, "grad_norm": 1.3462581634521484, "learning_rate": 8.33984375e-06, "loss": 0.0834, "step": 428 }, { "epoch": 0.05977844353096914, "grad_norm": 1.7199170589447021, "learning_rate": 8.359375e-06, "loss": 0.0932, "step": 429 }, { "epoch": 0.059917787222183515, "grad_norm": 0.20677590370178223, "learning_rate": 8.37890625e-06, "loss": 0.07, "step": 430 }, { "epoch": 0.0600571309133979, "grad_norm": 0.8300001621246338, "learning_rate": 8.3984375e-06, "loss": 0.0804, "step": 431 }, { "epoch": 0.06019647460461228, "grad_norm": 0.22473149001598358, "learning_rate": 8.417968750000001e-06, "loss": 0.0674, "step": 432 }, { "epoch": 0.060335818295826654, "grad_norm": 0.33211854100227356, "learning_rate": 8.4375e-06, "loss": 0.0712, "step": 433 }, { "epoch": 0.06047516198704104, "grad_norm": 1.2813169956207275, "learning_rate": 8.457031250000001e-06, "loss": 0.1069, "step": 434 }, { "epoch": 0.060614505678255416, "grad_norm": 0.37439393997192383, "learning_rate": 8.4765625e-06, "loss": 0.0708, "step": 435 }, { "epoch": 0.0607538493694698, "grad_norm": 0.549589216709137, "learning_rate": 8.496093750000001e-06, "loss": 0.0772, "step": 436 }, { "epoch": 0.06089319306068418, "grad_norm": 0.348509818315506, "learning_rate": 8.515625e-06, "loss": 0.0786, "step": 437 }, { "epoch": 0.061032536751898556, "grad_norm": 0.6553314328193665, "learning_rate": 8.535156250000001e-06, "loss": 0.0731, "step": 438 }, { "epoch": 0.06117188044311294, "grad_norm": 0.45078328251838684, "learning_rate": 8.5546875e-06, "loss": 0.0761, "step": 439 }, { "epoch": 0.06131122413432732, "grad_norm": 0.5574279427528381, "learning_rate": 8.574218750000001e-06, "loss": 0.0925, "step": 440 }, { "epoch": 0.0614505678255417, "grad_norm": 0.5764532685279846, "learning_rate": 8.59375e-06, "loss": 0.0687, "step": 441 }, { "epoch": 0.06158991151675608, "grad_norm": 0.6837357878684998, "learning_rate": 8.613281250000001e-06, "loss": 0.0725, "step": 442 }, { "epoch": 0.06172925520797046, "grad_norm": 0.6926409602165222, "learning_rate": 8.6328125e-06, "loss": 0.076, "step": 443 }, { "epoch": 0.06186859889918484, "grad_norm": 0.6090654730796814, "learning_rate": 8.652343750000002e-06, "loss": 0.0756, "step": 444 }, { "epoch": 0.06200794259039922, "grad_norm": 0.9661151766777039, "learning_rate": 8.671875e-06, "loss": 0.0699, "step": 445 }, { "epoch": 0.062147286281613597, "grad_norm": 0.23082959651947021, "learning_rate": 8.69140625e-06, "loss": 0.0642, "step": 446 }, { "epoch": 0.06228662997282798, "grad_norm": 0.5208766460418701, "learning_rate": 8.7109375e-06, "loss": 0.0853, "step": 447 }, { "epoch": 0.06242597366404236, "grad_norm": 0.5792698264122009, "learning_rate": 8.73046875e-06, "loss": 0.0873, "step": 448 }, { "epoch": 0.06256531735525674, "grad_norm": 0.26105165481567383, "learning_rate": 8.750000000000001e-06, "loss": 0.0713, "step": 449 }, { "epoch": 0.06270466104647113, "grad_norm": 0.5843310952186584, "learning_rate": 8.76953125e-06, "loss": 0.0887, "step": 450 }, { "epoch": 0.0628440047376855, "grad_norm": 0.5321477055549622, "learning_rate": 8.789062500000001e-06, "loss": 0.0918, "step": 451 }, { "epoch": 0.06298334842889988, "grad_norm": 0.8663888573646545, "learning_rate": 8.80859375e-06, "loss": 0.0781, "step": 452 }, { "epoch": 0.06312269212011426, "grad_norm": 0.3772644102573395, "learning_rate": 8.828125000000001e-06, "loss": 0.0719, "step": 453 }, { "epoch": 0.06326203581132864, "grad_norm": 0.48802992701530457, "learning_rate": 8.84765625e-06, "loss": 0.0806, "step": 454 }, { "epoch": 0.06340137950254303, "grad_norm": 0.29011815786361694, "learning_rate": 8.867187500000001e-06, "loss": 0.0668, "step": 455 }, { "epoch": 0.0635407231937574, "grad_norm": 0.34528788924217224, "learning_rate": 8.88671875e-06, "loss": 0.0649, "step": 456 }, { "epoch": 0.06368006688497178, "grad_norm": 0.32204708456993103, "learning_rate": 8.906250000000001e-06, "loss": 0.0751, "step": 457 }, { "epoch": 0.06381941057618616, "grad_norm": 0.9025880098342896, "learning_rate": 8.92578125e-06, "loss": 0.0872, "step": 458 }, { "epoch": 0.06395875426740054, "grad_norm": 0.32949012517929077, "learning_rate": 8.945312500000001e-06, "loss": 0.075, "step": 459 }, { "epoch": 0.06409809795861493, "grad_norm": 0.6693001985549927, "learning_rate": 8.96484375e-06, "loss": 0.0775, "step": 460 }, { "epoch": 0.06423744164982931, "grad_norm": 0.9776612520217896, "learning_rate": 8.984375000000002e-06, "loss": 0.0869, "step": 461 }, { "epoch": 0.06437678534104369, "grad_norm": 0.5173477530479431, "learning_rate": 9.00390625e-06, "loss": 0.0684, "step": 462 }, { "epoch": 0.06451612903225806, "grad_norm": 0.6831491589546204, "learning_rate": 9.0234375e-06, "loss": 0.0683, "step": 463 }, { "epoch": 0.06465547272347244, "grad_norm": 0.43543848395347595, "learning_rate": 9.042968750000001e-06, "loss": 0.0725, "step": 464 }, { "epoch": 0.06479481641468683, "grad_norm": 0.23942022025585175, "learning_rate": 9.0625e-06, "loss": 0.075, "step": 465 }, { "epoch": 0.06493416010590121, "grad_norm": 0.8371548056602478, "learning_rate": 9.082031250000001e-06, "loss": 0.0894, "step": 466 }, { "epoch": 0.06507350379711559, "grad_norm": 0.27666836977005005, "learning_rate": 9.1015625e-06, "loss": 0.0705, "step": 467 }, { "epoch": 0.06521284748832996, "grad_norm": 0.23446024954319, "learning_rate": 9.121093750000001e-06, "loss": 0.062, "step": 468 }, { "epoch": 0.06535219117954434, "grad_norm": 0.6195204257965088, "learning_rate": 9.140625e-06, "loss": 0.0648, "step": 469 }, { "epoch": 0.06549153487075873, "grad_norm": 0.23461221158504486, "learning_rate": 9.160156250000001e-06, "loss": 0.0753, "step": 470 }, { "epoch": 0.06563087856197311, "grad_norm": 1.0777877569198608, "learning_rate": 9.1796875e-06, "loss": 0.0851, "step": 471 }, { "epoch": 0.06577022225318749, "grad_norm": 0.30955249071121216, "learning_rate": 9.199218750000001e-06, "loss": 0.0701, "step": 472 }, { "epoch": 0.06590956594440187, "grad_norm": 0.2564811408519745, "learning_rate": 9.21875e-06, "loss": 0.0821, "step": 473 }, { "epoch": 0.06604890963561624, "grad_norm": 0.943649411201477, "learning_rate": 9.238281250000001e-06, "loss": 0.0783, "step": 474 }, { "epoch": 0.06618825332683063, "grad_norm": 0.3192828297615051, "learning_rate": 9.2578125e-06, "loss": 0.0752, "step": 475 }, { "epoch": 0.06632759701804501, "grad_norm": 0.5889668464660645, "learning_rate": 9.277343750000001e-06, "loss": 0.0715, "step": 476 }, { "epoch": 0.06646694070925939, "grad_norm": 0.27542608976364136, "learning_rate": 9.296875e-06, "loss": 0.0716, "step": 477 }, { "epoch": 0.06660628440047377, "grad_norm": 0.3944077491760254, "learning_rate": 9.316406250000002e-06, "loss": 0.0639, "step": 478 }, { "epoch": 0.06674562809168814, "grad_norm": 0.3439261317253113, "learning_rate": 9.3359375e-06, "loss": 0.0824, "step": 479 }, { "epoch": 0.06688497178290254, "grad_norm": 0.6405535340309143, "learning_rate": 9.35546875e-06, "loss": 0.0727, "step": 480 }, { "epoch": 0.06702431547411691, "grad_norm": 0.2839511036872864, "learning_rate": 9.375000000000001e-06, "loss": 0.0819, "step": 481 }, { "epoch": 0.06716365916533129, "grad_norm": 0.30496376752853394, "learning_rate": 9.39453125e-06, "loss": 0.0792, "step": 482 }, { "epoch": 0.06730300285654567, "grad_norm": 0.3909819722175598, "learning_rate": 9.414062500000001e-06, "loss": 0.086, "step": 483 }, { "epoch": 0.06744234654776005, "grad_norm": 0.2786772847175598, "learning_rate": 9.43359375e-06, "loss": 0.0676, "step": 484 }, { "epoch": 0.06758169023897444, "grad_norm": 0.5376121401786804, "learning_rate": 9.453125000000001e-06, "loss": 0.0795, "step": 485 }, { "epoch": 0.06772103393018881, "grad_norm": 0.35610121488571167, "learning_rate": 9.47265625e-06, "loss": 0.0676, "step": 486 }, { "epoch": 0.06786037762140319, "grad_norm": 0.21355125308036804, "learning_rate": 9.492187500000001e-06, "loss": 0.0612, "step": 487 }, { "epoch": 0.06799972131261757, "grad_norm": 1.203452467918396, "learning_rate": 9.51171875e-06, "loss": 0.0809, "step": 488 }, { "epoch": 0.06813906500383195, "grad_norm": 0.31012779474258423, "learning_rate": 9.531250000000001e-06, "loss": 0.0679, "step": 489 }, { "epoch": 0.06827840869504634, "grad_norm": 0.5277306437492371, "learning_rate": 9.55078125e-06, "loss": 0.0717, "step": 490 }, { "epoch": 0.06841775238626072, "grad_norm": 0.8270869851112366, "learning_rate": 9.570312500000001e-06, "loss": 0.0685, "step": 491 }, { "epoch": 0.0685570960774751, "grad_norm": 0.16546498239040375, "learning_rate": 9.58984375e-06, "loss": 0.058, "step": 492 }, { "epoch": 0.06869643976868947, "grad_norm": 0.25332343578338623, "learning_rate": 9.609375000000001e-06, "loss": 0.061, "step": 493 }, { "epoch": 0.06883578345990385, "grad_norm": 0.5378696918487549, "learning_rate": 9.62890625e-06, "loss": 0.0977, "step": 494 }, { "epoch": 0.06897512715111823, "grad_norm": 0.4584948420524597, "learning_rate": 9.648437500000002e-06, "loss": 0.0724, "step": 495 }, { "epoch": 0.06911447084233262, "grad_norm": 0.3505585491657257, "learning_rate": 9.66796875e-06, "loss": 0.0632, "step": 496 }, { "epoch": 0.069253814533547, "grad_norm": 0.35553300380706787, "learning_rate": 9.6875e-06, "loss": 0.068, "step": 497 }, { "epoch": 0.06939315822476137, "grad_norm": 0.2598213851451874, "learning_rate": 9.707031250000001e-06, "loss": 0.0678, "step": 498 }, { "epoch": 0.06953250191597575, "grad_norm": 0.6684858202934265, "learning_rate": 9.7265625e-06, "loss": 0.0689, "step": 499 }, { "epoch": 0.06967184560719013, "grad_norm": 0.3481522798538208, "learning_rate": 9.746093750000001e-06, "loss": 0.0663, "step": 500 }, { "epoch": 0.06981118929840452, "grad_norm": 1.2858556509017944, "learning_rate": 9.765625e-06, "loss": 0.0751, "step": 501 }, { "epoch": 0.0699505329896189, "grad_norm": 0.7194646596908569, "learning_rate": 9.785156250000001e-06, "loss": 0.0685, "step": 502 }, { "epoch": 0.07008987668083327, "grad_norm": 0.5670276880264282, "learning_rate": 9.8046875e-06, "loss": 0.0539, "step": 503 }, { "epoch": 0.07022922037204765, "grad_norm": 0.959624707698822, "learning_rate": 9.824218750000001e-06, "loss": 0.0783, "step": 504 }, { "epoch": 0.07036856406326203, "grad_norm": 0.31827056407928467, "learning_rate": 9.84375e-06, "loss": 0.0842, "step": 505 }, { "epoch": 0.07050790775447642, "grad_norm": 0.32843518257141113, "learning_rate": 9.863281250000001e-06, "loss": 0.0705, "step": 506 }, { "epoch": 0.0706472514456908, "grad_norm": 0.32881346344947815, "learning_rate": 9.8828125e-06, "loss": 0.0762, "step": 507 }, { "epoch": 0.07078659513690518, "grad_norm": 0.47554075717926025, "learning_rate": 9.902343750000001e-06, "loss": 0.077, "step": 508 }, { "epoch": 0.07092593882811955, "grad_norm": 0.27398163080215454, "learning_rate": 9.921875e-06, "loss": 0.0686, "step": 509 }, { "epoch": 0.07106528251933393, "grad_norm": 0.9075603485107422, "learning_rate": 9.941406250000002e-06, "loss": 0.0895, "step": 510 }, { "epoch": 0.07120462621054832, "grad_norm": 0.2974821627140045, "learning_rate": 9.9609375e-06, "loss": 0.092, "step": 511 }, { "epoch": 0.0713439699017627, "grad_norm": 0.2829720377922058, "learning_rate": 9.980468750000002e-06, "loss": 0.0755, "step": 512 }, { "epoch": 0.07148331359297708, "grad_norm": 0.3422648310661316, "learning_rate": 1e-05, "loss": 0.0655, "step": 513 }, { "epoch": 0.07162265728419145, "grad_norm": 0.307434618473053, "learning_rate": 9.999999444557077e-06, "loss": 0.0662, "step": 514 }, { "epoch": 0.07176200097540583, "grad_norm": 0.40517011284828186, "learning_rate": 9.999997778228428e-06, "loss": 0.0734, "step": 515 }, { "epoch": 0.07190134466662022, "grad_norm": 0.25487565994262695, "learning_rate": 9.999995001014424e-06, "loss": 0.0702, "step": 516 }, { "epoch": 0.0720406883578346, "grad_norm": 0.1970520317554474, "learning_rate": 9.999991112915685e-06, "loss": 0.0686, "step": 517 }, { "epoch": 0.07218003204904898, "grad_norm": 0.8457881808280945, "learning_rate": 9.999986113933071e-06, "loss": 0.0973, "step": 518 }, { "epoch": 0.07231937574026336, "grad_norm": 1.4761416912078857, "learning_rate": 9.999980004067694e-06, "loss": 0.0967, "step": 519 }, { "epoch": 0.07245871943147773, "grad_norm": 0.21639198064804077, "learning_rate": 9.99997278332091e-06, "loss": 0.0671, "step": 520 }, { "epoch": 0.07259806312269212, "grad_norm": 0.2911241352558136, "learning_rate": 9.999964451694328e-06, "loss": 0.0844, "step": 521 }, { "epoch": 0.0727374068139065, "grad_norm": 0.3027070164680481, "learning_rate": 9.999955009189795e-06, "loss": 0.0603, "step": 522 }, { "epoch": 0.07287675050512088, "grad_norm": 0.333798885345459, "learning_rate": 9.999944455809408e-06, "loss": 0.0673, "step": 523 }, { "epoch": 0.07301609419633526, "grad_norm": 0.2469066083431244, "learning_rate": 9.999932791555516e-06, "loss": 0.0609, "step": 524 }, { "epoch": 0.07315543788754963, "grad_norm": 0.33408477902412415, "learning_rate": 9.999920016430706e-06, "loss": 0.0799, "step": 525 }, { "epoch": 0.07329478157876403, "grad_norm": 0.2717113196849823, "learning_rate": 9.99990613043782e-06, "loss": 0.0802, "step": 526 }, { "epoch": 0.0734341252699784, "grad_norm": 0.2127266675233841, "learning_rate": 9.999891133579941e-06, "loss": 0.0647, "step": 527 }, { "epoch": 0.07357346896119278, "grad_norm": 0.23153534531593323, "learning_rate": 9.999875025860401e-06, "loss": 0.062, "step": 528 }, { "epoch": 0.07371281265240716, "grad_norm": 0.5054033994674683, "learning_rate": 9.99985780728278e-06, "loss": 0.0721, "step": 529 }, { "epoch": 0.07385215634362154, "grad_norm": 0.8351548314094543, "learning_rate": 9.999839477850903e-06, "loss": 0.0723, "step": 530 }, { "epoch": 0.07399150003483593, "grad_norm": 0.2393503040075302, "learning_rate": 9.999820037568844e-06, "loss": 0.0818, "step": 531 }, { "epoch": 0.0741308437260503, "grad_norm": 0.5086777806282043, "learning_rate": 9.999799486440917e-06, "loss": 0.0802, "step": 532 }, { "epoch": 0.07427018741726468, "grad_norm": 0.34324291348457336, "learning_rate": 9.999777824471694e-06, "loss": 0.0805, "step": 533 }, { "epoch": 0.07440953110847906, "grad_norm": 0.2886224687099457, "learning_rate": 9.999755051665985e-06, "loss": 0.0602, "step": 534 }, { "epoch": 0.07454887479969344, "grad_norm": 0.43416866660118103, "learning_rate": 9.99973116802885e-06, "loss": 0.0741, "step": 535 }, { "epoch": 0.07468821849090783, "grad_norm": 0.33439090847969055, "learning_rate": 9.999706173565594e-06, "loss": 0.072, "step": 536 }, { "epoch": 0.0748275621821222, "grad_norm": 0.6087399125099182, "learning_rate": 9.999680068281773e-06, "loss": 0.0723, "step": 537 }, { "epoch": 0.07496690587333658, "grad_norm": 0.3520873785018921, "learning_rate": 9.999652852183184e-06, "loss": 0.0791, "step": 538 }, { "epoch": 0.07510624956455096, "grad_norm": 0.49485716223716736, "learning_rate": 9.999624525275875e-06, "loss": 0.073, "step": 539 }, { "epoch": 0.07524559325576534, "grad_norm": 0.28606924414634705, "learning_rate": 9.99959508756614e-06, "loss": 0.0655, "step": 540 }, { "epoch": 0.07538493694697973, "grad_norm": 0.16347961127758026, "learning_rate": 9.99956453906052e-06, "loss": 0.0692, "step": 541 }, { "epoch": 0.07552428063819411, "grad_norm": 0.5480762720108032, "learning_rate": 9.999532879765801e-06, "loss": 0.0779, "step": 542 }, { "epoch": 0.07566362432940849, "grad_norm": 0.5112860202789307, "learning_rate": 9.999500109689018e-06, "loss": 0.0792, "step": 543 }, { "epoch": 0.07580296802062286, "grad_norm": 0.5088459849357605, "learning_rate": 9.999466228837452e-06, "loss": 0.0677, "step": 544 }, { "epoch": 0.07594231171183724, "grad_norm": 0.5212608575820923, "learning_rate": 9.999431237218629e-06, "loss": 0.0828, "step": 545 }, { "epoch": 0.07608165540305163, "grad_norm": 0.42111507058143616, "learning_rate": 9.999395134840323e-06, "loss": 0.0717, "step": 546 }, { "epoch": 0.07622099909426601, "grad_norm": 0.7046345472335815, "learning_rate": 9.999357921710557e-06, "loss": 0.0688, "step": 547 }, { "epoch": 0.07636034278548039, "grad_norm": 0.9656537771224976, "learning_rate": 9.999319597837599e-06, "loss": 0.0745, "step": 548 }, { "epoch": 0.07649968647669476, "grad_norm": 0.4382452964782715, "learning_rate": 9.99928016322996e-06, "loss": 0.0717, "step": 549 }, { "epoch": 0.07663903016790914, "grad_norm": 0.390513151884079, "learning_rate": 9.999239617896406e-06, "loss": 0.0869, "step": 550 }, { "epoch": 0.07677837385912353, "grad_norm": 0.6684224009513855, "learning_rate": 9.999197961845943e-06, "loss": 0.0733, "step": 551 }, { "epoch": 0.07691771755033791, "grad_norm": 0.7726281881332397, "learning_rate": 9.999155195087826e-06, "loss": 0.0673, "step": 552 }, { "epoch": 0.07705706124155229, "grad_norm": 0.46526479721069336, "learning_rate": 9.999111317631559e-06, "loss": 0.0806, "step": 553 }, { "epoch": 0.07719640493276667, "grad_norm": 0.43037235736846924, "learning_rate": 9.999066329486888e-06, "loss": 0.0686, "step": 554 }, { "epoch": 0.07733574862398104, "grad_norm": 0.371461421251297, "learning_rate": 9.999020230663809e-06, "loss": 0.0665, "step": 555 }, { "epoch": 0.07747509231519543, "grad_norm": 0.31313058733940125, "learning_rate": 9.998973021172564e-06, "loss": 0.0632, "step": 556 }, { "epoch": 0.07761443600640981, "grad_norm": 0.46056580543518066, "learning_rate": 9.998924701023645e-06, "loss": 0.0861, "step": 557 }, { "epoch": 0.07775377969762419, "grad_norm": 0.3750181496143341, "learning_rate": 9.998875270227781e-06, "loss": 0.0704, "step": 558 }, { "epoch": 0.07789312338883857, "grad_norm": 0.38398128747940063, "learning_rate": 9.99882472879596e-06, "loss": 0.0691, "step": 559 }, { "epoch": 0.07803246708005294, "grad_norm": 0.25918012857437134, "learning_rate": 9.998773076739409e-06, "loss": 0.0798, "step": 560 }, { "epoch": 0.07817181077126734, "grad_norm": 0.20077002048492432, "learning_rate": 9.998720314069606e-06, "loss": 0.0661, "step": 561 }, { "epoch": 0.07831115446248171, "grad_norm": 0.20485985279083252, "learning_rate": 9.99866644079827e-06, "loss": 0.0579, "step": 562 }, { "epoch": 0.07845049815369609, "grad_norm": 0.7875776886940002, "learning_rate": 9.998611456937373e-06, "loss": 0.0787, "step": 563 }, { "epoch": 0.07858984184491047, "grad_norm": 0.4693462550640106, "learning_rate": 9.99855536249913e-06, "loss": 0.0775, "step": 564 }, { "epoch": 0.07872918553612485, "grad_norm": 0.45938241481781006, "learning_rate": 9.998498157496004e-06, "loss": 0.0752, "step": 565 }, { "epoch": 0.07886852922733924, "grad_norm": 0.31756290793418884, "learning_rate": 9.998439841940706e-06, "loss": 0.07, "step": 566 }, { "epoch": 0.07900787291855361, "grad_norm": 0.21975305676460266, "learning_rate": 9.998380415846191e-06, "loss": 0.0683, "step": 567 }, { "epoch": 0.07914721660976799, "grad_norm": 0.22947803139686584, "learning_rate": 9.998319879225662e-06, "loss": 0.0644, "step": 568 }, { "epoch": 0.07928656030098237, "grad_norm": 0.2660812437534332, "learning_rate": 9.998258232092571e-06, "loss": 0.0789, "step": 569 }, { "epoch": 0.07942590399219675, "grad_norm": 0.24950997531414032, "learning_rate": 9.998195474460613e-06, "loss": 0.0714, "step": 570 }, { "epoch": 0.07956524768341114, "grad_norm": 0.20082105696201324, "learning_rate": 9.998131606343729e-06, "loss": 0.0745, "step": 571 }, { "epoch": 0.07970459137462552, "grad_norm": 0.18244269490242004, "learning_rate": 9.998066627756113e-06, "loss": 0.0717, "step": 572 }, { "epoch": 0.0798439350658399, "grad_norm": 0.22144366800785065, "learning_rate": 9.9980005387122e-06, "loss": 0.071, "step": 573 }, { "epoch": 0.07998327875705427, "grad_norm": 0.5746071934700012, "learning_rate": 9.997933339226675e-06, "loss": 0.0687, "step": 574 }, { "epoch": 0.08012262244826865, "grad_norm": 0.42938482761383057, "learning_rate": 9.997865029314464e-06, "loss": 0.0681, "step": 575 }, { "epoch": 0.08026196613948304, "grad_norm": 0.3178096413612366, "learning_rate": 9.997795608990749e-06, "loss": 0.0761, "step": 576 }, { "epoch": 0.08040130983069742, "grad_norm": 0.38649052381515503, "learning_rate": 9.99772507827095e-06, "loss": 0.0626, "step": 577 }, { "epoch": 0.0805406535219118, "grad_norm": 0.6353598237037659, "learning_rate": 9.997653437170739e-06, "loss": 0.0741, "step": 578 }, { "epoch": 0.08067999721312617, "grad_norm": 0.19021578133106232, "learning_rate": 9.997580685706032e-06, "loss": 0.0767, "step": 579 }, { "epoch": 0.08081934090434055, "grad_norm": 0.32874229550361633, "learning_rate": 9.997506823892993e-06, "loss": 0.078, "step": 580 }, { "epoch": 0.08095868459555494, "grad_norm": 0.3569730520248413, "learning_rate": 9.997431851748034e-06, "loss": 0.0618, "step": 581 }, { "epoch": 0.08109802828676932, "grad_norm": 0.16421523690223694, "learning_rate": 9.99735576928781e-06, "loss": 0.0655, "step": 582 }, { "epoch": 0.0812373719779837, "grad_norm": 0.2602977752685547, "learning_rate": 9.997278576529228e-06, "loss": 0.0556, "step": 583 }, { "epoch": 0.08137671566919807, "grad_norm": 0.24413453042507172, "learning_rate": 9.997200273489434e-06, "loss": 0.0639, "step": 584 }, { "epoch": 0.08151605936041245, "grad_norm": 0.2928454279899597, "learning_rate": 9.997120860185827e-06, "loss": 0.0765, "step": 585 }, { "epoch": 0.08165540305162684, "grad_norm": 0.2616055905818939, "learning_rate": 9.997040336636052e-06, "loss": 0.0609, "step": 586 }, { "epoch": 0.08179474674284122, "grad_norm": 0.32062095403671265, "learning_rate": 9.996958702857997e-06, "loss": 0.0717, "step": 587 }, { "epoch": 0.0819340904340556, "grad_norm": 0.3872973322868347, "learning_rate": 9.996875958869803e-06, "loss": 0.0735, "step": 588 }, { "epoch": 0.08207343412526998, "grad_norm": 0.3312254548072815, "learning_rate": 9.996792104689849e-06, "loss": 0.0581, "step": 589 }, { "epoch": 0.08221277781648435, "grad_norm": 0.25432220101356506, "learning_rate": 9.99670714033677e-06, "loss": 0.0747, "step": 590 }, { "epoch": 0.08235212150769874, "grad_norm": 0.23763450980186462, "learning_rate": 9.996621065829442e-06, "loss": 0.0603, "step": 591 }, { "epoch": 0.08249146519891312, "grad_norm": 0.22701150178909302, "learning_rate": 9.996533881186986e-06, "loss": 0.0694, "step": 592 }, { "epoch": 0.0826308088901275, "grad_norm": 0.30617454648017883, "learning_rate": 9.996445586428776e-06, "loss": 0.072, "step": 593 }, { "epoch": 0.08277015258134188, "grad_norm": 0.26625022292137146, "learning_rate": 9.996356181574425e-06, "loss": 0.057, "step": 594 }, { "epoch": 0.08290949627255625, "grad_norm": 0.2888217270374298, "learning_rate": 9.9962656666438e-06, "loss": 0.071, "step": 595 }, { "epoch": 0.08304883996377065, "grad_norm": 0.2204950451850891, "learning_rate": 9.996174041657012e-06, "loss": 0.0615, "step": 596 }, { "epoch": 0.08318818365498502, "grad_norm": 0.13854967057704926, "learning_rate": 9.996081306634416e-06, "loss": 0.0679, "step": 597 }, { "epoch": 0.0833275273461994, "grad_norm": 0.5071834921836853, "learning_rate": 9.995987461596617e-06, "loss": 0.0649, "step": 598 }, { "epoch": 0.08346687103741378, "grad_norm": 0.10590600222349167, "learning_rate": 9.995892506564461e-06, "loss": 0.056, "step": 599 }, { "epoch": 0.08360621472862816, "grad_norm": 0.22220443189144135, "learning_rate": 9.995796441559052e-06, "loss": 0.0654, "step": 600 }, { "epoch": 0.08374555841984255, "grad_norm": 0.3544573485851288, "learning_rate": 9.995699266601728e-06, "loss": 0.0675, "step": 601 }, { "epoch": 0.08388490211105692, "grad_norm": 0.7824773788452148, "learning_rate": 9.995600981714082e-06, "loss": 0.0773, "step": 602 }, { "epoch": 0.0840242458022713, "grad_norm": 0.475772500038147, "learning_rate": 9.995501586917949e-06, "loss": 0.0653, "step": 603 }, { "epoch": 0.08416358949348568, "grad_norm": 0.21665002405643463, "learning_rate": 9.99540108223541e-06, "loss": 0.0617, "step": 604 }, { "epoch": 0.08430293318470006, "grad_norm": 0.4099491834640503, "learning_rate": 9.9952994676888e-06, "loss": 0.0511, "step": 605 }, { "epoch": 0.08444227687591445, "grad_norm": 0.3150854706764221, "learning_rate": 9.995196743300693e-06, "loss": 0.073, "step": 606 }, { "epoch": 0.08458162056712883, "grad_norm": 0.20201274752616882, "learning_rate": 9.995092909093911e-06, "loss": 0.0678, "step": 607 }, { "epoch": 0.0847209642583432, "grad_norm": 0.1853351593017578, "learning_rate": 9.994987965091525e-06, "loss": 0.0601, "step": 608 }, { "epoch": 0.08486030794955758, "grad_norm": 0.22082747519016266, "learning_rate": 9.994881911316849e-06, "loss": 0.0625, "step": 609 }, { "epoch": 0.08499965164077196, "grad_norm": 0.13272465765476227, "learning_rate": 9.99477474779345e-06, "loss": 0.0594, "step": 610 }, { "epoch": 0.08513899533198635, "grad_norm": 0.3367891311645508, "learning_rate": 9.994666474545133e-06, "loss": 0.0824, "step": 611 }, { "epoch": 0.08527833902320073, "grad_norm": 0.49696457386016846, "learning_rate": 9.994557091595956e-06, "loss": 0.0775, "step": 612 }, { "epoch": 0.0854176827144151, "grad_norm": 0.3193632662296295, "learning_rate": 9.99444659897022e-06, "loss": 0.0587, "step": 613 }, { "epoch": 0.08555702640562948, "grad_norm": 0.16105099022388458, "learning_rate": 9.994334996692476e-06, "loss": 0.0626, "step": 614 }, { "epoch": 0.08569637009684386, "grad_norm": 0.20135925710201263, "learning_rate": 9.994222284787519e-06, "loss": 0.0634, "step": 615 }, { "epoch": 0.08583571378805825, "grad_norm": 0.3215431571006775, "learning_rate": 9.99410846328039e-06, "loss": 0.0707, "step": 616 }, { "epoch": 0.08597505747927263, "grad_norm": 0.22639484703540802, "learning_rate": 9.993993532196376e-06, "loss": 0.0686, "step": 617 }, { "epoch": 0.086114401170487, "grad_norm": 0.170465350151062, "learning_rate": 9.993877491561015e-06, "loss": 0.0602, "step": 618 }, { "epoch": 0.08625374486170138, "grad_norm": 0.2311295121908188, "learning_rate": 9.99376034140009e-06, "loss": 0.0648, "step": 619 }, { "epoch": 0.08639308855291576, "grad_norm": 0.2748769223690033, "learning_rate": 9.993642081739623e-06, "loss": 0.0677, "step": 620 }, { "epoch": 0.08653243224413015, "grad_norm": 0.1898260861635208, "learning_rate": 9.993522712605895e-06, "loss": 0.0572, "step": 621 }, { "epoch": 0.08667177593534453, "grad_norm": 0.48516175150871277, "learning_rate": 9.993402234025422e-06, "loss": 0.0676, "step": 622 }, { "epoch": 0.08681111962655891, "grad_norm": 0.883155345916748, "learning_rate": 9.993280646024975e-06, "loss": 0.0856, "step": 623 }, { "epoch": 0.08695046331777329, "grad_norm": 0.3484979569911957, "learning_rate": 9.993157948631566e-06, "loss": 0.0748, "step": 624 }, { "epoch": 0.08708980700898766, "grad_norm": 0.34296268224716187, "learning_rate": 9.993034141872459e-06, "loss": 0.0628, "step": 625 }, { "epoch": 0.08722915070020205, "grad_norm": 0.45566171407699585, "learning_rate": 9.992909225775157e-06, "loss": 0.0748, "step": 626 }, { "epoch": 0.08736849439141643, "grad_norm": 0.2791772484779358, "learning_rate": 9.992783200367414e-06, "loss": 0.0739, "step": 627 }, { "epoch": 0.08750783808263081, "grad_norm": 0.28958454728126526, "learning_rate": 9.992656065677234e-06, "loss": 0.0736, "step": 628 }, { "epoch": 0.08764718177384519, "grad_norm": 0.2867238223552704, "learning_rate": 9.992527821732858e-06, "loss": 0.0537, "step": 629 }, { "epoch": 0.08778652546505956, "grad_norm": 0.3086458146572113, "learning_rate": 9.992398468562782e-06, "loss": 0.0622, "step": 630 }, { "epoch": 0.08792586915627396, "grad_norm": 0.2586793899536133, "learning_rate": 9.992268006195744e-06, "loss": 0.073, "step": 631 }, { "epoch": 0.08806521284748833, "grad_norm": 0.12805968523025513, "learning_rate": 9.992136434660733e-06, "loss": 0.0684, "step": 632 }, { "epoch": 0.08820455653870271, "grad_norm": 0.5480097532272339, "learning_rate": 9.992003753986976e-06, "loss": 0.0664, "step": 633 }, { "epoch": 0.08834390022991709, "grad_norm": 0.11143112927675247, "learning_rate": 9.991869964203955e-06, "loss": 0.0587, "step": 634 }, { "epoch": 0.08848324392113147, "grad_norm": 0.16738198697566986, "learning_rate": 9.991735065341394e-06, "loss": 0.0654, "step": 635 }, { "epoch": 0.08862258761234586, "grad_norm": 0.2313215285539627, "learning_rate": 9.991599057429266e-06, "loss": 0.0565, "step": 636 }, { "epoch": 0.08876193130356023, "grad_norm": 0.6348254680633545, "learning_rate": 9.991461940497786e-06, "loss": 0.0842, "step": 637 }, { "epoch": 0.08890127499477461, "grad_norm": 0.1594105064868927, "learning_rate": 9.991323714577421e-06, "loss": 0.0693, "step": 638 }, { "epoch": 0.08904061868598899, "grad_norm": 0.27286815643310547, "learning_rate": 9.99118437969888e-06, "loss": 0.0633, "step": 639 }, { "epoch": 0.08917996237720337, "grad_norm": 0.26407018303871155, "learning_rate": 9.99104393589312e-06, "loss": 0.067, "step": 640 }, { "epoch": 0.08931930606841776, "grad_norm": 0.3460780680179596, "learning_rate": 9.990902383191346e-06, "loss": 0.0725, "step": 641 }, { "epoch": 0.08945864975963214, "grad_norm": 0.32648995518684387, "learning_rate": 9.990759721625005e-06, "loss": 0.0587, "step": 642 }, { "epoch": 0.08959799345084651, "grad_norm": 0.10919462144374847, "learning_rate": 9.990615951225797e-06, "loss": 0.0637, "step": 643 }, { "epoch": 0.08973733714206089, "grad_norm": 0.6007724404335022, "learning_rate": 9.99047107202566e-06, "loss": 0.081, "step": 644 }, { "epoch": 0.08987668083327527, "grad_norm": 0.6738625764846802, "learning_rate": 9.990325084056787e-06, "loss": 0.076, "step": 645 }, { "epoch": 0.09001602452448966, "grad_norm": 0.19357089698314667, "learning_rate": 9.99017798735161e-06, "loss": 0.0599, "step": 646 }, { "epoch": 0.09015536821570404, "grad_norm": 0.5190718173980713, "learning_rate": 9.990029781942814e-06, "loss": 0.0719, "step": 647 }, { "epoch": 0.09029471190691842, "grad_norm": 0.4739391505718231, "learning_rate": 9.989880467863323e-06, "loss": 0.0736, "step": 648 }, { "epoch": 0.09043405559813279, "grad_norm": 0.11877908557653427, "learning_rate": 9.989730045146313e-06, "loss": 0.0571, "step": 649 }, { "epoch": 0.09057339928934717, "grad_norm": 0.33531472086906433, "learning_rate": 9.989578513825205e-06, "loss": 0.0704, "step": 650 }, { "epoch": 0.09071274298056156, "grad_norm": 0.16386152803897858, "learning_rate": 9.989425873933666e-06, "loss": 0.0596, "step": 651 }, { "epoch": 0.09085208667177594, "grad_norm": 0.20186340808868408, "learning_rate": 9.989272125505606e-06, "loss": 0.0584, "step": 652 }, { "epoch": 0.09099143036299032, "grad_norm": 0.3681241273880005, "learning_rate": 9.98911726857519e-06, "loss": 0.0716, "step": 653 }, { "epoch": 0.0911307740542047, "grad_norm": 0.14482703804969788, "learning_rate": 9.988961303176818e-06, "loss": 0.0564, "step": 654 }, { "epoch": 0.09127011774541907, "grad_norm": 0.2485005110502243, "learning_rate": 9.988804229345146e-06, "loss": 0.0682, "step": 655 }, { "epoch": 0.09140946143663346, "grad_norm": 0.23796312510967255, "learning_rate": 9.98864604711507e-06, "loss": 0.0598, "step": 656 }, { "epoch": 0.09154880512784784, "grad_norm": 0.20602256059646606, "learning_rate": 9.988486756521733e-06, "loss": 0.0733, "step": 657 }, { "epoch": 0.09168814881906222, "grad_norm": 0.16266320645809174, "learning_rate": 9.98832635760053e-06, "loss": 0.0561, "step": 658 }, { "epoch": 0.0918274925102766, "grad_norm": 0.5816202163696289, "learning_rate": 9.988164850387095e-06, "loss": 0.0712, "step": 659 }, { "epoch": 0.09196683620149097, "grad_norm": 0.2564311623573303, "learning_rate": 9.988002234917312e-06, "loss": 0.0774, "step": 660 }, { "epoch": 0.09210617989270536, "grad_norm": 0.19413109123706818, "learning_rate": 9.987838511227311e-06, "loss": 0.0699, "step": 661 }, { "epoch": 0.09224552358391974, "grad_norm": 0.26913022994995117, "learning_rate": 9.987673679353467e-06, "loss": 0.0661, "step": 662 }, { "epoch": 0.09238486727513412, "grad_norm": 0.22454896569252014, "learning_rate": 9.987507739332401e-06, "loss": 0.0771, "step": 663 }, { "epoch": 0.0925242109663485, "grad_norm": 0.36840108036994934, "learning_rate": 9.987340691200984e-06, "loss": 0.0607, "step": 664 }, { "epoch": 0.09266355465756287, "grad_norm": 0.2481817603111267, "learning_rate": 9.987172534996326e-06, "loss": 0.061, "step": 665 }, { "epoch": 0.09280289834877727, "grad_norm": 0.16671735048294067, "learning_rate": 9.98700327075579e-06, "loss": 0.0724, "step": 666 }, { "epoch": 0.09294224203999164, "grad_norm": 0.21849016845226288, "learning_rate": 9.986832898516985e-06, "loss": 0.0613, "step": 667 }, { "epoch": 0.09308158573120602, "grad_norm": 0.299607515335083, "learning_rate": 9.986661418317759e-06, "loss": 0.0697, "step": 668 }, { "epoch": 0.0932209294224204, "grad_norm": 0.25894302129745483, "learning_rate": 9.986488830196215e-06, "loss": 0.0662, "step": 669 }, { "epoch": 0.09336027311363478, "grad_norm": 0.1478806883096695, "learning_rate": 9.986315134190694e-06, "loss": 0.0608, "step": 670 }, { "epoch": 0.09349961680484917, "grad_norm": 0.2791368365287781, "learning_rate": 9.98614033033979e-06, "loss": 0.0725, "step": 671 }, { "epoch": 0.09363896049606354, "grad_norm": 0.35832637548446655, "learning_rate": 9.985964418682342e-06, "loss": 0.0676, "step": 672 }, { "epoch": 0.09377830418727792, "grad_norm": 0.17418646812438965, "learning_rate": 9.985787399257431e-06, "loss": 0.0556, "step": 673 }, { "epoch": 0.0939176478784923, "grad_norm": 0.26794227957725525, "learning_rate": 9.985609272104387e-06, "loss": 0.0671, "step": 674 }, { "epoch": 0.09405699156970668, "grad_norm": 0.14732183516025543, "learning_rate": 9.985430037262787e-06, "loss": 0.0664, "step": 675 }, { "epoch": 0.09419633526092107, "grad_norm": 0.5374524593353271, "learning_rate": 9.98524969477245e-06, "loss": 0.0677, "step": 676 }, { "epoch": 0.09433567895213545, "grad_norm": 0.402891606092453, "learning_rate": 9.985068244673449e-06, "loss": 0.0583, "step": 677 }, { "epoch": 0.09447502264334982, "grad_norm": 0.2691417336463928, "learning_rate": 9.984885687006093e-06, "loss": 0.0944, "step": 678 }, { "epoch": 0.0946143663345642, "grad_norm": 0.17511487007141113, "learning_rate": 9.984702021810944e-06, "loss": 0.0648, "step": 679 }, { "epoch": 0.09475371002577858, "grad_norm": 0.4689241051673889, "learning_rate": 9.98451724912881e-06, "loss": 0.0633, "step": 680 }, { "epoch": 0.09489305371699297, "grad_norm": 0.3026001751422882, "learning_rate": 9.984331369000739e-06, "loss": 0.0696, "step": 681 }, { "epoch": 0.09503239740820735, "grad_norm": 0.17035682499408722, "learning_rate": 9.984144381468035e-06, "loss": 0.0715, "step": 682 }, { "epoch": 0.09517174109942172, "grad_norm": 0.1732121855020523, "learning_rate": 9.983956286572238e-06, "loss": 0.0713, "step": 683 }, { "epoch": 0.0953110847906361, "grad_norm": 0.23125560581684113, "learning_rate": 9.983767084355141e-06, "loss": 0.0646, "step": 684 }, { "epoch": 0.09545042848185048, "grad_norm": 0.34436535835266113, "learning_rate": 9.983576774858776e-06, "loss": 0.0603, "step": 685 }, { "epoch": 0.09558977217306487, "grad_norm": 0.5508801341056824, "learning_rate": 9.983385358125432e-06, "loss": 0.0702, "step": 686 }, { "epoch": 0.09572911586427925, "grad_norm": 0.34866368770599365, "learning_rate": 9.983192834197633e-06, "loss": 0.0698, "step": 687 }, { "epoch": 0.09586845955549363, "grad_norm": 0.2983378469944, "learning_rate": 9.982999203118153e-06, "loss": 0.0617, "step": 688 }, { "epoch": 0.096007803246708, "grad_norm": 0.34938088059425354, "learning_rate": 9.982804464930016e-06, "loss": 0.0752, "step": 689 }, { "epoch": 0.09614714693792238, "grad_norm": 0.3541445732116699, "learning_rate": 9.982608619676485e-06, "loss": 0.0736, "step": 690 }, { "epoch": 0.09628649062913676, "grad_norm": 0.3485894203186035, "learning_rate": 9.982411667401076e-06, "loss": 0.0588, "step": 691 }, { "epoch": 0.09642583432035115, "grad_norm": 0.3548313081264496, "learning_rate": 9.982213608147541e-06, "loss": 0.0789, "step": 692 }, { "epoch": 0.09656517801156553, "grad_norm": 0.23557603359222412, "learning_rate": 9.982014441959891e-06, "loss": 0.0676, "step": 693 }, { "epoch": 0.0967045217027799, "grad_norm": 0.4336819350719452, "learning_rate": 9.98181416888237e-06, "loss": 0.0544, "step": 694 }, { "epoch": 0.09684386539399428, "grad_norm": 0.18855924904346466, "learning_rate": 9.981612788959481e-06, "loss": 0.0696, "step": 695 }, { "epoch": 0.09698320908520866, "grad_norm": 0.21622717380523682, "learning_rate": 9.981410302235962e-06, "loss": 0.0667, "step": 696 }, { "epoch": 0.09712255277642305, "grad_norm": 0.32856518030166626, "learning_rate": 9.9812067087568e-06, "loss": 0.0713, "step": 697 }, { "epoch": 0.09726189646763743, "grad_norm": 0.288418710231781, "learning_rate": 9.98100200856723e-06, "loss": 0.0668, "step": 698 }, { "epoch": 0.0974012401588518, "grad_norm": 0.12429755181074142, "learning_rate": 9.980796201712734e-06, "loss": 0.0685, "step": 699 }, { "epoch": 0.09754058385006618, "grad_norm": 0.26470378041267395, "learning_rate": 9.980589288239034e-06, "loss": 0.0615, "step": 700 }, { "epoch": 0.09767992754128056, "grad_norm": 0.25604113936424255, "learning_rate": 9.980381268192103e-06, "loss": 0.0685, "step": 701 }, { "epoch": 0.09781927123249495, "grad_norm": 0.43434834480285645, "learning_rate": 9.980172141618159e-06, "loss": 0.076, "step": 702 }, { "epoch": 0.09795861492370933, "grad_norm": 0.236979678273201, "learning_rate": 9.979961908563663e-06, "loss": 0.0654, "step": 703 }, { "epoch": 0.09809795861492371, "grad_norm": 0.24735423922538757, "learning_rate": 9.979750569075325e-06, "loss": 0.0606, "step": 704 }, { "epoch": 0.09823730230613809, "grad_norm": 0.46090269088745117, "learning_rate": 9.979538123200102e-06, "loss": 0.0768, "step": 705 }, { "epoch": 0.09837664599735246, "grad_norm": 0.5463074445724487, "learning_rate": 9.979324570985194e-06, "loss": 0.0745, "step": 706 }, { "epoch": 0.09851598968856685, "grad_norm": 0.139039546251297, "learning_rate": 9.979109912478044e-06, "loss": 0.0692, "step": 707 }, { "epoch": 0.09865533337978123, "grad_norm": 0.6088494658470154, "learning_rate": 9.978894147726346e-06, "loss": 0.0863, "step": 708 }, { "epoch": 0.09879467707099561, "grad_norm": 0.10323652625083923, "learning_rate": 9.97867727677804e-06, "loss": 0.052, "step": 709 }, { "epoch": 0.09893402076220999, "grad_norm": 0.16749493777751923, "learning_rate": 9.978459299681306e-06, "loss": 0.0724, "step": 710 }, { "epoch": 0.09907336445342436, "grad_norm": 0.19910065829753876, "learning_rate": 9.978240216484579e-06, "loss": 0.0658, "step": 711 }, { "epoch": 0.09921270814463876, "grad_norm": 0.11937715858221054, "learning_rate": 9.978020027236529e-06, "loss": 0.0629, "step": 712 }, { "epoch": 0.09935205183585313, "grad_norm": 0.12178565561771393, "learning_rate": 9.977798731986079e-06, "loss": 0.0568, "step": 713 }, { "epoch": 0.09949139552706751, "grad_norm": 0.20190706849098206, "learning_rate": 9.977576330782397e-06, "loss": 0.0667, "step": 714 }, { "epoch": 0.09963073921828189, "grad_norm": 0.104694664478302, "learning_rate": 9.977352823674893e-06, "loss": 0.0633, "step": 715 }, { "epoch": 0.09977008290949627, "grad_norm": 0.6544679403305054, "learning_rate": 9.977128210713227e-06, "loss": 0.0875, "step": 716 }, { "epoch": 0.09990942660071066, "grad_norm": 0.1876298189163208, "learning_rate": 9.976902491947303e-06, "loss": 0.0607, "step": 717 }, { "epoch": 0.10004877029192503, "grad_norm": 0.20566023886203766, "learning_rate": 9.976675667427268e-06, "loss": 0.0626, "step": 718 }, { "epoch": 0.10018811398313941, "grad_norm": 0.3420122265815735, "learning_rate": 9.976447737203521e-06, "loss": 0.0741, "step": 719 }, { "epoch": 0.10032745767435379, "grad_norm": 0.1830596923828125, "learning_rate": 9.976218701326701e-06, "loss": 0.0597, "step": 720 }, { "epoch": 0.10046680136556817, "grad_norm": 0.1563132107257843, "learning_rate": 9.975988559847693e-06, "loss": 0.0661, "step": 721 }, { "epoch": 0.10060614505678256, "grad_norm": 0.08359020948410034, "learning_rate": 9.975757312817634e-06, "loss": 0.0547, "step": 722 }, { "epoch": 0.10074548874799694, "grad_norm": 0.11874379962682724, "learning_rate": 9.975524960287895e-06, "loss": 0.0599, "step": 723 }, { "epoch": 0.10088483243921131, "grad_norm": 0.26406776905059814, "learning_rate": 9.975291502310105e-06, "loss": 0.0593, "step": 724 }, { "epoch": 0.10102417613042569, "grad_norm": 0.15652814507484436, "learning_rate": 9.975056938936129e-06, "loss": 0.0593, "step": 725 }, { "epoch": 0.10116351982164007, "grad_norm": 0.19168223440647125, "learning_rate": 9.974821270218086e-06, "loss": 0.0679, "step": 726 }, { "epoch": 0.10130286351285446, "grad_norm": 0.35224664211273193, "learning_rate": 9.974584496208334e-06, "loss": 0.0724, "step": 727 }, { "epoch": 0.10144220720406884, "grad_norm": 0.14837811887264252, "learning_rate": 9.974346616959476e-06, "loss": 0.0745, "step": 728 }, { "epoch": 0.10158155089528322, "grad_norm": 0.18856723606586456, "learning_rate": 9.974107632524368e-06, "loss": 0.075, "step": 729 }, { "epoch": 0.10172089458649759, "grad_norm": 0.14400970935821533, "learning_rate": 9.973867542956104e-06, "loss": 0.0587, "step": 730 }, { "epoch": 0.10186023827771197, "grad_norm": 0.21298417448997498, "learning_rate": 9.973626348308027e-06, "loss": 0.0691, "step": 731 }, { "epoch": 0.10199958196892636, "grad_norm": 0.21018731594085693, "learning_rate": 9.973384048633728e-06, "loss": 0.0615, "step": 732 }, { "epoch": 0.10213892566014074, "grad_norm": 0.16097280383110046, "learning_rate": 9.973140643987034e-06, "loss": 0.0633, "step": 733 }, { "epoch": 0.10227826935135512, "grad_norm": 0.460025429725647, "learning_rate": 9.97289613442203e-06, "loss": 0.0838, "step": 734 }, { "epoch": 0.1024176130425695, "grad_norm": 0.12238900363445282, "learning_rate": 9.972650519993037e-06, "loss": 0.0654, "step": 735 }, { "epoch": 0.10255695673378387, "grad_norm": 0.1143098846077919, "learning_rate": 9.972403800754626e-06, "loss": 0.0608, "step": 736 }, { "epoch": 0.10269630042499826, "grad_norm": 0.31963130831718445, "learning_rate": 9.972155976761613e-06, "loss": 0.0675, "step": 737 }, { "epoch": 0.10283564411621264, "grad_norm": 0.2624821066856384, "learning_rate": 9.971907048069058e-06, "loss": 0.0638, "step": 738 }, { "epoch": 0.10297498780742702, "grad_norm": 0.16319581866264343, "learning_rate": 9.971657014732268e-06, "loss": 0.0765, "step": 739 }, { "epoch": 0.1031143314986414, "grad_norm": 0.15907008945941925, "learning_rate": 9.971405876806792e-06, "loss": 0.0555, "step": 740 }, { "epoch": 0.10325367518985577, "grad_norm": 0.20824238657951355, "learning_rate": 9.971153634348431e-06, "loss": 0.0625, "step": 741 }, { "epoch": 0.10339301888107016, "grad_norm": 0.21576674282550812, "learning_rate": 9.970900287413225e-06, "loss": 0.0595, "step": 742 }, { "epoch": 0.10353236257228454, "grad_norm": 0.19350719451904297, "learning_rate": 9.970645836057464e-06, "loss": 0.0789, "step": 743 }, { "epoch": 0.10367170626349892, "grad_norm": 0.2772654891014099, "learning_rate": 9.970390280337681e-06, "loss": 0.0606, "step": 744 }, { "epoch": 0.1038110499547133, "grad_norm": 0.10858184099197388, "learning_rate": 9.970133620310652e-06, "loss": 0.0644, "step": 745 }, { "epoch": 0.10395039364592767, "grad_norm": 0.2750256359577179, "learning_rate": 9.969875856033402e-06, "loss": 0.0537, "step": 746 }, { "epoch": 0.10408973733714207, "grad_norm": 0.22457242012023926, "learning_rate": 9.969616987563202e-06, "loss": 0.0628, "step": 747 }, { "epoch": 0.10422908102835644, "grad_norm": 0.26538771390914917, "learning_rate": 9.969357014957564e-06, "loss": 0.0468, "step": 748 }, { "epoch": 0.10436842471957082, "grad_norm": 0.16110704839229584, "learning_rate": 9.969095938274251e-06, "loss": 0.0667, "step": 749 }, { "epoch": 0.1045077684107852, "grad_norm": 0.1862396001815796, "learning_rate": 9.968833757571268e-06, "loss": 0.0626, "step": 750 }, { "epoch": 0.10464711210199958, "grad_norm": 0.11955965310335159, "learning_rate": 9.968570472906862e-06, "loss": 0.0601, "step": 751 }, { "epoch": 0.10478645579321397, "grad_norm": 0.17265619337558746, "learning_rate": 9.968306084339534e-06, "loss": 0.0668, "step": 752 }, { "epoch": 0.10492579948442834, "grad_norm": 0.45380550622940063, "learning_rate": 9.96804059192802e-06, "loss": 0.072, "step": 753 }, { "epoch": 0.10506514317564272, "grad_norm": 0.1560068130493164, "learning_rate": 9.96777399573131e-06, "loss": 0.0528, "step": 754 }, { "epoch": 0.1052044868668571, "grad_norm": 0.262149453163147, "learning_rate": 9.967506295808634e-06, "loss": 0.068, "step": 755 }, { "epoch": 0.10534383055807148, "grad_norm": 0.14070959389209747, "learning_rate": 9.96723749221947e-06, "loss": 0.0567, "step": 756 }, { "epoch": 0.10548317424928587, "grad_norm": 0.16073502600193024, "learning_rate": 9.96696758502354e-06, "loss": 0.0636, "step": 757 }, { "epoch": 0.10562251794050025, "grad_norm": 0.20051845908164978, "learning_rate": 9.966696574280808e-06, "loss": 0.0578, "step": 758 }, { "epoch": 0.10576186163171462, "grad_norm": 0.26265639066696167, "learning_rate": 9.966424460051489e-06, "loss": 0.0814, "step": 759 }, { "epoch": 0.105901205322929, "grad_norm": 0.2630269527435303, "learning_rate": 9.96615124239604e-06, "loss": 0.0645, "step": 760 }, { "epoch": 0.10604054901414338, "grad_norm": 0.4198019802570343, "learning_rate": 9.965876921375165e-06, "loss": 0.0638, "step": 761 }, { "epoch": 0.10617989270535777, "grad_norm": 0.15232129395008087, "learning_rate": 9.965601497049812e-06, "loss": 0.0529, "step": 762 }, { "epoch": 0.10631923639657215, "grad_norm": 0.21151717007160187, "learning_rate": 9.965324969481172e-06, "loss": 0.0665, "step": 763 }, { "epoch": 0.10645858008778653, "grad_norm": 0.21839646995067596, "learning_rate": 9.965047338730685e-06, "loss": 0.0703, "step": 764 }, { "epoch": 0.1065979237790009, "grad_norm": 0.3060857057571411, "learning_rate": 9.964768604860033e-06, "loss": 0.0675, "step": 765 }, { "epoch": 0.10673726747021528, "grad_norm": 0.23859061300754547, "learning_rate": 9.964488767931144e-06, "loss": 0.0637, "step": 766 }, { "epoch": 0.10687661116142967, "grad_norm": 0.2014082372188568, "learning_rate": 9.964207828006191e-06, "loss": 0.0786, "step": 767 }, { "epoch": 0.10701595485264405, "grad_norm": 0.15632256865501404, "learning_rate": 9.963925785147595e-06, "loss": 0.0671, "step": 768 }, { "epoch": 0.10715529854385843, "grad_norm": 0.12398216873407364, "learning_rate": 9.963642639418018e-06, "loss": 0.0691, "step": 769 }, { "epoch": 0.1072946422350728, "grad_norm": 0.24457117915153503, "learning_rate": 9.963358390880367e-06, "loss": 0.0704, "step": 770 }, { "epoch": 0.10743398592628718, "grad_norm": 0.21781039237976074, "learning_rate": 9.963073039597798e-06, "loss": 0.0637, "step": 771 }, { "epoch": 0.10757332961750157, "grad_norm": 0.23226183652877808, "learning_rate": 9.962786585633708e-06, "loss": 0.0614, "step": 772 }, { "epoch": 0.10771267330871595, "grad_norm": 0.1495114266872406, "learning_rate": 9.962499029051742e-06, "loss": 0.0558, "step": 773 }, { "epoch": 0.10785201699993033, "grad_norm": 0.11129950731992722, "learning_rate": 9.962210369915787e-06, "loss": 0.0631, "step": 774 }, { "epoch": 0.1079913606911447, "grad_norm": 0.3463837802410126, "learning_rate": 9.961920608289977e-06, "loss": 0.0664, "step": 775 }, { "epoch": 0.10813070438235908, "grad_norm": 0.13874255120754242, "learning_rate": 9.96162974423869e-06, "loss": 0.0561, "step": 776 }, { "epoch": 0.10827004807357347, "grad_norm": 0.19912952184677124, "learning_rate": 9.961337777826549e-06, "loss": 0.074, "step": 777 }, { "epoch": 0.10840939176478785, "grad_norm": 0.3118946850299835, "learning_rate": 9.961044709118425e-06, "loss": 0.0679, "step": 778 }, { "epoch": 0.10854873545600223, "grad_norm": 0.17632941901683807, "learning_rate": 9.960750538179428e-06, "loss": 0.0611, "step": 779 }, { "epoch": 0.1086880791472166, "grad_norm": 0.16707774996757507, "learning_rate": 9.960455265074918e-06, "loss": 0.0717, "step": 780 }, { "epoch": 0.10882742283843098, "grad_norm": 0.15059629082679749, "learning_rate": 9.960158889870495e-06, "loss": 0.058, "step": 781 }, { "epoch": 0.10896676652964538, "grad_norm": 0.23729129135608673, "learning_rate": 9.959861412632011e-06, "loss": 0.0648, "step": 782 }, { "epoch": 0.10910611022085975, "grad_norm": 0.18790262937545776, "learning_rate": 9.959562833425557e-06, "loss": 0.0706, "step": 783 }, { "epoch": 0.10924545391207413, "grad_norm": 0.15660767257213593, "learning_rate": 9.95926315231747e-06, "loss": 0.076, "step": 784 }, { "epoch": 0.10938479760328851, "grad_norm": 0.26271963119506836, "learning_rate": 9.958962369374333e-06, "loss": 0.0639, "step": 785 }, { "epoch": 0.10952414129450289, "grad_norm": 0.2580389380455017, "learning_rate": 9.95866048466297e-06, "loss": 0.0694, "step": 786 }, { "epoch": 0.10966348498571728, "grad_norm": 0.15766389667987823, "learning_rate": 9.958357498250457e-06, "loss": 0.0671, "step": 787 }, { "epoch": 0.10980282867693165, "grad_norm": 0.13060270249843597, "learning_rate": 9.95805341020411e-06, "loss": 0.0648, "step": 788 }, { "epoch": 0.10994217236814603, "grad_norm": 0.09915348142385483, "learning_rate": 9.957748220591487e-06, "loss": 0.0651, "step": 789 }, { "epoch": 0.11008151605936041, "grad_norm": 0.27039235830307007, "learning_rate": 9.9574419294804e-06, "loss": 0.0692, "step": 790 }, { "epoch": 0.11022085975057479, "grad_norm": 0.11356810480356216, "learning_rate": 9.957134536938894e-06, "loss": 0.0537, "step": 791 }, { "epoch": 0.11036020344178918, "grad_norm": 0.19104036688804626, "learning_rate": 9.956826043035268e-06, "loss": 0.0656, "step": 792 }, { "epoch": 0.11049954713300356, "grad_norm": 0.31994011998176575, "learning_rate": 9.956516447838063e-06, "loss": 0.0691, "step": 793 }, { "epoch": 0.11063889082421793, "grad_norm": 0.3229871690273285, "learning_rate": 9.95620575141606e-06, "loss": 0.0681, "step": 794 }, { "epoch": 0.11077823451543231, "grad_norm": 0.1797296702861786, "learning_rate": 9.955893953838293e-06, "loss": 0.0633, "step": 795 }, { "epoch": 0.11091757820664669, "grad_norm": 0.09580152481794357, "learning_rate": 9.955581055174034e-06, "loss": 0.0655, "step": 796 }, { "epoch": 0.11105692189786108, "grad_norm": 0.14433415234088898, "learning_rate": 9.9552670554928e-06, "loss": 0.0666, "step": 797 }, { "epoch": 0.11119626558907546, "grad_norm": 0.178819939494133, "learning_rate": 9.954951954864361e-06, "loss": 0.0549, "step": 798 }, { "epoch": 0.11133560928028984, "grad_norm": 0.2438785582780838, "learning_rate": 9.954635753358718e-06, "loss": 0.0731, "step": 799 }, { "epoch": 0.11147495297150421, "grad_norm": 0.14678087830543518, "learning_rate": 9.954318451046128e-06, "loss": 0.0528, "step": 800 }, { "epoch": 0.11161429666271859, "grad_norm": 0.2966175675392151, "learning_rate": 9.954000047997088e-06, "loss": 0.074, "step": 801 }, { "epoch": 0.11175364035393298, "grad_norm": 0.12414100021123886, "learning_rate": 9.953680544282338e-06, "loss": 0.0602, "step": 802 }, { "epoch": 0.11189298404514736, "grad_norm": 0.10310539603233337, "learning_rate": 9.953359939972866e-06, "loss": 0.0644, "step": 803 }, { "epoch": 0.11203232773636174, "grad_norm": 0.35076138377189636, "learning_rate": 9.953038235139902e-06, "loss": 0.0637, "step": 804 }, { "epoch": 0.11217167142757611, "grad_norm": 0.07930601388216019, "learning_rate": 9.952715429854923e-06, "loss": 0.05, "step": 805 }, { "epoch": 0.11231101511879049, "grad_norm": 0.23477379977703094, "learning_rate": 9.952391524189646e-06, "loss": 0.0641, "step": 806 }, { "epoch": 0.11245035881000488, "grad_norm": 0.15283626317977905, "learning_rate": 9.952066518216039e-06, "loss": 0.0594, "step": 807 }, { "epoch": 0.11258970250121926, "grad_norm": 0.19218508899211884, "learning_rate": 9.951740412006308e-06, "loss": 0.0576, "step": 808 }, { "epoch": 0.11272904619243364, "grad_norm": 0.26756763458251953, "learning_rate": 9.95141320563291e-06, "loss": 0.0572, "step": 809 }, { "epoch": 0.11286838988364802, "grad_norm": 0.13634531199932098, "learning_rate": 9.951084899168537e-06, "loss": 0.0511, "step": 810 }, { "epoch": 0.11300773357486239, "grad_norm": 0.36252808570861816, "learning_rate": 9.950755492686138e-06, "loss": 0.061, "step": 811 }, { "epoch": 0.11314707726607678, "grad_norm": 0.1169428899884224, "learning_rate": 9.950424986258893e-06, "loss": 0.065, "step": 812 }, { "epoch": 0.11328642095729116, "grad_norm": 0.21692967414855957, "learning_rate": 9.950093379960238e-06, "loss": 0.0581, "step": 813 }, { "epoch": 0.11342576464850554, "grad_norm": 0.20237968862056732, "learning_rate": 9.949760673863846e-06, "loss": 0.0556, "step": 814 }, { "epoch": 0.11356510833971992, "grad_norm": 0.16441026329994202, "learning_rate": 9.949426868043638e-06, "loss": 0.0617, "step": 815 }, { "epoch": 0.1137044520309343, "grad_norm": 0.11673978716135025, "learning_rate": 9.949091962573775e-06, "loss": 0.0552, "step": 816 }, { "epoch": 0.11384379572214869, "grad_norm": 0.13296225666999817, "learning_rate": 9.94875595752867e-06, "loss": 0.054, "step": 817 }, { "epoch": 0.11398313941336306, "grad_norm": 0.19055138528347015, "learning_rate": 9.948418852982973e-06, "loss": 0.0619, "step": 818 }, { "epoch": 0.11412248310457744, "grad_norm": 0.20321059226989746, "learning_rate": 9.948080649011582e-06, "loss": 0.0738, "step": 819 }, { "epoch": 0.11426182679579182, "grad_norm": 0.11705703288316727, "learning_rate": 9.947741345689635e-06, "loss": 0.0619, "step": 820 }, { "epoch": 0.1144011704870062, "grad_norm": 0.3448009192943573, "learning_rate": 9.947400943092522e-06, "loss": 0.0735, "step": 821 }, { "epoch": 0.11454051417822059, "grad_norm": 0.2872038781642914, "learning_rate": 9.94705944129587e-06, "loss": 0.0708, "step": 822 }, { "epoch": 0.11467985786943496, "grad_norm": 0.10818947851657867, "learning_rate": 9.946716840375552e-06, "loss": 0.0622, "step": 823 }, { "epoch": 0.11481920156064934, "grad_norm": 0.2151806801557541, "learning_rate": 9.946373140407688e-06, "loss": 0.0756, "step": 824 }, { "epoch": 0.11495854525186372, "grad_norm": 0.34472256898880005, "learning_rate": 9.946028341468642e-06, "loss": 0.0746, "step": 825 }, { "epoch": 0.1150978889430781, "grad_norm": 0.12072296440601349, "learning_rate": 9.945682443635015e-06, "loss": 0.0488, "step": 826 }, { "epoch": 0.11523723263429249, "grad_norm": 0.12026335299015045, "learning_rate": 9.945335446983662e-06, "loss": 0.058, "step": 827 }, { "epoch": 0.11537657632550687, "grad_norm": 0.1249072253704071, "learning_rate": 9.944987351591677e-06, "loss": 0.0574, "step": 828 }, { "epoch": 0.11551592001672124, "grad_norm": 0.2706172466278076, "learning_rate": 9.944638157536399e-06, "loss": 0.0623, "step": 829 }, { "epoch": 0.11565526370793562, "grad_norm": 0.2100857049226761, "learning_rate": 9.94428786489541e-06, "loss": 0.0778, "step": 830 }, { "epoch": 0.11579460739915, "grad_norm": 0.14325763285160065, "learning_rate": 9.943936473746539e-06, "loss": 0.0621, "step": 831 }, { "epoch": 0.11593395109036439, "grad_norm": 0.17802830040454865, "learning_rate": 9.943583984167853e-06, "loss": 0.0807, "step": 832 }, { "epoch": 0.11607329478157877, "grad_norm": 0.1874418407678604, "learning_rate": 9.94323039623767e-06, "loss": 0.0558, "step": 833 }, { "epoch": 0.11621263847279314, "grad_norm": 0.14262698590755463, "learning_rate": 9.942875710034549e-06, "loss": 0.0658, "step": 834 }, { "epoch": 0.11635198216400752, "grad_norm": 0.23345832526683807, "learning_rate": 9.942519925637293e-06, "loss": 0.0633, "step": 835 }, { "epoch": 0.1164913258552219, "grad_norm": 0.1243460401892662, "learning_rate": 9.942163043124951e-06, "loss": 0.0598, "step": 836 }, { "epoch": 0.11663066954643629, "grad_norm": 0.18604415655136108, "learning_rate": 9.941805062576811e-06, "loss": 0.0584, "step": 837 }, { "epoch": 0.11677001323765067, "grad_norm": 0.15850841999053955, "learning_rate": 9.941445984072408e-06, "loss": 0.0652, "step": 838 }, { "epoch": 0.11690935692886505, "grad_norm": 0.1762789934873581, "learning_rate": 9.941085807691524e-06, "loss": 0.0494, "step": 839 }, { "epoch": 0.11704870062007942, "grad_norm": 0.44905710220336914, "learning_rate": 9.94072453351418e-06, "loss": 0.0671, "step": 840 }, { "epoch": 0.1171880443112938, "grad_norm": 0.12595324218273163, "learning_rate": 9.940362161620644e-06, "loss": 0.057, "step": 841 }, { "epoch": 0.11732738800250819, "grad_norm": 0.08148655295372009, "learning_rate": 9.939998692091427e-06, "loss": 0.0631, "step": 842 }, { "epoch": 0.11746673169372257, "grad_norm": 0.13432787358760834, "learning_rate": 9.939634125007279e-06, "loss": 0.0589, "step": 843 }, { "epoch": 0.11760607538493695, "grad_norm": 0.14791490137577057, "learning_rate": 9.939268460449205e-06, "loss": 0.0601, "step": 844 }, { "epoch": 0.11774541907615133, "grad_norm": 0.17448103427886963, "learning_rate": 9.938901698498444e-06, "loss": 0.0775, "step": 845 }, { "epoch": 0.1178847627673657, "grad_norm": 0.20801030099391937, "learning_rate": 9.938533839236483e-06, "loss": 0.0682, "step": 846 }, { "epoch": 0.1180241064585801, "grad_norm": 0.19613459706306458, "learning_rate": 9.938164882745051e-06, "loss": 0.0794, "step": 847 }, { "epoch": 0.11816345014979447, "grad_norm": 0.13219910860061646, "learning_rate": 9.937794829106122e-06, "loss": 0.0688, "step": 848 }, { "epoch": 0.11830279384100885, "grad_norm": 0.18955564498901367, "learning_rate": 9.937423678401913e-06, "loss": 0.0513, "step": 849 }, { "epoch": 0.11844213753222323, "grad_norm": 0.19209711253643036, "learning_rate": 9.937051430714888e-06, "loss": 0.0557, "step": 850 }, { "epoch": 0.1185814812234376, "grad_norm": 0.14002352952957153, "learning_rate": 9.936678086127749e-06, "loss": 0.0518, "step": 851 }, { "epoch": 0.118720824914652, "grad_norm": 0.1758090704679489, "learning_rate": 9.936303644723446e-06, "loss": 0.0641, "step": 852 }, { "epoch": 0.11886016860586637, "grad_norm": 0.29259905219078064, "learning_rate": 9.93592810658517e-06, "loss": 0.0647, "step": 853 }, { "epoch": 0.11899951229708075, "grad_norm": 0.3268146216869354, "learning_rate": 9.935551471796358e-06, "loss": 0.0646, "step": 854 }, { "epoch": 0.11913885598829513, "grad_norm": 0.13680067658424377, "learning_rate": 9.935173740440692e-06, "loss": 0.0625, "step": 855 }, { "epoch": 0.1192781996795095, "grad_norm": 0.10641245543956757, "learning_rate": 9.93479491260209e-06, "loss": 0.0594, "step": 856 }, { "epoch": 0.1194175433707239, "grad_norm": 0.14655427634716034, "learning_rate": 9.934414988364722e-06, "loss": 0.0705, "step": 857 }, { "epoch": 0.11955688706193827, "grad_norm": 0.19448672235012054, "learning_rate": 9.934033967812998e-06, "loss": 0.0613, "step": 858 }, { "epoch": 0.11969623075315265, "grad_norm": 0.31608954071998596, "learning_rate": 9.933651851031573e-06, "loss": 0.0712, "step": 859 }, { "epoch": 0.11983557444436703, "grad_norm": 0.14354972541332245, "learning_rate": 9.933268638105345e-06, "loss": 0.0689, "step": 860 }, { "epoch": 0.11997491813558141, "grad_norm": 0.1422015279531479, "learning_rate": 9.932884329119452e-06, "loss": 0.0702, "step": 861 }, { "epoch": 0.1201142618267958, "grad_norm": 0.36151570081710815, "learning_rate": 9.932498924159281e-06, "loss": 0.0621, "step": 862 }, { "epoch": 0.12025360551801018, "grad_norm": 0.11206286400556564, "learning_rate": 9.93211242331046e-06, "loss": 0.0619, "step": 863 }, { "epoch": 0.12039294920922455, "grad_norm": 0.21981729567050934, "learning_rate": 9.931724826658861e-06, "loss": 0.0671, "step": 864 }, { "epoch": 0.12053229290043893, "grad_norm": 0.3377426266670227, "learning_rate": 9.931336134290598e-06, "loss": 0.0864, "step": 865 }, { "epoch": 0.12067163659165331, "grad_norm": 0.23964527249336243, "learning_rate": 9.930946346292032e-06, "loss": 0.0676, "step": 866 }, { "epoch": 0.1208109802828677, "grad_norm": 0.2784067690372467, "learning_rate": 9.930555462749762e-06, "loss": 0.0568, "step": 867 }, { "epoch": 0.12095032397408208, "grad_norm": 0.20535778999328613, "learning_rate": 9.930163483750636e-06, "loss": 0.0626, "step": 868 }, { "epoch": 0.12108966766529645, "grad_norm": 0.14969690144062042, "learning_rate": 9.92977040938174e-06, "loss": 0.066, "step": 869 }, { "epoch": 0.12122901135651083, "grad_norm": 0.4052814841270447, "learning_rate": 9.929376239730408e-06, "loss": 0.0647, "step": 870 }, { "epoch": 0.12136835504772521, "grad_norm": 0.19101139903068542, "learning_rate": 9.928980974884215e-06, "loss": 0.0805, "step": 871 }, { "epoch": 0.1215076987389396, "grad_norm": 0.2976991534233093, "learning_rate": 9.928584614930981e-06, "loss": 0.0753, "step": 872 }, { "epoch": 0.12164704243015398, "grad_norm": 0.11786027252674103, "learning_rate": 9.928187159958764e-06, "loss": 0.0604, "step": 873 }, { "epoch": 0.12178638612136836, "grad_norm": 0.1552998274564743, "learning_rate": 9.927788610055875e-06, "loss": 0.0724, "step": 874 }, { "epoch": 0.12192572981258273, "grad_norm": 0.12976153194904327, "learning_rate": 9.92738896531086e-06, "loss": 0.0572, "step": 875 }, { "epoch": 0.12206507350379711, "grad_norm": 0.1763717085123062, "learning_rate": 9.926988225812511e-06, "loss": 0.0648, "step": 876 }, { "epoch": 0.1222044171950115, "grad_norm": 0.14647836983203888, "learning_rate": 9.926586391649863e-06, "loss": 0.0506, "step": 877 }, { "epoch": 0.12234376088622588, "grad_norm": 0.11310587078332901, "learning_rate": 9.926183462912196e-06, "loss": 0.0586, "step": 878 }, { "epoch": 0.12248310457744026, "grad_norm": 0.12231869995594025, "learning_rate": 9.925779439689028e-06, "loss": 0.0681, "step": 879 }, { "epoch": 0.12262244826865464, "grad_norm": 0.12905502319335938, "learning_rate": 9.925374322070126e-06, "loss": 0.0664, "step": 880 }, { "epoch": 0.12276179195986901, "grad_norm": 0.12131915241479874, "learning_rate": 9.9249681101455e-06, "loss": 0.0665, "step": 881 }, { "epoch": 0.1229011356510834, "grad_norm": 0.10328512638807297, "learning_rate": 9.924560804005397e-06, "loss": 0.0505, "step": 882 }, { "epoch": 0.12304047934229778, "grad_norm": 0.1702745258808136, "learning_rate": 9.924152403740315e-06, "loss": 0.0598, "step": 883 }, { "epoch": 0.12317982303351216, "grad_norm": 0.1705143004655838, "learning_rate": 9.923742909440987e-06, "loss": 0.065, "step": 884 }, { "epoch": 0.12331916672472654, "grad_norm": 0.23443137109279633, "learning_rate": 9.923332321198396e-06, "loss": 0.0576, "step": 885 }, { "epoch": 0.12345851041594091, "grad_norm": 0.2216503769159317, "learning_rate": 9.922920639103766e-06, "loss": 0.0628, "step": 886 }, { "epoch": 0.12359785410715529, "grad_norm": 0.47200679779052734, "learning_rate": 9.92250786324856e-06, "loss": 0.0709, "step": 887 }, { "epoch": 0.12373719779836968, "grad_norm": 0.18233835697174072, "learning_rate": 9.922093993724492e-06, "loss": 0.0574, "step": 888 }, { "epoch": 0.12387654148958406, "grad_norm": 0.49692538380622864, "learning_rate": 9.92167903062351e-06, "loss": 0.0653, "step": 889 }, { "epoch": 0.12401588518079844, "grad_norm": 0.4492480456829071, "learning_rate": 9.921262974037813e-06, "loss": 0.075, "step": 890 }, { "epoch": 0.12415522887201282, "grad_norm": 0.22590376436710358, "learning_rate": 9.920845824059836e-06, "loss": 0.0569, "step": 891 }, { "epoch": 0.12429457256322719, "grad_norm": 0.13267886638641357, "learning_rate": 9.920427580782263e-06, "loss": 0.0541, "step": 892 }, { "epoch": 0.12443391625444158, "grad_norm": 0.4452623426914215, "learning_rate": 9.920008244298016e-06, "loss": 0.074, "step": 893 }, { "epoch": 0.12457325994565596, "grad_norm": 0.10106030851602554, "learning_rate": 9.919587814700262e-06, "loss": 0.0538, "step": 894 }, { "epoch": 0.12471260363687034, "grad_norm": 0.1941806823015213, "learning_rate": 9.919166292082414e-06, "loss": 0.0681, "step": 895 }, { "epoch": 0.12485194732808472, "grad_norm": 0.20108845829963684, "learning_rate": 9.91874367653812e-06, "loss": 0.0622, "step": 896 }, { "epoch": 0.1249912910192991, "grad_norm": 0.1337002068758011, "learning_rate": 9.91831996816128e-06, "loss": 0.0656, "step": 897 }, { "epoch": 0.12513063471051347, "grad_norm": 0.17049017548561096, "learning_rate": 9.917895167046027e-06, "loss": 0.0656, "step": 898 }, { "epoch": 0.12526997840172785, "grad_norm": 0.12331446260213852, "learning_rate": 9.917469273286749e-06, "loss": 0.0629, "step": 899 }, { "epoch": 0.12540932209294225, "grad_norm": 0.22337855398654938, "learning_rate": 9.917042286978064e-06, "loss": 0.0551, "step": 900 }, { "epoch": 0.12554866578415663, "grad_norm": 0.11044814437627792, "learning_rate": 9.916614208214841e-06, "loss": 0.0659, "step": 901 }, { "epoch": 0.125688009475371, "grad_norm": 0.1931377500295639, "learning_rate": 9.91618503709219e-06, "loss": 0.0617, "step": 902 }, { "epoch": 0.1258273531665854, "grad_norm": 0.1668328493833542, "learning_rate": 9.915754773705461e-06, "loss": 0.0589, "step": 903 }, { "epoch": 0.12596669685779976, "grad_norm": 0.0960727110505104, "learning_rate": 9.915323418150252e-06, "loss": 0.062, "step": 904 }, { "epoch": 0.12610604054901414, "grad_norm": 0.14470407366752625, "learning_rate": 9.914890970522397e-06, "loss": 0.0691, "step": 905 }, { "epoch": 0.12624538424022852, "grad_norm": 0.2168419063091278, "learning_rate": 9.914457430917977e-06, "loss": 0.0767, "step": 906 }, { "epoch": 0.1263847279314429, "grad_norm": 0.138703390955925, "learning_rate": 9.914022799433315e-06, "loss": 0.0655, "step": 907 }, { "epoch": 0.12652407162265727, "grad_norm": 0.10027100145816803, "learning_rate": 9.913587076164976e-06, "loss": 0.0568, "step": 908 }, { "epoch": 0.12666341531387165, "grad_norm": 0.34620144963264465, "learning_rate": 9.913150261209767e-06, "loss": 0.0715, "step": 909 }, { "epoch": 0.12680275900508606, "grad_norm": 0.13244691491127014, "learning_rate": 9.91271235466474e-06, "loss": 0.0527, "step": 910 }, { "epoch": 0.12694210269630044, "grad_norm": 0.30651313066482544, "learning_rate": 9.912273356627188e-06, "loss": 0.0599, "step": 911 }, { "epoch": 0.1270814463875148, "grad_norm": 0.21463845670223236, "learning_rate": 9.911833267194643e-06, "loss": 0.077, "step": 912 }, { "epoch": 0.1272207900787292, "grad_norm": 0.06626396626234055, "learning_rate": 9.911392086464886e-06, "loss": 0.0581, "step": 913 }, { "epoch": 0.12736013376994357, "grad_norm": 0.11070942878723145, "learning_rate": 9.910949814535936e-06, "loss": 0.0624, "step": 914 }, { "epoch": 0.12749947746115795, "grad_norm": 0.24647875130176544, "learning_rate": 9.910506451506056e-06, "loss": 0.0584, "step": 915 }, { "epoch": 0.12763882115237232, "grad_norm": 0.10982261598110199, "learning_rate": 9.910061997473753e-06, "loss": 0.0558, "step": 916 }, { "epoch": 0.1277781648435867, "grad_norm": 0.11431637406349182, "learning_rate": 9.909616452537772e-06, "loss": 0.0615, "step": 917 }, { "epoch": 0.12791750853480108, "grad_norm": 0.09679293632507324, "learning_rate": 9.909169816797102e-06, "loss": 0.0598, "step": 918 }, { "epoch": 0.12805685222601546, "grad_norm": 0.1297469586133957, "learning_rate": 9.908722090350979e-06, "loss": 0.0594, "step": 919 }, { "epoch": 0.12819619591722986, "grad_norm": 0.10869890451431274, "learning_rate": 9.908273273298874e-06, "loss": 0.0649, "step": 920 }, { "epoch": 0.12833553960844424, "grad_norm": 0.10731326043605804, "learning_rate": 9.907823365740507e-06, "loss": 0.0594, "step": 921 }, { "epoch": 0.12847488329965862, "grad_norm": 0.12142162770032883, "learning_rate": 9.907372367775834e-06, "loss": 0.0574, "step": 922 }, { "epoch": 0.128614226990873, "grad_norm": 0.11591633409261703, "learning_rate": 9.906920279505058e-06, "loss": 0.0579, "step": 923 }, { "epoch": 0.12875357068208737, "grad_norm": 0.33519354462623596, "learning_rate": 9.906467101028625e-06, "loss": 0.0757, "step": 924 }, { "epoch": 0.12889291437330175, "grad_norm": 0.2909300923347473, "learning_rate": 9.906012832447219e-06, "loss": 0.0714, "step": 925 }, { "epoch": 0.12903225806451613, "grad_norm": 0.16251471638679504, "learning_rate": 9.905557473861764e-06, "loss": 0.0567, "step": 926 }, { "epoch": 0.1291716017557305, "grad_norm": 0.18168160319328308, "learning_rate": 9.905101025373438e-06, "loss": 0.06, "step": 927 }, { "epoch": 0.12931094544694488, "grad_norm": 0.08110957592725754, "learning_rate": 9.904643487083648e-06, "loss": 0.0512, "step": 928 }, { "epoch": 0.12945028913815926, "grad_norm": 0.13370802998542786, "learning_rate": 9.90418485909405e-06, "loss": 0.0604, "step": 929 }, { "epoch": 0.12958963282937366, "grad_norm": 0.11894714832305908, "learning_rate": 9.903725141506539e-06, "loss": 0.0509, "step": 930 }, { "epoch": 0.12972897652058804, "grad_norm": 0.10287127643823624, "learning_rate": 9.903264334423258e-06, "loss": 0.0457, "step": 931 }, { "epoch": 0.12986832021180242, "grad_norm": 0.1030578464269638, "learning_rate": 9.902802437946584e-06, "loss": 0.0562, "step": 932 }, { "epoch": 0.1300076639030168, "grad_norm": 0.14378179609775543, "learning_rate": 9.902339452179142e-06, "loss": 0.0726, "step": 933 }, { "epoch": 0.13014700759423117, "grad_norm": 0.12883642315864563, "learning_rate": 9.901875377223796e-06, "loss": 0.0538, "step": 934 }, { "epoch": 0.13028635128544555, "grad_norm": 0.1451183706521988, "learning_rate": 9.901410213183653e-06, "loss": 0.0656, "step": 935 }, { "epoch": 0.13042569497665993, "grad_norm": 0.24463386833667755, "learning_rate": 9.900943960162061e-06, "loss": 0.0838, "step": 936 }, { "epoch": 0.1305650386678743, "grad_norm": 0.132344588637352, "learning_rate": 9.900476618262612e-06, "loss": 0.0615, "step": 937 }, { "epoch": 0.13070438235908868, "grad_norm": 0.21368573606014252, "learning_rate": 9.900008187589138e-06, "loss": 0.0513, "step": 938 }, { "epoch": 0.13084372605030306, "grad_norm": 0.18858206272125244, "learning_rate": 9.899538668245713e-06, "loss": 0.0546, "step": 939 }, { "epoch": 0.13098306974151747, "grad_norm": 0.13338884711265564, "learning_rate": 9.899068060336656e-06, "loss": 0.0626, "step": 940 }, { "epoch": 0.13112241343273184, "grad_norm": 0.1417304128408432, "learning_rate": 9.898596363966523e-06, "loss": 0.0673, "step": 941 }, { "epoch": 0.13126175712394622, "grad_norm": 0.14845922589302063, "learning_rate": 9.898123579240115e-06, "loss": 0.0636, "step": 942 }, { "epoch": 0.1314011008151606, "grad_norm": 0.13543111085891724, "learning_rate": 9.897649706262474e-06, "loss": 0.0643, "step": 943 }, { "epoch": 0.13154044450637498, "grad_norm": 0.37563520669937134, "learning_rate": 9.897174745138883e-06, "loss": 0.067, "step": 944 }, { "epoch": 0.13167978819758935, "grad_norm": 0.12612587213516235, "learning_rate": 9.896698695974866e-06, "loss": 0.053, "step": 945 }, { "epoch": 0.13181913188880373, "grad_norm": 0.13495926558971405, "learning_rate": 9.896221558876195e-06, "loss": 0.0752, "step": 946 }, { "epoch": 0.1319584755800181, "grad_norm": 0.3785794675350189, "learning_rate": 9.895743333948875e-06, "loss": 0.0753, "step": 947 }, { "epoch": 0.1320978192712325, "grad_norm": 0.11455098539590836, "learning_rate": 9.895264021299158e-06, "loss": 0.0555, "step": 948 }, { "epoch": 0.13223716296244686, "grad_norm": 0.12753097712993622, "learning_rate": 9.894783621033538e-06, "loss": 0.0665, "step": 949 }, { "epoch": 0.13237650665366127, "grad_norm": 0.293069452047348, "learning_rate": 9.894302133258747e-06, "loss": 0.0838, "step": 950 }, { "epoch": 0.13251585034487565, "grad_norm": 0.10868740826845169, "learning_rate": 9.893819558081759e-06, "loss": 0.0627, "step": 951 }, { "epoch": 0.13265519403609002, "grad_norm": 0.16716448962688446, "learning_rate": 9.893335895609792e-06, "loss": 0.0636, "step": 952 }, { "epoch": 0.1327945377273044, "grad_norm": 0.13632145524024963, "learning_rate": 9.892851145950308e-06, "loss": 0.0561, "step": 953 }, { "epoch": 0.13293388141851878, "grad_norm": 0.1462588608264923, "learning_rate": 9.892365309211005e-06, "loss": 0.0606, "step": 954 }, { "epoch": 0.13307322510973316, "grad_norm": 0.15149500966072083, "learning_rate": 9.891878385499825e-06, "loss": 0.0593, "step": 955 }, { "epoch": 0.13321256880094753, "grad_norm": 0.17113947868347168, "learning_rate": 9.891390374924949e-06, "loss": 0.0613, "step": 956 }, { "epoch": 0.1333519124921619, "grad_norm": 0.20750883221626282, "learning_rate": 9.890901277594806e-06, "loss": 0.0651, "step": 957 }, { "epoch": 0.1334912561833763, "grad_norm": 0.298068106174469, "learning_rate": 9.89041109361806e-06, "loss": 0.0676, "step": 958 }, { "epoch": 0.13363059987459067, "grad_norm": 0.1947329193353653, "learning_rate": 9.889919823103618e-06, "loss": 0.0638, "step": 959 }, { "epoch": 0.13376994356580507, "grad_norm": 0.2318676859140396, "learning_rate": 9.889427466160633e-06, "loss": 0.0708, "step": 960 }, { "epoch": 0.13390928725701945, "grad_norm": 0.28227633237838745, "learning_rate": 9.888934022898488e-06, "loss": 0.064, "step": 961 }, { "epoch": 0.13404863094823383, "grad_norm": 0.305225133895874, "learning_rate": 9.888439493426824e-06, "loss": 0.0619, "step": 962 }, { "epoch": 0.1341879746394482, "grad_norm": 0.1762465536594391, "learning_rate": 9.887943877855505e-06, "loss": 0.0566, "step": 963 }, { "epoch": 0.13432731833066258, "grad_norm": 0.10540644824504852, "learning_rate": 9.887447176294653e-06, "loss": 0.0636, "step": 964 }, { "epoch": 0.13446666202187696, "grad_norm": 0.1605265736579895, "learning_rate": 9.88694938885462e-06, "loss": 0.0576, "step": 965 }, { "epoch": 0.13460600571309134, "grad_norm": 0.18294094502925873, "learning_rate": 9.886450515646005e-06, "loss": 0.0634, "step": 966 }, { "epoch": 0.13474534940430571, "grad_norm": 0.18944856524467468, "learning_rate": 9.885950556779644e-06, "loss": 0.0692, "step": 967 }, { "epoch": 0.1348846930955201, "grad_norm": 0.15448077023029327, "learning_rate": 9.885449512366617e-06, "loss": 0.0649, "step": 968 }, { "epoch": 0.13502403678673447, "grad_norm": 0.11163438856601715, "learning_rate": 9.884947382518247e-06, "loss": 0.0569, "step": 969 }, { "epoch": 0.13516338047794887, "grad_norm": 0.11259389668703079, "learning_rate": 9.88444416734609e-06, "loss": 0.0624, "step": 970 }, { "epoch": 0.13530272416916325, "grad_norm": 0.19734534621238708, "learning_rate": 9.883939866961956e-06, "loss": 0.0534, "step": 971 }, { "epoch": 0.13544206786037763, "grad_norm": 0.14896391332149506, "learning_rate": 9.883434481477885e-06, "loss": 0.0695, "step": 972 }, { "epoch": 0.135581411551592, "grad_norm": 0.20819157361984253, "learning_rate": 9.882928011006163e-06, "loss": 0.0686, "step": 973 }, { "epoch": 0.13572075524280638, "grad_norm": 0.1401354819536209, "learning_rate": 9.882420455659316e-06, "loss": 0.0705, "step": 974 }, { "epoch": 0.13586009893402076, "grad_norm": 0.12186621874570847, "learning_rate": 9.881911815550111e-06, "loss": 0.0681, "step": 975 }, { "epoch": 0.13599944262523514, "grad_norm": 0.12804366648197174, "learning_rate": 9.881402090791556e-06, "loss": 0.0619, "step": 976 }, { "epoch": 0.13613878631644952, "grad_norm": 0.16693753004074097, "learning_rate": 9.880891281496901e-06, "loss": 0.0752, "step": 977 }, { "epoch": 0.1362781300076639, "grad_norm": 0.14233076572418213, "learning_rate": 9.880379387779637e-06, "loss": 0.0544, "step": 978 }, { "epoch": 0.13641747369887827, "grad_norm": 0.21483980119228363, "learning_rate": 9.879866409753493e-06, "loss": 0.0774, "step": 979 }, { "epoch": 0.13655681739009268, "grad_norm": 0.1446903944015503, "learning_rate": 9.879352347532442e-06, "loss": 0.0689, "step": 980 }, { "epoch": 0.13669616108130705, "grad_norm": 0.1751873940229416, "learning_rate": 9.878837201230697e-06, "loss": 0.0592, "step": 981 }, { "epoch": 0.13683550477252143, "grad_norm": 0.16801853477954865, "learning_rate": 9.878320970962712e-06, "loss": 0.0723, "step": 982 }, { "epoch": 0.1369748484637358, "grad_norm": 0.11921393871307373, "learning_rate": 9.877803656843182e-06, "loss": 0.0542, "step": 983 }, { "epoch": 0.1371141921549502, "grad_norm": 0.16606928408145905, "learning_rate": 9.877285258987039e-06, "loss": 0.0558, "step": 984 }, { "epoch": 0.13725353584616456, "grad_norm": 0.12866249680519104, "learning_rate": 9.876765777509463e-06, "loss": 0.0584, "step": 985 }, { "epoch": 0.13739287953737894, "grad_norm": 0.08070845901966095, "learning_rate": 9.87624521252587e-06, "loss": 0.0639, "step": 986 }, { "epoch": 0.13753222322859332, "grad_norm": 0.14054515957832336, "learning_rate": 9.875723564151918e-06, "loss": 0.0682, "step": 987 }, { "epoch": 0.1376715669198077, "grad_norm": 0.22316615283489227, "learning_rate": 9.875200832503505e-06, "loss": 0.0616, "step": 988 }, { "epoch": 0.13781091061102207, "grad_norm": 0.13511516153812408, "learning_rate": 9.874677017696769e-06, "loss": 0.0673, "step": 989 }, { "epoch": 0.13795025430223645, "grad_norm": 0.11388219892978668, "learning_rate": 9.87415211984809e-06, "loss": 0.0626, "step": 990 }, { "epoch": 0.13808959799345086, "grad_norm": 0.10045699030160904, "learning_rate": 9.873626139074088e-06, "loss": 0.0669, "step": 991 }, { "epoch": 0.13822894168466524, "grad_norm": 0.2370460331439972, "learning_rate": 9.873099075491626e-06, "loss": 0.0546, "step": 992 }, { "epoch": 0.1383682853758796, "grad_norm": 0.11635512113571167, "learning_rate": 9.872570929217804e-06, "loss": 0.0569, "step": 993 }, { "epoch": 0.138507629067094, "grad_norm": 0.10984958708286285, "learning_rate": 9.872041700369965e-06, "loss": 0.0621, "step": 994 }, { "epoch": 0.13864697275830837, "grad_norm": 0.30887946486473083, "learning_rate": 9.871511389065689e-06, "loss": 0.0598, "step": 995 }, { "epoch": 0.13878631644952275, "grad_norm": 0.3602089285850525, "learning_rate": 9.870979995422803e-06, "loss": 0.0746, "step": 996 }, { "epoch": 0.13892566014073712, "grad_norm": 0.12381267547607422, "learning_rate": 9.870447519559366e-06, "loss": 0.0593, "step": 997 }, { "epoch": 0.1390650038319515, "grad_norm": 0.10142610222101212, "learning_rate": 9.869913961593685e-06, "loss": 0.0478, "step": 998 }, { "epoch": 0.13920434752316588, "grad_norm": 0.27200353145599365, "learning_rate": 9.869379321644306e-06, "loss": 0.0789, "step": 999 }, { "epoch": 0.13934369121438026, "grad_norm": 0.22954724729061127, "learning_rate": 9.868843599830009e-06, "loss": 0.0685, "step": 1000 }, { "epoch": 0.13948303490559466, "grad_norm": 0.29330578446388245, "learning_rate": 9.868306796269822e-06, "loss": 0.0655, "step": 1001 }, { "epoch": 0.13962237859680904, "grad_norm": 0.18471679091453552, "learning_rate": 9.86776891108301e-06, "loss": 0.0532, "step": 1002 }, { "epoch": 0.13976172228802342, "grad_norm": 0.3636901378631592, "learning_rate": 9.86722994438908e-06, "loss": 0.086, "step": 1003 }, { "epoch": 0.1399010659792378, "grad_norm": 0.1179409921169281, "learning_rate": 9.866689896307778e-06, "loss": 0.0518, "step": 1004 }, { "epoch": 0.14004040967045217, "grad_norm": 0.20418959856033325, "learning_rate": 9.866148766959087e-06, "loss": 0.0675, "step": 1005 }, { "epoch": 0.14017975336166655, "grad_norm": 0.20037396252155304, "learning_rate": 9.865606556463239e-06, "loss": 0.0548, "step": 1006 }, { "epoch": 0.14031909705288093, "grad_norm": 0.117300845682621, "learning_rate": 9.865063264940695e-06, "loss": 0.0549, "step": 1007 }, { "epoch": 0.1404584407440953, "grad_norm": 0.17742489278316498, "learning_rate": 9.864518892512167e-06, "loss": 0.0677, "step": 1008 }, { "epoch": 0.14059778443530968, "grad_norm": 0.14584453403949738, "learning_rate": 9.863973439298597e-06, "loss": 0.0528, "step": 1009 }, { "epoch": 0.14073712812652406, "grad_norm": 0.3877631723880768, "learning_rate": 9.863426905421179e-06, "loss": 0.0722, "step": 1010 }, { "epoch": 0.14087647181773846, "grad_norm": 0.16270019114017487, "learning_rate": 9.862879291001334e-06, "loss": 0.0721, "step": 1011 }, { "epoch": 0.14101581550895284, "grad_norm": 0.2178175002336502, "learning_rate": 9.862330596160732e-06, "loss": 0.0725, "step": 1012 }, { "epoch": 0.14115515920016722, "grad_norm": 0.1476183831691742, "learning_rate": 9.861780821021282e-06, "loss": 0.0664, "step": 1013 }, { "epoch": 0.1412945028913816, "grad_norm": 0.12568335235118866, "learning_rate": 9.861229965705129e-06, "loss": 0.0571, "step": 1014 }, { "epoch": 0.14143384658259597, "grad_norm": 0.17660611867904663, "learning_rate": 9.86067803033466e-06, "loss": 0.0611, "step": 1015 }, { "epoch": 0.14157319027381035, "grad_norm": 0.07704568654298782, "learning_rate": 9.860125015032506e-06, "loss": 0.0509, "step": 1016 }, { "epoch": 0.14171253396502473, "grad_norm": 0.15181100368499756, "learning_rate": 9.859570919921533e-06, "loss": 0.0641, "step": 1017 }, { "epoch": 0.1418518776562391, "grad_norm": 0.11841840296983719, "learning_rate": 9.859015745124844e-06, "loss": 0.0522, "step": 1018 }, { "epoch": 0.14199122134745348, "grad_norm": 0.09160614758729935, "learning_rate": 9.858459490765792e-06, "loss": 0.056, "step": 1019 }, { "epoch": 0.14213056503866786, "grad_norm": 0.1524711400270462, "learning_rate": 9.857902156967961e-06, "loss": 0.0697, "step": 1020 }, { "epoch": 0.14226990872988227, "grad_norm": 0.09298769384622574, "learning_rate": 9.857343743855178e-06, "loss": 0.0534, "step": 1021 }, { "epoch": 0.14240925242109664, "grad_norm": 0.268963098526001, "learning_rate": 9.856784251551512e-06, "loss": 0.0596, "step": 1022 }, { "epoch": 0.14254859611231102, "grad_norm": 0.12364041805267334, "learning_rate": 9.856223680181267e-06, "loss": 0.0625, "step": 1023 }, { "epoch": 0.1426879398035254, "grad_norm": 0.1848570555448532, "learning_rate": 9.85566202986899e-06, "loss": 0.0736, "step": 1024 }, { "epoch": 0.14282728349473978, "grad_norm": 0.17346052825450897, "learning_rate": 9.855099300739463e-06, "loss": 0.0559, "step": 1025 }, { "epoch": 0.14296662718595415, "grad_norm": 0.13477133214473724, "learning_rate": 9.854535492917718e-06, "loss": 0.0644, "step": 1026 }, { "epoch": 0.14310597087716853, "grad_norm": 0.17667235434055328, "learning_rate": 9.853970606529018e-06, "loss": 0.0507, "step": 1027 }, { "epoch": 0.1432453145683829, "grad_norm": 0.21858945488929749, "learning_rate": 9.853404641698866e-06, "loss": 0.0646, "step": 1028 }, { "epoch": 0.1433846582595973, "grad_norm": 0.15180754661560059, "learning_rate": 9.85283759855301e-06, "loss": 0.0529, "step": 1029 }, { "epoch": 0.14352400195081166, "grad_norm": 0.12625917792320251, "learning_rate": 9.852269477217428e-06, "loss": 0.0657, "step": 1030 }, { "epoch": 0.14366334564202607, "grad_norm": 0.1098974421620369, "learning_rate": 9.85170027781835e-06, "loss": 0.0607, "step": 1031 }, { "epoch": 0.14380268933324045, "grad_norm": 0.15308064222335815, "learning_rate": 9.851130000482236e-06, "loss": 0.0617, "step": 1032 }, { "epoch": 0.14394203302445482, "grad_norm": 0.08849264681339264, "learning_rate": 9.85055864533579e-06, "loss": 0.0568, "step": 1033 }, { "epoch": 0.1440813767156692, "grad_norm": 0.2045230120420456, "learning_rate": 9.849986212505952e-06, "loss": 0.0686, "step": 1034 }, { "epoch": 0.14422072040688358, "grad_norm": 0.09279713779687881, "learning_rate": 9.849412702119905e-06, "loss": 0.0534, "step": 1035 }, { "epoch": 0.14436006409809796, "grad_norm": 0.2065090537071228, "learning_rate": 9.848838114305069e-06, "loss": 0.0595, "step": 1036 }, { "epoch": 0.14449940778931233, "grad_norm": 0.16339974105358124, "learning_rate": 9.848262449189105e-06, "loss": 0.069, "step": 1037 }, { "epoch": 0.1446387514805267, "grad_norm": 0.12037990987300873, "learning_rate": 9.847685706899913e-06, "loss": 0.0539, "step": 1038 }, { "epoch": 0.1447780951717411, "grad_norm": 0.17287029325962067, "learning_rate": 9.84710788756563e-06, "loss": 0.0596, "step": 1039 }, { "epoch": 0.14491743886295547, "grad_norm": 0.1264815479516983, "learning_rate": 9.846528991314638e-06, "loss": 0.0599, "step": 1040 }, { "epoch": 0.14505678255416987, "grad_norm": 0.23635433614253998, "learning_rate": 9.845949018275551e-06, "loss": 0.0641, "step": 1041 }, { "epoch": 0.14519612624538425, "grad_norm": 0.2966170608997345, "learning_rate": 9.845367968577229e-06, "loss": 0.0702, "step": 1042 }, { "epoch": 0.14533546993659863, "grad_norm": 0.30346253514289856, "learning_rate": 9.844785842348764e-06, "loss": 0.069, "step": 1043 }, { "epoch": 0.145474813627813, "grad_norm": 0.22347807884216309, "learning_rate": 9.844202639719492e-06, "loss": 0.0633, "step": 1044 }, { "epoch": 0.14561415731902738, "grad_norm": 0.13574407994747162, "learning_rate": 9.84361836081899e-06, "loss": 0.0615, "step": 1045 }, { "epoch": 0.14575350101024176, "grad_norm": 0.25480279326438904, "learning_rate": 9.84303300577707e-06, "loss": 0.0546, "step": 1046 }, { "epoch": 0.14589284470145614, "grad_norm": 0.27658361196517944, "learning_rate": 9.842446574723786e-06, "loss": 0.0672, "step": 1047 }, { "epoch": 0.14603218839267051, "grad_norm": 0.2519839406013489, "learning_rate": 9.841859067789425e-06, "loss": 0.0615, "step": 1048 }, { "epoch": 0.1461715320838849, "grad_norm": 0.2130017727613449, "learning_rate": 9.841270485104522e-06, "loss": 0.0701, "step": 1049 }, { "epoch": 0.14631087577509927, "grad_norm": 0.19124212861061096, "learning_rate": 9.840680826799845e-06, "loss": 0.0549, "step": 1050 }, { "epoch": 0.14645021946631367, "grad_norm": 0.2773110270500183, "learning_rate": 9.840090093006403e-06, "loss": 0.0595, "step": 1051 }, { "epoch": 0.14658956315752805, "grad_norm": 0.15070822834968567, "learning_rate": 9.839498283855444e-06, "loss": 0.0674, "step": 1052 }, { "epoch": 0.14672890684874243, "grad_norm": 0.21120916306972504, "learning_rate": 9.838905399478453e-06, "loss": 0.067, "step": 1053 }, { "epoch": 0.1468682505399568, "grad_norm": 0.18491707742214203, "learning_rate": 9.838311440007159e-06, "loss": 0.064, "step": 1054 }, { "epoch": 0.14700759423117118, "grad_norm": 0.14394253492355347, "learning_rate": 9.83771640557352e-06, "loss": 0.05, "step": 1055 }, { "epoch": 0.14714693792238556, "grad_norm": 0.21199549734592438, "learning_rate": 9.837120296309744e-06, "loss": 0.0651, "step": 1056 }, { "epoch": 0.14728628161359994, "grad_norm": 0.16575691103935242, "learning_rate": 9.836523112348271e-06, "loss": 0.0579, "step": 1057 }, { "epoch": 0.14742562530481432, "grad_norm": 0.20041997730731964, "learning_rate": 9.835924853821783e-06, "loss": 0.0631, "step": 1058 }, { "epoch": 0.1475649689960287, "grad_norm": 0.11447960138320923, "learning_rate": 9.8353255208632e-06, "loss": 0.061, "step": 1059 }, { "epoch": 0.14770431268724307, "grad_norm": 0.12609648704528809, "learning_rate": 9.834725113605676e-06, "loss": 0.0656, "step": 1060 }, { "epoch": 0.14784365637845748, "grad_norm": 0.08064878731966019, "learning_rate": 9.83412363218261e-06, "loss": 0.0593, "step": 1061 }, { "epoch": 0.14798300006967186, "grad_norm": 0.24787980318069458, "learning_rate": 9.833521076727638e-06, "loss": 0.0668, "step": 1062 }, { "epoch": 0.14812234376088623, "grad_norm": 0.13141943514347076, "learning_rate": 9.832917447374637e-06, "loss": 0.0543, "step": 1063 }, { "epoch": 0.1482616874521006, "grad_norm": 0.19062291085720062, "learning_rate": 9.832312744257715e-06, "loss": 0.0607, "step": 1064 }, { "epoch": 0.148401031143315, "grad_norm": 0.14565879106521606, "learning_rate": 9.831706967511223e-06, "loss": 0.0575, "step": 1065 }, { "epoch": 0.14854037483452937, "grad_norm": 0.12660273909568787, "learning_rate": 9.831100117269755e-06, "loss": 0.0597, "step": 1066 }, { "epoch": 0.14867971852574374, "grad_norm": 0.07955345511436462, "learning_rate": 9.830492193668135e-06, "loss": 0.0601, "step": 1067 }, { "epoch": 0.14881906221695812, "grad_norm": 0.11692028492689133, "learning_rate": 9.829883196841433e-06, "loss": 0.0508, "step": 1068 }, { "epoch": 0.1489584059081725, "grad_norm": 0.3517708480358124, "learning_rate": 9.829273126924952e-06, "loss": 0.0749, "step": 1069 }, { "epoch": 0.14909774959938688, "grad_norm": 0.21569019556045532, "learning_rate": 9.828661984054238e-06, "loss": 0.0783, "step": 1070 }, { "epoch": 0.14923709329060128, "grad_norm": 0.2591201961040497, "learning_rate": 9.82804976836507e-06, "loss": 0.062, "step": 1071 }, { "epoch": 0.14937643698181566, "grad_norm": 0.1722613275051117, "learning_rate": 9.827436479993468e-06, "loss": 0.0586, "step": 1072 }, { "epoch": 0.14951578067303004, "grad_norm": 0.10858240723609924, "learning_rate": 9.826822119075694e-06, "loss": 0.0602, "step": 1073 }, { "epoch": 0.1496551243642444, "grad_norm": 0.31637465953826904, "learning_rate": 9.826206685748242e-06, "loss": 0.0846, "step": 1074 }, { "epoch": 0.1497944680554588, "grad_norm": 0.0888538584113121, "learning_rate": 9.825590180147852e-06, "loss": 0.0543, "step": 1075 }, { "epoch": 0.14993381174667317, "grad_norm": 0.21787358820438385, "learning_rate": 9.82497260241149e-06, "loss": 0.0643, "step": 1076 }, { "epoch": 0.15007315543788755, "grad_norm": 0.13881242275238037, "learning_rate": 9.824353952676375e-06, "loss": 0.0581, "step": 1077 }, { "epoch": 0.15021249912910192, "grad_norm": 0.12120595574378967, "learning_rate": 9.823734231079953e-06, "loss": 0.0635, "step": 1078 }, { "epoch": 0.1503518428203163, "grad_norm": 0.3206697106361389, "learning_rate": 9.823113437759912e-06, "loss": 0.0645, "step": 1079 }, { "epoch": 0.15049118651153068, "grad_norm": 0.193722665309906, "learning_rate": 9.822491572854178e-06, "loss": 0.0627, "step": 1080 }, { "epoch": 0.15063053020274508, "grad_norm": 0.15933315455913544, "learning_rate": 9.821868636500917e-06, "loss": 0.0655, "step": 1081 }, { "epoch": 0.15076987389395946, "grad_norm": 0.34818926453590393, "learning_rate": 9.82124462883853e-06, "loss": 0.0591, "step": 1082 }, { "epoch": 0.15090921758517384, "grad_norm": 0.16311492025852203, "learning_rate": 9.820619550005656e-06, "loss": 0.0692, "step": 1083 }, { "epoch": 0.15104856127638822, "grad_norm": 0.14351660013198853, "learning_rate": 9.819993400141176e-06, "loss": 0.0556, "step": 1084 }, { "epoch": 0.1511879049676026, "grad_norm": 0.3244920074939728, "learning_rate": 9.819366179384204e-06, "loss": 0.0763, "step": 1085 }, { "epoch": 0.15132724865881697, "grad_norm": 0.24143345654010773, "learning_rate": 9.818737887874097e-06, "loss": 0.0615, "step": 1086 }, { "epoch": 0.15146659235003135, "grad_norm": 0.28388017416000366, "learning_rate": 9.818108525750442e-06, "loss": 0.0593, "step": 1087 }, { "epoch": 0.15160593604124573, "grad_norm": 0.18652001023292542, "learning_rate": 9.817478093153074e-06, "loss": 0.0653, "step": 1088 }, { "epoch": 0.1517452797324601, "grad_norm": 0.305928498506546, "learning_rate": 9.816846590222058e-06, "loss": 0.0701, "step": 1089 }, { "epoch": 0.15188462342367448, "grad_norm": 0.17123079299926758, "learning_rate": 9.8162140170977e-06, "loss": 0.0561, "step": 1090 }, { "epoch": 0.15202396711488889, "grad_norm": 0.0704401433467865, "learning_rate": 9.815580373920543e-06, "loss": 0.0491, "step": 1091 }, { "epoch": 0.15216331080610326, "grad_norm": 0.11881448328495026, "learning_rate": 9.81494566083137e-06, "loss": 0.0763, "step": 1092 }, { "epoch": 0.15230265449731764, "grad_norm": 0.18594476580619812, "learning_rate": 9.814309877971195e-06, "loss": 0.0731, "step": 1093 }, { "epoch": 0.15244199818853202, "grad_norm": 0.1570468246936798, "learning_rate": 9.81367302548128e-06, "loss": 0.0661, "step": 1094 }, { "epoch": 0.1525813418797464, "grad_norm": 0.10449712723493576, "learning_rate": 9.813035103503116e-06, "loss": 0.0645, "step": 1095 }, { "epoch": 0.15272068557096077, "grad_norm": 0.23328496515750885, "learning_rate": 9.812396112178437e-06, "loss": 0.0616, "step": 1096 }, { "epoch": 0.15286002926217515, "grad_norm": 0.11587532609701157, "learning_rate": 9.811756051649209e-06, "loss": 0.0532, "step": 1097 }, { "epoch": 0.15299937295338953, "grad_norm": 0.16876375675201416, "learning_rate": 9.811114922057642e-06, "loss": 0.0709, "step": 1098 }, { "epoch": 0.1531387166446039, "grad_norm": 0.2723919451236725, "learning_rate": 9.810472723546178e-06, "loss": 0.0662, "step": 1099 }, { "epoch": 0.15327806033581828, "grad_norm": 0.1288823038339615, "learning_rate": 9.8098294562575e-06, "loss": 0.0651, "step": 1100 }, { "epoch": 0.1534174040270327, "grad_norm": 0.2607540488243103, "learning_rate": 9.809185120334528e-06, "loss": 0.0724, "step": 1101 }, { "epoch": 0.15355674771824707, "grad_norm": 0.2325623482465744, "learning_rate": 9.808539715920415e-06, "loss": 0.0639, "step": 1102 }, { "epoch": 0.15369609140946144, "grad_norm": 0.21747064590454102, "learning_rate": 9.807893243158562e-06, "loss": 0.0562, "step": 1103 }, { "epoch": 0.15383543510067582, "grad_norm": 0.1104842945933342, "learning_rate": 9.807245702192593e-06, "loss": 0.0631, "step": 1104 }, { "epoch": 0.1539747787918902, "grad_norm": 0.14361517131328583, "learning_rate": 9.80659709316638e-06, "loss": 0.0627, "step": 1105 }, { "epoch": 0.15411412248310458, "grad_norm": 0.15435124933719635, "learning_rate": 9.805947416224034e-06, "loss": 0.0586, "step": 1106 }, { "epoch": 0.15425346617431895, "grad_norm": 0.17233793437480927, "learning_rate": 9.80529667150989e-06, "loss": 0.0516, "step": 1107 }, { "epoch": 0.15439280986553333, "grad_norm": 0.12212517857551575, "learning_rate": 9.804644859168534e-06, "loss": 0.0506, "step": 1108 }, { "epoch": 0.1545321535567477, "grad_norm": 0.24616850912570953, "learning_rate": 9.80399197934478e-06, "loss": 0.0713, "step": 1109 }, { "epoch": 0.1546714972479621, "grad_norm": 0.12111727148294449, "learning_rate": 9.803338032183686e-06, "loss": 0.0537, "step": 1110 }, { "epoch": 0.1548108409391765, "grad_norm": 0.16227266192436218, "learning_rate": 9.802683017830544e-06, "loss": 0.0559, "step": 1111 }, { "epoch": 0.15495018463039087, "grad_norm": 0.18903931975364685, "learning_rate": 9.802026936430883e-06, "loss": 0.0609, "step": 1112 }, { "epoch": 0.15508952832160525, "grad_norm": 0.24231933057308197, "learning_rate": 9.801369788130468e-06, "loss": 0.0721, "step": 1113 }, { "epoch": 0.15522887201281962, "grad_norm": 0.10821939259767532, "learning_rate": 9.800711573075303e-06, "loss": 0.0688, "step": 1114 }, { "epoch": 0.155368215704034, "grad_norm": 0.1857915073633194, "learning_rate": 9.80005229141163e-06, "loss": 0.0632, "step": 1115 }, { "epoch": 0.15550755939524838, "grad_norm": 0.2219444215297699, "learning_rate": 9.799391943285923e-06, "loss": 0.0632, "step": 1116 }, { "epoch": 0.15564690308646276, "grad_norm": 0.14204542338848114, "learning_rate": 9.798730528844899e-06, "loss": 0.0571, "step": 1117 }, { "epoch": 0.15578624677767713, "grad_norm": 0.0987272784113884, "learning_rate": 9.79806804823551e-06, "loss": 0.0621, "step": 1118 }, { "epoch": 0.1559255904688915, "grad_norm": 0.13027925789356232, "learning_rate": 9.79740450160494e-06, "loss": 0.0691, "step": 1119 }, { "epoch": 0.1560649341601059, "grad_norm": 0.09345601499080658, "learning_rate": 9.796739889100617e-06, "loss": 0.0484, "step": 1120 }, { "epoch": 0.1562042778513203, "grad_norm": 0.3502131402492523, "learning_rate": 9.796074210870204e-06, "loss": 0.0762, "step": 1121 }, { "epoch": 0.15634362154253467, "grad_norm": 0.1381777971982956, "learning_rate": 9.795407467061596e-06, "loss": 0.0626, "step": 1122 }, { "epoch": 0.15648296523374905, "grad_norm": 0.16318777203559875, "learning_rate": 9.794739657822929e-06, "loss": 0.069, "step": 1123 }, { "epoch": 0.15662230892496343, "grad_norm": 0.12794514000415802, "learning_rate": 9.794070783302576e-06, "loss": 0.0539, "step": 1124 }, { "epoch": 0.1567616526161778, "grad_norm": 0.136275976896286, "learning_rate": 9.793400843649146e-06, "loss": 0.0606, "step": 1125 }, { "epoch": 0.15690099630739218, "grad_norm": 0.09046459943056107, "learning_rate": 9.792729839011484e-06, "loss": 0.0618, "step": 1126 }, { "epoch": 0.15704033999860656, "grad_norm": 0.14696314930915833, "learning_rate": 9.792057769538672e-06, "loss": 0.0774, "step": 1127 }, { "epoch": 0.15717968368982094, "grad_norm": 0.20452666282653809, "learning_rate": 9.791384635380028e-06, "loss": 0.0522, "step": 1128 }, { "epoch": 0.15731902738103531, "grad_norm": 0.1326860785484314, "learning_rate": 9.790710436685105e-06, "loss": 0.0638, "step": 1129 }, { "epoch": 0.1574583710722497, "grad_norm": 0.14528921246528625, "learning_rate": 9.790035173603699e-06, "loss": 0.0623, "step": 1130 }, { "epoch": 0.1575977147634641, "grad_norm": 0.14557646214962006, "learning_rate": 9.789358846285835e-06, "loss": 0.0602, "step": 1131 }, { "epoch": 0.15773705845467847, "grad_norm": 0.21987715363502502, "learning_rate": 9.788681454881778e-06, "loss": 0.0777, "step": 1132 }, { "epoch": 0.15787640214589285, "grad_norm": 0.1740642637014389, "learning_rate": 9.78800299954203e-06, "loss": 0.0686, "step": 1133 }, { "epoch": 0.15801574583710723, "grad_norm": 0.142862930893898, "learning_rate": 9.787323480417328e-06, "loss": 0.0694, "step": 1134 }, { "epoch": 0.1581550895283216, "grad_norm": 0.1680867224931717, "learning_rate": 9.786642897658645e-06, "loss": 0.0619, "step": 1135 }, { "epoch": 0.15829443321953598, "grad_norm": 0.11188060790300369, "learning_rate": 9.78596125141719e-06, "loss": 0.058, "step": 1136 }, { "epoch": 0.15843377691075036, "grad_norm": 0.17903371155261993, "learning_rate": 9.785278541844409e-06, "loss": 0.0707, "step": 1137 }, { "epoch": 0.15857312060196474, "grad_norm": 0.12653855979442596, "learning_rate": 9.784594769091989e-06, "loss": 0.0519, "step": 1138 }, { "epoch": 0.15871246429317912, "grad_norm": 0.12052393704652786, "learning_rate": 9.783909933311844e-06, "loss": 0.0501, "step": 1139 }, { "epoch": 0.1588518079843935, "grad_norm": 0.13071514666080475, "learning_rate": 9.78322403465613e-06, "loss": 0.0826, "step": 1140 }, { "epoch": 0.1589911516756079, "grad_norm": 0.3312719166278839, "learning_rate": 9.782537073277238e-06, "loss": 0.0817, "step": 1141 }, { "epoch": 0.15913049536682228, "grad_norm": 0.26267802715301514, "learning_rate": 9.781849049327796e-06, "loss": 0.0687, "step": 1142 }, { "epoch": 0.15926983905803666, "grad_norm": 0.21299926936626434, "learning_rate": 9.781159962960667e-06, "loss": 0.0686, "step": 1143 }, { "epoch": 0.15940918274925103, "grad_norm": 0.12209179997444153, "learning_rate": 9.78046981432895e-06, "loss": 0.061, "step": 1144 }, { "epoch": 0.1595485264404654, "grad_norm": 0.469722718000412, "learning_rate": 9.77977860358598e-06, "loss": 0.0804, "step": 1145 }, { "epoch": 0.1596878701316798, "grad_norm": 0.09851899743080139, "learning_rate": 9.779086330885328e-06, "loss": 0.0632, "step": 1146 }, { "epoch": 0.15982721382289417, "grad_norm": 0.12927858531475067, "learning_rate": 9.778392996380803e-06, "loss": 0.0663, "step": 1147 }, { "epoch": 0.15996655751410854, "grad_norm": 0.21880201995372772, "learning_rate": 9.777698600226446e-06, "loss": 0.0669, "step": 1148 }, { "epoch": 0.16010590120532292, "grad_norm": 0.14712147414684296, "learning_rate": 9.777003142576536e-06, "loss": 0.0493, "step": 1149 }, { "epoch": 0.1602452448965373, "grad_norm": 0.13878042995929718, "learning_rate": 9.77630662358559e-06, "loss": 0.0714, "step": 1150 }, { "epoch": 0.1603845885877517, "grad_norm": 0.09081218391656876, "learning_rate": 9.775609043408356e-06, "loss": 0.0549, "step": 1151 }, { "epoch": 0.16052393227896608, "grad_norm": 0.22904753684997559, "learning_rate": 9.774910402199821e-06, "loss": 0.057, "step": 1152 }, { "epoch": 0.16066327597018046, "grad_norm": 0.14746686816215515, "learning_rate": 9.774210700115209e-06, "loss": 0.0614, "step": 1153 }, { "epoch": 0.16080261966139484, "grad_norm": 0.34671056270599365, "learning_rate": 9.773509937309978e-06, "loss": 0.0819, "step": 1154 }, { "epoch": 0.1609419633526092, "grad_norm": 0.11539232730865479, "learning_rate": 9.772808113939819e-06, "loss": 0.0754, "step": 1155 }, { "epoch": 0.1610813070438236, "grad_norm": 0.1860557347536087, "learning_rate": 9.77210523016066e-06, "loss": 0.0603, "step": 1156 }, { "epoch": 0.16122065073503797, "grad_norm": 0.16714385151863098, "learning_rate": 9.771401286128668e-06, "loss": 0.055, "step": 1157 }, { "epoch": 0.16135999442625235, "grad_norm": 0.3282321095466614, "learning_rate": 9.770696282000245e-06, "loss": 0.0787, "step": 1158 }, { "epoch": 0.16149933811746672, "grad_norm": 0.13387230038642883, "learning_rate": 9.769990217932023e-06, "loss": 0.0616, "step": 1159 }, { "epoch": 0.1616386818086811, "grad_norm": 0.26564377546310425, "learning_rate": 9.769283094080878e-06, "loss": 0.0755, "step": 1160 }, { "epoch": 0.1617780254998955, "grad_norm": 0.2545482814311981, "learning_rate": 9.768574910603912e-06, "loss": 0.0739, "step": 1161 }, { "epoch": 0.16191736919110988, "grad_norm": 0.15106137096881866, "learning_rate": 9.767865667658472e-06, "loss": 0.0542, "step": 1162 }, { "epoch": 0.16205671288232426, "grad_norm": 0.22675997018814087, "learning_rate": 9.76715536540213e-06, "loss": 0.0653, "step": 1163 }, { "epoch": 0.16219605657353864, "grad_norm": 0.21441836655139923, "learning_rate": 9.766444003992704e-06, "loss": 0.0572, "step": 1164 }, { "epoch": 0.16233540026475302, "grad_norm": 0.21190577745437622, "learning_rate": 9.765731583588237e-06, "loss": 0.0668, "step": 1165 }, { "epoch": 0.1624747439559674, "grad_norm": 0.2068076878786087, "learning_rate": 9.765018104347017e-06, "loss": 0.0694, "step": 1166 }, { "epoch": 0.16261408764718177, "grad_norm": 0.1516696661710739, "learning_rate": 9.764303566427561e-06, "loss": 0.0464, "step": 1167 }, { "epoch": 0.16275343133839615, "grad_norm": 0.1416846513748169, "learning_rate": 9.763587969988626e-06, "loss": 0.0683, "step": 1168 }, { "epoch": 0.16289277502961053, "grad_norm": 0.2237204611301422, "learning_rate": 9.762871315189198e-06, "loss": 0.0662, "step": 1169 }, { "epoch": 0.1630321187208249, "grad_norm": 0.3408699929714203, "learning_rate": 9.7621536021885e-06, "loss": 0.0779, "step": 1170 }, { "epoch": 0.1631714624120393, "grad_norm": 0.19041699171066284, "learning_rate": 9.761434831145995e-06, "loss": 0.0632, "step": 1171 }, { "epoch": 0.1633108061032537, "grad_norm": 0.11793535947799683, "learning_rate": 9.760715002221375e-06, "loss": 0.0585, "step": 1172 }, { "epoch": 0.16345014979446806, "grad_norm": 0.0759640783071518, "learning_rate": 9.759994115574571e-06, "loss": 0.0496, "step": 1173 }, { "epoch": 0.16358949348568244, "grad_norm": 0.2415218949317932, "learning_rate": 9.759272171365746e-06, "loss": 0.0619, "step": 1174 }, { "epoch": 0.16372883717689682, "grad_norm": 0.25592300295829773, "learning_rate": 9.758549169755302e-06, "loss": 0.0656, "step": 1175 }, { "epoch": 0.1638681808681112, "grad_norm": 0.11726826429367065, "learning_rate": 9.757825110903872e-06, "loss": 0.0511, "step": 1176 }, { "epoch": 0.16400752455932557, "grad_norm": 0.1094171553850174, "learning_rate": 9.757099994972323e-06, "loss": 0.0533, "step": 1177 }, { "epoch": 0.16414686825053995, "grad_norm": 0.1807614266872406, "learning_rate": 9.756373822121762e-06, "loss": 0.0553, "step": 1178 }, { "epoch": 0.16428621194175433, "grad_norm": 0.08958163112401962, "learning_rate": 9.75564659251353e-06, "loss": 0.0595, "step": 1179 }, { "epoch": 0.1644255556329687, "grad_norm": 0.18197466433048248, "learning_rate": 9.754918306309197e-06, "loss": 0.069, "step": 1180 }, { "epoch": 0.16456489932418308, "grad_norm": 0.2017972320318222, "learning_rate": 9.754188963670573e-06, "loss": 0.0565, "step": 1181 }, { "epoch": 0.1647042430153975, "grad_norm": 0.09066823124885559, "learning_rate": 9.753458564759701e-06, "loss": 0.0657, "step": 1182 }, { "epoch": 0.16484358670661187, "grad_norm": 0.11548539996147156, "learning_rate": 9.752727109738859e-06, "loss": 0.0655, "step": 1183 }, { "epoch": 0.16498293039782624, "grad_norm": 0.08770432323217392, "learning_rate": 9.751994598770563e-06, "loss": 0.0628, "step": 1184 }, { "epoch": 0.16512227408904062, "grad_norm": 0.13114763796329498, "learning_rate": 9.751261032017553e-06, "loss": 0.0541, "step": 1185 }, { "epoch": 0.165261617780255, "grad_norm": 0.13579313457012177, "learning_rate": 9.750526409642818e-06, "loss": 0.0667, "step": 1186 }, { "epoch": 0.16540096147146938, "grad_norm": 0.16832681000232697, "learning_rate": 9.749790731809568e-06, "loss": 0.0695, "step": 1187 }, { "epoch": 0.16554030516268375, "grad_norm": 0.10433074086904526, "learning_rate": 9.74905399868126e-06, "loss": 0.0641, "step": 1188 }, { "epoch": 0.16567964885389813, "grad_norm": 0.3002864718437195, "learning_rate": 9.748316210421573e-06, "loss": 0.0585, "step": 1189 }, { "epoch": 0.1658189925451125, "grad_norm": 0.09785444289445877, "learning_rate": 9.747577367194432e-06, "loss": 0.0649, "step": 1190 }, { "epoch": 0.1659583362363269, "grad_norm": 0.21232300996780396, "learning_rate": 9.74683746916399e-06, "loss": 0.0564, "step": 1191 }, { "epoch": 0.1660976799275413, "grad_norm": 0.18426164984703064, "learning_rate": 9.746096516494632e-06, "loss": 0.076, "step": 1192 }, { "epoch": 0.16623702361875567, "grad_norm": 0.20721805095672607, "learning_rate": 9.745354509350983e-06, "loss": 0.0628, "step": 1193 }, { "epoch": 0.16637636730997005, "grad_norm": 0.16171908378601074, "learning_rate": 9.744611447897902e-06, "loss": 0.0598, "step": 1194 }, { "epoch": 0.16651571100118442, "grad_norm": 0.07053082436323166, "learning_rate": 9.743867332300478e-06, "loss": 0.0543, "step": 1195 }, { "epoch": 0.1666550546923988, "grad_norm": 0.15964972972869873, "learning_rate": 9.743122162724038e-06, "loss": 0.0676, "step": 1196 }, { "epoch": 0.16679439838361318, "grad_norm": 0.08520420640707016, "learning_rate": 9.742375939334141e-06, "loss": 0.0514, "step": 1197 }, { "epoch": 0.16693374207482756, "grad_norm": 0.1439020186662674, "learning_rate": 9.74162866229658e-06, "loss": 0.0707, "step": 1198 }, { "epoch": 0.16707308576604193, "grad_norm": 0.1140434741973877, "learning_rate": 9.740880331777383e-06, "loss": 0.056, "step": 1199 }, { "epoch": 0.1672124294572563, "grad_norm": 0.14752788841724396, "learning_rate": 9.740130947942812e-06, "loss": 0.0604, "step": 1200 }, { "epoch": 0.1673517731484707, "grad_norm": 0.10229813307523727, "learning_rate": 9.739380510959365e-06, "loss": 0.0475, "step": 1201 }, { "epoch": 0.1674911168396851, "grad_norm": 0.07179433852434158, "learning_rate": 9.738629020993769e-06, "loss": 0.0565, "step": 1202 }, { "epoch": 0.16763046053089947, "grad_norm": 0.12785232067108154, "learning_rate": 9.737876478212989e-06, "loss": 0.0561, "step": 1203 }, { "epoch": 0.16776980422211385, "grad_norm": 0.15933631360530853, "learning_rate": 9.737122882784225e-06, "loss": 0.0638, "step": 1204 }, { "epoch": 0.16790914791332823, "grad_norm": 0.10215414315462112, "learning_rate": 9.736368234874904e-06, "loss": 0.0569, "step": 1205 }, { "epoch": 0.1680484916045426, "grad_norm": 0.08677025139331818, "learning_rate": 9.735612534652697e-06, "loss": 0.0591, "step": 1206 }, { "epoch": 0.16818783529575698, "grad_norm": 0.11907114088535309, "learning_rate": 9.734855782285499e-06, "loss": 0.0609, "step": 1207 }, { "epoch": 0.16832717898697136, "grad_norm": 0.11239484697580338, "learning_rate": 9.734097977941446e-06, "loss": 0.0618, "step": 1208 }, { "epoch": 0.16846652267818574, "grad_norm": 0.12433817237615585, "learning_rate": 9.733339121788903e-06, "loss": 0.0674, "step": 1209 }, { "epoch": 0.16860586636940011, "grad_norm": 0.1718212068080902, "learning_rate": 9.73257921399647e-06, "loss": 0.0639, "step": 1210 }, { "epoch": 0.1687452100606145, "grad_norm": 0.15974289178848267, "learning_rate": 9.731818254732983e-06, "loss": 0.0604, "step": 1211 }, { "epoch": 0.1688845537518289, "grad_norm": 0.11889119446277618, "learning_rate": 9.73105624416751e-06, "loss": 0.0646, "step": 1212 }, { "epoch": 0.16902389744304328, "grad_norm": 0.06790953874588013, "learning_rate": 9.73029318246935e-06, "loss": 0.0543, "step": 1213 }, { "epoch": 0.16916324113425765, "grad_norm": 0.1432446986436844, "learning_rate": 9.72952906980804e-06, "loss": 0.0522, "step": 1214 }, { "epoch": 0.16930258482547203, "grad_norm": 0.18369907140731812, "learning_rate": 9.72876390635335e-06, "loss": 0.0718, "step": 1215 }, { "epoch": 0.1694419285166864, "grad_norm": 0.19259203970432281, "learning_rate": 9.727997692275275e-06, "loss": 0.0601, "step": 1216 }, { "epoch": 0.16958127220790079, "grad_norm": 0.09571042656898499, "learning_rate": 9.727230427744058e-06, "loss": 0.0551, "step": 1217 }, { "epoch": 0.16972061589911516, "grad_norm": 0.11738298088312149, "learning_rate": 9.726462112930165e-06, "loss": 0.0706, "step": 1218 }, { "epoch": 0.16985995959032954, "grad_norm": 0.10570383071899414, "learning_rate": 9.725692748004295e-06, "loss": 0.0564, "step": 1219 }, { "epoch": 0.16999930328154392, "grad_norm": 0.21683332324028015, "learning_rate": 9.724922333137385e-06, "loss": 0.0636, "step": 1220 }, { "epoch": 0.1701386469727583, "grad_norm": 0.19050659239292145, "learning_rate": 9.724150868500607e-06, "loss": 0.0577, "step": 1221 }, { "epoch": 0.1702779906639727, "grad_norm": 0.09392861276865005, "learning_rate": 9.72337835426536e-06, "loss": 0.055, "step": 1222 }, { "epoch": 0.17041733435518708, "grad_norm": 0.17604370415210724, "learning_rate": 9.722604790603279e-06, "loss": 0.0692, "step": 1223 }, { "epoch": 0.17055667804640146, "grad_norm": 0.1806258261203766, "learning_rate": 9.721830177686231e-06, "loss": 0.0619, "step": 1224 }, { "epoch": 0.17069602173761583, "grad_norm": 0.12096353620290756, "learning_rate": 9.72105451568632e-06, "loss": 0.0568, "step": 1225 }, { "epoch": 0.1708353654288302, "grad_norm": 0.09624340385198593, "learning_rate": 9.720277804775879e-06, "loss": 0.0613, "step": 1226 }, { "epoch": 0.1709747091200446, "grad_norm": 0.20353379845619202, "learning_rate": 9.719500045127475e-06, "loss": 0.0658, "step": 1227 }, { "epoch": 0.17111405281125897, "grad_norm": 0.1756592094898224, "learning_rate": 9.718721236913909e-06, "loss": 0.0623, "step": 1228 }, { "epoch": 0.17125339650247334, "grad_norm": 0.20648884773254395, "learning_rate": 9.717941380308216e-06, "loss": 0.0615, "step": 1229 }, { "epoch": 0.17139274019368772, "grad_norm": 0.2687858045101166, "learning_rate": 9.717160475483659e-06, "loss": 0.0613, "step": 1230 }, { "epoch": 0.1715320838849021, "grad_norm": 0.19959841668605804, "learning_rate": 9.71637852261374e-06, "loss": 0.0699, "step": 1231 }, { "epoch": 0.1716714275761165, "grad_norm": 0.132990300655365, "learning_rate": 9.71559552187219e-06, "loss": 0.0578, "step": 1232 }, { "epoch": 0.17181077126733088, "grad_norm": 0.11314260959625244, "learning_rate": 9.714811473432973e-06, "loss": 0.0515, "step": 1233 }, { "epoch": 0.17195011495854526, "grad_norm": 0.11423920094966888, "learning_rate": 9.714026377470287e-06, "loss": 0.0558, "step": 1234 }, { "epoch": 0.17208945864975964, "grad_norm": 0.12581773102283478, "learning_rate": 9.713240234158565e-06, "loss": 0.0545, "step": 1235 }, { "epoch": 0.172228802340974, "grad_norm": 0.08658990263938904, "learning_rate": 9.712453043672467e-06, "loss": 0.0522, "step": 1236 }, { "epoch": 0.1723681460321884, "grad_norm": 0.1354345679283142, "learning_rate": 9.71166480618689e-06, "loss": 0.0672, "step": 1237 }, { "epoch": 0.17250748972340277, "grad_norm": 0.24837462604045868, "learning_rate": 9.71087552187696e-06, "loss": 0.0586, "step": 1238 }, { "epoch": 0.17264683341461715, "grad_norm": 0.12682096660137177, "learning_rate": 9.710085190918044e-06, "loss": 0.0496, "step": 1239 }, { "epoch": 0.17278617710583152, "grad_norm": 0.16638877987861633, "learning_rate": 9.70929381348573e-06, "loss": 0.0761, "step": 1240 }, { "epoch": 0.1729255207970459, "grad_norm": 0.08854410797357559, "learning_rate": 9.708501389755846e-06, "loss": 0.0651, "step": 1241 }, { "epoch": 0.1730648644882603, "grad_norm": 0.14819195866584778, "learning_rate": 9.70770791990445e-06, "loss": 0.0542, "step": 1242 }, { "epoch": 0.17320420817947468, "grad_norm": 0.33103519678115845, "learning_rate": 9.706913404107832e-06, "loss": 0.0619, "step": 1243 }, { "epoch": 0.17334355187068906, "grad_norm": 0.1974218785762787, "learning_rate": 9.706117842542517e-06, "loss": 0.0752, "step": 1244 }, { "epoch": 0.17348289556190344, "grad_norm": 0.19409878551959991, "learning_rate": 9.70532123538526e-06, "loss": 0.059, "step": 1245 }, { "epoch": 0.17362223925311782, "grad_norm": 0.11685080081224442, "learning_rate": 9.704523582813049e-06, "loss": 0.0575, "step": 1246 }, { "epoch": 0.1737615829443322, "grad_norm": 0.09413900226354599, "learning_rate": 9.703724885003102e-06, "loss": 0.0703, "step": 1247 }, { "epoch": 0.17390092663554657, "grad_norm": 0.09243375062942505, "learning_rate": 9.702925142132876e-06, "loss": 0.0527, "step": 1248 }, { "epoch": 0.17404027032676095, "grad_norm": 0.1911759376525879, "learning_rate": 9.70212435438005e-06, "loss": 0.0617, "step": 1249 }, { "epoch": 0.17417961401797533, "grad_norm": 0.16268093883991241, "learning_rate": 9.701322521922549e-06, "loss": 0.0565, "step": 1250 }, { "epoch": 0.1743189577091897, "grad_norm": 0.13928598165512085, "learning_rate": 9.700519644938513e-06, "loss": 0.0727, "step": 1251 }, { "epoch": 0.1744583014004041, "grad_norm": 0.12590697407722473, "learning_rate": 9.699715723606327e-06, "loss": 0.0603, "step": 1252 }, { "epoch": 0.1745976450916185, "grad_norm": 0.11335597187280655, "learning_rate": 9.698910758104603e-06, "loss": 0.0573, "step": 1253 }, { "epoch": 0.17473698878283286, "grad_norm": 0.21772651374340057, "learning_rate": 9.698104748612187e-06, "loss": 0.073, "step": 1254 }, { "epoch": 0.17487633247404724, "grad_norm": 0.15274693071842194, "learning_rate": 9.697297695308157e-06, "loss": 0.0727, "step": 1255 }, { "epoch": 0.17501567616526162, "grad_norm": 0.08351927250623703, "learning_rate": 9.696489598371817e-06, "loss": 0.0494, "step": 1256 }, { "epoch": 0.175155019856476, "grad_norm": 0.1315496861934662, "learning_rate": 9.695680457982713e-06, "loss": 0.0609, "step": 1257 }, { "epoch": 0.17529436354769037, "grad_norm": 0.09024148434400558, "learning_rate": 9.694870274320616e-06, "loss": 0.0509, "step": 1258 }, { "epoch": 0.17543370723890475, "grad_norm": 0.09396906197071075, "learning_rate": 9.694059047565529e-06, "loss": 0.0565, "step": 1259 }, { "epoch": 0.17557305093011913, "grad_norm": 0.16301396489143372, "learning_rate": 9.69324677789769e-06, "loss": 0.0558, "step": 1260 }, { "epoch": 0.1757123946213335, "grad_norm": 0.25597670674324036, "learning_rate": 9.692433465497562e-06, "loss": 0.0546, "step": 1261 }, { "epoch": 0.1758517383125479, "grad_norm": 0.09936001151800156, "learning_rate": 9.69161911054585e-06, "loss": 0.0588, "step": 1262 }, { "epoch": 0.1759910820037623, "grad_norm": 0.13514883816242218, "learning_rate": 9.690803713223485e-06, "loss": 0.0559, "step": 1263 }, { "epoch": 0.17613042569497667, "grad_norm": 0.15304124355316162, "learning_rate": 9.689987273711626e-06, "loss": 0.0525, "step": 1264 }, { "epoch": 0.17626976938619104, "grad_norm": 0.14031068980693817, "learning_rate": 9.68916979219167e-06, "loss": 0.0739, "step": 1265 }, { "epoch": 0.17640911307740542, "grad_norm": 0.13752925395965576, "learning_rate": 9.68835126884524e-06, "loss": 0.0591, "step": 1266 }, { "epoch": 0.1765484567686198, "grad_norm": 0.10860387235879898, "learning_rate": 9.687531703854196e-06, "loss": 0.0586, "step": 1267 }, { "epoch": 0.17668780045983418, "grad_norm": 0.1553175449371338, "learning_rate": 9.686711097400625e-06, "loss": 0.0612, "step": 1268 }, { "epoch": 0.17682714415104855, "grad_norm": 0.129857137799263, "learning_rate": 9.685889449666849e-06, "loss": 0.0658, "step": 1269 }, { "epoch": 0.17696648784226293, "grad_norm": 0.13070262968540192, "learning_rate": 9.685066760835417e-06, "loss": 0.0731, "step": 1270 }, { "epoch": 0.1771058315334773, "grad_norm": 0.09362918883562088, "learning_rate": 9.684243031089113e-06, "loss": 0.0555, "step": 1271 }, { "epoch": 0.17724517522469171, "grad_norm": 0.15234635770320892, "learning_rate": 9.68341826061095e-06, "loss": 0.0613, "step": 1272 }, { "epoch": 0.1773845189159061, "grad_norm": 0.2987450659275055, "learning_rate": 9.682592449584174e-06, "loss": 0.0802, "step": 1273 }, { "epoch": 0.17752386260712047, "grad_norm": 0.18369470536708832, "learning_rate": 9.68176559819226e-06, "loss": 0.0582, "step": 1274 }, { "epoch": 0.17766320629833485, "grad_norm": 0.1447945088148117, "learning_rate": 9.680937706618919e-06, "loss": 0.056, "step": 1275 }, { "epoch": 0.17780254998954922, "grad_norm": 0.18607449531555176, "learning_rate": 9.680108775048087e-06, "loss": 0.0535, "step": 1276 }, { "epoch": 0.1779418936807636, "grad_norm": 0.14651992917060852, "learning_rate": 9.679278803663932e-06, "loss": 0.0639, "step": 1277 }, { "epoch": 0.17808123737197798, "grad_norm": 0.24039317667484283, "learning_rate": 9.678447792650858e-06, "loss": 0.0621, "step": 1278 }, { "epoch": 0.17822058106319236, "grad_norm": 0.1789376586675644, "learning_rate": 9.677615742193495e-06, "loss": 0.0577, "step": 1279 }, { "epoch": 0.17835992475440673, "grad_norm": 0.1082272082567215, "learning_rate": 9.676782652476705e-06, "loss": 0.0551, "step": 1280 }, { "epoch": 0.1784992684456211, "grad_norm": 0.1209690272808075, "learning_rate": 9.675948523685583e-06, "loss": 0.0621, "step": 1281 }, { "epoch": 0.17863861213683552, "grad_norm": 0.10207803547382355, "learning_rate": 9.675113356005453e-06, "loss": 0.0586, "step": 1282 }, { "epoch": 0.1787779558280499, "grad_norm": 0.4450748562812805, "learning_rate": 9.674277149621869e-06, "loss": 0.0731, "step": 1283 }, { "epoch": 0.17891729951926427, "grad_norm": 0.12428788095712662, "learning_rate": 9.673439904720619e-06, "loss": 0.0543, "step": 1284 }, { "epoch": 0.17905664321047865, "grad_norm": 0.12168528884649277, "learning_rate": 9.672601621487718e-06, "loss": 0.0593, "step": 1285 }, { "epoch": 0.17919598690169303, "grad_norm": 0.21034829318523407, "learning_rate": 9.671762300109415e-06, "loss": 0.0593, "step": 1286 }, { "epoch": 0.1793353305929074, "grad_norm": 0.21819692850112915, "learning_rate": 9.670921940772186e-06, "loss": 0.0619, "step": 1287 }, { "epoch": 0.17947467428412178, "grad_norm": 0.21968793869018555, "learning_rate": 9.670080543662742e-06, "loss": 0.0756, "step": 1288 }, { "epoch": 0.17961401797533616, "grad_norm": 0.1471056044101715, "learning_rate": 9.669238108968018e-06, "loss": 0.0667, "step": 1289 }, { "epoch": 0.17975336166655054, "grad_norm": 0.2946205735206604, "learning_rate": 9.668394636875188e-06, "loss": 0.0589, "step": 1290 }, { "epoch": 0.17989270535776491, "grad_norm": 0.10063092410564423, "learning_rate": 9.667550127571653e-06, "loss": 0.0572, "step": 1291 }, { "epoch": 0.18003204904897932, "grad_norm": 0.16292837262153625, "learning_rate": 9.666704581245041e-06, "loss": 0.0628, "step": 1292 }, { "epoch": 0.1801713927401937, "grad_norm": 0.12110070139169693, "learning_rate": 9.665857998083212e-06, "loss": 0.0563, "step": 1293 }, { "epoch": 0.18031073643140808, "grad_norm": 0.16630499064922333, "learning_rate": 9.66501037827426e-06, "loss": 0.0704, "step": 1294 }, { "epoch": 0.18045008012262245, "grad_norm": 0.16524265706539154, "learning_rate": 9.664161722006506e-06, "loss": 0.0691, "step": 1295 }, { "epoch": 0.18058942381383683, "grad_norm": 0.1976855844259262, "learning_rate": 9.663312029468504e-06, "loss": 0.0591, "step": 1296 }, { "epoch": 0.1807287675050512, "grad_norm": 0.1383598893880844, "learning_rate": 9.662461300849031e-06, "loss": 0.0553, "step": 1297 }, { "epoch": 0.18086811119626559, "grad_norm": 0.10756823420524597, "learning_rate": 9.661609536337104e-06, "loss": 0.0636, "step": 1298 }, { "epoch": 0.18100745488747996, "grad_norm": 0.20554111897945404, "learning_rate": 9.660756736121964e-06, "loss": 0.0714, "step": 1299 }, { "epoch": 0.18114679857869434, "grad_norm": 0.24698476493358612, "learning_rate": 9.659902900393086e-06, "loss": 0.0723, "step": 1300 }, { "epoch": 0.18128614226990872, "grad_norm": 0.19732069969177246, "learning_rate": 9.659048029340169e-06, "loss": 0.0635, "step": 1301 }, { "epoch": 0.18142548596112312, "grad_norm": 0.12400031089782715, "learning_rate": 9.658192123153149e-06, "loss": 0.0512, "step": 1302 }, { "epoch": 0.1815648296523375, "grad_norm": 0.11339864879846573, "learning_rate": 9.657335182022187e-06, "loss": 0.0685, "step": 1303 }, { "epoch": 0.18170417334355188, "grad_norm": 0.1923193484544754, "learning_rate": 9.656477206137675e-06, "loss": 0.0746, "step": 1304 }, { "epoch": 0.18184351703476626, "grad_norm": 0.1567767858505249, "learning_rate": 9.655618195690239e-06, "loss": 0.0575, "step": 1305 }, { "epoch": 0.18198286072598063, "grad_norm": 0.11089717596769333, "learning_rate": 9.654758150870728e-06, "loss": 0.054, "step": 1306 }, { "epoch": 0.182122204417195, "grad_norm": 0.16776683926582336, "learning_rate": 9.653897071870226e-06, "loss": 0.0581, "step": 1307 }, { "epoch": 0.1822615481084094, "grad_norm": 0.1934538185596466, "learning_rate": 9.653034958880045e-06, "loss": 0.0604, "step": 1308 }, { "epoch": 0.18240089179962377, "grad_norm": 0.1102573350071907, "learning_rate": 9.652171812091728e-06, "loss": 0.0526, "step": 1309 }, { "epoch": 0.18254023549083814, "grad_norm": 0.14131209254264832, "learning_rate": 9.651307631697044e-06, "loss": 0.0738, "step": 1310 }, { "epoch": 0.18267957918205252, "grad_norm": 0.17292150855064392, "learning_rate": 9.650442417887995e-06, "loss": 0.0448, "step": 1311 }, { "epoch": 0.18281892287326693, "grad_norm": 0.1934477686882019, "learning_rate": 9.649576170856814e-06, "loss": 0.0588, "step": 1312 }, { "epoch": 0.1829582665644813, "grad_norm": 0.14021454751491547, "learning_rate": 9.64870889079596e-06, "loss": 0.0578, "step": 1313 }, { "epoch": 0.18309761025569568, "grad_norm": 0.11519742012023926, "learning_rate": 9.64784057789812e-06, "loss": 0.0501, "step": 1314 }, { "epoch": 0.18323695394691006, "grad_norm": 0.1633152812719345, "learning_rate": 9.646971232356215e-06, "loss": 0.0576, "step": 1315 }, { "epoch": 0.18337629763812444, "grad_norm": 0.0886378362774849, "learning_rate": 9.646100854363396e-06, "loss": 0.0599, "step": 1316 }, { "epoch": 0.1835156413293388, "grad_norm": 0.2946096360683441, "learning_rate": 9.64522944411304e-06, "loss": 0.0794, "step": 1317 }, { "epoch": 0.1836549850205532, "grad_norm": 0.413547545671463, "learning_rate": 9.644357001798752e-06, "loss": 0.0647, "step": 1318 }, { "epoch": 0.18379432871176757, "grad_norm": 0.11941397190093994, "learning_rate": 9.643483527614372e-06, "loss": 0.0562, "step": 1319 }, { "epoch": 0.18393367240298195, "grad_norm": 0.13306306302547455, "learning_rate": 9.642609021753964e-06, "loss": 0.0705, "step": 1320 }, { "epoch": 0.18407301609419632, "grad_norm": 0.11522408574819565, "learning_rate": 9.641733484411823e-06, "loss": 0.0594, "step": 1321 }, { "epoch": 0.18421235978541073, "grad_norm": 0.27820175886154175, "learning_rate": 9.640856915782477e-06, "loss": 0.0664, "step": 1322 }, { "epoch": 0.1843517034766251, "grad_norm": 0.208049938082695, "learning_rate": 9.639979316060675e-06, "loss": 0.0626, "step": 1323 }, { "epoch": 0.18449104716783948, "grad_norm": 0.10272421687841415, "learning_rate": 9.639100685441403e-06, "loss": 0.0544, "step": 1324 }, { "epoch": 0.18463039085905386, "grad_norm": 0.14335787296295166, "learning_rate": 9.638221024119869e-06, "loss": 0.0673, "step": 1325 }, { "epoch": 0.18476973455026824, "grad_norm": 0.09008989483118057, "learning_rate": 9.637340332291518e-06, "loss": 0.0517, "step": 1326 }, { "epoch": 0.18490907824148262, "grad_norm": 0.26006656885147095, "learning_rate": 9.636458610152015e-06, "loss": 0.0689, "step": 1327 }, { "epoch": 0.185048421932697, "grad_norm": 0.3128778338432312, "learning_rate": 9.635575857897264e-06, "loss": 0.067, "step": 1328 }, { "epoch": 0.18518776562391137, "grad_norm": 0.1038985401391983, "learning_rate": 9.634692075723386e-06, "loss": 0.0551, "step": 1329 }, { "epoch": 0.18532710931512575, "grad_norm": 0.11203119903802872, "learning_rate": 9.633807263826745e-06, "loss": 0.0558, "step": 1330 }, { "epoch": 0.18546645300634013, "grad_norm": 0.1287047117948532, "learning_rate": 9.632921422403918e-06, "loss": 0.0538, "step": 1331 }, { "epoch": 0.18560579669755453, "grad_norm": 0.1571081578731537, "learning_rate": 9.632034551651723e-06, "loss": 0.0704, "step": 1332 }, { "epoch": 0.1857451403887689, "grad_norm": 0.12661001086235046, "learning_rate": 9.631146651767202e-06, "loss": 0.063, "step": 1333 }, { "epoch": 0.1858844840799833, "grad_norm": 0.22255393862724304, "learning_rate": 9.630257722947625e-06, "loss": 0.0615, "step": 1334 }, { "epoch": 0.18602382777119766, "grad_norm": 0.17946527898311615, "learning_rate": 9.629367765390494e-06, "loss": 0.0645, "step": 1335 }, { "epoch": 0.18616317146241204, "grad_norm": 0.08773456513881683, "learning_rate": 9.628476779293536e-06, "loss": 0.0556, "step": 1336 }, { "epoch": 0.18630251515362642, "grad_norm": 0.14591743052005768, "learning_rate": 9.627584764854706e-06, "loss": 0.0545, "step": 1337 }, { "epoch": 0.1864418588448408, "grad_norm": 0.21078869700431824, "learning_rate": 9.626691722272193e-06, "loss": 0.0575, "step": 1338 }, { "epoch": 0.18658120253605517, "grad_norm": 0.09772265702486038, "learning_rate": 9.625797651744406e-06, "loss": 0.0559, "step": 1339 }, { "epoch": 0.18672054622726955, "grad_norm": 0.15119917690753937, "learning_rate": 9.62490255346999e-06, "loss": 0.0681, "step": 1340 }, { "epoch": 0.18685988991848393, "grad_norm": 0.08892771601676941, "learning_rate": 9.624006427647817e-06, "loss": 0.057, "step": 1341 }, { "epoch": 0.18699923360969833, "grad_norm": 0.06335862725973129, "learning_rate": 9.623109274476982e-06, "loss": 0.0536, "step": 1342 }, { "epoch": 0.1871385773009127, "grad_norm": 0.09020481258630753, "learning_rate": 9.622211094156812e-06, "loss": 0.0577, "step": 1343 }, { "epoch": 0.1872779209921271, "grad_norm": 0.18476906418800354, "learning_rate": 9.621311886886866e-06, "loss": 0.0556, "step": 1344 }, { "epoch": 0.18741726468334147, "grad_norm": 0.08714055269956589, "learning_rate": 9.620411652866926e-06, "loss": 0.0589, "step": 1345 }, { "epoch": 0.18755660837455584, "grad_norm": 0.19808092713356018, "learning_rate": 9.619510392297e-06, "loss": 0.0694, "step": 1346 }, { "epoch": 0.18769595206577022, "grad_norm": 0.11981858313083649, "learning_rate": 9.618608105377331e-06, "loss": 0.0589, "step": 1347 }, { "epoch": 0.1878352957569846, "grad_norm": 0.14435414969921112, "learning_rate": 9.617704792308387e-06, "loss": 0.063, "step": 1348 }, { "epoch": 0.18797463944819898, "grad_norm": 0.09223278611898422, "learning_rate": 9.61680045329086e-06, "loss": 0.0655, "step": 1349 }, { "epoch": 0.18811398313941335, "grad_norm": 0.107960045337677, "learning_rate": 9.615895088525677e-06, "loss": 0.0538, "step": 1350 }, { "epoch": 0.18825332683062773, "grad_norm": 0.11241523921489716, "learning_rate": 9.614988698213987e-06, "loss": 0.0565, "step": 1351 }, { "epoch": 0.18839267052184214, "grad_norm": 0.12265320122241974, "learning_rate": 9.614081282557172e-06, "loss": 0.0678, "step": 1352 }, { "epoch": 0.18853201421305651, "grad_norm": 0.13979703187942505, "learning_rate": 9.613172841756835e-06, "loss": 0.0564, "step": 1353 }, { "epoch": 0.1886713579042709, "grad_norm": 0.19452083110809326, "learning_rate": 9.612263376014815e-06, "loss": 0.0618, "step": 1354 }, { "epoch": 0.18881070159548527, "grad_norm": 0.1685357540845871, "learning_rate": 9.611352885533171e-06, "loss": 0.0704, "step": 1355 }, { "epoch": 0.18895004528669965, "grad_norm": 0.21531374752521515, "learning_rate": 9.610441370514196e-06, "loss": 0.0653, "step": 1356 }, { "epoch": 0.18908938897791402, "grad_norm": 0.25259748101234436, "learning_rate": 9.609528831160407e-06, "loss": 0.0611, "step": 1357 }, { "epoch": 0.1892287326691284, "grad_norm": 0.22623737156391144, "learning_rate": 9.608615267674548e-06, "loss": 0.077, "step": 1358 }, { "epoch": 0.18936807636034278, "grad_norm": 0.14679932594299316, "learning_rate": 9.607700680259593e-06, "loss": 0.0606, "step": 1359 }, { "epoch": 0.18950742005155716, "grad_norm": 0.07921451330184937, "learning_rate": 9.606785069118742e-06, "loss": 0.0448, "step": 1360 }, { "epoch": 0.18964676374277153, "grad_norm": 0.185092493891716, "learning_rate": 9.605868434455426e-06, "loss": 0.0607, "step": 1361 }, { "epoch": 0.18978610743398594, "grad_norm": 0.22336804866790771, "learning_rate": 9.604950776473294e-06, "loss": 0.08, "step": 1362 }, { "epoch": 0.18992545112520032, "grad_norm": 0.21807129681110382, "learning_rate": 9.604032095376234e-06, "loss": 0.0747, "step": 1363 }, { "epoch": 0.1900647948164147, "grad_norm": 0.10885133594274521, "learning_rate": 9.603112391368354e-06, "loss": 0.0655, "step": 1364 }, { "epoch": 0.19020413850762907, "grad_norm": 0.12133431434631348, "learning_rate": 9.602191664653992e-06, "loss": 0.0508, "step": 1365 }, { "epoch": 0.19034348219884345, "grad_norm": 0.1447887271642685, "learning_rate": 9.601269915437713e-06, "loss": 0.0756, "step": 1366 }, { "epoch": 0.19048282589005783, "grad_norm": 0.07850594818592072, "learning_rate": 9.600347143924305e-06, "loss": 0.0631, "step": 1367 }, { "epoch": 0.1906221695812722, "grad_norm": 0.31915852427482605, "learning_rate": 9.599423350318791e-06, "loss": 0.0612, "step": 1368 }, { "epoch": 0.19076151327248658, "grad_norm": 0.1748472899198532, "learning_rate": 9.598498534826414e-06, "loss": 0.0587, "step": 1369 }, { "epoch": 0.19090085696370096, "grad_norm": 0.22900421917438507, "learning_rate": 9.597572697652649e-06, "loss": 0.0565, "step": 1370 }, { "epoch": 0.19104020065491534, "grad_norm": 0.0767344981431961, "learning_rate": 9.596645839003196e-06, "loss": 0.0412, "step": 1371 }, { "epoch": 0.19117954434612974, "grad_norm": 0.15396389365196228, "learning_rate": 9.595717959083978e-06, "loss": 0.0694, "step": 1372 }, { "epoch": 0.19131888803734412, "grad_norm": 0.22560396790504456, "learning_rate": 9.594789058101154e-06, "loss": 0.0725, "step": 1373 }, { "epoch": 0.1914582317285585, "grad_norm": 0.1255747228860855, "learning_rate": 9.593859136261102e-06, "loss": 0.0609, "step": 1374 }, { "epoch": 0.19159757541977288, "grad_norm": 0.11610090732574463, "learning_rate": 9.592928193770427e-06, "loss": 0.07, "step": 1375 }, { "epoch": 0.19173691911098725, "grad_norm": 0.14322476089000702, "learning_rate": 9.591996230835968e-06, "loss": 0.0625, "step": 1376 }, { "epoch": 0.19187626280220163, "grad_norm": 0.09420809894800186, "learning_rate": 9.591063247664783e-06, "loss": 0.0583, "step": 1377 }, { "epoch": 0.192015606493416, "grad_norm": 0.12771189212799072, "learning_rate": 9.59012924446416e-06, "loss": 0.0557, "step": 1378 }, { "epoch": 0.19215495018463039, "grad_norm": 0.09039872139692307, "learning_rate": 9.589194221441614e-06, "loss": 0.0611, "step": 1379 }, { "epoch": 0.19229429387584476, "grad_norm": 0.1724810004234314, "learning_rate": 9.588258178804884e-06, "loss": 0.0543, "step": 1380 }, { "epoch": 0.19243363756705914, "grad_norm": 0.1463858038187027, "learning_rate": 9.587321116761938e-06, "loss": 0.0565, "step": 1381 }, { "epoch": 0.19257298125827352, "grad_norm": 0.14973844587802887, "learning_rate": 9.586383035520972e-06, "loss": 0.0641, "step": 1382 }, { "epoch": 0.19271232494948792, "grad_norm": 0.18708333373069763, "learning_rate": 9.585443935290403e-06, "loss": 0.0687, "step": 1383 }, { "epoch": 0.1928516686407023, "grad_norm": 0.0886424332857132, "learning_rate": 9.58450381627888e-06, "loss": 0.0531, "step": 1384 }, { "epoch": 0.19299101233191668, "grad_norm": 0.10425245761871338, "learning_rate": 9.583562678695275e-06, "loss": 0.0457, "step": 1385 }, { "epoch": 0.19313035602313106, "grad_norm": 0.25004687905311584, "learning_rate": 9.582620522748686e-06, "loss": 0.0606, "step": 1386 }, { "epoch": 0.19326969971434543, "grad_norm": 0.12445501983165741, "learning_rate": 9.58167734864844e-06, "loss": 0.07, "step": 1387 }, { "epoch": 0.1934090434055598, "grad_norm": 0.10133945196866989, "learning_rate": 9.58073315660409e-06, "loss": 0.061, "step": 1388 }, { "epoch": 0.1935483870967742, "grad_norm": 0.08093154430389404, "learning_rate": 9.579787946825411e-06, "loss": 0.0577, "step": 1389 }, { "epoch": 0.19368773078798857, "grad_norm": 0.14252522587776184, "learning_rate": 9.57884171952241e-06, "loss": 0.0638, "step": 1390 }, { "epoch": 0.19382707447920294, "grad_norm": 0.4070395231246948, "learning_rate": 9.577894474905314e-06, "loss": 0.0679, "step": 1391 }, { "epoch": 0.19396641817041732, "grad_norm": 0.09410806745290756, "learning_rate": 9.576946213184583e-06, "loss": 0.0665, "step": 1392 }, { "epoch": 0.19410576186163173, "grad_norm": 0.19735394418239594, "learning_rate": 9.575996934570896e-06, "loss": 0.0637, "step": 1393 }, { "epoch": 0.1942451055528461, "grad_norm": 0.13025206327438354, "learning_rate": 9.57504663927516e-06, "loss": 0.0572, "step": 1394 }, { "epoch": 0.19438444924406048, "grad_norm": 0.14309681951999664, "learning_rate": 9.574095327508513e-06, "loss": 0.0546, "step": 1395 }, { "epoch": 0.19452379293527486, "grad_norm": 0.09660980105400085, "learning_rate": 9.573142999482313e-06, "loss": 0.0545, "step": 1396 }, { "epoch": 0.19466313662648924, "grad_norm": 0.06903456151485443, "learning_rate": 9.572189655408144e-06, "loss": 0.0428, "step": 1397 }, { "epoch": 0.1948024803177036, "grad_norm": 0.1452283412218094, "learning_rate": 9.571235295497818e-06, "loss": 0.0624, "step": 1398 }, { "epoch": 0.194941824008918, "grad_norm": 0.11928586661815643, "learning_rate": 9.570279919963373e-06, "loss": 0.055, "step": 1399 }, { "epoch": 0.19508116770013237, "grad_norm": 0.09166033565998077, "learning_rate": 9.569323529017071e-06, "loss": 0.06, "step": 1400 }, { "epoch": 0.19522051139134675, "grad_norm": 0.13682132959365845, "learning_rate": 9.568366122871399e-06, "loss": 0.0565, "step": 1401 }, { "epoch": 0.19535985508256112, "grad_norm": 0.13583888113498688, "learning_rate": 9.567407701739075e-06, "loss": 0.0622, "step": 1402 }, { "epoch": 0.19549919877377553, "grad_norm": 0.22932438552379608, "learning_rate": 9.566448265833034e-06, "loss": 0.061, "step": 1403 }, { "epoch": 0.1956385424649899, "grad_norm": 0.1866452395915985, "learning_rate": 9.56548781536644e-06, "loss": 0.0677, "step": 1404 }, { "epoch": 0.19577788615620428, "grad_norm": 0.11099332571029663, "learning_rate": 9.564526350552689e-06, "loss": 0.0649, "step": 1405 }, { "epoch": 0.19591722984741866, "grad_norm": 0.09118405729532242, "learning_rate": 9.56356387160539e-06, "loss": 0.0472, "step": 1406 }, { "epoch": 0.19605657353863304, "grad_norm": 0.1455242782831192, "learning_rate": 9.562600378738389e-06, "loss": 0.0656, "step": 1407 }, { "epoch": 0.19619591722984742, "grad_norm": 0.2254544198513031, "learning_rate": 9.561635872165747e-06, "loss": 0.0575, "step": 1408 }, { "epoch": 0.1963352609210618, "grad_norm": 0.07536441832780838, "learning_rate": 9.56067035210176e-06, "loss": 0.0577, "step": 1409 }, { "epoch": 0.19647460461227617, "grad_norm": 0.14047251641750336, "learning_rate": 9.559703818760943e-06, "loss": 0.078, "step": 1410 }, { "epoch": 0.19661394830349055, "grad_norm": 0.1337832510471344, "learning_rate": 9.558736272358036e-06, "loss": 0.0544, "step": 1411 }, { "epoch": 0.19675329199470493, "grad_norm": 0.1600685715675354, "learning_rate": 9.557767713108009e-06, "loss": 0.0807, "step": 1412 }, { "epoch": 0.19689263568591933, "grad_norm": 0.16914726793766022, "learning_rate": 9.55679814122605e-06, "loss": 0.0658, "step": 1413 }, { "epoch": 0.1970319793771337, "grad_norm": 0.16327542066574097, "learning_rate": 9.555827556927578e-06, "loss": 0.0667, "step": 1414 }, { "epoch": 0.1971713230683481, "grad_norm": 0.12105036526918411, "learning_rate": 9.554855960428234e-06, "loss": 0.0451, "step": 1415 }, { "epoch": 0.19731066675956246, "grad_norm": 0.2242659032344818, "learning_rate": 9.553883351943882e-06, "loss": 0.0576, "step": 1416 }, { "epoch": 0.19745001045077684, "grad_norm": 0.24578042328357697, "learning_rate": 9.55290973169062e-06, "loss": 0.0687, "step": 1417 }, { "epoch": 0.19758935414199122, "grad_norm": 0.09725630283355713, "learning_rate": 9.55193509988476e-06, "loss": 0.0555, "step": 1418 }, { "epoch": 0.1977286978332056, "grad_norm": 0.26195263862609863, "learning_rate": 9.55095945674284e-06, "loss": 0.0654, "step": 1419 }, { "epoch": 0.19786804152441997, "grad_norm": 0.14479464292526245, "learning_rate": 9.549982802481632e-06, "loss": 0.058, "step": 1420 }, { "epoch": 0.19800738521563435, "grad_norm": 0.15381118655204773, "learning_rate": 9.549005137318122e-06, "loss": 0.0602, "step": 1421 }, { "epoch": 0.19814672890684873, "grad_norm": 0.15689349174499512, "learning_rate": 9.548026461469527e-06, "loss": 0.0559, "step": 1422 }, { "epoch": 0.19828607259806313, "grad_norm": 0.11661595851182938, "learning_rate": 9.547046775153285e-06, "loss": 0.0561, "step": 1423 }, { "epoch": 0.1984254162892775, "grad_norm": 0.10219145566225052, "learning_rate": 9.54606607858706e-06, "loss": 0.0616, "step": 1424 }, { "epoch": 0.1985647599804919, "grad_norm": 0.20997142791748047, "learning_rate": 9.545084371988743e-06, "loss": 0.0733, "step": 1425 }, { "epoch": 0.19870410367170627, "grad_norm": 0.2924201786518097, "learning_rate": 9.54410165557644e-06, "loss": 0.0709, "step": 1426 }, { "epoch": 0.19884344736292064, "grad_norm": 0.09145636111497879, "learning_rate": 9.543117929568497e-06, "loss": 0.0504, "step": 1427 }, { "epoch": 0.19898279105413502, "grad_norm": 0.1567152738571167, "learning_rate": 9.542133194183469e-06, "loss": 0.0634, "step": 1428 }, { "epoch": 0.1991221347453494, "grad_norm": 0.1257854551076889, "learning_rate": 9.541147449640145e-06, "loss": 0.0575, "step": 1429 }, { "epoch": 0.19926147843656378, "grad_norm": 0.08621395379304886, "learning_rate": 9.540160696157532e-06, "loss": 0.0592, "step": 1430 }, { "epoch": 0.19940082212777815, "grad_norm": 0.2199631631374359, "learning_rate": 9.539172933954867e-06, "loss": 0.0618, "step": 1431 }, { "epoch": 0.19954016581899253, "grad_norm": 0.09417786449193954, "learning_rate": 9.538184163251608e-06, "loss": 0.0585, "step": 1432 }, { "epoch": 0.19967950951020694, "grad_norm": 0.08583566546440125, "learning_rate": 9.537194384267436e-06, "loss": 0.0564, "step": 1433 }, { "epoch": 0.19981885320142131, "grad_norm": 0.07352281361818314, "learning_rate": 9.536203597222259e-06, "loss": 0.0554, "step": 1434 }, { "epoch": 0.1999581968926357, "grad_norm": 0.094137042760849, "learning_rate": 9.535211802336204e-06, "loss": 0.0546, "step": 1435 }, { "epoch": 0.20009754058385007, "grad_norm": 0.09724165499210358, "learning_rate": 9.534218999829627e-06, "loss": 0.044, "step": 1436 }, { "epoch": 0.20023688427506445, "grad_norm": 0.13249924778938293, "learning_rate": 9.533225189923107e-06, "loss": 0.063, "step": 1437 }, { "epoch": 0.20037622796627882, "grad_norm": 0.08321166783571243, "learning_rate": 9.532230372837446e-06, "loss": 0.0503, "step": 1438 }, { "epoch": 0.2005155716574932, "grad_norm": 0.25556933879852295, "learning_rate": 9.531234548793667e-06, "loss": 0.0873, "step": 1439 }, { "epoch": 0.20065491534870758, "grad_norm": 0.14439603686332703, "learning_rate": 9.530237718013023e-06, "loss": 0.0607, "step": 1440 }, { "epoch": 0.20079425903992196, "grad_norm": 0.10811512172222137, "learning_rate": 9.529239880716983e-06, "loss": 0.0585, "step": 1441 }, { "epoch": 0.20093360273113633, "grad_norm": 0.13207733631134033, "learning_rate": 9.528241037127247e-06, "loss": 0.0715, "step": 1442 }, { "epoch": 0.20107294642235074, "grad_norm": 0.13029232621192932, "learning_rate": 9.527241187465735e-06, "loss": 0.0581, "step": 1443 }, { "epoch": 0.20121229011356512, "grad_norm": 0.2059054672718048, "learning_rate": 9.526240331954589e-06, "loss": 0.0728, "step": 1444 }, { "epoch": 0.2013516338047795, "grad_norm": 0.08938279002904892, "learning_rate": 9.525238470816176e-06, "loss": 0.0606, "step": 1445 }, { "epoch": 0.20149097749599387, "grad_norm": 0.09800935536623001, "learning_rate": 9.524235604273088e-06, "loss": 0.0564, "step": 1446 }, { "epoch": 0.20163032118720825, "grad_norm": 0.12442838400602341, "learning_rate": 9.523231732548139e-06, "loss": 0.0612, "step": 1447 }, { "epoch": 0.20176966487842263, "grad_norm": 0.11647076159715652, "learning_rate": 9.522226855864366e-06, "loss": 0.048, "step": 1448 }, { "epoch": 0.201909008569637, "grad_norm": 0.07078691571950912, "learning_rate": 9.521220974445032e-06, "loss": 0.0478, "step": 1449 }, { "epoch": 0.20204835226085138, "grad_norm": 0.15142172574996948, "learning_rate": 9.520214088513616e-06, "loss": 0.0645, "step": 1450 }, { "epoch": 0.20218769595206576, "grad_norm": 0.18732064962387085, "learning_rate": 9.519206198293828e-06, "loss": 0.0718, "step": 1451 }, { "epoch": 0.20232703964328014, "grad_norm": 0.14921057224273682, "learning_rate": 9.5181973040096e-06, "loss": 0.058, "step": 1452 }, { "epoch": 0.20246638333449454, "grad_norm": 0.11635854095220566, "learning_rate": 9.517187405885082e-06, "loss": 0.0508, "step": 1453 }, { "epoch": 0.20260572702570892, "grad_norm": 0.08138604462146759, "learning_rate": 9.516176504144652e-06, "loss": 0.0623, "step": 1454 }, { "epoch": 0.2027450707169233, "grad_norm": 0.12100403010845184, "learning_rate": 9.515164599012908e-06, "loss": 0.0609, "step": 1455 }, { "epoch": 0.20288441440813768, "grad_norm": 0.25032225251197815, "learning_rate": 9.514151690714672e-06, "loss": 0.0689, "step": 1456 }, { "epoch": 0.20302375809935205, "grad_norm": 0.12523317337036133, "learning_rate": 9.513137779474992e-06, "loss": 0.0589, "step": 1457 }, { "epoch": 0.20316310179056643, "grad_norm": 0.11888711899518967, "learning_rate": 9.512122865519135e-06, "loss": 0.0484, "step": 1458 }, { "epoch": 0.2033024454817808, "grad_norm": 0.08427634090185165, "learning_rate": 9.511106949072588e-06, "loss": 0.0532, "step": 1459 }, { "epoch": 0.20344178917299519, "grad_norm": 0.21707645058631897, "learning_rate": 9.51009003036107e-06, "loss": 0.0496, "step": 1460 }, { "epoch": 0.20358113286420956, "grad_norm": 0.08970598131418228, "learning_rate": 9.509072109610514e-06, "loss": 0.0647, "step": 1461 }, { "epoch": 0.20372047655542394, "grad_norm": 0.1286509931087494, "learning_rate": 9.508053187047077e-06, "loss": 0.0531, "step": 1462 }, { "epoch": 0.20385982024663835, "grad_norm": 0.15500234067440033, "learning_rate": 9.507033262897142e-06, "loss": 0.0572, "step": 1463 }, { "epoch": 0.20399916393785272, "grad_norm": 0.1414647400379181, "learning_rate": 9.506012337387315e-06, "loss": 0.061, "step": 1464 }, { "epoch": 0.2041385076290671, "grad_norm": 0.136396586894989, "learning_rate": 9.504990410744422e-06, "loss": 0.0743, "step": 1465 }, { "epoch": 0.20427785132028148, "grad_norm": 0.11259236931800842, "learning_rate": 9.503967483195509e-06, "loss": 0.0545, "step": 1466 }, { "epoch": 0.20441719501149586, "grad_norm": 0.1325119137763977, "learning_rate": 9.502943554967848e-06, "loss": 0.0497, "step": 1467 }, { "epoch": 0.20455653870271023, "grad_norm": 0.1202562153339386, "learning_rate": 9.501918626288935e-06, "loss": 0.0537, "step": 1468 }, { "epoch": 0.2046958823939246, "grad_norm": 0.13172058761119843, "learning_rate": 9.500892697386482e-06, "loss": 0.0691, "step": 1469 }, { "epoch": 0.204835226085139, "grad_norm": 0.12131015956401825, "learning_rate": 9.499865768488429e-06, "loss": 0.048, "step": 1470 }, { "epoch": 0.20497456977635337, "grad_norm": 0.17954206466674805, "learning_rate": 9.498837839822936e-06, "loss": 0.0607, "step": 1471 }, { "epoch": 0.20511391346756774, "grad_norm": 0.1619453877210617, "learning_rate": 9.497808911618385e-06, "loss": 0.0661, "step": 1472 }, { "epoch": 0.20525325715878215, "grad_norm": 0.11165923625230789, "learning_rate": 9.496778984103381e-06, "loss": 0.058, "step": 1473 }, { "epoch": 0.20539260084999653, "grad_norm": 0.11461926251649857, "learning_rate": 9.49574805750675e-06, "loss": 0.0628, "step": 1474 }, { "epoch": 0.2055319445412109, "grad_norm": 0.2133544683456421, "learning_rate": 9.49471613205754e-06, "loss": 0.0709, "step": 1475 }, { "epoch": 0.20567128823242528, "grad_norm": 0.09536061435937881, "learning_rate": 9.493683207985022e-06, "loss": 0.0617, "step": 1476 }, { "epoch": 0.20581063192363966, "grad_norm": 0.2051803171634674, "learning_rate": 9.492649285518688e-06, "loss": 0.0716, "step": 1477 }, { "epoch": 0.20594997561485404, "grad_norm": 0.15165308117866516, "learning_rate": 9.49161436488825e-06, "loss": 0.0668, "step": 1478 }, { "epoch": 0.2060893193060684, "grad_norm": 0.19542625546455383, "learning_rate": 9.490578446323646e-06, "loss": 0.0618, "step": 1479 }, { "epoch": 0.2062286629972828, "grad_norm": 0.07483123987913132, "learning_rate": 9.489541530055034e-06, "loss": 0.0497, "step": 1480 }, { "epoch": 0.20636800668849717, "grad_norm": 0.14248552918434143, "learning_rate": 9.488503616312793e-06, "loss": 0.0569, "step": 1481 }, { "epoch": 0.20650735037971155, "grad_norm": 0.08330897241830826, "learning_rate": 9.48746470532752e-06, "loss": 0.0665, "step": 1482 }, { "epoch": 0.20664669407092595, "grad_norm": 0.09850907325744629, "learning_rate": 9.48642479733004e-06, "loss": 0.0656, "step": 1483 }, { "epoch": 0.20678603776214033, "grad_norm": 0.13631866872310638, "learning_rate": 9.4853838925514e-06, "loss": 0.0531, "step": 1484 }, { "epoch": 0.2069253814533547, "grad_norm": 0.2595193088054657, "learning_rate": 9.484341991222858e-06, "loss": 0.0643, "step": 1485 }, { "epoch": 0.20706472514456908, "grad_norm": 0.11602319031953812, "learning_rate": 9.483299093575909e-06, "loss": 0.0595, "step": 1486 }, { "epoch": 0.20720406883578346, "grad_norm": 0.1415349543094635, "learning_rate": 9.482255199842254e-06, "loss": 0.0746, "step": 1487 }, { "epoch": 0.20734341252699784, "grad_norm": 0.08512642234563828, "learning_rate": 9.481210310253826e-06, "loss": 0.0591, "step": 1488 }, { "epoch": 0.20748275621821222, "grad_norm": 0.1908259391784668, "learning_rate": 9.480164425042775e-06, "loss": 0.073, "step": 1489 }, { "epoch": 0.2076220999094266, "grad_norm": 0.1279132217168808, "learning_rate": 9.479117544441472e-06, "loss": 0.0478, "step": 1490 }, { "epoch": 0.20776144360064097, "grad_norm": 0.18936574459075928, "learning_rate": 9.47806966868251e-06, "loss": 0.0635, "step": 1491 }, { "epoch": 0.20790078729185535, "grad_norm": 0.10035517066717148, "learning_rate": 9.477020797998707e-06, "loss": 0.0551, "step": 1492 }, { "epoch": 0.20804013098306975, "grad_norm": 0.07415193319320679, "learning_rate": 9.47597093262309e-06, "loss": 0.0512, "step": 1493 }, { "epoch": 0.20817947467428413, "grad_norm": 0.1455686241388321, "learning_rate": 9.474920072788925e-06, "loss": 0.0651, "step": 1494 }, { "epoch": 0.2083188183654985, "grad_norm": 0.17697560787200928, "learning_rate": 9.47386821872968e-06, "loss": 0.06, "step": 1495 }, { "epoch": 0.2084581620567129, "grad_norm": 0.09073685109615326, "learning_rate": 9.47281537067906e-06, "loss": 0.0517, "step": 1496 }, { "epoch": 0.20859750574792726, "grad_norm": 0.21384583413600922, "learning_rate": 9.471761528870978e-06, "loss": 0.0543, "step": 1497 }, { "epoch": 0.20873684943914164, "grad_norm": 0.0921383649110794, "learning_rate": 9.470706693539578e-06, "loss": 0.0623, "step": 1498 }, { "epoch": 0.20887619313035602, "grad_norm": 0.12522783875465393, "learning_rate": 9.469650864919217e-06, "loss": 0.0661, "step": 1499 }, { "epoch": 0.2090155368215704, "grad_norm": 0.14635710418224335, "learning_rate": 9.46859404324448e-06, "loss": 0.0566, "step": 1500 }, { "epoch": 0.20915488051278477, "grad_norm": 0.26365572214126587, "learning_rate": 9.467536228750166e-06, "loss": 0.0605, "step": 1501 }, { "epoch": 0.20929422420399915, "grad_norm": 0.07440594583749771, "learning_rate": 9.466477421671296e-06, "loss": 0.0591, "step": 1502 }, { "epoch": 0.20943356789521356, "grad_norm": 0.12294812500476837, "learning_rate": 9.465417622243116e-06, "loss": 0.0542, "step": 1503 }, { "epoch": 0.20957291158642793, "grad_norm": 0.20385755598545074, "learning_rate": 9.464356830701086e-06, "loss": 0.0525, "step": 1504 }, { "epoch": 0.2097122552776423, "grad_norm": 0.2611122131347656, "learning_rate": 9.463295047280892e-06, "loss": 0.0668, "step": 1505 }, { "epoch": 0.2098515989688567, "grad_norm": 0.2804173231124878, "learning_rate": 9.462232272218437e-06, "loss": 0.0763, "step": 1506 }, { "epoch": 0.20999094266007107, "grad_norm": 0.1891596019268036, "learning_rate": 9.461168505749847e-06, "loss": 0.0623, "step": 1507 }, { "epoch": 0.21013028635128544, "grad_norm": 0.12497933954000473, "learning_rate": 9.460103748111462e-06, "loss": 0.0521, "step": 1508 }, { "epoch": 0.21026963004249982, "grad_norm": 0.17575328052043915, "learning_rate": 9.459037999539852e-06, "loss": 0.0554, "step": 1509 }, { "epoch": 0.2104089737337142, "grad_norm": 0.14950904250144958, "learning_rate": 9.4579712602718e-06, "loss": 0.0752, "step": 1510 }, { "epoch": 0.21054831742492858, "grad_norm": 0.07742664963006973, "learning_rate": 9.456903530544312e-06, "loss": 0.0632, "step": 1511 }, { "epoch": 0.21068766111614295, "grad_norm": 0.10340191423892975, "learning_rate": 9.455834810594611e-06, "loss": 0.0601, "step": 1512 }, { "epoch": 0.21082700480735736, "grad_norm": 0.09633906185626984, "learning_rate": 9.454765100660144e-06, "loss": 0.0634, "step": 1513 }, { "epoch": 0.21096634849857174, "grad_norm": 0.09452580660581589, "learning_rate": 9.453694400978576e-06, "loss": 0.0548, "step": 1514 }, { "epoch": 0.21110569218978611, "grad_norm": 0.08691687136888504, "learning_rate": 9.452622711787793e-06, "loss": 0.0593, "step": 1515 }, { "epoch": 0.2112450358810005, "grad_norm": 0.11660002171993256, "learning_rate": 9.451550033325896e-06, "loss": 0.0658, "step": 1516 }, { "epoch": 0.21138437957221487, "grad_norm": 0.12412439286708832, "learning_rate": 9.450476365831214e-06, "loss": 0.073, "step": 1517 }, { "epoch": 0.21152372326342925, "grad_norm": 0.11085356771945953, "learning_rate": 9.449401709542289e-06, "loss": 0.0598, "step": 1518 }, { "epoch": 0.21166306695464362, "grad_norm": 0.1361926794052124, "learning_rate": 9.448326064697886e-06, "loss": 0.0678, "step": 1519 }, { "epoch": 0.211802410645858, "grad_norm": 0.10671371966600418, "learning_rate": 9.447249431536987e-06, "loss": 0.0431, "step": 1520 }, { "epoch": 0.21194175433707238, "grad_norm": 0.14991550147533417, "learning_rate": 9.446171810298799e-06, "loss": 0.0632, "step": 1521 }, { "epoch": 0.21208109802828676, "grad_norm": 0.11107556521892548, "learning_rate": 9.44509320122274e-06, "loss": 0.0475, "step": 1522 }, { "epoch": 0.21222044171950116, "grad_norm": 0.10754016041755676, "learning_rate": 9.444013604548457e-06, "loss": 0.0507, "step": 1523 }, { "epoch": 0.21235978541071554, "grad_norm": 0.12343791872262955, "learning_rate": 9.442933020515808e-06, "loss": 0.0506, "step": 1524 }, { "epoch": 0.21249912910192992, "grad_norm": 0.24561093747615814, "learning_rate": 9.441851449364878e-06, "loss": 0.0726, "step": 1525 }, { "epoch": 0.2126384727931443, "grad_norm": 0.1866828203201294, "learning_rate": 9.440768891335962e-06, "loss": 0.066, "step": 1526 }, { "epoch": 0.21277781648435867, "grad_norm": 0.10959124565124512, "learning_rate": 9.439685346669585e-06, "loss": 0.0615, "step": 1527 }, { "epoch": 0.21291716017557305, "grad_norm": 0.14464472234249115, "learning_rate": 9.438600815606483e-06, "loss": 0.0665, "step": 1528 }, { "epoch": 0.21305650386678743, "grad_norm": 0.11902739107608795, "learning_rate": 9.437515298387617e-06, "loss": 0.0587, "step": 1529 }, { "epoch": 0.2131958475580018, "grad_norm": 0.3161785900592804, "learning_rate": 9.436428795254159e-06, "loss": 0.0727, "step": 1530 }, { "epoch": 0.21333519124921618, "grad_norm": 0.12713360786437988, "learning_rate": 9.43534130644751e-06, "loss": 0.07, "step": 1531 }, { "epoch": 0.21347453494043056, "grad_norm": 0.1509532779455185, "learning_rate": 9.43425283220928e-06, "loss": 0.0685, "step": 1532 }, { "epoch": 0.21361387863164497, "grad_norm": 0.0729907900094986, "learning_rate": 9.43316337278131e-06, "loss": 0.0565, "step": 1533 }, { "epoch": 0.21375322232285934, "grad_norm": 0.24018403887748718, "learning_rate": 9.432072928405648e-06, "loss": 0.0601, "step": 1534 }, { "epoch": 0.21389256601407372, "grad_norm": 0.13655854761600494, "learning_rate": 9.430981499324567e-06, "loss": 0.0593, "step": 1535 }, { "epoch": 0.2140319097052881, "grad_norm": 0.10212410241365433, "learning_rate": 9.429889085780559e-06, "loss": 0.0584, "step": 1536 }, { "epoch": 0.21417125339650248, "grad_norm": 0.09244275838136673, "learning_rate": 9.42879568801633e-06, "loss": 0.0493, "step": 1537 }, { "epoch": 0.21431059708771685, "grad_norm": 0.11833977699279785, "learning_rate": 9.427701306274812e-06, "loss": 0.0668, "step": 1538 }, { "epoch": 0.21444994077893123, "grad_norm": 0.16042114794254303, "learning_rate": 9.42660594079915e-06, "loss": 0.0642, "step": 1539 }, { "epoch": 0.2145892844701456, "grad_norm": 0.11916590481996536, "learning_rate": 9.42550959183271e-06, "loss": 0.0691, "step": 1540 }, { "epoch": 0.21472862816135999, "grad_norm": 0.11488353461027145, "learning_rate": 9.424412259619073e-06, "loss": 0.0574, "step": 1541 }, { "epoch": 0.21486797185257436, "grad_norm": 0.12557260692119598, "learning_rate": 9.423313944402043e-06, "loss": 0.0563, "step": 1542 }, { "epoch": 0.21500731554378877, "grad_norm": 0.14531689882278442, "learning_rate": 9.422214646425641e-06, "loss": 0.0677, "step": 1543 }, { "epoch": 0.21514665923500315, "grad_norm": 0.15759357810020447, "learning_rate": 9.421114365934105e-06, "loss": 0.0718, "step": 1544 }, { "epoch": 0.21528600292621752, "grad_norm": 0.29660260677337646, "learning_rate": 9.420013103171893e-06, "loss": 0.0653, "step": 1545 }, { "epoch": 0.2154253466174319, "grad_norm": 0.09138604998588562, "learning_rate": 9.418910858383681e-06, "loss": 0.052, "step": 1546 }, { "epoch": 0.21556469030864628, "grad_norm": 0.12408078461885452, "learning_rate": 9.41780763181436e-06, "loss": 0.0651, "step": 1547 }, { "epoch": 0.21570403399986066, "grad_norm": 0.08117390424013138, "learning_rate": 9.416703423709044e-06, "loss": 0.059, "step": 1548 }, { "epoch": 0.21584337769107503, "grad_norm": 0.07926711440086365, "learning_rate": 9.415598234313064e-06, "loss": 0.0579, "step": 1549 }, { "epoch": 0.2159827213822894, "grad_norm": 0.08162106573581696, "learning_rate": 9.414492063871964e-06, "loss": 0.0578, "step": 1550 }, { "epoch": 0.2161220650735038, "grad_norm": 0.0782177522778511, "learning_rate": 9.413384912631512e-06, "loss": 0.0631, "step": 1551 }, { "epoch": 0.21626140876471817, "grad_norm": 0.13579177856445312, "learning_rate": 9.412276780837692e-06, "loss": 0.0655, "step": 1552 }, { "epoch": 0.21640075245593257, "grad_norm": 0.12877385318279266, "learning_rate": 9.411167668736707e-06, "loss": 0.0607, "step": 1553 }, { "epoch": 0.21654009614714695, "grad_norm": 0.1385236382484436, "learning_rate": 9.410057576574974e-06, "loss": 0.0683, "step": 1554 }, { "epoch": 0.21667943983836133, "grad_norm": 0.0901247039437294, "learning_rate": 9.408946504599131e-06, "loss": 0.0701, "step": 1555 }, { "epoch": 0.2168187835295757, "grad_norm": 0.10341189801692963, "learning_rate": 9.40783445305603e-06, "loss": 0.067, "step": 1556 }, { "epoch": 0.21695812722079008, "grad_norm": 0.15986619889736176, "learning_rate": 9.406721422192748e-06, "loss": 0.0596, "step": 1557 }, { "epoch": 0.21709747091200446, "grad_norm": 0.1283135712146759, "learning_rate": 9.405607412256573e-06, "loss": 0.0467, "step": 1558 }, { "epoch": 0.21723681460321884, "grad_norm": 0.16939795017242432, "learning_rate": 9.404492423495012e-06, "loss": 0.0702, "step": 1559 }, { "epoch": 0.2173761582944332, "grad_norm": 0.14660269021987915, "learning_rate": 9.403376456155792e-06, "loss": 0.0642, "step": 1560 }, { "epoch": 0.2175155019856476, "grad_norm": 0.33365267515182495, "learning_rate": 9.402259510486855e-06, "loss": 0.0716, "step": 1561 }, { "epoch": 0.21765484567686197, "grad_norm": 0.09654317051172256, "learning_rate": 9.401141586736359e-06, "loss": 0.0547, "step": 1562 }, { "epoch": 0.21779418936807637, "grad_norm": 0.14885520935058594, "learning_rate": 9.400022685152683e-06, "loss": 0.0806, "step": 1563 }, { "epoch": 0.21793353305929075, "grad_norm": 0.15750113129615784, "learning_rate": 9.398902805984417e-06, "loss": 0.0722, "step": 1564 }, { "epoch": 0.21807287675050513, "grad_norm": 0.13797684013843536, "learning_rate": 9.397781949480381e-06, "loss": 0.0678, "step": 1565 }, { "epoch": 0.2182122204417195, "grad_norm": 0.21764962375164032, "learning_rate": 9.396660115889596e-06, "loss": 0.0711, "step": 1566 }, { "epoch": 0.21835156413293388, "grad_norm": 0.1643546223640442, "learning_rate": 9.395537305461312e-06, "loss": 0.0692, "step": 1567 }, { "epoch": 0.21849090782414826, "grad_norm": 0.0943455919623375, "learning_rate": 9.394413518444989e-06, "loss": 0.0506, "step": 1568 }, { "epoch": 0.21863025151536264, "grad_norm": 0.10683037340641022, "learning_rate": 9.39328875509031e-06, "loss": 0.0625, "step": 1569 }, { "epoch": 0.21876959520657702, "grad_norm": 0.07272457331418991, "learning_rate": 9.39216301564717e-06, "loss": 0.0655, "step": 1570 }, { "epoch": 0.2189089388977914, "grad_norm": 0.11120195686817169, "learning_rate": 9.391036300365681e-06, "loss": 0.0484, "step": 1571 }, { "epoch": 0.21904828258900577, "grad_norm": 0.13768555223941803, "learning_rate": 9.389908609496177e-06, "loss": 0.0577, "step": 1572 }, { "epoch": 0.21918762628022015, "grad_norm": 0.07694307714700699, "learning_rate": 9.388779943289204e-06, "loss": 0.0583, "step": 1573 }, { "epoch": 0.21932696997143455, "grad_norm": 0.13914568722248077, "learning_rate": 9.387650301995523e-06, "loss": 0.057, "step": 1574 }, { "epoch": 0.21946631366264893, "grad_norm": 0.09015458822250366, "learning_rate": 9.386519685866117e-06, "loss": 0.0561, "step": 1575 }, { "epoch": 0.2196056573538633, "grad_norm": 0.14204317331314087, "learning_rate": 9.385388095152184e-06, "loss": 0.0575, "step": 1576 }, { "epoch": 0.2197450010450777, "grad_norm": 0.42456185817718506, "learning_rate": 9.384255530105136e-06, "loss": 0.0824, "step": 1577 }, { "epoch": 0.21988434473629206, "grad_norm": 0.10293526947498322, "learning_rate": 9.383121990976602e-06, "loss": 0.0567, "step": 1578 }, { "epoch": 0.22002368842750644, "grad_norm": 0.13045576214790344, "learning_rate": 9.381987478018431e-06, "loss": 0.0612, "step": 1579 }, { "epoch": 0.22016303211872082, "grad_norm": 0.5087506175041199, "learning_rate": 9.380851991482685e-06, "loss": 0.0825, "step": 1580 }, { "epoch": 0.2203023758099352, "grad_norm": 0.17060281336307526, "learning_rate": 9.379715531621642e-06, "loss": 0.0725, "step": 1581 }, { "epoch": 0.22044171950114957, "grad_norm": 0.19170060753822327, "learning_rate": 9.3785780986878e-06, "loss": 0.0655, "step": 1582 }, { "epoch": 0.22058106319236395, "grad_norm": 0.14923259615898132, "learning_rate": 9.377439692933869e-06, "loss": 0.053, "step": 1583 }, { "epoch": 0.22072040688357836, "grad_norm": 0.15345416963100433, "learning_rate": 9.376300314612775e-06, "loss": 0.045, "step": 1584 }, { "epoch": 0.22085975057479273, "grad_norm": 0.13442835211753845, "learning_rate": 9.375159963977668e-06, "loss": 0.0704, "step": 1585 }, { "epoch": 0.2209990942660071, "grad_norm": 0.07893019169569016, "learning_rate": 9.374018641281898e-06, "loss": 0.0584, "step": 1586 }, { "epoch": 0.2211384379572215, "grad_norm": 0.0838099792599678, "learning_rate": 9.37287634677905e-06, "loss": 0.0626, "step": 1587 }, { "epoch": 0.22127778164843587, "grad_norm": 0.11747212707996368, "learning_rate": 9.371733080722911e-06, "loss": 0.0505, "step": 1588 }, { "epoch": 0.22141712533965024, "grad_norm": 0.18348950147628784, "learning_rate": 9.37058884336749e-06, "loss": 0.0703, "step": 1589 }, { "epoch": 0.22155646903086462, "grad_norm": 0.09542372077703476, "learning_rate": 9.36944363496701e-06, "loss": 0.0585, "step": 1590 }, { "epoch": 0.221695812722079, "grad_norm": 0.10547519475221634, "learning_rate": 9.368297455775911e-06, "loss": 0.0585, "step": 1591 }, { "epoch": 0.22183515641329338, "grad_norm": 0.12005060166120529, "learning_rate": 9.367150306048847e-06, "loss": 0.061, "step": 1592 }, { "epoch": 0.22197450010450775, "grad_norm": 0.08109734207391739, "learning_rate": 9.36600218604069e-06, "loss": 0.0615, "step": 1593 }, { "epoch": 0.22211384379572216, "grad_norm": 0.0935017317533493, "learning_rate": 9.364853096006523e-06, "loss": 0.0679, "step": 1594 }, { "epoch": 0.22225318748693654, "grad_norm": 0.10872925817966461, "learning_rate": 9.36370303620165e-06, "loss": 0.0624, "step": 1595 }, { "epoch": 0.22239253117815092, "grad_norm": 0.14083994925022125, "learning_rate": 9.362552006881588e-06, "loss": 0.063, "step": 1596 }, { "epoch": 0.2225318748693653, "grad_norm": 0.11629628390073776, "learning_rate": 9.361400008302068e-06, "loss": 0.0649, "step": 1597 }, { "epoch": 0.22267121856057967, "grad_norm": 0.10147950798273087, "learning_rate": 9.36024704071904e-06, "loss": 0.0525, "step": 1598 }, { "epoch": 0.22281056225179405, "grad_norm": 0.15648336708545685, "learning_rate": 9.359093104388663e-06, "loss": 0.0582, "step": 1599 }, { "epoch": 0.22294990594300843, "grad_norm": 0.17359381914138794, "learning_rate": 9.35793819956732e-06, "loss": 0.0649, "step": 1600 }, { "epoch": 0.2230892496342228, "grad_norm": 0.15265420079231262, "learning_rate": 9.356782326511602e-06, "loss": 0.0777, "step": 1601 }, { "epoch": 0.22322859332543718, "grad_norm": 0.1748921126127243, "learning_rate": 9.355625485478319e-06, "loss": 0.0677, "step": 1602 }, { "epoch": 0.22336793701665156, "grad_norm": 0.10682426393032074, "learning_rate": 9.354467676724491e-06, "loss": 0.0586, "step": 1603 }, { "epoch": 0.22350728070786596, "grad_norm": 0.20872528851032257, "learning_rate": 9.353308900507361e-06, "loss": 0.0657, "step": 1604 }, { "epoch": 0.22364662439908034, "grad_norm": 0.16573700308799744, "learning_rate": 9.352149157084383e-06, "loss": 0.0659, "step": 1605 }, { "epoch": 0.22378596809029472, "grad_norm": 0.09846627712249756, "learning_rate": 9.350988446713221e-06, "loss": 0.0626, "step": 1606 }, { "epoch": 0.2239253117815091, "grad_norm": 0.06702626496553421, "learning_rate": 9.349826769651762e-06, "loss": 0.0493, "step": 1607 }, { "epoch": 0.22406465547272347, "grad_norm": 0.1739048808813095, "learning_rate": 9.348664126158103e-06, "loss": 0.0634, "step": 1608 }, { "epoch": 0.22420399916393785, "grad_norm": 0.1795676201581955, "learning_rate": 9.347500516490555e-06, "loss": 0.0556, "step": 1609 }, { "epoch": 0.22434334285515223, "grad_norm": 0.06715057045221329, "learning_rate": 9.346335940907648e-06, "loss": 0.0567, "step": 1610 }, { "epoch": 0.2244826865463666, "grad_norm": 0.116844542324543, "learning_rate": 9.345170399668127e-06, "loss": 0.0567, "step": 1611 }, { "epoch": 0.22462203023758098, "grad_norm": 0.11537214368581772, "learning_rate": 9.344003893030942e-06, "loss": 0.0448, "step": 1612 }, { "epoch": 0.22476137392879536, "grad_norm": 0.11708736419677734, "learning_rate": 9.342836421255268e-06, "loss": 0.0601, "step": 1613 }, { "epoch": 0.22490071762000977, "grad_norm": 0.10845532268285751, "learning_rate": 9.341667984600489e-06, "loss": 0.0604, "step": 1614 }, { "epoch": 0.22504006131122414, "grad_norm": 0.11052821576595306, "learning_rate": 9.340498583326208e-06, "loss": 0.0691, "step": 1615 }, { "epoch": 0.22517940500243852, "grad_norm": 0.16587680578231812, "learning_rate": 9.339328217692233e-06, "loss": 0.0527, "step": 1616 }, { "epoch": 0.2253187486936529, "grad_norm": 0.14331631362438202, "learning_rate": 9.3381568879586e-06, "loss": 0.0524, "step": 1617 }, { "epoch": 0.22545809238486728, "grad_norm": 0.10715391486883163, "learning_rate": 9.336984594385547e-06, "loss": 0.0591, "step": 1618 }, { "epoch": 0.22559743607608165, "grad_norm": 0.0779247060418129, "learning_rate": 9.335811337233533e-06, "loss": 0.0534, "step": 1619 }, { "epoch": 0.22573677976729603, "grad_norm": 0.289474219083786, "learning_rate": 9.334637116763227e-06, "loss": 0.0606, "step": 1620 }, { "epoch": 0.2258761234585104, "grad_norm": 0.31116312742233276, "learning_rate": 9.333461933235517e-06, "loss": 0.0724, "step": 1621 }, { "epoch": 0.22601546714972479, "grad_norm": 0.12387705594301224, "learning_rate": 9.332285786911498e-06, "loss": 0.0568, "step": 1622 }, { "epoch": 0.22615481084093916, "grad_norm": 0.12403843551874161, "learning_rate": 9.331108678052485e-06, "loss": 0.0589, "step": 1623 }, { "epoch": 0.22629415453215357, "grad_norm": 0.12096072733402252, "learning_rate": 9.329930606920005e-06, "loss": 0.078, "step": 1624 }, { "epoch": 0.22643349822336795, "grad_norm": 0.1466527134180069, "learning_rate": 9.3287515737758e-06, "loss": 0.0505, "step": 1625 }, { "epoch": 0.22657284191458232, "grad_norm": 0.1464054137468338, "learning_rate": 9.32757157888182e-06, "loss": 0.0558, "step": 1626 }, { "epoch": 0.2267121856057967, "grad_norm": 0.13454656302928925, "learning_rate": 9.326390622500236e-06, "loss": 0.0541, "step": 1627 }, { "epoch": 0.22685152929701108, "grad_norm": 0.09741924703121185, "learning_rate": 9.32520870489343e-06, "loss": 0.0529, "step": 1628 }, { "epoch": 0.22699087298822546, "grad_norm": 0.14203445613384247, "learning_rate": 9.324025826323995e-06, "loss": 0.0591, "step": 1629 }, { "epoch": 0.22713021667943983, "grad_norm": 0.0787510871887207, "learning_rate": 9.322841987054741e-06, "loss": 0.0533, "step": 1630 }, { "epoch": 0.2272695603706542, "grad_norm": 0.1578119695186615, "learning_rate": 9.321657187348689e-06, "loss": 0.0561, "step": 1631 }, { "epoch": 0.2274089040618686, "grad_norm": 0.1769454926252365, "learning_rate": 9.320471427469076e-06, "loss": 0.0626, "step": 1632 }, { "epoch": 0.22754824775308297, "grad_norm": 0.07388993352651596, "learning_rate": 9.319284707679348e-06, "loss": 0.0674, "step": 1633 }, { "epoch": 0.22768759144429737, "grad_norm": 0.16025005280971527, "learning_rate": 9.31809702824317e-06, "loss": 0.0701, "step": 1634 }, { "epoch": 0.22782693513551175, "grad_norm": 0.13991519808769226, "learning_rate": 9.316908389424416e-06, "loss": 0.0682, "step": 1635 }, { "epoch": 0.22796627882672613, "grad_norm": 0.252258837223053, "learning_rate": 9.315718791487175e-06, "loss": 0.0797, "step": 1636 }, { "epoch": 0.2281056225179405, "grad_norm": 0.13949467241764069, "learning_rate": 9.314528234695747e-06, "loss": 0.0576, "step": 1637 }, { "epoch": 0.22824496620915488, "grad_norm": 0.2147936224937439, "learning_rate": 9.31333671931465e-06, "loss": 0.0583, "step": 1638 }, { "epoch": 0.22838430990036926, "grad_norm": 0.20353902876377106, "learning_rate": 9.312144245608608e-06, "loss": 0.0743, "step": 1639 }, { "epoch": 0.22852365359158364, "grad_norm": 0.28502485156059265, "learning_rate": 9.31095081384256e-06, "loss": 0.0735, "step": 1640 }, { "epoch": 0.22866299728279801, "grad_norm": 0.10752645879983902, "learning_rate": 9.309756424281664e-06, "loss": 0.0615, "step": 1641 }, { "epoch": 0.2288023409740124, "grad_norm": 0.1732015162706375, "learning_rate": 9.308561077191284e-06, "loss": 0.0819, "step": 1642 }, { "epoch": 0.22894168466522677, "grad_norm": 0.15058422088623047, "learning_rate": 9.307364772837e-06, "loss": 0.0621, "step": 1643 }, { "epoch": 0.22908102835644117, "grad_norm": 0.11341333389282227, "learning_rate": 9.306167511484601e-06, "loss": 0.053, "step": 1644 }, { "epoch": 0.22922037204765555, "grad_norm": 0.1096411719918251, "learning_rate": 9.304969293400092e-06, "loss": 0.0547, "step": 1645 }, { "epoch": 0.22935971573886993, "grad_norm": 0.13581228256225586, "learning_rate": 9.303770118849692e-06, "loss": 0.0588, "step": 1646 }, { "epoch": 0.2294990594300843, "grad_norm": 0.11425590515136719, "learning_rate": 9.302569988099825e-06, "loss": 0.0621, "step": 1647 }, { "epoch": 0.22963840312129868, "grad_norm": 0.09570146352052689, "learning_rate": 9.301368901417138e-06, "loss": 0.0731, "step": 1648 }, { "epoch": 0.22977774681251306, "grad_norm": 0.07544465363025665, "learning_rate": 9.300166859068482e-06, "loss": 0.0546, "step": 1649 }, { "epoch": 0.22991709050372744, "grad_norm": 0.13831138610839844, "learning_rate": 9.298963861320927e-06, "loss": 0.0588, "step": 1650 }, { "epoch": 0.23005643419494182, "grad_norm": 0.1632225662469864, "learning_rate": 9.297759908441747e-06, "loss": 0.0653, "step": 1651 }, { "epoch": 0.2301957778861562, "grad_norm": 0.15194888412952423, "learning_rate": 9.296555000698435e-06, "loss": 0.0529, "step": 1652 }, { "epoch": 0.23033512157737057, "grad_norm": 0.09923337399959564, "learning_rate": 9.295349138358693e-06, "loss": 0.0569, "step": 1653 }, { "epoch": 0.23047446526858498, "grad_norm": 0.12262725830078125, "learning_rate": 9.294142321690438e-06, "loss": 0.0639, "step": 1654 }, { "epoch": 0.23061380895979935, "grad_norm": 0.12006746232509613, "learning_rate": 9.292934550961796e-06, "loss": 0.0679, "step": 1655 }, { "epoch": 0.23075315265101373, "grad_norm": 0.1334952712059021, "learning_rate": 9.291725826441107e-06, "loss": 0.0666, "step": 1656 }, { "epoch": 0.2308924963422281, "grad_norm": 0.10752203315496445, "learning_rate": 9.29051614839692e-06, "loss": 0.0692, "step": 1657 }, { "epoch": 0.2310318400334425, "grad_norm": 0.0950808897614479, "learning_rate": 9.289305517098e-06, "loss": 0.0555, "step": 1658 }, { "epoch": 0.23117118372465686, "grad_norm": 0.07343694567680359, "learning_rate": 9.28809393281332e-06, "loss": 0.0581, "step": 1659 }, { "epoch": 0.23131052741587124, "grad_norm": 0.07718537002801895, "learning_rate": 9.286881395812066e-06, "loss": 0.0615, "step": 1660 }, { "epoch": 0.23144987110708562, "grad_norm": 0.10060244053602219, "learning_rate": 9.285667906363637e-06, "loss": 0.0575, "step": 1661 }, { "epoch": 0.2315892147983, "grad_norm": 0.11807809770107269, "learning_rate": 9.284453464737644e-06, "loss": 0.0531, "step": 1662 }, { "epoch": 0.23172855848951437, "grad_norm": 0.07758715003728867, "learning_rate": 9.283238071203907e-06, "loss": 0.0687, "step": 1663 }, { "epoch": 0.23186790218072878, "grad_norm": 0.07013069093227386, "learning_rate": 9.282021726032457e-06, "loss": 0.0457, "step": 1664 }, { "epoch": 0.23200724587194316, "grad_norm": 0.12863537669181824, "learning_rate": 9.280804429493542e-06, "loss": 0.057, "step": 1665 }, { "epoch": 0.23214658956315753, "grad_norm": 0.07350098341703415, "learning_rate": 9.279586181857613e-06, "loss": 0.05, "step": 1666 }, { "epoch": 0.2322859332543719, "grad_norm": 0.10295460373163223, "learning_rate": 9.278366983395341e-06, "loss": 0.0614, "step": 1667 }, { "epoch": 0.2324252769455863, "grad_norm": 0.1512940376996994, "learning_rate": 9.277146834377601e-06, "loss": 0.064, "step": 1668 }, { "epoch": 0.23256462063680067, "grad_norm": 0.095369353890419, "learning_rate": 9.275925735075484e-06, "loss": 0.062, "step": 1669 }, { "epoch": 0.23270396432801504, "grad_norm": 0.1113160103559494, "learning_rate": 9.274703685760287e-06, "loss": 0.0582, "step": 1670 }, { "epoch": 0.23284330801922942, "grad_norm": 0.1665259152650833, "learning_rate": 9.273480686703526e-06, "loss": 0.0684, "step": 1671 }, { "epoch": 0.2329826517104438, "grad_norm": 0.18092504143714905, "learning_rate": 9.272256738176924e-06, "loss": 0.0642, "step": 1672 }, { "epoch": 0.23312199540165818, "grad_norm": 0.2820509374141693, "learning_rate": 9.271031840452409e-06, "loss": 0.0767, "step": 1673 }, { "epoch": 0.23326133909287258, "grad_norm": 0.07562088966369629, "learning_rate": 9.26980599380213e-06, "loss": 0.0579, "step": 1674 }, { "epoch": 0.23340068278408696, "grad_norm": 0.10569064319133759, "learning_rate": 9.268579198498438e-06, "loss": 0.0703, "step": 1675 }, { "epoch": 0.23354002647530134, "grad_norm": 0.19317372143268585, "learning_rate": 9.267351454813904e-06, "loss": 0.0662, "step": 1676 }, { "epoch": 0.23367937016651572, "grad_norm": 0.09313713759183884, "learning_rate": 9.266122763021302e-06, "loss": 0.0583, "step": 1677 }, { "epoch": 0.2338187138577301, "grad_norm": 0.11143337190151215, "learning_rate": 9.264893123393618e-06, "loss": 0.0655, "step": 1678 }, { "epoch": 0.23395805754894447, "grad_norm": 0.12702536582946777, "learning_rate": 9.26366253620405e-06, "loss": 0.0656, "step": 1679 }, { "epoch": 0.23409740124015885, "grad_norm": 0.10285069048404694, "learning_rate": 9.26243100172601e-06, "loss": 0.0634, "step": 1680 }, { "epoch": 0.23423674493137323, "grad_norm": 0.1048772782087326, "learning_rate": 9.261198520233113e-06, "loss": 0.0564, "step": 1681 }, { "epoch": 0.2343760886225876, "grad_norm": 0.08698191493749619, "learning_rate": 9.25996509199919e-06, "loss": 0.0606, "step": 1682 }, { "epoch": 0.23451543231380198, "grad_norm": 0.08804859966039658, "learning_rate": 9.258730717298281e-06, "loss": 0.0492, "step": 1683 }, { "epoch": 0.23465477600501639, "grad_norm": 0.11674980074167252, "learning_rate": 9.257495396404635e-06, "loss": 0.0665, "step": 1684 }, { "epoch": 0.23479411969623076, "grad_norm": 0.09284793585538864, "learning_rate": 9.256259129592711e-06, "loss": 0.0576, "step": 1685 }, { "epoch": 0.23493346338744514, "grad_norm": 0.09265895932912827, "learning_rate": 9.255021917137181e-06, "loss": 0.0523, "step": 1686 }, { "epoch": 0.23507280707865952, "grad_norm": 0.10842230170965195, "learning_rate": 9.253783759312924e-06, "loss": 0.0618, "step": 1687 }, { "epoch": 0.2352121507698739, "grad_norm": 0.0958453118801117, "learning_rate": 9.252544656395033e-06, "loss": 0.0584, "step": 1688 }, { "epoch": 0.23535149446108827, "grad_norm": 0.16776596009731293, "learning_rate": 9.251304608658806e-06, "loss": 0.0562, "step": 1689 }, { "epoch": 0.23549083815230265, "grad_norm": 0.11746557801961899, "learning_rate": 9.250063616379754e-06, "loss": 0.0763, "step": 1690 }, { "epoch": 0.23563018184351703, "grad_norm": 0.14839543402194977, "learning_rate": 9.248821679833596e-06, "loss": 0.065, "step": 1691 }, { "epoch": 0.2357695255347314, "grad_norm": 0.07950638979673386, "learning_rate": 9.247578799296263e-06, "loss": 0.062, "step": 1692 }, { "epoch": 0.23590886922594578, "grad_norm": 0.1771143227815628, "learning_rate": 9.246334975043896e-06, "loss": 0.0597, "step": 1693 }, { "epoch": 0.2360482129171602, "grad_norm": 0.22173507511615753, "learning_rate": 9.245090207352842e-06, "loss": 0.0631, "step": 1694 }, { "epoch": 0.23618755660837457, "grad_norm": 0.1140032708644867, "learning_rate": 9.243844496499661e-06, "loss": 0.0545, "step": 1695 }, { "epoch": 0.23632690029958894, "grad_norm": 0.21627014875411987, "learning_rate": 9.242597842761123e-06, "loss": 0.0743, "step": 1696 }, { "epoch": 0.23646624399080332, "grad_norm": 0.12088041007518768, "learning_rate": 9.241350246414203e-06, "loss": 0.0526, "step": 1697 }, { "epoch": 0.2366055876820177, "grad_norm": 0.1085529699921608, "learning_rate": 9.24010170773609e-06, "loss": 0.0658, "step": 1698 }, { "epoch": 0.23674493137323208, "grad_norm": 0.13032349944114685, "learning_rate": 9.23885222700418e-06, "loss": 0.0582, "step": 1699 }, { "epoch": 0.23688427506444645, "grad_norm": 0.14116045832633972, "learning_rate": 9.237601804496081e-06, "loss": 0.0622, "step": 1700 }, { "epoch": 0.23702361875566083, "grad_norm": 0.11442182958126068, "learning_rate": 9.236350440489608e-06, "loss": 0.0677, "step": 1701 }, { "epoch": 0.2371629624468752, "grad_norm": 0.14581190049648285, "learning_rate": 9.235098135262783e-06, "loss": 0.0705, "step": 1702 }, { "epoch": 0.23730230613808959, "grad_norm": 0.19386470317840576, "learning_rate": 9.233844889093842e-06, "loss": 0.0629, "step": 1703 }, { "epoch": 0.237441649829304, "grad_norm": 0.16168266534805298, "learning_rate": 9.232590702261227e-06, "loss": 0.0582, "step": 1704 }, { "epoch": 0.23758099352051837, "grad_norm": 0.10662825405597687, "learning_rate": 9.23133557504359e-06, "loss": 0.0505, "step": 1705 }, { "epoch": 0.23772033721173275, "grad_norm": 0.0754404291510582, "learning_rate": 9.23007950771979e-06, "loss": 0.0634, "step": 1706 }, { "epoch": 0.23785968090294712, "grad_norm": 0.13538742065429688, "learning_rate": 9.228822500568898e-06, "loss": 0.0651, "step": 1707 }, { "epoch": 0.2379990245941615, "grad_norm": 0.09469803422689438, "learning_rate": 9.227564553870192e-06, "loss": 0.0541, "step": 1708 }, { "epoch": 0.23813836828537588, "grad_norm": 0.16042697429656982, "learning_rate": 9.226305667903159e-06, "loss": 0.0653, "step": 1709 }, { "epoch": 0.23827771197659026, "grad_norm": 0.15044525265693665, "learning_rate": 9.225045842947496e-06, "loss": 0.0677, "step": 1710 }, { "epoch": 0.23841705566780463, "grad_norm": 0.11127857863903046, "learning_rate": 9.223785079283106e-06, "loss": 0.0502, "step": 1711 }, { "epoch": 0.238556399359019, "grad_norm": 0.06274158507585526, "learning_rate": 9.2225233771901e-06, "loss": 0.0507, "step": 1712 }, { "epoch": 0.2386957430502334, "grad_norm": 0.0686018317937851, "learning_rate": 9.221260736948803e-06, "loss": 0.0532, "step": 1713 }, { "epoch": 0.2388350867414478, "grad_norm": 0.10446065664291382, "learning_rate": 9.219997158839743e-06, "loss": 0.0655, "step": 1714 }, { "epoch": 0.23897443043266217, "grad_norm": 0.14388935267925262, "learning_rate": 9.21873264314366e-06, "loss": 0.0602, "step": 1715 }, { "epoch": 0.23911377412387655, "grad_norm": 0.1322965919971466, "learning_rate": 9.217467190141498e-06, "loss": 0.0446, "step": 1716 }, { "epoch": 0.23925311781509093, "grad_norm": 0.11740697175264359, "learning_rate": 9.216200800114412e-06, "loss": 0.0595, "step": 1717 }, { "epoch": 0.2393924615063053, "grad_norm": 0.10362280160188675, "learning_rate": 9.214933473343765e-06, "loss": 0.0643, "step": 1718 }, { "epoch": 0.23953180519751968, "grad_norm": 0.14969000220298767, "learning_rate": 9.213665210111131e-06, "loss": 0.0557, "step": 1719 }, { "epoch": 0.23967114888873406, "grad_norm": 0.10951008647680283, "learning_rate": 9.212396010698286e-06, "loss": 0.0573, "step": 1720 }, { "epoch": 0.23981049257994844, "grad_norm": 0.19546303153038025, "learning_rate": 9.211125875387217e-06, "loss": 0.0564, "step": 1721 }, { "epoch": 0.23994983627116281, "grad_norm": 0.12294816225767136, "learning_rate": 9.209854804460121e-06, "loss": 0.053, "step": 1722 }, { "epoch": 0.2400891799623772, "grad_norm": 0.1626112163066864, "learning_rate": 9.208582798199402e-06, "loss": 0.0775, "step": 1723 }, { "epoch": 0.2402285236535916, "grad_norm": 0.14537742733955383, "learning_rate": 9.207309856887664e-06, "loss": 0.0513, "step": 1724 }, { "epoch": 0.24036786734480597, "grad_norm": 0.18365225195884705, "learning_rate": 9.206035980807734e-06, "loss": 0.0593, "step": 1725 }, { "epoch": 0.24050721103602035, "grad_norm": 0.31009215116500854, "learning_rate": 9.204761170242635e-06, "loss": 0.0728, "step": 1726 }, { "epoch": 0.24064655472723473, "grad_norm": 0.0789821520447731, "learning_rate": 9.203485425475598e-06, "loss": 0.0474, "step": 1727 }, { "epoch": 0.2407858984184491, "grad_norm": 0.09666991233825684, "learning_rate": 9.202208746790069e-06, "loss": 0.0587, "step": 1728 }, { "epoch": 0.24092524210966348, "grad_norm": 0.11696695536375046, "learning_rate": 9.200931134469692e-06, "loss": 0.048, "step": 1729 }, { "epoch": 0.24106458580087786, "grad_norm": 0.18829213082790375, "learning_rate": 9.199652588798327e-06, "loss": 0.0633, "step": 1730 }, { "epoch": 0.24120392949209224, "grad_norm": 0.19135211408138275, "learning_rate": 9.198373110060037e-06, "loss": 0.0713, "step": 1731 }, { "epoch": 0.24134327318330662, "grad_norm": 0.06174227595329285, "learning_rate": 9.197092698539092e-06, "loss": 0.051, "step": 1732 }, { "epoch": 0.241482616874521, "grad_norm": 0.1394706815481186, "learning_rate": 9.19581135451997e-06, "loss": 0.0588, "step": 1733 }, { "epoch": 0.2416219605657354, "grad_norm": 0.2464209496974945, "learning_rate": 9.194529078287358e-06, "loss": 0.0668, "step": 1734 }, { "epoch": 0.24176130425694978, "grad_norm": 0.22161677479743958, "learning_rate": 9.193245870126147e-06, "loss": 0.0566, "step": 1735 }, { "epoch": 0.24190064794816415, "grad_norm": 0.12351313978433609, "learning_rate": 9.191961730321437e-06, "loss": 0.057, "step": 1736 }, { "epoch": 0.24203999163937853, "grad_norm": 0.0860113799571991, "learning_rate": 9.190676659158535e-06, "loss": 0.0591, "step": 1737 }, { "epoch": 0.2421793353305929, "grad_norm": 0.07104422897100449, "learning_rate": 9.189390656922955e-06, "loss": 0.0539, "step": 1738 }, { "epoch": 0.2423186790218073, "grad_norm": 0.1298554539680481, "learning_rate": 9.188103723900414e-06, "loss": 0.0557, "step": 1739 }, { "epoch": 0.24245802271302166, "grad_norm": 0.13107702136039734, "learning_rate": 9.186815860376843e-06, "loss": 0.0572, "step": 1740 }, { "epoch": 0.24259736640423604, "grad_norm": 0.07559025287628174, "learning_rate": 9.185527066638375e-06, "loss": 0.057, "step": 1741 }, { "epoch": 0.24273671009545042, "grad_norm": 0.1431111842393875, "learning_rate": 9.184237342971349e-06, "loss": 0.0669, "step": 1742 }, { "epoch": 0.2428760537866648, "grad_norm": 0.0811234638094902, "learning_rate": 9.182946689662314e-06, "loss": 0.0519, "step": 1743 }, { "epoch": 0.2430153974778792, "grad_norm": 0.09488983452320099, "learning_rate": 9.181655106998023e-06, "loss": 0.0674, "step": 1744 }, { "epoch": 0.24315474116909358, "grad_norm": 0.0942876785993576, "learning_rate": 9.180362595265435e-06, "loss": 0.0565, "step": 1745 }, { "epoch": 0.24329408486030796, "grad_norm": 0.12844541668891907, "learning_rate": 9.179069154751718e-06, "loss": 0.0608, "step": 1746 }, { "epoch": 0.24343342855152234, "grad_norm": 0.0766405388712883, "learning_rate": 9.177774785744245e-06, "loss": 0.0571, "step": 1747 }, { "epoch": 0.2435727722427367, "grad_norm": 0.17382246255874634, "learning_rate": 9.176479488530594e-06, "loss": 0.0575, "step": 1748 }, { "epoch": 0.2437121159339511, "grad_norm": 0.23486676812171936, "learning_rate": 9.175183263398553e-06, "loss": 0.0706, "step": 1749 }, { "epoch": 0.24385145962516547, "grad_norm": 0.062324732542037964, "learning_rate": 9.17388611063611e-06, "loss": 0.0419, "step": 1750 }, { "epoch": 0.24399080331637985, "grad_norm": 0.08726660162210464, "learning_rate": 9.172588030531467e-06, "loss": 0.0607, "step": 1751 }, { "epoch": 0.24413014700759422, "grad_norm": 0.16227032244205475, "learning_rate": 9.171289023373022e-06, "loss": 0.0513, "step": 1752 }, { "epoch": 0.2442694906988086, "grad_norm": 0.09110695123672485, "learning_rate": 9.16998908944939e-06, "loss": 0.0602, "step": 1753 }, { "epoch": 0.244408834390023, "grad_norm": 0.1226256787776947, "learning_rate": 9.168688229049386e-06, "loss": 0.0589, "step": 1754 }, { "epoch": 0.24454817808123738, "grad_norm": 0.06561797857284546, "learning_rate": 9.167386442462029e-06, "loss": 0.0566, "step": 1755 }, { "epoch": 0.24468752177245176, "grad_norm": 0.23880644142627716, "learning_rate": 9.166083729976547e-06, "loss": 0.0642, "step": 1756 }, { "epoch": 0.24482686546366614, "grad_norm": 0.13925901055335999, "learning_rate": 9.164780091882374e-06, "loss": 0.048, "step": 1757 }, { "epoch": 0.24496620915488052, "grad_norm": 0.1151660829782486, "learning_rate": 9.163475528469148e-06, "loss": 0.0548, "step": 1758 }, { "epoch": 0.2451055528460949, "grad_norm": 0.09660404920578003, "learning_rate": 9.162170040026714e-06, "loss": 0.0492, "step": 1759 }, { "epoch": 0.24524489653730927, "grad_norm": 0.1655428558588028, "learning_rate": 9.16086362684512e-06, "loss": 0.0603, "step": 1760 }, { "epoch": 0.24538424022852365, "grad_norm": 0.11988646537065506, "learning_rate": 9.159556289214623e-06, "loss": 0.0602, "step": 1761 }, { "epoch": 0.24552358391973803, "grad_norm": 0.1339820772409439, "learning_rate": 9.158248027425683e-06, "loss": 0.0547, "step": 1762 }, { "epoch": 0.2456629276109524, "grad_norm": 0.1298798769712448, "learning_rate": 9.156938841768965e-06, "loss": 0.058, "step": 1763 }, { "epoch": 0.2458022713021668, "grad_norm": 0.10734289139509201, "learning_rate": 9.155628732535342e-06, "loss": 0.063, "step": 1764 }, { "epoch": 0.24594161499338119, "grad_norm": 0.07164137810468674, "learning_rate": 9.15431770001589e-06, "loss": 0.0503, "step": 1765 }, { "epoch": 0.24608095868459556, "grad_norm": 0.12122996151447296, "learning_rate": 9.153005744501886e-06, "loss": 0.0667, "step": 1766 }, { "epoch": 0.24622030237580994, "grad_norm": 0.14725837111473083, "learning_rate": 9.151692866284824e-06, "loss": 0.0677, "step": 1767 }, { "epoch": 0.24635964606702432, "grad_norm": 0.08587142825126648, "learning_rate": 9.150379065656389e-06, "loss": 0.0522, "step": 1768 }, { "epoch": 0.2464989897582387, "grad_norm": 0.11939387023448944, "learning_rate": 9.149064342908482e-06, "loss": 0.0621, "step": 1769 }, { "epoch": 0.24663833344945307, "grad_norm": 0.1315840184688568, "learning_rate": 9.147748698333203e-06, "loss": 0.079, "step": 1770 }, { "epoch": 0.24677767714066745, "grad_norm": 0.106773242354393, "learning_rate": 9.146432132222858e-06, "loss": 0.0539, "step": 1771 }, { "epoch": 0.24691702083188183, "grad_norm": 0.09244965016841888, "learning_rate": 9.145114644869957e-06, "loss": 0.0553, "step": 1772 }, { "epoch": 0.2470563645230962, "grad_norm": 0.14347238838672638, "learning_rate": 9.143796236567218e-06, "loss": 0.0699, "step": 1773 }, { "epoch": 0.24719570821431058, "grad_norm": 0.1348712295293808, "learning_rate": 9.142476907607558e-06, "loss": 0.0574, "step": 1774 }, { "epoch": 0.247335051905525, "grad_norm": 0.135457843542099, "learning_rate": 9.141156658284104e-06, "loss": 0.0726, "step": 1775 }, { "epoch": 0.24747439559673937, "grad_norm": 0.10221424698829651, "learning_rate": 9.139835488890186e-06, "loss": 0.0601, "step": 1776 }, { "epoch": 0.24761373928795374, "grad_norm": 0.08817987889051437, "learning_rate": 9.138513399719335e-06, "loss": 0.0633, "step": 1777 }, { "epoch": 0.24775308297916812, "grad_norm": 0.14025245606899261, "learning_rate": 9.13719039106529e-06, "loss": 0.0687, "step": 1778 }, { "epoch": 0.2478924266703825, "grad_norm": 0.14417782425880432, "learning_rate": 9.135866463221994e-06, "loss": 0.0663, "step": 1779 }, { "epoch": 0.24803177036159688, "grad_norm": 0.13010068237781525, "learning_rate": 9.134541616483594e-06, "loss": 0.0584, "step": 1780 }, { "epoch": 0.24817111405281125, "grad_norm": 0.10084611177444458, "learning_rate": 9.13321585114444e-06, "loss": 0.0542, "step": 1781 }, { "epoch": 0.24831045774402563, "grad_norm": 0.07973722368478775, "learning_rate": 9.131889167499086e-06, "loss": 0.0501, "step": 1782 }, { "epoch": 0.24844980143524, "grad_norm": 0.08521132171154022, "learning_rate": 9.130561565842293e-06, "loss": 0.0565, "step": 1783 }, { "epoch": 0.24858914512645439, "grad_norm": 0.08393573015928268, "learning_rate": 9.129233046469021e-06, "loss": 0.0705, "step": 1784 }, { "epoch": 0.2487284888176688, "grad_norm": 0.07507529109716415, "learning_rate": 9.12790360967444e-06, "loss": 0.0519, "step": 1785 }, { "epoch": 0.24886783250888317, "grad_norm": 0.08360160887241364, "learning_rate": 9.126573255753917e-06, "loss": 0.0579, "step": 1786 }, { "epoch": 0.24900717620009755, "grad_norm": 0.15243670344352722, "learning_rate": 9.125241985003028e-06, "loss": 0.0678, "step": 1787 }, { "epoch": 0.24914651989131192, "grad_norm": 0.13643868267536163, "learning_rate": 9.123909797717551e-06, "loss": 0.0671, "step": 1788 }, { "epoch": 0.2492858635825263, "grad_norm": 0.1167929619550705, "learning_rate": 9.122576694193467e-06, "loss": 0.0664, "step": 1789 }, { "epoch": 0.24942520727374068, "grad_norm": 0.21806983649730682, "learning_rate": 9.121242674726962e-06, "loss": 0.0702, "step": 1790 }, { "epoch": 0.24956455096495506, "grad_norm": 0.14250212907791138, "learning_rate": 9.119907739614424e-06, "loss": 0.0638, "step": 1791 }, { "epoch": 0.24970389465616943, "grad_norm": 0.1117790937423706, "learning_rate": 9.118571889152445e-06, "loss": 0.0652, "step": 1792 }, { "epoch": 0.2498432383473838, "grad_norm": 0.13548235595226288, "learning_rate": 9.117235123637822e-06, "loss": 0.0601, "step": 1793 }, { "epoch": 0.2499825820385982, "grad_norm": 0.20711949467658997, "learning_rate": 9.115897443367552e-06, "loss": 0.0591, "step": 1794 }, { "epoch": 0.25012192572981257, "grad_norm": 0.1533740907907486, "learning_rate": 9.114558848638836e-06, "loss": 0.0475, "step": 1795 }, { "epoch": 0.25026126942102694, "grad_norm": 0.08011586219072342, "learning_rate": 9.113219339749084e-06, "loss": 0.062, "step": 1796 }, { "epoch": 0.2504006131122413, "grad_norm": 0.2554720640182495, "learning_rate": 9.1118789169959e-06, "loss": 0.0752, "step": 1797 }, { "epoch": 0.2505399568034557, "grad_norm": 0.10738785564899445, "learning_rate": 9.110537580677094e-06, "loss": 0.0553, "step": 1798 }, { "epoch": 0.2506793004946701, "grad_norm": 0.09777938574552536, "learning_rate": 9.109195331090685e-06, "loss": 0.0594, "step": 1799 }, { "epoch": 0.2508186441858845, "grad_norm": 0.09753333032131195, "learning_rate": 9.10785216853489e-06, "loss": 0.0556, "step": 1800 }, { "epoch": 0.2509579878770989, "grad_norm": 0.08128040283918381, "learning_rate": 9.106508093308123e-06, "loss": 0.0477, "step": 1801 }, { "epoch": 0.25109733156831326, "grad_norm": 0.1463455855846405, "learning_rate": 9.105163105709011e-06, "loss": 0.0497, "step": 1802 }, { "epoch": 0.25123667525952764, "grad_norm": 0.0889294445514679, "learning_rate": 9.103817206036383e-06, "loss": 0.0674, "step": 1803 }, { "epoch": 0.251376018950742, "grad_norm": 0.14642231166362762, "learning_rate": 9.10247039458926e-06, "loss": 0.0507, "step": 1804 }, { "epoch": 0.2515153626419564, "grad_norm": 0.1605082005262375, "learning_rate": 9.101122671666878e-06, "loss": 0.0593, "step": 1805 }, { "epoch": 0.2516547063331708, "grad_norm": 0.11761059612035751, "learning_rate": 9.09977403756867e-06, "loss": 0.0624, "step": 1806 }, { "epoch": 0.25179405002438515, "grad_norm": 0.11591097712516785, "learning_rate": 9.098424492594268e-06, "loss": 0.0597, "step": 1807 }, { "epoch": 0.25193339371559953, "grad_norm": 0.07802049815654755, "learning_rate": 9.097074037043512e-06, "loss": 0.0567, "step": 1808 }, { "epoch": 0.2520727374068139, "grad_norm": 0.10196822881698608, "learning_rate": 9.095722671216443e-06, "loss": 0.0575, "step": 1809 }, { "epoch": 0.2522120810980283, "grad_norm": 0.11805730313062668, "learning_rate": 9.094370395413306e-06, "loss": 0.0722, "step": 1810 }, { "epoch": 0.25235142478924266, "grad_norm": 0.10176362097263336, "learning_rate": 9.09301720993454e-06, "loss": 0.0614, "step": 1811 }, { "epoch": 0.25249076848045704, "grad_norm": 0.13527430593967438, "learning_rate": 9.091663115080797e-06, "loss": 0.0603, "step": 1812 }, { "epoch": 0.2526301121716714, "grad_norm": 0.0966874435544014, "learning_rate": 9.090308111152924e-06, "loss": 0.0582, "step": 1813 }, { "epoch": 0.2527694558628858, "grad_norm": 0.1440831571817398, "learning_rate": 9.08895219845197e-06, "loss": 0.0735, "step": 1814 }, { "epoch": 0.25290879955410017, "grad_norm": 0.10074283927679062, "learning_rate": 9.087595377279192e-06, "loss": 0.0593, "step": 1815 }, { "epoch": 0.25304814324531455, "grad_norm": 0.08768688887357712, "learning_rate": 9.086237647936043e-06, "loss": 0.0564, "step": 1816 }, { "epoch": 0.2531874869365289, "grad_norm": 0.12570731341838837, "learning_rate": 9.084879010724177e-06, "loss": 0.0528, "step": 1817 }, { "epoch": 0.2533268306277433, "grad_norm": 0.13290341198444366, "learning_rate": 9.083519465945456e-06, "loss": 0.0773, "step": 1818 }, { "epoch": 0.2534661743189577, "grad_norm": 0.09008724987506866, "learning_rate": 9.082159013901937e-06, "loss": 0.0584, "step": 1819 }, { "epoch": 0.2536055180101721, "grad_norm": 0.22026807069778442, "learning_rate": 9.080797654895883e-06, "loss": 0.0674, "step": 1820 }, { "epoch": 0.2537448617013865, "grad_norm": 0.2918842136859894, "learning_rate": 9.079435389229755e-06, "loss": 0.0707, "step": 1821 }, { "epoch": 0.25388420539260087, "grad_norm": 0.07988253235816956, "learning_rate": 9.07807221720622e-06, "loss": 0.052, "step": 1822 }, { "epoch": 0.25402354908381525, "grad_norm": 0.06601937860250473, "learning_rate": 9.07670813912814e-06, "loss": 0.0518, "step": 1823 }, { "epoch": 0.2541628927750296, "grad_norm": 0.08175086975097656, "learning_rate": 9.075343155298589e-06, "loss": 0.0601, "step": 1824 }, { "epoch": 0.254302236466244, "grad_norm": 0.0858030840754509, "learning_rate": 9.073977266020826e-06, "loss": 0.055, "step": 1825 }, { "epoch": 0.2544415801574584, "grad_norm": 0.1396360844373703, "learning_rate": 9.072610471598327e-06, "loss": 0.0619, "step": 1826 }, { "epoch": 0.25458092384867276, "grad_norm": 0.116581991314888, "learning_rate": 9.07124277233476e-06, "loss": 0.0586, "step": 1827 }, { "epoch": 0.25472026753988714, "grad_norm": 0.16538318991661072, "learning_rate": 9.069874168533996e-06, "loss": 0.0591, "step": 1828 }, { "epoch": 0.2548596112311015, "grad_norm": 0.10086296498775482, "learning_rate": 9.068504660500111e-06, "loss": 0.0567, "step": 1829 }, { "epoch": 0.2549989549223159, "grad_norm": 0.0772620216012001, "learning_rate": 9.067134248537374e-06, "loss": 0.0558, "step": 1830 }, { "epoch": 0.25513829861353027, "grad_norm": 0.10259094834327698, "learning_rate": 9.065762932950262e-06, "loss": 0.0588, "step": 1831 }, { "epoch": 0.25527764230474465, "grad_norm": 0.09373628348112106, "learning_rate": 9.06439071404345e-06, "loss": 0.0554, "step": 1832 }, { "epoch": 0.255416985995959, "grad_norm": 0.08430735766887665, "learning_rate": 9.063017592121812e-06, "loss": 0.0575, "step": 1833 }, { "epoch": 0.2555563296871734, "grad_norm": 0.12337193638086319, "learning_rate": 9.061643567490425e-06, "loss": 0.0636, "step": 1834 }, { "epoch": 0.2556956733783878, "grad_norm": 0.19271771609783173, "learning_rate": 9.060268640454565e-06, "loss": 0.0552, "step": 1835 }, { "epoch": 0.25583501706960216, "grad_norm": 0.1024722084403038, "learning_rate": 9.058892811319713e-06, "loss": 0.0641, "step": 1836 }, { "epoch": 0.25597436076081653, "grad_norm": 0.0901414081454277, "learning_rate": 9.057516080391544e-06, "loss": 0.0561, "step": 1837 }, { "epoch": 0.2561137044520309, "grad_norm": 0.11979672312736511, "learning_rate": 9.056138447975936e-06, "loss": 0.0598, "step": 1838 }, { "epoch": 0.2562530481432453, "grad_norm": 0.09142925590276718, "learning_rate": 9.05475991437897e-06, "loss": 0.0473, "step": 1839 }, { "epoch": 0.2563923918344597, "grad_norm": 0.1570976972579956, "learning_rate": 9.053380479906919e-06, "loss": 0.0645, "step": 1840 }, { "epoch": 0.2565317355256741, "grad_norm": 0.08913545310497284, "learning_rate": 9.052000144866269e-06, "loss": 0.0589, "step": 1841 }, { "epoch": 0.2566710792168885, "grad_norm": 0.10123582184314728, "learning_rate": 9.050618909563693e-06, "loss": 0.0644, "step": 1842 }, { "epoch": 0.25681042290810285, "grad_norm": 0.22603657841682434, "learning_rate": 9.049236774306073e-06, "loss": 0.0575, "step": 1843 }, { "epoch": 0.25694976659931723, "grad_norm": 0.11336732655763626, "learning_rate": 9.04785373940049e-06, "loss": 0.0645, "step": 1844 }, { "epoch": 0.2570891102905316, "grad_norm": 0.12874531745910645, "learning_rate": 9.046469805154218e-06, "loss": 0.0627, "step": 1845 }, { "epoch": 0.257228453981746, "grad_norm": 0.1004297211766243, "learning_rate": 9.045084971874738e-06, "loss": 0.0598, "step": 1846 }, { "epoch": 0.25736779767296036, "grad_norm": 0.0908496230840683, "learning_rate": 9.043699239869727e-06, "loss": 0.0546, "step": 1847 }, { "epoch": 0.25750714136417474, "grad_norm": 0.17856816947460175, "learning_rate": 9.042312609447066e-06, "loss": 0.0639, "step": 1848 }, { "epoch": 0.2576464850553891, "grad_norm": 0.08339660614728928, "learning_rate": 9.040925080914832e-06, "loss": 0.0542, "step": 1849 }, { "epoch": 0.2577858287466035, "grad_norm": 0.15772897005081177, "learning_rate": 9.039536654581297e-06, "loss": 0.049, "step": 1850 }, { "epoch": 0.2579251724378179, "grad_norm": 0.09835033118724823, "learning_rate": 9.038147330754944e-06, "loss": 0.0566, "step": 1851 }, { "epoch": 0.25806451612903225, "grad_norm": 0.09093257039785385, "learning_rate": 9.036757109744447e-06, "loss": 0.065, "step": 1852 }, { "epoch": 0.25820385982024663, "grad_norm": 0.09400083124637604, "learning_rate": 9.035365991858679e-06, "loss": 0.0633, "step": 1853 }, { "epoch": 0.258343203511461, "grad_norm": 0.1803499460220337, "learning_rate": 9.033973977406718e-06, "loss": 0.0632, "step": 1854 }, { "epoch": 0.2584825472026754, "grad_norm": 0.07193783670663834, "learning_rate": 9.032581066697836e-06, "loss": 0.0483, "step": 1855 }, { "epoch": 0.25862189089388976, "grad_norm": 0.10995186865329742, "learning_rate": 9.031187260041505e-06, "loss": 0.0645, "step": 1856 }, { "epoch": 0.25876123458510414, "grad_norm": 0.21461914479732513, "learning_rate": 9.0297925577474e-06, "loss": 0.0652, "step": 1857 }, { "epoch": 0.2589005782763185, "grad_norm": 0.1334448605775833, "learning_rate": 9.028396960125392e-06, "loss": 0.073, "step": 1858 }, { "epoch": 0.2590399219675329, "grad_norm": 0.2770049273967743, "learning_rate": 9.027000467485547e-06, "loss": 0.0755, "step": 1859 }, { "epoch": 0.2591792656587473, "grad_norm": 0.24150621891021729, "learning_rate": 9.025603080138136e-06, "loss": 0.0543, "step": 1860 }, { "epoch": 0.2593186093499617, "grad_norm": 0.12542055547237396, "learning_rate": 9.024204798393627e-06, "loss": 0.0583, "step": 1861 }, { "epoch": 0.2594579530411761, "grad_norm": 0.09790155291557312, "learning_rate": 9.022805622562687e-06, "loss": 0.0607, "step": 1862 }, { "epoch": 0.25959729673239046, "grad_norm": 0.23965518176555634, "learning_rate": 9.02140555295618e-06, "loss": 0.0602, "step": 1863 }, { "epoch": 0.25973664042360484, "grad_norm": 0.13574010133743286, "learning_rate": 9.020004589885167e-06, "loss": 0.0625, "step": 1864 }, { "epoch": 0.2598759841148192, "grad_norm": 0.07548405975103378, "learning_rate": 9.018602733660915e-06, "loss": 0.0559, "step": 1865 }, { "epoch": 0.2600153278060336, "grad_norm": 0.08395164459943771, "learning_rate": 9.01719998459488e-06, "loss": 0.0553, "step": 1866 }, { "epoch": 0.26015467149724797, "grad_norm": 0.09370497614145279, "learning_rate": 9.015796342998724e-06, "loss": 0.0516, "step": 1867 }, { "epoch": 0.26029401518846235, "grad_norm": 0.18416453897953033, "learning_rate": 9.014391809184302e-06, "loss": 0.0631, "step": 1868 }, { "epoch": 0.2604333588796767, "grad_norm": 0.06106359139084816, "learning_rate": 9.01298638346367e-06, "loss": 0.0432, "step": 1869 }, { "epoch": 0.2605727025708911, "grad_norm": 0.10220681875944138, "learning_rate": 9.011580066149081e-06, "loss": 0.0628, "step": 1870 }, { "epoch": 0.2607120462621055, "grad_norm": 0.20663969218730927, "learning_rate": 9.010172857552989e-06, "loss": 0.0672, "step": 1871 }, { "epoch": 0.26085138995331986, "grad_norm": 0.15281496942043304, "learning_rate": 9.008764757988042e-06, "loss": 0.0557, "step": 1872 }, { "epoch": 0.26099073364453423, "grad_norm": 0.10622524470090866, "learning_rate": 9.007355767767085e-06, "loss": 0.062, "step": 1873 }, { "epoch": 0.2611300773357486, "grad_norm": 0.18840238451957703, "learning_rate": 9.005945887203167e-06, "loss": 0.0663, "step": 1874 }, { "epoch": 0.261269421026963, "grad_norm": 0.08051823824644089, "learning_rate": 9.004535116609532e-06, "loss": 0.0532, "step": 1875 }, { "epoch": 0.26140876471817737, "grad_norm": 0.09038352221250534, "learning_rate": 9.003123456299617e-06, "loss": 0.0646, "step": 1876 }, { "epoch": 0.26154810840939174, "grad_norm": 0.09870997071266174, "learning_rate": 9.001710906587064e-06, "loss": 0.0609, "step": 1877 }, { "epoch": 0.2616874521006061, "grad_norm": 0.07672318816184998, "learning_rate": 9.000297467785708e-06, "loss": 0.0498, "step": 1878 }, { "epoch": 0.2618267957918205, "grad_norm": 0.19151458144187927, "learning_rate": 8.998883140209582e-06, "loss": 0.0669, "step": 1879 }, { "epoch": 0.26196613948303493, "grad_norm": 0.1176842674612999, "learning_rate": 8.99746792417292e-06, "loss": 0.0636, "step": 1880 }, { "epoch": 0.2621054831742493, "grad_norm": 0.2278236299753189, "learning_rate": 8.996051819990148e-06, "loss": 0.0646, "step": 1881 }, { "epoch": 0.2622448268654637, "grad_norm": 0.10333704203367233, "learning_rate": 8.994634827975892e-06, "loss": 0.0518, "step": 1882 }, { "epoch": 0.26238417055667806, "grad_norm": 0.21999262273311615, "learning_rate": 8.993216948444978e-06, "loss": 0.0674, "step": 1883 }, { "epoch": 0.26252351424789244, "grad_norm": 0.11548788100481033, "learning_rate": 8.991798181712423e-06, "loss": 0.0532, "step": 1884 }, { "epoch": 0.2626628579391068, "grad_norm": 0.10428768396377563, "learning_rate": 8.99037852809345e-06, "loss": 0.0608, "step": 1885 }, { "epoch": 0.2628022016303212, "grad_norm": 0.06850625574588776, "learning_rate": 8.988957987903467e-06, "loss": 0.0569, "step": 1886 }, { "epoch": 0.2629415453215356, "grad_norm": 0.14123603701591492, "learning_rate": 8.987536561458088e-06, "loss": 0.0555, "step": 1887 }, { "epoch": 0.26308088901274995, "grad_norm": 0.07783903926610947, "learning_rate": 8.986114249073122e-06, "loss": 0.0599, "step": 1888 }, { "epoch": 0.26322023270396433, "grad_norm": 0.10077962279319763, "learning_rate": 8.984691051064576e-06, "loss": 0.0565, "step": 1889 }, { "epoch": 0.2633595763951787, "grad_norm": 0.14446553587913513, "learning_rate": 8.98326696774865e-06, "loss": 0.0649, "step": 1890 }, { "epoch": 0.2634989200863931, "grad_norm": 0.07862744480371475, "learning_rate": 8.981841999441743e-06, "loss": 0.0499, "step": 1891 }, { "epoch": 0.26363826377760746, "grad_norm": 0.1019892767071724, "learning_rate": 8.980416146460452e-06, "loss": 0.0567, "step": 1892 }, { "epoch": 0.26377760746882184, "grad_norm": 0.15698666870594025, "learning_rate": 8.978989409121565e-06, "loss": 0.0599, "step": 1893 }, { "epoch": 0.2639169511600362, "grad_norm": 0.07246425747871399, "learning_rate": 8.977561787742074e-06, "loss": 0.045, "step": 1894 }, { "epoch": 0.2640562948512506, "grad_norm": 0.14864228665828705, "learning_rate": 8.976133282639166e-06, "loss": 0.0503, "step": 1895 }, { "epoch": 0.264195638542465, "grad_norm": 0.2542366087436676, "learning_rate": 8.974703894130218e-06, "loss": 0.066, "step": 1896 }, { "epoch": 0.26433498223367935, "grad_norm": 0.19191670417785645, "learning_rate": 8.973273622532806e-06, "loss": 0.0734, "step": 1897 }, { "epoch": 0.2644743259248937, "grad_norm": 0.2263438105583191, "learning_rate": 8.97184246816471e-06, "loss": 0.0644, "step": 1898 }, { "epoch": 0.2646136696161081, "grad_norm": 0.1328941285610199, "learning_rate": 8.970410431343892e-06, "loss": 0.0696, "step": 1899 }, { "epoch": 0.26475301330732254, "grad_norm": 0.10161998122930527, "learning_rate": 8.968977512388524e-06, "loss": 0.06, "step": 1900 }, { "epoch": 0.2648923569985369, "grad_norm": 0.24135875701904297, "learning_rate": 8.967543711616968e-06, "loss": 0.0733, "step": 1901 }, { "epoch": 0.2650317006897513, "grad_norm": 0.06619159877300262, "learning_rate": 8.966109029347777e-06, "loss": 0.0515, "step": 1902 }, { "epoch": 0.26517104438096567, "grad_norm": 0.07188434153795242, "learning_rate": 8.96467346589971e-06, "loss": 0.0482, "step": 1903 }, { "epoch": 0.26531038807218005, "grad_norm": 0.12862654030323029, "learning_rate": 8.963237021591714e-06, "loss": 0.0603, "step": 1904 }, { "epoch": 0.2654497317633944, "grad_norm": 0.06714965403079987, "learning_rate": 8.961799696742933e-06, "loss": 0.0609, "step": 1905 }, { "epoch": 0.2655890754546088, "grad_norm": 0.1190795749425888, "learning_rate": 8.960361491672708e-06, "loss": 0.0578, "step": 1906 }, { "epoch": 0.2657284191458232, "grad_norm": 0.09838184714317322, "learning_rate": 8.958922406700578e-06, "loss": 0.0558, "step": 1907 }, { "epoch": 0.26586776283703756, "grad_norm": 0.13587163388729095, "learning_rate": 8.957482442146271e-06, "loss": 0.0567, "step": 1908 }, { "epoch": 0.26600710652825194, "grad_norm": 0.09172370284795761, "learning_rate": 8.956041598329716e-06, "loss": 0.059, "step": 1909 }, { "epoch": 0.2661464502194663, "grad_norm": 0.09530129283666611, "learning_rate": 8.954599875571039e-06, "loss": 0.0572, "step": 1910 }, { "epoch": 0.2662857939106807, "grad_norm": 0.06439581513404846, "learning_rate": 8.953157274190552e-06, "loss": 0.0579, "step": 1911 }, { "epoch": 0.26642513760189507, "grad_norm": 0.08595748990774155, "learning_rate": 8.951713794508771e-06, "loss": 0.0509, "step": 1912 }, { "epoch": 0.26656448129310945, "grad_norm": 0.09370198845863342, "learning_rate": 8.950269436846405e-06, "loss": 0.0477, "step": 1913 }, { "epoch": 0.2667038249843238, "grad_norm": 0.10823030024766922, "learning_rate": 8.948824201524355e-06, "loss": 0.054, "step": 1914 }, { "epoch": 0.2668431686755382, "grad_norm": 0.11652180552482605, "learning_rate": 8.947378088863722e-06, "loss": 0.0627, "step": 1915 }, { "epoch": 0.2669825123667526, "grad_norm": 0.18081149458885193, "learning_rate": 8.945931099185798e-06, "loss": 0.0718, "step": 1916 }, { "epoch": 0.26712185605796696, "grad_norm": 0.12070924043655396, "learning_rate": 8.94448323281207e-06, "loss": 0.0683, "step": 1917 }, { "epoch": 0.26726119974918133, "grad_norm": 0.12397658824920654, "learning_rate": 8.943034490064222e-06, "loss": 0.0453, "step": 1918 }, { "epoch": 0.2674005434403957, "grad_norm": 0.07914320379495621, "learning_rate": 8.941584871264131e-06, "loss": 0.0431, "step": 1919 }, { "epoch": 0.26753988713161014, "grad_norm": 0.0895942822098732, "learning_rate": 8.940134376733869e-06, "loss": 0.0575, "step": 1920 }, { "epoch": 0.2676792308228245, "grad_norm": 0.1888960748910904, "learning_rate": 8.938683006795704e-06, "loss": 0.0726, "step": 1921 }, { "epoch": 0.2678185745140389, "grad_norm": 0.07364051043987274, "learning_rate": 8.937230761772098e-06, "loss": 0.0501, "step": 1922 }, { "epoch": 0.2679579182052533, "grad_norm": 0.12854871153831482, "learning_rate": 8.935777641985704e-06, "loss": 0.0632, "step": 1923 }, { "epoch": 0.26809726189646765, "grad_norm": 0.09358397126197815, "learning_rate": 8.934323647759373e-06, "loss": 0.0717, "step": 1924 }, { "epoch": 0.26823660558768203, "grad_norm": 0.16000182926654816, "learning_rate": 8.932868779416148e-06, "loss": 0.0545, "step": 1925 }, { "epoch": 0.2683759492788964, "grad_norm": 0.08850177377462387, "learning_rate": 8.931413037279271e-06, "loss": 0.0588, "step": 1926 }, { "epoch": 0.2685152929701108, "grad_norm": 0.1262662261724472, "learning_rate": 8.929956421672172e-06, "loss": 0.056, "step": 1927 }, { "epoch": 0.26865463666132516, "grad_norm": 0.07836612313985825, "learning_rate": 8.92849893291848e-06, "loss": 0.0606, "step": 1928 }, { "epoch": 0.26879398035253954, "grad_norm": 0.06409004330635071, "learning_rate": 8.927040571342014e-06, "loss": 0.0592, "step": 1929 }, { "epoch": 0.2689333240437539, "grad_norm": 0.1139410063624382, "learning_rate": 8.92558133726679e-06, "loss": 0.0642, "step": 1930 }, { "epoch": 0.2690726677349683, "grad_norm": 0.22242383658885956, "learning_rate": 8.924121231017012e-06, "loss": 0.0775, "step": 1931 }, { "epoch": 0.2692120114261827, "grad_norm": 0.173257976770401, "learning_rate": 8.922660252917088e-06, "loss": 0.0631, "step": 1932 }, { "epoch": 0.26935135511739705, "grad_norm": 0.09230751544237137, "learning_rate": 8.92119840329161e-06, "loss": 0.0524, "step": 1933 }, { "epoch": 0.26949069880861143, "grad_norm": 0.09191995859146118, "learning_rate": 8.919735682465372e-06, "loss": 0.0681, "step": 1934 }, { "epoch": 0.2696300424998258, "grad_norm": 0.12986984848976135, "learning_rate": 8.918272090763352e-06, "loss": 0.072, "step": 1935 }, { "epoch": 0.2697693861910402, "grad_norm": 0.11656541377305984, "learning_rate": 8.91680762851073e-06, "loss": 0.0574, "step": 1936 }, { "epoch": 0.26990872988225456, "grad_norm": 0.15251760184764862, "learning_rate": 8.915342296032874e-06, "loss": 0.0564, "step": 1937 }, { "epoch": 0.27004807357346894, "grad_norm": 0.09057562798261642, "learning_rate": 8.913876093655351e-06, "loss": 0.0652, "step": 1938 }, { "epoch": 0.2701874172646833, "grad_norm": 0.08447655290365219, "learning_rate": 8.912409021703914e-06, "loss": 0.0576, "step": 1939 }, { "epoch": 0.27032676095589775, "grad_norm": 0.07842619717121124, "learning_rate": 8.910941080504514e-06, "loss": 0.0667, "step": 1940 }, { "epoch": 0.2704661046471121, "grad_norm": 0.2822706699371338, "learning_rate": 8.909472270383293e-06, "loss": 0.0691, "step": 1941 }, { "epoch": 0.2706054483383265, "grad_norm": 0.10380539298057556, "learning_rate": 8.90800259166659e-06, "loss": 0.0499, "step": 1942 }, { "epoch": 0.2707447920295409, "grad_norm": 0.10829564929008484, "learning_rate": 8.906532044680933e-06, "loss": 0.0549, "step": 1943 }, { "epoch": 0.27088413572075526, "grad_norm": 0.10509602725505829, "learning_rate": 8.905060629753041e-06, "loss": 0.0649, "step": 1944 }, { "epoch": 0.27102347941196964, "grad_norm": 0.1826213300228119, "learning_rate": 8.903588347209833e-06, "loss": 0.0656, "step": 1945 }, { "epoch": 0.271162823103184, "grad_norm": 0.18578200042247772, "learning_rate": 8.902115197378414e-06, "loss": 0.0685, "step": 1946 }, { "epoch": 0.2713021667943984, "grad_norm": 0.11505020409822464, "learning_rate": 8.900641180586086e-06, "loss": 0.061, "step": 1947 }, { "epoch": 0.27144151048561277, "grad_norm": 0.0694383978843689, "learning_rate": 8.89916629716034e-06, "loss": 0.0546, "step": 1948 }, { "epoch": 0.27158085417682715, "grad_norm": 0.07129878550767899, "learning_rate": 8.897690547428861e-06, "loss": 0.0624, "step": 1949 }, { "epoch": 0.2717201978680415, "grad_norm": 0.0981866642832756, "learning_rate": 8.89621393171953e-06, "loss": 0.0549, "step": 1950 }, { "epoch": 0.2718595415592559, "grad_norm": 0.23175670206546783, "learning_rate": 8.894736450360415e-06, "loss": 0.0625, "step": 1951 }, { "epoch": 0.2719988852504703, "grad_norm": 0.08757485449314117, "learning_rate": 8.893258103679779e-06, "loss": 0.0575, "step": 1952 }, { "epoch": 0.27213822894168466, "grad_norm": 0.11945714801549911, "learning_rate": 8.891778892006077e-06, "loss": 0.0642, "step": 1953 }, { "epoch": 0.27227757263289903, "grad_norm": 0.09163924306631088, "learning_rate": 8.890298815667956e-06, "loss": 0.0634, "step": 1954 }, { "epoch": 0.2724169163241134, "grad_norm": 0.06600875407457352, "learning_rate": 8.888817874994254e-06, "loss": 0.0543, "step": 1955 }, { "epoch": 0.2725562600153278, "grad_norm": 0.11660096049308777, "learning_rate": 8.887336070314005e-06, "loss": 0.058, "step": 1956 }, { "epoch": 0.27269560370654217, "grad_norm": 0.07893349975347519, "learning_rate": 8.88585340195643e-06, "loss": 0.0605, "step": 1957 }, { "epoch": 0.27283494739775654, "grad_norm": 0.11193133890628815, "learning_rate": 8.884369870250945e-06, "loss": 0.0587, "step": 1958 }, { "epoch": 0.2729742910889709, "grad_norm": 0.0698772743344307, "learning_rate": 8.882885475527156e-06, "loss": 0.0537, "step": 1959 }, { "epoch": 0.27311363478018535, "grad_norm": 0.10344893485307693, "learning_rate": 8.881400218114861e-06, "loss": 0.061, "step": 1960 }, { "epoch": 0.27325297847139973, "grad_norm": 0.09729818254709244, "learning_rate": 8.879914098344053e-06, "loss": 0.0621, "step": 1961 }, { "epoch": 0.2733923221626141, "grad_norm": 0.1751793473958969, "learning_rate": 8.878427116544912e-06, "loss": 0.063, "step": 1962 }, { "epoch": 0.2735316658538285, "grad_norm": 0.06974544376134872, "learning_rate": 8.876939273047813e-06, "loss": 0.0587, "step": 1963 }, { "epoch": 0.27367100954504286, "grad_norm": 0.18597562611103058, "learning_rate": 8.875450568183318e-06, "loss": 0.0606, "step": 1964 }, { "epoch": 0.27381035323625724, "grad_norm": 0.09995389729738235, "learning_rate": 8.873961002282185e-06, "loss": 0.0645, "step": 1965 }, { "epoch": 0.2739496969274716, "grad_norm": 0.1211259737610817, "learning_rate": 8.872470575675361e-06, "loss": 0.0705, "step": 1966 }, { "epoch": 0.274089040618686, "grad_norm": 0.18177910149097443, "learning_rate": 8.870979288693985e-06, "loss": 0.0555, "step": 1967 }, { "epoch": 0.2742283843099004, "grad_norm": 0.056736771017313004, "learning_rate": 8.86948714166939e-06, "loss": 0.0554, "step": 1968 }, { "epoch": 0.27436772800111475, "grad_norm": 0.15769393742084503, "learning_rate": 8.86799413493309e-06, "loss": 0.0505, "step": 1969 }, { "epoch": 0.27450707169232913, "grad_norm": 0.13382971286773682, "learning_rate": 8.866500268816803e-06, "loss": 0.063, "step": 1970 }, { "epoch": 0.2746464153835435, "grad_norm": 0.19520248472690582, "learning_rate": 8.865005543652428e-06, "loss": 0.0743, "step": 1971 }, { "epoch": 0.2747857590747579, "grad_norm": 0.0949278250336647, "learning_rate": 8.863509959772064e-06, "loss": 0.0537, "step": 1972 }, { "epoch": 0.27492510276597226, "grad_norm": 0.0984840914607048, "learning_rate": 8.86201351750799e-06, "loss": 0.0577, "step": 1973 }, { "epoch": 0.27506444645718664, "grad_norm": 0.08918996155261993, "learning_rate": 8.860516217192683e-06, "loss": 0.0498, "step": 1974 }, { "epoch": 0.275203790148401, "grad_norm": 0.15697138011455536, "learning_rate": 8.85901805915881e-06, "loss": 0.0592, "step": 1975 }, { "epoch": 0.2753431338396154, "grad_norm": 0.13616934418678284, "learning_rate": 8.85751904373923e-06, "loss": 0.0602, "step": 1976 }, { "epoch": 0.2754824775308298, "grad_norm": 0.07457908242940903, "learning_rate": 8.856019171266984e-06, "loss": 0.0534, "step": 1977 }, { "epoch": 0.27562182122204415, "grad_norm": 0.15038126707077026, "learning_rate": 8.854518442075313e-06, "loss": 0.065, "step": 1978 }, { "epoch": 0.2757611649132585, "grad_norm": 0.07869472354650497, "learning_rate": 8.853016856497646e-06, "loss": 0.067, "step": 1979 }, { "epoch": 0.2759005086044729, "grad_norm": 0.15372885763645172, "learning_rate": 8.8515144148676e-06, "loss": 0.0524, "step": 1980 }, { "epoch": 0.27603985229568734, "grad_norm": 0.10663909465074539, "learning_rate": 8.85001111751898e-06, "loss": 0.0667, "step": 1981 }, { "epoch": 0.2761791959869017, "grad_norm": 0.1152806207537651, "learning_rate": 8.848506964785789e-06, "loss": 0.0548, "step": 1982 }, { "epoch": 0.2763185396781161, "grad_norm": 0.17290273308753967, "learning_rate": 8.847001957002211e-06, "loss": 0.0703, "step": 1983 }, { "epoch": 0.27645788336933047, "grad_norm": 0.2238747775554657, "learning_rate": 8.845496094502628e-06, "loss": 0.0624, "step": 1984 }, { "epoch": 0.27659722706054485, "grad_norm": 0.07478366047143936, "learning_rate": 8.843989377621606e-06, "loss": 0.0564, "step": 1985 }, { "epoch": 0.2767365707517592, "grad_norm": 0.08850187063217163, "learning_rate": 8.842481806693906e-06, "loss": 0.0488, "step": 1986 }, { "epoch": 0.2768759144429736, "grad_norm": 0.1627633273601532, "learning_rate": 8.840973382054472e-06, "loss": 0.0505, "step": 1987 }, { "epoch": 0.277015258134188, "grad_norm": 0.12472415715456009, "learning_rate": 8.839464104038445e-06, "loss": 0.0596, "step": 1988 }, { "epoch": 0.27715460182540236, "grad_norm": 0.14962022006511688, "learning_rate": 8.83795397298115e-06, "loss": 0.053, "step": 1989 }, { "epoch": 0.27729394551661674, "grad_norm": 0.07088729739189148, "learning_rate": 8.836442989218104e-06, "loss": 0.0508, "step": 1990 }, { "epoch": 0.2774332892078311, "grad_norm": 0.11009050905704498, "learning_rate": 8.834931153085014e-06, "loss": 0.0595, "step": 1991 }, { "epoch": 0.2775726328990455, "grad_norm": 0.09922576695680618, "learning_rate": 8.833418464917774e-06, "loss": 0.0561, "step": 1992 }, { "epoch": 0.27771197659025987, "grad_norm": 0.11813496053218842, "learning_rate": 8.831904925052468e-06, "loss": 0.0563, "step": 1993 }, { "epoch": 0.27785132028147425, "grad_norm": 0.14210468530654907, "learning_rate": 8.830390533825373e-06, "loss": 0.071, "step": 1994 }, { "epoch": 0.2779906639726886, "grad_norm": 0.21025675535202026, "learning_rate": 8.828875291572951e-06, "loss": 0.0688, "step": 1995 }, { "epoch": 0.278130007663903, "grad_norm": 0.11823628097772598, "learning_rate": 8.827359198631854e-06, "loss": 0.0543, "step": 1996 }, { "epoch": 0.2782693513551174, "grad_norm": 0.17020493745803833, "learning_rate": 8.825842255338923e-06, "loss": 0.071, "step": 1997 }, { "epoch": 0.27840869504633176, "grad_norm": 0.11182764172554016, "learning_rate": 8.824324462031189e-06, "loss": 0.0612, "step": 1998 }, { "epoch": 0.27854803873754613, "grad_norm": 0.15734902024269104, "learning_rate": 8.822805819045869e-06, "loss": 0.0648, "step": 1999 }, { "epoch": 0.2786873824287605, "grad_norm": 0.12593860924243927, "learning_rate": 8.821286326720372e-06, "loss": 0.0684, "step": 2000 }, { "epoch": 0.27882672611997494, "grad_norm": 0.1327076256275177, "learning_rate": 8.819765985392297e-06, "loss": 0.0756, "step": 2001 }, { "epoch": 0.2789660698111893, "grad_norm": 0.2200179249048233, "learning_rate": 8.818244795399425e-06, "loss": 0.0704, "step": 2002 }, { "epoch": 0.2791054135024037, "grad_norm": 0.10482647269964218, "learning_rate": 8.81672275707973e-06, "loss": 0.0581, "step": 2003 }, { "epoch": 0.2792447571936181, "grad_norm": 0.13692212104797363, "learning_rate": 8.815199870771378e-06, "loss": 0.0592, "step": 2004 }, { "epoch": 0.27938410088483245, "grad_norm": 0.11292284727096558, "learning_rate": 8.813676136812717e-06, "loss": 0.0577, "step": 2005 }, { "epoch": 0.27952344457604683, "grad_norm": 0.16795040667057037, "learning_rate": 8.812151555542286e-06, "loss": 0.0583, "step": 2006 }, { "epoch": 0.2796627882672612, "grad_norm": 0.2246537059545517, "learning_rate": 8.81062612729881e-06, "loss": 0.0641, "step": 2007 }, { "epoch": 0.2798021319584756, "grad_norm": 0.12202957272529602, "learning_rate": 8.80909985242121e-06, "loss": 0.0558, "step": 2008 }, { "epoch": 0.27994147564968996, "grad_norm": 0.1604660004377365, "learning_rate": 8.807572731248583e-06, "loss": 0.0573, "step": 2009 }, { "epoch": 0.28008081934090434, "grad_norm": 0.11383584886789322, "learning_rate": 8.806044764120226e-06, "loss": 0.06, "step": 2010 }, { "epoch": 0.2802201630321187, "grad_norm": 0.14161717891693115, "learning_rate": 8.804515951375615e-06, "loss": 0.0691, "step": 2011 }, { "epoch": 0.2803595067233331, "grad_norm": 0.09190711379051208, "learning_rate": 8.802986293354418e-06, "loss": 0.0566, "step": 2012 }, { "epoch": 0.2804988504145475, "grad_norm": 0.09214141219854355, "learning_rate": 8.80145579039649e-06, "loss": 0.0719, "step": 2013 }, { "epoch": 0.28063819410576185, "grad_norm": 0.1353534758090973, "learning_rate": 8.799924442841873e-06, "loss": 0.0533, "step": 2014 }, { "epoch": 0.28077753779697623, "grad_norm": 0.15285374224185944, "learning_rate": 8.798392251030801e-06, "loss": 0.0594, "step": 2015 }, { "epoch": 0.2809168814881906, "grad_norm": 0.26487353444099426, "learning_rate": 8.796859215303688e-06, "loss": 0.0674, "step": 2016 }, { "epoch": 0.281056225179405, "grad_norm": 0.06960611045360565, "learning_rate": 8.795325336001143e-06, "loss": 0.0579, "step": 2017 }, { "epoch": 0.28119556887061936, "grad_norm": 0.07976159453392029, "learning_rate": 8.793790613463956e-06, "loss": 0.0527, "step": 2018 }, { "epoch": 0.28133491256183374, "grad_norm": 0.11924749612808228, "learning_rate": 8.792255048033106e-06, "loss": 0.0653, "step": 2019 }, { "epoch": 0.2814742562530481, "grad_norm": 0.17433559894561768, "learning_rate": 8.790718640049767e-06, "loss": 0.0791, "step": 2020 }, { "epoch": 0.28161359994426255, "grad_norm": 0.12744823098182678, "learning_rate": 8.789181389855288e-06, "loss": 0.0523, "step": 2021 }, { "epoch": 0.2817529436354769, "grad_norm": 0.2580777406692505, "learning_rate": 8.787643297791214e-06, "loss": 0.0604, "step": 2022 }, { "epoch": 0.2818922873266913, "grad_norm": 0.1254112422466278, "learning_rate": 8.78610436419927e-06, "loss": 0.0599, "step": 2023 }, { "epoch": 0.2820316310179057, "grad_norm": 0.06689402461051941, "learning_rate": 8.784564589421373e-06, "loss": 0.0616, "step": 2024 }, { "epoch": 0.28217097470912006, "grad_norm": 0.17431867122650146, "learning_rate": 8.783023973799632e-06, "loss": 0.0685, "step": 2025 }, { "epoch": 0.28231031840033444, "grad_norm": 0.13246382772922516, "learning_rate": 8.78148251767633e-06, "loss": 0.0518, "step": 2026 }, { "epoch": 0.2824496620915488, "grad_norm": 0.08722802996635437, "learning_rate": 8.779940221393946e-06, "loss": 0.0592, "step": 2027 }, { "epoch": 0.2825890057827632, "grad_norm": 0.1484355479478836, "learning_rate": 8.778397085295141e-06, "loss": 0.07, "step": 2028 }, { "epoch": 0.28272834947397757, "grad_norm": 0.07910045981407166, "learning_rate": 8.776853109722765e-06, "loss": 0.0692, "step": 2029 }, { "epoch": 0.28286769316519195, "grad_norm": 0.08701161295175552, "learning_rate": 8.775308295019857e-06, "loss": 0.0521, "step": 2030 }, { "epoch": 0.2830070368564063, "grad_norm": 0.12570512294769287, "learning_rate": 8.773762641529637e-06, "loss": 0.0567, "step": 2031 }, { "epoch": 0.2831463805476207, "grad_norm": 0.08338518440723419, "learning_rate": 8.772216149595515e-06, "loss": 0.0588, "step": 2032 }, { "epoch": 0.2832857242388351, "grad_norm": 0.1568083018064499, "learning_rate": 8.770668819561085e-06, "loss": 0.0597, "step": 2033 }, { "epoch": 0.28342506793004946, "grad_norm": 0.16667498648166656, "learning_rate": 8.769120651770128e-06, "loss": 0.0639, "step": 2034 }, { "epoch": 0.28356441162126383, "grad_norm": 0.06654808670282364, "learning_rate": 8.767571646566615e-06, "loss": 0.0508, "step": 2035 }, { "epoch": 0.2837037553124782, "grad_norm": 0.07161732763051987, "learning_rate": 8.766021804294697e-06, "loss": 0.0458, "step": 2036 }, { "epoch": 0.2838430990036926, "grad_norm": 0.12300421297550201, "learning_rate": 8.764471125298712e-06, "loss": 0.0619, "step": 2037 }, { "epoch": 0.28398244269490697, "grad_norm": 0.12164690345525742, "learning_rate": 8.76291960992319e-06, "loss": 0.0701, "step": 2038 }, { "epoch": 0.28412178638612134, "grad_norm": 0.1316850334405899, "learning_rate": 8.761367258512838e-06, "loss": 0.0631, "step": 2039 }, { "epoch": 0.2842611300773357, "grad_norm": 0.13597087562084198, "learning_rate": 8.759814071412554e-06, "loss": 0.0596, "step": 2040 }, { "epoch": 0.28440047376855015, "grad_norm": 0.10063929110765457, "learning_rate": 8.758260048967421e-06, "loss": 0.0616, "step": 2041 }, { "epoch": 0.28453981745976453, "grad_norm": 0.13576631247997284, "learning_rate": 8.75670519152271e-06, "loss": 0.0696, "step": 2042 }, { "epoch": 0.2846791611509789, "grad_norm": 0.07596871256828308, "learning_rate": 8.755149499423871e-06, "loss": 0.0598, "step": 2043 }, { "epoch": 0.2848185048421933, "grad_norm": 0.06157350912690163, "learning_rate": 8.753592973016545e-06, "loss": 0.0526, "step": 2044 }, { "epoch": 0.28495784853340766, "grad_norm": 0.13868331909179688, "learning_rate": 8.752035612646557e-06, "loss": 0.0665, "step": 2045 }, { "epoch": 0.28509719222462204, "grad_norm": 0.1340802162885666, "learning_rate": 8.750477418659914e-06, "loss": 0.051, "step": 2046 }, { "epoch": 0.2852365359158364, "grad_norm": 0.07859569787979126, "learning_rate": 8.748918391402816e-06, "loss": 0.0578, "step": 2047 }, { "epoch": 0.2853758796070508, "grad_norm": 0.11137951165437698, "learning_rate": 8.74735853122164e-06, "loss": 0.0525, "step": 2048 }, { "epoch": 0.2855152232982652, "grad_norm": 0.11107952147722244, "learning_rate": 8.745797838462951e-06, "loss": 0.0687, "step": 2049 }, { "epoch": 0.28565456698947955, "grad_norm": 0.1398782730102539, "learning_rate": 8.7442363134735e-06, "loss": 0.0622, "step": 2050 }, { "epoch": 0.28579391068069393, "grad_norm": 0.07136952877044678, "learning_rate": 8.742673956600225e-06, "loss": 0.0656, "step": 2051 }, { "epoch": 0.2859332543719083, "grad_norm": 0.08190649747848511, "learning_rate": 8.741110768190242e-06, "loss": 0.0552, "step": 2052 }, { "epoch": 0.2860725980631227, "grad_norm": 0.06136547774076462, "learning_rate": 8.739546748590857e-06, "loss": 0.0496, "step": 2053 }, { "epoch": 0.28621194175433706, "grad_norm": 0.08690688014030457, "learning_rate": 8.73798189814956e-06, "loss": 0.0584, "step": 2054 }, { "epoch": 0.28635128544555144, "grad_norm": 0.08263164013624191, "learning_rate": 8.736416217214026e-06, "loss": 0.0624, "step": 2055 }, { "epoch": 0.2864906291367658, "grad_norm": 0.08879518508911133, "learning_rate": 8.734849706132112e-06, "loss": 0.0701, "step": 2056 }, { "epoch": 0.2866299728279802, "grad_norm": 0.08832747489213943, "learning_rate": 8.733282365251858e-06, "loss": 0.0544, "step": 2057 }, { "epoch": 0.2867693165191946, "grad_norm": 0.07986890524625778, "learning_rate": 8.731714194921498e-06, "loss": 0.0553, "step": 2058 }, { "epoch": 0.28690866021040895, "grad_norm": 0.11472956091165543, "learning_rate": 8.73014519548944e-06, "loss": 0.0593, "step": 2059 }, { "epoch": 0.2870480039016233, "grad_norm": 0.09633506834506989, "learning_rate": 8.72857536730428e-06, "loss": 0.0639, "step": 2060 }, { "epoch": 0.28718734759283776, "grad_norm": 0.07375850528478622, "learning_rate": 8.7270047107148e-06, "loss": 0.0527, "step": 2061 }, { "epoch": 0.28732669128405214, "grad_norm": 0.19890864193439484, "learning_rate": 8.72543322606996e-06, "loss": 0.0626, "step": 2062 }, { "epoch": 0.2874660349752665, "grad_norm": 0.09014913439750671, "learning_rate": 8.72386091371891e-06, "loss": 0.0527, "step": 2063 }, { "epoch": 0.2876053786664809, "grad_norm": 0.05719691887497902, "learning_rate": 8.722287774010983e-06, "loss": 0.0475, "step": 2064 }, { "epoch": 0.28774472235769527, "grad_norm": 0.1469888836145401, "learning_rate": 8.720713807295692e-06, "loss": 0.0648, "step": 2065 }, { "epoch": 0.28788406604890965, "grad_norm": 0.13780193030834198, "learning_rate": 8.71913901392274e-06, "loss": 0.0606, "step": 2066 }, { "epoch": 0.288023409740124, "grad_norm": 0.08774321526288986, "learning_rate": 8.71756339424201e-06, "loss": 0.0595, "step": 2067 }, { "epoch": 0.2881627534313384, "grad_norm": 0.09345687925815582, "learning_rate": 8.715986948603566e-06, "loss": 0.0551, "step": 2068 }, { "epoch": 0.2883020971225528, "grad_norm": 0.15814104676246643, "learning_rate": 8.71440967735766e-06, "loss": 0.0652, "step": 2069 }, { "epoch": 0.28844144081376716, "grad_norm": 0.12075088918209076, "learning_rate": 8.712831580854724e-06, "loss": 0.0592, "step": 2070 }, { "epoch": 0.28858078450498154, "grad_norm": 0.19549250602722168, "learning_rate": 8.711252659445378e-06, "loss": 0.0603, "step": 2071 }, { "epoch": 0.2887201281961959, "grad_norm": 0.16618205606937408, "learning_rate": 8.709672913480418e-06, "loss": 0.0504, "step": 2072 }, { "epoch": 0.2888594718874103, "grad_norm": 0.12995705008506775, "learning_rate": 8.70809234331083e-06, "loss": 0.0691, "step": 2073 }, { "epoch": 0.28899881557862467, "grad_norm": 0.07058446109294891, "learning_rate": 8.706510949287782e-06, "loss": 0.0689, "step": 2074 }, { "epoch": 0.28913815926983905, "grad_norm": 0.0807037204504013, "learning_rate": 8.70492873176262e-06, "loss": 0.059, "step": 2075 }, { "epoch": 0.2892775029610534, "grad_norm": 0.08723057806491852, "learning_rate": 8.703345691086882e-06, "loss": 0.0532, "step": 2076 }, { "epoch": 0.2894168466522678, "grad_norm": 0.08304658532142639, "learning_rate": 8.701761827612278e-06, "loss": 0.0544, "step": 2077 }, { "epoch": 0.2895561903434822, "grad_norm": 0.0998571440577507, "learning_rate": 8.700177141690708e-06, "loss": 0.048, "step": 2078 }, { "epoch": 0.28969553403469656, "grad_norm": 0.13895174860954285, "learning_rate": 8.698591633674256e-06, "loss": 0.0543, "step": 2079 }, { "epoch": 0.28983487772591093, "grad_norm": 0.08819921314716339, "learning_rate": 8.697005303915183e-06, "loss": 0.0566, "step": 2080 }, { "epoch": 0.28997422141712537, "grad_norm": 0.08781103789806366, "learning_rate": 8.695418152765933e-06, "loss": 0.0592, "step": 2081 }, { "epoch": 0.29011356510833974, "grad_norm": 0.12774920463562012, "learning_rate": 8.693830180579139e-06, "loss": 0.059, "step": 2082 }, { "epoch": 0.2902529087995541, "grad_norm": 0.16559399664402008, "learning_rate": 8.69224138770761e-06, "loss": 0.0688, "step": 2083 }, { "epoch": 0.2903922524907685, "grad_norm": 0.07491327077150345, "learning_rate": 8.69065177450434e-06, "loss": 0.059, "step": 2084 }, { "epoch": 0.2905315961819829, "grad_norm": 0.07410928606987, "learning_rate": 8.689061341322505e-06, "loss": 0.0492, "step": 2085 }, { "epoch": 0.29067093987319725, "grad_norm": 0.11816352605819702, "learning_rate": 8.687470088515464e-06, "loss": 0.0608, "step": 2086 }, { "epoch": 0.29081028356441163, "grad_norm": 0.1662806272506714, "learning_rate": 8.685878016436753e-06, "loss": 0.0586, "step": 2087 }, { "epoch": 0.290949627255626, "grad_norm": 0.08253519982099533, "learning_rate": 8.684285125440099e-06, "loss": 0.0521, "step": 2088 }, { "epoch": 0.2910889709468404, "grad_norm": 0.09232985973358154, "learning_rate": 8.682691415879402e-06, "loss": 0.0544, "step": 2089 }, { "epoch": 0.29122831463805476, "grad_norm": 0.0949568971991539, "learning_rate": 8.681096888108751e-06, "loss": 0.06, "step": 2090 }, { "epoch": 0.29136765832926914, "grad_norm": 0.1284104585647583, "learning_rate": 8.679501542482412e-06, "loss": 0.0633, "step": 2091 }, { "epoch": 0.2915070020204835, "grad_norm": 0.07480017095804214, "learning_rate": 8.677905379354834e-06, "loss": 0.0508, "step": 2092 }, { "epoch": 0.2916463457116979, "grad_norm": 0.10044672340154648, "learning_rate": 8.67630839908065e-06, "loss": 0.06, "step": 2093 }, { "epoch": 0.2917856894029123, "grad_norm": 0.19018816947937012, "learning_rate": 8.674710602014672e-06, "loss": 0.0618, "step": 2094 }, { "epoch": 0.29192503309412665, "grad_norm": 0.15320220589637756, "learning_rate": 8.673111988511892e-06, "loss": 0.0577, "step": 2095 }, { "epoch": 0.29206437678534103, "grad_norm": 0.10474290698766708, "learning_rate": 8.671512558927483e-06, "loss": 0.0613, "step": 2096 }, { "epoch": 0.2922037204765554, "grad_norm": 0.1536766141653061, "learning_rate": 8.669912313616811e-06, "loss": 0.0686, "step": 2097 }, { "epoch": 0.2923430641677698, "grad_norm": 0.1213560700416565, "learning_rate": 8.668311252935407e-06, "loss": 0.0536, "step": 2098 }, { "epoch": 0.29248240785898416, "grad_norm": 0.10965201258659363, "learning_rate": 8.66670937723899e-06, "loss": 0.0606, "step": 2099 }, { "epoch": 0.29262175155019854, "grad_norm": 0.10541883111000061, "learning_rate": 8.665106686883461e-06, "loss": 0.0511, "step": 2100 }, { "epoch": 0.29276109524141297, "grad_norm": 0.07343833893537521, "learning_rate": 8.663503182224906e-06, "loss": 0.0545, "step": 2101 }, { "epoch": 0.29290043893262735, "grad_norm": 0.05402567237615585, "learning_rate": 8.66189886361958e-06, "loss": 0.0518, "step": 2102 }, { "epoch": 0.2930397826238417, "grad_norm": 0.11147692054510117, "learning_rate": 8.660293731423929e-06, "loss": 0.0623, "step": 2103 }, { "epoch": 0.2931791263150561, "grad_norm": 0.08041944354772568, "learning_rate": 8.658687785994579e-06, "loss": 0.0529, "step": 2104 }, { "epoch": 0.2933184700062705, "grad_norm": 0.1622878611087799, "learning_rate": 8.657081027688332e-06, "loss": 0.0559, "step": 2105 }, { "epoch": 0.29345781369748486, "grad_norm": 0.0810280367732048, "learning_rate": 8.655473456862172e-06, "loss": 0.0597, "step": 2106 }, { "epoch": 0.29359715738869924, "grad_norm": 0.1926123946905136, "learning_rate": 8.653865073873265e-06, "loss": 0.0567, "step": 2107 }, { "epoch": 0.2937365010799136, "grad_norm": 0.10853835940361023, "learning_rate": 8.652255879078959e-06, "loss": 0.0595, "step": 2108 }, { "epoch": 0.293875844771128, "grad_norm": 0.17704902589321136, "learning_rate": 8.650645872836779e-06, "loss": 0.0678, "step": 2109 }, { "epoch": 0.29401518846234237, "grad_norm": 0.06913629919290543, "learning_rate": 8.649035055504431e-06, "loss": 0.0552, "step": 2110 }, { "epoch": 0.29415453215355675, "grad_norm": 0.09145285934209824, "learning_rate": 8.647423427439804e-06, "loss": 0.0473, "step": 2111 }, { "epoch": 0.2942938758447711, "grad_norm": 0.12889626622200012, "learning_rate": 8.645810989000962e-06, "loss": 0.0521, "step": 2112 }, { "epoch": 0.2944332195359855, "grad_norm": 0.13907121121883392, "learning_rate": 8.644197740546153e-06, "loss": 0.0602, "step": 2113 }, { "epoch": 0.2945725632271999, "grad_norm": 0.11688807606697083, "learning_rate": 8.642583682433808e-06, "loss": 0.0578, "step": 2114 }, { "epoch": 0.29471190691841426, "grad_norm": 0.14084574580192566, "learning_rate": 8.640968815022529e-06, "loss": 0.0639, "step": 2115 }, { "epoch": 0.29485125060962863, "grad_norm": 0.12107362598180771, "learning_rate": 8.6393531386711e-06, "loss": 0.0682, "step": 2116 }, { "epoch": 0.294990594300843, "grad_norm": 0.10864291340112686, "learning_rate": 8.637736653738496e-06, "loss": 0.0593, "step": 2117 }, { "epoch": 0.2951299379920574, "grad_norm": 0.08558547496795654, "learning_rate": 8.636119360583857e-06, "loss": 0.0496, "step": 2118 }, { "epoch": 0.29526928168327177, "grad_norm": 0.0778111144900322, "learning_rate": 8.63450125956651e-06, "loss": 0.0521, "step": 2119 }, { "epoch": 0.29540862537448614, "grad_norm": 0.11156675219535828, "learning_rate": 8.63288235104596e-06, "loss": 0.0567, "step": 2120 }, { "epoch": 0.2955479690657006, "grad_norm": 0.0594489723443985, "learning_rate": 8.631262635381892e-06, "loss": 0.0535, "step": 2121 }, { "epoch": 0.29568731275691496, "grad_norm": 0.1522628366947174, "learning_rate": 8.629642112934169e-06, "loss": 0.0642, "step": 2122 }, { "epoch": 0.29582665644812933, "grad_norm": 0.07627158612012863, "learning_rate": 8.628020784062837e-06, "loss": 0.0541, "step": 2123 }, { "epoch": 0.2959660001393437, "grad_norm": 0.09886953234672546, "learning_rate": 8.626398649128113e-06, "loss": 0.0594, "step": 2124 }, { "epoch": 0.2961053438305581, "grad_norm": 0.07697094976902008, "learning_rate": 8.624775708490403e-06, "loss": 0.0496, "step": 2125 }, { "epoch": 0.29624468752177247, "grad_norm": 0.14056174457073212, "learning_rate": 8.623151962510284e-06, "loss": 0.0538, "step": 2126 }, { "epoch": 0.29638403121298684, "grad_norm": 0.10778473317623138, "learning_rate": 8.621527411548517e-06, "loss": 0.0665, "step": 2127 }, { "epoch": 0.2965233749042012, "grad_norm": 0.13320448994636536, "learning_rate": 8.619902055966043e-06, "loss": 0.0674, "step": 2128 }, { "epoch": 0.2966627185954156, "grad_norm": 0.09006623923778534, "learning_rate": 8.618275896123973e-06, "loss": 0.0571, "step": 2129 }, { "epoch": 0.29680206228663, "grad_norm": 0.0831742137670517, "learning_rate": 8.616648932383607e-06, "loss": 0.0541, "step": 2130 }, { "epoch": 0.29694140597784435, "grad_norm": 0.09182346612215042, "learning_rate": 8.615021165106415e-06, "loss": 0.0585, "step": 2131 }, { "epoch": 0.29708074966905873, "grad_norm": 0.07010889053344727, "learning_rate": 8.613392594654056e-06, "loss": 0.053, "step": 2132 }, { "epoch": 0.2972200933602731, "grad_norm": 0.0726848766207695, "learning_rate": 8.611763221388356e-06, "loss": 0.0494, "step": 2133 }, { "epoch": 0.2973594370514875, "grad_norm": 0.1134180873632431, "learning_rate": 8.610133045671325e-06, "loss": 0.0623, "step": 2134 }, { "epoch": 0.29749878074270186, "grad_norm": 0.16100890934467316, "learning_rate": 8.608502067865155e-06, "loss": 0.0583, "step": 2135 }, { "epoch": 0.29763812443391624, "grad_norm": 0.05743236839771271, "learning_rate": 8.606870288332206e-06, "loss": 0.0573, "step": 2136 }, { "epoch": 0.2977774681251306, "grad_norm": 0.09763561934232712, "learning_rate": 8.605237707435028e-06, "loss": 0.0655, "step": 2137 }, { "epoch": 0.297916811816345, "grad_norm": 0.13785859942436218, "learning_rate": 8.603604325536338e-06, "loss": 0.0568, "step": 2138 }, { "epoch": 0.2980561555075594, "grad_norm": 0.24861334264278412, "learning_rate": 8.60197014299904e-06, "loss": 0.0634, "step": 2139 }, { "epoch": 0.29819549919877375, "grad_norm": 0.06950637698173523, "learning_rate": 8.600335160186208e-06, "loss": 0.0616, "step": 2140 }, { "epoch": 0.2983348428899882, "grad_norm": 0.061781659722328186, "learning_rate": 8.598699377461104e-06, "loss": 0.0508, "step": 2141 }, { "epoch": 0.29847418658120256, "grad_norm": 0.06541842967271805, "learning_rate": 8.597062795187157e-06, "loss": 0.0572, "step": 2142 }, { "epoch": 0.29861353027241694, "grad_norm": 0.11333271861076355, "learning_rate": 8.595425413727979e-06, "loss": 0.0551, "step": 2143 }, { "epoch": 0.2987528739636313, "grad_norm": 0.20861491560935974, "learning_rate": 8.593787233447357e-06, "loss": 0.0672, "step": 2144 }, { "epoch": 0.2988922176548457, "grad_norm": 0.08947115391492844, "learning_rate": 8.592148254709262e-06, "loss": 0.0563, "step": 2145 }, { "epoch": 0.29903156134606007, "grad_norm": 0.11556817591190338, "learning_rate": 8.590508477877834e-06, "loss": 0.0588, "step": 2146 }, { "epoch": 0.29917090503727445, "grad_norm": 0.09256958216428757, "learning_rate": 8.588867903317395e-06, "loss": 0.0653, "step": 2147 }, { "epoch": 0.2993102487284888, "grad_norm": 0.14222873747348785, "learning_rate": 8.587226531392443e-06, "loss": 0.0647, "step": 2148 }, { "epoch": 0.2994495924197032, "grad_norm": 0.08910281956195831, "learning_rate": 8.585584362467652e-06, "loss": 0.0565, "step": 2149 }, { "epoch": 0.2995889361109176, "grad_norm": 0.10415314882993698, "learning_rate": 8.583941396907877e-06, "loss": 0.0533, "step": 2150 }, { "epoch": 0.29972827980213196, "grad_norm": 0.10370435565710068, "learning_rate": 8.582297635078149e-06, "loss": 0.0513, "step": 2151 }, { "epoch": 0.29986762349334634, "grad_norm": 0.1696261614561081, "learning_rate": 8.58065307734367e-06, "loss": 0.0567, "step": 2152 }, { "epoch": 0.3000069671845607, "grad_norm": 0.187167227268219, "learning_rate": 8.579007724069823e-06, "loss": 0.0664, "step": 2153 }, { "epoch": 0.3001463108757751, "grad_norm": 0.06371961534023285, "learning_rate": 8.577361575622171e-06, "loss": 0.047, "step": 2154 }, { "epoch": 0.30028565456698947, "grad_norm": 0.11325095593929291, "learning_rate": 8.575714632366451e-06, "loss": 0.0551, "step": 2155 }, { "epoch": 0.30042499825820385, "grad_norm": 0.12702545523643494, "learning_rate": 8.574066894668573e-06, "loss": 0.0562, "step": 2156 }, { "epoch": 0.3005643419494182, "grad_norm": 0.09612581133842468, "learning_rate": 8.57241836289463e-06, "loss": 0.0578, "step": 2157 }, { "epoch": 0.3007036856406326, "grad_norm": 0.10569386184215546, "learning_rate": 8.570769037410885e-06, "loss": 0.0577, "step": 2158 }, { "epoch": 0.300843029331847, "grad_norm": 0.28435853123664856, "learning_rate": 8.56911891858378e-06, "loss": 0.0876, "step": 2159 }, { "epoch": 0.30098237302306136, "grad_norm": 0.1274014115333557, "learning_rate": 8.56746800677994e-06, "loss": 0.06, "step": 2160 }, { "epoch": 0.3011217167142758, "grad_norm": 0.128562331199646, "learning_rate": 8.565816302366151e-06, "loss": 0.0489, "step": 2161 }, { "epoch": 0.30126106040549017, "grad_norm": 0.07346993684768677, "learning_rate": 8.564163805709393e-06, "loss": 0.0539, "step": 2162 }, { "epoch": 0.30140040409670454, "grad_norm": 0.07140374183654785, "learning_rate": 8.562510517176807e-06, "loss": 0.0531, "step": 2163 }, { "epoch": 0.3015397477879189, "grad_norm": 0.19033443927764893, "learning_rate": 8.560856437135716e-06, "loss": 0.0656, "step": 2164 }, { "epoch": 0.3016790914791333, "grad_norm": 0.0966298058629036, "learning_rate": 8.559201565953623e-06, "loss": 0.0681, "step": 2165 }, { "epoch": 0.3018184351703477, "grad_norm": 0.10570862889289856, "learning_rate": 8.557545903998197e-06, "loss": 0.0647, "step": 2166 }, { "epoch": 0.30195777886156205, "grad_norm": 0.09830564260482788, "learning_rate": 8.555889451637294e-06, "loss": 0.0691, "step": 2167 }, { "epoch": 0.30209712255277643, "grad_norm": 0.22043894231319427, "learning_rate": 8.554232209238935e-06, "loss": 0.0679, "step": 2168 }, { "epoch": 0.3022364662439908, "grad_norm": 0.14604054391384125, "learning_rate": 8.552574177171326e-06, "loss": 0.0479, "step": 2169 }, { "epoch": 0.3023758099352052, "grad_norm": 0.08597854524850845, "learning_rate": 8.55091535580284e-06, "loss": 0.0563, "step": 2170 }, { "epoch": 0.30251515362641956, "grad_norm": 0.07525794208049774, "learning_rate": 8.54925574550203e-06, "loss": 0.0561, "step": 2171 }, { "epoch": 0.30265449731763394, "grad_norm": 0.11521807312965393, "learning_rate": 8.547595346637624e-06, "loss": 0.0591, "step": 2172 }, { "epoch": 0.3027938410088483, "grad_norm": 0.10362289100885391, "learning_rate": 8.545934159578527e-06, "loss": 0.0526, "step": 2173 }, { "epoch": 0.3029331847000627, "grad_norm": 0.08490835130214691, "learning_rate": 8.544272184693814e-06, "loss": 0.0626, "step": 2174 }, { "epoch": 0.3030725283912771, "grad_norm": 0.12970662117004395, "learning_rate": 8.542609422352738e-06, "loss": 0.0514, "step": 2175 }, { "epoch": 0.30321187208249145, "grad_norm": 0.2226606160402298, "learning_rate": 8.540945872924728e-06, "loss": 0.0678, "step": 2176 }, { "epoch": 0.30335121577370583, "grad_norm": 0.14171746373176575, "learning_rate": 8.539281536779388e-06, "loss": 0.0609, "step": 2177 }, { "epoch": 0.3034905594649202, "grad_norm": 0.11859855055809021, "learning_rate": 8.537616414286491e-06, "loss": 0.0613, "step": 2178 }, { "epoch": 0.3036299031561346, "grad_norm": 0.08169940114021301, "learning_rate": 8.535950505815993e-06, "loss": 0.0603, "step": 2179 }, { "epoch": 0.30376924684734896, "grad_norm": 0.14575441181659698, "learning_rate": 8.53428381173802e-06, "loss": 0.0608, "step": 2180 }, { "epoch": 0.30390859053856334, "grad_norm": 0.15680377185344696, "learning_rate": 8.532616332422872e-06, "loss": 0.0599, "step": 2181 }, { "epoch": 0.30404793422977777, "grad_norm": 0.09899143874645233, "learning_rate": 8.530948068241028e-06, "loss": 0.0424, "step": 2182 }, { "epoch": 0.30418727792099215, "grad_norm": 0.11453903466463089, "learning_rate": 8.529279019563133e-06, "loss": 0.0573, "step": 2183 }, { "epoch": 0.3043266216122065, "grad_norm": 0.12014251202344894, "learning_rate": 8.527609186760017e-06, "loss": 0.058, "step": 2184 }, { "epoch": 0.3044659653034209, "grad_norm": 0.11375628411769867, "learning_rate": 8.525938570202676e-06, "loss": 0.058, "step": 2185 }, { "epoch": 0.3046053089946353, "grad_norm": 0.13415221869945526, "learning_rate": 8.524267170262283e-06, "loss": 0.0645, "step": 2186 }, { "epoch": 0.30474465268584966, "grad_norm": 0.12565039098262787, "learning_rate": 8.522594987310184e-06, "loss": 0.0471, "step": 2187 }, { "epoch": 0.30488399637706404, "grad_norm": 0.10820017755031586, "learning_rate": 8.520922021717903e-06, "loss": 0.0524, "step": 2188 }, { "epoch": 0.3050233400682784, "grad_norm": 0.08937463909387589, "learning_rate": 8.519248273857132e-06, "loss": 0.0555, "step": 2189 }, { "epoch": 0.3051626837594928, "grad_norm": 0.09597858041524887, "learning_rate": 8.51757374409974e-06, "loss": 0.0536, "step": 2190 }, { "epoch": 0.30530202745070717, "grad_norm": 0.08448883891105652, "learning_rate": 8.51589843281777e-06, "loss": 0.0622, "step": 2191 }, { "epoch": 0.30544137114192155, "grad_norm": 0.08781048655509949, "learning_rate": 8.514222340383438e-06, "loss": 0.0544, "step": 2192 }, { "epoch": 0.3055807148331359, "grad_norm": 0.07233543694019318, "learning_rate": 8.512545467169133e-06, "loss": 0.059, "step": 2193 }, { "epoch": 0.3057200585243503, "grad_norm": 0.09586496651172638, "learning_rate": 8.510867813547417e-06, "loss": 0.048, "step": 2194 }, { "epoch": 0.3058594022155647, "grad_norm": 0.1357373148202896, "learning_rate": 8.509189379891029e-06, "loss": 0.0507, "step": 2195 }, { "epoch": 0.30599874590677906, "grad_norm": 0.10363945364952087, "learning_rate": 8.507510166572875e-06, "loss": 0.0574, "step": 2196 }, { "epoch": 0.30613808959799343, "grad_norm": 0.08372747153043747, "learning_rate": 8.50583017396604e-06, "loss": 0.0522, "step": 2197 }, { "epoch": 0.3062774332892078, "grad_norm": 0.0999271348118782, "learning_rate": 8.504149402443782e-06, "loss": 0.0466, "step": 2198 }, { "epoch": 0.3064167769804222, "grad_norm": 0.06038287654519081, "learning_rate": 8.502467852379526e-06, "loss": 0.0571, "step": 2199 }, { "epoch": 0.30655612067163657, "grad_norm": 0.08893509954214096, "learning_rate": 8.500785524146875e-06, "loss": 0.0478, "step": 2200 }, { "epoch": 0.30669546436285094, "grad_norm": 0.12838898599147797, "learning_rate": 8.499102418119607e-06, "loss": 0.0535, "step": 2201 }, { "epoch": 0.3068348080540654, "grad_norm": 0.05657706782221794, "learning_rate": 8.497418534671666e-06, "loss": 0.0501, "step": 2202 }, { "epoch": 0.30697415174527976, "grad_norm": 0.09454955160617828, "learning_rate": 8.495733874177176e-06, "loss": 0.0527, "step": 2203 }, { "epoch": 0.30711349543649413, "grad_norm": 0.19610576331615448, "learning_rate": 8.494048437010427e-06, "loss": 0.0629, "step": 2204 }, { "epoch": 0.3072528391277085, "grad_norm": 0.09347248822450638, "learning_rate": 8.492362223545884e-06, "loss": 0.0543, "step": 2205 }, { "epoch": 0.3073921828189229, "grad_norm": 0.0997520461678505, "learning_rate": 8.49067523415819e-06, "loss": 0.0465, "step": 2206 }, { "epoch": 0.30753152651013727, "grad_norm": 0.14419475197792053, "learning_rate": 8.48898746922215e-06, "loss": 0.0705, "step": 2207 }, { "epoch": 0.30767087020135164, "grad_norm": 0.14305084943771362, "learning_rate": 8.487298929112751e-06, "loss": 0.0769, "step": 2208 }, { "epoch": 0.307810213892566, "grad_norm": 0.09002139419317245, "learning_rate": 8.485609614205146e-06, "loss": 0.0544, "step": 2209 }, { "epoch": 0.3079495575837804, "grad_norm": 0.06622818112373352, "learning_rate": 8.483919524874661e-06, "loss": 0.0463, "step": 2210 }, { "epoch": 0.3080889012749948, "grad_norm": 0.06702997535467148, "learning_rate": 8.482228661496797e-06, "loss": 0.0504, "step": 2211 }, { "epoch": 0.30822824496620915, "grad_norm": 0.0785529762506485, "learning_rate": 8.480537024447227e-06, "loss": 0.0639, "step": 2212 }, { "epoch": 0.30836758865742353, "grad_norm": 0.08954072743654251, "learning_rate": 8.478844614101792e-06, "loss": 0.0551, "step": 2213 }, { "epoch": 0.3085069323486379, "grad_norm": 0.06507627665996552, "learning_rate": 8.477151430836505e-06, "loss": 0.0515, "step": 2214 }, { "epoch": 0.3086462760398523, "grad_norm": 0.09935592859983444, "learning_rate": 8.475457475027555e-06, "loss": 0.0541, "step": 2215 }, { "epoch": 0.30878561973106666, "grad_norm": 0.2587105631828308, "learning_rate": 8.473762747051302e-06, "loss": 0.0693, "step": 2216 }, { "epoch": 0.30892496342228104, "grad_norm": 0.10209248214960098, "learning_rate": 8.472067247284272e-06, "loss": 0.0591, "step": 2217 }, { "epoch": 0.3090643071134954, "grad_norm": 0.17315250635147095, "learning_rate": 8.470370976103171e-06, "loss": 0.0565, "step": 2218 }, { "epoch": 0.3092036508047098, "grad_norm": 0.19038860499858856, "learning_rate": 8.468673933884867e-06, "loss": 0.0613, "step": 2219 }, { "epoch": 0.3093429944959242, "grad_norm": 0.19898459315299988, "learning_rate": 8.466976121006407e-06, "loss": 0.0623, "step": 2220 }, { "epoch": 0.30948233818713855, "grad_norm": 0.3001520335674286, "learning_rate": 8.465277537845004e-06, "loss": 0.0697, "step": 2221 }, { "epoch": 0.309621681878353, "grad_norm": 0.163874089717865, "learning_rate": 8.463578184778047e-06, "loss": 0.0591, "step": 2222 }, { "epoch": 0.30976102556956736, "grad_norm": 0.07691826671361923, "learning_rate": 8.461878062183092e-06, "loss": 0.0588, "step": 2223 }, { "epoch": 0.30990036926078174, "grad_norm": 0.10372108221054077, "learning_rate": 8.460177170437865e-06, "loss": 0.0666, "step": 2224 }, { "epoch": 0.3100397129519961, "grad_norm": 0.10774005204439163, "learning_rate": 8.458475509920272e-06, "loss": 0.0711, "step": 2225 }, { "epoch": 0.3101790566432105, "grad_norm": 0.1515970230102539, "learning_rate": 8.456773081008376e-06, "loss": 0.0512, "step": 2226 }, { "epoch": 0.31031840033442487, "grad_norm": 0.14926902949810028, "learning_rate": 8.455069884080422e-06, "loss": 0.0575, "step": 2227 }, { "epoch": 0.31045774402563925, "grad_norm": 0.12059512734413147, "learning_rate": 8.45336591951482e-06, "loss": 0.059, "step": 2228 }, { "epoch": 0.3105970877168536, "grad_norm": 0.0680592954158783, "learning_rate": 8.451661187690154e-06, "loss": 0.0477, "step": 2229 }, { "epoch": 0.310736431408068, "grad_norm": 0.06238493695855141, "learning_rate": 8.449955688985174e-06, "loss": 0.0436, "step": 2230 }, { "epoch": 0.3108757750992824, "grad_norm": 0.10857166349887848, "learning_rate": 8.448249423778802e-06, "loss": 0.0601, "step": 2231 }, { "epoch": 0.31101511879049676, "grad_norm": 0.1111806333065033, "learning_rate": 8.446542392450134e-06, "loss": 0.0623, "step": 2232 }, { "epoch": 0.31115446248171114, "grad_norm": 0.16968534886837006, "learning_rate": 8.444834595378434e-06, "loss": 0.0589, "step": 2233 }, { "epoch": 0.3112938061729255, "grad_norm": 0.2209327667951584, "learning_rate": 8.443126032943132e-06, "loss": 0.0663, "step": 2234 }, { "epoch": 0.3114331498641399, "grad_norm": 0.06405304372310638, "learning_rate": 8.441416705523834e-06, "loss": 0.0537, "step": 2235 }, { "epoch": 0.31157249355535427, "grad_norm": 0.1818762868642807, "learning_rate": 8.439706613500312e-06, "loss": 0.0598, "step": 2236 }, { "epoch": 0.31171183724656865, "grad_norm": 0.1101333349943161, "learning_rate": 8.43799575725251e-06, "loss": 0.0569, "step": 2237 }, { "epoch": 0.311851180937783, "grad_norm": 0.1401910036802292, "learning_rate": 8.436284137160544e-06, "loss": 0.0526, "step": 2238 }, { "epoch": 0.3119905246289974, "grad_norm": 0.13566100597381592, "learning_rate": 8.434571753604693e-06, "loss": 0.06, "step": 2239 }, { "epoch": 0.3121298683202118, "grad_norm": 0.07844170182943344, "learning_rate": 8.432858606965411e-06, "loss": 0.0596, "step": 2240 }, { "epoch": 0.31226921201142616, "grad_norm": 0.09180639684200287, "learning_rate": 8.43114469762332e-06, "loss": 0.0542, "step": 2241 }, { "epoch": 0.3124085557026406, "grad_norm": 0.1492108255624771, "learning_rate": 8.429430025959212e-06, "loss": 0.0581, "step": 2242 }, { "epoch": 0.31254789939385497, "grad_norm": 0.10837198793888092, "learning_rate": 8.427714592354046e-06, "loss": 0.0569, "step": 2243 }, { "epoch": 0.31268724308506934, "grad_norm": 0.11306025087833405, "learning_rate": 8.425998397188955e-06, "loss": 0.0546, "step": 2244 }, { "epoch": 0.3128265867762837, "grad_norm": 0.13280536234378815, "learning_rate": 8.424281440845236e-06, "loss": 0.0488, "step": 2245 }, { "epoch": 0.3129659304674981, "grad_norm": 0.16915033757686615, "learning_rate": 8.42256372370436e-06, "loss": 0.0569, "step": 2246 }, { "epoch": 0.3131052741587125, "grad_norm": 0.18225112557411194, "learning_rate": 8.420845246147961e-06, "loss": 0.0722, "step": 2247 }, { "epoch": 0.31324461784992685, "grad_norm": 0.12059040367603302, "learning_rate": 8.41912600855785e-06, "loss": 0.0664, "step": 2248 }, { "epoch": 0.31338396154114123, "grad_norm": 0.17339931428432465, "learning_rate": 8.417406011316e-06, "loss": 0.0678, "step": 2249 }, { "epoch": 0.3135233052323556, "grad_norm": 0.07844103127717972, "learning_rate": 8.415685254804552e-06, "loss": 0.0556, "step": 2250 }, { "epoch": 0.31366264892357, "grad_norm": 0.12895849347114563, "learning_rate": 8.413963739405824e-06, "loss": 0.0564, "step": 2251 }, { "epoch": 0.31380199261478436, "grad_norm": 0.11328329145908356, "learning_rate": 8.412241465502294e-06, "loss": 0.0665, "step": 2252 }, { "epoch": 0.31394133630599874, "grad_norm": 0.23394834995269775, "learning_rate": 8.410518433476613e-06, "loss": 0.073, "step": 2253 }, { "epoch": 0.3140806799972131, "grad_norm": 0.1445436030626297, "learning_rate": 8.408794643711601e-06, "loss": 0.0569, "step": 2254 }, { "epoch": 0.3142200236884275, "grad_norm": 0.0630577802658081, "learning_rate": 8.407070096590243e-06, "loss": 0.0626, "step": 2255 }, { "epoch": 0.3143593673796419, "grad_norm": 0.1404297947883606, "learning_rate": 8.405344792495694e-06, "loss": 0.0532, "step": 2256 }, { "epoch": 0.31449871107085625, "grad_norm": 0.13675762712955475, "learning_rate": 8.403618731811277e-06, "loss": 0.0565, "step": 2257 }, { "epoch": 0.31463805476207063, "grad_norm": 0.1530148833990097, "learning_rate": 8.401891914920483e-06, "loss": 0.0713, "step": 2258 }, { "epoch": 0.314777398453285, "grad_norm": 0.09240156412124634, "learning_rate": 8.400164342206973e-06, "loss": 0.0515, "step": 2259 }, { "epoch": 0.3149167421444994, "grad_norm": 0.10640855878591537, "learning_rate": 8.398436014054575e-06, "loss": 0.0638, "step": 2260 }, { "epoch": 0.31505608583571376, "grad_norm": 0.12092624604701996, "learning_rate": 8.39670693084728e-06, "loss": 0.0541, "step": 2261 }, { "epoch": 0.3151954295269282, "grad_norm": 0.07634814083576202, "learning_rate": 8.394977092969253e-06, "loss": 0.0561, "step": 2262 }, { "epoch": 0.31533477321814257, "grad_norm": 0.1598120629787445, "learning_rate": 8.393246500804825e-06, "loss": 0.0667, "step": 2263 }, { "epoch": 0.31547411690935695, "grad_norm": 0.09049724787473679, "learning_rate": 8.391515154738495e-06, "loss": 0.0506, "step": 2264 }, { "epoch": 0.3156134606005713, "grad_norm": 0.11980580538511276, "learning_rate": 8.389783055154925e-06, "loss": 0.0623, "step": 2265 }, { "epoch": 0.3157528042917857, "grad_norm": 0.06720571219921112, "learning_rate": 8.388050202438952e-06, "loss": 0.0597, "step": 2266 }, { "epoch": 0.3158921479830001, "grad_norm": 0.12596674263477325, "learning_rate": 8.386316596975574e-06, "loss": 0.0519, "step": 2267 }, { "epoch": 0.31603149167421446, "grad_norm": 0.11663366109132767, "learning_rate": 8.38458223914996e-06, "loss": 0.056, "step": 2268 }, { "epoch": 0.31617083536542884, "grad_norm": 0.21597954630851746, "learning_rate": 8.38284712934744e-06, "loss": 0.0589, "step": 2269 }, { "epoch": 0.3163101790566432, "grad_norm": 0.08508048206567764, "learning_rate": 8.381111267953523e-06, "loss": 0.0502, "step": 2270 }, { "epoch": 0.3164495227478576, "grad_norm": 0.07456376403570175, "learning_rate": 8.379374655353874e-06, "loss": 0.0529, "step": 2271 }, { "epoch": 0.31658886643907197, "grad_norm": 0.06352357566356659, "learning_rate": 8.377637291934329e-06, "loss": 0.0496, "step": 2272 }, { "epoch": 0.31672821013028635, "grad_norm": 0.0742134153842926, "learning_rate": 8.37589917808089e-06, "loss": 0.0599, "step": 2273 }, { "epoch": 0.3168675538215007, "grad_norm": 0.28845295310020447, "learning_rate": 8.374160314179727e-06, "loss": 0.0807, "step": 2274 }, { "epoch": 0.3170068975127151, "grad_norm": 0.12292209267616272, "learning_rate": 8.372420700617176e-06, "loss": 0.0551, "step": 2275 }, { "epoch": 0.3171462412039295, "grad_norm": 0.09641236066818237, "learning_rate": 8.370680337779737e-06, "loss": 0.0653, "step": 2276 }, { "epoch": 0.31728558489514386, "grad_norm": 0.12048415094614029, "learning_rate": 8.368939226054083e-06, "loss": 0.0526, "step": 2277 }, { "epoch": 0.31742492858635823, "grad_norm": 0.09177891165018082, "learning_rate": 8.367197365827047e-06, "loss": 0.0584, "step": 2278 }, { "epoch": 0.3175642722775726, "grad_norm": 0.14287613332271576, "learning_rate": 8.36545475748563e-06, "loss": 0.0635, "step": 2279 }, { "epoch": 0.317703615968787, "grad_norm": 0.12669865787029266, "learning_rate": 8.363711401417e-06, "loss": 0.0647, "step": 2280 }, { "epoch": 0.31784295966000137, "grad_norm": 0.09081318229436874, "learning_rate": 8.361967298008494e-06, "loss": 0.0617, "step": 2281 }, { "epoch": 0.3179823033512158, "grad_norm": 0.2385437786579132, "learning_rate": 8.360222447647606e-06, "loss": 0.0619, "step": 2282 }, { "epoch": 0.3181216470424302, "grad_norm": 0.07681569457054138, "learning_rate": 8.358476850722007e-06, "loss": 0.0533, "step": 2283 }, { "epoch": 0.31826099073364456, "grad_norm": 0.06319331377744675, "learning_rate": 8.356730507619526e-06, "loss": 0.0577, "step": 2284 }, { "epoch": 0.31840033442485893, "grad_norm": 0.11956153064966202, "learning_rate": 8.354983418728165e-06, "loss": 0.0546, "step": 2285 }, { "epoch": 0.3185396781160733, "grad_norm": 0.09834164381027222, "learning_rate": 8.353235584436082e-06, "loss": 0.0514, "step": 2286 }, { "epoch": 0.3186790218072877, "grad_norm": 0.15939736366271973, "learning_rate": 8.351487005131606e-06, "loss": 0.0605, "step": 2287 }, { "epoch": 0.31881836549850207, "grad_norm": 0.07467974722385406, "learning_rate": 8.349737681203234e-06, "loss": 0.053, "step": 2288 }, { "epoch": 0.31895770918971644, "grad_norm": 0.15229380130767822, "learning_rate": 8.347987613039626e-06, "loss": 0.0562, "step": 2289 }, { "epoch": 0.3190970528809308, "grad_norm": 0.15419666469097137, "learning_rate": 8.346236801029605e-06, "loss": 0.0668, "step": 2290 }, { "epoch": 0.3192363965721452, "grad_norm": 0.12731757760047913, "learning_rate": 8.344485245562165e-06, "loss": 0.0582, "step": 2291 }, { "epoch": 0.3193757402633596, "grad_norm": 0.10916362702846527, "learning_rate": 8.342732947026457e-06, "loss": 0.0515, "step": 2292 }, { "epoch": 0.31951508395457395, "grad_norm": 0.1589113026857376, "learning_rate": 8.340979905811805e-06, "loss": 0.061, "step": 2293 }, { "epoch": 0.31965442764578833, "grad_norm": 0.09544054418802261, "learning_rate": 8.339226122307696e-06, "loss": 0.067, "step": 2294 }, { "epoch": 0.3197937713370027, "grad_norm": 0.10557595640420914, "learning_rate": 8.337471596903774e-06, "loss": 0.0584, "step": 2295 }, { "epoch": 0.3199331150282171, "grad_norm": 0.08690935373306274, "learning_rate": 8.335716329989863e-06, "loss": 0.0564, "step": 2296 }, { "epoch": 0.32007245871943146, "grad_norm": 0.10998164862394333, "learning_rate": 8.333960321955937e-06, "loss": 0.065, "step": 2297 }, { "epoch": 0.32021180241064584, "grad_norm": 0.06925298273563385, "learning_rate": 8.332203573192143e-06, "loss": 0.0506, "step": 2298 }, { "epoch": 0.3203511461018602, "grad_norm": 0.06384888291358948, "learning_rate": 8.330446084088791e-06, "loss": 0.0512, "step": 2299 }, { "epoch": 0.3204904897930746, "grad_norm": 0.11368469893932343, "learning_rate": 8.328687855036355e-06, "loss": 0.0608, "step": 2300 }, { "epoch": 0.320629833484289, "grad_norm": 0.12885479629039764, "learning_rate": 8.326928886425471e-06, "loss": 0.0637, "step": 2301 }, { "epoch": 0.3207691771755034, "grad_norm": 0.10765840858221054, "learning_rate": 8.325169178646946e-06, "loss": 0.0599, "step": 2302 }, { "epoch": 0.3209085208667178, "grad_norm": 0.12572570145130157, "learning_rate": 8.323408732091743e-06, "loss": 0.0648, "step": 2303 }, { "epoch": 0.32104786455793216, "grad_norm": 0.07732948660850525, "learning_rate": 8.321647547150995e-06, "loss": 0.0515, "step": 2304 }, { "epoch": 0.32118720824914654, "grad_norm": 0.10906482487916946, "learning_rate": 8.319885624215996e-06, "loss": 0.0674, "step": 2305 }, { "epoch": 0.3213265519403609, "grad_norm": 0.12370310723781586, "learning_rate": 8.318122963678206e-06, "loss": 0.0701, "step": 2306 }, { "epoch": 0.3214658956315753, "grad_norm": 0.1140928566455841, "learning_rate": 8.316359565929248e-06, "loss": 0.0619, "step": 2307 }, { "epoch": 0.32160523932278967, "grad_norm": 0.06854376941919327, "learning_rate": 8.314595431360906e-06, "loss": 0.0561, "step": 2308 }, { "epoch": 0.32174458301400405, "grad_norm": 0.0726468563079834, "learning_rate": 8.312830560365136e-06, "loss": 0.0607, "step": 2309 }, { "epoch": 0.3218839267052184, "grad_norm": 0.0926753506064415, "learning_rate": 8.311064953334046e-06, "loss": 0.0605, "step": 2310 }, { "epoch": 0.3220232703964328, "grad_norm": 0.0945417508482933, "learning_rate": 8.309298610659917e-06, "loss": 0.0591, "step": 2311 }, { "epoch": 0.3221626140876472, "grad_norm": 0.08381234109401703, "learning_rate": 8.307531532735188e-06, "loss": 0.0569, "step": 2312 }, { "epoch": 0.32230195777886156, "grad_norm": 0.08570068329572678, "learning_rate": 8.305763719952467e-06, "loss": 0.0503, "step": 2313 }, { "epoch": 0.32244130147007594, "grad_norm": 0.1330103874206543, "learning_rate": 8.303995172704519e-06, "loss": 0.0562, "step": 2314 }, { "epoch": 0.3225806451612903, "grad_norm": 0.09325860440731049, "learning_rate": 8.302225891384275e-06, "loss": 0.0632, "step": 2315 }, { "epoch": 0.3227199888525047, "grad_norm": 0.14283011853694916, "learning_rate": 8.300455876384827e-06, "loss": 0.0659, "step": 2316 }, { "epoch": 0.32285933254371907, "grad_norm": 0.12137575447559357, "learning_rate": 8.298685128099437e-06, "loss": 0.0737, "step": 2317 }, { "epoch": 0.32299867623493345, "grad_norm": 0.07533496618270874, "learning_rate": 8.29691364692152e-06, "loss": 0.0562, "step": 2318 }, { "epoch": 0.3231380199261478, "grad_norm": 0.07065509259700775, "learning_rate": 8.29514143324466e-06, "loss": 0.0573, "step": 2319 }, { "epoch": 0.3232773636173622, "grad_norm": 0.1336119920015335, "learning_rate": 8.293368487462604e-06, "loss": 0.0695, "step": 2320 }, { "epoch": 0.3234167073085766, "grad_norm": 0.08370397984981537, "learning_rate": 8.29159480996926e-06, "loss": 0.0559, "step": 2321 }, { "epoch": 0.323556050999791, "grad_norm": 0.09119946509599686, "learning_rate": 8.289820401158695e-06, "loss": 0.0589, "step": 2322 }, { "epoch": 0.3236953946910054, "grad_norm": 0.10566353797912598, "learning_rate": 8.288045261425146e-06, "loss": 0.0597, "step": 2323 }, { "epoch": 0.32383473838221977, "grad_norm": 0.06595487147569656, "learning_rate": 8.286269391163006e-06, "loss": 0.057, "step": 2324 }, { "epoch": 0.32397408207343414, "grad_norm": 0.11053820699453354, "learning_rate": 8.284492790766835e-06, "loss": 0.055, "step": 2325 }, { "epoch": 0.3241134257646485, "grad_norm": 0.10524913668632507, "learning_rate": 8.282715460631354e-06, "loss": 0.0514, "step": 2326 }, { "epoch": 0.3242527694558629, "grad_norm": 0.15870285034179688, "learning_rate": 8.280937401151441e-06, "loss": 0.0657, "step": 2327 }, { "epoch": 0.3243921131470773, "grad_norm": 0.08150215446949005, "learning_rate": 8.279158612722145e-06, "loss": 0.049, "step": 2328 }, { "epoch": 0.32453145683829165, "grad_norm": 0.13968031108379364, "learning_rate": 8.277379095738668e-06, "loss": 0.0657, "step": 2329 }, { "epoch": 0.32467080052950603, "grad_norm": 0.14214521646499634, "learning_rate": 8.27559885059638e-06, "loss": 0.0524, "step": 2330 }, { "epoch": 0.3248101442207204, "grad_norm": 0.13999231159687042, "learning_rate": 8.273817877690809e-06, "loss": 0.0546, "step": 2331 }, { "epoch": 0.3249494879119348, "grad_norm": 0.11714603006839752, "learning_rate": 8.272036177417649e-06, "loss": 0.0598, "step": 2332 }, { "epoch": 0.32508883160314916, "grad_norm": 0.17218750715255737, "learning_rate": 8.270253750172754e-06, "loss": 0.0655, "step": 2333 }, { "epoch": 0.32522817529436354, "grad_norm": 0.08502223342657089, "learning_rate": 8.268470596352134e-06, "loss": 0.0518, "step": 2334 }, { "epoch": 0.3253675189855779, "grad_norm": 0.11582108587026596, "learning_rate": 8.26668671635197e-06, "loss": 0.0625, "step": 2335 }, { "epoch": 0.3255068626767923, "grad_norm": 0.09523182362318039, "learning_rate": 8.264902110568598e-06, "loss": 0.0539, "step": 2336 }, { "epoch": 0.3256462063680067, "grad_norm": 0.15861265361309052, "learning_rate": 8.263116779398514e-06, "loss": 0.0619, "step": 2337 }, { "epoch": 0.32578555005922105, "grad_norm": 0.197810098528862, "learning_rate": 8.261330723238381e-06, "loss": 0.0519, "step": 2338 }, { "epoch": 0.32592489375043543, "grad_norm": 0.12366627156734467, "learning_rate": 8.25954394248502e-06, "loss": 0.0563, "step": 2339 }, { "epoch": 0.3260642374416498, "grad_norm": 0.08203567564487457, "learning_rate": 8.25775643753541e-06, "loss": 0.0571, "step": 2340 }, { "epoch": 0.3262035811328642, "grad_norm": 0.11588960886001587, "learning_rate": 8.255968208786694e-06, "loss": 0.0518, "step": 2341 }, { "epoch": 0.3263429248240786, "grad_norm": 0.15939249098300934, "learning_rate": 8.25417925663618e-06, "loss": 0.0574, "step": 2342 }, { "epoch": 0.326482268515293, "grad_norm": 0.10463227331638336, "learning_rate": 8.252389581481328e-06, "loss": 0.0609, "step": 2343 }, { "epoch": 0.3266216122065074, "grad_norm": 0.22092558443546295, "learning_rate": 8.250599183719763e-06, "loss": 0.0547, "step": 2344 }, { "epoch": 0.32676095589772175, "grad_norm": 0.13770195841789246, "learning_rate": 8.248808063749273e-06, "loss": 0.0603, "step": 2345 }, { "epoch": 0.3269002995889361, "grad_norm": 0.08938091993331909, "learning_rate": 8.247016221967802e-06, "loss": 0.0508, "step": 2346 }, { "epoch": 0.3270396432801505, "grad_norm": 0.09749958664178848, "learning_rate": 8.245223658773459e-06, "loss": 0.0519, "step": 2347 }, { "epoch": 0.3271789869713649, "grad_norm": 0.16059844195842743, "learning_rate": 8.243430374564507e-06, "loss": 0.0528, "step": 2348 }, { "epoch": 0.32731833066257926, "grad_norm": 0.24714115262031555, "learning_rate": 8.241636369739376e-06, "loss": 0.0758, "step": 2349 }, { "epoch": 0.32745767435379364, "grad_norm": 0.206368088722229, "learning_rate": 8.23984164469665e-06, "loss": 0.0689, "step": 2350 }, { "epoch": 0.327597018045008, "grad_norm": 0.13613943755626678, "learning_rate": 8.23804619983508e-06, "loss": 0.0518, "step": 2351 }, { "epoch": 0.3277363617362224, "grad_norm": 0.06830716133117676, "learning_rate": 8.236250035553569e-06, "loss": 0.0521, "step": 2352 }, { "epoch": 0.32787570542743677, "grad_norm": 0.17686553299427032, "learning_rate": 8.234453152251183e-06, "loss": 0.0624, "step": 2353 }, { "epoch": 0.32801504911865115, "grad_norm": 0.11844190210103989, "learning_rate": 8.23265555032715e-06, "loss": 0.0674, "step": 2354 }, { "epoch": 0.3281543928098655, "grad_norm": 0.15501601994037628, "learning_rate": 8.23085723018086e-06, "loss": 0.0583, "step": 2355 }, { "epoch": 0.3282937365010799, "grad_norm": 0.14211295545101166, "learning_rate": 8.229058192211851e-06, "loss": 0.051, "step": 2356 }, { "epoch": 0.3284330801922943, "grad_norm": 0.10050410032272339, "learning_rate": 8.227258436819836e-06, "loss": 0.0512, "step": 2357 }, { "epoch": 0.32857242388350866, "grad_norm": 0.10047059506177902, "learning_rate": 8.225457964404675e-06, "loss": 0.0603, "step": 2358 }, { "epoch": 0.32871176757472303, "grad_norm": 0.0753963440656662, "learning_rate": 8.223656775366393e-06, "loss": 0.0521, "step": 2359 }, { "epoch": 0.3288511112659374, "grad_norm": 0.06394664943218231, "learning_rate": 8.221854870105172e-06, "loss": 0.0474, "step": 2360 }, { "epoch": 0.3289904549571518, "grad_norm": 0.10473164916038513, "learning_rate": 8.220052249021356e-06, "loss": 0.064, "step": 2361 }, { "epoch": 0.32912979864836617, "grad_norm": 0.08836869150400162, "learning_rate": 8.218248912515443e-06, "loss": 0.0499, "step": 2362 }, { "epoch": 0.3292691423395806, "grad_norm": 0.1184493824839592, "learning_rate": 8.216444860988098e-06, "loss": 0.0613, "step": 2363 }, { "epoch": 0.329408486030795, "grad_norm": 0.06553347408771515, "learning_rate": 8.214640094840136e-06, "loss": 0.0545, "step": 2364 }, { "epoch": 0.32954782972200936, "grad_norm": 0.10925287008285522, "learning_rate": 8.212834614472538e-06, "loss": 0.0561, "step": 2365 }, { "epoch": 0.32968717341322373, "grad_norm": 0.1637182980775833, "learning_rate": 8.211028420286437e-06, "loss": 0.0778, "step": 2366 }, { "epoch": 0.3298265171044381, "grad_norm": 0.10560283064842224, "learning_rate": 8.209221512683132e-06, "loss": 0.058, "step": 2367 }, { "epoch": 0.3299658607956525, "grad_norm": 0.1626710742712021, "learning_rate": 8.207413892064073e-06, "loss": 0.0595, "step": 2368 }, { "epoch": 0.33010520448686687, "grad_norm": 0.08230918645858765, "learning_rate": 8.205605558830873e-06, "loss": 0.0579, "step": 2369 }, { "epoch": 0.33024454817808124, "grad_norm": 0.10784580558538437, "learning_rate": 8.203796513385307e-06, "loss": 0.0607, "step": 2370 }, { "epoch": 0.3303838918692956, "grad_norm": 0.09750746190547943, "learning_rate": 8.201986756129297e-06, "loss": 0.0619, "step": 2371 }, { "epoch": 0.33052323556051, "grad_norm": 0.07111188769340515, "learning_rate": 8.200176287464931e-06, "loss": 0.0586, "step": 2372 }, { "epoch": 0.3306625792517244, "grad_norm": 0.08847260475158691, "learning_rate": 8.198365107794457e-06, "loss": 0.0646, "step": 2373 }, { "epoch": 0.33080192294293875, "grad_norm": 0.08069943636655807, "learning_rate": 8.196553217520275e-06, "loss": 0.058, "step": 2374 }, { "epoch": 0.33094126663415313, "grad_norm": 0.10849206894636154, "learning_rate": 8.194740617044948e-06, "loss": 0.057, "step": 2375 }, { "epoch": 0.3310806103253675, "grad_norm": 0.05467933416366577, "learning_rate": 8.192927306771193e-06, "loss": 0.0464, "step": 2376 }, { "epoch": 0.3312199540165819, "grad_norm": 0.10962095111608505, "learning_rate": 8.191113287101884e-06, "loss": 0.0617, "step": 2377 }, { "epoch": 0.33135929770779626, "grad_norm": 0.09212049096822739, "learning_rate": 8.18929855844006e-06, "loss": 0.0585, "step": 2378 }, { "epoch": 0.33149864139901064, "grad_norm": 0.11555980890989304, "learning_rate": 8.187483121188908e-06, "loss": 0.0557, "step": 2379 }, { "epoch": 0.331637985090225, "grad_norm": 0.05930820107460022, "learning_rate": 8.185666975751778e-06, "loss": 0.0538, "step": 2380 }, { "epoch": 0.3317773287814394, "grad_norm": 0.06951795518398285, "learning_rate": 8.183850122532174e-06, "loss": 0.045, "step": 2381 }, { "epoch": 0.3319166724726538, "grad_norm": 0.07182539254426956, "learning_rate": 8.182032561933764e-06, "loss": 0.057, "step": 2382 }, { "epoch": 0.3320560161638682, "grad_norm": 0.09469356387853622, "learning_rate": 8.180214294360365e-06, "loss": 0.0608, "step": 2383 }, { "epoch": 0.3321953598550826, "grad_norm": 0.06005279719829559, "learning_rate": 8.178395320215953e-06, "loss": 0.0529, "step": 2384 }, { "epoch": 0.33233470354629696, "grad_norm": 0.13677753508090973, "learning_rate": 8.176575639904668e-06, "loss": 0.0531, "step": 2385 }, { "epoch": 0.33247404723751134, "grad_norm": 0.11666645854711533, "learning_rate": 8.174755253830797e-06, "loss": 0.0703, "step": 2386 }, { "epoch": 0.3326133909287257, "grad_norm": 0.1654483824968338, "learning_rate": 8.17293416239879e-06, "loss": 0.069, "step": 2387 }, { "epoch": 0.3327527346199401, "grad_norm": 0.06324901431798935, "learning_rate": 8.171112366013252e-06, "loss": 0.0534, "step": 2388 }, { "epoch": 0.33289207831115447, "grad_norm": 0.1180112361907959, "learning_rate": 8.169289865078942e-06, "loss": 0.0615, "step": 2389 }, { "epoch": 0.33303142200236885, "grad_norm": 0.12424131482839584, "learning_rate": 8.167466660000781e-06, "loss": 0.0674, "step": 2390 }, { "epoch": 0.3331707656935832, "grad_norm": 0.1127358227968216, "learning_rate": 8.165642751183844e-06, "loss": 0.0629, "step": 2391 }, { "epoch": 0.3333101093847976, "grad_norm": 0.1048799455165863, "learning_rate": 8.163818139033359e-06, "loss": 0.0585, "step": 2392 }, { "epoch": 0.333449453076012, "grad_norm": 0.13223358988761902, "learning_rate": 8.161992823954715e-06, "loss": 0.0547, "step": 2393 }, { "epoch": 0.33358879676722636, "grad_norm": 0.16483533382415771, "learning_rate": 8.160166806353455e-06, "loss": 0.0541, "step": 2394 }, { "epoch": 0.33372814045844074, "grad_norm": 0.0695590078830719, "learning_rate": 8.15834008663528e-06, "loss": 0.0549, "step": 2395 }, { "epoch": 0.3338674841496551, "grad_norm": 0.10829152166843414, "learning_rate": 8.156512665206043e-06, "loss": 0.0654, "step": 2396 }, { "epoch": 0.3340068278408695, "grad_norm": 0.12892191112041473, "learning_rate": 8.154684542471754e-06, "loss": 0.0517, "step": 2397 }, { "epoch": 0.33414617153208387, "grad_norm": 0.09720274806022644, "learning_rate": 8.152855718838583e-06, "loss": 0.0534, "step": 2398 }, { "epoch": 0.33428551522329825, "grad_norm": 0.11125198751688004, "learning_rate": 8.151026194712854e-06, "loss": 0.0603, "step": 2399 }, { "epoch": 0.3344248589145126, "grad_norm": 0.10104135423898697, "learning_rate": 8.149195970501043e-06, "loss": 0.0526, "step": 2400 }, { "epoch": 0.334564202605727, "grad_norm": 0.17175991833209991, "learning_rate": 8.147365046609786e-06, "loss": 0.0656, "step": 2401 }, { "epoch": 0.3347035462969414, "grad_norm": 0.05751888081431389, "learning_rate": 8.145533423445869e-06, "loss": 0.0559, "step": 2402 }, { "epoch": 0.3348428899881558, "grad_norm": 0.08807696402072906, "learning_rate": 8.14370110141624e-06, "loss": 0.0645, "step": 2403 }, { "epoch": 0.3349822336793702, "grad_norm": 0.11923784017562866, "learning_rate": 8.141868080927998e-06, "loss": 0.0574, "step": 2404 }, { "epoch": 0.33512157737058457, "grad_norm": 0.15164606273174286, "learning_rate": 8.140034362388398e-06, "loss": 0.0519, "step": 2405 }, { "epoch": 0.33526092106179894, "grad_norm": 0.17615538835525513, "learning_rate": 8.13819994620485e-06, "loss": 0.0538, "step": 2406 }, { "epoch": 0.3354002647530133, "grad_norm": 0.13373549282550812, "learning_rate": 8.136364832784923e-06, "loss": 0.0613, "step": 2407 }, { "epoch": 0.3355396084442277, "grad_norm": 0.14091703295707703, "learning_rate": 8.134529022536332e-06, "loss": 0.0543, "step": 2408 }, { "epoch": 0.3356789521354421, "grad_norm": 0.05764051154255867, "learning_rate": 8.132692515866959e-06, "loss": 0.0519, "step": 2409 }, { "epoch": 0.33581829582665645, "grad_norm": 0.12546293437480927, "learning_rate": 8.130855313184824e-06, "loss": 0.0617, "step": 2410 }, { "epoch": 0.33595763951787083, "grad_norm": 0.08010329306125641, "learning_rate": 8.129017414898121e-06, "loss": 0.0583, "step": 2411 }, { "epoch": 0.3360969832090852, "grad_norm": 0.08387430012226105, "learning_rate": 8.127178821415183e-06, "loss": 0.0517, "step": 2412 }, { "epoch": 0.3362363269002996, "grad_norm": 0.0723242461681366, "learning_rate": 8.125339533144507e-06, "loss": 0.057, "step": 2413 }, { "epoch": 0.33637567059151396, "grad_norm": 0.11370567977428436, "learning_rate": 8.123499550494737e-06, "loss": 0.0462, "step": 2414 }, { "epoch": 0.33651501428272834, "grad_norm": 0.13282926380634308, "learning_rate": 8.12165887387468e-06, "loss": 0.0582, "step": 2415 }, { "epoch": 0.3366543579739427, "grad_norm": 0.07781500369310379, "learning_rate": 8.11981750369329e-06, "loss": 0.056, "step": 2416 }, { "epoch": 0.3367937016651571, "grad_norm": 0.07305893301963806, "learning_rate": 8.117975440359677e-06, "loss": 0.0521, "step": 2417 }, { "epoch": 0.3369330453563715, "grad_norm": 0.07660122960805893, "learning_rate": 8.116132684283104e-06, "loss": 0.0599, "step": 2418 }, { "epoch": 0.33707238904758585, "grad_norm": 0.1269443780183792, "learning_rate": 8.114289235872993e-06, "loss": 0.0536, "step": 2419 }, { "epoch": 0.33721173273880023, "grad_norm": 0.16480009257793427, "learning_rate": 8.112445095538915e-06, "loss": 0.0771, "step": 2420 }, { "epoch": 0.3373510764300146, "grad_norm": 0.06898778676986694, "learning_rate": 8.110600263690592e-06, "loss": 0.0504, "step": 2421 }, { "epoch": 0.337490420121229, "grad_norm": 0.13934050500392914, "learning_rate": 8.10875474073791e-06, "loss": 0.0593, "step": 2422 }, { "epoch": 0.3376297638124434, "grad_norm": 0.2081681489944458, "learning_rate": 8.106908527090895e-06, "loss": 0.0701, "step": 2423 }, { "epoch": 0.3377691075036578, "grad_norm": 0.06421887129545212, "learning_rate": 8.10506162315974e-06, "loss": 0.0571, "step": 2424 }, { "epoch": 0.3379084511948722, "grad_norm": 0.06590869277715683, "learning_rate": 8.103214029354783e-06, "loss": 0.0522, "step": 2425 }, { "epoch": 0.33804779488608655, "grad_norm": 0.08611997216939926, "learning_rate": 8.101365746086514e-06, "loss": 0.0585, "step": 2426 }, { "epoch": 0.3381871385773009, "grad_norm": 0.057966772466897964, "learning_rate": 8.099516773765581e-06, "loss": 0.0601, "step": 2427 }, { "epoch": 0.3383264822685153, "grad_norm": 0.0938766822218895, "learning_rate": 8.097667112802784e-06, "loss": 0.0645, "step": 2428 }, { "epoch": 0.3384658259597297, "grad_norm": 0.07757169753313065, "learning_rate": 8.095816763609077e-06, "loss": 0.0567, "step": 2429 }, { "epoch": 0.33860516965094406, "grad_norm": 0.0824366956949234, "learning_rate": 8.093965726595565e-06, "loss": 0.0651, "step": 2430 }, { "epoch": 0.33874451334215844, "grad_norm": 0.10571043193340302, "learning_rate": 8.092114002173503e-06, "loss": 0.0568, "step": 2431 }, { "epoch": 0.3388838570333728, "grad_norm": 0.17655225098133087, "learning_rate": 8.090261590754304e-06, "loss": 0.066, "step": 2432 }, { "epoch": 0.3390232007245872, "grad_norm": 0.07997741550207138, "learning_rate": 8.088408492749534e-06, "loss": 0.0629, "step": 2433 }, { "epoch": 0.33916254441580157, "grad_norm": 0.11516809463500977, "learning_rate": 8.086554708570901e-06, "loss": 0.0567, "step": 2434 }, { "epoch": 0.33930188810701595, "grad_norm": 0.09471706300973892, "learning_rate": 8.084700238630283e-06, "loss": 0.0579, "step": 2435 }, { "epoch": 0.3394412317982303, "grad_norm": 0.10455150902271271, "learning_rate": 8.082845083339698e-06, "loss": 0.0694, "step": 2436 }, { "epoch": 0.3395805754894447, "grad_norm": 0.1243710070848465, "learning_rate": 8.080989243111315e-06, "loss": 0.0504, "step": 2437 }, { "epoch": 0.3397199191806591, "grad_norm": 0.0766463354229927, "learning_rate": 8.079132718357465e-06, "loss": 0.0578, "step": 2438 }, { "epoch": 0.33985926287187346, "grad_norm": 0.1991044580936432, "learning_rate": 8.07727550949062e-06, "loss": 0.0541, "step": 2439 }, { "epoch": 0.33999860656308784, "grad_norm": 0.12404149025678635, "learning_rate": 8.075417616923413e-06, "loss": 0.0542, "step": 2440 }, { "epoch": 0.3401379502543022, "grad_norm": 0.10397928953170776, "learning_rate": 8.073559041068626e-06, "loss": 0.0577, "step": 2441 }, { "epoch": 0.3402772939455166, "grad_norm": 0.10589592903852463, "learning_rate": 8.071699782339188e-06, "loss": 0.0648, "step": 2442 }, { "epoch": 0.340416637636731, "grad_norm": 0.1096700131893158, "learning_rate": 8.06983984114819e-06, "loss": 0.062, "step": 2443 }, { "epoch": 0.3405559813279454, "grad_norm": 0.17747123539447784, "learning_rate": 8.067979217908864e-06, "loss": 0.0633, "step": 2444 }, { "epoch": 0.3406953250191598, "grad_norm": 0.0667467713356018, "learning_rate": 8.066117913034597e-06, "loss": 0.0511, "step": 2445 }, { "epoch": 0.34083466871037416, "grad_norm": 0.10891728848218918, "learning_rate": 8.06425592693893e-06, "loss": 0.0643, "step": 2446 }, { "epoch": 0.34097401240158853, "grad_norm": 0.10342031717300415, "learning_rate": 8.062393260035557e-06, "loss": 0.0502, "step": 2447 }, { "epoch": 0.3411133560928029, "grad_norm": 0.2209322303533554, "learning_rate": 8.060529912738316e-06, "loss": 0.0644, "step": 2448 }, { "epoch": 0.3412526997840173, "grad_norm": 0.09633129835128784, "learning_rate": 8.058665885461201e-06, "loss": 0.0671, "step": 2449 }, { "epoch": 0.34139204347523167, "grad_norm": 0.09726747125387192, "learning_rate": 8.056801178618357e-06, "loss": 0.046, "step": 2450 }, { "epoch": 0.34153138716644604, "grad_norm": 0.09298671782016754, "learning_rate": 8.05493579262408e-06, "loss": 0.0512, "step": 2451 }, { "epoch": 0.3416707308576604, "grad_norm": 0.19402924180030823, "learning_rate": 8.053069727892813e-06, "loss": 0.0768, "step": 2452 }, { "epoch": 0.3418100745488748, "grad_norm": 0.10542802512645721, "learning_rate": 8.051202984839157e-06, "loss": 0.0576, "step": 2453 }, { "epoch": 0.3419494182400892, "grad_norm": 0.10379576683044434, "learning_rate": 8.049335563877858e-06, "loss": 0.0459, "step": 2454 }, { "epoch": 0.34208876193130355, "grad_norm": 0.16322369873523712, "learning_rate": 8.047467465423813e-06, "loss": 0.0638, "step": 2455 }, { "epoch": 0.34222810562251793, "grad_norm": 0.22117061913013458, "learning_rate": 8.045598689892072e-06, "loss": 0.0626, "step": 2456 }, { "epoch": 0.3423674493137323, "grad_norm": 0.1165585070848465, "learning_rate": 8.043729237697835e-06, "loss": 0.0651, "step": 2457 }, { "epoch": 0.3425067930049467, "grad_norm": 0.11911625415086746, "learning_rate": 8.041859109256452e-06, "loss": 0.044, "step": 2458 }, { "epoch": 0.34264613669616106, "grad_norm": 0.19637556374073029, "learning_rate": 8.03998830498342e-06, "loss": 0.0644, "step": 2459 }, { "epoch": 0.34278548038737544, "grad_norm": 0.44576066732406616, "learning_rate": 8.038116825294393e-06, "loss": 0.0909, "step": 2460 }, { "epoch": 0.3429248240785898, "grad_norm": 0.07670523226261139, "learning_rate": 8.036244670605166e-06, "loss": 0.0664, "step": 2461 }, { "epoch": 0.3430641677698042, "grad_norm": 0.16171570122241974, "learning_rate": 8.034371841331693e-06, "loss": 0.064, "step": 2462 }, { "epoch": 0.34320351146101863, "grad_norm": 0.1852671205997467, "learning_rate": 8.032498337890073e-06, "loss": 0.0673, "step": 2463 }, { "epoch": 0.343342855152233, "grad_norm": 0.23483552038669586, "learning_rate": 8.030624160696554e-06, "loss": 0.0554, "step": 2464 }, { "epoch": 0.3434821988434474, "grad_norm": 0.17221838235855103, "learning_rate": 8.02874931016754e-06, "loss": 0.0604, "step": 2465 }, { "epoch": 0.34362154253466176, "grad_norm": 0.18501004576683044, "learning_rate": 8.026873786719574e-06, "loss": 0.0597, "step": 2466 }, { "epoch": 0.34376088622587614, "grad_norm": 0.1294027864933014, "learning_rate": 8.024997590769359e-06, "loss": 0.0522, "step": 2467 }, { "epoch": 0.3439002299170905, "grad_norm": 0.14816318452358246, "learning_rate": 8.02312072273374e-06, "loss": 0.0612, "step": 2468 }, { "epoch": 0.3440395736083049, "grad_norm": 0.07400203496217728, "learning_rate": 8.021243183029715e-06, "loss": 0.049, "step": 2469 }, { "epoch": 0.34417891729951927, "grad_norm": 0.10568646341562271, "learning_rate": 8.019364972074432e-06, "loss": 0.06, "step": 2470 }, { "epoch": 0.34431826099073365, "grad_norm": 0.10304877161979675, "learning_rate": 8.017486090285185e-06, "loss": 0.0572, "step": 2471 }, { "epoch": 0.344457604681948, "grad_norm": 0.12742991745471954, "learning_rate": 8.01560653807942e-06, "loss": 0.0598, "step": 2472 }, { "epoch": 0.3445969483731624, "grad_norm": 0.1111554279923439, "learning_rate": 8.013726315874729e-06, "loss": 0.065, "step": 2473 }, { "epoch": 0.3447362920643768, "grad_norm": 0.07953966408967972, "learning_rate": 8.011845424088856e-06, "loss": 0.0506, "step": 2474 }, { "epoch": 0.34487563575559116, "grad_norm": 0.10336009413003922, "learning_rate": 8.009963863139689e-06, "loss": 0.0544, "step": 2475 }, { "epoch": 0.34501497944680554, "grad_norm": 0.09540779888629913, "learning_rate": 8.008081633445272e-06, "loss": 0.0671, "step": 2476 }, { "epoch": 0.3451543231380199, "grad_norm": 0.2605428993701935, "learning_rate": 8.00619873542379e-06, "loss": 0.0638, "step": 2477 }, { "epoch": 0.3452936668292343, "grad_norm": 0.12949782609939575, "learning_rate": 8.004315169493586e-06, "loss": 0.0604, "step": 2478 }, { "epoch": 0.34543301052044867, "grad_norm": 0.09767904132604599, "learning_rate": 8.002430936073137e-06, "loss": 0.0581, "step": 2479 }, { "epoch": 0.34557235421166305, "grad_norm": 0.14500366151332855, "learning_rate": 8.000546035581083e-06, "loss": 0.0676, "step": 2480 }, { "epoch": 0.3457116979028774, "grad_norm": 0.1215381845831871, "learning_rate": 7.998660468436202e-06, "loss": 0.0609, "step": 2481 }, { "epoch": 0.3458510415940918, "grad_norm": 0.15894336998462677, "learning_rate": 7.996774235057425e-06, "loss": 0.057, "step": 2482 }, { "epoch": 0.34599038528530623, "grad_norm": 0.0700206309556961, "learning_rate": 7.994887335863832e-06, "loss": 0.0621, "step": 2483 }, { "epoch": 0.3461297289765206, "grad_norm": 0.11334362626075745, "learning_rate": 7.992999771274646e-06, "loss": 0.0561, "step": 2484 }, { "epoch": 0.346269072667735, "grad_norm": 0.0965767428278923, "learning_rate": 7.991111541709244e-06, "loss": 0.0586, "step": 2485 }, { "epoch": 0.34640841635894937, "grad_norm": 0.08798698335886002, "learning_rate": 7.989222647587146e-06, "loss": 0.0586, "step": 2486 }, { "epoch": 0.34654776005016374, "grad_norm": 0.08846911042928696, "learning_rate": 7.987333089328018e-06, "loss": 0.0566, "step": 2487 }, { "epoch": 0.3466871037413781, "grad_norm": 0.10189858078956604, "learning_rate": 7.985442867351682e-06, "loss": 0.0472, "step": 2488 }, { "epoch": 0.3468264474325925, "grad_norm": 0.14416027069091797, "learning_rate": 7.983551982078097e-06, "loss": 0.0653, "step": 2489 }, { "epoch": 0.3469657911238069, "grad_norm": 0.07568563520908356, "learning_rate": 7.98166043392738e-06, "loss": 0.0671, "step": 2490 }, { "epoch": 0.34710513481502125, "grad_norm": 0.0858255997300148, "learning_rate": 7.979768223319786e-06, "loss": 0.0619, "step": 2491 }, { "epoch": 0.34724447850623563, "grad_norm": 0.08973351120948792, "learning_rate": 7.977875350675721e-06, "loss": 0.0625, "step": 2492 }, { "epoch": 0.34738382219745, "grad_norm": 0.0795707032084465, "learning_rate": 7.975981816415741e-06, "loss": 0.0617, "step": 2493 }, { "epoch": 0.3475231658886644, "grad_norm": 0.058353573083877563, "learning_rate": 7.974087620960543e-06, "loss": 0.0545, "step": 2494 }, { "epoch": 0.34766250957987876, "grad_norm": 0.11309468001127243, "learning_rate": 7.972192764730975e-06, "loss": 0.0588, "step": 2495 }, { "epoch": 0.34780185327109314, "grad_norm": 0.06588121503591537, "learning_rate": 7.970297248148033e-06, "loss": 0.063, "step": 2496 }, { "epoch": 0.3479411969623075, "grad_norm": 0.07571166753768921, "learning_rate": 7.968401071632854e-06, "loss": 0.0581, "step": 2497 }, { "epoch": 0.3480805406535219, "grad_norm": 0.09647103399038315, "learning_rate": 7.966504235606726e-06, "loss": 0.0688, "step": 2498 }, { "epoch": 0.3482198843447363, "grad_norm": 0.16693340241909027, "learning_rate": 7.964606740491085e-06, "loss": 0.0669, "step": 2499 }, { "epoch": 0.34835922803595065, "grad_norm": 0.07317418605089188, "learning_rate": 7.962708586707508e-06, "loss": 0.0635, "step": 2500 }, { "epoch": 0.34849857172716503, "grad_norm": 0.0958135575056076, "learning_rate": 7.960809774677722e-06, "loss": 0.0627, "step": 2501 }, { "epoch": 0.3486379154183794, "grad_norm": 0.06765443086624146, "learning_rate": 7.958910304823603e-06, "loss": 0.0556, "step": 2502 }, { "epoch": 0.34877725910959384, "grad_norm": 0.07561981678009033, "learning_rate": 7.957010177567167e-06, "loss": 0.0507, "step": 2503 }, { "epoch": 0.3489166028008082, "grad_norm": 0.059713415801525116, "learning_rate": 7.955109393330577e-06, "loss": 0.0488, "step": 2504 }, { "epoch": 0.3490559464920226, "grad_norm": 0.1252477914094925, "learning_rate": 7.953207952536147e-06, "loss": 0.0649, "step": 2505 }, { "epoch": 0.349195290183237, "grad_norm": 0.06876673549413681, "learning_rate": 7.951305855606333e-06, "loss": 0.0525, "step": 2506 }, { "epoch": 0.34933463387445135, "grad_norm": 0.07338748872280121, "learning_rate": 7.949403102963738e-06, "loss": 0.0554, "step": 2507 }, { "epoch": 0.3494739775656657, "grad_norm": 0.07287417352199554, "learning_rate": 7.947499695031108e-06, "loss": 0.0581, "step": 2508 }, { "epoch": 0.3496133212568801, "grad_norm": 0.0944846123456955, "learning_rate": 7.94559563223134e-06, "loss": 0.062, "step": 2509 }, { "epoch": 0.3497526649480945, "grad_norm": 0.09137991070747375, "learning_rate": 7.943690914987472e-06, "loss": 0.0493, "step": 2510 }, { "epoch": 0.34989200863930886, "grad_norm": 0.06462689489126205, "learning_rate": 7.941785543722686e-06, "loss": 0.0643, "step": 2511 }, { "epoch": 0.35003135233052324, "grad_norm": 0.07694479078054428, "learning_rate": 7.939879518860316e-06, "loss": 0.0635, "step": 2512 }, { "epoch": 0.3501706960217376, "grad_norm": 0.06789828836917877, "learning_rate": 7.937972840823836e-06, "loss": 0.0557, "step": 2513 }, { "epoch": 0.350310039712952, "grad_norm": 0.13896119594573975, "learning_rate": 7.936065510036863e-06, "loss": 0.0641, "step": 2514 }, { "epoch": 0.35044938340416637, "grad_norm": 0.21414408087730408, "learning_rate": 7.934157526923167e-06, "loss": 0.0562, "step": 2515 }, { "epoch": 0.35058872709538075, "grad_norm": 0.06543333828449249, "learning_rate": 7.932248891906657e-06, "loss": 0.0515, "step": 2516 }, { "epoch": 0.3507280707865951, "grad_norm": 0.06999102979898453, "learning_rate": 7.930339605411387e-06, "loss": 0.0506, "step": 2517 }, { "epoch": 0.3508674144778095, "grad_norm": 0.09160561859607697, "learning_rate": 7.92842966786156e-06, "loss": 0.0591, "step": 2518 }, { "epoch": 0.3510067581690239, "grad_norm": 0.045092832297086716, "learning_rate": 7.926519079681514e-06, "loss": 0.0485, "step": 2519 }, { "epoch": 0.35114610186023826, "grad_norm": 0.08866021037101746, "learning_rate": 7.924607841295744e-06, "loss": 0.0605, "step": 2520 }, { "epoch": 0.35128544555145264, "grad_norm": 0.08626998215913773, "learning_rate": 7.92269595312888e-06, "loss": 0.0578, "step": 2521 }, { "epoch": 0.351424789242667, "grad_norm": 0.07087317854166031, "learning_rate": 7.920783415605703e-06, "loss": 0.0559, "step": 2522 }, { "epoch": 0.35156413293388145, "grad_norm": 0.15547366440296173, "learning_rate": 7.918870229151134e-06, "loss": 0.0655, "step": 2523 }, { "epoch": 0.3517034766250958, "grad_norm": 0.05981743708252907, "learning_rate": 7.916956394190238e-06, "loss": 0.0574, "step": 2524 }, { "epoch": 0.3518428203163102, "grad_norm": 0.07392057776451111, "learning_rate": 7.915041911148229e-06, "loss": 0.045, "step": 2525 }, { "epoch": 0.3519821640075246, "grad_norm": 0.11345337331295013, "learning_rate": 7.913126780450455e-06, "loss": 0.0513, "step": 2526 }, { "epoch": 0.35212150769873896, "grad_norm": 0.07811589539051056, "learning_rate": 7.911211002522422e-06, "loss": 0.0486, "step": 2527 }, { "epoch": 0.35226085138995333, "grad_norm": 0.09779399633407593, "learning_rate": 7.909294577789765e-06, "loss": 0.0484, "step": 2528 }, { "epoch": 0.3524001950811677, "grad_norm": 0.0724678635597229, "learning_rate": 7.907377506678274e-06, "loss": 0.0575, "step": 2529 }, { "epoch": 0.3525395387723821, "grad_norm": 0.1062382310628891, "learning_rate": 7.905459789613878e-06, "loss": 0.0547, "step": 2530 }, { "epoch": 0.35267888246359647, "grad_norm": 0.09570898115634918, "learning_rate": 7.90354142702265e-06, "loss": 0.0602, "step": 2531 }, { "epoch": 0.35281822615481084, "grad_norm": 0.07921335101127625, "learning_rate": 7.901622419330805e-06, "loss": 0.0599, "step": 2532 }, { "epoch": 0.3529575698460252, "grad_norm": 0.09618836641311646, "learning_rate": 7.899702766964705e-06, "loss": 0.048, "step": 2533 }, { "epoch": 0.3530969135372396, "grad_norm": 0.10177461057901382, "learning_rate": 7.89778247035085e-06, "loss": 0.0603, "step": 2534 }, { "epoch": 0.353236257228454, "grad_norm": 0.07192817330360413, "learning_rate": 7.895861529915889e-06, "loss": 0.0585, "step": 2535 }, { "epoch": 0.35337560091966835, "grad_norm": 0.1290222853422165, "learning_rate": 7.893939946086609e-06, "loss": 0.0561, "step": 2536 }, { "epoch": 0.35351494461088273, "grad_norm": 0.07160850614309311, "learning_rate": 7.892017719289941e-06, "loss": 0.053, "step": 2537 }, { "epoch": 0.3536542883020971, "grad_norm": 0.09807463735342026, "learning_rate": 7.890094849952964e-06, "loss": 0.0644, "step": 2538 }, { "epoch": 0.3537936319933115, "grad_norm": 0.06802725046873093, "learning_rate": 7.888171338502893e-06, "loss": 0.0518, "step": 2539 }, { "epoch": 0.35393297568452586, "grad_norm": 0.07088422775268555, "learning_rate": 7.886247185367088e-06, "loss": 0.0667, "step": 2540 }, { "epoch": 0.35407231937574024, "grad_norm": 0.06389138102531433, "learning_rate": 7.884322390973053e-06, "loss": 0.0599, "step": 2541 }, { "epoch": 0.3542116630669546, "grad_norm": 0.10061181336641312, "learning_rate": 7.882396955748432e-06, "loss": 0.0633, "step": 2542 }, { "epoch": 0.35435100675816905, "grad_norm": 0.08740940690040588, "learning_rate": 7.880470880121015e-06, "loss": 0.0619, "step": 2543 }, { "epoch": 0.35449035044938343, "grad_norm": 0.14025476574897766, "learning_rate": 7.878544164518731e-06, "loss": 0.0568, "step": 2544 }, { "epoch": 0.3546296941405978, "grad_norm": 0.06330757588148117, "learning_rate": 7.87661680936965e-06, "loss": 0.0554, "step": 2545 }, { "epoch": 0.3547690378318122, "grad_norm": 0.15942424535751343, "learning_rate": 7.87468881510199e-06, "loss": 0.0712, "step": 2546 }, { "epoch": 0.35490838152302656, "grad_norm": 0.14120303094387054, "learning_rate": 7.872760182144104e-06, "loss": 0.0692, "step": 2547 }, { "epoch": 0.35504772521424094, "grad_norm": 0.056883346289396286, "learning_rate": 7.870830910924491e-06, "loss": 0.0478, "step": 2548 }, { "epoch": 0.3551870689054553, "grad_norm": 0.06659538298845291, "learning_rate": 7.868901001871797e-06, "loss": 0.0633, "step": 2549 }, { "epoch": 0.3553264125966697, "grad_norm": 0.1240743100643158, "learning_rate": 7.866970455414793e-06, "loss": 0.0619, "step": 2550 }, { "epoch": 0.35546575628788407, "grad_norm": 0.06547722220420837, "learning_rate": 7.86503927198241e-06, "loss": 0.054, "step": 2551 }, { "epoch": 0.35560509997909845, "grad_norm": 0.09129791706800461, "learning_rate": 7.863107452003711e-06, "loss": 0.0507, "step": 2552 }, { "epoch": 0.3557444436703128, "grad_norm": 0.07088819146156311, "learning_rate": 7.861174995907901e-06, "loss": 0.0523, "step": 2553 }, { "epoch": 0.3558837873615272, "grad_norm": 0.07303763180971146, "learning_rate": 7.85924190412433e-06, "loss": 0.0504, "step": 2554 }, { "epoch": 0.3560231310527416, "grad_norm": 0.15694142878055573, "learning_rate": 7.857308177082484e-06, "loss": 0.0757, "step": 2555 }, { "epoch": 0.35616247474395596, "grad_norm": 0.07101074606180191, "learning_rate": 7.855373815211995e-06, "loss": 0.054, "step": 2556 }, { "epoch": 0.35630181843517034, "grad_norm": 0.09014160931110382, "learning_rate": 7.853438818942633e-06, "loss": 0.0645, "step": 2557 }, { "epoch": 0.3564411621263847, "grad_norm": 0.06407833099365234, "learning_rate": 7.851503188704312e-06, "loss": 0.0564, "step": 2558 }, { "epoch": 0.3565805058175991, "grad_norm": 0.0860549658536911, "learning_rate": 7.849566924927082e-06, "loss": 0.0622, "step": 2559 }, { "epoch": 0.35671984950881347, "grad_norm": 0.1305493265390396, "learning_rate": 7.84763002804114e-06, "loss": 0.0575, "step": 2560 }, { "epoch": 0.35685919320002785, "grad_norm": 0.14540015161037445, "learning_rate": 7.845692498476816e-06, "loss": 0.0558, "step": 2561 }, { "epoch": 0.3569985368912422, "grad_norm": 0.12493133544921875, "learning_rate": 7.843754336664589e-06, "loss": 0.0541, "step": 2562 }, { "epoch": 0.3571378805824566, "grad_norm": 0.07868760079145432, "learning_rate": 7.84181554303507e-06, "loss": 0.0614, "step": 2563 }, { "epoch": 0.35727722427367103, "grad_norm": 0.06106138229370117, "learning_rate": 7.839876118019019e-06, "loss": 0.0476, "step": 2564 }, { "epoch": 0.3574165679648854, "grad_norm": 0.061713725328445435, "learning_rate": 7.837936062047329e-06, "loss": 0.0537, "step": 2565 }, { "epoch": 0.3575559116560998, "grad_norm": 0.12501981854438782, "learning_rate": 7.835995375551038e-06, "loss": 0.0568, "step": 2566 }, { "epoch": 0.35769525534731417, "grad_norm": 0.11759748309850693, "learning_rate": 7.83405405896132e-06, "loss": 0.0578, "step": 2567 }, { "epoch": 0.35783459903852854, "grad_norm": 0.17379729449748993, "learning_rate": 7.832112112709496e-06, "loss": 0.0726, "step": 2568 }, { "epoch": 0.3579739427297429, "grad_norm": 0.05523383989930153, "learning_rate": 7.830169537227015e-06, "loss": 0.049, "step": 2569 }, { "epoch": 0.3581132864209573, "grad_norm": 0.1499798446893692, "learning_rate": 7.828226332945479e-06, "loss": 0.0558, "step": 2570 }, { "epoch": 0.3582526301121717, "grad_norm": 0.19223394989967346, "learning_rate": 7.82628250029662e-06, "loss": 0.0648, "step": 2571 }, { "epoch": 0.35839197380338605, "grad_norm": 0.08553807437419891, "learning_rate": 7.824338039712316e-06, "loss": 0.0605, "step": 2572 }, { "epoch": 0.35853131749460043, "grad_norm": 0.09974987059831619, "learning_rate": 7.82239295162458e-06, "loss": 0.0451, "step": 2573 }, { "epoch": 0.3586706611858148, "grad_norm": 0.10399340838193893, "learning_rate": 7.820447236465565e-06, "loss": 0.0642, "step": 2574 }, { "epoch": 0.3588100048770292, "grad_norm": 0.10966131836175919, "learning_rate": 7.818500894667566e-06, "loss": 0.0701, "step": 2575 }, { "epoch": 0.35894934856824356, "grad_norm": 0.09616638720035553, "learning_rate": 7.816553926663018e-06, "loss": 0.0671, "step": 2576 }, { "epoch": 0.35908869225945794, "grad_norm": 0.10935399681329727, "learning_rate": 7.81460633288449e-06, "loss": 0.0518, "step": 2577 }, { "epoch": 0.3592280359506723, "grad_norm": 0.23751670122146606, "learning_rate": 7.812658113764691e-06, "loss": 0.0617, "step": 2578 }, { "epoch": 0.3593673796418867, "grad_norm": 0.10603509098291397, "learning_rate": 7.810709269736476e-06, "loss": 0.0526, "step": 2579 }, { "epoch": 0.3595067233331011, "grad_norm": 0.06978169828653336, "learning_rate": 7.808759801232829e-06, "loss": 0.0417, "step": 2580 }, { "epoch": 0.35964606702431545, "grad_norm": 0.110854372382164, "learning_rate": 7.80680970868688e-06, "loss": 0.0612, "step": 2581 }, { "epoch": 0.35978541071552983, "grad_norm": 0.15933725237846375, "learning_rate": 7.804858992531893e-06, "loss": 0.0587, "step": 2582 }, { "epoch": 0.3599247544067442, "grad_norm": 0.07528204470872879, "learning_rate": 7.802907653201275e-06, "loss": 0.0553, "step": 2583 }, { "epoch": 0.36006409809795864, "grad_norm": 0.0986090674996376, "learning_rate": 7.800955691128568e-06, "loss": 0.0574, "step": 2584 }, { "epoch": 0.360203441789173, "grad_norm": 0.10515158623456955, "learning_rate": 7.799003106747453e-06, "loss": 0.0664, "step": 2585 }, { "epoch": 0.3603427854803874, "grad_norm": 0.07588968425989151, "learning_rate": 7.79704990049175e-06, "loss": 0.061, "step": 2586 }, { "epoch": 0.3604821291716018, "grad_norm": 0.07685665041208267, "learning_rate": 7.795096072795418e-06, "loss": 0.0497, "step": 2587 }, { "epoch": 0.36062147286281615, "grad_norm": 0.09307359158992767, "learning_rate": 7.793141624092551e-06, "loss": 0.0601, "step": 2588 }, { "epoch": 0.36076081655403053, "grad_norm": 0.12352464348077774, "learning_rate": 7.791186554817383e-06, "loss": 0.0469, "step": 2589 }, { "epoch": 0.3609001602452449, "grad_norm": 0.08027436584234238, "learning_rate": 7.789230865404287e-06, "loss": 0.055, "step": 2590 }, { "epoch": 0.3610395039364593, "grad_norm": 0.14121924340724945, "learning_rate": 7.787274556287771e-06, "loss": 0.0639, "step": 2591 }, { "epoch": 0.36117884762767366, "grad_norm": 0.07956371456384659, "learning_rate": 7.785317627902484e-06, "loss": 0.0494, "step": 2592 }, { "epoch": 0.36131819131888804, "grad_norm": 0.06096837669610977, "learning_rate": 7.783360080683212e-06, "loss": 0.0521, "step": 2593 }, { "epoch": 0.3614575350101024, "grad_norm": 0.0800992101430893, "learning_rate": 7.781401915064873e-06, "loss": 0.0601, "step": 2594 }, { "epoch": 0.3615968787013168, "grad_norm": 0.06788543611764908, "learning_rate": 7.779443131482529e-06, "loss": 0.0501, "step": 2595 }, { "epoch": 0.36173622239253117, "grad_norm": 0.15257303416728973, "learning_rate": 7.777483730371375e-06, "loss": 0.0671, "step": 2596 }, { "epoch": 0.36187556608374555, "grad_norm": 0.10094641894102097, "learning_rate": 7.77552371216675e-06, "loss": 0.0461, "step": 2597 }, { "epoch": 0.3620149097749599, "grad_norm": 0.10034562647342682, "learning_rate": 7.773563077304123e-06, "loss": 0.0557, "step": 2598 }, { "epoch": 0.3621542534661743, "grad_norm": 0.05410931259393692, "learning_rate": 7.7716018262191e-06, "loss": 0.0461, "step": 2599 }, { "epoch": 0.3622935971573887, "grad_norm": 0.09139810502529144, "learning_rate": 7.769639959347428e-06, "loss": 0.0581, "step": 2600 }, { "epoch": 0.36243294084860306, "grad_norm": 0.10230701416730881, "learning_rate": 7.767677477124988e-06, "loss": 0.048, "step": 2601 }, { "epoch": 0.36257228453981744, "grad_norm": 0.12864337861537933, "learning_rate": 7.765714379987804e-06, "loss": 0.058, "step": 2602 }, { "epoch": 0.3627116282310318, "grad_norm": 0.09878251701593399, "learning_rate": 7.763750668372023e-06, "loss": 0.0657, "step": 2603 }, { "epoch": 0.36285097192224625, "grad_norm": 0.1086375042796135, "learning_rate": 7.761786342713941e-06, "loss": 0.0569, "step": 2604 }, { "epoch": 0.3629903156134606, "grad_norm": 0.09609600901603699, "learning_rate": 7.75982140344999e-06, "loss": 0.0601, "step": 2605 }, { "epoch": 0.363129659304675, "grad_norm": 0.08142641931772232, "learning_rate": 7.757855851016727e-06, "loss": 0.0508, "step": 2606 }, { "epoch": 0.3632690029958894, "grad_norm": 0.09419004619121552, "learning_rate": 7.755889685850858e-06, "loss": 0.0442, "step": 2607 }, { "epoch": 0.36340834668710376, "grad_norm": 0.10958628356456757, "learning_rate": 7.75392290838922e-06, "loss": 0.0665, "step": 2608 }, { "epoch": 0.36354769037831813, "grad_norm": 0.10143431276082993, "learning_rate": 7.751955519068783e-06, "loss": 0.0522, "step": 2609 }, { "epoch": 0.3636870340695325, "grad_norm": 0.06858308613300323, "learning_rate": 7.74998751832666e-06, "loss": 0.0561, "step": 2610 }, { "epoch": 0.3638263777607469, "grad_norm": 0.08638472855091095, "learning_rate": 7.748018906600092e-06, "loss": 0.05, "step": 2611 }, { "epoch": 0.36396572145196127, "grad_norm": 0.0962972566485405, "learning_rate": 7.746049684326462e-06, "loss": 0.0604, "step": 2612 }, { "epoch": 0.36410506514317564, "grad_norm": 0.0754188671708107, "learning_rate": 7.744079851943286e-06, "loss": 0.05, "step": 2613 }, { "epoch": 0.36424440883439, "grad_norm": 0.075643390417099, "learning_rate": 7.742109409888213e-06, "loss": 0.0571, "step": 2614 }, { "epoch": 0.3643837525256044, "grad_norm": 0.07173145562410355, "learning_rate": 7.740138358599035e-06, "loss": 0.05, "step": 2615 }, { "epoch": 0.3645230962168188, "grad_norm": 0.09355877339839935, "learning_rate": 7.73816669851367e-06, "loss": 0.0569, "step": 2616 }, { "epoch": 0.36466243990803315, "grad_norm": 0.10098036378622055, "learning_rate": 7.73619443007018e-06, "loss": 0.0589, "step": 2617 }, { "epoch": 0.36480178359924753, "grad_norm": 0.08217095583677292, "learning_rate": 7.734221553706756e-06, "loss": 0.0566, "step": 2618 }, { "epoch": 0.3649411272904619, "grad_norm": 0.10425549745559692, "learning_rate": 7.732248069861726e-06, "loss": 0.0537, "step": 2619 }, { "epoch": 0.3650804709816763, "grad_norm": 0.15019117295742035, "learning_rate": 7.730273978973552e-06, "loss": 0.053, "step": 2620 }, { "epoch": 0.36521981467289066, "grad_norm": 0.07021111249923706, "learning_rate": 7.728299281480833e-06, "loss": 0.056, "step": 2621 }, { "epoch": 0.36535915836410504, "grad_norm": 0.0813564583659172, "learning_rate": 7.726323977822304e-06, "loss": 0.0578, "step": 2622 }, { "epoch": 0.3654985020553194, "grad_norm": 0.1024462878704071, "learning_rate": 7.72434806843683e-06, "loss": 0.0565, "step": 2623 }, { "epoch": 0.36563784574653385, "grad_norm": 0.061579104512929916, "learning_rate": 7.72237155376341e-06, "loss": 0.0504, "step": 2624 }, { "epoch": 0.36577718943774823, "grad_norm": 0.07926111668348312, "learning_rate": 7.720394434241185e-06, "loss": 0.0613, "step": 2625 }, { "epoch": 0.3659165331289626, "grad_norm": 0.09924912452697754, "learning_rate": 7.718416710309425e-06, "loss": 0.0559, "step": 2626 }, { "epoch": 0.366055876820177, "grad_norm": 0.1063460260629654, "learning_rate": 7.716438382407534e-06, "loss": 0.0614, "step": 2627 }, { "epoch": 0.36619522051139136, "grad_norm": 0.13597221672534943, "learning_rate": 7.714459450975052e-06, "loss": 0.0691, "step": 2628 }, { "epoch": 0.36633456420260574, "grad_norm": 0.120763398706913, "learning_rate": 7.712479916451651e-06, "loss": 0.0622, "step": 2629 }, { "epoch": 0.3664739078938201, "grad_norm": 0.07413850724697113, "learning_rate": 7.710499779277141e-06, "loss": 0.0537, "step": 2630 }, { "epoch": 0.3666132515850345, "grad_norm": 0.10784955322742462, "learning_rate": 7.708519039891462e-06, "loss": 0.0585, "step": 2631 }, { "epoch": 0.36675259527624887, "grad_norm": 0.10047662258148193, "learning_rate": 7.70653769873469e-06, "loss": 0.0536, "step": 2632 }, { "epoch": 0.36689193896746325, "grad_norm": 0.19210632145404816, "learning_rate": 7.70455575624703e-06, "loss": 0.0811, "step": 2633 }, { "epoch": 0.3670312826586776, "grad_norm": 0.07793720066547394, "learning_rate": 7.702573212868827e-06, "loss": 0.0563, "step": 2634 }, { "epoch": 0.367170626349892, "grad_norm": 0.08761702477931976, "learning_rate": 7.70059006904056e-06, "loss": 0.0518, "step": 2635 }, { "epoch": 0.3673099700411064, "grad_norm": 0.07638629525899887, "learning_rate": 7.698606325202832e-06, "loss": 0.0497, "step": 2636 }, { "epoch": 0.36744931373232076, "grad_norm": 0.06353484094142914, "learning_rate": 7.69662198179639e-06, "loss": 0.0599, "step": 2637 }, { "epoch": 0.36758865742353514, "grad_norm": 0.08124653249979019, "learning_rate": 7.694637039262109e-06, "loss": 0.0628, "step": 2638 }, { "epoch": 0.3677280011147495, "grad_norm": 0.11983674019575119, "learning_rate": 7.692651498040996e-06, "loss": 0.0562, "step": 2639 }, { "epoch": 0.3678673448059639, "grad_norm": 0.0896652489900589, "learning_rate": 7.690665358574197e-06, "loss": 0.0551, "step": 2640 }, { "epoch": 0.36800668849717827, "grad_norm": 0.06928026676177979, "learning_rate": 7.688678621302981e-06, "loss": 0.0487, "step": 2641 }, { "epoch": 0.36814603218839265, "grad_norm": 0.05857439339160919, "learning_rate": 7.686691286668761e-06, "loss": 0.0597, "step": 2642 }, { "epoch": 0.368285375879607, "grad_norm": 0.06668413430452347, "learning_rate": 7.684703355113074e-06, "loss": 0.0448, "step": 2643 }, { "epoch": 0.36842471957082146, "grad_norm": 0.059205640107393265, "learning_rate": 7.682714827077595e-06, "loss": 0.0512, "step": 2644 }, { "epoch": 0.36856406326203583, "grad_norm": 0.06375496834516525, "learning_rate": 7.68072570300413e-06, "loss": 0.0493, "step": 2645 }, { "epoch": 0.3687034069532502, "grad_norm": 0.08572086691856384, "learning_rate": 7.678735983334615e-06, "loss": 0.0511, "step": 2646 }, { "epoch": 0.3688427506444646, "grad_norm": 0.09988102316856384, "learning_rate": 7.676745668511121e-06, "loss": 0.0635, "step": 2647 }, { "epoch": 0.36898209433567897, "grad_norm": 0.06739804148674011, "learning_rate": 7.67475475897585e-06, "loss": 0.0592, "step": 2648 }, { "epoch": 0.36912143802689334, "grad_norm": 0.12213905155658722, "learning_rate": 7.672763255171138e-06, "loss": 0.0545, "step": 2649 }, { "epoch": 0.3692607817181077, "grad_norm": 0.06416672468185425, "learning_rate": 7.67077115753945e-06, "loss": 0.0505, "step": 2650 }, { "epoch": 0.3694001254093221, "grad_norm": 0.08865586668252945, "learning_rate": 7.668778466523386e-06, "loss": 0.0654, "step": 2651 }, { "epoch": 0.3695394691005365, "grad_norm": 0.07605335861444473, "learning_rate": 7.666785182565676e-06, "loss": 0.0504, "step": 2652 }, { "epoch": 0.36967881279175085, "grad_norm": 0.15699328482151031, "learning_rate": 7.664791306109183e-06, "loss": 0.0685, "step": 2653 }, { "epoch": 0.36981815648296523, "grad_norm": 0.12709051370620728, "learning_rate": 7.6627968375969e-06, "loss": 0.0662, "step": 2654 }, { "epoch": 0.3699575001741796, "grad_norm": 0.09919607639312744, "learning_rate": 7.660801777471951e-06, "loss": 0.0701, "step": 2655 }, { "epoch": 0.370096843865394, "grad_norm": 0.10762123018503189, "learning_rate": 7.658806126177596e-06, "loss": 0.0669, "step": 2656 }, { "epoch": 0.37023618755660836, "grad_norm": 0.0735398456454277, "learning_rate": 7.65680988415722e-06, "loss": 0.0585, "step": 2657 }, { "epoch": 0.37037553124782274, "grad_norm": 0.07897898554801941, "learning_rate": 7.654813051854345e-06, "loss": 0.0511, "step": 2658 }, { "epoch": 0.3705148749390371, "grad_norm": 0.12187356501817703, "learning_rate": 7.652815629712616e-06, "loss": 0.0577, "step": 2659 }, { "epoch": 0.3706542186302515, "grad_norm": 0.09855581074953079, "learning_rate": 7.650817618175824e-06, "loss": 0.0531, "step": 2660 }, { "epoch": 0.3707935623214659, "grad_norm": 0.06484176963567734, "learning_rate": 7.648819017687875e-06, "loss": 0.06, "step": 2661 }, { "epoch": 0.37093290601268025, "grad_norm": 0.08715000003576279, "learning_rate": 7.646819828692813e-06, "loss": 0.0542, "step": 2662 }, { "epoch": 0.37107224970389463, "grad_norm": 0.08342069387435913, "learning_rate": 7.644820051634813e-06, "loss": 0.0598, "step": 2663 }, { "epoch": 0.37121159339510906, "grad_norm": 0.10961803048849106, "learning_rate": 7.64281968695818e-06, "loss": 0.0601, "step": 2664 }, { "epoch": 0.37135093708632344, "grad_norm": 0.08728913962841034, "learning_rate": 7.640818735107351e-06, "loss": 0.0543, "step": 2665 }, { "epoch": 0.3714902807775378, "grad_norm": 0.12582024931907654, "learning_rate": 7.638817196526887e-06, "loss": 0.0599, "step": 2666 }, { "epoch": 0.3716296244687522, "grad_norm": 0.057439129799604416, "learning_rate": 7.636815071661488e-06, "loss": 0.0561, "step": 2667 }, { "epoch": 0.3717689681599666, "grad_norm": 0.07265079021453857, "learning_rate": 7.634812360955982e-06, "loss": 0.0637, "step": 2668 }, { "epoch": 0.37190831185118095, "grad_norm": 0.09397273510694504, "learning_rate": 7.63280906485532e-06, "loss": 0.0639, "step": 2669 }, { "epoch": 0.37204765554239533, "grad_norm": 0.08034280687570572, "learning_rate": 7.630805183804593e-06, "loss": 0.0527, "step": 2670 }, { "epoch": 0.3721869992336097, "grad_norm": 0.07248464971780777, "learning_rate": 7.628800718249017e-06, "loss": 0.0472, "step": 2671 }, { "epoch": 0.3723263429248241, "grad_norm": 0.07138349860906601, "learning_rate": 7.626795668633938e-06, "loss": 0.0488, "step": 2672 }, { "epoch": 0.37246568661603846, "grad_norm": 0.12641344964504242, "learning_rate": 7.624790035404831e-06, "loss": 0.0583, "step": 2673 }, { "epoch": 0.37260503030725284, "grad_norm": 0.09708906710147858, "learning_rate": 7.622783819007305e-06, "loss": 0.0516, "step": 2674 }, { "epoch": 0.3727443739984672, "grad_norm": 0.06814908981323242, "learning_rate": 7.620777019887091e-06, "loss": 0.067, "step": 2675 }, { "epoch": 0.3728837176896816, "grad_norm": 0.06822478026151657, "learning_rate": 7.6187696384900585e-06, "loss": 0.0574, "step": 2676 }, { "epoch": 0.37302306138089597, "grad_norm": 0.0784217044711113, "learning_rate": 7.616761675262199e-06, "loss": 0.0655, "step": 2677 }, { "epoch": 0.37316240507211035, "grad_norm": 0.05810046195983887, "learning_rate": 7.614753130649638e-06, "loss": 0.0644, "step": 2678 }, { "epoch": 0.3733017487633247, "grad_norm": 0.07352738827466965, "learning_rate": 7.612744005098625e-06, "loss": 0.053, "step": 2679 }, { "epoch": 0.3734410924545391, "grad_norm": 0.08905082941055298, "learning_rate": 7.6107342990555466e-06, "loss": 0.0594, "step": 2680 }, { "epoch": 0.3735804361457535, "grad_norm": 0.08231975883245468, "learning_rate": 7.60872401296691e-06, "loss": 0.0521, "step": 2681 }, { "epoch": 0.37371977983696786, "grad_norm": 0.09088157117366791, "learning_rate": 7.606713147279356e-06, "loss": 0.05, "step": 2682 }, { "epoch": 0.37385912352818224, "grad_norm": 0.17041145265102386, "learning_rate": 7.604701702439652e-06, "loss": 0.0708, "step": 2683 }, { "epoch": 0.37399846721939667, "grad_norm": 0.053382646292448044, "learning_rate": 7.602689678894697e-06, "loss": 0.0471, "step": 2684 }, { "epoch": 0.37413781091061105, "grad_norm": 0.12172995507717133, "learning_rate": 7.6006770770915165e-06, "loss": 0.0547, "step": 2685 }, { "epoch": 0.3742771546018254, "grad_norm": 0.10699079185724258, "learning_rate": 7.598663897477263e-06, "loss": 0.0525, "step": 2686 }, { "epoch": 0.3744164982930398, "grad_norm": 0.10263951867818832, "learning_rate": 7.59665014049922e-06, "loss": 0.06, "step": 2687 }, { "epoch": 0.3745558419842542, "grad_norm": 0.06829964369535446, "learning_rate": 7.594635806604797e-06, "loss": 0.0562, "step": 2688 }, { "epoch": 0.37469518567546856, "grad_norm": 0.06904228776693344, "learning_rate": 7.592620896241536e-06, "loss": 0.0585, "step": 2689 }, { "epoch": 0.37483452936668293, "grad_norm": 0.12225880473852158, "learning_rate": 7.590605409857103e-06, "loss": 0.0595, "step": 2690 }, { "epoch": 0.3749738730578973, "grad_norm": 0.09444096684455872, "learning_rate": 7.58858934789929e-06, "loss": 0.0574, "step": 2691 }, { "epoch": 0.3751132167491117, "grad_norm": 0.09983532130718231, "learning_rate": 7.586572710816025e-06, "loss": 0.0682, "step": 2692 }, { "epoch": 0.37525256044032607, "grad_norm": 0.11220551282167435, "learning_rate": 7.584555499055355e-06, "loss": 0.0596, "step": 2693 }, { "epoch": 0.37539190413154044, "grad_norm": 0.10212071985006332, "learning_rate": 7.58253771306546e-06, "loss": 0.0592, "step": 2694 }, { "epoch": 0.3755312478227548, "grad_norm": 0.1312166005373001, "learning_rate": 7.5805193532946445e-06, "loss": 0.0534, "step": 2695 }, { "epoch": 0.3756705915139692, "grad_norm": 0.13995113968849182, "learning_rate": 7.578500420191344e-06, "loss": 0.0547, "step": 2696 }, { "epoch": 0.3758099352051836, "grad_norm": 0.05224383994936943, "learning_rate": 7.576480914204118e-06, "loss": 0.0592, "step": 2697 }, { "epoch": 0.37594927889639795, "grad_norm": 0.0585506334900856, "learning_rate": 7.574460835781654e-06, "loss": 0.0517, "step": 2698 }, { "epoch": 0.37608862258761233, "grad_norm": 0.14536647498607635, "learning_rate": 7.572440185372769e-06, "loss": 0.0724, "step": 2699 }, { "epoch": 0.3762279662788267, "grad_norm": 0.14446552097797394, "learning_rate": 7.570418963426405e-06, "loss": 0.059, "step": 2700 }, { "epoch": 0.3763673099700411, "grad_norm": 0.11392788589000702, "learning_rate": 7.568397170391631e-06, "loss": 0.0641, "step": 2701 }, { "epoch": 0.37650665366125546, "grad_norm": 0.09063342958688736, "learning_rate": 7.566374806717642e-06, "loss": 0.0586, "step": 2702 }, { "epoch": 0.37664599735246984, "grad_norm": 0.13560383021831512, "learning_rate": 7.564351872853763e-06, "loss": 0.0562, "step": 2703 }, { "epoch": 0.3767853410436843, "grad_norm": 0.08720708638429642, "learning_rate": 7.562328369249443e-06, "loss": 0.057, "step": 2704 }, { "epoch": 0.37692468473489865, "grad_norm": 0.07066087424755096, "learning_rate": 7.560304296354259e-06, "loss": 0.0547, "step": 2705 }, { "epoch": 0.37706402842611303, "grad_norm": 0.16262200474739075, "learning_rate": 7.5582796546179125e-06, "loss": 0.0596, "step": 2706 }, { "epoch": 0.3772033721173274, "grad_norm": 0.06893585622310638, "learning_rate": 7.556254444490232e-06, "loss": 0.0563, "step": 2707 }, { "epoch": 0.3773427158085418, "grad_norm": 0.08660320192575455, "learning_rate": 7.554228666421176e-06, "loss": 0.0509, "step": 2708 }, { "epoch": 0.37748205949975616, "grad_norm": 0.07529589533805847, "learning_rate": 7.552202320860823e-06, "loss": 0.0558, "step": 2709 }, { "epoch": 0.37762140319097054, "grad_norm": 0.16626983880996704, "learning_rate": 7.550175408259383e-06, "loss": 0.0642, "step": 2710 }, { "epoch": 0.3777607468821849, "grad_norm": 0.1798056811094284, "learning_rate": 7.548147929067189e-06, "loss": 0.058, "step": 2711 }, { "epoch": 0.3779000905733993, "grad_norm": 0.12200801074504852, "learning_rate": 7.546119883734699e-06, "loss": 0.0626, "step": 2712 }, { "epoch": 0.37803943426461367, "grad_norm": 0.09026719629764557, "learning_rate": 7.544091272712501e-06, "loss": 0.0589, "step": 2713 }, { "epoch": 0.37817877795582805, "grad_norm": 0.08042921125888824, "learning_rate": 7.542062096451306e-06, "loss": 0.067, "step": 2714 }, { "epoch": 0.3783181216470424, "grad_norm": 0.07545115798711777, "learning_rate": 7.540032355401948e-06, "loss": 0.0482, "step": 2715 }, { "epoch": 0.3784574653382568, "grad_norm": 0.13100972771644592, "learning_rate": 7.53800205001539e-06, "loss": 0.0504, "step": 2716 }, { "epoch": 0.3785968090294712, "grad_norm": 0.09556630998849869, "learning_rate": 7.53597118074272e-06, "loss": 0.051, "step": 2717 }, { "epoch": 0.37873615272068556, "grad_norm": 0.09199392050504684, "learning_rate": 7.5339397480351525e-06, "loss": 0.0586, "step": 2718 }, { "epoch": 0.37887549641189994, "grad_norm": 0.0763036236166954, "learning_rate": 7.531907752344023e-06, "loss": 0.059, "step": 2719 }, { "epoch": 0.3790148401031143, "grad_norm": 0.11300285905599594, "learning_rate": 7.529875194120795e-06, "loss": 0.0675, "step": 2720 }, { "epoch": 0.3791541837943287, "grad_norm": 0.12922517955303192, "learning_rate": 7.527842073817056e-06, "loss": 0.058, "step": 2721 }, { "epoch": 0.37929352748554307, "grad_norm": 0.0815335139632225, "learning_rate": 7.525808391884521e-06, "loss": 0.0463, "step": 2722 }, { "epoch": 0.37943287117675745, "grad_norm": 0.12381331622600555, "learning_rate": 7.523774148775027e-06, "loss": 0.0549, "step": 2723 }, { "epoch": 0.3795722148679719, "grad_norm": 0.0894315093755722, "learning_rate": 7.521739344940535e-06, "loss": 0.0552, "step": 2724 }, { "epoch": 0.37971155855918626, "grad_norm": 0.13441312313079834, "learning_rate": 7.519703980833133e-06, "loss": 0.062, "step": 2725 }, { "epoch": 0.37985090225040063, "grad_norm": 0.05328723415732384, "learning_rate": 7.517668056905033e-06, "loss": 0.0542, "step": 2726 }, { "epoch": 0.379990245941615, "grad_norm": 0.07080575078725815, "learning_rate": 7.515631573608568e-06, "loss": 0.0573, "step": 2727 }, { "epoch": 0.3801295896328294, "grad_norm": 0.12312419712543488, "learning_rate": 7.513594531396202e-06, "loss": 0.0581, "step": 2728 }, { "epoch": 0.38026893332404377, "grad_norm": 0.08509278297424316, "learning_rate": 7.511556930720517e-06, "loss": 0.0491, "step": 2729 }, { "epoch": 0.38040827701525814, "grad_norm": 0.06703925877809525, "learning_rate": 7.5095187720342224e-06, "loss": 0.0633, "step": 2730 }, { "epoch": 0.3805476207064725, "grad_norm": 0.05239326134324074, "learning_rate": 7.50748005579015e-06, "loss": 0.0521, "step": 2731 }, { "epoch": 0.3806869643976869, "grad_norm": 0.06824148446321487, "learning_rate": 7.505440782441256e-06, "loss": 0.0587, "step": 2732 }, { "epoch": 0.3808263080889013, "grad_norm": 0.058327652513980865, "learning_rate": 7.503400952440618e-06, "loss": 0.0505, "step": 2733 }, { "epoch": 0.38096565178011566, "grad_norm": 0.07420212775468826, "learning_rate": 7.501360566241444e-06, "loss": 0.0509, "step": 2734 }, { "epoch": 0.38110499547133003, "grad_norm": 0.08400644361972809, "learning_rate": 7.499319624297059e-06, "loss": 0.0597, "step": 2735 }, { "epoch": 0.3812443391625444, "grad_norm": 0.09336897730827332, "learning_rate": 7.497278127060914e-06, "loss": 0.053, "step": 2736 }, { "epoch": 0.3813836828537588, "grad_norm": 0.14244525134563446, "learning_rate": 7.4952360749865825e-06, "loss": 0.0644, "step": 2737 }, { "epoch": 0.38152302654497317, "grad_norm": 0.21795009076595306, "learning_rate": 7.493193468527764e-06, "loss": 0.0559, "step": 2738 }, { "epoch": 0.38166237023618754, "grad_norm": 0.17426034808158875, "learning_rate": 7.491150308138275e-06, "loss": 0.068, "step": 2739 }, { "epoch": 0.3818017139274019, "grad_norm": 0.08370193094015121, "learning_rate": 7.489106594272063e-06, "loss": 0.0592, "step": 2740 }, { "epoch": 0.3819410576186163, "grad_norm": 0.13647030293941498, "learning_rate": 7.487062327383192e-06, "loss": 0.0644, "step": 2741 }, { "epoch": 0.3820804013098307, "grad_norm": 0.15011228621006012, "learning_rate": 7.485017507925853e-06, "loss": 0.0624, "step": 2742 }, { "epoch": 0.38221974500104505, "grad_norm": 0.13627803325653076, "learning_rate": 7.482972136354359e-06, "loss": 0.0516, "step": 2743 }, { "epoch": 0.3823590886922595, "grad_norm": 0.11514364182949066, "learning_rate": 7.480926213123142e-06, "loss": 0.0529, "step": 2744 }, { "epoch": 0.38249843238347386, "grad_norm": 0.07106674462556839, "learning_rate": 7.4788797386867596e-06, "loss": 0.0435, "step": 2745 }, { "epoch": 0.38263777607468824, "grad_norm": 0.10973572731018066, "learning_rate": 7.476832713499896e-06, "loss": 0.0604, "step": 2746 }, { "epoch": 0.3827771197659026, "grad_norm": 0.10691248625516891, "learning_rate": 7.474785138017349e-06, "loss": 0.0596, "step": 2747 }, { "epoch": 0.382916463457117, "grad_norm": 0.2309638261795044, "learning_rate": 7.472737012694045e-06, "loss": 0.0728, "step": 2748 }, { "epoch": 0.3830558071483314, "grad_norm": 0.09582538157701492, "learning_rate": 7.470688337985029e-06, "loss": 0.0563, "step": 2749 }, { "epoch": 0.38319515083954575, "grad_norm": 0.06506337970495224, "learning_rate": 7.468639114345473e-06, "loss": 0.0561, "step": 2750 }, { "epoch": 0.38333449453076013, "grad_norm": 0.09930125623941422, "learning_rate": 7.466589342230664e-06, "loss": 0.0676, "step": 2751 }, { "epoch": 0.3834738382219745, "grad_norm": 0.08098753541707993, "learning_rate": 7.464539022096018e-06, "loss": 0.0561, "step": 2752 }, { "epoch": 0.3836131819131889, "grad_norm": 0.21690267324447632, "learning_rate": 7.462488154397067e-06, "loss": 0.0677, "step": 2753 }, { "epoch": 0.38375252560440326, "grad_norm": 0.1357370764017105, "learning_rate": 7.460436739589467e-06, "loss": 0.0647, "step": 2754 }, { "epoch": 0.38389186929561764, "grad_norm": 0.06995152682065964, "learning_rate": 7.458384778128997e-06, "loss": 0.054, "step": 2755 }, { "epoch": 0.384031212986832, "grad_norm": 0.11507825553417206, "learning_rate": 7.4563322704715556e-06, "loss": 0.0631, "step": 2756 }, { "epoch": 0.3841705566780464, "grad_norm": 0.06356590986251831, "learning_rate": 7.45427921707316e-06, "loss": 0.0464, "step": 2757 }, { "epoch": 0.38430990036926077, "grad_norm": 0.1354607790708542, "learning_rate": 7.452225618389959e-06, "loss": 0.0584, "step": 2758 }, { "epoch": 0.38444924406047515, "grad_norm": 0.09187083691358566, "learning_rate": 7.450171474878207e-06, "loss": 0.0525, "step": 2759 }, { "epoch": 0.3845885877516895, "grad_norm": 0.17032547295093536, "learning_rate": 7.4481167869942934e-06, "loss": 0.0781, "step": 2760 }, { "epoch": 0.3847279314429039, "grad_norm": 0.05613027140498161, "learning_rate": 7.446061555194721e-06, "loss": 0.0458, "step": 2761 }, { "epoch": 0.3848672751341183, "grad_norm": 0.0951920673251152, "learning_rate": 7.4440057799361155e-06, "loss": 0.0574, "step": 2762 }, { "epoch": 0.38500661882533266, "grad_norm": 0.0783630833029747, "learning_rate": 7.441949461675223e-06, "loss": 0.058, "step": 2763 }, { "epoch": 0.38514596251654704, "grad_norm": 0.07382829487323761, "learning_rate": 7.439892600868911e-06, "loss": 0.0584, "step": 2764 }, { "epoch": 0.38528530620776147, "grad_norm": 0.07492915540933609, "learning_rate": 7.437835197974167e-06, "loss": 0.0575, "step": 2765 }, { "epoch": 0.38542464989897585, "grad_norm": 0.14610262215137482, "learning_rate": 7.435777253448099e-06, "loss": 0.0731, "step": 2766 }, { "epoch": 0.3855639935901902, "grad_norm": 0.13597333431243896, "learning_rate": 7.433718767747934e-06, "loss": 0.0636, "step": 2767 }, { "epoch": 0.3857033372814046, "grad_norm": 0.0778408870100975, "learning_rate": 7.431659741331022e-06, "loss": 0.0554, "step": 2768 }, { "epoch": 0.385842680972619, "grad_norm": 0.07887953519821167, "learning_rate": 7.429600174654832e-06, "loss": 0.0586, "step": 2769 }, { "epoch": 0.38598202466383336, "grad_norm": 0.08267669379711151, "learning_rate": 7.427540068176951e-06, "loss": 0.0518, "step": 2770 }, { "epoch": 0.38612136835504773, "grad_norm": 0.08244861662387848, "learning_rate": 7.4254794223550885e-06, "loss": 0.0519, "step": 2771 }, { "epoch": 0.3862607120462621, "grad_norm": 0.08819426596164703, "learning_rate": 7.423418237647073e-06, "loss": 0.0628, "step": 2772 }, { "epoch": 0.3864000557374765, "grad_norm": 0.09071652591228485, "learning_rate": 7.421356514510853e-06, "loss": 0.0566, "step": 2773 }, { "epoch": 0.38653939942869087, "grad_norm": 0.0625394880771637, "learning_rate": 7.419294253404497e-06, "loss": 0.0421, "step": 2774 }, { "epoch": 0.38667874311990524, "grad_norm": 0.12970782816410065, "learning_rate": 7.417231454786189e-06, "loss": 0.0589, "step": 2775 }, { "epoch": 0.3868180868111196, "grad_norm": 0.08983805030584335, "learning_rate": 7.41516811911424e-06, "loss": 0.0567, "step": 2776 }, { "epoch": 0.386957430502334, "grad_norm": 0.16461674869060516, "learning_rate": 7.4131042468470725e-06, "loss": 0.0607, "step": 2777 }, { "epoch": 0.3870967741935484, "grad_norm": 0.1314351111650467, "learning_rate": 7.411039838443234e-06, "loss": 0.0669, "step": 2778 }, { "epoch": 0.38723611788476275, "grad_norm": 0.054034724831581116, "learning_rate": 7.4089748943613895e-06, "loss": 0.0617, "step": 2779 }, { "epoch": 0.38737546157597713, "grad_norm": 0.11598209291696548, "learning_rate": 7.406909415060321e-06, "loss": 0.0661, "step": 2780 }, { "epoch": 0.3875148052671915, "grad_norm": 0.055543020367622375, "learning_rate": 7.404843400998931e-06, "loss": 0.0481, "step": 2781 }, { "epoch": 0.3876541489584059, "grad_norm": 0.09085724502801895, "learning_rate": 7.4027768526362395e-06, "loss": 0.051, "step": 2782 }, { "epoch": 0.38779349264962026, "grad_norm": 0.0704951211810112, "learning_rate": 7.4007097704313894e-06, "loss": 0.0629, "step": 2783 }, { "epoch": 0.38793283634083464, "grad_norm": 0.14094457030296326, "learning_rate": 7.398642154843637e-06, "loss": 0.0757, "step": 2784 }, { "epoch": 0.3880721800320491, "grad_norm": 0.07899443805217743, "learning_rate": 7.39657400633236e-06, "loss": 0.0591, "step": 2785 }, { "epoch": 0.38821152372326345, "grad_norm": 0.054882097989320755, "learning_rate": 7.394505325357053e-06, "loss": 0.0534, "step": 2786 }, { "epoch": 0.38835086741447783, "grad_norm": 0.10198832303285599, "learning_rate": 7.392436112377331e-06, "loss": 0.0497, "step": 2787 }, { "epoch": 0.3884902111056922, "grad_norm": 0.08506826311349869, "learning_rate": 7.390366367852923e-06, "loss": 0.0605, "step": 2788 }, { "epoch": 0.3886295547969066, "grad_norm": 0.0864626094698906, "learning_rate": 7.388296092243683e-06, "loss": 0.0621, "step": 2789 }, { "epoch": 0.38876889848812096, "grad_norm": 0.08982832729816437, "learning_rate": 7.386225286009576e-06, "loss": 0.051, "step": 2790 }, { "epoch": 0.38890824217933534, "grad_norm": 0.0632956475019455, "learning_rate": 7.384153949610689e-06, "loss": 0.0548, "step": 2791 }, { "epoch": 0.3890475858705497, "grad_norm": 0.07463330775499344, "learning_rate": 7.382082083507226e-06, "loss": 0.0554, "step": 2792 }, { "epoch": 0.3891869295617641, "grad_norm": 0.06821958720684052, "learning_rate": 7.380009688159507e-06, "loss": 0.051, "step": 2793 }, { "epoch": 0.38932627325297847, "grad_norm": 0.10633592307567596, "learning_rate": 7.377936764027973e-06, "loss": 0.0523, "step": 2794 }, { "epoch": 0.38946561694419285, "grad_norm": 0.0872223898768425, "learning_rate": 7.375863311573179e-06, "loss": 0.0532, "step": 2795 }, { "epoch": 0.3896049606354072, "grad_norm": 0.10361529141664505, "learning_rate": 7.373789331255799e-06, "loss": 0.0558, "step": 2796 }, { "epoch": 0.3897443043266216, "grad_norm": 0.15337345004081726, "learning_rate": 7.371714823536624e-06, "loss": 0.0686, "step": 2797 }, { "epoch": 0.389883648017836, "grad_norm": 0.08887475728988647, "learning_rate": 7.369639788876561e-06, "loss": 0.0662, "step": 2798 }, { "epoch": 0.39002299170905036, "grad_norm": 0.19383278489112854, "learning_rate": 7.367564227736639e-06, "loss": 0.0634, "step": 2799 }, { "epoch": 0.39016233540026474, "grad_norm": 0.15027165412902832, "learning_rate": 7.365488140577997e-06, "loss": 0.0542, "step": 2800 }, { "epoch": 0.3903016790914791, "grad_norm": 0.0901922658085823, "learning_rate": 7.3634115278618955e-06, "loss": 0.0733, "step": 2801 }, { "epoch": 0.3904410227826935, "grad_norm": 0.08899257332086563, "learning_rate": 7.36133439004971e-06, "loss": 0.0566, "step": 2802 }, { "epoch": 0.39058036647390787, "grad_norm": 0.09819028526544571, "learning_rate": 7.3592567276029336e-06, "loss": 0.0532, "step": 2803 }, { "epoch": 0.39071971016512225, "grad_norm": 0.09588440507650375, "learning_rate": 7.357178540983174e-06, "loss": 0.0506, "step": 2804 }, { "epoch": 0.3908590538563367, "grad_norm": 0.1453322172164917, "learning_rate": 7.355099830652159e-06, "loss": 0.0569, "step": 2805 }, { "epoch": 0.39099839754755106, "grad_norm": 0.08187595754861832, "learning_rate": 7.353020597071729e-06, "loss": 0.0609, "step": 2806 }, { "epoch": 0.39113774123876544, "grad_norm": 0.10904838144779205, "learning_rate": 7.350940840703842e-06, "loss": 0.0612, "step": 2807 }, { "epoch": 0.3912770849299798, "grad_norm": 0.07324374467134476, "learning_rate": 7.348860562010574e-06, "loss": 0.0569, "step": 2808 }, { "epoch": 0.3914164286211942, "grad_norm": 0.07277946174144745, "learning_rate": 7.346779761454113e-06, "loss": 0.0583, "step": 2809 }, { "epoch": 0.39155577231240857, "grad_norm": 0.13054852187633514, "learning_rate": 7.3446984394967705e-06, "loss": 0.0704, "step": 2810 }, { "epoch": 0.39169511600362295, "grad_norm": 0.09972017258405685, "learning_rate": 7.342616596600961e-06, "loss": 0.0614, "step": 2811 }, { "epoch": 0.3918344596948373, "grad_norm": 0.15212926268577576, "learning_rate": 7.3405342332292286e-06, "loss": 0.0683, "step": 2812 }, { "epoch": 0.3919738033860517, "grad_norm": 0.061692189425230026, "learning_rate": 7.338451349844225e-06, "loss": 0.0514, "step": 2813 }, { "epoch": 0.3921131470772661, "grad_norm": 0.07050781697034836, "learning_rate": 7.336367946908718e-06, "loss": 0.057, "step": 2814 }, { "epoch": 0.39225249076848046, "grad_norm": 0.07188370078802109, "learning_rate": 7.334284024885595e-06, "loss": 0.0554, "step": 2815 }, { "epoch": 0.39239183445969483, "grad_norm": 0.10229357331991196, "learning_rate": 7.332199584237854e-06, "loss": 0.0554, "step": 2816 }, { "epoch": 0.3925311781509092, "grad_norm": 0.10058000683784485, "learning_rate": 7.330114625428609e-06, "loss": 0.052, "step": 2817 }, { "epoch": 0.3926705218421236, "grad_norm": 0.06298250705003738, "learning_rate": 7.328029148921093e-06, "loss": 0.0526, "step": 2818 }, { "epoch": 0.39280986553333797, "grad_norm": 0.09214940667152405, "learning_rate": 7.32594315517865e-06, "loss": 0.045, "step": 2819 }, { "epoch": 0.39294920922455234, "grad_norm": 0.07608672231435776, "learning_rate": 7.32385664466474e-06, "loss": 0.0539, "step": 2820 }, { "epoch": 0.3930885529157667, "grad_norm": 0.07963283360004425, "learning_rate": 7.321769617842937e-06, "loss": 0.0507, "step": 2821 }, { "epoch": 0.3932278966069811, "grad_norm": 0.13389237225055695, "learning_rate": 7.319682075176932e-06, "loss": 0.0653, "step": 2822 }, { "epoch": 0.3933672402981955, "grad_norm": 0.08664917200803757, "learning_rate": 7.317594017130529e-06, "loss": 0.05, "step": 2823 }, { "epoch": 0.39350658398940985, "grad_norm": 0.058186497539281845, "learning_rate": 7.3155054441676485e-06, "loss": 0.054, "step": 2824 }, { "epoch": 0.3936459276806243, "grad_norm": 0.057512316852808, "learning_rate": 7.313416356752321e-06, "loss": 0.0567, "step": 2825 }, { "epoch": 0.39378527137183866, "grad_norm": 0.11560056358575821, "learning_rate": 7.311326755348697e-06, "loss": 0.0644, "step": 2826 }, { "epoch": 0.39392461506305304, "grad_norm": 0.09315485507249832, "learning_rate": 7.309236640421033e-06, "loss": 0.0567, "step": 2827 }, { "epoch": 0.3940639587542674, "grad_norm": 0.09406132251024246, "learning_rate": 7.30714601243371e-06, "loss": 0.0572, "step": 2828 }, { "epoch": 0.3942033024454818, "grad_norm": 0.07155133783817291, "learning_rate": 7.305054871851217e-06, "loss": 0.0515, "step": 2829 }, { "epoch": 0.3943426461366962, "grad_norm": 0.10946089774370193, "learning_rate": 7.302963219138156e-06, "loss": 0.0582, "step": 2830 }, { "epoch": 0.39448198982791055, "grad_norm": 0.09112812578678131, "learning_rate": 7.3008710547592465e-06, "loss": 0.0564, "step": 2831 }, { "epoch": 0.39462133351912493, "grad_norm": 0.08327817171812057, "learning_rate": 7.298778379179317e-06, "loss": 0.0636, "step": 2832 }, { "epoch": 0.3947606772103393, "grad_norm": 0.06902216374874115, "learning_rate": 7.296685192863313e-06, "loss": 0.0539, "step": 2833 }, { "epoch": 0.3949000209015537, "grad_norm": 0.11997321248054504, "learning_rate": 7.2945914962762954e-06, "loss": 0.066, "step": 2834 }, { "epoch": 0.39503936459276806, "grad_norm": 0.09431637823581696, "learning_rate": 7.292497289883432e-06, "loss": 0.0691, "step": 2835 }, { "epoch": 0.39517870828398244, "grad_norm": 0.10210125148296356, "learning_rate": 7.29040257415001e-06, "loss": 0.0572, "step": 2836 }, { "epoch": 0.3953180519751968, "grad_norm": 0.06110319867730141, "learning_rate": 7.288307349541427e-06, "loss": 0.0557, "step": 2837 }, { "epoch": 0.3954573956664112, "grad_norm": 0.08174359798431396, "learning_rate": 7.286211616523193e-06, "loss": 0.0625, "step": 2838 }, { "epoch": 0.39559673935762557, "grad_norm": 0.1237817332148552, "learning_rate": 7.284115375560934e-06, "loss": 0.0796, "step": 2839 }, { "epoch": 0.39573608304883995, "grad_norm": 0.14542421698570251, "learning_rate": 7.282018627120386e-06, "loss": 0.0572, "step": 2840 }, { "epoch": 0.3958754267400543, "grad_norm": 0.13320815563201904, "learning_rate": 7.279921371667397e-06, "loss": 0.0633, "step": 2841 }, { "epoch": 0.3960147704312687, "grad_norm": 0.09702219069004059, "learning_rate": 7.2778236096679325e-06, "loss": 0.0666, "step": 2842 }, { "epoch": 0.3961541141224831, "grad_norm": 0.12556202709674835, "learning_rate": 7.275725341588064e-06, "loss": 0.0601, "step": 2843 }, { "epoch": 0.39629345781369746, "grad_norm": 0.06868747621774673, "learning_rate": 7.27362656789398e-06, "loss": 0.0539, "step": 2844 }, { "epoch": 0.3964328015049119, "grad_norm": 0.06805485486984253, "learning_rate": 7.2715272890519815e-06, "loss": 0.0504, "step": 2845 }, { "epoch": 0.39657214519612627, "grad_norm": 0.09473183006048203, "learning_rate": 7.2694275055284795e-06, "loss": 0.0624, "step": 2846 }, { "epoch": 0.39671148888734065, "grad_norm": 0.0642184466123581, "learning_rate": 7.267327217789998e-06, "loss": 0.0627, "step": 2847 }, { "epoch": 0.396850832578555, "grad_norm": 0.06881683319807053, "learning_rate": 7.26522642630317e-06, "loss": 0.0671, "step": 2848 }, { "epoch": 0.3969901762697694, "grad_norm": 0.07999224960803986, "learning_rate": 7.263125131534749e-06, "loss": 0.0656, "step": 2849 }, { "epoch": 0.3971295199609838, "grad_norm": 0.06515117734670639, "learning_rate": 7.26102333395159e-06, "loss": 0.051, "step": 2850 }, { "epoch": 0.39726886365219816, "grad_norm": 0.0791514441370964, "learning_rate": 7.2589210340206675e-06, "loss": 0.0503, "step": 2851 }, { "epoch": 0.39740820734341253, "grad_norm": 0.16887758672237396, "learning_rate": 7.256818232209062e-06, "loss": 0.0778, "step": 2852 }, { "epoch": 0.3975475510346269, "grad_norm": 0.08025657385587692, "learning_rate": 7.25471492898397e-06, "loss": 0.0509, "step": 2853 }, { "epoch": 0.3976868947258413, "grad_norm": 0.11278219521045685, "learning_rate": 7.2526111248126976e-06, "loss": 0.0646, "step": 2854 }, { "epoch": 0.39782623841705567, "grad_norm": 0.0715537741780281, "learning_rate": 7.250506820162661e-06, "loss": 0.0623, "step": 2855 }, { "epoch": 0.39796558210827004, "grad_norm": 0.0901024118065834, "learning_rate": 7.248402015501388e-06, "loss": 0.0607, "step": 2856 }, { "epoch": 0.3981049257994844, "grad_norm": 0.06518837064504623, "learning_rate": 7.246296711296519e-06, "loss": 0.0538, "step": 2857 }, { "epoch": 0.3982442694906988, "grad_norm": 0.07582315802574158, "learning_rate": 7.244190908015805e-06, "loss": 0.0557, "step": 2858 }, { "epoch": 0.3983836131819132, "grad_norm": 0.08162117749452591, "learning_rate": 7.2420846061271065e-06, "loss": 0.0661, "step": 2859 }, { "epoch": 0.39852295687312755, "grad_norm": 0.0917133316397667, "learning_rate": 7.239977806098398e-06, "loss": 0.0574, "step": 2860 }, { "epoch": 0.39866230056434193, "grad_norm": 0.09635115414857864, "learning_rate": 7.237870508397757e-06, "loss": 0.0542, "step": 2861 }, { "epoch": 0.3988016442555563, "grad_norm": 0.06625650078058243, "learning_rate": 7.235762713493384e-06, "loss": 0.0613, "step": 2862 }, { "epoch": 0.3989409879467707, "grad_norm": 0.12036395817995071, "learning_rate": 7.2336544218535776e-06, "loss": 0.0548, "step": 2863 }, { "epoch": 0.39908033163798506, "grad_norm": 0.0990767851471901, "learning_rate": 7.231545633946755e-06, "loss": 0.0623, "step": 2864 }, { "epoch": 0.3992196753291995, "grad_norm": 0.07731076329946518, "learning_rate": 7.229436350241439e-06, "loss": 0.0551, "step": 2865 }, { "epoch": 0.3993590190204139, "grad_norm": 0.07193966209888458, "learning_rate": 7.2273265712062646e-06, "loss": 0.055, "step": 2866 }, { "epoch": 0.39949836271162825, "grad_norm": 0.06437227129936218, "learning_rate": 7.225216297309977e-06, "loss": 0.0577, "step": 2867 }, { "epoch": 0.39963770640284263, "grad_norm": 0.08663952350616455, "learning_rate": 7.22310552902143e-06, "loss": 0.0553, "step": 2868 }, { "epoch": 0.399777050094057, "grad_norm": 0.06433497369289398, "learning_rate": 7.220994266809591e-06, "loss": 0.0506, "step": 2869 }, { "epoch": 0.3999163937852714, "grad_norm": 0.1040053442120552, "learning_rate": 7.21888251114353e-06, "loss": 0.0536, "step": 2870 }, { "epoch": 0.40005573747648576, "grad_norm": 0.10156742483377457, "learning_rate": 7.2167702624924345e-06, "loss": 0.0578, "step": 2871 }, { "epoch": 0.40019508116770014, "grad_norm": 0.19887082278728485, "learning_rate": 7.2146575213255945e-06, "loss": 0.0627, "step": 2872 }, { "epoch": 0.4003344248589145, "grad_norm": 0.060479048639535904, "learning_rate": 7.212544288112415e-06, "loss": 0.0527, "step": 2873 }, { "epoch": 0.4004737685501289, "grad_norm": 0.09359202533960342, "learning_rate": 7.21043056332241e-06, "loss": 0.055, "step": 2874 }, { "epoch": 0.40061311224134327, "grad_norm": 0.1950647085905075, "learning_rate": 7.208316347425197e-06, "loss": 0.061, "step": 2875 }, { "epoch": 0.40075245593255765, "grad_norm": 0.11875019967556, "learning_rate": 7.206201640890509e-06, "loss": 0.0643, "step": 2876 }, { "epoch": 0.400891799623772, "grad_norm": 0.06105506420135498, "learning_rate": 7.204086444188184e-06, "loss": 0.0504, "step": 2877 }, { "epoch": 0.4010311433149864, "grad_norm": 0.06989710032939911, "learning_rate": 7.201970757788172e-06, "loss": 0.044, "step": 2878 }, { "epoch": 0.4011704870062008, "grad_norm": 0.08091244846582413, "learning_rate": 7.199854582160529e-06, "loss": 0.0518, "step": 2879 }, { "epoch": 0.40130983069741516, "grad_norm": 0.15671247243881226, "learning_rate": 7.197737917775422e-06, "loss": 0.0655, "step": 2880 }, { "epoch": 0.40144917438862954, "grad_norm": 0.17779508233070374, "learning_rate": 7.1956207651031254e-06, "loss": 0.0661, "step": 2881 }, { "epoch": 0.4015885180798439, "grad_norm": 0.12587058544158936, "learning_rate": 7.193503124614021e-06, "loss": 0.0588, "step": 2882 }, { "epoch": 0.4017278617710583, "grad_norm": 0.10819704830646515, "learning_rate": 7.191384996778601e-06, "loss": 0.0592, "step": 2883 }, { "epoch": 0.40186720546227267, "grad_norm": 0.1385854035615921, "learning_rate": 7.189266382067464e-06, "loss": 0.0653, "step": 2884 }, { "epoch": 0.4020065491534871, "grad_norm": 0.1264498084783554, "learning_rate": 7.1871472809513185e-06, "loss": 0.0511, "step": 2885 }, { "epoch": 0.4021458928447015, "grad_norm": 0.16911643743515015, "learning_rate": 7.185027693900982e-06, "loss": 0.0738, "step": 2886 }, { "epoch": 0.40228523653591586, "grad_norm": 0.1481788456439972, "learning_rate": 7.182907621387376e-06, "loss": 0.0581, "step": 2887 }, { "epoch": 0.40242458022713024, "grad_norm": 0.0914827436208725, "learning_rate": 7.180787063881534e-06, "loss": 0.0522, "step": 2888 }, { "epoch": 0.4025639239183446, "grad_norm": 0.06969261914491653, "learning_rate": 7.178666021854593e-06, "loss": 0.0516, "step": 2889 }, { "epoch": 0.402703267609559, "grad_norm": 0.06805802881717682, "learning_rate": 7.176544495777804e-06, "loss": 0.0456, "step": 2890 }, { "epoch": 0.40284261130077337, "grad_norm": 0.09709832072257996, "learning_rate": 7.174422486122517e-06, "loss": 0.0545, "step": 2891 }, { "epoch": 0.40298195499198775, "grad_norm": 0.1954614669084549, "learning_rate": 7.1722999933602e-06, "loss": 0.0609, "step": 2892 }, { "epoch": 0.4031212986832021, "grad_norm": 0.16820897161960602, "learning_rate": 7.170177017962415e-06, "loss": 0.0719, "step": 2893 }, { "epoch": 0.4032606423744165, "grad_norm": 0.0578019879758358, "learning_rate": 7.168053560400845e-06, "loss": 0.0558, "step": 2894 }, { "epoch": 0.4033999860656309, "grad_norm": 0.08451075106859207, "learning_rate": 7.16592962114727e-06, "loss": 0.067, "step": 2895 }, { "epoch": 0.40353932975684526, "grad_norm": 0.06147473677992821, "learning_rate": 7.163805200673584e-06, "loss": 0.058, "step": 2896 }, { "epoch": 0.40367867344805963, "grad_norm": 0.09017428755760193, "learning_rate": 7.161680299451782e-06, "loss": 0.0609, "step": 2897 }, { "epoch": 0.403818017139274, "grad_norm": 0.07302389293909073, "learning_rate": 7.159554917953968e-06, "loss": 0.0584, "step": 2898 }, { "epoch": 0.4039573608304884, "grad_norm": 0.07956859469413757, "learning_rate": 7.157429056652357e-06, "loss": 0.0534, "step": 2899 }, { "epoch": 0.40409670452170277, "grad_norm": 0.095341756939888, "learning_rate": 7.155302716019263e-06, "loss": 0.0651, "step": 2900 }, { "epoch": 0.40423604821291714, "grad_norm": 0.09133975952863693, "learning_rate": 7.153175896527112e-06, "loss": 0.0495, "step": 2901 }, { "epoch": 0.4043753919041315, "grad_norm": 0.05250406265258789, "learning_rate": 7.151048598648436e-06, "loss": 0.0457, "step": 2902 }, { "epoch": 0.4045147355953459, "grad_norm": 0.14263901114463806, "learning_rate": 7.148920822855869e-06, "loss": 0.0734, "step": 2903 }, { "epoch": 0.4046540792865603, "grad_norm": 0.10996381938457489, "learning_rate": 7.146792569622157e-06, "loss": 0.059, "step": 2904 }, { "epoch": 0.4047934229777747, "grad_norm": 0.07712361961603165, "learning_rate": 7.144663839420147e-06, "loss": 0.0606, "step": 2905 }, { "epoch": 0.4049327666689891, "grad_norm": 0.07414129376411438, "learning_rate": 7.142534632722797e-06, "loss": 0.0571, "step": 2906 }, { "epoch": 0.40507211036020346, "grad_norm": 0.09773746132850647, "learning_rate": 7.140404950003164e-06, "loss": 0.059, "step": 2907 }, { "epoch": 0.40521145405141784, "grad_norm": 0.07897235453128815, "learning_rate": 7.138274791734421e-06, "loss": 0.0547, "step": 2908 }, { "epoch": 0.4053507977426322, "grad_norm": 0.08894190192222595, "learning_rate": 7.136144158389834e-06, "loss": 0.0475, "step": 2909 }, { "epoch": 0.4054901414338466, "grad_norm": 0.10658182203769684, "learning_rate": 7.134013050442785e-06, "loss": 0.0576, "step": 2910 }, { "epoch": 0.405629485125061, "grad_norm": 0.18938586115837097, "learning_rate": 7.1318814683667555e-06, "loss": 0.0748, "step": 2911 }, { "epoch": 0.40576882881627535, "grad_norm": 0.09779561311006546, "learning_rate": 7.129749412635337e-06, "loss": 0.0501, "step": 2912 }, { "epoch": 0.40590817250748973, "grad_norm": 0.09538628906011581, "learning_rate": 7.1276168837222215e-06, "loss": 0.0473, "step": 2913 }, { "epoch": 0.4060475161987041, "grad_norm": 0.08485840260982513, "learning_rate": 7.125483882101208e-06, "loss": 0.0514, "step": 2914 }, { "epoch": 0.4061868598899185, "grad_norm": 0.21455559134483337, "learning_rate": 7.123350408246203e-06, "loss": 0.0611, "step": 2915 }, { "epoch": 0.40632620358113286, "grad_norm": 0.09058542549610138, "learning_rate": 7.121216462631213e-06, "loss": 0.0535, "step": 2916 }, { "epoch": 0.40646554727234724, "grad_norm": 0.07849879562854767, "learning_rate": 7.1190820457303535e-06, "loss": 0.0418, "step": 2917 }, { "epoch": 0.4066048909635616, "grad_norm": 0.07735027372837067, "learning_rate": 7.116947158017842e-06, "loss": 0.0464, "step": 2918 }, { "epoch": 0.406744234654776, "grad_norm": 0.13072244822978973, "learning_rate": 7.114811799968005e-06, "loss": 0.0718, "step": 2919 }, { "epoch": 0.40688357834599037, "grad_norm": 0.1601284146308899, "learning_rate": 7.1126759720552665e-06, "loss": 0.0571, "step": 2920 }, { "epoch": 0.40702292203720475, "grad_norm": 0.059251684695482254, "learning_rate": 7.11053967475416e-06, "loss": 0.0476, "step": 2921 }, { "epoch": 0.4071622657284191, "grad_norm": 0.0663936510682106, "learning_rate": 7.108402908539323e-06, "loss": 0.053, "step": 2922 }, { "epoch": 0.4073016094196335, "grad_norm": 0.10683826357126236, "learning_rate": 7.106265673885494e-06, "loss": 0.0504, "step": 2923 }, { "epoch": 0.4074409531108479, "grad_norm": 0.12036573141813278, "learning_rate": 7.104127971267521e-06, "loss": 0.0518, "step": 2924 }, { "epoch": 0.4075802968020623, "grad_norm": 0.08263006061315536, "learning_rate": 7.10198980116035e-06, "loss": 0.056, "step": 2925 }, { "epoch": 0.4077196404932767, "grad_norm": 0.13500182330608368, "learning_rate": 7.099851164039035e-06, "loss": 0.0577, "step": 2926 }, { "epoch": 0.40785898418449107, "grad_norm": 0.09807919710874557, "learning_rate": 7.0977120603787296e-06, "loss": 0.0449, "step": 2927 }, { "epoch": 0.40799832787570545, "grad_norm": 0.07617517560720444, "learning_rate": 7.095572490654698e-06, "loss": 0.0649, "step": 2928 }, { "epoch": 0.4081376715669198, "grad_norm": 0.14119607210159302, "learning_rate": 7.0934324553423015e-06, "loss": 0.0633, "step": 2929 }, { "epoch": 0.4082770152581342, "grad_norm": 0.07097364962100983, "learning_rate": 7.091291954917007e-06, "loss": 0.0529, "step": 2930 }, { "epoch": 0.4084163589493486, "grad_norm": 0.13153976202011108, "learning_rate": 7.089150989854385e-06, "loss": 0.057, "step": 2931 }, { "epoch": 0.40855570264056296, "grad_norm": 0.06164942681789398, "learning_rate": 7.0870095606301095e-06, "loss": 0.0487, "step": 2932 }, { "epoch": 0.40869504633177733, "grad_norm": 0.09704865515232086, "learning_rate": 7.084867667719957e-06, "loss": 0.0521, "step": 2933 }, { "epoch": 0.4088343900229917, "grad_norm": 0.11537488549947739, "learning_rate": 7.082725311599808e-06, "loss": 0.0547, "step": 2934 }, { "epoch": 0.4089737337142061, "grad_norm": 0.118534155189991, "learning_rate": 7.080582492745642e-06, "loss": 0.0554, "step": 2935 }, { "epoch": 0.40911307740542047, "grad_norm": 0.09579268097877502, "learning_rate": 7.0784392116335475e-06, "loss": 0.0585, "step": 2936 }, { "epoch": 0.40925242109663484, "grad_norm": 0.09383444488048553, "learning_rate": 7.076295468739711e-06, "loss": 0.0511, "step": 2937 }, { "epoch": 0.4093917647878492, "grad_norm": 0.07952997088432312, "learning_rate": 7.074151264540425e-06, "loss": 0.0572, "step": 2938 }, { "epoch": 0.4095311084790636, "grad_norm": 0.08209698647260666, "learning_rate": 7.0720065995120815e-06, "loss": 0.0515, "step": 2939 }, { "epoch": 0.409670452170278, "grad_norm": 0.07904350757598877, "learning_rate": 7.069861474131176e-06, "loss": 0.0471, "step": 2940 }, { "epoch": 0.40980979586149235, "grad_norm": 0.17717577517032623, "learning_rate": 7.067715888874307e-06, "loss": 0.0717, "step": 2941 }, { "epoch": 0.40994913955270673, "grad_norm": 0.10761949419975281, "learning_rate": 7.065569844218175e-06, "loss": 0.0547, "step": 2942 }, { "epoch": 0.4100884832439211, "grad_norm": 0.17551404237747192, "learning_rate": 7.0634233406395806e-06, "loss": 0.0553, "step": 2943 }, { "epoch": 0.4102278269351355, "grad_norm": 0.07459433376789093, "learning_rate": 7.061276378615428e-06, "loss": 0.0575, "step": 2944 }, { "epoch": 0.4103671706263499, "grad_norm": 0.15593355894088745, "learning_rate": 7.059128958622725e-06, "loss": 0.0723, "step": 2945 }, { "epoch": 0.4105065143175643, "grad_norm": 0.13753746449947357, "learning_rate": 7.056981081138578e-06, "loss": 0.0569, "step": 2946 }, { "epoch": 0.4106458580087787, "grad_norm": 0.08912981301546097, "learning_rate": 7.054832746640196e-06, "loss": 0.0648, "step": 2947 }, { "epoch": 0.41078520169999305, "grad_norm": 0.0813378170132637, "learning_rate": 7.05268395560489e-06, "loss": 0.0576, "step": 2948 }, { "epoch": 0.41092454539120743, "grad_norm": 0.07869594544172287, "learning_rate": 7.050534708510073e-06, "loss": 0.0509, "step": 2949 }, { "epoch": 0.4110638890824218, "grad_norm": 0.07211904227733612, "learning_rate": 7.048385005833258e-06, "loss": 0.056, "step": 2950 }, { "epoch": 0.4112032327736362, "grad_norm": 0.10056667029857635, "learning_rate": 7.04623484805206e-06, "loss": 0.0628, "step": 2951 }, { "epoch": 0.41134257646485056, "grad_norm": 0.10075977444648743, "learning_rate": 7.044084235644196e-06, "loss": 0.0457, "step": 2952 }, { "epoch": 0.41148192015606494, "grad_norm": 0.07636750489473343, "learning_rate": 7.041933169087482e-06, "loss": 0.0495, "step": 2953 }, { "epoch": 0.4116212638472793, "grad_norm": 0.17187777161598206, "learning_rate": 7.039781648859836e-06, "loss": 0.0617, "step": 2954 }, { "epoch": 0.4117606075384937, "grad_norm": 0.11809638887643814, "learning_rate": 7.037629675439276e-06, "loss": 0.0575, "step": 2955 }, { "epoch": 0.4118999512297081, "grad_norm": 0.07581383734941483, "learning_rate": 7.035477249303923e-06, "loss": 0.0583, "step": 2956 }, { "epoch": 0.41203929492092245, "grad_norm": 0.07824836671352386, "learning_rate": 7.033324370931993e-06, "loss": 0.0596, "step": 2957 }, { "epoch": 0.4121786386121368, "grad_norm": 0.05474739149212837, "learning_rate": 7.031171040801813e-06, "loss": 0.0535, "step": 2958 }, { "epoch": 0.4123179823033512, "grad_norm": 0.1388292759656906, "learning_rate": 7.029017259391797e-06, "loss": 0.06, "step": 2959 }, { "epoch": 0.4124573259945656, "grad_norm": 0.11155546456575394, "learning_rate": 7.026863027180472e-06, "loss": 0.0603, "step": 2960 }, { "epoch": 0.41259666968577996, "grad_norm": 0.11671453714370728, "learning_rate": 7.024708344646455e-06, "loss": 0.0571, "step": 2961 }, { "epoch": 0.41273601337699434, "grad_norm": 0.07159892469644547, "learning_rate": 7.022553212268469e-06, "loss": 0.0533, "step": 2962 }, { "epoch": 0.4128753570682087, "grad_norm": 0.04815491661429405, "learning_rate": 7.020397630525336e-06, "loss": 0.0532, "step": 2963 }, { "epoch": 0.4130147007594231, "grad_norm": 0.10957951098680496, "learning_rate": 7.018241599895974e-06, "loss": 0.059, "step": 2964 }, { "epoch": 0.41315404445063747, "grad_norm": 0.10943147540092468, "learning_rate": 7.016085120859406e-06, "loss": 0.062, "step": 2965 }, { "epoch": 0.4132933881418519, "grad_norm": 0.09014751762151718, "learning_rate": 7.013928193894753e-06, "loss": 0.0452, "step": 2966 }, { "epoch": 0.4134327318330663, "grad_norm": 0.0834885686635971, "learning_rate": 7.011770819481234e-06, "loss": 0.0603, "step": 2967 }, { "epoch": 0.41357207552428066, "grad_norm": 0.08585279434919357, "learning_rate": 7.0096129980981674e-06, "loss": 0.0567, "step": 2968 }, { "epoch": 0.41371141921549504, "grad_norm": 0.05737416818737984, "learning_rate": 7.0074547302249755e-06, "loss": 0.0511, "step": 2969 }, { "epoch": 0.4138507629067094, "grad_norm": 0.07775627076625824, "learning_rate": 7.005296016341171e-06, "loss": 0.0492, "step": 2970 }, { "epoch": 0.4139901065979238, "grad_norm": 0.08115414530038834, "learning_rate": 7.003136856926374e-06, "loss": 0.0482, "step": 2971 }, { "epoch": 0.41412945028913817, "grad_norm": 0.0907064750790596, "learning_rate": 7.0009772524603e-06, "loss": 0.0618, "step": 2972 }, { "epoch": 0.41426879398035255, "grad_norm": 0.13129863142967224, "learning_rate": 6.998817203422763e-06, "loss": 0.0616, "step": 2973 }, { "epoch": 0.4144081376715669, "grad_norm": 0.12028679251670837, "learning_rate": 6.996656710293679e-06, "loss": 0.0561, "step": 2974 }, { "epoch": 0.4145474813627813, "grad_norm": 0.068229541182518, "learning_rate": 6.994495773553056e-06, "loss": 0.049, "step": 2975 }, { "epoch": 0.4146868250539957, "grad_norm": 0.07521704584360123, "learning_rate": 6.992334393681008e-06, "loss": 0.0633, "step": 2976 }, { "epoch": 0.41482616874521006, "grad_norm": 0.054367545992136, "learning_rate": 6.990172571157744e-06, "loss": 0.0452, "step": 2977 }, { "epoch": 0.41496551243642443, "grad_norm": 0.07001733779907227, "learning_rate": 6.988010306463571e-06, "loss": 0.0538, "step": 2978 }, { "epoch": 0.4151048561276388, "grad_norm": 0.14699722826480865, "learning_rate": 6.985847600078894e-06, "loss": 0.0552, "step": 2979 }, { "epoch": 0.4152441998188532, "grad_norm": 0.11331825703382492, "learning_rate": 6.98368445248422e-06, "loss": 0.0611, "step": 2980 }, { "epoch": 0.41538354351006757, "grad_norm": 0.10459937155246735, "learning_rate": 6.981520864160147e-06, "loss": 0.0567, "step": 2981 }, { "epoch": 0.41552288720128194, "grad_norm": 0.1312699168920517, "learning_rate": 6.979356835587377e-06, "loss": 0.0566, "step": 2982 }, { "epoch": 0.4156622308924963, "grad_norm": 0.08409721404314041, "learning_rate": 6.977192367246709e-06, "loss": 0.0482, "step": 2983 }, { "epoch": 0.4158015745837107, "grad_norm": 0.10965581983327866, "learning_rate": 6.9750274596190344e-06, "loss": 0.0592, "step": 2984 }, { "epoch": 0.4159409182749251, "grad_norm": 0.09547901898622513, "learning_rate": 6.972862113185353e-06, "loss": 0.0536, "step": 2985 }, { "epoch": 0.4160802619661395, "grad_norm": 0.10982763022184372, "learning_rate": 6.970696328426749e-06, "loss": 0.062, "step": 2986 }, { "epoch": 0.4162196056573539, "grad_norm": 0.08038683980703354, "learning_rate": 6.968530105824413e-06, "loss": 0.0509, "step": 2987 }, { "epoch": 0.41635894934856826, "grad_norm": 0.04834312945604324, "learning_rate": 6.966363445859629e-06, "loss": 0.0464, "step": 2988 }, { "epoch": 0.41649829303978264, "grad_norm": 0.09814132004976273, "learning_rate": 6.96419634901378e-06, "loss": 0.0542, "step": 2989 }, { "epoch": 0.416637636730997, "grad_norm": 0.07823590189218521, "learning_rate": 6.962028815768347e-06, "loss": 0.0577, "step": 2990 }, { "epoch": 0.4167769804222114, "grad_norm": 0.07173289358615875, "learning_rate": 6.959860846604903e-06, "loss": 0.0608, "step": 2991 }, { "epoch": 0.4169163241134258, "grad_norm": 0.141898050904274, "learning_rate": 6.957692442005126e-06, "loss": 0.0586, "step": 2992 }, { "epoch": 0.41705566780464015, "grad_norm": 0.07853236794471741, "learning_rate": 6.95552360245078e-06, "loss": 0.0559, "step": 2993 }, { "epoch": 0.41719501149585453, "grad_norm": 0.05896672233939171, "learning_rate": 6.953354328423737e-06, "loss": 0.0532, "step": 2994 }, { "epoch": 0.4173343551870689, "grad_norm": 0.12334073334932327, "learning_rate": 6.951184620405958e-06, "loss": 0.0638, "step": 2995 }, { "epoch": 0.4174736988782833, "grad_norm": 0.08795122057199478, "learning_rate": 6.949014478879502e-06, "loss": 0.0486, "step": 2996 }, { "epoch": 0.41761304256949766, "grad_norm": 0.17679718136787415, "learning_rate": 6.946843904326527e-06, "loss": 0.0649, "step": 2997 }, { "epoch": 0.41775238626071204, "grad_norm": 0.09225644916296005, "learning_rate": 6.944672897229282e-06, "loss": 0.0593, "step": 2998 }, { "epoch": 0.4178917299519264, "grad_norm": 0.13547204434871674, "learning_rate": 6.942501458070117e-06, "loss": 0.0616, "step": 2999 }, { "epoch": 0.4180310736431408, "grad_norm": 0.1372753530740738, "learning_rate": 6.940329587331477e-06, "loss": 0.0587, "step": 3000 }, { "epoch": 0.41817041733435517, "grad_norm": 0.1379670351743698, "learning_rate": 6.938157285495901e-06, "loss": 0.0485, "step": 3001 }, { "epoch": 0.41830976102556955, "grad_norm": 0.10194910317659378, "learning_rate": 6.935984553046025e-06, "loss": 0.0564, "step": 3002 }, { "epoch": 0.4184491047167839, "grad_norm": 0.056245386600494385, "learning_rate": 6.93381139046458e-06, "loss": 0.0483, "step": 3003 }, { "epoch": 0.4185884484079983, "grad_norm": 0.10786651074886322, "learning_rate": 6.931637798234394e-06, "loss": 0.0624, "step": 3004 }, { "epoch": 0.4187277920992127, "grad_norm": 0.11471886187791824, "learning_rate": 6.929463776838389e-06, "loss": 0.0665, "step": 3005 }, { "epoch": 0.4188671357904271, "grad_norm": 0.08977705240249634, "learning_rate": 6.927289326759585e-06, "loss": 0.0531, "step": 3006 }, { "epoch": 0.4190064794816415, "grad_norm": 0.12835247814655304, "learning_rate": 6.925114448481089e-06, "loss": 0.0542, "step": 3007 }, { "epoch": 0.41914582317285587, "grad_norm": 0.2618628740310669, "learning_rate": 6.922939142486118e-06, "loss": 0.0762, "step": 3008 }, { "epoch": 0.41928516686407025, "grad_norm": 0.0835617259144783, "learning_rate": 6.9207634092579686e-06, "loss": 0.0565, "step": 3009 }, { "epoch": 0.4194245105552846, "grad_norm": 0.06683164089918137, "learning_rate": 6.9185872492800434e-06, "loss": 0.0537, "step": 3010 }, { "epoch": 0.419563854246499, "grad_norm": 0.07778338342905045, "learning_rate": 6.916410663035832e-06, "loss": 0.0537, "step": 3011 }, { "epoch": 0.4197031979377134, "grad_norm": 0.06858164072036743, "learning_rate": 6.9142336510089235e-06, "loss": 0.0535, "step": 3012 }, { "epoch": 0.41984254162892776, "grad_norm": 0.11500164866447449, "learning_rate": 6.912056213683001e-06, "loss": 0.0641, "step": 3013 }, { "epoch": 0.41998188532014213, "grad_norm": 0.07355430722236633, "learning_rate": 6.909878351541841e-06, "loss": 0.051, "step": 3014 }, { "epoch": 0.4201212290113565, "grad_norm": 0.09869277477264404, "learning_rate": 6.907700065069315e-06, "loss": 0.0512, "step": 3015 }, { "epoch": 0.4202605727025709, "grad_norm": 0.09215593338012695, "learning_rate": 6.905521354749387e-06, "loss": 0.0582, "step": 3016 }, { "epoch": 0.42039991639378527, "grad_norm": 0.11059533804655075, "learning_rate": 6.90334222106612e-06, "loss": 0.0535, "step": 3017 }, { "epoch": 0.42053926008499964, "grad_norm": 0.12816864252090454, "learning_rate": 6.901162664503662e-06, "loss": 0.059, "step": 3018 }, { "epoch": 0.420678603776214, "grad_norm": 0.07946906983852386, "learning_rate": 6.898982685546267e-06, "loss": 0.0618, "step": 3019 }, { "epoch": 0.4208179474674284, "grad_norm": 0.166132390499115, "learning_rate": 6.896802284678273e-06, "loss": 0.067, "step": 3020 }, { "epoch": 0.4209572911586428, "grad_norm": 0.0972137525677681, "learning_rate": 6.894621462384116e-06, "loss": 0.0618, "step": 3021 }, { "epoch": 0.42109663484985715, "grad_norm": 0.11386136710643768, "learning_rate": 6.8924402191483245e-06, "loss": 0.0645, "step": 3022 }, { "epoch": 0.42123597854107153, "grad_norm": 0.08456778526306152, "learning_rate": 6.890258555455521e-06, "loss": 0.0628, "step": 3023 }, { "epoch": 0.4213753222322859, "grad_norm": 0.10200271010398865, "learning_rate": 6.888076471790423e-06, "loss": 0.0598, "step": 3024 }, { "epoch": 0.4215146659235003, "grad_norm": 0.0846422091126442, "learning_rate": 6.8858939686378376e-06, "loss": 0.0607, "step": 3025 }, { "epoch": 0.4216540096147147, "grad_norm": 0.11954864859580994, "learning_rate": 6.8837110464826685e-06, "loss": 0.0592, "step": 3026 }, { "epoch": 0.4217933533059291, "grad_norm": 0.08739769458770752, "learning_rate": 6.881527705809912e-06, "loss": 0.0601, "step": 3027 }, { "epoch": 0.4219326969971435, "grad_norm": 0.09226857870817184, "learning_rate": 6.879343947104653e-06, "loss": 0.0547, "step": 3028 }, { "epoch": 0.42207204068835785, "grad_norm": 0.053899869322776794, "learning_rate": 6.8771597708520766e-06, "loss": 0.0568, "step": 3029 }, { "epoch": 0.42221138437957223, "grad_norm": 0.0880824625492096, "learning_rate": 6.874975177537455e-06, "loss": 0.0639, "step": 3030 }, { "epoch": 0.4223507280707866, "grad_norm": 0.1268283873796463, "learning_rate": 6.872790167646155e-06, "loss": 0.0576, "step": 3031 }, { "epoch": 0.422490071762001, "grad_norm": 0.09511521458625793, "learning_rate": 6.870604741663638e-06, "loss": 0.0672, "step": 3032 }, { "epoch": 0.42262941545321536, "grad_norm": 0.07320459932088852, "learning_rate": 6.868418900075452e-06, "loss": 0.0642, "step": 3033 }, { "epoch": 0.42276875914442974, "grad_norm": 0.08099667727947235, "learning_rate": 6.866232643367243e-06, "loss": 0.0537, "step": 3034 }, { "epoch": 0.4229081028356441, "grad_norm": 0.06871986389160156, "learning_rate": 6.864045972024749e-06, "loss": 0.0521, "step": 3035 }, { "epoch": 0.4230474465268585, "grad_norm": 0.07185818254947662, "learning_rate": 6.861858886533796e-06, "loss": 0.0518, "step": 3036 }, { "epoch": 0.4231867902180729, "grad_norm": 0.08470741659402847, "learning_rate": 6.859671387380307e-06, "loss": 0.055, "step": 3037 }, { "epoch": 0.42332613390928725, "grad_norm": 0.06655906140804291, "learning_rate": 6.85748347505029e-06, "loss": 0.0603, "step": 3038 }, { "epoch": 0.4234654776005016, "grad_norm": 0.10242514312267303, "learning_rate": 6.855295150029853e-06, "loss": 0.0642, "step": 3039 }, { "epoch": 0.423604821291716, "grad_norm": 0.070052869617939, "learning_rate": 6.853106412805192e-06, "loss": 0.0493, "step": 3040 }, { "epoch": 0.4237441649829304, "grad_norm": 0.09232047200202942, "learning_rate": 6.850917263862591e-06, "loss": 0.0608, "step": 3041 }, { "epoch": 0.42388350867414476, "grad_norm": 0.07037322223186493, "learning_rate": 6.848727703688432e-06, "loss": 0.0441, "step": 3042 }, { "epoch": 0.42402285236535914, "grad_norm": 0.06893081963062286, "learning_rate": 6.846537732769185e-06, "loss": 0.0575, "step": 3043 }, { "epoch": 0.4241621960565735, "grad_norm": 0.09121154248714447, "learning_rate": 6.8443473515914105e-06, "loss": 0.0574, "step": 3044 }, { "epoch": 0.4243015397477879, "grad_norm": 0.08726773411035538, "learning_rate": 6.842156560641762e-06, "loss": 0.0569, "step": 3045 }, { "epoch": 0.4244408834390023, "grad_norm": 0.09879203885793686, "learning_rate": 6.839965360406983e-06, "loss": 0.0455, "step": 3046 }, { "epoch": 0.4245802271302167, "grad_norm": 0.08595768362283707, "learning_rate": 6.837773751373908e-06, "loss": 0.0548, "step": 3047 }, { "epoch": 0.4247195708214311, "grad_norm": 0.11344341188669205, "learning_rate": 6.835581734029462e-06, "loss": 0.0657, "step": 3048 }, { "epoch": 0.42485891451264546, "grad_norm": 0.07109066098928452, "learning_rate": 6.833389308860662e-06, "loss": 0.0458, "step": 3049 }, { "epoch": 0.42499825820385984, "grad_norm": 0.06517943739891052, "learning_rate": 6.831196476354615e-06, "loss": 0.0562, "step": 3050 }, { "epoch": 0.4251376018950742, "grad_norm": 0.09775274991989136, "learning_rate": 6.829003236998517e-06, "loss": 0.052, "step": 3051 }, { "epoch": 0.4252769455862886, "grad_norm": 0.053729843348264694, "learning_rate": 6.8268095912796574e-06, "loss": 0.052, "step": 3052 }, { "epoch": 0.42541628927750297, "grad_norm": 0.1109117716550827, "learning_rate": 6.824615539685413e-06, "loss": 0.0598, "step": 3053 }, { "epoch": 0.42555563296871735, "grad_norm": 0.07003789395093918, "learning_rate": 6.822421082703253e-06, "loss": 0.0524, "step": 3054 }, { "epoch": 0.4256949766599317, "grad_norm": 0.09301482141017914, "learning_rate": 6.820226220820733e-06, "loss": 0.0559, "step": 3055 }, { "epoch": 0.4258343203511461, "grad_norm": 0.07757960259914398, "learning_rate": 6.818030954525505e-06, "loss": 0.0511, "step": 3056 }, { "epoch": 0.4259736640423605, "grad_norm": 0.07305886596441269, "learning_rate": 6.815835284305304e-06, "loss": 0.0655, "step": 3057 }, { "epoch": 0.42611300773357486, "grad_norm": 0.0665612742304802, "learning_rate": 6.8136392106479624e-06, "loss": 0.0568, "step": 3058 }, { "epoch": 0.42625235142478923, "grad_norm": 0.05506373569369316, "learning_rate": 6.81144273404139e-06, "loss": 0.0582, "step": 3059 }, { "epoch": 0.4263916951160036, "grad_norm": 0.09961587190628052, "learning_rate": 6.8092458549736e-06, "loss": 0.0805, "step": 3060 }, { "epoch": 0.426531038807218, "grad_norm": 0.09211671352386475, "learning_rate": 6.807048573932687e-06, "loss": 0.0584, "step": 3061 }, { "epoch": 0.42667038249843237, "grad_norm": 0.14438463747501373, "learning_rate": 6.8048508914068355e-06, "loss": 0.0566, "step": 3062 }, { "epoch": 0.42680972618964674, "grad_norm": 0.08693329244852066, "learning_rate": 6.802652807884322e-06, "loss": 0.054, "step": 3063 }, { "epoch": 0.4269490698808611, "grad_norm": 0.05047117546200752, "learning_rate": 6.80045432385351e-06, "loss": 0.0532, "step": 3064 }, { "epoch": 0.4270884135720755, "grad_norm": 0.10447914153337479, "learning_rate": 6.798255439802852e-06, "loss": 0.0565, "step": 3065 }, { "epoch": 0.42722775726328993, "grad_norm": 0.13263794779777527, "learning_rate": 6.796056156220892e-06, "loss": 0.0596, "step": 3066 }, { "epoch": 0.4273671009545043, "grad_norm": 0.062131185084581375, "learning_rate": 6.793856473596256e-06, "loss": 0.0541, "step": 3067 }, { "epoch": 0.4275064446457187, "grad_norm": 0.05098222568631172, "learning_rate": 6.791656392417666e-06, "loss": 0.0489, "step": 3068 }, { "epoch": 0.42764578833693306, "grad_norm": 0.06298184394836426, "learning_rate": 6.789455913173933e-06, "loss": 0.039, "step": 3069 }, { "epoch": 0.42778513202814744, "grad_norm": 0.07364723831415176, "learning_rate": 6.787255036353947e-06, "loss": 0.0532, "step": 3070 }, { "epoch": 0.4279244757193618, "grad_norm": 0.10926280915737152, "learning_rate": 6.785053762446696e-06, "loss": 0.064, "step": 3071 }, { "epoch": 0.4280638194105762, "grad_norm": 0.0682474672794342, "learning_rate": 6.782852091941254e-06, "loss": 0.0598, "step": 3072 }, { "epoch": 0.4282031631017906, "grad_norm": 0.10974869877099991, "learning_rate": 6.780650025326778e-06, "loss": 0.0562, "step": 3073 }, { "epoch": 0.42834250679300495, "grad_norm": 0.11465629935264587, "learning_rate": 6.778447563092523e-06, "loss": 0.0659, "step": 3074 }, { "epoch": 0.42848185048421933, "grad_norm": 0.14083904027938843, "learning_rate": 6.776244705727818e-06, "loss": 0.0655, "step": 3075 }, { "epoch": 0.4286211941754337, "grad_norm": 0.10190916806459427, "learning_rate": 6.774041453722093e-06, "loss": 0.0701, "step": 3076 }, { "epoch": 0.4287605378666481, "grad_norm": 0.09102560579776764, "learning_rate": 6.771837807564861e-06, "loss": 0.0554, "step": 3077 }, { "epoch": 0.42889988155786246, "grad_norm": 0.10790526121854782, "learning_rate": 6.769633767745718e-06, "loss": 0.06, "step": 3078 }, { "epoch": 0.42903922524907684, "grad_norm": 0.1519855260848999, "learning_rate": 6.767429334754354e-06, "loss": 0.063, "step": 3079 }, { "epoch": 0.4291785689402912, "grad_norm": 0.18170496821403503, "learning_rate": 6.7652245090805426e-06, "loss": 0.072, "step": 3080 }, { "epoch": 0.4293179126315056, "grad_norm": 0.07497373968362808, "learning_rate": 6.763019291214146e-06, "loss": 0.0565, "step": 3081 }, { "epoch": 0.42945725632271997, "grad_norm": 0.10275483131408691, "learning_rate": 6.760813681645114e-06, "loss": 0.0752, "step": 3082 }, { "epoch": 0.42959660001393435, "grad_norm": 0.07803136855363846, "learning_rate": 6.758607680863481e-06, "loss": 0.0532, "step": 3083 }, { "epoch": 0.4297359437051487, "grad_norm": 0.09836062788963318, "learning_rate": 6.756401289359371e-06, "loss": 0.0567, "step": 3084 }, { "epoch": 0.4298752873963631, "grad_norm": 0.1329224705696106, "learning_rate": 6.754194507622995e-06, "loss": 0.0698, "step": 3085 }, { "epoch": 0.43001463108757754, "grad_norm": 0.1494830697774887, "learning_rate": 6.7519873361446475e-06, "loss": 0.0561, "step": 3086 }, { "epoch": 0.4301539747787919, "grad_norm": 0.08763881772756577, "learning_rate": 6.7497797754147134e-06, "loss": 0.0555, "step": 3087 }, { "epoch": 0.4302933184700063, "grad_norm": 0.07725747674703598, "learning_rate": 6.74757182592366e-06, "loss": 0.0551, "step": 3088 }, { "epoch": 0.43043266216122067, "grad_norm": 0.2059878706932068, "learning_rate": 6.7453634881620445e-06, "loss": 0.0693, "step": 3089 }, { "epoch": 0.43057200585243505, "grad_norm": 0.1193673387169838, "learning_rate": 6.743154762620511e-06, "loss": 0.0492, "step": 3090 }, { "epoch": 0.4307113495436494, "grad_norm": 0.1175648421049118, "learning_rate": 6.740945649789784e-06, "loss": 0.0527, "step": 3091 }, { "epoch": 0.4308506932348638, "grad_norm": 0.10513085871934891, "learning_rate": 6.738736150160681e-06, "loss": 0.0602, "step": 3092 }, { "epoch": 0.4309900369260782, "grad_norm": 0.06232018396258354, "learning_rate": 6.736526264224101e-06, "loss": 0.046, "step": 3093 }, { "epoch": 0.43112938061729256, "grad_norm": 0.07144875824451447, "learning_rate": 6.734315992471032e-06, "loss": 0.0526, "step": 3094 }, { "epoch": 0.43126872430850693, "grad_norm": 0.08913610875606537, "learning_rate": 6.7321053353925446e-06, "loss": 0.0536, "step": 3095 }, { "epoch": 0.4314080679997213, "grad_norm": 0.10953003168106079, "learning_rate": 6.729894293479795e-06, "loss": 0.0662, "step": 3096 }, { "epoch": 0.4315474116909357, "grad_norm": 0.06883594393730164, "learning_rate": 6.727682867224028e-06, "loss": 0.0513, "step": 3097 }, { "epoch": 0.43168675538215007, "grad_norm": 0.08202064782381058, "learning_rate": 6.725471057116573e-06, "loss": 0.0567, "step": 3098 }, { "epoch": 0.43182609907336444, "grad_norm": 0.09328076243400574, "learning_rate": 6.723258863648841e-06, "loss": 0.0565, "step": 3099 }, { "epoch": 0.4319654427645788, "grad_norm": 0.10278838872909546, "learning_rate": 6.72104628731233e-06, "loss": 0.0664, "step": 3100 }, { "epoch": 0.4321047864557932, "grad_norm": 0.059748027473688126, "learning_rate": 6.718833328598629e-06, "loss": 0.0624, "step": 3101 }, { "epoch": 0.4322441301470076, "grad_norm": 0.09233184903860092, "learning_rate": 6.716619987999404e-06, "loss": 0.0589, "step": 3102 }, { "epoch": 0.43238347383822195, "grad_norm": 0.05490773171186447, "learning_rate": 6.714406266006408e-06, "loss": 0.0534, "step": 3103 }, { "epoch": 0.43252281752943633, "grad_norm": 0.12245440483093262, "learning_rate": 6.712192163111481e-06, "loss": 0.0737, "step": 3104 }, { "epoch": 0.4326621612206507, "grad_norm": 0.15884923934936523, "learning_rate": 6.709977679806543e-06, "loss": 0.0726, "step": 3105 }, { "epoch": 0.43280150491186514, "grad_norm": 0.0725327581167221, "learning_rate": 6.707762816583608e-06, "loss": 0.0548, "step": 3106 }, { "epoch": 0.4329408486030795, "grad_norm": 0.09650014340877533, "learning_rate": 6.705547573934759e-06, "loss": 0.0652, "step": 3107 }, { "epoch": 0.4330801922942939, "grad_norm": 0.16213057935237885, "learning_rate": 6.703331952352181e-06, "loss": 0.0631, "step": 3108 }, { "epoch": 0.4332195359855083, "grad_norm": 0.06412389129400253, "learning_rate": 6.70111595232813e-06, "loss": 0.0581, "step": 3109 }, { "epoch": 0.43335887967672265, "grad_norm": 0.06183336302638054, "learning_rate": 6.6988995743549516e-06, "loss": 0.053, "step": 3110 }, { "epoch": 0.43349822336793703, "grad_norm": 0.08165358752012253, "learning_rate": 6.696682818925074e-06, "loss": 0.0514, "step": 3111 }, { "epoch": 0.4336375670591514, "grad_norm": 0.06417994946241379, "learning_rate": 6.694465686531011e-06, "loss": 0.0604, "step": 3112 }, { "epoch": 0.4337769107503658, "grad_norm": 0.10400689393281937, "learning_rate": 6.692248177665357e-06, "loss": 0.0588, "step": 3113 }, { "epoch": 0.43391625444158016, "grad_norm": 0.07748997211456299, "learning_rate": 6.690030292820792e-06, "loss": 0.0589, "step": 3114 }, { "epoch": 0.43405559813279454, "grad_norm": 0.0815877690911293, "learning_rate": 6.687812032490081e-06, "loss": 0.0544, "step": 3115 }, { "epoch": 0.4341949418240089, "grad_norm": 0.0745096281170845, "learning_rate": 6.685593397166069e-06, "loss": 0.0533, "step": 3116 }, { "epoch": 0.4343342855152233, "grad_norm": 0.08108503371477127, "learning_rate": 6.683374387341688e-06, "loss": 0.0485, "step": 3117 }, { "epoch": 0.4344736292064377, "grad_norm": 0.16697022318840027, "learning_rate": 6.681155003509949e-06, "loss": 0.0551, "step": 3118 }, { "epoch": 0.43461297289765205, "grad_norm": 0.07809825241565704, "learning_rate": 6.67893524616395e-06, "loss": 0.0658, "step": 3119 }, { "epoch": 0.4347523165888664, "grad_norm": 0.1142234355211258, "learning_rate": 6.67671511579687e-06, "loss": 0.0656, "step": 3120 }, { "epoch": 0.4348916602800808, "grad_norm": 0.06896363943815231, "learning_rate": 6.67449461290197e-06, "loss": 0.0469, "step": 3121 }, { "epoch": 0.4350310039712952, "grad_norm": 0.07907751202583313, "learning_rate": 6.6722737379726e-06, "loss": 0.0586, "step": 3122 }, { "epoch": 0.43517034766250956, "grad_norm": 0.0888015478849411, "learning_rate": 6.670052491502182e-06, "loss": 0.0528, "step": 3123 }, { "epoch": 0.43530969135372394, "grad_norm": 0.07694089412689209, "learning_rate": 6.667830873984228e-06, "loss": 0.0486, "step": 3124 }, { "epoch": 0.4354490350449383, "grad_norm": 0.14733359217643738, "learning_rate": 6.66560888591233e-06, "loss": 0.0552, "step": 3125 }, { "epoch": 0.43558837873615275, "grad_norm": 0.12255591154098511, "learning_rate": 6.663386527780166e-06, "loss": 0.0526, "step": 3126 }, { "epoch": 0.4357277224273671, "grad_norm": 0.14432848989963531, "learning_rate": 6.66116380008149e-06, "loss": 0.063, "step": 3127 }, { "epoch": 0.4358670661185815, "grad_norm": 0.118925541639328, "learning_rate": 6.6589407033101435e-06, "loss": 0.0817, "step": 3128 }, { "epoch": 0.4360064098097959, "grad_norm": 0.0841217190027237, "learning_rate": 6.656717237960047e-06, "loss": 0.0505, "step": 3129 }, { "epoch": 0.43614575350101026, "grad_norm": 0.07605477422475815, "learning_rate": 6.654493404525204e-06, "loss": 0.0542, "step": 3130 }, { "epoch": 0.43628509719222464, "grad_norm": 0.13461202383041382, "learning_rate": 6.652269203499699e-06, "loss": 0.0534, "step": 3131 }, { "epoch": 0.436424440883439, "grad_norm": 0.12062787264585495, "learning_rate": 6.650044635377698e-06, "loss": 0.046, "step": 3132 }, { "epoch": 0.4365637845746534, "grad_norm": 0.13376371562480927, "learning_rate": 6.64781970065345e-06, "loss": 0.0531, "step": 3133 }, { "epoch": 0.43670312826586777, "grad_norm": 0.10696426033973694, "learning_rate": 6.645594399821286e-06, "loss": 0.058, "step": 3134 }, { "epoch": 0.43684247195708215, "grad_norm": 0.1347237229347229, "learning_rate": 6.6433687333756165e-06, "loss": 0.0499, "step": 3135 }, { "epoch": 0.4369818156482965, "grad_norm": 0.05495820939540863, "learning_rate": 6.641142701810932e-06, "loss": 0.0488, "step": 3136 }, { "epoch": 0.4371211593395109, "grad_norm": 0.06705522537231445, "learning_rate": 6.638916305621807e-06, "loss": 0.0578, "step": 3137 }, { "epoch": 0.4372605030307253, "grad_norm": 0.06392254680395126, "learning_rate": 6.636689545302898e-06, "loss": 0.0585, "step": 3138 }, { "epoch": 0.43739984672193966, "grad_norm": 0.10886353999376297, "learning_rate": 6.634462421348935e-06, "loss": 0.0516, "step": 3139 }, { "epoch": 0.43753919041315403, "grad_norm": 0.09992308169603348, "learning_rate": 6.63223493425474e-06, "loss": 0.0569, "step": 3140 }, { "epoch": 0.4376785341043684, "grad_norm": 0.09133165329694748, "learning_rate": 6.630007084515205e-06, "loss": 0.0508, "step": 3141 }, { "epoch": 0.4378178777955828, "grad_norm": 0.057873208075761795, "learning_rate": 6.627778872625311e-06, "loss": 0.0503, "step": 3142 }, { "epoch": 0.43795722148679717, "grad_norm": 0.06911873072385788, "learning_rate": 6.625550299080115e-06, "loss": 0.0561, "step": 3143 }, { "epoch": 0.43809656517801154, "grad_norm": 0.09163713455200195, "learning_rate": 6.6233213643747525e-06, "loss": 0.0594, "step": 3144 }, { "epoch": 0.4382359088692259, "grad_norm": 0.11593172699213028, "learning_rate": 6.621092069004445e-06, "loss": 0.0513, "step": 3145 }, { "epoch": 0.4383752525604403, "grad_norm": 0.09784039109945297, "learning_rate": 6.618862413464491e-06, "loss": 0.0556, "step": 3146 }, { "epoch": 0.43851459625165473, "grad_norm": 0.1650886982679367, "learning_rate": 6.616632398250266e-06, "loss": 0.0586, "step": 3147 }, { "epoch": 0.4386539399428691, "grad_norm": 0.13435402512550354, "learning_rate": 6.614402023857231e-06, "loss": 0.0572, "step": 3148 }, { "epoch": 0.4387932836340835, "grad_norm": 0.12329612672328949, "learning_rate": 6.612171290780925e-06, "loss": 0.0522, "step": 3149 }, { "epoch": 0.43893262732529786, "grad_norm": 0.09121139347553253, "learning_rate": 6.6099401995169635e-06, "loss": 0.0533, "step": 3150 }, { "epoch": 0.43907197101651224, "grad_norm": 0.06583743542432785, "learning_rate": 6.607708750561046e-06, "loss": 0.0539, "step": 3151 }, { "epoch": 0.4392113147077266, "grad_norm": 0.07693086564540863, "learning_rate": 6.605476944408948e-06, "loss": 0.0559, "step": 3152 }, { "epoch": 0.439350658398941, "grad_norm": 0.08906704187393188, "learning_rate": 6.603244781556527e-06, "loss": 0.0676, "step": 3153 }, { "epoch": 0.4394900020901554, "grad_norm": 0.09041880816221237, "learning_rate": 6.601012262499718e-06, "loss": 0.0534, "step": 3154 }, { "epoch": 0.43962934578136975, "grad_norm": 0.1179598718881607, "learning_rate": 6.598779387734535e-06, "loss": 0.0612, "step": 3155 }, { "epoch": 0.43976868947258413, "grad_norm": 0.10010252892971039, "learning_rate": 6.596546157757075e-06, "loss": 0.0579, "step": 3156 }, { "epoch": 0.4399080331637985, "grad_norm": 0.12738141417503357, "learning_rate": 6.594312573063506e-06, "loss": 0.0566, "step": 3157 }, { "epoch": 0.4400473768550129, "grad_norm": 0.06353096663951874, "learning_rate": 6.592078634150084e-06, "loss": 0.0548, "step": 3158 }, { "epoch": 0.44018672054622726, "grad_norm": 0.13906899094581604, "learning_rate": 6.589844341513137e-06, "loss": 0.0737, "step": 3159 }, { "epoch": 0.44032606423744164, "grad_norm": 0.07172109186649323, "learning_rate": 6.587609695649073e-06, "loss": 0.0508, "step": 3160 }, { "epoch": 0.440465407928656, "grad_norm": 0.08918724209070206, "learning_rate": 6.585374697054382e-06, "loss": 0.0589, "step": 3161 }, { "epoch": 0.4406047516198704, "grad_norm": 0.06958266347646713, "learning_rate": 6.583139346225627e-06, "loss": 0.0547, "step": 3162 }, { "epoch": 0.44074409531108477, "grad_norm": 0.11271663010120392, "learning_rate": 6.580903643659453e-06, "loss": 0.0607, "step": 3163 }, { "epoch": 0.44088343900229915, "grad_norm": 0.10723292082548141, "learning_rate": 6.578667589852583e-06, "loss": 0.0486, "step": 3164 }, { "epoch": 0.4410227826935135, "grad_norm": 0.18366055190563202, "learning_rate": 6.576431185301815e-06, "loss": 0.069, "step": 3165 }, { "epoch": 0.4411621263847279, "grad_norm": 0.10983866453170776, "learning_rate": 6.574194430504027e-06, "loss": 0.0518, "step": 3166 }, { "epoch": 0.44130147007594234, "grad_norm": 0.15493211150169373, "learning_rate": 6.571957325956178e-06, "loss": 0.0724, "step": 3167 }, { "epoch": 0.4414408137671567, "grad_norm": 0.06558022648096085, "learning_rate": 6.569719872155299e-06, "loss": 0.0546, "step": 3168 }, { "epoch": 0.4415801574583711, "grad_norm": 0.16418516635894775, "learning_rate": 6.567482069598503e-06, "loss": 0.0499, "step": 3169 }, { "epoch": 0.44171950114958547, "grad_norm": 0.2373981475830078, "learning_rate": 6.565243918782975e-06, "loss": 0.0593, "step": 3170 }, { "epoch": 0.44185884484079985, "grad_norm": 0.1044880822300911, "learning_rate": 6.563005420205984e-06, "loss": 0.0554, "step": 3171 }, { "epoch": 0.4419981885320142, "grad_norm": 0.07868069410324097, "learning_rate": 6.560766574364874e-06, "loss": 0.0565, "step": 3172 }, { "epoch": 0.4421375322232286, "grad_norm": 0.08970625698566437, "learning_rate": 6.558527381757063e-06, "loss": 0.0626, "step": 3173 }, { "epoch": 0.442276875914443, "grad_norm": 0.17061953246593475, "learning_rate": 6.55628784288005e-06, "loss": 0.0748, "step": 3174 }, { "epoch": 0.44241621960565736, "grad_norm": 0.11291279643774033, "learning_rate": 6.5540479582314085e-06, "loss": 0.0616, "step": 3175 }, { "epoch": 0.44255556329687173, "grad_norm": 0.08940315991640091, "learning_rate": 6.55180772830879e-06, "loss": 0.0468, "step": 3176 }, { "epoch": 0.4426949069880861, "grad_norm": 0.40845969319343567, "learning_rate": 6.5495671536099235e-06, "loss": 0.071, "step": 3177 }, { "epoch": 0.4428342506793005, "grad_norm": 0.11320692300796509, "learning_rate": 6.5473262346326125e-06, "loss": 0.0556, "step": 3178 }, { "epoch": 0.44297359437051487, "grad_norm": 0.2811656892299652, "learning_rate": 6.545084971874738e-06, "loss": 0.0718, "step": 3179 }, { "epoch": 0.44311293806172924, "grad_norm": 0.13145054876804352, "learning_rate": 6.542843365834257e-06, "loss": 0.0631, "step": 3180 }, { "epoch": 0.4432522817529436, "grad_norm": 0.12128516286611557, "learning_rate": 6.540601417009205e-06, "loss": 0.0513, "step": 3181 }, { "epoch": 0.443391625444158, "grad_norm": 0.10613241046667099, "learning_rate": 6.538359125897691e-06, "loss": 0.0665, "step": 3182 }, { "epoch": 0.4435309691353724, "grad_norm": 0.2133593112230301, "learning_rate": 6.536116492997899e-06, "loss": 0.0666, "step": 3183 }, { "epoch": 0.44367031282658675, "grad_norm": 0.10902238637208939, "learning_rate": 6.5338735188080916e-06, "loss": 0.0658, "step": 3184 }, { "epoch": 0.44380965651780113, "grad_norm": 0.07563775777816772, "learning_rate": 6.53163020382661e-06, "loss": 0.0513, "step": 3185 }, { "epoch": 0.4439490002090155, "grad_norm": 0.07897273451089859, "learning_rate": 6.529386548551864e-06, "loss": 0.0747, "step": 3186 }, { "epoch": 0.44408834390022994, "grad_norm": 0.0806957334280014, "learning_rate": 6.5271425534823415e-06, "loss": 0.0633, "step": 3187 }, { "epoch": 0.4442276875914443, "grad_norm": 0.08409737050533295, "learning_rate": 6.524898219116612e-06, "loss": 0.0506, "step": 3188 }, { "epoch": 0.4443670312826587, "grad_norm": 0.07567441463470459, "learning_rate": 6.522653545953309e-06, "loss": 0.0559, "step": 3189 }, { "epoch": 0.4445063749738731, "grad_norm": 0.08083777874708176, "learning_rate": 6.520408534491154e-06, "loss": 0.0564, "step": 3190 }, { "epoch": 0.44464571866508745, "grad_norm": 0.12759728729724884, "learning_rate": 6.518163185228932e-06, "loss": 0.0471, "step": 3191 }, { "epoch": 0.44478506235630183, "grad_norm": 0.052872903645038605, "learning_rate": 6.515917498665511e-06, "loss": 0.0606, "step": 3192 }, { "epoch": 0.4449244060475162, "grad_norm": 0.06983977556228638, "learning_rate": 6.51367147529983e-06, "loss": 0.0551, "step": 3193 }, { "epoch": 0.4450637497387306, "grad_norm": 0.06229531019926071, "learning_rate": 6.511425115630906e-06, "loss": 0.0591, "step": 3194 }, { "epoch": 0.44520309342994496, "grad_norm": 0.10568074136972427, "learning_rate": 6.509178420157828e-06, "loss": 0.0611, "step": 3195 }, { "epoch": 0.44534243712115934, "grad_norm": 0.08260844647884369, "learning_rate": 6.506931389379759e-06, "loss": 0.0623, "step": 3196 }, { "epoch": 0.4454817808123737, "grad_norm": 0.10199171304702759, "learning_rate": 6.50468402379594e-06, "loss": 0.054, "step": 3197 }, { "epoch": 0.4456211245035881, "grad_norm": 0.09629447013139725, "learning_rate": 6.502436323905683e-06, "loss": 0.0605, "step": 3198 }, { "epoch": 0.4457604681948025, "grad_norm": 0.07824987918138504, "learning_rate": 6.500188290208377e-06, "loss": 0.057, "step": 3199 }, { "epoch": 0.44589981188601685, "grad_norm": 0.08190549910068512, "learning_rate": 6.49793992320348e-06, "loss": 0.0618, "step": 3200 }, { "epoch": 0.44603915557723123, "grad_norm": 0.062427207827568054, "learning_rate": 6.495691223390534e-06, "loss": 0.0495, "step": 3201 }, { "epoch": 0.4461784992684456, "grad_norm": 0.0820026695728302, "learning_rate": 6.4934421912691445e-06, "loss": 0.0567, "step": 3202 }, { "epoch": 0.44631784295966, "grad_norm": 0.08863535523414612, "learning_rate": 6.4911928273389946e-06, "loss": 0.0539, "step": 3203 }, { "epoch": 0.44645718665087436, "grad_norm": 0.05661880224943161, "learning_rate": 6.488943132099845e-06, "loss": 0.0496, "step": 3204 }, { "epoch": 0.44659653034208874, "grad_norm": 0.15727047622203827, "learning_rate": 6.486693106051523e-06, "loss": 0.0641, "step": 3205 }, { "epoch": 0.4467358740333031, "grad_norm": 0.053386736661195755, "learning_rate": 6.484442749693935e-06, "loss": 0.0497, "step": 3206 }, { "epoch": 0.44687521772451755, "grad_norm": 0.09125269949436188, "learning_rate": 6.482192063527058e-06, "loss": 0.0448, "step": 3207 }, { "epoch": 0.4470145614157319, "grad_norm": 0.08085928112268448, "learning_rate": 6.479941048050944e-06, "loss": 0.0546, "step": 3208 }, { "epoch": 0.4471539051069463, "grad_norm": 0.06823569536209106, "learning_rate": 6.477689703765717e-06, "loss": 0.0505, "step": 3209 }, { "epoch": 0.4472932487981607, "grad_norm": 0.09683550894260406, "learning_rate": 6.475438031171574e-06, "loss": 0.0594, "step": 3210 }, { "epoch": 0.44743259248937506, "grad_norm": 0.10020381957292557, "learning_rate": 6.4731860307687845e-06, "loss": 0.0575, "step": 3211 }, { "epoch": 0.44757193618058944, "grad_norm": 0.11012996733188629, "learning_rate": 6.470933703057693e-06, "loss": 0.05, "step": 3212 }, { "epoch": 0.4477112798718038, "grad_norm": 0.07399550825357437, "learning_rate": 6.468681048538715e-06, "loss": 0.0587, "step": 3213 }, { "epoch": 0.4478506235630182, "grad_norm": 0.0933084562420845, "learning_rate": 6.4664280677123385e-06, "loss": 0.066, "step": 3214 }, { "epoch": 0.44798996725423257, "grad_norm": 0.07790762931108475, "learning_rate": 6.464174761079124e-06, "loss": 0.0495, "step": 3215 }, { "epoch": 0.44812931094544695, "grad_norm": 0.08920785784721375, "learning_rate": 6.461921129139704e-06, "loss": 0.0625, "step": 3216 }, { "epoch": 0.4482686546366613, "grad_norm": 0.07596756517887115, "learning_rate": 6.459667172394788e-06, "loss": 0.051, "step": 3217 }, { "epoch": 0.4484079983278757, "grad_norm": 0.0717211589217186, "learning_rate": 6.4574128913451495e-06, "loss": 0.0588, "step": 3218 }, { "epoch": 0.4485473420190901, "grad_norm": 0.2063763588666916, "learning_rate": 6.455158286491641e-06, "loss": 0.0784, "step": 3219 }, { "epoch": 0.44868668571030446, "grad_norm": 0.06571359932422638, "learning_rate": 6.452903358335182e-06, "loss": 0.0608, "step": 3220 }, { "epoch": 0.44882602940151883, "grad_norm": 0.1025443822145462, "learning_rate": 6.450648107376767e-06, "loss": 0.0682, "step": 3221 }, { "epoch": 0.4489653730927332, "grad_norm": 0.09388419985771179, "learning_rate": 6.4483925341174625e-06, "loss": 0.0554, "step": 3222 }, { "epoch": 0.4491047167839476, "grad_norm": 0.08818738907575607, "learning_rate": 6.4461366390584025e-06, "loss": 0.0621, "step": 3223 }, { "epoch": 0.44924406047516197, "grad_norm": 0.113322414457798, "learning_rate": 6.443880422700799e-06, "loss": 0.0605, "step": 3224 }, { "epoch": 0.44938340416637634, "grad_norm": 0.1092340275645256, "learning_rate": 6.441623885545929e-06, "loss": 0.0579, "step": 3225 }, { "epoch": 0.4495227478575907, "grad_norm": 0.07686268538236618, "learning_rate": 6.439367028095145e-06, "loss": 0.0639, "step": 3226 }, { "epoch": 0.44966209154880515, "grad_norm": 0.17728793621063232, "learning_rate": 6.437109850849868e-06, "loss": 0.0581, "step": 3227 }, { "epoch": 0.44980143524001953, "grad_norm": 0.08642666786909103, "learning_rate": 6.434852354311592e-06, "loss": 0.0593, "step": 3228 }, { "epoch": 0.4499407789312339, "grad_norm": 0.06669125705957413, "learning_rate": 6.432594538981881e-06, "loss": 0.0681, "step": 3229 }, { "epoch": 0.4500801226224483, "grad_norm": 0.08053373545408249, "learning_rate": 6.430336405362371e-06, "loss": 0.056, "step": 3230 }, { "epoch": 0.45021946631366266, "grad_norm": 0.06737292557954788, "learning_rate": 6.428077953954766e-06, "loss": 0.0611, "step": 3231 }, { "epoch": 0.45035881000487704, "grad_norm": 0.11203668266534805, "learning_rate": 6.425819185260842e-06, "loss": 0.0555, "step": 3232 }, { "epoch": 0.4504981536960914, "grad_norm": 0.11135946959257126, "learning_rate": 6.42356009978245e-06, "loss": 0.0556, "step": 3233 }, { "epoch": 0.4506374973873058, "grad_norm": 0.07612090557813644, "learning_rate": 6.421300698021502e-06, "loss": 0.0472, "step": 3234 }, { "epoch": 0.4507768410785202, "grad_norm": 0.1391729712486267, "learning_rate": 6.419040980479989e-06, "loss": 0.053, "step": 3235 }, { "epoch": 0.45091618476973455, "grad_norm": 0.15236999094486237, "learning_rate": 6.416780947659967e-06, "loss": 0.0549, "step": 3236 }, { "epoch": 0.45105552846094893, "grad_norm": 0.08453120291233063, "learning_rate": 6.4145206000635626e-06, "loss": 0.0688, "step": 3237 }, { "epoch": 0.4511948721521633, "grad_norm": 0.08104661107063293, "learning_rate": 6.412259938192978e-06, "loss": 0.0607, "step": 3238 }, { "epoch": 0.4513342158433777, "grad_norm": 0.07741765677928925, "learning_rate": 6.4099989625504756e-06, "loss": 0.0599, "step": 3239 }, { "epoch": 0.45147355953459206, "grad_norm": 0.10759464651346207, "learning_rate": 6.4077376736383954e-06, "loss": 0.0559, "step": 3240 }, { "epoch": 0.45161290322580644, "grad_norm": 0.08939128369092941, "learning_rate": 6.405476071959142e-06, "loss": 0.0604, "step": 3241 }, { "epoch": 0.4517522469170208, "grad_norm": 0.08868896961212158, "learning_rate": 6.403214158015194e-06, "loss": 0.0556, "step": 3242 }, { "epoch": 0.4518915906082352, "grad_norm": 0.06358442455530167, "learning_rate": 6.400951932309097e-06, "loss": 0.0623, "step": 3243 }, { "epoch": 0.45203093429944957, "grad_norm": 0.05355167016386986, "learning_rate": 6.3986893953434625e-06, "loss": 0.0548, "step": 3244 }, { "epoch": 0.45217027799066395, "grad_norm": 0.055782243609428406, "learning_rate": 6.396426547620979e-06, "loss": 0.0483, "step": 3245 }, { "epoch": 0.4523096216818783, "grad_norm": 0.14813925325870514, "learning_rate": 6.394163389644397e-06, "loss": 0.0661, "step": 3246 }, { "epoch": 0.45244896537309276, "grad_norm": 0.07869859039783478, "learning_rate": 6.391899921916538e-06, "loss": 0.0601, "step": 3247 }, { "epoch": 0.45258830906430714, "grad_norm": 0.06494079530239105, "learning_rate": 6.389636144940294e-06, "loss": 0.0622, "step": 3248 }, { "epoch": 0.4527276527555215, "grad_norm": 0.08561616390943527, "learning_rate": 6.387372059218626e-06, "loss": 0.0591, "step": 3249 }, { "epoch": 0.4528669964467359, "grad_norm": 0.08547477424144745, "learning_rate": 6.38510766525456e-06, "loss": 0.0694, "step": 3250 }, { "epoch": 0.45300634013795027, "grad_norm": 0.08253978937864304, "learning_rate": 6.382842963551193e-06, "loss": 0.0516, "step": 3251 }, { "epoch": 0.45314568382916465, "grad_norm": 0.07728943228721619, "learning_rate": 6.380577954611691e-06, "loss": 0.062, "step": 3252 }, { "epoch": 0.453285027520379, "grad_norm": 0.08254211395978928, "learning_rate": 6.378312638939286e-06, "loss": 0.0637, "step": 3253 }, { "epoch": 0.4534243712115934, "grad_norm": 0.07865884900093079, "learning_rate": 6.3760470170372815e-06, "loss": 0.0686, "step": 3254 }, { "epoch": 0.4535637149028078, "grad_norm": 0.0624002180993557, "learning_rate": 6.373781089409043e-06, "loss": 0.0547, "step": 3255 }, { "epoch": 0.45370305859402216, "grad_norm": 0.07224325090646744, "learning_rate": 6.371514856558013e-06, "loss": 0.0608, "step": 3256 }, { "epoch": 0.45384240228523653, "grad_norm": 0.06739581376314163, "learning_rate": 6.369248318987692e-06, "loss": 0.0525, "step": 3257 }, { "epoch": 0.4539817459764509, "grad_norm": 0.07330010831356049, "learning_rate": 6.3669814772016555e-06, "loss": 0.0559, "step": 3258 }, { "epoch": 0.4541210896676653, "grad_norm": 0.07758884131908417, "learning_rate": 6.3647143317035445e-06, "loss": 0.0518, "step": 3259 }, { "epoch": 0.45426043335887967, "grad_norm": 0.07226346433162689, "learning_rate": 6.362446882997064e-06, "loss": 0.0601, "step": 3260 }, { "epoch": 0.45439977705009404, "grad_norm": 0.10004837065935135, "learning_rate": 6.360179131585993e-06, "loss": 0.0656, "step": 3261 }, { "epoch": 0.4545391207413084, "grad_norm": 0.05385778844356537, "learning_rate": 6.357911077974173e-06, "loss": 0.0519, "step": 3262 }, { "epoch": 0.4546784644325228, "grad_norm": 0.06651291996240616, "learning_rate": 6.355642722665512e-06, "loss": 0.0642, "step": 3263 }, { "epoch": 0.4548178081237372, "grad_norm": 0.184090718626976, "learning_rate": 6.353374066163988e-06, "loss": 0.0791, "step": 3264 }, { "epoch": 0.45495715181495155, "grad_norm": 0.09934399276971817, "learning_rate": 6.351105108973644e-06, "loss": 0.0699, "step": 3265 }, { "epoch": 0.45509649550616593, "grad_norm": 0.07800034433603287, "learning_rate": 6.34883585159859e-06, "loss": 0.0519, "step": 3266 }, { "epoch": 0.45523583919738037, "grad_norm": 0.12907163798809052, "learning_rate": 6.346566294543008e-06, "loss": 0.0667, "step": 3267 }, { "epoch": 0.45537518288859474, "grad_norm": 0.0960291400551796, "learning_rate": 6.344296438311134e-06, "loss": 0.063, "step": 3268 }, { "epoch": 0.4555145265798091, "grad_norm": 0.08868960291147232, "learning_rate": 6.342026283407286e-06, "loss": 0.0669, "step": 3269 }, { "epoch": 0.4556538702710235, "grad_norm": 0.08918401598930359, "learning_rate": 6.339755830335834e-06, "loss": 0.0525, "step": 3270 }, { "epoch": 0.4557932139622379, "grad_norm": 0.11344598233699799, "learning_rate": 6.337485079601224e-06, "loss": 0.0467, "step": 3271 }, { "epoch": 0.45593255765345225, "grad_norm": 0.061054907739162445, "learning_rate": 6.335214031707966e-06, "loss": 0.0484, "step": 3272 }, { "epoch": 0.45607190134466663, "grad_norm": 0.06080305576324463, "learning_rate": 6.332942687160632e-06, "loss": 0.0617, "step": 3273 }, { "epoch": 0.456211245035881, "grad_norm": 0.0722733810544014, "learning_rate": 6.3306710464638645e-06, "loss": 0.0653, "step": 3274 }, { "epoch": 0.4563505887270954, "grad_norm": 0.1348603367805481, "learning_rate": 6.328399110122371e-06, "loss": 0.0683, "step": 3275 }, { "epoch": 0.45648993241830976, "grad_norm": 0.10472768545150757, "learning_rate": 6.3261268786409225e-06, "loss": 0.0558, "step": 3276 }, { "epoch": 0.45662927610952414, "grad_norm": 0.07597149908542633, "learning_rate": 6.323854352524359e-06, "loss": 0.0561, "step": 3277 }, { "epoch": 0.4567686198007385, "grad_norm": 0.0666356235742569, "learning_rate": 6.321581532277581e-06, "loss": 0.0633, "step": 3278 }, { "epoch": 0.4569079634919529, "grad_norm": 0.09170471876859665, "learning_rate": 6.319308418405559e-06, "loss": 0.0566, "step": 3279 }, { "epoch": 0.4570473071831673, "grad_norm": 0.06030910462141037, "learning_rate": 6.317035011413327e-06, "loss": 0.0542, "step": 3280 }, { "epoch": 0.45718665087438165, "grad_norm": 0.07142475247383118, "learning_rate": 6.314761311805983e-06, "loss": 0.0544, "step": 3281 }, { "epoch": 0.45732599456559603, "grad_norm": 0.1712421178817749, "learning_rate": 6.312487320088693e-06, "loss": 0.0689, "step": 3282 }, { "epoch": 0.4574653382568104, "grad_norm": 0.06897075474262238, "learning_rate": 6.3102130367666855e-06, "loss": 0.0541, "step": 3283 }, { "epoch": 0.4576046819480248, "grad_norm": 0.10415107756853104, "learning_rate": 6.307938462345253e-06, "loss": 0.0469, "step": 3284 }, { "epoch": 0.45774402563923916, "grad_norm": 0.0932057574391365, "learning_rate": 6.305663597329756e-06, "loss": 0.0478, "step": 3285 }, { "epoch": 0.45788336933045354, "grad_norm": 0.07316473126411438, "learning_rate": 6.303388442225616e-06, "loss": 0.0433, "step": 3286 }, { "epoch": 0.45802271302166797, "grad_norm": 0.04627536982297897, "learning_rate": 6.30111299753832e-06, "loss": 0.0423, "step": 3287 }, { "epoch": 0.45816205671288235, "grad_norm": 0.07751167565584183, "learning_rate": 6.298837263773423e-06, "loss": 0.06, "step": 3288 }, { "epoch": 0.4583014004040967, "grad_norm": 0.13298293948173523, "learning_rate": 6.2965612414365365e-06, "loss": 0.0645, "step": 3289 }, { "epoch": 0.4584407440953111, "grad_norm": 0.0681137666106224, "learning_rate": 6.294284931033344e-06, "loss": 0.0533, "step": 3290 }, { "epoch": 0.4585800877865255, "grad_norm": 0.08050356805324554, "learning_rate": 6.292008333069589e-06, "loss": 0.0574, "step": 3291 }, { "epoch": 0.45871943147773986, "grad_norm": 0.08436112850904465, "learning_rate": 6.289731448051079e-06, "loss": 0.0479, "step": 3292 }, { "epoch": 0.45885877516895424, "grad_norm": 0.10936355590820312, "learning_rate": 6.287454276483687e-06, "loss": 0.0685, "step": 3293 }, { "epoch": 0.4589981188601686, "grad_norm": 0.08168741315603256, "learning_rate": 6.2851768188733485e-06, "loss": 0.0517, "step": 3294 }, { "epoch": 0.459137462551383, "grad_norm": 0.05526608228683472, "learning_rate": 6.282899075726061e-06, "loss": 0.0532, "step": 3295 }, { "epoch": 0.45927680624259737, "grad_norm": 0.0664900466799736, "learning_rate": 6.280621047547888e-06, "loss": 0.0523, "step": 3296 }, { "epoch": 0.45941614993381175, "grad_norm": 0.11503461003303528, "learning_rate": 6.278342734844955e-06, "loss": 0.0607, "step": 3297 }, { "epoch": 0.4595554936250261, "grad_norm": 0.06439942866563797, "learning_rate": 6.276064138123453e-06, "loss": 0.0543, "step": 3298 }, { "epoch": 0.4596948373162405, "grad_norm": 0.06626441329717636, "learning_rate": 6.27378525788963e-06, "loss": 0.0536, "step": 3299 }, { "epoch": 0.4598341810074549, "grad_norm": 0.08230222761631012, "learning_rate": 6.271506094649804e-06, "loss": 0.063, "step": 3300 }, { "epoch": 0.45997352469866926, "grad_norm": 0.1204947754740715, "learning_rate": 6.269226648910356e-06, "loss": 0.0667, "step": 3301 }, { "epoch": 0.46011286838988363, "grad_norm": 0.1075262650847435, "learning_rate": 6.266946921177721e-06, "loss": 0.0505, "step": 3302 }, { "epoch": 0.460252212081098, "grad_norm": 0.06815242767333984, "learning_rate": 6.264666911958404e-06, "loss": 0.0531, "step": 3303 }, { "epoch": 0.4603915557723124, "grad_norm": 0.06327887624502182, "learning_rate": 6.262386621758975e-06, "loss": 0.052, "step": 3304 }, { "epoch": 0.46053089946352677, "grad_norm": 0.16248869895935059, "learning_rate": 6.2601060510860565e-06, "loss": 0.0655, "step": 3305 }, { "epoch": 0.46067024315474114, "grad_norm": 0.14458800852298737, "learning_rate": 6.2578252004463436e-06, "loss": 0.0656, "step": 3306 }, { "epoch": 0.4608095868459556, "grad_norm": 0.07936953008174896, "learning_rate": 6.255544070346588e-06, "loss": 0.0479, "step": 3307 }, { "epoch": 0.46094893053716995, "grad_norm": 0.12383989244699478, "learning_rate": 6.2532626612936035e-06, "loss": 0.0683, "step": 3308 }, { "epoch": 0.46108827422838433, "grad_norm": 0.06421736627817154, "learning_rate": 6.250980973794268e-06, "loss": 0.0506, "step": 3309 }, { "epoch": 0.4612276179195987, "grad_norm": 0.06998440623283386, "learning_rate": 6.248699008355522e-06, "loss": 0.0524, "step": 3310 }, { "epoch": 0.4613669616108131, "grad_norm": 0.144201397895813, "learning_rate": 6.2464167654843645e-06, "loss": 0.0623, "step": 3311 }, { "epoch": 0.46150630530202746, "grad_norm": 0.06312202662229538, "learning_rate": 6.2441342456878565e-06, "loss": 0.0598, "step": 3312 }, { "epoch": 0.46164564899324184, "grad_norm": 0.11098698526620865, "learning_rate": 6.2418514494731245e-06, "loss": 0.0641, "step": 3313 }, { "epoch": 0.4617849926844562, "grad_norm": 0.1365237683057785, "learning_rate": 6.239568377347352e-06, "loss": 0.062, "step": 3314 }, { "epoch": 0.4619243363756706, "grad_norm": 0.11513406783342361, "learning_rate": 6.237285029817786e-06, "loss": 0.0566, "step": 3315 }, { "epoch": 0.462063680066885, "grad_norm": 0.10236146301031113, "learning_rate": 6.235001407391732e-06, "loss": 0.0615, "step": 3316 }, { "epoch": 0.46220302375809935, "grad_norm": 0.09108424931764603, "learning_rate": 6.232717510576563e-06, "loss": 0.065, "step": 3317 }, { "epoch": 0.46234236744931373, "grad_norm": 0.09009023010730743, "learning_rate": 6.230433339879706e-06, "loss": 0.0605, "step": 3318 }, { "epoch": 0.4624817111405281, "grad_norm": 0.0768827423453331, "learning_rate": 6.228148895808652e-06, "loss": 0.0494, "step": 3319 }, { "epoch": 0.4626210548317425, "grad_norm": 0.10571278631687164, "learning_rate": 6.225864178870954e-06, "loss": 0.057, "step": 3320 }, { "epoch": 0.46276039852295686, "grad_norm": 0.07472261786460876, "learning_rate": 6.22357918957422e-06, "loss": 0.0576, "step": 3321 }, { "epoch": 0.46289974221417124, "grad_norm": 0.06877429783344269, "learning_rate": 6.221293928426128e-06, "loss": 0.0564, "step": 3322 }, { "epoch": 0.4630390859053856, "grad_norm": 0.17056265473365784, "learning_rate": 6.219008395934405e-06, "loss": 0.0601, "step": 3323 }, { "epoch": 0.4631784295966, "grad_norm": 0.06882792711257935, "learning_rate": 6.216722592606847e-06, "loss": 0.0493, "step": 3324 }, { "epoch": 0.46331777328781437, "grad_norm": 0.07200402021408081, "learning_rate": 6.214436518951308e-06, "loss": 0.0598, "step": 3325 }, { "epoch": 0.46345711697902875, "grad_norm": 0.07887646555900574, "learning_rate": 6.212150175475701e-06, "loss": 0.0543, "step": 3326 }, { "epoch": 0.4635964606702432, "grad_norm": 0.12216023355722427, "learning_rate": 6.209863562687998e-06, "loss": 0.0609, "step": 3327 }, { "epoch": 0.46373580436145756, "grad_norm": 0.07080455124378204, "learning_rate": 6.207576681096233e-06, "loss": 0.0478, "step": 3328 }, { "epoch": 0.46387514805267194, "grad_norm": 0.07089585810899734, "learning_rate": 6.2052895312085e-06, "loss": 0.0638, "step": 3329 }, { "epoch": 0.4640144917438863, "grad_norm": 0.1385379284620285, "learning_rate": 6.203002113532949e-06, "loss": 0.0538, "step": 3330 }, { "epoch": 0.4641538354351007, "grad_norm": 0.12379177659749985, "learning_rate": 6.200714428577794e-06, "loss": 0.0582, "step": 3331 }, { "epoch": 0.46429317912631507, "grad_norm": 0.13947060704231262, "learning_rate": 6.198426476851305e-06, "loss": 0.072, "step": 3332 }, { "epoch": 0.46443252281752945, "grad_norm": 0.09665253013372421, "learning_rate": 6.196138258861815e-06, "loss": 0.0476, "step": 3333 }, { "epoch": 0.4645718665087438, "grad_norm": 0.06559226661920547, "learning_rate": 6.193849775117709e-06, "loss": 0.0588, "step": 3334 }, { "epoch": 0.4647112101999582, "grad_norm": 0.07134711742401123, "learning_rate": 6.191561026127444e-06, "loss": 0.0504, "step": 3335 }, { "epoch": 0.4648505538911726, "grad_norm": 0.09515178948640823, "learning_rate": 6.18927201239952e-06, "loss": 0.0608, "step": 3336 }, { "epoch": 0.46498989758238696, "grad_norm": 0.05247556418180466, "learning_rate": 6.186982734442505e-06, "loss": 0.0483, "step": 3337 }, { "epoch": 0.46512924127360133, "grad_norm": 0.09441831707954407, "learning_rate": 6.184693192765028e-06, "loss": 0.066, "step": 3338 }, { "epoch": 0.4652685849648157, "grad_norm": 0.0672101303935051, "learning_rate": 6.1824033878757685e-06, "loss": 0.0505, "step": 3339 }, { "epoch": 0.4654079286560301, "grad_norm": 0.059879764914512634, "learning_rate": 6.180113320283473e-06, "loss": 0.0539, "step": 3340 }, { "epoch": 0.46554727234724447, "grad_norm": 0.09449557960033417, "learning_rate": 6.177822990496939e-06, "loss": 0.0636, "step": 3341 }, { "epoch": 0.46568661603845884, "grad_norm": 0.06777766346931458, "learning_rate": 6.175532399025027e-06, "loss": 0.0529, "step": 3342 }, { "epoch": 0.4658259597296732, "grad_norm": 0.09355130046606064, "learning_rate": 6.173241546376654e-06, "loss": 0.0629, "step": 3343 }, { "epoch": 0.4659653034208876, "grad_norm": 0.10966841876506805, "learning_rate": 6.170950433060795e-06, "loss": 0.059, "step": 3344 }, { "epoch": 0.466104647112102, "grad_norm": 0.05997709929943085, "learning_rate": 6.168659059586483e-06, "loss": 0.0472, "step": 3345 }, { "epoch": 0.46624399080331635, "grad_norm": 0.11223527789115906, "learning_rate": 6.166367426462808e-06, "loss": 0.0541, "step": 3346 }, { "epoch": 0.46638333449453073, "grad_norm": 0.06279238313436508, "learning_rate": 6.16407553419892e-06, "loss": 0.0616, "step": 3347 }, { "epoch": 0.46652267818574517, "grad_norm": 0.07901693135499954, "learning_rate": 6.161783383304024e-06, "loss": 0.055, "step": 3348 }, { "epoch": 0.46666202187695954, "grad_norm": 0.08406124264001846, "learning_rate": 6.159490974287386e-06, "loss": 0.0531, "step": 3349 }, { "epoch": 0.4668013655681739, "grad_norm": 0.06327223032712936, "learning_rate": 6.157198307658323e-06, "loss": 0.0482, "step": 3350 }, { "epoch": 0.4669407092593883, "grad_norm": 0.07437260448932648, "learning_rate": 6.154905383926218e-06, "loss": 0.0571, "step": 3351 }, { "epoch": 0.4670800529506027, "grad_norm": 0.10087069123983383, "learning_rate": 6.152612203600502e-06, "loss": 0.0746, "step": 3352 }, { "epoch": 0.46721939664181705, "grad_norm": 0.07982473075389862, "learning_rate": 6.150318767190668e-06, "loss": 0.0602, "step": 3353 }, { "epoch": 0.46735874033303143, "grad_norm": 0.06144179403781891, "learning_rate": 6.148025075206268e-06, "loss": 0.0575, "step": 3354 }, { "epoch": 0.4674980840242458, "grad_norm": 0.05224761739373207, "learning_rate": 6.145731128156904e-06, "loss": 0.054, "step": 3355 }, { "epoch": 0.4676374277154602, "grad_norm": 0.08550706505775452, "learning_rate": 6.143436926552242e-06, "loss": 0.0514, "step": 3356 }, { "epoch": 0.46777677140667456, "grad_norm": 0.07717172056436539, "learning_rate": 6.141142470902001e-06, "loss": 0.0515, "step": 3357 }, { "epoch": 0.46791611509788894, "grad_norm": 0.10043489933013916, "learning_rate": 6.138847761715955e-06, "loss": 0.0597, "step": 3358 }, { "epoch": 0.4680554587891033, "grad_norm": 0.06744732707738876, "learning_rate": 6.1365527995039366e-06, "loss": 0.0554, "step": 3359 }, { "epoch": 0.4681948024803177, "grad_norm": 0.08274946361780167, "learning_rate": 6.134257584775833e-06, "loss": 0.0492, "step": 3360 }, { "epoch": 0.4683341461715321, "grad_norm": 0.08637569099664688, "learning_rate": 6.131962118041591e-06, "loss": 0.0581, "step": 3361 }, { "epoch": 0.46847348986274645, "grad_norm": 0.07334895431995392, "learning_rate": 6.129666399811209e-06, "loss": 0.0433, "step": 3362 }, { "epoch": 0.46861283355396083, "grad_norm": 0.12500083446502686, "learning_rate": 6.127370430594745e-06, "loss": 0.0674, "step": 3363 }, { "epoch": 0.4687521772451752, "grad_norm": 0.09523773193359375, "learning_rate": 6.125074210902307e-06, "loss": 0.0531, "step": 3364 }, { "epoch": 0.4688915209363896, "grad_norm": 0.07417818903923035, "learning_rate": 6.122777741244067e-06, "loss": 0.0551, "step": 3365 }, { "epoch": 0.46903086462760396, "grad_norm": 0.10363226383924484, "learning_rate": 6.120481022130245e-06, "loss": 0.0639, "step": 3366 }, { "epoch": 0.46917020831881834, "grad_norm": 0.09273096174001694, "learning_rate": 6.118184054071124e-06, "loss": 0.0405, "step": 3367 }, { "epoch": 0.46930955201003277, "grad_norm": 0.08319522440433502, "learning_rate": 6.115886837577031e-06, "loss": 0.0519, "step": 3368 }, { "epoch": 0.46944889570124715, "grad_norm": 0.08106455206871033, "learning_rate": 6.113589373158361e-06, "loss": 0.0494, "step": 3369 }, { "epoch": 0.4695882393924615, "grad_norm": 0.06937011331319809, "learning_rate": 6.111291661325556e-06, "loss": 0.0576, "step": 3370 }, { "epoch": 0.4697275830836759, "grad_norm": 0.09353863447904587, "learning_rate": 6.108993702589114e-06, "loss": 0.0577, "step": 3371 }, { "epoch": 0.4698669267748903, "grad_norm": 0.16527360677719116, "learning_rate": 6.106695497459591e-06, "loss": 0.0576, "step": 3372 }, { "epoch": 0.47000627046610466, "grad_norm": 0.10062602162361145, "learning_rate": 6.104397046447593e-06, "loss": 0.053, "step": 3373 }, { "epoch": 0.47014561415731904, "grad_norm": 0.0767626091837883, "learning_rate": 6.102098350063786e-06, "loss": 0.0522, "step": 3374 }, { "epoch": 0.4702849578485334, "grad_norm": 0.05726369097828865, "learning_rate": 6.099799408818889e-06, "loss": 0.0555, "step": 3375 }, { "epoch": 0.4704243015397478, "grad_norm": 0.07182945311069489, "learning_rate": 6.097500223223669e-06, "loss": 0.0532, "step": 3376 }, { "epoch": 0.47056364523096217, "grad_norm": 0.10707037150859833, "learning_rate": 6.095200793788958e-06, "loss": 0.0559, "step": 3377 }, { "epoch": 0.47070298892217655, "grad_norm": 0.07204322516918182, "learning_rate": 6.092901121025634e-06, "loss": 0.0666, "step": 3378 }, { "epoch": 0.4708423326133909, "grad_norm": 0.12514851987361908, "learning_rate": 6.090601205444632e-06, "loss": 0.0553, "step": 3379 }, { "epoch": 0.4709816763046053, "grad_norm": 0.10021685063838959, "learning_rate": 6.088301047556942e-06, "loss": 0.0636, "step": 3380 }, { "epoch": 0.4711210199958197, "grad_norm": 0.08394736051559448, "learning_rate": 6.086000647873604e-06, "loss": 0.0596, "step": 3381 }, { "epoch": 0.47126036368703406, "grad_norm": 0.06464853137731552, "learning_rate": 6.083700006905715e-06, "loss": 0.0528, "step": 3382 }, { "epoch": 0.47139970737824843, "grad_norm": 0.07000961899757385, "learning_rate": 6.081399125164429e-06, "loss": 0.0457, "step": 3383 }, { "epoch": 0.4715390510694628, "grad_norm": 0.07993113249540329, "learning_rate": 6.079098003160943e-06, "loss": 0.0612, "step": 3384 }, { "epoch": 0.4716783947606772, "grad_norm": 0.06938081234693527, "learning_rate": 6.076796641406518e-06, "loss": 0.0562, "step": 3385 }, { "epoch": 0.47181773845189157, "grad_norm": 0.2030966877937317, "learning_rate": 6.074495040412465e-06, "loss": 0.0634, "step": 3386 }, { "epoch": 0.47195708214310594, "grad_norm": 0.12418700754642487, "learning_rate": 6.072193200690142e-06, "loss": 0.0666, "step": 3387 }, { "epoch": 0.4720964258343204, "grad_norm": 0.06777318567037582, "learning_rate": 6.069891122750971e-06, "loss": 0.0546, "step": 3388 }, { "epoch": 0.47223576952553475, "grad_norm": 0.10390620678663254, "learning_rate": 6.067588807106416e-06, "loss": 0.0487, "step": 3389 }, { "epoch": 0.47237511321674913, "grad_norm": 0.1818864941596985, "learning_rate": 6.0652862542680034e-06, "loss": 0.0516, "step": 3390 }, { "epoch": 0.4725144569079635, "grad_norm": 0.07090095430612564, "learning_rate": 6.062983464747305e-06, "loss": 0.0552, "step": 3391 }, { "epoch": 0.4726538005991779, "grad_norm": 0.0424765981733799, "learning_rate": 6.06068043905595e-06, "loss": 0.0437, "step": 3392 }, { "epoch": 0.47279314429039226, "grad_norm": 0.07426904886960983, "learning_rate": 6.0583771777056166e-06, "loss": 0.056, "step": 3393 }, { "epoch": 0.47293248798160664, "grad_norm": 0.0604395754635334, "learning_rate": 6.056073681208038e-06, "loss": 0.052, "step": 3394 }, { "epoch": 0.473071831672821, "grad_norm": 0.0996762216091156, "learning_rate": 6.053769950074997e-06, "loss": 0.0531, "step": 3395 }, { "epoch": 0.4732111753640354, "grad_norm": 0.1585984081029892, "learning_rate": 6.051465984818332e-06, "loss": 0.0559, "step": 3396 }, { "epoch": 0.4733505190552498, "grad_norm": 0.12170780450105667, "learning_rate": 6.049161785949931e-06, "loss": 0.0628, "step": 3397 }, { "epoch": 0.47348986274646415, "grad_norm": 0.09125715494155884, "learning_rate": 6.046857353981732e-06, "loss": 0.0578, "step": 3398 }, { "epoch": 0.47362920643767853, "grad_norm": 0.10135780274868011, "learning_rate": 6.044552689425731e-06, "loss": 0.052, "step": 3399 }, { "epoch": 0.4737685501288929, "grad_norm": 0.15927459299564362, "learning_rate": 6.042247792793968e-06, "loss": 0.0634, "step": 3400 }, { "epoch": 0.4739078938201073, "grad_norm": 0.09284419566392899, "learning_rate": 6.0399426645985424e-06, "loss": 0.0497, "step": 3401 }, { "epoch": 0.47404723751132166, "grad_norm": 0.09570826590061188, "learning_rate": 6.037637305351599e-06, "loss": 0.0603, "step": 3402 }, { "epoch": 0.47418658120253604, "grad_norm": 0.19415979087352753, "learning_rate": 6.035331715565333e-06, "loss": 0.0719, "step": 3403 }, { "epoch": 0.4743259248937504, "grad_norm": 0.08016663044691086, "learning_rate": 6.033025895752002e-06, "loss": 0.0519, "step": 3404 }, { "epoch": 0.4744652685849648, "grad_norm": 0.07611318677663803, "learning_rate": 6.030719846423897e-06, "loss": 0.0485, "step": 3405 }, { "epoch": 0.47460461227617917, "grad_norm": 0.14830414950847626, "learning_rate": 6.028413568093375e-06, "loss": 0.0638, "step": 3406 }, { "epoch": 0.47474395596739355, "grad_norm": 0.15805557370185852, "learning_rate": 6.026107061272838e-06, "loss": 0.0673, "step": 3407 }, { "epoch": 0.474883299658608, "grad_norm": 0.12108290940523148, "learning_rate": 6.023800326474738e-06, "loss": 0.0608, "step": 3408 }, { "epoch": 0.47502264334982236, "grad_norm": 0.09559735655784607, "learning_rate": 6.0214933642115794e-06, "loss": 0.0665, "step": 3409 }, { "epoch": 0.47516198704103674, "grad_norm": 0.0716857835650444, "learning_rate": 6.019186174995916e-06, "loss": 0.0514, "step": 3410 }, { "epoch": 0.4753013307322511, "grad_norm": 0.06854943931102753, "learning_rate": 6.016878759340352e-06, "loss": 0.0569, "step": 3411 }, { "epoch": 0.4754406744234655, "grad_norm": 0.07586152851581573, "learning_rate": 6.014571117757545e-06, "loss": 0.0685, "step": 3412 }, { "epoch": 0.47558001811467987, "grad_norm": 0.13217760622501373, "learning_rate": 6.012263250760199e-06, "loss": 0.0651, "step": 3413 }, { "epoch": 0.47571936180589425, "grad_norm": 0.09446246176958084, "learning_rate": 6.009955158861066e-06, "loss": 0.0666, "step": 3414 }, { "epoch": 0.4758587054971086, "grad_norm": 0.08598238229751587, "learning_rate": 6.007646842572959e-06, "loss": 0.0576, "step": 3415 }, { "epoch": 0.475998049188323, "grad_norm": 0.12458083033561707, "learning_rate": 6.005338302408724e-06, "loss": 0.0634, "step": 3416 }, { "epoch": 0.4761373928795374, "grad_norm": 0.11943662911653519, "learning_rate": 6.0030295388812736e-06, "loss": 0.0625, "step": 3417 }, { "epoch": 0.47627673657075176, "grad_norm": 0.05860460177063942, "learning_rate": 6.000720552503557e-06, "loss": 0.0538, "step": 3418 }, { "epoch": 0.47641608026196614, "grad_norm": 0.07355856150388718, "learning_rate": 5.998411343788582e-06, "loss": 0.0529, "step": 3419 }, { "epoch": 0.4765554239531805, "grad_norm": 0.07895243167877197, "learning_rate": 5.996101913249402e-06, "loss": 0.0601, "step": 3420 }, { "epoch": 0.4766947676443949, "grad_norm": 0.06522977352142334, "learning_rate": 5.993792261399115e-06, "loss": 0.0477, "step": 3421 }, { "epoch": 0.47683411133560927, "grad_norm": 0.10326121002435684, "learning_rate": 5.991482388750878e-06, "loss": 0.0465, "step": 3422 }, { "epoch": 0.47697345502682365, "grad_norm": 0.06738188117742538, "learning_rate": 5.989172295817889e-06, "loss": 0.0525, "step": 3423 }, { "epoch": 0.477112798718038, "grad_norm": 0.07071881741285324, "learning_rate": 5.9868619831134e-06, "loss": 0.051, "step": 3424 }, { "epoch": 0.4772521424092524, "grad_norm": 0.08746907114982605, "learning_rate": 5.984551451150709e-06, "loss": 0.055, "step": 3425 }, { "epoch": 0.4773914861004668, "grad_norm": 0.09521444141864777, "learning_rate": 5.9822407004431625e-06, "loss": 0.0499, "step": 3426 }, { "epoch": 0.47753082979168116, "grad_norm": 0.06677281856536865, "learning_rate": 5.979929731504158e-06, "loss": 0.054, "step": 3427 }, { "epoch": 0.4776701734828956, "grad_norm": 0.11049292981624603, "learning_rate": 5.977618544847139e-06, "loss": 0.0657, "step": 3428 }, { "epoch": 0.47780951717410997, "grad_norm": 0.10042135417461395, "learning_rate": 5.975307140985599e-06, "loss": 0.0581, "step": 3429 }, { "epoch": 0.47794886086532434, "grad_norm": 0.0837547779083252, "learning_rate": 5.972995520433078e-06, "loss": 0.0625, "step": 3430 }, { "epoch": 0.4780882045565387, "grad_norm": 0.09009163081645966, "learning_rate": 5.970683683703168e-06, "loss": 0.0573, "step": 3431 }, { "epoch": 0.4782275482477531, "grad_norm": 0.08384451270103455, "learning_rate": 5.968371631309502e-06, "loss": 0.0569, "step": 3432 }, { "epoch": 0.4783668919389675, "grad_norm": 0.073735810816288, "learning_rate": 5.966059363765771e-06, "loss": 0.056, "step": 3433 }, { "epoch": 0.47850623563018185, "grad_norm": 0.07661660015583038, "learning_rate": 5.9637468815857016e-06, "loss": 0.0493, "step": 3434 }, { "epoch": 0.47864557932139623, "grad_norm": 0.08424374461174011, "learning_rate": 5.961434185283079e-06, "loss": 0.0473, "step": 3435 }, { "epoch": 0.4787849230126106, "grad_norm": 0.07654188573360443, "learning_rate": 5.959121275371732e-06, "loss": 0.0658, "step": 3436 }, { "epoch": 0.478924266703825, "grad_norm": 0.07212419807910919, "learning_rate": 5.956808152365532e-06, "loss": 0.0645, "step": 3437 }, { "epoch": 0.47906361039503936, "grad_norm": 0.08849088102579117, "learning_rate": 5.954494816778408e-06, "loss": 0.0539, "step": 3438 }, { "epoch": 0.47920295408625374, "grad_norm": 0.07505054771900177, "learning_rate": 5.952181269124324e-06, "loss": 0.0534, "step": 3439 }, { "epoch": 0.4793422977774681, "grad_norm": 0.09406333416700363, "learning_rate": 5.949867509917303e-06, "loss": 0.0609, "step": 3440 }, { "epoch": 0.4794816414686825, "grad_norm": 0.13375528156757355, "learning_rate": 5.9475535396714055e-06, "loss": 0.0504, "step": 3441 }, { "epoch": 0.4796209851598969, "grad_norm": 0.12429803609848022, "learning_rate": 5.945239358900746e-06, "loss": 0.0658, "step": 3442 }, { "epoch": 0.47976032885111125, "grad_norm": 0.0958346426486969, "learning_rate": 5.94292496811948e-06, "loss": 0.0517, "step": 3443 }, { "epoch": 0.47989967254232563, "grad_norm": 0.08108352869749069, "learning_rate": 5.940610367841815e-06, "loss": 0.0579, "step": 3444 }, { "epoch": 0.48003901623354, "grad_norm": 0.08271027356386185, "learning_rate": 5.938295558581999e-06, "loss": 0.0573, "step": 3445 }, { "epoch": 0.4801783599247544, "grad_norm": 0.12272245436906815, "learning_rate": 5.935980540854332e-06, "loss": 0.0626, "step": 3446 }, { "epoch": 0.48031770361596876, "grad_norm": 0.11117541790008545, "learning_rate": 5.933665315173158e-06, "loss": 0.052, "step": 3447 }, { "epoch": 0.4804570473071832, "grad_norm": 0.06393425166606903, "learning_rate": 5.931349882052866e-06, "loss": 0.0461, "step": 3448 }, { "epoch": 0.48059639099839757, "grad_norm": 0.15229089558124542, "learning_rate": 5.929034242007895e-06, "loss": 0.0619, "step": 3449 }, { "epoch": 0.48073573468961195, "grad_norm": 0.14759227633476257, "learning_rate": 5.926718395552723e-06, "loss": 0.0612, "step": 3450 }, { "epoch": 0.4808750783808263, "grad_norm": 0.06454740464687347, "learning_rate": 5.924402343201883e-06, "loss": 0.0517, "step": 3451 }, { "epoch": 0.4810144220720407, "grad_norm": 0.07700614631175995, "learning_rate": 5.922086085469947e-06, "loss": 0.0499, "step": 3452 }, { "epoch": 0.4811537657632551, "grad_norm": 0.0960083082318306, "learning_rate": 5.919769622871533e-06, "loss": 0.052, "step": 3453 }, { "epoch": 0.48129310945446946, "grad_norm": 0.09275231510400772, "learning_rate": 5.917452955921309e-06, "loss": 0.0513, "step": 3454 }, { "epoch": 0.48143245314568384, "grad_norm": 0.18969422578811646, "learning_rate": 5.915136085133983e-06, "loss": 0.0645, "step": 3455 }, { "epoch": 0.4815717968368982, "grad_norm": 0.07065574079751968, "learning_rate": 5.9128190110243115e-06, "loss": 0.0569, "step": 3456 }, { "epoch": 0.4817111405281126, "grad_norm": 0.09544246643781662, "learning_rate": 5.910501734107097e-06, "loss": 0.05, "step": 3457 }, { "epoch": 0.48185048421932697, "grad_norm": 0.056452132761478424, "learning_rate": 5.908184254897183e-06, "loss": 0.0523, "step": 3458 }, { "epoch": 0.48198982791054135, "grad_norm": 0.17534074187278748, "learning_rate": 5.905866573909462e-06, "loss": 0.0516, "step": 3459 }, { "epoch": 0.4821291716017557, "grad_norm": 0.20733299851417542, "learning_rate": 5.9035486916588705e-06, "loss": 0.0643, "step": 3460 }, { "epoch": 0.4822685152929701, "grad_norm": 0.07739725708961487, "learning_rate": 5.901230608660386e-06, "loss": 0.0453, "step": 3461 }, { "epoch": 0.4824078589841845, "grad_norm": 0.08093880861997604, "learning_rate": 5.898912325429038e-06, "loss": 0.0539, "step": 3462 }, { "epoch": 0.48254720267539886, "grad_norm": 0.09610890597105026, "learning_rate": 5.896593842479893e-06, "loss": 0.0603, "step": 3463 }, { "epoch": 0.48268654636661323, "grad_norm": 0.16534267365932465, "learning_rate": 5.8942751603280645e-06, "loss": 0.0547, "step": 3464 }, { "epoch": 0.4828258900578276, "grad_norm": 0.13911870121955872, "learning_rate": 5.891956279488715e-06, "loss": 0.0562, "step": 3465 }, { "epoch": 0.482965233749042, "grad_norm": 0.11090738326311111, "learning_rate": 5.889637200477041e-06, "loss": 0.0535, "step": 3466 }, { "epoch": 0.48310457744025637, "grad_norm": 0.0929110050201416, "learning_rate": 5.887317923808294e-06, "loss": 0.0639, "step": 3467 }, { "epoch": 0.4832439211314708, "grad_norm": 0.051575277000665665, "learning_rate": 5.88499844999776e-06, "loss": 0.0427, "step": 3468 }, { "epoch": 0.4833832648226852, "grad_norm": 0.12665139138698578, "learning_rate": 5.882678779560776e-06, "loss": 0.0544, "step": 3469 }, { "epoch": 0.48352260851389955, "grad_norm": 0.09506968408823013, "learning_rate": 5.880358913012722e-06, "loss": 0.055, "step": 3470 }, { "epoch": 0.48366195220511393, "grad_norm": 0.07357814908027649, "learning_rate": 5.878038850869012e-06, "loss": 0.0498, "step": 3471 }, { "epoch": 0.4838012958963283, "grad_norm": 0.17972005903720856, "learning_rate": 5.875718593645118e-06, "loss": 0.0637, "step": 3472 }, { "epoch": 0.4839406395875427, "grad_norm": 0.16522160172462463, "learning_rate": 5.873398141856545e-06, "loss": 0.0692, "step": 3473 }, { "epoch": 0.48407998327875706, "grad_norm": 0.08997686207294464, "learning_rate": 5.871077496018844e-06, "loss": 0.0593, "step": 3474 }, { "epoch": 0.48421932696997144, "grad_norm": 0.059062182903289795, "learning_rate": 5.868756656647611e-06, "loss": 0.048, "step": 3475 }, { "epoch": 0.4843586706611858, "grad_norm": 0.08347082883119583, "learning_rate": 5.866435624258483e-06, "loss": 0.0704, "step": 3476 }, { "epoch": 0.4844980143524002, "grad_norm": 0.12959474325180054, "learning_rate": 5.86411439936714e-06, "loss": 0.0561, "step": 3477 }, { "epoch": 0.4846373580436146, "grad_norm": 0.1747497022151947, "learning_rate": 5.861792982489306e-06, "loss": 0.0659, "step": 3478 }, { "epoch": 0.48477670173482895, "grad_norm": 0.10820722579956055, "learning_rate": 5.8594713741407465e-06, "loss": 0.0609, "step": 3479 }, { "epoch": 0.48491604542604333, "grad_norm": 0.10930780321359634, "learning_rate": 5.857149574837269e-06, "loss": 0.058, "step": 3480 }, { "epoch": 0.4850553891172577, "grad_norm": 0.06710872799158096, "learning_rate": 5.854827585094725e-06, "loss": 0.0684, "step": 3481 }, { "epoch": 0.4851947328084721, "grad_norm": 0.09196294844150543, "learning_rate": 5.852505405429007e-06, "loss": 0.0474, "step": 3482 }, { "epoch": 0.48533407649968646, "grad_norm": 0.09803707897663116, "learning_rate": 5.850183036356054e-06, "loss": 0.0569, "step": 3483 }, { "epoch": 0.48547342019090084, "grad_norm": 0.08821534365415573, "learning_rate": 5.847860478391838e-06, "loss": 0.0641, "step": 3484 }, { "epoch": 0.4856127638821152, "grad_norm": 0.06847880035638809, "learning_rate": 5.845537732052381e-06, "loss": 0.0539, "step": 3485 }, { "epoch": 0.4857521075733296, "grad_norm": 0.10288870334625244, "learning_rate": 5.8432147978537444e-06, "loss": 0.0573, "step": 3486 }, { "epoch": 0.48589145126454397, "grad_norm": 0.06782296299934387, "learning_rate": 5.840891676312029e-06, "loss": 0.0471, "step": 3487 }, { "epoch": 0.4860307949557584, "grad_norm": 0.0955425575375557, "learning_rate": 5.838568367943383e-06, "loss": 0.053, "step": 3488 }, { "epoch": 0.4861701386469728, "grad_norm": 0.09659922868013382, "learning_rate": 5.836244873263989e-06, "loss": 0.0647, "step": 3489 }, { "epoch": 0.48630948233818716, "grad_norm": 0.07166006416082382, "learning_rate": 5.8339211927900776e-06, "loss": 0.0631, "step": 3490 }, { "epoch": 0.48644882602940154, "grad_norm": 0.0924183651804924, "learning_rate": 5.831597327037914e-06, "loss": 0.0472, "step": 3491 }, { "epoch": 0.4865881697206159, "grad_norm": 0.11349605768918991, "learning_rate": 5.829273276523811e-06, "loss": 0.056, "step": 3492 }, { "epoch": 0.4867275134118303, "grad_norm": 0.10150394588708878, "learning_rate": 5.82694904176412e-06, "loss": 0.0545, "step": 3493 }, { "epoch": 0.48686685710304467, "grad_norm": 0.07182224094867706, "learning_rate": 5.82462462327523e-06, "loss": 0.0447, "step": 3494 }, { "epoch": 0.48700620079425905, "grad_norm": 0.08324785530567169, "learning_rate": 5.822300021573574e-06, "loss": 0.0635, "step": 3495 }, { "epoch": 0.4871455444854734, "grad_norm": 0.08254586905241013, "learning_rate": 5.819975237175629e-06, "loss": 0.0581, "step": 3496 }, { "epoch": 0.4872848881766878, "grad_norm": 0.10009109973907471, "learning_rate": 5.817650270597906e-06, "loss": 0.0555, "step": 3497 }, { "epoch": 0.4874242318679022, "grad_norm": 0.09259898960590363, "learning_rate": 5.815325122356959e-06, "loss": 0.0582, "step": 3498 }, { "epoch": 0.48756357555911656, "grad_norm": 0.056303102523088455, "learning_rate": 5.8129997929693845e-06, "loss": 0.0439, "step": 3499 }, { "epoch": 0.48770291925033094, "grad_norm": 0.0972529724240303, "learning_rate": 5.810674282951817e-06, "loss": 0.0605, "step": 3500 }, { "epoch": 0.4878422629415453, "grad_norm": 0.10356615483760834, "learning_rate": 5.808348592820932e-06, "loss": 0.0461, "step": 3501 }, { "epoch": 0.4879816066327597, "grad_norm": 0.04331741854548454, "learning_rate": 5.806022723093445e-06, "loss": 0.0548, "step": 3502 }, { "epoch": 0.48812095032397407, "grad_norm": 0.06814830750226974, "learning_rate": 5.80369667428611e-06, "loss": 0.0504, "step": 3503 }, { "epoch": 0.48826029401518845, "grad_norm": 0.07690490037202835, "learning_rate": 5.801370446915724e-06, "loss": 0.0645, "step": 3504 }, { "epoch": 0.4883996377064028, "grad_norm": 0.08783779293298721, "learning_rate": 5.799044041499119e-06, "loss": 0.0644, "step": 3505 }, { "epoch": 0.4885389813976172, "grad_norm": 0.11782825738191605, "learning_rate": 5.7967174585531705e-06, "loss": 0.0614, "step": 3506 }, { "epoch": 0.4886783250888316, "grad_norm": 0.05171806737780571, "learning_rate": 5.794390698594793e-06, "loss": 0.0495, "step": 3507 }, { "epoch": 0.488817668780046, "grad_norm": 0.09828703105449677, "learning_rate": 5.792063762140938e-06, "loss": 0.0593, "step": 3508 }, { "epoch": 0.4889570124712604, "grad_norm": 0.05754660442471504, "learning_rate": 5.789736649708598e-06, "loss": 0.046, "step": 3509 }, { "epoch": 0.48909635616247477, "grad_norm": 0.11819140613079071, "learning_rate": 5.787409361814805e-06, "loss": 0.0553, "step": 3510 }, { "epoch": 0.48923569985368914, "grad_norm": 0.057152777910232544, "learning_rate": 5.785081898976627e-06, "loss": 0.0499, "step": 3511 }, { "epoch": 0.4893750435449035, "grad_norm": 0.08550133556127548, "learning_rate": 5.782754261711177e-06, "loss": 0.0618, "step": 3512 }, { "epoch": 0.4895143872361179, "grad_norm": 0.057685598731040955, "learning_rate": 5.7804264505356e-06, "loss": 0.045, "step": 3513 }, { "epoch": 0.4896537309273323, "grad_norm": 0.05396835505962372, "learning_rate": 5.778098465967082e-06, "loss": 0.051, "step": 3514 }, { "epoch": 0.48979307461854665, "grad_norm": 0.05737585201859474, "learning_rate": 5.7757703085228515e-06, "loss": 0.0494, "step": 3515 }, { "epoch": 0.48993241830976103, "grad_norm": 0.064215287566185, "learning_rate": 5.773441978720167e-06, "loss": 0.0529, "step": 3516 }, { "epoch": 0.4900717620009754, "grad_norm": 0.13215480744838715, "learning_rate": 5.771113477076335e-06, "loss": 0.0585, "step": 3517 }, { "epoch": 0.4902111056921898, "grad_norm": 0.11296103149652481, "learning_rate": 5.7687848041086905e-06, "loss": 0.0678, "step": 3518 }, { "epoch": 0.49035044938340416, "grad_norm": 0.08208421617746353, "learning_rate": 5.766455960334616e-06, "loss": 0.0602, "step": 3519 }, { "epoch": 0.49048979307461854, "grad_norm": 0.09308162331581116, "learning_rate": 5.764126946271526e-06, "loss": 0.0607, "step": 3520 }, { "epoch": 0.4906291367658329, "grad_norm": 0.08824166655540466, "learning_rate": 5.761797762436872e-06, "loss": 0.061, "step": 3521 }, { "epoch": 0.4907684804570473, "grad_norm": 0.08489792793989182, "learning_rate": 5.759468409348149e-06, "loss": 0.0552, "step": 3522 }, { "epoch": 0.4909078241482617, "grad_norm": 0.060033079236745834, "learning_rate": 5.757138887522884e-06, "loss": 0.0515, "step": 3523 }, { "epoch": 0.49104716783947605, "grad_norm": 0.08751153945922852, "learning_rate": 5.754809197478644e-06, "loss": 0.0534, "step": 3524 }, { "epoch": 0.49118651153069043, "grad_norm": 0.10821184515953064, "learning_rate": 5.752479339733033e-06, "loss": 0.0601, "step": 3525 }, { "epoch": 0.4913258552219048, "grad_norm": 0.09197007864713669, "learning_rate": 5.750149314803691e-06, "loss": 0.0539, "step": 3526 }, { "epoch": 0.4914651989131192, "grad_norm": 0.1772177666425705, "learning_rate": 5.747819123208299e-06, "loss": 0.0602, "step": 3527 }, { "epoch": 0.4916045426043336, "grad_norm": 0.09061227738857269, "learning_rate": 5.7454887654645706e-06, "loss": 0.0464, "step": 3528 }, { "epoch": 0.491743886295548, "grad_norm": 0.10019562393426895, "learning_rate": 5.7431582420902576e-06, "loss": 0.0603, "step": 3529 }, { "epoch": 0.49188322998676237, "grad_norm": 0.05177532508969307, "learning_rate": 5.740827553603149e-06, "loss": 0.0575, "step": 3530 }, { "epoch": 0.49202257367797675, "grad_norm": 0.07263757288455963, "learning_rate": 5.738496700521073e-06, "loss": 0.0613, "step": 3531 }, { "epoch": 0.4921619173691911, "grad_norm": 0.08672714233398438, "learning_rate": 5.736165683361889e-06, "loss": 0.0609, "step": 3532 }, { "epoch": 0.4923012610604055, "grad_norm": 0.08172661066055298, "learning_rate": 5.7338345026434995e-06, "loss": 0.058, "step": 3533 }, { "epoch": 0.4924406047516199, "grad_norm": 0.07163896411657333, "learning_rate": 5.731503158883835e-06, "loss": 0.0515, "step": 3534 }, { "epoch": 0.49257994844283426, "grad_norm": 0.13077804446220398, "learning_rate": 5.729171652600869e-06, "loss": 0.0598, "step": 3535 }, { "epoch": 0.49271929213404864, "grad_norm": 0.06353092193603516, "learning_rate": 5.726839984312611e-06, "loss": 0.0553, "step": 3536 }, { "epoch": 0.492858635825263, "grad_norm": 0.08224077522754669, "learning_rate": 5.724508154537101e-06, "loss": 0.0511, "step": 3537 }, { "epoch": 0.4929979795164774, "grad_norm": 0.08443323522806168, "learning_rate": 5.72217616379242e-06, "loss": 0.0527, "step": 3538 }, { "epoch": 0.49313732320769177, "grad_norm": 0.06012674421072006, "learning_rate": 5.719844012596683e-06, "loss": 0.05, "step": 3539 }, { "epoch": 0.49327666689890615, "grad_norm": 0.13161225616931915, "learning_rate": 5.7175117014680415e-06, "loss": 0.0599, "step": 3540 }, { "epoch": 0.4934160105901205, "grad_norm": 0.07339667528867722, "learning_rate": 5.71517923092468e-06, "loss": 0.0594, "step": 3541 }, { "epoch": 0.4935553542813349, "grad_norm": 0.16701708734035492, "learning_rate": 5.712846601484822e-06, "loss": 0.0617, "step": 3542 }, { "epoch": 0.4936946979725493, "grad_norm": 0.11264581978321075, "learning_rate": 5.710513813666722e-06, "loss": 0.059, "step": 3543 }, { "epoch": 0.49383404166376366, "grad_norm": 0.10360930114984512, "learning_rate": 5.708180867988676e-06, "loss": 0.0554, "step": 3544 }, { "epoch": 0.49397338535497803, "grad_norm": 0.07958459854125977, "learning_rate": 5.705847764969008e-06, "loss": 0.0643, "step": 3545 }, { "epoch": 0.4941127290461924, "grad_norm": 0.16634461283683777, "learning_rate": 5.703514505126081e-06, "loss": 0.0613, "step": 3546 }, { "epoch": 0.4942520727374068, "grad_norm": 0.0652889683842659, "learning_rate": 5.701181088978295e-06, "loss": 0.0472, "step": 3547 }, { "epoch": 0.49439141642862117, "grad_norm": 0.061099328100681305, "learning_rate": 5.698847517044076e-06, "loss": 0.0483, "step": 3548 }, { "epoch": 0.4945307601198356, "grad_norm": 0.09003553539514542, "learning_rate": 5.696513789841897e-06, "loss": 0.0678, "step": 3549 }, { "epoch": 0.49467010381105, "grad_norm": 0.07713866233825684, "learning_rate": 5.6941799078902525e-06, "loss": 0.0506, "step": 3550 }, { "epoch": 0.49480944750226435, "grad_norm": 0.07725438475608826, "learning_rate": 5.691845871707682e-06, "loss": 0.0542, "step": 3551 }, { "epoch": 0.49494879119347873, "grad_norm": 0.08192838728427887, "learning_rate": 5.689511681812755e-06, "loss": 0.0535, "step": 3552 }, { "epoch": 0.4950881348846931, "grad_norm": 0.05348249152302742, "learning_rate": 5.687177338724073e-06, "loss": 0.0554, "step": 3553 }, { "epoch": 0.4952274785759075, "grad_norm": 0.11193079501390457, "learning_rate": 5.684842842960276e-06, "loss": 0.0635, "step": 3554 }, { "epoch": 0.49536682226712186, "grad_norm": 0.08095802366733551, "learning_rate": 5.682508195040032e-06, "loss": 0.0654, "step": 3555 }, { "epoch": 0.49550616595833624, "grad_norm": 0.08628056198358536, "learning_rate": 5.68017339548205e-06, "loss": 0.0588, "step": 3556 }, { "epoch": 0.4956455096495506, "grad_norm": 0.14912277460098267, "learning_rate": 5.6778384448050694e-06, "loss": 0.068, "step": 3557 }, { "epoch": 0.495784853340765, "grad_norm": 0.0795455276966095, "learning_rate": 5.675503343527861e-06, "loss": 0.052, "step": 3558 }, { "epoch": 0.4959241970319794, "grad_norm": 0.08628484606742859, "learning_rate": 5.673168092169231e-06, "loss": 0.0509, "step": 3559 }, { "epoch": 0.49606354072319375, "grad_norm": 0.12588058412075043, "learning_rate": 5.670832691248021e-06, "loss": 0.0574, "step": 3560 }, { "epoch": 0.49620288441440813, "grad_norm": 0.14404898881912231, "learning_rate": 5.668497141283101e-06, "loss": 0.0508, "step": 3561 }, { "epoch": 0.4963422281056225, "grad_norm": 0.09494880586862564, "learning_rate": 5.66616144279338e-06, "loss": 0.0563, "step": 3562 }, { "epoch": 0.4964815717968369, "grad_norm": 0.1217559352517128, "learning_rate": 5.663825596297794e-06, "loss": 0.0617, "step": 3563 }, { "epoch": 0.49662091548805126, "grad_norm": 0.12132520973682404, "learning_rate": 5.661489602315314e-06, "loss": 0.0608, "step": 3564 }, { "epoch": 0.49676025917926564, "grad_norm": 0.11694680154323578, "learning_rate": 5.6591534613649505e-06, "loss": 0.0557, "step": 3565 }, { "epoch": 0.49689960287048, "grad_norm": 0.16462478041648865, "learning_rate": 5.656817173965733e-06, "loss": 0.0638, "step": 3566 }, { "epoch": 0.4970389465616944, "grad_norm": 0.05384455993771553, "learning_rate": 5.6544807406367365e-06, "loss": 0.0475, "step": 3567 }, { "epoch": 0.49717829025290877, "grad_norm": 0.07385455816984177, "learning_rate": 5.6521441618970605e-06, "loss": 0.0541, "step": 3568 }, { "epoch": 0.4973176339441232, "grad_norm": 0.08250287920236588, "learning_rate": 5.649807438265842e-06, "loss": 0.0597, "step": 3569 }, { "epoch": 0.4974569776353376, "grad_norm": 0.05712904781103134, "learning_rate": 5.647470570262246e-06, "loss": 0.052, "step": 3570 }, { "epoch": 0.49759632132655196, "grad_norm": 0.08744799345731735, "learning_rate": 5.64513355840547e-06, "loss": 0.0576, "step": 3571 }, { "epoch": 0.49773566501776634, "grad_norm": 0.07268746197223663, "learning_rate": 5.642796403214747e-06, "loss": 0.0517, "step": 3572 }, { "epoch": 0.4978750087089807, "grad_norm": 0.07675604522228241, "learning_rate": 5.640459105209337e-06, "loss": 0.0518, "step": 3573 }, { "epoch": 0.4980143524001951, "grad_norm": 0.1426275074481964, "learning_rate": 5.638121664908537e-06, "loss": 0.0631, "step": 3574 }, { "epoch": 0.49815369609140947, "grad_norm": 0.06692129373550415, "learning_rate": 5.635784082831671e-06, "loss": 0.0514, "step": 3575 }, { "epoch": 0.49829303978262385, "grad_norm": 0.07923354208469391, "learning_rate": 5.633446359498098e-06, "loss": 0.0644, "step": 3576 }, { "epoch": 0.4984323834738382, "grad_norm": 0.08710286766290665, "learning_rate": 5.6311084954272055e-06, "loss": 0.0616, "step": 3577 }, { "epoch": 0.4985717271650526, "grad_norm": 0.06869623810052872, "learning_rate": 5.628770491138414e-06, "loss": 0.0587, "step": 3578 }, { "epoch": 0.498711070856267, "grad_norm": 0.14515218138694763, "learning_rate": 5.626432347151173e-06, "loss": 0.0718, "step": 3579 }, { "epoch": 0.49885041454748136, "grad_norm": 0.06517757475376129, "learning_rate": 5.624094063984967e-06, "loss": 0.0536, "step": 3580 }, { "epoch": 0.49898975823869574, "grad_norm": 0.09927365928888321, "learning_rate": 5.621755642159309e-06, "loss": 0.0694, "step": 3581 }, { "epoch": 0.4991291019299101, "grad_norm": 0.1492815911769867, "learning_rate": 5.61941708219374e-06, "loss": 0.0548, "step": 3582 }, { "epoch": 0.4992684456211245, "grad_norm": 0.0728856548666954, "learning_rate": 5.617078384607839e-06, "loss": 0.0522, "step": 3583 }, { "epoch": 0.49940778931233887, "grad_norm": 0.060995783656835556, "learning_rate": 5.614739549921208e-06, "loss": 0.0446, "step": 3584 }, { "epoch": 0.49954713300355325, "grad_norm": 0.11620491743087769, "learning_rate": 5.612400578653484e-06, "loss": 0.069, "step": 3585 }, { "epoch": 0.4996864766947676, "grad_norm": 0.08882202953100204, "learning_rate": 5.610061471324335e-06, "loss": 0.0661, "step": 3586 }, { "epoch": 0.499825820385982, "grad_norm": 0.06847734749317169, "learning_rate": 5.607722228453452e-06, "loss": 0.0531, "step": 3587 }, { "epoch": 0.4999651640771964, "grad_norm": 0.07396023720502853, "learning_rate": 5.605382850560565e-06, "loss": 0.0494, "step": 3588 }, { "epoch": 0.5001045077684108, "grad_norm": 0.07982312887907028, "learning_rate": 5.6030433381654305e-06, "loss": 0.0576, "step": 3589 }, { "epoch": 0.5002438514596251, "grad_norm": 0.07360611855983734, "learning_rate": 5.600703691787833e-06, "loss": 0.057, "step": 3590 }, { "epoch": 0.5003831951508395, "grad_norm": 0.08477191627025604, "learning_rate": 5.598363911947591e-06, "loss": 0.0579, "step": 3591 }, { "epoch": 0.5005225388420539, "grad_norm": 0.07065518200397491, "learning_rate": 5.596023999164547e-06, "loss": 0.0565, "step": 3592 }, { "epoch": 0.5006618825332683, "grad_norm": 0.12612996995449066, "learning_rate": 5.593683953958579e-06, "loss": 0.0566, "step": 3593 }, { "epoch": 0.5008012262244826, "grad_norm": 0.09779742360115051, "learning_rate": 5.591343776849591e-06, "loss": 0.0563, "step": 3594 }, { "epoch": 0.500940569915697, "grad_norm": 0.06125519424676895, "learning_rate": 5.5890034683575145e-06, "loss": 0.0583, "step": 3595 }, { "epoch": 0.5010799136069114, "grad_norm": 0.10685194283723831, "learning_rate": 5.586663029002314e-06, "loss": 0.053, "step": 3596 }, { "epoch": 0.5012192572981258, "grad_norm": 0.07673314213752747, "learning_rate": 5.584322459303984e-06, "loss": 0.0485, "step": 3597 }, { "epoch": 0.5013586009893402, "grad_norm": 0.05639342963695526, "learning_rate": 5.581981759782543e-06, "loss": 0.0514, "step": 3598 }, { "epoch": 0.5014979446805546, "grad_norm": 0.05400070548057556, "learning_rate": 5.579640930958043e-06, "loss": 0.0506, "step": 3599 }, { "epoch": 0.501637288371769, "grad_norm": 0.08493531495332718, "learning_rate": 5.57729997335056e-06, "loss": 0.0606, "step": 3600 }, { "epoch": 0.5017766320629834, "grad_norm": 0.08714528381824493, "learning_rate": 5.5749588874802055e-06, "loss": 0.062, "step": 3601 }, { "epoch": 0.5019159757541978, "grad_norm": 0.10706283152103424, "learning_rate": 5.572617673867111e-06, "loss": 0.0645, "step": 3602 }, { "epoch": 0.5020553194454122, "grad_norm": 0.10022803395986557, "learning_rate": 5.570276333031441e-06, "loss": 0.0591, "step": 3603 }, { "epoch": 0.5021946631366265, "grad_norm": 0.0918039083480835, "learning_rate": 5.567934865493392e-06, "loss": 0.0492, "step": 3604 }, { "epoch": 0.5023340068278409, "grad_norm": 0.12310583144426346, "learning_rate": 5.5655932717731805e-06, "loss": 0.0558, "step": 3605 }, { "epoch": 0.5024733505190553, "grad_norm": 0.1325707584619522, "learning_rate": 5.563251552391058e-06, "loss": 0.0525, "step": 3606 }, { "epoch": 0.5026126942102697, "grad_norm": 0.11871316283941269, "learning_rate": 5.560909707867299e-06, "loss": 0.0705, "step": 3607 }, { "epoch": 0.502752037901484, "grad_norm": 0.07906331866979599, "learning_rate": 5.558567738722208e-06, "loss": 0.0495, "step": 3608 }, { "epoch": 0.5028913815926984, "grad_norm": 0.07775494456291199, "learning_rate": 5.556225645476119e-06, "loss": 0.049, "step": 3609 }, { "epoch": 0.5030307252839128, "grad_norm": 0.11243552714586258, "learning_rate": 5.55388342864939e-06, "loss": 0.0616, "step": 3610 }, { "epoch": 0.5031700689751272, "grad_norm": 0.19554102420806885, "learning_rate": 5.5515410887624085e-06, "loss": 0.0547, "step": 3611 }, { "epoch": 0.5033094126663415, "grad_norm": 0.09110324829816818, "learning_rate": 5.549198626335589e-06, "loss": 0.0634, "step": 3612 }, { "epoch": 0.5034487563575559, "grad_norm": 0.06357347965240479, "learning_rate": 5.546856041889374e-06, "loss": 0.0548, "step": 3613 }, { "epoch": 0.5035881000487703, "grad_norm": 0.1726120561361313, "learning_rate": 5.544513335944228e-06, "loss": 0.0589, "step": 3614 }, { "epoch": 0.5037274437399847, "grad_norm": 0.08097531646490097, "learning_rate": 5.542170509020655e-06, "loss": 0.061, "step": 3615 }, { "epoch": 0.5038667874311991, "grad_norm": 0.10259037464857101, "learning_rate": 5.539827561639169e-06, "loss": 0.0558, "step": 3616 }, { "epoch": 0.5040061311224134, "grad_norm": 0.08977872878313065, "learning_rate": 5.537484494320324e-06, "loss": 0.0583, "step": 3617 }, { "epoch": 0.5041454748136278, "grad_norm": 0.12920722365379333, "learning_rate": 5.535141307584697e-06, "loss": 0.0504, "step": 3618 }, { "epoch": 0.5042848185048422, "grad_norm": 0.0739729031920433, "learning_rate": 5.532798001952888e-06, "loss": 0.0623, "step": 3619 }, { "epoch": 0.5044241621960566, "grad_norm": 0.10716905444860458, "learning_rate": 5.530454577945529e-06, "loss": 0.0614, "step": 3620 }, { "epoch": 0.504563505887271, "grad_norm": 0.07665102928876877, "learning_rate": 5.52811103608327e-06, "loss": 0.0536, "step": 3621 }, { "epoch": 0.5047028495784853, "grad_norm": 0.07246160507202148, "learning_rate": 5.525767376886797e-06, "loss": 0.0514, "step": 3622 }, { "epoch": 0.5048421932696997, "grad_norm": 0.10371064394712448, "learning_rate": 5.523423600876816e-06, "loss": 0.0606, "step": 3623 }, { "epoch": 0.5049815369609141, "grad_norm": 0.08265119045972824, "learning_rate": 5.521079708574062e-06, "loss": 0.0498, "step": 3624 }, { "epoch": 0.5051208806521285, "grad_norm": 0.10733005404472351, "learning_rate": 5.5187357004992926e-06, "loss": 0.0565, "step": 3625 }, { "epoch": 0.5052602243433428, "grad_norm": 0.09365499764680862, "learning_rate": 5.516391577173293e-06, "loss": 0.0608, "step": 3626 }, { "epoch": 0.5053995680345572, "grad_norm": 0.11957994103431702, "learning_rate": 5.514047339116874e-06, "loss": 0.0573, "step": 3627 }, { "epoch": 0.5055389117257716, "grad_norm": 0.09056921303272247, "learning_rate": 5.511702986850873e-06, "loss": 0.0554, "step": 3628 }, { "epoch": 0.505678255416986, "grad_norm": 0.1046169176697731, "learning_rate": 5.509358520896151e-06, "loss": 0.054, "step": 3629 }, { "epoch": 0.5058175991082003, "grad_norm": 0.05354544520378113, "learning_rate": 5.507013941773593e-06, "loss": 0.0527, "step": 3630 }, { "epoch": 0.5059569427994147, "grad_norm": 0.07815783470869064, "learning_rate": 5.504669250004116e-06, "loss": 0.0519, "step": 3631 }, { "epoch": 0.5060962864906291, "grad_norm": 0.07136417180299759, "learning_rate": 5.502324446108649e-06, "loss": 0.053, "step": 3632 }, { "epoch": 0.5062356301818435, "grad_norm": 0.08774618059396744, "learning_rate": 5.49997953060816e-06, "loss": 0.0585, "step": 3633 }, { "epoch": 0.5063749738730579, "grad_norm": 0.15103891491889954, "learning_rate": 5.497634504023634e-06, "loss": 0.0645, "step": 3634 }, { "epoch": 0.5065143175642722, "grad_norm": 0.08574679493904114, "learning_rate": 5.495289366876083e-06, "loss": 0.0601, "step": 3635 }, { "epoch": 0.5066536612554866, "grad_norm": 0.09119002521038055, "learning_rate": 5.492944119686544e-06, "loss": 0.0603, "step": 3636 }, { "epoch": 0.506793004946701, "grad_norm": 0.09813790023326874, "learning_rate": 5.4905987629760724e-06, "loss": 0.0632, "step": 3637 }, { "epoch": 0.5069323486379154, "grad_norm": 0.11556592583656311, "learning_rate": 5.488253297265757e-06, "loss": 0.0673, "step": 3638 }, { "epoch": 0.5070716923291299, "grad_norm": 0.13291944563388824, "learning_rate": 5.485907723076708e-06, "loss": 0.0575, "step": 3639 }, { "epoch": 0.5072110360203442, "grad_norm": 0.07384729385375977, "learning_rate": 5.483562040930055e-06, "loss": 0.0607, "step": 3640 }, { "epoch": 0.5073503797115586, "grad_norm": 0.12483338266611099, "learning_rate": 5.481216251346956e-06, "loss": 0.055, "step": 3641 }, { "epoch": 0.507489723402773, "grad_norm": 0.0882948562502861, "learning_rate": 5.478870354848593e-06, "loss": 0.0555, "step": 3642 }, { "epoch": 0.5076290670939874, "grad_norm": 0.0976020023226738, "learning_rate": 5.47652435195617e-06, "loss": 0.0555, "step": 3643 }, { "epoch": 0.5077684107852017, "grad_norm": 0.10592866688966751, "learning_rate": 5.4741782431909144e-06, "loss": 0.0637, "step": 3644 }, { "epoch": 0.5079077544764161, "grad_norm": 0.12222854793071747, "learning_rate": 5.471832029074079e-06, "loss": 0.0652, "step": 3645 }, { "epoch": 0.5080470981676305, "grad_norm": 0.06937339156866074, "learning_rate": 5.469485710126938e-06, "loss": 0.0488, "step": 3646 }, { "epoch": 0.5081864418588449, "grad_norm": 0.06991003453731537, "learning_rate": 5.467139286870794e-06, "loss": 0.0573, "step": 3647 }, { "epoch": 0.5083257855500593, "grad_norm": 0.13819946348667145, "learning_rate": 5.464792759826962e-06, "loss": 0.055, "step": 3648 }, { "epoch": 0.5084651292412736, "grad_norm": 0.18472741544246674, "learning_rate": 5.462446129516793e-06, "loss": 0.078, "step": 3649 }, { "epoch": 0.508604472932488, "grad_norm": 0.06022239848971367, "learning_rate": 5.460099396461649e-06, "loss": 0.0495, "step": 3650 }, { "epoch": 0.5087438166237024, "grad_norm": 0.08382849395275116, "learning_rate": 5.457752561182924e-06, "loss": 0.057, "step": 3651 }, { "epoch": 0.5088831603149168, "grad_norm": 0.06798063218593597, "learning_rate": 5.455405624202032e-06, "loss": 0.0556, "step": 3652 }, { "epoch": 0.5090225040061311, "grad_norm": 0.07917025685310364, "learning_rate": 5.453058586040406e-06, "loss": 0.0561, "step": 3653 }, { "epoch": 0.5091618476973455, "grad_norm": 0.08278855681419373, "learning_rate": 5.450711447219507e-06, "loss": 0.0532, "step": 3654 }, { "epoch": 0.5093011913885599, "grad_norm": 0.07828863710165024, "learning_rate": 5.448364208260813e-06, "loss": 0.0473, "step": 3655 }, { "epoch": 0.5094405350797743, "grad_norm": 0.0688813105225563, "learning_rate": 5.446016869685829e-06, "loss": 0.0464, "step": 3656 }, { "epoch": 0.5095798787709886, "grad_norm": 0.1146269291639328, "learning_rate": 5.44366943201608e-06, "loss": 0.0557, "step": 3657 }, { "epoch": 0.509719222462203, "grad_norm": 0.07902734726667404, "learning_rate": 5.441321895773112e-06, "loss": 0.0545, "step": 3658 }, { "epoch": 0.5098585661534174, "grad_norm": 0.0895848199725151, "learning_rate": 5.438974261478494e-06, "loss": 0.0556, "step": 3659 }, { "epoch": 0.5099979098446318, "grad_norm": 0.06453768908977509, "learning_rate": 5.436626529653817e-06, "loss": 0.0451, "step": 3660 }, { "epoch": 0.5101372535358462, "grad_norm": 0.12741628289222717, "learning_rate": 5.434278700820693e-06, "loss": 0.0569, "step": 3661 }, { "epoch": 0.5102765972270605, "grad_norm": 0.1324659287929535, "learning_rate": 5.431930775500756e-06, "loss": 0.0601, "step": 3662 }, { "epoch": 0.5104159409182749, "grad_norm": 0.09007998555898666, "learning_rate": 5.429582754215664e-06, "loss": 0.054, "step": 3663 }, { "epoch": 0.5105552846094893, "grad_norm": 0.10391875356435776, "learning_rate": 5.4272346374870885e-06, "loss": 0.0578, "step": 3664 }, { "epoch": 0.5106946283007037, "grad_norm": 0.04957488179206848, "learning_rate": 5.424886425836734e-06, "loss": 0.0404, "step": 3665 }, { "epoch": 0.510833971991918, "grad_norm": 0.25622397661209106, "learning_rate": 5.4225381197863135e-06, "loss": 0.0555, "step": 3666 }, { "epoch": 0.5109733156831324, "grad_norm": 0.08770693838596344, "learning_rate": 5.420189719857571e-06, "loss": 0.0539, "step": 3667 }, { "epoch": 0.5111126593743468, "grad_norm": 0.08226736634969711, "learning_rate": 5.417841226572263e-06, "loss": 0.0592, "step": 3668 }, { "epoch": 0.5112520030655612, "grad_norm": 0.1454608291387558, "learning_rate": 5.415492640452177e-06, "loss": 0.068, "step": 3669 }, { "epoch": 0.5113913467567756, "grad_norm": 0.06723427027463913, "learning_rate": 5.4131439620191115e-06, "loss": 0.0527, "step": 3670 }, { "epoch": 0.5115306904479899, "grad_norm": 0.14078566431999207, "learning_rate": 5.4107951917948896e-06, "loss": 0.0759, "step": 3671 }, { "epoch": 0.5116700341392043, "grad_norm": 0.06584922969341278, "learning_rate": 5.408446330301355e-06, "loss": 0.0539, "step": 3672 }, { "epoch": 0.5118093778304187, "grad_norm": 0.12179678678512573, "learning_rate": 5.40609737806037e-06, "loss": 0.0583, "step": 3673 }, { "epoch": 0.5119487215216331, "grad_norm": 0.09653377532958984, "learning_rate": 5.403748335593819e-06, "loss": 0.0668, "step": 3674 }, { "epoch": 0.5120880652128474, "grad_norm": 0.09900897741317749, "learning_rate": 5.4013992034236065e-06, "loss": 0.0476, "step": 3675 }, { "epoch": 0.5122274089040618, "grad_norm": 0.08074475079774857, "learning_rate": 5.3990499820716545e-06, "loss": 0.0534, "step": 3676 }, { "epoch": 0.5123667525952762, "grad_norm": 0.07063371688127518, "learning_rate": 5.396700672059907e-06, "loss": 0.0511, "step": 3677 }, { "epoch": 0.5125060962864906, "grad_norm": 0.07650109380483627, "learning_rate": 5.394351273910327e-06, "loss": 0.0492, "step": 3678 }, { "epoch": 0.5126454399777051, "grad_norm": 0.07293551415205002, "learning_rate": 5.392001788144897e-06, "loss": 0.0506, "step": 3679 }, { "epoch": 0.5127847836689194, "grad_norm": 0.20286758244037628, "learning_rate": 5.389652215285618e-06, "loss": 0.0591, "step": 3680 }, { "epoch": 0.5129241273601338, "grad_norm": 0.15198594331741333, "learning_rate": 5.387302555854516e-06, "loss": 0.0604, "step": 3681 }, { "epoch": 0.5130634710513482, "grad_norm": 0.0814938172698021, "learning_rate": 5.384952810373625e-06, "loss": 0.0521, "step": 3682 }, { "epoch": 0.5132028147425626, "grad_norm": 0.11662554740905762, "learning_rate": 5.382602979365009e-06, "loss": 0.0471, "step": 3683 }, { "epoch": 0.513342158433777, "grad_norm": 0.05832228809595108, "learning_rate": 5.380253063350747e-06, "loss": 0.0525, "step": 3684 }, { "epoch": 0.5134815021249913, "grad_norm": 0.07805874198675156, "learning_rate": 5.377903062852935e-06, "loss": 0.053, "step": 3685 }, { "epoch": 0.5136208458162057, "grad_norm": 0.09110360592603683, "learning_rate": 5.375552978393691e-06, "loss": 0.0437, "step": 3686 }, { "epoch": 0.5137601895074201, "grad_norm": 0.12095142900943756, "learning_rate": 5.373202810495149e-06, "loss": 0.0551, "step": 3687 }, { "epoch": 0.5138995331986345, "grad_norm": 0.05187676474452019, "learning_rate": 5.370852559679461e-06, "loss": 0.0547, "step": 3688 }, { "epoch": 0.5140388768898488, "grad_norm": 0.06694616377353668, "learning_rate": 5.368502226468803e-06, "loss": 0.045, "step": 3689 }, { "epoch": 0.5141782205810632, "grad_norm": 0.06426354497671127, "learning_rate": 5.366151811385363e-06, "loss": 0.0523, "step": 3690 }, { "epoch": 0.5143175642722776, "grad_norm": 0.08870235830545425, "learning_rate": 5.363801314951349e-06, "loss": 0.06, "step": 3691 }, { "epoch": 0.514456907963492, "grad_norm": 0.07845088094472885, "learning_rate": 5.361450737688989e-06, "loss": 0.0526, "step": 3692 }, { "epoch": 0.5145962516547063, "grad_norm": 0.07988006621599197, "learning_rate": 5.359100080120527e-06, "loss": 0.0477, "step": 3693 }, { "epoch": 0.5147355953459207, "grad_norm": 0.05899794399738312, "learning_rate": 5.356749342768226e-06, "loss": 0.0548, "step": 3694 }, { "epoch": 0.5148749390371351, "grad_norm": 0.09014344960451126, "learning_rate": 5.354398526154365e-06, "loss": 0.0553, "step": 3695 }, { "epoch": 0.5150142827283495, "grad_norm": 0.07348120212554932, "learning_rate": 5.352047630801242e-06, "loss": 0.0539, "step": 3696 }, { "epoch": 0.5151536264195639, "grad_norm": 0.0799022987484932, "learning_rate": 5.349696657231176e-06, "loss": 0.058, "step": 3697 }, { "epoch": 0.5152929701107782, "grad_norm": 0.10843795537948608, "learning_rate": 5.347345605966493e-06, "loss": 0.0616, "step": 3698 }, { "epoch": 0.5154323138019926, "grad_norm": 0.05253317952156067, "learning_rate": 5.344994477529548e-06, "loss": 0.0487, "step": 3699 }, { "epoch": 0.515571657493207, "grad_norm": 0.14418905973434448, "learning_rate": 5.342643272442706e-06, "loss": 0.0663, "step": 3700 }, { "epoch": 0.5157110011844214, "grad_norm": 0.0792262926697731, "learning_rate": 5.340291991228352e-06, "loss": 0.0648, "step": 3701 }, { "epoch": 0.5158503448756357, "grad_norm": 0.0607261136174202, "learning_rate": 5.337940634408888e-06, "loss": 0.0482, "step": 3702 }, { "epoch": 0.5159896885668501, "grad_norm": 0.0967903658747673, "learning_rate": 5.335589202506727e-06, "loss": 0.0576, "step": 3703 }, { "epoch": 0.5161290322580645, "grad_norm": 0.10156381875276566, "learning_rate": 5.333237696044309e-06, "loss": 0.0489, "step": 3704 }, { "epoch": 0.5162683759492789, "grad_norm": 0.08733107894659042, "learning_rate": 5.330886115544081e-06, "loss": 0.0516, "step": 3705 }, { "epoch": 0.5164077196404933, "grad_norm": 0.06353001296520233, "learning_rate": 5.328534461528515e-06, "loss": 0.051, "step": 3706 }, { "epoch": 0.5165470633317076, "grad_norm": 0.0956566333770752, "learning_rate": 5.326182734520091e-06, "loss": 0.077, "step": 3707 }, { "epoch": 0.516686407022922, "grad_norm": 0.1007143184542656, "learning_rate": 5.32383093504131e-06, "loss": 0.0535, "step": 3708 }, { "epoch": 0.5168257507141364, "grad_norm": 0.12180323153734207, "learning_rate": 5.32147906361469e-06, "loss": 0.0564, "step": 3709 }, { "epoch": 0.5169650944053508, "grad_norm": 0.12428831309080124, "learning_rate": 5.31912712076276e-06, "loss": 0.0548, "step": 3710 }, { "epoch": 0.5171044380965651, "grad_norm": 0.13412773609161377, "learning_rate": 5.316775107008069e-06, "loss": 0.0626, "step": 3711 }, { "epoch": 0.5172437817877795, "grad_norm": 0.10352975875139236, "learning_rate": 5.314423022873181e-06, "loss": 0.0617, "step": 3712 }, { "epoch": 0.5173831254789939, "grad_norm": 0.07425779849290848, "learning_rate": 5.312070868880678e-06, "loss": 0.0542, "step": 3713 }, { "epoch": 0.5175224691702083, "grad_norm": 0.08777665346860886, "learning_rate": 5.3097186455531506e-06, "loss": 0.0559, "step": 3714 }, { "epoch": 0.5176618128614227, "grad_norm": 0.12136924266815186, "learning_rate": 5.307366353413214e-06, "loss": 0.0628, "step": 3715 }, { "epoch": 0.517801156552637, "grad_norm": 0.09617836028337479, "learning_rate": 5.305013992983487e-06, "loss": 0.0573, "step": 3716 }, { "epoch": 0.5179405002438514, "grad_norm": 0.09725280106067657, "learning_rate": 5.302661564786617e-06, "loss": 0.0705, "step": 3717 }, { "epoch": 0.5180798439350658, "grad_norm": 0.04966358467936516, "learning_rate": 5.300309069345257e-06, "loss": 0.0473, "step": 3718 }, { "epoch": 0.5182191876262803, "grad_norm": 0.07526290416717529, "learning_rate": 5.297956507182077e-06, "loss": 0.0489, "step": 3719 }, { "epoch": 0.5183585313174947, "grad_norm": 0.09912273287773132, "learning_rate": 5.295603878819764e-06, "loss": 0.0614, "step": 3720 }, { "epoch": 0.518497875008709, "grad_norm": 0.06974770128726959, "learning_rate": 5.2932511847810175e-06, "loss": 0.0586, "step": 3721 }, { "epoch": 0.5186372186999234, "grad_norm": 0.0968157947063446, "learning_rate": 5.290898425588553e-06, "loss": 0.0554, "step": 3722 }, { "epoch": 0.5187765623911378, "grad_norm": 0.0737527534365654, "learning_rate": 5.2885456017651e-06, "loss": 0.0495, "step": 3723 }, { "epoch": 0.5189159060823522, "grad_norm": 0.07609433680772781, "learning_rate": 5.286192713833402e-06, "loss": 0.0531, "step": 3724 }, { "epoch": 0.5190552497735665, "grad_norm": 0.09271910041570663, "learning_rate": 5.283839762316217e-06, "loss": 0.0534, "step": 3725 }, { "epoch": 0.5191945934647809, "grad_norm": 0.0862397700548172, "learning_rate": 5.281486747736316e-06, "loss": 0.0624, "step": 3726 }, { "epoch": 0.5193339371559953, "grad_norm": 0.07971532642841339, "learning_rate": 5.279133670616488e-06, "loss": 0.0601, "step": 3727 }, { "epoch": 0.5194732808472097, "grad_norm": 0.07310950756072998, "learning_rate": 5.276780531479528e-06, "loss": 0.0589, "step": 3728 }, { "epoch": 0.519612624538424, "grad_norm": 0.07803064584732056, "learning_rate": 5.274427330848257e-06, "loss": 0.0655, "step": 3729 }, { "epoch": 0.5197519682296384, "grad_norm": 0.06473345309495926, "learning_rate": 5.2720740692454944e-06, "loss": 0.0571, "step": 3730 }, { "epoch": 0.5198913119208528, "grad_norm": 0.1217832937836647, "learning_rate": 5.269720747194088e-06, "loss": 0.0575, "step": 3731 }, { "epoch": 0.5200306556120672, "grad_norm": 0.08246544003486633, "learning_rate": 5.267367365216887e-06, "loss": 0.0552, "step": 3732 }, { "epoch": 0.5201699993032816, "grad_norm": 0.10377098619937897, "learning_rate": 5.265013923836763e-06, "loss": 0.059, "step": 3733 }, { "epoch": 0.5203093429944959, "grad_norm": 0.07933458685874939, "learning_rate": 5.262660423576595e-06, "loss": 0.0554, "step": 3734 }, { "epoch": 0.5204486866857103, "grad_norm": 0.08368923515081406, "learning_rate": 5.260306864959278e-06, "loss": 0.0514, "step": 3735 }, { "epoch": 0.5205880303769247, "grad_norm": 0.10180390626192093, "learning_rate": 5.25795324850772e-06, "loss": 0.0536, "step": 3736 }, { "epoch": 0.5207273740681391, "grad_norm": 0.13529713451862335, "learning_rate": 5.255599574744836e-06, "loss": 0.0698, "step": 3737 }, { "epoch": 0.5208667177593534, "grad_norm": 0.07795759290456772, "learning_rate": 5.253245844193564e-06, "loss": 0.0578, "step": 3738 }, { "epoch": 0.5210060614505678, "grad_norm": 0.09711974114179611, "learning_rate": 5.250892057376848e-06, "loss": 0.061, "step": 3739 }, { "epoch": 0.5211454051417822, "grad_norm": 0.08011709898710251, "learning_rate": 5.248538214817642e-06, "loss": 0.0551, "step": 3740 }, { "epoch": 0.5212847488329966, "grad_norm": 0.08051735907793045, "learning_rate": 5.246184317038922e-06, "loss": 0.056, "step": 3741 }, { "epoch": 0.521424092524211, "grad_norm": 0.08882542699575424, "learning_rate": 5.243830364563665e-06, "loss": 0.0552, "step": 3742 }, { "epoch": 0.5215634362154253, "grad_norm": 0.10293109714984894, "learning_rate": 5.241476357914869e-06, "loss": 0.0599, "step": 3743 }, { "epoch": 0.5217027799066397, "grad_norm": 0.05996943265199661, "learning_rate": 5.239122297615539e-06, "loss": 0.0531, "step": 3744 }, { "epoch": 0.5218421235978541, "grad_norm": 0.07923859357833862, "learning_rate": 5.236768184188693e-06, "loss": 0.0555, "step": 3745 }, { "epoch": 0.5219814672890685, "grad_norm": 0.08640313893556595, "learning_rate": 5.234414018157361e-06, "loss": 0.0629, "step": 3746 }, { "epoch": 0.5221208109802828, "grad_norm": 0.0998341515660286, "learning_rate": 5.232059800044589e-06, "loss": 0.0585, "step": 3747 }, { "epoch": 0.5222601546714972, "grad_norm": 0.0742923691868782, "learning_rate": 5.229705530373424e-06, "loss": 0.0574, "step": 3748 }, { "epoch": 0.5223994983627116, "grad_norm": 0.091366708278656, "learning_rate": 5.2273512096669364e-06, "loss": 0.0544, "step": 3749 }, { "epoch": 0.522538842053926, "grad_norm": 0.08832496404647827, "learning_rate": 5.2249968384482e-06, "loss": 0.0639, "step": 3750 }, { "epoch": 0.5226781857451404, "grad_norm": 0.06785956025123596, "learning_rate": 5.222642417240305e-06, "loss": 0.054, "step": 3751 }, { "epoch": 0.5228175294363547, "grad_norm": 0.07938659936189651, "learning_rate": 5.220287946566347e-06, "loss": 0.05, "step": 3752 }, { "epoch": 0.5229568731275691, "grad_norm": 0.06794267147779465, "learning_rate": 5.2179334269494345e-06, "loss": 0.0413, "step": 3753 }, { "epoch": 0.5230962168187835, "grad_norm": 0.09941545873880386, "learning_rate": 5.215578858912691e-06, "loss": 0.063, "step": 3754 }, { "epoch": 0.5232355605099979, "grad_norm": 0.12637650966644287, "learning_rate": 5.213224242979247e-06, "loss": 0.0616, "step": 3755 }, { "epoch": 0.5233749042012122, "grad_norm": 0.06985796988010406, "learning_rate": 5.2108695796722446e-06, "loss": 0.051, "step": 3756 }, { "epoch": 0.5235142478924266, "grad_norm": 0.09563204646110535, "learning_rate": 5.208514869514835e-06, "loss": 0.0539, "step": 3757 }, { "epoch": 0.523653591583641, "grad_norm": 0.06217870116233826, "learning_rate": 5.206160113030182e-06, "loss": 0.0506, "step": 3758 }, { "epoch": 0.5237929352748554, "grad_norm": 0.07619664818048477, "learning_rate": 5.203805310741459e-06, "loss": 0.0595, "step": 3759 }, { "epoch": 0.5239322789660699, "grad_norm": 0.07770931720733643, "learning_rate": 5.201450463171849e-06, "loss": 0.0561, "step": 3760 }, { "epoch": 0.5240716226572842, "grad_norm": 0.10172989964485168, "learning_rate": 5.199095570844546e-06, "loss": 0.0542, "step": 3761 }, { "epoch": 0.5242109663484986, "grad_norm": 0.06304346024990082, "learning_rate": 5.19674063428275e-06, "loss": 0.0525, "step": 3762 }, { "epoch": 0.524350310039713, "grad_norm": 0.06513382494449615, "learning_rate": 5.1943856540096795e-06, "loss": 0.0442, "step": 3763 }, { "epoch": 0.5244896537309274, "grad_norm": 0.08359311521053314, "learning_rate": 5.192030630548552e-06, "loss": 0.0599, "step": 3764 }, { "epoch": 0.5246289974221418, "grad_norm": 0.06911475211381912, "learning_rate": 5.1896755644226046e-06, "loss": 0.0435, "step": 3765 }, { "epoch": 0.5247683411133561, "grad_norm": 0.13517522811889648, "learning_rate": 5.1873204561550764e-06, "loss": 0.0632, "step": 3766 }, { "epoch": 0.5249076848045705, "grad_norm": 0.13994020223617554, "learning_rate": 5.18496530626922e-06, "loss": 0.0661, "step": 3767 }, { "epoch": 0.5250470284957849, "grad_norm": 0.1848805993795395, "learning_rate": 5.182610115288296e-06, "loss": 0.0595, "step": 3768 }, { "epoch": 0.5251863721869993, "grad_norm": 0.05016443505883217, "learning_rate": 5.180254883735571e-06, "loss": 0.0528, "step": 3769 }, { "epoch": 0.5253257158782136, "grad_norm": 0.06649505347013474, "learning_rate": 5.1778996121343274e-06, "loss": 0.0547, "step": 3770 }, { "epoch": 0.525465059569428, "grad_norm": 0.22798897325992584, "learning_rate": 5.175544301007852e-06, "loss": 0.0783, "step": 3771 }, { "epoch": 0.5256044032606424, "grad_norm": 0.22223974764347076, "learning_rate": 5.173188950879441e-06, "loss": 0.0627, "step": 3772 }, { "epoch": 0.5257437469518568, "grad_norm": 0.11995497345924377, "learning_rate": 5.170833562272398e-06, "loss": 0.0514, "step": 3773 }, { "epoch": 0.5258830906430711, "grad_norm": 0.20759311318397522, "learning_rate": 5.168478135710038e-06, "loss": 0.0678, "step": 3774 }, { "epoch": 0.5260224343342855, "grad_norm": 0.10069025307893753, "learning_rate": 5.166122671715683e-06, "loss": 0.0573, "step": 3775 }, { "epoch": 0.5261617780254999, "grad_norm": 0.08687228709459305, "learning_rate": 5.163767170812663e-06, "loss": 0.0528, "step": 3776 }, { "epoch": 0.5263011217167143, "grad_norm": 0.10101589560508728, "learning_rate": 5.1614116335243155e-06, "loss": 0.0454, "step": 3777 }, { "epoch": 0.5264404654079287, "grad_norm": 0.07329048216342926, "learning_rate": 5.1590560603739885e-06, "loss": 0.0597, "step": 3778 }, { "epoch": 0.526579809099143, "grad_norm": 0.11661594361066818, "learning_rate": 5.156700451885037e-06, "loss": 0.0561, "step": 3779 }, { "epoch": 0.5267191527903574, "grad_norm": 0.14199809730052948, "learning_rate": 5.154344808580821e-06, "loss": 0.0635, "step": 3780 }, { "epoch": 0.5268584964815718, "grad_norm": 0.1545412838459015, "learning_rate": 5.151989130984715e-06, "loss": 0.0661, "step": 3781 }, { "epoch": 0.5269978401727862, "grad_norm": 0.12492460012435913, "learning_rate": 5.149633419620092e-06, "loss": 0.057, "step": 3782 }, { "epoch": 0.5271371838640005, "grad_norm": 0.18334506452083588, "learning_rate": 5.147277675010339e-06, "loss": 0.0695, "step": 3783 }, { "epoch": 0.5272765275552149, "grad_norm": 0.06402560323476791, "learning_rate": 5.144921897678851e-06, "loss": 0.0525, "step": 3784 }, { "epoch": 0.5274158712464293, "grad_norm": 0.08048830926418304, "learning_rate": 5.142566088149024e-06, "loss": 0.0564, "step": 3785 }, { "epoch": 0.5275552149376437, "grad_norm": 0.111229307949543, "learning_rate": 5.1402102469442686e-06, "loss": 0.0616, "step": 3786 }, { "epoch": 0.5276945586288581, "grad_norm": 0.12697352468967438, "learning_rate": 5.137854374587996e-06, "loss": 0.0614, "step": 3787 }, { "epoch": 0.5278339023200724, "grad_norm": 0.0953187346458435, "learning_rate": 5.135498471603629e-06, "loss": 0.0534, "step": 3788 }, { "epoch": 0.5279732460112868, "grad_norm": 0.1017116978764534, "learning_rate": 5.133142538514596e-06, "loss": 0.0574, "step": 3789 }, { "epoch": 0.5281125897025012, "grad_norm": 0.08318663388490677, "learning_rate": 5.130786575844329e-06, "loss": 0.0659, "step": 3790 }, { "epoch": 0.5282519333937156, "grad_norm": 0.08424968272447586, "learning_rate": 5.128430584116273e-06, "loss": 0.0547, "step": 3791 }, { "epoch": 0.52839127708493, "grad_norm": 0.09033700823783875, "learning_rate": 5.126074563853872e-06, "loss": 0.0582, "step": 3792 }, { "epoch": 0.5285306207761443, "grad_norm": 0.08657951653003693, "learning_rate": 5.123718515580581e-06, "loss": 0.0562, "step": 3793 }, { "epoch": 0.5286699644673587, "grad_norm": 0.09027206152677536, "learning_rate": 5.1213624398198606e-06, "loss": 0.0545, "step": 3794 }, { "epoch": 0.5288093081585731, "grad_norm": 0.08350461721420288, "learning_rate": 5.119006337095178e-06, "loss": 0.0603, "step": 3795 }, { "epoch": 0.5289486518497875, "grad_norm": 0.06773032248020172, "learning_rate": 5.1166502079300015e-06, "loss": 0.052, "step": 3796 }, { "epoch": 0.5290879955410018, "grad_norm": 0.07440871745347977, "learning_rate": 5.114294052847814e-06, "loss": 0.0556, "step": 3797 }, { "epoch": 0.5292273392322162, "grad_norm": 0.08820491284132004, "learning_rate": 5.111937872372097e-06, "loss": 0.0722, "step": 3798 }, { "epoch": 0.5293666829234306, "grad_norm": 0.17322701215744019, "learning_rate": 5.109581667026341e-06, "loss": 0.0781, "step": 3799 }, { "epoch": 0.5295060266146451, "grad_norm": 0.0696282908320427, "learning_rate": 5.107225437334039e-06, "loss": 0.051, "step": 3800 }, { "epoch": 0.5296453703058595, "grad_norm": 0.08477845042943954, "learning_rate": 5.1048691838186935e-06, "loss": 0.0571, "step": 3801 }, { "epoch": 0.5297847139970738, "grad_norm": 0.1187656819820404, "learning_rate": 5.102512907003812e-06, "loss": 0.0561, "step": 3802 }, { "epoch": 0.5299240576882882, "grad_norm": 0.1444581001996994, "learning_rate": 5.100156607412899e-06, "loss": 0.0524, "step": 3803 }, { "epoch": 0.5300634013795026, "grad_norm": 0.13175885379314423, "learning_rate": 5.097800285569476e-06, "loss": 0.0575, "step": 3804 }, { "epoch": 0.530202745070717, "grad_norm": 0.07685589790344238, "learning_rate": 5.095443941997062e-06, "loss": 0.0548, "step": 3805 }, { "epoch": 0.5303420887619313, "grad_norm": 0.0809049978852272, "learning_rate": 5.093087577219183e-06, "loss": 0.0548, "step": 3806 }, { "epoch": 0.5304814324531457, "grad_norm": 0.11014116555452347, "learning_rate": 5.090731191759371e-06, "loss": 0.049, "step": 3807 }, { "epoch": 0.5306207761443601, "grad_norm": 0.07300493121147156, "learning_rate": 5.088374786141159e-06, "loss": 0.0469, "step": 3808 }, { "epoch": 0.5307601198355745, "grad_norm": 0.07599174976348877, "learning_rate": 5.086018360888087e-06, "loss": 0.0621, "step": 3809 }, { "epoch": 0.5308994635267889, "grad_norm": 0.07086855918169022, "learning_rate": 5.083661916523699e-06, "loss": 0.0488, "step": 3810 }, { "epoch": 0.5310388072180032, "grad_norm": 0.08926187455654144, "learning_rate": 5.081305453571543e-06, "loss": 0.0545, "step": 3811 }, { "epoch": 0.5311781509092176, "grad_norm": 0.07233814150094986, "learning_rate": 5.07894897255517e-06, "loss": 0.0693, "step": 3812 }, { "epoch": 0.531317494600432, "grad_norm": 0.08713299036026001, "learning_rate": 5.076592473998141e-06, "loss": 0.0643, "step": 3813 }, { "epoch": 0.5314568382916464, "grad_norm": 0.05797349289059639, "learning_rate": 5.07423595842401e-06, "loss": 0.0503, "step": 3814 }, { "epoch": 0.5315961819828607, "grad_norm": 0.06928517669439316, "learning_rate": 5.071879426356345e-06, "loss": 0.0487, "step": 3815 }, { "epoch": 0.5317355256740751, "grad_norm": 0.09270603209733963, "learning_rate": 5.069522878318712e-06, "loss": 0.0547, "step": 3816 }, { "epoch": 0.5318748693652895, "grad_norm": 0.08038587868213654, "learning_rate": 5.067166314834684e-06, "loss": 0.0684, "step": 3817 }, { "epoch": 0.5320142130565039, "grad_norm": 0.12516944110393524, "learning_rate": 5.064809736427835e-06, "loss": 0.0714, "step": 3818 }, { "epoch": 0.5321535567477182, "grad_norm": 0.13758081197738647, "learning_rate": 5.062453143621739e-06, "loss": 0.0601, "step": 3819 }, { "epoch": 0.5322929004389326, "grad_norm": 0.08323261886835098, "learning_rate": 5.060096536939982e-06, "loss": 0.0579, "step": 3820 }, { "epoch": 0.532432244130147, "grad_norm": 0.11803529411554337, "learning_rate": 5.057739916906147e-06, "loss": 0.0562, "step": 3821 }, { "epoch": 0.5325715878213614, "grad_norm": 0.1346980333328247, "learning_rate": 5.05538328404382e-06, "loss": 0.0634, "step": 3822 }, { "epoch": 0.5327109315125758, "grad_norm": 0.06549321860074997, "learning_rate": 5.053026638876591e-06, "loss": 0.0531, "step": 3823 }, { "epoch": 0.5328502752037901, "grad_norm": 0.1582474261522293, "learning_rate": 5.050669981928056e-06, "loss": 0.0663, "step": 3824 }, { "epoch": 0.5329896188950045, "grad_norm": 0.07558704167604446, "learning_rate": 5.048313313721806e-06, "loss": 0.0457, "step": 3825 }, { "epoch": 0.5331289625862189, "grad_norm": 0.0764412060379982, "learning_rate": 5.04595663478144e-06, "loss": 0.0532, "step": 3826 }, { "epoch": 0.5332683062774333, "grad_norm": 0.10080064833164215, "learning_rate": 5.0435999456305605e-06, "loss": 0.0519, "step": 3827 }, { "epoch": 0.5334076499686476, "grad_norm": 0.14637774229049683, "learning_rate": 5.0412432467927674e-06, "loss": 0.0532, "step": 3828 }, { "epoch": 0.533546993659862, "grad_norm": 0.09957172721624374, "learning_rate": 5.038886538791668e-06, "loss": 0.0559, "step": 3829 }, { "epoch": 0.5336863373510764, "grad_norm": 0.09038233757019043, "learning_rate": 5.036529822150865e-06, "loss": 0.0479, "step": 3830 }, { "epoch": 0.5338256810422908, "grad_norm": 0.06290141493082047, "learning_rate": 5.034173097393973e-06, "loss": 0.0626, "step": 3831 }, { "epoch": 0.5339650247335052, "grad_norm": 0.08474253863096237, "learning_rate": 5.031816365044595e-06, "loss": 0.0619, "step": 3832 }, { "epoch": 0.5341043684247195, "grad_norm": 0.12221936136484146, "learning_rate": 5.02945962562635e-06, "loss": 0.0586, "step": 3833 }, { "epoch": 0.5342437121159339, "grad_norm": 0.09100456535816193, "learning_rate": 5.027102879662847e-06, "loss": 0.0593, "step": 3834 }, { "epoch": 0.5343830558071483, "grad_norm": 0.09045515209436417, "learning_rate": 5.024746127677703e-06, "loss": 0.0538, "step": 3835 }, { "epoch": 0.5345223994983627, "grad_norm": 0.0925830826163292, "learning_rate": 5.022389370194536e-06, "loss": 0.0626, "step": 3836 }, { "epoch": 0.534661743189577, "grad_norm": 0.06803865730762482, "learning_rate": 5.020032607736961e-06, "loss": 0.0515, "step": 3837 }, { "epoch": 0.5348010868807914, "grad_norm": 0.10455776005983353, "learning_rate": 5.017675840828597e-06, "loss": 0.0584, "step": 3838 }, { "epoch": 0.5349404305720058, "grad_norm": 0.10973944514989853, "learning_rate": 5.015319069993066e-06, "loss": 0.0648, "step": 3839 }, { "epoch": 0.5350797742632203, "grad_norm": 0.07345602661371231, "learning_rate": 5.012962295753988e-06, "loss": 0.0534, "step": 3840 }, { "epoch": 0.5352191179544347, "grad_norm": 0.09852230548858643, "learning_rate": 5.010605518634982e-06, "loss": 0.0544, "step": 3841 }, { "epoch": 0.535358461645649, "grad_norm": 0.08184441924095154, "learning_rate": 5.008248739159674e-06, "loss": 0.0608, "step": 3842 }, { "epoch": 0.5354978053368634, "grad_norm": 0.1475686877965927, "learning_rate": 5.005891957851683e-06, "loss": 0.0573, "step": 3843 }, { "epoch": 0.5356371490280778, "grad_norm": 0.07939355075359344, "learning_rate": 5.003535175234633e-06, "loss": 0.0531, "step": 3844 }, { "epoch": 0.5357764927192922, "grad_norm": 0.06861286610364914, "learning_rate": 5.001178391832149e-06, "loss": 0.047, "step": 3845 }, { "epoch": 0.5359158364105066, "grad_norm": 0.1983242928981781, "learning_rate": 4.998821608167853e-06, "loss": 0.0587, "step": 3846 }, { "epoch": 0.5360551801017209, "grad_norm": 0.06580420583486557, "learning_rate": 4.996464824765369e-06, "loss": 0.0559, "step": 3847 }, { "epoch": 0.5361945237929353, "grad_norm": 0.08588270843029022, "learning_rate": 4.994108042148318e-06, "loss": 0.0602, "step": 3848 }, { "epoch": 0.5363338674841497, "grad_norm": 0.04879340901970863, "learning_rate": 4.991751260840328e-06, "loss": 0.0484, "step": 3849 }, { "epoch": 0.5364732111753641, "grad_norm": 0.08423618227243423, "learning_rate": 4.9893944813650185e-06, "loss": 0.0513, "step": 3850 }, { "epoch": 0.5366125548665784, "grad_norm": 0.10281403362751007, "learning_rate": 4.987037704246015e-06, "loss": 0.0597, "step": 3851 }, { "epoch": 0.5367518985577928, "grad_norm": 0.06256261467933655, "learning_rate": 4.984680930006936e-06, "loss": 0.0441, "step": 3852 }, { "epoch": 0.5368912422490072, "grad_norm": 0.09489016234874725, "learning_rate": 4.982324159171404e-06, "loss": 0.0653, "step": 3853 }, { "epoch": 0.5370305859402216, "grad_norm": 0.09132910519838333, "learning_rate": 4.979967392263041e-06, "loss": 0.0626, "step": 3854 }, { "epoch": 0.537169929631436, "grad_norm": 0.08898158371448517, "learning_rate": 4.977610629805465e-06, "loss": 0.0528, "step": 3855 }, { "epoch": 0.5373092733226503, "grad_norm": 0.06613066792488098, "learning_rate": 4.975253872322297e-06, "loss": 0.045, "step": 3856 }, { "epoch": 0.5374486170138647, "grad_norm": 0.12744715809822083, "learning_rate": 4.972897120337155e-06, "loss": 0.0626, "step": 3857 }, { "epoch": 0.5375879607050791, "grad_norm": 0.08552943170070648, "learning_rate": 4.970540374373653e-06, "loss": 0.0617, "step": 3858 }, { "epoch": 0.5377273043962935, "grad_norm": 0.063919797539711, "learning_rate": 4.9681836349554064e-06, "loss": 0.0571, "step": 3859 }, { "epoch": 0.5378666480875078, "grad_norm": 0.06644053012132645, "learning_rate": 4.965826902606029e-06, "loss": 0.0497, "step": 3860 }, { "epoch": 0.5380059917787222, "grad_norm": 0.14211975038051605, "learning_rate": 4.963470177849135e-06, "loss": 0.0555, "step": 3861 }, { "epoch": 0.5381453354699366, "grad_norm": 0.11612587422132492, "learning_rate": 4.961113461208335e-06, "loss": 0.0483, "step": 3862 }, { "epoch": 0.538284679161151, "grad_norm": 0.08679955452680588, "learning_rate": 4.958756753207234e-06, "loss": 0.0532, "step": 3863 }, { "epoch": 0.5384240228523653, "grad_norm": 0.13839483261108398, "learning_rate": 4.956400054369441e-06, "loss": 0.0668, "step": 3864 }, { "epoch": 0.5385633665435797, "grad_norm": 0.05491872876882553, "learning_rate": 4.954043365218561e-06, "loss": 0.0534, "step": 3865 }, { "epoch": 0.5387027102347941, "grad_norm": 0.1335037499666214, "learning_rate": 4.951686686278195e-06, "loss": 0.0588, "step": 3866 }, { "epoch": 0.5388420539260085, "grad_norm": 0.06133507937192917, "learning_rate": 4.949330018071947e-06, "loss": 0.0517, "step": 3867 }, { "epoch": 0.5389813976172229, "grad_norm": 0.11251293122768402, "learning_rate": 4.946973361123411e-06, "loss": 0.0511, "step": 3868 }, { "epoch": 0.5391207413084372, "grad_norm": 0.07136544585227966, "learning_rate": 4.9446167159561814e-06, "loss": 0.0519, "step": 3869 }, { "epoch": 0.5392600849996516, "grad_norm": 0.0719805359840393, "learning_rate": 4.942260083093854e-06, "loss": 0.0549, "step": 3870 }, { "epoch": 0.539399428690866, "grad_norm": 0.09695342928171158, "learning_rate": 4.939903463060018e-06, "loss": 0.0569, "step": 3871 }, { "epoch": 0.5395387723820804, "grad_norm": 0.08160314708948135, "learning_rate": 4.937546856378263e-06, "loss": 0.0531, "step": 3872 }, { "epoch": 0.5396781160732947, "grad_norm": 0.09136642515659332, "learning_rate": 4.935190263572168e-06, "loss": 0.0604, "step": 3873 }, { "epoch": 0.5398174597645091, "grad_norm": 0.0685405284166336, "learning_rate": 4.932833685165318e-06, "loss": 0.0625, "step": 3874 }, { "epoch": 0.5399568034557235, "grad_norm": 0.07843372970819473, "learning_rate": 4.930477121681289e-06, "loss": 0.0584, "step": 3875 }, { "epoch": 0.5400961471469379, "grad_norm": 0.08816356956958771, "learning_rate": 4.9281205736436555e-06, "loss": 0.0497, "step": 3876 }, { "epoch": 0.5402354908381523, "grad_norm": 0.1580350250005722, "learning_rate": 4.925764041575991e-06, "loss": 0.0727, "step": 3877 }, { "epoch": 0.5403748345293666, "grad_norm": 0.07446722686290741, "learning_rate": 4.9234075260018615e-06, "loss": 0.0528, "step": 3878 }, { "epoch": 0.540514178220581, "grad_norm": 0.1560431271791458, "learning_rate": 4.921051027444831e-06, "loss": 0.0634, "step": 3879 }, { "epoch": 0.5406535219117955, "grad_norm": 0.07783017307519913, "learning_rate": 4.918694546428458e-06, "loss": 0.0489, "step": 3880 }, { "epoch": 0.5407928656030099, "grad_norm": 0.10328764468431473, "learning_rate": 4.916338083476303e-06, "loss": 0.0575, "step": 3881 }, { "epoch": 0.5409322092942243, "grad_norm": 0.07080835849046707, "learning_rate": 4.913981639111914e-06, "loss": 0.0442, "step": 3882 }, { "epoch": 0.5410715529854386, "grad_norm": 0.15549758076667786, "learning_rate": 4.9116252138588435e-06, "loss": 0.0605, "step": 3883 }, { "epoch": 0.541210896676653, "grad_norm": 0.16478443145751953, "learning_rate": 4.90926880824063e-06, "loss": 0.0736, "step": 3884 }, { "epoch": 0.5413502403678674, "grad_norm": 0.09623577445745468, "learning_rate": 4.906912422780818e-06, "loss": 0.0648, "step": 3885 }, { "epoch": 0.5414895840590818, "grad_norm": 0.06605075299739838, "learning_rate": 4.904556058002939e-06, "loss": 0.0515, "step": 3886 }, { "epoch": 0.5416289277502961, "grad_norm": 0.08449802547693253, "learning_rate": 4.902199714430525e-06, "loss": 0.0534, "step": 3887 }, { "epoch": 0.5417682714415105, "grad_norm": 0.1080017015337944, "learning_rate": 4.899843392587104e-06, "loss": 0.0563, "step": 3888 }, { "epoch": 0.5419076151327249, "grad_norm": 0.08669287711381912, "learning_rate": 4.8974870929961915e-06, "loss": 0.0544, "step": 3889 }, { "epoch": 0.5420469588239393, "grad_norm": 0.11000647395849228, "learning_rate": 4.895130816181307e-06, "loss": 0.0498, "step": 3890 }, { "epoch": 0.5421863025151537, "grad_norm": 0.08059559762477875, "learning_rate": 4.8927745626659625e-06, "loss": 0.0484, "step": 3891 }, { "epoch": 0.542325646206368, "grad_norm": 0.1192249283194542, "learning_rate": 4.89041833297366e-06, "loss": 0.0586, "step": 3892 }, { "epoch": 0.5424649898975824, "grad_norm": 0.08024027198553085, "learning_rate": 4.888062127627904e-06, "loss": 0.0563, "step": 3893 }, { "epoch": 0.5426043335887968, "grad_norm": 0.0797138512134552, "learning_rate": 4.885705947152187e-06, "loss": 0.0495, "step": 3894 }, { "epoch": 0.5427436772800112, "grad_norm": 0.08436648547649384, "learning_rate": 4.883349792069999e-06, "loss": 0.054, "step": 3895 }, { "epoch": 0.5428830209712255, "grad_norm": 0.09325878322124481, "learning_rate": 4.880993662904824e-06, "loss": 0.056, "step": 3896 }, { "epoch": 0.5430223646624399, "grad_norm": 0.1694086641073227, "learning_rate": 4.87863756018014e-06, "loss": 0.0565, "step": 3897 }, { "epoch": 0.5431617083536543, "grad_norm": 0.10375278443098068, "learning_rate": 4.87628148441942e-06, "loss": 0.0568, "step": 3898 }, { "epoch": 0.5433010520448687, "grad_norm": 0.05045125260949135, "learning_rate": 4.8739254361461305e-06, "loss": 0.0438, "step": 3899 }, { "epoch": 0.543440395736083, "grad_norm": 0.16554486751556396, "learning_rate": 4.871569415883729e-06, "loss": 0.062, "step": 3900 }, { "epoch": 0.5435797394272974, "grad_norm": 0.1040266826748848, "learning_rate": 4.869213424155671e-06, "loss": 0.0502, "step": 3901 }, { "epoch": 0.5437190831185118, "grad_norm": 0.1220700666308403, "learning_rate": 4.8668574614854055e-06, "loss": 0.0558, "step": 3902 }, { "epoch": 0.5438584268097262, "grad_norm": 0.14457036554813385, "learning_rate": 4.864501528396371e-06, "loss": 0.0562, "step": 3903 }, { "epoch": 0.5439977705009406, "grad_norm": 0.08693687617778778, "learning_rate": 4.862145625412006e-06, "loss": 0.0584, "step": 3904 }, { "epoch": 0.5441371141921549, "grad_norm": 0.08929797261953354, "learning_rate": 4.859789753055734e-06, "loss": 0.0528, "step": 3905 }, { "epoch": 0.5442764578833693, "grad_norm": 0.2277388721704483, "learning_rate": 4.857433911850977e-06, "loss": 0.06, "step": 3906 }, { "epoch": 0.5444158015745837, "grad_norm": 0.11929161846637726, "learning_rate": 4.8550781023211516e-06, "loss": 0.0654, "step": 3907 }, { "epoch": 0.5445551452657981, "grad_norm": 0.0880543515086174, "learning_rate": 4.852722324989661e-06, "loss": 0.0561, "step": 3908 }, { "epoch": 0.5446944889570124, "grad_norm": 0.30543041229248047, "learning_rate": 4.85036658037991e-06, "loss": 0.0779, "step": 3909 }, { "epoch": 0.5448338326482268, "grad_norm": 0.10066944360733032, "learning_rate": 4.848010869015288e-06, "loss": 0.0541, "step": 3910 }, { "epoch": 0.5449731763394412, "grad_norm": 0.05439188703894615, "learning_rate": 4.84565519141918e-06, "loss": 0.0535, "step": 3911 }, { "epoch": 0.5451125200306556, "grad_norm": 0.06929533183574677, "learning_rate": 4.843299548114964e-06, "loss": 0.0534, "step": 3912 }, { "epoch": 0.54525186372187, "grad_norm": 0.06354095786809921, "learning_rate": 4.840943939626012e-06, "loss": 0.0577, "step": 3913 }, { "epoch": 0.5453912074130843, "grad_norm": 0.10450813919305801, "learning_rate": 4.838588366475685e-06, "loss": 0.0582, "step": 3914 }, { "epoch": 0.5455305511042987, "grad_norm": 0.06468679010868073, "learning_rate": 4.83623282918734e-06, "loss": 0.0544, "step": 3915 }, { "epoch": 0.5456698947955131, "grad_norm": 0.11410649865865707, "learning_rate": 4.833877328284319e-06, "loss": 0.0589, "step": 3916 }, { "epoch": 0.5458092384867275, "grad_norm": 0.08994846791028976, "learning_rate": 4.831521864289964e-06, "loss": 0.058, "step": 3917 }, { "epoch": 0.5459485821779418, "grad_norm": 0.06914255023002625, "learning_rate": 4.829166437727603e-06, "loss": 0.0505, "step": 3918 }, { "epoch": 0.5460879258691562, "grad_norm": 0.0817023292183876, "learning_rate": 4.82681104912056e-06, "loss": 0.054, "step": 3919 }, { "epoch": 0.5462272695603707, "grad_norm": 0.1014062687754631, "learning_rate": 4.82445569899215e-06, "loss": 0.0612, "step": 3920 }, { "epoch": 0.5463666132515851, "grad_norm": 0.07889237999916077, "learning_rate": 4.822100387865673e-06, "loss": 0.0501, "step": 3921 }, { "epoch": 0.5465059569427995, "grad_norm": 0.08030268549919128, "learning_rate": 4.8197451162644305e-06, "loss": 0.054, "step": 3922 }, { "epoch": 0.5466453006340138, "grad_norm": 0.1212068498134613, "learning_rate": 4.817389884711706e-06, "loss": 0.0597, "step": 3923 }, { "epoch": 0.5467846443252282, "grad_norm": 0.09125075489282608, "learning_rate": 4.815034693730781e-06, "loss": 0.0541, "step": 3924 }, { "epoch": 0.5469239880164426, "grad_norm": 0.1232677698135376, "learning_rate": 4.812679543844924e-06, "loss": 0.0547, "step": 3925 }, { "epoch": 0.547063331707657, "grad_norm": 0.07766276597976685, "learning_rate": 4.810324435577397e-06, "loss": 0.0503, "step": 3926 }, { "epoch": 0.5472026753988714, "grad_norm": 0.06423966586589813, "learning_rate": 4.807969369451449e-06, "loss": 0.0509, "step": 3927 }, { "epoch": 0.5473420190900857, "grad_norm": 0.11700902134180069, "learning_rate": 4.805614345990322e-06, "loss": 0.0726, "step": 3928 }, { "epoch": 0.5474813627813001, "grad_norm": 0.06867904961109161, "learning_rate": 4.803259365717251e-06, "loss": 0.0581, "step": 3929 }, { "epoch": 0.5476207064725145, "grad_norm": 0.07207992672920227, "learning_rate": 4.800904429155458e-06, "loss": 0.0419, "step": 3930 }, { "epoch": 0.5477600501637289, "grad_norm": 0.08812477439641953, "learning_rate": 4.7985495368281534e-06, "loss": 0.0518, "step": 3931 }, { "epoch": 0.5478993938549432, "grad_norm": 0.06736211478710175, "learning_rate": 4.796194689258542e-06, "loss": 0.0459, "step": 3932 }, { "epoch": 0.5480387375461576, "grad_norm": 0.12313925474882126, "learning_rate": 4.793839886969819e-06, "loss": 0.0552, "step": 3933 }, { "epoch": 0.548178081237372, "grad_norm": 0.14302565157413483, "learning_rate": 4.791485130485167e-06, "loss": 0.0568, "step": 3934 }, { "epoch": 0.5483174249285864, "grad_norm": 0.10556294769048691, "learning_rate": 4.789130420327756e-06, "loss": 0.0504, "step": 3935 }, { "epoch": 0.5484567686198007, "grad_norm": 0.10762958973646164, "learning_rate": 4.786775757020755e-06, "loss": 0.0606, "step": 3936 }, { "epoch": 0.5485961123110151, "grad_norm": 0.11900533735752106, "learning_rate": 4.784421141087311e-06, "loss": 0.054, "step": 3937 }, { "epoch": 0.5487354560022295, "grad_norm": 0.0886584147810936, "learning_rate": 4.782066573050567e-06, "loss": 0.0567, "step": 3938 }, { "epoch": 0.5488747996934439, "grad_norm": 0.0711425170302391, "learning_rate": 4.779712053433655e-06, "loss": 0.0552, "step": 3939 }, { "epoch": 0.5490141433846583, "grad_norm": 0.09476209431886673, "learning_rate": 4.777357582759696e-06, "loss": 0.0501, "step": 3940 }, { "epoch": 0.5491534870758726, "grad_norm": 0.07006845623254776, "learning_rate": 4.7750031615518e-06, "loss": 0.0472, "step": 3941 }, { "epoch": 0.549292830767087, "grad_norm": 0.061509452760219574, "learning_rate": 4.772648790333065e-06, "loss": 0.058, "step": 3942 }, { "epoch": 0.5494321744583014, "grad_norm": 0.09760530292987823, "learning_rate": 4.7702944696265766e-06, "loss": 0.0513, "step": 3943 }, { "epoch": 0.5495715181495158, "grad_norm": 0.10629713535308838, "learning_rate": 4.767940199955413e-06, "loss": 0.0623, "step": 3944 }, { "epoch": 0.5497108618407301, "grad_norm": 0.07475273311138153, "learning_rate": 4.765585981842639e-06, "loss": 0.057, "step": 3945 }, { "epoch": 0.5498502055319445, "grad_norm": 0.06850016862154007, "learning_rate": 4.76323181581131e-06, "loss": 0.0485, "step": 3946 }, { "epoch": 0.5499895492231589, "grad_norm": 0.08740714937448502, "learning_rate": 4.760877702384464e-06, "loss": 0.049, "step": 3947 }, { "epoch": 0.5501288929143733, "grad_norm": 0.07650598138570786, "learning_rate": 4.758523642085133e-06, "loss": 0.0489, "step": 3948 }, { "epoch": 0.5502682366055877, "grad_norm": 0.06620986759662628, "learning_rate": 4.756169635436336e-06, "loss": 0.0612, "step": 3949 }, { "epoch": 0.550407580296802, "grad_norm": 0.050040192902088165, "learning_rate": 4.75381568296108e-06, "loss": 0.0481, "step": 3950 }, { "epoch": 0.5505469239880164, "grad_norm": 0.11251692473888397, "learning_rate": 4.751461785182358e-06, "loss": 0.0497, "step": 3951 }, { "epoch": 0.5506862676792308, "grad_norm": 0.06608670204877853, "learning_rate": 4.7491079426231556e-06, "loss": 0.0519, "step": 3952 }, { "epoch": 0.5508256113704452, "grad_norm": 0.06991898268461227, "learning_rate": 4.746754155806437e-06, "loss": 0.0525, "step": 3953 }, { "epoch": 0.5509649550616595, "grad_norm": 0.12746986746788025, "learning_rate": 4.744400425255165e-06, "loss": 0.063, "step": 3954 }, { "epoch": 0.5511042987528739, "grad_norm": 0.11613786220550537, "learning_rate": 4.7420467514922815e-06, "loss": 0.0516, "step": 3955 }, { "epoch": 0.5512436424440883, "grad_norm": 0.08282609283924103, "learning_rate": 4.739693135040722e-06, "loss": 0.0534, "step": 3956 }, { "epoch": 0.5513829861353027, "grad_norm": 0.08396472781896591, "learning_rate": 4.737339576423406e-06, "loss": 0.0484, "step": 3957 }, { "epoch": 0.551522329826517, "grad_norm": 0.10765239596366882, "learning_rate": 4.734986076163238e-06, "loss": 0.0612, "step": 3958 }, { "epoch": 0.5516616735177314, "grad_norm": 0.07068535685539246, "learning_rate": 4.732632634783114e-06, "loss": 0.0556, "step": 3959 }, { "epoch": 0.5518010172089458, "grad_norm": 0.06890986114740372, "learning_rate": 4.730279252805914e-06, "loss": 0.0574, "step": 3960 }, { "epoch": 0.5519403609001603, "grad_norm": 0.13793212175369263, "learning_rate": 4.727925930754506e-06, "loss": 0.0705, "step": 3961 }, { "epoch": 0.5520797045913747, "grad_norm": 0.07833149284124374, "learning_rate": 4.725572669151747e-06, "loss": 0.0554, "step": 3962 }, { "epoch": 0.552219048282589, "grad_norm": 0.17772702872753143, "learning_rate": 4.723219468520474e-06, "loss": 0.0634, "step": 3963 }, { "epoch": 0.5523583919738034, "grad_norm": 0.05857982486486435, "learning_rate": 4.720866329383514e-06, "loss": 0.049, "step": 3964 }, { "epoch": 0.5524977356650178, "grad_norm": 0.07993752509355545, "learning_rate": 4.718513252263685e-06, "loss": 0.0562, "step": 3965 }, { "epoch": 0.5526370793562322, "grad_norm": 0.08857478946447372, "learning_rate": 4.716160237683785e-06, "loss": 0.0542, "step": 3966 }, { "epoch": 0.5527764230474466, "grad_norm": 0.06670936942100525, "learning_rate": 4.7138072861666e-06, "loss": 0.0464, "step": 3967 }, { "epoch": 0.5529157667386609, "grad_norm": 0.0909000039100647, "learning_rate": 4.711454398234902e-06, "loss": 0.0582, "step": 3968 }, { "epoch": 0.5530551104298753, "grad_norm": 0.07463257014751434, "learning_rate": 4.7091015744114475e-06, "loss": 0.0583, "step": 3969 }, { "epoch": 0.5531944541210897, "grad_norm": 0.07597753405570984, "learning_rate": 4.706748815218984e-06, "loss": 0.0542, "step": 3970 }, { "epoch": 0.5533337978123041, "grad_norm": 0.13365057110786438, "learning_rate": 4.704396121180237e-06, "loss": 0.0724, "step": 3971 }, { "epoch": 0.5534731415035185, "grad_norm": 0.07221978902816772, "learning_rate": 4.702043492817924e-06, "loss": 0.0474, "step": 3972 }, { "epoch": 0.5536124851947328, "grad_norm": 0.08357124775648117, "learning_rate": 4.6996909306547455e-06, "loss": 0.0526, "step": 3973 }, { "epoch": 0.5537518288859472, "grad_norm": 0.08545710891485214, "learning_rate": 4.697338435213385e-06, "loss": 0.0578, "step": 3974 }, { "epoch": 0.5538911725771616, "grad_norm": 0.09811346977949142, "learning_rate": 4.694986007016514e-06, "loss": 0.0506, "step": 3975 }, { "epoch": 0.554030516268376, "grad_norm": 0.1420692652463913, "learning_rate": 4.692633646586788e-06, "loss": 0.0691, "step": 3976 }, { "epoch": 0.5541698599595903, "grad_norm": 0.08008364588022232, "learning_rate": 4.690281354446849e-06, "loss": 0.0527, "step": 3977 }, { "epoch": 0.5543092036508047, "grad_norm": 0.12764720618724823, "learning_rate": 4.6879291311193244e-06, "loss": 0.0585, "step": 3978 }, { "epoch": 0.5544485473420191, "grad_norm": 0.12277701497077942, "learning_rate": 4.68557697712682e-06, "loss": 0.056, "step": 3979 }, { "epoch": 0.5545878910332335, "grad_norm": 0.057292379438877106, "learning_rate": 4.683224892991932e-06, "loss": 0.0396, "step": 3980 }, { "epoch": 0.5547272347244478, "grad_norm": 0.07974762469530106, "learning_rate": 4.680872879237242e-06, "loss": 0.051, "step": 3981 }, { "epoch": 0.5548665784156622, "grad_norm": 0.21480315923690796, "learning_rate": 4.678520936385313e-06, "loss": 0.0689, "step": 3982 }, { "epoch": 0.5550059221068766, "grad_norm": 0.09088830649852753, "learning_rate": 4.676169064958692e-06, "loss": 0.0497, "step": 3983 }, { "epoch": 0.555145265798091, "grad_norm": 0.07662203162908554, "learning_rate": 4.6738172654799105e-06, "loss": 0.0512, "step": 3984 }, { "epoch": 0.5552846094893054, "grad_norm": 0.07248291373252869, "learning_rate": 4.671465538471487e-06, "loss": 0.0524, "step": 3985 }, { "epoch": 0.5554239531805197, "grad_norm": 0.0700262039899826, "learning_rate": 4.66911388445592e-06, "loss": 0.0486, "step": 3986 }, { "epoch": 0.5555632968717341, "grad_norm": 0.11687278002500534, "learning_rate": 4.666762303955692e-06, "loss": 0.0425, "step": 3987 }, { "epoch": 0.5557026405629485, "grad_norm": 0.07924049347639084, "learning_rate": 4.664410797493275e-06, "loss": 0.054, "step": 3988 }, { "epoch": 0.5558419842541629, "grad_norm": 0.05428416654467583, "learning_rate": 4.662059365591115e-06, "loss": 0.0514, "step": 3989 }, { "epoch": 0.5559813279453772, "grad_norm": 0.050399865955114365, "learning_rate": 4.6597080087716494e-06, "loss": 0.0429, "step": 3990 }, { "epoch": 0.5561206716365916, "grad_norm": 0.0671544224023819, "learning_rate": 4.657356727557295e-06, "loss": 0.0513, "step": 3991 }, { "epoch": 0.556260015327806, "grad_norm": 0.09729403257369995, "learning_rate": 4.655005522470453e-06, "loss": 0.0653, "step": 3992 }, { "epoch": 0.5563993590190204, "grad_norm": 0.12733516097068787, "learning_rate": 4.652654394033508e-06, "loss": 0.0549, "step": 3993 }, { "epoch": 0.5565387027102348, "grad_norm": 0.05282820016145706, "learning_rate": 4.650303342768827e-06, "loss": 0.0502, "step": 3994 }, { "epoch": 0.5566780464014491, "grad_norm": 0.06816496700048447, "learning_rate": 4.6479523691987585e-06, "loss": 0.0507, "step": 3995 }, { "epoch": 0.5568173900926635, "grad_norm": 0.13258397579193115, "learning_rate": 4.645601473845636e-06, "loss": 0.0645, "step": 3996 }, { "epoch": 0.5569567337838779, "grad_norm": 0.06825105845928192, "learning_rate": 4.6432506572317754e-06, "loss": 0.0465, "step": 3997 }, { "epoch": 0.5570960774750923, "grad_norm": 0.0987773984670639, "learning_rate": 4.6408999198794744e-06, "loss": 0.0636, "step": 3998 }, { "epoch": 0.5572354211663066, "grad_norm": 0.06132073327898979, "learning_rate": 4.6385492623110135e-06, "loss": 0.0458, "step": 3999 }, { "epoch": 0.557374764857521, "grad_norm": 0.060749925673007965, "learning_rate": 4.636198685048653e-06, "loss": 0.0441, "step": 4000 }, { "epoch": 0.5575141085487355, "grad_norm": 0.10213325917720795, "learning_rate": 4.633848188614639e-06, "loss": 0.0705, "step": 4001 }, { "epoch": 0.5576534522399499, "grad_norm": 0.07030253857374191, "learning_rate": 4.631497773531199e-06, "loss": 0.0581, "step": 4002 }, { "epoch": 0.5577927959311643, "grad_norm": 0.09629756212234497, "learning_rate": 4.629147440320539e-06, "loss": 0.0491, "step": 4003 }, { "epoch": 0.5579321396223786, "grad_norm": 0.06259472668170929, "learning_rate": 4.626797189504855e-06, "loss": 0.0493, "step": 4004 }, { "epoch": 0.558071483313593, "grad_norm": 0.0910591334104538, "learning_rate": 4.624447021606311e-06, "loss": 0.0555, "step": 4005 }, { "epoch": 0.5582108270048074, "grad_norm": 0.09533378481864929, "learning_rate": 4.6220969371470665e-06, "loss": 0.0549, "step": 4006 }, { "epoch": 0.5583501706960218, "grad_norm": 0.06391846388578415, "learning_rate": 4.619746936649254e-06, "loss": 0.0442, "step": 4007 }, { "epoch": 0.5584895143872362, "grad_norm": 0.11452421545982361, "learning_rate": 4.617397020634991e-06, "loss": 0.065, "step": 4008 }, { "epoch": 0.5586288580784505, "grad_norm": 0.06901202350854874, "learning_rate": 4.615047189626376e-06, "loss": 0.0523, "step": 4009 }, { "epoch": 0.5587682017696649, "grad_norm": 0.05886236950755119, "learning_rate": 4.612697444145487e-06, "loss": 0.0399, "step": 4010 }, { "epoch": 0.5589075454608793, "grad_norm": 0.06087873876094818, "learning_rate": 4.610347784714383e-06, "loss": 0.043, "step": 4011 }, { "epoch": 0.5590468891520937, "grad_norm": 0.13763675093650818, "learning_rate": 4.6079982118551045e-06, "loss": 0.0637, "step": 4012 }, { "epoch": 0.559186232843308, "grad_norm": 0.10405832529067993, "learning_rate": 4.605648726089674e-06, "loss": 0.0573, "step": 4013 }, { "epoch": 0.5593255765345224, "grad_norm": 0.09177328646183014, "learning_rate": 4.603299327940094e-06, "loss": 0.0446, "step": 4014 }, { "epoch": 0.5594649202257368, "grad_norm": 0.06267241388559341, "learning_rate": 4.600950017928348e-06, "loss": 0.0477, "step": 4015 }, { "epoch": 0.5596042639169512, "grad_norm": 0.08010246604681015, "learning_rate": 4.598600796576395e-06, "loss": 0.0615, "step": 4016 }, { "epoch": 0.5597436076081655, "grad_norm": 0.08020200580358505, "learning_rate": 4.596251664406182e-06, "loss": 0.0585, "step": 4017 }, { "epoch": 0.5598829512993799, "grad_norm": 0.10330712050199509, "learning_rate": 4.593902621939632e-06, "loss": 0.0605, "step": 4018 }, { "epoch": 0.5600222949905943, "grad_norm": 0.10037408769130707, "learning_rate": 4.591553669698646e-06, "loss": 0.0593, "step": 4019 }, { "epoch": 0.5601616386818087, "grad_norm": 0.10183335095643997, "learning_rate": 4.589204808205113e-06, "loss": 0.0492, "step": 4020 }, { "epoch": 0.5603009823730231, "grad_norm": 0.06338781118392944, "learning_rate": 4.58685603798089e-06, "loss": 0.0519, "step": 4021 }, { "epoch": 0.5604403260642374, "grad_norm": 0.052238669246435165, "learning_rate": 4.5845073595478245e-06, "loss": 0.0444, "step": 4022 }, { "epoch": 0.5605796697554518, "grad_norm": 0.09871406108140945, "learning_rate": 4.5821587734277374e-06, "loss": 0.0524, "step": 4023 }, { "epoch": 0.5607190134466662, "grad_norm": 0.08457300066947937, "learning_rate": 4.57981028014243e-06, "loss": 0.0606, "step": 4024 }, { "epoch": 0.5608583571378806, "grad_norm": 0.11453713476657867, "learning_rate": 4.577461880213688e-06, "loss": 0.057, "step": 4025 }, { "epoch": 0.560997700829095, "grad_norm": 0.1390664428472519, "learning_rate": 4.575113574163269e-06, "loss": 0.0726, "step": 4026 }, { "epoch": 0.5611370445203093, "grad_norm": 0.07239227741956711, "learning_rate": 4.572765362512912e-06, "loss": 0.0577, "step": 4027 }, { "epoch": 0.5612763882115237, "grad_norm": 0.06443676352500916, "learning_rate": 4.570417245784337e-06, "loss": 0.0551, "step": 4028 }, { "epoch": 0.5614157319027381, "grad_norm": 0.07223588228225708, "learning_rate": 4.568069224499244e-06, "loss": 0.0597, "step": 4029 }, { "epoch": 0.5615550755939525, "grad_norm": 0.07173758000135422, "learning_rate": 4.565721299179308e-06, "loss": 0.0547, "step": 4030 }, { "epoch": 0.5616944192851668, "grad_norm": 0.0771300196647644, "learning_rate": 4.563373470346186e-06, "loss": 0.047, "step": 4031 }, { "epoch": 0.5618337629763812, "grad_norm": 0.11496078222990036, "learning_rate": 4.561025738521508e-06, "loss": 0.0579, "step": 4032 }, { "epoch": 0.5619731066675956, "grad_norm": 0.08119389414787292, "learning_rate": 4.55867810422689e-06, "loss": 0.0468, "step": 4033 }, { "epoch": 0.56211245035881, "grad_norm": 0.08015679568052292, "learning_rate": 4.5563305679839214e-06, "loss": 0.0508, "step": 4034 }, { "epoch": 0.5622517940500243, "grad_norm": 0.11613670736551285, "learning_rate": 4.553983130314171e-06, "loss": 0.0665, "step": 4035 }, { "epoch": 0.5623911377412387, "grad_norm": 0.07250872999429703, "learning_rate": 4.551635791739188e-06, "loss": 0.057, "step": 4036 }, { "epoch": 0.5625304814324531, "grad_norm": 0.05620569363236427, "learning_rate": 4.549288552780494e-06, "loss": 0.0486, "step": 4037 }, { "epoch": 0.5626698251236675, "grad_norm": 0.16753220558166504, "learning_rate": 4.546941413959595e-06, "loss": 0.0686, "step": 4038 }, { "epoch": 0.5628091688148819, "grad_norm": 0.058676350861787796, "learning_rate": 4.544594375797969e-06, "loss": 0.049, "step": 4039 }, { "epoch": 0.5629485125060962, "grad_norm": 0.041176699101924896, "learning_rate": 4.542247438817076e-06, "loss": 0.0466, "step": 4040 }, { "epoch": 0.5630878561973107, "grad_norm": 0.05441125109791756, "learning_rate": 4.539900603538352e-06, "loss": 0.0539, "step": 4041 }, { "epoch": 0.5632271998885251, "grad_norm": 0.061333946883678436, "learning_rate": 4.53755387048321e-06, "loss": 0.0501, "step": 4042 }, { "epoch": 0.5633665435797395, "grad_norm": 0.09570582956075668, "learning_rate": 4.53520724017304e-06, "loss": 0.0503, "step": 4043 }, { "epoch": 0.5635058872709539, "grad_norm": 0.07871995121240616, "learning_rate": 4.532860713129208e-06, "loss": 0.0597, "step": 4044 }, { "epoch": 0.5636452309621682, "grad_norm": 0.049539756029844284, "learning_rate": 4.530514289873062e-06, "loss": 0.0508, "step": 4045 }, { "epoch": 0.5637845746533826, "grad_norm": 0.10863008350133896, "learning_rate": 4.528167970925922e-06, "loss": 0.0581, "step": 4046 }, { "epoch": 0.563923918344597, "grad_norm": 0.10493814945220947, "learning_rate": 4.525821756809088e-06, "loss": 0.0578, "step": 4047 }, { "epoch": 0.5640632620358114, "grad_norm": 0.059236790984869, "learning_rate": 4.523475648043832e-06, "loss": 0.0578, "step": 4048 }, { "epoch": 0.5642026057270257, "grad_norm": 0.055359844118356705, "learning_rate": 4.5211296451514085e-06, "loss": 0.0434, "step": 4049 }, { "epoch": 0.5643419494182401, "grad_norm": 0.06414637714624405, "learning_rate": 4.518783748653045e-06, "loss": 0.0549, "step": 4050 }, { "epoch": 0.5644812931094545, "grad_norm": 0.06304401904344559, "learning_rate": 4.516437959069946e-06, "loss": 0.0496, "step": 4051 }, { "epoch": 0.5646206368006689, "grad_norm": 0.068234384059906, "learning_rate": 4.514092276923295e-06, "loss": 0.0497, "step": 4052 }, { "epoch": 0.5647599804918833, "grad_norm": 0.08285931497812271, "learning_rate": 4.5117467027342435e-06, "loss": 0.0655, "step": 4053 }, { "epoch": 0.5648993241830976, "grad_norm": 0.09028860181570053, "learning_rate": 4.509401237023928e-06, "loss": 0.0536, "step": 4054 }, { "epoch": 0.565038667874312, "grad_norm": 0.07358772307634354, "learning_rate": 4.507055880313458e-06, "loss": 0.0628, "step": 4055 }, { "epoch": 0.5651780115655264, "grad_norm": 0.09620841592550278, "learning_rate": 4.504710633123917e-06, "loss": 0.0482, "step": 4056 }, { "epoch": 0.5653173552567408, "grad_norm": 0.06912661343812943, "learning_rate": 4.502365495976367e-06, "loss": 0.0534, "step": 4057 }, { "epoch": 0.5654566989479551, "grad_norm": 0.05717456340789795, "learning_rate": 4.5000204693918405e-06, "loss": 0.0501, "step": 4058 }, { "epoch": 0.5655960426391695, "grad_norm": 0.08030813187360764, "learning_rate": 4.497675553891352e-06, "loss": 0.0558, "step": 4059 }, { "epoch": 0.5657353863303839, "grad_norm": 0.1188773512840271, "learning_rate": 4.495330749995887e-06, "loss": 0.06, "step": 4060 }, { "epoch": 0.5658747300215983, "grad_norm": 0.06840382516384125, "learning_rate": 4.492986058226407e-06, "loss": 0.0437, "step": 4061 }, { "epoch": 0.5660140737128126, "grad_norm": 0.07615093141794205, "learning_rate": 4.490641479103851e-06, "loss": 0.0548, "step": 4062 }, { "epoch": 0.566153417404027, "grad_norm": 0.1244698241353035, "learning_rate": 4.4882970131491286e-06, "loss": 0.0446, "step": 4063 }, { "epoch": 0.5662927610952414, "grad_norm": 0.06921643763780594, "learning_rate": 4.485952660883126e-06, "loss": 0.0505, "step": 4064 }, { "epoch": 0.5664321047864558, "grad_norm": 0.06736037135124207, "learning_rate": 4.483608422826708e-06, "loss": 0.0583, "step": 4065 }, { "epoch": 0.5665714484776702, "grad_norm": 0.07818710058927536, "learning_rate": 4.481264299500709e-06, "loss": 0.0657, "step": 4066 }, { "epoch": 0.5667107921688845, "grad_norm": 0.10114632546901703, "learning_rate": 4.478920291425939e-06, "loss": 0.0606, "step": 4067 }, { "epoch": 0.5668501358600989, "grad_norm": 0.09066477417945862, "learning_rate": 4.476576399123187e-06, "loss": 0.0649, "step": 4068 }, { "epoch": 0.5669894795513133, "grad_norm": 0.1472519338130951, "learning_rate": 4.474232623113204e-06, "loss": 0.0568, "step": 4069 }, { "epoch": 0.5671288232425277, "grad_norm": 0.07209550589323044, "learning_rate": 4.471888963916732e-06, "loss": 0.0589, "step": 4070 }, { "epoch": 0.567268166933742, "grad_norm": 0.11157864332199097, "learning_rate": 4.4695454220544735e-06, "loss": 0.0591, "step": 4071 }, { "epoch": 0.5674075106249564, "grad_norm": 0.059160906821489334, "learning_rate": 4.467201998047112e-06, "loss": 0.0551, "step": 4072 }, { "epoch": 0.5675468543161708, "grad_norm": 0.06015707924962044, "learning_rate": 4.464858692415304e-06, "loss": 0.047, "step": 4073 }, { "epoch": 0.5676861980073852, "grad_norm": 0.1222090795636177, "learning_rate": 4.462515505679677e-06, "loss": 0.0522, "step": 4074 }, { "epoch": 0.5678255416985996, "grad_norm": 0.11485128849744797, "learning_rate": 4.460172438360832e-06, "loss": 0.0656, "step": 4075 }, { "epoch": 0.5679648853898139, "grad_norm": 0.15866726636886597, "learning_rate": 4.457829490979347e-06, "loss": 0.0582, "step": 4076 }, { "epoch": 0.5681042290810283, "grad_norm": 0.11916091293096542, "learning_rate": 4.455486664055772e-06, "loss": 0.0596, "step": 4077 }, { "epoch": 0.5682435727722427, "grad_norm": 0.05402408167719841, "learning_rate": 4.4531439581106295e-06, "loss": 0.049, "step": 4078 }, { "epoch": 0.5683829164634571, "grad_norm": 0.09806133806705475, "learning_rate": 4.450801373664413e-06, "loss": 0.0534, "step": 4079 }, { "epoch": 0.5685222601546714, "grad_norm": 0.06882151961326599, "learning_rate": 4.448458911237593e-06, "loss": 0.0533, "step": 4080 }, { "epoch": 0.5686616038458859, "grad_norm": 0.10406661778688431, "learning_rate": 4.446116571350611e-06, "loss": 0.0617, "step": 4081 }, { "epoch": 0.5688009475371003, "grad_norm": 0.09400828182697296, "learning_rate": 4.443774354523883e-06, "loss": 0.0514, "step": 4082 }, { "epoch": 0.5689402912283147, "grad_norm": 0.09261878579854965, "learning_rate": 4.441432261277794e-06, "loss": 0.0532, "step": 4083 }, { "epoch": 0.5690796349195291, "grad_norm": 0.1156836748123169, "learning_rate": 4.4390902921327025e-06, "loss": 0.0448, "step": 4084 }, { "epoch": 0.5692189786107434, "grad_norm": 0.09648916870355606, "learning_rate": 4.436748447608944e-06, "loss": 0.0626, "step": 4085 }, { "epoch": 0.5693583223019578, "grad_norm": 0.053570281714200974, "learning_rate": 4.43440672822682e-06, "loss": 0.0489, "step": 4086 }, { "epoch": 0.5694976659931722, "grad_norm": 0.08302601426839828, "learning_rate": 4.432065134506608e-06, "loss": 0.0482, "step": 4087 }, { "epoch": 0.5696370096843866, "grad_norm": 0.08772047609090805, "learning_rate": 4.429723666968559e-06, "loss": 0.0588, "step": 4088 }, { "epoch": 0.569776353375601, "grad_norm": 0.09057117253541946, "learning_rate": 4.427382326132892e-06, "loss": 0.058, "step": 4089 }, { "epoch": 0.5699156970668153, "grad_norm": 0.09387324005365372, "learning_rate": 4.425041112519797e-06, "loss": 0.0482, "step": 4090 }, { "epoch": 0.5700550407580297, "grad_norm": 0.07797315716743469, "learning_rate": 4.42270002664944e-06, "loss": 0.0598, "step": 4091 }, { "epoch": 0.5701943844492441, "grad_norm": 0.10680722445249557, "learning_rate": 4.4203590690419575e-06, "loss": 0.0495, "step": 4092 }, { "epoch": 0.5703337281404585, "grad_norm": 0.09224545210599899, "learning_rate": 4.418018240217457e-06, "loss": 0.0518, "step": 4093 }, { "epoch": 0.5704730718316728, "grad_norm": 0.05647670477628708, "learning_rate": 4.415677540696017e-06, "loss": 0.0529, "step": 4094 }, { "epoch": 0.5706124155228872, "grad_norm": 0.1569254994392395, "learning_rate": 4.413336970997687e-06, "loss": 0.0619, "step": 4095 }, { "epoch": 0.5707517592141016, "grad_norm": 0.13979972898960114, "learning_rate": 4.410996531642487e-06, "loss": 0.068, "step": 4096 }, { "epoch": 0.570891102905316, "grad_norm": 0.08776950091123581, "learning_rate": 4.408656223150412e-06, "loss": 0.0537, "step": 4097 }, { "epoch": 0.5710304465965303, "grad_norm": 0.09914672374725342, "learning_rate": 4.406316046041423e-06, "loss": 0.0517, "step": 4098 }, { "epoch": 0.5711697902877447, "grad_norm": 0.06017930060625076, "learning_rate": 4.4039760008354556e-06, "loss": 0.0519, "step": 4099 }, { "epoch": 0.5713091339789591, "grad_norm": 0.08200819790363312, "learning_rate": 4.401636088052411e-06, "loss": 0.0528, "step": 4100 }, { "epoch": 0.5714484776701735, "grad_norm": 0.0986037403345108, "learning_rate": 4.399296308212168e-06, "loss": 0.0609, "step": 4101 }, { "epoch": 0.5715878213613879, "grad_norm": 0.07373367249965668, "learning_rate": 4.396956661834571e-06, "loss": 0.0429, "step": 4102 }, { "epoch": 0.5717271650526022, "grad_norm": 0.07403984665870667, "learning_rate": 4.394617149439435e-06, "loss": 0.0472, "step": 4103 }, { "epoch": 0.5718665087438166, "grad_norm": 0.08314023166894913, "learning_rate": 4.392277771546549e-06, "loss": 0.0473, "step": 4104 }, { "epoch": 0.572005852435031, "grad_norm": 0.05304037779569626, "learning_rate": 4.389938528675668e-06, "loss": 0.0474, "step": 4105 }, { "epoch": 0.5721451961262454, "grad_norm": 0.08666234463453293, "learning_rate": 4.387599421346517e-06, "loss": 0.0474, "step": 4106 }, { "epoch": 0.5722845398174597, "grad_norm": 0.06569314002990723, "learning_rate": 4.385260450078793e-06, "loss": 0.0579, "step": 4107 }, { "epoch": 0.5724238835086741, "grad_norm": 0.07154226303100586, "learning_rate": 4.382921615392162e-06, "loss": 0.0522, "step": 4108 }, { "epoch": 0.5725632271998885, "grad_norm": 0.07872533053159714, "learning_rate": 4.38058291780626e-06, "loss": 0.0688, "step": 4109 }, { "epoch": 0.5727025708911029, "grad_norm": 0.07003357261419296, "learning_rate": 4.378244357840694e-06, "loss": 0.0522, "step": 4110 }, { "epoch": 0.5728419145823173, "grad_norm": 0.06167755648493767, "learning_rate": 4.375905936015035e-06, "loss": 0.0433, "step": 4111 }, { "epoch": 0.5729812582735316, "grad_norm": 0.06375864893198013, "learning_rate": 4.373567652848828e-06, "loss": 0.0503, "step": 4112 }, { "epoch": 0.573120601964746, "grad_norm": 0.0658906102180481, "learning_rate": 4.371229508861588e-06, "loss": 0.0546, "step": 4113 }, { "epoch": 0.5732599456559604, "grad_norm": 0.1400042176246643, "learning_rate": 4.368891504572796e-06, "loss": 0.0744, "step": 4114 }, { "epoch": 0.5733992893471748, "grad_norm": 0.08624929189682007, "learning_rate": 4.3665536405019045e-06, "loss": 0.0467, "step": 4115 }, { "epoch": 0.5735386330383891, "grad_norm": 0.07282475382089615, "learning_rate": 4.36421591716833e-06, "loss": 0.0505, "step": 4116 }, { "epoch": 0.5736779767296035, "grad_norm": 0.07842124998569489, "learning_rate": 4.361878335091464e-06, "loss": 0.0523, "step": 4117 }, { "epoch": 0.5738173204208179, "grad_norm": 0.06845692545175552, "learning_rate": 4.3595408947906644e-06, "loss": 0.0515, "step": 4118 }, { "epoch": 0.5739566641120323, "grad_norm": 0.0720575824379921, "learning_rate": 4.357203596785254e-06, "loss": 0.0541, "step": 4119 }, { "epoch": 0.5740960078032467, "grad_norm": 0.08379632979631424, "learning_rate": 4.3548664415945326e-06, "loss": 0.0572, "step": 4120 }, { "epoch": 0.5742353514944611, "grad_norm": 0.15202687680721283, "learning_rate": 4.3525294297377566e-06, "loss": 0.0662, "step": 4121 }, { "epoch": 0.5743746951856755, "grad_norm": 0.08321414887905121, "learning_rate": 4.35019256173416e-06, "loss": 0.056, "step": 4122 }, { "epoch": 0.5745140388768899, "grad_norm": 0.10935156047344208, "learning_rate": 4.34785583810294e-06, "loss": 0.0591, "step": 4123 }, { "epoch": 0.5746533825681043, "grad_norm": 0.0795736089348793, "learning_rate": 4.345519259363264e-06, "loss": 0.051, "step": 4124 }, { "epoch": 0.5747927262593187, "grad_norm": 0.09363991767168045, "learning_rate": 4.343182826034268e-06, "loss": 0.0599, "step": 4125 }, { "epoch": 0.574932069950533, "grad_norm": 0.08197915554046631, "learning_rate": 4.340846538635053e-06, "loss": 0.0485, "step": 4126 }, { "epoch": 0.5750714136417474, "grad_norm": 0.0618266835808754, "learning_rate": 4.338510397684687e-06, "loss": 0.052, "step": 4127 }, { "epoch": 0.5752107573329618, "grad_norm": 0.10640568286180496, "learning_rate": 4.336174403702208e-06, "loss": 0.057, "step": 4128 }, { "epoch": 0.5753501010241762, "grad_norm": 0.0671224594116211, "learning_rate": 4.333838557206623e-06, "loss": 0.0478, "step": 4129 }, { "epoch": 0.5754894447153905, "grad_norm": 0.11736678332090378, "learning_rate": 4.3315028587169e-06, "loss": 0.0541, "step": 4130 }, { "epoch": 0.5756287884066049, "grad_norm": 0.11710043251514435, "learning_rate": 4.329167308751982e-06, "loss": 0.0626, "step": 4131 }, { "epoch": 0.5757681320978193, "grad_norm": 0.09746218472719193, "learning_rate": 4.3268319078307695e-06, "loss": 0.0497, "step": 4132 }, { "epoch": 0.5759074757890337, "grad_norm": 0.07093841582536697, "learning_rate": 4.324496656472141e-06, "loss": 0.0591, "step": 4133 }, { "epoch": 0.576046819480248, "grad_norm": 0.16078348457813263, "learning_rate": 4.322161555194932e-06, "loss": 0.0653, "step": 4134 }, { "epoch": 0.5761861631714624, "grad_norm": 0.09515839070081711, "learning_rate": 4.31982660451795e-06, "loss": 0.0531, "step": 4135 }, { "epoch": 0.5763255068626768, "grad_norm": 0.08127336949110031, "learning_rate": 4.3174918049599705e-06, "loss": 0.0598, "step": 4136 }, { "epoch": 0.5764648505538912, "grad_norm": 0.10683717578649521, "learning_rate": 4.315157157039727e-06, "loss": 0.0473, "step": 4137 }, { "epoch": 0.5766041942451056, "grad_norm": 0.05365932360291481, "learning_rate": 4.312822661275929e-06, "loss": 0.0446, "step": 4138 }, { "epoch": 0.5767435379363199, "grad_norm": 0.08844908326864243, "learning_rate": 4.310488318187247e-06, "loss": 0.0578, "step": 4139 }, { "epoch": 0.5768828816275343, "grad_norm": 0.06067844107747078, "learning_rate": 4.308154128292318e-06, "loss": 0.0512, "step": 4140 }, { "epoch": 0.5770222253187487, "grad_norm": 0.06700817495584488, "learning_rate": 4.305820092109748e-06, "loss": 0.0472, "step": 4141 }, { "epoch": 0.5771615690099631, "grad_norm": 0.08712171763181686, "learning_rate": 4.303486210158106e-06, "loss": 0.0549, "step": 4142 }, { "epoch": 0.5773009127011774, "grad_norm": 0.16016753017902374, "learning_rate": 4.301152482955926e-06, "loss": 0.0689, "step": 4143 }, { "epoch": 0.5774402563923918, "grad_norm": 0.07273393124341965, "learning_rate": 4.298818911021707e-06, "loss": 0.0566, "step": 4144 }, { "epoch": 0.5775796000836062, "grad_norm": 0.04902715981006622, "learning_rate": 4.296485494873919e-06, "loss": 0.0437, "step": 4145 }, { "epoch": 0.5777189437748206, "grad_norm": 0.09481319785118103, "learning_rate": 4.294152235030993e-06, "loss": 0.0659, "step": 4146 }, { "epoch": 0.577858287466035, "grad_norm": 0.11323727667331696, "learning_rate": 4.291819132011327e-06, "loss": 0.0586, "step": 4147 }, { "epoch": 0.5779976311572493, "grad_norm": 0.10627885162830353, "learning_rate": 4.2894861863332785e-06, "loss": 0.0567, "step": 4148 }, { "epoch": 0.5781369748484637, "grad_norm": 0.07285749912261963, "learning_rate": 4.28715339851518e-06, "loss": 0.0638, "step": 4149 }, { "epoch": 0.5782763185396781, "grad_norm": 0.09580353647470474, "learning_rate": 4.284820769075322e-06, "loss": 0.0513, "step": 4150 }, { "epoch": 0.5784156622308925, "grad_norm": 0.06145021691918373, "learning_rate": 4.282488298531959e-06, "loss": 0.0448, "step": 4151 }, { "epoch": 0.5785550059221068, "grad_norm": 0.06888570636510849, "learning_rate": 4.28015598740332e-06, "loss": 0.0568, "step": 4152 }, { "epoch": 0.5786943496133212, "grad_norm": 0.08184679597616196, "learning_rate": 4.277823836207581e-06, "loss": 0.0552, "step": 4153 }, { "epoch": 0.5788336933045356, "grad_norm": 0.10823147743940353, "learning_rate": 4.275491845462901e-06, "loss": 0.0586, "step": 4154 }, { "epoch": 0.57897303699575, "grad_norm": 0.09119870513677597, "learning_rate": 4.27316001568739e-06, "loss": 0.0587, "step": 4155 }, { "epoch": 0.5791123806869644, "grad_norm": 0.09478078782558441, "learning_rate": 4.270828347399131e-06, "loss": 0.058, "step": 4156 }, { "epoch": 0.5792517243781787, "grad_norm": 0.053184859454631805, "learning_rate": 4.268496841116166e-06, "loss": 0.049, "step": 4157 }, { "epoch": 0.5793910680693931, "grad_norm": 0.09605635702610016, "learning_rate": 4.266165497356503e-06, "loss": 0.0638, "step": 4158 }, { "epoch": 0.5795304117606075, "grad_norm": 0.08820071816444397, "learning_rate": 4.2638343166381115e-06, "loss": 0.0514, "step": 4159 }, { "epoch": 0.5796697554518219, "grad_norm": 0.06635449826717377, "learning_rate": 4.261503299478928e-06, "loss": 0.0448, "step": 4160 }, { "epoch": 0.5798090991430362, "grad_norm": 0.14531783759593964, "learning_rate": 4.259172446396851e-06, "loss": 0.0606, "step": 4161 }, { "epoch": 0.5799484428342507, "grad_norm": 0.10280749201774597, "learning_rate": 4.256841757909744e-06, "loss": 0.0514, "step": 4162 }, { "epoch": 0.5800877865254651, "grad_norm": 0.05546177551150322, "learning_rate": 4.254511234535432e-06, "loss": 0.0511, "step": 4163 }, { "epoch": 0.5802271302166795, "grad_norm": 0.06577707082033157, "learning_rate": 4.2521808767917024e-06, "loss": 0.0558, "step": 4164 }, { "epoch": 0.5803664739078939, "grad_norm": 0.05739376321434975, "learning_rate": 4.2498506851963095e-06, "loss": 0.055, "step": 4165 }, { "epoch": 0.5805058175991082, "grad_norm": 0.12072692066431046, "learning_rate": 4.247520660266969e-06, "loss": 0.0534, "step": 4166 }, { "epoch": 0.5806451612903226, "grad_norm": 0.10164999216794968, "learning_rate": 4.245190802521356e-06, "loss": 0.0553, "step": 4167 }, { "epoch": 0.580784504981537, "grad_norm": 0.10424523055553436, "learning_rate": 4.2428611124771184e-06, "loss": 0.0627, "step": 4168 }, { "epoch": 0.5809238486727514, "grad_norm": 0.09187158942222595, "learning_rate": 4.240531590651853e-06, "loss": 0.0553, "step": 4169 }, { "epoch": 0.5810631923639658, "grad_norm": 0.2537391185760498, "learning_rate": 4.238202237563129e-06, "loss": 0.0601, "step": 4170 }, { "epoch": 0.5812025360551801, "grad_norm": 0.1418619155883789, "learning_rate": 4.235873053728475e-06, "loss": 0.0507, "step": 4171 }, { "epoch": 0.5813418797463945, "grad_norm": 0.15230940282344818, "learning_rate": 4.233544039665385e-06, "loss": 0.0744, "step": 4172 }, { "epoch": 0.5814812234376089, "grad_norm": 0.1342790126800537, "learning_rate": 4.231215195891311e-06, "loss": 0.052, "step": 4173 }, { "epoch": 0.5816205671288233, "grad_norm": 0.10376612842082977, "learning_rate": 4.228886522923668e-06, "loss": 0.0587, "step": 4174 }, { "epoch": 0.5817599108200376, "grad_norm": 0.07201267778873444, "learning_rate": 4.2265580212798355e-06, "loss": 0.0562, "step": 4175 }, { "epoch": 0.581899254511252, "grad_norm": 0.09970392286777496, "learning_rate": 4.224229691477151e-06, "loss": 0.0681, "step": 4176 }, { "epoch": 0.5820385982024664, "grad_norm": 0.1378369927406311, "learning_rate": 4.221901534032918e-06, "loss": 0.0587, "step": 4177 }, { "epoch": 0.5821779418936808, "grad_norm": 0.07207901030778885, "learning_rate": 4.219573549464403e-06, "loss": 0.0523, "step": 4178 }, { "epoch": 0.5823172855848952, "grad_norm": 0.05342128500342369, "learning_rate": 4.217245738288825e-06, "loss": 0.0455, "step": 4179 }, { "epoch": 0.5824566292761095, "grad_norm": 0.0715801790356636, "learning_rate": 4.2149181010233734e-06, "loss": 0.0502, "step": 4180 }, { "epoch": 0.5825959729673239, "grad_norm": 0.06558159738779068, "learning_rate": 4.212590638185196e-06, "loss": 0.0532, "step": 4181 }, { "epoch": 0.5827353166585383, "grad_norm": 0.06610485911369324, "learning_rate": 4.2102633502914035e-06, "loss": 0.0492, "step": 4182 }, { "epoch": 0.5828746603497527, "grad_norm": 0.13764595985412598, "learning_rate": 4.2079362378590625e-06, "loss": 0.0582, "step": 4183 }, { "epoch": 0.583014004040967, "grad_norm": 0.07442311942577362, "learning_rate": 4.2056093014052085e-06, "loss": 0.0575, "step": 4184 }, { "epoch": 0.5831533477321814, "grad_norm": 0.06640686094760895, "learning_rate": 4.20328254144683e-06, "loss": 0.051, "step": 4185 }, { "epoch": 0.5832926914233958, "grad_norm": 0.10987158864736557, "learning_rate": 4.2009559585008826e-06, "loss": 0.0624, "step": 4186 }, { "epoch": 0.5834320351146102, "grad_norm": 0.1251242309808731, "learning_rate": 4.198629553084277e-06, "loss": 0.0529, "step": 4187 }, { "epoch": 0.5835713788058245, "grad_norm": 0.10256034880876541, "learning_rate": 4.1963033257138904e-06, "loss": 0.0569, "step": 4188 }, { "epoch": 0.5837107224970389, "grad_norm": 0.0668899416923523, "learning_rate": 4.193977276906557e-06, "loss": 0.0494, "step": 4189 }, { "epoch": 0.5838500661882533, "grad_norm": 0.05953852832317352, "learning_rate": 4.191651407179069e-06, "loss": 0.0468, "step": 4190 }, { "epoch": 0.5839894098794677, "grad_norm": 0.06457334011793137, "learning_rate": 4.189325717048185e-06, "loss": 0.0531, "step": 4191 }, { "epoch": 0.5841287535706821, "grad_norm": 0.11265842616558075, "learning_rate": 4.187000207030616e-06, "loss": 0.062, "step": 4192 }, { "epoch": 0.5842680972618964, "grad_norm": 0.08124857395887375, "learning_rate": 4.184674877643042e-06, "loss": 0.0678, "step": 4193 }, { "epoch": 0.5844074409531108, "grad_norm": 0.18299846351146698, "learning_rate": 4.182349729402097e-06, "loss": 0.0524, "step": 4194 }, { "epoch": 0.5845467846443252, "grad_norm": 0.07492823153734207, "learning_rate": 4.180024762824374e-06, "loss": 0.0578, "step": 4195 }, { "epoch": 0.5846861283355396, "grad_norm": 0.08723972737789154, "learning_rate": 4.177699978426426e-06, "loss": 0.053, "step": 4196 }, { "epoch": 0.584825472026754, "grad_norm": 0.07956837862730026, "learning_rate": 4.175375376724772e-06, "loss": 0.0625, "step": 4197 }, { "epoch": 0.5849648157179683, "grad_norm": 0.07967565953731537, "learning_rate": 4.173050958235882e-06, "loss": 0.0664, "step": 4198 }, { "epoch": 0.5851041594091827, "grad_norm": 0.11265525966882706, "learning_rate": 4.170726723476189e-06, "loss": 0.0575, "step": 4199 }, { "epoch": 0.5852435031003971, "grad_norm": 0.04971630498766899, "learning_rate": 4.168402672962086e-06, "loss": 0.0458, "step": 4200 }, { "epoch": 0.5853828467916115, "grad_norm": 0.06906507164239883, "learning_rate": 4.166078807209924e-06, "loss": 0.0457, "step": 4201 }, { "epoch": 0.5855221904828259, "grad_norm": 0.08644929528236389, "learning_rate": 4.163755126736011e-06, "loss": 0.0562, "step": 4202 }, { "epoch": 0.5856615341740403, "grad_norm": 0.0688561275601387, "learning_rate": 4.1614316320566174e-06, "loss": 0.048, "step": 4203 }, { "epoch": 0.5858008778652547, "grad_norm": 0.1425238698720932, "learning_rate": 4.159108323687971e-06, "loss": 0.0647, "step": 4204 }, { "epoch": 0.5859402215564691, "grad_norm": 0.09498348087072372, "learning_rate": 4.156785202146257e-06, "loss": 0.0607, "step": 4205 }, { "epoch": 0.5860795652476835, "grad_norm": 0.0980413556098938, "learning_rate": 4.154462267947621e-06, "loss": 0.0629, "step": 4206 }, { "epoch": 0.5862189089388978, "grad_norm": 0.07859209924936295, "learning_rate": 4.152139521608164e-06, "loss": 0.0561, "step": 4207 }, { "epoch": 0.5863582526301122, "grad_norm": 0.07688848674297333, "learning_rate": 4.149816963643947e-06, "loss": 0.0507, "step": 4208 }, { "epoch": 0.5864975963213266, "grad_norm": 0.06417093425989151, "learning_rate": 4.147494594570992e-06, "loss": 0.0571, "step": 4209 }, { "epoch": 0.586636940012541, "grad_norm": 0.07270471751689911, "learning_rate": 4.1451724149052764e-06, "loss": 0.0596, "step": 4210 }, { "epoch": 0.5867762837037553, "grad_norm": 0.06948600709438324, "learning_rate": 4.1428504251627335e-06, "loss": 0.0613, "step": 4211 }, { "epoch": 0.5869156273949697, "grad_norm": 0.0482616201043129, "learning_rate": 4.140528625859254e-06, "loss": 0.0553, "step": 4212 }, { "epoch": 0.5870549710861841, "grad_norm": 0.06729816645383835, "learning_rate": 4.138207017510696e-06, "loss": 0.0493, "step": 4213 }, { "epoch": 0.5871943147773985, "grad_norm": 0.17580653727054596, "learning_rate": 4.1358856006328614e-06, "loss": 0.0517, "step": 4214 }, { "epoch": 0.5873336584686129, "grad_norm": 0.15035994350910187, "learning_rate": 4.1335643757415195e-06, "loss": 0.0645, "step": 4215 }, { "epoch": 0.5874730021598272, "grad_norm": 0.06007103621959686, "learning_rate": 4.131243343352391e-06, "loss": 0.0511, "step": 4216 }, { "epoch": 0.5876123458510416, "grad_norm": 0.141882985830307, "learning_rate": 4.128922503981158e-06, "loss": 0.0493, "step": 4217 }, { "epoch": 0.587751689542256, "grad_norm": 0.12665614485740662, "learning_rate": 4.126601858143457e-06, "loss": 0.0635, "step": 4218 }, { "epoch": 0.5878910332334704, "grad_norm": 0.07043761759996414, "learning_rate": 4.124281406354883e-06, "loss": 0.0553, "step": 4219 }, { "epoch": 0.5880303769246847, "grad_norm": 0.09355522692203522, "learning_rate": 4.121961149130989e-06, "loss": 0.0462, "step": 4220 }, { "epoch": 0.5881697206158991, "grad_norm": 0.09246663749217987, "learning_rate": 4.119641086987282e-06, "loss": 0.0659, "step": 4221 }, { "epoch": 0.5883090643071135, "grad_norm": 0.0920211523771286, "learning_rate": 4.1173212204392245e-06, "loss": 0.0444, "step": 4222 }, { "epoch": 0.5884484079983279, "grad_norm": 0.0621083527803421, "learning_rate": 4.115001550002241e-06, "loss": 0.0511, "step": 4223 }, { "epoch": 0.5885877516895422, "grad_norm": 0.09590505808591843, "learning_rate": 4.1126820761917075e-06, "loss": 0.0542, "step": 4224 }, { "epoch": 0.5887270953807566, "grad_norm": 0.07794459909200668, "learning_rate": 4.11036279952296e-06, "loss": 0.055, "step": 4225 }, { "epoch": 0.588866439071971, "grad_norm": 0.08487473428249359, "learning_rate": 4.108043720511287e-06, "loss": 0.05, "step": 4226 }, { "epoch": 0.5890057827631854, "grad_norm": 0.05299568921327591, "learning_rate": 4.105724839671936e-06, "loss": 0.0484, "step": 4227 }, { "epoch": 0.5891451264543998, "grad_norm": 0.06330814957618713, "learning_rate": 4.103406157520108e-06, "loss": 0.0495, "step": 4228 }, { "epoch": 0.5892844701456141, "grad_norm": 0.07681740075349808, "learning_rate": 4.101087674570963e-06, "loss": 0.0482, "step": 4229 }, { "epoch": 0.5894238138368285, "grad_norm": 0.11363404989242554, "learning_rate": 4.0987693913396145e-06, "loss": 0.0593, "step": 4230 }, { "epoch": 0.5895631575280429, "grad_norm": 0.06332168728113174, "learning_rate": 4.096451308341132e-06, "loss": 0.0455, "step": 4231 }, { "epoch": 0.5897025012192573, "grad_norm": 0.08896905928850174, "learning_rate": 4.094133426090539e-06, "loss": 0.055, "step": 4232 }, { "epoch": 0.5898418449104716, "grad_norm": 0.10633544623851776, "learning_rate": 4.091815745102818e-06, "loss": 0.0573, "step": 4233 }, { "epoch": 0.589981188601686, "grad_norm": 0.0803629457950592, "learning_rate": 4.089498265892905e-06, "loss": 0.0584, "step": 4234 }, { "epoch": 0.5901205322929004, "grad_norm": 0.08353038877248764, "learning_rate": 4.0871809889756884e-06, "loss": 0.0438, "step": 4235 }, { "epoch": 0.5902598759841148, "grad_norm": 0.07330983877182007, "learning_rate": 4.084863914866018e-06, "loss": 0.0528, "step": 4236 }, { "epoch": 0.5903992196753292, "grad_norm": 0.0607525035738945, "learning_rate": 4.082547044078693e-06, "loss": 0.0444, "step": 4237 }, { "epoch": 0.5905385633665435, "grad_norm": 0.054314784705638885, "learning_rate": 4.0802303771284685e-06, "loss": 0.0487, "step": 4238 }, { "epoch": 0.5906779070577579, "grad_norm": 0.08006389439105988, "learning_rate": 4.0779139145300536e-06, "loss": 0.0524, "step": 4239 }, { "epoch": 0.5908172507489723, "grad_norm": 0.097940593957901, "learning_rate": 4.075597656798117e-06, "loss": 0.059, "step": 4240 }, { "epoch": 0.5909565944401867, "grad_norm": 0.06423325836658478, "learning_rate": 4.073281604447277e-06, "loss": 0.0554, "step": 4241 }, { "epoch": 0.5910959381314012, "grad_norm": 0.06579422950744629, "learning_rate": 4.0709657579921075e-06, "loss": 0.0499, "step": 4242 }, { "epoch": 0.5912352818226155, "grad_norm": 0.09566860646009445, "learning_rate": 4.068650117947135e-06, "loss": 0.0608, "step": 4243 }, { "epoch": 0.5913746255138299, "grad_norm": 0.07237736880779266, "learning_rate": 4.0663346848268435e-06, "loss": 0.0571, "step": 4244 }, { "epoch": 0.5915139692050443, "grad_norm": 0.09512404352426529, "learning_rate": 4.064019459145669e-06, "loss": 0.0544, "step": 4245 }, { "epoch": 0.5916533128962587, "grad_norm": 0.09070129692554474, "learning_rate": 4.061704441418002e-06, "loss": 0.0513, "step": 4246 }, { "epoch": 0.591792656587473, "grad_norm": 0.05996028333902359, "learning_rate": 4.059389632158189e-06, "loss": 0.0493, "step": 4247 }, { "epoch": 0.5919320002786874, "grad_norm": 0.08510972559452057, "learning_rate": 4.057075031880521e-06, "loss": 0.0499, "step": 4248 }, { "epoch": 0.5920713439699018, "grad_norm": 0.08302594721317291, "learning_rate": 4.054760641099256e-06, "loss": 0.0598, "step": 4249 }, { "epoch": 0.5922106876611162, "grad_norm": 0.0895972028374672, "learning_rate": 4.052446460328595e-06, "loss": 0.0495, "step": 4250 }, { "epoch": 0.5923500313523306, "grad_norm": 0.06387151777744293, "learning_rate": 4.050132490082698e-06, "loss": 0.0527, "step": 4251 }, { "epoch": 0.5924893750435449, "grad_norm": 0.06351472437381744, "learning_rate": 4.0478187308756775e-06, "loss": 0.0522, "step": 4252 }, { "epoch": 0.5926287187347593, "grad_norm": 0.12890368700027466, "learning_rate": 4.045505183221594e-06, "loss": 0.0514, "step": 4253 }, { "epoch": 0.5927680624259737, "grad_norm": 0.10004564374685287, "learning_rate": 4.043191847634469e-06, "loss": 0.0546, "step": 4254 }, { "epoch": 0.5929074061171881, "grad_norm": 0.07878788560628891, "learning_rate": 4.040878724628269e-06, "loss": 0.0556, "step": 4255 }, { "epoch": 0.5930467498084024, "grad_norm": 0.08744609355926514, "learning_rate": 4.038565814716921e-06, "loss": 0.0509, "step": 4256 }, { "epoch": 0.5931860934996168, "grad_norm": 0.21103855967521667, "learning_rate": 4.036253118414299e-06, "loss": 0.0652, "step": 4257 }, { "epoch": 0.5933254371908312, "grad_norm": 0.07266874611377716, "learning_rate": 4.033940636234233e-06, "loss": 0.0575, "step": 4258 }, { "epoch": 0.5934647808820456, "grad_norm": 0.12414006888866425, "learning_rate": 4.0316283686905e-06, "loss": 0.0601, "step": 4259 }, { "epoch": 0.59360412457326, "grad_norm": 0.07220815122127533, "learning_rate": 4.029316316296834e-06, "loss": 0.0579, "step": 4260 }, { "epoch": 0.5937434682644743, "grad_norm": 0.08617658168077469, "learning_rate": 4.027004479566923e-06, "loss": 0.0499, "step": 4261 }, { "epoch": 0.5938828119556887, "grad_norm": 0.09236972033977509, "learning_rate": 4.024692859014403e-06, "loss": 0.0589, "step": 4262 }, { "epoch": 0.5940221556469031, "grad_norm": 0.07171005010604858, "learning_rate": 4.022381455152863e-06, "loss": 0.051, "step": 4263 }, { "epoch": 0.5941614993381175, "grad_norm": 0.10392889380455017, "learning_rate": 4.020070268495844e-06, "loss": 0.0541, "step": 4264 }, { "epoch": 0.5943008430293318, "grad_norm": 0.07512316852807999, "learning_rate": 4.017759299556838e-06, "loss": 0.0533, "step": 4265 }, { "epoch": 0.5944401867205462, "grad_norm": 0.08416133373975754, "learning_rate": 4.015448548849293e-06, "loss": 0.0555, "step": 4266 }, { "epoch": 0.5945795304117606, "grad_norm": 0.06470878422260284, "learning_rate": 4.0131380168866e-06, "loss": 0.0456, "step": 4267 }, { "epoch": 0.594718874102975, "grad_norm": 0.08039339631795883, "learning_rate": 4.010827704182113e-06, "loss": 0.0424, "step": 4268 }, { "epoch": 0.5948582177941893, "grad_norm": 0.11144347488880157, "learning_rate": 4.0085176112491245e-06, "loss": 0.0574, "step": 4269 }, { "epoch": 0.5949975614854037, "grad_norm": 0.1405935138463974, "learning_rate": 4.006207738600887e-06, "loss": 0.0539, "step": 4270 }, { "epoch": 0.5951369051766181, "grad_norm": 0.07936269789934158, "learning_rate": 4.0038980867506e-06, "loss": 0.0526, "step": 4271 }, { "epoch": 0.5952762488678325, "grad_norm": 0.07342267036437988, "learning_rate": 4.001588656211418e-06, "loss": 0.058, "step": 4272 }, { "epoch": 0.5954155925590469, "grad_norm": 0.0809645876288414, "learning_rate": 3.999279447496444e-06, "loss": 0.0527, "step": 4273 }, { "epoch": 0.5955549362502612, "grad_norm": 0.07320921868085861, "learning_rate": 3.996970461118729e-06, "loss": 0.0554, "step": 4274 }, { "epoch": 0.5956942799414756, "grad_norm": 0.10259027779102325, "learning_rate": 3.994661697591278e-06, "loss": 0.0574, "step": 4275 }, { "epoch": 0.59583362363269, "grad_norm": 0.07360542565584183, "learning_rate": 3.992353157427044e-06, "loss": 0.0551, "step": 4276 }, { "epoch": 0.5959729673239044, "grad_norm": 0.062049876898527145, "learning_rate": 3.990044841138934e-06, "loss": 0.049, "step": 4277 }, { "epoch": 0.5961123110151187, "grad_norm": 0.14986871182918549, "learning_rate": 3.987736749239804e-06, "loss": 0.0689, "step": 4278 }, { "epoch": 0.5962516547063331, "grad_norm": 0.07552632689476013, "learning_rate": 3.985428882242458e-06, "loss": 0.0611, "step": 4279 }, { "epoch": 0.5963909983975475, "grad_norm": 0.07369387149810791, "learning_rate": 3.983121240659649e-06, "loss": 0.0595, "step": 4280 }, { "epoch": 0.5965303420887619, "grad_norm": 0.10803712904453278, "learning_rate": 3.980813825004086e-06, "loss": 0.0536, "step": 4281 }, { "epoch": 0.5966696857799764, "grad_norm": 0.11810906231403351, "learning_rate": 3.978506635788423e-06, "loss": 0.0587, "step": 4282 }, { "epoch": 0.5968090294711907, "grad_norm": 0.06131356582045555, "learning_rate": 3.976199673525263e-06, "loss": 0.0379, "step": 4283 }, { "epoch": 0.5969483731624051, "grad_norm": 0.06108645349740982, "learning_rate": 3.973892938727164e-06, "loss": 0.0439, "step": 4284 }, { "epoch": 0.5970877168536195, "grad_norm": 0.06424526125192642, "learning_rate": 3.971586431906627e-06, "loss": 0.0482, "step": 4285 }, { "epoch": 0.5972270605448339, "grad_norm": 0.07000236213207245, "learning_rate": 3.969280153576105e-06, "loss": 0.0607, "step": 4286 }, { "epoch": 0.5973664042360483, "grad_norm": 0.09137723594903946, "learning_rate": 3.966974104248001e-06, "loss": 0.0632, "step": 4287 }, { "epoch": 0.5975057479272626, "grad_norm": 0.08111859858036041, "learning_rate": 3.964668284434666e-06, "loss": 0.0663, "step": 4288 }, { "epoch": 0.597645091618477, "grad_norm": 0.13966770470142365, "learning_rate": 3.962362694648404e-06, "loss": 0.0588, "step": 4289 }, { "epoch": 0.5977844353096914, "grad_norm": 0.06272704154253006, "learning_rate": 3.960057335401459e-06, "loss": 0.0477, "step": 4290 }, { "epoch": 0.5979237790009058, "grad_norm": 0.11827754229307175, "learning_rate": 3.9577522072060336e-06, "loss": 0.0624, "step": 4291 }, { "epoch": 0.5980631226921201, "grad_norm": 0.16700422763824463, "learning_rate": 3.95544731057427e-06, "loss": 0.0749, "step": 4292 }, { "epoch": 0.5982024663833345, "grad_norm": 0.07568961381912231, "learning_rate": 3.953142646018269e-06, "loss": 0.0427, "step": 4293 }, { "epoch": 0.5983418100745489, "grad_norm": 0.07988271117210388, "learning_rate": 3.95083821405007e-06, "loss": 0.0474, "step": 4294 }, { "epoch": 0.5984811537657633, "grad_norm": 0.08614973723888397, "learning_rate": 3.948534015181671e-06, "loss": 0.0538, "step": 4295 }, { "epoch": 0.5986204974569777, "grad_norm": 0.14993707835674286, "learning_rate": 3.946230049925004e-06, "loss": 0.062, "step": 4296 }, { "epoch": 0.598759841148192, "grad_norm": 0.12516885995864868, "learning_rate": 3.9439263187919635e-06, "loss": 0.0543, "step": 4297 }, { "epoch": 0.5988991848394064, "grad_norm": 0.10111603885889053, "learning_rate": 3.941622822294385e-06, "loss": 0.056, "step": 4298 }, { "epoch": 0.5990385285306208, "grad_norm": 0.06887655705213547, "learning_rate": 3.939319560944051e-06, "loss": 0.0587, "step": 4299 }, { "epoch": 0.5991778722218352, "grad_norm": 0.07065674662590027, "learning_rate": 3.937016535252696e-06, "loss": 0.0587, "step": 4300 }, { "epoch": 0.5993172159130495, "grad_norm": 0.07381673157215118, "learning_rate": 3.934713745731998e-06, "loss": 0.0612, "step": 4301 }, { "epoch": 0.5994565596042639, "grad_norm": 0.17266252636909485, "learning_rate": 3.932411192893586e-06, "loss": 0.0708, "step": 4302 }, { "epoch": 0.5995959032954783, "grad_norm": 0.09822988510131836, "learning_rate": 3.93010887724903e-06, "loss": 0.0635, "step": 4303 }, { "epoch": 0.5997352469866927, "grad_norm": 0.10494419187307358, "learning_rate": 3.927806799309859e-06, "loss": 0.0663, "step": 4304 }, { "epoch": 0.599874590677907, "grad_norm": 0.055978190153837204, "learning_rate": 3.925504959587538e-06, "loss": 0.0516, "step": 4305 }, { "epoch": 0.6000139343691214, "grad_norm": 0.09583277255296707, "learning_rate": 3.9232033585934835e-06, "loss": 0.058, "step": 4306 }, { "epoch": 0.6001532780603358, "grad_norm": 0.07253244519233704, "learning_rate": 3.920901996839059e-06, "loss": 0.0538, "step": 4307 }, { "epoch": 0.6002926217515502, "grad_norm": 0.08309577405452728, "learning_rate": 3.918600874835573e-06, "loss": 0.0543, "step": 4308 }, { "epoch": 0.6004319654427646, "grad_norm": 0.10472655296325684, "learning_rate": 3.916299993094285e-06, "loss": 0.0495, "step": 4309 }, { "epoch": 0.6005713091339789, "grad_norm": 0.10360588878393173, "learning_rate": 3.913999352126399e-06, "loss": 0.0625, "step": 4310 }, { "epoch": 0.6007106528251933, "grad_norm": 0.07994751632213593, "learning_rate": 3.9116989524430615e-06, "loss": 0.0613, "step": 4311 }, { "epoch": 0.6008499965164077, "grad_norm": 0.0798192173242569, "learning_rate": 3.90939879455537e-06, "loss": 0.054, "step": 4312 }, { "epoch": 0.6009893402076221, "grad_norm": 0.08375079929828644, "learning_rate": 3.907098878974367e-06, "loss": 0.0565, "step": 4313 }, { "epoch": 0.6011286838988364, "grad_norm": 0.06930316239595413, "learning_rate": 3.9047992062110435e-06, "loss": 0.0584, "step": 4314 }, { "epoch": 0.6012680275900508, "grad_norm": 0.0767996534705162, "learning_rate": 3.902499776776331e-06, "loss": 0.0548, "step": 4315 }, { "epoch": 0.6014073712812652, "grad_norm": 0.09716659784317017, "learning_rate": 3.900200591181114e-06, "loss": 0.0631, "step": 4316 }, { "epoch": 0.6015467149724796, "grad_norm": 0.062070608139038086, "learning_rate": 3.897901649936215e-06, "loss": 0.0538, "step": 4317 }, { "epoch": 0.601686058663694, "grad_norm": 0.08335589617490768, "learning_rate": 3.895602953552408e-06, "loss": 0.0586, "step": 4318 }, { "epoch": 0.6018254023549083, "grad_norm": 0.06945926696062088, "learning_rate": 3.8933045025404105e-06, "loss": 0.0583, "step": 4319 }, { "epoch": 0.6019647460461227, "grad_norm": 0.0629892647266388, "learning_rate": 3.891006297410887e-06, "loss": 0.0509, "step": 4320 }, { "epoch": 0.6021040897373371, "grad_norm": 0.07845433056354523, "learning_rate": 3.888708338674447e-06, "loss": 0.0576, "step": 4321 }, { "epoch": 0.6022434334285516, "grad_norm": 0.07189968973398209, "learning_rate": 3.8864106268416416e-06, "loss": 0.0495, "step": 4322 }, { "epoch": 0.602382777119766, "grad_norm": 0.08515582978725433, "learning_rate": 3.884113162422971e-06, "loss": 0.0449, "step": 4323 }, { "epoch": 0.6025221208109803, "grad_norm": 0.08261711150407791, "learning_rate": 3.881815945928879e-06, "loss": 0.0641, "step": 4324 }, { "epoch": 0.6026614645021947, "grad_norm": 0.2518320381641388, "learning_rate": 3.879518977869755e-06, "loss": 0.0744, "step": 4325 }, { "epoch": 0.6028008081934091, "grad_norm": 0.07261962443590164, "learning_rate": 3.8772222587559345e-06, "loss": 0.0509, "step": 4326 }, { "epoch": 0.6029401518846235, "grad_norm": 0.08222626894712448, "learning_rate": 3.874925789097695e-06, "loss": 0.0665, "step": 4327 }, { "epoch": 0.6030794955758378, "grad_norm": 0.10307995975017548, "learning_rate": 3.872629569405257e-06, "loss": 0.056, "step": 4328 }, { "epoch": 0.6032188392670522, "grad_norm": 0.09223025292158127, "learning_rate": 3.870333600188792e-06, "loss": 0.0595, "step": 4329 }, { "epoch": 0.6033581829582666, "grad_norm": 0.13041996955871582, "learning_rate": 3.86803788195841e-06, "loss": 0.0556, "step": 4330 }, { "epoch": 0.603497526649481, "grad_norm": 0.07882905751466751, "learning_rate": 3.865742415224169e-06, "loss": 0.0482, "step": 4331 }, { "epoch": 0.6036368703406954, "grad_norm": 0.11667034775018692, "learning_rate": 3.863447200496065e-06, "loss": 0.0533, "step": 4332 }, { "epoch": 0.6037762140319097, "grad_norm": 0.08755616098642349, "learning_rate": 3.8611522382840476e-06, "loss": 0.0527, "step": 4333 }, { "epoch": 0.6039155577231241, "grad_norm": 0.08218477666378021, "learning_rate": 3.858857529098001e-06, "loss": 0.0481, "step": 4334 }, { "epoch": 0.6040549014143385, "grad_norm": 0.09923698753118515, "learning_rate": 3.8565630734477575e-06, "loss": 0.0635, "step": 4335 }, { "epoch": 0.6041942451055529, "grad_norm": 0.19145043194293976, "learning_rate": 3.854268871843096e-06, "loss": 0.059, "step": 4336 }, { "epoch": 0.6043335887967672, "grad_norm": 0.07439190149307251, "learning_rate": 3.851974924793734e-06, "loss": 0.0589, "step": 4337 }, { "epoch": 0.6044729324879816, "grad_norm": 0.056597258895635605, "learning_rate": 3.8496812328093335e-06, "loss": 0.0472, "step": 4338 }, { "epoch": 0.604612276179196, "grad_norm": 0.10433920472860336, "learning_rate": 3.8473877963995e-06, "loss": 0.0624, "step": 4339 }, { "epoch": 0.6047516198704104, "grad_norm": 0.08932819962501526, "learning_rate": 3.845094616073783e-06, "loss": 0.0488, "step": 4340 }, { "epoch": 0.6048909635616248, "grad_norm": 0.06807895749807358, "learning_rate": 3.8428016923416775e-06, "loss": 0.0544, "step": 4341 }, { "epoch": 0.6050303072528391, "grad_norm": 0.08029086887836456, "learning_rate": 3.840509025712616e-06, "loss": 0.0555, "step": 4342 }, { "epoch": 0.6051696509440535, "grad_norm": 0.09414266049861908, "learning_rate": 3.838216616695977e-06, "loss": 0.0597, "step": 4343 }, { "epoch": 0.6053089946352679, "grad_norm": 0.10565424710512161, "learning_rate": 3.835924465801081e-06, "loss": 0.0534, "step": 4344 }, { "epoch": 0.6054483383264823, "grad_norm": 0.07864160090684891, "learning_rate": 3.833632573537193e-06, "loss": 0.0683, "step": 4345 }, { "epoch": 0.6055876820176966, "grad_norm": 0.10388990491628647, "learning_rate": 3.831340940413519e-06, "loss": 0.052, "step": 4346 }, { "epoch": 0.605727025708911, "grad_norm": 0.08350779116153717, "learning_rate": 3.8290495669392085e-06, "loss": 0.0522, "step": 4347 }, { "epoch": 0.6058663694001254, "grad_norm": 0.06159449368715286, "learning_rate": 3.826758453623348e-06, "loss": 0.053, "step": 4348 }, { "epoch": 0.6060057130913398, "grad_norm": 0.11152561008930206, "learning_rate": 3.8244676009749745e-06, "loss": 0.0648, "step": 4349 }, { "epoch": 0.6061450567825541, "grad_norm": 0.08689049631357193, "learning_rate": 3.8221770095030625e-06, "loss": 0.0509, "step": 4350 }, { "epoch": 0.6062844004737685, "grad_norm": 0.08883017301559448, "learning_rate": 3.819886679716528e-06, "loss": 0.0531, "step": 4351 }, { "epoch": 0.6064237441649829, "grad_norm": 0.06570281833410263, "learning_rate": 3.8175966121242314e-06, "loss": 0.0535, "step": 4352 }, { "epoch": 0.6065630878561973, "grad_norm": 0.08244561403989792, "learning_rate": 3.815306807234974e-06, "loss": 0.0739, "step": 4353 }, { "epoch": 0.6067024315474117, "grad_norm": 0.08639867603778839, "learning_rate": 3.8130172655574963e-06, "loss": 0.0489, "step": 4354 }, { "epoch": 0.606841775238626, "grad_norm": 0.11991211771965027, "learning_rate": 3.810727987600482e-06, "loss": 0.055, "step": 4355 }, { "epoch": 0.6069811189298404, "grad_norm": 0.07033029943704605, "learning_rate": 3.808438973872558e-06, "loss": 0.0451, "step": 4356 }, { "epoch": 0.6071204626210548, "grad_norm": 0.085952527821064, "learning_rate": 3.80615022488229e-06, "loss": 0.0569, "step": 4357 }, { "epoch": 0.6072598063122692, "grad_norm": 0.06351848691701889, "learning_rate": 3.8038617411381876e-06, "loss": 0.0533, "step": 4358 }, { "epoch": 0.6073991500034835, "grad_norm": 0.09397047013044357, "learning_rate": 3.8015735231486974e-06, "loss": 0.0447, "step": 4359 }, { "epoch": 0.6075384936946979, "grad_norm": 0.11234748363494873, "learning_rate": 3.799285571422208e-06, "loss": 0.0588, "step": 4360 }, { "epoch": 0.6076778373859123, "grad_norm": 0.06230277568101883, "learning_rate": 3.7969978864670527e-06, "loss": 0.0624, "step": 4361 }, { "epoch": 0.6078171810771267, "grad_norm": 0.06324181705713272, "learning_rate": 3.794710468791502e-06, "loss": 0.0499, "step": 4362 }, { "epoch": 0.6079565247683412, "grad_norm": 0.05807293578982353, "learning_rate": 3.7924233189037697e-06, "loss": 0.0478, "step": 4363 }, { "epoch": 0.6080958684595555, "grad_norm": 0.08704938739538193, "learning_rate": 3.7901364373120036e-06, "loss": 0.0644, "step": 4364 }, { "epoch": 0.6082352121507699, "grad_norm": 0.09421828389167786, "learning_rate": 3.787849824524301e-06, "loss": 0.0558, "step": 4365 }, { "epoch": 0.6083745558419843, "grad_norm": 0.05997128039598465, "learning_rate": 3.7855634810486936e-06, "loss": 0.0533, "step": 4366 }, { "epoch": 0.6085138995331987, "grad_norm": 0.06249934807419777, "learning_rate": 3.7832774073931535e-06, "loss": 0.0478, "step": 4367 }, { "epoch": 0.608653243224413, "grad_norm": 0.08306746184825897, "learning_rate": 3.780991604065598e-06, "loss": 0.0626, "step": 4368 }, { "epoch": 0.6087925869156274, "grad_norm": 0.07734288275241852, "learning_rate": 3.778706071573875e-06, "loss": 0.0512, "step": 4369 }, { "epoch": 0.6089319306068418, "grad_norm": 0.11955537647008896, "learning_rate": 3.776420810425781e-06, "loss": 0.0621, "step": 4370 }, { "epoch": 0.6090712742980562, "grad_norm": 0.052380889654159546, "learning_rate": 3.774135821129047e-06, "loss": 0.0413, "step": 4371 }, { "epoch": 0.6092106179892706, "grad_norm": 0.06354732066392899, "learning_rate": 3.771851104191348e-06, "loss": 0.0509, "step": 4372 }, { "epoch": 0.6093499616804849, "grad_norm": 0.10211558640003204, "learning_rate": 3.7695666601202944e-06, "loss": 0.0638, "step": 4373 }, { "epoch": 0.6094893053716993, "grad_norm": 0.06830663233995438, "learning_rate": 3.7672824894234388e-06, "loss": 0.0584, "step": 4374 }, { "epoch": 0.6096286490629137, "grad_norm": 0.10141701251268387, "learning_rate": 3.7649985926082695e-06, "loss": 0.0548, "step": 4375 }, { "epoch": 0.6097679927541281, "grad_norm": 0.06747404485940933, "learning_rate": 3.762714970182216e-06, "loss": 0.0515, "step": 4376 }, { "epoch": 0.6099073364453425, "grad_norm": 0.07645375281572342, "learning_rate": 3.76043162265265e-06, "loss": 0.0494, "step": 4377 }, { "epoch": 0.6100466801365568, "grad_norm": 0.06980391591787338, "learning_rate": 3.758148550526877e-06, "loss": 0.0545, "step": 4378 }, { "epoch": 0.6101860238277712, "grad_norm": 0.08397135883569717, "learning_rate": 3.7558657543121456e-06, "loss": 0.0568, "step": 4379 }, { "epoch": 0.6103253675189856, "grad_norm": 0.07895751297473907, "learning_rate": 3.7535832345156376e-06, "loss": 0.0582, "step": 4380 }, { "epoch": 0.6104647112102, "grad_norm": 0.09447213262319565, "learning_rate": 3.7513009916444797e-06, "loss": 0.0612, "step": 4381 }, { "epoch": 0.6106040549014143, "grad_norm": 0.09402360022068024, "learning_rate": 3.7490190262057322e-06, "loss": 0.0538, "step": 4382 }, { "epoch": 0.6107433985926287, "grad_norm": 0.10970395803451538, "learning_rate": 3.7467373387063973e-06, "loss": 0.0704, "step": 4383 }, { "epoch": 0.6108827422838431, "grad_norm": 0.06088118255138397, "learning_rate": 3.7444559296534144e-06, "loss": 0.0579, "step": 4384 }, { "epoch": 0.6110220859750575, "grad_norm": 0.12221170961856842, "learning_rate": 3.7421747995536585e-06, "loss": 0.0499, "step": 4385 }, { "epoch": 0.6111614296662718, "grad_norm": 0.0842755064368248, "learning_rate": 3.739893948913945e-06, "loss": 0.0561, "step": 4386 }, { "epoch": 0.6113007733574862, "grad_norm": 0.07909669727087021, "learning_rate": 3.7376133782410275e-06, "loss": 0.0506, "step": 4387 }, { "epoch": 0.6114401170487006, "grad_norm": 0.07988641411066055, "learning_rate": 3.7353330880415963e-06, "loss": 0.0562, "step": 4388 }, { "epoch": 0.611579460739915, "grad_norm": 0.12316776067018509, "learning_rate": 3.7330530788222807e-06, "loss": 0.0587, "step": 4389 }, { "epoch": 0.6117188044311294, "grad_norm": 0.09107019007205963, "learning_rate": 3.730773351089647e-06, "loss": 0.0521, "step": 4390 }, { "epoch": 0.6118581481223437, "grad_norm": 0.06169666349887848, "learning_rate": 3.7284939053501966e-06, "loss": 0.0563, "step": 4391 }, { "epoch": 0.6119974918135581, "grad_norm": 0.07130502909421921, "learning_rate": 3.7262147421103713e-06, "loss": 0.0533, "step": 4392 }, { "epoch": 0.6121368355047725, "grad_norm": 0.10893186926841736, "learning_rate": 3.723935861876549e-06, "loss": 0.0687, "step": 4393 }, { "epoch": 0.6122761791959869, "grad_norm": 0.1055290549993515, "learning_rate": 3.7216572651550453e-06, "loss": 0.0612, "step": 4394 }, { "epoch": 0.6124155228872012, "grad_norm": 0.05958883836865425, "learning_rate": 3.7193789524521146e-06, "loss": 0.0438, "step": 4395 }, { "epoch": 0.6125548665784156, "grad_norm": 0.08481936156749725, "learning_rate": 3.717100924273941e-06, "loss": 0.0492, "step": 4396 }, { "epoch": 0.61269421026963, "grad_norm": 0.07351113110780716, "learning_rate": 3.714823181126653e-06, "loss": 0.0461, "step": 4397 }, { "epoch": 0.6128335539608444, "grad_norm": 0.07312434911727905, "learning_rate": 3.7125457235163144e-06, "loss": 0.0496, "step": 4398 }, { "epoch": 0.6129728976520588, "grad_norm": 0.07857973873615265, "learning_rate": 3.710268551948921e-06, "loss": 0.0562, "step": 4399 }, { "epoch": 0.6131122413432731, "grad_norm": 0.05983644351363182, "learning_rate": 3.7079916669304127e-06, "loss": 0.0457, "step": 4400 }, { "epoch": 0.6132515850344875, "grad_norm": 0.09375081211328506, "learning_rate": 3.7057150689666577e-06, "loss": 0.0541, "step": 4401 }, { "epoch": 0.6133909287257019, "grad_norm": 0.0991017296910286, "learning_rate": 3.7034387585634656e-06, "loss": 0.0539, "step": 4402 }, { "epoch": 0.6135302724169164, "grad_norm": 0.06409290432929993, "learning_rate": 3.701162736226579e-06, "loss": 0.0495, "step": 4403 }, { "epoch": 0.6136696161081308, "grad_norm": 0.051888544112443924, "learning_rate": 3.6988870024616807e-06, "loss": 0.0439, "step": 4404 }, { "epoch": 0.6138089597993451, "grad_norm": 0.06827472150325775, "learning_rate": 3.6966115577743865e-06, "loss": 0.0593, "step": 4405 }, { "epoch": 0.6139483034905595, "grad_norm": 0.05534510686993599, "learning_rate": 3.6943364026702466e-06, "loss": 0.0435, "step": 4406 }, { "epoch": 0.6140876471817739, "grad_norm": 0.07587098330259323, "learning_rate": 3.6920615376547487e-06, "loss": 0.05, "step": 4407 }, { "epoch": 0.6142269908729883, "grad_norm": 0.06213802099227905, "learning_rate": 3.6897869632333157e-06, "loss": 0.0554, "step": 4408 }, { "epoch": 0.6143663345642026, "grad_norm": 0.09113885462284088, "learning_rate": 3.687512679911307e-06, "loss": 0.0573, "step": 4409 }, { "epoch": 0.614505678255417, "grad_norm": 0.07347504049539566, "learning_rate": 3.685238688194016e-06, "loss": 0.0602, "step": 4410 }, { "epoch": 0.6146450219466314, "grad_norm": 0.07987052947282791, "learning_rate": 3.682964988586675e-06, "loss": 0.0501, "step": 4411 }, { "epoch": 0.6147843656378458, "grad_norm": 0.0583658330142498, "learning_rate": 3.6806915815944422e-06, "loss": 0.0539, "step": 4412 }, { "epoch": 0.6149237093290602, "grad_norm": 0.08676149696111679, "learning_rate": 3.6784184677224204e-06, "loss": 0.054, "step": 4413 }, { "epoch": 0.6150630530202745, "grad_norm": 0.09499461948871613, "learning_rate": 3.676145647475643e-06, "loss": 0.0558, "step": 4414 }, { "epoch": 0.6152023967114889, "grad_norm": 0.1073199138045311, "learning_rate": 3.673873121359077e-06, "loss": 0.0644, "step": 4415 }, { "epoch": 0.6153417404027033, "grad_norm": 0.11326567083597183, "learning_rate": 3.6716008898776306e-06, "loss": 0.0515, "step": 4416 }, { "epoch": 0.6154810840939177, "grad_norm": 0.09182228147983551, "learning_rate": 3.669328953536137e-06, "loss": 0.048, "step": 4417 }, { "epoch": 0.615620427785132, "grad_norm": 0.07604238390922546, "learning_rate": 3.6670573128393704e-06, "loss": 0.0539, "step": 4418 }, { "epoch": 0.6157597714763464, "grad_norm": 0.07761584967374802, "learning_rate": 3.664785968292036e-06, "loss": 0.0556, "step": 4419 }, { "epoch": 0.6158991151675608, "grad_norm": 0.1318955421447754, "learning_rate": 3.662514920398777e-06, "loss": 0.0623, "step": 4420 }, { "epoch": 0.6160384588587752, "grad_norm": 0.12605911493301392, "learning_rate": 3.6602441696641684e-06, "loss": 0.0468, "step": 4421 }, { "epoch": 0.6161778025499896, "grad_norm": 0.08959977328777313, "learning_rate": 3.6579737165927176e-06, "loss": 0.0693, "step": 4422 }, { "epoch": 0.6163171462412039, "grad_norm": 0.1165311262011528, "learning_rate": 3.655703561688867e-06, "loss": 0.0613, "step": 4423 }, { "epoch": 0.6164564899324183, "grad_norm": 0.07788603007793427, "learning_rate": 3.653433705456994e-06, "loss": 0.0661, "step": 4424 }, { "epoch": 0.6165958336236327, "grad_norm": 0.09588514268398285, "learning_rate": 3.651164148401409e-06, "loss": 0.0551, "step": 4425 }, { "epoch": 0.6167351773148471, "grad_norm": 0.06590525805950165, "learning_rate": 3.648894891026358e-06, "loss": 0.0509, "step": 4426 }, { "epoch": 0.6168745210060614, "grad_norm": 0.08226035535335541, "learning_rate": 3.646625933836015e-06, "loss": 0.0511, "step": 4427 }, { "epoch": 0.6170138646972758, "grad_norm": 0.08910276740789413, "learning_rate": 3.64435727733449e-06, "loss": 0.0515, "step": 4428 }, { "epoch": 0.6171532083884902, "grad_norm": 0.08973176777362823, "learning_rate": 3.6420889220258295e-06, "loss": 0.0671, "step": 4429 }, { "epoch": 0.6172925520797046, "grad_norm": 0.17303389310836792, "learning_rate": 3.639820868414008e-06, "loss": 0.0534, "step": 4430 }, { "epoch": 0.617431895770919, "grad_norm": 0.062151651829481125, "learning_rate": 3.6375531170029356e-06, "loss": 0.0551, "step": 4431 }, { "epoch": 0.6175712394621333, "grad_norm": 0.09668100625276566, "learning_rate": 3.6352856682964576e-06, "loss": 0.0561, "step": 4432 }, { "epoch": 0.6177105831533477, "grad_norm": 0.0874239057302475, "learning_rate": 3.633018522798346e-06, "loss": 0.0437, "step": 4433 }, { "epoch": 0.6178499268445621, "grad_norm": 0.08260820806026459, "learning_rate": 3.6307516810123095e-06, "loss": 0.0555, "step": 4434 }, { "epoch": 0.6179892705357765, "grad_norm": 0.09648429602384567, "learning_rate": 3.6284851434419886e-06, "loss": 0.0417, "step": 4435 }, { "epoch": 0.6181286142269908, "grad_norm": 0.051354408264160156, "learning_rate": 3.6262189105909574e-06, "loss": 0.0551, "step": 4436 }, { "epoch": 0.6182679579182052, "grad_norm": 0.12501327693462372, "learning_rate": 3.6239529829627214e-06, "loss": 0.0641, "step": 4437 }, { "epoch": 0.6184073016094196, "grad_norm": 0.09375233203172684, "learning_rate": 3.6216873610607155e-06, "loss": 0.0656, "step": 4438 }, { "epoch": 0.618546645300634, "grad_norm": 0.10082889348268509, "learning_rate": 3.61942204538831e-06, "loss": 0.0511, "step": 4439 }, { "epoch": 0.6186859889918483, "grad_norm": 0.24443456530570984, "learning_rate": 3.6171570364488075e-06, "loss": 0.0623, "step": 4440 }, { "epoch": 0.6188253326830627, "grad_norm": 0.08337131142616272, "learning_rate": 3.6148923347454413e-06, "loss": 0.0558, "step": 4441 }, { "epoch": 0.6189646763742771, "grad_norm": 0.09993402659893036, "learning_rate": 3.6126279407813765e-06, "loss": 0.0569, "step": 4442 }, { "epoch": 0.6191040200654916, "grad_norm": 0.08547236025333405, "learning_rate": 3.6103638550597074e-06, "loss": 0.0627, "step": 4443 }, { "epoch": 0.619243363756706, "grad_norm": 0.09449673444032669, "learning_rate": 3.6081000780834635e-06, "loss": 0.0604, "step": 4444 }, { "epoch": 0.6193827074479203, "grad_norm": 0.08663970977067947, "learning_rate": 3.6058366103556055e-06, "loss": 0.0563, "step": 4445 }, { "epoch": 0.6195220511391347, "grad_norm": 0.15958018600940704, "learning_rate": 3.6035734523790235e-06, "loss": 0.0593, "step": 4446 }, { "epoch": 0.6196613948303491, "grad_norm": 0.06952792406082153, "learning_rate": 3.6013106046565383e-06, "loss": 0.0509, "step": 4447 }, { "epoch": 0.6198007385215635, "grad_norm": 0.14431458711624146, "learning_rate": 3.5990480676909055e-06, "loss": 0.0614, "step": 4448 }, { "epoch": 0.6199400822127779, "grad_norm": 0.1437947154045105, "learning_rate": 3.5967858419848077e-06, "loss": 0.0553, "step": 4449 }, { "epoch": 0.6200794259039922, "grad_norm": 0.1075996682047844, "learning_rate": 3.5945239280408596e-06, "loss": 0.0581, "step": 4450 }, { "epoch": 0.6202187695952066, "grad_norm": 0.08880697190761566, "learning_rate": 3.592262326361606e-06, "loss": 0.0577, "step": 4451 }, { "epoch": 0.620358113286421, "grad_norm": 0.12804029881954193, "learning_rate": 3.5900010374495252e-06, "loss": 0.0697, "step": 4452 }, { "epoch": 0.6204974569776354, "grad_norm": 0.1618773341178894, "learning_rate": 3.587740061807024e-06, "loss": 0.0577, "step": 4453 }, { "epoch": 0.6206368006688497, "grad_norm": 0.08258078992366791, "learning_rate": 3.585479399936438e-06, "loss": 0.0531, "step": 4454 }, { "epoch": 0.6207761443600641, "grad_norm": 0.0627969279885292, "learning_rate": 3.583219052340034e-06, "loss": 0.055, "step": 4455 }, { "epoch": 0.6209154880512785, "grad_norm": 0.10530482232570648, "learning_rate": 3.5809590195200115e-06, "loss": 0.0674, "step": 4456 }, { "epoch": 0.6210548317424929, "grad_norm": 0.08420637249946594, "learning_rate": 3.578699301978499e-06, "loss": 0.054, "step": 4457 }, { "epoch": 0.6211941754337073, "grad_norm": 0.062236886471509933, "learning_rate": 3.576439900217552e-06, "loss": 0.0588, "step": 4458 }, { "epoch": 0.6213335191249216, "grad_norm": 0.11928233504295349, "learning_rate": 3.5741808147391587e-06, "loss": 0.0643, "step": 4459 }, { "epoch": 0.621472862816136, "grad_norm": 0.10647901147603989, "learning_rate": 3.571922046045235e-06, "loss": 0.0625, "step": 4460 }, { "epoch": 0.6216122065073504, "grad_norm": 0.07596041262149811, "learning_rate": 3.5696635946376305e-06, "loss": 0.0705, "step": 4461 }, { "epoch": 0.6217515501985648, "grad_norm": 0.06610961258411407, "learning_rate": 3.5674054610181203e-06, "loss": 0.0576, "step": 4462 }, { "epoch": 0.6218908938897791, "grad_norm": 0.10733763873577118, "learning_rate": 3.5651476456884103e-06, "loss": 0.051, "step": 4463 }, { "epoch": 0.6220302375809935, "grad_norm": 0.0842532142996788, "learning_rate": 3.562890149150134e-06, "loss": 0.0556, "step": 4464 }, { "epoch": 0.6221695812722079, "grad_norm": 0.09071391820907593, "learning_rate": 3.560632971904857e-06, "loss": 0.0486, "step": 4465 }, { "epoch": 0.6223089249634223, "grad_norm": 0.08360367268323898, "learning_rate": 3.558376114454073e-06, "loss": 0.0625, "step": 4466 }, { "epoch": 0.6224482686546366, "grad_norm": 0.1166568249464035, "learning_rate": 3.556119577299202e-06, "loss": 0.0591, "step": 4467 }, { "epoch": 0.622587612345851, "grad_norm": 0.11322050541639328, "learning_rate": 3.553863360941598e-06, "loss": 0.0536, "step": 4468 }, { "epoch": 0.6227269560370654, "grad_norm": 0.08656046539545059, "learning_rate": 3.55160746588254e-06, "loss": 0.0549, "step": 4469 }, { "epoch": 0.6228662997282798, "grad_norm": 0.13896040618419647, "learning_rate": 3.5493518926232352e-06, "loss": 0.0539, "step": 4470 }, { "epoch": 0.6230056434194942, "grad_norm": 0.0663319081068039, "learning_rate": 3.547096641664819e-06, "loss": 0.0521, "step": 4471 }, { "epoch": 0.6231449871107085, "grad_norm": 0.08695098757743835, "learning_rate": 3.5448417135083603e-06, "loss": 0.0607, "step": 4472 }, { "epoch": 0.6232843308019229, "grad_norm": 0.11478862911462784, "learning_rate": 3.5425871086548513e-06, "loss": 0.058, "step": 4473 }, { "epoch": 0.6234236744931373, "grad_norm": 0.06029365956783295, "learning_rate": 3.540332827605214e-06, "loss": 0.0437, "step": 4474 }, { "epoch": 0.6235630181843517, "grad_norm": 0.09723271429538727, "learning_rate": 3.538078870860297e-06, "loss": 0.0648, "step": 4475 }, { "epoch": 0.623702361875566, "grad_norm": 0.06883861869573593, "learning_rate": 3.5358252389208777e-06, "loss": 0.0462, "step": 4476 }, { "epoch": 0.6238417055667804, "grad_norm": 0.07858740538358688, "learning_rate": 3.533571932287663e-06, "loss": 0.062, "step": 4477 }, { "epoch": 0.6239810492579948, "grad_norm": 0.07106266915798187, "learning_rate": 3.5313189514612867e-06, "loss": 0.052, "step": 4478 }, { "epoch": 0.6241203929492092, "grad_norm": 0.08724851906299591, "learning_rate": 3.5290662969423097e-06, "loss": 0.0515, "step": 4479 }, { "epoch": 0.6242597366404236, "grad_norm": 0.07164718210697174, "learning_rate": 3.5268139692312163e-06, "loss": 0.0583, "step": 4480 }, { "epoch": 0.6243990803316379, "grad_norm": 0.06622415035963058, "learning_rate": 3.5245619688284277e-06, "loss": 0.0581, "step": 4481 }, { "epoch": 0.6245384240228523, "grad_norm": 0.08499923348426819, "learning_rate": 3.522310296234285e-06, "loss": 0.0677, "step": 4482 }, { "epoch": 0.6246777677140668, "grad_norm": 0.07366494089365005, "learning_rate": 3.520058951949056e-06, "loss": 0.0607, "step": 4483 }, { "epoch": 0.6248171114052812, "grad_norm": 0.08778796344995499, "learning_rate": 3.517807936472942e-06, "loss": 0.0476, "step": 4484 }, { "epoch": 0.6249564550964956, "grad_norm": 0.09312663972377777, "learning_rate": 3.515557250306067e-06, "loss": 0.0537, "step": 4485 }, { "epoch": 0.6250957987877099, "grad_norm": 0.0747649073600769, "learning_rate": 3.5133068939484793e-06, "loss": 0.053, "step": 4486 }, { "epoch": 0.6252351424789243, "grad_norm": 0.10593307018280029, "learning_rate": 3.511056867900157e-06, "loss": 0.0563, "step": 4487 }, { "epoch": 0.6253744861701387, "grad_norm": 0.04611271992325783, "learning_rate": 3.508807172661006e-06, "loss": 0.0531, "step": 4488 }, { "epoch": 0.6255138298613531, "grad_norm": 0.11506998538970947, "learning_rate": 3.506557808730857e-06, "loss": 0.0542, "step": 4489 }, { "epoch": 0.6256531735525674, "grad_norm": 0.07787463814020157, "learning_rate": 3.504308776609468e-06, "loss": 0.0569, "step": 4490 }, { "epoch": 0.6257925172437818, "grad_norm": 0.11110758781433105, "learning_rate": 3.502060076796521e-06, "loss": 0.0545, "step": 4491 }, { "epoch": 0.6259318609349962, "grad_norm": 0.10001367330551147, "learning_rate": 3.4998117097916247e-06, "loss": 0.0448, "step": 4492 }, { "epoch": 0.6260712046262106, "grad_norm": 0.06638507544994354, "learning_rate": 3.4975636760943177e-06, "loss": 0.0561, "step": 4493 }, { "epoch": 0.626210548317425, "grad_norm": 0.10280970484018326, "learning_rate": 3.49531597620406e-06, "loss": 0.052, "step": 4494 }, { "epoch": 0.6263498920086393, "grad_norm": 0.0738481730222702, "learning_rate": 3.4930686106202428e-06, "loss": 0.0587, "step": 4495 }, { "epoch": 0.6264892356998537, "grad_norm": 0.12108921259641647, "learning_rate": 3.4908215798421737e-06, "loss": 0.0582, "step": 4496 }, { "epoch": 0.6266285793910681, "grad_norm": 0.11529424786567688, "learning_rate": 3.488574884369095e-06, "loss": 0.0663, "step": 4497 }, { "epoch": 0.6267679230822825, "grad_norm": 0.06230957433581352, "learning_rate": 3.486328524700171e-06, "loss": 0.0521, "step": 4498 }, { "epoch": 0.6269072667734968, "grad_norm": 0.09585603326559067, "learning_rate": 3.4840825013344897e-06, "loss": 0.0588, "step": 4499 }, { "epoch": 0.6270466104647112, "grad_norm": 0.0663522332906723, "learning_rate": 3.48183681477107e-06, "loss": 0.0582, "step": 4500 }, { "epoch": 0.6271859541559256, "grad_norm": 0.062258873134851456, "learning_rate": 3.4795914655088486e-06, "loss": 0.0559, "step": 4501 }, { "epoch": 0.62732529784714, "grad_norm": 0.08464963734149933, "learning_rate": 3.4773464540466917e-06, "loss": 0.048, "step": 4502 }, { "epoch": 0.6274646415383544, "grad_norm": 0.06119132786989212, "learning_rate": 3.47510178088339e-06, "loss": 0.0476, "step": 4503 }, { "epoch": 0.6276039852295687, "grad_norm": 0.06502038985490799, "learning_rate": 3.4728574465176585e-06, "loss": 0.047, "step": 4504 }, { "epoch": 0.6277433289207831, "grad_norm": 0.10916738957166672, "learning_rate": 3.4706134514481372e-06, "loss": 0.0567, "step": 4505 }, { "epoch": 0.6278826726119975, "grad_norm": 0.0720461905002594, "learning_rate": 3.468369796173392e-06, "loss": 0.047, "step": 4506 }, { "epoch": 0.6280220163032119, "grad_norm": 0.07760259509086609, "learning_rate": 3.4661264811919093e-06, "loss": 0.0562, "step": 4507 }, { "epoch": 0.6281613599944262, "grad_norm": 0.08092865347862244, "learning_rate": 3.4638835070021027e-06, "loss": 0.0557, "step": 4508 }, { "epoch": 0.6283007036856406, "grad_norm": 0.10063797980546951, "learning_rate": 3.4616408741023113e-06, "loss": 0.0585, "step": 4509 }, { "epoch": 0.628440047376855, "grad_norm": 0.06096034124493599, "learning_rate": 3.459398582990795e-06, "loss": 0.0565, "step": 4510 }, { "epoch": 0.6285793910680694, "grad_norm": 0.07641632854938507, "learning_rate": 3.4571566341657446e-06, "loss": 0.0519, "step": 4511 }, { "epoch": 0.6287187347592837, "grad_norm": 0.07461446523666382, "learning_rate": 3.4549150281252635e-06, "loss": 0.0471, "step": 4512 }, { "epoch": 0.6288580784504981, "grad_norm": 0.06915462762117386, "learning_rate": 3.452673765367389e-06, "loss": 0.0466, "step": 4513 }, { "epoch": 0.6289974221417125, "grad_norm": 0.09255481511354446, "learning_rate": 3.450432846390078e-06, "loss": 0.0515, "step": 4514 }, { "epoch": 0.6291367658329269, "grad_norm": 0.06251832097768784, "learning_rate": 3.4481922716912097e-06, "loss": 0.0459, "step": 4515 }, { "epoch": 0.6292761095241413, "grad_norm": 0.07164394855499268, "learning_rate": 3.445952041768593e-06, "loss": 0.0554, "step": 4516 }, { "epoch": 0.6294154532153556, "grad_norm": 0.07121187448501587, "learning_rate": 3.443712157119952e-06, "loss": 0.0486, "step": 4517 }, { "epoch": 0.62955479690657, "grad_norm": 0.07247193157672882, "learning_rate": 3.4414726182429388e-06, "loss": 0.0545, "step": 4518 }, { "epoch": 0.6296941405977844, "grad_norm": 0.07977662235498428, "learning_rate": 3.4392334256351265e-06, "loss": 0.0485, "step": 4519 }, { "epoch": 0.6298334842889988, "grad_norm": 0.08938438445329666, "learning_rate": 3.436994579794016e-06, "loss": 0.0599, "step": 4520 }, { "epoch": 0.6299728279802131, "grad_norm": 0.05525144562125206, "learning_rate": 3.4347560812170267e-06, "loss": 0.0478, "step": 4521 }, { "epoch": 0.6301121716714275, "grad_norm": 0.09097389131784439, "learning_rate": 3.4325179304014997e-06, "loss": 0.0554, "step": 4522 }, { "epoch": 0.6302515153626419, "grad_norm": 0.1601172685623169, "learning_rate": 3.4302801278447028e-06, "loss": 0.062, "step": 4523 }, { "epoch": 0.6303908590538564, "grad_norm": 0.04860951378941536, "learning_rate": 3.428042674043822e-06, "loss": 0.0472, "step": 4524 }, { "epoch": 0.6305302027450708, "grad_norm": 0.07133585959672928, "learning_rate": 3.425805569495973e-06, "loss": 0.0542, "step": 4525 }, { "epoch": 0.6306695464362851, "grad_norm": 0.06974175572395325, "learning_rate": 3.4235688146981854e-06, "loss": 0.0474, "step": 4526 }, { "epoch": 0.6308088901274995, "grad_norm": 0.09840220957994461, "learning_rate": 3.42133241014742e-06, "loss": 0.0589, "step": 4527 }, { "epoch": 0.6309482338187139, "grad_norm": 0.06131640449166298, "learning_rate": 3.4190963563405482e-06, "loss": 0.0511, "step": 4528 }, { "epoch": 0.6310875775099283, "grad_norm": 0.07412374019622803, "learning_rate": 3.416860653774374e-06, "loss": 0.053, "step": 4529 }, { "epoch": 0.6312269212011427, "grad_norm": 0.0877683237195015, "learning_rate": 3.4146253029456195e-06, "loss": 0.0555, "step": 4530 }, { "epoch": 0.631366264892357, "grad_norm": 0.14988596737384796, "learning_rate": 3.4123903043509267e-06, "loss": 0.0632, "step": 4531 }, { "epoch": 0.6315056085835714, "grad_norm": 0.0665024146437645, "learning_rate": 3.4101556584868646e-06, "loss": 0.0513, "step": 4532 }, { "epoch": 0.6316449522747858, "grad_norm": 0.0824522003531456, "learning_rate": 3.407921365849917e-06, "loss": 0.0532, "step": 4533 }, { "epoch": 0.6317842959660002, "grad_norm": 0.09687259793281555, "learning_rate": 3.4056874269364946e-06, "loss": 0.0591, "step": 4534 }, { "epoch": 0.6319236396572145, "grad_norm": 0.06379666924476624, "learning_rate": 3.4034538422429263e-06, "loss": 0.0474, "step": 4535 }, { "epoch": 0.6320629833484289, "grad_norm": 0.143656924366951, "learning_rate": 3.401220612265465e-06, "loss": 0.0676, "step": 4536 }, { "epoch": 0.6322023270396433, "grad_norm": 0.06512502580881119, "learning_rate": 3.3989877375002846e-06, "loss": 0.0454, "step": 4537 }, { "epoch": 0.6323416707308577, "grad_norm": 0.14782941341400146, "learning_rate": 3.3967552184434753e-06, "loss": 0.0527, "step": 4538 }, { "epoch": 0.632481014422072, "grad_norm": 0.06632068008184433, "learning_rate": 3.3945230555910534e-06, "loss": 0.0414, "step": 4539 }, { "epoch": 0.6326203581132864, "grad_norm": 0.09227354824542999, "learning_rate": 3.3922912494389554e-06, "loss": 0.0561, "step": 4540 }, { "epoch": 0.6327597018045008, "grad_norm": 0.060027334839105606, "learning_rate": 3.3900598004830377e-06, "loss": 0.0436, "step": 4541 }, { "epoch": 0.6328990454957152, "grad_norm": 0.09664350003004074, "learning_rate": 3.387828709219075e-06, "loss": 0.0537, "step": 4542 }, { "epoch": 0.6330383891869296, "grad_norm": 0.09548146277666092, "learning_rate": 3.3855979761427705e-06, "loss": 0.0619, "step": 4543 }, { "epoch": 0.6331777328781439, "grad_norm": 0.057498522102832794, "learning_rate": 3.3833676017497353e-06, "loss": 0.0489, "step": 4544 }, { "epoch": 0.6333170765693583, "grad_norm": 0.08321362733840942, "learning_rate": 3.381137586535511e-06, "loss": 0.0565, "step": 4545 }, { "epoch": 0.6334564202605727, "grad_norm": 0.06877320259809494, "learning_rate": 3.3789079309955556e-06, "loss": 0.0548, "step": 4546 }, { "epoch": 0.6335957639517871, "grad_norm": 0.06559249013662338, "learning_rate": 3.3766786356252466e-06, "loss": 0.0507, "step": 4547 }, { "epoch": 0.6337351076430014, "grad_norm": 0.09655594080686569, "learning_rate": 3.374449700919887e-06, "loss": 0.0626, "step": 4548 }, { "epoch": 0.6338744513342158, "grad_norm": 0.11093734949827194, "learning_rate": 3.37222112737469e-06, "loss": 0.0501, "step": 4549 }, { "epoch": 0.6340137950254302, "grad_norm": 0.06440604478120804, "learning_rate": 3.3699929154847957e-06, "loss": 0.0574, "step": 4550 }, { "epoch": 0.6341531387166446, "grad_norm": 0.11929228156805038, "learning_rate": 3.367765065745261e-06, "loss": 0.0488, "step": 4551 }, { "epoch": 0.634292482407859, "grad_norm": 0.08076290786266327, "learning_rate": 3.365537578651065e-06, "loss": 0.0585, "step": 4552 }, { "epoch": 0.6344318260990733, "grad_norm": 0.0605500191450119, "learning_rate": 3.3633104546971052e-06, "loss": 0.0503, "step": 4553 }, { "epoch": 0.6345711697902877, "grad_norm": 0.0889330580830574, "learning_rate": 3.3610836943781945e-06, "loss": 0.064, "step": 4554 }, { "epoch": 0.6347105134815021, "grad_norm": 0.11546411365270615, "learning_rate": 3.358857298189069e-06, "loss": 0.0615, "step": 4555 }, { "epoch": 0.6348498571727165, "grad_norm": 0.1371268779039383, "learning_rate": 3.356631266624385e-06, "loss": 0.059, "step": 4556 }, { "epoch": 0.6349892008639308, "grad_norm": 0.10679356008768082, "learning_rate": 3.3544056001787146e-06, "loss": 0.0591, "step": 4557 }, { "epoch": 0.6351285445551452, "grad_norm": 0.06682105362415314, "learning_rate": 3.3521802993465513e-06, "loss": 0.0456, "step": 4558 }, { "epoch": 0.6352678882463596, "grad_norm": 0.05872224271297455, "learning_rate": 3.3499553646223037e-06, "loss": 0.0466, "step": 4559 }, { "epoch": 0.635407231937574, "grad_norm": 0.06434083729982376, "learning_rate": 3.3477307965003026e-06, "loss": 0.051, "step": 4560 }, { "epoch": 0.6355465756287884, "grad_norm": 0.1568373143672943, "learning_rate": 3.345506595474798e-06, "loss": 0.0587, "step": 4561 }, { "epoch": 0.6356859193200027, "grad_norm": 0.13008472323417664, "learning_rate": 3.3432827620399543e-06, "loss": 0.0534, "step": 4562 }, { "epoch": 0.6358252630112171, "grad_norm": 0.10659515112638474, "learning_rate": 3.3410592966898565e-06, "loss": 0.0586, "step": 4563 }, { "epoch": 0.6359646067024316, "grad_norm": 0.10144897550344467, "learning_rate": 3.3388361999185105e-06, "loss": 0.0654, "step": 4564 }, { "epoch": 0.636103950393646, "grad_norm": 0.11783154308795929, "learning_rate": 3.3366134722198352e-06, "loss": 0.0673, "step": 4565 }, { "epoch": 0.6362432940848604, "grad_norm": 0.13555708527565002, "learning_rate": 3.3343911140876704e-06, "loss": 0.0636, "step": 4566 }, { "epoch": 0.6363826377760747, "grad_norm": 0.06110250577330589, "learning_rate": 3.332169126015773e-06, "loss": 0.0436, "step": 4567 }, { "epoch": 0.6365219814672891, "grad_norm": 0.08393498510122299, "learning_rate": 3.3299475084978195e-06, "loss": 0.061, "step": 4568 }, { "epoch": 0.6366613251585035, "grad_norm": 0.12502482533454895, "learning_rate": 3.3277262620274025e-06, "loss": 0.0572, "step": 4569 }, { "epoch": 0.6368006688497179, "grad_norm": 0.0727326050400734, "learning_rate": 3.3255053870980304e-06, "loss": 0.0514, "step": 4570 }, { "epoch": 0.6369400125409322, "grad_norm": 0.10906469076871872, "learning_rate": 3.3232848842031306e-06, "loss": 0.0549, "step": 4571 }, { "epoch": 0.6370793562321466, "grad_norm": 0.07357215136289597, "learning_rate": 3.3210647538360514e-06, "loss": 0.0576, "step": 4572 }, { "epoch": 0.637218699923361, "grad_norm": 0.12598302960395813, "learning_rate": 3.3188449964900527e-06, "loss": 0.0573, "step": 4573 }, { "epoch": 0.6373580436145754, "grad_norm": 0.05503508448600769, "learning_rate": 3.316625612658315e-06, "loss": 0.0474, "step": 4574 }, { "epoch": 0.6374973873057898, "grad_norm": 0.07026039063930511, "learning_rate": 3.314406602833933e-06, "loss": 0.0563, "step": 4575 }, { "epoch": 0.6376367309970041, "grad_norm": 0.08005090057849884, "learning_rate": 3.3121879675099205e-06, "loss": 0.0597, "step": 4576 }, { "epoch": 0.6377760746882185, "grad_norm": 0.07180421054363251, "learning_rate": 3.3099697071792093e-06, "loss": 0.0531, "step": 4577 }, { "epoch": 0.6379154183794329, "grad_norm": 0.08448819816112518, "learning_rate": 3.3077518223346448e-06, "loss": 0.0488, "step": 4578 }, { "epoch": 0.6380547620706473, "grad_norm": 0.12357746809720993, "learning_rate": 3.30553431346899e-06, "loss": 0.0517, "step": 4579 }, { "epoch": 0.6381941057618616, "grad_norm": 0.07250818610191345, "learning_rate": 3.3033171810749274e-06, "loss": 0.0569, "step": 4580 }, { "epoch": 0.638333449453076, "grad_norm": 0.0802890881896019, "learning_rate": 3.3011004256450497e-06, "loss": 0.0554, "step": 4581 }, { "epoch": 0.6384727931442904, "grad_norm": 0.07502352446317673, "learning_rate": 3.2988840476718713e-06, "loss": 0.0498, "step": 4582 }, { "epoch": 0.6386121368355048, "grad_norm": 0.05509054660797119, "learning_rate": 3.2966680476478196e-06, "loss": 0.0461, "step": 4583 }, { "epoch": 0.6387514805267192, "grad_norm": 0.06875148415565491, "learning_rate": 3.294452426065241e-06, "loss": 0.0529, "step": 4584 }, { "epoch": 0.6388908242179335, "grad_norm": 0.07170097529888153, "learning_rate": 3.2922371834163958e-06, "loss": 0.0563, "step": 4585 }, { "epoch": 0.6390301679091479, "grad_norm": 0.07149610668420792, "learning_rate": 3.2900223201934584e-06, "loss": 0.0542, "step": 4586 }, { "epoch": 0.6391695116003623, "grad_norm": 0.12191461771726608, "learning_rate": 3.287807836888521e-06, "loss": 0.0586, "step": 4587 }, { "epoch": 0.6393088552915767, "grad_norm": 0.07529132813215256, "learning_rate": 3.2855937339935933e-06, "loss": 0.0497, "step": 4588 }, { "epoch": 0.639448198982791, "grad_norm": 0.05877859517931938, "learning_rate": 3.2833800120005977e-06, "loss": 0.0467, "step": 4589 }, { "epoch": 0.6395875426740054, "grad_norm": 0.0762028694152832, "learning_rate": 3.2811666714013724e-06, "loss": 0.0509, "step": 4590 }, { "epoch": 0.6397268863652198, "grad_norm": 0.052880626171827316, "learning_rate": 3.2789537126876714e-06, "loss": 0.0388, "step": 4591 }, { "epoch": 0.6398662300564342, "grad_norm": 0.1159139946103096, "learning_rate": 3.2767411363511613e-06, "loss": 0.0568, "step": 4592 }, { "epoch": 0.6400055737476485, "grad_norm": 0.08240318298339844, "learning_rate": 3.2745289428834294e-06, "loss": 0.0514, "step": 4593 }, { "epoch": 0.6401449174388629, "grad_norm": 0.06455820798873901, "learning_rate": 3.272317132775972e-06, "loss": 0.0441, "step": 4594 }, { "epoch": 0.6402842611300773, "grad_norm": 0.0457688644528389, "learning_rate": 3.270105706520207e-06, "loss": 0.0451, "step": 4595 }, { "epoch": 0.6404236048212917, "grad_norm": 0.06613708287477493, "learning_rate": 3.267894664607457e-06, "loss": 0.0489, "step": 4596 }, { "epoch": 0.6405629485125061, "grad_norm": 0.07209887355566025, "learning_rate": 3.265684007528969e-06, "loss": 0.0526, "step": 4597 }, { "epoch": 0.6407022922037204, "grad_norm": 0.08370833098888397, "learning_rate": 3.2634737357758994e-06, "loss": 0.0564, "step": 4598 }, { "epoch": 0.6408416358949348, "grad_norm": 0.09055087715387344, "learning_rate": 3.261263849839319e-06, "loss": 0.062, "step": 4599 }, { "epoch": 0.6409809795861492, "grad_norm": 0.07171456515789032, "learning_rate": 3.2590543502102163e-06, "loss": 0.0581, "step": 4600 }, { "epoch": 0.6411203232773636, "grad_norm": 0.08641044050455093, "learning_rate": 3.256845237379491e-06, "loss": 0.0427, "step": 4601 }, { "epoch": 0.641259666968578, "grad_norm": 0.12213337421417236, "learning_rate": 3.254636511837957e-06, "loss": 0.0507, "step": 4602 }, { "epoch": 0.6413990106597923, "grad_norm": 0.0821838229894638, "learning_rate": 3.252428174076341e-06, "loss": 0.0468, "step": 4603 }, { "epoch": 0.6415383543510068, "grad_norm": 0.072004035115242, "learning_rate": 3.2502202245852887e-06, "loss": 0.0506, "step": 4604 }, { "epoch": 0.6416776980422212, "grad_norm": 0.06607486307621002, "learning_rate": 3.2480126638553533e-06, "loss": 0.053, "step": 4605 }, { "epoch": 0.6418170417334356, "grad_norm": 0.07021421939134598, "learning_rate": 3.245805492377007e-06, "loss": 0.0565, "step": 4606 }, { "epoch": 0.64195638542465, "grad_norm": 0.06783813238143921, "learning_rate": 3.243598710640631e-06, "loss": 0.0555, "step": 4607 }, { "epoch": 0.6420957291158643, "grad_norm": 0.08055990189313889, "learning_rate": 3.2413923191365203e-06, "loss": 0.0444, "step": 4608 }, { "epoch": 0.6422350728070787, "grad_norm": 0.07517543435096741, "learning_rate": 3.2391863183548877e-06, "loss": 0.0628, "step": 4609 }, { "epoch": 0.6423744164982931, "grad_norm": 0.12323378771543503, "learning_rate": 3.236980708785854e-06, "loss": 0.0578, "step": 4610 }, { "epoch": 0.6425137601895075, "grad_norm": 0.09550410509109497, "learning_rate": 3.2347754909194595e-06, "loss": 0.0613, "step": 4611 }, { "epoch": 0.6426531038807218, "grad_norm": 0.08303412050008774, "learning_rate": 3.232570665245648e-06, "loss": 0.0586, "step": 4612 }, { "epoch": 0.6427924475719362, "grad_norm": 0.10337403416633606, "learning_rate": 3.2303662322542835e-06, "loss": 0.0553, "step": 4613 }, { "epoch": 0.6429317912631506, "grad_norm": 0.10056988149881363, "learning_rate": 3.2281621924351407e-06, "loss": 0.05, "step": 4614 }, { "epoch": 0.643071134954365, "grad_norm": 0.11449537426233292, "learning_rate": 3.2259585462779063e-06, "loss": 0.0521, "step": 4615 }, { "epoch": 0.6432104786455793, "grad_norm": 0.1860843449831009, "learning_rate": 3.2237552942721832e-06, "loss": 0.0588, "step": 4616 }, { "epoch": 0.6433498223367937, "grad_norm": 0.09368985146284103, "learning_rate": 3.2215524369074802e-06, "loss": 0.0569, "step": 4617 }, { "epoch": 0.6434891660280081, "grad_norm": 0.07778330892324448, "learning_rate": 3.219349974673223e-06, "loss": 0.0591, "step": 4618 }, { "epoch": 0.6436285097192225, "grad_norm": 0.12558160722255707, "learning_rate": 3.2171479080587475e-06, "loss": 0.0564, "step": 4619 }, { "epoch": 0.6437678534104369, "grad_norm": 0.08635799586772919, "learning_rate": 3.2149462375533046e-06, "loss": 0.0527, "step": 4620 }, { "epoch": 0.6439071971016512, "grad_norm": 0.07409758120775223, "learning_rate": 3.212744963646054e-06, "loss": 0.0614, "step": 4621 }, { "epoch": 0.6440465407928656, "grad_norm": 0.05868968367576599, "learning_rate": 3.2105440868260706e-06, "loss": 0.0521, "step": 4622 }, { "epoch": 0.64418588448408, "grad_norm": 0.1180076003074646, "learning_rate": 3.2083436075823353e-06, "loss": 0.0578, "step": 4623 }, { "epoch": 0.6443252281752944, "grad_norm": 0.0525190532207489, "learning_rate": 3.2061435264037457e-06, "loss": 0.0451, "step": 4624 }, { "epoch": 0.6444645718665087, "grad_norm": 0.07068391144275665, "learning_rate": 3.2039438437791105e-06, "loss": 0.0449, "step": 4625 }, { "epoch": 0.6446039155577231, "grad_norm": 0.0844702273607254, "learning_rate": 3.2017445601971474e-06, "loss": 0.0578, "step": 4626 }, { "epoch": 0.6447432592489375, "grad_norm": 0.07030987739562988, "learning_rate": 3.199545676146492e-06, "loss": 0.048, "step": 4627 }, { "epoch": 0.6448826029401519, "grad_norm": 0.10842228680849075, "learning_rate": 3.197347192115679e-06, "loss": 0.0587, "step": 4628 }, { "epoch": 0.6450219466313662, "grad_norm": 0.07831210643053055, "learning_rate": 3.1951491085931657e-06, "loss": 0.0541, "step": 4629 }, { "epoch": 0.6451612903225806, "grad_norm": 0.07777494192123413, "learning_rate": 3.1929514260673145e-06, "loss": 0.0565, "step": 4630 }, { "epoch": 0.645300634013795, "grad_norm": 0.05733437463641167, "learning_rate": 3.1907541450264003e-06, "loss": 0.0413, "step": 4631 }, { "epoch": 0.6454399777050094, "grad_norm": 0.0709819421172142, "learning_rate": 3.188557265958612e-06, "loss": 0.0445, "step": 4632 }, { "epoch": 0.6455793213962238, "grad_norm": 0.06276021897792816, "learning_rate": 3.186360789352041e-06, "loss": 0.0518, "step": 4633 }, { "epoch": 0.6457186650874381, "grad_norm": 0.08579199761152267, "learning_rate": 3.184164715694697e-06, "loss": 0.059, "step": 4634 }, { "epoch": 0.6458580087786525, "grad_norm": 0.06039068475365639, "learning_rate": 3.1819690454744956e-06, "loss": 0.0493, "step": 4635 }, { "epoch": 0.6459973524698669, "grad_norm": 0.061665020883083344, "learning_rate": 3.1797737791792672e-06, "loss": 0.055, "step": 4636 }, { "epoch": 0.6461366961610813, "grad_norm": 0.09639989584684372, "learning_rate": 3.1775789172967486e-06, "loss": 0.0609, "step": 4637 }, { "epoch": 0.6462760398522956, "grad_norm": 0.07418102025985718, "learning_rate": 3.1753844603145894e-06, "loss": 0.055, "step": 4638 }, { "epoch": 0.64641538354351, "grad_norm": 0.0777088925242424, "learning_rate": 3.1731904087203442e-06, "loss": 0.049, "step": 4639 }, { "epoch": 0.6465547272347244, "grad_norm": 0.0764508917927742, "learning_rate": 3.1709967630014844e-06, "loss": 0.0497, "step": 4640 }, { "epoch": 0.6466940709259388, "grad_norm": 0.06915315240621567, "learning_rate": 3.168803523645387e-06, "loss": 0.0559, "step": 4641 }, { "epoch": 0.6468334146171532, "grad_norm": 0.1209055632352829, "learning_rate": 3.166610691139338e-06, "loss": 0.0555, "step": 4642 }, { "epoch": 0.6469727583083675, "grad_norm": 0.05842460319399834, "learning_rate": 3.1644182659705403e-06, "loss": 0.0471, "step": 4643 }, { "epoch": 0.647112101999582, "grad_norm": 0.0864686369895935, "learning_rate": 3.1622262486260936e-06, "loss": 0.0554, "step": 4644 }, { "epoch": 0.6472514456907964, "grad_norm": 0.08721999824047089, "learning_rate": 3.160034639593018e-06, "loss": 0.0617, "step": 4645 }, { "epoch": 0.6473907893820108, "grad_norm": 0.08923602849245071, "learning_rate": 3.1578434393582392e-06, "loss": 0.0644, "step": 4646 }, { "epoch": 0.6475301330732252, "grad_norm": 0.13499310612678528, "learning_rate": 3.155652648408589e-06, "loss": 0.054, "step": 4647 }, { "epoch": 0.6476694767644395, "grad_norm": 0.06495742499828339, "learning_rate": 3.1534622672308165e-06, "loss": 0.0481, "step": 4648 }, { "epoch": 0.6478088204556539, "grad_norm": 0.0784953311085701, "learning_rate": 3.1512722963115693e-06, "loss": 0.0534, "step": 4649 }, { "epoch": 0.6479481641468683, "grad_norm": 0.16327211260795593, "learning_rate": 3.1490827361374105e-06, "loss": 0.0594, "step": 4650 }, { "epoch": 0.6480875078380827, "grad_norm": 0.1182890310883522, "learning_rate": 3.1468935871948096e-06, "loss": 0.0684, "step": 4651 }, { "epoch": 0.648226851529297, "grad_norm": 0.07501354068517685, "learning_rate": 3.1447048499701478e-06, "loss": 0.0488, "step": 4652 }, { "epoch": 0.6483661952205114, "grad_norm": 0.09338512271642685, "learning_rate": 3.1425165249497118e-06, "loss": 0.0578, "step": 4653 }, { "epoch": 0.6485055389117258, "grad_norm": 0.07427532225847244, "learning_rate": 3.1403286126196963e-06, "loss": 0.0494, "step": 4654 }, { "epoch": 0.6486448826029402, "grad_norm": 0.08437635749578476, "learning_rate": 3.138141113466205e-06, "loss": 0.0551, "step": 4655 }, { "epoch": 0.6487842262941546, "grad_norm": 0.13058432936668396, "learning_rate": 3.135954027975252e-06, "loss": 0.0535, "step": 4656 }, { "epoch": 0.6489235699853689, "grad_norm": 0.1944456547498703, "learning_rate": 3.1337673566327575e-06, "loss": 0.0571, "step": 4657 }, { "epoch": 0.6490629136765833, "grad_norm": 0.18496839702129364, "learning_rate": 3.1315810999245483e-06, "loss": 0.0577, "step": 4658 }, { "epoch": 0.6492022573677977, "grad_norm": 0.0710131898522377, "learning_rate": 3.1293952583363653e-06, "loss": 0.0592, "step": 4659 }, { "epoch": 0.6493416010590121, "grad_norm": 0.09979034215211868, "learning_rate": 3.127209832353846e-06, "loss": 0.0524, "step": 4660 }, { "epoch": 0.6494809447502264, "grad_norm": 0.07354643195867538, "learning_rate": 3.1250248224625463e-06, "loss": 0.0469, "step": 4661 }, { "epoch": 0.6496202884414408, "grad_norm": 0.09743399918079376, "learning_rate": 3.1228402291479243e-06, "loss": 0.0555, "step": 4662 }, { "epoch": 0.6497596321326552, "grad_norm": 0.07859030365943909, "learning_rate": 3.1206560528953467e-06, "loss": 0.061, "step": 4663 }, { "epoch": 0.6498989758238696, "grad_norm": 0.13054315745830536, "learning_rate": 3.1184722941900902e-06, "loss": 0.0513, "step": 4664 }, { "epoch": 0.650038319515084, "grad_norm": 0.10940619558095932, "learning_rate": 3.1162889535173323e-06, "loss": 0.0636, "step": 4665 }, { "epoch": 0.6501776632062983, "grad_norm": 0.08403994143009186, "learning_rate": 3.1141060313621637e-06, "loss": 0.0514, "step": 4666 }, { "epoch": 0.6503170068975127, "grad_norm": 0.0834677666425705, "learning_rate": 3.111923528209577e-06, "loss": 0.056, "step": 4667 }, { "epoch": 0.6504563505887271, "grad_norm": 0.09218906611204147, "learning_rate": 3.1097414445444796e-06, "loss": 0.0612, "step": 4668 }, { "epoch": 0.6505956942799415, "grad_norm": 0.10245212912559509, "learning_rate": 3.1075597808516776e-06, "loss": 0.0599, "step": 4669 }, { "epoch": 0.6507350379711558, "grad_norm": 0.09027300029993057, "learning_rate": 3.1053785376158865e-06, "loss": 0.0528, "step": 4670 }, { "epoch": 0.6508743816623702, "grad_norm": 0.06488863378763199, "learning_rate": 3.1031977153217286e-06, "loss": 0.0577, "step": 4671 }, { "epoch": 0.6510137253535846, "grad_norm": 0.16527311503887177, "learning_rate": 3.1010173144537348e-06, "loss": 0.0614, "step": 4672 }, { "epoch": 0.651153069044799, "grad_norm": 0.07818372547626495, "learning_rate": 3.0988373354963387e-06, "loss": 0.056, "step": 4673 }, { "epoch": 0.6512924127360133, "grad_norm": 0.08910881727933884, "learning_rate": 3.0966577789338812e-06, "loss": 0.0623, "step": 4674 }, { "epoch": 0.6514317564272277, "grad_norm": 0.07685418426990509, "learning_rate": 3.0944786452506147e-06, "loss": 0.055, "step": 4675 }, { "epoch": 0.6515711001184421, "grad_norm": 0.07172869145870209, "learning_rate": 3.092299934930686e-06, "loss": 0.0521, "step": 4676 }, { "epoch": 0.6517104438096565, "grad_norm": 0.07908608764410019, "learning_rate": 3.0901216484581597e-06, "loss": 0.0565, "step": 4677 }, { "epoch": 0.6518497875008709, "grad_norm": 0.1716628521680832, "learning_rate": 3.087943786316999e-06, "loss": 0.061, "step": 4678 }, { "epoch": 0.6519891311920852, "grad_norm": 0.0655159130692482, "learning_rate": 3.085766348991076e-06, "loss": 0.0489, "step": 4679 }, { "epoch": 0.6521284748832996, "grad_norm": 0.07178297638893127, "learning_rate": 3.0835893369641694e-06, "loss": 0.0558, "step": 4680 }, { "epoch": 0.652267818574514, "grad_norm": 0.08756351470947266, "learning_rate": 3.0814127507199587e-06, "loss": 0.0578, "step": 4681 }, { "epoch": 0.6524071622657284, "grad_norm": 0.09826286137104034, "learning_rate": 3.0792365907420323e-06, "loss": 0.0522, "step": 4682 }, { "epoch": 0.6525465059569427, "grad_norm": 0.09871425479650497, "learning_rate": 3.0770608575138825e-06, "loss": 0.0573, "step": 4683 }, { "epoch": 0.6526858496481572, "grad_norm": 0.08691855520009995, "learning_rate": 3.0748855515189104e-06, "loss": 0.0521, "step": 4684 }, { "epoch": 0.6528251933393716, "grad_norm": 0.07139725238084793, "learning_rate": 3.0727106732404183e-06, "loss": 0.0523, "step": 4685 }, { "epoch": 0.652964537030586, "grad_norm": 0.05333508551120758, "learning_rate": 3.0705362231616133e-06, "loss": 0.0501, "step": 4686 }, { "epoch": 0.6531038807218004, "grad_norm": 0.12614698708057404, "learning_rate": 3.0683622017656074e-06, "loss": 0.0578, "step": 4687 }, { "epoch": 0.6532432244130147, "grad_norm": 0.07259982824325562, "learning_rate": 3.066188609535421e-06, "loss": 0.0471, "step": 4688 }, { "epoch": 0.6533825681042291, "grad_norm": 0.11869249492883682, "learning_rate": 3.064015446953977e-06, "loss": 0.0619, "step": 4689 }, { "epoch": 0.6535219117954435, "grad_norm": 0.06820869445800781, "learning_rate": 3.0618427145041017e-06, "loss": 0.0545, "step": 4690 }, { "epoch": 0.6536612554866579, "grad_norm": 0.08931764215230942, "learning_rate": 3.059670412668525e-06, "loss": 0.0581, "step": 4691 }, { "epoch": 0.6538005991778723, "grad_norm": 0.09588248282670975, "learning_rate": 3.0574985419298843e-06, "loss": 0.0493, "step": 4692 }, { "epoch": 0.6539399428690866, "grad_norm": 0.07440144568681717, "learning_rate": 3.055327102770719e-06, "loss": 0.0525, "step": 4693 }, { "epoch": 0.654079286560301, "grad_norm": 0.05569580942392349, "learning_rate": 3.053156095673474e-06, "loss": 0.0542, "step": 4694 }, { "epoch": 0.6542186302515154, "grad_norm": 0.07933412492275238, "learning_rate": 3.0509855211204976e-06, "loss": 0.0482, "step": 4695 }, { "epoch": 0.6543579739427298, "grad_norm": 0.09564250707626343, "learning_rate": 3.048815379594043e-06, "loss": 0.0523, "step": 4696 }, { "epoch": 0.6544973176339441, "grad_norm": 0.09606681764125824, "learning_rate": 3.046645671576264e-06, "loss": 0.0564, "step": 4697 }, { "epoch": 0.6546366613251585, "grad_norm": 0.09296216070652008, "learning_rate": 3.044476397549221e-06, "loss": 0.0529, "step": 4698 }, { "epoch": 0.6547760050163729, "grad_norm": 0.08959583938121796, "learning_rate": 3.0423075579948756e-06, "loss": 0.0555, "step": 4699 }, { "epoch": 0.6549153487075873, "grad_norm": 0.07117096334695816, "learning_rate": 3.0401391533950976e-06, "loss": 0.0461, "step": 4700 }, { "epoch": 0.6550546923988017, "grad_norm": 0.08719198405742645, "learning_rate": 3.037971184231655e-06, "loss": 0.0595, "step": 4701 }, { "epoch": 0.655194036090016, "grad_norm": 0.0642516016960144, "learning_rate": 3.035803650986222e-06, "loss": 0.0458, "step": 4702 }, { "epoch": 0.6553333797812304, "grad_norm": 0.07004646956920624, "learning_rate": 3.0336365541403723e-06, "loss": 0.0466, "step": 4703 }, { "epoch": 0.6554727234724448, "grad_norm": 0.07484911382198334, "learning_rate": 3.0314698941755886e-06, "loss": 0.0502, "step": 4704 }, { "epoch": 0.6556120671636592, "grad_norm": 0.0634661465883255, "learning_rate": 3.0293036715732527e-06, "loss": 0.045, "step": 4705 }, { "epoch": 0.6557514108548735, "grad_norm": 0.07643570750951767, "learning_rate": 3.0271378868146494e-06, "loss": 0.0456, "step": 4706 }, { "epoch": 0.6558907545460879, "grad_norm": 0.06451112031936646, "learning_rate": 3.024972540380966e-06, "loss": 0.0505, "step": 4707 }, { "epoch": 0.6560300982373023, "grad_norm": 0.07153883576393127, "learning_rate": 3.0228076327532925e-06, "loss": 0.0551, "step": 4708 }, { "epoch": 0.6561694419285167, "grad_norm": 0.09540286660194397, "learning_rate": 3.0206431644126234e-06, "loss": 0.0476, "step": 4709 }, { "epoch": 0.656308785619731, "grad_norm": 0.13510027527809143, "learning_rate": 3.0184791358398537e-06, "loss": 0.0568, "step": 4710 }, { "epoch": 0.6564481293109454, "grad_norm": 0.09142244607210159, "learning_rate": 3.016315547515783e-06, "loss": 0.0517, "step": 4711 }, { "epoch": 0.6565874730021598, "grad_norm": 0.10394962877035141, "learning_rate": 3.0141523999211065e-06, "loss": 0.0573, "step": 4712 }, { "epoch": 0.6567268166933742, "grad_norm": 0.08967318385839462, "learning_rate": 3.0119896935364305e-06, "loss": 0.0543, "step": 4713 }, { "epoch": 0.6568661603845886, "grad_norm": 0.07972319424152374, "learning_rate": 3.009827428842258e-06, "loss": 0.0503, "step": 4714 }, { "epoch": 0.6570055040758029, "grad_norm": 0.07433620095252991, "learning_rate": 3.0076656063189926e-06, "loss": 0.0555, "step": 4715 }, { "epoch": 0.6571448477670173, "grad_norm": 0.07954548299312592, "learning_rate": 3.0055042264469447e-06, "loss": 0.0581, "step": 4716 }, { "epoch": 0.6572841914582317, "grad_norm": 0.11283513903617859, "learning_rate": 3.003343289706324e-06, "loss": 0.0605, "step": 4717 }, { "epoch": 0.6574235351494461, "grad_norm": 0.15884889662265778, "learning_rate": 3.001182796577239e-06, "loss": 0.0532, "step": 4718 }, { "epoch": 0.6575628788406604, "grad_norm": 0.06682777404785156, "learning_rate": 2.999022747539701e-06, "loss": 0.0512, "step": 4719 }, { "epoch": 0.6577022225318748, "grad_norm": 0.06884802132844925, "learning_rate": 2.9968631430736274e-06, "loss": 0.045, "step": 4720 }, { "epoch": 0.6578415662230892, "grad_norm": 0.08781354129314423, "learning_rate": 2.99470398365883e-06, "loss": 0.0449, "step": 4721 }, { "epoch": 0.6579809099143036, "grad_norm": 0.07350879907608032, "learning_rate": 2.9925452697750275e-06, "loss": 0.0525, "step": 4722 }, { "epoch": 0.658120253605518, "grad_norm": 0.09756495803594589, "learning_rate": 2.990387001901834e-06, "loss": 0.0632, "step": 4723 }, { "epoch": 0.6582595972967323, "grad_norm": 0.07140127569437027, "learning_rate": 2.988229180518767e-06, "loss": 0.0496, "step": 4724 }, { "epoch": 0.6583989409879468, "grad_norm": 0.06368130445480347, "learning_rate": 2.9860718061052478e-06, "loss": 0.0536, "step": 4725 }, { "epoch": 0.6585382846791612, "grad_norm": 0.06481610983610153, "learning_rate": 2.9839148791405937e-06, "loss": 0.0508, "step": 4726 }, { "epoch": 0.6586776283703756, "grad_norm": 0.05986953154206276, "learning_rate": 2.981758400104028e-06, "loss": 0.0535, "step": 4727 }, { "epoch": 0.65881697206159, "grad_norm": 0.10891203582286835, "learning_rate": 2.979602369474667e-06, "loss": 0.0578, "step": 4728 }, { "epoch": 0.6589563157528043, "grad_norm": 0.0990588515996933, "learning_rate": 2.977446787731532e-06, "loss": 0.0601, "step": 4729 }, { "epoch": 0.6590956594440187, "grad_norm": 0.07801506668329239, "learning_rate": 2.975291655353546e-06, "loss": 0.0519, "step": 4730 }, { "epoch": 0.6592350031352331, "grad_norm": 0.07363911718130112, "learning_rate": 2.9731369728195288e-06, "loss": 0.0505, "step": 4731 }, { "epoch": 0.6593743468264475, "grad_norm": 0.0929122269153595, "learning_rate": 2.9709827406082028e-06, "loss": 0.0729, "step": 4732 }, { "epoch": 0.6595136905176618, "grad_norm": 0.06768246740102768, "learning_rate": 2.9688289591981887e-06, "loss": 0.0415, "step": 4733 }, { "epoch": 0.6596530342088762, "grad_norm": 0.0993683710694313, "learning_rate": 2.9666756290680078e-06, "loss": 0.0575, "step": 4734 }, { "epoch": 0.6597923779000906, "grad_norm": 0.13581715524196625, "learning_rate": 2.964522750696079e-06, "loss": 0.0595, "step": 4735 }, { "epoch": 0.659931721591305, "grad_norm": 0.05767132714390755, "learning_rate": 2.962370324560725e-06, "loss": 0.0463, "step": 4736 }, { "epoch": 0.6600710652825194, "grad_norm": 0.08594764769077301, "learning_rate": 2.9602183511401656e-06, "loss": 0.0448, "step": 4737 }, { "epoch": 0.6602104089737337, "grad_norm": 0.07535246759653091, "learning_rate": 2.9580668309125203e-06, "loss": 0.0517, "step": 4738 }, { "epoch": 0.6603497526649481, "grad_norm": 0.11987196654081345, "learning_rate": 2.9559157643558046e-06, "loss": 0.0486, "step": 4739 }, { "epoch": 0.6604890963561625, "grad_norm": 0.08948517590761185, "learning_rate": 2.9537651519479403e-06, "loss": 0.0481, "step": 4740 }, { "epoch": 0.6606284400473769, "grad_norm": 0.07497917115688324, "learning_rate": 2.951614994166743e-06, "loss": 0.0549, "step": 4741 }, { "epoch": 0.6607677837385912, "grad_norm": 0.06901587545871735, "learning_rate": 2.9494652914899267e-06, "loss": 0.0485, "step": 4742 }, { "epoch": 0.6609071274298056, "grad_norm": 0.08926527947187424, "learning_rate": 2.947316044395112e-06, "loss": 0.05, "step": 4743 }, { "epoch": 0.66104647112102, "grad_norm": 0.1174115389585495, "learning_rate": 2.945167253359806e-06, "loss": 0.054, "step": 4744 }, { "epoch": 0.6611858148122344, "grad_norm": 0.11506317555904388, "learning_rate": 2.943018918861424e-06, "loss": 0.0545, "step": 4745 }, { "epoch": 0.6613251585034488, "grad_norm": 0.07475755363702774, "learning_rate": 2.940871041377277e-06, "loss": 0.0661, "step": 4746 }, { "epoch": 0.6614645021946631, "grad_norm": 0.07129078358411789, "learning_rate": 2.938723621384572e-06, "loss": 0.0545, "step": 4747 }, { "epoch": 0.6616038458858775, "grad_norm": 0.07199318706989288, "learning_rate": 2.936576659360421e-06, "loss": 0.0536, "step": 4748 }, { "epoch": 0.6617431895770919, "grad_norm": 0.09507235884666443, "learning_rate": 2.9344301557818267e-06, "loss": 0.0539, "step": 4749 }, { "epoch": 0.6618825332683063, "grad_norm": 0.09411122649908066, "learning_rate": 2.9322841111256937e-06, "loss": 0.0592, "step": 4750 }, { "epoch": 0.6620218769595206, "grad_norm": 0.07147178053855896, "learning_rate": 2.930138525868824e-06, "loss": 0.0545, "step": 4751 }, { "epoch": 0.662161220650735, "grad_norm": 0.07731910794973373, "learning_rate": 2.927993400487919e-06, "loss": 0.0474, "step": 4752 }, { "epoch": 0.6623005643419494, "grad_norm": 0.07263687998056412, "learning_rate": 2.9258487354595754e-06, "loss": 0.0597, "step": 4753 }, { "epoch": 0.6624399080331638, "grad_norm": 0.0754777118563652, "learning_rate": 2.9237045312602908e-06, "loss": 0.0498, "step": 4754 }, { "epoch": 0.6625792517243781, "grad_norm": 0.07285064458847046, "learning_rate": 2.921560788366454e-06, "loss": 0.0409, "step": 4755 }, { "epoch": 0.6627185954155925, "grad_norm": 0.07404270023107529, "learning_rate": 2.9194175072543594e-06, "loss": 0.0564, "step": 4756 }, { "epoch": 0.6628579391068069, "grad_norm": 0.09383848309516907, "learning_rate": 2.9172746884001944e-06, "loss": 0.0619, "step": 4757 }, { "epoch": 0.6629972827980213, "grad_norm": 0.10265857726335526, "learning_rate": 2.9151323322800433e-06, "loss": 0.0741, "step": 4758 }, { "epoch": 0.6631366264892357, "grad_norm": 0.061529237776994705, "learning_rate": 2.9129904393698917e-06, "loss": 0.0439, "step": 4759 }, { "epoch": 0.66327597018045, "grad_norm": 0.06623881310224533, "learning_rate": 2.910849010145617e-06, "loss": 0.0511, "step": 4760 }, { "epoch": 0.6634153138716644, "grad_norm": 0.19260583817958832, "learning_rate": 2.908708045082994e-06, "loss": 0.0674, "step": 4761 }, { "epoch": 0.6635546575628788, "grad_norm": 0.07739590108394623, "learning_rate": 2.906567544657699e-06, "loss": 0.0636, "step": 4762 }, { "epoch": 0.6636940012540932, "grad_norm": 0.10052764415740967, "learning_rate": 2.9044275093453034e-06, "loss": 0.0601, "step": 4763 }, { "epoch": 0.6638333449453075, "grad_norm": 0.07794144004583359, "learning_rate": 2.902287939621272e-06, "loss": 0.0502, "step": 4764 }, { "epoch": 0.663972688636522, "grad_norm": 0.09721159189939499, "learning_rate": 2.9001488359609676e-06, "loss": 0.0602, "step": 4765 }, { "epoch": 0.6641120323277364, "grad_norm": 0.0835815817117691, "learning_rate": 2.898010198839651e-06, "loss": 0.065, "step": 4766 }, { "epoch": 0.6642513760189508, "grad_norm": 0.0940609946846962, "learning_rate": 2.895872028732481e-06, "loss": 0.0577, "step": 4767 }, { "epoch": 0.6643907197101652, "grad_norm": 0.17284785211086273, "learning_rate": 2.893734326114506e-06, "loss": 0.0499, "step": 4768 }, { "epoch": 0.6645300634013795, "grad_norm": 0.08872541785240173, "learning_rate": 2.8915970914606793e-06, "loss": 0.0538, "step": 4769 }, { "epoch": 0.6646694070925939, "grad_norm": 0.06715186685323715, "learning_rate": 2.8894603252458407e-06, "loss": 0.0436, "step": 4770 }, { "epoch": 0.6648087507838083, "grad_norm": 0.09992779791355133, "learning_rate": 2.8873240279447355e-06, "loss": 0.0555, "step": 4771 }, { "epoch": 0.6649480944750227, "grad_norm": 0.06985561549663544, "learning_rate": 2.8851882000319966e-06, "loss": 0.0545, "step": 4772 }, { "epoch": 0.665087438166237, "grad_norm": 0.08185523003339767, "learning_rate": 2.883052841982157e-06, "loss": 0.0482, "step": 4773 }, { "epoch": 0.6652267818574514, "grad_norm": 0.06221539527177811, "learning_rate": 2.8809179542696474e-06, "loss": 0.0511, "step": 4774 }, { "epoch": 0.6653661255486658, "grad_norm": 0.09473932534456253, "learning_rate": 2.878783537368789e-06, "loss": 0.0504, "step": 4775 }, { "epoch": 0.6655054692398802, "grad_norm": 0.0781300887465477, "learning_rate": 2.8766495917537985e-06, "loss": 0.048, "step": 4776 }, { "epoch": 0.6656448129310946, "grad_norm": 0.08226018399000168, "learning_rate": 2.874516117898792e-06, "loss": 0.0555, "step": 4777 }, { "epoch": 0.6657841566223089, "grad_norm": 0.10136803984642029, "learning_rate": 2.8723831162777806e-06, "loss": 0.0523, "step": 4778 }, { "epoch": 0.6659235003135233, "grad_norm": 0.07708325237035751, "learning_rate": 2.8702505873646636e-06, "loss": 0.0519, "step": 4779 }, { "epoch": 0.6660628440047377, "grad_norm": 0.099998340010643, "learning_rate": 2.8681185316332453e-06, "loss": 0.0636, "step": 4780 }, { "epoch": 0.6662021876959521, "grad_norm": 0.0814286470413208, "learning_rate": 2.865986949557218e-06, "loss": 0.051, "step": 4781 }, { "epoch": 0.6663415313871665, "grad_norm": 0.07188466191291809, "learning_rate": 2.8638558416101683e-06, "loss": 0.0522, "step": 4782 }, { "epoch": 0.6664808750783808, "grad_norm": 0.1313522607088089, "learning_rate": 2.8617252082655813e-06, "loss": 0.0663, "step": 4783 }, { "epoch": 0.6666202187695952, "grad_norm": 0.08414238691329956, "learning_rate": 2.8595950499968352e-06, "loss": 0.0584, "step": 4784 }, { "epoch": 0.6667595624608096, "grad_norm": 0.06422757357358932, "learning_rate": 2.8574653672772068e-06, "loss": 0.0482, "step": 4785 }, { "epoch": 0.666898906152024, "grad_norm": 0.10440972447395325, "learning_rate": 2.8553361605798545e-06, "loss": 0.0572, "step": 4786 }, { "epoch": 0.6670382498432383, "grad_norm": 0.10503745079040527, "learning_rate": 2.8532074303778446e-06, "loss": 0.0531, "step": 4787 }, { "epoch": 0.6671775935344527, "grad_norm": 0.0966191217303276, "learning_rate": 2.8510791771441327e-06, "loss": 0.0602, "step": 4788 }, { "epoch": 0.6673169372256671, "grad_norm": 0.09108087420463562, "learning_rate": 2.8489514013515656e-06, "loss": 0.0448, "step": 4789 }, { "epoch": 0.6674562809168815, "grad_norm": 0.08469603210687637, "learning_rate": 2.8468241034728878e-06, "loss": 0.0582, "step": 4790 }, { "epoch": 0.6675956246080959, "grad_norm": 0.08288419991731644, "learning_rate": 2.8446972839807384e-06, "loss": 0.0523, "step": 4791 }, { "epoch": 0.6677349682993102, "grad_norm": 0.15720948576927185, "learning_rate": 2.8425709433476455e-06, "loss": 0.0665, "step": 4792 }, { "epoch": 0.6678743119905246, "grad_norm": 0.05897888168692589, "learning_rate": 2.8404450820460326e-06, "loss": 0.0471, "step": 4793 }, { "epoch": 0.668013655681739, "grad_norm": 0.0650927722454071, "learning_rate": 2.8383197005482187e-06, "loss": 0.0558, "step": 4794 }, { "epoch": 0.6681529993729534, "grad_norm": 0.07246372103691101, "learning_rate": 2.8361947993264185e-06, "loss": 0.051, "step": 4795 }, { "epoch": 0.6682923430641677, "grad_norm": 0.05806669592857361, "learning_rate": 2.834070378852732e-06, "loss": 0.0415, "step": 4796 }, { "epoch": 0.6684316867553821, "grad_norm": 0.08292754739522934, "learning_rate": 2.8319464395991567e-06, "loss": 0.0615, "step": 4797 }, { "epoch": 0.6685710304465965, "grad_norm": 0.10076726227998734, "learning_rate": 2.829822982037585e-06, "loss": 0.0532, "step": 4798 }, { "epoch": 0.6687103741378109, "grad_norm": 0.10096703469753265, "learning_rate": 2.8277000066398032e-06, "loss": 0.0678, "step": 4799 }, { "epoch": 0.6688497178290252, "grad_norm": 0.07418700307607651, "learning_rate": 2.8255775138774827e-06, "loss": 0.059, "step": 4800 }, { "epoch": 0.6689890615202396, "grad_norm": 0.09920790791511536, "learning_rate": 2.823455504222198e-06, "loss": 0.0593, "step": 4801 }, { "epoch": 0.669128405211454, "grad_norm": 0.07718557119369507, "learning_rate": 2.821333978145407e-06, "loss": 0.0483, "step": 4802 }, { "epoch": 0.6692677489026684, "grad_norm": 0.05946483463048935, "learning_rate": 2.8192129361184685e-06, "loss": 0.0467, "step": 4803 }, { "epoch": 0.6694070925938828, "grad_norm": 0.12252868711948395, "learning_rate": 2.817092378612625e-06, "loss": 0.0558, "step": 4804 }, { "epoch": 0.6695464362850972, "grad_norm": 0.0742538794875145, "learning_rate": 2.814972306099018e-06, "loss": 0.0569, "step": 4805 }, { "epoch": 0.6696857799763116, "grad_norm": 0.0984501913189888, "learning_rate": 2.8128527190486823e-06, "loss": 0.0662, "step": 4806 }, { "epoch": 0.669825123667526, "grad_norm": 0.06281553208827972, "learning_rate": 2.8107336179325383e-06, "loss": 0.0511, "step": 4807 }, { "epoch": 0.6699644673587404, "grad_norm": 0.0682847648859024, "learning_rate": 2.808615003221401e-06, "loss": 0.0513, "step": 4808 }, { "epoch": 0.6701038110499548, "grad_norm": 0.09808211028575897, "learning_rate": 2.80649687538598e-06, "loss": 0.0557, "step": 4809 }, { "epoch": 0.6702431547411691, "grad_norm": 0.06822305917739868, "learning_rate": 2.8043792348968767e-06, "loss": 0.0509, "step": 4810 }, { "epoch": 0.6703824984323835, "grad_norm": 0.07668324559926987, "learning_rate": 2.8022620822245782e-06, "loss": 0.052, "step": 4811 }, { "epoch": 0.6705218421235979, "grad_norm": 0.07595229893922806, "learning_rate": 2.8001454178394715e-06, "loss": 0.062, "step": 4812 }, { "epoch": 0.6706611858148123, "grad_norm": 0.0768076479434967, "learning_rate": 2.7980292422118282e-06, "loss": 0.0569, "step": 4813 }, { "epoch": 0.6708005295060266, "grad_norm": 0.08774106204509735, "learning_rate": 2.795913555811817e-06, "loss": 0.0607, "step": 4814 }, { "epoch": 0.670939873197241, "grad_norm": 0.11254257708787918, "learning_rate": 2.793798359109492e-06, "loss": 0.0588, "step": 4815 }, { "epoch": 0.6710792168884554, "grad_norm": 0.06633991748094559, "learning_rate": 2.7916836525748024e-06, "loss": 0.0521, "step": 4816 }, { "epoch": 0.6712185605796698, "grad_norm": 0.06226175278425217, "learning_rate": 2.7895694366775934e-06, "loss": 0.0586, "step": 4817 }, { "epoch": 0.6713579042708842, "grad_norm": 0.09304317831993103, "learning_rate": 2.7874557118875863e-06, "loss": 0.0563, "step": 4818 }, { "epoch": 0.6714972479620985, "grad_norm": 0.07473073899745941, "learning_rate": 2.7853424786744068e-06, "loss": 0.0583, "step": 4819 }, { "epoch": 0.6716365916533129, "grad_norm": 0.08719348907470703, "learning_rate": 2.7832297375075685e-06, "loss": 0.0635, "step": 4820 }, { "epoch": 0.6717759353445273, "grad_norm": 0.06457062810659409, "learning_rate": 2.7811174888564713e-06, "loss": 0.0527, "step": 4821 }, { "epoch": 0.6719152790357417, "grad_norm": 0.06016220897436142, "learning_rate": 2.779005733190412e-06, "loss": 0.0461, "step": 4822 }, { "epoch": 0.672054622726956, "grad_norm": 0.12498952448368073, "learning_rate": 2.7768944709785705e-06, "loss": 0.0511, "step": 4823 }, { "epoch": 0.6721939664181704, "grad_norm": 0.08840315043926239, "learning_rate": 2.774783702690025e-06, "loss": 0.0534, "step": 4824 }, { "epoch": 0.6723333101093848, "grad_norm": 0.05788619816303253, "learning_rate": 2.7726734287937367e-06, "loss": 0.0542, "step": 4825 }, { "epoch": 0.6724726538005992, "grad_norm": 0.06724393367767334, "learning_rate": 2.770563649758562e-06, "loss": 0.0569, "step": 4826 }, { "epoch": 0.6726119974918136, "grad_norm": 0.058510974049568176, "learning_rate": 2.768454366053247e-06, "loss": 0.0466, "step": 4827 }, { "epoch": 0.6727513411830279, "grad_norm": 0.07356071472167969, "learning_rate": 2.7663455781464245e-06, "loss": 0.0567, "step": 4828 }, { "epoch": 0.6728906848742423, "grad_norm": 0.08932532370090485, "learning_rate": 2.764237286506618e-06, "loss": 0.0575, "step": 4829 }, { "epoch": 0.6730300285654567, "grad_norm": 0.0725843533873558, "learning_rate": 2.7621294916022423e-06, "loss": 0.0542, "step": 4830 }, { "epoch": 0.6731693722566711, "grad_norm": 0.07163878530263901, "learning_rate": 2.760022193901605e-06, "loss": 0.0493, "step": 4831 }, { "epoch": 0.6733087159478854, "grad_norm": 0.09340589493513107, "learning_rate": 2.7579153938728943e-06, "loss": 0.0736, "step": 4832 }, { "epoch": 0.6734480596390998, "grad_norm": 0.16100287437438965, "learning_rate": 2.7558090919841972e-06, "loss": 0.0551, "step": 4833 }, { "epoch": 0.6735874033303142, "grad_norm": 0.09968803077936172, "learning_rate": 2.753703288703482e-06, "loss": 0.0559, "step": 4834 }, { "epoch": 0.6737267470215286, "grad_norm": 0.09133012592792511, "learning_rate": 2.7515979844986148e-06, "loss": 0.0597, "step": 4835 }, { "epoch": 0.673866090712743, "grad_norm": 0.12688301503658295, "learning_rate": 2.749493179837341e-06, "loss": 0.0543, "step": 4836 }, { "epoch": 0.6740054344039573, "grad_norm": 0.06680864095687866, "learning_rate": 2.747388875187303e-06, "loss": 0.049, "step": 4837 }, { "epoch": 0.6741447780951717, "grad_norm": 0.06255491077899933, "learning_rate": 2.7452850710160305e-06, "loss": 0.0501, "step": 4838 }, { "epoch": 0.6742841217863861, "grad_norm": 0.08518878370523453, "learning_rate": 2.74318176779094e-06, "loss": 0.0542, "step": 4839 }, { "epoch": 0.6744234654776005, "grad_norm": 0.08199314028024673, "learning_rate": 2.741078965979334e-06, "loss": 0.0493, "step": 4840 }, { "epoch": 0.6745628091688148, "grad_norm": 0.07189822196960449, "learning_rate": 2.7389766660484103e-06, "loss": 0.0578, "step": 4841 }, { "epoch": 0.6747021528600292, "grad_norm": 0.0740530714392662, "learning_rate": 2.736874868465253e-06, "loss": 0.0541, "step": 4842 }, { "epoch": 0.6748414965512436, "grad_norm": 0.05655915290117264, "learning_rate": 2.7347735736968318e-06, "loss": 0.0438, "step": 4843 }, { "epoch": 0.674980840242458, "grad_norm": 0.12916399538516998, "learning_rate": 2.7326727822100047e-06, "loss": 0.0542, "step": 4844 }, { "epoch": 0.6751201839336725, "grad_norm": 0.08187045156955719, "learning_rate": 2.7305724944715218e-06, "loss": 0.0433, "step": 4845 }, { "epoch": 0.6752595276248868, "grad_norm": 0.07897275686264038, "learning_rate": 2.72847271094802e-06, "loss": 0.0598, "step": 4846 }, { "epoch": 0.6753988713161012, "grad_norm": 0.0776299387216568, "learning_rate": 2.7263734321060198e-06, "loss": 0.052, "step": 4847 }, { "epoch": 0.6755382150073156, "grad_norm": 0.08948611468076706, "learning_rate": 2.7242746584119364e-06, "loss": 0.0481, "step": 4848 }, { "epoch": 0.67567755869853, "grad_norm": 0.09289266914129257, "learning_rate": 2.722176390332071e-06, "loss": 0.0461, "step": 4849 }, { "epoch": 0.6758169023897443, "grad_norm": 0.07372020184993744, "learning_rate": 2.720078628332605e-06, "loss": 0.0564, "step": 4850 }, { "epoch": 0.6759562460809587, "grad_norm": 0.07678327709436417, "learning_rate": 2.7179813728796156e-06, "loss": 0.0488, "step": 4851 }, { "epoch": 0.6760955897721731, "grad_norm": 0.10561781376600266, "learning_rate": 2.7158846244390657e-06, "loss": 0.0599, "step": 4852 }, { "epoch": 0.6762349334633875, "grad_norm": 0.060823142528533936, "learning_rate": 2.7137883834768076e-06, "loss": 0.047, "step": 4853 }, { "epoch": 0.6763742771546019, "grad_norm": 0.06719938665628433, "learning_rate": 2.7116926504585756e-06, "loss": 0.0467, "step": 4854 }, { "epoch": 0.6765136208458162, "grad_norm": 0.08423199504613876, "learning_rate": 2.7095974258499914e-06, "loss": 0.0563, "step": 4855 }, { "epoch": 0.6766529645370306, "grad_norm": 0.07826627045869827, "learning_rate": 2.7075027101165706e-06, "loss": 0.0592, "step": 4856 }, { "epoch": 0.676792308228245, "grad_norm": 0.05656365305185318, "learning_rate": 2.7054085037237066e-06, "loss": 0.0412, "step": 4857 }, { "epoch": 0.6769316519194594, "grad_norm": 0.11785440146923065, "learning_rate": 2.7033148071366866e-06, "loss": 0.0595, "step": 4858 }, { "epoch": 0.6770709956106737, "grad_norm": 0.0766441747546196, "learning_rate": 2.701221620820685e-06, "loss": 0.0419, "step": 4859 }, { "epoch": 0.6772103393018881, "grad_norm": 0.05691485106945038, "learning_rate": 2.6991289452407564e-06, "loss": 0.0593, "step": 4860 }, { "epoch": 0.6773496829931025, "grad_norm": 0.09628929197788239, "learning_rate": 2.697036780861845e-06, "loss": 0.0651, "step": 4861 }, { "epoch": 0.6774890266843169, "grad_norm": 0.09972301870584488, "learning_rate": 2.694945128148784e-06, "loss": 0.0682, "step": 4862 }, { "epoch": 0.6776283703755313, "grad_norm": 0.05100781098008156, "learning_rate": 2.692853987566291e-06, "loss": 0.0458, "step": 4863 }, { "epoch": 0.6777677140667456, "grad_norm": 0.09886938333511353, "learning_rate": 2.690763359578969e-06, "loss": 0.051, "step": 4864 }, { "epoch": 0.67790705775796, "grad_norm": 0.07638214528560638, "learning_rate": 2.6886732446513066e-06, "loss": 0.0548, "step": 4865 }, { "epoch": 0.6780464014491744, "grad_norm": 0.07301630079746246, "learning_rate": 2.68658364324768e-06, "loss": 0.0545, "step": 4866 }, { "epoch": 0.6781857451403888, "grad_norm": 0.05538376420736313, "learning_rate": 2.684494555832353e-06, "loss": 0.053, "step": 4867 }, { "epoch": 0.6783250888316031, "grad_norm": 0.09801625460386276, "learning_rate": 2.6824059828694715e-06, "loss": 0.0728, "step": 4868 }, { "epoch": 0.6784644325228175, "grad_norm": 0.07978673279285431, "learning_rate": 2.680317924823068e-06, "loss": 0.0489, "step": 4869 }, { "epoch": 0.6786037762140319, "grad_norm": 0.10324831306934357, "learning_rate": 2.6782303821570644e-06, "loss": 0.055, "step": 4870 }, { "epoch": 0.6787431199052463, "grad_norm": 0.13081541657447815, "learning_rate": 2.676143355335263e-06, "loss": 0.0531, "step": 4871 }, { "epoch": 0.6788824635964607, "grad_norm": 0.08505754917860031, "learning_rate": 2.6740568448213523e-06, "loss": 0.0572, "step": 4872 }, { "epoch": 0.679021807287675, "grad_norm": 0.09224630147218704, "learning_rate": 2.6719708510789077e-06, "loss": 0.046, "step": 4873 }, { "epoch": 0.6791611509788894, "grad_norm": 0.08641964197158813, "learning_rate": 2.669885374571392e-06, "loss": 0.0594, "step": 4874 }, { "epoch": 0.6793004946701038, "grad_norm": 0.09641878306865692, "learning_rate": 2.667800415762149e-06, "loss": 0.0557, "step": 4875 }, { "epoch": 0.6794398383613182, "grad_norm": 0.10261465609073639, "learning_rate": 2.665715975114407e-06, "loss": 0.0576, "step": 4876 }, { "epoch": 0.6795791820525325, "grad_norm": 0.05590217933058739, "learning_rate": 2.6636320530912817e-06, "loss": 0.0509, "step": 4877 }, { "epoch": 0.6797185257437469, "grad_norm": 0.06204787641763687, "learning_rate": 2.6615486501557765e-06, "loss": 0.0446, "step": 4878 }, { "epoch": 0.6798578694349613, "grad_norm": 0.16557258367538452, "learning_rate": 2.659465766770772e-06, "loss": 0.078, "step": 4879 }, { "epoch": 0.6799972131261757, "grad_norm": 0.0865369662642479, "learning_rate": 2.6573834033990404e-06, "loss": 0.0536, "step": 4880 }, { "epoch": 0.68013655681739, "grad_norm": 0.09908000379800797, "learning_rate": 2.655301560503234e-06, "loss": 0.0545, "step": 4881 }, { "epoch": 0.6802759005086044, "grad_norm": 0.13091205060482025, "learning_rate": 2.6532202385458875e-06, "loss": 0.0579, "step": 4882 }, { "epoch": 0.6804152441998188, "grad_norm": 0.11873062700033188, "learning_rate": 2.6511394379894274e-06, "loss": 0.0592, "step": 4883 }, { "epoch": 0.6805545878910332, "grad_norm": 0.09904859960079193, "learning_rate": 2.649059159296158e-06, "loss": 0.0565, "step": 4884 }, { "epoch": 0.6806939315822477, "grad_norm": 0.064430832862854, "learning_rate": 2.6469794029282726e-06, "loss": 0.052, "step": 4885 }, { "epoch": 0.680833275273462, "grad_norm": 0.08562096953392029, "learning_rate": 2.6449001693478438e-06, "loss": 0.0497, "step": 4886 }, { "epoch": 0.6809726189646764, "grad_norm": 0.11116208136081696, "learning_rate": 2.642821459016827e-06, "loss": 0.0689, "step": 4887 }, { "epoch": 0.6811119626558908, "grad_norm": 0.08363047987222672, "learning_rate": 2.6407432723970694e-06, "loss": 0.0521, "step": 4888 }, { "epoch": 0.6812513063471052, "grad_norm": 0.055458106100559235, "learning_rate": 2.6386656099502917e-06, "loss": 0.0557, "step": 4889 }, { "epoch": 0.6813906500383196, "grad_norm": 0.052346713840961456, "learning_rate": 2.6365884721381045e-06, "loss": 0.0413, "step": 4890 }, { "epoch": 0.6815299937295339, "grad_norm": 0.0730181559920311, "learning_rate": 2.6345118594220044e-06, "loss": 0.0491, "step": 4891 }, { "epoch": 0.6816693374207483, "grad_norm": 0.09783273935317993, "learning_rate": 2.632435772263363e-06, "loss": 0.0523, "step": 4892 }, { "epoch": 0.6818086811119627, "grad_norm": 0.13277871906757355, "learning_rate": 2.6303602111234394e-06, "loss": 0.0533, "step": 4893 }, { "epoch": 0.6819480248031771, "grad_norm": 0.14208005368709564, "learning_rate": 2.6282851764633765e-06, "loss": 0.0693, "step": 4894 }, { "epoch": 0.6820873684943914, "grad_norm": 0.06444413959980011, "learning_rate": 2.626210668744203e-06, "loss": 0.0625, "step": 4895 }, { "epoch": 0.6822267121856058, "grad_norm": 0.1092507615685463, "learning_rate": 2.624136688426824e-06, "loss": 0.0487, "step": 4896 }, { "epoch": 0.6823660558768202, "grad_norm": 0.10459942370653152, "learning_rate": 2.6220632359720287e-06, "loss": 0.0576, "step": 4897 }, { "epoch": 0.6825053995680346, "grad_norm": 0.15027405321598053, "learning_rate": 2.6199903118404934e-06, "loss": 0.0495, "step": 4898 }, { "epoch": 0.682644743259249, "grad_norm": 0.11552824079990387, "learning_rate": 2.617917916492776e-06, "loss": 0.0457, "step": 4899 }, { "epoch": 0.6827840869504633, "grad_norm": 0.07658396661281586, "learning_rate": 2.615846050389312e-06, "loss": 0.0538, "step": 4900 }, { "epoch": 0.6829234306416777, "grad_norm": 0.11561929434537888, "learning_rate": 2.6137747139904262e-06, "loss": 0.0533, "step": 4901 }, { "epoch": 0.6830627743328921, "grad_norm": 0.06850194931030273, "learning_rate": 2.611703907756319e-06, "loss": 0.0564, "step": 4902 }, { "epoch": 0.6832021180241065, "grad_norm": 0.059077102690935135, "learning_rate": 2.6096336321470796e-06, "loss": 0.0534, "step": 4903 }, { "epoch": 0.6833414617153208, "grad_norm": 0.10310918837785721, "learning_rate": 2.6075638876226715e-06, "loss": 0.0517, "step": 4904 }, { "epoch": 0.6834808054065352, "grad_norm": 0.10488024353981018, "learning_rate": 2.605494674642948e-06, "loss": 0.0536, "step": 4905 }, { "epoch": 0.6836201490977496, "grad_norm": 0.07135561108589172, "learning_rate": 2.603425993667642e-06, "loss": 0.0404, "step": 4906 }, { "epoch": 0.683759492788964, "grad_norm": 0.08905045688152313, "learning_rate": 2.6013578451563653e-06, "loss": 0.0617, "step": 4907 }, { "epoch": 0.6838988364801784, "grad_norm": 0.1014360785484314, "learning_rate": 2.599290229568612e-06, "loss": 0.0564, "step": 4908 }, { "epoch": 0.6840381801713927, "grad_norm": 0.05465113744139671, "learning_rate": 2.59722314736376e-06, "loss": 0.0465, "step": 4909 }, { "epoch": 0.6841775238626071, "grad_norm": 0.06926299631595612, "learning_rate": 2.5951565990010706e-06, "loss": 0.0486, "step": 4910 }, { "epoch": 0.6843168675538215, "grad_norm": 0.06827723234891891, "learning_rate": 2.5930905849396792e-06, "loss": 0.0488, "step": 4911 }, { "epoch": 0.6844562112450359, "grad_norm": 0.09317324310541153, "learning_rate": 2.5910251056386113e-06, "loss": 0.054, "step": 4912 }, { "epoch": 0.6845955549362502, "grad_norm": 0.11591123044490814, "learning_rate": 2.5889601615567657e-06, "loss": 0.0639, "step": 4913 }, { "epoch": 0.6847348986274646, "grad_norm": 0.07157182693481445, "learning_rate": 2.5868957531529283e-06, "loss": 0.0525, "step": 4914 }, { "epoch": 0.684874242318679, "grad_norm": 0.07655031979084015, "learning_rate": 2.584831880885761e-06, "loss": 0.0529, "step": 4915 }, { "epoch": 0.6850135860098934, "grad_norm": 0.08724521845579147, "learning_rate": 2.582768545213811e-06, "loss": 0.062, "step": 4916 }, { "epoch": 0.6851529297011077, "grad_norm": 0.12223323434591293, "learning_rate": 2.5807057465955065e-06, "loss": 0.062, "step": 4917 }, { "epoch": 0.6852922733923221, "grad_norm": 0.09615126252174377, "learning_rate": 2.5786434854891482e-06, "loss": 0.0526, "step": 4918 }, { "epoch": 0.6854316170835365, "grad_norm": 0.07974282652139664, "learning_rate": 2.576581762352928e-06, "loss": 0.0479, "step": 4919 }, { "epoch": 0.6855709607747509, "grad_norm": 0.07835377007722855, "learning_rate": 2.574520577644913e-06, "loss": 0.0543, "step": 4920 }, { "epoch": 0.6857103044659653, "grad_norm": 0.11396869271993637, "learning_rate": 2.5724599318230504e-06, "loss": 0.059, "step": 4921 }, { "epoch": 0.6858496481571796, "grad_norm": 0.07454070448875427, "learning_rate": 2.570399825345169e-06, "loss": 0.0504, "step": 4922 }, { "epoch": 0.685988991848394, "grad_norm": 0.09516648203134537, "learning_rate": 2.5683402586689788e-06, "loss": 0.0554, "step": 4923 }, { "epoch": 0.6861283355396084, "grad_norm": 0.073738232254982, "learning_rate": 2.566281232252068e-06, "loss": 0.048, "step": 4924 }, { "epoch": 0.6862676792308228, "grad_norm": 0.17123587429523468, "learning_rate": 2.564222746551903e-06, "loss": 0.0533, "step": 4925 }, { "epoch": 0.6864070229220373, "grad_norm": 0.06894269585609436, "learning_rate": 2.562164802025834e-06, "loss": 0.0558, "step": 4926 }, { "epoch": 0.6865463666132516, "grad_norm": 0.07951327413320541, "learning_rate": 2.5601073991310903e-06, "loss": 0.0538, "step": 4927 }, { "epoch": 0.686685710304466, "grad_norm": 0.060853924602270126, "learning_rate": 2.5580505383247796e-06, "loss": 0.0524, "step": 4928 }, { "epoch": 0.6868250539956804, "grad_norm": 0.0887114554643631, "learning_rate": 2.5559942200638866e-06, "loss": 0.0543, "step": 4929 }, { "epoch": 0.6869643976868948, "grad_norm": 0.10572756826877594, "learning_rate": 2.5539384448052797e-06, "loss": 0.0547, "step": 4930 }, { "epoch": 0.6871037413781091, "grad_norm": 0.08499515801668167, "learning_rate": 2.5518832130057082e-06, "loss": 0.0548, "step": 4931 }, { "epoch": 0.6872430850693235, "grad_norm": 0.11779092997312546, "learning_rate": 2.5498285251217938e-06, "loss": 0.0544, "step": 4932 }, { "epoch": 0.6873824287605379, "grad_norm": 0.08457351475954056, "learning_rate": 2.5477743816100443e-06, "loss": 0.058, "step": 4933 }, { "epoch": 0.6875217724517523, "grad_norm": 0.07507619261741638, "learning_rate": 2.5457207829268394e-06, "loss": 0.0586, "step": 4934 }, { "epoch": 0.6876611161429667, "grad_norm": 0.07770473510026932, "learning_rate": 2.5436677295284474e-06, "loss": 0.0588, "step": 4935 }, { "epoch": 0.687800459834181, "grad_norm": 0.08141887187957764, "learning_rate": 2.5416152218710044e-06, "loss": 0.0585, "step": 4936 }, { "epoch": 0.6879398035253954, "grad_norm": 0.07959835976362228, "learning_rate": 2.539563260410533e-06, "loss": 0.0507, "step": 4937 }, { "epoch": 0.6880791472166098, "grad_norm": 0.1275341808795929, "learning_rate": 2.5375118456029345e-06, "loss": 0.0585, "step": 4938 }, { "epoch": 0.6882184909078242, "grad_norm": 0.10445360839366913, "learning_rate": 2.5354609779039844e-06, "loss": 0.0572, "step": 4939 }, { "epoch": 0.6883578345990385, "grad_norm": 0.08784555643796921, "learning_rate": 2.533410657769337e-06, "loss": 0.0625, "step": 4940 }, { "epoch": 0.6884971782902529, "grad_norm": 0.08261089026927948, "learning_rate": 2.531360885654528e-06, "loss": 0.0524, "step": 4941 }, { "epoch": 0.6886365219814673, "grad_norm": 0.10677685588598251, "learning_rate": 2.529311662014972e-06, "loss": 0.0518, "step": 4942 }, { "epoch": 0.6887758656726817, "grad_norm": 0.06407050788402557, "learning_rate": 2.5272629873059564e-06, "loss": 0.0506, "step": 4943 }, { "epoch": 0.688915209363896, "grad_norm": 0.09459806978702545, "learning_rate": 2.5252148619826535e-06, "loss": 0.0543, "step": 4944 }, { "epoch": 0.6890545530551104, "grad_norm": 0.08914686739444733, "learning_rate": 2.5231672865001056e-06, "loss": 0.0483, "step": 4945 }, { "epoch": 0.6891938967463248, "grad_norm": 0.10101643949747086, "learning_rate": 2.5211202613132413e-06, "loss": 0.0507, "step": 4946 }, { "epoch": 0.6893332404375392, "grad_norm": 0.0749717429280281, "learning_rate": 2.5190737868768592e-06, "loss": 0.0641, "step": 4947 }, { "epoch": 0.6894725841287536, "grad_norm": 0.09423404932022095, "learning_rate": 2.5170278636456413e-06, "loss": 0.0593, "step": 4948 }, { "epoch": 0.6896119278199679, "grad_norm": 0.06025157868862152, "learning_rate": 2.5149824920741493e-06, "loss": 0.0475, "step": 4949 }, { "epoch": 0.6897512715111823, "grad_norm": 0.0711042508482933, "learning_rate": 2.51293767261681e-06, "loss": 0.0547, "step": 4950 }, { "epoch": 0.6898906152023967, "grad_norm": 0.10207570344209671, "learning_rate": 2.5108934057279376e-06, "loss": 0.063, "step": 4951 }, { "epoch": 0.6900299588936111, "grad_norm": 0.06342051178216934, "learning_rate": 2.5088496918617243e-06, "loss": 0.0405, "step": 4952 }, { "epoch": 0.6901693025848255, "grad_norm": 0.0892934799194336, "learning_rate": 2.5068065314722378e-06, "loss": 0.0518, "step": 4953 }, { "epoch": 0.6903086462760398, "grad_norm": 0.07207101583480835, "learning_rate": 2.504763925013419e-06, "loss": 0.0509, "step": 4954 }, { "epoch": 0.6904479899672542, "grad_norm": 0.08426319062709808, "learning_rate": 2.5027218729390867e-06, "loss": 0.0464, "step": 4955 }, { "epoch": 0.6905873336584686, "grad_norm": 0.07594966143369675, "learning_rate": 2.500680375702943e-06, "loss": 0.0486, "step": 4956 }, { "epoch": 0.690726677349683, "grad_norm": 0.0873555839061737, "learning_rate": 2.498639433758557e-06, "loss": 0.0594, "step": 4957 }, { "epoch": 0.6908660210408973, "grad_norm": 0.09053590148687363, "learning_rate": 2.4965990475593814e-06, "loss": 0.0451, "step": 4958 }, { "epoch": 0.6910053647321117, "grad_norm": 0.06004135683178902, "learning_rate": 2.494559217558746e-06, "loss": 0.0564, "step": 4959 }, { "epoch": 0.6911447084233261, "grad_norm": 0.07675155997276306, "learning_rate": 2.492519944209853e-06, "loss": 0.0568, "step": 4960 }, { "epoch": 0.6912840521145405, "grad_norm": 0.060033317655324936, "learning_rate": 2.4904812279657792e-06, "loss": 0.0522, "step": 4961 }, { "epoch": 0.6914233958057548, "grad_norm": 0.10236180573701859, "learning_rate": 2.488443069279483e-06, "loss": 0.0535, "step": 4962 }, { "epoch": 0.6915627394969692, "grad_norm": 0.07854257524013519, "learning_rate": 2.4864054686037993e-06, "loss": 0.0482, "step": 4963 }, { "epoch": 0.6917020831881836, "grad_norm": 0.09026525169610977, "learning_rate": 2.484368426391432e-06, "loss": 0.0561, "step": 4964 }, { "epoch": 0.691841426879398, "grad_norm": 0.06623373925685883, "learning_rate": 2.482331943094969e-06, "loss": 0.0477, "step": 4965 }, { "epoch": 0.6919807705706125, "grad_norm": 0.09143693000078201, "learning_rate": 2.480296019166868e-06, "loss": 0.0574, "step": 4966 }, { "epoch": 0.6921201142618268, "grad_norm": 0.06285420805215836, "learning_rate": 2.478260655059467e-06, "loss": 0.0528, "step": 4967 }, { "epoch": 0.6922594579530412, "grad_norm": 0.08892861753702164, "learning_rate": 2.4762258512249745e-06, "loss": 0.0628, "step": 4968 }, { "epoch": 0.6923988016442556, "grad_norm": 0.07713759690523148, "learning_rate": 2.4741916081154786e-06, "loss": 0.0575, "step": 4969 }, { "epoch": 0.69253814533547, "grad_norm": 0.12066462635993958, "learning_rate": 2.472157926182945e-06, "loss": 0.0568, "step": 4970 }, { "epoch": 0.6926774890266844, "grad_norm": 0.10239606350660324, "learning_rate": 2.470124805879208e-06, "loss": 0.0443, "step": 4971 }, { "epoch": 0.6928168327178987, "grad_norm": 0.0788760706782341, "learning_rate": 2.468092247655979e-06, "loss": 0.064, "step": 4972 }, { "epoch": 0.6929561764091131, "grad_norm": 0.13182173669338226, "learning_rate": 2.466060251964848e-06, "loss": 0.0578, "step": 4973 }, { "epoch": 0.6930955201003275, "grad_norm": 0.09736449271440506, "learning_rate": 2.464028819257281e-06, "loss": 0.0588, "step": 4974 }, { "epoch": 0.6932348637915419, "grad_norm": 0.06835028529167175, "learning_rate": 2.4619979499846127e-06, "loss": 0.0509, "step": 4975 }, { "epoch": 0.6933742074827562, "grad_norm": 0.07751058787107468, "learning_rate": 2.459967644598054e-06, "loss": 0.0573, "step": 4976 }, { "epoch": 0.6935135511739706, "grad_norm": 0.13014930486679077, "learning_rate": 2.457937903548695e-06, "loss": 0.057, "step": 4977 }, { "epoch": 0.693652894865185, "grad_norm": 0.08183370530605316, "learning_rate": 2.4559087272875e-06, "loss": 0.0604, "step": 4978 }, { "epoch": 0.6937922385563994, "grad_norm": 0.07936418801546097, "learning_rate": 2.4538801162653002e-06, "loss": 0.047, "step": 4979 }, { "epoch": 0.6939315822476138, "grad_norm": 0.0897664874792099, "learning_rate": 2.451852070932811e-06, "loss": 0.0639, "step": 4980 }, { "epoch": 0.6940709259388281, "grad_norm": 0.07198313623666763, "learning_rate": 2.4498245917406195e-06, "loss": 0.0516, "step": 4981 }, { "epoch": 0.6942102696300425, "grad_norm": 0.09931986778974533, "learning_rate": 2.4477976791391784e-06, "loss": 0.0595, "step": 4982 }, { "epoch": 0.6943496133212569, "grad_norm": 0.09070076793432236, "learning_rate": 2.445771333578825e-06, "loss": 0.0559, "step": 4983 }, { "epoch": 0.6944889570124713, "grad_norm": 0.11471006274223328, "learning_rate": 2.443745555509768e-06, "loss": 0.0611, "step": 4984 }, { "epoch": 0.6946283007036856, "grad_norm": 0.11573700606822968, "learning_rate": 2.4417203453820892e-06, "loss": 0.0649, "step": 4985 }, { "epoch": 0.6947676443949, "grad_norm": 0.08435819298028946, "learning_rate": 2.4396957036457443e-06, "loss": 0.0605, "step": 4986 }, { "epoch": 0.6949069880861144, "grad_norm": 0.08890402317047119, "learning_rate": 2.437671630750558e-06, "loss": 0.057, "step": 4987 }, { "epoch": 0.6950463317773288, "grad_norm": 0.08883317559957504, "learning_rate": 2.4356481271462396e-06, "loss": 0.0531, "step": 4988 }, { "epoch": 0.6951856754685432, "grad_norm": 0.07229512184858322, "learning_rate": 2.4336251932823594e-06, "loss": 0.0497, "step": 4989 }, { "epoch": 0.6953250191597575, "grad_norm": 0.08907341212034225, "learning_rate": 2.4316028296083705e-06, "loss": 0.0527, "step": 4990 }, { "epoch": 0.6954643628509719, "grad_norm": 0.08608513325452805, "learning_rate": 2.4295810365735974e-06, "loss": 0.0599, "step": 4991 }, { "epoch": 0.6956037065421863, "grad_norm": 0.07728107273578644, "learning_rate": 2.427559814627234e-06, "loss": 0.0484, "step": 4992 }, { "epoch": 0.6957430502334007, "grad_norm": 0.07712375372648239, "learning_rate": 2.425539164218348e-06, "loss": 0.0502, "step": 4993 }, { "epoch": 0.695882393924615, "grad_norm": 0.09378056973218918, "learning_rate": 2.4235190857958834e-06, "loss": 0.057, "step": 4994 }, { "epoch": 0.6960217376158294, "grad_norm": 0.05376884713768959, "learning_rate": 2.4214995798086584e-06, "loss": 0.049, "step": 4995 }, { "epoch": 0.6961610813070438, "grad_norm": 0.12325520813465118, "learning_rate": 2.4194806467053584e-06, "loss": 0.0659, "step": 4996 }, { "epoch": 0.6963004249982582, "grad_norm": 0.0743725523352623, "learning_rate": 2.417462286934543e-06, "loss": 0.0588, "step": 4997 }, { "epoch": 0.6964397686894725, "grad_norm": 0.057329654693603516, "learning_rate": 2.4154445009446457e-06, "loss": 0.0431, "step": 4998 }, { "epoch": 0.6965791123806869, "grad_norm": 0.07579848170280457, "learning_rate": 2.413427289183977e-06, "loss": 0.0605, "step": 4999 }, { "epoch": 0.6967184560719013, "grad_norm": 0.061454325914382935, "learning_rate": 2.41141065210071e-06, "loss": 0.0493, "step": 5000 }, { "epoch": 0.6968577997631157, "grad_norm": 0.08685282617807388, "learning_rate": 2.4093945901428977e-06, "loss": 0.0522, "step": 5001 }, { "epoch": 0.6969971434543301, "grad_norm": 0.11844585835933685, "learning_rate": 2.4073791037584648e-06, "loss": 0.0484, "step": 5002 }, { "epoch": 0.6971364871455444, "grad_norm": 0.09320404380559921, "learning_rate": 2.4053641933952043e-06, "loss": 0.0575, "step": 5003 }, { "epoch": 0.6972758308367588, "grad_norm": 0.11965511739253998, "learning_rate": 2.403349859500782e-06, "loss": 0.0487, "step": 5004 }, { "epoch": 0.6974151745279732, "grad_norm": 0.04842786490917206, "learning_rate": 2.4013361025227384e-06, "loss": 0.0448, "step": 5005 }, { "epoch": 0.6975545182191877, "grad_norm": 0.0659589171409607, "learning_rate": 2.3993229229084856e-06, "loss": 0.0538, "step": 5006 }, { "epoch": 0.6976938619104021, "grad_norm": 0.06316165626049042, "learning_rate": 2.3973103211053052e-06, "loss": 0.0496, "step": 5007 }, { "epoch": 0.6978332056016164, "grad_norm": 0.10892651230096817, "learning_rate": 2.3952982975603494e-06, "loss": 0.0643, "step": 5008 }, { "epoch": 0.6979725492928308, "grad_norm": 0.0864192545413971, "learning_rate": 2.393286852720645e-06, "loss": 0.0499, "step": 5009 }, { "epoch": 0.6981118929840452, "grad_norm": 0.09877285361289978, "learning_rate": 2.391275987033092e-06, "loss": 0.0658, "step": 5010 }, { "epoch": 0.6982512366752596, "grad_norm": 0.07003235816955566, "learning_rate": 2.3892657009444543e-06, "loss": 0.0508, "step": 5011 }, { "epoch": 0.698390580366474, "grad_norm": 0.0678882971405983, "learning_rate": 2.387255994901376e-06, "loss": 0.0525, "step": 5012 }, { "epoch": 0.6985299240576883, "grad_norm": 0.08916526287794113, "learning_rate": 2.3852468693503635e-06, "loss": 0.0545, "step": 5013 }, { "epoch": 0.6986692677489027, "grad_norm": 0.09628670662641525, "learning_rate": 2.3832383247378025e-06, "loss": 0.0583, "step": 5014 }, { "epoch": 0.6988086114401171, "grad_norm": 0.08978064358234406, "learning_rate": 2.3812303615099423e-06, "loss": 0.0504, "step": 5015 }, { "epoch": 0.6989479551313315, "grad_norm": 0.0651404857635498, "learning_rate": 2.3792229801129086e-06, "loss": 0.0589, "step": 5016 }, { "epoch": 0.6990872988225458, "grad_norm": 0.09636931121349335, "learning_rate": 2.3772161809926973e-06, "loss": 0.0483, "step": 5017 }, { "epoch": 0.6992266425137602, "grad_norm": 0.08091732114553452, "learning_rate": 2.375209964595171e-06, "loss": 0.055, "step": 5018 }, { "epoch": 0.6993659862049746, "grad_norm": 0.0681779533624649, "learning_rate": 2.373204331366064e-06, "loss": 0.0661, "step": 5019 }, { "epoch": 0.699505329896189, "grad_norm": 0.0918482095003128, "learning_rate": 2.3711992817509854e-06, "loss": 0.0617, "step": 5020 }, { "epoch": 0.6996446735874033, "grad_norm": 0.08627375960350037, "learning_rate": 2.3691948161954083e-06, "loss": 0.0476, "step": 5021 }, { "epoch": 0.6997840172786177, "grad_norm": 0.09613887965679169, "learning_rate": 2.3671909351446802e-06, "loss": 0.0533, "step": 5022 }, { "epoch": 0.6999233609698321, "grad_norm": 0.0835055559873581, "learning_rate": 2.365187639044021e-06, "loss": 0.0556, "step": 5023 }, { "epoch": 0.7000627046610465, "grad_norm": 0.09445088356733322, "learning_rate": 2.363184928338514e-06, "loss": 0.0539, "step": 5024 }, { "epoch": 0.7002020483522609, "grad_norm": 0.09668081998825073, "learning_rate": 2.3611828034731144e-06, "loss": 0.062, "step": 5025 }, { "epoch": 0.7003413920434752, "grad_norm": 0.05772428587079048, "learning_rate": 2.359181264892651e-06, "loss": 0.0488, "step": 5026 }, { "epoch": 0.7004807357346896, "grad_norm": 0.09791674464941025, "learning_rate": 2.3571803130418215e-06, "loss": 0.067, "step": 5027 }, { "epoch": 0.700620079425904, "grad_norm": 0.11598946899175644, "learning_rate": 2.3551799483651894e-06, "loss": 0.0677, "step": 5028 }, { "epoch": 0.7007594231171184, "grad_norm": 0.4363313913345337, "learning_rate": 2.3531801713071887e-06, "loss": 0.0704, "step": 5029 }, { "epoch": 0.7008987668083327, "grad_norm": 0.08529641479253769, "learning_rate": 2.351180982312127e-06, "loss": 0.0463, "step": 5030 }, { "epoch": 0.7010381104995471, "grad_norm": 0.0779227539896965, "learning_rate": 2.349182381824178e-06, "loss": 0.0528, "step": 5031 }, { "epoch": 0.7011774541907615, "grad_norm": 0.10724751651287079, "learning_rate": 2.3471843702873835e-06, "loss": 0.0577, "step": 5032 }, { "epoch": 0.7013167978819759, "grad_norm": 0.060967955738306046, "learning_rate": 2.345186948145659e-06, "loss": 0.0443, "step": 5033 }, { "epoch": 0.7014561415731903, "grad_norm": 0.10006620734930038, "learning_rate": 2.343190115842782e-06, "loss": 0.0477, "step": 5034 }, { "epoch": 0.7015954852644046, "grad_norm": 0.07112152129411697, "learning_rate": 2.341193873822407e-06, "loss": 0.0473, "step": 5035 }, { "epoch": 0.701734828955619, "grad_norm": 0.1204996258020401, "learning_rate": 2.33919822252805e-06, "loss": 0.0559, "step": 5036 }, { "epoch": 0.7018741726468334, "grad_norm": 0.14015571773052216, "learning_rate": 2.337203162403101e-06, "loss": 0.0584, "step": 5037 }, { "epoch": 0.7020135163380478, "grad_norm": 0.13213106989860535, "learning_rate": 2.335208693890819e-06, "loss": 0.0569, "step": 5038 }, { "epoch": 0.7021528600292621, "grad_norm": 0.08367743343114853, "learning_rate": 2.3332148174343257e-06, "loss": 0.0484, "step": 5039 }, { "epoch": 0.7022922037204765, "grad_norm": 0.08388545364141464, "learning_rate": 2.331221533476615e-06, "loss": 0.0507, "step": 5040 }, { "epoch": 0.7024315474116909, "grad_norm": 0.06517805904150009, "learning_rate": 2.3292288424605503e-06, "loss": 0.0532, "step": 5041 }, { "epoch": 0.7025708911029053, "grad_norm": 0.08406512439250946, "learning_rate": 2.327236744828864e-06, "loss": 0.057, "step": 5042 }, { "epoch": 0.7027102347941196, "grad_norm": 0.09932230412960052, "learning_rate": 2.325245241024151e-06, "loss": 0.0616, "step": 5043 }, { "epoch": 0.702849578485334, "grad_norm": 0.07942081987857819, "learning_rate": 2.323254331488881e-06, "loss": 0.0579, "step": 5044 }, { "epoch": 0.7029889221765484, "grad_norm": 0.08365950733423233, "learning_rate": 2.3212640166653868e-06, "loss": 0.0412, "step": 5045 }, { "epoch": 0.7031282658677629, "grad_norm": 0.09576790779829025, "learning_rate": 2.319274296995872e-06, "loss": 0.0578, "step": 5046 }, { "epoch": 0.7032676095589773, "grad_norm": 0.08816641569137573, "learning_rate": 2.3172851729224056e-06, "loss": 0.0572, "step": 5047 }, { "epoch": 0.7034069532501916, "grad_norm": 0.09326059371232986, "learning_rate": 2.315296644886926e-06, "loss": 0.0595, "step": 5048 }, { "epoch": 0.703546296941406, "grad_norm": 0.07913052290678024, "learning_rate": 2.313308713331242e-06, "loss": 0.0613, "step": 5049 }, { "epoch": 0.7036856406326204, "grad_norm": 0.17376196384429932, "learning_rate": 2.3113213786970205e-06, "loss": 0.0606, "step": 5050 }, { "epoch": 0.7038249843238348, "grad_norm": 0.0946589931845665, "learning_rate": 2.3093346414258054e-06, "loss": 0.0619, "step": 5051 }, { "epoch": 0.7039643280150492, "grad_norm": 0.07715514302253723, "learning_rate": 2.3073485019590043e-06, "loss": 0.0553, "step": 5052 }, { "epoch": 0.7041036717062635, "grad_norm": 0.12069918215274811, "learning_rate": 2.305362960737893e-06, "loss": 0.0446, "step": 5053 }, { "epoch": 0.7042430153974779, "grad_norm": 0.0769791230559349, "learning_rate": 2.3033780182036127e-06, "loss": 0.0628, "step": 5054 }, { "epoch": 0.7043823590886923, "grad_norm": 0.08999422937631607, "learning_rate": 2.301393674797169e-06, "loss": 0.0525, "step": 5055 }, { "epoch": 0.7045217027799067, "grad_norm": 0.06471409648656845, "learning_rate": 2.2994099309594437e-06, "loss": 0.0493, "step": 5056 }, { "epoch": 0.704661046471121, "grad_norm": 0.06172993406653404, "learning_rate": 2.297426787131174e-06, "loss": 0.0374, "step": 5057 }, { "epoch": 0.7048003901623354, "grad_norm": 0.08012011647224426, "learning_rate": 2.2954442437529705e-06, "loss": 0.0436, "step": 5058 }, { "epoch": 0.7049397338535498, "grad_norm": 0.07985813915729523, "learning_rate": 2.293462301265313e-06, "loss": 0.0549, "step": 5059 }, { "epoch": 0.7050790775447642, "grad_norm": 0.07442318648099899, "learning_rate": 2.2914809601085405e-06, "loss": 0.0472, "step": 5060 }, { "epoch": 0.7052184212359786, "grad_norm": 0.14071550965309143, "learning_rate": 2.28950022072286e-06, "loss": 0.0541, "step": 5061 }, { "epoch": 0.7053577649271929, "grad_norm": 0.08404932916164398, "learning_rate": 2.2875200835483486e-06, "loss": 0.0438, "step": 5062 }, { "epoch": 0.7054971086184073, "grad_norm": 0.07818280160427094, "learning_rate": 2.2855405490249498e-06, "loss": 0.0543, "step": 5063 }, { "epoch": 0.7056364523096217, "grad_norm": 0.09312036633491516, "learning_rate": 2.283561617592467e-06, "loss": 0.0524, "step": 5064 }, { "epoch": 0.7057757960008361, "grad_norm": 0.09491156041622162, "learning_rate": 2.2815832896905772e-06, "loss": 0.0497, "step": 5065 }, { "epoch": 0.7059151396920504, "grad_norm": 0.1243191733956337, "learning_rate": 2.279605565758816e-06, "loss": 0.0583, "step": 5066 }, { "epoch": 0.7060544833832648, "grad_norm": 0.08422809094190598, "learning_rate": 2.277628446236592e-06, "loss": 0.0511, "step": 5067 }, { "epoch": 0.7061938270744792, "grad_norm": 0.08410908281803131, "learning_rate": 2.275651931563173e-06, "loss": 0.0514, "step": 5068 }, { "epoch": 0.7063331707656936, "grad_norm": 0.05943845957517624, "learning_rate": 2.273676022177697e-06, "loss": 0.0487, "step": 5069 }, { "epoch": 0.706472514456908, "grad_norm": 0.06899137049913406, "learning_rate": 2.2717007185191673e-06, "loss": 0.0467, "step": 5070 }, { "epoch": 0.7066118581481223, "grad_norm": 0.1004875898361206, "learning_rate": 2.2697260210264506e-06, "loss": 0.0574, "step": 5071 }, { "epoch": 0.7067512018393367, "grad_norm": 0.05743376910686493, "learning_rate": 2.267751930138276e-06, "loss": 0.0474, "step": 5072 }, { "epoch": 0.7068905455305511, "grad_norm": 0.08454354107379913, "learning_rate": 2.265778446293245e-06, "loss": 0.0463, "step": 5073 }, { "epoch": 0.7070298892217655, "grad_norm": 0.07252392172813416, "learning_rate": 2.263805569929821e-06, "loss": 0.0405, "step": 5074 }, { "epoch": 0.7071692329129798, "grad_norm": 0.08642107993364334, "learning_rate": 2.2618333014863296e-06, "loss": 0.0641, "step": 5075 }, { "epoch": 0.7073085766041942, "grad_norm": 0.0743330791592598, "learning_rate": 2.259861641400967e-06, "loss": 0.0557, "step": 5076 }, { "epoch": 0.7074479202954086, "grad_norm": 0.08833429962396622, "learning_rate": 2.2578905901117876e-06, "loss": 0.0496, "step": 5077 }, { "epoch": 0.707587263986623, "grad_norm": 0.10000940412282944, "learning_rate": 2.255920148056717e-06, "loss": 0.0607, "step": 5078 }, { "epoch": 0.7077266076778373, "grad_norm": 0.11770306527614594, "learning_rate": 2.2539503156735392e-06, "loss": 0.0578, "step": 5079 }, { "epoch": 0.7078659513690517, "grad_norm": 0.06658948212862015, "learning_rate": 2.2519810933999085e-06, "loss": 0.05, "step": 5080 }, { "epoch": 0.7080052950602661, "grad_norm": 0.06872525066137314, "learning_rate": 2.2500124816733437e-06, "loss": 0.0438, "step": 5081 }, { "epoch": 0.7081446387514805, "grad_norm": 0.093822181224823, "learning_rate": 2.248044480931219e-06, "loss": 0.0533, "step": 5082 }, { "epoch": 0.7082839824426949, "grad_norm": 0.0688437968492508, "learning_rate": 2.2460770916107823e-06, "loss": 0.0477, "step": 5083 }, { "epoch": 0.7084233261339092, "grad_norm": 0.10528827458620071, "learning_rate": 2.2441103141491424e-06, "loss": 0.051, "step": 5084 }, { "epoch": 0.7085626698251236, "grad_norm": 0.06880902498960495, "learning_rate": 2.2421441489832745e-06, "loss": 0.0536, "step": 5085 }, { "epoch": 0.7087020135163381, "grad_norm": 0.08644308894872665, "learning_rate": 2.240178596550014e-06, "loss": 0.0522, "step": 5086 }, { "epoch": 0.7088413572075525, "grad_norm": 0.11239510029554367, "learning_rate": 2.23821365728606e-06, "loss": 0.0559, "step": 5087 }, { "epoch": 0.7089807008987669, "grad_norm": 0.06037157028913498, "learning_rate": 2.23624933162798e-06, "loss": 0.0501, "step": 5088 }, { "epoch": 0.7091200445899812, "grad_norm": 0.08256492763757706, "learning_rate": 2.2342856200121993e-06, "loss": 0.0571, "step": 5089 }, { "epoch": 0.7092593882811956, "grad_norm": 0.09181869775056839, "learning_rate": 2.2323225228750113e-06, "loss": 0.0636, "step": 5090 }, { "epoch": 0.70939873197241, "grad_norm": 0.08550214022397995, "learning_rate": 2.230360040652574e-06, "loss": 0.0518, "step": 5091 }, { "epoch": 0.7095380756636244, "grad_norm": 0.15732578933238983, "learning_rate": 2.228398173780903e-06, "loss": 0.054, "step": 5092 }, { "epoch": 0.7096774193548387, "grad_norm": 0.08564172685146332, "learning_rate": 2.2264369226958794e-06, "loss": 0.059, "step": 5093 }, { "epoch": 0.7098167630460531, "grad_norm": 0.08047565817832947, "learning_rate": 2.2244762878332506e-06, "loss": 0.0447, "step": 5094 }, { "epoch": 0.7099561067372675, "grad_norm": 0.06020093709230423, "learning_rate": 2.222516269628626e-06, "loss": 0.0506, "step": 5095 }, { "epoch": 0.7100954504284819, "grad_norm": 0.05656345933675766, "learning_rate": 2.220556868517473e-06, "loss": 0.0523, "step": 5096 }, { "epoch": 0.7102347941196963, "grad_norm": 0.09725853055715561, "learning_rate": 2.2185980849351295e-06, "loss": 0.0601, "step": 5097 }, { "epoch": 0.7103741378109106, "grad_norm": 0.06081542372703552, "learning_rate": 2.2166399193167905e-06, "loss": 0.0496, "step": 5098 }, { "epoch": 0.710513481502125, "grad_norm": 0.06413135677576065, "learning_rate": 2.214682372097517e-06, "loss": 0.0494, "step": 5099 }, { "epoch": 0.7106528251933394, "grad_norm": 0.10762829333543777, "learning_rate": 2.212725443712229e-06, "loss": 0.0563, "step": 5100 }, { "epoch": 0.7107921688845538, "grad_norm": 0.054310571402311325, "learning_rate": 2.2107691345957133e-06, "loss": 0.0405, "step": 5101 }, { "epoch": 0.7109315125757681, "grad_norm": 0.08396145701408386, "learning_rate": 2.208813445182618e-06, "loss": 0.0509, "step": 5102 }, { "epoch": 0.7110708562669825, "grad_norm": 0.08559246361255646, "learning_rate": 2.2068583759074513e-06, "loss": 0.0482, "step": 5103 }, { "epoch": 0.7112101999581969, "grad_norm": 0.07260023057460785, "learning_rate": 2.2049039272045837e-06, "loss": 0.0583, "step": 5104 }, { "epoch": 0.7113495436494113, "grad_norm": 0.06306596845388412, "learning_rate": 2.2029500995082497e-06, "loss": 0.0495, "step": 5105 }, { "epoch": 0.7114888873406257, "grad_norm": 0.0691046342253685, "learning_rate": 2.2009968932525478e-06, "loss": 0.0538, "step": 5106 }, { "epoch": 0.71162823103184, "grad_norm": 0.07219640910625458, "learning_rate": 2.199044308871434e-06, "loss": 0.0565, "step": 5107 }, { "epoch": 0.7117675747230544, "grad_norm": 0.08027240633964539, "learning_rate": 2.197092346798726e-06, "loss": 0.0517, "step": 5108 }, { "epoch": 0.7119069184142688, "grad_norm": 0.09599464386701584, "learning_rate": 2.1951410074681074e-06, "loss": 0.0509, "step": 5109 }, { "epoch": 0.7120462621054832, "grad_norm": 0.08325923979282379, "learning_rate": 2.193190291313122e-06, "loss": 0.0539, "step": 5110 }, { "epoch": 0.7121856057966975, "grad_norm": 0.0770263820886612, "learning_rate": 2.1912401987671724e-06, "loss": 0.0525, "step": 5111 }, { "epoch": 0.7123249494879119, "grad_norm": 0.11554251611232758, "learning_rate": 2.1892907302635246e-06, "loss": 0.0563, "step": 5112 }, { "epoch": 0.7124642931791263, "grad_norm": 0.09740384668111801, "learning_rate": 2.1873418862353095e-06, "loss": 0.0617, "step": 5113 }, { "epoch": 0.7126036368703407, "grad_norm": 0.1061205118894577, "learning_rate": 2.185393667115513e-06, "loss": 0.0505, "step": 5114 }, { "epoch": 0.712742980561555, "grad_norm": 0.08301471173763275, "learning_rate": 2.1834460733369835e-06, "loss": 0.0504, "step": 5115 }, { "epoch": 0.7128823242527694, "grad_norm": 0.07385019212961197, "learning_rate": 2.181499105332433e-06, "loss": 0.0506, "step": 5116 }, { "epoch": 0.7130216679439838, "grad_norm": 0.08744758367538452, "learning_rate": 2.179552763534436e-06, "loss": 0.0556, "step": 5117 }, { "epoch": 0.7131610116351982, "grad_norm": 0.0853753313422203, "learning_rate": 2.177607048375423e-06, "loss": 0.0465, "step": 5118 }, { "epoch": 0.7133003553264126, "grad_norm": 0.07955986261367798, "learning_rate": 2.1756619602876857e-06, "loss": 0.0442, "step": 5119 }, { "epoch": 0.7134396990176269, "grad_norm": 0.07900732755661011, "learning_rate": 2.1737174997033818e-06, "loss": 0.0543, "step": 5120 }, { "epoch": 0.7135790427088413, "grad_norm": 0.0831942930817604, "learning_rate": 2.1717736670545226e-06, "loss": 0.0511, "step": 5121 }, { "epoch": 0.7137183864000557, "grad_norm": 0.11361036449670792, "learning_rate": 2.169830462772985e-06, "loss": 0.0585, "step": 5122 }, { "epoch": 0.7138577300912701, "grad_norm": 0.12340568751096725, "learning_rate": 2.1678878872905063e-06, "loss": 0.0577, "step": 5123 }, { "epoch": 0.7139970737824844, "grad_norm": 0.055991701781749725, "learning_rate": 2.1659459410386814e-06, "loss": 0.043, "step": 5124 }, { "epoch": 0.7141364174736988, "grad_norm": 0.1345641314983368, "learning_rate": 2.1640046244489637e-06, "loss": 0.0653, "step": 5125 }, { "epoch": 0.7142757611649132, "grad_norm": 0.07418572902679443, "learning_rate": 2.1620639379526715e-06, "loss": 0.0457, "step": 5126 }, { "epoch": 0.7144151048561277, "grad_norm": 0.07099944353103638, "learning_rate": 2.1601238819809827e-06, "loss": 0.0512, "step": 5127 }, { "epoch": 0.7145544485473421, "grad_norm": 0.08296211063861847, "learning_rate": 2.158184456964932e-06, "loss": 0.0509, "step": 5128 }, { "epoch": 0.7146937922385564, "grad_norm": 0.19800598919391632, "learning_rate": 2.156245663335414e-06, "loss": 0.05, "step": 5129 }, { "epoch": 0.7148331359297708, "grad_norm": 0.16046114265918732, "learning_rate": 2.154307501523185e-06, "loss": 0.0534, "step": 5130 }, { "epoch": 0.7149724796209852, "grad_norm": 0.10951798409223557, "learning_rate": 2.1523699719588633e-06, "loss": 0.0544, "step": 5131 }, { "epoch": 0.7151118233121996, "grad_norm": 0.07946611195802689, "learning_rate": 2.1504330750729185e-06, "loss": 0.0532, "step": 5132 }, { "epoch": 0.715251167003414, "grad_norm": 0.05742217227816582, "learning_rate": 2.1484968112956884e-06, "loss": 0.0533, "step": 5133 }, { "epoch": 0.7153905106946283, "grad_norm": 0.13484960794448853, "learning_rate": 2.146561181057368e-06, "loss": 0.0584, "step": 5134 }, { "epoch": 0.7155298543858427, "grad_norm": 0.07647600024938583, "learning_rate": 2.1446261847880073e-06, "loss": 0.0505, "step": 5135 }, { "epoch": 0.7156691980770571, "grad_norm": 0.07333741337060928, "learning_rate": 2.1426918229175175e-06, "loss": 0.0528, "step": 5136 }, { "epoch": 0.7158085417682715, "grad_norm": 0.1619199961423874, "learning_rate": 2.140758095875671e-06, "loss": 0.0583, "step": 5137 }, { "epoch": 0.7159478854594858, "grad_norm": 0.08315973728895187, "learning_rate": 2.1388250040921007e-06, "loss": 0.0619, "step": 5138 }, { "epoch": 0.7160872291507002, "grad_norm": 0.10532897710800171, "learning_rate": 2.136892547996292e-06, "loss": 0.0548, "step": 5139 }, { "epoch": 0.7162265728419146, "grad_norm": 0.08595676720142365, "learning_rate": 2.1349607280175918e-06, "loss": 0.0589, "step": 5140 }, { "epoch": 0.716365916533129, "grad_norm": 0.0679752454161644, "learning_rate": 2.133029544585207e-06, "loss": 0.0605, "step": 5141 }, { "epoch": 0.7165052602243434, "grad_norm": 0.08173339068889618, "learning_rate": 2.1310989981282067e-06, "loss": 0.0532, "step": 5142 }, { "epoch": 0.7166446039155577, "grad_norm": 0.09388474375009537, "learning_rate": 2.1291690890755078e-06, "loss": 0.048, "step": 5143 }, { "epoch": 0.7167839476067721, "grad_norm": 0.0780683159828186, "learning_rate": 2.127239817855897e-06, "loss": 0.0486, "step": 5144 }, { "epoch": 0.7169232912979865, "grad_norm": 0.10164988040924072, "learning_rate": 2.1253111848980113e-06, "loss": 0.0484, "step": 5145 }, { "epoch": 0.7170626349892009, "grad_norm": 0.07928265631198883, "learning_rate": 2.1233831906303514e-06, "loss": 0.049, "step": 5146 }, { "epoch": 0.7172019786804152, "grad_norm": 0.07746104896068573, "learning_rate": 2.121455835481271e-06, "loss": 0.0519, "step": 5147 }, { "epoch": 0.7173413223716296, "grad_norm": 0.10172142088413239, "learning_rate": 2.119529119878985e-06, "loss": 0.05, "step": 5148 }, { "epoch": 0.717480666062844, "grad_norm": 0.06917150318622589, "learning_rate": 2.1176030442515704e-06, "loss": 0.0511, "step": 5149 }, { "epoch": 0.7176200097540584, "grad_norm": 0.13155603408813477, "learning_rate": 2.115677609026949e-06, "loss": 0.0569, "step": 5150 }, { "epoch": 0.7177593534452728, "grad_norm": 0.07955721020698547, "learning_rate": 2.1137528146329133e-06, "loss": 0.054, "step": 5151 }, { "epoch": 0.7178986971364871, "grad_norm": 0.07359436899423599, "learning_rate": 2.1118286614971075e-06, "loss": 0.0483, "step": 5152 }, { "epoch": 0.7180380408277015, "grad_norm": 0.17332719266414642, "learning_rate": 2.1099051500470368e-06, "loss": 0.0694, "step": 5153 }, { "epoch": 0.7181773845189159, "grad_norm": 0.09128014743328094, "learning_rate": 2.1079822807100585e-06, "loss": 0.0638, "step": 5154 }, { "epoch": 0.7183167282101303, "grad_norm": 0.05131068453192711, "learning_rate": 2.1060600539133928e-06, "loss": 0.0421, "step": 5155 }, { "epoch": 0.7184560719013446, "grad_norm": 0.07256629317998886, "learning_rate": 2.104138470084114e-06, "loss": 0.052, "step": 5156 }, { "epoch": 0.718595415592559, "grad_norm": 0.1748359203338623, "learning_rate": 2.1022175296491516e-06, "loss": 0.063, "step": 5157 }, { "epoch": 0.7187347592837734, "grad_norm": 0.08139670640230179, "learning_rate": 2.100297233035296e-06, "loss": 0.0619, "step": 5158 }, { "epoch": 0.7188741029749878, "grad_norm": 0.05144781619310379, "learning_rate": 2.098377580669196e-06, "loss": 0.0421, "step": 5159 }, { "epoch": 0.7190134466662021, "grad_norm": 0.11476757377386093, "learning_rate": 2.096458572977352e-06, "loss": 0.0503, "step": 5160 }, { "epoch": 0.7191527903574165, "grad_norm": 0.08958636969327927, "learning_rate": 2.0945402103861233e-06, "loss": 0.0511, "step": 5161 }, { "epoch": 0.7192921340486309, "grad_norm": 0.08982161432504654, "learning_rate": 2.0926224933217267e-06, "loss": 0.046, "step": 5162 }, { "epoch": 0.7194314777398453, "grad_norm": 0.0824102908372879, "learning_rate": 2.0907054222102367e-06, "loss": 0.051, "step": 5163 }, { "epoch": 0.7195708214310597, "grad_norm": 0.07505635917186737, "learning_rate": 2.0887889974775805e-06, "loss": 0.0549, "step": 5164 }, { "epoch": 0.719710165122274, "grad_norm": 0.05974547937512398, "learning_rate": 2.0868732195495463e-06, "loss": 0.0489, "step": 5165 }, { "epoch": 0.7198495088134884, "grad_norm": 0.07215246558189392, "learning_rate": 2.0849580888517733e-06, "loss": 0.06, "step": 5166 }, { "epoch": 0.7199888525047029, "grad_norm": 0.07711301743984222, "learning_rate": 2.083043605809763e-06, "loss": 0.0561, "step": 5167 }, { "epoch": 0.7201281961959173, "grad_norm": 0.12945738434791565, "learning_rate": 2.081129770848867e-06, "loss": 0.0581, "step": 5168 }, { "epoch": 0.7202675398871317, "grad_norm": 0.08744945377111435, "learning_rate": 2.0792165843942963e-06, "loss": 0.0678, "step": 5169 }, { "epoch": 0.720406883578346, "grad_norm": 0.08122459053993225, "learning_rate": 2.0773040468711205e-06, "loss": 0.0499, "step": 5170 }, { "epoch": 0.7205462272695604, "grad_norm": 0.10464002937078476, "learning_rate": 2.0753921587042586e-06, "loss": 0.0534, "step": 5171 }, { "epoch": 0.7206855709607748, "grad_norm": 0.08027352392673492, "learning_rate": 2.0734809203184873e-06, "loss": 0.0645, "step": 5172 }, { "epoch": 0.7208249146519892, "grad_norm": 0.07871302962303162, "learning_rate": 2.071570332138442e-06, "loss": 0.0532, "step": 5173 }, { "epoch": 0.7209642583432035, "grad_norm": 0.11494310945272446, "learning_rate": 2.0696603945886133e-06, "loss": 0.0454, "step": 5174 }, { "epoch": 0.7211036020344179, "grad_norm": 0.08479233831167221, "learning_rate": 2.067751108093343e-06, "loss": 0.0606, "step": 5175 }, { "epoch": 0.7212429457256323, "grad_norm": 0.06402184814214706, "learning_rate": 2.0658424730768335e-06, "loss": 0.045, "step": 5176 }, { "epoch": 0.7213822894168467, "grad_norm": 0.09065043181180954, "learning_rate": 2.063934489963137e-06, "loss": 0.0601, "step": 5177 }, { "epoch": 0.7215216331080611, "grad_norm": 0.09855534881353378, "learning_rate": 2.0620271591761666e-06, "loss": 0.0545, "step": 5178 }, { "epoch": 0.7216609767992754, "grad_norm": 0.08707787096500397, "learning_rate": 2.0601204811396847e-06, "loss": 0.0552, "step": 5179 }, { "epoch": 0.7218003204904898, "grad_norm": 0.06526514887809753, "learning_rate": 2.058214456277314e-06, "loss": 0.047, "step": 5180 }, { "epoch": 0.7219396641817042, "grad_norm": 0.09309086203575134, "learning_rate": 2.0563090850125318e-06, "loss": 0.0533, "step": 5181 }, { "epoch": 0.7220790078729186, "grad_norm": 0.10013782978057861, "learning_rate": 2.054404367768662e-06, "loss": 0.0661, "step": 5182 }, { "epoch": 0.7222183515641329, "grad_norm": 0.07131834328174591, "learning_rate": 2.0525003049688923e-06, "loss": 0.0483, "step": 5183 }, { "epoch": 0.7223576952553473, "grad_norm": 0.05927421897649765, "learning_rate": 2.0505968970362627e-06, "loss": 0.0488, "step": 5184 }, { "epoch": 0.7224970389465617, "grad_norm": 0.06032422184944153, "learning_rate": 2.048694144393668e-06, "loss": 0.0496, "step": 5185 }, { "epoch": 0.7226363826377761, "grad_norm": 0.11201485991477966, "learning_rate": 2.0467920474638552e-06, "loss": 0.0562, "step": 5186 }, { "epoch": 0.7227757263289905, "grad_norm": 0.08236750215291977, "learning_rate": 2.0448906066694247e-06, "loss": 0.059, "step": 5187 }, { "epoch": 0.7229150700202048, "grad_norm": 0.09004928916692734, "learning_rate": 2.042989822432837e-06, "loss": 0.0484, "step": 5188 }, { "epoch": 0.7230544137114192, "grad_norm": 0.07553393393754959, "learning_rate": 2.041089695176399e-06, "loss": 0.0465, "step": 5189 }, { "epoch": 0.7231937574026336, "grad_norm": 0.08183572441339493, "learning_rate": 2.0391902253222777e-06, "loss": 0.0508, "step": 5190 }, { "epoch": 0.723333101093848, "grad_norm": 0.07189787179231644, "learning_rate": 2.037291413292494e-06, "loss": 0.0444, "step": 5191 }, { "epoch": 0.7234724447850623, "grad_norm": 0.15735149383544922, "learning_rate": 2.035393259508919e-06, "loss": 0.06, "step": 5192 }, { "epoch": 0.7236117884762767, "grad_norm": 0.14277635514736176, "learning_rate": 2.0334957643932757e-06, "loss": 0.0746, "step": 5193 }, { "epoch": 0.7237511321674911, "grad_norm": 0.059666287153959274, "learning_rate": 2.0315989283671474e-06, "loss": 0.0496, "step": 5194 }, { "epoch": 0.7238904758587055, "grad_norm": 0.06922955065965652, "learning_rate": 2.0297027518519696e-06, "loss": 0.0414, "step": 5195 }, { "epoch": 0.7240298195499199, "grad_norm": 0.09841412305831909, "learning_rate": 2.0278072352690253e-06, "loss": 0.0474, "step": 5196 }, { "epoch": 0.7241691632411342, "grad_norm": 0.08503735810518265, "learning_rate": 2.0259123790394587e-06, "loss": 0.0502, "step": 5197 }, { "epoch": 0.7243085069323486, "grad_norm": 0.05805813521146774, "learning_rate": 2.0240181835842605e-06, "loss": 0.0483, "step": 5198 }, { "epoch": 0.724447850623563, "grad_norm": 0.06682979315519333, "learning_rate": 2.0221246493242802e-06, "loss": 0.0541, "step": 5199 }, { "epoch": 0.7245871943147774, "grad_norm": 0.06584489345550537, "learning_rate": 2.0202317766802155e-06, "loss": 0.0466, "step": 5200 }, { "epoch": 0.7247265380059917, "grad_norm": 0.09686601161956787, "learning_rate": 2.0183395660726208e-06, "loss": 0.0519, "step": 5201 }, { "epoch": 0.7248658816972061, "grad_norm": 0.1436368227005005, "learning_rate": 2.0164480179219038e-06, "loss": 0.0575, "step": 5202 }, { "epoch": 0.7250052253884205, "grad_norm": 0.07297492772340775, "learning_rate": 2.014557132648321e-06, "loss": 0.0505, "step": 5203 }, { "epoch": 0.7251445690796349, "grad_norm": 0.090306855738163, "learning_rate": 2.0126669106719833e-06, "loss": 0.0476, "step": 5204 }, { "epoch": 0.7252839127708492, "grad_norm": 0.07724770903587341, "learning_rate": 2.010777352412856e-06, "loss": 0.0557, "step": 5205 }, { "epoch": 0.7254232564620636, "grad_norm": 0.06434816122055054, "learning_rate": 2.0088884582907574e-06, "loss": 0.0559, "step": 5206 }, { "epoch": 0.7255626001532781, "grad_norm": 0.10323669016361237, "learning_rate": 2.0070002287253554e-06, "loss": 0.0597, "step": 5207 }, { "epoch": 0.7257019438444925, "grad_norm": 0.0846341997385025, "learning_rate": 2.0051126641361697e-06, "loss": 0.0494, "step": 5208 }, { "epoch": 0.7258412875357069, "grad_norm": 0.0699022188782692, "learning_rate": 2.0032257649425753e-06, "loss": 0.0556, "step": 5209 }, { "epoch": 0.7259806312269212, "grad_norm": 0.0750846266746521, "learning_rate": 2.0013395315637997e-06, "loss": 0.051, "step": 5210 }, { "epoch": 0.7261199749181356, "grad_norm": 0.06644756346940994, "learning_rate": 1.9994539644189183e-06, "loss": 0.0539, "step": 5211 }, { "epoch": 0.72625931860935, "grad_norm": 0.06644661724567413, "learning_rate": 1.9975690639268623e-06, "loss": 0.0466, "step": 5212 }, { "epoch": 0.7263986623005644, "grad_norm": 0.08071287721395493, "learning_rate": 1.9956848305064156e-06, "loss": 0.0578, "step": 5213 }, { "epoch": 0.7265380059917788, "grad_norm": 0.05490993335843086, "learning_rate": 1.99380126457621e-06, "loss": 0.0539, "step": 5214 }, { "epoch": 0.7266773496829931, "grad_norm": 0.09823523461818695, "learning_rate": 1.9919183665547285e-06, "loss": 0.0639, "step": 5215 }, { "epoch": 0.7268166933742075, "grad_norm": 0.06892135739326477, "learning_rate": 1.9900361368603104e-06, "loss": 0.0434, "step": 5216 }, { "epoch": 0.7269560370654219, "grad_norm": 0.06276039034128189, "learning_rate": 1.988154575911146e-06, "loss": 0.0435, "step": 5217 }, { "epoch": 0.7270953807566363, "grad_norm": 0.07571365684270859, "learning_rate": 1.9862736841252734e-06, "loss": 0.0489, "step": 5218 }, { "epoch": 0.7272347244478506, "grad_norm": 0.06679301708936691, "learning_rate": 1.984393461920581e-06, "loss": 0.0565, "step": 5219 }, { "epoch": 0.727374068139065, "grad_norm": 0.10621752589941025, "learning_rate": 1.9825139097148166e-06, "loss": 0.0692, "step": 5220 }, { "epoch": 0.7275134118302794, "grad_norm": 0.08835549652576447, "learning_rate": 1.980635027925569e-06, "loss": 0.054, "step": 5221 }, { "epoch": 0.7276527555214938, "grad_norm": 0.07108216732740402, "learning_rate": 1.9787568169702848e-06, "loss": 0.05, "step": 5222 }, { "epoch": 0.7277920992127082, "grad_norm": 0.11104180663824081, "learning_rate": 1.9768792772662616e-06, "loss": 0.0542, "step": 5223 }, { "epoch": 0.7279314429039225, "grad_norm": 0.16059856116771698, "learning_rate": 1.975002409230644e-06, "loss": 0.0492, "step": 5224 }, { "epoch": 0.7280707865951369, "grad_norm": 0.06711825728416443, "learning_rate": 1.9731262132804275e-06, "loss": 0.0422, "step": 5225 }, { "epoch": 0.7282101302863513, "grad_norm": 0.09321508556604385, "learning_rate": 1.9712506898324613e-06, "loss": 0.0544, "step": 5226 }, { "epoch": 0.7283494739775657, "grad_norm": 0.07077881693840027, "learning_rate": 1.969375839303447e-06, "loss": 0.0443, "step": 5227 }, { "epoch": 0.72848881766878, "grad_norm": 0.10213906317949295, "learning_rate": 1.967501662109928e-06, "loss": 0.0562, "step": 5228 }, { "epoch": 0.7286281613599944, "grad_norm": 0.09139721095561981, "learning_rate": 1.965628158668309e-06, "loss": 0.0523, "step": 5229 }, { "epoch": 0.7287675050512088, "grad_norm": 0.06201554089784622, "learning_rate": 1.9637553293948353e-06, "loss": 0.0541, "step": 5230 }, { "epoch": 0.7289068487424232, "grad_norm": 0.05014170706272125, "learning_rate": 1.9618831747056106e-06, "loss": 0.0388, "step": 5231 }, { "epoch": 0.7290461924336376, "grad_norm": 0.06648626178503036, "learning_rate": 1.960011695016581e-06, "loss": 0.0491, "step": 5232 }, { "epoch": 0.7291855361248519, "grad_norm": 0.0741441547870636, "learning_rate": 1.958140890743549e-06, "loss": 0.0488, "step": 5233 }, { "epoch": 0.7293248798160663, "grad_norm": 0.05688941851258278, "learning_rate": 1.956270762302166e-06, "loss": 0.0446, "step": 5234 }, { "epoch": 0.7294642235072807, "grad_norm": 0.10281325876712799, "learning_rate": 1.9544013101079295e-06, "loss": 0.0532, "step": 5235 }, { "epoch": 0.7296035671984951, "grad_norm": 0.09980747103691101, "learning_rate": 1.9525325345761887e-06, "loss": 0.0527, "step": 5236 }, { "epoch": 0.7297429108897094, "grad_norm": 0.06476645171642303, "learning_rate": 1.950664436122144e-06, "loss": 0.0627, "step": 5237 }, { "epoch": 0.7298822545809238, "grad_norm": 0.08258725702762604, "learning_rate": 1.948797015160845e-06, "loss": 0.054, "step": 5238 }, { "epoch": 0.7300215982721382, "grad_norm": 0.05810542032122612, "learning_rate": 1.94693027210719e-06, "loss": 0.0474, "step": 5239 }, { "epoch": 0.7301609419633526, "grad_norm": 0.09238329529762268, "learning_rate": 1.945064207375923e-06, "loss": 0.046, "step": 5240 }, { "epoch": 0.730300285654567, "grad_norm": 0.09009598195552826, "learning_rate": 1.9431988213816444e-06, "loss": 0.055, "step": 5241 }, { "epoch": 0.7304396293457813, "grad_norm": 0.08829962462186813, "learning_rate": 1.9413341145388013e-06, "loss": 0.0624, "step": 5242 }, { "epoch": 0.7305789730369957, "grad_norm": 0.07106379419565201, "learning_rate": 1.9394700872616856e-06, "loss": 0.052, "step": 5243 }, { "epoch": 0.7307183167282101, "grad_norm": 0.0844772458076477, "learning_rate": 1.9376067399644456e-06, "loss": 0.0595, "step": 5244 }, { "epoch": 0.7308576604194245, "grad_norm": 0.07274799048900604, "learning_rate": 1.93574407306107e-06, "loss": 0.0443, "step": 5245 }, { "epoch": 0.7309970041106388, "grad_norm": 0.10319989174604416, "learning_rate": 1.9338820869654056e-06, "loss": 0.0701, "step": 5246 }, { "epoch": 0.7311363478018533, "grad_norm": 0.08841748535633087, "learning_rate": 1.9320207820911387e-06, "loss": 0.0512, "step": 5247 }, { "epoch": 0.7312756914930677, "grad_norm": 0.09349069744348526, "learning_rate": 1.930160158851811e-06, "loss": 0.0519, "step": 5248 }, { "epoch": 0.7314150351842821, "grad_norm": 0.0679420605301857, "learning_rate": 1.9283002176608116e-06, "loss": 0.045, "step": 5249 }, { "epoch": 0.7315543788754965, "grad_norm": 0.06154279783368111, "learning_rate": 1.9264409589313767e-06, "loss": 0.0462, "step": 5250 }, { "epoch": 0.7316937225667108, "grad_norm": 0.07780464738607407, "learning_rate": 1.9245823830765874e-06, "loss": 0.0514, "step": 5251 }, { "epoch": 0.7318330662579252, "grad_norm": 0.10549511760473251, "learning_rate": 1.92272449050938e-06, "loss": 0.0526, "step": 5252 }, { "epoch": 0.7319724099491396, "grad_norm": 0.09557400643825531, "learning_rate": 1.920867281642538e-06, "loss": 0.0601, "step": 5253 }, { "epoch": 0.732111753640354, "grad_norm": 0.12365149706602097, "learning_rate": 1.919010756888685e-06, "loss": 0.0666, "step": 5254 }, { "epoch": 0.7322510973315683, "grad_norm": 0.07605300098657608, "learning_rate": 1.917154916660304e-06, "loss": 0.0509, "step": 5255 }, { "epoch": 0.7323904410227827, "grad_norm": 0.06770153343677521, "learning_rate": 1.9152997613697184e-06, "loss": 0.0582, "step": 5256 }, { "epoch": 0.7325297847139971, "grad_norm": 0.07751892507076263, "learning_rate": 1.913445291429099e-06, "loss": 0.0408, "step": 5257 }, { "epoch": 0.7326691284052115, "grad_norm": 0.08529652655124664, "learning_rate": 1.9115915072504683e-06, "loss": 0.0511, "step": 5258 }, { "epoch": 0.7328084720964259, "grad_norm": 0.07359704375267029, "learning_rate": 1.909738409245697e-06, "loss": 0.0522, "step": 5259 }, { "epoch": 0.7329478157876402, "grad_norm": 0.10180028527975082, "learning_rate": 1.9078859978264995e-06, "loss": 0.0571, "step": 5260 }, { "epoch": 0.7330871594788546, "grad_norm": 0.05635947734117508, "learning_rate": 1.9060342734044374e-06, "loss": 0.0514, "step": 5261 }, { "epoch": 0.733226503170069, "grad_norm": 0.08261220157146454, "learning_rate": 1.904183236390923e-06, "loss": 0.0515, "step": 5262 }, { "epoch": 0.7333658468612834, "grad_norm": 0.07646799087524414, "learning_rate": 1.9023328871972163e-06, "loss": 0.0586, "step": 5263 }, { "epoch": 0.7335051905524977, "grad_norm": 0.08462893217802048, "learning_rate": 1.9004832262344197e-06, "loss": 0.0567, "step": 5264 }, { "epoch": 0.7336445342437121, "grad_norm": 0.09039084613323212, "learning_rate": 1.8986342539134873e-06, "loss": 0.0506, "step": 5265 }, { "epoch": 0.7337838779349265, "grad_norm": 0.10085097700357437, "learning_rate": 1.8967859706452196e-06, "loss": 0.0547, "step": 5266 }, { "epoch": 0.7339232216261409, "grad_norm": 0.11471913009881973, "learning_rate": 1.894938376840262e-06, "loss": 0.0522, "step": 5267 }, { "epoch": 0.7340625653173553, "grad_norm": 0.08486364781856537, "learning_rate": 1.8930914729091055e-06, "loss": 0.0537, "step": 5268 }, { "epoch": 0.7342019090085696, "grad_norm": 0.058541446924209595, "learning_rate": 1.8912452592620916e-06, "loss": 0.0438, "step": 5269 }, { "epoch": 0.734341252699784, "grad_norm": 0.07698597759008408, "learning_rate": 1.8893997363094086e-06, "loss": 0.044, "step": 5270 }, { "epoch": 0.7344805963909984, "grad_norm": 0.08270854502916336, "learning_rate": 1.8875549044610886e-06, "loss": 0.0519, "step": 5271 }, { "epoch": 0.7346199400822128, "grad_norm": 0.11321929842233658, "learning_rate": 1.8857107641270084e-06, "loss": 0.0644, "step": 5272 }, { "epoch": 0.7347592837734271, "grad_norm": 0.07176367938518524, "learning_rate": 1.8838673157168956e-06, "loss": 0.0553, "step": 5273 }, { "epoch": 0.7348986274646415, "grad_norm": 0.09152586758136749, "learning_rate": 1.8820245596403253e-06, "loss": 0.0541, "step": 5274 }, { "epoch": 0.7350379711558559, "grad_norm": 0.08235985040664673, "learning_rate": 1.8801824963067105e-06, "loss": 0.051, "step": 5275 }, { "epoch": 0.7351773148470703, "grad_norm": 0.18267332017421722, "learning_rate": 1.8783411261253208e-06, "loss": 0.043, "step": 5276 }, { "epoch": 0.7353166585382847, "grad_norm": 0.09897443652153015, "learning_rate": 1.8765004495052623e-06, "loss": 0.0684, "step": 5277 }, { "epoch": 0.735456002229499, "grad_norm": 0.05411208048462868, "learning_rate": 1.8746604668554952e-06, "loss": 0.0453, "step": 5278 }, { "epoch": 0.7355953459207134, "grad_norm": 0.05879870802164078, "learning_rate": 1.8728211785848176e-06, "loss": 0.0462, "step": 5279 }, { "epoch": 0.7357346896119278, "grad_norm": 0.08629412204027176, "learning_rate": 1.8709825851018798e-06, "loss": 0.0536, "step": 5280 }, { "epoch": 0.7358740333031422, "grad_norm": 0.10865982621908188, "learning_rate": 1.869144686815178e-06, "loss": 0.0552, "step": 5281 }, { "epoch": 0.7360133769943565, "grad_norm": 0.09137021750211716, "learning_rate": 1.8673074841330447e-06, "loss": 0.0501, "step": 5282 }, { "epoch": 0.7361527206855709, "grad_norm": 0.08272400498390198, "learning_rate": 1.8654709774636676e-06, "loss": 0.0505, "step": 5283 }, { "epoch": 0.7362920643767853, "grad_norm": 0.0790167972445488, "learning_rate": 1.8636351672150771e-06, "loss": 0.0655, "step": 5284 }, { "epoch": 0.7364314080679997, "grad_norm": 0.09469353407621384, "learning_rate": 1.8618000537951496e-06, "loss": 0.0459, "step": 5285 }, { "epoch": 0.736570751759214, "grad_norm": 0.08758269995450974, "learning_rate": 1.8599656376116026e-06, "loss": 0.051, "step": 5286 }, { "epoch": 0.7367100954504285, "grad_norm": 0.12562721967697144, "learning_rate": 1.8581319190720038e-06, "loss": 0.0617, "step": 5287 }, { "epoch": 0.7368494391416429, "grad_norm": 0.08839815109968185, "learning_rate": 1.8562988985837632e-06, "loss": 0.0522, "step": 5288 }, { "epoch": 0.7369887828328573, "grad_norm": 0.08875248581171036, "learning_rate": 1.854466576554133e-06, "loss": 0.0613, "step": 5289 }, { "epoch": 0.7371281265240717, "grad_norm": 0.10925185680389404, "learning_rate": 1.8526349533902161e-06, "loss": 0.0519, "step": 5290 }, { "epoch": 0.737267470215286, "grad_norm": 0.09779360890388489, "learning_rate": 1.8508040294989588e-06, "loss": 0.0619, "step": 5291 }, { "epoch": 0.7374068139065004, "grad_norm": 0.07195991277694702, "learning_rate": 1.8489738052871486e-06, "loss": 0.0506, "step": 5292 }, { "epoch": 0.7375461575977148, "grad_norm": 0.07521449774503708, "learning_rate": 1.8471442811614177e-06, "loss": 0.0529, "step": 5293 }, { "epoch": 0.7376855012889292, "grad_norm": 0.12351705878973007, "learning_rate": 1.8453154575282472e-06, "loss": 0.058, "step": 5294 }, { "epoch": 0.7378248449801436, "grad_norm": 0.07343204319477081, "learning_rate": 1.8434873347939608e-06, "loss": 0.0555, "step": 5295 }, { "epoch": 0.7379641886713579, "grad_norm": 0.1531490981578827, "learning_rate": 1.8416599133647223e-06, "loss": 0.0615, "step": 5296 }, { "epoch": 0.7381035323625723, "grad_norm": 0.07196402549743652, "learning_rate": 1.839833193646547e-06, "loss": 0.0541, "step": 5297 }, { "epoch": 0.7382428760537867, "grad_norm": 0.08410970866680145, "learning_rate": 1.8380071760452862e-06, "loss": 0.0533, "step": 5298 }, { "epoch": 0.7383822197450011, "grad_norm": 0.0944700688123703, "learning_rate": 1.8361818609666433e-06, "loss": 0.0565, "step": 5299 }, { "epoch": 0.7385215634362154, "grad_norm": 0.10570145398378372, "learning_rate": 1.8343572488161576e-06, "loss": 0.0533, "step": 5300 }, { "epoch": 0.7386609071274298, "grad_norm": 0.09987246245145798, "learning_rate": 1.832533339999219e-06, "loss": 0.0652, "step": 5301 }, { "epoch": 0.7388002508186442, "grad_norm": 0.07716153562068939, "learning_rate": 1.8307101349210588e-06, "loss": 0.048, "step": 5302 }, { "epoch": 0.7389395945098586, "grad_norm": 0.14076368510723114, "learning_rate": 1.8288876339867511e-06, "loss": 0.05, "step": 5303 }, { "epoch": 0.739078938201073, "grad_norm": 0.0772218108177185, "learning_rate": 1.8270658376012112e-06, "loss": 0.044, "step": 5304 }, { "epoch": 0.7392182818922873, "grad_norm": 0.09546761959791183, "learning_rate": 1.8252447461692029e-06, "loss": 0.0595, "step": 5305 }, { "epoch": 0.7393576255835017, "grad_norm": 0.1583711802959442, "learning_rate": 1.8234243600953334e-06, "loss": 0.0657, "step": 5306 }, { "epoch": 0.7394969692747161, "grad_norm": 0.10809964686632156, "learning_rate": 1.8216046797840465e-06, "loss": 0.0581, "step": 5307 }, { "epoch": 0.7396363129659305, "grad_norm": 0.06341490894556046, "learning_rate": 1.8197857056396372e-06, "loss": 0.0501, "step": 5308 }, { "epoch": 0.7397756566571448, "grad_norm": 0.13258995115756989, "learning_rate": 1.8179674380662372e-06, "loss": 0.0578, "step": 5309 }, { "epoch": 0.7399150003483592, "grad_norm": 0.1481996327638626, "learning_rate": 1.8161498774678271e-06, "loss": 0.0611, "step": 5310 }, { "epoch": 0.7400543440395736, "grad_norm": 0.13332776725292206, "learning_rate": 1.8143330242482244e-06, "loss": 0.0609, "step": 5311 }, { "epoch": 0.740193687730788, "grad_norm": 0.11974728852510452, "learning_rate": 1.8125168788110932e-06, "loss": 0.0511, "step": 5312 }, { "epoch": 0.7403330314220024, "grad_norm": 0.16151905059814453, "learning_rate": 1.8107014415599416e-06, "loss": 0.0512, "step": 5313 }, { "epoch": 0.7404723751132167, "grad_norm": 0.09258044511079788, "learning_rate": 1.808886712898117e-06, "loss": 0.0411, "step": 5314 }, { "epoch": 0.7406117188044311, "grad_norm": 0.08680899441242218, "learning_rate": 1.8070726932288086e-06, "loss": 0.0545, "step": 5315 }, { "epoch": 0.7407510624956455, "grad_norm": 0.058932825922966, "learning_rate": 1.8052593829550525e-06, "loss": 0.0512, "step": 5316 }, { "epoch": 0.7408904061868599, "grad_norm": 0.06310011446475983, "learning_rate": 1.8034467824797252e-06, "loss": 0.0426, "step": 5317 }, { "epoch": 0.7410297498780742, "grad_norm": 0.10923843085765839, "learning_rate": 1.8016348922055448e-06, "loss": 0.047, "step": 5318 }, { "epoch": 0.7411690935692886, "grad_norm": 0.07396760582923889, "learning_rate": 1.7998237125350698e-06, "loss": 0.0441, "step": 5319 }, { "epoch": 0.741308437260503, "grad_norm": 0.0869896188378334, "learning_rate": 1.7980132438707059e-06, "loss": 0.0544, "step": 5320 }, { "epoch": 0.7414477809517174, "grad_norm": 0.10938174277544022, "learning_rate": 1.7962034866146954e-06, "loss": 0.0557, "step": 5321 }, { "epoch": 0.7415871246429317, "grad_norm": 0.1105688139796257, "learning_rate": 1.794394441169126e-06, "loss": 0.0605, "step": 5322 }, { "epoch": 0.7417264683341461, "grad_norm": 0.10744945704936981, "learning_rate": 1.7925861079359268e-06, "loss": 0.0627, "step": 5323 }, { "epoch": 0.7418658120253605, "grad_norm": 0.09392455220222473, "learning_rate": 1.790778487316871e-06, "loss": 0.0507, "step": 5324 }, { "epoch": 0.7420051557165749, "grad_norm": 0.06672252714633942, "learning_rate": 1.7889715797135643e-06, "loss": 0.0581, "step": 5325 }, { "epoch": 0.7421444994077893, "grad_norm": 0.05655377358198166, "learning_rate": 1.7871653855274634e-06, "loss": 0.0477, "step": 5326 }, { "epoch": 0.7422838430990036, "grad_norm": 0.06868085265159607, "learning_rate": 1.7853599051598658e-06, "loss": 0.0463, "step": 5327 }, { "epoch": 0.7424231867902181, "grad_norm": 0.07110856473445892, "learning_rate": 1.7835551390119033e-06, "loss": 0.0507, "step": 5328 }, { "epoch": 0.7425625304814325, "grad_norm": 0.07325731217861176, "learning_rate": 1.7817510874845585e-06, "loss": 0.0505, "step": 5329 }, { "epoch": 0.7427018741726469, "grad_norm": 0.14405810832977295, "learning_rate": 1.779947750978646e-06, "loss": 0.0593, "step": 5330 }, { "epoch": 0.7428412178638613, "grad_norm": 0.09496249258518219, "learning_rate": 1.7781451298948305e-06, "loss": 0.0595, "step": 5331 }, { "epoch": 0.7429805615550756, "grad_norm": 0.07084287703037262, "learning_rate": 1.7763432246336087e-06, "loss": 0.0523, "step": 5332 }, { "epoch": 0.74311990524629, "grad_norm": 0.09879923611879349, "learning_rate": 1.7745420355953253e-06, "loss": 0.0539, "step": 5333 }, { "epoch": 0.7432592489375044, "grad_norm": 0.10572215914726257, "learning_rate": 1.7727415631801648e-06, "loss": 0.0638, "step": 5334 }, { "epoch": 0.7433985926287188, "grad_norm": 0.09049949795007706, "learning_rate": 1.7709418077881495e-06, "loss": 0.0539, "step": 5335 }, { "epoch": 0.7435379363199331, "grad_norm": 0.08721312135457993, "learning_rate": 1.7691427698191422e-06, "loss": 0.0597, "step": 5336 }, { "epoch": 0.7436772800111475, "grad_norm": 0.08849333971738815, "learning_rate": 1.7673444496728493e-06, "loss": 0.0617, "step": 5337 }, { "epoch": 0.7438166237023619, "grad_norm": 0.06948335468769073, "learning_rate": 1.7655468477488191e-06, "loss": 0.0465, "step": 5338 }, { "epoch": 0.7439559673935763, "grad_norm": 0.07760929316282272, "learning_rate": 1.763749964446435e-06, "loss": 0.0512, "step": 5339 }, { "epoch": 0.7440953110847907, "grad_norm": 0.07566020637750626, "learning_rate": 1.7619538001649228e-06, "loss": 0.0543, "step": 5340 }, { "epoch": 0.744234654776005, "grad_norm": 0.1635419726371765, "learning_rate": 1.7601583553033502e-06, "loss": 0.0592, "step": 5341 }, { "epoch": 0.7443739984672194, "grad_norm": 0.05367574468255043, "learning_rate": 1.7583636302606254e-06, "loss": 0.0537, "step": 5342 }, { "epoch": 0.7445133421584338, "grad_norm": 0.0965673103928566, "learning_rate": 1.756569625435493e-06, "loss": 0.0572, "step": 5343 }, { "epoch": 0.7446526858496482, "grad_norm": 0.1456577330827713, "learning_rate": 1.7547763412265412e-06, "loss": 0.0665, "step": 5344 }, { "epoch": 0.7447920295408625, "grad_norm": 0.0674806535243988, "learning_rate": 1.7529837780321979e-06, "loss": 0.0447, "step": 5345 }, { "epoch": 0.7449313732320769, "grad_norm": 0.07956665009260178, "learning_rate": 1.751191936250729e-06, "loss": 0.0469, "step": 5346 }, { "epoch": 0.7450707169232913, "grad_norm": 0.08710294961929321, "learning_rate": 1.7494008162802378e-06, "loss": 0.0495, "step": 5347 }, { "epoch": 0.7452100606145057, "grad_norm": 0.09363483637571335, "learning_rate": 1.7476104185186737e-06, "loss": 0.0475, "step": 5348 }, { "epoch": 0.74534940430572, "grad_norm": 0.0920918807387352, "learning_rate": 1.7458207433638225e-06, "loss": 0.0561, "step": 5349 }, { "epoch": 0.7454887479969344, "grad_norm": 0.08153464645147324, "learning_rate": 1.7440317912133076e-06, "loss": 0.0595, "step": 5350 }, { "epoch": 0.7456280916881488, "grad_norm": 0.0984039232134819, "learning_rate": 1.7422435624645928e-06, "loss": 0.0603, "step": 5351 }, { "epoch": 0.7457674353793632, "grad_norm": 0.08088411390781403, "learning_rate": 1.7404560575149821e-06, "loss": 0.0487, "step": 5352 }, { "epoch": 0.7459067790705776, "grad_norm": 0.12712332606315613, "learning_rate": 1.7386692767616204e-06, "loss": 0.0617, "step": 5353 }, { "epoch": 0.7460461227617919, "grad_norm": 0.1128753200173378, "learning_rate": 1.7368832206014863e-06, "loss": 0.0488, "step": 5354 }, { "epoch": 0.7461854664530063, "grad_norm": 0.06923916935920715, "learning_rate": 1.735097889431404e-06, "loss": 0.0561, "step": 5355 }, { "epoch": 0.7463248101442207, "grad_norm": 0.09691797941923141, "learning_rate": 1.733313283648032e-06, "loss": 0.0487, "step": 5356 }, { "epoch": 0.7464641538354351, "grad_norm": 0.1993040293455124, "learning_rate": 1.7315294036478664e-06, "loss": 0.0627, "step": 5357 }, { "epoch": 0.7466034975266495, "grad_norm": 0.06920984387397766, "learning_rate": 1.7297462498272476e-06, "loss": 0.0562, "step": 5358 }, { "epoch": 0.7467428412178638, "grad_norm": 0.08951245993375778, "learning_rate": 1.727963822582352e-06, "loss": 0.0491, "step": 5359 }, { "epoch": 0.7468821849090782, "grad_norm": 0.07924480736255646, "learning_rate": 1.7261821223091918e-06, "loss": 0.0418, "step": 5360 }, { "epoch": 0.7470215286002926, "grad_norm": 0.10200174897909164, "learning_rate": 1.7244011494036228e-06, "loss": 0.0511, "step": 5361 }, { "epoch": 0.747160872291507, "grad_norm": 0.17328466475009918, "learning_rate": 1.722620904261334e-06, "loss": 0.0541, "step": 5362 }, { "epoch": 0.7473002159827213, "grad_norm": 0.08201539516448975, "learning_rate": 1.720841387277858e-06, "loss": 0.0558, "step": 5363 }, { "epoch": 0.7474395596739357, "grad_norm": 0.06928930431604385, "learning_rate": 1.7190625988485593e-06, "loss": 0.05, "step": 5364 }, { "epoch": 0.7475789033651501, "grad_norm": 0.07307639718055725, "learning_rate": 1.7172845393686465e-06, "loss": 0.0514, "step": 5365 }, { "epoch": 0.7477182470563645, "grad_norm": 0.0763840526342392, "learning_rate": 1.7155072092331648e-06, "loss": 0.0495, "step": 5366 }, { "epoch": 0.7478575907475788, "grad_norm": 0.08450911194086075, "learning_rate": 1.7137306088369948e-06, "loss": 0.0569, "step": 5367 }, { "epoch": 0.7479969344387933, "grad_norm": 0.09918801486492157, "learning_rate": 1.7119547385748552e-06, "loss": 0.0538, "step": 5368 }, { "epoch": 0.7481362781300077, "grad_norm": 0.06516231596469879, "learning_rate": 1.7101795988413056e-06, "loss": 0.049, "step": 5369 }, { "epoch": 0.7482756218212221, "grad_norm": 0.07826415449380875, "learning_rate": 1.708405190030743e-06, "loss": 0.0479, "step": 5370 }, { "epoch": 0.7484149655124365, "grad_norm": 0.06923072040081024, "learning_rate": 1.7066315125373984e-06, "loss": 0.0563, "step": 5371 }, { "epoch": 0.7485543092036508, "grad_norm": 0.09719180315732956, "learning_rate": 1.7048585667553414e-06, "loss": 0.0583, "step": 5372 }, { "epoch": 0.7486936528948652, "grad_norm": 0.08129526674747467, "learning_rate": 1.7030863530784814e-06, "loss": 0.0624, "step": 5373 }, { "epoch": 0.7488329965860796, "grad_norm": 0.06732587516307831, "learning_rate": 1.7013148719005652e-06, "loss": 0.0565, "step": 5374 }, { "epoch": 0.748972340277294, "grad_norm": 0.08818040788173676, "learning_rate": 1.6995441236151732e-06, "loss": 0.0534, "step": 5375 }, { "epoch": 0.7491116839685084, "grad_norm": 0.08749286085367203, "learning_rate": 1.6977741086157273e-06, "loss": 0.0555, "step": 5376 }, { "epoch": 0.7492510276597227, "grad_norm": 0.06416649371385574, "learning_rate": 1.6960048272954821e-06, "loss": 0.0517, "step": 5377 }, { "epoch": 0.7493903713509371, "grad_norm": 0.06384382396936417, "learning_rate": 1.6942362800475343e-06, "loss": 0.0494, "step": 5378 }, { "epoch": 0.7495297150421515, "grad_norm": 0.07389501482248306, "learning_rate": 1.6924684672648117e-06, "loss": 0.0488, "step": 5379 }, { "epoch": 0.7496690587333659, "grad_norm": 0.09497328847646713, "learning_rate": 1.6907013893400838e-06, "loss": 0.0569, "step": 5380 }, { "epoch": 0.7498084024245802, "grad_norm": 0.08779667317867279, "learning_rate": 1.6889350466659554e-06, "loss": 0.0462, "step": 5381 }, { "epoch": 0.7499477461157946, "grad_norm": 0.05944215878844261, "learning_rate": 1.687169439634867e-06, "loss": 0.0442, "step": 5382 }, { "epoch": 0.750087089807009, "grad_norm": 0.09572306275367737, "learning_rate": 1.6854045686390947e-06, "loss": 0.0654, "step": 5383 }, { "epoch": 0.7502264334982234, "grad_norm": 0.06543020904064178, "learning_rate": 1.6836404340707535e-06, "loss": 0.0605, "step": 5384 }, { "epoch": 0.7503657771894378, "grad_norm": 0.08330555260181427, "learning_rate": 1.6818770363217957e-06, "loss": 0.0518, "step": 5385 }, { "epoch": 0.7505051208806521, "grad_norm": 0.06407363712787628, "learning_rate": 1.6801143757840043e-06, "loss": 0.0536, "step": 5386 }, { "epoch": 0.7506444645718665, "grad_norm": 0.0677042007446289, "learning_rate": 1.678352452849007e-06, "loss": 0.0531, "step": 5387 }, { "epoch": 0.7507838082630809, "grad_norm": 0.09171424061059952, "learning_rate": 1.6765912679082592e-06, "loss": 0.048, "step": 5388 }, { "epoch": 0.7509231519542953, "grad_norm": 0.09015354514122009, "learning_rate": 1.6748308213530555e-06, "loss": 0.0623, "step": 5389 }, { "epoch": 0.7510624956455096, "grad_norm": 0.13299217820167542, "learning_rate": 1.6730711135745287e-06, "loss": 0.0549, "step": 5390 }, { "epoch": 0.751201839336724, "grad_norm": 0.07577147334814072, "learning_rate": 1.6713121449636471e-06, "loss": 0.0439, "step": 5391 }, { "epoch": 0.7513411830279384, "grad_norm": 0.1537642627954483, "learning_rate": 1.6695539159112112e-06, "loss": 0.0562, "step": 5392 }, { "epoch": 0.7514805267191528, "grad_norm": 0.08361765742301941, "learning_rate": 1.6677964268078584e-06, "loss": 0.0499, "step": 5393 }, { "epoch": 0.7516198704103672, "grad_norm": 0.10397906601428986, "learning_rate": 1.666039678044064e-06, "loss": 0.0507, "step": 5394 }, { "epoch": 0.7517592141015815, "grad_norm": 0.07035347819328308, "learning_rate": 1.6642836700101396e-06, "loss": 0.0571, "step": 5395 }, { "epoch": 0.7518985577927959, "grad_norm": 0.0977560356259346, "learning_rate": 1.6625284030962257e-06, "loss": 0.0526, "step": 5396 }, { "epoch": 0.7520379014840103, "grad_norm": 0.14014488458633423, "learning_rate": 1.6607738776923072e-06, "loss": 0.0556, "step": 5397 }, { "epoch": 0.7521772451752247, "grad_norm": 0.07675596326589584, "learning_rate": 1.659020094188195e-06, "loss": 0.0576, "step": 5398 }, { "epoch": 0.752316588866439, "grad_norm": 0.07649026811122894, "learning_rate": 1.657267052973544e-06, "loss": 0.0422, "step": 5399 }, { "epoch": 0.7524559325576534, "grad_norm": 0.0631459504365921, "learning_rate": 1.6555147544378364e-06, "loss": 0.0548, "step": 5400 }, { "epoch": 0.7525952762488678, "grad_norm": 0.10046930611133575, "learning_rate": 1.653763198970394e-06, "loss": 0.0442, "step": 5401 }, { "epoch": 0.7527346199400822, "grad_norm": 0.08199012279510498, "learning_rate": 1.652012386960375e-06, "loss": 0.0578, "step": 5402 }, { "epoch": 0.7528739636312965, "grad_norm": 0.12188100069761276, "learning_rate": 1.6502623187967675e-06, "loss": 0.0637, "step": 5403 }, { "epoch": 0.7530133073225109, "grad_norm": 0.07197286933660507, "learning_rate": 1.6485129948683954e-06, "loss": 0.0576, "step": 5404 }, { "epoch": 0.7531526510137253, "grad_norm": 0.08050046116113663, "learning_rate": 1.64676441556392e-06, "loss": 0.0516, "step": 5405 }, { "epoch": 0.7532919947049397, "grad_norm": 0.09041454643011093, "learning_rate": 1.6450165812718377e-06, "loss": 0.0547, "step": 5406 }, { "epoch": 0.7534313383961541, "grad_norm": 0.08005570620298386, "learning_rate": 1.643269492380473e-06, "loss": 0.0522, "step": 5407 }, { "epoch": 0.7535706820873685, "grad_norm": 0.08760234713554382, "learning_rate": 1.6415231492779942e-06, "loss": 0.0541, "step": 5408 }, { "epoch": 0.7537100257785829, "grad_norm": 0.06828713417053223, "learning_rate": 1.6397775523523946e-06, "loss": 0.0567, "step": 5409 }, { "epoch": 0.7538493694697973, "grad_norm": 0.09023524075746536, "learning_rate": 1.6380327019915088e-06, "loss": 0.052, "step": 5410 }, { "epoch": 0.7539887131610117, "grad_norm": 0.12161649763584137, "learning_rate": 1.6362885985830001e-06, "loss": 0.0514, "step": 5411 }, { "epoch": 0.7541280568522261, "grad_norm": 0.09048926830291748, "learning_rate": 1.6345452425143705e-06, "loss": 0.0502, "step": 5412 }, { "epoch": 0.7542674005434404, "grad_norm": 0.17122946679592133, "learning_rate": 1.6328026341729547e-06, "loss": 0.058, "step": 5413 }, { "epoch": 0.7544067442346548, "grad_norm": 0.07239051163196564, "learning_rate": 1.6310607739459188e-06, "loss": 0.0548, "step": 5414 }, { "epoch": 0.7545460879258692, "grad_norm": 0.17110060155391693, "learning_rate": 1.6293196622202635e-06, "loss": 0.0779, "step": 5415 }, { "epoch": 0.7546854316170836, "grad_norm": 0.07184194028377533, "learning_rate": 1.6275792993828249e-06, "loss": 0.0518, "step": 5416 }, { "epoch": 0.754824775308298, "grad_norm": 0.11386380344629288, "learning_rate": 1.6258396858202746e-06, "loss": 0.0592, "step": 5417 }, { "epoch": 0.7549641189995123, "grad_norm": 0.08744070678949356, "learning_rate": 1.6241008219191107e-06, "loss": 0.0553, "step": 5418 }, { "epoch": 0.7551034626907267, "grad_norm": 0.05661440268158913, "learning_rate": 1.622362708065673e-06, "loss": 0.0409, "step": 5419 }, { "epoch": 0.7552428063819411, "grad_norm": 0.09177888184785843, "learning_rate": 1.6206253446461278e-06, "loss": 0.0498, "step": 5420 }, { "epoch": 0.7553821500731555, "grad_norm": 0.07166701555252075, "learning_rate": 1.618888732046478e-06, "loss": 0.0552, "step": 5421 }, { "epoch": 0.7555214937643698, "grad_norm": 0.09073983132839203, "learning_rate": 1.6171528706525596e-06, "loss": 0.0489, "step": 5422 }, { "epoch": 0.7556608374555842, "grad_norm": 0.04987004026770592, "learning_rate": 1.6154177608500415e-06, "loss": 0.0472, "step": 5423 }, { "epoch": 0.7558001811467986, "grad_norm": 0.10360661894083023, "learning_rate": 1.6136834030244292e-06, "loss": 0.0587, "step": 5424 }, { "epoch": 0.755939524838013, "grad_norm": 0.06882062554359436, "learning_rate": 1.61194979756105e-06, "loss": 0.0521, "step": 5425 }, { "epoch": 0.7560788685292273, "grad_norm": 0.1181250810623169, "learning_rate": 1.6102169448450756e-06, "loss": 0.0628, "step": 5426 }, { "epoch": 0.7562182122204417, "grad_norm": 0.06761769205331802, "learning_rate": 1.6084848452615076e-06, "loss": 0.048, "step": 5427 }, { "epoch": 0.7563575559116561, "grad_norm": 0.07919558137655258, "learning_rate": 1.6067534991951754e-06, "loss": 0.0475, "step": 5428 }, { "epoch": 0.7564968996028705, "grad_norm": 0.09357103705406189, "learning_rate": 1.6050229070307488e-06, "loss": 0.0563, "step": 5429 }, { "epoch": 0.7566362432940849, "grad_norm": 0.08800536394119263, "learning_rate": 1.6032930691527214e-06, "loss": 0.0514, "step": 5430 }, { "epoch": 0.7567755869852992, "grad_norm": 0.07247783988714218, "learning_rate": 1.6015639859454278e-06, "loss": 0.0553, "step": 5431 }, { "epoch": 0.7569149306765136, "grad_norm": 0.09975702315568924, "learning_rate": 1.5998356577930274e-06, "loss": 0.053, "step": 5432 }, { "epoch": 0.757054274367728, "grad_norm": 0.14559301733970642, "learning_rate": 1.5981080850795171e-06, "loss": 0.0655, "step": 5433 }, { "epoch": 0.7571936180589424, "grad_norm": 0.08453655987977982, "learning_rate": 1.5963812681887248e-06, "loss": 0.046, "step": 5434 }, { "epoch": 0.7573329617501567, "grad_norm": 0.06564454734325409, "learning_rate": 1.5946552075043092e-06, "loss": 0.046, "step": 5435 }, { "epoch": 0.7574723054413711, "grad_norm": 0.13160473108291626, "learning_rate": 1.592929903409759e-06, "loss": 0.0455, "step": 5436 }, { "epoch": 0.7576116491325855, "grad_norm": 0.2385934591293335, "learning_rate": 1.5912053562884e-06, "loss": 0.0632, "step": 5437 }, { "epoch": 0.7577509928237999, "grad_norm": 0.07531750202178955, "learning_rate": 1.589481566523388e-06, "loss": 0.0547, "step": 5438 }, { "epoch": 0.7578903365150143, "grad_norm": 0.05339108780026436, "learning_rate": 1.587758534497707e-06, "loss": 0.04, "step": 5439 }, { "epoch": 0.7580296802062286, "grad_norm": 0.07933761179447174, "learning_rate": 1.5860362605941788e-06, "loss": 0.0369, "step": 5440 }, { "epoch": 0.758169023897443, "grad_norm": 0.06291171163320541, "learning_rate": 1.5843147451954493e-06, "loss": 0.061, "step": 5441 }, { "epoch": 0.7583083675886574, "grad_norm": 0.09781903773546219, "learning_rate": 1.5825939886840036e-06, "loss": 0.0696, "step": 5442 }, { "epoch": 0.7584477112798718, "grad_norm": 0.09966251254081726, "learning_rate": 1.5808739914421512e-06, "loss": 0.055, "step": 5443 }, { "epoch": 0.7585870549710861, "grad_norm": 0.06589116156101227, "learning_rate": 1.5791547538520386e-06, "loss": 0.0431, "step": 5444 }, { "epoch": 0.7587263986623005, "grad_norm": 0.07352028787136078, "learning_rate": 1.5774362762956414e-06, "loss": 0.0564, "step": 5445 }, { "epoch": 0.7588657423535149, "grad_norm": 0.1326705366373062, "learning_rate": 1.5757185591547653e-06, "loss": 0.0625, "step": 5446 }, { "epoch": 0.7590050860447293, "grad_norm": 0.10228926688432693, "learning_rate": 1.574001602811046e-06, "loss": 0.06, "step": 5447 }, { "epoch": 0.7591444297359438, "grad_norm": 0.07681425660848618, "learning_rate": 1.5722854076459538e-06, "loss": 0.0471, "step": 5448 }, { "epoch": 0.7592837734271581, "grad_norm": 0.17877525091171265, "learning_rate": 1.57056997404079e-06, "loss": 0.0542, "step": 5449 }, { "epoch": 0.7594231171183725, "grad_norm": 0.09987873584032059, "learning_rate": 1.5688553023766823e-06, "loss": 0.0664, "step": 5450 }, { "epoch": 0.7595624608095869, "grad_norm": 0.0968761146068573, "learning_rate": 1.5671413930345902e-06, "loss": 0.0475, "step": 5451 }, { "epoch": 0.7597018045008013, "grad_norm": 0.08232636749744415, "learning_rate": 1.5654282463953074e-06, "loss": 0.0526, "step": 5452 }, { "epoch": 0.7598411481920156, "grad_norm": 0.0969313383102417, "learning_rate": 1.5637158628394572e-06, "loss": 0.0512, "step": 5453 }, { "epoch": 0.75998049188323, "grad_norm": 0.07450516521930695, "learning_rate": 1.5620042427474892e-06, "loss": 0.049, "step": 5454 }, { "epoch": 0.7601198355744444, "grad_norm": 0.0917234718799591, "learning_rate": 1.5602933864996872e-06, "loss": 0.061, "step": 5455 }, { "epoch": 0.7602591792656588, "grad_norm": 0.08861460536718369, "learning_rate": 1.5585832944761686e-06, "loss": 0.0537, "step": 5456 }, { "epoch": 0.7603985229568732, "grad_norm": 0.14863553643226624, "learning_rate": 1.5568739670568693e-06, "loss": 0.0604, "step": 5457 }, { "epoch": 0.7605378666480875, "grad_norm": 0.08105555176734924, "learning_rate": 1.555165404621567e-06, "loss": 0.0548, "step": 5458 }, { "epoch": 0.7606772103393019, "grad_norm": 0.08047549426555634, "learning_rate": 1.5534576075498664e-06, "loss": 0.0461, "step": 5459 }, { "epoch": 0.7608165540305163, "grad_norm": 0.11138498038053513, "learning_rate": 1.5517505762211982e-06, "loss": 0.0557, "step": 5460 }, { "epoch": 0.7609558977217307, "grad_norm": 0.06316171586513519, "learning_rate": 1.5500443110148283e-06, "loss": 0.0425, "step": 5461 }, { "epoch": 0.761095241412945, "grad_norm": 0.14955754578113556, "learning_rate": 1.5483388123098474e-06, "loss": 0.0663, "step": 5462 }, { "epoch": 0.7612345851041594, "grad_norm": 0.09361644089221954, "learning_rate": 1.546634080485181e-06, "loss": 0.0572, "step": 5463 }, { "epoch": 0.7613739287953738, "grad_norm": 0.08288153260946274, "learning_rate": 1.5449301159195785e-06, "loss": 0.0596, "step": 5464 }, { "epoch": 0.7615132724865882, "grad_norm": 0.10126719623804092, "learning_rate": 1.5432269189916237e-06, "loss": 0.0578, "step": 5465 }, { "epoch": 0.7616526161778026, "grad_norm": 0.08639319986104965, "learning_rate": 1.54152449007973e-06, "loss": 0.0532, "step": 5466 }, { "epoch": 0.7617919598690169, "grad_norm": 0.11275330185890198, "learning_rate": 1.539822829562136e-06, "loss": 0.0524, "step": 5467 }, { "epoch": 0.7619313035602313, "grad_norm": 0.07628880441188812, "learning_rate": 1.5381219378169103e-06, "loss": 0.0504, "step": 5468 }, { "epoch": 0.7620706472514457, "grad_norm": 0.07247210294008255, "learning_rate": 1.5364218152219545e-06, "loss": 0.0586, "step": 5469 }, { "epoch": 0.7622099909426601, "grad_norm": 0.11351710557937622, "learning_rate": 1.5347224621549978e-06, "loss": 0.0555, "step": 5470 }, { "epoch": 0.7623493346338744, "grad_norm": 0.10006111115217209, "learning_rate": 1.5330238789935963e-06, "loss": 0.0494, "step": 5471 }, { "epoch": 0.7624886783250888, "grad_norm": 0.1246267780661583, "learning_rate": 1.5313260661151352e-06, "loss": 0.0548, "step": 5472 }, { "epoch": 0.7626280220163032, "grad_norm": 0.07097012549638748, "learning_rate": 1.5296290238968303e-06, "loss": 0.0478, "step": 5473 }, { "epoch": 0.7627673657075176, "grad_norm": 0.07091274112462997, "learning_rate": 1.5279327527157289e-06, "loss": 0.0593, "step": 5474 }, { "epoch": 0.762906709398732, "grad_norm": 0.06321600824594498, "learning_rate": 1.526237252948699e-06, "loss": 0.0524, "step": 5475 }, { "epoch": 0.7630460530899463, "grad_norm": 0.08213012665510178, "learning_rate": 1.5245425249724443e-06, "loss": 0.0682, "step": 5476 }, { "epoch": 0.7631853967811607, "grad_norm": 0.06094379723072052, "learning_rate": 1.5228485691634964e-06, "loss": 0.0422, "step": 5477 }, { "epoch": 0.7633247404723751, "grad_norm": 0.06849697232246399, "learning_rate": 1.5211553858982115e-06, "loss": 0.0557, "step": 5478 }, { "epoch": 0.7634640841635895, "grad_norm": 0.11099973320960999, "learning_rate": 1.5194629755527746e-06, "loss": 0.0549, "step": 5479 }, { "epoch": 0.7636034278548038, "grad_norm": 0.10007534921169281, "learning_rate": 1.517771338503203e-06, "loss": 0.0562, "step": 5480 }, { "epoch": 0.7637427715460182, "grad_norm": 0.11406931281089783, "learning_rate": 1.5160804751253405e-06, "loss": 0.0525, "step": 5481 }, { "epoch": 0.7638821152372326, "grad_norm": 0.10320593416690826, "learning_rate": 1.5143903857948572e-06, "loss": 0.0615, "step": 5482 }, { "epoch": 0.764021458928447, "grad_norm": 0.06603875756263733, "learning_rate": 1.5127010708872513e-06, "loss": 0.0558, "step": 5483 }, { "epoch": 0.7641608026196614, "grad_norm": 0.08036568015813828, "learning_rate": 1.5110125307778506e-06, "loss": 0.0493, "step": 5484 }, { "epoch": 0.7643001463108757, "grad_norm": 0.112222820520401, "learning_rate": 1.5093247658418125e-06, "loss": 0.0582, "step": 5485 }, { "epoch": 0.7644394900020901, "grad_norm": 0.08834892511367798, "learning_rate": 1.5076377764541162e-06, "loss": 0.0624, "step": 5486 }, { "epoch": 0.7645788336933045, "grad_norm": 0.09968392550945282, "learning_rate": 1.5059515629895754e-06, "loss": 0.0521, "step": 5487 }, { "epoch": 0.764718177384519, "grad_norm": 0.08315145969390869, "learning_rate": 1.5042661258228268e-06, "loss": 0.0616, "step": 5488 }, { "epoch": 0.7648575210757333, "grad_norm": 0.11356015503406525, "learning_rate": 1.502581465328335e-06, "loss": 0.053, "step": 5489 }, { "epoch": 0.7649968647669477, "grad_norm": 0.09515377134084702, "learning_rate": 1.5008975818803939e-06, "loss": 0.0555, "step": 5490 }, { "epoch": 0.7651362084581621, "grad_norm": 0.09004177898168564, "learning_rate": 1.4992144758531257e-06, "loss": 0.0563, "step": 5491 }, { "epoch": 0.7652755521493765, "grad_norm": 0.07882235199213028, "learning_rate": 1.4975321476204767e-06, "loss": 0.0465, "step": 5492 }, { "epoch": 0.7654148958405909, "grad_norm": 0.07764694839715958, "learning_rate": 1.4958505975562205e-06, "loss": 0.0473, "step": 5493 }, { "epoch": 0.7655542395318052, "grad_norm": 0.11575952917337418, "learning_rate": 1.49416982603396e-06, "loss": 0.0483, "step": 5494 }, { "epoch": 0.7656935832230196, "grad_norm": 0.10205117613077164, "learning_rate": 1.4924898334271265e-06, "loss": 0.0485, "step": 5495 }, { "epoch": 0.765832926914234, "grad_norm": 0.06570300459861755, "learning_rate": 1.4908106201089722e-06, "loss": 0.0515, "step": 5496 }, { "epoch": 0.7659722706054484, "grad_norm": 0.09420111775398254, "learning_rate": 1.4891321864525826e-06, "loss": 0.0605, "step": 5497 }, { "epoch": 0.7661116142966627, "grad_norm": 0.059380870312452316, "learning_rate": 1.4874545328308681e-06, "loss": 0.0471, "step": 5498 }, { "epoch": 0.7662509579878771, "grad_norm": 0.09774579107761383, "learning_rate": 1.4857776596165635e-06, "loss": 0.0526, "step": 5499 }, { "epoch": 0.7663903016790915, "grad_norm": 0.05339952930808067, "learning_rate": 1.4841015671822306e-06, "loss": 0.0411, "step": 5500 }, { "epoch": 0.7665296453703059, "grad_norm": 0.09044470638036728, "learning_rate": 1.4824262559002595e-06, "loss": 0.0604, "step": 5501 }, { "epoch": 0.7666689890615203, "grad_norm": 0.12144847214221954, "learning_rate": 1.480751726142869e-06, "loss": 0.0643, "step": 5502 }, { "epoch": 0.7668083327527346, "grad_norm": 0.0822177529335022, "learning_rate": 1.4790779782820991e-06, "loss": 0.0503, "step": 5503 }, { "epoch": 0.766947676443949, "grad_norm": 0.0796395018696785, "learning_rate": 1.4774050126898164e-06, "loss": 0.043, "step": 5504 }, { "epoch": 0.7670870201351634, "grad_norm": 0.06527993828058243, "learning_rate": 1.4757328297377177e-06, "loss": 0.0467, "step": 5505 }, { "epoch": 0.7672263638263778, "grad_norm": 0.07095197588205338, "learning_rate": 1.474061429797326e-06, "loss": 0.0515, "step": 5506 }, { "epoch": 0.7673657075175921, "grad_norm": 0.07285799831151962, "learning_rate": 1.4723908132399838e-06, "loss": 0.0514, "step": 5507 }, { "epoch": 0.7675050512088065, "grad_norm": 0.11935143172740936, "learning_rate": 1.4707209804368683e-06, "loss": 0.0572, "step": 5508 }, { "epoch": 0.7676443949000209, "grad_norm": 0.06156020238995552, "learning_rate": 1.4690519317589742e-06, "loss": 0.0524, "step": 5509 }, { "epoch": 0.7677837385912353, "grad_norm": 0.20625683665275574, "learning_rate": 1.4673836675771298e-06, "loss": 0.0494, "step": 5510 }, { "epoch": 0.7679230822824497, "grad_norm": 0.05343709513545036, "learning_rate": 1.4657161882619814e-06, "loss": 0.045, "step": 5511 }, { "epoch": 0.768062425973664, "grad_norm": 0.08543966710567474, "learning_rate": 1.4640494941840072e-06, "loss": 0.0646, "step": 5512 }, { "epoch": 0.7682017696648784, "grad_norm": 0.07765746861696243, "learning_rate": 1.4623835857135099e-06, "loss": 0.0549, "step": 5513 }, { "epoch": 0.7683411133560928, "grad_norm": 0.07625564187765121, "learning_rate": 1.460718463220615e-06, "loss": 0.0561, "step": 5514 }, { "epoch": 0.7684804570473072, "grad_norm": 0.0837671160697937, "learning_rate": 1.4590541270752723e-06, "loss": 0.053, "step": 5515 }, { "epoch": 0.7686198007385215, "grad_norm": 0.07643146812915802, "learning_rate": 1.457390577647262e-06, "loss": 0.0467, "step": 5516 }, { "epoch": 0.7687591444297359, "grad_norm": 0.0733250230550766, "learning_rate": 1.455727815306187e-06, "loss": 0.0424, "step": 5517 }, { "epoch": 0.7688984881209503, "grad_norm": 0.10826227813959122, "learning_rate": 1.454065840421473e-06, "loss": 0.065, "step": 5518 }, { "epoch": 0.7690378318121647, "grad_norm": 0.1019284576177597, "learning_rate": 1.4524046533623758e-06, "loss": 0.0623, "step": 5519 }, { "epoch": 0.769177175503379, "grad_norm": 0.07491384446620941, "learning_rate": 1.450744254497972e-06, "loss": 0.0533, "step": 5520 }, { "epoch": 0.7693165191945934, "grad_norm": 0.08437422662973404, "learning_rate": 1.4490846441971624e-06, "loss": 0.0523, "step": 5521 }, { "epoch": 0.7694558628858078, "grad_norm": 0.07020269334316254, "learning_rate": 1.4474258228286758e-06, "loss": 0.0559, "step": 5522 }, { "epoch": 0.7695952065770222, "grad_norm": 0.07079312950372696, "learning_rate": 1.4457677907610646e-06, "loss": 0.0466, "step": 5523 }, { "epoch": 0.7697345502682366, "grad_norm": 0.05416329950094223, "learning_rate": 1.4441105483627088e-06, "loss": 0.0431, "step": 5524 }, { "epoch": 0.7698738939594509, "grad_norm": 0.06324688345193863, "learning_rate": 1.442454096001804e-06, "loss": 0.0477, "step": 5525 }, { "epoch": 0.7700132376506653, "grad_norm": 0.10110067576169968, "learning_rate": 1.4407984340463794e-06, "loss": 0.065, "step": 5526 }, { "epoch": 0.7701525813418797, "grad_norm": 0.10500342398881912, "learning_rate": 1.4391435628642853e-06, "loss": 0.0642, "step": 5527 }, { "epoch": 0.7702919250330941, "grad_norm": 0.08072929084300995, "learning_rate": 1.437489482823195e-06, "loss": 0.0539, "step": 5528 }, { "epoch": 0.7704312687243086, "grad_norm": 0.11368343234062195, "learning_rate": 1.4358361942906097e-06, "loss": 0.0572, "step": 5529 }, { "epoch": 0.7705706124155229, "grad_norm": 0.08239424973726273, "learning_rate": 1.4341836976338485e-06, "loss": 0.0555, "step": 5530 }, { "epoch": 0.7707099561067373, "grad_norm": 0.07270600646734238, "learning_rate": 1.4325319932200631e-06, "loss": 0.053, "step": 5531 }, { "epoch": 0.7708492997979517, "grad_norm": 0.07953186333179474, "learning_rate": 1.43088108141622e-06, "loss": 0.0619, "step": 5532 }, { "epoch": 0.7709886434891661, "grad_norm": 0.0877450630068779, "learning_rate": 1.4292309625891166e-06, "loss": 0.0557, "step": 5533 }, { "epoch": 0.7711279871803804, "grad_norm": 0.07912842929363251, "learning_rate": 1.4275816371053725e-06, "loss": 0.0544, "step": 5534 }, { "epoch": 0.7712673308715948, "grad_norm": 0.09334947913885117, "learning_rate": 1.425933105331429e-06, "loss": 0.0594, "step": 5535 }, { "epoch": 0.7714066745628092, "grad_norm": 0.11887780576944351, "learning_rate": 1.424285367633551e-06, "loss": 0.0663, "step": 5536 }, { "epoch": 0.7715460182540236, "grad_norm": 0.1354939490556717, "learning_rate": 1.422638424377829e-06, "loss": 0.0582, "step": 5537 }, { "epoch": 0.771685361945238, "grad_norm": 0.08943279832601547, "learning_rate": 1.420992275930178e-06, "loss": 0.0518, "step": 5538 }, { "epoch": 0.7718247056364523, "grad_norm": 0.08346547931432724, "learning_rate": 1.4193469226563322e-06, "loss": 0.0477, "step": 5539 }, { "epoch": 0.7719640493276667, "grad_norm": 0.1251590996980667, "learning_rate": 1.4177023649218536e-06, "loss": 0.049, "step": 5540 }, { "epoch": 0.7721033930188811, "grad_norm": 0.0858345478773117, "learning_rate": 1.4160586030921224e-06, "loss": 0.0611, "step": 5541 }, { "epoch": 0.7722427367100955, "grad_norm": 0.09090550988912582, "learning_rate": 1.4144156375323486e-06, "loss": 0.0478, "step": 5542 }, { "epoch": 0.7723820804013098, "grad_norm": 0.09873270988464355, "learning_rate": 1.4127734686075589e-06, "loss": 0.0577, "step": 5543 }, { "epoch": 0.7725214240925242, "grad_norm": 0.08778005093336105, "learning_rate": 1.411132096682606e-06, "loss": 0.0634, "step": 5544 }, { "epoch": 0.7726607677837386, "grad_norm": 0.104679174721241, "learning_rate": 1.4094915221221677e-06, "loss": 0.0514, "step": 5545 }, { "epoch": 0.772800111474953, "grad_norm": 0.07462243735790253, "learning_rate": 1.4078517452907403e-06, "loss": 0.0482, "step": 5546 }, { "epoch": 0.7729394551661674, "grad_norm": 0.07480854541063309, "learning_rate": 1.4062127665526438e-06, "loss": 0.0507, "step": 5547 }, { "epoch": 0.7730787988573817, "grad_norm": 0.07514097541570663, "learning_rate": 1.4045745862720227e-06, "loss": 0.05, "step": 5548 }, { "epoch": 0.7732181425485961, "grad_norm": 0.0764220729470253, "learning_rate": 1.4029372048128454e-06, "loss": 0.0506, "step": 5549 }, { "epoch": 0.7733574862398105, "grad_norm": 0.0712374672293663, "learning_rate": 1.401300622538897e-06, "loss": 0.0485, "step": 5550 }, { "epoch": 0.7734968299310249, "grad_norm": 0.08686919510364532, "learning_rate": 1.3996648398137924e-06, "loss": 0.0514, "step": 5551 }, { "epoch": 0.7736361736222392, "grad_norm": 0.10872773081064224, "learning_rate": 1.398029857000962e-06, "loss": 0.0624, "step": 5552 }, { "epoch": 0.7737755173134536, "grad_norm": 0.12062761187553406, "learning_rate": 1.3963956744636642e-06, "loss": 0.0569, "step": 5553 }, { "epoch": 0.773914861004668, "grad_norm": 0.1130627766251564, "learning_rate": 1.394762292564974e-06, "loss": 0.0552, "step": 5554 }, { "epoch": 0.7740542046958824, "grad_norm": 0.08627474308013916, "learning_rate": 1.393129711667794e-06, "loss": 0.0564, "step": 5555 }, { "epoch": 0.7741935483870968, "grad_norm": 0.13156399130821228, "learning_rate": 1.3914979321348488e-06, "loss": 0.0556, "step": 5556 }, { "epoch": 0.7743328920783111, "grad_norm": 0.0990133285522461, "learning_rate": 1.3898669543286763e-06, "loss": 0.0496, "step": 5557 }, { "epoch": 0.7744722357695255, "grad_norm": 0.07740854471921921, "learning_rate": 1.3882367786116458e-06, "loss": 0.0514, "step": 5558 }, { "epoch": 0.7746115794607399, "grad_norm": 0.09418664872646332, "learning_rate": 1.3866074053459465e-06, "loss": 0.0584, "step": 5559 }, { "epoch": 0.7747509231519543, "grad_norm": 0.07390876859426498, "learning_rate": 1.3849788348935856e-06, "loss": 0.0509, "step": 5560 }, { "epoch": 0.7748902668431686, "grad_norm": 0.08805354684591293, "learning_rate": 1.3833510676163963e-06, "loss": 0.0555, "step": 5561 }, { "epoch": 0.775029610534383, "grad_norm": 0.07279153168201447, "learning_rate": 1.3817241038760287e-06, "loss": 0.0473, "step": 5562 }, { "epoch": 0.7751689542255974, "grad_norm": 0.1407492458820343, "learning_rate": 1.3800979440339602e-06, "loss": 0.0558, "step": 5563 }, { "epoch": 0.7753082979168118, "grad_norm": 0.08109073340892792, "learning_rate": 1.3784725884514833e-06, "loss": 0.0558, "step": 5564 }, { "epoch": 0.7754476416080262, "grad_norm": 0.12507127225399017, "learning_rate": 1.3768480374897163e-06, "loss": 0.0556, "step": 5565 }, { "epoch": 0.7755869852992405, "grad_norm": 0.11396759003400803, "learning_rate": 1.3752242915095993e-06, "loss": 0.0603, "step": 5566 }, { "epoch": 0.7757263289904549, "grad_norm": 0.11071231961250305, "learning_rate": 1.3736013508718892e-06, "loss": 0.0505, "step": 5567 }, { "epoch": 0.7758656726816693, "grad_norm": 0.1620585322380066, "learning_rate": 1.371979215937166e-06, "loss": 0.0573, "step": 5568 }, { "epoch": 0.7760050163728838, "grad_norm": 0.13089817762374878, "learning_rate": 1.3703578870658312e-06, "loss": 0.061, "step": 5569 }, { "epoch": 0.7761443600640981, "grad_norm": 0.09250002354383469, "learning_rate": 1.3687373646181095e-06, "loss": 0.0587, "step": 5570 }, { "epoch": 0.7762837037553125, "grad_norm": 0.09253475815057755, "learning_rate": 1.3671176489540406e-06, "loss": 0.0556, "step": 5571 }, { "epoch": 0.7764230474465269, "grad_norm": 0.08597064763307571, "learning_rate": 1.3654987404334917e-06, "loss": 0.0532, "step": 5572 }, { "epoch": 0.7765623911377413, "grad_norm": 0.08989037573337555, "learning_rate": 1.363880639416144e-06, "loss": 0.0545, "step": 5573 }, { "epoch": 0.7767017348289557, "grad_norm": 0.07258813828229904, "learning_rate": 1.3622633462615058e-06, "loss": 0.0537, "step": 5574 }, { "epoch": 0.77684107852017, "grad_norm": 0.0829290971159935, "learning_rate": 1.3606468613288997e-06, "loss": 0.0515, "step": 5575 }, { "epoch": 0.7769804222113844, "grad_norm": 0.10563842207193375, "learning_rate": 1.359031184977473e-06, "loss": 0.0566, "step": 5576 }, { "epoch": 0.7771197659025988, "grad_norm": 0.09323306381702423, "learning_rate": 1.3574163175661936e-06, "loss": 0.0609, "step": 5577 }, { "epoch": 0.7772591095938132, "grad_norm": 0.07663381099700928, "learning_rate": 1.3558022594538473e-06, "loss": 0.0509, "step": 5578 }, { "epoch": 0.7773984532850275, "grad_norm": 0.1297454684972763, "learning_rate": 1.3541890109990386e-06, "loss": 0.0623, "step": 5579 }, { "epoch": 0.7775377969762419, "grad_norm": 0.054535944014787674, "learning_rate": 1.3525765725601964e-06, "loss": 0.0503, "step": 5580 }, { "epoch": 0.7776771406674563, "grad_norm": 0.1261109858751297, "learning_rate": 1.3509649444955697e-06, "loss": 0.0565, "step": 5581 }, { "epoch": 0.7778164843586707, "grad_norm": 0.054174959659576416, "learning_rate": 1.3493541271632227e-06, "loss": 0.0535, "step": 5582 }, { "epoch": 0.7779558280498851, "grad_norm": 0.06972308456897736, "learning_rate": 1.3477441209210418e-06, "loss": 0.0517, "step": 5583 }, { "epoch": 0.7780951717410994, "grad_norm": 0.09154791384935379, "learning_rate": 1.3461349261267347e-06, "loss": 0.0643, "step": 5584 }, { "epoch": 0.7782345154323138, "grad_norm": 0.14510425925254822, "learning_rate": 1.3445265431378297e-06, "loss": 0.0568, "step": 5585 }, { "epoch": 0.7783738591235282, "grad_norm": 0.09647058695554733, "learning_rate": 1.3429189723116693e-06, "loss": 0.0546, "step": 5586 }, { "epoch": 0.7785132028147426, "grad_norm": 0.05099973455071449, "learning_rate": 1.3413122140054219e-06, "loss": 0.0546, "step": 5587 }, { "epoch": 0.7786525465059569, "grad_norm": 0.07751783728599548, "learning_rate": 1.3397062685760715e-06, "loss": 0.0542, "step": 5588 }, { "epoch": 0.7787918901971713, "grad_norm": 0.06860939413309097, "learning_rate": 1.3381011363804208e-06, "loss": 0.0523, "step": 5589 }, { "epoch": 0.7789312338883857, "grad_norm": 0.10939830541610718, "learning_rate": 1.3364968177750953e-06, "loss": 0.0678, "step": 5590 }, { "epoch": 0.7790705775796001, "grad_norm": 0.09743180125951767, "learning_rate": 1.3348933131165387e-06, "loss": 0.0562, "step": 5591 }, { "epoch": 0.7792099212708145, "grad_norm": 0.1149846613407135, "learning_rate": 1.333290622761011e-06, "loss": 0.0561, "step": 5592 }, { "epoch": 0.7793492649620288, "grad_norm": 0.06897925585508347, "learning_rate": 1.3316887470645956e-06, "loss": 0.0469, "step": 5593 }, { "epoch": 0.7794886086532432, "grad_norm": 0.10406090319156647, "learning_rate": 1.3300876863831903e-06, "loss": 0.0535, "step": 5594 }, { "epoch": 0.7796279523444576, "grad_norm": 0.08246184885501862, "learning_rate": 1.3284874410725174e-06, "loss": 0.0493, "step": 5595 }, { "epoch": 0.779767296035672, "grad_norm": 0.11756274849176407, "learning_rate": 1.3268880114881112e-06, "loss": 0.0477, "step": 5596 }, { "epoch": 0.7799066397268863, "grad_norm": 0.0775131806731224, "learning_rate": 1.3252893979853304e-06, "loss": 0.0555, "step": 5597 }, { "epoch": 0.7800459834181007, "grad_norm": 0.14444462954998016, "learning_rate": 1.3236916009193517e-06, "loss": 0.0513, "step": 5598 }, { "epoch": 0.7801853271093151, "grad_norm": 0.09788239747285843, "learning_rate": 1.3220946206451678e-06, "loss": 0.0511, "step": 5599 }, { "epoch": 0.7803246708005295, "grad_norm": 0.08872998505830765, "learning_rate": 1.3204984575175893e-06, "loss": 0.0531, "step": 5600 }, { "epoch": 0.7804640144917439, "grad_norm": 0.08861781656742096, "learning_rate": 1.31890311189125e-06, "loss": 0.0439, "step": 5601 }, { "epoch": 0.7806033581829582, "grad_norm": 0.13949529826641083, "learning_rate": 1.317308584120599e-06, "loss": 0.0565, "step": 5602 }, { "epoch": 0.7807427018741726, "grad_norm": 0.0625741258263588, "learning_rate": 1.3157148745599035e-06, "loss": 0.0478, "step": 5603 }, { "epoch": 0.780882045565387, "grad_norm": 0.08383699506521225, "learning_rate": 1.314121983563248e-06, "loss": 0.0566, "step": 5604 }, { "epoch": 0.7810213892566014, "grad_norm": 0.09047859162092209, "learning_rate": 1.3125299114845375e-06, "loss": 0.0496, "step": 5605 }, { "epoch": 0.7811607329478157, "grad_norm": 0.08185169100761414, "learning_rate": 1.3109386586774958e-06, "loss": 0.0541, "step": 5606 }, { "epoch": 0.7813000766390301, "grad_norm": 0.12303061783313751, "learning_rate": 1.3093482254956602e-06, "loss": 0.0582, "step": 5607 }, { "epoch": 0.7814394203302445, "grad_norm": 0.07281708717346191, "learning_rate": 1.3077586122923896e-06, "loss": 0.0502, "step": 5608 }, { "epoch": 0.781578764021459, "grad_norm": 0.09772007912397385, "learning_rate": 1.3061698194208616e-06, "loss": 0.059, "step": 5609 }, { "epoch": 0.7817181077126734, "grad_norm": 0.12704457342624664, "learning_rate": 1.3045818472340683e-06, "loss": 0.0515, "step": 5610 }, { "epoch": 0.7818574514038877, "grad_norm": 0.07917404919862747, "learning_rate": 1.3029946960848188e-06, "loss": 0.0455, "step": 5611 }, { "epoch": 0.7819967950951021, "grad_norm": 0.07512355595827103, "learning_rate": 1.3014083663257443e-06, "loss": 0.0538, "step": 5612 }, { "epoch": 0.7821361387863165, "grad_norm": 0.10206585377454758, "learning_rate": 1.299822858309292e-06, "loss": 0.0504, "step": 5613 }, { "epoch": 0.7822754824775309, "grad_norm": 0.10411110520362854, "learning_rate": 1.2982381723877235e-06, "loss": 0.0578, "step": 5614 }, { "epoch": 0.7824148261687452, "grad_norm": 0.06498763710260391, "learning_rate": 1.2966543089131196e-06, "loss": 0.0449, "step": 5615 }, { "epoch": 0.7825541698599596, "grad_norm": 0.16467028856277466, "learning_rate": 1.295071268237379e-06, "loss": 0.0601, "step": 5616 }, { "epoch": 0.782693513551174, "grad_norm": 0.10377269983291626, "learning_rate": 1.2934890507122195e-06, "loss": 0.0527, "step": 5617 }, { "epoch": 0.7828328572423884, "grad_norm": 0.1277051717042923, "learning_rate": 1.2919076566891703e-06, "loss": 0.0604, "step": 5618 }, { "epoch": 0.7829722009336028, "grad_norm": 0.0724375918507576, "learning_rate": 1.2903270865195837e-06, "loss": 0.0467, "step": 5619 }, { "epoch": 0.7831115446248171, "grad_norm": 0.07401979714632034, "learning_rate": 1.2887473405546254e-06, "loss": 0.0521, "step": 5620 }, { "epoch": 0.7832508883160315, "grad_norm": 0.06306411325931549, "learning_rate": 1.2871684191452772e-06, "loss": 0.0496, "step": 5621 }, { "epoch": 0.7833902320072459, "grad_norm": 0.06701214611530304, "learning_rate": 1.2855903226423412e-06, "loss": 0.0482, "step": 5622 }, { "epoch": 0.7835295756984603, "grad_norm": 0.13637125492095947, "learning_rate": 1.2840130513964338e-06, "loss": 0.0568, "step": 5623 }, { "epoch": 0.7836689193896746, "grad_norm": 0.10295648872852325, "learning_rate": 1.2824366057579917e-06, "loss": 0.056, "step": 5624 }, { "epoch": 0.783808263080889, "grad_norm": 0.08597618341445923, "learning_rate": 1.2808609860772598e-06, "loss": 0.0527, "step": 5625 }, { "epoch": 0.7839476067721034, "grad_norm": 0.1137407049536705, "learning_rate": 1.2792861927043071e-06, "loss": 0.0595, "step": 5626 }, { "epoch": 0.7840869504633178, "grad_norm": 0.1055063009262085, "learning_rate": 1.277712225989019e-06, "loss": 0.0571, "step": 5627 }, { "epoch": 0.7842262941545322, "grad_norm": 0.12983012199401855, "learning_rate": 1.2761390862810907e-06, "loss": 0.0663, "step": 5628 }, { "epoch": 0.7843656378457465, "grad_norm": 0.05385255068540573, "learning_rate": 1.274566773930041e-06, "loss": 0.0472, "step": 5629 }, { "epoch": 0.7845049815369609, "grad_norm": 0.07241395115852356, "learning_rate": 1.272995289285202e-06, "loss": 0.0666, "step": 5630 }, { "epoch": 0.7846443252281753, "grad_norm": 0.07437734305858612, "learning_rate": 1.2714246326957213e-06, "loss": 0.0478, "step": 5631 }, { "epoch": 0.7847836689193897, "grad_norm": 0.10292033851146698, "learning_rate": 1.2698548045105608e-06, "loss": 0.0644, "step": 5632 }, { "epoch": 0.784923012610604, "grad_norm": 0.10575927048921585, "learning_rate": 1.2682858050785018e-06, "loss": 0.0438, "step": 5633 }, { "epoch": 0.7850623563018184, "grad_norm": 0.09293752163648605, "learning_rate": 1.266717634748142e-06, "loss": 0.0515, "step": 5634 }, { "epoch": 0.7852016999930328, "grad_norm": 0.08450354635715485, "learning_rate": 1.2651502938678917e-06, "loss": 0.049, "step": 5635 }, { "epoch": 0.7853410436842472, "grad_norm": 0.10955359041690826, "learning_rate": 1.2635837827859766e-06, "loss": 0.0582, "step": 5636 }, { "epoch": 0.7854803873754616, "grad_norm": 0.06684806942939758, "learning_rate": 1.2620181018504406e-06, "loss": 0.0603, "step": 5637 }, { "epoch": 0.7856197310666759, "grad_norm": 0.09928850829601288, "learning_rate": 1.2604532514091444e-06, "loss": 0.0629, "step": 5638 }, { "epoch": 0.7857590747578903, "grad_norm": 0.13887237012386322, "learning_rate": 1.258889231809759e-06, "loss": 0.0458, "step": 5639 }, { "epoch": 0.7858984184491047, "grad_norm": 0.07048286497592926, "learning_rate": 1.2573260433997768e-06, "loss": 0.0548, "step": 5640 }, { "epoch": 0.7860377621403191, "grad_norm": 0.13571849465370178, "learning_rate": 1.2557636865265e-06, "loss": 0.0556, "step": 5641 }, { "epoch": 0.7861771058315334, "grad_norm": 0.09240985661745071, "learning_rate": 1.254202161537051e-06, "loss": 0.0453, "step": 5642 }, { "epoch": 0.7863164495227478, "grad_norm": 0.09004154056310654, "learning_rate": 1.2526414687783616e-06, "loss": 0.054, "step": 5643 }, { "epoch": 0.7864557932139622, "grad_norm": 0.13528837263584137, "learning_rate": 1.2510816085971849e-06, "loss": 0.0605, "step": 5644 }, { "epoch": 0.7865951369051766, "grad_norm": 0.1002768874168396, "learning_rate": 1.2495225813400864e-06, "loss": 0.0512, "step": 5645 }, { "epoch": 0.786734480596391, "grad_norm": 0.1212386041879654, "learning_rate": 1.247964387353446e-06, "loss": 0.051, "step": 5646 }, { "epoch": 0.7868738242876053, "grad_norm": 0.12497983127832413, "learning_rate": 1.2464070269834566e-06, "loss": 0.0647, "step": 5647 }, { "epoch": 0.7870131679788197, "grad_norm": 0.08660150319337845, "learning_rate": 1.2448505005761297e-06, "loss": 0.0565, "step": 5648 }, { "epoch": 0.7871525116700342, "grad_norm": 0.08174680918455124, "learning_rate": 1.2432948084772917e-06, "loss": 0.0488, "step": 5649 }, { "epoch": 0.7872918553612486, "grad_norm": 0.08756338804960251, "learning_rate": 1.2417399510325785e-06, "loss": 0.0496, "step": 5650 }, { "epoch": 0.787431199052463, "grad_norm": 0.11102615296840668, "learning_rate": 1.2401859285874474e-06, "loss": 0.056, "step": 5651 }, { "epoch": 0.7875705427436773, "grad_norm": 0.10170993953943253, "learning_rate": 1.2386327414871635e-06, "loss": 0.0565, "step": 5652 }, { "epoch": 0.7877098864348917, "grad_norm": 0.07703004777431488, "learning_rate": 1.237080390076812e-06, "loss": 0.0521, "step": 5653 }, { "epoch": 0.7878492301261061, "grad_norm": 0.05759332329034805, "learning_rate": 1.2355288747012878e-06, "loss": 0.0448, "step": 5654 }, { "epoch": 0.7879885738173205, "grad_norm": 0.06827756017446518, "learning_rate": 1.2339781957053031e-06, "loss": 0.0536, "step": 5655 }, { "epoch": 0.7881279175085348, "grad_norm": 0.05617311969399452, "learning_rate": 1.232428353433387e-06, "loss": 0.0443, "step": 5656 }, { "epoch": 0.7882672611997492, "grad_norm": 0.06097723916172981, "learning_rate": 1.2308793482298724e-06, "loss": 0.0541, "step": 5657 }, { "epoch": 0.7884066048909636, "grad_norm": 0.1339043378829956, "learning_rate": 1.2293311804389162e-06, "loss": 0.0559, "step": 5658 }, { "epoch": 0.788545948582178, "grad_norm": 0.07694771885871887, "learning_rate": 1.227783850404487e-06, "loss": 0.0656, "step": 5659 }, { "epoch": 0.7886852922733923, "grad_norm": 0.09522897005081177, "learning_rate": 1.2262373584703642e-06, "loss": 0.0502, "step": 5660 }, { "epoch": 0.7888246359646067, "grad_norm": 0.08624923229217529, "learning_rate": 1.2246917049801449e-06, "loss": 0.0484, "step": 5661 }, { "epoch": 0.7889639796558211, "grad_norm": 0.049460891634225845, "learning_rate": 1.2231468902772354e-06, "loss": 0.0385, "step": 5662 }, { "epoch": 0.7891033233470355, "grad_norm": 0.07017392665147781, "learning_rate": 1.221602914704862e-06, "loss": 0.0476, "step": 5663 }, { "epoch": 0.7892426670382499, "grad_norm": 0.12162832915782928, "learning_rate": 1.2200597786060565e-06, "loss": 0.0706, "step": 5664 }, { "epoch": 0.7893820107294642, "grad_norm": 0.0683995708823204, "learning_rate": 1.2185174823236711e-06, "loss": 0.0487, "step": 5665 }, { "epoch": 0.7895213544206786, "grad_norm": 0.10611320286989212, "learning_rate": 1.2169760262003693e-06, "loss": 0.056, "step": 5666 }, { "epoch": 0.789660698111893, "grad_norm": 0.06412319093942642, "learning_rate": 1.2154354105786276e-06, "loss": 0.05, "step": 5667 }, { "epoch": 0.7898000418031074, "grad_norm": 0.12526538968086243, "learning_rate": 1.2138956358007325e-06, "loss": 0.0516, "step": 5668 }, { "epoch": 0.7899393854943217, "grad_norm": 0.14409878849983215, "learning_rate": 1.212356702208789e-06, "loss": 0.0585, "step": 5669 }, { "epoch": 0.7900787291855361, "grad_norm": 0.09915833920240402, "learning_rate": 1.210818610144714e-06, "loss": 0.0584, "step": 5670 }, { "epoch": 0.7902180728767505, "grad_norm": 0.04848919063806534, "learning_rate": 1.209281359950234e-06, "loss": 0.0456, "step": 5671 }, { "epoch": 0.7903574165679649, "grad_norm": 0.10710623860359192, "learning_rate": 1.2077449519668943e-06, "loss": 0.0663, "step": 5672 }, { "epoch": 0.7904967602591793, "grad_norm": 0.11257968842983246, "learning_rate": 1.2062093865360458e-06, "loss": 0.0573, "step": 5673 }, { "epoch": 0.7906361039503936, "grad_norm": 0.14742006361484528, "learning_rate": 1.2046746639988593e-06, "loss": 0.0607, "step": 5674 }, { "epoch": 0.790775447641608, "grad_norm": 0.10353761166334152, "learning_rate": 1.2031407846963122e-06, "loss": 0.0597, "step": 5675 }, { "epoch": 0.7909147913328224, "grad_norm": 0.05781884863972664, "learning_rate": 1.201607748969199e-06, "loss": 0.0541, "step": 5676 }, { "epoch": 0.7910541350240368, "grad_norm": 0.062094658613204956, "learning_rate": 1.2000755571581263e-06, "loss": 0.0462, "step": 5677 }, { "epoch": 0.7911934787152511, "grad_norm": 0.07775457948446274, "learning_rate": 1.1985442096035116e-06, "loss": 0.0487, "step": 5678 }, { "epoch": 0.7913328224064655, "grad_norm": 0.08944392204284668, "learning_rate": 1.1970137066455834e-06, "loss": 0.0398, "step": 5679 }, { "epoch": 0.7914721660976799, "grad_norm": 0.11411356180906296, "learning_rate": 1.1954840486243857e-06, "loss": 0.052, "step": 5680 }, { "epoch": 0.7916115097888943, "grad_norm": 0.0740697905421257, "learning_rate": 1.193955235879775e-06, "loss": 0.0552, "step": 5681 }, { "epoch": 0.7917508534801087, "grad_norm": 0.08516433835029602, "learning_rate": 1.1924272687514182e-06, "loss": 0.0583, "step": 5682 }, { "epoch": 0.791890197171323, "grad_norm": 0.09687300026416779, "learning_rate": 1.1909001475787917e-06, "loss": 0.0418, "step": 5683 }, { "epoch": 0.7920295408625374, "grad_norm": 0.07520893961191177, "learning_rate": 1.1893738727011894e-06, "loss": 0.0585, "step": 5684 }, { "epoch": 0.7921688845537518, "grad_norm": 0.07477658987045288, "learning_rate": 1.187848444457716e-06, "loss": 0.0533, "step": 5685 }, { "epoch": 0.7923082282449662, "grad_norm": 0.09447512775659561, "learning_rate": 1.1863238631872843e-06, "loss": 0.0482, "step": 5686 }, { "epoch": 0.7924475719361805, "grad_norm": 0.07350917160511017, "learning_rate": 1.184800129228622e-06, "loss": 0.0544, "step": 5687 }, { "epoch": 0.7925869156273949, "grad_norm": 0.11326347291469574, "learning_rate": 1.1832772429202716e-06, "loss": 0.0534, "step": 5688 }, { "epoch": 0.7927262593186094, "grad_norm": 0.0731981098651886, "learning_rate": 1.1817552046005777e-06, "loss": 0.0503, "step": 5689 }, { "epoch": 0.7928656030098238, "grad_norm": 0.09527608007192612, "learning_rate": 1.1802340146077045e-06, "loss": 0.0565, "step": 5690 }, { "epoch": 0.7930049467010382, "grad_norm": 0.05295353755354881, "learning_rate": 1.1787136732796289e-06, "loss": 0.0519, "step": 5691 }, { "epoch": 0.7931442903922525, "grad_norm": 0.07380827516317368, "learning_rate": 1.177194180954132e-06, "loss": 0.0517, "step": 5692 }, { "epoch": 0.7932836340834669, "grad_norm": 0.09235968440771103, "learning_rate": 1.1756755379688133e-06, "loss": 0.0524, "step": 5693 }, { "epoch": 0.7934229777746813, "grad_norm": 0.068274587392807, "learning_rate": 1.174157744661078e-06, "loss": 0.0538, "step": 5694 }, { "epoch": 0.7935623214658957, "grad_norm": 0.09058407694101334, "learning_rate": 1.1726408013681473e-06, "loss": 0.0571, "step": 5695 }, { "epoch": 0.79370166515711, "grad_norm": 0.09021344035863876, "learning_rate": 1.1711247084270494e-06, "loss": 0.0589, "step": 5696 }, { "epoch": 0.7938410088483244, "grad_norm": 0.10318170487880707, "learning_rate": 1.1696094661746267e-06, "loss": 0.0568, "step": 5697 }, { "epoch": 0.7939803525395388, "grad_norm": 0.08830767124891281, "learning_rate": 1.1680950749475328e-06, "loss": 0.0495, "step": 5698 }, { "epoch": 0.7941196962307532, "grad_norm": 0.06986168026924133, "learning_rate": 1.1665815350822291e-06, "loss": 0.0539, "step": 5699 }, { "epoch": 0.7942590399219676, "grad_norm": 0.07946350425481796, "learning_rate": 1.1650688469149884e-06, "loss": 0.0464, "step": 5700 }, { "epoch": 0.7943983836131819, "grad_norm": 0.0758076086640358, "learning_rate": 1.1635570107818973e-06, "loss": 0.0559, "step": 5701 }, { "epoch": 0.7945377273043963, "grad_norm": 0.08826667815446854, "learning_rate": 1.1620460270188516e-06, "loss": 0.0576, "step": 5702 }, { "epoch": 0.7946770709956107, "grad_norm": 0.13672469556331635, "learning_rate": 1.1605358959615559e-06, "loss": 0.0573, "step": 5703 }, { "epoch": 0.7948164146868251, "grad_norm": 0.07898011058568954, "learning_rate": 1.159026617945529e-06, "loss": 0.0572, "step": 5704 }, { "epoch": 0.7949557583780394, "grad_norm": 0.07752328366041183, "learning_rate": 1.1575181933060952e-06, "loss": 0.0486, "step": 5705 }, { "epoch": 0.7950951020692538, "grad_norm": 0.07689359039068222, "learning_rate": 1.156010622378395e-06, "loss": 0.0553, "step": 5706 }, { "epoch": 0.7952344457604682, "grad_norm": 0.07437769323587418, "learning_rate": 1.1545039054973733e-06, "loss": 0.053, "step": 5707 }, { "epoch": 0.7953737894516826, "grad_norm": 0.08536723256111145, "learning_rate": 1.1529980429977899e-06, "loss": 0.0463, "step": 5708 }, { "epoch": 0.795513133142897, "grad_norm": 0.07288666069507599, "learning_rate": 1.151493035214214e-06, "loss": 0.0516, "step": 5709 }, { "epoch": 0.7956524768341113, "grad_norm": 0.06961433589458466, "learning_rate": 1.1499888824810223e-06, "loss": 0.0502, "step": 5710 }, { "epoch": 0.7957918205253257, "grad_norm": 0.081065334379673, "learning_rate": 1.148485585132403e-06, "loss": 0.0498, "step": 5711 }, { "epoch": 0.7959311642165401, "grad_norm": 0.1360531747341156, "learning_rate": 1.1469831435023542e-06, "loss": 0.0639, "step": 5712 }, { "epoch": 0.7960705079077545, "grad_norm": 0.08013562113046646, "learning_rate": 1.1454815579246874e-06, "loss": 0.0585, "step": 5713 }, { "epoch": 0.7962098515989688, "grad_norm": 0.10206633806228638, "learning_rate": 1.143980828733018e-06, "loss": 0.054, "step": 5714 }, { "epoch": 0.7963491952901832, "grad_norm": 0.07638934999704361, "learning_rate": 1.1424809562607725e-06, "loss": 0.0484, "step": 5715 }, { "epoch": 0.7964885389813976, "grad_norm": 0.08055087178945541, "learning_rate": 1.1409819408411898e-06, "loss": 0.0443, "step": 5716 }, { "epoch": 0.796627882672612, "grad_norm": 0.08525735139846802, "learning_rate": 1.1394837828073184e-06, "loss": 0.0569, "step": 5717 }, { "epoch": 0.7967672263638264, "grad_norm": 0.08405753970146179, "learning_rate": 1.1379864824920116e-06, "loss": 0.0445, "step": 5718 }, { "epoch": 0.7969065700550407, "grad_norm": 0.1307571977376938, "learning_rate": 1.1364900402279394e-06, "loss": 0.0593, "step": 5719 }, { "epoch": 0.7970459137462551, "grad_norm": 0.07266631722450256, "learning_rate": 1.134994456347574e-06, "loss": 0.0613, "step": 5720 }, { "epoch": 0.7971852574374695, "grad_norm": 0.05990241840481758, "learning_rate": 1.1334997311832003e-06, "loss": 0.0554, "step": 5721 }, { "epoch": 0.7973246011286839, "grad_norm": 0.07490632683038712, "learning_rate": 1.132005865066912e-06, "loss": 0.049, "step": 5722 }, { "epoch": 0.7974639448198982, "grad_norm": 0.12022388726472855, "learning_rate": 1.1305128583306125e-06, "loss": 0.061, "step": 5723 }, { "epoch": 0.7976032885111126, "grad_norm": 0.0782085731625557, "learning_rate": 1.1290207113060158e-06, "loss": 0.0552, "step": 5724 }, { "epoch": 0.797742632202327, "grad_norm": 0.11611826717853546, "learning_rate": 1.127529424324641e-06, "loss": 0.0561, "step": 5725 }, { "epoch": 0.7978819758935414, "grad_norm": 0.06394842267036438, "learning_rate": 1.1260389977178166e-06, "loss": 0.0474, "step": 5726 }, { "epoch": 0.7980213195847558, "grad_norm": 0.12298116832971573, "learning_rate": 1.1245494318166844e-06, "loss": 0.0621, "step": 5727 }, { "epoch": 0.7981606632759701, "grad_norm": 0.08073738217353821, "learning_rate": 1.1230607269521886e-06, "loss": 0.0513, "step": 5728 }, { "epoch": 0.7983000069671845, "grad_norm": 0.09255991876125336, "learning_rate": 1.1215728834550877e-06, "loss": 0.056, "step": 5729 }, { "epoch": 0.798439350658399, "grad_norm": 0.06478375196456909, "learning_rate": 1.1200859016559473e-06, "loss": 0.0466, "step": 5730 }, { "epoch": 0.7985786943496134, "grad_norm": 0.08219046145677567, "learning_rate": 1.1185997818851402e-06, "loss": 0.0549, "step": 5731 }, { "epoch": 0.7987180380408277, "grad_norm": 0.07029028981924057, "learning_rate": 1.1171145244728454e-06, "loss": 0.0508, "step": 5732 }, { "epoch": 0.7988573817320421, "grad_norm": 0.10744709521532059, "learning_rate": 1.1156301297490563e-06, "loss": 0.0579, "step": 5733 }, { "epoch": 0.7989967254232565, "grad_norm": 0.11592234671115875, "learning_rate": 1.1141465980435713e-06, "loss": 0.0595, "step": 5734 }, { "epoch": 0.7991360691144709, "grad_norm": 0.06318596005439758, "learning_rate": 1.112663929685997e-06, "loss": 0.0514, "step": 5735 }, { "epoch": 0.7992754128056853, "grad_norm": 0.10720410943031311, "learning_rate": 1.111182125005747e-06, "loss": 0.0549, "step": 5736 }, { "epoch": 0.7994147564968996, "grad_norm": 0.09338155388832092, "learning_rate": 1.1097011843320454e-06, "loss": 0.056, "step": 5737 }, { "epoch": 0.799554100188114, "grad_norm": 0.11056691408157349, "learning_rate": 1.1082211079939248e-06, "loss": 0.06, "step": 5738 }, { "epoch": 0.7996934438793284, "grad_norm": 0.09884604066610336, "learning_rate": 1.106741896320222e-06, "loss": 0.0507, "step": 5739 }, { "epoch": 0.7998327875705428, "grad_norm": 0.08582939207553864, "learning_rate": 1.1052635496395864e-06, "loss": 0.0508, "step": 5740 }, { "epoch": 0.7999721312617571, "grad_norm": 0.1329447329044342, "learning_rate": 1.1037860682804708e-06, "loss": 0.06, "step": 5741 }, { "epoch": 0.8001114749529715, "grad_norm": 0.08846328407526016, "learning_rate": 1.1023094525711397e-06, "loss": 0.0534, "step": 5742 }, { "epoch": 0.8002508186441859, "grad_norm": 0.12673233449459076, "learning_rate": 1.1008337028396616e-06, "loss": 0.0569, "step": 5743 }, { "epoch": 0.8003901623354003, "grad_norm": 0.07635264843702316, "learning_rate": 1.099358819413915e-06, "loss": 0.0462, "step": 5744 }, { "epoch": 0.8005295060266147, "grad_norm": 0.07297080010175705, "learning_rate": 1.0978848026215865e-06, "loss": 0.054, "step": 5745 }, { "epoch": 0.800668849717829, "grad_norm": 0.09324780851602554, "learning_rate": 1.0964116527901686e-06, "loss": 0.0555, "step": 5746 }, { "epoch": 0.8008081934090434, "grad_norm": 0.09585738927125931, "learning_rate": 1.094939370246959e-06, "loss": 0.056, "step": 5747 }, { "epoch": 0.8009475371002578, "grad_norm": 0.08522653579711914, "learning_rate": 1.093467955319068e-06, "loss": 0.0466, "step": 5748 }, { "epoch": 0.8010868807914722, "grad_norm": 0.07985446602106094, "learning_rate": 1.0919974083334106e-06, "loss": 0.0451, "step": 5749 }, { "epoch": 0.8012262244826865, "grad_norm": 0.10419401526451111, "learning_rate": 1.0905277296167066e-06, "loss": 0.0597, "step": 5750 }, { "epoch": 0.8013655681739009, "grad_norm": 0.0781305804848671, "learning_rate": 1.089058919495488e-06, "loss": 0.06, "step": 5751 }, { "epoch": 0.8015049118651153, "grad_norm": 0.0864623636007309, "learning_rate": 1.0875909782960887e-06, "loss": 0.0461, "step": 5752 }, { "epoch": 0.8016442555563297, "grad_norm": 0.06397251039743423, "learning_rate": 1.0861239063446511e-06, "loss": 0.0486, "step": 5753 }, { "epoch": 0.801783599247544, "grad_norm": 0.0740533098578453, "learning_rate": 1.0846577039671263e-06, "loss": 0.0505, "step": 5754 }, { "epoch": 0.8019229429387584, "grad_norm": 0.10965058207511902, "learning_rate": 1.0831923714892706e-06, "loss": 0.0442, "step": 5755 }, { "epoch": 0.8020622866299728, "grad_norm": 0.09463869780302048, "learning_rate": 1.0817279092366507e-06, "loss": 0.0552, "step": 5756 }, { "epoch": 0.8022016303211872, "grad_norm": 0.12670712172985077, "learning_rate": 1.0802643175346312e-06, "loss": 0.0457, "step": 5757 }, { "epoch": 0.8023409740124016, "grad_norm": 0.10160066932439804, "learning_rate": 1.0788015967083904e-06, "loss": 0.0536, "step": 5758 }, { "epoch": 0.8024803177036159, "grad_norm": 0.07706629484891891, "learning_rate": 1.0773397470829145e-06, "loss": 0.0537, "step": 5759 }, { "epoch": 0.8026196613948303, "grad_norm": 0.10799144208431244, "learning_rate": 1.0758787689829891e-06, "loss": 0.0617, "step": 5760 }, { "epoch": 0.8027590050860447, "grad_norm": 0.07331926375627518, "learning_rate": 1.074418662733212e-06, "loss": 0.0522, "step": 5761 }, { "epoch": 0.8028983487772591, "grad_norm": 0.0769469365477562, "learning_rate": 1.0729594286579876e-06, "loss": 0.0597, "step": 5762 }, { "epoch": 0.8030376924684735, "grad_norm": 0.08978297561407089, "learning_rate": 1.0715010670815212e-06, "loss": 0.0505, "step": 5763 }, { "epoch": 0.8031770361596878, "grad_norm": 0.09861728549003601, "learning_rate": 1.0700435783278278e-06, "loss": 0.0524, "step": 5764 }, { "epoch": 0.8033163798509022, "grad_norm": 0.12511438131332397, "learning_rate": 1.068586962720729e-06, "loss": 0.0552, "step": 5765 }, { "epoch": 0.8034557235421166, "grad_norm": 0.06830070912837982, "learning_rate": 1.0671312205838525e-06, "loss": 0.0488, "step": 5766 }, { "epoch": 0.803595067233331, "grad_norm": 0.08474357426166534, "learning_rate": 1.06567635224063e-06, "loss": 0.0539, "step": 5767 }, { "epoch": 0.8037344109245453, "grad_norm": 0.06675877422094345, "learning_rate": 1.0642223580142985e-06, "loss": 0.0457, "step": 5768 }, { "epoch": 0.8038737546157597, "grad_norm": 0.09509773552417755, "learning_rate": 1.0627692382279038e-06, "loss": 0.0589, "step": 5769 }, { "epoch": 0.8040130983069742, "grad_norm": 0.088338702917099, "learning_rate": 1.0613169932042972e-06, "loss": 0.0638, "step": 5770 }, { "epoch": 0.8041524419981886, "grad_norm": 0.12766693532466888, "learning_rate": 1.0598656232661313e-06, "loss": 0.0567, "step": 5771 }, { "epoch": 0.804291785689403, "grad_norm": 0.08932869881391525, "learning_rate": 1.0584151287358708e-06, "loss": 0.054, "step": 5772 }, { "epoch": 0.8044311293806173, "grad_norm": 0.08899865299463272, "learning_rate": 1.0569655099357795e-06, "loss": 0.0472, "step": 5773 }, { "epoch": 0.8045704730718317, "grad_norm": 0.11064378917217255, "learning_rate": 1.0555167671879319e-06, "loss": 0.0621, "step": 5774 }, { "epoch": 0.8047098167630461, "grad_norm": 0.07679197937250137, "learning_rate": 1.0540689008142035e-06, "loss": 0.0575, "step": 5775 }, { "epoch": 0.8048491604542605, "grad_norm": 0.06856629252433777, "learning_rate": 1.052621911136278e-06, "loss": 0.0532, "step": 5776 }, { "epoch": 0.8049885041454748, "grad_norm": 0.0829601064324379, "learning_rate": 1.0511757984756455e-06, "loss": 0.0519, "step": 5777 }, { "epoch": 0.8051278478366892, "grad_norm": 0.07096031308174133, "learning_rate": 1.049730563153597e-06, "loss": 0.0496, "step": 5778 }, { "epoch": 0.8052671915279036, "grad_norm": 0.06208440661430359, "learning_rate": 1.0482862054912296e-06, "loss": 0.042, "step": 5779 }, { "epoch": 0.805406535219118, "grad_norm": 0.09203534573316574, "learning_rate": 1.0468427258094481e-06, "loss": 0.046, "step": 5780 }, { "epoch": 0.8055458789103324, "grad_norm": 0.22397850453853607, "learning_rate": 1.045400124428963e-06, "loss": 0.0561, "step": 5781 }, { "epoch": 0.8056852226015467, "grad_norm": 0.10824690759181976, "learning_rate": 1.043958401670283e-06, "loss": 0.0533, "step": 5782 }, { "epoch": 0.8058245662927611, "grad_norm": 0.07328401505947113, "learning_rate": 1.04251755785373e-06, "loss": 0.0559, "step": 5783 }, { "epoch": 0.8059639099839755, "grad_norm": 0.04647154361009598, "learning_rate": 1.0410775932994232e-06, "loss": 0.0441, "step": 5784 }, { "epoch": 0.8061032536751899, "grad_norm": 0.09889335930347443, "learning_rate": 1.039638508327293e-06, "loss": 0.0681, "step": 5785 }, { "epoch": 0.8062425973664042, "grad_norm": 0.09711897373199463, "learning_rate": 1.0382003032570682e-06, "loss": 0.0486, "step": 5786 }, { "epoch": 0.8063819410576186, "grad_norm": 0.05368658900260925, "learning_rate": 1.0367629784082867e-06, "loss": 0.0467, "step": 5787 }, { "epoch": 0.806521284748833, "grad_norm": 0.07899448275566101, "learning_rate": 1.0353265341002916e-06, "loss": 0.0603, "step": 5788 }, { "epoch": 0.8066606284400474, "grad_norm": 0.09849020093679428, "learning_rate": 1.0338909706522232e-06, "loss": 0.0552, "step": 5789 }, { "epoch": 0.8067999721312618, "grad_norm": 0.11236616224050522, "learning_rate": 1.032456288383033e-06, "loss": 0.0549, "step": 5790 }, { "epoch": 0.8069393158224761, "grad_norm": 0.074003204703331, "learning_rate": 1.0310224876114766e-06, "loss": 0.0564, "step": 5791 }, { "epoch": 0.8070786595136905, "grad_norm": 0.12289776653051376, "learning_rate": 1.0295895686561087e-06, "loss": 0.0491, "step": 5792 }, { "epoch": 0.8072180032049049, "grad_norm": 0.07560734450817108, "learning_rate": 1.0281575318352937e-06, "loss": 0.0532, "step": 5793 }, { "epoch": 0.8073573468961193, "grad_norm": 0.11150704324245453, "learning_rate": 1.0267263774671953e-06, "loss": 0.0447, "step": 5794 }, { "epoch": 0.8074966905873336, "grad_norm": 0.13615967333316803, "learning_rate": 1.0252961058697858e-06, "loss": 0.0528, "step": 5795 }, { "epoch": 0.807636034278548, "grad_norm": 0.07925523817539215, "learning_rate": 1.0238667173608364e-06, "loss": 0.0514, "step": 5796 }, { "epoch": 0.8077753779697624, "grad_norm": 0.07582257688045502, "learning_rate": 1.0224382122579256e-06, "loss": 0.0432, "step": 5797 }, { "epoch": 0.8079147216609768, "grad_norm": 0.05619031935930252, "learning_rate": 1.0210105908784362e-06, "loss": 0.052, "step": 5798 }, { "epoch": 0.8080540653521912, "grad_norm": 0.08000878244638443, "learning_rate": 1.0195838535395514e-06, "loss": 0.0495, "step": 5799 }, { "epoch": 0.8081934090434055, "grad_norm": 0.08015381544828415, "learning_rate": 1.0181580005582586e-06, "loss": 0.0538, "step": 5800 }, { "epoch": 0.8083327527346199, "grad_norm": 0.09655759483575821, "learning_rate": 1.0167330322513508e-06, "loss": 0.0536, "step": 5801 }, { "epoch": 0.8084720964258343, "grad_norm": 0.0748245120048523, "learning_rate": 1.0153089489354256e-06, "loss": 0.0473, "step": 5802 }, { "epoch": 0.8086114401170487, "grad_norm": 0.06810933351516724, "learning_rate": 1.0138857509268784e-06, "loss": 0.0516, "step": 5803 }, { "epoch": 0.808750783808263, "grad_norm": 0.07636099308729172, "learning_rate": 1.012463438541914e-06, "loss": 0.0472, "step": 5804 }, { "epoch": 0.8088901274994774, "grad_norm": 0.09604553133249283, "learning_rate": 1.0110420120965354e-06, "loss": 0.0591, "step": 5805 }, { "epoch": 0.8090294711906918, "grad_norm": 0.07815226167440414, "learning_rate": 1.0096214719065534e-06, "loss": 0.0518, "step": 5806 }, { "epoch": 0.8091688148819062, "grad_norm": 0.10577693581581116, "learning_rate": 1.008201818287577e-06, "loss": 0.0542, "step": 5807 }, { "epoch": 0.8093081585731206, "grad_norm": 0.10606913268566132, "learning_rate": 1.0067830515550224e-06, "loss": 0.0577, "step": 5808 }, { "epoch": 0.8094475022643349, "grad_norm": 0.19090613722801208, "learning_rate": 1.0053651720241087e-06, "loss": 0.0585, "step": 5809 }, { "epoch": 0.8095868459555494, "grad_norm": 0.12401187419891357, "learning_rate": 1.0039481800098545e-06, "loss": 0.0641, "step": 5810 }, { "epoch": 0.8097261896467638, "grad_norm": 0.08924747258424759, "learning_rate": 1.0025320758270819e-06, "loss": 0.0602, "step": 5811 }, { "epoch": 0.8098655333379782, "grad_norm": 0.08308851718902588, "learning_rate": 1.001116859790418e-06, "loss": 0.0645, "step": 5812 }, { "epoch": 0.8100048770291925, "grad_norm": 0.10125155746936798, "learning_rate": 9.997025322142934e-07, "loss": 0.055, "step": 5813 }, { "epoch": 0.8101442207204069, "grad_norm": 0.08065185695886612, "learning_rate": 9.98289093412938e-07, "loss": 0.0572, "step": 5814 }, { "epoch": 0.8102835644116213, "grad_norm": 0.08020439743995667, "learning_rate": 9.96876543700384e-07, "loss": 0.0595, "step": 5815 }, { "epoch": 0.8104229081028357, "grad_norm": 0.10522840917110443, "learning_rate": 9.95464883390469e-07, "loss": 0.0613, "step": 5816 }, { "epoch": 0.8105622517940501, "grad_norm": 0.0769176110625267, "learning_rate": 9.940541127968335e-07, "loss": 0.0487, "step": 5817 }, { "epoch": 0.8107015954852644, "grad_norm": 0.08196137845516205, "learning_rate": 9.92644232232915e-07, "loss": 0.0471, "step": 5818 }, { "epoch": 0.8108409391764788, "grad_norm": 0.07174241542816162, "learning_rate": 9.912352420119587e-07, "loss": 0.0452, "step": 5819 }, { "epoch": 0.8109802828676932, "grad_norm": 0.06724240630865097, "learning_rate": 9.89827142447013e-07, "loss": 0.0538, "step": 5820 }, { "epoch": 0.8111196265589076, "grad_norm": 0.0630229115486145, "learning_rate": 9.884199338509193e-07, "loss": 0.0511, "step": 5821 }, { "epoch": 0.811258970250122, "grad_norm": 0.06440796703100204, "learning_rate": 9.87013616536331e-07, "loss": 0.0495, "step": 5822 }, { "epoch": 0.8113983139413363, "grad_norm": 0.16889873147010803, "learning_rate": 9.856081908156984e-07, "loss": 0.0509, "step": 5823 }, { "epoch": 0.8115376576325507, "grad_norm": 0.07174556702375412, "learning_rate": 9.842036570012776e-07, "loss": 0.0482, "step": 5824 }, { "epoch": 0.8116770013237651, "grad_norm": 0.1980215609073639, "learning_rate": 9.828000154051216e-07, "loss": 0.0548, "step": 5825 }, { "epoch": 0.8118163450149795, "grad_norm": 0.10740801692008972, "learning_rate": 9.813972663390864e-07, "loss": 0.0585, "step": 5826 }, { "epoch": 0.8119556887061938, "grad_norm": 0.07591170817613602, "learning_rate": 9.79995410114834e-07, "loss": 0.051, "step": 5827 }, { "epoch": 0.8120950323974082, "grad_norm": 0.09706965833902359, "learning_rate": 9.785944470438218e-07, "loss": 0.0595, "step": 5828 }, { "epoch": 0.8122343760886226, "grad_norm": 0.11039235442876816, "learning_rate": 9.771943774373138e-07, "loss": 0.0678, "step": 5829 }, { "epoch": 0.812373719779837, "grad_norm": 0.06423328816890717, "learning_rate": 9.757952016063738e-07, "loss": 0.0632, "step": 5830 }, { "epoch": 0.8125130634710513, "grad_norm": 0.06879065185785294, "learning_rate": 9.743969198618659e-07, "loss": 0.0518, "step": 5831 }, { "epoch": 0.8126524071622657, "grad_norm": 0.0682048574090004, "learning_rate": 9.729995325144548e-07, "loss": 0.0543, "step": 5832 }, { "epoch": 0.8127917508534801, "grad_norm": 0.09200499951839447, "learning_rate": 9.716030398746096e-07, "loss": 0.0637, "step": 5833 }, { "epoch": 0.8129310945446945, "grad_norm": 0.06159009411931038, "learning_rate": 9.702074422526004e-07, "loss": 0.0396, "step": 5834 }, { "epoch": 0.8130704382359089, "grad_norm": 0.07159534096717834, "learning_rate": 9.688127399584956e-07, "loss": 0.0584, "step": 5835 }, { "epoch": 0.8132097819271232, "grad_norm": 0.06689189374446869, "learning_rate": 9.674189333021655e-07, "loss": 0.0486, "step": 5836 }, { "epoch": 0.8133491256183376, "grad_norm": 0.06299945712089539, "learning_rate": 9.660260225932834e-07, "loss": 0.0541, "step": 5837 }, { "epoch": 0.813488469309552, "grad_norm": 0.055414848029613495, "learning_rate": 9.646340081413225e-07, "loss": 0.0404, "step": 5838 }, { "epoch": 0.8136278130007664, "grad_norm": 0.07687484472990036, "learning_rate": 9.632428902555546e-07, "loss": 0.0472, "step": 5839 }, { "epoch": 0.8137671566919807, "grad_norm": 0.060893379151821136, "learning_rate": 9.618526692450564e-07, "loss": 0.0544, "step": 5840 }, { "epoch": 0.8139065003831951, "grad_norm": 0.0929960235953331, "learning_rate": 9.604633454187035e-07, "loss": 0.0549, "step": 5841 }, { "epoch": 0.8140458440744095, "grad_norm": 0.08776190876960754, "learning_rate": 9.59074919085171e-07, "loss": 0.0553, "step": 5842 }, { "epoch": 0.8141851877656239, "grad_norm": 0.09171336889266968, "learning_rate": 9.57687390552935e-07, "loss": 0.0533, "step": 5843 }, { "epoch": 0.8143245314568383, "grad_norm": 0.07688870280981064, "learning_rate": 9.563007601302727e-07, "loss": 0.0536, "step": 5844 }, { "epoch": 0.8144638751480526, "grad_norm": 0.06289921700954437, "learning_rate": 9.549150281252633e-07, "loss": 0.0508, "step": 5845 }, { "epoch": 0.814603218839267, "grad_norm": 0.09568743407726288, "learning_rate": 9.535301948457842e-07, "loss": 0.0674, "step": 5846 }, { "epoch": 0.8147425625304814, "grad_norm": 0.061497386544942856, "learning_rate": 9.521462605995119e-07, "loss": 0.0479, "step": 5847 }, { "epoch": 0.8148819062216958, "grad_norm": 0.10858669131994247, "learning_rate": 9.507632256939264e-07, "loss": 0.0533, "step": 5848 }, { "epoch": 0.8150212499129101, "grad_norm": 0.07000267505645752, "learning_rate": 9.493810904363077e-07, "loss": 0.0516, "step": 5849 }, { "epoch": 0.8151605936041246, "grad_norm": 0.18968525528907776, "learning_rate": 9.479998551337322e-07, "loss": 0.0681, "step": 5850 }, { "epoch": 0.815299937295339, "grad_norm": 0.07843676209449768, "learning_rate": 9.466195200930817e-07, "loss": 0.0518, "step": 5851 }, { "epoch": 0.8154392809865534, "grad_norm": 0.10720136761665344, "learning_rate": 9.452400856210337e-07, "loss": 0.0533, "step": 5852 }, { "epoch": 0.8155786246777678, "grad_norm": 0.08770997077226639, "learning_rate": 9.438615520240651e-07, "loss": 0.0511, "step": 5853 }, { "epoch": 0.8157179683689821, "grad_norm": 0.09481420367956161, "learning_rate": 9.424839196084568e-07, "loss": 0.0577, "step": 5854 }, { "epoch": 0.8158573120601965, "grad_norm": 0.08533072471618652, "learning_rate": 9.411071886802869e-07, "loss": 0.0536, "step": 5855 }, { "epoch": 0.8159966557514109, "grad_norm": 0.05317839980125427, "learning_rate": 9.397313595454349e-07, "loss": 0.0425, "step": 5856 }, { "epoch": 0.8161359994426253, "grad_norm": 0.08470091968774796, "learning_rate": 9.383564325095767e-07, "loss": 0.0535, "step": 5857 }, { "epoch": 0.8162753431338396, "grad_norm": 0.056570395827293396, "learning_rate": 9.369824078781897e-07, "loss": 0.0571, "step": 5858 }, { "epoch": 0.816414686825054, "grad_norm": 0.11134366691112518, "learning_rate": 9.356092859565524e-07, "loss": 0.0567, "step": 5859 }, { "epoch": 0.8165540305162684, "grad_norm": 0.07284730672836304, "learning_rate": 9.342370670497391e-07, "loss": 0.0476, "step": 5860 }, { "epoch": 0.8166933742074828, "grad_norm": 0.09313502162694931, "learning_rate": 9.328657514626266e-07, "loss": 0.0573, "step": 5861 }, { "epoch": 0.8168327178986972, "grad_norm": 0.06879107654094696, "learning_rate": 9.314953394998905e-07, "loss": 0.0528, "step": 5862 }, { "epoch": 0.8169720615899115, "grad_norm": 0.07731343060731888, "learning_rate": 9.30125831466005e-07, "loss": 0.0514, "step": 5863 }, { "epoch": 0.8171114052811259, "grad_norm": 0.06874144077301025, "learning_rate": 9.287572276652417e-07, "loss": 0.0557, "step": 5864 }, { "epoch": 0.8172507489723403, "grad_norm": 0.08033518493175507, "learning_rate": 9.273895284016743e-07, "loss": 0.0449, "step": 5865 }, { "epoch": 0.8173900926635547, "grad_norm": 0.07086930423974991, "learning_rate": 9.260227339791755e-07, "loss": 0.0494, "step": 5866 }, { "epoch": 0.817529436354769, "grad_norm": 0.12533803284168243, "learning_rate": 9.246568447014148e-07, "loss": 0.0452, "step": 5867 }, { "epoch": 0.8176687800459834, "grad_norm": 0.050455089658498764, "learning_rate": 9.232918608718599e-07, "loss": 0.0402, "step": 5868 }, { "epoch": 0.8178081237371978, "grad_norm": 0.0996723547577858, "learning_rate": 9.219277827937811e-07, "loss": 0.0594, "step": 5869 }, { "epoch": 0.8179474674284122, "grad_norm": 0.07043332606554031, "learning_rate": 9.205646107702465e-07, "loss": 0.0584, "step": 5870 }, { "epoch": 0.8180868111196266, "grad_norm": 0.05390577390789986, "learning_rate": 9.192023451041187e-07, "loss": 0.0521, "step": 5871 }, { "epoch": 0.8182261548108409, "grad_norm": 0.05967625230550766, "learning_rate": 9.178409860980648e-07, "loss": 0.0505, "step": 5872 }, { "epoch": 0.8183654985020553, "grad_norm": 0.09394145011901855, "learning_rate": 9.164805340545457e-07, "loss": 0.0532, "step": 5873 }, { "epoch": 0.8185048421932697, "grad_norm": 0.0765770897269249, "learning_rate": 9.151209892758245e-07, "loss": 0.0611, "step": 5874 }, { "epoch": 0.8186441858844841, "grad_norm": 0.08003044128417969, "learning_rate": 9.137623520639588e-07, "loss": 0.0482, "step": 5875 }, { "epoch": 0.8187835295756984, "grad_norm": 0.08412899076938629, "learning_rate": 9.124046227208083e-07, "loss": 0.0505, "step": 5876 }, { "epoch": 0.8189228732669128, "grad_norm": 0.07681511342525482, "learning_rate": 9.110478015480301e-07, "loss": 0.0512, "step": 5877 }, { "epoch": 0.8190622169581272, "grad_norm": 0.06086522713303566, "learning_rate": 9.096918888470785e-07, "loss": 0.0525, "step": 5878 }, { "epoch": 0.8192015606493416, "grad_norm": 0.07680793106555939, "learning_rate": 9.083368849192042e-07, "loss": 0.0533, "step": 5879 }, { "epoch": 0.819340904340556, "grad_norm": 0.0796753540635109, "learning_rate": 9.069827900654604e-07, "loss": 0.051, "step": 5880 }, { "epoch": 0.8194802480317703, "grad_norm": 0.0795714259147644, "learning_rate": 9.056296045866964e-07, "loss": 0.056, "step": 5881 }, { "epoch": 0.8196195917229847, "grad_norm": 0.058070674538612366, "learning_rate": 9.042773287835566e-07, "loss": 0.0513, "step": 5882 }, { "epoch": 0.8197589354141991, "grad_norm": 0.16076447069644928, "learning_rate": 9.02925962956489e-07, "loss": 0.0648, "step": 5883 }, { "epoch": 0.8198982791054135, "grad_norm": 0.06055533513426781, "learning_rate": 9.015755074057336e-07, "loss": 0.0499, "step": 5884 }, { "epoch": 0.8200376227966278, "grad_norm": 0.13172529637813568, "learning_rate": 9.002259624313325e-07, "loss": 0.0654, "step": 5885 }, { "epoch": 0.8201769664878422, "grad_norm": 0.06010117381811142, "learning_rate": 8.98877328333122e-07, "loss": 0.0472, "step": 5886 }, { "epoch": 0.8203163101790566, "grad_norm": 0.08357009291648865, "learning_rate": 8.975296054107396e-07, "loss": 0.0607, "step": 5887 }, { "epoch": 0.820455653870271, "grad_norm": 0.07868408411741257, "learning_rate": 8.961827939636198e-07, "loss": 0.0459, "step": 5888 }, { "epoch": 0.8205949975614854, "grad_norm": 0.06371774524450302, "learning_rate": 8.948368942909891e-07, "loss": 0.046, "step": 5889 }, { "epoch": 0.8207343412526998, "grad_norm": 0.0806845873594284, "learning_rate": 8.934919066918779e-07, "loss": 0.0575, "step": 5890 }, { "epoch": 0.8208736849439142, "grad_norm": 0.15525846183300018, "learning_rate": 8.921478314651133e-07, "loss": 0.0596, "step": 5891 }, { "epoch": 0.8210130286351286, "grad_norm": 0.07372912019491196, "learning_rate": 8.908046689093153e-07, "loss": 0.0486, "step": 5892 }, { "epoch": 0.821152372326343, "grad_norm": 0.11115770041942596, "learning_rate": 8.894624193229051e-07, "loss": 0.0554, "step": 5893 }, { "epoch": 0.8212917160175573, "grad_norm": 0.08125561475753784, "learning_rate": 8.88121083004102e-07, "loss": 0.0519, "step": 5894 }, { "epoch": 0.8214310597087717, "grad_norm": 0.06947682797908783, "learning_rate": 8.867806602509177e-07, "loss": 0.0556, "step": 5895 }, { "epoch": 0.8215704033999861, "grad_norm": 0.06742046773433685, "learning_rate": 8.854411513611638e-07, "loss": 0.0475, "step": 5896 }, { "epoch": 0.8217097470912005, "grad_norm": 0.10395096987485886, "learning_rate": 8.841025566324485e-07, "loss": 0.0626, "step": 5897 }, { "epoch": 0.8218490907824149, "grad_norm": 0.1294267624616623, "learning_rate": 8.827648763621793e-07, "loss": 0.0529, "step": 5898 }, { "epoch": 0.8219884344736292, "grad_norm": 0.10469917953014374, "learning_rate": 8.814281108475565e-07, "loss": 0.0616, "step": 5899 }, { "epoch": 0.8221277781648436, "grad_norm": 0.053642917424440384, "learning_rate": 8.800922603855772e-07, "loss": 0.0496, "step": 5900 }, { "epoch": 0.822267121856058, "grad_norm": 0.0867520198225975, "learning_rate": 8.787573252730386e-07, "loss": 0.0529, "step": 5901 }, { "epoch": 0.8224064655472724, "grad_norm": 0.06848857551813126, "learning_rate": 8.774233058065346e-07, "loss": 0.0534, "step": 5902 }, { "epoch": 0.8225458092384867, "grad_norm": 0.11413761228322983, "learning_rate": 8.760902022824502e-07, "loss": 0.0514, "step": 5903 }, { "epoch": 0.8226851529297011, "grad_norm": 0.0904303789138794, "learning_rate": 8.747580149969737e-07, "loss": 0.0564, "step": 5904 }, { "epoch": 0.8228244966209155, "grad_norm": 0.0902155339717865, "learning_rate": 8.734267442460842e-07, "loss": 0.056, "step": 5905 }, { "epoch": 0.8229638403121299, "grad_norm": 0.090797059237957, "learning_rate": 8.720963903255619e-07, "loss": 0.054, "step": 5906 }, { "epoch": 0.8231031840033443, "grad_norm": 0.10001692175865173, "learning_rate": 8.707669535309793e-07, "loss": 0.0493, "step": 5907 }, { "epoch": 0.8232425276945586, "grad_norm": 0.07319694757461548, "learning_rate": 8.694384341577072e-07, "loss": 0.0489, "step": 5908 }, { "epoch": 0.823381871385773, "grad_norm": 0.08721377700567245, "learning_rate": 8.681108325009141e-07, "loss": 0.0486, "step": 5909 }, { "epoch": 0.8235212150769874, "grad_norm": 0.12089937180280685, "learning_rate": 8.667841488555617e-07, "loss": 0.0489, "step": 5910 }, { "epoch": 0.8236605587682018, "grad_norm": 0.08249711990356445, "learning_rate": 8.654583835164066e-07, "loss": 0.0541, "step": 5911 }, { "epoch": 0.8237999024594161, "grad_norm": 0.06520979851484299, "learning_rate": 8.641335367780057e-07, "loss": 0.0438, "step": 5912 }, { "epoch": 0.8239392461506305, "grad_norm": 0.06414025276899338, "learning_rate": 8.62809608934711e-07, "loss": 0.0488, "step": 5913 }, { "epoch": 0.8240785898418449, "grad_norm": 0.0671725645661354, "learning_rate": 8.614866002806665e-07, "loss": 0.0556, "step": 5914 }, { "epoch": 0.8242179335330593, "grad_norm": 0.10540176928043365, "learning_rate": 8.601645111098162e-07, "loss": 0.0541, "step": 5915 }, { "epoch": 0.8243572772242737, "grad_norm": 0.065383180975914, "learning_rate": 8.588433417158965e-07, "loss": 0.0492, "step": 5916 }, { "epoch": 0.824496620915488, "grad_norm": 0.07834497094154358, "learning_rate": 8.575230923924432e-07, "loss": 0.0535, "step": 5917 }, { "epoch": 0.8246359646067024, "grad_norm": 0.12406440824270248, "learning_rate": 8.562037634327836e-07, "loss": 0.0581, "step": 5918 }, { "epoch": 0.8247753082979168, "grad_norm": 0.1091122105717659, "learning_rate": 8.548853551300429e-07, "loss": 0.0669, "step": 5919 }, { "epoch": 0.8249146519891312, "grad_norm": 0.14245669543743134, "learning_rate": 8.535678677771441e-07, "loss": 0.0529, "step": 5920 }, { "epoch": 0.8250539956803455, "grad_norm": 0.06007487699389458, "learning_rate": 8.522513016667982e-07, "loss": 0.0521, "step": 5921 }, { "epoch": 0.8251933393715599, "grad_norm": 0.1026240885257721, "learning_rate": 8.509356570915184e-07, "loss": 0.0483, "step": 5922 }, { "epoch": 0.8253326830627743, "grad_norm": 0.08727822452783585, "learning_rate": 8.496209343436101e-07, "loss": 0.0555, "step": 5923 }, { "epoch": 0.8254720267539887, "grad_norm": 0.09957965463399887, "learning_rate": 8.483071337151777e-07, "loss": 0.0595, "step": 5924 }, { "epoch": 0.825611370445203, "grad_norm": 0.07087967544794083, "learning_rate": 8.469942554981148e-07, "loss": 0.051, "step": 5925 }, { "epoch": 0.8257507141364174, "grad_norm": 0.07955993711948395, "learning_rate": 8.456822999841125e-07, "loss": 0.0555, "step": 5926 }, { "epoch": 0.8258900578276318, "grad_norm": 0.10132590681314468, "learning_rate": 8.443712674646598e-07, "loss": 0.0519, "step": 5927 }, { "epoch": 0.8260294015188462, "grad_norm": 0.07910876721143723, "learning_rate": 8.430611582310355e-07, "loss": 0.0497, "step": 5928 }, { "epoch": 0.8261687452100606, "grad_norm": 0.12733884155750275, "learning_rate": 8.417519725743173e-07, "loss": 0.0668, "step": 5929 }, { "epoch": 0.8263080889012749, "grad_norm": 0.12322940677404404, "learning_rate": 8.40443710785378e-07, "loss": 0.0509, "step": 5930 }, { "epoch": 0.8264474325924894, "grad_norm": 0.08195920288562775, "learning_rate": 8.391363731548813e-07, "loss": 0.0551, "step": 5931 }, { "epoch": 0.8265867762837038, "grad_norm": 0.08751992881298065, "learning_rate": 8.378299599732875e-07, "loss": 0.0538, "step": 5932 }, { "epoch": 0.8267261199749182, "grad_norm": 0.10562599450349808, "learning_rate": 8.365244715308524e-07, "loss": 0.0483, "step": 5933 }, { "epoch": 0.8268654636661326, "grad_norm": 0.06541460752487183, "learning_rate": 8.352199081176271e-07, "loss": 0.0431, "step": 5934 }, { "epoch": 0.8270048073573469, "grad_norm": 0.0948878824710846, "learning_rate": 8.339162700234537e-07, "loss": 0.0564, "step": 5935 }, { "epoch": 0.8271441510485613, "grad_norm": 0.08589406311511993, "learning_rate": 8.326135575379729e-07, "loss": 0.0529, "step": 5936 }, { "epoch": 0.8272834947397757, "grad_norm": 0.11262926459312439, "learning_rate": 8.313117709506158e-07, "loss": 0.0452, "step": 5937 }, { "epoch": 0.8274228384309901, "grad_norm": 0.09196013957262039, "learning_rate": 8.30010910550611e-07, "loss": 0.0477, "step": 5938 }, { "epoch": 0.8275621821222044, "grad_norm": 0.16270224750041962, "learning_rate": 8.287109766269786e-07, "loss": 0.0507, "step": 5939 }, { "epoch": 0.8277015258134188, "grad_norm": 0.0783652514219284, "learning_rate": 8.274119694685345e-07, "loss": 0.0516, "step": 5940 }, { "epoch": 0.8278408695046332, "grad_norm": 0.12069971114397049, "learning_rate": 8.26113889363891e-07, "loss": 0.0509, "step": 5941 }, { "epoch": 0.8279802131958476, "grad_norm": 0.07006219029426575, "learning_rate": 8.248167366014493e-07, "loss": 0.0515, "step": 5942 }, { "epoch": 0.828119556887062, "grad_norm": 0.08566617220640182, "learning_rate": 8.235205114694067e-07, "loss": 0.0535, "step": 5943 }, { "epoch": 0.8282589005782763, "grad_norm": 0.09226808696985245, "learning_rate": 8.222252142557557e-07, "loss": 0.0547, "step": 5944 }, { "epoch": 0.8283982442694907, "grad_norm": 0.07683247327804565, "learning_rate": 8.209308452482829e-07, "loss": 0.0533, "step": 5945 }, { "epoch": 0.8285375879607051, "grad_norm": 0.06890588998794556, "learning_rate": 8.196374047345668e-07, "loss": 0.0491, "step": 5946 }, { "epoch": 0.8286769316519195, "grad_norm": 0.06595025211572647, "learning_rate": 8.183448930019783e-07, "loss": 0.0459, "step": 5947 }, { "epoch": 0.8288162753431338, "grad_norm": 0.10567920655012131, "learning_rate": 8.170533103376865e-07, "loss": 0.0479, "step": 5948 }, { "epoch": 0.8289556190343482, "grad_norm": 0.08546466380357742, "learning_rate": 8.157626570286515e-07, "loss": 0.0501, "step": 5949 }, { "epoch": 0.8290949627255626, "grad_norm": 0.11547920107841492, "learning_rate": 8.144729333616259e-07, "loss": 0.0494, "step": 5950 }, { "epoch": 0.829234306416777, "grad_norm": 0.07878640294075012, "learning_rate": 8.131841396231566e-07, "loss": 0.0393, "step": 5951 }, { "epoch": 0.8293736501079914, "grad_norm": 0.08713438361883163, "learning_rate": 8.118962760995874e-07, "loss": 0.0542, "step": 5952 }, { "epoch": 0.8295129937992057, "grad_norm": 0.13322243094444275, "learning_rate": 8.106093430770473e-07, "loss": 0.055, "step": 5953 }, { "epoch": 0.8296523374904201, "grad_norm": 0.11033355444669724, "learning_rate": 8.093233408414658e-07, "loss": 0.0619, "step": 5954 }, { "epoch": 0.8297916811816345, "grad_norm": 0.07097680121660233, "learning_rate": 8.080382696785627e-07, "loss": 0.0425, "step": 5955 }, { "epoch": 0.8299310248728489, "grad_norm": 0.056124597787857056, "learning_rate": 8.067541298738535e-07, "loss": 0.037, "step": 5956 }, { "epoch": 0.8300703685640632, "grad_norm": 0.08983168005943298, "learning_rate": 8.054709217126433e-07, "loss": 0.0612, "step": 5957 }, { "epoch": 0.8302097122552776, "grad_norm": 0.07232052087783813, "learning_rate": 8.041886454800307e-07, "loss": 0.0504, "step": 5958 }, { "epoch": 0.830349055946492, "grad_norm": 0.11138532310724258, "learning_rate": 8.029073014609096e-07, "loss": 0.0533, "step": 5959 }, { "epoch": 0.8304883996377064, "grad_norm": 0.09342603385448456, "learning_rate": 8.016268899399643e-07, "loss": 0.0508, "step": 5960 }, { "epoch": 0.8306277433289208, "grad_norm": 0.0894198939204216, "learning_rate": 8.00347411201673e-07, "loss": 0.053, "step": 5961 }, { "epoch": 0.8307670870201351, "grad_norm": 0.06659939885139465, "learning_rate": 7.990688655303086e-07, "loss": 0.0464, "step": 5962 }, { "epoch": 0.8309064307113495, "grad_norm": 0.10117091983556747, "learning_rate": 7.977912532099336e-07, "loss": 0.057, "step": 5963 }, { "epoch": 0.8310457744025639, "grad_norm": 0.12167675793170929, "learning_rate": 7.965145745244029e-07, "loss": 0.0539, "step": 5964 }, { "epoch": 0.8311851180937783, "grad_norm": 0.08729564398527145, "learning_rate": 7.95238829757366e-07, "loss": 0.0546, "step": 5965 }, { "epoch": 0.8313244617849926, "grad_norm": 0.09164942800998688, "learning_rate": 7.939640191922665e-07, "loss": 0.0526, "step": 5966 }, { "epoch": 0.831463805476207, "grad_norm": 0.08729439228773117, "learning_rate": 7.926901431123362e-07, "loss": 0.0647, "step": 5967 }, { "epoch": 0.8316031491674214, "grad_norm": 0.08673444390296936, "learning_rate": 7.914172018006006e-07, "loss": 0.0489, "step": 5968 }, { "epoch": 0.8317424928586358, "grad_norm": 0.08490727096796036, "learning_rate": 7.901451955398792e-07, "loss": 0.0577, "step": 5969 }, { "epoch": 0.8318818365498502, "grad_norm": 0.07329186052083969, "learning_rate": 7.88874124612784e-07, "loss": 0.0493, "step": 5970 }, { "epoch": 0.8320211802410646, "grad_norm": 0.13547228276729584, "learning_rate": 7.876039893017151e-07, "loss": 0.0496, "step": 5971 }, { "epoch": 0.832160523932279, "grad_norm": 0.12457054108381271, "learning_rate": 7.863347898888696e-07, "loss": 0.0589, "step": 5972 }, { "epoch": 0.8322998676234934, "grad_norm": 0.14212661981582642, "learning_rate": 7.850665266562352e-07, "loss": 0.0549, "step": 5973 }, { "epoch": 0.8324392113147078, "grad_norm": 0.10501164942979813, "learning_rate": 7.837991998855899e-07, "loss": 0.0538, "step": 5974 }, { "epoch": 0.8325785550059221, "grad_norm": 0.14506258070468903, "learning_rate": 7.825328098585039e-07, "loss": 0.0633, "step": 5975 }, { "epoch": 0.8327178986971365, "grad_norm": 0.10099272429943085, "learning_rate": 7.812673568563406e-07, "loss": 0.0493, "step": 5976 }, { "epoch": 0.8328572423883509, "grad_norm": 0.08561404794454575, "learning_rate": 7.800028411602572e-07, "loss": 0.0594, "step": 5977 }, { "epoch": 0.8329965860795653, "grad_norm": 0.09416509419679642, "learning_rate": 7.78739263051198e-07, "loss": 0.0529, "step": 5978 }, { "epoch": 0.8331359297707797, "grad_norm": 0.0728684663772583, "learning_rate": 7.774766228099001e-07, "loss": 0.0492, "step": 5979 }, { "epoch": 0.833275273461994, "grad_norm": 0.10877228528261185, "learning_rate": 7.762149207168951e-07, "loss": 0.0506, "step": 5980 }, { "epoch": 0.8334146171532084, "grad_norm": 0.10121016949415207, "learning_rate": 7.749541570525054e-07, "loss": 0.0523, "step": 5981 }, { "epoch": 0.8335539608444228, "grad_norm": 0.11886479705572128, "learning_rate": 7.736943320968409e-07, "loss": 0.0531, "step": 5982 }, { "epoch": 0.8336933045356372, "grad_norm": 0.07822369784116745, "learning_rate": 7.724354461298089e-07, "loss": 0.0537, "step": 5983 }, { "epoch": 0.8338326482268515, "grad_norm": 0.07062388956546783, "learning_rate": 7.711774994311027e-07, "loss": 0.0479, "step": 5984 }, { "epoch": 0.8339719919180659, "grad_norm": 0.06376655399799347, "learning_rate": 7.699204922802123e-07, "loss": 0.0433, "step": 5985 }, { "epoch": 0.8341113356092803, "grad_norm": 0.059975724667310715, "learning_rate": 7.686644249564124e-07, "loss": 0.0471, "step": 5986 }, { "epoch": 0.8342506793004947, "grad_norm": 0.09042244404554367, "learning_rate": 7.674092977387737e-07, "loss": 0.0526, "step": 5987 }, { "epoch": 0.8343900229917091, "grad_norm": 0.10947045683860779, "learning_rate": 7.661551109061593e-07, "loss": 0.0457, "step": 5988 }, { "epoch": 0.8345293666829234, "grad_norm": 0.10054107010364532, "learning_rate": 7.649018647372186e-07, "loss": 0.0527, "step": 5989 }, { "epoch": 0.8346687103741378, "grad_norm": 0.07518164068460464, "learning_rate": 7.636495595103938e-07, "loss": 0.0455, "step": 5990 }, { "epoch": 0.8348080540653522, "grad_norm": 0.06796692311763763, "learning_rate": 7.6239819550392e-07, "loss": 0.0515, "step": 5991 }, { "epoch": 0.8349473977565666, "grad_norm": 0.08725041151046753, "learning_rate": 7.611477729958205e-07, "loss": 0.048, "step": 5992 }, { "epoch": 0.835086741447781, "grad_norm": 0.06874902546405792, "learning_rate": 7.598982922639109e-07, "loss": 0.0524, "step": 5993 }, { "epoch": 0.8352260851389953, "grad_norm": 0.07540875673294067, "learning_rate": 7.586497535857984e-07, "loss": 0.0533, "step": 5994 }, { "epoch": 0.8353654288302097, "grad_norm": 0.08923916518688202, "learning_rate": 7.574021572388795e-07, "loss": 0.0562, "step": 5995 }, { "epoch": 0.8355047725214241, "grad_norm": 0.0798087790608406, "learning_rate": 7.561555035003398e-07, "loss": 0.0396, "step": 5996 }, { "epoch": 0.8356441162126385, "grad_norm": 0.07633282989263535, "learning_rate": 7.549097926471583e-07, "loss": 0.0442, "step": 5997 }, { "epoch": 0.8357834599038528, "grad_norm": 0.1070733293890953, "learning_rate": 7.536650249561056e-07, "loss": 0.0528, "step": 5998 }, { "epoch": 0.8359228035950672, "grad_norm": 0.09234771132469177, "learning_rate": 7.524212007037385e-07, "loss": 0.0543, "step": 5999 }, { "epoch": 0.8360621472862816, "grad_norm": 0.09824876487255096, "learning_rate": 7.511783201664053e-07, "loss": 0.0634, "step": 6000 }, { "epoch": 0.836201490977496, "grad_norm": 0.07317839562892914, "learning_rate": 7.499363836202472e-07, "loss": 0.0496, "step": 6001 }, { "epoch": 0.8363408346687103, "grad_norm": 0.06598658114671707, "learning_rate": 7.486953913411954e-07, "loss": 0.0396, "step": 6002 }, { "epoch": 0.8364801783599247, "grad_norm": 0.07441561669111252, "learning_rate": 7.474553436049675e-07, "loss": 0.0571, "step": 6003 }, { "epoch": 0.8366195220511391, "grad_norm": 0.0904918909072876, "learning_rate": 7.462162406870766e-07, "loss": 0.0579, "step": 6004 }, { "epoch": 0.8367588657423535, "grad_norm": 0.13159401714801788, "learning_rate": 7.4497808286282e-07, "loss": 0.0646, "step": 6005 }, { "epoch": 0.8368982094335679, "grad_norm": 0.0755876824259758, "learning_rate": 7.437408704072907e-07, "loss": 0.0495, "step": 6006 }, { "epoch": 0.8370375531247822, "grad_norm": 0.09699501097202301, "learning_rate": 7.425046035953665e-07, "loss": 0.0478, "step": 6007 }, { "epoch": 0.8371768968159966, "grad_norm": 0.11360365152359009, "learning_rate": 7.412692827017193e-07, "loss": 0.0619, "step": 6008 }, { "epoch": 0.837316240507211, "grad_norm": 0.10748880356550217, "learning_rate": 7.400349080008107e-07, "loss": 0.0686, "step": 6009 }, { "epoch": 0.8374555841984254, "grad_norm": 0.06603825837373734, "learning_rate": 7.38801479766888e-07, "loss": 0.05, "step": 6010 }, { "epoch": 0.8375949278896399, "grad_norm": 0.06719741970300674, "learning_rate": 7.375689982739915e-07, "loss": 0.0409, "step": 6011 }, { "epoch": 0.8377342715808542, "grad_norm": 0.08572721481323242, "learning_rate": 7.363374637959498e-07, "loss": 0.0508, "step": 6012 }, { "epoch": 0.8378736152720686, "grad_norm": 0.0833439901471138, "learning_rate": 7.35106876606384e-07, "loss": 0.0499, "step": 6013 }, { "epoch": 0.838012958963283, "grad_norm": 0.06218506023287773, "learning_rate": 7.338772369787001e-07, "loss": 0.0465, "step": 6014 }, { "epoch": 0.8381523026544974, "grad_norm": 0.07251113653182983, "learning_rate": 7.326485451860976e-07, "loss": 0.0498, "step": 6015 }, { "epoch": 0.8382916463457117, "grad_norm": 0.07479614019393921, "learning_rate": 7.314208015015623e-07, "loss": 0.0472, "step": 6016 }, { "epoch": 0.8384309900369261, "grad_norm": 0.10449981689453125, "learning_rate": 7.301940061978724e-07, "loss": 0.0629, "step": 6017 }, { "epoch": 0.8385703337281405, "grad_norm": 0.06987079232931137, "learning_rate": 7.289681595475922e-07, "loss": 0.0459, "step": 6018 }, { "epoch": 0.8387096774193549, "grad_norm": 0.10755252093076706, "learning_rate": 7.277432618230773e-07, "loss": 0.057, "step": 6019 }, { "epoch": 0.8388490211105692, "grad_norm": 0.11199691891670227, "learning_rate": 7.265193132964749e-07, "loss": 0.0557, "step": 6020 }, { "epoch": 0.8389883648017836, "grad_norm": 0.07392280548810959, "learning_rate": 7.252963142397134e-07, "loss": 0.0485, "step": 6021 }, { "epoch": 0.839127708492998, "grad_norm": 0.08029250055551529, "learning_rate": 7.24074264924518e-07, "loss": 0.0506, "step": 6022 }, { "epoch": 0.8392670521842124, "grad_norm": 0.11062180250883102, "learning_rate": 7.228531656223997e-07, "loss": 0.0608, "step": 6023 }, { "epoch": 0.8394063958754268, "grad_norm": 0.07492237538099289, "learning_rate": 7.216330166046603e-07, "loss": 0.0473, "step": 6024 }, { "epoch": 0.8395457395666411, "grad_norm": 0.07102438807487488, "learning_rate": 7.204138181423881e-07, "loss": 0.046, "step": 6025 }, { "epoch": 0.8396850832578555, "grad_norm": 0.0756448283791542, "learning_rate": 7.191955705064591e-07, "loss": 0.0554, "step": 6026 }, { "epoch": 0.8398244269490699, "grad_norm": 0.06542536616325378, "learning_rate": 7.179782739675434e-07, "loss": 0.0517, "step": 6027 }, { "epoch": 0.8399637706402843, "grad_norm": 0.10741262882947922, "learning_rate": 7.167619287960942e-07, "loss": 0.0427, "step": 6028 }, { "epoch": 0.8401031143314986, "grad_norm": 0.06739535927772522, "learning_rate": 7.155465352623559e-07, "loss": 0.056, "step": 6029 }, { "epoch": 0.840242458022713, "grad_norm": 0.06991644948720932, "learning_rate": 7.143320936363629e-07, "loss": 0.0453, "step": 6030 }, { "epoch": 0.8403818017139274, "grad_norm": 0.14085523784160614, "learning_rate": 7.131186041879357e-07, "loss": 0.0546, "step": 6031 }, { "epoch": 0.8405211454051418, "grad_norm": 0.10545305162668228, "learning_rate": 7.119060671866817e-07, "loss": 0.0554, "step": 6032 }, { "epoch": 0.8406604890963562, "grad_norm": 0.08294740319252014, "learning_rate": 7.106944829020013e-07, "loss": 0.0612, "step": 6033 }, { "epoch": 0.8407998327875705, "grad_norm": 0.10373063385486603, "learning_rate": 7.094838516030811e-07, "loss": 0.0579, "step": 6034 }, { "epoch": 0.8409391764787849, "grad_norm": 0.14765968918800354, "learning_rate": 7.082741735588938e-07, "loss": 0.0578, "step": 6035 }, { "epoch": 0.8410785201699993, "grad_norm": 0.06456327438354492, "learning_rate": 7.070654490382045e-07, "loss": 0.039, "step": 6036 }, { "epoch": 0.8412178638612137, "grad_norm": 0.10339943319559097, "learning_rate": 7.058576783095622e-07, "loss": 0.0585, "step": 6037 }, { "epoch": 0.841357207552428, "grad_norm": 0.0882892981171608, "learning_rate": 7.046508616413078e-07, "loss": 0.0509, "step": 6038 }, { "epoch": 0.8414965512436424, "grad_norm": 0.07533419877290726, "learning_rate": 7.034449993015663e-07, "loss": 0.0464, "step": 6039 }, { "epoch": 0.8416358949348568, "grad_norm": 0.09101948887109756, "learning_rate": 7.022400915582539e-07, "loss": 0.051, "step": 6040 }, { "epoch": 0.8417752386260712, "grad_norm": 0.09925069659948349, "learning_rate": 7.010361386790748e-07, "loss": 0.0603, "step": 6041 }, { "epoch": 0.8419145823172856, "grad_norm": 0.09224335104227066, "learning_rate": 6.998331409315184e-07, "loss": 0.053, "step": 6042 }, { "epoch": 0.8420539260084999, "grad_norm": 0.0950881689786911, "learning_rate": 6.986310985828626e-07, "loss": 0.0574, "step": 6043 }, { "epoch": 0.8421932696997143, "grad_norm": 0.06697169691324234, "learning_rate": 6.974300119001754e-07, "loss": 0.0514, "step": 6044 }, { "epoch": 0.8423326133909287, "grad_norm": 0.07180513441562653, "learning_rate": 6.962298811503104e-07, "loss": 0.0546, "step": 6045 }, { "epoch": 0.8424719570821431, "grad_norm": 0.08003997802734375, "learning_rate": 6.950307065999085e-07, "loss": 0.0501, "step": 6046 }, { "epoch": 0.8426113007733574, "grad_norm": 0.08690174669027328, "learning_rate": 6.938324885154007e-07, "loss": 0.0438, "step": 6047 }, { "epoch": 0.8427506444645718, "grad_norm": 0.08757475763559341, "learning_rate": 6.92635227163001e-07, "loss": 0.05, "step": 6048 }, { "epoch": 0.8428899881557862, "grad_norm": 0.07843492925167084, "learning_rate": 6.914389228087165e-07, "loss": 0.0469, "step": 6049 }, { "epoch": 0.8430293318470006, "grad_norm": 0.08820163458585739, "learning_rate": 6.902435757183357e-07, "loss": 0.0469, "step": 6050 }, { "epoch": 0.8431686755382151, "grad_norm": 0.13055597245693207, "learning_rate": 6.890491861574389e-07, "loss": 0.0451, "step": 6051 }, { "epoch": 0.8433080192294294, "grad_norm": 0.14351195096969604, "learning_rate": 6.87855754391395e-07, "loss": 0.0599, "step": 6052 }, { "epoch": 0.8434473629206438, "grad_norm": 0.07905314117670059, "learning_rate": 6.866632806853518e-07, "loss": 0.0479, "step": 6053 }, { "epoch": 0.8435867066118582, "grad_norm": 0.055959008634090424, "learning_rate": 6.854717653042531e-07, "loss": 0.0445, "step": 6054 }, { "epoch": 0.8437260503030726, "grad_norm": 0.09933200478553772, "learning_rate": 6.842812085128253e-07, "loss": 0.0589, "step": 6055 }, { "epoch": 0.843865393994287, "grad_norm": 0.13403892517089844, "learning_rate": 6.830916105755847e-07, "loss": 0.0664, "step": 6056 }, { "epoch": 0.8440047376855013, "grad_norm": 0.11404629796743393, "learning_rate": 6.819029717568315e-07, "loss": 0.0576, "step": 6057 }, { "epoch": 0.8441440813767157, "grad_norm": 0.09894242882728577, "learning_rate": 6.807152923206528e-07, "loss": 0.0521, "step": 6058 }, { "epoch": 0.8442834250679301, "grad_norm": 0.07138168811798096, "learning_rate": 6.795285725309269e-07, "loss": 0.0488, "step": 6059 }, { "epoch": 0.8444227687591445, "grad_norm": 0.145175963640213, "learning_rate": 6.783428126513125e-07, "loss": 0.0585, "step": 6060 }, { "epoch": 0.8445621124503588, "grad_norm": 0.11447467654943466, "learning_rate": 6.771580129452604e-07, "loss": 0.0642, "step": 6061 }, { "epoch": 0.8447014561415732, "grad_norm": 0.16225111484527588, "learning_rate": 6.759741736760062e-07, "loss": 0.0581, "step": 6062 }, { "epoch": 0.8448407998327876, "grad_norm": 0.09079264104366302, "learning_rate": 6.747912951065722e-07, "loss": 0.0524, "step": 6063 }, { "epoch": 0.844980143524002, "grad_norm": 0.07996438443660736, "learning_rate": 6.736093774997643e-07, "loss": 0.0622, "step": 6064 }, { "epoch": 0.8451194872152163, "grad_norm": 0.08794258534908295, "learning_rate": 6.724284211181803e-07, "loss": 0.0534, "step": 6065 }, { "epoch": 0.8452588309064307, "grad_norm": 0.09985943138599396, "learning_rate": 6.712484262242014e-07, "loss": 0.0586, "step": 6066 }, { "epoch": 0.8453981745976451, "grad_norm": 0.06289327889680862, "learning_rate": 6.700693930799945e-07, "loss": 0.0476, "step": 6067 }, { "epoch": 0.8455375182888595, "grad_norm": 0.07854457944631577, "learning_rate": 6.688913219475158e-07, "loss": 0.0508, "step": 6068 }, { "epoch": 0.8456768619800739, "grad_norm": 0.10502521693706512, "learning_rate": 6.677142130885028e-07, "loss": 0.0512, "step": 6069 }, { "epoch": 0.8458162056712882, "grad_norm": 0.08709750324487686, "learning_rate": 6.665380667644849e-07, "loss": 0.0505, "step": 6070 }, { "epoch": 0.8459555493625026, "grad_norm": 0.0728360041975975, "learning_rate": 6.653628832367731e-07, "loss": 0.0452, "step": 6071 }, { "epoch": 0.846094893053717, "grad_norm": 0.09189420938491821, "learning_rate": 6.641886627664673e-07, "loss": 0.0484, "step": 6072 }, { "epoch": 0.8462342367449314, "grad_norm": 0.07818122208118439, "learning_rate": 6.630154056144533e-07, "loss": 0.0516, "step": 6073 }, { "epoch": 0.8463735804361457, "grad_norm": 0.11082549393177032, "learning_rate": 6.618431120414015e-07, "loss": 0.0566, "step": 6074 }, { "epoch": 0.8465129241273601, "grad_norm": 0.080203577876091, "learning_rate": 6.606717823077669e-07, "loss": 0.0555, "step": 6075 }, { "epoch": 0.8466522678185745, "grad_norm": 0.10402711480855942, "learning_rate": 6.59501416673794e-07, "loss": 0.05, "step": 6076 }, { "epoch": 0.8467916115097889, "grad_norm": 0.08840936422348022, "learning_rate": 6.583320153995121e-07, "loss": 0.0546, "step": 6077 }, { "epoch": 0.8469309552010033, "grad_norm": 0.09169311076402664, "learning_rate": 6.571635787447339e-07, "loss": 0.0545, "step": 6078 }, { "epoch": 0.8470702988922176, "grad_norm": 0.06502538919448853, "learning_rate": 6.559961069690596e-07, "loss": 0.0511, "step": 6079 }, { "epoch": 0.847209642583432, "grad_norm": 0.08085481077432632, "learning_rate": 6.548296003318744e-07, "loss": 0.0485, "step": 6080 }, { "epoch": 0.8473489862746464, "grad_norm": 0.0822303518652916, "learning_rate": 6.536640590923515e-07, "loss": 0.0521, "step": 6081 }, { "epoch": 0.8474883299658608, "grad_norm": 0.09658320993185043, "learning_rate": 6.52499483509445e-07, "loss": 0.0587, "step": 6082 }, { "epoch": 0.8476276736570751, "grad_norm": 0.09828599542379379, "learning_rate": 6.51335873841899e-07, "loss": 0.0551, "step": 6083 }, { "epoch": 0.8477670173482895, "grad_norm": 0.07888900488615036, "learning_rate": 6.501732303482394e-07, "loss": 0.0683, "step": 6084 }, { "epoch": 0.8479063610395039, "grad_norm": 0.05880538001656532, "learning_rate": 6.490115532867808e-07, "loss": 0.0461, "step": 6085 }, { "epoch": 0.8480457047307183, "grad_norm": 0.1132231280207634, "learning_rate": 6.478508429156189e-07, "loss": 0.0588, "step": 6086 }, { "epoch": 0.8481850484219327, "grad_norm": 0.08282535523176193, "learning_rate": 6.466910994926384e-07, "loss": 0.0451, "step": 6087 }, { "epoch": 0.848324392113147, "grad_norm": 0.12573708593845367, "learning_rate": 6.455323232755095e-07, "loss": 0.0589, "step": 6088 }, { "epoch": 0.8484637358043614, "grad_norm": 0.11801331490278244, "learning_rate": 6.44374514521684e-07, "loss": 0.0558, "step": 6089 }, { "epoch": 0.8486030794955758, "grad_norm": 0.12219346314668655, "learning_rate": 6.432176734883994e-07, "loss": 0.0558, "step": 6090 }, { "epoch": 0.8487424231867903, "grad_norm": 0.1361292600631714, "learning_rate": 6.420618004326818e-07, "loss": 0.0521, "step": 6091 }, { "epoch": 0.8488817668780047, "grad_norm": 0.15018735826015472, "learning_rate": 6.409068956113379e-07, "loss": 0.0478, "step": 6092 }, { "epoch": 0.849021110569219, "grad_norm": 0.07097843289375305, "learning_rate": 6.397529592809615e-07, "loss": 0.0526, "step": 6093 }, { "epoch": 0.8491604542604334, "grad_norm": 0.09995120018720627, "learning_rate": 6.38599991697933e-07, "loss": 0.0533, "step": 6094 }, { "epoch": 0.8492997979516478, "grad_norm": 0.09257243573665619, "learning_rate": 6.374479931184141e-07, "loss": 0.0445, "step": 6095 }, { "epoch": 0.8494391416428622, "grad_norm": 0.08636601269245148, "learning_rate": 6.362969637983507e-07, "loss": 0.0678, "step": 6096 }, { "epoch": 0.8495784853340765, "grad_norm": 0.10209664702415466, "learning_rate": 6.351469039934771e-07, "loss": 0.052, "step": 6097 }, { "epoch": 0.8497178290252909, "grad_norm": 0.15508325397968292, "learning_rate": 6.339978139593117e-07, "loss": 0.0656, "step": 6098 }, { "epoch": 0.8498571727165053, "grad_norm": 0.11372406780719757, "learning_rate": 6.328496939511541e-07, "loss": 0.0686, "step": 6099 }, { "epoch": 0.8499965164077197, "grad_norm": 0.07684998214244843, "learning_rate": 6.317025442240893e-07, "loss": 0.0556, "step": 6100 }, { "epoch": 0.850135860098934, "grad_norm": 0.09849116206169128, "learning_rate": 6.305563650329899e-07, "loss": 0.0667, "step": 6101 }, { "epoch": 0.8502752037901484, "grad_norm": 0.060542475432157516, "learning_rate": 6.294111566325106e-07, "loss": 0.0517, "step": 6102 }, { "epoch": 0.8504145474813628, "grad_norm": 0.05582110956311226, "learning_rate": 6.282669192770896e-07, "loss": 0.0453, "step": 6103 }, { "epoch": 0.8505538911725772, "grad_norm": 0.09064096212387085, "learning_rate": 6.271236532209502e-07, "loss": 0.0438, "step": 6104 }, { "epoch": 0.8506932348637916, "grad_norm": 0.12542547285556793, "learning_rate": 6.259813587181024e-07, "loss": 0.0629, "step": 6105 }, { "epoch": 0.8508325785550059, "grad_norm": 0.07829128950834274, "learning_rate": 6.248400360223355e-07, "loss": 0.0427, "step": 6106 }, { "epoch": 0.8509719222462203, "grad_norm": 0.06053340807557106, "learning_rate": 6.236996853872251e-07, "loss": 0.0418, "step": 6107 }, { "epoch": 0.8511112659374347, "grad_norm": 0.10774370282888412, "learning_rate": 6.225603070661318e-07, "loss": 0.0587, "step": 6108 }, { "epoch": 0.8512506096286491, "grad_norm": 0.059294287115335464, "learning_rate": 6.214219013122008e-07, "loss": 0.0438, "step": 6109 }, { "epoch": 0.8513899533198634, "grad_norm": 0.08447447419166565, "learning_rate": 6.202844683783587e-07, "loss": 0.0509, "step": 6110 }, { "epoch": 0.8515292970110778, "grad_norm": 0.06883672624826431, "learning_rate": 6.191480085173163e-07, "loss": 0.0512, "step": 6111 }, { "epoch": 0.8516686407022922, "grad_norm": 0.06416331231594086, "learning_rate": 6.180125219815697e-07, "loss": 0.0482, "step": 6112 }, { "epoch": 0.8518079843935066, "grad_norm": 0.06894087046384811, "learning_rate": 6.168780090233994e-07, "loss": 0.0441, "step": 6113 }, { "epoch": 0.851947328084721, "grad_norm": 0.09463229030370712, "learning_rate": 6.157444698948656e-07, "loss": 0.0504, "step": 6114 }, { "epoch": 0.8520866717759353, "grad_norm": 0.14951790869235992, "learning_rate": 6.146119048478177e-07, "loss": 0.0518, "step": 6115 }, { "epoch": 0.8522260154671497, "grad_norm": 0.07559062540531158, "learning_rate": 6.134803141338835e-07, "loss": 0.0557, "step": 6116 }, { "epoch": 0.8523653591583641, "grad_norm": 0.10938233882188797, "learning_rate": 6.123496980044785e-07, "loss": 0.051, "step": 6117 }, { "epoch": 0.8525047028495785, "grad_norm": 0.1171228364109993, "learning_rate": 6.112200567107978e-07, "loss": 0.058, "step": 6118 }, { "epoch": 0.8526440465407928, "grad_norm": 0.09182290732860565, "learning_rate": 6.10091390503823e-07, "loss": 0.0526, "step": 6119 }, { "epoch": 0.8527833902320072, "grad_norm": 0.08030255883932114, "learning_rate": 6.089636996343202e-07, "loss": 0.045, "step": 6120 }, { "epoch": 0.8529227339232216, "grad_norm": 0.0760023221373558, "learning_rate": 6.07836984352832e-07, "loss": 0.0522, "step": 6121 }, { "epoch": 0.853062077614436, "grad_norm": 0.09200747311115265, "learning_rate": 6.067112449096907e-07, "loss": 0.051, "step": 6122 }, { "epoch": 0.8532014213056504, "grad_norm": 0.0919809490442276, "learning_rate": 6.055864815550106e-07, "loss": 0.0591, "step": 6123 }, { "epoch": 0.8533407649968647, "grad_norm": 0.09660755097866058, "learning_rate": 6.044626945386894e-07, "loss": 0.0483, "step": 6124 }, { "epoch": 0.8534801086880791, "grad_norm": 0.06567011773586273, "learning_rate": 6.033398841104043e-07, "loss": 0.0474, "step": 6125 }, { "epoch": 0.8536194523792935, "grad_norm": 0.06881514191627502, "learning_rate": 6.022180505196207e-07, "loss": 0.0437, "step": 6126 }, { "epoch": 0.8537587960705079, "grad_norm": 0.11247028410434723, "learning_rate": 6.01097194015583e-07, "loss": 0.051, "step": 6127 }, { "epoch": 0.8538981397617222, "grad_norm": 0.08585282415151596, "learning_rate": 5.999773148473193e-07, "loss": 0.0556, "step": 6128 }, { "epoch": 0.8540374834529366, "grad_norm": 0.13611243665218353, "learning_rate": 5.988584132636421e-07, "loss": 0.0611, "step": 6129 }, { "epoch": 0.854176827144151, "grad_norm": 0.12052610516548157, "learning_rate": 5.977404895131467e-07, "loss": 0.0602, "step": 6130 }, { "epoch": 0.8543161708353654, "grad_norm": 0.059725116938352585, "learning_rate": 5.966235438442086e-07, "loss": 0.0468, "step": 6131 }, { "epoch": 0.8544555145265799, "grad_norm": 0.07290907949209213, "learning_rate": 5.955075765049878e-07, "loss": 0.0521, "step": 6132 }, { "epoch": 0.8545948582177942, "grad_norm": 0.0686669573187828, "learning_rate": 5.943925877434276e-07, "loss": 0.0451, "step": 6133 }, { "epoch": 0.8547342019090086, "grad_norm": 0.11853773146867752, "learning_rate": 5.932785778072531e-07, "loss": 0.0489, "step": 6134 }, { "epoch": 0.854873545600223, "grad_norm": 0.07865870743989944, "learning_rate": 5.921655469439708e-07, "loss": 0.0576, "step": 6135 }, { "epoch": 0.8550128892914374, "grad_norm": 0.08393441140651703, "learning_rate": 5.910534954008718e-07, "loss": 0.0519, "step": 6136 }, { "epoch": 0.8551522329826517, "grad_norm": 0.11857740581035614, "learning_rate": 5.899424234250278e-07, "loss": 0.0627, "step": 6137 }, { "epoch": 0.8552915766738661, "grad_norm": 0.08172646164894104, "learning_rate": 5.888323312632948e-07, "loss": 0.0479, "step": 6138 }, { "epoch": 0.8554309203650805, "grad_norm": 0.11220891773700714, "learning_rate": 5.877232191623078e-07, "loss": 0.0566, "step": 6139 }, { "epoch": 0.8555702640562949, "grad_norm": 0.10652974992990494, "learning_rate": 5.866150873684878e-07, "loss": 0.0531, "step": 6140 }, { "epoch": 0.8557096077475093, "grad_norm": 0.09247582405805588, "learning_rate": 5.855079361280374e-07, "loss": 0.0559, "step": 6141 }, { "epoch": 0.8558489514387236, "grad_norm": 0.06992967426776886, "learning_rate": 5.844017656869389e-07, "loss": 0.0455, "step": 6142 }, { "epoch": 0.855988295129938, "grad_norm": 0.08437251299619675, "learning_rate": 5.83296576290957e-07, "loss": 0.0454, "step": 6143 }, { "epoch": 0.8561276388211524, "grad_norm": 0.09025052189826965, "learning_rate": 5.821923681856406e-07, "loss": 0.0549, "step": 6144 }, { "epoch": 0.8562669825123668, "grad_norm": 0.09585211426019669, "learning_rate": 5.810891416163211e-07, "loss": 0.0475, "step": 6145 }, { "epoch": 0.8564063262035811, "grad_norm": 0.14727817475795746, "learning_rate": 5.799868968281075e-07, "loss": 0.0601, "step": 6146 }, { "epoch": 0.8565456698947955, "grad_norm": 0.08024375885725021, "learning_rate": 5.788856340658966e-07, "loss": 0.0487, "step": 6147 }, { "epoch": 0.8566850135860099, "grad_norm": 0.06033160164952278, "learning_rate": 5.777853535743605e-07, "loss": 0.0516, "step": 6148 }, { "epoch": 0.8568243572772243, "grad_norm": 0.07602035999298096, "learning_rate": 5.766860555979586e-07, "loss": 0.0448, "step": 6149 }, { "epoch": 0.8569637009684387, "grad_norm": 0.10071359574794769, "learning_rate": 5.755877403809284e-07, "loss": 0.0634, "step": 6150 }, { "epoch": 0.857103044659653, "grad_norm": 0.05998421832919121, "learning_rate": 5.744904081672914e-07, "loss": 0.0483, "step": 6151 }, { "epoch": 0.8572423883508674, "grad_norm": 0.06278179585933685, "learning_rate": 5.733940592008519e-07, "loss": 0.0478, "step": 6152 }, { "epoch": 0.8573817320420818, "grad_norm": 0.07366640865802765, "learning_rate": 5.72298693725189e-07, "loss": 0.0484, "step": 6153 }, { "epoch": 0.8575210757332962, "grad_norm": 0.06724550575017929, "learning_rate": 5.712043119836702e-07, "loss": 0.0476, "step": 6154 }, { "epoch": 0.8576604194245105, "grad_norm": 0.17325790226459503, "learning_rate": 5.701109142194422e-07, "loss": 0.0666, "step": 6155 }, { "epoch": 0.8577997631157249, "grad_norm": 0.07719362527132034, "learning_rate": 5.69018500675434e-07, "loss": 0.0437, "step": 6156 }, { "epoch": 0.8579391068069393, "grad_norm": 0.06236051768064499, "learning_rate": 5.679270715943535e-07, "loss": 0.0432, "step": 6157 }, { "epoch": 0.8580784504981537, "grad_norm": 0.0799180194735527, "learning_rate": 5.668366272186915e-07, "loss": 0.045, "step": 6158 }, { "epoch": 0.858217794189368, "grad_norm": 0.07878269255161285, "learning_rate": 5.657471677907205e-07, "loss": 0.0513, "step": 6159 }, { "epoch": 0.8583571378805824, "grad_norm": 0.06831042468547821, "learning_rate": 5.646586935524922e-07, "loss": 0.0455, "step": 6160 }, { "epoch": 0.8584964815717968, "grad_norm": 0.14151231944561005, "learning_rate": 5.635712047458419e-07, "loss": 0.0604, "step": 6161 }, { "epoch": 0.8586358252630112, "grad_norm": 0.07916690409183502, "learning_rate": 5.624847016123847e-07, "loss": 0.0516, "step": 6162 }, { "epoch": 0.8587751689542256, "grad_norm": 0.12131595611572266, "learning_rate": 5.613991843935179e-07, "loss": 0.0554, "step": 6163 }, { "epoch": 0.8589145126454399, "grad_norm": 0.09736620634794235, "learning_rate": 5.60314653330416e-07, "loss": 0.0536, "step": 6164 }, { "epoch": 0.8590538563366543, "grad_norm": 0.12268839031457901, "learning_rate": 5.592311086640379e-07, "loss": 0.0518, "step": 6165 }, { "epoch": 0.8591932000278687, "grad_norm": 0.10787776857614517, "learning_rate": 5.581485506351242e-07, "loss": 0.0533, "step": 6166 }, { "epoch": 0.8593325437190831, "grad_norm": 0.07821659743785858, "learning_rate": 5.570669794841921e-07, "loss": 0.0488, "step": 6167 }, { "epoch": 0.8594718874102975, "grad_norm": 0.1023516058921814, "learning_rate": 5.559863954515448e-07, "loss": 0.0562, "step": 6168 }, { "epoch": 0.8596112311015118, "grad_norm": 0.07805726677179337, "learning_rate": 5.549067987772605e-07, "loss": 0.0534, "step": 6169 }, { "epoch": 0.8597505747927262, "grad_norm": 0.0675511583685875, "learning_rate": 5.538281897012032e-07, "loss": 0.0487, "step": 6170 }, { "epoch": 0.8598899184839406, "grad_norm": 0.08029545843601227, "learning_rate": 5.527505684630136e-07, "loss": 0.0488, "step": 6171 }, { "epoch": 0.8600292621751551, "grad_norm": 0.09733273833990097, "learning_rate": 5.51673935302115e-07, "loss": 0.0555, "step": 6172 }, { "epoch": 0.8601686058663695, "grad_norm": 0.08413613587617874, "learning_rate": 5.505982904577123e-07, "loss": 0.0524, "step": 6173 }, { "epoch": 0.8603079495575838, "grad_norm": 0.10659236460924149, "learning_rate": 5.495236341687876e-07, "loss": 0.061, "step": 6174 }, { "epoch": 0.8604472932487982, "grad_norm": 0.08003443479537964, "learning_rate": 5.484499666741044e-07, "loss": 0.0526, "step": 6175 }, { "epoch": 0.8605866369400126, "grad_norm": 0.08468038588762283, "learning_rate": 5.47377288212208e-07, "loss": 0.0535, "step": 6176 }, { "epoch": 0.860725980631227, "grad_norm": 0.0909125804901123, "learning_rate": 5.463055990214245e-07, "loss": 0.0456, "step": 6177 }, { "epoch": 0.8608653243224413, "grad_norm": 0.06276296824216843, "learning_rate": 5.452348993398566e-07, "loss": 0.0495, "step": 6178 }, { "epoch": 0.8610046680136557, "grad_norm": 0.06071009114384651, "learning_rate": 5.441651894053895e-07, "loss": 0.0534, "step": 6179 }, { "epoch": 0.8611440117048701, "grad_norm": 0.09678853303194046, "learning_rate": 5.430964694556884e-07, "loss": 0.0429, "step": 6180 }, { "epoch": 0.8612833553960845, "grad_norm": 0.11954856663942337, "learning_rate": 5.420287397282004e-07, "loss": 0.0652, "step": 6181 }, { "epoch": 0.8614226990872988, "grad_norm": 0.09165822714567184, "learning_rate": 5.409620004601479e-07, "loss": 0.0528, "step": 6182 }, { "epoch": 0.8615620427785132, "grad_norm": 0.07250359654426575, "learning_rate": 5.398962518885375e-07, "loss": 0.0604, "step": 6183 }, { "epoch": 0.8617013864697276, "grad_norm": 0.08533912897109985, "learning_rate": 5.388314942501549e-07, "loss": 0.0534, "step": 6184 }, { "epoch": 0.861840730160942, "grad_norm": 0.09335757046937943, "learning_rate": 5.377677277815646e-07, "loss": 0.0517, "step": 6185 }, { "epoch": 0.8619800738521564, "grad_norm": 0.12506723403930664, "learning_rate": 5.367049527191093e-07, "loss": 0.062, "step": 6186 }, { "epoch": 0.8621194175433707, "grad_norm": 0.09265542030334473, "learning_rate": 5.356431692989144e-07, "loss": 0.0456, "step": 6187 }, { "epoch": 0.8622587612345851, "grad_norm": 0.14422069489955902, "learning_rate": 5.345823777568859e-07, "loss": 0.0573, "step": 6188 }, { "epoch": 0.8623981049257995, "grad_norm": 0.10260806977748871, "learning_rate": 5.335225783287051e-07, "loss": 0.0608, "step": 6189 }, { "epoch": 0.8625374486170139, "grad_norm": 0.06495699286460876, "learning_rate": 5.324637712498359e-07, "loss": 0.0399, "step": 6190 }, { "epoch": 0.8626767923082282, "grad_norm": 0.14203254878520966, "learning_rate": 5.314059567555213e-07, "loss": 0.066, "step": 6191 }, { "epoch": 0.8628161359994426, "grad_norm": 0.07941418141126633, "learning_rate": 5.303491350807832e-07, "loss": 0.0553, "step": 6192 }, { "epoch": 0.862955479690657, "grad_norm": 0.12840664386749268, "learning_rate": 5.292933064604228e-07, "loss": 0.0699, "step": 6193 }, { "epoch": 0.8630948233818714, "grad_norm": 0.09625770151615143, "learning_rate": 5.282384711290228e-07, "loss": 0.0483, "step": 6194 }, { "epoch": 0.8632341670730858, "grad_norm": 0.0821872353553772, "learning_rate": 5.271846293209426e-07, "loss": 0.0494, "step": 6195 }, { "epoch": 0.8633735107643001, "grad_norm": 0.08939020335674286, "learning_rate": 5.261317812703204e-07, "loss": 0.0569, "step": 6196 }, { "epoch": 0.8635128544555145, "grad_norm": 0.09763792157173157, "learning_rate": 5.250799272110768e-07, "loss": 0.0578, "step": 6197 }, { "epoch": 0.8636521981467289, "grad_norm": 0.3262343108654022, "learning_rate": 5.240290673769099e-07, "loss": 0.0785, "step": 6198 }, { "epoch": 0.8637915418379433, "grad_norm": 0.1558106392621994, "learning_rate": 5.229792020012947e-07, "loss": 0.057, "step": 6199 }, { "epoch": 0.8639308855291576, "grad_norm": 0.05995384603738785, "learning_rate": 5.2193033131749e-07, "loss": 0.0485, "step": 6200 }, { "epoch": 0.864070229220372, "grad_norm": 0.0647670105099678, "learning_rate": 5.20882455558529e-07, "loss": 0.0528, "step": 6201 }, { "epoch": 0.8642095729115864, "grad_norm": 0.07435950636863708, "learning_rate": 5.19835574957227e-07, "loss": 0.054, "step": 6202 }, { "epoch": 0.8643489166028008, "grad_norm": 0.06172212213277817, "learning_rate": 5.187896897461752e-07, "loss": 0.0481, "step": 6203 }, { "epoch": 0.8644882602940152, "grad_norm": 0.06926131248474121, "learning_rate": 5.177448001577468e-07, "loss": 0.0529, "step": 6204 }, { "epoch": 0.8646276039852295, "grad_norm": 0.08734503388404846, "learning_rate": 5.167009064240936e-07, "loss": 0.0598, "step": 6205 }, { "epoch": 0.8647669476764439, "grad_norm": 0.09159756451845169, "learning_rate": 5.156580087771429e-07, "loss": 0.0548, "step": 6206 }, { "epoch": 0.8649062913676583, "grad_norm": 0.06732386350631714, "learning_rate": 5.146161074486022e-07, "loss": 0.0485, "step": 6207 }, { "epoch": 0.8650456350588727, "grad_norm": 0.07313132286071777, "learning_rate": 5.135752026699597e-07, "loss": 0.0532, "step": 6208 }, { "epoch": 0.865184978750087, "grad_norm": 0.12488529086112976, "learning_rate": 5.125352946724816e-07, "loss": 0.0588, "step": 6209 }, { "epoch": 0.8653243224413014, "grad_norm": 0.0813169777393341, "learning_rate": 5.114963836872105e-07, "loss": 0.052, "step": 6210 }, { "epoch": 0.8654636661325158, "grad_norm": 0.09743688255548477, "learning_rate": 5.104584699449671e-07, "loss": 0.0563, "step": 6211 }, { "epoch": 0.8656030098237303, "grad_norm": 0.13267725706100464, "learning_rate": 5.094215536763541e-07, "loss": 0.0532, "step": 6212 }, { "epoch": 0.8657423535149447, "grad_norm": 0.11961034685373306, "learning_rate": 5.083856351117511e-07, "loss": 0.0566, "step": 6213 }, { "epoch": 0.865881697206159, "grad_norm": 0.08506160229444504, "learning_rate": 5.073507144813139e-07, "loss": 0.0572, "step": 6214 }, { "epoch": 0.8660210408973734, "grad_norm": 0.08488819003105164, "learning_rate": 5.063167920149797e-07, "loss": 0.0608, "step": 6215 }, { "epoch": 0.8661603845885878, "grad_norm": 0.07799236476421356, "learning_rate": 5.052838679424609e-07, "loss": 0.0539, "step": 6216 }, { "epoch": 0.8662997282798022, "grad_norm": 0.08699233829975128, "learning_rate": 5.042519424932512e-07, "loss": 0.0516, "step": 6217 }, { "epoch": 0.8664390719710166, "grad_norm": 0.10212501883506775, "learning_rate": 5.0322101589662e-07, "loss": 0.0606, "step": 6218 }, { "epoch": 0.8665784156622309, "grad_norm": 0.10610243678092957, "learning_rate": 5.02191088381615e-07, "loss": 0.0503, "step": 6219 }, { "epoch": 0.8667177593534453, "grad_norm": 0.06767094135284424, "learning_rate": 5.01162160177065e-07, "loss": 0.0561, "step": 6220 }, { "epoch": 0.8668571030446597, "grad_norm": 0.12588313221931458, "learning_rate": 5.001342315115726e-07, "loss": 0.0466, "step": 6221 }, { "epoch": 0.8669964467358741, "grad_norm": 0.09742487967014313, "learning_rate": 4.991073026135196e-07, "loss": 0.0642, "step": 6222 }, { "epoch": 0.8671357904270884, "grad_norm": 0.14005421102046967, "learning_rate": 4.980813737110662e-07, "loss": 0.0554, "step": 6223 }, { "epoch": 0.8672751341183028, "grad_norm": 0.07216469943523407, "learning_rate": 4.970564450321525e-07, "loss": 0.0552, "step": 6224 }, { "epoch": 0.8674144778095172, "grad_norm": 0.10587121546268463, "learning_rate": 4.960325168044916e-07, "loss": 0.0605, "step": 6225 }, { "epoch": 0.8675538215007316, "grad_norm": 0.0944194570183754, "learning_rate": 4.950095892555789e-07, "loss": 0.0581, "step": 6226 }, { "epoch": 0.867693165191946, "grad_norm": 0.07310859113931656, "learning_rate": 4.93987662612685e-07, "loss": 0.0456, "step": 6227 }, { "epoch": 0.8678325088831603, "grad_norm": 0.09653480350971222, "learning_rate": 4.929667371028579e-07, "loss": 0.0491, "step": 6228 }, { "epoch": 0.8679718525743747, "grad_norm": 0.09670577198266983, "learning_rate": 4.919468129529237e-07, "loss": 0.0511, "step": 6229 }, { "epoch": 0.8681111962655891, "grad_norm": 0.09396444261074066, "learning_rate": 4.909278903894887e-07, "loss": 0.0569, "step": 6230 }, { "epoch": 0.8682505399568035, "grad_norm": 0.06648948043584824, "learning_rate": 4.89909969638932e-07, "loss": 0.0512, "step": 6231 }, { "epoch": 0.8683898836480178, "grad_norm": 0.13980749249458313, "learning_rate": 4.888930509274125e-07, "loss": 0.0546, "step": 6232 }, { "epoch": 0.8685292273392322, "grad_norm": 0.1039370447397232, "learning_rate": 4.878771344808664e-07, "loss": 0.0528, "step": 6233 }, { "epoch": 0.8686685710304466, "grad_norm": 0.10843183845281601, "learning_rate": 4.868622205250089e-07, "loss": 0.0439, "step": 6234 }, { "epoch": 0.868807914721661, "grad_norm": 0.06979010999202728, "learning_rate": 4.858483092853278e-07, "loss": 0.0573, "step": 6235 }, { "epoch": 0.8689472584128753, "grad_norm": 0.0812549740076065, "learning_rate": 4.848354009870931e-07, "loss": 0.0481, "step": 6236 }, { "epoch": 0.8690866021040897, "grad_norm": 0.1273130178451538, "learning_rate": 4.838234958553501e-07, "loss": 0.0583, "step": 6237 }, { "epoch": 0.8692259457953041, "grad_norm": 0.12336072325706482, "learning_rate": 4.828125941149197e-07, "loss": 0.0557, "step": 6238 }, { "epoch": 0.8693652894865185, "grad_norm": 0.06817056238651276, "learning_rate": 4.818026959904016e-07, "loss": 0.0466, "step": 6239 }, { "epoch": 0.8695046331777329, "grad_norm": 0.10636591166257858, "learning_rate": 4.80793801706172e-07, "loss": 0.0573, "step": 6240 }, { "epoch": 0.8696439768689472, "grad_norm": 0.05406570062041283, "learning_rate": 4.797859114863851e-07, "loss": 0.0452, "step": 6241 }, { "epoch": 0.8697833205601616, "grad_norm": 0.103827565908432, "learning_rate": 4.787790255549707e-07, "loss": 0.0614, "step": 6242 }, { "epoch": 0.869922664251376, "grad_norm": 0.10023855417966843, "learning_rate": 4.777731441356342e-07, "loss": 0.0484, "step": 6243 }, { "epoch": 0.8700620079425904, "grad_norm": 0.09954804182052612, "learning_rate": 4.7676826745186144e-07, "loss": 0.0712, "step": 6244 }, { "epoch": 0.8702013516338047, "grad_norm": 0.07177392393350601, "learning_rate": 4.757643957269131e-07, "loss": 0.0474, "step": 6245 }, { "epoch": 0.8703406953250191, "grad_norm": 0.08605086803436279, "learning_rate": 4.7476152918382535e-07, "loss": 0.0512, "step": 6246 }, { "epoch": 0.8704800390162335, "grad_norm": 0.11300403624773026, "learning_rate": 4.737596680454137e-07, "loss": 0.0593, "step": 6247 }, { "epoch": 0.8706193827074479, "grad_norm": 0.15755128860473633, "learning_rate": 4.727588125342669e-07, "loss": 0.0615, "step": 6248 }, { "epoch": 0.8707587263986623, "grad_norm": 0.07987072318792343, "learning_rate": 4.7175896287275424e-07, "loss": 0.0589, "step": 6249 }, { "epoch": 0.8708980700898766, "grad_norm": 0.09026525169610977, "learning_rate": 4.7076011928301803e-07, "loss": 0.0486, "step": 6250 }, { "epoch": 0.871037413781091, "grad_norm": 0.11555325239896774, "learning_rate": 4.6976228198697847e-07, "loss": 0.0535, "step": 6251 }, { "epoch": 0.8711767574723055, "grad_norm": 0.11430969089269638, "learning_rate": 4.687654512063344e-07, "loss": 0.0525, "step": 6252 }, { "epoch": 0.8713161011635199, "grad_norm": 0.05617630109190941, "learning_rate": 4.6776962716255593e-07, "loss": 0.0508, "step": 6253 }, { "epoch": 0.8714554448547343, "grad_norm": 0.08739644289016724, "learning_rate": 4.667748100768937e-07, "loss": 0.0563, "step": 6254 }, { "epoch": 0.8715947885459486, "grad_norm": 0.0761031061410904, "learning_rate": 4.657810001703733e-07, "loss": 0.0485, "step": 6255 }, { "epoch": 0.871734132237163, "grad_norm": 0.14449772238731384, "learning_rate": 4.647881976637975e-07, "loss": 0.0658, "step": 6256 }, { "epoch": 0.8718734759283774, "grad_norm": 0.08529550582170486, "learning_rate": 4.637964027777425e-07, "loss": 0.0531, "step": 6257 }, { "epoch": 0.8720128196195918, "grad_norm": 0.09171400219202042, "learning_rate": 4.62805615732565e-07, "loss": 0.0513, "step": 6258 }, { "epoch": 0.8721521633108061, "grad_norm": 0.10086414963006973, "learning_rate": 4.6181583674839323e-07, "loss": 0.0589, "step": 6259 }, { "epoch": 0.8722915070020205, "grad_norm": 0.10908132046461105, "learning_rate": 4.6082706604513307e-07, "loss": 0.0566, "step": 6260 }, { "epoch": 0.8724308506932349, "grad_norm": 0.09872188419103622, "learning_rate": 4.598393038424681e-07, "loss": 0.0495, "step": 6261 }, { "epoch": 0.8725701943844493, "grad_norm": 0.0906936526298523, "learning_rate": 4.5885255035985675e-07, "loss": 0.0538, "step": 6262 }, { "epoch": 0.8727095380756636, "grad_norm": 0.1029166504740715, "learning_rate": 4.578668058165325e-07, "loss": 0.0572, "step": 6263 }, { "epoch": 0.872848881766878, "grad_norm": 0.07088612020015717, "learning_rate": 4.5688207043150467e-07, "loss": 0.0485, "step": 6264 }, { "epoch": 0.8729882254580924, "grad_norm": 0.10670321434736252, "learning_rate": 4.5589834442355986e-07, "loss": 0.0619, "step": 6265 }, { "epoch": 0.8731275691493068, "grad_norm": 0.10358668863773346, "learning_rate": 4.549156280112599e-07, "loss": 0.0578, "step": 6266 }, { "epoch": 0.8732669128405212, "grad_norm": 0.06325802952051163, "learning_rate": 4.5393392141294066e-07, "loss": 0.0546, "step": 6267 }, { "epoch": 0.8734062565317355, "grad_norm": 0.08495623618364334, "learning_rate": 4.5295322484671667e-07, "loss": 0.0519, "step": 6268 }, { "epoch": 0.8735456002229499, "grad_norm": 0.08784747123718262, "learning_rate": 4.519735385304741e-07, "loss": 0.0528, "step": 6269 }, { "epoch": 0.8736849439141643, "grad_norm": 0.11000318080186844, "learning_rate": 4.509948626818789e-07, "loss": 0.0555, "step": 6270 }, { "epoch": 0.8738242876053787, "grad_norm": 0.07852855324745178, "learning_rate": 4.500171975183687e-07, "loss": 0.0492, "step": 6271 }, { "epoch": 0.873963631296593, "grad_norm": 0.08235786110162735, "learning_rate": 4.4904054325715927e-07, "loss": 0.0457, "step": 6272 }, { "epoch": 0.8741029749878074, "grad_norm": 0.07731222361326218, "learning_rate": 4.4806490011524205e-07, "loss": 0.0601, "step": 6273 }, { "epoch": 0.8742423186790218, "grad_norm": 0.07620193064212799, "learning_rate": 4.4709026830938194e-07, "loss": 0.0587, "step": 6274 }, { "epoch": 0.8743816623702362, "grad_norm": 0.09003058820962906, "learning_rate": 4.46116648056118e-07, "loss": 0.0556, "step": 6275 }, { "epoch": 0.8745210060614506, "grad_norm": 0.08467741310596466, "learning_rate": 4.451440395717682e-07, "loss": 0.0511, "step": 6276 }, { "epoch": 0.8746603497526649, "grad_norm": 0.07779841125011444, "learning_rate": 4.441724430724248e-07, "loss": 0.0535, "step": 6277 }, { "epoch": 0.8747996934438793, "grad_norm": 0.08453445136547089, "learning_rate": 4.432018587739517e-07, "loss": 0.0481, "step": 6278 }, { "epoch": 0.8749390371350937, "grad_norm": 0.10282909870147705, "learning_rate": 4.422322868919937e-07, "loss": 0.0561, "step": 6279 }, { "epoch": 0.8750783808263081, "grad_norm": 0.0843077078461647, "learning_rate": 4.4126372764196457e-07, "loss": 0.0596, "step": 6280 }, { "epoch": 0.8752177245175224, "grad_norm": 0.07349080592393875, "learning_rate": 4.402961812390588e-07, "loss": 0.0427, "step": 6281 }, { "epoch": 0.8753570682087368, "grad_norm": 0.12781846523284912, "learning_rate": 4.3932964789824064e-07, "loss": 0.0581, "step": 6282 }, { "epoch": 0.8754964118999512, "grad_norm": 0.05828173831105232, "learning_rate": 4.3836412783425265e-07, "loss": 0.0449, "step": 6283 }, { "epoch": 0.8756357555911656, "grad_norm": 0.08303509652614594, "learning_rate": 4.3739962126161273e-07, "loss": 0.0444, "step": 6284 }, { "epoch": 0.87577509928238, "grad_norm": 0.1477040797472, "learning_rate": 4.3643612839461057e-07, "loss": 0.0493, "step": 6285 }, { "epoch": 0.8759144429735943, "grad_norm": 0.09637358039617538, "learning_rate": 4.354736494473122e-07, "loss": 0.0648, "step": 6286 }, { "epoch": 0.8760537866648087, "grad_norm": 0.07476599514484406, "learning_rate": 4.345121846335593e-07, "loss": 0.0437, "step": 6287 }, { "epoch": 0.8761931303560231, "grad_norm": 0.1364462822675705, "learning_rate": 4.335517341669676e-07, "loss": 0.0626, "step": 6288 }, { "epoch": 0.8763324740472375, "grad_norm": 0.07311676442623138, "learning_rate": 4.3259229826092655e-07, "loss": 0.0428, "step": 6289 }, { "epoch": 0.8764718177384518, "grad_norm": 0.10137993842363358, "learning_rate": 4.316338771286005e-07, "loss": 0.0532, "step": 6290 }, { "epoch": 0.8766111614296662, "grad_norm": 0.16909313201904297, "learning_rate": 4.3067647098293033e-07, "loss": 0.067, "step": 6291 }, { "epoch": 0.8767505051208806, "grad_norm": 0.09596981853246689, "learning_rate": 4.29720080036628e-07, "loss": 0.0523, "step": 6292 }, { "epoch": 0.8768898488120951, "grad_norm": 0.1304568350315094, "learning_rate": 4.2876470450218254e-07, "loss": 0.0605, "step": 6293 }, { "epoch": 0.8770291925033095, "grad_norm": 0.1045260801911354, "learning_rate": 4.278103445918569e-07, "loss": 0.0618, "step": 6294 }, { "epoch": 0.8771685361945238, "grad_norm": 0.08028755336999893, "learning_rate": 4.268570005176892e-07, "loss": 0.0429, "step": 6295 }, { "epoch": 0.8773078798857382, "grad_norm": 0.058845918625593185, "learning_rate": 4.259046724914878e-07, "loss": 0.0443, "step": 6296 }, { "epoch": 0.8774472235769526, "grad_norm": 0.1481093317270279, "learning_rate": 4.2495336072484015e-07, "loss": 0.0673, "step": 6297 }, { "epoch": 0.877586567268167, "grad_norm": 0.07363555580377579, "learning_rate": 4.240030654291061e-07, "loss": 0.0553, "step": 6298 }, { "epoch": 0.8777259109593814, "grad_norm": 0.07431205362081528, "learning_rate": 4.2305378681541833e-07, "loss": 0.0478, "step": 6299 }, { "epoch": 0.8778652546505957, "grad_norm": 0.0909002497792244, "learning_rate": 4.221055250946865e-07, "loss": 0.0588, "step": 6300 }, { "epoch": 0.8780045983418101, "grad_norm": 0.1339554637670517, "learning_rate": 4.21158280477591e-07, "loss": 0.0656, "step": 6301 }, { "epoch": 0.8781439420330245, "grad_norm": 0.07195152342319489, "learning_rate": 4.202120531745896e-07, "loss": 0.0413, "step": 6302 }, { "epoch": 0.8782832857242389, "grad_norm": 0.11039416491985321, "learning_rate": 4.192668433959113e-07, "loss": 0.0541, "step": 6303 }, { "epoch": 0.8784226294154532, "grad_norm": 0.08664210885763168, "learning_rate": 4.183226513515598e-07, "loss": 0.058, "step": 6304 }, { "epoch": 0.8785619731066676, "grad_norm": 0.11151698976755142, "learning_rate": 4.173794772513151e-07, "loss": 0.0547, "step": 6305 }, { "epoch": 0.878701316797882, "grad_norm": 0.08558917045593262, "learning_rate": 4.1643732130472737e-07, "loss": 0.0434, "step": 6306 }, { "epoch": 0.8788406604890964, "grad_norm": 0.1031121015548706, "learning_rate": 4.1549618372112135e-07, "loss": 0.0529, "step": 6307 }, { "epoch": 0.8789800041803107, "grad_norm": 0.12751606106758118, "learning_rate": 4.1455606470959755e-07, "loss": 0.0662, "step": 6308 }, { "epoch": 0.8791193478715251, "grad_norm": 0.08001881092786789, "learning_rate": 4.1361696447902944e-07, "loss": 0.0428, "step": 6309 }, { "epoch": 0.8792586915627395, "grad_norm": 0.08831873536109924, "learning_rate": 4.1267888323806294e-07, "loss": 0.05, "step": 6310 }, { "epoch": 0.8793980352539539, "grad_norm": 0.10257959365844727, "learning_rate": 4.117418211951174e-07, "loss": 0.0645, "step": 6311 }, { "epoch": 0.8795373789451683, "grad_norm": 0.07637500762939453, "learning_rate": 4.1080577855838746e-07, "loss": 0.0471, "step": 6312 }, { "epoch": 0.8796767226363826, "grad_norm": 0.08632996678352356, "learning_rate": 4.098707555358411e-07, "loss": 0.0604, "step": 6313 }, { "epoch": 0.879816066327597, "grad_norm": 0.16021229326725006, "learning_rate": 4.0893675233521777e-07, "loss": 0.0631, "step": 6314 }, { "epoch": 0.8799554100188114, "grad_norm": 0.11864162981510162, "learning_rate": 4.080037691640321e-07, "loss": 0.0533, "step": 6315 }, { "epoch": 0.8800947537100258, "grad_norm": 0.10746530443429947, "learning_rate": 4.070718062295731e-07, "loss": 0.0568, "step": 6316 }, { "epoch": 0.8802340974012401, "grad_norm": 0.05723006650805473, "learning_rate": 4.0614086373890026e-07, "loss": 0.0439, "step": 6317 }, { "epoch": 0.8803734410924545, "grad_norm": 0.11883792281150818, "learning_rate": 4.05210941898847e-07, "loss": 0.0439, "step": 6318 }, { "epoch": 0.8805127847836689, "grad_norm": 0.14757122099399567, "learning_rate": 4.042820409160214e-07, "loss": 0.0594, "step": 6319 }, { "epoch": 0.8806521284748833, "grad_norm": 0.10053994506597519, "learning_rate": 4.033541609968056e-07, "loss": 0.0515, "step": 6320 }, { "epoch": 0.8807914721660977, "grad_norm": 0.07799181342124939, "learning_rate": 4.0242730234735184e-07, "loss": 0.0562, "step": 6321 }, { "epoch": 0.880930815857312, "grad_norm": 0.08757543563842773, "learning_rate": 4.01501465173586e-07, "loss": 0.0517, "step": 6322 }, { "epoch": 0.8810701595485264, "grad_norm": 0.09100954979658127, "learning_rate": 4.005766496812097e-07, "loss": 0.0506, "step": 6323 }, { "epoch": 0.8812095032397408, "grad_norm": 0.10198578983545303, "learning_rate": 3.9965285607569573e-07, "loss": 0.0491, "step": 6324 }, { "epoch": 0.8813488469309552, "grad_norm": 0.09498221427202225, "learning_rate": 3.987300845622882e-07, "loss": 0.0584, "step": 6325 }, { "epoch": 0.8814881906221695, "grad_norm": 0.07501886785030365, "learning_rate": 3.978083353460083e-07, "loss": 0.0502, "step": 6326 }, { "epoch": 0.8816275343133839, "grad_norm": 0.05770398676395416, "learning_rate": 3.96887608631647e-07, "loss": 0.0449, "step": 6327 }, { "epoch": 0.8817668780045983, "grad_norm": 0.08389277011156082, "learning_rate": 3.959679046237663e-07, "loss": 0.0439, "step": 6328 }, { "epoch": 0.8819062216958127, "grad_norm": 0.08421018719673157, "learning_rate": 3.950492235267062e-07, "loss": 0.0538, "step": 6329 }, { "epoch": 0.882045565387027, "grad_norm": 0.06780523806810379, "learning_rate": 3.9413156554457655e-07, "loss": 0.0467, "step": 6330 }, { "epoch": 0.8821849090782414, "grad_norm": 0.08501182496547699, "learning_rate": 3.9321493088125774e-07, "loss": 0.0565, "step": 6331 }, { "epoch": 0.8823242527694558, "grad_norm": 0.05934395268559456, "learning_rate": 3.9229931974040844e-07, "loss": 0.0467, "step": 6332 }, { "epoch": 0.8824635964606703, "grad_norm": 0.05361861363053322, "learning_rate": 3.9138473232545326e-07, "loss": 0.0453, "step": 6333 }, { "epoch": 0.8826029401518847, "grad_norm": 0.05515589937567711, "learning_rate": 3.9047116883959513e-07, "loss": 0.0439, "step": 6334 }, { "epoch": 0.882742283843099, "grad_norm": 0.07259302586317062, "learning_rate": 3.895586294858045e-07, "loss": 0.0547, "step": 6335 }, { "epoch": 0.8828816275343134, "grad_norm": 0.05898294970393181, "learning_rate": 3.886471144668291e-07, "loss": 0.0488, "step": 6336 }, { "epoch": 0.8830209712255278, "grad_norm": 0.10286977887153625, "learning_rate": 3.8773662398518596e-07, "loss": 0.0565, "step": 6337 }, { "epoch": 0.8831603149167422, "grad_norm": 0.07231969386339188, "learning_rate": 3.8682715824316594e-07, "loss": 0.041, "step": 6338 }, { "epoch": 0.8832996586079566, "grad_norm": 0.06791740655899048, "learning_rate": 3.8591871744282973e-07, "loss": 0.0479, "step": 6339 }, { "epoch": 0.8834390022991709, "grad_norm": 0.07032502442598343, "learning_rate": 3.85011301786013e-07, "loss": 0.0514, "step": 6340 }, { "epoch": 0.8835783459903853, "grad_norm": 0.11037711799144745, "learning_rate": 3.841049114743239e-07, "loss": 0.0553, "step": 6341 }, { "epoch": 0.8837176896815997, "grad_norm": 0.13689248263835907, "learning_rate": 3.8319954670914094e-07, "loss": 0.0606, "step": 6342 }, { "epoch": 0.8838570333728141, "grad_norm": 0.08168849349021912, "learning_rate": 3.8229520769161474e-07, "loss": 0.0508, "step": 6343 }, { "epoch": 0.8839963770640284, "grad_norm": 0.0785612091422081, "learning_rate": 3.813918946226691e-07, "loss": 0.0516, "step": 6344 }, { "epoch": 0.8841357207552428, "grad_norm": 0.0810251235961914, "learning_rate": 3.804896077030007e-07, "loss": 0.0576, "step": 6345 }, { "epoch": 0.8842750644464572, "grad_norm": 0.11881854385137558, "learning_rate": 3.7958834713307524e-07, "loss": 0.0652, "step": 6346 }, { "epoch": 0.8844144081376716, "grad_norm": 0.07539723068475723, "learning_rate": 3.786881131131348e-07, "loss": 0.0567, "step": 6347 }, { "epoch": 0.884553751828886, "grad_norm": 0.08026216924190521, "learning_rate": 3.7778890584318773e-07, "loss": 0.0442, "step": 6348 }, { "epoch": 0.8846930955201003, "grad_norm": 0.10397721081972122, "learning_rate": 3.7689072552301973e-07, "loss": 0.0549, "step": 6349 }, { "epoch": 0.8848324392113147, "grad_norm": 0.14259792864322662, "learning_rate": 3.759935723521846e-07, "loss": 0.057, "step": 6350 }, { "epoch": 0.8849717829025291, "grad_norm": 0.10428915172815323, "learning_rate": 3.7509744653001e-07, "loss": 0.0446, "step": 6351 }, { "epoch": 0.8851111265937435, "grad_norm": 0.0786292627453804, "learning_rate": 3.742023482555951e-07, "loss": 0.0467, "step": 6352 }, { "epoch": 0.8852504702849578, "grad_norm": 0.09532216936349869, "learning_rate": 3.7330827772780967e-07, "loss": 0.0447, "step": 6353 }, { "epoch": 0.8853898139761722, "grad_norm": 0.12486188113689423, "learning_rate": 3.7241523514529476e-07, "loss": 0.0501, "step": 6354 }, { "epoch": 0.8855291576673866, "grad_norm": 0.09727918356657028, "learning_rate": 3.715232207064651e-07, "loss": 0.0613, "step": 6355 }, { "epoch": 0.885668501358601, "grad_norm": 0.0786789208650589, "learning_rate": 3.7063223460950705e-07, "loss": 0.0542, "step": 6356 }, { "epoch": 0.8858078450498154, "grad_norm": 0.0850759744644165, "learning_rate": 3.697422770523751e-07, "loss": 0.0563, "step": 6357 }, { "epoch": 0.8859471887410297, "grad_norm": 0.09762068837881088, "learning_rate": 3.688533482327994e-07, "loss": 0.0636, "step": 6358 }, { "epoch": 0.8860865324322441, "grad_norm": 0.0876350849866867, "learning_rate": 3.6796544834827865e-07, "loss": 0.0526, "step": 6359 }, { "epoch": 0.8862258761234585, "grad_norm": 0.06856174021959305, "learning_rate": 3.670785775960839e-07, "loss": 0.0491, "step": 6360 }, { "epoch": 0.8863652198146729, "grad_norm": 0.0688227191567421, "learning_rate": 3.66192736173257e-07, "loss": 0.0471, "step": 6361 }, { "epoch": 0.8865045635058872, "grad_norm": 0.06757720559835434, "learning_rate": 3.653079242766139e-07, "loss": 0.0446, "step": 6362 }, { "epoch": 0.8866439071971016, "grad_norm": 0.08047280460596085, "learning_rate": 3.6442414210273834e-07, "loss": 0.0619, "step": 6363 }, { "epoch": 0.886783250888316, "grad_norm": 0.08004558831453323, "learning_rate": 3.6354138984798506e-07, "loss": 0.0429, "step": 6364 }, { "epoch": 0.8869225945795304, "grad_norm": 0.07109881192445755, "learning_rate": 3.6265966770848314e-07, "loss": 0.0532, "step": 6365 }, { "epoch": 0.8870619382707448, "grad_norm": 0.1215103417634964, "learning_rate": 3.6177897588013154e-07, "loss": 0.0537, "step": 6366 }, { "epoch": 0.8872012819619591, "grad_norm": 0.12110777944326401, "learning_rate": 3.608993145585987e-07, "loss": 0.0522, "step": 6367 }, { "epoch": 0.8873406256531735, "grad_norm": 0.0872085690498352, "learning_rate": 3.600206839393261e-07, "loss": 0.0431, "step": 6368 }, { "epoch": 0.8874799693443879, "grad_norm": 0.09538152068853378, "learning_rate": 3.591430842175242e-07, "loss": 0.0511, "step": 6369 }, { "epoch": 0.8876193130356023, "grad_norm": 0.07088501006364822, "learning_rate": 3.5826651558817703e-07, "loss": 0.0448, "step": 6370 }, { "epoch": 0.8877586567268166, "grad_norm": 0.09567641466856003, "learning_rate": 3.5739097824603665e-07, "loss": 0.0461, "step": 6371 }, { "epoch": 0.887898000418031, "grad_norm": 0.12322816252708435, "learning_rate": 3.5651647238562904e-07, "loss": 0.0541, "step": 6372 }, { "epoch": 0.8880373441092455, "grad_norm": 0.08598637580871582, "learning_rate": 3.5564299820124883e-07, "loss": 0.049, "step": 6373 }, { "epoch": 0.8881766878004599, "grad_norm": 0.12022336572408676, "learning_rate": 3.547705558869624e-07, "loss": 0.053, "step": 6374 }, { "epoch": 0.8883160314916743, "grad_norm": 0.08423098921775818, "learning_rate": 3.5389914563660475e-07, "loss": 0.0452, "step": 6375 }, { "epoch": 0.8884553751828886, "grad_norm": 0.07862421125173569, "learning_rate": 3.530287676437849e-07, "loss": 0.0591, "step": 6376 }, { "epoch": 0.888594718874103, "grad_norm": 0.062062766402959824, "learning_rate": 3.5215942210188204e-07, "loss": 0.0418, "step": 6377 }, { "epoch": 0.8887340625653174, "grad_norm": 0.06150529906153679, "learning_rate": 3.512911092040422e-07, "loss": 0.041, "step": 6378 }, { "epoch": 0.8888734062565318, "grad_norm": 0.091456837952137, "learning_rate": 3.5042382914318716e-07, "loss": 0.04, "step": 6379 }, { "epoch": 0.8890127499477462, "grad_norm": 0.0802859291434288, "learning_rate": 3.495575821120045e-07, "loss": 0.062, "step": 6380 }, { "epoch": 0.8891520936389605, "grad_norm": 0.096346415579319, "learning_rate": 3.4869236830295695e-07, "loss": 0.0573, "step": 6381 }, { "epoch": 0.8892914373301749, "grad_norm": 0.05474208667874336, "learning_rate": 3.478281879082729e-07, "loss": 0.044, "step": 6382 }, { "epoch": 0.8894307810213893, "grad_norm": 0.12058186531066895, "learning_rate": 3.469650411199543e-07, "loss": 0.0511, "step": 6383 }, { "epoch": 0.8895701247126037, "grad_norm": 0.07489988207817078, "learning_rate": 3.4610292812977454e-07, "loss": 0.053, "step": 6384 }, { "epoch": 0.889709468403818, "grad_norm": 0.07366960495710373, "learning_rate": 3.452418491292731e-07, "loss": 0.0523, "step": 6385 }, { "epoch": 0.8898488120950324, "grad_norm": 0.09951493889093399, "learning_rate": 3.4438180430976243e-07, "loss": 0.0653, "step": 6386 }, { "epoch": 0.8899881557862468, "grad_norm": 0.15010881423950195, "learning_rate": 3.4352279386232535e-07, "loss": 0.0678, "step": 6387 }, { "epoch": 0.8901274994774612, "grad_norm": 0.05002034455537796, "learning_rate": 3.426648179778147e-07, "loss": 0.0512, "step": 6388 }, { "epoch": 0.8902668431686755, "grad_norm": 0.09041357785463333, "learning_rate": 3.4180787684685246e-07, "loss": 0.0574, "step": 6389 }, { "epoch": 0.8904061868598899, "grad_norm": 0.05670549347996712, "learning_rate": 3.409519706598324e-07, "loss": 0.0465, "step": 6390 }, { "epoch": 0.8905455305511043, "grad_norm": 0.06984314322471619, "learning_rate": 3.400970996069164e-07, "loss": 0.0556, "step": 6391 }, { "epoch": 0.8906848742423187, "grad_norm": 0.05426517501473427, "learning_rate": 3.392432638780363e-07, "loss": 0.0532, "step": 6392 }, { "epoch": 0.8908242179335331, "grad_norm": 0.0868714451789856, "learning_rate": 3.383904636628965e-07, "loss": 0.0437, "step": 6393 }, { "epoch": 0.8909635616247474, "grad_norm": 0.1002948135137558, "learning_rate": 3.3753869915096936e-07, "loss": 0.065, "step": 6394 }, { "epoch": 0.8911029053159618, "grad_norm": 0.05707748234272003, "learning_rate": 3.3668797053149907e-07, "loss": 0.0496, "step": 6395 }, { "epoch": 0.8912422490071762, "grad_norm": 0.11481553316116333, "learning_rate": 3.3583827799349486e-07, "loss": 0.0568, "step": 6396 }, { "epoch": 0.8913815926983906, "grad_norm": 0.08759943395853043, "learning_rate": 3.3498962172574033e-07, "loss": 0.0508, "step": 6397 }, { "epoch": 0.891520936389605, "grad_norm": 0.09301585704088211, "learning_rate": 3.3414200191678903e-07, "loss": 0.0405, "step": 6398 }, { "epoch": 0.8916602800808193, "grad_norm": 0.08661138266324997, "learning_rate": 3.332954187549603e-07, "loss": 0.0673, "step": 6399 }, { "epoch": 0.8917996237720337, "grad_norm": 0.0854056105017662, "learning_rate": 3.3244987242834816e-07, "loss": 0.0559, "step": 6400 }, { "epoch": 0.8919389674632481, "grad_norm": 0.062359366565942764, "learning_rate": 3.3160536312481174e-07, "loss": 0.0492, "step": 6401 }, { "epoch": 0.8920783111544625, "grad_norm": 0.0940035954117775, "learning_rate": 3.3076189103198265e-07, "loss": 0.0589, "step": 6402 }, { "epoch": 0.8922176548456768, "grad_norm": 0.11299636960029602, "learning_rate": 3.299194563372604e-07, "loss": 0.0557, "step": 6403 }, { "epoch": 0.8923569985368912, "grad_norm": 0.08005334436893463, "learning_rate": 3.290780592278148e-07, "loss": 0.0594, "step": 6404 }, { "epoch": 0.8924963422281056, "grad_norm": 0.04568152874708176, "learning_rate": 3.2823769989058674e-07, "loss": 0.0419, "step": 6405 }, { "epoch": 0.89263568591932, "grad_norm": 0.07336095720529556, "learning_rate": 3.2739837851228306e-07, "loss": 0.0455, "step": 6406 }, { "epoch": 0.8927750296105343, "grad_norm": 0.08400295674800873, "learning_rate": 3.265600952793818e-07, "loss": 0.0497, "step": 6407 }, { "epoch": 0.8929143733017487, "grad_norm": 0.08017655462026596, "learning_rate": 3.2572285037813123e-07, "loss": 0.0483, "step": 6408 }, { "epoch": 0.8930537169929631, "grad_norm": 0.07940591871738434, "learning_rate": 3.248866439945486e-07, "loss": 0.0562, "step": 6409 }, { "epoch": 0.8931930606841775, "grad_norm": 0.07732366770505905, "learning_rate": 3.2405147631441757e-07, "loss": 0.0503, "step": 6410 }, { "epoch": 0.8933324043753919, "grad_norm": 0.10724185407161713, "learning_rate": 3.232173475232964e-07, "loss": 0.0491, "step": 6411 }, { "epoch": 0.8934717480666062, "grad_norm": 0.06968395411968231, "learning_rate": 3.2238425780650617e-07, "loss": 0.0462, "step": 6412 }, { "epoch": 0.8936110917578207, "grad_norm": 0.06271837651729584, "learning_rate": 3.215522073491434e-07, "loss": 0.0414, "step": 6413 }, { "epoch": 0.8937504354490351, "grad_norm": 0.13007792830467224, "learning_rate": 3.2072119633606845e-07, "loss": 0.0609, "step": 6414 }, { "epoch": 0.8938897791402495, "grad_norm": 0.07660271227359772, "learning_rate": 3.198912249519143e-07, "loss": 0.0586, "step": 6415 }, { "epoch": 0.8940291228314639, "grad_norm": 0.0787324383854866, "learning_rate": 3.190622933810816e-07, "loss": 0.0492, "step": 6416 }, { "epoch": 0.8941684665226782, "grad_norm": 0.08591490983963013, "learning_rate": 3.182344018077399e-07, "loss": 0.0466, "step": 6417 }, { "epoch": 0.8943078102138926, "grad_norm": 0.09465635567903519, "learning_rate": 3.1740755041582694e-07, "loss": 0.0453, "step": 6418 }, { "epoch": 0.894447153905107, "grad_norm": 0.10276572406291962, "learning_rate": 3.1658173938905023e-07, "loss": 0.05, "step": 6419 }, { "epoch": 0.8945864975963214, "grad_norm": 0.06978709250688553, "learning_rate": 3.1575696891088804e-07, "loss": 0.0463, "step": 6420 }, { "epoch": 0.8947258412875357, "grad_norm": 0.09979019314050674, "learning_rate": 3.149332391645843e-07, "loss": 0.0548, "step": 6421 }, { "epoch": 0.8948651849787501, "grad_norm": 0.07343760132789612, "learning_rate": 3.1411055033315207e-07, "loss": 0.0547, "step": 6422 }, { "epoch": 0.8950045286699645, "grad_norm": 0.11299203336238861, "learning_rate": 3.132889025993746e-07, "loss": 0.0538, "step": 6423 }, { "epoch": 0.8951438723611789, "grad_norm": 0.07962491363286972, "learning_rate": 3.1246829614580476e-07, "loss": 0.0461, "step": 6424 }, { "epoch": 0.8952832160523932, "grad_norm": 0.11688567698001862, "learning_rate": 3.1164873115476056e-07, "loss": 0.0551, "step": 6425 }, { "epoch": 0.8954225597436076, "grad_norm": 0.10997801274061203, "learning_rate": 3.1083020780833137e-07, "loss": 0.0489, "step": 6426 }, { "epoch": 0.895561903434822, "grad_norm": 0.08461752533912659, "learning_rate": 3.1001272628837565e-07, "loss": 0.0556, "step": 6427 }, { "epoch": 0.8957012471260364, "grad_norm": 0.07846186310052872, "learning_rate": 3.0919628677651636e-07, "loss": 0.0481, "step": 6428 }, { "epoch": 0.8958405908172508, "grad_norm": 0.08143095672130585, "learning_rate": 3.083808894541496e-07, "loss": 0.0532, "step": 6429 }, { "epoch": 0.8959799345084651, "grad_norm": 0.10859303921461105, "learning_rate": 3.075665345024387e-07, "loss": 0.0538, "step": 6430 }, { "epoch": 0.8961192781996795, "grad_norm": 0.08488629013299942, "learning_rate": 3.0675322210231227e-07, "loss": 0.0508, "step": 6431 }, { "epoch": 0.8962586218908939, "grad_norm": 0.12052226811647415, "learning_rate": 3.0594095243447254e-07, "loss": 0.0474, "step": 6432 }, { "epoch": 0.8963979655821083, "grad_norm": 0.0735020712018013, "learning_rate": 3.0512972567938505e-07, "loss": 0.0458, "step": 6433 }, { "epoch": 0.8965373092733226, "grad_norm": 0.07665964961051941, "learning_rate": 3.043195420172879e-07, "loss": 0.0454, "step": 6434 }, { "epoch": 0.896676652964537, "grad_norm": 0.09524669498205185, "learning_rate": 3.035104016281831e-07, "loss": 0.0534, "step": 6435 }, { "epoch": 0.8968159966557514, "grad_norm": 0.14690406620502472, "learning_rate": 3.027023046918448e-07, "loss": 0.0577, "step": 6436 }, { "epoch": 0.8969553403469658, "grad_norm": 0.09380431473255157, "learning_rate": 3.018952513878137e-07, "loss": 0.0581, "step": 6437 }, { "epoch": 0.8970946840381802, "grad_norm": 0.11429816484451294, "learning_rate": 3.010892418953981e-07, "loss": 0.0545, "step": 6438 }, { "epoch": 0.8972340277293945, "grad_norm": 0.09087036550045013, "learning_rate": 3.0028427639367475e-07, "loss": 0.0446, "step": 6439 }, { "epoch": 0.8973733714206089, "grad_norm": 0.06711911410093307, "learning_rate": 2.994803550614883e-07, "loss": 0.0467, "step": 6440 }, { "epoch": 0.8975127151118233, "grad_norm": 0.08498097956180573, "learning_rate": 2.9867747807745315e-07, "loss": 0.0526, "step": 6441 }, { "epoch": 0.8976520588030377, "grad_norm": 0.09874633699655533, "learning_rate": 2.978756456199494e-07, "loss": 0.0608, "step": 6442 }, { "epoch": 0.897791402494252, "grad_norm": 0.10138387978076935, "learning_rate": 2.970748578671251e-07, "loss": 0.0535, "step": 6443 }, { "epoch": 0.8979307461854664, "grad_norm": 0.07153283804655075, "learning_rate": 2.9627511499689787e-07, "loss": 0.0542, "step": 6444 }, { "epoch": 0.8980700898766808, "grad_norm": 0.08323238044977188, "learning_rate": 2.9547641718695285e-07, "loss": 0.0582, "step": 6445 }, { "epoch": 0.8982094335678952, "grad_norm": 0.08412177860736847, "learning_rate": 2.946787646147414e-07, "loss": 0.0593, "step": 6446 }, { "epoch": 0.8983487772591096, "grad_norm": 0.07795805484056473, "learning_rate": 2.9388215745748347e-07, "loss": 0.0523, "step": 6447 }, { "epoch": 0.8984881209503239, "grad_norm": 0.0974557027220726, "learning_rate": 2.9308659589216913e-07, "loss": 0.0446, "step": 6448 }, { "epoch": 0.8986274646415383, "grad_norm": 0.08413178473711014, "learning_rate": 2.92292080095552e-07, "loss": 0.0521, "step": 6449 }, { "epoch": 0.8987668083327527, "grad_norm": 0.12781204283237457, "learning_rate": 2.9149861024415526e-07, "loss": 0.0599, "step": 6450 }, { "epoch": 0.8989061520239671, "grad_norm": 0.06742383539676666, "learning_rate": 2.9070618651427073e-07, "loss": 0.0464, "step": 6451 }, { "epoch": 0.8990454957151814, "grad_norm": 0.14298678934574127, "learning_rate": 2.89914809081957e-07, "loss": 0.0553, "step": 6452 }, { "epoch": 0.8991848394063959, "grad_norm": 0.09854573756456375, "learning_rate": 2.8912447812303956e-07, "loss": 0.0509, "step": 6453 }, { "epoch": 0.8993241830976103, "grad_norm": 0.0940491184592247, "learning_rate": 2.8833519381311127e-07, "loss": 0.0527, "step": 6454 }, { "epoch": 0.8994635267888247, "grad_norm": 0.07329287379980087, "learning_rate": 2.8754695632753406e-07, "loss": 0.0524, "step": 6455 }, { "epoch": 0.8996028704800391, "grad_norm": 0.06325235217809677, "learning_rate": 2.867597658414367e-07, "loss": 0.0517, "step": 6456 }, { "epoch": 0.8997422141712534, "grad_norm": 0.088630311191082, "learning_rate": 2.859736225297133e-07, "loss": 0.0653, "step": 6457 }, { "epoch": 0.8998815578624678, "grad_norm": 0.08110590279102325, "learning_rate": 2.8518852656702845e-07, "loss": 0.0563, "step": 6458 }, { "epoch": 0.9000209015536822, "grad_norm": 0.08655114471912384, "learning_rate": 2.844044781278127e-07, "loss": 0.0588, "step": 6459 }, { "epoch": 0.9001602452448966, "grad_norm": 0.11584433913230896, "learning_rate": 2.836214773862617e-07, "loss": 0.058, "step": 6460 }, { "epoch": 0.900299588936111, "grad_norm": 0.10286368429660797, "learning_rate": 2.828395245163418e-07, "loss": 0.0529, "step": 6461 }, { "epoch": 0.9004389326273253, "grad_norm": 0.0790686309337616, "learning_rate": 2.820586196917857e-07, "loss": 0.0495, "step": 6462 }, { "epoch": 0.9005782763185397, "grad_norm": 0.0717323049902916, "learning_rate": 2.812787630860919e-07, "loss": 0.0508, "step": 6463 }, { "epoch": 0.9007176200097541, "grad_norm": 0.07453585416078568, "learning_rate": 2.8049995487252625e-07, "loss": 0.0492, "step": 6464 }, { "epoch": 0.9008569637009685, "grad_norm": 0.0741502046585083, "learning_rate": 2.7972219522412194e-07, "loss": 0.0455, "step": 6465 }, { "epoch": 0.9009963073921828, "grad_norm": 0.07163621485233307, "learning_rate": 2.789454843136813e-07, "loss": 0.045, "step": 6466 }, { "epoch": 0.9011356510833972, "grad_norm": 0.0733925923705101, "learning_rate": 2.7816982231376964e-07, "loss": 0.0485, "step": 6467 }, { "epoch": 0.9012749947746116, "grad_norm": 0.0775909349322319, "learning_rate": 2.773952093967225e-07, "loss": 0.0551, "step": 6468 }, { "epoch": 0.901414338465826, "grad_norm": 0.07007793337106705, "learning_rate": 2.7662164573464156e-07, "loss": 0.0497, "step": 6469 }, { "epoch": 0.9015536821570403, "grad_norm": 0.07322996854782104, "learning_rate": 2.758491314993944e-07, "loss": 0.0501, "step": 6470 }, { "epoch": 0.9016930258482547, "grad_norm": 0.07195471227169037, "learning_rate": 2.750776668626148e-07, "loss": 0.051, "step": 6471 }, { "epoch": 0.9018323695394691, "grad_norm": 0.05026211589574814, "learning_rate": 2.743072519957063e-07, "loss": 0.0455, "step": 6472 }, { "epoch": 0.9019717132306835, "grad_norm": 0.06702813506126404, "learning_rate": 2.73537887069838e-07, "loss": 0.0441, "step": 6473 }, { "epoch": 0.9021110569218979, "grad_norm": 0.059998106211423874, "learning_rate": 2.7276957225594367e-07, "loss": 0.0556, "step": 6474 }, { "epoch": 0.9022504006131122, "grad_norm": 0.08423367887735367, "learning_rate": 2.7200230772472526e-07, "loss": 0.0613, "step": 6475 }, { "epoch": 0.9023897443043266, "grad_norm": 0.06219971925020218, "learning_rate": 2.712360936466524e-07, "loss": 0.0427, "step": 6476 }, { "epoch": 0.902529087995541, "grad_norm": 0.09970829635858536, "learning_rate": 2.704709301919606e-07, "loss": 0.0509, "step": 6477 }, { "epoch": 0.9026684316867554, "grad_norm": 0.08006435632705688, "learning_rate": 2.6970681753065e-07, "loss": 0.0531, "step": 6478 }, { "epoch": 0.9028077753779697, "grad_norm": 0.0834958627820015, "learning_rate": 2.6894375583249144e-07, "loss": 0.0462, "step": 6479 }, { "epoch": 0.9029471190691841, "grad_norm": 0.06740569323301315, "learning_rate": 2.681817452670171e-07, "loss": 0.0476, "step": 6480 }, { "epoch": 0.9030864627603985, "grad_norm": 0.0792429968714714, "learning_rate": 2.6742078600353106e-07, "loss": 0.0446, "step": 6481 }, { "epoch": 0.9032258064516129, "grad_norm": 0.07713749259710312, "learning_rate": 2.6666087821109855e-07, "loss": 0.0471, "step": 6482 }, { "epoch": 0.9033651501428273, "grad_norm": 0.07018478214740753, "learning_rate": 2.6590202205855506e-07, "loss": 0.054, "step": 6483 }, { "epoch": 0.9035044938340416, "grad_norm": 0.08339058607816696, "learning_rate": 2.6514421771450194e-07, "loss": 0.0592, "step": 6484 }, { "epoch": 0.903643837525256, "grad_norm": 0.08683352917432785, "learning_rate": 2.6438746534730497e-07, "loss": 0.0518, "step": 6485 }, { "epoch": 0.9037831812164704, "grad_norm": 0.09762319922447205, "learning_rate": 2.6363176512509637e-07, "loss": 0.0632, "step": 6486 }, { "epoch": 0.9039225249076848, "grad_norm": 0.10259383171796799, "learning_rate": 2.628771172157768e-07, "loss": 0.0486, "step": 6487 }, { "epoch": 0.9040618685988991, "grad_norm": 0.11990782618522644, "learning_rate": 2.621235217870116e-07, "loss": 0.0513, "step": 6488 }, { "epoch": 0.9042012122901135, "grad_norm": 0.08003935217857361, "learning_rate": 2.6137097900623185e-07, "loss": 0.0419, "step": 6489 }, { "epoch": 0.9043405559813279, "grad_norm": 0.07321500033140182, "learning_rate": 2.6061948904063663e-07, "loss": 0.0554, "step": 6490 }, { "epoch": 0.9044798996725423, "grad_norm": 0.1355399340391159, "learning_rate": 2.598690520571889e-07, "loss": 0.0617, "step": 6491 }, { "epoch": 0.9046192433637567, "grad_norm": 0.06748682260513306, "learning_rate": 2.591196682226182e-07, "loss": 0.0491, "step": 6492 }, { "epoch": 0.904758587054971, "grad_norm": 0.10050068795681, "learning_rate": 2.5837133770342135e-07, "loss": 0.0545, "step": 6493 }, { "epoch": 0.9048979307461855, "grad_norm": 0.07431424409151077, "learning_rate": 2.5762406066585976e-07, "loss": 0.0561, "step": 6494 }, { "epoch": 0.9050372744373999, "grad_norm": 0.11867383122444153, "learning_rate": 2.568778372759628e-07, "loss": 0.0441, "step": 6495 }, { "epoch": 0.9051766181286143, "grad_norm": 0.09373785555362701, "learning_rate": 2.5613266769952183e-07, "loss": 0.0436, "step": 6496 }, { "epoch": 0.9053159618198287, "grad_norm": 0.11327379941940308, "learning_rate": 2.5538855210209823e-07, "loss": 0.0555, "step": 6497 }, { "epoch": 0.905455305511043, "grad_norm": 0.1048625260591507, "learning_rate": 2.54645490649017e-07, "loss": 0.0594, "step": 6498 }, { "epoch": 0.9055946492022574, "grad_norm": 0.12880709767341614, "learning_rate": 2.5390348350536887e-07, "loss": 0.062, "step": 6499 }, { "epoch": 0.9057339928934718, "grad_norm": 0.07004128396511078, "learning_rate": 2.531625308360125e-07, "loss": 0.0445, "step": 6500 }, { "epoch": 0.9058733365846862, "grad_norm": 0.10140983760356903, "learning_rate": 2.52422632805569e-07, "loss": 0.0594, "step": 6501 }, { "epoch": 0.9060126802759005, "grad_norm": 0.07560853660106659, "learning_rate": 2.5168378957842797e-07, "loss": 0.0505, "step": 6502 }, { "epoch": 0.9061520239671149, "grad_norm": 0.07620658725500107, "learning_rate": 2.5094600131874205e-07, "loss": 0.0509, "step": 6503 }, { "epoch": 0.9062913676583293, "grad_norm": 0.11663605272769928, "learning_rate": 2.5020926819043223e-07, "loss": 0.0482, "step": 6504 }, { "epoch": 0.9064307113495437, "grad_norm": 0.05995216965675354, "learning_rate": 2.4947359035718434e-07, "loss": 0.0589, "step": 6505 }, { "epoch": 0.906570055040758, "grad_norm": 0.11892921477556229, "learning_rate": 2.487389679824481e-07, "loss": 0.0575, "step": 6506 }, { "epoch": 0.9067093987319724, "grad_norm": 0.07879141718149185, "learning_rate": 2.4800540122943915e-07, "loss": 0.0438, "step": 6507 }, { "epoch": 0.9068487424231868, "grad_norm": 0.08282556384801865, "learning_rate": 2.4727289026114043e-07, "loss": 0.0499, "step": 6508 }, { "epoch": 0.9069880861144012, "grad_norm": 0.09790206700563431, "learning_rate": 2.4654143524029896e-07, "loss": 0.0494, "step": 6509 }, { "epoch": 0.9071274298056156, "grad_norm": 0.047558996826410294, "learning_rate": 2.4581103632942747e-07, "loss": 0.0367, "step": 6510 }, { "epoch": 0.9072667734968299, "grad_norm": 0.06799650937318802, "learning_rate": 2.4508169369080404e-07, "loss": 0.0511, "step": 6511 }, { "epoch": 0.9074061171880443, "grad_norm": 0.06357064843177795, "learning_rate": 2.443534074864706e-07, "loss": 0.0503, "step": 6512 }, { "epoch": 0.9075454608792587, "grad_norm": 0.09832099825143814, "learning_rate": 2.436261778782378e-07, "loss": 0.0565, "step": 6513 }, { "epoch": 0.9076848045704731, "grad_norm": 0.08751635253429413, "learning_rate": 2.4290000502767755e-07, "loss": 0.0506, "step": 6514 }, { "epoch": 0.9078241482616874, "grad_norm": 0.06747984141111374, "learning_rate": 2.421748890961301e-07, "loss": 0.0473, "step": 6515 }, { "epoch": 0.9079634919529018, "grad_norm": 0.07866170257329941, "learning_rate": 2.4145083024469996e-07, "loss": 0.0483, "step": 6516 }, { "epoch": 0.9081028356441162, "grad_norm": 0.08338401466608047, "learning_rate": 2.407278286342557e-07, "loss": 0.059, "step": 6517 }, { "epoch": 0.9082421793353306, "grad_norm": 0.0862409844994545, "learning_rate": 2.40005884425431e-07, "loss": 0.0552, "step": 6518 }, { "epoch": 0.908381523026545, "grad_norm": 0.06357096880674362, "learning_rate": 2.39284997778626e-07, "loss": 0.0524, "step": 6519 }, { "epoch": 0.9085208667177593, "grad_norm": 0.0747937262058258, "learning_rate": 2.3856516885400693e-07, "loss": 0.0466, "step": 6520 }, { "epoch": 0.9086602104089737, "grad_norm": 0.08987937867641449, "learning_rate": 2.3784639781150143e-07, "loss": 0.0488, "step": 6521 }, { "epoch": 0.9087995541001881, "grad_norm": 0.06278655678033829, "learning_rate": 2.3712868481080397e-07, "loss": 0.0442, "step": 6522 }, { "epoch": 0.9089388977914025, "grad_norm": 0.09331156313419342, "learning_rate": 2.364120300113748e-07, "loss": 0.0485, "step": 6523 }, { "epoch": 0.9090782414826168, "grad_norm": 0.08400263637304306, "learning_rate": 2.356964335724382e-07, "loss": 0.0453, "step": 6524 }, { "epoch": 0.9092175851738312, "grad_norm": 0.06798290461301804, "learning_rate": 2.3498189565298312e-07, "loss": 0.0466, "step": 6525 }, { "epoch": 0.9093569288650456, "grad_norm": 0.06724914908409119, "learning_rate": 2.3426841641176311e-07, "loss": 0.0487, "step": 6526 }, { "epoch": 0.90949627255626, "grad_norm": 0.08950701355934143, "learning_rate": 2.3355599600729916e-07, "loss": 0.0505, "step": 6527 }, { "epoch": 0.9096356162474744, "grad_norm": 0.058173637837171555, "learning_rate": 2.328446345978713e-07, "loss": 0.0457, "step": 6528 }, { "epoch": 0.9097749599386887, "grad_norm": 0.06108802929520607, "learning_rate": 2.3213433234152982e-07, "loss": 0.0537, "step": 6529 }, { "epoch": 0.9099143036299031, "grad_norm": 0.10076146572828293, "learning_rate": 2.3142508939608844e-07, "loss": 0.0524, "step": 6530 }, { "epoch": 0.9100536473211175, "grad_norm": 0.07992421090602875, "learning_rate": 2.3071690591912277e-07, "loss": 0.0584, "step": 6531 }, { "epoch": 0.9101929910123319, "grad_norm": 0.0900697410106659, "learning_rate": 2.3000978206797697e-07, "loss": 0.0571, "step": 6532 }, { "epoch": 0.9103323347035462, "grad_norm": 0.07248484343290329, "learning_rate": 2.2930371799975593e-07, "loss": 0.046, "step": 6533 }, { "epoch": 0.9104716783947607, "grad_norm": 0.1185319572687149, "learning_rate": 2.2859871387133248e-07, "loss": 0.0599, "step": 6534 }, { "epoch": 0.9106110220859751, "grad_norm": 0.1017669290304184, "learning_rate": 2.2789476983934133e-07, "loss": 0.0612, "step": 6535 }, { "epoch": 0.9107503657771895, "grad_norm": 0.09416984766721725, "learning_rate": 2.271918860601835e-07, "loss": 0.0544, "step": 6536 }, { "epoch": 0.9108897094684039, "grad_norm": 0.08419349044561386, "learning_rate": 2.2649006269002406e-07, "loss": 0.0425, "step": 6537 }, { "epoch": 0.9110290531596182, "grad_norm": 0.09336104989051819, "learning_rate": 2.257892998847916e-07, "loss": 0.0537, "step": 6538 }, { "epoch": 0.9111683968508326, "grad_norm": 0.13087861239910126, "learning_rate": 2.250895978001788e-07, "loss": 0.0535, "step": 6539 }, { "epoch": 0.911307740542047, "grad_norm": 0.08327209204435349, "learning_rate": 2.2439095659164467e-07, "loss": 0.0553, "step": 6540 }, { "epoch": 0.9114470842332614, "grad_norm": 0.0863952562212944, "learning_rate": 2.236933764144117e-07, "loss": 0.0517, "step": 6541 }, { "epoch": 0.9115864279244758, "grad_norm": 0.08958934247493744, "learning_rate": 2.2299685742346423e-07, "loss": 0.0588, "step": 6542 }, { "epoch": 0.9117257716156901, "grad_norm": 0.09514883905649185, "learning_rate": 2.223013997735557e-07, "loss": 0.0566, "step": 6543 }, { "epoch": 0.9118651153069045, "grad_norm": 0.0739639401435852, "learning_rate": 2.2160700361919807e-07, "loss": 0.0504, "step": 6544 }, { "epoch": 0.9120044589981189, "grad_norm": 0.06304378062486649, "learning_rate": 2.2091366911467238e-07, "loss": 0.0512, "step": 6545 }, { "epoch": 0.9121438026893333, "grad_norm": 0.13590559363365173, "learning_rate": 2.2022139641402095e-07, "loss": 0.0596, "step": 6546 }, { "epoch": 0.9122831463805476, "grad_norm": 0.05849015712738037, "learning_rate": 2.1953018567105078e-07, "loss": 0.0429, "step": 6547 }, { "epoch": 0.912422490071762, "grad_norm": 0.06581398099660873, "learning_rate": 2.1884003703933343e-07, "loss": 0.0405, "step": 6548 }, { "epoch": 0.9125618337629764, "grad_norm": 0.06697867065668106, "learning_rate": 2.181509506722046e-07, "loss": 0.0455, "step": 6549 }, { "epoch": 0.9127011774541908, "grad_norm": 0.14492730796337128, "learning_rate": 2.1746292672276238e-07, "loss": 0.0545, "step": 6550 }, { "epoch": 0.9128405211454051, "grad_norm": 0.08384276181459427, "learning_rate": 2.1677596534387114e-07, "loss": 0.0541, "step": 6551 }, { "epoch": 0.9129798648366195, "grad_norm": 0.13591714203357697, "learning_rate": 2.1609006668815768e-07, "loss": 0.0602, "step": 6552 }, { "epoch": 0.9131192085278339, "grad_norm": 0.0854470357298851, "learning_rate": 2.1540523090801292e-07, "loss": 0.051, "step": 6553 }, { "epoch": 0.9132585522190483, "grad_norm": 0.08606827259063721, "learning_rate": 2.1472145815559064e-07, "loss": 0.0454, "step": 6554 }, { "epoch": 0.9133978959102627, "grad_norm": 0.10825375467538834, "learning_rate": 2.1403874858281104e-07, "loss": 0.0568, "step": 6555 }, { "epoch": 0.913537239601477, "grad_norm": 0.06952304393053055, "learning_rate": 2.133571023413572e-07, "loss": 0.0578, "step": 6556 }, { "epoch": 0.9136765832926914, "grad_norm": 0.08597686886787415, "learning_rate": 2.1267651958267298e-07, "loss": 0.0424, "step": 6557 }, { "epoch": 0.9138159269839058, "grad_norm": 0.086851105093956, "learning_rate": 2.1199700045797077e-07, "loss": 0.0456, "step": 6558 }, { "epoch": 0.9139552706751202, "grad_norm": 0.08398046344518661, "learning_rate": 2.113185451182226e-07, "loss": 0.058, "step": 6559 }, { "epoch": 0.9140946143663345, "grad_norm": 0.05932183191180229, "learning_rate": 2.106411537141656e-07, "loss": 0.0444, "step": 6560 }, { "epoch": 0.9142339580575489, "grad_norm": 0.10947759449481964, "learning_rate": 2.0996482639630167e-07, "loss": 0.0724, "step": 6561 }, { "epoch": 0.9143733017487633, "grad_norm": 0.0977511927485466, "learning_rate": 2.0928956331489558e-07, "loss": 0.0588, "step": 6562 }, { "epoch": 0.9145126454399777, "grad_norm": 0.09164762496948242, "learning_rate": 2.08615364619974e-07, "loss": 0.0526, "step": 6563 }, { "epoch": 0.9146519891311921, "grad_norm": 0.09562749415636063, "learning_rate": 2.079422304613299e-07, "loss": 0.0612, "step": 6564 }, { "epoch": 0.9147913328224064, "grad_norm": 0.11157821118831635, "learning_rate": 2.0727016098851694e-07, "loss": 0.0522, "step": 6565 }, { "epoch": 0.9149306765136208, "grad_norm": 0.07474181056022644, "learning_rate": 2.0659915635085515e-07, "loss": 0.0525, "step": 6566 }, { "epoch": 0.9150700202048352, "grad_norm": 0.09033281356096268, "learning_rate": 2.0592921669742528e-07, "loss": 0.0554, "step": 6567 }, { "epoch": 0.9152093638960496, "grad_norm": 0.07250408828258514, "learning_rate": 2.0526034217707213e-07, "loss": 0.0475, "step": 6568 }, { "epoch": 0.9153487075872639, "grad_norm": 0.05774035304784775, "learning_rate": 2.0459253293840632e-07, "loss": 0.0507, "step": 6569 }, { "epoch": 0.9154880512784783, "grad_norm": 0.09643113613128662, "learning_rate": 2.0392578912979853e-07, "loss": 0.0534, "step": 6570 }, { "epoch": 0.9156273949696927, "grad_norm": 0.0694081261754036, "learning_rate": 2.032601108993837e-07, "loss": 0.049, "step": 6571 }, { "epoch": 0.9157667386609071, "grad_norm": 0.08627600222826004, "learning_rate": 2.0259549839506064e-07, "loss": 0.0588, "step": 6572 }, { "epoch": 0.9159060823521215, "grad_norm": 0.13907048106193542, "learning_rate": 2.0193195176449188e-07, "loss": 0.0662, "step": 6573 }, { "epoch": 0.9160454260433359, "grad_norm": 0.0687466561794281, "learning_rate": 2.0126947115510165e-07, "loss": 0.0456, "step": 6574 }, { "epoch": 0.9161847697345503, "grad_norm": 0.13029563426971436, "learning_rate": 2.006080567140778e-07, "loss": 0.0617, "step": 6575 }, { "epoch": 0.9163241134257647, "grad_norm": 0.16549544036388397, "learning_rate": 1.999477085883711e-07, "loss": 0.0565, "step": 6576 }, { "epoch": 0.9164634571169791, "grad_norm": 0.07886391878128052, "learning_rate": 1.9928842692469752e-07, "loss": 0.0485, "step": 6577 }, { "epoch": 0.9166028008081935, "grad_norm": 0.09731854498386383, "learning_rate": 1.9863021186953268e-07, "loss": 0.0479, "step": 6578 }, { "epoch": 0.9167421444994078, "grad_norm": 0.06225825846195221, "learning_rate": 1.9797306356911793e-07, "loss": 0.0448, "step": 6579 }, { "epoch": 0.9168814881906222, "grad_norm": 0.08463142812252045, "learning_rate": 1.973169821694565e-07, "loss": 0.0552, "step": 6580 }, { "epoch": 0.9170208318818366, "grad_norm": 0.13426846265792847, "learning_rate": 1.9666196781631453e-07, "loss": 0.0623, "step": 6581 }, { "epoch": 0.917160175573051, "grad_norm": 0.13309912383556366, "learning_rate": 1.9600802065522063e-07, "loss": 0.0572, "step": 6582 }, { "epoch": 0.9172995192642653, "grad_norm": 0.08433052152395248, "learning_rate": 1.95355140831468e-07, "loss": 0.0454, "step": 6583 }, { "epoch": 0.9174388629554797, "grad_norm": 0.10338418185710907, "learning_rate": 1.947033284901112e-07, "loss": 0.0579, "step": 6584 }, { "epoch": 0.9175782066466941, "grad_norm": 0.0789591521024704, "learning_rate": 1.9405258377596825e-07, "loss": 0.0492, "step": 6585 }, { "epoch": 0.9177175503379085, "grad_norm": 0.08046339452266693, "learning_rate": 1.9340290683361907e-07, "loss": 0.0596, "step": 6586 }, { "epoch": 0.9178568940291228, "grad_norm": 0.10993606597185135, "learning_rate": 1.9275429780740763e-07, "loss": 0.0529, "step": 6587 }, { "epoch": 0.9179962377203372, "grad_norm": 0.06477538496255875, "learning_rate": 1.921067568414403e-07, "loss": 0.0461, "step": 6588 }, { "epoch": 0.9181355814115516, "grad_norm": 0.08691184222698212, "learning_rate": 1.9146028407958483e-07, "loss": 0.0525, "step": 6589 }, { "epoch": 0.918274925102766, "grad_norm": 0.10056848078966141, "learning_rate": 1.9081487966547407e-07, "loss": 0.0517, "step": 6590 }, { "epoch": 0.9184142687939804, "grad_norm": 0.0806531012058258, "learning_rate": 1.9017054374250111e-07, "loss": 0.0516, "step": 6591 }, { "epoch": 0.9185536124851947, "grad_norm": 0.10491331666707993, "learning_rate": 1.8952727645382307e-07, "loss": 0.0523, "step": 6592 }, { "epoch": 0.9186929561764091, "grad_norm": 0.08685604482889175, "learning_rate": 1.88885077942359e-07, "loss": 0.0514, "step": 6593 }, { "epoch": 0.9188322998676235, "grad_norm": 0.10217025130987167, "learning_rate": 1.8824394835079086e-07, "loss": 0.0629, "step": 6594 }, { "epoch": 0.9189716435588379, "grad_norm": 0.0772676095366478, "learning_rate": 1.8760388782156468e-07, "loss": 0.0567, "step": 6595 }, { "epoch": 0.9191109872500522, "grad_norm": 0.07959074527025223, "learning_rate": 1.8696489649688454e-07, "loss": 0.0466, "step": 6596 }, { "epoch": 0.9192503309412666, "grad_norm": 0.07508888095617294, "learning_rate": 1.8632697451872074e-07, "loss": 0.0613, "step": 6597 }, { "epoch": 0.919389674632481, "grad_norm": 0.055918511003255844, "learning_rate": 1.8569012202880599e-07, "loss": 0.0457, "step": 6598 }, { "epoch": 0.9195290183236954, "grad_norm": 0.07662253081798553, "learning_rate": 1.850543391686327e-07, "loss": 0.0632, "step": 6599 }, { "epoch": 0.9196683620149098, "grad_norm": 0.08423933386802673, "learning_rate": 1.8441962607945786e-07, "loss": 0.0501, "step": 6600 }, { "epoch": 0.9198077057061241, "grad_norm": 0.07923807948827744, "learning_rate": 1.83785982902302e-07, "loss": 0.0447, "step": 6601 }, { "epoch": 0.9199470493973385, "grad_norm": 0.07865436375141144, "learning_rate": 1.8315340977794415e-07, "loss": 0.0534, "step": 6602 }, { "epoch": 0.9200863930885529, "grad_norm": 0.08107592165470123, "learning_rate": 1.825219068469275e-07, "loss": 0.0446, "step": 6603 }, { "epoch": 0.9202257367797673, "grad_norm": 0.06392812728881836, "learning_rate": 1.818914742495581e-07, "loss": 0.0418, "step": 6604 }, { "epoch": 0.9203650804709816, "grad_norm": 0.10252520442008972, "learning_rate": 1.8126211212590505e-07, "loss": 0.0488, "step": 6605 }, { "epoch": 0.920504424162196, "grad_norm": 0.07558147609233856, "learning_rate": 1.8063382061579648e-07, "loss": 0.0425, "step": 6606 }, { "epoch": 0.9206437678534104, "grad_norm": 0.09436511993408203, "learning_rate": 1.8000659985882463e-07, "loss": 0.0586, "step": 6607 }, { "epoch": 0.9207831115446248, "grad_norm": 0.07460029423236847, "learning_rate": 1.7938044999434412e-07, "loss": 0.055, "step": 6608 }, { "epoch": 0.9209224552358392, "grad_norm": 0.1122758686542511, "learning_rate": 1.7875537116147146e-07, "loss": 0.06, "step": 6609 }, { "epoch": 0.9210617989270535, "grad_norm": 0.07978810369968414, "learning_rate": 1.781313634990839e-07, "loss": 0.0604, "step": 6610 }, { "epoch": 0.9212011426182679, "grad_norm": 0.11493082344532013, "learning_rate": 1.7750842714582272e-07, "loss": 0.0498, "step": 6611 }, { "epoch": 0.9213404863094823, "grad_norm": 0.07958534359931946, "learning_rate": 1.7688656224008893e-07, "loss": 0.0554, "step": 6612 }, { "epoch": 0.9214798300006967, "grad_norm": 0.06450383365154266, "learning_rate": 1.762657689200481e-07, "loss": 0.0465, "step": 6613 }, { "epoch": 0.9216191736919112, "grad_norm": 0.04763668403029442, "learning_rate": 1.7564604732362545e-07, "loss": 0.0379, "step": 6614 }, { "epoch": 0.9217585173831255, "grad_norm": 0.08433783054351807, "learning_rate": 1.7502739758850863e-07, "loss": 0.0489, "step": 6615 }, { "epoch": 0.9218978610743399, "grad_norm": 0.0677054300904274, "learning_rate": 1.7440981985214933e-07, "loss": 0.0496, "step": 6616 }, { "epoch": 0.9220372047655543, "grad_norm": 0.08800935000181198, "learning_rate": 1.7379331425175728e-07, "loss": 0.0631, "step": 6617 }, { "epoch": 0.9221765484567687, "grad_norm": 0.1064128577709198, "learning_rate": 1.7317788092430676e-07, "loss": 0.0616, "step": 6618 }, { "epoch": 0.922315892147983, "grad_norm": 0.06810528039932251, "learning_rate": 1.725635200065323e-07, "loss": 0.0474, "step": 6619 }, { "epoch": 0.9224552358391974, "grad_norm": 0.08123359084129333, "learning_rate": 1.7195023163493253e-07, "loss": 0.0513, "step": 6620 }, { "epoch": 0.9225945795304118, "grad_norm": 0.09686033427715302, "learning_rate": 1.7133801594576393e-07, "loss": 0.0666, "step": 6621 }, { "epoch": 0.9227339232216262, "grad_norm": 0.10468028485774994, "learning_rate": 1.7072687307504887e-07, "loss": 0.0587, "step": 6622 }, { "epoch": 0.9228732669128406, "grad_norm": 0.07290571928024292, "learning_rate": 1.701168031585676e-07, "loss": 0.0506, "step": 6623 }, { "epoch": 0.9230126106040549, "grad_norm": 0.07413962483406067, "learning_rate": 1.695078063318656e-07, "loss": 0.0496, "step": 6624 }, { "epoch": 0.9231519542952693, "grad_norm": 0.10227977484464645, "learning_rate": 1.6889988273024627e-07, "loss": 0.0517, "step": 6625 }, { "epoch": 0.9232912979864837, "grad_norm": 0.07297626882791519, "learning_rate": 1.682930324887766e-07, "loss": 0.0515, "step": 6626 }, { "epoch": 0.9234306416776981, "grad_norm": 0.07604748010635376, "learning_rate": 1.6768725574228706e-07, "loss": 0.0527, "step": 6627 }, { "epoch": 0.9235699853689124, "grad_norm": 0.07805367559194565, "learning_rate": 1.6708255262536443e-07, "loss": 0.0374, "step": 6628 }, { "epoch": 0.9237093290601268, "grad_norm": 0.08852995932102203, "learning_rate": 1.6647892327236125e-07, "loss": 0.0442, "step": 6629 }, { "epoch": 0.9238486727513412, "grad_norm": 0.050979144871234894, "learning_rate": 1.658763678173908e-07, "loss": 0.0436, "step": 6630 }, { "epoch": 0.9239880164425556, "grad_norm": 0.07122956216335297, "learning_rate": 1.6527488639432543e-07, "loss": 0.0596, "step": 6631 }, { "epoch": 0.92412736013377, "grad_norm": 0.06778468191623688, "learning_rate": 1.6467447913680268e-07, "loss": 0.0508, "step": 6632 }, { "epoch": 0.9242667038249843, "grad_norm": 0.07052353024482727, "learning_rate": 1.6407514617821752e-07, "loss": 0.0462, "step": 6633 }, { "epoch": 0.9244060475161987, "grad_norm": 0.1042063981294632, "learning_rate": 1.6347688765172953e-07, "loss": 0.0527, "step": 6634 }, { "epoch": 0.9245453912074131, "grad_norm": 0.11151877045631409, "learning_rate": 1.6287970369025686e-07, "loss": 0.0595, "step": 6635 }, { "epoch": 0.9246847348986275, "grad_norm": 0.14694450795650482, "learning_rate": 1.6228359442648112e-07, "loss": 0.0522, "step": 6636 }, { "epoch": 0.9248240785898418, "grad_norm": 0.06062628701329231, "learning_rate": 1.616885599928436e-07, "loss": 0.0477, "step": 6637 }, { "epoch": 0.9249634222810562, "grad_norm": 0.06887681782245636, "learning_rate": 1.6109460052154802e-07, "loss": 0.0551, "step": 6638 }, { "epoch": 0.9251027659722706, "grad_norm": 0.0988774225115776, "learning_rate": 1.6050171614455712e-07, "loss": 0.0511, "step": 6639 }, { "epoch": 0.925242109663485, "grad_norm": 0.08879891782999039, "learning_rate": 1.5990990699359777e-07, "loss": 0.0592, "step": 6640 }, { "epoch": 0.9253814533546993, "grad_norm": 0.07006113976240158, "learning_rate": 1.593191732001559e-07, "loss": 0.0522, "step": 6641 }, { "epoch": 0.9255207970459137, "grad_norm": 0.1053687110543251, "learning_rate": 1.5872951489547926e-07, "loss": 0.0539, "step": 6642 }, { "epoch": 0.9256601407371281, "grad_norm": 0.07675307989120483, "learning_rate": 1.5814093221057647e-07, "loss": 0.0468, "step": 6643 }, { "epoch": 0.9257994844283425, "grad_norm": 0.06410157680511475, "learning_rate": 1.575534252762162e-07, "loss": 0.0532, "step": 6644 }, { "epoch": 0.9259388281195569, "grad_norm": 0.1389368623495102, "learning_rate": 1.5696699422293072e-07, "loss": 0.0554, "step": 6645 }, { "epoch": 0.9260781718107712, "grad_norm": 0.06250306963920593, "learning_rate": 1.5638163918101024e-07, "loss": 0.0474, "step": 6646 }, { "epoch": 0.9262175155019856, "grad_norm": 0.10790180414915085, "learning_rate": 1.5579736028050797e-07, "loss": 0.0548, "step": 6647 }, { "epoch": 0.9263568591932, "grad_norm": 0.07217289507389069, "learning_rate": 1.5521415765123783e-07, "loss": 0.0479, "step": 6648 }, { "epoch": 0.9264962028844144, "grad_norm": 0.10262615233659744, "learning_rate": 1.546320314227734e-07, "loss": 0.0511, "step": 6649 }, { "epoch": 0.9266355465756287, "grad_norm": 0.06430312991142273, "learning_rate": 1.5405098172444954e-07, "loss": 0.0497, "step": 6650 }, { "epoch": 0.9267748902668431, "grad_norm": 0.10088631510734558, "learning_rate": 1.5347100868536246e-07, "loss": 0.0573, "step": 6651 }, { "epoch": 0.9269142339580575, "grad_norm": 0.10206782817840576, "learning_rate": 1.5289211243436964e-07, "loss": 0.0566, "step": 6652 }, { "epoch": 0.9270535776492719, "grad_norm": 0.12147868424654007, "learning_rate": 1.5231429310008817e-07, "loss": 0.0537, "step": 6653 }, { "epoch": 0.9271929213404864, "grad_norm": 0.08588667213916779, "learning_rate": 1.5173755081089536e-07, "loss": 0.0484, "step": 6654 }, { "epoch": 0.9273322650317007, "grad_norm": 0.07894281297922134, "learning_rate": 1.511618856949315e-07, "loss": 0.0541, "step": 6655 }, { "epoch": 0.9274716087229151, "grad_norm": 0.0790940374135971, "learning_rate": 1.5058729788009597e-07, "loss": 0.0616, "step": 6656 }, { "epoch": 0.9276109524141295, "grad_norm": 0.06496177613735199, "learning_rate": 1.5001378749404883e-07, "loss": 0.0433, "step": 6657 }, { "epoch": 0.9277502961053439, "grad_norm": 0.1703849583864212, "learning_rate": 1.4944135466421095e-07, "loss": 0.065, "step": 6658 }, { "epoch": 0.9278896397965583, "grad_norm": 0.07847282290458679, "learning_rate": 1.4886999951776448e-07, "loss": 0.0462, "step": 6659 }, { "epoch": 0.9280289834877726, "grad_norm": 0.1173173040151596, "learning_rate": 1.4829972218165013e-07, "loss": 0.0552, "step": 6660 }, { "epoch": 0.928168327178987, "grad_norm": 0.08588768541812897, "learning_rate": 1.477305227825715e-07, "loss": 0.0508, "step": 6661 }, { "epoch": 0.9283076708702014, "grad_norm": 0.07801977545022964, "learning_rate": 1.471624014469919e-07, "loss": 0.0517, "step": 6662 }, { "epoch": 0.9284470145614158, "grad_norm": 0.12048875540494919, "learning_rate": 1.4659535830113368e-07, "loss": 0.059, "step": 6663 }, { "epoch": 0.9285863582526301, "grad_norm": 0.06570553034543991, "learning_rate": 1.4602939347098278e-07, "loss": 0.0477, "step": 6664 }, { "epoch": 0.9287257019438445, "grad_norm": 0.07641483098268509, "learning_rate": 1.454645070822819e-07, "loss": 0.0542, "step": 6665 }, { "epoch": 0.9288650456350589, "grad_norm": 0.09702589362859726, "learning_rate": 1.449006992605373e-07, "loss": 0.0476, "step": 6666 }, { "epoch": 0.9290043893262733, "grad_norm": 0.09443528205156326, "learning_rate": 1.443379701310127e-07, "loss": 0.0581, "step": 6667 }, { "epoch": 0.9291437330174876, "grad_norm": 0.07618983089923859, "learning_rate": 1.4377631981873474e-07, "loss": 0.0431, "step": 6668 }, { "epoch": 0.929283076708702, "grad_norm": 0.11298150569200516, "learning_rate": 1.432157484484892e-07, "loss": 0.0589, "step": 6669 }, { "epoch": 0.9294224203999164, "grad_norm": 0.07476388663053513, "learning_rate": 1.4265625614482247e-07, "loss": 0.0524, "step": 6670 }, { "epoch": 0.9295617640911308, "grad_norm": 0.14582912623882294, "learning_rate": 1.4209784303203965e-07, "loss": 0.0594, "step": 6671 }, { "epoch": 0.9297011077823452, "grad_norm": 0.12335220724344254, "learning_rate": 1.415405092342087e-07, "loss": 0.0559, "step": 6672 }, { "epoch": 0.9298404514735595, "grad_norm": 0.1479060798883438, "learning_rate": 1.4098425487515665e-07, "loss": 0.0636, "step": 6673 }, { "epoch": 0.9299797951647739, "grad_norm": 0.13759654760360718, "learning_rate": 1.4042908007846912e-07, "loss": 0.0629, "step": 6674 }, { "epoch": 0.9301191388559883, "grad_norm": 0.07447419315576553, "learning_rate": 1.3987498496749463e-07, "loss": 0.043, "step": 6675 }, { "epoch": 0.9302584825472027, "grad_norm": 0.07201021164655685, "learning_rate": 1.3932196966533972e-07, "loss": 0.0478, "step": 6676 }, { "epoch": 0.930397826238417, "grad_norm": 0.10323453694581985, "learning_rate": 1.3877003429487224e-07, "loss": 0.0548, "step": 6677 }, { "epoch": 0.9305371699296314, "grad_norm": 0.07468461245298386, "learning_rate": 1.3821917897871905e-07, "loss": 0.0477, "step": 6678 }, { "epoch": 0.9306765136208458, "grad_norm": 0.10559988021850586, "learning_rate": 1.3766940383926785e-07, "loss": 0.0585, "step": 6679 }, { "epoch": 0.9308158573120602, "grad_norm": 0.08134801685810089, "learning_rate": 1.3712070899866704e-07, "loss": 0.0437, "step": 6680 }, { "epoch": 0.9309552010032746, "grad_norm": 0.06107497587800026, "learning_rate": 1.3657309457882294e-07, "loss": 0.051, "step": 6681 }, { "epoch": 0.9310945446944889, "grad_norm": 0.09857072681188583, "learning_rate": 1.3602656070140275e-07, "loss": 0.0542, "step": 6682 }, { "epoch": 0.9312338883857033, "grad_norm": 0.09654295444488525, "learning_rate": 1.3548110748783426e-07, "loss": 0.0462, "step": 6683 }, { "epoch": 0.9313732320769177, "grad_norm": 0.0889766737818718, "learning_rate": 1.349367350593056e-07, "loss": 0.0505, "step": 6684 }, { "epoch": 0.9315125757681321, "grad_norm": 0.06681947410106659, "learning_rate": 1.3439344353676276e-07, "loss": 0.0438, "step": 6685 }, { "epoch": 0.9316519194593464, "grad_norm": 0.06722033768892288, "learning_rate": 1.3385123304091306e-07, "loss": 0.0407, "step": 6686 }, { "epoch": 0.9317912631505608, "grad_norm": 0.0797632485628128, "learning_rate": 1.3331010369222298e-07, "loss": 0.0584, "step": 6687 }, { "epoch": 0.9319306068417752, "grad_norm": 0.08864803612232208, "learning_rate": 1.3277005561092016e-07, "loss": 0.0449, "step": 6688 }, { "epoch": 0.9320699505329896, "grad_norm": 0.12932871282100677, "learning_rate": 1.3223108891698976e-07, "loss": 0.0466, "step": 6689 }, { "epoch": 0.932209294224204, "grad_norm": 0.08300182968378067, "learning_rate": 1.316932037301788e-07, "loss": 0.0527, "step": 6690 }, { "epoch": 0.9323486379154183, "grad_norm": 0.06619201600551605, "learning_rate": 1.3115640016999222e-07, "loss": 0.0496, "step": 6691 }, { "epoch": 0.9324879816066327, "grad_norm": 0.07188136130571365, "learning_rate": 1.3062067835569625e-07, "loss": 0.0499, "step": 6692 }, { "epoch": 0.9326273252978471, "grad_norm": 0.0832078754901886, "learning_rate": 1.3008603840631516e-07, "loss": 0.0518, "step": 6693 }, { "epoch": 0.9327666689890615, "grad_norm": 0.10716896504163742, "learning_rate": 1.2955248044063452e-07, "loss": 0.0425, "step": 6694 }, { "epoch": 0.932906012680276, "grad_norm": 0.11106447130441666, "learning_rate": 1.2902000457719886e-07, "loss": 0.0547, "step": 6695 }, { "epoch": 0.9330453563714903, "grad_norm": 0.08910094946622849, "learning_rate": 1.2848861093431143e-07, "loss": 0.0504, "step": 6696 }, { "epoch": 0.9331847000627047, "grad_norm": 0.061350900679826736, "learning_rate": 1.2795829963003604e-07, "loss": 0.0505, "step": 6697 }, { "epoch": 0.9333240437539191, "grad_norm": 0.0696420893073082, "learning_rate": 1.274290707821968e-07, "loss": 0.045, "step": 6698 }, { "epoch": 0.9334633874451335, "grad_norm": 0.13210010528564453, "learning_rate": 1.269009245083741e-07, "loss": 0.0676, "step": 6699 }, { "epoch": 0.9336027311363478, "grad_norm": 0.0692482441663742, "learning_rate": 1.2637386092591187e-07, "loss": 0.0499, "step": 6700 }, { "epoch": 0.9337420748275622, "grad_norm": 0.07373761385679245, "learning_rate": 1.258478801519114e-07, "loss": 0.0538, "step": 6701 }, { "epoch": 0.9338814185187766, "grad_norm": 0.09684666991233826, "learning_rate": 1.2532298230323258e-07, "loss": 0.058, "step": 6702 }, { "epoch": 0.934020762209991, "grad_norm": 0.13008978962898254, "learning_rate": 1.2479916749649657e-07, "loss": 0.0653, "step": 6703 }, { "epoch": 0.9341601059012054, "grad_norm": 0.11756580322980881, "learning_rate": 1.2427643584808246e-07, "loss": 0.0551, "step": 6704 }, { "epoch": 0.9342994495924197, "grad_norm": 0.1337878704071045, "learning_rate": 1.2375478747413017e-07, "loss": 0.0623, "step": 6705 }, { "epoch": 0.9344387932836341, "grad_norm": 0.1097920760512352, "learning_rate": 1.2323422249053696e-07, "loss": 0.0532, "step": 6706 }, { "epoch": 0.9345781369748485, "grad_norm": 0.07038409262895584, "learning_rate": 1.2271474101296144e-07, "loss": 0.0539, "step": 6707 }, { "epoch": 0.9347174806660629, "grad_norm": 0.08753958344459534, "learning_rate": 1.2219634315681962e-07, "loss": 0.0478, "step": 6708 }, { "epoch": 0.9348568243572772, "grad_norm": 0.062095995992422104, "learning_rate": 1.2167902903728879e-07, "loss": 0.0428, "step": 6709 }, { "epoch": 0.9349961680484916, "grad_norm": 0.09129584580659866, "learning_rate": 1.211627987693037e-07, "loss": 0.0438, "step": 6710 }, { "epoch": 0.935135511739706, "grad_norm": 0.07175928354263306, "learning_rate": 1.206476524675587e-07, "loss": 0.046, "step": 6711 }, { "epoch": 0.9352748554309204, "grad_norm": 0.07220089435577393, "learning_rate": 1.2013359024650785e-07, "loss": 0.0536, "step": 6712 }, { "epoch": 0.9354141991221347, "grad_norm": 0.08528576046228409, "learning_rate": 1.196206122203647e-07, "loss": 0.051, "step": 6713 }, { "epoch": 0.9355535428133491, "grad_norm": 0.10924102365970612, "learning_rate": 1.1910871850309979e-07, "loss": 0.056, "step": 6714 }, { "epoch": 0.9356928865045635, "grad_norm": 0.0851510763168335, "learning_rate": 1.1859790920844494e-07, "loss": 0.0419, "step": 6715 }, { "epoch": 0.9358322301957779, "grad_norm": 0.1423521190881729, "learning_rate": 1.1808818444989046e-07, "loss": 0.0588, "step": 6716 }, { "epoch": 0.9359715738869923, "grad_norm": 0.13911668956279755, "learning_rate": 1.1757954434068574e-07, "loss": 0.0642, "step": 6717 }, { "epoch": 0.9361109175782066, "grad_norm": 0.132608100771904, "learning_rate": 1.1707198899383875e-07, "loss": 0.0523, "step": 6718 }, { "epoch": 0.936250261269421, "grad_norm": 0.07759202271699905, "learning_rate": 1.1656551852211595e-07, "loss": 0.0403, "step": 6719 }, { "epoch": 0.9363896049606354, "grad_norm": 0.0604473352432251, "learning_rate": 1.1606013303804508e-07, "loss": 0.044, "step": 6720 }, { "epoch": 0.9365289486518498, "grad_norm": 0.10291823744773865, "learning_rate": 1.1555583265390968e-07, "loss": 0.0624, "step": 6721 }, { "epoch": 0.9366682923430641, "grad_norm": 0.0961032435297966, "learning_rate": 1.1505261748175512e-07, "loss": 0.0534, "step": 6722 }, { "epoch": 0.9368076360342785, "grad_norm": 0.1354345679283142, "learning_rate": 1.1455048763338361e-07, "loss": 0.0526, "step": 6723 }, { "epoch": 0.9369469797254929, "grad_norm": 0.07238467037677765, "learning_rate": 1.1404944322035705e-07, "loss": 0.0452, "step": 6724 }, { "epoch": 0.9370863234167073, "grad_norm": 0.10145197063684464, "learning_rate": 1.1354948435399582e-07, "loss": 0.0538, "step": 6725 }, { "epoch": 0.9372256671079217, "grad_norm": 0.09477795660495758, "learning_rate": 1.130506111453794e-07, "loss": 0.0558, "step": 6726 }, { "epoch": 0.937365010799136, "grad_norm": 0.04511824622750282, "learning_rate": 1.1255282370534748e-07, "loss": 0.0418, "step": 6727 }, { "epoch": 0.9375043544903504, "grad_norm": 0.06757242977619171, "learning_rate": 1.1205612214449434e-07, "loss": 0.0507, "step": 6728 }, { "epoch": 0.9376436981815648, "grad_norm": 0.06402464210987091, "learning_rate": 1.1156050657317785e-07, "loss": 0.0421, "step": 6729 }, { "epoch": 0.9377830418727792, "grad_norm": 0.06170593947172165, "learning_rate": 1.1106597710151157e-07, "loss": 0.0498, "step": 6730 }, { "epoch": 0.9379223855639935, "grad_norm": 0.05344809591770172, "learning_rate": 1.1057253383936928e-07, "loss": 0.0444, "step": 6731 }, { "epoch": 0.9380617292552079, "grad_norm": 0.06602682918310165, "learning_rate": 1.1008017689638162e-07, "loss": 0.052, "step": 6732 }, { "epoch": 0.9382010729464223, "grad_norm": 0.10043424367904663, "learning_rate": 1.0958890638194108e-07, "loss": 0.055, "step": 6733 }, { "epoch": 0.9383404166376367, "grad_norm": 0.10525436699390411, "learning_rate": 1.0909872240519481e-07, "loss": 0.0574, "step": 6734 }, { "epoch": 0.9384797603288512, "grad_norm": 0.08237273246049881, "learning_rate": 1.0860962507505124e-07, "loss": 0.0501, "step": 6735 }, { "epoch": 0.9386191040200655, "grad_norm": 0.06734702736139297, "learning_rate": 1.0812161450017678e-07, "loss": 0.0512, "step": 6736 }, { "epoch": 0.9387584477112799, "grad_norm": 0.06594491750001907, "learning_rate": 1.0763469078899635e-07, "loss": 0.057, "step": 6737 }, { "epoch": 0.9388977914024943, "grad_norm": 0.09361253678798676, "learning_rate": 1.0714885404969288e-07, "loss": 0.0515, "step": 6738 }, { "epoch": 0.9390371350937087, "grad_norm": 0.07178681343793869, "learning_rate": 1.0666410439020836e-07, "loss": 0.0606, "step": 6739 }, { "epoch": 0.939176478784923, "grad_norm": 0.07468605041503906, "learning_rate": 1.0618044191824273e-07, "loss": 0.046, "step": 6740 }, { "epoch": 0.9393158224761374, "grad_norm": 0.08297307789325714, "learning_rate": 1.056978667412556e-07, "loss": 0.0439, "step": 6741 }, { "epoch": 0.9394551661673518, "grad_norm": 0.08877279609441757, "learning_rate": 1.0521637896646286e-07, "loss": 0.0537, "step": 6742 }, { "epoch": 0.9395945098585662, "grad_norm": 0.07295197993516922, "learning_rate": 1.0473597870084174e-07, "loss": 0.0459, "step": 6743 }, { "epoch": 0.9397338535497806, "grad_norm": 0.0706581324338913, "learning_rate": 1.0425666605112516e-07, "loss": 0.0521, "step": 6744 }, { "epoch": 0.9398731972409949, "grad_norm": 0.11242197453975677, "learning_rate": 1.0377844112380575e-07, "loss": 0.0493, "step": 6745 }, { "epoch": 0.9400125409322093, "grad_norm": 0.09365258365869522, "learning_rate": 1.0330130402513406e-07, "loss": 0.0503, "step": 6746 }, { "epoch": 0.9401518846234237, "grad_norm": 0.10427149385213852, "learning_rate": 1.028252548611186e-07, "loss": 0.0625, "step": 6747 }, { "epoch": 0.9402912283146381, "grad_norm": 0.12370271980762482, "learning_rate": 1.0235029373752758e-07, "loss": 0.0545, "step": 6748 }, { "epoch": 0.9404305720058524, "grad_norm": 0.06838879734277725, "learning_rate": 1.0187642075988602e-07, "loss": 0.047, "step": 6749 }, { "epoch": 0.9405699156970668, "grad_norm": 0.10621242970228195, "learning_rate": 1.0140363603347747e-07, "loss": 0.0498, "step": 6750 }, { "epoch": 0.9407092593882812, "grad_norm": 0.07447123527526855, "learning_rate": 1.0093193966334403e-07, "loss": 0.0501, "step": 6751 }, { "epoch": 0.9408486030794956, "grad_norm": 0.07148078829050064, "learning_rate": 1.0046133175428685e-07, "loss": 0.0444, "step": 6752 }, { "epoch": 0.94098794677071, "grad_norm": 0.11958823353052139, "learning_rate": 9.999181241086231e-08, "loss": 0.0512, "step": 6753 }, { "epoch": 0.9411272904619243, "grad_norm": 0.07765078544616699, "learning_rate": 9.952338173738862e-08, "loss": 0.0499, "step": 6754 }, { "epoch": 0.9412666341531387, "grad_norm": 0.13521647453308105, "learning_rate": 9.905603983793921e-08, "loss": 0.0579, "step": 6755 }, { "epoch": 0.9414059778443531, "grad_norm": 0.06884226202964783, "learning_rate": 9.858978681634823e-08, "loss": 0.0537, "step": 6756 }, { "epoch": 0.9415453215355675, "grad_norm": 0.13261966407299042, "learning_rate": 9.81246227762045e-08, "loss": 0.0664, "step": 6757 }, { "epoch": 0.9416846652267818, "grad_norm": 0.07294401526451111, "learning_rate": 9.76605478208581e-08, "loss": 0.0536, "step": 6758 }, { "epoch": 0.9418240089179962, "grad_norm": 0.06705954670906067, "learning_rate": 9.719756205341658e-08, "loss": 0.0522, "step": 6759 }, { "epoch": 0.9419633526092106, "grad_norm": 0.15135414898395538, "learning_rate": 9.673566557674263e-08, "loss": 0.0475, "step": 6760 }, { "epoch": 0.942102696300425, "grad_norm": 0.09060429781675339, "learning_rate": 9.627485849346085e-08, "loss": 0.048, "step": 6761 }, { "epoch": 0.9422420399916394, "grad_norm": 0.08648567646741867, "learning_rate": 9.581514090595212e-08, "loss": 0.0494, "step": 6762 }, { "epoch": 0.9423813836828537, "grad_norm": 0.060149502009153366, "learning_rate": 9.535651291635362e-08, "loss": 0.0463, "step": 6763 }, { "epoch": 0.9425207273740681, "grad_norm": 0.08899927884340286, "learning_rate": 9.489897462656383e-08, "loss": 0.0613, "step": 6764 }, { "epoch": 0.9426600710652825, "grad_norm": 0.10699412971735, "learning_rate": 9.44425261382359e-08, "loss": 0.0477, "step": 6765 }, { "epoch": 0.9427994147564969, "grad_norm": 0.08758970350027084, "learning_rate": 9.39871675527837e-08, "loss": 0.0511, "step": 6766 }, { "epoch": 0.9429387584477112, "grad_norm": 0.07742782682180405, "learning_rate": 9.353289897137574e-08, "loss": 0.0459, "step": 6767 }, { "epoch": 0.9430781021389256, "grad_norm": 0.14175939559936523, "learning_rate": 9.30797204949413e-08, "loss": 0.0577, "step": 6768 }, { "epoch": 0.94321744583014, "grad_norm": 0.09255857020616531, "learning_rate": 9.262763222416649e-08, "loss": 0.0602, "step": 6769 }, { "epoch": 0.9433567895213544, "grad_norm": 0.06530500948429108, "learning_rate": 9.217663425949486e-08, "loss": 0.042, "step": 6770 }, { "epoch": 0.9434961332125688, "grad_norm": 0.0822092741727829, "learning_rate": 9.172672670112681e-08, "loss": 0.0512, "step": 6771 }, { "epoch": 0.9436354769037831, "grad_norm": 0.07126203179359436, "learning_rate": 9.127790964902239e-08, "loss": 0.0572, "step": 6772 }, { "epoch": 0.9437748205949975, "grad_norm": 0.06680954992771149, "learning_rate": 9.083018320289849e-08, "loss": 0.0453, "step": 6773 }, { "epoch": 0.9439141642862119, "grad_norm": 0.07219379395246506, "learning_rate": 9.038354746222999e-08, "loss": 0.0466, "step": 6774 }, { "epoch": 0.9440535079774264, "grad_norm": 0.05353913456201553, "learning_rate": 8.993800252624863e-08, "loss": 0.0506, "step": 6775 }, { "epoch": 0.9441928516686408, "grad_norm": 0.05904737487435341, "learning_rate": 8.94935484939441e-08, "loss": 0.0441, "step": 6776 }, { "epoch": 0.9443321953598551, "grad_norm": 0.0761125236749649, "learning_rate": 8.905018546406519e-08, "loss": 0.0456, "step": 6777 }, { "epoch": 0.9444715390510695, "grad_norm": 0.09023743122816086, "learning_rate": 8.860791353511532e-08, "loss": 0.0482, "step": 6778 }, { "epoch": 0.9446108827422839, "grad_norm": 0.06936309486627579, "learning_rate": 8.816673280535815e-08, "loss": 0.0422, "step": 6779 }, { "epoch": 0.9447502264334983, "grad_norm": 0.0879853144288063, "learning_rate": 8.772664337281412e-08, "loss": 0.0502, "step": 6780 }, { "epoch": 0.9448895701247126, "grad_norm": 0.07060280442237854, "learning_rate": 8.728764533526112e-08, "loss": 0.0498, "step": 6781 }, { "epoch": 0.945028913815927, "grad_norm": 0.09690579771995544, "learning_rate": 8.684973879023395e-08, "loss": 0.0523, "step": 6782 }, { "epoch": 0.9451682575071414, "grad_norm": 0.04685661941766739, "learning_rate": 8.641292383502531e-08, "loss": 0.0429, "step": 6783 }, { "epoch": 0.9453076011983558, "grad_norm": 0.08339470624923706, "learning_rate": 8.597720056668646e-08, "loss": 0.0503, "step": 6784 }, { "epoch": 0.9454469448895702, "grad_norm": 0.09696447104215622, "learning_rate": 8.55425690820244e-08, "loss": 0.0569, "step": 6785 }, { "epoch": 0.9455862885807845, "grad_norm": 0.07479692250490189, "learning_rate": 8.510902947760469e-08, "loss": 0.0545, "step": 6786 }, { "epoch": 0.9457256322719989, "grad_norm": 0.07923243194818497, "learning_rate": 8.467658184974914e-08, "loss": 0.0448, "step": 6787 }, { "epoch": 0.9458649759632133, "grad_norm": 0.09397555142641068, "learning_rate": 8.424522629453924e-08, "loss": 0.0561, "step": 6788 }, { "epoch": 0.9460043196544277, "grad_norm": 0.10449344664812088, "learning_rate": 8.381496290781055e-08, "loss": 0.0716, "step": 6789 }, { "epoch": 0.946143663345642, "grad_norm": 0.08740657567977905, "learning_rate": 8.338579178515882e-08, "loss": 0.0491, "step": 6790 }, { "epoch": 0.9462830070368564, "grad_norm": 0.09976191818714142, "learning_rate": 8.295771302193723e-08, "loss": 0.0484, "step": 6791 }, { "epoch": 0.9464223507280708, "grad_norm": 0.0861743912100792, "learning_rate": 8.253072671325246e-08, "loss": 0.0628, "step": 6792 }, { "epoch": 0.9465616944192852, "grad_norm": 0.07920471578836441, "learning_rate": 8.210483295397309e-08, "loss": 0.0571, "step": 6793 }, { "epoch": 0.9467010381104995, "grad_norm": 0.07385145127773285, "learning_rate": 8.168003183872175e-08, "loss": 0.0532, "step": 6794 }, { "epoch": 0.9468403818017139, "grad_norm": 0.05677540972828865, "learning_rate": 8.125632346188073e-08, "loss": 0.0449, "step": 6795 }, { "epoch": 0.9469797254929283, "grad_norm": 0.06909409165382385, "learning_rate": 8.083370791758804e-08, "loss": 0.0422, "step": 6796 }, { "epoch": 0.9471190691841427, "grad_norm": 0.06575310975313187, "learning_rate": 8.04121852997386e-08, "loss": 0.0515, "step": 6797 }, { "epoch": 0.9472584128753571, "grad_norm": 0.07874368876218796, "learning_rate": 7.999175570198526e-08, "loss": 0.0485, "step": 6798 }, { "epoch": 0.9473977565665714, "grad_norm": 0.07004068046808243, "learning_rate": 7.957241921773828e-08, "loss": 0.047, "step": 6799 }, { "epoch": 0.9475371002577858, "grad_norm": 0.07177966833114624, "learning_rate": 7.915417594016428e-08, "loss": 0.0468, "step": 6800 }, { "epoch": 0.9476764439490002, "grad_norm": 0.10493630915880203, "learning_rate": 7.873702596218836e-08, "loss": 0.0663, "step": 6801 }, { "epoch": 0.9478157876402146, "grad_norm": 0.06913945078849792, "learning_rate": 7.83209693764908e-08, "loss": 0.05, "step": 6802 }, { "epoch": 0.947955131331429, "grad_norm": 0.14981722831726074, "learning_rate": 7.790600627550937e-08, "loss": 0.071, "step": 6803 }, { "epoch": 0.9480944750226433, "grad_norm": 0.08191618323326111, "learning_rate": 7.749213675143974e-08, "loss": 0.0499, "step": 6804 }, { "epoch": 0.9482338187138577, "grad_norm": 0.10213267058134079, "learning_rate": 7.707936089623558e-08, "loss": 0.0568, "step": 6805 }, { "epoch": 0.9483731624050721, "grad_norm": 0.08225990831851959, "learning_rate": 7.666767880160464e-08, "loss": 0.0582, "step": 6806 }, { "epoch": 0.9485125060962865, "grad_norm": 0.06682934612035751, "learning_rate": 7.625709055901375e-08, "loss": 0.046, "step": 6807 }, { "epoch": 0.9486518497875008, "grad_norm": 0.06693501025438309, "learning_rate": 7.584759625968663e-08, "loss": 0.0547, "step": 6808 }, { "epoch": 0.9487911934787152, "grad_norm": 0.10236406326293945, "learning_rate": 7.543919599460325e-08, "loss": 0.0512, "step": 6809 }, { "epoch": 0.9489305371699296, "grad_norm": 0.07215318083763123, "learning_rate": 7.503188985450105e-08, "loss": 0.0505, "step": 6810 }, { "epoch": 0.949069880861144, "grad_norm": 0.09358347207307816, "learning_rate": 7.462567792987374e-08, "loss": 0.0539, "step": 6811 }, { "epoch": 0.9492092245523583, "grad_norm": 0.09367306530475616, "learning_rate": 7.422056031097302e-08, "loss": 0.0553, "step": 6812 }, { "epoch": 0.9493485682435727, "grad_norm": 0.16989992558956146, "learning_rate": 7.381653708780578e-08, "loss": 0.0712, "step": 6813 }, { "epoch": 0.9494879119347871, "grad_norm": 0.06703224033117294, "learning_rate": 7.341360835013745e-08, "loss": 0.0419, "step": 6814 }, { "epoch": 0.9496272556260016, "grad_norm": 0.0997566357254982, "learning_rate": 7.301177418748973e-08, "loss": 0.0475, "step": 6815 }, { "epoch": 0.949766599317216, "grad_norm": 0.08461220562458038, "learning_rate": 7.261103468914066e-08, "loss": 0.045, "step": 6816 }, { "epoch": 0.9499059430084303, "grad_norm": 0.07425161451101303, "learning_rate": 7.221138994412569e-08, "loss": 0.0429, "step": 6817 }, { "epoch": 0.9500452866996447, "grad_norm": 0.1026737242937088, "learning_rate": 7.181284004123601e-08, "loss": 0.0688, "step": 6818 }, { "epoch": 0.9501846303908591, "grad_norm": 0.0686807706952095, "learning_rate": 7.14153850690208e-08, "loss": 0.0492, "step": 6819 }, { "epoch": 0.9503239740820735, "grad_norm": 0.08117609471082687, "learning_rate": 7.101902511578606e-08, "loss": 0.0552, "step": 6820 }, { "epoch": 0.9504633177732879, "grad_norm": 0.06585362553596497, "learning_rate": 7.062376026959305e-08, "loss": 0.0504, "step": 6821 }, { "epoch": 0.9506026614645022, "grad_norm": 0.09278598427772522, "learning_rate": 7.022959061826151e-08, "loss": 0.058, "step": 6822 }, { "epoch": 0.9507420051557166, "grad_norm": 0.0812373235821724, "learning_rate": 6.983651624936527e-08, "loss": 0.0575, "step": 6823 }, { "epoch": 0.950881348846931, "grad_norm": 0.08493942767381668, "learning_rate": 6.944453725023836e-08, "loss": 0.046, "step": 6824 }, { "epoch": 0.9510206925381454, "grad_norm": 0.08784142881631851, "learning_rate": 6.905365370796891e-08, "loss": 0.0527, "step": 6825 }, { "epoch": 0.9511600362293597, "grad_norm": 0.11565005779266357, "learning_rate": 6.866386570940132e-08, "loss": 0.052, "step": 6826 }, { "epoch": 0.9512993799205741, "grad_norm": 0.07761122286319733, "learning_rate": 6.827517334113965e-08, "loss": 0.0422, "step": 6827 }, { "epoch": 0.9514387236117885, "grad_norm": 0.07417572289705276, "learning_rate": 6.788757668954038e-08, "loss": 0.0483, "step": 6828 }, { "epoch": 0.9515780673030029, "grad_norm": 0.07260286062955856, "learning_rate": 6.750107584071964e-08, "loss": 0.0548, "step": 6829 }, { "epoch": 0.9517174109942172, "grad_norm": 0.07336454838514328, "learning_rate": 6.711567088054927e-08, "loss": 0.0549, "step": 6830 }, { "epoch": 0.9518567546854316, "grad_norm": 0.0911906436085701, "learning_rate": 6.67313618946569e-08, "loss": 0.0587, "step": 6831 }, { "epoch": 0.951996098376646, "grad_norm": 0.0704108476638794, "learning_rate": 6.634814896842757e-08, "loss": 0.0541, "step": 6832 }, { "epoch": 0.9521354420678604, "grad_norm": 0.10314246267080307, "learning_rate": 6.59660321870026e-08, "loss": 0.052, "step": 6833 }, { "epoch": 0.9522747857590748, "grad_norm": 0.07708535343408585, "learning_rate": 6.558501163527964e-08, "loss": 0.045, "step": 6834 }, { "epoch": 0.9524141294502891, "grad_norm": 0.09842335432767868, "learning_rate": 6.520508739791153e-08, "loss": 0.0575, "step": 6835 }, { "epoch": 0.9525534731415035, "grad_norm": 0.1264670193195343, "learning_rate": 6.482625955931022e-08, "loss": 0.0502, "step": 6836 }, { "epoch": 0.9526928168327179, "grad_norm": 0.07920734584331512, "learning_rate": 6.444852820364222e-08, "loss": 0.0493, "step": 6837 }, { "epoch": 0.9528321605239323, "grad_norm": 0.07359155267477036, "learning_rate": 6.407189341483044e-08, "loss": 0.0557, "step": 6838 }, { "epoch": 0.9529715042151466, "grad_norm": 0.08368923515081406, "learning_rate": 6.369635527655515e-08, "loss": 0.0523, "step": 6839 }, { "epoch": 0.953110847906361, "grad_norm": 0.0898207277059555, "learning_rate": 6.332191387225128e-08, "loss": 0.0448, "step": 6840 }, { "epoch": 0.9532501915975754, "grad_norm": 0.08014209568500519, "learning_rate": 6.294856928511284e-08, "loss": 0.0518, "step": 6841 }, { "epoch": 0.9533895352887898, "grad_norm": 0.07108653336763382, "learning_rate": 6.257632159808679e-08, "loss": 0.0487, "step": 6842 }, { "epoch": 0.9535288789800042, "grad_norm": 0.0883844792842865, "learning_rate": 6.220517089387867e-08, "loss": 0.0506, "step": 6843 }, { "epoch": 0.9536682226712185, "grad_norm": 0.14988954365253448, "learning_rate": 6.183511725495028e-08, "loss": 0.0604, "step": 6844 }, { "epoch": 0.9538075663624329, "grad_norm": 0.09736978262662888, "learning_rate": 6.146616076351864e-08, "loss": 0.0669, "step": 6845 }, { "epoch": 0.9539469100536473, "grad_norm": 0.1631171852350235, "learning_rate": 6.109830150155705e-08, "loss": 0.0563, "step": 6846 }, { "epoch": 0.9540862537448617, "grad_norm": 0.06741008907556534, "learning_rate": 6.07315395507957e-08, "loss": 0.0567, "step": 6847 }, { "epoch": 0.954225597436076, "grad_norm": 0.10119082033634186, "learning_rate": 6.036587499272161e-08, "loss": 0.0568, "step": 6848 }, { "epoch": 0.9543649411272904, "grad_norm": 0.04960627853870392, "learning_rate": 6.000130790857595e-08, "loss": 0.0428, "step": 6849 }, { "epoch": 0.9545042848185048, "grad_norm": 0.07301028817892075, "learning_rate": 5.963783837935722e-08, "loss": 0.0561, "step": 6850 }, { "epoch": 0.9546436285097192, "grad_norm": 0.07596822828054428, "learning_rate": 5.927546648582083e-08, "loss": 0.0495, "step": 6851 }, { "epoch": 0.9547829722009336, "grad_norm": 0.1027379184961319, "learning_rate": 5.8914192308476835e-08, "loss": 0.0491, "step": 6852 }, { "epoch": 0.9549223158921479, "grad_norm": 0.07417672872543335, "learning_rate": 5.855401592759269e-08, "loss": 0.0517, "step": 6853 }, { "epoch": 0.9550616595833623, "grad_norm": 0.08295443654060364, "learning_rate": 5.8194937423191043e-08, "loss": 0.0581, "step": 6854 }, { "epoch": 0.9552010032745768, "grad_norm": 0.0753847286105156, "learning_rate": 5.783695687505087e-08, "loss": 0.0476, "step": 6855 }, { "epoch": 0.9553403469657912, "grad_norm": 0.07058098912239075, "learning_rate": 5.7480074362707415e-08, "loss": 0.0472, "step": 6856 }, { "epoch": 0.9554796906570056, "grad_norm": 0.11486651748418808, "learning_rate": 5.712428996545172e-08, "loss": 0.0516, "step": 6857 }, { "epoch": 0.9556190343482199, "grad_norm": 0.09683867543935776, "learning_rate": 5.6769603762331096e-08, "loss": 0.0611, "step": 6858 }, { "epoch": 0.9557583780394343, "grad_norm": 0.13258108496665955, "learning_rate": 5.641601583214862e-08, "loss": 0.0631, "step": 6859 }, { "epoch": 0.9558977217306487, "grad_norm": 0.07585493475198746, "learning_rate": 5.606352625346368e-08, "loss": 0.0471, "step": 6860 }, { "epoch": 0.9560370654218631, "grad_norm": 0.060074109584093094, "learning_rate": 5.571213510459084e-08, "loss": 0.0436, "step": 6861 }, { "epoch": 0.9561764091130774, "grad_norm": 0.10453218221664429, "learning_rate": 5.53618424636021e-08, "loss": 0.0578, "step": 6862 }, { "epoch": 0.9563157528042918, "grad_norm": 0.1090179830789566, "learning_rate": 5.501264840832299e-08, "loss": 0.047, "step": 6863 }, { "epoch": 0.9564550964955062, "grad_norm": 0.07328250259160995, "learning_rate": 5.466455301633811e-08, "loss": 0.0487, "step": 6864 }, { "epoch": 0.9565944401867206, "grad_norm": 0.06515620648860931, "learning_rate": 5.431755636498559e-08, "loss": 0.0498, "step": 6865 }, { "epoch": 0.956733783877935, "grad_norm": 0.08316532522439957, "learning_rate": 5.3971658531360436e-08, "loss": 0.0577, "step": 6866 }, { "epoch": 0.9568731275691493, "grad_norm": 0.1128188893198967, "learning_rate": 5.362685959231284e-08, "loss": 0.064, "step": 6867 }, { "epoch": 0.9570124712603637, "grad_norm": 0.08411706984043121, "learning_rate": 5.3283159624448745e-08, "loss": 0.0434, "step": 6868 }, { "epoch": 0.9571518149515781, "grad_norm": 0.07898686081171036, "learning_rate": 5.294055870413206e-08, "loss": 0.049, "step": 6869 }, { "epoch": 0.9572911586427925, "grad_norm": 0.12479493767023087, "learning_rate": 5.2599056907479685e-08, "loss": 0.06, "step": 6870 }, { "epoch": 0.9574305023340068, "grad_norm": 0.09081611037254333, "learning_rate": 5.2258654310365366e-08, "loss": 0.0531, "step": 6871 }, { "epoch": 0.9575698460252212, "grad_norm": 0.07500766962766647, "learning_rate": 5.1919350988419716e-08, "loss": 0.0506, "step": 6872 }, { "epoch": 0.9577091897164356, "grad_norm": 0.28209200501441956, "learning_rate": 5.1581147017027434e-08, "loss": 0.0803, "step": 6873 }, { "epoch": 0.95784853340765, "grad_norm": 0.08766099810600281, "learning_rate": 5.124404247133008e-08, "loss": 0.0537, "step": 6874 }, { "epoch": 0.9579878770988643, "grad_norm": 0.06935804337263107, "learning_rate": 5.090803742622441e-08, "loss": 0.0538, "step": 6875 }, { "epoch": 0.9581272207900787, "grad_norm": 0.09062276780605316, "learning_rate": 5.057313195636293e-08, "loss": 0.0493, "step": 6876 }, { "epoch": 0.9582665644812931, "grad_norm": 0.0981854498386383, "learning_rate": 5.0239326136154454e-08, "loss": 0.0502, "step": 6877 }, { "epoch": 0.9584059081725075, "grad_norm": 0.07921098172664642, "learning_rate": 4.990662003976243e-08, "loss": 0.055, "step": 6878 }, { "epoch": 0.9585452518637219, "grad_norm": 0.09928672760725021, "learning_rate": 4.957501374110718e-08, "loss": 0.0641, "step": 6879 }, { "epoch": 0.9586845955549362, "grad_norm": 0.07283915579319, "learning_rate": 4.924450731386365e-08, "loss": 0.0544, "step": 6880 }, { "epoch": 0.9588239392461506, "grad_norm": 0.13693352043628693, "learning_rate": 4.8915100831463116e-08, "loss": 0.0723, "step": 6881 }, { "epoch": 0.958963282937365, "grad_norm": 0.08733914792537689, "learning_rate": 4.858679436709201e-08, "loss": 0.0477, "step": 6882 }, { "epoch": 0.9591026266285794, "grad_norm": 0.0666692703962326, "learning_rate": 4.825958799369201e-08, "loss": 0.0589, "step": 6883 }, { "epoch": 0.9592419703197937, "grad_norm": 0.07749021798372269, "learning_rate": 4.7933481783961624e-08, "loss": 0.0629, "step": 6884 }, { "epoch": 0.9593813140110081, "grad_norm": 0.0704946517944336, "learning_rate": 4.760847581035399e-08, "loss": 0.0442, "step": 6885 }, { "epoch": 0.9595206577022225, "grad_norm": 0.09010842442512512, "learning_rate": 4.728457014507859e-08, "loss": 0.0502, "step": 6886 }, { "epoch": 0.9596600013934369, "grad_norm": 0.08482711017131805, "learning_rate": 4.69617648600984e-08, "loss": 0.0596, "step": 6887 }, { "epoch": 0.9597993450846513, "grad_norm": 0.07167661935091019, "learning_rate": 4.664006002713495e-08, "loss": 0.0484, "step": 6888 }, { "epoch": 0.9599386887758656, "grad_norm": 0.07708411663770676, "learning_rate": 4.631945571766272e-08, "loss": 0.0493, "step": 6889 }, { "epoch": 0.96007803246708, "grad_norm": 0.05501934140920639, "learning_rate": 4.5999952002912516e-08, "loss": 0.0476, "step": 6890 }, { "epoch": 0.9602173761582944, "grad_norm": 0.07489326596260071, "learning_rate": 4.5681548953872555e-08, "loss": 0.0495, "step": 6891 }, { "epoch": 0.9603567198495088, "grad_norm": 0.07551907002925873, "learning_rate": 4.536424664128236e-08, "loss": 0.0533, "step": 6892 }, { "epoch": 0.9604960635407231, "grad_norm": 0.1114705502986908, "learning_rate": 4.504804513564054e-08, "loss": 0.0616, "step": 6893 }, { "epoch": 0.9606354072319375, "grad_norm": 0.06115157902240753, "learning_rate": 4.473294450719923e-08, "loss": 0.0416, "step": 6894 }, { "epoch": 0.9607747509231519, "grad_norm": 0.08632116764783859, "learning_rate": 4.441894482596743e-08, "loss": 0.0577, "step": 6895 }, { "epoch": 0.9609140946143664, "grad_norm": 0.08050049841403961, "learning_rate": 4.410604616170822e-08, "loss": 0.0573, "step": 6896 }, { "epoch": 0.9610534383055808, "grad_norm": 0.10203662514686584, "learning_rate": 4.379424858394043e-08, "loss": 0.0574, "step": 6897 }, { "epoch": 0.9611927819967951, "grad_norm": 0.07742881774902344, "learning_rate": 4.348355216193867e-08, "loss": 0.049, "step": 6898 }, { "epoch": 0.9613321256880095, "grad_norm": 0.11373913288116455, "learning_rate": 4.3173956964732145e-08, "loss": 0.0594, "step": 6899 }, { "epoch": 0.9614714693792239, "grad_norm": 0.093289315700531, "learning_rate": 4.286546306110639e-08, "loss": 0.0599, "step": 6900 }, { "epoch": 0.9616108130704383, "grad_norm": 0.08271685987710953, "learning_rate": 4.2558070519601594e-08, "loss": 0.0571, "step": 6901 }, { "epoch": 0.9617501567616527, "grad_norm": 0.10659763962030411, "learning_rate": 4.2251779408513104e-08, "loss": 0.0598, "step": 6902 }, { "epoch": 0.961889500452867, "grad_norm": 0.10141122341156006, "learning_rate": 4.19465897958915e-08, "loss": 0.0556, "step": 6903 }, { "epoch": 0.9620288441440814, "grad_norm": 0.08526841551065445, "learning_rate": 4.164250174954365e-08, "loss": 0.0506, "step": 6904 }, { "epoch": 0.9621681878352958, "grad_norm": 0.08562302589416504, "learning_rate": 4.133951533703107e-08, "loss": 0.0503, "step": 6905 }, { "epoch": 0.9623075315265102, "grad_norm": 0.06665626168251038, "learning_rate": 4.1037630625669345e-08, "loss": 0.0485, "step": 6906 }, { "epoch": 0.9624468752177245, "grad_norm": 0.06546662747859955, "learning_rate": 4.07368476825315e-08, "loss": 0.0504, "step": 6907 }, { "epoch": 0.9625862189089389, "grad_norm": 0.0772920697927475, "learning_rate": 4.043716657444407e-08, "loss": 0.0423, "step": 6908 }, { "epoch": 0.9627255626001533, "grad_norm": 0.09976423531770706, "learning_rate": 4.0138587367989365e-08, "loss": 0.0592, "step": 6909 }, { "epoch": 0.9628649062913677, "grad_norm": 0.10475775599479675, "learning_rate": 3.984111012950487e-08, "loss": 0.0513, "step": 6910 }, { "epoch": 0.963004249982582, "grad_norm": 0.08909031003713608, "learning_rate": 3.9544734925083264e-08, "loss": 0.0553, "step": 6911 }, { "epoch": 0.9631435936737964, "grad_norm": 0.08886663615703583, "learning_rate": 3.924946182057299e-08, "loss": 0.0525, "step": 6912 }, { "epoch": 0.9632829373650108, "grad_norm": 0.08031714707612991, "learning_rate": 3.8955290881576566e-08, "loss": 0.0569, "step": 6913 }, { "epoch": 0.9634222810562252, "grad_norm": 0.09365442395210266, "learning_rate": 3.866222217345117e-08, "loss": 0.0548, "step": 6914 }, { "epoch": 0.9635616247474396, "grad_norm": 0.10137569904327393, "learning_rate": 3.837025576131137e-08, "loss": 0.0581, "step": 6915 }, { "epoch": 0.9637009684386539, "grad_norm": 0.09526725113391876, "learning_rate": 3.807939171002473e-08, "loss": 0.0577, "step": 6916 }, { "epoch": 0.9638403121298683, "grad_norm": 0.10035735368728638, "learning_rate": 3.778963008421455e-08, "loss": 0.0637, "step": 6917 }, { "epoch": 0.9639796558210827, "grad_norm": 0.06988129019737244, "learning_rate": 3.750097094825933e-08, "loss": 0.0433, "step": 6918 }, { "epoch": 0.9641189995122971, "grad_norm": 0.10214858502149582, "learning_rate": 3.721341436629222e-08, "loss": 0.0627, "step": 6919 }, { "epoch": 0.9642583432035114, "grad_norm": 0.08438212424516678, "learning_rate": 3.6926960402202674e-08, "loss": 0.0606, "step": 6920 }, { "epoch": 0.9643976868947258, "grad_norm": 0.17614029347896576, "learning_rate": 3.66416091196331e-08, "loss": 0.0621, "step": 6921 }, { "epoch": 0.9645370305859402, "grad_norm": 0.10974134504795074, "learning_rate": 3.63573605819828e-08, "loss": 0.0637, "step": 6922 }, { "epoch": 0.9646763742771546, "grad_norm": 0.1195061057806015, "learning_rate": 3.6074214852405695e-08, "loss": 0.0544, "step": 6923 }, { "epoch": 0.964815717968369, "grad_norm": 0.10917459428310394, "learning_rate": 3.5792171993809244e-08, "loss": 0.0609, "step": 6924 }, { "epoch": 0.9649550616595833, "grad_norm": 0.07777038961648941, "learning_rate": 3.55112320688572e-08, "loss": 0.0425, "step": 6925 }, { "epoch": 0.9650944053507977, "grad_norm": 0.0764622911810875, "learning_rate": 3.523139513996798e-08, "loss": 0.0514, "step": 6926 }, { "epoch": 0.9652337490420121, "grad_norm": 0.058958955109119415, "learning_rate": 3.495266126931574e-08, "loss": 0.0418, "step": 6927 }, { "epoch": 0.9653730927332265, "grad_norm": 0.13206757605075836, "learning_rate": 3.467503051882815e-08, "loss": 0.0623, "step": 6928 }, { "epoch": 0.9655124364244408, "grad_norm": 0.07993407547473907, "learning_rate": 3.4398502950188096e-08, "loss": 0.0521, "step": 6929 }, { "epoch": 0.9656517801156552, "grad_norm": 0.08450262993574142, "learning_rate": 3.4123078624834214e-08, "loss": 0.0595, "step": 6930 }, { "epoch": 0.9657911238068696, "grad_norm": 0.10229264199733734, "learning_rate": 3.384875760395978e-08, "loss": 0.0476, "step": 6931 }, { "epoch": 0.965930467498084, "grad_norm": 0.09031584113836288, "learning_rate": 3.3575539948511595e-08, "loss": 0.0586, "step": 6932 }, { "epoch": 0.9660698111892984, "grad_norm": 0.06589008122682571, "learning_rate": 3.330342571919332e-08, "loss": 0.0443, "step": 6933 }, { "epoch": 0.9662091548805127, "grad_norm": 0.15181085467338562, "learning_rate": 3.30324149764627e-08, "loss": 0.0503, "step": 6934 }, { "epoch": 0.9663484985717271, "grad_norm": 0.14081963896751404, "learning_rate": 3.2762507780531026e-08, "loss": 0.0513, "step": 6935 }, { "epoch": 0.9664878422629416, "grad_norm": 0.08242476731538773, "learning_rate": 3.249370419136644e-08, "loss": 0.0473, "step": 6936 }, { "epoch": 0.966627185954156, "grad_norm": 0.081233449280262, "learning_rate": 3.2226004268690605e-08, "loss": 0.0484, "step": 6937 }, { "epoch": 0.9667665296453704, "grad_norm": 0.09259588271379471, "learning_rate": 3.195940807198039e-08, "loss": 0.0524, "step": 6938 }, { "epoch": 0.9669058733365847, "grad_norm": 0.09358435124158859, "learning_rate": 3.169391566046731e-08, "loss": 0.0551, "step": 6939 }, { "epoch": 0.9670452170277991, "grad_norm": 0.06388888508081436, "learning_rate": 3.142952709313807e-08, "loss": 0.049, "step": 6940 }, { "epoch": 0.9671845607190135, "grad_norm": 0.08543863892555237, "learning_rate": 3.116624242873345e-08, "loss": 0.0563, "step": 6941 }, { "epoch": 0.9673239044102279, "grad_norm": 0.09950996190309525, "learning_rate": 3.090406172574889e-08, "loss": 0.0649, "step": 6942 }, { "epoch": 0.9674632481014422, "grad_norm": 0.06033911183476448, "learning_rate": 3.064298504243612e-08, "loss": 0.0492, "step": 6943 }, { "epoch": 0.9676025917926566, "grad_norm": 0.06948255002498627, "learning_rate": 3.0383012436799306e-08, "loss": 0.0402, "step": 6944 }, { "epoch": 0.967741935483871, "grad_norm": 0.05597880855202675, "learning_rate": 3.0124143966599464e-08, "loss": 0.0437, "step": 6945 }, { "epoch": 0.9678812791750854, "grad_norm": 0.05325533449649811, "learning_rate": 2.9866379689350024e-08, "loss": 0.0415, "step": 6946 }, { "epoch": 0.9680206228662998, "grad_norm": 0.1412399560213089, "learning_rate": 2.9609719662320735e-08, "loss": 0.0604, "step": 6947 }, { "epoch": 0.9681599665575141, "grad_norm": 0.08039755374193192, "learning_rate": 2.9354163942535983e-08, "loss": 0.0561, "step": 6948 }, { "epoch": 0.9682993102487285, "grad_norm": 0.09861835837364197, "learning_rate": 2.90997125867748e-08, "loss": 0.0524, "step": 6949 }, { "epoch": 0.9684386539399429, "grad_norm": 0.09829536825418472, "learning_rate": 2.8846365651569175e-08, "loss": 0.0457, "step": 6950 }, { "epoch": 0.9685779976311573, "grad_norm": 0.10230960696935654, "learning_rate": 2.8594123193207978e-08, "loss": 0.0559, "step": 6951 }, { "epoch": 0.9687173413223716, "grad_norm": 0.07096341252326965, "learning_rate": 2.83429852677336e-08, "loss": 0.047, "step": 6952 }, { "epoch": 0.968856685013586, "grad_norm": 0.14713943004608154, "learning_rate": 2.809295193094308e-08, "loss": 0.0633, "step": 6953 }, { "epoch": 0.9689960287048004, "grad_norm": 0.08882877230644226, "learning_rate": 2.7844023238388084e-08, "loss": 0.0579, "step": 6954 }, { "epoch": 0.9691353723960148, "grad_norm": 0.08340595662593842, "learning_rate": 2.759619924537438e-08, "loss": 0.0487, "step": 6955 }, { "epoch": 0.9692747160872291, "grad_norm": 0.0769120380282402, "learning_rate": 2.7349480006964023e-08, "loss": 0.0503, "step": 6956 }, { "epoch": 0.9694140597784435, "grad_norm": 0.06586603820323944, "learning_rate": 2.7103865577970955e-08, "loss": 0.0474, "step": 6957 }, { "epoch": 0.9695534034696579, "grad_norm": 0.061009861528873444, "learning_rate": 2.6859356012965964e-08, "loss": 0.0493, "step": 6958 }, { "epoch": 0.9696927471608723, "grad_norm": 0.07962577044963837, "learning_rate": 2.661595136627393e-08, "loss": 0.0641, "step": 6959 }, { "epoch": 0.9698320908520867, "grad_norm": 0.08090372383594513, "learning_rate": 2.63736516919727e-08, "loss": 0.0552, "step": 6960 }, { "epoch": 0.969971434543301, "grad_norm": 0.06559876352548599, "learning_rate": 2.6132457043896442e-08, "loss": 0.049, "step": 6961 }, { "epoch": 0.9701107782345154, "grad_norm": 0.06881856918334961, "learning_rate": 2.589236747563284e-08, "loss": 0.0534, "step": 6962 }, { "epoch": 0.9702501219257298, "grad_norm": 0.0778534933924675, "learning_rate": 2.5653383040524228e-08, "loss": 0.0487, "step": 6963 }, { "epoch": 0.9703894656169442, "grad_norm": 0.09098907560110092, "learning_rate": 2.5415503791667573e-08, "loss": 0.0544, "step": 6964 }, { "epoch": 0.9705288093081585, "grad_norm": 0.08129148185253143, "learning_rate": 2.5178729781915046e-08, "loss": 0.052, "step": 6965 }, { "epoch": 0.9706681529993729, "grad_norm": 0.11943303048610687, "learning_rate": 2.4943061063870678e-08, "loss": 0.0659, "step": 6966 }, { "epoch": 0.9708074966905873, "grad_norm": 0.09038890153169632, "learning_rate": 2.4708497689896472e-08, "loss": 0.0479, "step": 6967 }, { "epoch": 0.9709468403818017, "grad_norm": 0.091140016913414, "learning_rate": 2.4475039712105742e-08, "loss": 0.0529, "step": 6968 }, { "epoch": 0.9710861840730161, "grad_norm": 0.06775415688753128, "learning_rate": 2.4242687182368106e-08, "loss": 0.0422, "step": 6969 }, { "epoch": 0.9712255277642304, "grad_norm": 0.0743945986032486, "learning_rate": 2.401144015230672e-08, "loss": 0.0526, "step": 6970 }, { "epoch": 0.9713648714554448, "grad_norm": 0.09043169766664505, "learning_rate": 2.3781298673299924e-08, "loss": 0.0587, "step": 6971 }, { "epoch": 0.9715042151466592, "grad_norm": 0.09259321540594101, "learning_rate": 2.3552262796479042e-08, "loss": 0.0535, "step": 6972 }, { "epoch": 0.9716435588378736, "grad_norm": 0.07126559317111969, "learning_rate": 2.33243325727317e-08, "loss": 0.043, "step": 6973 }, { "epoch": 0.9717829025290879, "grad_norm": 0.06184215471148491, "learning_rate": 2.3097508052697948e-08, "loss": 0.0618, "step": 6974 }, { "epoch": 0.9719222462203023, "grad_norm": 0.07436727732419968, "learning_rate": 2.2871789286773582e-08, "loss": 0.0596, "step": 6975 }, { "epoch": 0.9720615899115168, "grad_norm": 0.07063525170087814, "learning_rate": 2.264717632510738e-08, "loss": 0.051, "step": 6976 }, { "epoch": 0.9722009336027312, "grad_norm": 0.15829135477542877, "learning_rate": 2.2423669217604415e-08, "loss": 0.0565, "step": 6977 }, { "epoch": 0.9723402772939456, "grad_norm": 0.1872093677520752, "learning_rate": 2.220126801392164e-08, "loss": 0.0522, "step": 6978 }, { "epoch": 0.9724796209851599, "grad_norm": 0.0928979143500328, "learning_rate": 2.1979972763471747e-08, "loss": 0.0489, "step": 6979 }, { "epoch": 0.9726189646763743, "grad_norm": 0.09788254648447037, "learning_rate": 2.1759783515422074e-08, "loss": 0.0653, "step": 6980 }, { "epoch": 0.9727583083675887, "grad_norm": 0.11401228606700897, "learning_rate": 2.1540700318693487e-08, "loss": 0.0615, "step": 6981 }, { "epoch": 0.9728976520588031, "grad_norm": 0.07926298677921295, "learning_rate": 2.132272322196094e-08, "loss": 0.0505, "step": 6982 }, { "epoch": 0.9730369957500175, "grad_norm": 0.08843808621168137, "learning_rate": 2.110585227365458e-08, "loss": 0.0563, "step": 6983 }, { "epoch": 0.9731763394412318, "grad_norm": 0.08990781754255295, "learning_rate": 2.0890087521957536e-08, "loss": 0.0553, "step": 6984 }, { "epoch": 0.9733156831324462, "grad_norm": 0.06201495602726936, "learning_rate": 2.0675429014807568e-08, "loss": 0.0552, "step": 6985 }, { "epoch": 0.9734550268236606, "grad_norm": 0.10870019346475601, "learning_rate": 2.0461876799898196e-08, "loss": 0.0533, "step": 6986 }, { "epoch": 0.973594370514875, "grad_norm": 0.07760557532310486, "learning_rate": 2.024943092467424e-08, "loss": 0.0562, "step": 6987 }, { "epoch": 0.9737337142060893, "grad_norm": 0.07589113712310791, "learning_rate": 2.0038091436337392e-08, "loss": 0.0472, "step": 6988 }, { "epoch": 0.9738730578973037, "grad_norm": 0.07261012494564056, "learning_rate": 1.9827858381842312e-08, "loss": 0.0538, "step": 6989 }, { "epoch": 0.9740124015885181, "grad_norm": 0.09026515483856201, "learning_rate": 1.961873180789775e-08, "loss": 0.0537, "step": 6990 }, { "epoch": 0.9741517452797325, "grad_norm": 0.07861927151679993, "learning_rate": 1.9410711760967092e-08, "loss": 0.0523, "step": 6991 }, { "epoch": 0.9742910889709469, "grad_norm": 0.10943084955215454, "learning_rate": 1.920379828726726e-08, "loss": 0.046, "step": 6992 }, { "epoch": 0.9744304326621612, "grad_norm": 0.08310520648956299, "learning_rate": 1.8997991432769812e-08, "loss": 0.0439, "step": 6993 }, { "epoch": 0.9745697763533756, "grad_norm": 0.05734114348888397, "learning_rate": 1.8793291243200396e-08, "loss": 0.0443, "step": 6994 }, { "epoch": 0.97470912004459, "grad_norm": 0.07926487922668457, "learning_rate": 1.8589697764039295e-08, "loss": 0.0509, "step": 6995 }, { "epoch": 0.9748484637358044, "grad_norm": 0.10625598579645157, "learning_rate": 1.8387211040519216e-08, "loss": 0.0533, "step": 6996 }, { "epoch": 0.9749878074270187, "grad_norm": 0.07656702399253845, "learning_rate": 1.818583111762917e-08, "loss": 0.0436, "step": 6997 }, { "epoch": 0.9751271511182331, "grad_norm": 0.10935503244400024, "learning_rate": 1.7985558040110594e-08, "loss": 0.0536, "step": 6998 }, { "epoch": 0.9752664948094475, "grad_norm": 0.09998450428247452, "learning_rate": 1.778639185245956e-08, "loss": 0.054, "step": 6999 }, { "epoch": 0.9754058385006619, "grad_norm": 0.0772157683968544, "learning_rate": 1.758833259892623e-08, "loss": 0.0517, "step": 7000 }, { "epoch": 0.9755451821918762, "grad_norm": 0.08108818531036377, "learning_rate": 1.7391380323515395e-08, "loss": 0.044, "step": 7001 }, { "epoch": 0.9756845258830906, "grad_norm": 0.11649703979492188, "learning_rate": 1.7195535069984838e-08, "loss": 0.0563, "step": 7002 }, { "epoch": 0.975823869574305, "grad_norm": 0.09160694479942322, "learning_rate": 1.700079688184697e-08, "loss": 0.0582, "step": 7003 }, { "epoch": 0.9759632132655194, "grad_norm": 0.08796050399541855, "learning_rate": 1.6807165802368297e-08, "loss": 0.0547, "step": 7004 }, { "epoch": 0.9761025569567338, "grad_norm": 0.07569562643766403, "learning_rate": 1.661464187456885e-08, "loss": 0.0522, "step": 7005 }, { "epoch": 0.9762419006479481, "grad_norm": 0.06622113287448883, "learning_rate": 1.6423225141223854e-08, "loss": 0.0606, "step": 7006 }, { "epoch": 0.9763812443391625, "grad_norm": 0.07078331708908081, "learning_rate": 1.623291564486096e-08, "loss": 0.0509, "step": 7007 }, { "epoch": 0.9765205880303769, "grad_norm": 0.08795704692602158, "learning_rate": 1.604371342776301e-08, "loss": 0.0531, "step": 7008 }, { "epoch": 0.9766599317215913, "grad_norm": 0.06845948100090027, "learning_rate": 1.585561853196582e-08, "loss": 0.0473, "step": 7009 }, { "epoch": 0.9767992754128056, "grad_norm": 0.06782171130180359, "learning_rate": 1.5668630999260968e-08, "loss": 0.0533, "step": 7010 }, { "epoch": 0.97693861910402, "grad_norm": 0.07438940554857254, "learning_rate": 1.5482750871191333e-08, "loss": 0.0443, "step": 7011 }, { "epoch": 0.9770779627952344, "grad_norm": 0.10107584297657013, "learning_rate": 1.529797818905665e-08, "loss": 0.0505, "step": 7012 }, { "epoch": 0.9772173064864488, "grad_norm": 0.07721274346113205, "learning_rate": 1.5114312993908532e-08, "loss": 0.054, "step": 7013 }, { "epoch": 0.9773566501776632, "grad_norm": 0.21987049281597137, "learning_rate": 1.4931755326552667e-08, "loss": 0.0597, "step": 7014 }, { "epoch": 0.9774959938688775, "grad_norm": 0.10174092650413513, "learning_rate": 1.4750305227549943e-08, "loss": 0.0591, "step": 7015 }, { "epoch": 0.977635337560092, "grad_norm": 0.11721985042095184, "learning_rate": 1.4569962737214228e-08, "loss": 0.0547, "step": 7016 }, { "epoch": 0.9777746812513064, "grad_norm": 0.0982690081000328, "learning_rate": 1.4390727895613465e-08, "loss": 0.0616, "step": 7017 }, { "epoch": 0.9779140249425208, "grad_norm": 0.09297265857458115, "learning_rate": 1.4212600742569694e-08, "loss": 0.0552, "step": 7018 }, { "epoch": 0.9780533686337352, "grad_norm": 0.07351981103420258, "learning_rate": 1.4035581317658476e-08, "loss": 0.0542, "step": 7019 }, { "epoch": 0.9781927123249495, "grad_norm": 0.12969592213630676, "learning_rate": 1.3859669660209463e-08, "loss": 0.0638, "step": 7020 }, { "epoch": 0.9783320560161639, "grad_norm": 0.11538294702768326, "learning_rate": 1.368486580930639e-08, "loss": 0.0487, "step": 7021 }, { "epoch": 0.9784713997073783, "grad_norm": 0.0913386344909668, "learning_rate": 1.3511169803786527e-08, "loss": 0.0491, "step": 7022 }, { "epoch": 0.9786107433985927, "grad_norm": 0.09564817696809769, "learning_rate": 1.333858168224178e-08, "loss": 0.0468, "step": 7023 }, { "epoch": 0.978750087089807, "grad_norm": 0.07453183084726334, "learning_rate": 1.3167101483016476e-08, "loss": 0.0495, "step": 7024 }, { "epoch": 0.9788894307810214, "grad_norm": 0.06717690080404282, "learning_rate": 1.2996729244209583e-08, "loss": 0.0492, "step": 7025 }, { "epoch": 0.9790287744722358, "grad_norm": 0.11783505231142044, "learning_rate": 1.282746500367471e-08, "loss": 0.0546, "step": 7026 }, { "epoch": 0.9791681181634502, "grad_norm": 0.17441698908805847, "learning_rate": 1.2659308799017889e-08, "loss": 0.0596, "step": 7027 }, { "epoch": 0.9793074618546646, "grad_norm": 0.08761091530323029, "learning_rate": 1.2492260667599232e-08, "loss": 0.0559, "step": 7028 }, { "epoch": 0.9794468055458789, "grad_norm": 0.07352183014154434, "learning_rate": 1.2326320646534051e-08, "loss": 0.0482, "step": 7029 }, { "epoch": 0.9795861492370933, "grad_norm": 0.10848425328731537, "learning_rate": 1.2161488772690077e-08, "loss": 0.0566, "step": 7030 }, { "epoch": 0.9797254929283077, "grad_norm": 0.09219516068696976, "learning_rate": 1.1997765082688573e-08, "loss": 0.0511, "step": 7031 }, { "epoch": 0.9798648366195221, "grad_norm": 0.07706821709871292, "learning_rate": 1.1835149612905438e-08, "loss": 0.0429, "step": 7032 }, { "epoch": 0.9800041803107364, "grad_norm": 0.07105432450771332, "learning_rate": 1.1673642399470663e-08, "loss": 0.0559, "step": 7033 }, { "epoch": 0.9801435240019508, "grad_norm": 0.1719379723072052, "learning_rate": 1.1513243478267211e-08, "loss": 0.0606, "step": 7034 }, { "epoch": 0.9802828676931652, "grad_norm": 0.07213807851076126, "learning_rate": 1.135395288493213e-08, "loss": 0.0463, "step": 7035 }, { "epoch": 0.9804222113843796, "grad_norm": 0.08453879505395889, "learning_rate": 1.1195770654855443e-08, "loss": 0.0515, "step": 7036 }, { "epoch": 0.980561555075594, "grad_norm": 0.10595114529132843, "learning_rate": 1.1038696823182372e-08, "loss": 0.0625, "step": 7037 }, { "epoch": 0.9807008987668083, "grad_norm": 0.13391318917274475, "learning_rate": 1.088273142481111e-08, "loss": 0.0521, "step": 7038 }, { "epoch": 0.9808402424580227, "grad_norm": 0.06278140097856522, "learning_rate": 1.0727874494393386e-08, "loss": 0.0472, "step": 7039 }, { "epoch": 0.9809795861492371, "grad_norm": 0.088347427546978, "learning_rate": 1.0574126066335011e-08, "loss": 0.049, "step": 7040 }, { "epoch": 0.9811189298404515, "grad_norm": 0.06804744899272919, "learning_rate": 1.0421486174795326e-08, "loss": 0.0452, "step": 7041 }, { "epoch": 0.9812582735316658, "grad_norm": 0.07875372469425201, "learning_rate": 1.0269954853687202e-08, "loss": 0.0513, "step": 7042 }, { "epoch": 0.9813976172228802, "grad_norm": 0.06858901679515839, "learning_rate": 1.01195321366776e-08, "loss": 0.0453, "step": 7043 }, { "epoch": 0.9815369609140946, "grad_norm": 0.06532973796129227, "learning_rate": 9.970218057187009e-09, "loss": 0.0462, "step": 7044 }, { "epoch": 0.981676304605309, "grad_norm": 0.07384221255779266, "learning_rate": 9.82201264839e-09, "loss": 0.0475, "step": 7045 }, { "epoch": 0.9818156482965233, "grad_norm": 0.08115720748901367, "learning_rate": 9.67491594321357e-09, "loss": 0.054, "step": 7046 }, { "epoch": 0.9819549919877377, "grad_norm": 0.0647360160946846, "learning_rate": 9.528927974339908e-09, "loss": 0.0508, "step": 7047 }, { "epoch": 0.9820943356789521, "grad_norm": 0.09564556181430817, "learning_rate": 9.38404877420418e-09, "loss": 0.0578, "step": 7048 }, { "epoch": 0.9822336793701665, "grad_norm": 0.07793880254030228, "learning_rate": 9.240278374995637e-09, "loss": 0.0466, "step": 7049 }, { "epoch": 0.9823730230613809, "grad_norm": 0.08798930794000626, "learning_rate": 9.097616808655396e-09, "loss": 0.0518, "step": 7050 }, { "epoch": 0.9825123667525952, "grad_norm": 0.059394098818302155, "learning_rate": 8.95606410688088e-09, "loss": 0.0472, "step": 7051 }, { "epoch": 0.9826517104438096, "grad_norm": 0.07123907655477524, "learning_rate": 8.815620301121375e-09, "loss": 0.0519, "step": 7052 }, { "epoch": 0.982791054135024, "grad_norm": 0.07467346638441086, "learning_rate": 8.676285422580255e-09, "loss": 0.0501, "step": 7053 }, { "epoch": 0.9829303978262384, "grad_norm": 0.059215083718299866, "learning_rate": 8.538059502214979e-09, "loss": 0.0451, "step": 7054 }, { "epoch": 0.9830697415174527, "grad_norm": 0.128154456615448, "learning_rate": 8.400942570735427e-09, "loss": 0.0512, "step": 7055 }, { "epoch": 0.9832090852086672, "grad_norm": 0.07031431049108505, "learning_rate": 8.264934658606672e-09, "loss": 0.0432, "step": 7056 }, { "epoch": 0.9833484288998816, "grad_norm": 0.08699752390384674, "learning_rate": 8.13003579604621e-09, "loss": 0.0563, "step": 7057 }, { "epoch": 0.983487772591096, "grad_norm": 0.07504545897245407, "learning_rate": 7.996246013025067e-09, "loss": 0.0501, "step": 7058 }, { "epoch": 0.9836271162823104, "grad_norm": 0.10438846051692963, "learning_rate": 7.863565339268908e-09, "loss": 0.0625, "step": 7059 }, { "epoch": 0.9837664599735247, "grad_norm": 0.07620418071746826, "learning_rate": 7.731993804256378e-09, "loss": 0.0535, "step": 7060 }, { "epoch": 0.9839058036647391, "grad_norm": 0.09781785309314728, "learning_rate": 7.60153143721909e-09, "loss": 0.0487, "step": 7061 }, { "epoch": 0.9840451473559535, "grad_norm": 0.10500790178775787, "learning_rate": 7.472178267143304e-09, "loss": 0.0537, "step": 7062 }, { "epoch": 0.9841844910471679, "grad_norm": 0.07338778674602509, "learning_rate": 7.343934322767699e-09, "loss": 0.0555, "step": 7063 }, { "epoch": 0.9843238347383823, "grad_norm": 0.1376037895679474, "learning_rate": 7.216799632586147e-09, "loss": 0.0565, "step": 7064 }, { "epoch": 0.9844631784295966, "grad_norm": 0.08292361348867416, "learning_rate": 7.0907742248443875e-09, "loss": 0.0387, "step": 7065 }, { "epoch": 0.984602522120811, "grad_norm": 0.061147063970565796, "learning_rate": 6.965858127542247e-09, "loss": 0.0438, "step": 7066 }, { "epoch": 0.9847418658120254, "grad_norm": 0.13830150663852692, "learning_rate": 6.842051368433633e-09, "loss": 0.0534, "step": 7067 }, { "epoch": 0.9848812095032398, "grad_norm": 0.1219475120306015, "learning_rate": 6.719353975025989e-09, "loss": 0.0589, "step": 7068 }, { "epoch": 0.9850205531944541, "grad_norm": 0.0873057097196579, "learning_rate": 6.5977659745786185e-09, "loss": 0.059, "step": 7069 }, { "epoch": 0.9851598968856685, "grad_norm": 0.0821199044585228, "learning_rate": 6.477287394107134e-09, "loss": 0.0568, "step": 7070 }, { "epoch": 0.9852992405768829, "grad_norm": 0.08009091764688492, "learning_rate": 6.357918260377349e-09, "loss": 0.0625, "step": 7071 }, { "epoch": 0.9854385842680973, "grad_norm": 0.12757599353790283, "learning_rate": 6.239658599911935e-09, "loss": 0.0555, "step": 7072 }, { "epoch": 0.9855779279593117, "grad_norm": 0.0810101255774498, "learning_rate": 6.122508438984875e-09, "loss": 0.0569, "step": 7073 }, { "epoch": 0.985717271650526, "grad_norm": 0.08187715709209442, "learning_rate": 6.0064678036242385e-09, "loss": 0.0489, "step": 7074 }, { "epoch": 0.9858566153417404, "grad_norm": 0.09458910673856735, "learning_rate": 5.891536719611624e-09, "loss": 0.0548, "step": 7075 }, { "epoch": 0.9859959590329548, "grad_norm": 0.08290885388851166, "learning_rate": 5.77771521248216e-09, "loss": 0.0628, "step": 7076 }, { "epoch": 0.9861353027241692, "grad_norm": 0.11105003952980042, "learning_rate": 5.665003307524508e-09, "loss": 0.0579, "step": 7077 }, { "epoch": 0.9862746464153835, "grad_norm": 0.059004608541727066, "learning_rate": 5.5534010297803034e-09, "loss": 0.0434, "step": 7078 }, { "epoch": 0.9864139901065979, "grad_norm": 0.11043426394462585, "learning_rate": 5.4429084040452665e-09, "loss": 0.0559, "step": 7079 }, { "epoch": 0.9865533337978123, "grad_norm": 0.11203313618898392, "learning_rate": 5.333525454868094e-09, "loss": 0.0539, "step": 7080 }, { "epoch": 0.9866926774890267, "grad_norm": 0.09217708557844162, "learning_rate": 5.225252206551568e-09, "loss": 0.0552, "step": 7081 }, { "epoch": 0.986832021180241, "grad_norm": 0.08599650114774704, "learning_rate": 5.118088683151445e-09, "loss": 0.0528, "step": 7082 }, { "epoch": 0.9869713648714554, "grad_norm": 0.06860077381134033, "learning_rate": 5.01203490847646e-09, "loss": 0.0507, "step": 7083 }, { "epoch": 0.9871107085626698, "grad_norm": 0.15736259520053864, "learning_rate": 4.907090906090539e-09, "loss": 0.0587, "step": 7084 }, { "epoch": 0.9872500522538842, "grad_norm": 0.08663325756788254, "learning_rate": 4.803256699308923e-09, "loss": 0.0467, "step": 7085 }, { "epoch": 0.9873893959450986, "grad_norm": 0.07291809469461441, "learning_rate": 4.700532311200934e-09, "loss": 0.052, "step": 7086 }, { "epoch": 0.9875287396363129, "grad_norm": 0.07088744640350342, "learning_rate": 4.598917764590538e-09, "loss": 0.0489, "step": 7087 }, { "epoch": 0.9876680833275273, "grad_norm": 0.06659021228551865, "learning_rate": 4.498413082053566e-09, "loss": 0.0477, "step": 7088 }, { "epoch": 0.9878074270187417, "grad_norm": 0.11633474379777908, "learning_rate": 4.399018285919376e-09, "loss": 0.0601, "step": 7089 }, { "epoch": 0.9879467707099561, "grad_norm": 0.0737287700176239, "learning_rate": 4.300733398272528e-09, "loss": 0.0499, "step": 7090 }, { "epoch": 0.9880861144011704, "grad_norm": 0.10280559211969376, "learning_rate": 4.203558440948885e-09, "loss": 0.0598, "step": 7091 }, { "epoch": 0.9882254580923848, "grad_norm": 0.0866975337266922, "learning_rate": 4.1074934355384015e-09, "loss": 0.0516, "step": 7092 }, { "epoch": 0.9883648017835992, "grad_norm": 0.08097781240940094, "learning_rate": 4.0125384033845586e-09, "loss": 0.0557, "step": 7093 }, { "epoch": 0.9885041454748136, "grad_norm": 0.07303857803344727, "learning_rate": 3.91869336558437e-09, "loss": 0.0532, "step": 7094 }, { "epoch": 0.988643489166028, "grad_norm": 0.07725530117750168, "learning_rate": 3.8259583429883785e-09, "loss": 0.0454, "step": 7095 }, { "epoch": 0.9887828328572423, "grad_norm": 0.07709745317697525, "learning_rate": 3.734333356199548e-09, "loss": 0.0455, "step": 7096 }, { "epoch": 0.9889221765484568, "grad_norm": 0.11385258287191391, "learning_rate": 3.643818425575485e-09, "loss": 0.0503, "step": 7097 }, { "epoch": 0.9890615202396712, "grad_norm": 0.10703368484973907, "learning_rate": 3.5544135712262116e-09, "loss": 0.0629, "step": 7098 }, { "epoch": 0.9892008639308856, "grad_norm": 0.09140671044588089, "learning_rate": 3.4661188130147295e-09, "loss": 0.0626, "step": 7099 }, { "epoch": 0.9893402076221, "grad_norm": 0.12641583383083344, "learning_rate": 3.378934170559789e-09, "loss": 0.0508, "step": 7100 }, { "epoch": 0.9894795513133143, "grad_norm": 0.07451049983501434, "learning_rate": 3.292859663230341e-09, "loss": 0.0516, "step": 7101 }, { "epoch": 0.9896188950045287, "grad_norm": 0.06212792545557022, "learning_rate": 3.207895310150533e-09, "loss": 0.054, "step": 7102 }, { "epoch": 0.9897582386957431, "grad_norm": 0.09634598344564438, "learning_rate": 3.1240411301980413e-09, "loss": 0.048, "step": 7103 }, { "epoch": 0.9898975823869575, "grad_norm": 0.06601904332637787, "learning_rate": 3.0412971420029636e-09, "loss": 0.0457, "step": 7104 }, { "epoch": 0.9900369260781718, "grad_norm": 0.08894544094800949, "learning_rate": 2.959663363949483e-09, "loss": 0.0489, "step": 7105 }, { "epoch": 0.9901762697693862, "grad_norm": 0.07103992253541946, "learning_rate": 2.8791398141736484e-09, "loss": 0.0496, "step": 7106 }, { "epoch": 0.9903156134606006, "grad_norm": 0.1126934364438057, "learning_rate": 2.799726510567258e-09, "loss": 0.0663, "step": 7107 }, { "epoch": 0.990454957151815, "grad_norm": 0.12113260477781296, "learning_rate": 2.721423470773421e-09, "loss": 0.0543, "step": 7108 }, { "epoch": 0.9905943008430294, "grad_norm": 0.06590321660041809, "learning_rate": 2.644230712189888e-09, "loss": 0.044, "step": 7109 }, { "epoch": 0.9907336445342437, "grad_norm": 0.058968961238861084, "learning_rate": 2.5681482519662736e-09, "loss": 0.0445, "step": 7110 }, { "epoch": 0.9908729882254581, "grad_norm": 0.06238102540373802, "learning_rate": 2.493176107006834e-09, "loss": 0.0441, "step": 7111 }, { "epoch": 0.9910123319166725, "grad_norm": 0.08029675483703613, "learning_rate": 2.4193142939687996e-09, "loss": 0.0536, "step": 7112 }, { "epoch": 0.9911516756078869, "grad_norm": 0.10721352696418762, "learning_rate": 2.3465628292623776e-09, "loss": 0.0578, "step": 7113 }, { "epoch": 0.9912910192991012, "grad_norm": 0.10353364050388336, "learning_rate": 2.2749217290513048e-09, "loss": 0.0684, "step": 7114 }, { "epoch": 0.9914303629903156, "grad_norm": 0.067519411444664, "learning_rate": 2.2043910092522935e-09, "loss": 0.0491, "step": 7115 }, { "epoch": 0.99156970668153, "grad_norm": 0.09430772066116333, "learning_rate": 2.134970685536697e-09, "loss": 0.0598, "step": 7116 }, { "epoch": 0.9917090503727444, "grad_norm": 0.15106579661369324, "learning_rate": 2.066660773326623e-09, "loss": 0.0689, "step": 7117 }, { "epoch": 0.9918483940639587, "grad_norm": 0.10077466815710068, "learning_rate": 1.999461287800486e-09, "loss": 0.0609, "step": 7118 }, { "epoch": 0.9919877377551731, "grad_norm": 0.07072947919368744, "learning_rate": 1.9333722438874548e-09, "loss": 0.0436, "step": 7119 }, { "epoch": 0.9921270814463875, "grad_norm": 0.08832772821187973, "learning_rate": 1.868393656271339e-09, "loss": 0.0494, "step": 7120 }, { "epoch": 0.9922664251376019, "grad_norm": 0.0925067663192749, "learning_rate": 1.8045255393889238e-09, "loss": 0.0632, "step": 7121 }, { "epoch": 0.9924057688288163, "grad_norm": 0.13175901770591736, "learning_rate": 1.7417679074299698e-09, "loss": 0.0554, "step": 7122 }, { "epoch": 0.9925451125200306, "grad_norm": 0.0845513790845871, "learning_rate": 1.680120774338323e-09, "loss": 0.0457, "step": 7123 }, { "epoch": 0.992684456211245, "grad_norm": 0.09530067443847656, "learning_rate": 1.6195841538096947e-09, "loss": 0.0503, "step": 7124 }, { "epoch": 0.9928237999024594, "grad_norm": 0.06832657009363174, "learning_rate": 1.5601580592949916e-09, "loss": 0.044, "step": 7125 }, { "epoch": 0.9929631435936738, "grad_norm": 0.10193976759910583, "learning_rate": 1.5018425039969864e-09, "loss": 0.0506, "step": 7126 }, { "epoch": 0.9931024872848881, "grad_norm": 0.09893766790628433, "learning_rate": 1.4446375008714264e-09, "loss": 0.0552, "step": 7127 }, { "epoch": 0.9932418309761025, "grad_norm": 0.127162903547287, "learning_rate": 1.3885430626287e-09, "loss": 0.0561, "step": 7128 }, { "epoch": 0.9933811746673169, "grad_norm": 0.09177439659833908, "learning_rate": 1.3335592017316156e-09, "loss": 0.0501, "step": 7129 }, { "epoch": 0.9935205183585313, "grad_norm": 0.06440817564725876, "learning_rate": 1.2796859303959575e-09, "loss": 0.0394, "step": 7130 }, { "epoch": 0.9936598620497457, "grad_norm": 0.08388829976320267, "learning_rate": 1.2269232605915948e-09, "loss": 0.046, "step": 7131 }, { "epoch": 0.99379920574096, "grad_norm": 0.12423191219568253, "learning_rate": 1.1752712040408176e-09, "loss": 0.0529, "step": 7132 }, { "epoch": 0.9939385494321744, "grad_norm": 0.06284959614276886, "learning_rate": 1.124729772219446e-09, "loss": 0.0502, "step": 7133 }, { "epoch": 0.9940778931233888, "grad_norm": 0.09640179574489594, "learning_rate": 1.075298976356831e-09, "loss": 0.0528, "step": 7134 }, { "epoch": 0.9942172368146032, "grad_norm": 0.10060610622167587, "learning_rate": 1.026978827435854e-09, "loss": 0.0494, "step": 7135 }, { "epoch": 0.9943565805058175, "grad_norm": 0.07376518100500107, "learning_rate": 9.797693361912607e-10, "loss": 0.0471, "step": 7136 }, { "epoch": 0.994495924197032, "grad_norm": 0.05512242391705513, "learning_rate": 9.33670513112439e-10, "loss": 0.0449, "step": 7137 }, { "epoch": 0.9946352678882464, "grad_norm": 0.09264736622571945, "learning_rate": 8.886823684417512e-10, "loss": 0.0528, "step": 7138 }, { "epoch": 0.9947746115794608, "grad_norm": 0.08211065083742142, "learning_rate": 8.448049121739798e-10, "loss": 0.0591, "step": 7139 }, { "epoch": 0.9949139552706752, "grad_norm": 0.07713301479816437, "learning_rate": 8.020381540579936e-10, "loss": 0.0529, "step": 7140 }, { "epoch": 0.9950532989618895, "grad_norm": 0.10248799622058868, "learning_rate": 7.603821035950809e-10, "loss": 0.0572, "step": 7141 }, { "epoch": 0.9951926426531039, "grad_norm": 0.06745610386133194, "learning_rate": 7.198367700411712e-10, "loss": 0.0501, "step": 7142 }, { "epoch": 0.9953319863443183, "grad_norm": 0.06733036786317825, "learning_rate": 6.80402162403504e-10, "loss": 0.0517, "step": 7143 }, { "epoch": 0.9954713300355327, "grad_norm": 0.07670993357896805, "learning_rate": 6.420782894445144e-10, "loss": 0.0478, "step": 7144 }, { "epoch": 0.995610673726747, "grad_norm": 0.08047724515199661, "learning_rate": 6.048651596785027e-10, "loss": 0.0533, "step": 7145 }, { "epoch": 0.9957500174179614, "grad_norm": 0.17828401923179626, "learning_rate": 5.687627813727448e-10, "loss": 0.0509, "step": 7146 }, { "epoch": 0.9958893611091758, "grad_norm": 0.08800439536571503, "learning_rate": 5.337711625497122e-10, "loss": 0.0497, "step": 7147 }, { "epoch": 0.9960287048003902, "grad_norm": 0.09017349779605865, "learning_rate": 4.998903109826314e-10, "loss": 0.0516, "step": 7148 }, { "epoch": 0.9961680484916046, "grad_norm": 0.12122610211372375, "learning_rate": 4.671202341993697e-10, "loss": 0.0598, "step": 7149 }, { "epoch": 0.9963073921828189, "grad_norm": 0.09740585088729858, "learning_rate": 4.354609394802145e-10, "loss": 0.0603, "step": 7150 }, { "epoch": 0.9964467358740333, "grad_norm": 0.09021280705928802, "learning_rate": 4.0491243386009403e-10, "loss": 0.0521, "step": 7151 }, { "epoch": 0.9965860795652477, "grad_norm": 0.08076806366443634, "learning_rate": 3.7547472412580167e-10, "loss": 0.0589, "step": 7152 }, { "epoch": 0.9967254232564621, "grad_norm": 0.05950106307864189, "learning_rate": 3.471478168176612e-10, "loss": 0.0501, "step": 7153 }, { "epoch": 0.9968647669476765, "grad_norm": 0.08257219940423965, "learning_rate": 3.19931718229527e-10, "loss": 0.047, "step": 7154 }, { "epoch": 0.9970041106388908, "grad_norm": 0.0745423212647438, "learning_rate": 2.9382643440767354e-10, "loss": 0.0561, "step": 7155 }, { "epoch": 0.9971434543301052, "grad_norm": 0.10522297769784927, "learning_rate": 2.6883197115190606e-10, "loss": 0.0498, "step": 7156 }, { "epoch": 0.9972827980213196, "grad_norm": 0.10430429130792618, "learning_rate": 2.4494833401667027e-10, "loss": 0.0553, "step": 7157 }, { "epoch": 0.997422141712534, "grad_norm": 0.12367524951696396, "learning_rate": 2.2217552830716693e-10, "loss": 0.0519, "step": 7158 }, { "epoch": 0.9975614854037483, "grad_norm": 0.13055390119552612, "learning_rate": 2.0051355908323743e-10, "loss": 0.0528, "step": 7159 }, { "epoch": 0.9977008290949627, "grad_norm": 0.15446977317333221, "learning_rate": 1.7996243115769863e-10, "loss": 0.0632, "step": 7160 }, { "epoch": 0.9978401727861771, "grad_norm": 0.1020069420337677, "learning_rate": 1.605221490968978e-10, "loss": 0.0544, "step": 7161 }, { "epoch": 0.9979795164773915, "grad_norm": 0.07887081056833267, "learning_rate": 1.421927172201576e-10, "loss": 0.0479, "step": 7162 }, { "epoch": 0.9981188601686058, "grad_norm": 0.11172398924827576, "learning_rate": 1.24974139599221e-10, "loss": 0.0572, "step": 7163 }, { "epoch": 0.9982582038598202, "grad_norm": 0.10103298723697662, "learning_rate": 1.0886642005991654e-10, "loss": 0.0597, "step": 7164 }, { "epoch": 0.9983975475510346, "grad_norm": 0.06449566036462784, "learning_rate": 9.386956218104815e-11, "loss": 0.0472, "step": 7165 }, { "epoch": 0.998536891242249, "grad_norm": 0.07458359748125076, "learning_rate": 7.998356929439511e-11, "loss": 0.0479, "step": 7166 }, { "epoch": 0.9986762349334634, "grad_norm": 0.1037537008523941, "learning_rate": 6.72084444852672e-11, "loss": 0.0472, "step": 7167 }, { "epoch": 0.9988155786246777, "grad_norm": 0.06644084304571152, "learning_rate": 5.554419059250471e-11, "loss": 0.0449, "step": 7168 }, { "epoch": 0.9989549223158921, "grad_norm": 0.10016543418169022, "learning_rate": 4.499081020681306e-11, "loss": 0.0645, "step": 7169 }, { "epoch": 0.9990942660071065, "grad_norm": 0.10833394527435303, "learning_rate": 3.554830567298328e-11, "loss": 0.0566, "step": 7170 }, { "epoch": 0.9992336096983209, "grad_norm": 0.11804244667291641, "learning_rate": 2.7216679089892008e-11, "loss": 0.0651, "step": 7171 }, { "epoch": 0.9993729533895352, "grad_norm": 0.09797792881727219, "learning_rate": 1.9995932307170783e-11, "loss": 0.049, "step": 7172 }, { "epoch": 0.9995122970807496, "grad_norm": 0.08211483061313629, "learning_rate": 1.3886066930202113e-11, "loss": 0.0552, "step": 7173 }, { "epoch": 0.999651640771964, "grad_norm": 0.12613800168037415, "learning_rate": 8.88708431623364e-12, "loss": 0.0591, "step": 7174 }, { "epoch": 0.9997909844631784, "grad_norm": 0.0747358649969101, "learning_rate": 4.998985576043503e-12, "loss": 0.0434, "step": 7175 }, { "epoch": 0.9999303281543928, "grad_norm": 0.09872398525476456, "learning_rate": 2.2217715728301003e-12, "loss": 0.0554, "step": 7176 }, { "epoch": 1.0, "grad_norm": 0.08827116340398788, "learning_rate": 5.554429238774361e-13, "loss": 0.0493, "step": 7177 }, { "epoch": 1.0, "step": 7177, "total_flos": 5.326107668090192e+19, "train_loss": 0.06837210558760759, "train_runtime": 61656.6252, "train_samples_per_second": 29.797, "train_steps_per_second": 0.116 } ], "logging_steps": 1.0, "max_steps": 7177, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5.326107668090192e+19, "train_batch_size": 4, "trial_name": null, "trial_params": null }