{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 7177, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00013934369121438027, "grad_norm": 68.35494995117188, "learning_rate": 0.0, "loss": 0.741943359375, "step": 1 }, { "epoch": 0.00027868738242876054, "grad_norm": 84.05387878417969, "learning_rate": 1.953125e-08, "loss": 0.7530517578125, "step": 2 }, { "epoch": 0.0004180310736431408, "grad_norm": 80.63208770751953, "learning_rate": 3.90625e-08, "loss": 0.748779296875, "step": 3 }, { "epoch": 0.0005573747648575211, "grad_norm": 82.2960433959961, "learning_rate": 5.859375000000001e-08, "loss": 0.74957275390625, "step": 4 }, { "epoch": 0.0006967184560719013, "grad_norm": 86.4842300415039, "learning_rate": 7.8125e-08, "loss": 0.7559814453125, "step": 5 }, { "epoch": 0.0008360621472862816, "grad_norm": 80.18133544921875, "learning_rate": 9.765625e-08, "loss": 0.74969482421875, "step": 6 }, { "epoch": 0.0009754058385006619, "grad_norm": 86.44905853271484, "learning_rate": 1.1718750000000002e-07, "loss": 0.75323486328125, "step": 7 }, { "epoch": 0.0011147495297150422, "grad_norm": 86.52738189697266, "learning_rate": 1.3671875000000001e-07, "loss": 0.75494384765625, "step": 8 }, { "epoch": 0.0012540932209294225, "grad_norm": 84.33077239990234, "learning_rate": 1.5625e-07, "loss": 0.752197265625, "step": 9 }, { "epoch": 0.0013934369121438026, "grad_norm": 84.99915313720703, "learning_rate": 1.7578125e-07, "loss": 0.7545166015625, "step": 10 }, { "epoch": 0.001532780603358183, "grad_norm": 83.90425109863281, "learning_rate": 1.953125e-07, "loss": 0.75311279296875, "step": 11 }, { "epoch": 0.0016721242945725633, "grad_norm": 82.61958312988281, "learning_rate": 2.1484375e-07, "loss": 0.7503662109375, "step": 12 }, { "epoch": 0.0018114679857869436, "grad_norm": 83.66773223876953, "learning_rate": 2.3437500000000003e-07, "loss": 0.75018310546875, "step": 13 }, { "epoch": 0.0019508116770013237, "grad_norm": 85.87238311767578, "learning_rate": 2.5390625000000003e-07, "loss": 0.752685546875, "step": 14 }, { "epoch": 0.0020901553682157042, "grad_norm": 81.63400268554688, "learning_rate": 2.7343750000000003e-07, "loss": 0.74896240234375, "step": 15 }, { "epoch": 0.0022294990594300844, "grad_norm": 83.39278411865234, "learning_rate": 2.9296875000000003e-07, "loss": 0.74932861328125, "step": 16 }, { "epoch": 0.0023688427506444645, "grad_norm": 81.18522644042969, "learning_rate": 3.125e-07, "loss": 0.75, "step": 17 }, { "epoch": 0.002508186441858845, "grad_norm": 84.5507583618164, "learning_rate": 3.3203125e-07, "loss": 0.75177001953125, "step": 18 }, { "epoch": 0.002647530133073225, "grad_norm": 87.61405181884766, "learning_rate": 3.515625e-07, "loss": 0.75408935546875, "step": 19 }, { "epoch": 0.0027868738242876052, "grad_norm": 76.9505844116211, "learning_rate": 3.7109375e-07, "loss": 0.74542236328125, "step": 20 }, { "epoch": 0.0029262175155019858, "grad_norm": 81.5386734008789, "learning_rate": 3.90625e-07, "loss": 0.74603271484375, "step": 21 }, { "epoch": 0.003065561206716366, "grad_norm": 83.53208923339844, "learning_rate": 4.1015625e-07, "loss": 0.7440185546875, "step": 22 }, { "epoch": 0.003204904897930746, "grad_norm": 85.16510772705078, "learning_rate": 4.296875e-07, "loss": 0.74395751953125, "step": 23 }, { "epoch": 0.0033442485891451265, "grad_norm": 80.42221069335938, "learning_rate": 4.4921875e-07, "loss": 0.73895263671875, "step": 24 }, { "epoch": 0.0034835922803595066, "grad_norm": 79.36744689941406, "learning_rate": 4.6875000000000006e-07, "loss": 0.7376708984375, "step": 25 }, { "epoch": 0.003622935971573887, "grad_norm": 79.33958435058594, "learning_rate": 4.8828125e-07, "loss": 0.7379150390625, "step": 26 }, { "epoch": 0.0037622796627882673, "grad_norm": 80.06280517578125, "learning_rate": 5.078125000000001e-07, "loss": 0.7359619140625, "step": 27 }, { "epoch": 0.0039016233540026474, "grad_norm": 85.73165893554688, "learning_rate": 5.2734375e-07, "loss": 0.7406005859375, "step": 28 }, { "epoch": 0.0040409670452170275, "grad_norm": 81.08663177490234, "learning_rate": 5.468750000000001e-07, "loss": 0.73516845703125, "step": 29 }, { "epoch": 0.0041803107364314085, "grad_norm": 82.48782348632812, "learning_rate": 5.6640625e-07, "loss": 0.72515869140625, "step": 30 }, { "epoch": 0.004319654427645789, "grad_norm": 81.84295654296875, "learning_rate": 5.859375000000001e-07, "loss": 0.7235107421875, "step": 31 }, { "epoch": 0.004458998118860169, "grad_norm": 84.53002166748047, "learning_rate": 6.0546875e-07, "loss": 0.7294921875, "step": 32 }, { "epoch": 0.004598341810074549, "grad_norm": 81.94404602050781, "learning_rate": 6.25e-07, "loss": 0.704345703125, "step": 33 }, { "epoch": 0.004737685501288929, "grad_norm": 73.15337371826172, "learning_rate": 6.445312500000001e-07, "loss": 0.70013427734375, "step": 34 }, { "epoch": 0.004877029192503309, "grad_norm": 76.82156372070312, "learning_rate": 6.640625e-07, "loss": 0.6981201171875, "step": 35 }, { "epoch": 0.00501637288371769, "grad_norm": 78.95547485351562, "learning_rate": 6.835937500000001e-07, "loss": 0.69671630859375, "step": 36 }, { "epoch": 0.00515571657493207, "grad_norm": 74.64219665527344, "learning_rate": 7.03125e-07, "loss": 0.69329833984375, "step": 37 }, { "epoch": 0.00529506026614645, "grad_norm": 75.75146484375, "learning_rate": 7.226562500000001e-07, "loss": 0.690185546875, "step": 38 }, { "epoch": 0.00543440395736083, "grad_norm": 81.55589294433594, "learning_rate": 7.421875e-07, "loss": 0.6893310546875, "step": 39 }, { "epoch": 0.0055737476485752105, "grad_norm": 79.96965026855469, "learning_rate": 7.617187500000001e-07, "loss": 0.68988037109375, "step": 40 }, { "epoch": 0.005713091339789591, "grad_norm": 71.0796127319336, "learning_rate": 7.8125e-07, "loss": 0.67755126953125, "step": 41 }, { "epoch": 0.0058524350310039715, "grad_norm": 71.3530502319336, "learning_rate": 8.007812500000001e-07, "loss": 0.66119384765625, "step": 42 }, { "epoch": 0.005991778722218352, "grad_norm": 66.5196762084961, "learning_rate": 8.203125e-07, "loss": 0.66119384765625, "step": 43 }, { "epoch": 0.006131122413432732, "grad_norm": 73.02437591552734, "learning_rate": 8.398437500000001e-07, "loss": 0.65582275390625, "step": 44 }, { "epoch": 0.006270466104647112, "grad_norm": 63.1795768737793, "learning_rate": 8.59375e-07, "loss": 0.65325927734375, "step": 45 }, { "epoch": 0.006409809795861492, "grad_norm": 66.8799819946289, "learning_rate": 8.789062500000001e-07, "loss": 0.60980224609375, "step": 46 }, { "epoch": 0.006549153487075873, "grad_norm": 53.9091682434082, "learning_rate": 8.984375e-07, "loss": 0.60491943359375, "step": 47 }, { "epoch": 0.006688497178290253, "grad_norm": 58.41167449951172, "learning_rate": 9.179687500000001e-07, "loss": 0.5887451171875, "step": 48 }, { "epoch": 0.006827840869504633, "grad_norm": 65.39273071289062, "learning_rate": 9.375000000000001e-07, "loss": 0.5853271484375, "step": 49 }, { "epoch": 0.006967184560719013, "grad_norm": 55.16810989379883, "learning_rate": 9.570312500000002e-07, "loss": 0.58526611328125, "step": 50 }, { "epoch": 0.007106528251933393, "grad_norm": 59.427310943603516, "learning_rate": 9.765625e-07, "loss": 0.58319091796875, "step": 51 }, { "epoch": 0.007245871943147774, "grad_norm": 54.97341537475586, "learning_rate": 9.9609375e-07, "loss": 0.57757568359375, "step": 52 }, { "epoch": 0.0073852156343621545, "grad_norm": 60.58080291748047, "learning_rate": 1.0156250000000001e-06, "loss": 0.5634765625, "step": 53 }, { "epoch": 0.007524559325576535, "grad_norm": 54.51409912109375, "learning_rate": 1.0351562500000002e-06, "loss": 0.5738525390625, "step": 54 }, { "epoch": 0.007663903016790915, "grad_norm": 57.841087341308594, "learning_rate": 1.0546875e-06, "loss": 0.5626220703125, "step": 55 }, { "epoch": 0.007803246708005295, "grad_norm": 47.817543029785156, "learning_rate": 1.07421875e-06, "loss": 0.5706787109375, "step": 56 }, { "epoch": 0.007942590399219676, "grad_norm": 48.63961410522461, "learning_rate": 1.0937500000000001e-06, "loss": 0.550537109375, "step": 57 }, { "epoch": 0.008081934090434055, "grad_norm": 54.6886100769043, "learning_rate": 1.1132812500000002e-06, "loss": 0.527252197265625, "step": 58 }, { "epoch": 0.008221277781648436, "grad_norm": 53.075199127197266, "learning_rate": 1.1328125e-06, "loss": 0.51593017578125, "step": 59 }, { "epoch": 0.008360621472862817, "grad_norm": 46.40181350708008, "learning_rate": 1.15234375e-06, "loss": 0.509307861328125, "step": 60 }, { "epoch": 0.008499965164077196, "grad_norm": 42.75079345703125, "learning_rate": 1.1718750000000001e-06, "loss": 0.49481201171875, "step": 61 }, { "epoch": 0.008639308855291577, "grad_norm": 41.0912971496582, "learning_rate": 1.1914062500000002e-06, "loss": 0.470489501953125, "step": 62 }, { "epoch": 0.008778652546505956, "grad_norm": 32.980804443359375, "learning_rate": 1.2109375e-06, "loss": 0.481658935546875, "step": 63 }, { "epoch": 0.008917996237720337, "grad_norm": 36.74175262451172, "learning_rate": 1.23046875e-06, "loss": 0.436431884765625, "step": 64 }, { "epoch": 0.009057339928934717, "grad_norm": 34.02714920043945, "learning_rate": 1.25e-06, "loss": 0.43756103515625, "step": 65 }, { "epoch": 0.009196683620149098, "grad_norm": 37.49550247192383, "learning_rate": 1.2695312500000002e-06, "loss": 0.402618408203125, "step": 66 }, { "epoch": 0.009336027311363479, "grad_norm": 40.40102005004883, "learning_rate": 1.2890625000000002e-06, "loss": 0.389862060546875, "step": 67 }, { "epoch": 0.009475371002577858, "grad_norm": 39.036800384521484, "learning_rate": 1.30859375e-06, "loss": 0.37738037109375, "step": 68 }, { "epoch": 0.009614714693792239, "grad_norm": 28.47911834716797, "learning_rate": 1.328125e-06, "loss": 0.430755615234375, "step": 69 }, { "epoch": 0.009754058385006618, "grad_norm": 35.620765686035156, "learning_rate": 1.3476562500000001e-06, "loss": 0.37652587890625, "step": 70 }, { "epoch": 0.009893402076220999, "grad_norm": 25.777910232543945, "learning_rate": 1.3671875000000002e-06, "loss": 0.422210693359375, "step": 71 }, { "epoch": 0.01003274576743538, "grad_norm": 34.11002731323242, "learning_rate": 1.38671875e-06, "loss": 0.367279052734375, "step": 72 }, { "epoch": 0.01017208945864976, "grad_norm": 20.286418914794922, "learning_rate": 1.40625e-06, "loss": 0.4461669921875, "step": 73 }, { "epoch": 0.01031143314986414, "grad_norm": 32.067298889160156, "learning_rate": 1.4257812500000001e-06, "loss": 0.37225341796875, "step": 74 }, { "epoch": 0.01045077684107852, "grad_norm": 34.308658599853516, "learning_rate": 1.4453125000000002e-06, "loss": 0.341064453125, "step": 75 }, { "epoch": 0.0105901205322929, "grad_norm": 20.289152145385742, "learning_rate": 1.46484375e-06, "loss": 0.41522216796875, "step": 76 }, { "epoch": 0.010729464223507281, "grad_norm": 25.78588104248047, "learning_rate": 1.484375e-06, "loss": 0.36761474609375, "step": 77 }, { "epoch": 0.01086880791472166, "grad_norm": 27.436697006225586, "learning_rate": 1.5039062500000001e-06, "loss": 0.34478759765625, "step": 78 }, { "epoch": 0.011008151605936042, "grad_norm": 23.519006729125977, "learning_rate": 1.5234375000000002e-06, "loss": 0.3582763671875, "step": 79 }, { "epoch": 0.011147495297150421, "grad_norm": 20.55748176574707, "learning_rate": 1.54296875e-06, "loss": 0.36572265625, "step": 80 }, { "epoch": 0.011286838988364802, "grad_norm": 14.371522903442383, "learning_rate": 1.5625e-06, "loss": 0.40362548828125, "step": 81 }, { "epoch": 0.011426182679579183, "grad_norm": 13.883323669433594, "learning_rate": 1.5820312500000001e-06, "loss": 0.39520263671875, "step": 82 }, { "epoch": 0.011565526370793562, "grad_norm": 9.703386306762695, "learning_rate": 1.6015625000000002e-06, "loss": 0.419036865234375, "step": 83 }, { "epoch": 0.011704870062007943, "grad_norm": 17.911293029785156, "learning_rate": 1.6210937500000002e-06, "loss": 0.3396759033203125, "step": 84 }, { "epoch": 0.011844213753222322, "grad_norm": 16.994115829467773, "learning_rate": 1.640625e-06, "loss": 0.337615966796875, "step": 85 }, { "epoch": 0.011983557444436703, "grad_norm": 17.18427848815918, "learning_rate": 1.6601562500000001e-06, "loss": 0.3290557861328125, "step": 86 }, { "epoch": 0.012122901135651083, "grad_norm": 18.99462890625, "learning_rate": 1.6796875000000002e-06, "loss": 0.3018646240234375, "step": 87 }, { "epoch": 0.012262244826865464, "grad_norm": 10.68571662902832, "learning_rate": 1.6992187500000002e-06, "loss": 0.36529541015625, "step": 88 }, { "epoch": 0.012401588518079844, "grad_norm": 14.161504745483398, "learning_rate": 1.71875e-06, "loss": 0.3247528076171875, "step": 89 }, { "epoch": 0.012540932209294224, "grad_norm": 14.623936653137207, "learning_rate": 1.7382812500000001e-06, "loss": 0.30584716796875, "step": 90 }, { "epoch": 0.012680275900508605, "grad_norm": 11.805484771728516, "learning_rate": 1.7578125000000002e-06, "loss": 0.32391357421875, "step": 91 }, { "epoch": 0.012819619591722984, "grad_norm": 15.4630708694458, "learning_rate": 1.7773437500000002e-06, "loss": 0.2793731689453125, "step": 92 }, { "epoch": 0.012958963282937365, "grad_norm": 3.7045085430145264, "learning_rate": 1.796875e-06, "loss": 0.408935546875, "step": 93 }, { "epoch": 0.013098306974151746, "grad_norm": 12.460589408874512, "learning_rate": 1.81640625e-06, "loss": 0.2960357666015625, "step": 94 }, { "epoch": 0.013237650665366125, "grad_norm": 13.185916900634766, "learning_rate": 1.8359375000000002e-06, "loss": 0.2888641357421875, "step": 95 }, { "epoch": 0.013376994356580506, "grad_norm": 14.953057289123535, "learning_rate": 1.8554687500000002e-06, "loss": 0.27215576171875, "step": 96 }, { "epoch": 0.013516338047794885, "grad_norm": 11.212820053100586, "learning_rate": 1.8750000000000003e-06, "loss": 0.3022003173828125, "step": 97 }, { "epoch": 0.013655681739009266, "grad_norm": 11.601651191711426, "learning_rate": 1.89453125e-06, "loss": 0.2960052490234375, "step": 98 }, { "epoch": 0.013795025430223647, "grad_norm": 5.1889777183532715, "learning_rate": 1.9140625000000004e-06, "loss": 0.3502044677734375, "step": 99 }, { "epoch": 0.013934369121438027, "grad_norm": 8.941661834716797, "learning_rate": 1.93359375e-06, "loss": 0.2930755615234375, "step": 100 }, { "epoch": 0.014073712812652408, "grad_norm": 11.271244049072266, "learning_rate": 1.953125e-06, "loss": 0.2547149658203125, "step": 101 }, { "epoch": 0.014213056503866787, "grad_norm": 3.522622585296631, "learning_rate": 1.97265625e-06, "loss": 0.365203857421875, "step": 102 }, { "epoch": 0.014352400195081168, "grad_norm": 7.349270820617676, "learning_rate": 1.9921875e-06, "loss": 0.3033294677734375, "step": 103 }, { "epoch": 0.014491743886295549, "grad_norm": 6.06632661819458, "learning_rate": 2.01171875e-06, "loss": 0.3130340576171875, "step": 104 }, { "epoch": 0.014631087577509928, "grad_norm": 10.517955780029297, "learning_rate": 2.0312500000000002e-06, "loss": 0.2613525390625, "step": 105 }, { "epoch": 0.014770431268724309, "grad_norm": 5.6757307052612305, "learning_rate": 2.0507812500000003e-06, "loss": 0.3197479248046875, "step": 106 }, { "epoch": 0.014909774959938688, "grad_norm": 7.207823276519775, "learning_rate": 2.0703125000000003e-06, "loss": 0.2822418212890625, "step": 107 }, { "epoch": 0.01504911865115307, "grad_norm": 9.90378475189209, "learning_rate": 2.08984375e-06, "loss": 0.2250823974609375, "step": 108 }, { "epoch": 0.01518846234236745, "grad_norm": 5.886512756347656, "learning_rate": 2.109375e-06, "loss": 0.2859649658203125, "step": 109 }, { "epoch": 0.01532780603358183, "grad_norm": 5.922928333282471, "learning_rate": 2.12890625e-06, "loss": 0.282562255859375, "step": 110 }, { "epoch": 0.01546714972479621, "grad_norm": 7.973358631134033, "learning_rate": 2.1484375e-06, "loss": 0.2565460205078125, "step": 111 }, { "epoch": 0.01560649341601059, "grad_norm": 4.881793022155762, "learning_rate": 2.16796875e-06, "loss": 0.2964324951171875, "step": 112 }, { "epoch": 0.01574583710722497, "grad_norm": 4.358090877532959, "learning_rate": 2.1875000000000002e-06, "loss": 0.3502349853515625, "step": 113 }, { "epoch": 0.01588518079843935, "grad_norm": 7.518368244171143, "learning_rate": 2.2070312500000003e-06, "loss": 0.2879180908203125, "step": 114 }, { "epoch": 0.016024524489653733, "grad_norm": 7.775466442108154, "learning_rate": 2.2265625000000003e-06, "loss": 0.250885009765625, "step": 115 }, { "epoch": 0.01616386818086811, "grad_norm": 8.459100723266602, "learning_rate": 2.2460937500000004e-06, "loss": 0.3305206298828125, "step": 116 }, { "epoch": 0.01630321187208249, "grad_norm": 8.193920135498047, "learning_rate": 2.265625e-06, "loss": 0.2454986572265625, "step": 117 }, { "epoch": 0.016442555563296872, "grad_norm": 11.520269393920898, "learning_rate": 2.28515625e-06, "loss": 0.232940673828125, "step": 118 }, { "epoch": 0.016581899254511253, "grad_norm": 13.670186042785645, "learning_rate": 2.3046875e-06, "loss": 0.2822113037109375, "step": 119 }, { "epoch": 0.016721242945725634, "grad_norm": 6.1685791015625, "learning_rate": 2.32421875e-06, "loss": 0.3016510009765625, "step": 120 }, { "epoch": 0.01686058663694001, "grad_norm": 11.714713096618652, "learning_rate": 2.3437500000000002e-06, "loss": 0.3345184326171875, "step": 121 }, { "epoch": 0.016999930328154392, "grad_norm": 9.635237693786621, "learning_rate": 2.3632812500000003e-06, "loss": 0.243072509765625, "step": 122 }, { "epoch": 0.017139274019368773, "grad_norm": 11.123957633972168, "learning_rate": 2.3828125000000003e-06, "loss": 0.2427215576171875, "step": 123 }, { "epoch": 0.017278617710583154, "grad_norm": 11.280086517333984, "learning_rate": 2.4023437500000004e-06, "loss": 0.2865142822265625, "step": 124 }, { "epoch": 0.017417961401797532, "grad_norm": 8.971766471862793, "learning_rate": 2.421875e-06, "loss": 0.22045135498046875, "step": 125 }, { "epoch": 0.017557305093011913, "grad_norm": 7.7800397872924805, "learning_rate": 2.44140625e-06, "loss": 0.2339935302734375, "step": 126 }, { "epoch": 0.017696648784226294, "grad_norm": 15.217044830322266, "learning_rate": 2.4609375e-06, "loss": 0.27880859375, "step": 127 }, { "epoch": 0.017835992475440675, "grad_norm": 10.86860466003418, "learning_rate": 2.48046875e-06, "loss": 0.27020263671875, "step": 128 }, { "epoch": 0.017975336166655056, "grad_norm": 14.109737396240234, "learning_rate": 2.5e-06, "loss": 0.2345733642578125, "step": 129 }, { "epoch": 0.018114679857869433, "grad_norm": 19.318281173706055, "learning_rate": 2.5195312500000003e-06, "loss": 0.20600128173828125, "step": 130 }, { "epoch": 0.018254023549083814, "grad_norm": 15.905678749084473, "learning_rate": 2.5390625000000003e-06, "loss": 0.2763519287109375, "step": 131 }, { "epoch": 0.018393367240298195, "grad_norm": 17.408435821533203, "learning_rate": 2.5585937500000004e-06, "loss": 0.31676483154296875, "step": 132 }, { "epoch": 0.018532710931512576, "grad_norm": 13.256052017211914, "learning_rate": 2.5781250000000004e-06, "loss": 0.189727783203125, "step": 133 }, { "epoch": 0.018672054622726957, "grad_norm": 20.911108016967773, "learning_rate": 2.59765625e-06, "loss": 0.21178436279296875, "step": 134 }, { "epoch": 0.018811398313941335, "grad_norm": 18.483245849609375, "learning_rate": 2.6171875e-06, "loss": 0.2153778076171875, "step": 135 }, { "epoch": 0.018950742005155716, "grad_norm": 6.049556255340576, "learning_rate": 2.63671875e-06, "loss": 0.2985076904296875, "step": 136 }, { "epoch": 0.019090085696370097, "grad_norm": 18.854267120361328, "learning_rate": 2.65625e-06, "loss": 0.24414825439453125, "step": 137 }, { "epoch": 0.019229429387584478, "grad_norm": 19.55357551574707, "learning_rate": 2.6757812500000002e-06, "loss": 0.2570648193359375, "step": 138 }, { "epoch": 0.01936877307879886, "grad_norm": 7.835501670837402, "learning_rate": 2.6953125000000003e-06, "loss": 0.26766204833984375, "step": 139 }, { "epoch": 0.019508116770013236, "grad_norm": 10.696989059448242, "learning_rate": 2.7148437500000003e-06, "loss": 0.20166778564453125, "step": 140 }, { "epoch": 0.019647460461227617, "grad_norm": 19.63593101501465, "learning_rate": 2.7343750000000004e-06, "loss": 0.299468994140625, "step": 141 }, { "epoch": 0.019786804152441998, "grad_norm": 5.826798915863037, "learning_rate": 2.75390625e-06, "loss": 0.23632049560546875, "step": 142 }, { "epoch": 0.01992614784365638, "grad_norm": 13.42160701751709, "learning_rate": 2.7734375e-06, "loss": 0.28849029541015625, "step": 143 }, { "epoch": 0.02006549153487076, "grad_norm": 9.09771728515625, "learning_rate": 2.79296875e-06, "loss": 0.24741363525390625, "step": 144 }, { "epoch": 0.020204835226085138, "grad_norm": 7.207635879516602, "learning_rate": 2.8125e-06, "loss": 0.2219085693359375, "step": 145 }, { "epoch": 0.02034417891729952, "grad_norm": 15.632699966430664, "learning_rate": 2.8320312500000002e-06, "loss": 0.20798492431640625, "step": 146 }, { "epoch": 0.0204835226085139, "grad_norm": 5.595261573791504, "learning_rate": 2.8515625000000003e-06, "loss": 0.25344085693359375, "step": 147 }, { "epoch": 0.02062286629972828, "grad_norm": 13.693769454956055, "learning_rate": 2.8710937500000003e-06, "loss": 0.219146728515625, "step": 148 }, { "epoch": 0.02076220999094266, "grad_norm": 16.934677124023438, "learning_rate": 2.8906250000000004e-06, "loss": 0.19277191162109375, "step": 149 }, { "epoch": 0.02090155368215704, "grad_norm": 7.718424320220947, "learning_rate": 2.9101562500000004e-06, "loss": 0.2061309814453125, "step": 150 }, { "epoch": 0.02104089737337142, "grad_norm": 10.528800010681152, "learning_rate": 2.9296875e-06, "loss": 0.265777587890625, "step": 151 }, { "epoch": 0.0211802410645858, "grad_norm": 11.900863647460938, "learning_rate": 2.94921875e-06, "loss": 0.25640869140625, "step": 152 }, { "epoch": 0.021319584755800182, "grad_norm": 8.182567596435547, "learning_rate": 2.96875e-06, "loss": 0.1594696044921875, "step": 153 }, { "epoch": 0.021458928447014563, "grad_norm": 5.898952484130859, "learning_rate": 2.9882812500000002e-06, "loss": 0.22032928466796875, "step": 154 }, { "epoch": 0.02159827213822894, "grad_norm": 4.803685665130615, "learning_rate": 3.0078125000000003e-06, "loss": 0.2268524169921875, "step": 155 }, { "epoch": 0.02173761582944332, "grad_norm": 5.28781270980835, "learning_rate": 3.0273437500000003e-06, "loss": 0.200714111328125, "step": 156 }, { "epoch": 0.021876959520657702, "grad_norm": 7.769278526306152, "learning_rate": 3.0468750000000004e-06, "loss": 0.20618438720703125, "step": 157 }, { "epoch": 0.022016303211872083, "grad_norm": 8.119647026062012, "learning_rate": 3.0664062500000004e-06, "loss": 0.2869110107421875, "step": 158 }, { "epoch": 0.022155646903086464, "grad_norm": 6.1786208152771, "learning_rate": 3.0859375e-06, "loss": 0.181976318359375, "step": 159 }, { "epoch": 0.022294990594300842, "grad_norm": 7.1285600662231445, "learning_rate": 3.10546875e-06, "loss": 0.25788116455078125, "step": 160 }, { "epoch": 0.022434334285515223, "grad_norm": 7.072371482849121, "learning_rate": 3.125e-06, "loss": 0.2634429931640625, "step": 161 }, { "epoch": 0.022573677976729604, "grad_norm": 5.4215006828308105, "learning_rate": 3.14453125e-06, "loss": 0.16474151611328125, "step": 162 }, { "epoch": 0.022713021667943985, "grad_norm": 9.127823829650879, "learning_rate": 3.1640625000000003e-06, "loss": 0.2030792236328125, "step": 163 }, { "epoch": 0.022852365359158366, "grad_norm": 12.090044975280762, "learning_rate": 3.1835937500000003e-06, "loss": 0.15362548828125, "step": 164 }, { "epoch": 0.022991709050372743, "grad_norm": 10.323309898376465, "learning_rate": 3.2031250000000004e-06, "loss": 0.19815826416015625, "step": 165 }, { "epoch": 0.023131052741587124, "grad_norm": 16.43946647644043, "learning_rate": 3.2226562500000004e-06, "loss": 0.22174072265625, "step": 166 }, { "epoch": 0.023270396432801505, "grad_norm": 16.29372215270996, "learning_rate": 3.2421875000000005e-06, "loss": 0.214202880859375, "step": 167 }, { "epoch": 0.023409740124015886, "grad_norm": 7.276315212249756, "learning_rate": 3.26171875e-06, "loss": 0.1411895751953125, "step": 168 }, { "epoch": 0.023549083815230267, "grad_norm": 18.32369613647461, "learning_rate": 3.28125e-06, "loss": 0.17213058471679688, "step": 169 }, { "epoch": 0.023688427506444645, "grad_norm": 19.348413467407227, "learning_rate": 3.30078125e-06, "loss": 0.1541748046875, "step": 170 }, { "epoch": 0.023827771197659026, "grad_norm": 13.204751014709473, "learning_rate": 3.3203125000000002e-06, "loss": 0.22714996337890625, "step": 171 }, { "epoch": 0.023967114888873407, "grad_norm": 8.605999946594238, "learning_rate": 3.3398437500000003e-06, "loss": 0.14795684814453125, "step": 172 }, { "epoch": 0.024106458580087788, "grad_norm": 10.686951637268066, "learning_rate": 3.3593750000000003e-06, "loss": 0.17398834228515625, "step": 173 }, { "epoch": 0.024245802271302165, "grad_norm": 8.047852516174316, "learning_rate": 3.3789062500000004e-06, "loss": 0.19377899169921875, "step": 174 }, { "epoch": 0.024385145962516546, "grad_norm": 14.08549976348877, "learning_rate": 3.3984375000000004e-06, "loss": 0.2244720458984375, "step": 175 }, { "epoch": 0.024524489653730927, "grad_norm": 18.280128479003906, "learning_rate": 3.41796875e-06, "loss": 0.19191741943359375, "step": 176 }, { "epoch": 0.024663833344945308, "grad_norm": 6.978715419769287, "learning_rate": 3.4375e-06, "loss": 0.14148712158203125, "step": 177 }, { "epoch": 0.02480317703615969, "grad_norm": 18.080406188964844, "learning_rate": 3.45703125e-06, "loss": 0.17270660400390625, "step": 178 }, { "epoch": 0.024942520727374067, "grad_norm": 22.01258087158203, "learning_rate": 3.4765625000000002e-06, "loss": 0.17096710205078125, "step": 179 }, { "epoch": 0.025081864418588447, "grad_norm": 15.491872787475586, "learning_rate": 3.4960937500000003e-06, "loss": 0.12880706787109375, "step": 180 }, { "epoch": 0.02522120810980283, "grad_norm": 3.813991069793701, "learning_rate": 3.5156250000000003e-06, "loss": 0.17453765869140625, "step": 181 }, { "epoch": 0.02536055180101721, "grad_norm": 9.4974365234375, "learning_rate": 3.5351562500000004e-06, "loss": 0.21686553955078125, "step": 182 }, { "epoch": 0.02549989549223159, "grad_norm": 11.350061416625977, "learning_rate": 3.5546875000000004e-06, "loss": 0.26969146728515625, "step": 183 }, { "epoch": 0.025639239183445968, "grad_norm": 6.153392791748047, "learning_rate": 3.5742187500000005e-06, "loss": 0.21497344970703125, "step": 184 }, { "epoch": 0.02577858287466035, "grad_norm": 11.463932991027832, "learning_rate": 3.59375e-06, "loss": 0.17822265625, "step": 185 }, { "epoch": 0.02591792656587473, "grad_norm": 12.559348106384277, "learning_rate": 3.61328125e-06, "loss": 0.20806884765625, "step": 186 }, { "epoch": 0.02605727025708911, "grad_norm": 3.625437021255493, "learning_rate": 3.6328125e-06, "loss": 0.2114105224609375, "step": 187 }, { "epoch": 0.026196613948303492, "grad_norm": 17.932340621948242, "learning_rate": 3.6523437500000003e-06, "loss": 0.21163177490234375, "step": 188 }, { "epoch": 0.02633595763951787, "grad_norm": 13.19356918334961, "learning_rate": 3.6718750000000003e-06, "loss": 0.1602020263671875, "step": 189 }, { "epoch": 0.02647530133073225, "grad_norm": 5.894471168518066, "learning_rate": 3.6914062500000004e-06, "loss": 0.21329498291015625, "step": 190 }, { "epoch": 0.02661464502194663, "grad_norm": 10.870020866394043, "learning_rate": 3.7109375000000004e-06, "loss": 0.15550994873046875, "step": 191 }, { "epoch": 0.026753988713161012, "grad_norm": 14.274733543395996, "learning_rate": 3.7304687500000005e-06, "loss": 0.2457275390625, "step": 192 }, { "epoch": 0.026893332404375393, "grad_norm": 11.851292610168457, "learning_rate": 3.7500000000000005e-06, "loss": 0.2808685302734375, "step": 193 }, { "epoch": 0.02703267609558977, "grad_norm": 13.567766189575195, "learning_rate": 3.76953125e-06, "loss": 0.1654052734375, "step": 194 }, { "epoch": 0.02717201978680415, "grad_norm": 16.627771377563477, "learning_rate": 3.7890625e-06, "loss": 0.21094512939453125, "step": 195 }, { "epoch": 0.027311363478018533, "grad_norm": 10.149715423583984, "learning_rate": 3.8085937500000002e-06, "loss": 0.14745330810546875, "step": 196 }, { "epoch": 0.027450707169232914, "grad_norm": 9.29069709777832, "learning_rate": 3.828125000000001e-06, "loss": 0.2246856689453125, "step": 197 }, { "epoch": 0.027590050860447295, "grad_norm": 8.808156967163086, "learning_rate": 3.84765625e-06, "loss": 0.18689727783203125, "step": 198 }, { "epoch": 0.027729394551661672, "grad_norm": 5.878643035888672, "learning_rate": 3.8671875e-06, "loss": 0.17896270751953125, "step": 199 }, { "epoch": 0.027868738242876053, "grad_norm": 11.250216484069824, "learning_rate": 3.88671875e-06, "loss": 0.22203826904296875, "step": 200 }, { "epoch": 0.028008081934090434, "grad_norm": 10.842925071716309, "learning_rate": 3.90625e-06, "loss": 0.1611175537109375, "step": 201 }, { "epoch": 0.028147425625304815, "grad_norm": 6.182948589324951, "learning_rate": 3.92578125e-06, "loss": 0.11178970336914062, "step": 202 }, { "epoch": 0.028286769316519196, "grad_norm": 5.736722946166992, "learning_rate": 3.9453125e-06, "loss": 0.1240386962890625, "step": 203 }, { "epoch": 0.028426113007733574, "grad_norm": 8.489317893981934, "learning_rate": 3.96484375e-06, "loss": 0.147186279296875, "step": 204 }, { "epoch": 0.028565456698947955, "grad_norm": 5.713868618011475, "learning_rate": 3.984375e-06, "loss": 0.16083526611328125, "step": 205 }, { "epoch": 0.028704800390162336, "grad_norm": 6.102829456329346, "learning_rate": 4.00390625e-06, "loss": 0.1366424560546875, "step": 206 }, { "epoch": 0.028844144081376717, "grad_norm": 8.997658729553223, "learning_rate": 4.0234375e-06, "loss": 0.16748809814453125, "step": 207 }, { "epoch": 0.028983487772591097, "grad_norm": 8.503252983093262, "learning_rate": 4.0429687500000004e-06, "loss": 0.16446304321289062, "step": 208 }, { "epoch": 0.029122831463805475, "grad_norm": 6.848926067352295, "learning_rate": 4.0625000000000005e-06, "loss": 0.1138916015625, "step": 209 }, { "epoch": 0.029262175155019856, "grad_norm": 7.528169631958008, "learning_rate": 4.0820312500000005e-06, "loss": 0.2125091552734375, "step": 210 }, { "epoch": 0.029401518846234237, "grad_norm": 8.364323616027832, "learning_rate": 4.101562500000001e-06, "loss": 0.14452362060546875, "step": 211 }, { "epoch": 0.029540862537448618, "grad_norm": 7.650020122528076, "learning_rate": 4.121093750000001e-06, "loss": 0.16576766967773438, "step": 212 }, { "epoch": 0.029680206228663, "grad_norm": 3.5509536266326904, "learning_rate": 4.140625000000001e-06, "loss": 0.18918609619140625, "step": 213 }, { "epoch": 0.029819549919877376, "grad_norm": 10.501336097717285, "learning_rate": 4.160156250000001e-06, "loss": 0.21966934204101562, "step": 214 }, { "epoch": 0.029958893611091757, "grad_norm": 6.057239532470703, "learning_rate": 4.1796875e-06, "loss": 0.2366790771484375, "step": 215 }, { "epoch": 0.03009823730230614, "grad_norm": 9.071110725402832, "learning_rate": 4.19921875e-06, "loss": 0.24114990234375, "step": 216 }, { "epoch": 0.03023758099352052, "grad_norm": 8.935094833374023, "learning_rate": 4.21875e-06, "loss": 0.16864395141601562, "step": 217 }, { "epoch": 0.0303769246847349, "grad_norm": 6.636025905609131, "learning_rate": 4.23828125e-06, "loss": 0.18006134033203125, "step": 218 }, { "epoch": 0.030516268375949278, "grad_norm": 2.495985507965088, "learning_rate": 4.2578125e-06, "loss": 0.1280670166015625, "step": 219 }, { "epoch": 0.03065561206716366, "grad_norm": 4.954185962677002, "learning_rate": 4.27734375e-06, "loss": 0.15920257568359375, "step": 220 }, { "epoch": 0.03079495575837804, "grad_norm": 5.448329448699951, "learning_rate": 4.296875e-06, "loss": 0.19134521484375, "step": 221 }, { "epoch": 0.03093429944959242, "grad_norm": 9.991975784301758, "learning_rate": 4.31640625e-06, "loss": 0.2651481628417969, "step": 222 }, { "epoch": 0.031073643140806798, "grad_norm": 9.014793395996094, "learning_rate": 4.3359375e-06, "loss": 0.22442626953125, "step": 223 }, { "epoch": 0.03121298683202118, "grad_norm": 12.686395645141602, "learning_rate": 4.35546875e-06, "loss": 0.1631622314453125, "step": 224 }, { "epoch": 0.031352330523235564, "grad_norm": 11.868584632873535, "learning_rate": 4.3750000000000005e-06, "loss": 0.170806884765625, "step": 225 }, { "epoch": 0.03149167421444994, "grad_norm": 8.01285457611084, "learning_rate": 4.3945312500000005e-06, "loss": 0.16197967529296875, "step": 226 }, { "epoch": 0.03163101790566432, "grad_norm": 9.051870346069336, "learning_rate": 4.4140625000000006e-06, "loss": 0.15195083618164062, "step": 227 }, { "epoch": 0.0317703615968787, "grad_norm": 3.0590474605560303, "learning_rate": 4.433593750000001e-06, "loss": 0.20590972900390625, "step": 228 }, { "epoch": 0.03190970528809308, "grad_norm": 9.498433113098145, "learning_rate": 4.453125000000001e-06, "loss": 0.22304534912109375, "step": 229 }, { "epoch": 0.032049048979307465, "grad_norm": 10.462127685546875, "learning_rate": 4.472656250000001e-06, "loss": 0.25177001953125, "step": 230 }, { "epoch": 0.03218839267052184, "grad_norm": 5.5637030601501465, "learning_rate": 4.492187500000001e-06, "loss": 0.1548919677734375, "step": 231 }, { "epoch": 0.03232773636173622, "grad_norm": 6.1535725593566895, "learning_rate": 4.51171875e-06, "loss": 0.2370452880859375, "step": 232 }, { "epoch": 0.032467080052950605, "grad_norm": 9.472496032714844, "learning_rate": 4.53125e-06, "loss": 0.15172576904296875, "step": 233 }, { "epoch": 0.03260642374416498, "grad_norm": 21.69154930114746, "learning_rate": 4.55078125e-06, "loss": 0.2596168518066406, "step": 234 }, { "epoch": 0.032745767435379367, "grad_norm": 5.061357021331787, "learning_rate": 4.5703125e-06, "loss": 0.1684417724609375, "step": 235 }, { "epoch": 0.032885111126593744, "grad_norm": 11.299346923828125, "learning_rate": 4.58984375e-06, "loss": 0.14146804809570312, "step": 236 }, { "epoch": 0.03302445481780812, "grad_norm": 3.601768970489502, "learning_rate": 4.609375e-06, "loss": 0.13920974731445312, "step": 237 }, { "epoch": 0.033163798509022506, "grad_norm": 6.4150471687316895, "learning_rate": 4.62890625e-06, "loss": 0.14637374877929688, "step": 238 }, { "epoch": 0.033303142200236883, "grad_norm": 5.1752519607543945, "learning_rate": 4.6484375e-06, "loss": 0.10382843017578125, "step": 239 }, { "epoch": 0.03344248589145127, "grad_norm": 3.5020318031311035, "learning_rate": 4.66796875e-06, "loss": 0.18653106689453125, "step": 240 }, { "epoch": 0.033581829582665645, "grad_norm": 5.40152645111084, "learning_rate": 4.6875000000000004e-06, "loss": 0.13255691528320312, "step": 241 }, { "epoch": 0.03372117327388002, "grad_norm": 3.929950714111328, "learning_rate": 4.7070312500000005e-06, "loss": 0.1297760009765625, "step": 242 }, { "epoch": 0.03386051696509441, "grad_norm": 11.20995044708252, "learning_rate": 4.7265625000000005e-06, "loss": 0.206085205078125, "step": 243 }, { "epoch": 0.033999860656308785, "grad_norm": 7.998402118682861, "learning_rate": 4.746093750000001e-06, "loss": 0.20669937133789062, "step": 244 }, { "epoch": 0.03413920434752317, "grad_norm": 6.067149639129639, "learning_rate": 4.765625000000001e-06, "loss": 0.165802001953125, "step": 245 }, { "epoch": 0.03427854803873755, "grad_norm": 4.830921173095703, "learning_rate": 4.785156250000001e-06, "loss": 0.09593582153320312, "step": 246 }, { "epoch": 0.034417891729951924, "grad_norm": 5.768091201782227, "learning_rate": 4.804687500000001e-06, "loss": 0.14981842041015625, "step": 247 }, { "epoch": 0.03455723542116631, "grad_norm": 5.530703067779541, "learning_rate": 4.824218750000001e-06, "loss": 0.16669464111328125, "step": 248 }, { "epoch": 0.034696579112380686, "grad_norm": 3.173926830291748, "learning_rate": 4.84375e-06, "loss": 0.206146240234375, "step": 249 }, { "epoch": 0.034835922803595064, "grad_norm": 5.437123775482178, "learning_rate": 4.86328125e-06, "loss": 0.1277618408203125, "step": 250 }, { "epoch": 0.03497526649480945, "grad_norm": 7.042880058288574, "learning_rate": 4.8828125e-06, "loss": 0.15764236450195312, "step": 251 }, { "epoch": 0.035114610186023826, "grad_norm": 6.963073253631592, "learning_rate": 4.90234375e-06, "loss": 0.23963165283203125, "step": 252 }, { "epoch": 0.03525395387723821, "grad_norm": 5.145940780639648, "learning_rate": 4.921875e-06, "loss": 0.19152069091796875, "step": 253 }, { "epoch": 0.03539329756845259, "grad_norm": 7.379619598388672, "learning_rate": 4.94140625e-06, "loss": 0.13663864135742188, "step": 254 }, { "epoch": 0.035532641259666965, "grad_norm": 4.53934383392334, "learning_rate": 4.9609375e-06, "loss": 0.17798614501953125, "step": 255 }, { "epoch": 0.03567198495088135, "grad_norm": 8.443085670471191, "learning_rate": 4.98046875e-06, "loss": 0.19706344604492188, "step": 256 }, { "epoch": 0.03581132864209573, "grad_norm": 4.311809539794922, "learning_rate": 5e-06, "loss": 0.17674636840820312, "step": 257 }, { "epoch": 0.03595067233331011, "grad_norm": 5.0607452392578125, "learning_rate": 5.0195312500000005e-06, "loss": 0.13150787353515625, "step": 258 }, { "epoch": 0.03609001602452449, "grad_norm": 8.619985580444336, "learning_rate": 5.0390625000000005e-06, "loss": 0.18846511840820312, "step": 259 }, { "epoch": 0.03622935971573887, "grad_norm": 6.483821868896484, "learning_rate": 5.0585937500000006e-06, "loss": 0.16686630249023438, "step": 260 }, { "epoch": 0.03636870340695325, "grad_norm": 7.281093120574951, "learning_rate": 5.078125000000001e-06, "loss": 0.17561721801757812, "step": 261 }, { "epoch": 0.03650804709816763, "grad_norm": 9.899968147277832, "learning_rate": 5.097656250000001e-06, "loss": 0.18521881103515625, "step": 262 }, { "epoch": 0.03664739078938201, "grad_norm": 3.4008100032806396, "learning_rate": 5.117187500000001e-06, "loss": 0.20846939086914062, "step": 263 }, { "epoch": 0.03678673448059639, "grad_norm": 6.725180625915527, "learning_rate": 5.136718750000001e-06, "loss": 0.20343017578125, "step": 264 }, { "epoch": 0.03692607817181077, "grad_norm": 8.565742492675781, "learning_rate": 5.156250000000001e-06, "loss": 0.21564102172851562, "step": 265 }, { "epoch": 0.03706542186302515, "grad_norm": 2.7858119010925293, "learning_rate": 5.17578125e-06, "loss": 0.1243438720703125, "step": 266 }, { "epoch": 0.03720476555423953, "grad_norm": 5.725818157196045, "learning_rate": 5.1953125e-06, "loss": 0.1251678466796875, "step": 267 }, { "epoch": 0.037344109245453914, "grad_norm": 12.146950721740723, "learning_rate": 5.21484375e-06, "loss": 0.1965789794921875, "step": 268 }, { "epoch": 0.03748345293666829, "grad_norm": 5.756470680236816, "learning_rate": 5.234375e-06, "loss": 0.13936996459960938, "step": 269 }, { "epoch": 0.03762279662788267, "grad_norm": 3.3918659687042236, "learning_rate": 5.25390625e-06, "loss": 0.1524505615234375, "step": 270 }, { "epoch": 0.037762140319097054, "grad_norm": 22.826784133911133, "learning_rate": 5.2734375e-06, "loss": 0.16909027099609375, "step": 271 }, { "epoch": 0.03790148401031143, "grad_norm": 12.304383277893066, "learning_rate": 5.29296875e-06, "loss": 0.17107772827148438, "step": 272 }, { "epoch": 0.038040827701525816, "grad_norm": 5.408809661865234, "learning_rate": 5.3125e-06, "loss": 0.1603851318359375, "step": 273 }, { "epoch": 0.03818017139274019, "grad_norm": 4.830539226531982, "learning_rate": 5.3320312500000004e-06, "loss": 0.11445999145507812, "step": 274 }, { "epoch": 0.03831951508395457, "grad_norm": 4.90142822265625, "learning_rate": 5.3515625000000005e-06, "loss": 0.150726318359375, "step": 275 }, { "epoch": 0.038458858775168955, "grad_norm": 3.035292387008667, "learning_rate": 5.3710937500000005e-06, "loss": 0.09768295288085938, "step": 276 }, { "epoch": 0.03859820246638333, "grad_norm": 4.580375671386719, "learning_rate": 5.390625000000001e-06, "loss": 0.17510223388671875, "step": 277 }, { "epoch": 0.03873754615759772, "grad_norm": 11.96704387664795, "learning_rate": 5.410156250000001e-06, "loss": 0.18767929077148438, "step": 278 }, { "epoch": 0.038876889848812095, "grad_norm": 2.7872259616851807, "learning_rate": 5.429687500000001e-06, "loss": 0.15422439575195312, "step": 279 }, { "epoch": 0.03901623354002647, "grad_norm": 4.3358588218688965, "learning_rate": 5.449218750000001e-06, "loss": 0.13988494873046875, "step": 280 }, { "epoch": 0.03915557723124086, "grad_norm": 5.167471885681152, "learning_rate": 5.468750000000001e-06, "loss": 0.18105697631835938, "step": 281 }, { "epoch": 0.039294920922455234, "grad_norm": 9.806900978088379, "learning_rate": 5.488281250000001e-06, "loss": 0.13212966918945312, "step": 282 }, { "epoch": 0.03943426461366962, "grad_norm": 12.551218032836914, "learning_rate": 5.5078125e-06, "loss": 0.18539810180664062, "step": 283 }, { "epoch": 0.039573608304883996, "grad_norm": 8.98375129699707, "learning_rate": 5.52734375e-06, "loss": 0.2766075134277344, "step": 284 }, { "epoch": 0.039712951996098374, "grad_norm": 10.094618797302246, "learning_rate": 5.546875e-06, "loss": 0.17274093627929688, "step": 285 }, { "epoch": 0.03985229568731276, "grad_norm": 6.698843002319336, "learning_rate": 5.56640625e-06, "loss": 0.17241287231445312, "step": 286 }, { "epoch": 0.039991639378527136, "grad_norm": 9.38508415222168, "learning_rate": 5.5859375e-06, "loss": 0.16141891479492188, "step": 287 }, { "epoch": 0.04013098306974152, "grad_norm": 2.8251891136169434, "learning_rate": 5.60546875e-06, "loss": 0.11837387084960938, "step": 288 }, { "epoch": 0.0402703267609559, "grad_norm": 9.905708312988281, "learning_rate": 5.625e-06, "loss": 0.15514373779296875, "step": 289 }, { "epoch": 0.040409670452170275, "grad_norm": 4.515154838562012, "learning_rate": 5.64453125e-06, "loss": 0.15529251098632812, "step": 290 }, { "epoch": 0.04054901414338466, "grad_norm": 8.912689208984375, "learning_rate": 5.6640625000000005e-06, "loss": 0.17760848999023438, "step": 291 }, { "epoch": 0.04068835783459904, "grad_norm": 10.442919731140137, "learning_rate": 5.6835937500000005e-06, "loss": 0.20956039428710938, "step": 292 }, { "epoch": 0.04082770152581342, "grad_norm": 3.931211233139038, "learning_rate": 5.7031250000000006e-06, "loss": 0.24493408203125, "step": 293 }, { "epoch": 0.0409670452170278, "grad_norm": 5.086414337158203, "learning_rate": 5.722656250000001e-06, "loss": 0.16294288635253906, "step": 294 }, { "epoch": 0.04110638890824218, "grad_norm": 6.5166850090026855, "learning_rate": 5.742187500000001e-06, "loss": 0.15177536010742188, "step": 295 }, { "epoch": 0.04124573259945656, "grad_norm": 12.635576248168945, "learning_rate": 5.761718750000001e-06, "loss": 0.20684814453125, "step": 296 }, { "epoch": 0.04138507629067094, "grad_norm": 4.308379650115967, "learning_rate": 5.781250000000001e-06, "loss": 0.128570556640625, "step": 297 }, { "epoch": 0.04152441998188532, "grad_norm": 5.704751491546631, "learning_rate": 5.800781250000001e-06, "loss": 0.10442733764648438, "step": 298 }, { "epoch": 0.0416637636730997, "grad_norm": 4.553186416625977, "learning_rate": 5.820312500000001e-06, "loss": 0.1034698486328125, "step": 299 }, { "epoch": 0.04180310736431408, "grad_norm": 10.681221008300781, "learning_rate": 5.83984375e-06, "loss": 0.17845535278320312, "step": 300 }, { "epoch": 0.04194245105552846, "grad_norm": 7.105865001678467, "learning_rate": 5.859375e-06, "loss": 0.210906982421875, "step": 301 }, { "epoch": 0.04208179474674284, "grad_norm": 8.547418594360352, "learning_rate": 5.87890625e-06, "loss": 0.18758010864257812, "step": 302 }, { "epoch": 0.042221138437957224, "grad_norm": 4.346088409423828, "learning_rate": 5.8984375e-06, "loss": 0.13190460205078125, "step": 303 }, { "epoch": 0.0423604821291716, "grad_norm": 7.519179344177246, "learning_rate": 5.91796875e-06, "loss": 0.23636627197265625, "step": 304 }, { "epoch": 0.04249982582038598, "grad_norm": 2.9944818019866943, "learning_rate": 5.9375e-06, "loss": 0.13605499267578125, "step": 305 }, { "epoch": 0.042639169511600364, "grad_norm": 19.403547286987305, "learning_rate": 5.95703125e-06, "loss": 0.2678680419921875, "step": 306 }, { "epoch": 0.04277851320281474, "grad_norm": 7.73319149017334, "learning_rate": 5.9765625000000004e-06, "loss": 0.1683502197265625, "step": 307 }, { "epoch": 0.042917856894029126, "grad_norm": 7.398472785949707, "learning_rate": 5.9960937500000005e-06, "loss": 0.13270187377929688, "step": 308 }, { "epoch": 0.0430572005852435, "grad_norm": 2.4699199199676514, "learning_rate": 6.0156250000000005e-06, "loss": 0.07991409301757812, "step": 309 }, { "epoch": 0.04319654427645788, "grad_norm": 25.520092010498047, "learning_rate": 6.035156250000001e-06, "loss": 0.232177734375, "step": 310 }, { "epoch": 0.043335887967672265, "grad_norm": 11.49552059173584, "learning_rate": 6.054687500000001e-06, "loss": 0.153900146484375, "step": 311 }, { "epoch": 0.04347523165888664, "grad_norm": 13.067058563232422, "learning_rate": 6.074218750000001e-06, "loss": 0.23683929443359375, "step": 312 }, { "epoch": 0.04361457535010103, "grad_norm": 16.978187561035156, "learning_rate": 6.093750000000001e-06, "loss": 0.16468429565429688, "step": 313 }, { "epoch": 0.043753919041315405, "grad_norm": 13.662372589111328, "learning_rate": 6.113281250000001e-06, "loss": 0.16987991333007812, "step": 314 }, { "epoch": 0.04389326273252978, "grad_norm": 12.126619338989258, "learning_rate": 6.132812500000001e-06, "loss": 0.17652130126953125, "step": 315 }, { "epoch": 0.04403260642374417, "grad_norm": 6.19202995300293, "learning_rate": 6.152343750000001e-06, "loss": 0.1519317626953125, "step": 316 }, { "epoch": 0.044171950114958544, "grad_norm": 4.712600231170654, "learning_rate": 6.171875e-06, "loss": 0.18951416015625, "step": 317 }, { "epoch": 0.04431129380617293, "grad_norm": 7.281041145324707, "learning_rate": 6.19140625e-06, "loss": 0.16849136352539062, "step": 318 }, { "epoch": 0.044450637497387306, "grad_norm": 7.862853527069092, "learning_rate": 6.2109375e-06, "loss": 0.15417098999023438, "step": 319 }, { "epoch": 0.044589981188601684, "grad_norm": 6.146832466125488, "learning_rate": 6.23046875e-06, "loss": 0.13824081420898438, "step": 320 }, { "epoch": 0.04472932487981607, "grad_norm": 6.811535358428955, "learning_rate": 6.25e-06, "loss": 0.13828659057617188, "step": 321 }, { "epoch": 0.044868668571030446, "grad_norm": 5.0624494552612305, "learning_rate": 6.26953125e-06, "loss": 0.13940811157226562, "step": 322 }, { "epoch": 0.04500801226224483, "grad_norm": 4.1468329429626465, "learning_rate": 6.2890625e-06, "loss": 0.17108154296875, "step": 323 }, { "epoch": 0.04514735595345921, "grad_norm": 5.912970542907715, "learning_rate": 6.3085937500000005e-06, "loss": 0.100311279296875, "step": 324 }, { "epoch": 0.045286699644673585, "grad_norm": 8.819735527038574, "learning_rate": 6.3281250000000005e-06, "loss": 0.17658233642578125, "step": 325 }, { "epoch": 0.04542604333588797, "grad_norm": 17.51801872253418, "learning_rate": 6.3476562500000006e-06, "loss": 0.18968582153320312, "step": 326 }, { "epoch": 0.04556538702710235, "grad_norm": 6.580379486083984, "learning_rate": 6.367187500000001e-06, "loss": 0.10309219360351562, "step": 327 }, { "epoch": 0.04570473071831673, "grad_norm": 7.000513553619385, "learning_rate": 6.386718750000001e-06, "loss": 0.1716766357421875, "step": 328 }, { "epoch": 0.04584407440953111, "grad_norm": 18.392086029052734, "learning_rate": 6.406250000000001e-06, "loss": 0.15927505493164062, "step": 329 }, { "epoch": 0.045983418100745486, "grad_norm": 9.248756408691406, "learning_rate": 6.425781250000001e-06, "loss": 0.17191696166992188, "step": 330 }, { "epoch": 0.04612276179195987, "grad_norm": 7.702888011932373, "learning_rate": 6.445312500000001e-06, "loss": 0.15683746337890625, "step": 331 }, { "epoch": 0.04626210548317425, "grad_norm": 11.261006355285645, "learning_rate": 6.464843750000001e-06, "loss": 0.13149642944335938, "step": 332 }, { "epoch": 0.04640144917438863, "grad_norm": 2.8427987098693848, "learning_rate": 6.484375000000001e-06, "loss": 0.1201629638671875, "step": 333 }, { "epoch": 0.04654079286560301, "grad_norm": 13.691452026367188, "learning_rate": 6.50390625e-06, "loss": 0.16332244873046875, "step": 334 }, { "epoch": 0.04668013655681739, "grad_norm": 5.589183807373047, "learning_rate": 6.5234375e-06, "loss": 0.16098403930664062, "step": 335 }, { "epoch": 0.04681948024803177, "grad_norm": 6.954538345336914, "learning_rate": 6.54296875e-06, "loss": 0.12490463256835938, "step": 336 }, { "epoch": 0.04695882393924615, "grad_norm": 4.5035247802734375, "learning_rate": 6.5625e-06, "loss": 0.10052871704101562, "step": 337 }, { "epoch": 0.047098167630460534, "grad_norm": 7.932315826416016, "learning_rate": 6.58203125e-06, "loss": 0.1541290283203125, "step": 338 }, { "epoch": 0.04723751132167491, "grad_norm": 6.410855293273926, "learning_rate": 6.6015625e-06, "loss": 0.12466049194335938, "step": 339 }, { "epoch": 0.04737685501288929, "grad_norm": 5.066309452056885, "learning_rate": 6.6210937500000004e-06, "loss": 0.10927963256835938, "step": 340 }, { "epoch": 0.047516198704103674, "grad_norm": 10.824272155761719, "learning_rate": 6.6406250000000005e-06, "loss": 0.15692901611328125, "step": 341 }, { "epoch": 0.04765554239531805, "grad_norm": 13.106494903564453, "learning_rate": 6.6601562500000005e-06, "loss": 0.19391250610351562, "step": 342 }, { "epoch": 0.047794886086532436, "grad_norm": 6.773313522338867, "learning_rate": 6.679687500000001e-06, "loss": 0.13434982299804688, "step": 343 }, { "epoch": 0.04793422977774681, "grad_norm": 9.352890968322754, "learning_rate": 6.699218750000001e-06, "loss": 0.16396331787109375, "step": 344 }, { "epoch": 0.04807357346896119, "grad_norm": 6.116921424865723, "learning_rate": 6.718750000000001e-06, "loss": 0.1432361602783203, "step": 345 }, { "epoch": 0.048212917160175575, "grad_norm": 5.947998523712158, "learning_rate": 6.738281250000001e-06, "loss": 0.12053871154785156, "step": 346 }, { "epoch": 0.04835226085138995, "grad_norm": 8.146012306213379, "learning_rate": 6.757812500000001e-06, "loss": 0.22043228149414062, "step": 347 }, { "epoch": 0.04849160454260433, "grad_norm": 3.7714684009552, "learning_rate": 6.777343750000001e-06, "loss": 0.13985824584960938, "step": 348 }, { "epoch": 0.048630948233818715, "grad_norm": 7.111119747161865, "learning_rate": 6.796875000000001e-06, "loss": 0.19112014770507812, "step": 349 }, { "epoch": 0.04877029192503309, "grad_norm": 5.1177568435668945, "learning_rate": 6.816406250000001e-06, "loss": 0.1302337646484375, "step": 350 }, { "epoch": 0.04890963561624748, "grad_norm": 4.6686201095581055, "learning_rate": 6.8359375e-06, "loss": 0.14802932739257812, "step": 351 }, { "epoch": 0.049048979307461854, "grad_norm": 3.694054365158081, "learning_rate": 6.85546875e-06, "loss": 0.11016082763671875, "step": 352 }, { "epoch": 0.04918832299867623, "grad_norm": 4.139405250549316, "learning_rate": 6.875e-06, "loss": 0.0958251953125, "step": 353 }, { "epoch": 0.049327666689890616, "grad_norm": 7.257689952850342, "learning_rate": 6.89453125e-06, "loss": 0.15290069580078125, "step": 354 }, { "epoch": 0.049467010381104994, "grad_norm": 9.240818977355957, "learning_rate": 6.9140625e-06, "loss": 0.11512374877929688, "step": 355 }, { "epoch": 0.04960635407231938, "grad_norm": 8.49925422668457, "learning_rate": 6.93359375e-06, "loss": 0.19433975219726562, "step": 356 }, { "epoch": 0.049745697763533755, "grad_norm": 11.691304206848145, "learning_rate": 6.9531250000000004e-06, "loss": 0.17238998413085938, "step": 357 }, { "epoch": 0.04988504145474813, "grad_norm": 8.109983444213867, "learning_rate": 6.9726562500000005e-06, "loss": 0.12975311279296875, "step": 358 }, { "epoch": 0.05002438514596252, "grad_norm": 2.629049777984619, "learning_rate": 6.9921875000000006e-06, "loss": 0.11673736572265625, "step": 359 }, { "epoch": 0.050163728837176895, "grad_norm": 16.54665184020996, "learning_rate": 7.011718750000001e-06, "loss": 0.21240997314453125, "step": 360 }, { "epoch": 0.05030307252839128, "grad_norm": 13.363916397094727, "learning_rate": 7.031250000000001e-06, "loss": 0.14391326904296875, "step": 361 }, { "epoch": 0.05044241621960566, "grad_norm": 11.484570503234863, "learning_rate": 7.050781250000001e-06, "loss": 0.17730331420898438, "step": 362 }, { "epoch": 0.050581759910820034, "grad_norm": 4.018231391906738, "learning_rate": 7.070312500000001e-06, "loss": 0.14871978759765625, "step": 363 }, { "epoch": 0.05072110360203442, "grad_norm": 5.214398384094238, "learning_rate": 7.089843750000001e-06, "loss": 0.13983535766601562, "step": 364 }, { "epoch": 0.050860447293248796, "grad_norm": 5.001184940338135, "learning_rate": 7.109375000000001e-06, "loss": 0.10845565795898438, "step": 365 }, { "epoch": 0.05099979098446318, "grad_norm": 7.348252296447754, "learning_rate": 7.128906250000001e-06, "loss": 0.1524658203125, "step": 366 }, { "epoch": 0.05113913467567756, "grad_norm": 6.935975551605225, "learning_rate": 7.148437500000001e-06, "loss": 0.13389205932617188, "step": 367 }, { "epoch": 0.051278478366891936, "grad_norm": 5.81451416015625, "learning_rate": 7.16796875e-06, "loss": 0.1551361083984375, "step": 368 }, { "epoch": 0.05141782205810632, "grad_norm": 12.967432975769043, "learning_rate": 7.1875e-06, "loss": 0.19011688232421875, "step": 369 }, { "epoch": 0.0515571657493207, "grad_norm": 8.961128234863281, "learning_rate": 7.20703125e-06, "loss": 0.18409347534179688, "step": 370 }, { "epoch": 0.05169650944053508, "grad_norm": 6.923586368560791, "learning_rate": 7.2265625e-06, "loss": 0.1577911376953125, "step": 371 }, { "epoch": 0.05183585313174946, "grad_norm": 2.009138584136963, "learning_rate": 7.24609375e-06, "loss": 0.07984542846679688, "step": 372 }, { "epoch": 0.05197519682296384, "grad_norm": 4.3188300132751465, "learning_rate": 7.265625e-06, "loss": 0.15604400634765625, "step": 373 }, { "epoch": 0.05211454051417822, "grad_norm": 6.211289405822754, "learning_rate": 7.2851562500000005e-06, "loss": 0.15728378295898438, "step": 374 }, { "epoch": 0.0522538842053926, "grad_norm": 4.765275955200195, "learning_rate": 7.3046875000000005e-06, "loss": 0.172515869140625, "step": 375 }, { "epoch": 0.052393227896606984, "grad_norm": 3.6731619834899902, "learning_rate": 7.3242187500000006e-06, "loss": 0.17987442016601562, "step": 376 }, { "epoch": 0.05253257158782136, "grad_norm": 3.9369845390319824, "learning_rate": 7.343750000000001e-06, "loss": 0.13933944702148438, "step": 377 }, { "epoch": 0.05267191527903574, "grad_norm": 7.812531471252441, "learning_rate": 7.363281250000001e-06, "loss": 0.1426239013671875, "step": 378 }, { "epoch": 0.05281125897025012, "grad_norm": 7.8352437019348145, "learning_rate": 7.382812500000001e-06, "loss": 0.2225933074951172, "step": 379 }, { "epoch": 0.0529506026614645, "grad_norm": 7.565308094024658, "learning_rate": 7.402343750000001e-06, "loss": 0.14449691772460938, "step": 380 }, { "epoch": 0.053089946352678885, "grad_norm": 7.301002502441406, "learning_rate": 7.421875000000001e-06, "loss": 0.12604522705078125, "step": 381 }, { "epoch": 0.05322929004389326, "grad_norm": 10.109261512756348, "learning_rate": 7.441406250000001e-06, "loss": 0.16047286987304688, "step": 382 }, { "epoch": 0.05336863373510764, "grad_norm": 9.735771179199219, "learning_rate": 7.460937500000001e-06, "loss": 0.1440582275390625, "step": 383 }, { "epoch": 0.053507977426322025, "grad_norm": 3.60077166557312, "learning_rate": 7.480468750000001e-06, "loss": 0.11254501342773438, "step": 384 }, { "epoch": 0.0536473211175364, "grad_norm": 9.131245613098145, "learning_rate": 7.500000000000001e-06, "loss": 0.12182998657226562, "step": 385 }, { "epoch": 0.053786664808750786, "grad_norm": 6.778835296630859, "learning_rate": 7.51953125e-06, "loss": 0.16750335693359375, "step": 386 }, { "epoch": 0.053926008499965164, "grad_norm": 3.9918320178985596, "learning_rate": 7.5390625e-06, "loss": 0.13628387451171875, "step": 387 }, { "epoch": 0.05406535219117954, "grad_norm": 18.808807373046875, "learning_rate": 7.55859375e-06, "loss": 0.2561454772949219, "step": 388 }, { "epoch": 0.054204695882393926, "grad_norm": 12.555989265441895, "learning_rate": 7.578125e-06, "loss": 0.13036155700683594, "step": 389 }, { "epoch": 0.0543440395736083, "grad_norm": 14.020085334777832, "learning_rate": 7.5976562500000004e-06, "loss": 0.16039276123046875, "step": 390 }, { "epoch": 0.05448338326482269, "grad_norm": 4.631503105163574, "learning_rate": 7.6171875000000005e-06, "loss": 0.13013839721679688, "step": 391 }, { "epoch": 0.054622726956037065, "grad_norm": 10.978066444396973, "learning_rate": 7.63671875e-06, "loss": 0.15363311767578125, "step": 392 }, { "epoch": 0.05476207064725144, "grad_norm": 4.148693561553955, "learning_rate": 7.656250000000001e-06, "loss": 0.1549530029296875, "step": 393 }, { "epoch": 0.05490141433846583, "grad_norm": 6.286527633666992, "learning_rate": 7.67578125e-06, "loss": 0.142669677734375, "step": 394 }, { "epoch": 0.055040758029680205, "grad_norm": 2.1567695140838623, "learning_rate": 7.6953125e-06, "loss": 0.1510162353515625, "step": 395 }, { "epoch": 0.05518010172089459, "grad_norm": 3.8652422428131104, "learning_rate": 7.71484375e-06, "loss": 0.10583114624023438, "step": 396 }, { "epoch": 0.05531944541210897, "grad_norm": 5.179031848907471, "learning_rate": 7.734375e-06, "loss": 0.14903640747070312, "step": 397 }, { "epoch": 0.055458789103323344, "grad_norm": 13.140238761901855, "learning_rate": 7.753906250000001e-06, "loss": 0.16473007202148438, "step": 398 }, { "epoch": 0.05559813279453773, "grad_norm": 8.461373329162598, "learning_rate": 7.7734375e-06, "loss": 0.12543487548828125, "step": 399 }, { "epoch": 0.055737476485752106, "grad_norm": 6.091463565826416, "learning_rate": 7.792968750000001e-06, "loss": 0.08975028991699219, "step": 400 }, { "epoch": 0.05587682017696649, "grad_norm": 21.730098724365234, "learning_rate": 7.8125e-06, "loss": 0.22025299072265625, "step": 401 }, { "epoch": 0.05601616386818087, "grad_norm": 5.358403205871582, "learning_rate": 7.832031250000001e-06, "loss": 0.09710693359375, "step": 402 }, { "epoch": 0.056155507559395246, "grad_norm": 6.081615924835205, "learning_rate": 7.8515625e-06, "loss": 0.19209671020507812, "step": 403 }, { "epoch": 0.05629485125060963, "grad_norm": 1.6371394395828247, "learning_rate": 7.871093750000001e-06, "loss": 0.09849166870117188, "step": 404 }, { "epoch": 0.05643419494182401, "grad_norm": 10.8544282913208, "learning_rate": 7.890625e-06, "loss": 0.16938018798828125, "step": 405 }, { "epoch": 0.05657353863303839, "grad_norm": 8.349696159362793, "learning_rate": 7.910156250000001e-06, "loss": 0.1298675537109375, "step": 406 }, { "epoch": 0.05671288232425277, "grad_norm": 5.7460527420043945, "learning_rate": 7.9296875e-06, "loss": 0.16898345947265625, "step": 407 }, { "epoch": 0.05685222601546715, "grad_norm": 9.518223762512207, "learning_rate": 7.949218750000001e-06, "loss": 0.13545989990234375, "step": 408 }, { "epoch": 0.05699156970668153, "grad_norm": 6.731402397155762, "learning_rate": 7.96875e-06, "loss": 0.11738967895507812, "step": 409 }, { "epoch": 0.05713091339789591, "grad_norm": 4.867839336395264, "learning_rate": 7.988281250000001e-06, "loss": 0.11629104614257812, "step": 410 }, { "epoch": 0.057270257089110294, "grad_norm": 10.383163452148438, "learning_rate": 8.0078125e-06, "loss": 0.15866470336914062, "step": 411 }, { "epoch": 0.05740960078032467, "grad_norm": 12.226064682006836, "learning_rate": 8.02734375e-06, "loss": 0.13211441040039062, "step": 412 }, { "epoch": 0.05754894447153905, "grad_norm": 10.200098037719727, "learning_rate": 8.046875e-06, "loss": 0.1467132568359375, "step": 413 }, { "epoch": 0.05768828816275343, "grad_norm": 16.913179397583008, "learning_rate": 8.06640625e-06, "loss": 0.16307449340820312, "step": 414 }, { "epoch": 0.05782763185396781, "grad_norm": 17.48457908630371, "learning_rate": 8.085937500000001e-06, "loss": 0.14197540283203125, "step": 415 }, { "epoch": 0.057966975545182195, "grad_norm": 6.679429531097412, "learning_rate": 8.10546875e-06, "loss": 0.11342239379882812, "step": 416 }, { "epoch": 0.05810631923639657, "grad_norm": 4.506830215454102, "learning_rate": 8.125000000000001e-06, "loss": 0.13917160034179688, "step": 417 }, { "epoch": 0.05824566292761095, "grad_norm": 8.367295265197754, "learning_rate": 8.14453125e-06, "loss": 0.18604278564453125, "step": 418 }, { "epoch": 0.058385006618825334, "grad_norm": 5.108609199523926, "learning_rate": 8.164062500000001e-06, "loss": 0.11978912353515625, "step": 419 }, { "epoch": 0.05852435031003971, "grad_norm": 3.678968667984009, "learning_rate": 8.18359375e-06, "loss": 0.12985610961914062, "step": 420 }, { "epoch": 0.058663694001254096, "grad_norm": 2.1380603313446045, "learning_rate": 8.203125000000001e-06, "loss": 0.1230621337890625, "step": 421 }, { "epoch": 0.058803037692468474, "grad_norm": 4.395849704742432, "learning_rate": 8.22265625e-06, "loss": 0.1041717529296875, "step": 422 }, { "epoch": 0.05894238138368285, "grad_norm": 12.321252822875977, "learning_rate": 8.242187500000001e-06, "loss": 0.1401214599609375, "step": 423 }, { "epoch": 0.059081725074897236, "grad_norm": 12.018882751464844, "learning_rate": 8.26171875e-06, "loss": 0.164947509765625, "step": 424 }, { "epoch": 0.05922106876611161, "grad_norm": 6.849151134490967, "learning_rate": 8.281250000000001e-06, "loss": 0.220855712890625, "step": 425 }, { "epoch": 0.059360412457326, "grad_norm": 12.337539672851562, "learning_rate": 8.30078125e-06, "loss": 0.2361602783203125, "step": 426 }, { "epoch": 0.059499756148540375, "grad_norm": 4.445042610168457, "learning_rate": 8.320312500000001e-06, "loss": 0.15053939819335938, "step": 427 }, { "epoch": 0.05963909983975475, "grad_norm": 3.047506332397461, "learning_rate": 8.33984375e-06, "loss": 0.15465164184570312, "step": 428 }, { "epoch": 0.05977844353096914, "grad_norm": 3.5426013469696045, "learning_rate": 8.359375e-06, "loss": 0.1783905029296875, "step": 429 }, { "epoch": 0.059917787222183515, "grad_norm": 8.39637279510498, "learning_rate": 8.37890625e-06, "loss": 0.09811019897460938, "step": 430 }, { "epoch": 0.0600571309133979, "grad_norm": 3.5945894718170166, "learning_rate": 8.3984375e-06, "loss": 0.121826171875, "step": 431 }, { "epoch": 0.06019647460461228, "grad_norm": 2.594871997833252, "learning_rate": 8.417968750000001e-06, "loss": 0.11317062377929688, "step": 432 }, { "epoch": 0.060335818295826654, "grad_norm": 2.993346691131592, "learning_rate": 8.4375e-06, "loss": 0.10511016845703125, "step": 433 }, { "epoch": 0.06047516198704104, "grad_norm": 5.767642498016357, "learning_rate": 8.457031250000001e-06, "loss": 0.1856689453125, "step": 434 }, { "epoch": 0.060614505678255416, "grad_norm": 4.801823616027832, "learning_rate": 8.4765625e-06, "loss": 0.114044189453125, "step": 435 }, { "epoch": 0.0607538493694698, "grad_norm": 1.6317639350891113, "learning_rate": 8.496093750000001e-06, "loss": 0.1136627197265625, "step": 436 }, { "epoch": 0.06089319306068418, "grad_norm": 3.602670431137085, "learning_rate": 8.515625e-06, "loss": 0.134674072265625, "step": 437 }, { "epoch": 0.061032536751898556, "grad_norm": 3.460202217102051, "learning_rate": 8.535156250000001e-06, "loss": 0.10312652587890625, "step": 438 }, { "epoch": 0.06117188044311294, "grad_norm": 5.560850143432617, "learning_rate": 8.5546875e-06, "loss": 0.10280227661132812, "step": 439 }, { "epoch": 0.06131122413432732, "grad_norm": 6.1726861000061035, "learning_rate": 8.574218750000001e-06, "loss": 0.15325164794921875, "step": 440 }, { "epoch": 0.0614505678255417, "grad_norm": 6.208412170410156, "learning_rate": 8.59375e-06, "loss": 0.14085769653320312, "step": 441 }, { "epoch": 0.06158991151675608, "grad_norm": 11.32680606842041, "learning_rate": 8.613281250000001e-06, "loss": 0.1794891357421875, "step": 442 }, { "epoch": 0.06172925520797046, "grad_norm": 4.965343475341797, "learning_rate": 8.6328125e-06, "loss": 0.16953277587890625, "step": 443 }, { "epoch": 0.06186859889918484, "grad_norm": 10.146510124206543, "learning_rate": 8.652343750000002e-06, "loss": 0.16817092895507812, "step": 444 }, { "epoch": 0.06200794259039922, "grad_norm": 12.709176063537598, "learning_rate": 8.671875e-06, "loss": 0.20774078369140625, "step": 445 }, { "epoch": 0.062147286281613597, "grad_norm": 3.6331989765167236, "learning_rate": 8.69140625e-06, "loss": 0.11433029174804688, "step": 446 }, { "epoch": 0.06228662997282798, "grad_norm": 5.102368354797363, "learning_rate": 8.7109375e-06, "loss": 0.13233566284179688, "step": 447 }, { "epoch": 0.06242597366404236, "grad_norm": 5.463651180267334, "learning_rate": 8.73046875e-06, "loss": 0.12487411499023438, "step": 448 }, { "epoch": 0.06256531735525674, "grad_norm": 5.565462112426758, "learning_rate": 8.750000000000001e-06, "loss": 0.12705230712890625, "step": 449 }, { "epoch": 0.06270466104647113, "grad_norm": 10.228673934936523, "learning_rate": 8.76953125e-06, "loss": 0.24814605712890625, "step": 450 }, { "epoch": 0.0628440047376855, "grad_norm": 5.053422451019287, "learning_rate": 8.789062500000001e-06, "loss": 0.20119094848632812, "step": 451 }, { "epoch": 0.06298334842889988, "grad_norm": 4.931256294250488, "learning_rate": 8.80859375e-06, "loss": 0.18230819702148438, "step": 452 }, { "epoch": 0.06312269212011426, "grad_norm": 3.784743547439575, "learning_rate": 8.828125000000001e-06, "loss": 0.12211227416992188, "step": 453 }, { "epoch": 0.06326203581132864, "grad_norm": 2.3690133094787598, "learning_rate": 8.84765625e-06, "loss": 0.11800384521484375, "step": 454 }, { "epoch": 0.06340137950254303, "grad_norm": 3.6212024688720703, "learning_rate": 8.867187500000001e-06, "loss": 0.09967422485351562, "step": 455 }, { "epoch": 0.0635407231937574, "grad_norm": 3.3933937549591064, "learning_rate": 8.88671875e-06, "loss": 0.09289169311523438, "step": 456 }, { "epoch": 0.06368006688497178, "grad_norm": 5.423181533813477, "learning_rate": 8.906250000000001e-06, "loss": 0.12906646728515625, "step": 457 }, { "epoch": 0.06381941057618616, "grad_norm": 16.90750503540039, "learning_rate": 8.92578125e-06, "loss": 0.1785717010498047, "step": 458 }, { "epoch": 0.06395875426740054, "grad_norm": 8.203119277954102, "learning_rate": 8.945312500000001e-06, "loss": 0.12935638427734375, "step": 459 }, { "epoch": 0.06409809795861493, "grad_norm": 11.418166160583496, "learning_rate": 8.96484375e-06, "loss": 0.19008636474609375, "step": 460 }, { "epoch": 0.06423744164982931, "grad_norm": 11.755461692810059, "learning_rate": 8.984375000000002e-06, "loss": 0.1816253662109375, "step": 461 }, { "epoch": 0.06437678534104369, "grad_norm": 5.348918914794922, "learning_rate": 9.00390625e-06, "loss": 0.12701034545898438, "step": 462 }, { "epoch": 0.06451612903225806, "grad_norm": 3.869060754776001, "learning_rate": 9.0234375e-06, "loss": 0.09430694580078125, "step": 463 }, { "epoch": 0.06465547272347244, "grad_norm": 4.048496246337891, "learning_rate": 9.042968750000001e-06, "loss": 0.09656524658203125, "step": 464 }, { "epoch": 0.06479481641468683, "grad_norm": 5.232170581817627, "learning_rate": 9.0625e-06, "loss": 0.102630615234375, "step": 465 }, { "epoch": 0.06493416010590121, "grad_norm": 12.312739372253418, "learning_rate": 9.082031250000001e-06, "loss": 0.1607666015625, "step": 466 }, { "epoch": 0.06507350379711559, "grad_norm": 11.251489639282227, "learning_rate": 9.1015625e-06, "loss": 0.21221542358398438, "step": 467 }, { "epoch": 0.06521284748832996, "grad_norm": 2.748323678970337, "learning_rate": 9.121093750000001e-06, "loss": 0.10125350952148438, "step": 468 }, { "epoch": 0.06535219117954434, "grad_norm": 2.5680441856384277, "learning_rate": 9.140625e-06, "loss": 0.122467041015625, "step": 469 }, { "epoch": 0.06549153487075873, "grad_norm": 2.381077766418457, "learning_rate": 9.160156250000001e-06, "loss": 0.09125900268554688, "step": 470 }, { "epoch": 0.06563087856197311, "grad_norm": 3.8618390560150146, "learning_rate": 9.1796875e-06, "loss": 0.14691925048828125, "step": 471 }, { "epoch": 0.06577022225318749, "grad_norm": 3.0958657264709473, "learning_rate": 9.199218750000001e-06, "loss": 0.15578842163085938, "step": 472 }, { "epoch": 0.06590956594440187, "grad_norm": 8.699210166931152, "learning_rate": 9.21875e-06, "loss": 0.16204071044921875, "step": 473 }, { "epoch": 0.06604890963561624, "grad_norm": 6.674229621887207, "learning_rate": 9.238281250000001e-06, "loss": 0.11976242065429688, "step": 474 }, { "epoch": 0.06618825332683063, "grad_norm": 5.649018287658691, "learning_rate": 9.2578125e-06, "loss": 0.15082550048828125, "step": 475 }, { "epoch": 0.06632759701804501, "grad_norm": 3.394219398498535, "learning_rate": 9.277343750000001e-06, "loss": 0.10911369323730469, "step": 476 }, { "epoch": 0.06646694070925939, "grad_norm": 6.292479038238525, "learning_rate": 9.296875e-06, "loss": 0.1534576416015625, "step": 477 }, { "epoch": 0.06660628440047377, "grad_norm": 3.742297887802124, "learning_rate": 9.316406250000002e-06, "loss": 0.09996795654296875, "step": 478 }, { "epoch": 0.06674562809168814, "grad_norm": 7.1079792976379395, "learning_rate": 9.3359375e-06, "loss": 0.18710708618164062, "step": 479 }, { "epoch": 0.06688497178290254, "grad_norm": 12.647974014282227, "learning_rate": 9.35546875e-06, "loss": 0.18231582641601562, "step": 480 }, { "epoch": 0.06702431547411691, "grad_norm": 8.408488273620605, "learning_rate": 9.375000000000001e-06, "loss": 0.16270065307617188, "step": 481 }, { "epoch": 0.06716365916533129, "grad_norm": 11.801240921020508, "learning_rate": 9.39453125e-06, "loss": 0.14190673828125, "step": 482 }, { "epoch": 0.06730300285654567, "grad_norm": 6.0307416915893555, "learning_rate": 9.414062500000001e-06, "loss": 0.18597412109375, "step": 483 }, { "epoch": 0.06744234654776005, "grad_norm": 4.593260288238525, "learning_rate": 9.43359375e-06, "loss": 0.14778900146484375, "step": 484 }, { "epoch": 0.06758169023897444, "grad_norm": 5.288080215454102, "learning_rate": 9.453125000000001e-06, "loss": 0.16090011596679688, "step": 485 }, { "epoch": 0.06772103393018881, "grad_norm": 1.872069239616394, "learning_rate": 9.47265625e-06, "loss": 0.11014747619628906, "step": 486 }, { "epoch": 0.06786037762140319, "grad_norm": 4.302060127258301, "learning_rate": 9.492187500000001e-06, "loss": 0.11877059936523438, "step": 487 }, { "epoch": 0.06799972131261757, "grad_norm": 4.9464569091796875, "learning_rate": 9.51171875e-06, "loss": 0.12305068969726562, "step": 488 }, { "epoch": 0.06813906500383195, "grad_norm": 2.3734323978424072, "learning_rate": 9.531250000000001e-06, "loss": 0.10615921020507812, "step": 489 }, { "epoch": 0.06827840869504634, "grad_norm": 14.136625289916992, "learning_rate": 9.55078125e-06, "loss": 0.14084243774414062, "step": 490 }, { "epoch": 0.06841775238626072, "grad_norm": 12.254860877990723, "learning_rate": 9.570312500000001e-06, "loss": 0.13855743408203125, "step": 491 }, { "epoch": 0.0685570960774751, "grad_norm": 3.937863349914551, "learning_rate": 9.58984375e-06, "loss": 0.1295013427734375, "step": 492 }, { "epoch": 0.06869643976868947, "grad_norm": 5.559098720550537, "learning_rate": 9.609375000000001e-06, "loss": 0.10406303405761719, "step": 493 }, { "epoch": 0.06883578345990385, "grad_norm": 3.487945318222046, "learning_rate": 9.62890625e-06, "loss": 0.21099853515625, "step": 494 }, { "epoch": 0.06897512715111823, "grad_norm": 2.7169878482818604, "learning_rate": 9.648437500000002e-06, "loss": 0.11754989624023438, "step": 495 }, { "epoch": 0.06911447084233262, "grad_norm": 3.5485458374023438, "learning_rate": 9.66796875e-06, "loss": 0.09869575500488281, "step": 496 }, { "epoch": 0.069253814533547, "grad_norm": 2.993523120880127, "learning_rate": 9.6875e-06, "loss": 0.07935905456542969, "step": 497 }, { "epoch": 0.06939315822476137, "grad_norm": 6.837823390960693, "learning_rate": 9.707031250000001e-06, "loss": 0.13780975341796875, "step": 498 }, { "epoch": 0.06953250191597575, "grad_norm": 4.282668113708496, "learning_rate": 9.7265625e-06, "loss": 0.13189315795898438, "step": 499 }, { "epoch": 0.06967184560719013, "grad_norm": 9.870418548583984, "learning_rate": 9.746093750000001e-06, "loss": 0.1633453369140625, "step": 500 }, { "epoch": 0.06981118929840452, "grad_norm": 5.6105804443359375, "learning_rate": 9.765625e-06, "loss": 0.12067794799804688, "step": 501 }, { "epoch": 0.0699505329896189, "grad_norm": 9.181720733642578, "learning_rate": 9.785156250000001e-06, "loss": 0.17663192749023438, "step": 502 }, { "epoch": 0.07008987668083327, "grad_norm": 4.199966907501221, "learning_rate": 9.8046875e-06, "loss": 0.13761520385742188, "step": 503 }, { "epoch": 0.07022922037204765, "grad_norm": 4.245943546295166, "learning_rate": 9.824218750000001e-06, "loss": 0.13050079345703125, "step": 504 }, { "epoch": 0.07036856406326203, "grad_norm": 9.801031112670898, "learning_rate": 9.84375e-06, "loss": 0.1554107666015625, "step": 505 }, { "epoch": 0.07050790775447642, "grad_norm": 11.036959648132324, "learning_rate": 9.863281250000001e-06, "loss": 0.15795516967773438, "step": 506 }, { "epoch": 0.0706472514456908, "grad_norm": 4.15101432800293, "learning_rate": 9.8828125e-06, "loss": 0.14223480224609375, "step": 507 }, { "epoch": 0.07078659513690518, "grad_norm": 10.496186256408691, "learning_rate": 9.902343750000001e-06, "loss": 0.13386917114257812, "step": 508 }, { "epoch": 0.07092593882811955, "grad_norm": 4.582158088684082, "learning_rate": 9.921875e-06, "loss": 0.10603904724121094, "step": 509 }, { "epoch": 0.07106528251933393, "grad_norm": 2.6217150688171387, "learning_rate": 9.941406250000002e-06, "loss": 0.12618255615234375, "step": 510 }, { "epoch": 0.07120462621054832, "grad_norm": 12.85193157196045, "learning_rate": 9.9609375e-06, "loss": 0.18337631225585938, "step": 511 }, { "epoch": 0.0713439699017627, "grad_norm": 11.564126968383789, "learning_rate": 9.980468750000002e-06, "loss": 0.16335678100585938, "step": 512 }, { "epoch": 0.07148331359297708, "grad_norm": 4.551219940185547, "learning_rate": 1e-05, "loss": 0.1472320556640625, "step": 513 }, { "epoch": 0.07162265728419145, "grad_norm": 3.5331125259399414, "learning_rate": 9.999999444557077e-06, "loss": 0.11478805541992188, "step": 514 }, { "epoch": 0.07176200097540583, "grad_norm": 2.5557548999786377, "learning_rate": 9.999997778228428e-06, "loss": 0.11708259582519531, "step": 515 }, { "epoch": 0.07190134466662022, "grad_norm": 11.993180274963379, "learning_rate": 9.999995001014424e-06, "loss": 0.160919189453125, "step": 516 }, { "epoch": 0.0720406883578346, "grad_norm": 15.335415840148926, "learning_rate": 9.999991112915685e-06, "loss": 0.18131637573242188, "step": 517 }, { "epoch": 0.07218003204904898, "grad_norm": 16.299345016479492, "learning_rate": 9.999986113933071e-06, "loss": 0.24446487426757812, "step": 518 }, { "epoch": 0.07231937574026336, "grad_norm": 3.6203243732452393, "learning_rate": 9.999980004067694e-06, "loss": 0.16176986694335938, "step": 519 }, { "epoch": 0.07245871943147773, "grad_norm": 2.5277347564697266, "learning_rate": 9.99997278332091e-06, "loss": 0.12752532958984375, "step": 520 }, { "epoch": 0.07259806312269212, "grad_norm": 6.865196704864502, "learning_rate": 9.999964451694328e-06, "loss": 0.15906524658203125, "step": 521 }, { "epoch": 0.0727374068139065, "grad_norm": 3.861334800720215, "learning_rate": 9.999955009189795e-06, "loss": 0.10079193115234375, "step": 522 }, { "epoch": 0.07287675050512088, "grad_norm": 5.46802282333374, "learning_rate": 9.999944455809408e-06, "loss": 0.17410659790039062, "step": 523 }, { "epoch": 0.07301609419633526, "grad_norm": 4.298651695251465, "learning_rate": 9.999932791555516e-06, "loss": 0.151336669921875, "step": 524 }, { "epoch": 0.07315543788754963, "grad_norm": 4.687849044799805, "learning_rate": 9.999920016430706e-06, "loss": 0.1749267578125, "step": 525 }, { "epoch": 0.07329478157876403, "grad_norm": 11.04818058013916, "learning_rate": 9.99990613043782e-06, "loss": 0.16864013671875, "step": 526 }, { "epoch": 0.0734341252699784, "grad_norm": 4.083409786224365, "learning_rate": 9.999891133579941e-06, "loss": 0.10359954833984375, "step": 527 }, { "epoch": 0.07357346896119278, "grad_norm": 2.7435271739959717, "learning_rate": 9.999875025860401e-06, "loss": 0.12073898315429688, "step": 528 }, { "epoch": 0.07371281265240716, "grad_norm": 4.880257606506348, "learning_rate": 9.99985780728278e-06, "loss": 0.221710205078125, "step": 529 }, { "epoch": 0.07385215634362154, "grad_norm": 7.100545883178711, "learning_rate": 9.999839477850903e-06, "loss": 0.12413787841796875, "step": 530 }, { "epoch": 0.07399150003483593, "grad_norm": 3.207688093185425, "learning_rate": 9.999820037568844e-06, "loss": 0.11678695678710938, "step": 531 }, { "epoch": 0.0741308437260503, "grad_norm": 3.9212756156921387, "learning_rate": 9.999799486440917e-06, "loss": 0.15315628051757812, "step": 532 }, { "epoch": 0.07427018741726468, "grad_norm": 3.043498992919922, "learning_rate": 9.999777824471694e-06, "loss": 0.09430503845214844, "step": 533 }, { "epoch": 0.07440953110847906, "grad_norm": 4.625790596008301, "learning_rate": 9.999755051665985e-06, "loss": 0.09774589538574219, "step": 534 }, { "epoch": 0.07454887479969344, "grad_norm": 9.962725639343262, "learning_rate": 9.99973116802885e-06, "loss": 0.13122177124023438, "step": 535 }, { "epoch": 0.07468821849090783, "grad_norm": 4.982052803039551, "learning_rate": 9.999706173565594e-06, "loss": 0.10751724243164062, "step": 536 }, { "epoch": 0.0748275621821222, "grad_norm": 8.14294147491455, "learning_rate": 9.999680068281773e-06, "loss": 0.13558387756347656, "step": 537 }, { "epoch": 0.07496690587333658, "grad_norm": 3.4529833793640137, "learning_rate": 9.999652852183184e-06, "loss": 0.1653289794921875, "step": 538 }, { "epoch": 0.07510624956455096, "grad_norm": 20.135299682617188, "learning_rate": 9.999624525275875e-06, "loss": 0.18907546997070312, "step": 539 }, { "epoch": 0.07524559325576534, "grad_norm": 16.93981170654297, "learning_rate": 9.99959508756614e-06, "loss": 0.17448806762695312, "step": 540 }, { "epoch": 0.07538493694697973, "grad_norm": 5.9071736335754395, "learning_rate": 9.99956453906052e-06, "loss": 0.10242271423339844, "step": 541 }, { "epoch": 0.07552428063819411, "grad_norm": 2.2539429664611816, "learning_rate": 9.999532879765801e-06, "loss": 0.134735107421875, "step": 542 }, { "epoch": 0.07566362432940849, "grad_norm": 7.951721668243408, "learning_rate": 9.999500109689018e-06, "loss": 0.1572704315185547, "step": 543 }, { "epoch": 0.07580296802062286, "grad_norm": 5.617295265197754, "learning_rate": 9.999466228837452e-06, "loss": 0.09231948852539062, "step": 544 }, { "epoch": 0.07594231171183724, "grad_norm": 4.809814453125, "learning_rate": 9.999431237218629e-06, "loss": 0.12733078002929688, "step": 545 }, { "epoch": 0.07608165540305163, "grad_norm": 4.948379039764404, "learning_rate": 9.999395134840323e-06, "loss": 0.16418838500976562, "step": 546 }, { "epoch": 0.07622099909426601, "grad_norm": 2.957818031311035, "learning_rate": 9.999357921710557e-06, "loss": 0.08749008178710938, "step": 547 }, { "epoch": 0.07636034278548039, "grad_norm": 9.300163269042969, "learning_rate": 9.999319597837599e-06, "loss": 0.16336441040039062, "step": 548 }, { "epoch": 0.07649968647669476, "grad_norm": 5.376430034637451, "learning_rate": 9.99928016322996e-06, "loss": 0.14243125915527344, "step": 549 }, { "epoch": 0.07663903016790914, "grad_norm": 5.098387241363525, "learning_rate": 9.999239617896406e-06, "loss": 0.1824798583984375, "step": 550 }, { "epoch": 0.07677837385912353, "grad_norm": 2.710820436477661, "learning_rate": 9.999197961845943e-06, "loss": 0.1353740692138672, "step": 551 }, { "epoch": 0.07691771755033791, "grad_norm": 11.104676246643066, "learning_rate": 9.999155195087826e-06, "loss": 0.15334129333496094, "step": 552 }, { "epoch": 0.07705706124155229, "grad_norm": 9.511804580688477, "learning_rate": 9.999111317631559e-06, "loss": 0.16564178466796875, "step": 553 }, { "epoch": 0.07719640493276667, "grad_norm": 26.391714096069336, "learning_rate": 9.999066329486888e-06, "loss": 0.22072982788085938, "step": 554 }, { "epoch": 0.07733574862398104, "grad_norm": 1.6427117586135864, "learning_rate": 9.999020230663809e-06, "loss": 0.08692741394042969, "step": 555 }, { "epoch": 0.07747509231519543, "grad_norm": 6.634900093078613, "learning_rate": 9.998973021172564e-06, "loss": 0.13315582275390625, "step": 556 }, { "epoch": 0.07761443600640981, "grad_norm": 2.292752265930176, "learning_rate": 9.998924701023645e-06, "loss": 0.1636505126953125, "step": 557 }, { "epoch": 0.07775377969762419, "grad_norm": 2.5345330238342285, "learning_rate": 9.998875270227781e-06, "loss": 0.14969253540039062, "step": 558 }, { "epoch": 0.07789312338883857, "grad_norm": 1.9385415315628052, "learning_rate": 9.99882472879596e-06, "loss": 0.11063003540039062, "step": 559 }, { "epoch": 0.07803246708005294, "grad_norm": 2.1842827796936035, "learning_rate": 9.998773076739409e-06, "loss": 0.1460552215576172, "step": 560 }, { "epoch": 0.07817181077126734, "grad_norm": 4.729969024658203, "learning_rate": 9.998720314069606e-06, "loss": 0.1074676513671875, "step": 561 }, { "epoch": 0.07831115446248171, "grad_norm": 9.703096389770508, "learning_rate": 9.99866644079827e-06, "loss": 0.19107437133789062, "step": 562 }, { "epoch": 0.07845049815369609, "grad_norm": 8.122536659240723, "learning_rate": 9.998611456937373e-06, "loss": 0.16602325439453125, "step": 563 }, { "epoch": 0.07858984184491047, "grad_norm": 8.987259864807129, "learning_rate": 9.99855536249913e-06, "loss": 0.2034473419189453, "step": 564 }, { "epoch": 0.07872918553612485, "grad_norm": 5.7487897872924805, "learning_rate": 9.998498157496004e-06, "loss": 0.14210128784179688, "step": 565 }, { "epoch": 0.07886852922733924, "grad_norm": 3.7944343090057373, "learning_rate": 9.998439841940706e-06, "loss": 0.09049224853515625, "step": 566 }, { "epoch": 0.07900787291855361, "grad_norm": 6.10442590713501, "learning_rate": 9.998380415846191e-06, "loss": 0.142669677734375, "step": 567 }, { "epoch": 0.07914721660976799, "grad_norm": 7.3212127685546875, "learning_rate": 9.998319879225662e-06, "loss": 0.21024703979492188, "step": 568 }, { "epoch": 0.07928656030098237, "grad_norm": 4.610320568084717, "learning_rate": 9.998258232092571e-06, "loss": 0.16867446899414062, "step": 569 }, { "epoch": 0.07942590399219675, "grad_norm": 8.706377029418945, "learning_rate": 9.998195474460613e-06, "loss": 0.11806106567382812, "step": 570 }, { "epoch": 0.07956524768341114, "grad_norm": 9.971480369567871, "learning_rate": 9.998131606343729e-06, "loss": 0.15869140625, "step": 571 }, { "epoch": 0.07970459137462552, "grad_norm": 6.423381805419922, "learning_rate": 9.998066627756113e-06, "loss": 0.1387958526611328, "step": 572 }, { "epoch": 0.0798439350658399, "grad_norm": 5.203154563903809, "learning_rate": 9.9980005387122e-06, "loss": 0.1467132568359375, "step": 573 }, { "epoch": 0.07998327875705427, "grad_norm": 4.141793727874756, "learning_rate": 9.997933339226675e-06, "loss": 0.15495681762695312, "step": 574 }, { "epoch": 0.08012262244826865, "grad_norm": 4.447422027587891, "learning_rate": 9.997865029314464e-06, "loss": 0.17770767211914062, "step": 575 }, { "epoch": 0.08026196613948304, "grad_norm": 5.540290832519531, "learning_rate": 9.997795608990749e-06, "loss": 0.20757293701171875, "step": 576 }, { "epoch": 0.08040130983069742, "grad_norm": 7.423814296722412, "learning_rate": 9.99772507827095e-06, "loss": 0.1262035369873047, "step": 577 }, { "epoch": 0.0805406535219118, "grad_norm": 2.8121798038482666, "learning_rate": 9.997653437170739e-06, "loss": 0.13526535034179688, "step": 578 }, { "epoch": 0.08067999721312617, "grad_norm": 3.3012561798095703, "learning_rate": 9.997580685706032e-06, "loss": 0.11421966552734375, "step": 579 }, { "epoch": 0.08081934090434055, "grad_norm": 3.5328261852264404, "learning_rate": 9.997506823892993e-06, "loss": 0.11431503295898438, "step": 580 }, { "epoch": 0.08095868459555494, "grad_norm": 2.2115132808685303, "learning_rate": 9.997431851748034e-06, "loss": 0.12911605834960938, "step": 581 }, { "epoch": 0.08109802828676932, "grad_norm": 2.7331326007843018, "learning_rate": 9.99735576928781e-06, "loss": 0.11686325073242188, "step": 582 }, { "epoch": 0.0812373719779837, "grad_norm": 6.25287389755249, "learning_rate": 9.997278576529228e-06, "loss": 0.16445541381835938, "step": 583 }, { "epoch": 0.08137671566919807, "grad_norm": 3.2421278953552246, "learning_rate": 9.997200273489434e-06, "loss": 0.11030769348144531, "step": 584 }, { "epoch": 0.08151605936041245, "grad_norm": 1.7862628698349, "learning_rate": 9.997120860185827e-06, "loss": 0.09953689575195312, "step": 585 }, { "epoch": 0.08165540305162684, "grad_norm": 2.4111883640289307, "learning_rate": 9.997040336636052e-06, "loss": 0.10691356658935547, "step": 586 }, { "epoch": 0.08179474674284122, "grad_norm": 7.026747226715088, "learning_rate": 9.996958702857997e-06, "loss": 0.14279937744140625, "step": 587 }, { "epoch": 0.0819340904340556, "grad_norm": 6.804615020751953, "learning_rate": 9.996875958869803e-06, "loss": 0.10869598388671875, "step": 588 }, { "epoch": 0.08207343412526998, "grad_norm": 2.0449166297912598, "learning_rate": 9.996792104689849e-06, "loss": 0.11845016479492188, "step": 589 }, { "epoch": 0.08221277781648435, "grad_norm": 4.826764106750488, "learning_rate": 9.99670714033677e-06, "loss": 0.10760498046875, "step": 590 }, { "epoch": 0.08235212150769874, "grad_norm": 4.910223484039307, "learning_rate": 9.996621065829442e-06, "loss": 0.13116836547851562, "step": 591 }, { "epoch": 0.08249146519891312, "grad_norm": 2.515676975250244, "learning_rate": 9.996533881186986e-06, "loss": 0.11487579345703125, "step": 592 }, { "epoch": 0.0826308088901275, "grad_norm": 7.005797386169434, "learning_rate": 9.996445586428776e-06, "loss": 0.13048553466796875, "step": 593 }, { "epoch": 0.08277015258134188, "grad_norm": 8.14521598815918, "learning_rate": 9.996356181574425e-06, "loss": 0.11243820190429688, "step": 594 }, { "epoch": 0.08290949627255625, "grad_norm": 4.09276819229126, "learning_rate": 9.9962656666438e-06, "loss": 0.13774871826171875, "step": 595 }, { "epoch": 0.08304883996377065, "grad_norm": 8.499027252197266, "learning_rate": 9.996174041657012e-06, "loss": 0.1202850341796875, "step": 596 }, { "epoch": 0.08318818365498502, "grad_norm": 4.358172416687012, "learning_rate": 9.996081306634416e-06, "loss": 0.11732101440429688, "step": 597 }, { "epoch": 0.0833275273461994, "grad_norm": 2.907266139984131, "learning_rate": 9.995987461596617e-06, "loss": 0.10545921325683594, "step": 598 }, { "epoch": 0.08346687103741378, "grad_norm": 2.4421913623809814, "learning_rate": 9.995892506564461e-06, "loss": 0.1113739013671875, "step": 599 }, { "epoch": 0.08360621472862816, "grad_norm": 1.641378402709961, "learning_rate": 9.995796441559052e-06, "loss": 0.11544990539550781, "step": 600 }, { "epoch": 0.08374555841984255, "grad_norm": 4.494574069976807, "learning_rate": 9.995699266601728e-06, "loss": 0.10565567016601562, "step": 601 }, { "epoch": 0.08388490211105692, "grad_norm": 2.572431802749634, "learning_rate": 9.995600981714082e-06, "loss": 0.11953926086425781, "step": 602 }, { "epoch": 0.0840242458022713, "grad_norm": 2.970099925994873, "learning_rate": 9.995501586917949e-06, "loss": 0.1225137710571289, "step": 603 }, { "epoch": 0.08416358949348568, "grad_norm": 4.182504177093506, "learning_rate": 9.99540108223541e-06, "loss": 0.10535049438476562, "step": 604 }, { "epoch": 0.08430293318470006, "grad_norm": 1.588797688484192, "learning_rate": 9.9952994676888e-06, "loss": 0.07065010070800781, "step": 605 }, { "epoch": 0.08444227687591445, "grad_norm": 3.636584997177124, "learning_rate": 9.995196743300693e-06, "loss": 0.1547698974609375, "step": 606 }, { "epoch": 0.08458162056712883, "grad_norm": 3.1652610301971436, "learning_rate": 9.995092909093911e-06, "loss": 0.12047958374023438, "step": 607 }, { "epoch": 0.0847209642583432, "grad_norm": 3.8959109783172607, "learning_rate": 9.994987965091525e-06, "loss": 0.11340999603271484, "step": 608 }, { "epoch": 0.08486030794955758, "grad_norm": 8.113880157470703, "learning_rate": 9.994881911316849e-06, "loss": 0.1307373046875, "step": 609 }, { "epoch": 0.08499965164077196, "grad_norm": 2.1812193393707275, "learning_rate": 9.99477474779345e-06, "loss": 0.10513496398925781, "step": 610 }, { "epoch": 0.08513899533198635, "grad_norm": 5.736509799957275, "learning_rate": 9.994666474545133e-06, "loss": 0.12813568115234375, "step": 611 }, { "epoch": 0.08527833902320073, "grad_norm": 11.081178665161133, "learning_rate": 9.994557091595956e-06, "loss": 0.13549423217773438, "step": 612 }, { "epoch": 0.0854176827144151, "grad_norm": 1.4614448547363281, "learning_rate": 9.99444659897022e-06, "loss": 0.10453414916992188, "step": 613 }, { "epoch": 0.08555702640562948, "grad_norm": 2.6289451122283936, "learning_rate": 9.994334996692476e-06, "loss": 0.12312126159667969, "step": 614 }, { "epoch": 0.08569637009684386, "grad_norm": 6.972894668579102, "learning_rate": 9.994222284787519e-06, "loss": 0.14410781860351562, "step": 615 }, { "epoch": 0.08583571378805825, "grad_norm": 6.38238000869751, "learning_rate": 9.99410846328039e-06, "loss": 0.12541580200195312, "step": 616 }, { "epoch": 0.08597505747927263, "grad_norm": 3.311662435531616, "learning_rate": 9.993993532196376e-06, "loss": 0.11568641662597656, "step": 617 }, { "epoch": 0.086114401170487, "grad_norm": 2.6507761478424072, "learning_rate": 9.993877491561015e-06, "loss": 0.11964797973632812, "step": 618 }, { "epoch": 0.08625374486170138, "grad_norm": 5.723592281341553, "learning_rate": 9.99376034140009e-06, "loss": 0.10979461669921875, "step": 619 }, { "epoch": 0.08639308855291576, "grad_norm": 2.193638801574707, "learning_rate": 9.993642081739623e-06, "loss": 0.0973968505859375, "step": 620 }, { "epoch": 0.08653243224413015, "grad_norm": 4.534931659698486, "learning_rate": 9.993522712605895e-06, "loss": 0.11529541015625, "step": 621 }, { "epoch": 0.08667177593534453, "grad_norm": 3.126666784286499, "learning_rate": 9.993402234025422e-06, "loss": 0.11991119384765625, "step": 622 }, { "epoch": 0.08681111962655891, "grad_norm": 11.232254981994629, "learning_rate": 9.993280646024975e-06, "loss": 0.17949867248535156, "step": 623 }, { "epoch": 0.08695046331777329, "grad_norm": 8.165175437927246, "learning_rate": 9.993157948631566e-06, "loss": 0.14517974853515625, "step": 624 }, { "epoch": 0.08708980700898766, "grad_norm": 4.896198749542236, "learning_rate": 9.993034141872459e-06, "loss": 0.1824817657470703, "step": 625 }, { "epoch": 0.08722915070020205, "grad_norm": 4.815464019775391, "learning_rate": 9.992909225775157e-06, "loss": 0.12293434143066406, "step": 626 }, { "epoch": 0.08736849439141643, "grad_norm": 4.758384704589844, "learning_rate": 9.992783200367414e-06, "loss": 0.16939926147460938, "step": 627 }, { "epoch": 0.08750783808263081, "grad_norm": 3.65385365486145, "learning_rate": 9.992656065677234e-06, "loss": 0.15931129455566406, "step": 628 }, { "epoch": 0.08764718177384519, "grad_norm": 2.565504550933838, "learning_rate": 9.992527821732858e-06, "loss": 0.08680915832519531, "step": 629 }, { "epoch": 0.08778652546505956, "grad_norm": 1.476867437362671, "learning_rate": 9.992398468562782e-06, "loss": 0.09166526794433594, "step": 630 }, { "epoch": 0.08792586915627396, "grad_norm": 3.324653148651123, "learning_rate": 9.992268006195744e-06, "loss": 0.13250350952148438, "step": 631 }, { "epoch": 0.08806521284748833, "grad_norm": 1.8665456771850586, "learning_rate": 9.992136434660733e-06, "loss": 0.12223434448242188, "step": 632 }, { "epoch": 0.08820455653870271, "grad_norm": 2.4213273525238037, "learning_rate": 9.992003753986976e-06, "loss": 0.12194061279296875, "step": 633 }, { "epoch": 0.08834390022991709, "grad_norm": 2.348762273788452, "learning_rate": 9.991869964203955e-06, "loss": 0.11362457275390625, "step": 634 }, { "epoch": 0.08848324392113147, "grad_norm": 6.482764720916748, "learning_rate": 9.991735065341394e-06, "loss": 0.126312255859375, "step": 635 }, { "epoch": 0.08862258761234586, "grad_norm": 4.5020856857299805, "learning_rate": 9.991599057429266e-06, "loss": 0.09481620788574219, "step": 636 }, { "epoch": 0.08876193130356023, "grad_norm": 6.6450300216674805, "learning_rate": 9.991461940497786e-06, "loss": 0.19031143188476562, "step": 637 }, { "epoch": 0.08890127499477461, "grad_norm": 5.3376288414001465, "learning_rate": 9.991323714577421e-06, "loss": 0.12372589111328125, "step": 638 }, { "epoch": 0.08904061868598899, "grad_norm": 2.935049057006836, "learning_rate": 9.99118437969888e-06, "loss": 0.11130905151367188, "step": 639 }, { "epoch": 0.08917996237720337, "grad_norm": 3.831725597381592, "learning_rate": 9.99104393589312e-06, "loss": 0.13283157348632812, "step": 640 }, { "epoch": 0.08931930606841776, "grad_norm": 5.889954566955566, "learning_rate": 9.990902383191346e-06, "loss": 0.16871070861816406, "step": 641 }, { "epoch": 0.08945864975963214, "grad_norm": 1.6011278629302979, "learning_rate": 9.990759721625005e-06, "loss": 0.10799789428710938, "step": 642 }, { "epoch": 0.08959799345084651, "grad_norm": 3.7214303016662598, "learning_rate": 9.990615951225797e-06, "loss": 0.10919952392578125, "step": 643 }, { "epoch": 0.08973733714206089, "grad_norm": 6.151251792907715, "learning_rate": 9.99047107202566e-06, "loss": 0.1583404541015625, "step": 644 }, { "epoch": 0.08987668083327527, "grad_norm": 3.03200364112854, "learning_rate": 9.990325084056787e-06, "loss": 0.1358489990234375, "step": 645 }, { "epoch": 0.09001602452448966, "grad_norm": 4.519227981567383, "learning_rate": 9.99017798735161e-06, "loss": 0.10815715789794922, "step": 646 }, { "epoch": 0.09015536821570404, "grad_norm": 1.4755104780197144, "learning_rate": 9.990029781942814e-06, "loss": 0.10350418090820312, "step": 647 }, { "epoch": 0.09029471190691842, "grad_norm": 5.036289691925049, "learning_rate": 9.989880467863323e-06, "loss": 0.15119552612304688, "step": 648 }, { "epoch": 0.09043405559813279, "grad_norm": 6.2971577644348145, "learning_rate": 9.989730045146313e-06, "loss": 0.11263084411621094, "step": 649 }, { "epoch": 0.09057339928934717, "grad_norm": 9.330389022827148, "learning_rate": 9.989578513825205e-06, "loss": 0.22081661224365234, "step": 650 }, { "epoch": 0.09071274298056156, "grad_norm": 6.744177341461182, "learning_rate": 9.989425873933666e-06, "loss": 0.13211631774902344, "step": 651 }, { "epoch": 0.09085208667177594, "grad_norm": 4.539707183837891, "learning_rate": 9.989272125505606e-06, "loss": 0.10669136047363281, "step": 652 }, { "epoch": 0.09099143036299032, "grad_norm": 2.2672312259674072, "learning_rate": 9.98911726857519e-06, "loss": 0.10796356201171875, "step": 653 }, { "epoch": 0.0911307740542047, "grad_norm": 7.819256782531738, "learning_rate": 9.988961303176818e-06, "loss": 0.1518096923828125, "step": 654 }, { "epoch": 0.09127011774541907, "grad_norm": 11.08742618560791, "learning_rate": 9.988804229345146e-06, "loss": 0.14454269409179688, "step": 655 }, { "epoch": 0.09140946143663346, "grad_norm": 9.333878517150879, "learning_rate": 9.98864604711507e-06, "loss": 0.12226295471191406, "step": 656 }, { "epoch": 0.09154880512784784, "grad_norm": 3.9583966732025146, "learning_rate": 9.988486756521733e-06, "loss": 0.15987348556518555, "step": 657 }, { "epoch": 0.09168814881906222, "grad_norm": 5.4428887367248535, "learning_rate": 9.98832635760053e-06, "loss": 0.09360122680664062, "step": 658 }, { "epoch": 0.0918274925102766, "grad_norm": 10.172745704650879, "learning_rate": 9.988164850387095e-06, "loss": 0.16600418090820312, "step": 659 }, { "epoch": 0.09196683620149097, "grad_norm": 19.268810272216797, "learning_rate": 9.988002234917312e-06, "loss": 0.25562477111816406, "step": 660 }, { "epoch": 0.09210617989270536, "grad_norm": 4.2740583419799805, "learning_rate": 9.987838511227311e-06, "loss": 0.12265396118164062, "step": 661 }, { "epoch": 0.09224552358391974, "grad_norm": 2.3952720165252686, "learning_rate": 9.987673679353467e-06, "loss": 0.11246776580810547, "step": 662 }, { "epoch": 0.09238486727513412, "grad_norm": 4.9181365966796875, "learning_rate": 9.987507739332401e-06, "loss": 0.14954757690429688, "step": 663 }, { "epoch": 0.0925242109663485, "grad_norm": 1.9089114665985107, "learning_rate": 9.987340691200984e-06, "loss": 0.08322906494140625, "step": 664 }, { "epoch": 0.09266355465756287, "grad_norm": 1.9534562826156616, "learning_rate": 9.987172534996326e-06, "loss": 0.11472702026367188, "step": 665 }, { "epoch": 0.09280289834877727, "grad_norm": 1.6391056776046753, "learning_rate": 9.98700327075579e-06, "loss": 0.0764017105102539, "step": 666 }, { "epoch": 0.09294224203999164, "grad_norm": 2.9994139671325684, "learning_rate": 9.986832898516985e-06, "loss": 0.12739944458007812, "step": 667 }, { "epoch": 0.09308158573120602, "grad_norm": 5.459787368774414, "learning_rate": 9.986661418317759e-06, "loss": 0.15485000610351562, "step": 668 }, { "epoch": 0.0932209294224204, "grad_norm": 7.138373851776123, "learning_rate": 9.986488830196215e-06, "loss": 0.13676834106445312, "step": 669 }, { "epoch": 0.09336027311363478, "grad_norm": 5.530710220336914, "learning_rate": 9.986315134190694e-06, "loss": 0.08398246765136719, "step": 670 }, { "epoch": 0.09349961680484917, "grad_norm": 5.0010199546813965, "learning_rate": 9.98614033033979e-06, "loss": 0.1752490997314453, "step": 671 }, { "epoch": 0.09363896049606354, "grad_norm": 8.643370628356934, "learning_rate": 9.985964418682342e-06, "loss": 0.15865325927734375, "step": 672 }, { "epoch": 0.09377830418727792, "grad_norm": 2.543217658996582, "learning_rate": 9.985787399257431e-06, "loss": 0.08936691284179688, "step": 673 }, { "epoch": 0.0939176478784923, "grad_norm": 6.790078163146973, "learning_rate": 9.985609272104387e-06, "loss": 0.16419410705566406, "step": 674 }, { "epoch": 0.09405699156970668, "grad_norm": 7.764473915100098, "learning_rate": 9.985430037262787e-06, "loss": 0.16870880126953125, "step": 675 }, { "epoch": 0.09419633526092107, "grad_norm": 2.465524196624756, "learning_rate": 9.98524969477245e-06, "loss": 0.12793922424316406, "step": 676 }, { "epoch": 0.09433567895213545, "grad_norm": 1.9273732900619507, "learning_rate": 9.985068244673449e-06, "loss": 0.08533477783203125, "step": 677 }, { "epoch": 0.09447502264334982, "grad_norm": 4.097898006439209, "learning_rate": 9.984885687006093e-06, "loss": 0.18805313110351562, "step": 678 }, { "epoch": 0.0946143663345642, "grad_norm": 4.169610977172852, "learning_rate": 9.984702021810944e-06, "loss": 0.16600799560546875, "step": 679 }, { "epoch": 0.09475371002577858, "grad_norm": 3.0349280834198, "learning_rate": 9.98451724912881e-06, "loss": 0.12169075012207031, "step": 680 }, { "epoch": 0.09489305371699297, "grad_norm": 2.790010452270508, "learning_rate": 9.984331369000739e-06, "loss": 0.1151123046875, "step": 681 }, { "epoch": 0.09503239740820735, "grad_norm": 2.6838719844818115, "learning_rate": 9.984144381468035e-06, "loss": 0.10231590270996094, "step": 682 }, { "epoch": 0.09517174109942172, "grad_norm": 3.075143337249756, "learning_rate": 9.983956286572238e-06, "loss": 0.15852737426757812, "step": 683 }, { "epoch": 0.0953110847906361, "grad_norm": 2.2006821632385254, "learning_rate": 9.983767084355141e-06, "loss": 0.10262298583984375, "step": 684 }, { "epoch": 0.09545042848185048, "grad_norm": 1.0452128648757935, "learning_rate": 9.983576774858776e-06, "loss": 0.08076286315917969, "step": 685 }, { "epoch": 0.09558977217306487, "grad_norm": 5.127438545227051, "learning_rate": 9.983385358125432e-06, "loss": 0.10265159606933594, "step": 686 }, { "epoch": 0.09572911586427925, "grad_norm": 11.134477615356445, "learning_rate": 9.983192834197633e-06, "loss": 0.1519794464111328, "step": 687 }, { "epoch": 0.09586845955549363, "grad_norm": 3.5396952629089355, "learning_rate": 9.982999203118153e-06, "loss": 0.096710205078125, "step": 688 }, { "epoch": 0.096007803246708, "grad_norm": 2.603250503540039, "learning_rate": 9.982804464930016e-06, "loss": 0.14869308471679688, "step": 689 }, { "epoch": 0.09614714693792238, "grad_norm": 7.991442680358887, "learning_rate": 9.982608619676485e-06, "loss": 0.14873313903808594, "step": 690 }, { "epoch": 0.09628649062913676, "grad_norm": 4.0060648918151855, "learning_rate": 9.982411667401076e-06, "loss": 0.12914085388183594, "step": 691 }, { "epoch": 0.09642583432035115, "grad_norm": 5.904354572296143, "learning_rate": 9.982213608147541e-06, "loss": 0.18931961059570312, "step": 692 }, { "epoch": 0.09656517801156553, "grad_norm": 3.569946050643921, "learning_rate": 9.982014441959891e-06, "loss": 0.1266040802001953, "step": 693 }, { "epoch": 0.0967045217027799, "grad_norm": 2.8597095012664795, "learning_rate": 9.98181416888237e-06, "loss": 0.11122512817382812, "step": 694 }, { "epoch": 0.09684386539399428, "grad_norm": 4.030284404754639, "learning_rate": 9.981612788959481e-06, "loss": 0.12308502197265625, "step": 695 }, { "epoch": 0.09698320908520866, "grad_norm": 4.084993362426758, "learning_rate": 9.981410302235962e-06, "loss": 0.1272125244140625, "step": 696 }, { "epoch": 0.09712255277642305, "grad_norm": 4.249486923217773, "learning_rate": 9.9812067087568e-06, "loss": 0.14068603515625, "step": 697 }, { "epoch": 0.09726189646763743, "grad_norm": 3.554090976715088, "learning_rate": 9.98100200856723e-06, "loss": 0.12052154541015625, "step": 698 }, { "epoch": 0.0974012401588518, "grad_norm": 1.5964144468307495, "learning_rate": 9.980796201712734e-06, "loss": 0.11322021484375, "step": 699 }, { "epoch": 0.09754058385006618, "grad_norm": 2.088028907775879, "learning_rate": 9.980589288239034e-06, "loss": 0.12884140014648438, "step": 700 }, { "epoch": 0.09767992754128056, "grad_norm": 1.8859295845031738, "learning_rate": 9.980381268192103e-06, "loss": 0.11065673828125, "step": 701 }, { "epoch": 0.09781927123249495, "grad_norm": 2.710925817489624, "learning_rate": 9.980172141618159e-06, "loss": 0.1462249755859375, "step": 702 }, { "epoch": 0.09795861492370933, "grad_norm": 1.987047791481018, "learning_rate": 9.979961908563663e-06, "loss": 0.10261154174804688, "step": 703 }, { "epoch": 0.09809795861492371, "grad_norm": 2.691478729248047, "learning_rate": 9.979750569075325e-06, "loss": 0.11887550354003906, "step": 704 }, { "epoch": 0.09823730230613809, "grad_norm": 2.0406556129455566, "learning_rate": 9.979538123200102e-06, "loss": 0.14940452575683594, "step": 705 }, { "epoch": 0.09837664599735246, "grad_norm": 2.765416145324707, "learning_rate": 9.979324570985194e-06, "loss": 0.11402130126953125, "step": 706 }, { "epoch": 0.09851598968856685, "grad_norm": 1.2252438068389893, "learning_rate": 9.979109912478044e-06, "loss": 0.11532974243164062, "step": 707 }, { "epoch": 0.09865533337978123, "grad_norm": 3.5358312129974365, "learning_rate": 9.978894147726346e-06, "loss": 0.19122695922851562, "step": 708 }, { "epoch": 0.09879467707099561, "grad_norm": 1.0849413871765137, "learning_rate": 9.97867727677804e-06, "loss": 0.08762741088867188, "step": 709 }, { "epoch": 0.09893402076220999, "grad_norm": 10.553237915039062, "learning_rate": 9.978459299681306e-06, "loss": 0.171905517578125, "step": 710 }, { "epoch": 0.09907336445342436, "grad_norm": 1.571395993232727, "learning_rate": 9.978240216484579e-06, "loss": 0.09839820861816406, "step": 711 }, { "epoch": 0.09921270814463876, "grad_norm": 1.7810089588165283, "learning_rate": 9.978020027236529e-06, "loss": 0.1112060546875, "step": 712 }, { "epoch": 0.09935205183585313, "grad_norm": 2.894700527191162, "learning_rate": 9.977798731986079e-06, "loss": 0.08604049682617188, "step": 713 }, { "epoch": 0.09949139552706751, "grad_norm": 1.9100863933563232, "learning_rate": 9.977576330782397e-06, "loss": 0.15811538696289062, "step": 714 }, { "epoch": 0.09963073921828189, "grad_norm": 1.23178231716156, "learning_rate": 9.977352823674893e-06, "loss": 0.1030120849609375, "step": 715 }, { "epoch": 0.09977008290949627, "grad_norm": 5.987951755523682, "learning_rate": 9.977128210713227e-06, "loss": 0.14347457885742188, "step": 716 }, { "epoch": 0.09990942660071066, "grad_norm": 4.097415447235107, "learning_rate": 9.976902491947303e-06, "loss": 0.12700653076171875, "step": 717 }, { "epoch": 0.10004877029192503, "grad_norm": 3.517375946044922, "learning_rate": 9.976675667427268e-06, "loss": 0.11352348327636719, "step": 718 }, { "epoch": 0.10018811398313941, "grad_norm": 13.318920135498047, "learning_rate": 9.976447737203521e-06, "loss": 0.19477081298828125, "step": 719 }, { "epoch": 0.10032745767435379, "grad_norm": 4.648531436920166, "learning_rate": 9.976218701326701e-06, "loss": 0.11558341979980469, "step": 720 }, { "epoch": 0.10046680136556817, "grad_norm": 1.5700478553771973, "learning_rate": 9.975988559847693e-06, "loss": 0.13268280029296875, "step": 721 }, { "epoch": 0.10060614505678256, "grad_norm": 2.5926222801208496, "learning_rate": 9.975757312817634e-06, "loss": 0.08626365661621094, "step": 722 }, { "epoch": 0.10074548874799694, "grad_norm": 2.19411563873291, "learning_rate": 9.975524960287895e-06, "loss": 0.08205032348632812, "step": 723 }, { "epoch": 0.10088483243921131, "grad_norm": 2.301260471343994, "learning_rate": 9.975291502310105e-06, "loss": 0.1022176742553711, "step": 724 }, { "epoch": 0.10102417613042569, "grad_norm": 1.3015631437301636, "learning_rate": 9.975056938936129e-06, "loss": 0.07771682739257812, "step": 725 }, { "epoch": 0.10116351982164007, "grad_norm": 4.362000465393066, "learning_rate": 9.974821270218086e-06, "loss": 0.22386550903320312, "step": 726 }, { "epoch": 0.10130286351285446, "grad_norm": 2.7601752281188965, "learning_rate": 9.974584496208334e-06, "loss": 0.09568595886230469, "step": 727 }, { "epoch": 0.10144220720406884, "grad_norm": 5.341733455657959, "learning_rate": 9.974346616959476e-06, "loss": 0.15967559814453125, "step": 728 }, { "epoch": 0.10158155089528322, "grad_norm": 1.4235759973526, "learning_rate": 9.974107632524368e-06, "loss": 0.12837600708007812, "step": 729 }, { "epoch": 0.10172089458649759, "grad_norm": 2.1577038764953613, "learning_rate": 9.973867542956104e-06, "loss": 0.11235618591308594, "step": 730 }, { "epoch": 0.10186023827771197, "grad_norm": 2.738220453262329, "learning_rate": 9.973626348308027e-06, "loss": 0.10859107971191406, "step": 731 }, { "epoch": 0.10199958196892636, "grad_norm": 3.266761541366577, "learning_rate": 9.973384048633728e-06, "loss": 0.11774444580078125, "step": 732 }, { "epoch": 0.10213892566014074, "grad_norm": 2.031468391418457, "learning_rate": 9.973140643987034e-06, "loss": 0.0803375244140625, "step": 733 }, { "epoch": 0.10227826935135512, "grad_norm": 4.062684535980225, "learning_rate": 9.97289613442203e-06, "loss": 0.15354156494140625, "step": 734 }, { "epoch": 0.1024176130425695, "grad_norm": 5.676593780517578, "learning_rate": 9.972650519993037e-06, "loss": 0.12801742553710938, "step": 735 }, { "epoch": 0.10255695673378387, "grad_norm": 4.770260810852051, "learning_rate": 9.972403800754626e-06, "loss": 0.12137031555175781, "step": 736 }, { "epoch": 0.10269630042499826, "grad_norm": 2.7721476554870605, "learning_rate": 9.972155976761613e-06, "loss": 0.12109184265136719, "step": 737 }, { "epoch": 0.10283564411621264, "grad_norm": 2.6401052474975586, "learning_rate": 9.971907048069058e-06, "loss": 0.11466407775878906, "step": 738 }, { "epoch": 0.10297498780742702, "grad_norm": 1.8589903116226196, "learning_rate": 9.971657014732268e-06, "loss": 0.14503860473632812, "step": 739 }, { "epoch": 0.1031143314986414, "grad_norm": 3.0074260234832764, "learning_rate": 9.971405876806792e-06, "loss": 0.09781837463378906, "step": 740 }, { "epoch": 0.10325367518985577, "grad_norm": 2.2141001224517822, "learning_rate": 9.971153634348431e-06, "loss": 0.09088134765625, "step": 741 }, { "epoch": 0.10339301888107016, "grad_norm": 1.5630972385406494, "learning_rate": 9.970900287413225e-06, "loss": 0.13116455078125, "step": 742 }, { "epoch": 0.10353236257228454, "grad_norm": 1.9413392543792725, "learning_rate": 9.970645836057464e-06, "loss": 0.11499977111816406, "step": 743 }, { "epoch": 0.10367170626349892, "grad_norm": 2.126976490020752, "learning_rate": 9.970390280337681e-06, "loss": 0.15629196166992188, "step": 744 }, { "epoch": 0.1038110499547133, "grad_norm": 2.4147908687591553, "learning_rate": 9.970133620310652e-06, "loss": 0.14080810546875, "step": 745 }, { "epoch": 0.10395039364592767, "grad_norm": 2.0597169399261475, "learning_rate": 9.969875856033402e-06, "loss": 0.1287708282470703, "step": 746 }, { "epoch": 0.10408973733714207, "grad_norm": 3.15750789642334, "learning_rate": 9.969616987563202e-06, "loss": 0.1228790283203125, "step": 747 }, { "epoch": 0.10422908102835644, "grad_norm": 1.7076297998428345, "learning_rate": 9.969357014957564e-06, "loss": 0.0882415771484375, "step": 748 }, { "epoch": 0.10436842471957082, "grad_norm": 3.234873056411743, "learning_rate": 9.969095938274251e-06, "loss": 0.12628555297851562, "step": 749 }, { "epoch": 0.1045077684107852, "grad_norm": 1.7887731790542603, "learning_rate": 9.968833757571268e-06, "loss": 0.10002326965332031, "step": 750 }, { "epoch": 0.10464711210199958, "grad_norm": 2.5106678009033203, "learning_rate": 9.968570472906862e-06, "loss": 0.1024932861328125, "step": 751 }, { "epoch": 0.10478645579321397, "grad_norm": 2.5694620609283447, "learning_rate": 9.968306084339534e-06, "loss": 0.13214492797851562, "step": 752 }, { "epoch": 0.10492579948442834, "grad_norm": 4.134302139282227, "learning_rate": 9.96804059192802e-06, "loss": 0.14826393127441406, "step": 753 }, { "epoch": 0.10506514317564272, "grad_norm": 1.7670466899871826, "learning_rate": 9.96777399573131e-06, "loss": 0.13856124877929688, "step": 754 }, { "epoch": 0.1052044868668571, "grad_norm": 2.63659930229187, "learning_rate": 9.967506295808634e-06, "loss": 0.10821914672851562, "step": 755 }, { "epoch": 0.10534383055807148, "grad_norm": 3.7481284141540527, "learning_rate": 9.96723749221947e-06, "loss": 0.1284637451171875, "step": 756 }, { "epoch": 0.10548317424928587, "grad_norm": 2.466850519180298, "learning_rate": 9.96696758502354e-06, "loss": 0.11518096923828125, "step": 757 }, { "epoch": 0.10562251794050025, "grad_norm": 2.3487908840179443, "learning_rate": 9.966696574280808e-06, "loss": 0.13548660278320312, "step": 758 }, { "epoch": 0.10576186163171462, "grad_norm": 4.673755168914795, "learning_rate": 9.966424460051489e-06, "loss": 0.14966773986816406, "step": 759 }, { "epoch": 0.105901205322929, "grad_norm": 3.044626474380493, "learning_rate": 9.96615124239604e-06, "loss": 0.13897323608398438, "step": 760 }, { "epoch": 0.10604054901414338, "grad_norm": 2.6723315715789795, "learning_rate": 9.965876921375165e-06, "loss": 0.11374855041503906, "step": 761 }, { "epoch": 0.10617989270535777, "grad_norm": 4.433045864105225, "learning_rate": 9.965601497049812e-06, "loss": 0.14502906799316406, "step": 762 }, { "epoch": 0.10631923639657215, "grad_norm": 2.4509003162384033, "learning_rate": 9.965324969481172e-06, "loss": 0.09815216064453125, "step": 763 }, { "epoch": 0.10645858008778653, "grad_norm": 4.257585525512695, "learning_rate": 9.965047338730685e-06, "loss": 0.18212509155273438, "step": 764 }, { "epoch": 0.1065979237790009, "grad_norm": 2.938614845275879, "learning_rate": 9.964768604860033e-06, "loss": 0.09301948547363281, "step": 765 }, { "epoch": 0.10673726747021528, "grad_norm": 1.9573019742965698, "learning_rate": 9.964488767931144e-06, "loss": 0.12679672241210938, "step": 766 }, { "epoch": 0.10687661116142967, "grad_norm": 2.3368029594421387, "learning_rate": 9.964207828006191e-06, "loss": 0.1496868133544922, "step": 767 }, { "epoch": 0.10701595485264405, "grad_norm": 3.559807538986206, "learning_rate": 9.963925785147595e-06, "loss": 0.10795402526855469, "step": 768 }, { "epoch": 0.10715529854385843, "grad_norm": 2.5855202674865723, "learning_rate": 9.963642639418018e-06, "loss": 0.11501884460449219, "step": 769 }, { "epoch": 0.1072946422350728, "grad_norm": 4.151330947875977, "learning_rate": 9.963358390880367e-06, "loss": 0.14352798461914062, "step": 770 }, { "epoch": 0.10743398592628718, "grad_norm": 3.9597554206848145, "learning_rate": 9.963073039597798e-06, "loss": 0.15679168701171875, "step": 771 }, { "epoch": 0.10757332961750157, "grad_norm": 1.4013322591781616, "learning_rate": 9.962786585633708e-06, "loss": 0.09072494506835938, "step": 772 }, { "epoch": 0.10771267330871595, "grad_norm": 2.737136125564575, "learning_rate": 9.962499029051742e-06, "loss": 0.09397506713867188, "step": 773 }, { "epoch": 0.10785201699993033, "grad_norm": 2.4977920055389404, "learning_rate": 9.962210369915787e-06, "loss": 0.12050628662109375, "step": 774 }, { "epoch": 0.1079913606911447, "grad_norm": 2.7339963912963867, "learning_rate": 9.961920608289977e-06, "loss": 0.1143798828125, "step": 775 }, { "epoch": 0.10813070438235908, "grad_norm": 1.2170480489730835, "learning_rate": 9.96162974423869e-06, "loss": 0.10388374328613281, "step": 776 }, { "epoch": 0.10827004807357347, "grad_norm": 2.509786367416382, "learning_rate": 9.961337777826549e-06, "loss": 0.12294387817382812, "step": 777 }, { "epoch": 0.10840939176478785, "grad_norm": 2.15649676322937, "learning_rate": 9.961044709118425e-06, "loss": 0.15141773223876953, "step": 778 }, { "epoch": 0.10854873545600223, "grad_norm": 3.169171094894409, "learning_rate": 9.960750538179428e-06, "loss": 0.1097259521484375, "step": 779 }, { "epoch": 0.1086880791472166, "grad_norm": 3.870387554168701, "learning_rate": 9.960455265074918e-06, "loss": 0.08778762817382812, "step": 780 }, { "epoch": 0.10882742283843098, "grad_norm": 1.3703899383544922, "learning_rate": 9.960158889870495e-06, "loss": 0.11860847473144531, "step": 781 }, { "epoch": 0.10896676652964538, "grad_norm": 3.2301158905029297, "learning_rate": 9.959861412632011e-06, "loss": 0.13852500915527344, "step": 782 }, { "epoch": 0.10910611022085975, "grad_norm": 7.315746307373047, "learning_rate": 9.959562833425557e-06, "loss": 0.13867950439453125, "step": 783 }, { "epoch": 0.10924545391207413, "grad_norm": 2.9696781635284424, "learning_rate": 9.95926315231747e-06, "loss": 0.1553363800048828, "step": 784 }, { "epoch": 0.10938479760328851, "grad_norm": 4.3576579093933105, "learning_rate": 9.958962369374333e-06, "loss": 0.15116119384765625, "step": 785 }, { "epoch": 0.10952414129450289, "grad_norm": 7.163794994354248, "learning_rate": 9.95866048466297e-06, "loss": 0.13873291015625, "step": 786 }, { "epoch": 0.10966348498571728, "grad_norm": 3.5548574924468994, "learning_rate": 9.958357498250457e-06, "loss": 0.09755134582519531, "step": 787 }, { "epoch": 0.10980282867693165, "grad_norm": 6.224717617034912, "learning_rate": 9.95805341020411e-06, "loss": 0.16241455078125, "step": 788 }, { "epoch": 0.10994217236814603, "grad_norm": 2.159318208694458, "learning_rate": 9.957748220591487e-06, "loss": 0.10515403747558594, "step": 789 }, { "epoch": 0.11008151605936041, "grad_norm": 2.974952220916748, "learning_rate": 9.9574419294804e-06, "loss": 0.1822052001953125, "step": 790 }, { "epoch": 0.11022085975057479, "grad_norm": 5.4025349617004395, "learning_rate": 9.957134536938894e-06, "loss": 0.16259384155273438, "step": 791 }, { "epoch": 0.11036020344178918, "grad_norm": 3.9135541915893555, "learning_rate": 9.956826043035268e-06, "loss": 0.12826156616210938, "step": 792 }, { "epoch": 0.11049954713300356, "grad_norm": 4.582199573516846, "learning_rate": 9.956516447838063e-06, "loss": 0.15796279907226562, "step": 793 }, { "epoch": 0.11063889082421793, "grad_norm": 3.1019110679626465, "learning_rate": 9.95620575141606e-06, "loss": 0.1077423095703125, "step": 794 }, { "epoch": 0.11077823451543231, "grad_norm": 2.8154327869415283, "learning_rate": 9.955893953838293e-06, "loss": 0.11102485656738281, "step": 795 }, { "epoch": 0.11091757820664669, "grad_norm": 1.4689937829971313, "learning_rate": 9.955581055174034e-06, "loss": 0.12690353393554688, "step": 796 }, { "epoch": 0.11105692189786108, "grad_norm": 2.7622263431549072, "learning_rate": 9.9552670554928e-06, "loss": 0.13167190551757812, "step": 797 }, { "epoch": 0.11119626558907546, "grad_norm": 2.013964891433716, "learning_rate": 9.954951954864361e-06, "loss": 0.13796615600585938, "step": 798 }, { "epoch": 0.11133560928028984, "grad_norm": 3.0717530250549316, "learning_rate": 9.954635753358718e-06, "loss": 0.1412334442138672, "step": 799 }, { "epoch": 0.11147495297150421, "grad_norm": 2.146881341934204, "learning_rate": 9.954318451046128e-06, "loss": 0.11650848388671875, "step": 800 }, { "epoch": 0.11161429666271859, "grad_norm": 2.5520427227020264, "learning_rate": 9.954000047997088e-06, "loss": 0.15456390380859375, "step": 801 }, { "epoch": 0.11175364035393298, "grad_norm": 4.5670061111450195, "learning_rate": 9.953680544282338e-06, "loss": 0.11327457427978516, "step": 802 }, { "epoch": 0.11189298404514736, "grad_norm": 1.13467538356781, "learning_rate": 9.953359939972866e-06, "loss": 0.09945869445800781, "step": 803 }, { "epoch": 0.11203232773636174, "grad_norm": 2.2846016883850098, "learning_rate": 9.953038235139902e-06, "loss": 0.1237020492553711, "step": 804 }, { "epoch": 0.11217167142757611, "grad_norm": 2.356666088104248, "learning_rate": 9.952715429854923e-06, "loss": 0.11206817626953125, "step": 805 }, { "epoch": 0.11231101511879049, "grad_norm": 1.3561443090438843, "learning_rate": 9.952391524189646e-06, "loss": 0.10030746459960938, "step": 806 }, { "epoch": 0.11245035881000488, "grad_norm": 2.778235673904419, "learning_rate": 9.952066518216039e-06, "loss": 0.12351036071777344, "step": 807 }, { "epoch": 0.11258970250121926, "grad_norm": 3.086517810821533, "learning_rate": 9.951740412006308e-06, "loss": 0.11660957336425781, "step": 808 }, { "epoch": 0.11272904619243364, "grad_norm": 5.353437423706055, "learning_rate": 9.95141320563291e-06, "loss": 0.15546798706054688, "step": 809 }, { "epoch": 0.11286838988364802, "grad_norm": 2.8509175777435303, "learning_rate": 9.951084899168537e-06, "loss": 0.11568546295166016, "step": 810 }, { "epoch": 0.11300773357486239, "grad_norm": 2.8412106037139893, "learning_rate": 9.950755492686138e-06, "loss": 0.08780860900878906, "step": 811 }, { "epoch": 0.11314707726607678, "grad_norm": 1.5376516580581665, "learning_rate": 9.950424986258893e-06, "loss": 0.11478042602539062, "step": 812 }, { "epoch": 0.11328642095729116, "grad_norm": 2.328151226043701, "learning_rate": 9.950093379960238e-06, "loss": 0.115966796875, "step": 813 }, { "epoch": 0.11342576464850554, "grad_norm": 3.9742534160614014, "learning_rate": 9.949760673863846e-06, "loss": 0.14521217346191406, "step": 814 }, { "epoch": 0.11356510833971992, "grad_norm": 3.536050319671631, "learning_rate": 9.949426868043638e-06, "loss": 0.10870170593261719, "step": 815 }, { "epoch": 0.1137044520309343, "grad_norm": 2.9160900115966797, "learning_rate": 9.949091962573775e-06, "loss": 0.10007667541503906, "step": 816 }, { "epoch": 0.11384379572214869, "grad_norm": 1.2794684171676636, "learning_rate": 9.94875595752867e-06, "loss": 0.078582763671875, "step": 817 }, { "epoch": 0.11398313941336306, "grad_norm": 3.6646246910095215, "learning_rate": 9.948418852982973e-06, "loss": 0.158203125, "step": 818 }, { "epoch": 0.11412248310457744, "grad_norm": 4.73965311050415, "learning_rate": 9.948080649011582e-06, "loss": 0.13189697265625, "step": 819 }, { "epoch": 0.11426182679579182, "grad_norm": 1.9040216207504272, "learning_rate": 9.947741345689635e-06, "loss": 0.11892127990722656, "step": 820 }, { "epoch": 0.1144011704870062, "grad_norm": 3.949151039123535, "learning_rate": 9.947400943092522e-06, "loss": 0.1683940887451172, "step": 821 }, { "epoch": 0.11454051417822059, "grad_norm": 4.421910285949707, "learning_rate": 9.94705944129587e-06, "loss": 0.1695270538330078, "step": 822 }, { "epoch": 0.11467985786943496, "grad_norm": 4.51173734664917, "learning_rate": 9.946716840375552e-06, "loss": 0.1755390167236328, "step": 823 }, { "epoch": 0.11481920156064934, "grad_norm": 2.929441452026367, "learning_rate": 9.946373140407688e-06, "loss": 0.1367206573486328, "step": 824 }, { "epoch": 0.11495854525186372, "grad_norm": 3.333976984024048, "learning_rate": 9.946028341468642e-06, "loss": 0.11565208435058594, "step": 825 }, { "epoch": 0.1150978889430781, "grad_norm": 3.35102915763855, "learning_rate": 9.945682443635015e-06, "loss": 0.09621429443359375, "step": 826 }, { "epoch": 0.11523723263429249, "grad_norm": 0.9562882781028748, "learning_rate": 9.945335446983662e-06, "loss": 0.09880256652832031, "step": 827 }, { "epoch": 0.11537657632550687, "grad_norm": 3.479519844055176, "learning_rate": 9.944987351591677e-06, "loss": 0.12340736389160156, "step": 828 }, { "epoch": 0.11551592001672124, "grad_norm": 3.378981351852417, "learning_rate": 9.944638157536399e-06, "loss": 0.09527206420898438, "step": 829 }, { "epoch": 0.11565526370793562, "grad_norm": 7.496798992156982, "learning_rate": 9.94428786489541e-06, "loss": 0.24370765686035156, "step": 830 }, { "epoch": 0.11579460739915, "grad_norm": 1.775589108467102, "learning_rate": 9.943936473746539e-06, "loss": 0.13155460357666016, "step": 831 }, { "epoch": 0.11593395109036439, "grad_norm": 1.776629090309143, "learning_rate": 9.943583984167853e-06, "loss": 0.13026809692382812, "step": 832 }, { "epoch": 0.11607329478157877, "grad_norm": 2.3282217979431152, "learning_rate": 9.94323039623767e-06, "loss": 0.11234855651855469, "step": 833 }, { "epoch": 0.11621263847279314, "grad_norm": 1.1399813890457153, "learning_rate": 9.942875710034549e-06, "loss": 0.09619903564453125, "step": 834 }, { "epoch": 0.11635198216400752, "grad_norm": 2.2887635231018066, "learning_rate": 9.942519925637293e-06, "loss": 0.11146926879882812, "step": 835 }, { "epoch": 0.1164913258552219, "grad_norm": 1.335863709449768, "learning_rate": 9.942163043124951e-06, "loss": 0.10033798217773438, "step": 836 }, { "epoch": 0.11663066954643629, "grad_norm": 3.8721890449523926, "learning_rate": 9.941805062576811e-06, "loss": 0.11355209350585938, "step": 837 }, { "epoch": 0.11677001323765067, "grad_norm": 3.4671294689178467, "learning_rate": 9.941445984072408e-06, "loss": 0.11339187622070312, "step": 838 }, { "epoch": 0.11690935692886505, "grad_norm": 1.9975965023040771, "learning_rate": 9.941085807691524e-06, "loss": 0.11185836791992188, "step": 839 }, { "epoch": 0.11704870062007942, "grad_norm": 1.4470244646072388, "learning_rate": 9.94072453351418e-06, "loss": 0.097808837890625, "step": 840 }, { "epoch": 0.1171880443112938, "grad_norm": 1.519439935684204, "learning_rate": 9.940362161620644e-06, "loss": 0.100341796875, "step": 841 }, { "epoch": 0.11732738800250819, "grad_norm": 3.021069049835205, "learning_rate": 9.939998692091427e-06, "loss": 0.11673545837402344, "step": 842 }, { "epoch": 0.11746673169372257, "grad_norm": 0.8848722577095032, "learning_rate": 9.939634125007279e-06, "loss": 0.0883035659790039, "step": 843 }, { "epoch": 0.11760607538493695, "grad_norm": 1.6267117261886597, "learning_rate": 9.939268460449205e-06, "loss": 0.10515403747558594, "step": 844 }, { "epoch": 0.11774541907615133, "grad_norm": 1.9374058246612549, "learning_rate": 9.938901698498444e-06, "loss": 0.11259651184082031, "step": 845 }, { "epoch": 0.1178847627673657, "grad_norm": 2.654165267944336, "learning_rate": 9.938533839236483e-06, "loss": 0.1327667236328125, "step": 846 }, { "epoch": 0.1180241064585801, "grad_norm": 2.668545722961426, "learning_rate": 9.938164882745051e-06, "loss": 0.11937332153320312, "step": 847 }, { "epoch": 0.11816345014979447, "grad_norm": 4.512094974517822, "learning_rate": 9.937794829106122e-06, "loss": 0.19552230834960938, "step": 848 }, { "epoch": 0.11830279384100885, "grad_norm": 2.841870069503784, "learning_rate": 9.937423678401913e-06, "loss": 0.10074615478515625, "step": 849 }, { "epoch": 0.11844213753222323, "grad_norm": 2.5769410133361816, "learning_rate": 9.937051430714888e-06, "loss": 0.08856964111328125, "step": 850 }, { "epoch": 0.1185814812234376, "grad_norm": 2.884423017501831, "learning_rate": 9.936678086127749e-06, "loss": 0.10734939575195312, "step": 851 }, { "epoch": 0.118720824914652, "grad_norm": 1.9025465250015259, "learning_rate": 9.936303644723446e-06, "loss": 0.10186004638671875, "step": 852 }, { "epoch": 0.11886016860586637, "grad_norm": 5.787459850311279, "learning_rate": 9.93592810658517e-06, "loss": 0.1775341033935547, "step": 853 }, { "epoch": 0.11899951229708075, "grad_norm": 3.713503122329712, "learning_rate": 9.935551471796358e-06, "loss": 0.0973663330078125, "step": 854 }, { "epoch": 0.11913885598829513, "grad_norm": 1.473311424255371, "learning_rate": 9.935173740440692e-06, "loss": 0.12673187255859375, "step": 855 }, { "epoch": 0.1192781996795095, "grad_norm": 3.733553171157837, "learning_rate": 9.93479491260209e-06, "loss": 0.13537216186523438, "step": 856 }, { "epoch": 0.1194175433707239, "grad_norm": 4.151919841766357, "learning_rate": 9.934414988364722e-06, "loss": 0.15812301635742188, "step": 857 }, { "epoch": 0.11955688706193827, "grad_norm": 2.257246732711792, "learning_rate": 9.934033967812998e-06, "loss": 0.11460304260253906, "step": 858 }, { "epoch": 0.11969623075315265, "grad_norm": 1.1260120868682861, "learning_rate": 9.933651851031573e-06, "loss": 0.12759780883789062, "step": 859 }, { "epoch": 0.11983557444436703, "grad_norm": 1.5599157810211182, "learning_rate": 9.933268638105345e-06, "loss": 0.10565471649169922, "step": 860 }, { "epoch": 0.11997491813558141, "grad_norm": 1.711558222770691, "learning_rate": 9.932884329119452e-06, "loss": 0.13408851623535156, "step": 861 }, { "epoch": 0.1201142618267958, "grad_norm": 2.1007299423217773, "learning_rate": 9.932498924159281e-06, "loss": 0.11727714538574219, "step": 862 }, { "epoch": 0.12025360551801018, "grad_norm": 1.0553011894226074, "learning_rate": 9.93211242331046e-06, "loss": 0.08898735046386719, "step": 863 }, { "epoch": 0.12039294920922455, "grad_norm": 2.1952273845672607, "learning_rate": 9.931724826658861e-06, "loss": 0.20813369750976562, "step": 864 }, { "epoch": 0.12053229290043893, "grad_norm": 2.3987364768981934, "learning_rate": 9.931336134290598e-06, "loss": 0.15213966369628906, "step": 865 }, { "epoch": 0.12067163659165331, "grad_norm": 1.837159276008606, "learning_rate": 9.930946346292032e-06, "loss": 0.126739501953125, "step": 866 }, { "epoch": 0.1208109802828677, "grad_norm": 2.0698280334472656, "learning_rate": 9.930555462749762e-06, "loss": 0.13711929321289062, "step": 867 }, { "epoch": 0.12095032397408208, "grad_norm": 1.3877984285354614, "learning_rate": 9.930163483750636e-06, "loss": 0.08395099639892578, "step": 868 }, { "epoch": 0.12108966766529645, "grad_norm": 3.7408783435821533, "learning_rate": 9.92977040938174e-06, "loss": 0.15201187133789062, "step": 869 }, { "epoch": 0.12122901135651083, "grad_norm": 1.337400197982788, "learning_rate": 9.929376239730408e-06, "loss": 0.10839080810546875, "step": 870 }, { "epoch": 0.12136835504772521, "grad_norm": 1.8364098072052002, "learning_rate": 9.928980974884215e-06, "loss": 0.13596343994140625, "step": 871 }, { "epoch": 0.1215076987389396, "grad_norm": 1.5362532138824463, "learning_rate": 9.928584614930981e-06, "loss": 0.1359100341796875, "step": 872 }, { "epoch": 0.12164704243015398, "grad_norm": 1.2761961221694946, "learning_rate": 9.928187159958764e-06, "loss": 0.12134170532226562, "step": 873 }, { "epoch": 0.12178638612136836, "grad_norm": 1.7400174140930176, "learning_rate": 9.927788610055875e-06, "loss": 0.15717315673828125, "step": 874 }, { "epoch": 0.12192572981258273, "grad_norm": 1.294701099395752, "learning_rate": 9.92738896531086e-06, "loss": 0.11321449279785156, "step": 875 }, { "epoch": 0.12206507350379711, "grad_norm": 2.9460957050323486, "learning_rate": 9.926988225812511e-06, "loss": 0.16394424438476562, "step": 876 }, { "epoch": 0.1222044171950115, "grad_norm": 0.9970824122428894, "learning_rate": 9.926586391649863e-06, "loss": 0.07205963134765625, "step": 877 }, { "epoch": 0.12234376088622588, "grad_norm": 5.047313213348389, "learning_rate": 9.926183462912196e-06, "loss": 0.11138916015625, "step": 878 }, { "epoch": 0.12248310457744026, "grad_norm": 3.8090784549713135, "learning_rate": 9.925779439689028e-06, "loss": 0.1392822265625, "step": 879 }, { "epoch": 0.12262244826865464, "grad_norm": 1.6350536346435547, "learning_rate": 9.925374322070126e-06, "loss": 0.11841964721679688, "step": 880 }, { "epoch": 0.12276179195986901, "grad_norm": 1.2674857378005981, "learning_rate": 9.9249681101455e-06, "loss": 0.12849807739257812, "step": 881 }, { "epoch": 0.1229011356510834, "grad_norm": 1.2336673736572266, "learning_rate": 9.924560804005397e-06, "loss": 0.10401344299316406, "step": 882 }, { "epoch": 0.12304047934229778, "grad_norm": 1.3660746812820435, "learning_rate": 9.924152403740315e-06, "loss": 0.12233352661132812, "step": 883 }, { "epoch": 0.12317982303351216, "grad_norm": 0.9952439069747925, "learning_rate": 9.923742909440987e-06, "loss": 0.13697433471679688, "step": 884 }, { "epoch": 0.12331916672472654, "grad_norm": 1.4050688743591309, "learning_rate": 9.923332321198396e-06, "loss": 0.17056846618652344, "step": 885 }, { "epoch": 0.12345851041594091, "grad_norm": 4.577809810638428, "learning_rate": 9.922920639103766e-06, "loss": 0.16692161560058594, "step": 886 }, { "epoch": 0.12359785410715529, "grad_norm": 2.0981602668762207, "learning_rate": 9.92250786324856e-06, "loss": 0.17106246948242188, "step": 887 }, { "epoch": 0.12373719779836968, "grad_norm": 3.4991607666015625, "learning_rate": 9.922093993724492e-06, "loss": 0.11417388916015625, "step": 888 }, { "epoch": 0.12387654148958406, "grad_norm": 1.8839601278305054, "learning_rate": 9.92167903062351e-06, "loss": 0.1222381591796875, "step": 889 }, { "epoch": 0.12401588518079844, "grad_norm": 2.0493786334991455, "learning_rate": 9.921262974037813e-06, "loss": 0.1186065673828125, "step": 890 }, { "epoch": 0.12415522887201282, "grad_norm": 3.0305721759796143, "learning_rate": 9.920845824059836e-06, "loss": 0.09667396545410156, "step": 891 }, { "epoch": 0.12429457256322719, "grad_norm": 4.631941318511963, "learning_rate": 9.920427580782263e-06, "loss": 0.16551589965820312, "step": 892 }, { "epoch": 0.12443391625444158, "grad_norm": 2.3744049072265625, "learning_rate": 9.920008244298016e-06, "loss": 0.11404609680175781, "step": 893 }, { "epoch": 0.12457325994565596, "grad_norm": 2.1509804725646973, "learning_rate": 9.919587814700262e-06, "loss": 0.12093925476074219, "step": 894 }, { "epoch": 0.12471260363687034, "grad_norm": 3.690701961517334, "learning_rate": 9.919166292082414e-06, "loss": 0.15124893188476562, "step": 895 }, { "epoch": 0.12485194732808472, "grad_norm": 3.0548622608184814, "learning_rate": 9.91874367653812e-06, "loss": 0.15052223205566406, "step": 896 }, { "epoch": 0.1249912910192991, "grad_norm": 3.239874839782715, "learning_rate": 9.91831996816128e-06, "loss": 0.1407146453857422, "step": 897 }, { "epoch": 0.12513063471051347, "grad_norm": 1.6977906227111816, "learning_rate": 9.917895167046027e-06, "loss": 0.13953208923339844, "step": 898 }, { "epoch": 0.12526997840172785, "grad_norm": 2.055492877960205, "learning_rate": 9.917469273286749e-06, "loss": 0.12134552001953125, "step": 899 }, { "epoch": 0.12540932209294225, "grad_norm": 1.860037088394165, "learning_rate": 9.917042286978064e-06, "loss": 0.10687446594238281, "step": 900 }, { "epoch": 0.12554866578415663, "grad_norm": 2.2206954956054688, "learning_rate": 9.916614208214841e-06, "loss": 0.13385391235351562, "step": 901 }, { "epoch": 0.125688009475371, "grad_norm": 1.6816539764404297, "learning_rate": 9.91618503709219e-06, "loss": 0.10153770446777344, "step": 902 }, { "epoch": 0.1258273531665854, "grad_norm": 6.297918796539307, "learning_rate": 9.915754773705461e-06, "loss": 0.20024490356445312, "step": 903 }, { "epoch": 0.12596669685779976, "grad_norm": 1.0499135255813599, "learning_rate": 9.915323418150252e-06, "loss": 0.1164398193359375, "step": 904 }, { "epoch": 0.12610604054901414, "grad_norm": 2.2393150329589844, "learning_rate": 9.914890970522397e-06, "loss": 0.10892868041992188, "step": 905 }, { "epoch": 0.12624538424022852, "grad_norm": 1.3612406253814697, "learning_rate": 9.914457430917977e-06, "loss": 0.14824676513671875, "step": 906 }, { "epoch": 0.1263847279314429, "grad_norm": 2.0408668518066406, "learning_rate": 9.914022799433315e-06, "loss": 0.14533615112304688, "step": 907 }, { "epoch": 0.12652407162265727, "grad_norm": 2.3896849155426025, "learning_rate": 9.913587076164976e-06, "loss": 0.11905670166015625, "step": 908 }, { "epoch": 0.12666341531387165, "grad_norm": 3.8411037921905518, "learning_rate": 9.913150261209767e-06, "loss": 0.1389598846435547, "step": 909 }, { "epoch": 0.12680275900508606, "grad_norm": 1.8666280508041382, "learning_rate": 9.91271235466474e-06, "loss": 0.10832405090332031, "step": 910 }, { "epoch": 0.12694210269630044, "grad_norm": 1.491258144378662, "learning_rate": 9.912273356627188e-06, "loss": 0.1093597412109375, "step": 911 }, { "epoch": 0.1270814463875148, "grad_norm": 2.8522098064422607, "learning_rate": 9.911833267194643e-06, "loss": 0.17055702209472656, "step": 912 }, { "epoch": 0.1272207900787292, "grad_norm": 1.062656044960022, "learning_rate": 9.911392086464886e-06, "loss": 0.06942558288574219, "step": 913 }, { "epoch": 0.12736013376994357, "grad_norm": 1.269735336303711, "learning_rate": 9.910949814535936e-06, "loss": 0.13307762145996094, "step": 914 }, { "epoch": 0.12749947746115795, "grad_norm": 1.4313851594924927, "learning_rate": 9.910506451506056e-06, "loss": 0.10178184509277344, "step": 915 }, { "epoch": 0.12763882115237232, "grad_norm": 1.794067621231079, "learning_rate": 9.910061997473753e-06, "loss": 0.0731353759765625, "step": 916 }, { "epoch": 0.1277781648435867, "grad_norm": 1.212664008140564, "learning_rate": 9.909616452537772e-06, "loss": 0.09881973266601562, "step": 917 }, { "epoch": 0.12791750853480108, "grad_norm": 2.4786291122436523, "learning_rate": 9.909169816797102e-06, "loss": 0.0957489013671875, "step": 918 }, { "epoch": 0.12805685222601546, "grad_norm": 3.13810658454895, "learning_rate": 9.908722090350979e-06, "loss": 0.1250324249267578, "step": 919 }, { "epoch": 0.12819619591722986, "grad_norm": 1.6426526308059692, "learning_rate": 9.908273273298874e-06, "loss": 0.09784126281738281, "step": 920 }, { "epoch": 0.12833553960844424, "grad_norm": 2.1480534076690674, "learning_rate": 9.907823365740507e-06, "loss": 0.10575485229492188, "step": 921 }, { "epoch": 0.12847488329965862, "grad_norm": 1.4512734413146973, "learning_rate": 9.907372367775834e-06, "loss": 0.10442733764648438, "step": 922 }, { "epoch": 0.128614226990873, "grad_norm": 2.098043441772461, "learning_rate": 9.906920279505058e-06, "loss": 0.10557174682617188, "step": 923 }, { "epoch": 0.12875357068208737, "grad_norm": 8.155874252319336, "learning_rate": 9.906467101028625e-06, "loss": 0.2353668212890625, "step": 924 }, { "epoch": 0.12889291437330175, "grad_norm": 3.9646453857421875, "learning_rate": 9.906012832447219e-06, "loss": 0.1695842742919922, "step": 925 }, { "epoch": 0.12903225806451613, "grad_norm": 2.7919883728027344, "learning_rate": 9.905557473861764e-06, "loss": 0.11015129089355469, "step": 926 }, { "epoch": 0.1291716017557305, "grad_norm": 2.0643599033355713, "learning_rate": 9.905101025373438e-06, "loss": 0.15250396728515625, "step": 927 }, { "epoch": 0.12931094544694488, "grad_norm": 1.2086976766586304, "learning_rate": 9.904643487083648e-06, "loss": 0.10530471801757812, "step": 928 }, { "epoch": 0.12945028913815926, "grad_norm": 1.4754369258880615, "learning_rate": 9.90418485909405e-06, "loss": 0.09225082397460938, "step": 929 }, { "epoch": 0.12958963282937366, "grad_norm": 2.057001829147339, "learning_rate": 9.903725141506539e-06, "loss": 0.0952911376953125, "step": 930 }, { "epoch": 0.12972897652058804, "grad_norm": 2.3877036571502686, "learning_rate": 9.903264334423258e-06, "loss": 0.11377334594726562, "step": 931 }, { "epoch": 0.12986832021180242, "grad_norm": 1.8009529113769531, "learning_rate": 9.902802437946584e-06, "loss": 0.09631538391113281, "step": 932 }, { "epoch": 0.1300076639030168, "grad_norm": 2.068565607070923, "learning_rate": 9.902339452179142e-06, "loss": 0.14774703979492188, "step": 933 }, { "epoch": 0.13014700759423117, "grad_norm": 2.501396656036377, "learning_rate": 9.901875377223796e-06, "loss": 0.18851852416992188, "step": 934 }, { "epoch": 0.13028635128544555, "grad_norm": 1.9086467027664185, "learning_rate": 9.901410213183653e-06, "loss": 0.12717294692993164, "step": 935 }, { "epoch": 0.13042569497665993, "grad_norm": 3.742818832397461, "learning_rate": 9.900943960162061e-06, "loss": 0.2209625244140625, "step": 936 }, { "epoch": 0.1305650386678743, "grad_norm": 1.5281400680541992, "learning_rate": 9.900476618262612e-06, "loss": 0.11386871337890625, "step": 937 }, { "epoch": 0.13070438235908868, "grad_norm": 1.3746768236160278, "learning_rate": 9.900008187589138e-06, "loss": 0.09535598754882812, "step": 938 }, { "epoch": 0.13084372605030306, "grad_norm": 1.154545783996582, "learning_rate": 9.899538668245713e-06, "loss": 0.09383964538574219, "step": 939 }, { "epoch": 0.13098306974151747, "grad_norm": 2.551018714904785, "learning_rate": 9.899068060336656e-06, "loss": 0.1189117431640625, "step": 940 }, { "epoch": 0.13112241343273184, "grad_norm": 1.4672882556915283, "learning_rate": 9.898596363966523e-06, "loss": 0.11236953735351562, "step": 941 }, { "epoch": 0.13126175712394622, "grad_norm": 2.936674118041992, "learning_rate": 9.898123579240115e-06, "loss": 0.11356544494628906, "step": 942 }, { "epoch": 0.1314011008151606, "grad_norm": 0.9462618231773376, "learning_rate": 9.897649706262474e-06, "loss": 0.0743408203125, "step": 943 }, { "epoch": 0.13154044450637498, "grad_norm": 1.7569912672042847, "learning_rate": 9.897174745138883e-06, "loss": 0.12050056457519531, "step": 944 }, { "epoch": 0.13167978819758935, "grad_norm": 1.5822068452835083, "learning_rate": 9.896698695974866e-06, "loss": 0.118316650390625, "step": 945 }, { "epoch": 0.13181913188880373, "grad_norm": 1.5177350044250488, "learning_rate": 9.896221558876195e-06, "loss": 0.10832786560058594, "step": 946 }, { "epoch": 0.1319584755800181, "grad_norm": 1.5472376346588135, "learning_rate": 9.895743333948875e-06, "loss": 0.1272563934326172, "step": 947 }, { "epoch": 0.1320978192712325, "grad_norm": 2.0363235473632812, "learning_rate": 9.895264021299158e-06, "loss": 0.0849456787109375, "step": 948 }, { "epoch": 0.13223716296244686, "grad_norm": 1.1892768144607544, "learning_rate": 9.894783621033538e-06, "loss": 0.129364013671875, "step": 949 }, { "epoch": 0.13237650665366127, "grad_norm": 4.953368186950684, "learning_rate": 9.894302133258747e-06, "loss": 0.20486068725585938, "step": 950 }, { "epoch": 0.13251585034487565, "grad_norm": 0.7738628387451172, "learning_rate": 9.893819558081759e-06, "loss": 0.09299087524414062, "step": 951 }, { "epoch": 0.13265519403609002, "grad_norm": 2.1877174377441406, "learning_rate": 9.893335895609792e-06, "loss": 0.17313766479492188, "step": 952 }, { "epoch": 0.1327945377273044, "grad_norm": 2.0124945640563965, "learning_rate": 9.892851145950308e-06, "loss": 0.13207435607910156, "step": 953 }, { "epoch": 0.13293388141851878, "grad_norm": 2.014340400695801, "learning_rate": 9.892365309211005e-06, "loss": 0.11225318908691406, "step": 954 }, { "epoch": 0.13307322510973316, "grad_norm": 2.999178886413574, "learning_rate": 9.891878385499825e-06, "loss": 0.12664031982421875, "step": 955 }, { "epoch": 0.13321256880094753, "grad_norm": 1.2931232452392578, "learning_rate": 9.891390374924949e-06, "loss": 0.09814071655273438, "step": 956 }, { "epoch": 0.1333519124921619, "grad_norm": 3.149179220199585, "learning_rate": 9.890901277594806e-06, "loss": 0.14688682556152344, "step": 957 }, { "epoch": 0.1334912561833763, "grad_norm": 1.4984017610549927, "learning_rate": 9.89041109361806e-06, "loss": 0.0880889892578125, "step": 958 }, { "epoch": 0.13363059987459067, "grad_norm": 1.6382348537445068, "learning_rate": 9.889919823103618e-06, "loss": 0.10218238830566406, "step": 959 }, { "epoch": 0.13376994356580507, "grad_norm": 1.6810864210128784, "learning_rate": 9.889427466160633e-06, "loss": 0.1687450408935547, "step": 960 }, { "epoch": 0.13390928725701945, "grad_norm": 4.754077434539795, "learning_rate": 9.888934022898488e-06, "loss": 0.10555648803710938, "step": 961 }, { "epoch": 0.13404863094823383, "grad_norm": 5.879039764404297, "learning_rate": 9.888439493426824e-06, "loss": 0.139739990234375, "step": 962 }, { "epoch": 0.1341879746394482, "grad_norm": 4.016459941864014, "learning_rate": 9.887943877855505e-06, "loss": 0.11587142944335938, "step": 963 }, { "epoch": 0.13432731833066258, "grad_norm": 1.863180160522461, "learning_rate": 9.887447176294653e-06, "loss": 0.11307144165039062, "step": 964 }, { "epoch": 0.13446666202187696, "grad_norm": 2.6464433670043945, "learning_rate": 9.88694938885462e-06, "loss": 0.09642601013183594, "step": 965 }, { "epoch": 0.13460600571309134, "grad_norm": 2.147632360458374, "learning_rate": 9.886450515646005e-06, "loss": 0.1258563995361328, "step": 966 }, { "epoch": 0.13474534940430571, "grad_norm": 1.05220365524292, "learning_rate": 9.885950556779644e-06, "loss": 0.10730552673339844, "step": 967 }, { "epoch": 0.1348846930955201, "grad_norm": 1.9030753374099731, "learning_rate": 9.885449512366617e-06, "loss": 0.14672088623046875, "step": 968 }, { "epoch": 0.13502403678673447, "grad_norm": 1.913576364517212, "learning_rate": 9.884947382518247e-06, "loss": 0.10857582092285156, "step": 969 }, { "epoch": 0.13516338047794887, "grad_norm": 1.2597901821136475, "learning_rate": 9.88444416734609e-06, "loss": 0.11533927917480469, "step": 970 }, { "epoch": 0.13530272416916325, "grad_norm": 1.072048306465149, "learning_rate": 9.883939866961956e-06, "loss": 0.12041091918945312, "step": 971 }, { "epoch": 0.13544206786037763, "grad_norm": 5.611994743347168, "learning_rate": 9.883434481477885e-06, "loss": 0.13574600219726562, "step": 972 }, { "epoch": 0.135581411551592, "grad_norm": 5.158845901489258, "learning_rate": 9.882928011006163e-06, "loss": 0.13785743713378906, "step": 973 }, { "epoch": 0.13572075524280638, "grad_norm": 2.7756361961364746, "learning_rate": 9.882420455659316e-06, "loss": 0.13128280639648438, "step": 974 }, { "epoch": 0.13586009893402076, "grad_norm": 1.541153907775879, "learning_rate": 9.881911815550111e-06, "loss": 0.11407279968261719, "step": 975 }, { "epoch": 0.13599944262523514, "grad_norm": 3.6755411624908447, "learning_rate": 9.881402090791556e-06, "loss": 0.12953948974609375, "step": 976 }, { "epoch": 0.13613878631644952, "grad_norm": 3.8778457641601562, "learning_rate": 9.880891281496901e-06, "loss": 0.1380786895751953, "step": 977 }, { "epoch": 0.1362781300076639, "grad_norm": 1.2132763862609863, "learning_rate": 9.880379387779637e-06, "loss": 0.08445549011230469, "step": 978 }, { "epoch": 0.13641747369887827, "grad_norm": 2.256014823913574, "learning_rate": 9.879866409753493e-06, "loss": 0.1431751251220703, "step": 979 }, { "epoch": 0.13655681739009268, "grad_norm": 1.1358532905578613, "learning_rate": 9.879352347532442e-06, "loss": 0.11407661437988281, "step": 980 }, { "epoch": 0.13669616108130705, "grad_norm": 1.114951729774475, "learning_rate": 9.878837201230697e-06, "loss": 0.10241508483886719, "step": 981 }, { "epoch": 0.13683550477252143, "grad_norm": 3.1054773330688477, "learning_rate": 9.878320970962712e-06, "loss": 0.16690444946289062, "step": 982 }, { "epoch": 0.1369748484637358, "grad_norm": 2.066377639770508, "learning_rate": 9.877803656843182e-06, "loss": 0.09337615966796875, "step": 983 }, { "epoch": 0.1371141921549502, "grad_norm": 4.149231910705566, "learning_rate": 9.877285258987039e-06, "loss": 0.1590137481689453, "step": 984 }, { "epoch": 0.13725353584616456, "grad_norm": 2.630004405975342, "learning_rate": 9.876765777509463e-06, "loss": 0.10122489929199219, "step": 985 }, { "epoch": 0.13739287953737894, "grad_norm": 3.597561836242676, "learning_rate": 9.87624521252587e-06, "loss": 0.1190185546875, "step": 986 }, { "epoch": 0.13753222322859332, "grad_norm": 3.474681854248047, "learning_rate": 9.875723564151918e-06, "loss": 0.16026687622070312, "step": 987 }, { "epoch": 0.1376715669198077, "grad_norm": 3.458003282546997, "learning_rate": 9.875200832503505e-06, "loss": 0.12447166442871094, "step": 988 }, { "epoch": 0.13781091061102207, "grad_norm": 1.5881690979003906, "learning_rate": 9.874677017696769e-06, "loss": 0.12384414672851562, "step": 989 }, { "epoch": 0.13795025430223645, "grad_norm": 1.6673887968063354, "learning_rate": 9.87415211984809e-06, "loss": 0.10889434814453125, "step": 990 }, { "epoch": 0.13808959799345086, "grad_norm": 3.080457925796509, "learning_rate": 9.873626139074088e-06, "loss": 0.14409255981445312, "step": 991 }, { "epoch": 0.13822894168466524, "grad_norm": 2.937192440032959, "learning_rate": 9.873099075491626e-06, "loss": 0.10990715026855469, "step": 992 }, { "epoch": 0.1383682853758796, "grad_norm": 0.7786864638328552, "learning_rate": 9.872570929217804e-06, "loss": 0.08275032043457031, "step": 993 }, { "epoch": 0.138507629067094, "grad_norm": 3.808356523513794, "learning_rate": 9.872041700369965e-06, "loss": 0.13469886779785156, "step": 994 }, { "epoch": 0.13864697275830837, "grad_norm": 3.5202181339263916, "learning_rate": 9.871511389065689e-06, "loss": 0.12337875366210938, "step": 995 }, { "epoch": 0.13878631644952275, "grad_norm": 2.9403598308563232, "learning_rate": 9.870979995422803e-06, "loss": 0.1368560791015625, "step": 996 }, { "epoch": 0.13892566014073712, "grad_norm": 2.7480716705322266, "learning_rate": 9.870447519559366e-06, "loss": 0.09333610534667969, "step": 997 }, { "epoch": 0.1390650038319515, "grad_norm": 2.799710273742676, "learning_rate": 9.869913961593685e-06, "loss": 0.08749961853027344, "step": 998 }, { "epoch": 0.13920434752316588, "grad_norm": 3.078524112701416, "learning_rate": 9.869379321644306e-06, "loss": 0.1253662109375, "step": 999 }, { "epoch": 0.13934369121438026, "grad_norm": 1.9207063913345337, "learning_rate": 9.868843599830009e-06, "loss": 0.1538524627685547, "step": 1000 }, { "epoch": 0.13948303490559466, "grad_norm": 1.0987446308135986, "learning_rate": 9.868306796269822e-06, "loss": 0.11619186401367188, "step": 1001 }, { "epoch": 0.13962237859680904, "grad_norm": 1.9249656200408936, "learning_rate": 9.86776891108301e-06, "loss": 0.10321998596191406, "step": 1002 }, { "epoch": 0.13976172228802342, "grad_norm": 6.254263877868652, "learning_rate": 9.86722994438908e-06, "loss": 0.16740036010742188, "step": 1003 }, { "epoch": 0.1399010659792378, "grad_norm": 1.0408365726470947, "learning_rate": 9.866689896307778e-06, "loss": 0.10057830810546875, "step": 1004 }, { "epoch": 0.14004040967045217, "grad_norm": 1.8542811870574951, "learning_rate": 9.866148766959087e-06, "loss": 0.09086036682128906, "step": 1005 }, { "epoch": 0.14017975336166655, "grad_norm": 1.3008568286895752, "learning_rate": 9.865606556463239e-06, "loss": 0.0931243896484375, "step": 1006 }, { "epoch": 0.14031909705288093, "grad_norm": 1.5243746042251587, "learning_rate": 9.865063264940695e-06, "loss": 0.08152198791503906, "step": 1007 }, { "epoch": 0.1404584407440953, "grad_norm": 1.5090724229812622, "learning_rate": 9.864518892512167e-06, "loss": 0.10435295104980469, "step": 1008 }, { "epoch": 0.14059778443530968, "grad_norm": 1.8664700984954834, "learning_rate": 9.863973439298597e-06, "loss": 0.14229583740234375, "step": 1009 }, { "epoch": 0.14073712812652406, "grad_norm": 1.8609627485275269, "learning_rate": 9.863426905421179e-06, "loss": 0.10617351531982422, "step": 1010 }, { "epoch": 0.14087647181773846, "grad_norm": 0.8662399053573608, "learning_rate": 9.862879291001334e-06, "loss": 0.07611083984375, "step": 1011 }, { "epoch": 0.14101581550895284, "grad_norm": 2.978705406188965, "learning_rate": 9.862330596160732e-06, "loss": 0.19106292724609375, "step": 1012 }, { "epoch": 0.14115515920016722, "grad_norm": 1.8152658939361572, "learning_rate": 9.861780821021282e-06, "loss": 0.09973716735839844, "step": 1013 }, { "epoch": 0.1412945028913816, "grad_norm": 1.0726598501205444, "learning_rate": 9.861229965705129e-06, "loss": 0.08348464965820312, "step": 1014 }, { "epoch": 0.14143384658259597, "grad_norm": 0.9365045428276062, "learning_rate": 9.86067803033466e-06, "loss": 0.1428699493408203, "step": 1015 }, { "epoch": 0.14157319027381035, "grad_norm": 1.6964683532714844, "learning_rate": 9.860125015032506e-06, "loss": 0.09793472290039062, "step": 1016 }, { "epoch": 0.14171253396502473, "grad_norm": 1.430267333984375, "learning_rate": 9.859570919921533e-06, "loss": 0.1139984130859375, "step": 1017 }, { "epoch": 0.1418518776562391, "grad_norm": 0.8479512333869934, "learning_rate": 9.859015745124844e-06, "loss": 0.08798980712890625, "step": 1018 }, { "epoch": 0.14199122134745348, "grad_norm": 2.6580092906951904, "learning_rate": 9.858459490765792e-06, "loss": 0.11228752136230469, "step": 1019 }, { "epoch": 0.14213056503866786, "grad_norm": 2.4479312896728516, "learning_rate": 9.857902156967961e-06, "loss": 0.10344314575195312, "step": 1020 }, { "epoch": 0.14226990872988227, "grad_norm": 1.1891297101974487, "learning_rate": 9.857343743855178e-06, "loss": 0.08953285217285156, "step": 1021 }, { "epoch": 0.14240925242109664, "grad_norm": 1.8463282585144043, "learning_rate": 9.856784251551512e-06, "loss": 0.11816787719726562, "step": 1022 }, { "epoch": 0.14254859611231102, "grad_norm": 1.1369330883026123, "learning_rate": 9.856223680181267e-06, "loss": 0.09880828857421875, "step": 1023 }, { "epoch": 0.1426879398035254, "grad_norm": 4.128539562225342, "learning_rate": 9.85566202986899e-06, "loss": 0.18663406372070312, "step": 1024 }, { "epoch": 0.14282728349473978, "grad_norm": 3.0448343753814697, "learning_rate": 9.855099300739463e-06, "loss": 0.12615203857421875, "step": 1025 }, { "epoch": 0.14296662718595415, "grad_norm": 1.319766640663147, "learning_rate": 9.854535492917718e-06, "loss": 0.09950065612792969, "step": 1026 }, { "epoch": 0.14310597087716853, "grad_norm": 1.4163964986801147, "learning_rate": 9.853970606529018e-06, "loss": 0.06732177734375, "step": 1027 }, { "epoch": 0.1432453145683829, "grad_norm": 2.028414011001587, "learning_rate": 9.853404641698866e-06, "loss": 0.11108970642089844, "step": 1028 }, { "epoch": 0.1433846582595973, "grad_norm": 1.3858832120895386, "learning_rate": 9.85283759855301e-06, "loss": 0.12947654724121094, "step": 1029 }, { "epoch": 0.14352400195081166, "grad_norm": 3.095322847366333, "learning_rate": 9.852269477217428e-06, "loss": 0.111572265625, "step": 1030 }, { "epoch": 0.14366334564202607, "grad_norm": 2.884150266647339, "learning_rate": 9.85170027781835e-06, "loss": 0.14571380615234375, "step": 1031 }, { "epoch": 0.14380268933324045, "grad_norm": 2.459848403930664, "learning_rate": 9.851130000482236e-06, "loss": 0.10236740112304688, "step": 1032 }, { "epoch": 0.14394203302445482, "grad_norm": 1.857698678970337, "learning_rate": 9.85055864533579e-06, "loss": 0.11304473876953125, "step": 1033 }, { "epoch": 0.1440813767156692, "grad_norm": 1.9749637842178345, "learning_rate": 9.849986212505952e-06, "loss": 0.13237762451171875, "step": 1034 }, { "epoch": 0.14422072040688358, "grad_norm": 1.093501329421997, "learning_rate": 9.849412702119905e-06, "loss": 0.09447479248046875, "step": 1035 }, { "epoch": 0.14436006409809796, "grad_norm": 1.7946789264678955, "learning_rate": 9.848838114305069e-06, "loss": 0.10604286193847656, "step": 1036 }, { "epoch": 0.14449940778931233, "grad_norm": 2.332737684249878, "learning_rate": 9.848262449189105e-06, "loss": 0.1371307373046875, "step": 1037 }, { "epoch": 0.1446387514805267, "grad_norm": 3.7334272861480713, "learning_rate": 9.847685706899913e-06, "loss": 0.14175987243652344, "step": 1038 }, { "epoch": 0.1447780951717411, "grad_norm": 2.5513691902160645, "learning_rate": 9.84710788756563e-06, "loss": 0.11175155639648438, "step": 1039 }, { "epoch": 0.14491743886295547, "grad_norm": 1.2427335977554321, "learning_rate": 9.846528991314638e-06, "loss": 0.08874130249023438, "step": 1040 }, { "epoch": 0.14505678255416987, "grad_norm": 1.987300157546997, "learning_rate": 9.845949018275551e-06, "loss": 0.11796951293945312, "step": 1041 }, { "epoch": 0.14519612624538425, "grad_norm": 2.1240320205688477, "learning_rate": 9.845367968577229e-06, "loss": 0.1269855499267578, "step": 1042 }, { "epoch": 0.14533546993659863, "grad_norm": 5.146514415740967, "learning_rate": 9.844785842348764e-06, "loss": 0.12022590637207031, "step": 1043 }, { "epoch": 0.145474813627813, "grad_norm": 3.530014991760254, "learning_rate": 9.844202639719492e-06, "loss": 0.17010879516601562, "step": 1044 }, { "epoch": 0.14561415731902738, "grad_norm": 2.0769999027252197, "learning_rate": 9.84361836081899e-06, "loss": 0.12293624877929688, "step": 1045 }, { "epoch": 0.14575350101024176, "grad_norm": 3.7609212398529053, "learning_rate": 9.84303300577707e-06, "loss": 0.12495613098144531, "step": 1046 }, { "epoch": 0.14589284470145614, "grad_norm": 2.452101230621338, "learning_rate": 9.842446574723786e-06, "loss": 0.095794677734375, "step": 1047 }, { "epoch": 0.14603218839267051, "grad_norm": 1.186803936958313, "learning_rate": 9.841859067789425e-06, "loss": 0.12148284912109375, "step": 1048 }, { "epoch": 0.1461715320838849, "grad_norm": 3.0730347633361816, "learning_rate": 9.841270485104522e-06, "loss": 0.1352519989013672, "step": 1049 }, { "epoch": 0.14631087577509927, "grad_norm": 0.9133756756782532, "learning_rate": 9.840680826799845e-06, "loss": 0.07359695434570312, "step": 1050 }, { "epoch": 0.14645021946631367, "grad_norm": 1.7205007076263428, "learning_rate": 9.840090093006403e-06, "loss": 0.11304855346679688, "step": 1051 }, { "epoch": 0.14658956315752805, "grad_norm": 0.9804624915122986, "learning_rate": 9.839498283855444e-06, "loss": 0.11765098571777344, "step": 1052 }, { "epoch": 0.14672890684874243, "grad_norm": 1.3363525867462158, "learning_rate": 9.838905399478453e-06, "loss": 0.09582328796386719, "step": 1053 }, { "epoch": 0.1468682505399568, "grad_norm": 0.8113605380058289, "learning_rate": 9.838311440007159e-06, "loss": 0.08640289306640625, "step": 1054 }, { "epoch": 0.14700759423117118, "grad_norm": 2.3688225746154785, "learning_rate": 9.83771640557352e-06, "loss": 0.10682106018066406, "step": 1055 }, { "epoch": 0.14714693792238556, "grad_norm": 4.134925365447998, "learning_rate": 9.837120296309744e-06, "loss": 0.23162078857421875, "step": 1056 }, { "epoch": 0.14728628161359994, "grad_norm": 0.8805249929428101, "learning_rate": 9.836523112348271e-06, "loss": 0.12408447265625, "step": 1057 }, { "epoch": 0.14742562530481432, "grad_norm": 1.6230392456054688, "learning_rate": 9.835924853821783e-06, "loss": 0.10920906066894531, "step": 1058 }, { "epoch": 0.1475649689960287, "grad_norm": 2.1271767616271973, "learning_rate": 9.8353255208632e-06, "loss": 0.12268829345703125, "step": 1059 }, { "epoch": 0.14770431268724307, "grad_norm": 1.4266064167022705, "learning_rate": 9.834725113605676e-06, "loss": 0.12659072875976562, "step": 1060 }, { "epoch": 0.14784365637845748, "grad_norm": 1.4301948547363281, "learning_rate": 9.83412363218261e-06, "loss": 0.08237075805664062, "step": 1061 }, { "epoch": 0.14798300006967186, "grad_norm": 1.3779383897781372, "learning_rate": 9.833521076727638e-06, "loss": 0.09731864929199219, "step": 1062 }, { "epoch": 0.14812234376088623, "grad_norm": 2.8835601806640625, "learning_rate": 9.832917447374637e-06, "loss": 0.1078948974609375, "step": 1063 }, { "epoch": 0.1482616874521006, "grad_norm": 2.980736255645752, "learning_rate": 9.832312744257715e-06, "loss": 0.10147762298583984, "step": 1064 }, { "epoch": 0.148401031143315, "grad_norm": 2.086592674255371, "learning_rate": 9.831706967511223e-06, "loss": 0.12428092956542969, "step": 1065 }, { "epoch": 0.14854037483452937, "grad_norm": 1.6085100173950195, "learning_rate": 9.831100117269755e-06, "loss": 0.14163780212402344, "step": 1066 }, { "epoch": 0.14867971852574374, "grad_norm": 2.8041043281555176, "learning_rate": 9.830492193668135e-06, "loss": 0.09098434448242188, "step": 1067 }, { "epoch": 0.14881906221695812, "grad_norm": 3.2826614379882812, "learning_rate": 9.829883196841433e-06, "loss": 0.1322307586669922, "step": 1068 }, { "epoch": 0.1489584059081725, "grad_norm": 0.6832737326622009, "learning_rate": 9.829273126924952e-06, "loss": 0.10031318664550781, "step": 1069 }, { "epoch": 0.14909774959938688, "grad_norm": 2.552924156188965, "learning_rate": 9.828661984054238e-06, "loss": 0.11118698120117188, "step": 1070 }, { "epoch": 0.14923709329060128, "grad_norm": 2.159208059310913, "learning_rate": 9.82804976836507e-06, "loss": 0.09140205383300781, "step": 1071 }, { "epoch": 0.14937643698181566, "grad_norm": 1.3345681428909302, "learning_rate": 9.827436479993468e-06, "loss": 0.08770370483398438, "step": 1072 }, { "epoch": 0.14951578067303004, "grad_norm": 1.4990051984786987, "learning_rate": 9.826822119075694e-06, "loss": 0.0811920166015625, "step": 1073 }, { "epoch": 0.1496551243642444, "grad_norm": 1.4960527420043945, "learning_rate": 9.826206685748242e-06, "loss": 0.1261444091796875, "step": 1074 }, { "epoch": 0.1497944680554588, "grad_norm": 1.0012563467025757, "learning_rate": 9.825590180147852e-06, "loss": 0.07767677307128906, "step": 1075 }, { "epoch": 0.14993381174667317, "grad_norm": 0.9669225811958313, "learning_rate": 9.82497260241149e-06, "loss": 0.096588134765625, "step": 1076 }, { "epoch": 0.15007315543788755, "grad_norm": 2.967665672302246, "learning_rate": 9.824353952676375e-06, "loss": 0.12472915649414062, "step": 1077 }, { "epoch": 0.15021249912910192, "grad_norm": 1.266595482826233, "learning_rate": 9.823734231079953e-06, "loss": 0.09095382690429688, "step": 1078 }, { "epoch": 0.1503518428203163, "grad_norm": 2.6708009243011475, "learning_rate": 9.823113437759912e-06, "loss": 0.1365814208984375, "step": 1079 }, { "epoch": 0.15049118651153068, "grad_norm": 1.5288232564926147, "learning_rate": 9.822491572854178e-06, "loss": 0.13714981079101562, "step": 1080 }, { "epoch": 0.15063053020274508, "grad_norm": 0.9788911938667297, "learning_rate": 9.821868636500917e-06, "loss": 0.12567138671875, "step": 1081 }, { "epoch": 0.15076987389395946, "grad_norm": 1.5539700984954834, "learning_rate": 9.82124462883853e-06, "loss": 0.09547042846679688, "step": 1082 }, { "epoch": 0.15090921758517384, "grad_norm": 5.447384834289551, "learning_rate": 9.820619550005656e-06, "loss": 0.14908790588378906, "step": 1083 }, { "epoch": 0.15104856127638822, "grad_norm": 0.9541572332382202, "learning_rate": 9.819993400141176e-06, "loss": 0.10042953491210938, "step": 1084 }, { "epoch": 0.1511879049676026, "grad_norm": 1.7170088291168213, "learning_rate": 9.819366179384204e-06, "loss": 0.11655044555664062, "step": 1085 }, { "epoch": 0.15132724865881697, "grad_norm": 1.2276512384414673, "learning_rate": 9.818737887874097e-06, "loss": 0.10511016845703125, "step": 1086 }, { "epoch": 0.15146659235003135, "grad_norm": 1.3472354412078857, "learning_rate": 9.818108525750442e-06, "loss": 0.10983467102050781, "step": 1087 }, { "epoch": 0.15160593604124573, "grad_norm": 1.5226141214370728, "learning_rate": 9.817478093153074e-06, "loss": 0.0911417007446289, "step": 1088 }, { "epoch": 0.1517452797324601, "grad_norm": 1.157883644104004, "learning_rate": 9.816846590222058e-06, "loss": 0.12926483154296875, "step": 1089 }, { "epoch": 0.15188462342367448, "grad_norm": 1.6668484210968018, "learning_rate": 9.8162140170977e-06, "loss": 0.08575057983398438, "step": 1090 }, { "epoch": 0.15202396711488889, "grad_norm": 2.7954750061035156, "learning_rate": 9.815580373920543e-06, "loss": 0.11421966552734375, "step": 1091 }, { "epoch": 0.15216331080610326, "grad_norm": 2.2789266109466553, "learning_rate": 9.81494566083137e-06, "loss": 0.10859870910644531, "step": 1092 }, { "epoch": 0.15230265449731764, "grad_norm": 2.1020660400390625, "learning_rate": 9.814309877971195e-06, "loss": 0.10151863098144531, "step": 1093 }, { "epoch": 0.15244199818853202, "grad_norm": 0.8721752762794495, "learning_rate": 9.81367302548128e-06, "loss": 0.10521697998046875, "step": 1094 }, { "epoch": 0.1525813418797464, "grad_norm": 1.6129738092422485, "learning_rate": 9.813035103503116e-06, "loss": 0.10347557067871094, "step": 1095 }, { "epoch": 0.15272068557096077, "grad_norm": 4.267656326293945, "learning_rate": 9.812396112178437e-06, "loss": 0.12789535522460938, "step": 1096 }, { "epoch": 0.15286002926217515, "grad_norm": 2.1778080463409424, "learning_rate": 9.811756051649209e-06, "loss": 0.1422595977783203, "step": 1097 }, { "epoch": 0.15299937295338953, "grad_norm": 2.276245594024658, "learning_rate": 9.811114922057642e-06, "loss": 0.12933731079101562, "step": 1098 }, { "epoch": 0.1531387166446039, "grad_norm": 1.418143630027771, "learning_rate": 9.810472723546178e-06, "loss": 0.10715866088867188, "step": 1099 }, { "epoch": 0.15327806033581828, "grad_norm": 2.2123141288757324, "learning_rate": 9.8098294562575e-06, "loss": 0.10009002685546875, "step": 1100 }, { "epoch": 0.1534174040270327, "grad_norm": 1.142788290977478, "learning_rate": 9.809185120334528e-06, "loss": 0.11750221252441406, "step": 1101 }, { "epoch": 0.15355674771824707, "grad_norm": 2.747541904449463, "learning_rate": 9.808539715920415e-06, "loss": 0.12290573120117188, "step": 1102 }, { "epoch": 0.15369609140946144, "grad_norm": 1.227493166923523, "learning_rate": 9.807893243158562e-06, "loss": 0.09665870666503906, "step": 1103 }, { "epoch": 0.15383543510067582, "grad_norm": 1.0132280588150024, "learning_rate": 9.807245702192593e-06, "loss": 0.10650634765625, "step": 1104 }, { "epoch": 0.1539747787918902, "grad_norm": 1.8064380884170532, "learning_rate": 9.80659709316638e-06, "loss": 0.09995841979980469, "step": 1105 }, { "epoch": 0.15411412248310458, "grad_norm": 1.8370006084442139, "learning_rate": 9.805947416224034e-06, "loss": 0.10512161254882812, "step": 1106 }, { "epoch": 0.15425346617431895, "grad_norm": 2.386532783508301, "learning_rate": 9.80529667150989e-06, "loss": 0.1309518814086914, "step": 1107 }, { "epoch": 0.15439280986553333, "grad_norm": 1.0302255153656006, "learning_rate": 9.804644859168534e-06, "loss": 0.07882499694824219, "step": 1108 }, { "epoch": 0.1545321535567477, "grad_norm": 1.8083314895629883, "learning_rate": 9.80399197934478e-06, "loss": 0.12176132202148438, "step": 1109 }, { "epoch": 0.1546714972479621, "grad_norm": 0.8901605606079102, "learning_rate": 9.803338032183686e-06, "loss": 0.08805084228515625, "step": 1110 }, { "epoch": 0.1548108409391765, "grad_norm": 2.8761041164398193, "learning_rate": 9.802683017830544e-06, "loss": 0.10262680053710938, "step": 1111 }, { "epoch": 0.15495018463039087, "grad_norm": 3.2882847785949707, "learning_rate": 9.802026936430883e-06, "loss": 0.10912513732910156, "step": 1112 }, { "epoch": 0.15508952832160525, "grad_norm": 1.2784273624420166, "learning_rate": 9.801369788130468e-06, "loss": 0.11402320861816406, "step": 1113 }, { "epoch": 0.15522887201281962, "grad_norm": 2.7230794429779053, "learning_rate": 9.800711573075303e-06, "loss": 0.11358451843261719, "step": 1114 }, { "epoch": 0.155368215704034, "grad_norm": 2.935987949371338, "learning_rate": 9.80005229141163e-06, "loss": 0.13935279846191406, "step": 1115 }, { "epoch": 0.15550755939524838, "grad_norm": 2.046396017074585, "learning_rate": 9.799391943285923e-06, "loss": 0.11237335205078125, "step": 1116 }, { "epoch": 0.15564690308646276, "grad_norm": 3.476052761077881, "learning_rate": 9.798730528844899e-06, "loss": 0.1320781707763672, "step": 1117 }, { "epoch": 0.15578624677767713, "grad_norm": 1.3277158737182617, "learning_rate": 9.79806804823551e-06, "loss": 0.1110382080078125, "step": 1118 }, { "epoch": 0.1559255904688915, "grad_norm": 1.6073486804962158, "learning_rate": 9.79740450160494e-06, "loss": 0.12734603881835938, "step": 1119 }, { "epoch": 0.1560649341601059, "grad_norm": 0.8410440683364868, "learning_rate": 9.796739889100617e-06, "loss": 0.08813667297363281, "step": 1120 }, { "epoch": 0.1562042778513203, "grad_norm": 3.4864063262939453, "learning_rate": 9.796074210870204e-06, "loss": 0.16645050048828125, "step": 1121 }, { "epoch": 0.15634362154253467, "grad_norm": 1.786096215248108, "learning_rate": 9.795407467061596e-06, "loss": 0.13909530639648438, "step": 1122 }, { "epoch": 0.15648296523374905, "grad_norm": 2.565213680267334, "learning_rate": 9.794739657822929e-06, "loss": 0.11962127685546875, "step": 1123 }, { "epoch": 0.15662230892496343, "grad_norm": 1.3379079103469849, "learning_rate": 9.794070783302576e-06, "loss": 0.10598564147949219, "step": 1124 }, { "epoch": 0.1567616526161778, "grad_norm": 1.1145917177200317, "learning_rate": 9.793400843649146e-06, "loss": 0.09069252014160156, "step": 1125 }, { "epoch": 0.15690099630739218, "grad_norm": 2.666562080383301, "learning_rate": 9.792729839011484e-06, "loss": 0.14253807067871094, "step": 1126 }, { "epoch": 0.15704033999860656, "grad_norm": 2.332292079925537, "learning_rate": 9.792057769538672e-06, "loss": 0.15527915954589844, "step": 1127 }, { "epoch": 0.15717968368982094, "grad_norm": 0.9249873757362366, "learning_rate": 9.791384635380028e-06, "loss": 0.09765434265136719, "step": 1128 }, { "epoch": 0.15731902738103531, "grad_norm": 4.410477638244629, "learning_rate": 9.790710436685105e-06, "loss": 0.118133544921875, "step": 1129 }, { "epoch": 0.1574583710722497, "grad_norm": 3.113246440887451, "learning_rate": 9.790035173603699e-06, "loss": 0.12039566040039062, "step": 1130 }, { "epoch": 0.1575977147634641, "grad_norm": 1.0260038375854492, "learning_rate": 9.789358846285835e-06, "loss": 0.12094688415527344, "step": 1131 }, { "epoch": 0.15773705845467847, "grad_norm": 2.701874017715454, "learning_rate": 9.788681454881778e-06, "loss": 0.16746902465820312, "step": 1132 }, { "epoch": 0.15787640214589285, "grad_norm": 2.769437551498413, "learning_rate": 9.78800299954203e-06, "loss": 0.1392078399658203, "step": 1133 }, { "epoch": 0.15801574583710723, "grad_norm": 2.17820405960083, "learning_rate": 9.787323480417328e-06, "loss": 0.0839691162109375, "step": 1134 }, { "epoch": 0.1581550895283216, "grad_norm": 0.9839970469474792, "learning_rate": 9.786642897658645e-06, "loss": 0.09945297241210938, "step": 1135 }, { "epoch": 0.15829443321953598, "grad_norm": 4.5529913902282715, "learning_rate": 9.78596125141719e-06, "loss": 0.124786376953125, "step": 1136 }, { "epoch": 0.15843377691075036, "grad_norm": 7.3030290603637695, "learning_rate": 9.785278541844409e-06, "loss": 0.17746734619140625, "step": 1137 }, { "epoch": 0.15857312060196474, "grad_norm": 1.85294508934021, "learning_rate": 9.784594769091989e-06, "loss": 0.08985137939453125, "step": 1138 }, { "epoch": 0.15871246429317912, "grad_norm": 2.7220616340637207, "learning_rate": 9.783909933311844e-06, "loss": 0.10800361633300781, "step": 1139 }, { "epoch": 0.1588518079843935, "grad_norm": 6.759482383728027, "learning_rate": 9.78322403465613e-06, "loss": 0.14719772338867188, "step": 1140 }, { "epoch": 0.1589911516756079, "grad_norm": 3.751554489135742, "learning_rate": 9.782537073277238e-06, "loss": 0.13608932495117188, "step": 1141 }, { "epoch": 0.15913049536682228, "grad_norm": 3.185969114303589, "learning_rate": 9.781849049327796e-06, "loss": 0.12254142761230469, "step": 1142 }, { "epoch": 0.15926983905803666, "grad_norm": 2.2381632328033447, "learning_rate": 9.781159962960667e-06, "loss": 0.11780548095703125, "step": 1143 }, { "epoch": 0.15940918274925103, "grad_norm": 4.682390213012695, "learning_rate": 9.78046981432895e-06, "loss": 0.14595413208007812, "step": 1144 }, { "epoch": 0.1595485264404654, "grad_norm": 2.784904956817627, "learning_rate": 9.77977860358598e-06, "loss": 0.1572284698486328, "step": 1145 }, { "epoch": 0.1596878701316798, "grad_norm": 2.8427040576934814, "learning_rate": 9.779086330885328e-06, "loss": 0.13162803649902344, "step": 1146 }, { "epoch": 0.15982721382289417, "grad_norm": 2.4826951026916504, "learning_rate": 9.778392996380803e-06, "loss": 0.11373710632324219, "step": 1147 }, { "epoch": 0.15996655751410854, "grad_norm": 2.5169310569763184, "learning_rate": 9.777698600226446e-06, "loss": 0.09879493713378906, "step": 1148 }, { "epoch": 0.16010590120532292, "grad_norm": 2.125706672668457, "learning_rate": 9.777003142576536e-06, "loss": 0.11262893676757812, "step": 1149 }, { "epoch": 0.1602452448965373, "grad_norm": 1.8545984029769897, "learning_rate": 9.77630662358559e-06, "loss": 0.13683128356933594, "step": 1150 }, { "epoch": 0.1603845885877517, "grad_norm": 1.7176673412322998, "learning_rate": 9.775609043408356e-06, "loss": 0.1165771484375, "step": 1151 }, { "epoch": 0.16052393227896608, "grad_norm": 1.465571641921997, "learning_rate": 9.774910402199821e-06, "loss": 0.08298683166503906, "step": 1152 }, { "epoch": 0.16066327597018046, "grad_norm": 2.9790024757385254, "learning_rate": 9.774210700115209e-06, "loss": 0.11755561828613281, "step": 1153 }, { "epoch": 0.16080261966139484, "grad_norm": 2.3028526306152344, "learning_rate": 9.773509937309978e-06, "loss": 0.14222335815429688, "step": 1154 }, { "epoch": 0.1609419633526092, "grad_norm": 2.2285635471343994, "learning_rate": 9.772808113939819e-06, "loss": 0.14792633056640625, "step": 1155 }, { "epoch": 0.1610813070438236, "grad_norm": 3.2857322692871094, "learning_rate": 9.77210523016066e-06, "loss": 0.09727096557617188, "step": 1156 }, { "epoch": 0.16122065073503797, "grad_norm": 2.80137038230896, "learning_rate": 9.771401286128668e-06, "loss": 0.09875297546386719, "step": 1157 }, { "epoch": 0.16135999442625235, "grad_norm": 2.1204981803894043, "learning_rate": 9.770696282000245e-06, "loss": 0.13214492797851562, "step": 1158 }, { "epoch": 0.16149933811746672, "grad_norm": 1.7803642749786377, "learning_rate": 9.769990217932023e-06, "loss": 0.10540008544921875, "step": 1159 }, { "epoch": 0.1616386818086811, "grad_norm": 2.0871684551239014, "learning_rate": 9.769283094080878e-06, "loss": 0.14534759521484375, "step": 1160 }, { "epoch": 0.1617780254998955, "grad_norm": 3.0407679080963135, "learning_rate": 9.768574910603912e-06, "loss": 0.12723541259765625, "step": 1161 }, { "epoch": 0.16191736919110988, "grad_norm": 0.849249005317688, "learning_rate": 9.767865667658472e-06, "loss": 0.07295036315917969, "step": 1162 }, { "epoch": 0.16205671288232426, "grad_norm": 1.6817564964294434, "learning_rate": 9.76715536540213e-06, "loss": 0.10634613037109375, "step": 1163 }, { "epoch": 0.16219605657353864, "grad_norm": 1.455728530883789, "learning_rate": 9.766444003992704e-06, "loss": 0.10127830505371094, "step": 1164 }, { "epoch": 0.16233540026475302, "grad_norm": 2.9737741947174072, "learning_rate": 9.765731583588237e-06, "loss": 0.11976051330566406, "step": 1165 }, { "epoch": 0.1624747439559674, "grad_norm": 5.381644248962402, "learning_rate": 9.765018104347017e-06, "loss": 0.15997314453125, "step": 1166 }, { "epoch": 0.16261408764718177, "grad_norm": 3.5223376750946045, "learning_rate": 9.764303566427561e-06, "loss": 0.1483154296875, "step": 1167 }, { "epoch": 0.16275343133839615, "grad_norm": 1.588440179824829, "learning_rate": 9.763587969988626e-06, "loss": 0.09045982360839844, "step": 1168 }, { "epoch": 0.16289277502961053, "grad_norm": 6.013360023498535, "learning_rate": 9.762871315189198e-06, "loss": 0.1679821014404297, "step": 1169 }, { "epoch": 0.1630321187208249, "grad_norm": 2.474217653274536, "learning_rate": 9.7621536021885e-06, "loss": 0.1385345458984375, "step": 1170 }, { "epoch": 0.1631714624120393, "grad_norm": 1.3188972473144531, "learning_rate": 9.761434831145995e-06, "loss": 0.08989334106445312, "step": 1171 }, { "epoch": 0.1633108061032537, "grad_norm": 1.4494866132736206, "learning_rate": 9.760715002221375e-06, "loss": 0.10084676742553711, "step": 1172 }, { "epoch": 0.16345014979446806, "grad_norm": 1.5449609756469727, "learning_rate": 9.759994115574571e-06, "loss": 0.0981903076171875, "step": 1173 }, { "epoch": 0.16358949348568244, "grad_norm": 1.5839638710021973, "learning_rate": 9.759272171365746e-06, "loss": 0.12118339538574219, "step": 1174 }, { "epoch": 0.16372883717689682, "grad_norm": 0.665877103805542, "learning_rate": 9.758549169755302e-06, "loss": 0.09360599517822266, "step": 1175 }, { "epoch": 0.1638681808681112, "grad_norm": 1.2593506574630737, "learning_rate": 9.757825110903872e-06, "loss": 0.08160400390625, "step": 1176 }, { "epoch": 0.16400752455932557, "grad_norm": 1.427451491355896, "learning_rate": 9.757099994972323e-06, "loss": 0.11591339111328125, "step": 1177 }, { "epoch": 0.16414686825053995, "grad_norm": 1.6216596364974976, "learning_rate": 9.756373822121762e-06, "loss": 0.09525680541992188, "step": 1178 }, { "epoch": 0.16428621194175433, "grad_norm": 1.7017525434494019, "learning_rate": 9.75564659251353e-06, "loss": 0.14091110229492188, "step": 1179 }, { "epoch": 0.1644255556329687, "grad_norm": 1.9947988986968994, "learning_rate": 9.754918306309197e-06, "loss": 0.10468673706054688, "step": 1180 }, { "epoch": 0.16456489932418308, "grad_norm": 1.5897881984710693, "learning_rate": 9.754188963670573e-06, "loss": 0.07353782653808594, "step": 1181 }, { "epoch": 0.1647042430153975, "grad_norm": 2.485908269882202, "learning_rate": 9.753458564759701e-06, "loss": 0.15148353576660156, "step": 1182 }, { "epoch": 0.16484358670661187, "grad_norm": 1.6469224691390991, "learning_rate": 9.752727109738859e-06, "loss": 0.1200103759765625, "step": 1183 }, { "epoch": 0.16498293039782624, "grad_norm": 0.9365809559822083, "learning_rate": 9.751994598770563e-06, "loss": 0.09227752685546875, "step": 1184 }, { "epoch": 0.16512227408904062, "grad_norm": 3.329281806945801, "learning_rate": 9.751261032017553e-06, "loss": 0.1426239013671875, "step": 1185 }, { "epoch": 0.165261617780255, "grad_norm": 1.2253293991088867, "learning_rate": 9.750526409642818e-06, "loss": 0.10387802124023438, "step": 1186 }, { "epoch": 0.16540096147146938, "grad_norm": 1.4330849647521973, "learning_rate": 9.749790731809568e-06, "loss": 0.09958457946777344, "step": 1187 }, { "epoch": 0.16554030516268375, "grad_norm": 2.122262954711914, "learning_rate": 9.74905399868126e-06, "loss": 0.1428966522216797, "step": 1188 }, { "epoch": 0.16567964885389813, "grad_norm": 1.0563783645629883, "learning_rate": 9.748316210421573e-06, "loss": 0.09887504577636719, "step": 1189 }, { "epoch": 0.1658189925451125, "grad_norm": 0.7059668898582458, "learning_rate": 9.747577367194432e-06, "loss": 0.07669830322265625, "step": 1190 }, { "epoch": 0.1659583362363269, "grad_norm": 1.0435082912445068, "learning_rate": 9.74683746916399e-06, "loss": 0.07513046264648438, "step": 1191 }, { "epoch": 0.1660976799275413, "grad_norm": 2.0167322158813477, "learning_rate": 9.746096516494632e-06, "loss": 0.10745811462402344, "step": 1192 }, { "epoch": 0.16623702361875567, "grad_norm": 1.6695536375045776, "learning_rate": 9.745354509350983e-06, "loss": 0.10975456237792969, "step": 1193 }, { "epoch": 0.16637636730997005, "grad_norm": 3.385521173477173, "learning_rate": 9.744611447897902e-06, "loss": 0.1018524169921875, "step": 1194 }, { "epoch": 0.16651571100118442, "grad_norm": 1.4704012870788574, "learning_rate": 9.743867332300478e-06, "loss": 0.08571434020996094, "step": 1195 }, { "epoch": 0.1666550546923988, "grad_norm": 1.7063229084014893, "learning_rate": 9.743122162724038e-06, "loss": 0.10161399841308594, "step": 1196 }, { "epoch": 0.16679439838361318, "grad_norm": 3.1968727111816406, "learning_rate": 9.742375939334141e-06, "loss": 0.12468910217285156, "step": 1197 }, { "epoch": 0.16693374207482756, "grad_norm": 1.339321255683899, "learning_rate": 9.74162866229658e-06, "loss": 0.0814056396484375, "step": 1198 }, { "epoch": 0.16707308576604193, "grad_norm": 1.0437657833099365, "learning_rate": 9.740880331777383e-06, "loss": 0.09397506713867188, "step": 1199 }, { "epoch": 0.1672124294572563, "grad_norm": 2.311774492263794, "learning_rate": 9.740130947942812e-06, "loss": 0.12254524230957031, "step": 1200 }, { "epoch": 0.1673517731484707, "grad_norm": 2.1113388538360596, "learning_rate": 9.739380510959365e-06, "loss": 0.10673713684082031, "step": 1201 }, { "epoch": 0.1674911168396851, "grad_norm": 2.1443560123443604, "learning_rate": 9.738629020993769e-06, "loss": 0.09248542785644531, "step": 1202 }, { "epoch": 0.16763046053089947, "grad_norm": 2.200929880142212, "learning_rate": 9.737876478212989e-06, "loss": 0.12691307067871094, "step": 1203 }, { "epoch": 0.16776980422211385, "grad_norm": 1.5204575061798096, "learning_rate": 9.737122882784225e-06, "loss": 0.10427474975585938, "step": 1204 }, { "epoch": 0.16790914791332823, "grad_norm": 1.4220317602157593, "learning_rate": 9.736368234874904e-06, "loss": 0.12013816833496094, "step": 1205 }, { "epoch": 0.1680484916045426, "grad_norm": 1.6175323724746704, "learning_rate": 9.735612534652697e-06, "loss": 0.09470748901367188, "step": 1206 }, { "epoch": 0.16818783529575698, "grad_norm": 1.5115338563919067, "learning_rate": 9.734855782285499e-06, "loss": 0.08332443237304688, "step": 1207 }, { "epoch": 0.16832717898697136, "grad_norm": 6.539751052856445, "learning_rate": 9.734097977941446e-06, "loss": 0.1450634002685547, "step": 1208 }, { "epoch": 0.16846652267818574, "grad_norm": 5.548697471618652, "learning_rate": 9.733339121788903e-06, "loss": 0.15528106689453125, "step": 1209 }, { "epoch": 0.16860586636940011, "grad_norm": 3.9648191928863525, "learning_rate": 9.73257921399647e-06, "loss": 0.1359710693359375, "step": 1210 }, { "epoch": 0.1687452100606145, "grad_norm": 1.6940126419067383, "learning_rate": 9.731818254732983e-06, "loss": 0.10462188720703125, "step": 1211 }, { "epoch": 0.1688845537518289, "grad_norm": 1.9332242012023926, "learning_rate": 9.73105624416751e-06, "loss": 0.07991218566894531, "step": 1212 }, { "epoch": 0.16902389744304328, "grad_norm": 1.926384687423706, "learning_rate": 9.73029318246935e-06, "loss": 0.11108207702636719, "step": 1213 }, { "epoch": 0.16916324113425765, "grad_norm": 0.8490850329399109, "learning_rate": 9.72952906980804e-06, "loss": 0.09612274169921875, "step": 1214 }, { "epoch": 0.16930258482547203, "grad_norm": 0.852992594242096, "learning_rate": 9.72876390635335e-06, "loss": 0.1400623321533203, "step": 1215 }, { "epoch": 0.1694419285166864, "grad_norm": 0.9066285490989685, "learning_rate": 9.727997692275275e-06, "loss": 0.09743309020996094, "step": 1216 }, { "epoch": 0.16958127220790079, "grad_norm": 1.3547402620315552, "learning_rate": 9.727230427744058e-06, "loss": 0.14065933227539062, "step": 1217 }, { "epoch": 0.16972061589911516, "grad_norm": 1.4978727102279663, "learning_rate": 9.726462112930165e-06, "loss": 0.13397598266601562, "step": 1218 }, { "epoch": 0.16985995959032954, "grad_norm": 1.1564520597457886, "learning_rate": 9.725692748004295e-06, "loss": 0.0960845947265625, "step": 1219 }, { "epoch": 0.16999930328154392, "grad_norm": 1.8752375841140747, "learning_rate": 9.724922333137385e-06, "loss": 0.11888313293457031, "step": 1220 }, { "epoch": 0.1701386469727583, "grad_norm": 1.8506124019622803, "learning_rate": 9.724150868500607e-06, "loss": 0.1130218505859375, "step": 1221 }, { "epoch": 0.1702779906639727, "grad_norm": 3.336322069168091, "learning_rate": 9.72337835426536e-06, "loss": 0.12274360656738281, "step": 1222 }, { "epoch": 0.17041733435518708, "grad_norm": 2.568013906478882, "learning_rate": 9.722604790603279e-06, "loss": 0.10791492462158203, "step": 1223 }, { "epoch": 0.17055667804640146, "grad_norm": 1.4093509912490845, "learning_rate": 9.721830177686231e-06, "loss": 0.07751083374023438, "step": 1224 }, { "epoch": 0.17069602173761583, "grad_norm": 0.5418991446495056, "learning_rate": 9.72105451568632e-06, "loss": 0.06308364868164062, "step": 1225 }, { "epoch": 0.1708353654288302, "grad_norm": 2.2871878147125244, "learning_rate": 9.720277804775879e-06, "loss": 0.08726787567138672, "step": 1226 }, { "epoch": 0.1709747091200446, "grad_norm": 3.610196590423584, "learning_rate": 9.719500045127475e-06, "loss": 0.11744308471679688, "step": 1227 }, { "epoch": 0.17111405281125897, "grad_norm": 2.440214157104492, "learning_rate": 9.718721236913909e-06, "loss": 0.10986900329589844, "step": 1228 }, { "epoch": 0.17125339650247334, "grad_norm": 2.3477535247802734, "learning_rate": 9.717941380308216e-06, "loss": 0.11744117736816406, "step": 1229 }, { "epoch": 0.17139274019368772, "grad_norm": 1.669662594795227, "learning_rate": 9.717160475483659e-06, "loss": 0.12308120727539062, "step": 1230 }, { "epoch": 0.1715320838849021, "grad_norm": 4.864188194274902, "learning_rate": 9.71637852261374e-06, "loss": 0.15309524536132812, "step": 1231 }, { "epoch": 0.1716714275761165, "grad_norm": 1.5145572423934937, "learning_rate": 9.71559552187219e-06, "loss": 0.08620834350585938, "step": 1232 }, { "epoch": 0.17181077126733088, "grad_norm": 1.6452754735946655, "learning_rate": 9.714811473432973e-06, "loss": 0.11076164245605469, "step": 1233 }, { "epoch": 0.17195011495854526, "grad_norm": 2.101069688796997, "learning_rate": 9.714026377470287e-06, "loss": 0.08495521545410156, "step": 1234 }, { "epoch": 0.17208945864975964, "grad_norm": 1.4711008071899414, "learning_rate": 9.713240234158565e-06, "loss": 0.11873245239257812, "step": 1235 }, { "epoch": 0.172228802340974, "grad_norm": 0.6067689657211304, "learning_rate": 9.712453043672467e-06, "loss": 0.08763885498046875, "step": 1236 }, { "epoch": 0.1723681460321884, "grad_norm": 0.9466434717178345, "learning_rate": 9.71166480618689e-06, "loss": 0.09210014343261719, "step": 1237 }, { "epoch": 0.17250748972340277, "grad_norm": 3.4422974586486816, "learning_rate": 9.71087552187696e-06, "loss": 0.1343994140625, "step": 1238 }, { "epoch": 0.17264683341461715, "grad_norm": 1.9919476509094238, "learning_rate": 9.710085190918044e-06, "loss": 0.090240478515625, "step": 1239 }, { "epoch": 0.17278617710583152, "grad_norm": 2.680283308029175, "learning_rate": 9.70929381348573e-06, "loss": 0.13481903076171875, "step": 1240 }, { "epoch": 0.1729255207970459, "grad_norm": 2.326173782348633, "learning_rate": 9.708501389755846e-06, "loss": 0.09811782836914062, "step": 1241 }, { "epoch": 0.1730648644882603, "grad_norm": 2.223180055618286, "learning_rate": 9.70770791990445e-06, "loss": 0.09767723083496094, "step": 1242 }, { "epoch": 0.17320420817947468, "grad_norm": 1.6016875505447388, "learning_rate": 9.706913404107832e-06, "loss": 0.1343212127685547, "step": 1243 }, { "epoch": 0.17334355187068906, "grad_norm": 2.6146607398986816, "learning_rate": 9.706117842542517e-06, "loss": 0.11590003967285156, "step": 1244 }, { "epoch": 0.17348289556190344, "grad_norm": 3.689570903778076, "learning_rate": 9.70532123538526e-06, "loss": 0.126220703125, "step": 1245 }, { "epoch": 0.17362223925311782, "grad_norm": 1.9936268329620361, "learning_rate": 9.704523582813049e-06, "loss": 0.12146949768066406, "step": 1246 }, { "epoch": 0.1737615829443322, "grad_norm": 3.5481085777282715, "learning_rate": 9.703724885003102e-06, "loss": 0.1312580108642578, "step": 1247 }, { "epoch": 0.17390092663554657, "grad_norm": 1.0315645933151245, "learning_rate": 9.702925142132876e-06, "loss": 0.0900735855102539, "step": 1248 }, { "epoch": 0.17404027032676095, "grad_norm": 0.6787413954734802, "learning_rate": 9.70212435438005e-06, "loss": 0.08465003967285156, "step": 1249 }, { "epoch": 0.17417961401797533, "grad_norm": 1.8038886785507202, "learning_rate": 9.701322521922549e-06, "loss": 0.111083984375, "step": 1250 }, { "epoch": 0.1743189577091897, "grad_norm": 1.4533987045288086, "learning_rate": 9.700519644938513e-06, "loss": 0.17218399047851562, "step": 1251 }, { "epoch": 0.1744583014004041, "grad_norm": 1.3955694437026978, "learning_rate": 9.699715723606327e-06, "loss": 0.1275196075439453, "step": 1252 }, { "epoch": 0.1745976450916185, "grad_norm": 0.7039941549301147, "learning_rate": 9.698910758104603e-06, "loss": 0.07725143432617188, "step": 1253 }, { "epoch": 0.17473698878283286, "grad_norm": 5.695724964141846, "learning_rate": 9.698104748612187e-06, "loss": 0.1910991668701172, "step": 1254 }, { "epoch": 0.17487633247404724, "grad_norm": 1.4545682668685913, "learning_rate": 9.697297695308157e-06, "loss": 0.13803482055664062, "step": 1255 }, { "epoch": 0.17501567616526162, "grad_norm": 1.33333158493042, "learning_rate": 9.696489598371817e-06, "loss": 0.0698089599609375, "step": 1256 }, { "epoch": 0.175155019856476, "grad_norm": 3.493004322052002, "learning_rate": 9.695680457982713e-06, "loss": 0.12169647216796875, "step": 1257 }, { "epoch": 0.17529436354769037, "grad_norm": 1.4062362909317017, "learning_rate": 9.694870274320616e-06, "loss": 0.09786224365234375, "step": 1258 }, { "epoch": 0.17543370723890475, "grad_norm": 2.0289885997772217, "learning_rate": 9.694059047565529e-06, "loss": 0.11410331726074219, "step": 1259 }, { "epoch": 0.17557305093011913, "grad_norm": 1.908785343170166, "learning_rate": 9.69324677789769e-06, "loss": 0.0982666015625, "step": 1260 }, { "epoch": 0.1757123946213335, "grad_norm": 1.015703797340393, "learning_rate": 9.692433465497562e-06, "loss": 0.06354618072509766, "step": 1261 }, { "epoch": 0.1758517383125479, "grad_norm": 2.125791549682617, "learning_rate": 9.69161911054585e-06, "loss": 0.11115264892578125, "step": 1262 }, { "epoch": 0.1759910820037623, "grad_norm": 1.7883414030075073, "learning_rate": 9.690803713223485e-06, "loss": 0.11554527282714844, "step": 1263 }, { "epoch": 0.17613042569497667, "grad_norm": 1.2352783679962158, "learning_rate": 9.689987273711626e-06, "loss": 0.101348876953125, "step": 1264 }, { "epoch": 0.17626976938619104, "grad_norm": 0.9315357804298401, "learning_rate": 9.68916979219167e-06, "loss": 0.10083198547363281, "step": 1265 }, { "epoch": 0.17640911307740542, "grad_norm": 1.5272349119186401, "learning_rate": 9.68835126884524e-06, "loss": 0.08263015747070312, "step": 1266 }, { "epoch": 0.1765484567686198, "grad_norm": 3.9613029956817627, "learning_rate": 9.687531703854196e-06, "loss": 0.12387847900390625, "step": 1267 }, { "epoch": 0.17668780045983418, "grad_norm": 2.341148614883423, "learning_rate": 9.686711097400625e-06, "loss": 0.11022377014160156, "step": 1268 }, { "epoch": 0.17682714415104855, "grad_norm": 1.7165026664733887, "learning_rate": 9.685889449666849e-06, "loss": 0.11401748657226562, "step": 1269 }, { "epoch": 0.17696648784226293, "grad_norm": 4.637948989868164, "learning_rate": 9.685066760835417e-06, "loss": 0.165313720703125, "step": 1270 }, { "epoch": 0.1771058315334773, "grad_norm": 1.2457655668258667, "learning_rate": 9.684243031089113e-06, "loss": 0.1474323272705078, "step": 1271 }, { "epoch": 0.17724517522469171, "grad_norm": 2.266160726547241, "learning_rate": 9.68341826061095e-06, "loss": 0.12483596801757812, "step": 1272 }, { "epoch": 0.1773845189159061, "grad_norm": 3.1803455352783203, "learning_rate": 9.682592449584174e-06, "loss": 0.12599563598632812, "step": 1273 }, { "epoch": 0.17752386260712047, "grad_norm": 3.622314214706421, "learning_rate": 9.68176559819226e-06, "loss": 0.14037322998046875, "step": 1274 }, { "epoch": 0.17766320629833485, "grad_norm": 2.3908071517944336, "learning_rate": 9.680937706618919e-06, "loss": 0.13748931884765625, "step": 1275 }, { "epoch": 0.17780254998954922, "grad_norm": 1.0638519525527954, "learning_rate": 9.680108775048087e-06, "loss": 0.10463333129882812, "step": 1276 }, { "epoch": 0.1779418936807636, "grad_norm": 1.0046831369400024, "learning_rate": 9.679278803663932e-06, "loss": 0.09916496276855469, "step": 1277 }, { "epoch": 0.17808123737197798, "grad_norm": 1.2056066989898682, "learning_rate": 9.678447792650858e-06, "loss": 0.09347152709960938, "step": 1278 }, { "epoch": 0.17822058106319236, "grad_norm": 3.3155946731567383, "learning_rate": 9.677615742193495e-06, "loss": 0.11376571655273438, "step": 1279 }, { "epoch": 0.17835992475440673, "grad_norm": 1.2759042978286743, "learning_rate": 9.676782652476705e-06, "loss": 0.09206390380859375, "step": 1280 }, { "epoch": 0.1784992684456211, "grad_norm": 1.7534143924713135, "learning_rate": 9.675948523685583e-06, "loss": 0.115478515625, "step": 1281 }, { "epoch": 0.17863861213683552, "grad_norm": 2.335188150405884, "learning_rate": 9.675113356005453e-06, "loss": 0.09412193298339844, "step": 1282 }, { "epoch": 0.1787779558280499, "grad_norm": 1.1933821439743042, "learning_rate": 9.674277149621869e-06, "loss": 0.10985469818115234, "step": 1283 }, { "epoch": 0.17891729951926427, "grad_norm": 2.7771480083465576, "learning_rate": 9.673439904720619e-06, "loss": 0.12807846069335938, "step": 1284 }, { "epoch": 0.17905664321047865, "grad_norm": 1.7522506713867188, "learning_rate": 9.672601621487718e-06, "loss": 0.10041046142578125, "step": 1285 }, { "epoch": 0.17919598690169303, "grad_norm": 1.0521231889724731, "learning_rate": 9.671762300109415e-06, "loss": 0.08980178833007812, "step": 1286 }, { "epoch": 0.1793353305929074, "grad_norm": 1.6625428199768066, "learning_rate": 9.670921940772186e-06, "loss": 0.1669902801513672, "step": 1287 }, { "epoch": 0.17947467428412178, "grad_norm": 1.9580973386764526, "learning_rate": 9.670080543662742e-06, "loss": 0.15896224975585938, "step": 1288 }, { "epoch": 0.17961401797533616, "grad_norm": 2.5352187156677246, "learning_rate": 9.669238108968018e-06, "loss": 0.13741302490234375, "step": 1289 }, { "epoch": 0.17975336166655054, "grad_norm": 2.026872396469116, "learning_rate": 9.668394636875188e-06, "loss": 0.11388397216796875, "step": 1290 }, { "epoch": 0.17989270535776491, "grad_norm": 1.9972022771835327, "learning_rate": 9.667550127571653e-06, "loss": 0.11944198608398438, "step": 1291 }, { "epoch": 0.18003204904897932, "grad_norm": 1.6791995763778687, "learning_rate": 9.666704581245041e-06, "loss": 0.1517620086669922, "step": 1292 }, { "epoch": 0.1801713927401937, "grad_norm": 2.501500368118286, "learning_rate": 9.665857998083212e-06, "loss": 0.11721038818359375, "step": 1293 }, { "epoch": 0.18031073643140808, "grad_norm": 2.6686713695526123, "learning_rate": 9.66501037827426e-06, "loss": 0.138946533203125, "step": 1294 }, { "epoch": 0.18045008012262245, "grad_norm": 1.307910442352295, "learning_rate": 9.664161722006506e-06, "loss": 0.08954238891601562, "step": 1295 }, { "epoch": 0.18058942381383683, "grad_norm": 1.7495228052139282, "learning_rate": 9.663312029468504e-06, "loss": 0.12938499450683594, "step": 1296 }, { "epoch": 0.1807287675050512, "grad_norm": 3.1157660484313965, "learning_rate": 9.662461300849031e-06, "loss": 0.08114242553710938, "step": 1297 }, { "epoch": 0.18086811119626559, "grad_norm": 8.345358848571777, "learning_rate": 9.661609536337104e-06, "loss": 0.1558380126953125, "step": 1298 }, { "epoch": 0.18100745488747996, "grad_norm": 2.8201849460601807, "learning_rate": 9.660756736121964e-06, "loss": 0.13051223754882812, "step": 1299 }, { "epoch": 0.18114679857869434, "grad_norm": 1.0688998699188232, "learning_rate": 9.659902900393086e-06, "loss": 0.09604072570800781, "step": 1300 }, { "epoch": 0.18128614226990872, "grad_norm": 2.496065616607666, "learning_rate": 9.659048029340169e-06, "loss": 0.11721611022949219, "step": 1301 }, { "epoch": 0.18142548596112312, "grad_norm": 1.2161725759506226, "learning_rate": 9.658192123153149e-06, "loss": 0.08228302001953125, "step": 1302 }, { "epoch": 0.1815648296523375, "grad_norm": 1.9569470882415771, "learning_rate": 9.657335182022187e-06, "loss": 0.11868858337402344, "step": 1303 }, { "epoch": 0.18170417334355188, "grad_norm": 3.8147132396698, "learning_rate": 9.656477206137675e-06, "loss": 0.13263702392578125, "step": 1304 }, { "epoch": 0.18184351703476626, "grad_norm": 3.618439197540283, "learning_rate": 9.655618195690239e-06, "loss": 0.16974830627441406, "step": 1305 }, { "epoch": 0.18198286072598063, "grad_norm": 0.8329817056655884, "learning_rate": 9.654758150870728e-06, "loss": 0.1153106689453125, "step": 1306 }, { "epoch": 0.182122204417195, "grad_norm": 1.1941519975662231, "learning_rate": 9.653897071870226e-06, "loss": 0.14161300659179688, "step": 1307 }, { "epoch": 0.1822615481084094, "grad_norm": 1.883608102798462, "learning_rate": 9.653034958880045e-06, "loss": 0.10730743408203125, "step": 1308 }, { "epoch": 0.18240089179962377, "grad_norm": 1.2004767656326294, "learning_rate": 9.652171812091728e-06, "loss": 0.11151313781738281, "step": 1309 }, { "epoch": 0.18254023549083814, "grad_norm": 1.2447007894515991, "learning_rate": 9.651307631697044e-06, "loss": 0.11548614501953125, "step": 1310 }, { "epoch": 0.18267957918205252, "grad_norm": 1.8279610872268677, "learning_rate": 9.650442417887995e-06, "loss": 0.09847259521484375, "step": 1311 }, { "epoch": 0.18281892287326693, "grad_norm": 1.5036135911941528, "learning_rate": 9.649576170856814e-06, "loss": 0.08987808227539062, "step": 1312 }, { "epoch": 0.1829582665644813, "grad_norm": 0.9949598908424377, "learning_rate": 9.64870889079596e-06, "loss": 0.09500694274902344, "step": 1313 }, { "epoch": 0.18309761025569568, "grad_norm": 1.0875258445739746, "learning_rate": 9.64784057789812e-06, "loss": 0.08940315246582031, "step": 1314 }, { "epoch": 0.18323695394691006, "grad_norm": 2.163557291030884, "learning_rate": 9.646971232356215e-06, "loss": 0.13629913330078125, "step": 1315 }, { "epoch": 0.18337629763812444, "grad_norm": 1.2826176881790161, "learning_rate": 9.646100854363396e-06, "loss": 0.08991813659667969, "step": 1316 }, { "epoch": 0.1835156413293388, "grad_norm": 5.098117351531982, "learning_rate": 9.64522944411304e-06, "loss": 0.14299392700195312, "step": 1317 }, { "epoch": 0.1836549850205532, "grad_norm": 2.4754631519317627, "learning_rate": 9.644357001798752e-06, "loss": 0.09269905090332031, "step": 1318 }, { "epoch": 0.18379432871176757, "grad_norm": 0.876258134841919, "learning_rate": 9.643483527614372e-06, "loss": 0.09093856811523438, "step": 1319 }, { "epoch": 0.18393367240298195, "grad_norm": 1.0664504766464233, "learning_rate": 9.642609021753964e-06, "loss": 0.10816001892089844, "step": 1320 }, { "epoch": 0.18407301609419632, "grad_norm": 1.0759501457214355, "learning_rate": 9.641733484411823e-06, "loss": 0.09656524658203125, "step": 1321 }, { "epoch": 0.18421235978541073, "grad_norm": 1.9097319841384888, "learning_rate": 9.640856915782477e-06, "loss": 0.13080596923828125, "step": 1322 }, { "epoch": 0.1843517034766251, "grad_norm": 2.0479633808135986, "learning_rate": 9.639979316060675e-06, "loss": 0.11539936065673828, "step": 1323 }, { "epoch": 0.18449104716783948, "grad_norm": 1.9189727306365967, "learning_rate": 9.639100685441403e-06, "loss": 0.09706687927246094, "step": 1324 }, { "epoch": 0.18463039085905386, "grad_norm": 1.2145968675613403, "learning_rate": 9.638221024119869e-06, "loss": 0.09970664978027344, "step": 1325 }, { "epoch": 0.18476973455026824, "grad_norm": 1.5610294342041016, "learning_rate": 9.637340332291518e-06, "loss": 0.11986923217773438, "step": 1326 }, { "epoch": 0.18490907824148262, "grad_norm": 1.1365286111831665, "learning_rate": 9.636458610152015e-06, "loss": 0.1360187530517578, "step": 1327 }, { "epoch": 0.185048421932697, "grad_norm": 1.0469812154769897, "learning_rate": 9.635575857897264e-06, "loss": 0.09530830383300781, "step": 1328 }, { "epoch": 0.18518776562391137, "grad_norm": 0.9786520600318909, "learning_rate": 9.634692075723386e-06, "loss": 0.08376693725585938, "step": 1329 }, { "epoch": 0.18532710931512575, "grad_norm": 1.7480747699737549, "learning_rate": 9.633807263826745e-06, "loss": 0.12212753295898438, "step": 1330 }, { "epoch": 0.18546645300634013, "grad_norm": 1.3640618324279785, "learning_rate": 9.632921422403918e-06, "loss": 0.10878372192382812, "step": 1331 }, { "epoch": 0.18560579669755453, "grad_norm": 1.8868491649627686, "learning_rate": 9.632034551651723e-06, "loss": 0.14792442321777344, "step": 1332 }, { "epoch": 0.1857451403887689, "grad_norm": 1.0794833898544312, "learning_rate": 9.631146651767202e-06, "loss": 0.12482452392578125, "step": 1333 }, { "epoch": 0.1858844840799833, "grad_norm": 1.7542515993118286, "learning_rate": 9.630257722947625e-06, "loss": 0.10247516632080078, "step": 1334 }, { "epoch": 0.18602382777119766, "grad_norm": 0.7921610474586487, "learning_rate": 9.629367765390494e-06, "loss": 0.08763504028320312, "step": 1335 }, { "epoch": 0.18616317146241204, "grad_norm": 0.8966168165206909, "learning_rate": 9.628476779293536e-06, "loss": 0.09291839599609375, "step": 1336 }, { "epoch": 0.18630251515362642, "grad_norm": 0.4647544026374817, "learning_rate": 9.627584764854706e-06, "loss": 0.0633096694946289, "step": 1337 }, { "epoch": 0.1864418588448408, "grad_norm": 1.5471100807189941, "learning_rate": 9.626691722272193e-06, "loss": 0.10872268676757812, "step": 1338 }, { "epoch": 0.18658120253605517, "grad_norm": 1.891131043434143, "learning_rate": 9.625797651744406e-06, "loss": 0.08047866821289062, "step": 1339 }, { "epoch": 0.18672054622726955, "grad_norm": 0.9580826163291931, "learning_rate": 9.62490255346999e-06, "loss": 0.09629440307617188, "step": 1340 }, { "epoch": 0.18685988991848393, "grad_norm": 1.7927536964416504, "learning_rate": 9.624006427647817e-06, "loss": 0.1248016357421875, "step": 1341 }, { "epoch": 0.18699923360969833, "grad_norm": 0.7180280685424805, "learning_rate": 9.623109274476982e-06, "loss": 0.077972412109375, "step": 1342 }, { "epoch": 0.1871385773009127, "grad_norm": 1.0548917055130005, "learning_rate": 9.622211094156812e-06, "loss": 0.10823631286621094, "step": 1343 }, { "epoch": 0.1872779209921271, "grad_norm": 2.1474239826202393, "learning_rate": 9.621311886886866e-06, "loss": 0.1619873046875, "step": 1344 }, { "epoch": 0.18741726468334147, "grad_norm": 1.6757322549819946, "learning_rate": 9.620411652866926e-06, "loss": 0.09937667846679688, "step": 1345 }, { "epoch": 0.18755660837455584, "grad_norm": 1.777125597000122, "learning_rate": 9.619510392297e-06, "loss": 0.1323680877685547, "step": 1346 }, { "epoch": 0.18769595206577022, "grad_norm": 2.5886006355285645, "learning_rate": 9.618608105377331e-06, "loss": 0.13910675048828125, "step": 1347 }, { "epoch": 0.1878352957569846, "grad_norm": 1.6742905378341675, "learning_rate": 9.617704792308387e-06, "loss": 0.14233779907226562, "step": 1348 }, { "epoch": 0.18797463944819898, "grad_norm": 1.8119503259658813, "learning_rate": 9.61680045329086e-06, "loss": 0.09369468688964844, "step": 1349 }, { "epoch": 0.18811398313941335, "grad_norm": 4.31087589263916, "learning_rate": 9.615895088525677e-06, "loss": 0.13628578186035156, "step": 1350 }, { "epoch": 0.18825332683062773, "grad_norm": 2.8484668731689453, "learning_rate": 9.614988698213987e-06, "loss": 0.09499740600585938, "step": 1351 }, { "epoch": 0.18839267052184214, "grad_norm": 2.891897439956665, "learning_rate": 9.614081282557172e-06, "loss": 0.11462211608886719, "step": 1352 }, { "epoch": 0.18853201421305651, "grad_norm": 1.882290244102478, "learning_rate": 9.613172841756835e-06, "loss": 0.12476921081542969, "step": 1353 }, { "epoch": 0.1886713579042709, "grad_norm": 3.8657054901123047, "learning_rate": 9.612263376014815e-06, "loss": 0.16077613830566406, "step": 1354 }, { "epoch": 0.18881070159548527, "grad_norm": 3.360471248626709, "learning_rate": 9.611352885533171e-06, "loss": 0.16945838928222656, "step": 1355 }, { "epoch": 0.18895004528669965, "grad_norm": 0.5970257520675659, "learning_rate": 9.610441370514196e-06, "loss": 0.08349800109863281, "step": 1356 }, { "epoch": 0.18908938897791402, "grad_norm": 1.9394181966781616, "learning_rate": 9.609528831160407e-06, "loss": 0.14981842041015625, "step": 1357 }, { "epoch": 0.1892287326691284, "grad_norm": 2.3817341327667236, "learning_rate": 9.608615267674548e-06, "loss": 0.1614971160888672, "step": 1358 }, { "epoch": 0.18936807636034278, "grad_norm": 2.5062174797058105, "learning_rate": 9.607700680259593e-06, "loss": 0.11038780212402344, "step": 1359 }, { "epoch": 0.18950742005155716, "grad_norm": 1.2357542514801025, "learning_rate": 9.606785069118742e-06, "loss": 0.12851333618164062, "step": 1360 }, { "epoch": 0.18964676374277153, "grad_norm": 1.4444472789764404, "learning_rate": 9.605868434455426e-06, "loss": 0.0983123779296875, "step": 1361 }, { "epoch": 0.18978610743398594, "grad_norm": 2.6366066932678223, "learning_rate": 9.604950776473294e-06, "loss": 0.11525154113769531, "step": 1362 }, { "epoch": 0.18992545112520032, "grad_norm": 1.6925328969955444, "learning_rate": 9.604032095376234e-06, "loss": 0.12773704528808594, "step": 1363 }, { "epoch": 0.1900647948164147, "grad_norm": 1.8649622201919556, "learning_rate": 9.603112391368354e-06, "loss": 0.11888504028320312, "step": 1364 }, { "epoch": 0.19020413850762907, "grad_norm": 1.7700470685958862, "learning_rate": 9.602191664653992e-06, "loss": 0.11918830871582031, "step": 1365 }, { "epoch": 0.19034348219884345, "grad_norm": 1.8901818990707397, "learning_rate": 9.601269915437713e-06, "loss": 0.13022613525390625, "step": 1366 }, { "epoch": 0.19048282589005783, "grad_norm": 0.6050156354904175, "learning_rate": 9.600347143924305e-06, "loss": 0.09169578552246094, "step": 1367 }, { "epoch": 0.1906221695812722, "grad_norm": 4.604312419891357, "learning_rate": 9.599423350318791e-06, "loss": 0.11665725708007812, "step": 1368 }, { "epoch": 0.19076151327248658, "grad_norm": 2.148955821990967, "learning_rate": 9.598498534826414e-06, "loss": 0.09467697143554688, "step": 1369 }, { "epoch": 0.19090085696370096, "grad_norm": 1.9990426301956177, "learning_rate": 9.597572697652649e-06, "loss": 0.10024261474609375, "step": 1370 }, { "epoch": 0.19104020065491534, "grad_norm": 0.7070313692092896, "learning_rate": 9.596645839003196e-06, "loss": 0.06640815734863281, "step": 1371 }, { "epoch": 0.19117954434612974, "grad_norm": 3.447469472885132, "learning_rate": 9.595717959083978e-06, "loss": 0.14351844787597656, "step": 1372 }, { "epoch": 0.19131888803734412, "grad_norm": 1.8832645416259766, "learning_rate": 9.594789058101154e-06, "loss": 0.1552295684814453, "step": 1373 }, { "epoch": 0.1914582317285585, "grad_norm": 3.3646130561828613, "learning_rate": 9.593859136261102e-06, "loss": 0.11854362487792969, "step": 1374 }, { "epoch": 0.19159757541977288, "grad_norm": 1.7308815717697144, "learning_rate": 9.592928193770427e-06, "loss": 0.1199951171875, "step": 1375 }, { "epoch": 0.19173691911098725, "grad_norm": 1.1012989282608032, "learning_rate": 9.591996230835968e-06, "loss": 0.13631248474121094, "step": 1376 }, { "epoch": 0.19187626280220163, "grad_norm": 1.1108665466308594, "learning_rate": 9.591063247664783e-06, "loss": 0.10369491577148438, "step": 1377 }, { "epoch": 0.192015606493416, "grad_norm": 0.8243598341941833, "learning_rate": 9.59012924446416e-06, "loss": 0.0907745361328125, "step": 1378 }, { "epoch": 0.19215495018463039, "grad_norm": 3.496760129928589, "learning_rate": 9.589194221441614e-06, "loss": 0.1157073974609375, "step": 1379 }, { "epoch": 0.19229429387584476, "grad_norm": 1.4757888317108154, "learning_rate": 9.588258178804884e-06, "loss": 0.09687232971191406, "step": 1380 }, { "epoch": 0.19243363756705914, "grad_norm": 1.0841822624206543, "learning_rate": 9.587321116761938e-06, "loss": 0.10299873352050781, "step": 1381 }, { "epoch": 0.19257298125827352, "grad_norm": 3.2083888053894043, "learning_rate": 9.586383035520972e-06, "loss": 0.10457611083984375, "step": 1382 }, { "epoch": 0.19271232494948792, "grad_norm": 2.5106565952301025, "learning_rate": 9.585443935290403e-06, "loss": 0.08735084533691406, "step": 1383 }, { "epoch": 0.1928516686407023, "grad_norm": 1.6564613580703735, "learning_rate": 9.58450381627888e-06, "loss": 0.08207893371582031, "step": 1384 }, { "epoch": 0.19299101233191668, "grad_norm": 2.837794780731201, "learning_rate": 9.583562678695275e-06, "loss": 0.11822700500488281, "step": 1385 }, { "epoch": 0.19313035602313106, "grad_norm": 1.3257235288619995, "learning_rate": 9.582620522748686e-06, "loss": 0.08090782165527344, "step": 1386 }, { "epoch": 0.19326969971434543, "grad_norm": 3.4390902519226074, "learning_rate": 9.58167734864844e-06, "loss": 0.10674667358398438, "step": 1387 }, { "epoch": 0.1934090434055598, "grad_norm": 1.2559077739715576, "learning_rate": 9.58073315660409e-06, "loss": 0.08679771423339844, "step": 1388 }, { "epoch": 0.1935483870967742, "grad_norm": 2.0808236598968506, "learning_rate": 9.579787946825411e-06, "loss": 0.13006591796875, "step": 1389 }, { "epoch": 0.19368773078798857, "grad_norm": 1.4455705881118774, "learning_rate": 9.57884171952241e-06, "loss": 0.10415077209472656, "step": 1390 }, { "epoch": 0.19382707447920294, "grad_norm": 1.4311186075210571, "learning_rate": 9.577894474905314e-06, "loss": 0.11511993408203125, "step": 1391 }, { "epoch": 0.19396641817041732, "grad_norm": 0.9982158541679382, "learning_rate": 9.576946213184583e-06, "loss": 0.10754966735839844, "step": 1392 }, { "epoch": 0.19410576186163173, "grad_norm": 1.6770687103271484, "learning_rate": 9.575996934570896e-06, "loss": 0.11977767944335938, "step": 1393 }, { "epoch": 0.1942451055528461, "grad_norm": 2.006361484527588, "learning_rate": 9.57504663927516e-06, "loss": 0.12200546264648438, "step": 1394 }, { "epoch": 0.19438444924406048, "grad_norm": 0.7012666463851929, "learning_rate": 9.574095327508513e-06, "loss": 0.06995964050292969, "step": 1395 }, { "epoch": 0.19452379293527486, "grad_norm": 3.397796154022217, "learning_rate": 9.573142999482313e-06, "loss": 0.12151336669921875, "step": 1396 }, { "epoch": 0.19466313662648924, "grad_norm": 1.1963391304016113, "learning_rate": 9.572189655408144e-06, "loss": 0.10277175903320312, "step": 1397 }, { "epoch": 0.1948024803177036, "grad_norm": 2.591247081756592, "learning_rate": 9.571235295497818e-06, "loss": 0.12264251708984375, "step": 1398 }, { "epoch": 0.194941824008918, "grad_norm": 2.0836021900177, "learning_rate": 9.570279919963373e-06, "loss": 0.11511802673339844, "step": 1399 }, { "epoch": 0.19508116770013237, "grad_norm": 2.00787091255188, "learning_rate": 9.569323529017071e-06, "loss": 0.10000801086425781, "step": 1400 }, { "epoch": 0.19522051139134675, "grad_norm": 3.296086311340332, "learning_rate": 9.568366122871399e-06, "loss": 0.1624889373779297, "step": 1401 }, { "epoch": 0.19535985508256112, "grad_norm": 2.317185640335083, "learning_rate": 9.567407701739075e-06, "loss": 0.10927772521972656, "step": 1402 }, { "epoch": 0.19549919877377553, "grad_norm": 1.4552067518234253, "learning_rate": 9.566448265833034e-06, "loss": 0.10081100463867188, "step": 1403 }, { "epoch": 0.1956385424649899, "grad_norm": 2.1655433177948, "learning_rate": 9.56548781536644e-06, "loss": 0.15071868896484375, "step": 1404 }, { "epoch": 0.19577788615620428, "grad_norm": 1.503403902053833, "learning_rate": 9.564526350552689e-06, "loss": 0.09784889221191406, "step": 1405 }, { "epoch": 0.19591722984741866, "grad_norm": 0.737284779548645, "learning_rate": 9.56356387160539e-06, "loss": 0.07886695861816406, "step": 1406 }, { "epoch": 0.19605657353863304, "grad_norm": 1.344818353652954, "learning_rate": 9.562600378738389e-06, "loss": 0.11109447479248047, "step": 1407 }, { "epoch": 0.19619591722984742, "grad_norm": 1.6051379442214966, "learning_rate": 9.561635872165747e-06, "loss": 0.09342575073242188, "step": 1408 }, { "epoch": 0.1963352609210618, "grad_norm": 1.88807213306427, "learning_rate": 9.56067035210176e-06, "loss": 0.11314201354980469, "step": 1409 }, { "epoch": 0.19647460461227617, "grad_norm": 1.6019105911254883, "learning_rate": 9.559703818760943e-06, "loss": 0.1491241455078125, "step": 1410 }, { "epoch": 0.19661394830349055, "grad_norm": 1.0424236059188843, "learning_rate": 9.558736272358036e-06, "loss": 0.10351943969726562, "step": 1411 }, { "epoch": 0.19675329199470493, "grad_norm": 1.8281315565109253, "learning_rate": 9.557767713108009e-06, "loss": 0.18277740478515625, "step": 1412 }, { "epoch": 0.19689263568591933, "grad_norm": 0.707242488861084, "learning_rate": 9.55679814122605e-06, "loss": 0.08185482025146484, "step": 1413 }, { "epoch": 0.1970319793771337, "grad_norm": 2.2460665702819824, "learning_rate": 9.555827556927578e-06, "loss": 0.09493160247802734, "step": 1414 }, { "epoch": 0.1971713230683481, "grad_norm": 0.554520308971405, "learning_rate": 9.554855960428234e-06, "loss": 0.0679473876953125, "step": 1415 }, { "epoch": 0.19731066675956246, "grad_norm": 0.6277605891227722, "learning_rate": 9.553883351943882e-06, "loss": 0.07268905639648438, "step": 1416 }, { "epoch": 0.19745001045077684, "grad_norm": 1.3070094585418701, "learning_rate": 9.55290973169062e-06, "loss": 0.142547607421875, "step": 1417 }, { "epoch": 0.19758935414199122, "grad_norm": 1.040483832359314, "learning_rate": 9.55193509988476e-06, "loss": 0.10680580139160156, "step": 1418 }, { "epoch": 0.1977286978332056, "grad_norm": 1.0522947311401367, "learning_rate": 9.55095945674284e-06, "loss": 0.09882164001464844, "step": 1419 }, { "epoch": 0.19786804152441997, "grad_norm": 2.2713749408721924, "learning_rate": 9.549982802481632e-06, "loss": 0.10624980926513672, "step": 1420 }, { "epoch": 0.19800738521563435, "grad_norm": 1.769079327583313, "learning_rate": 9.549005137318122e-06, "loss": 0.10416030883789062, "step": 1421 }, { "epoch": 0.19814672890684873, "grad_norm": 1.3953663110733032, "learning_rate": 9.548026461469527e-06, "loss": 0.11916351318359375, "step": 1422 }, { "epoch": 0.19828607259806313, "grad_norm": 0.7818230390548706, "learning_rate": 9.547046775153285e-06, "loss": 0.09327888488769531, "step": 1423 }, { "epoch": 0.1984254162892775, "grad_norm": 1.3055779933929443, "learning_rate": 9.54606607858706e-06, "loss": 0.1399211883544922, "step": 1424 }, { "epoch": 0.1985647599804919, "grad_norm": 2.4786288738250732, "learning_rate": 9.545084371988743e-06, "loss": 0.16811370849609375, "step": 1425 }, { "epoch": 0.19870410367170627, "grad_norm": 1.8453129529953003, "learning_rate": 9.54410165557644e-06, "loss": 0.1272754669189453, "step": 1426 }, { "epoch": 0.19884344736292064, "grad_norm": 0.683563768863678, "learning_rate": 9.543117929568497e-06, "loss": 0.07044410705566406, "step": 1427 }, { "epoch": 0.19898279105413502, "grad_norm": 0.9225583672523499, "learning_rate": 9.542133194183469e-06, "loss": 0.09586334228515625, "step": 1428 }, { "epoch": 0.1991221347453494, "grad_norm": 1.6477631330490112, "learning_rate": 9.541147449640145e-06, "loss": 0.10464096069335938, "step": 1429 }, { "epoch": 0.19926147843656378, "grad_norm": 1.9870879650115967, "learning_rate": 9.540160696157532e-06, "loss": 0.1322193145751953, "step": 1430 }, { "epoch": 0.19940082212777815, "grad_norm": 1.4221158027648926, "learning_rate": 9.539172933954867e-06, "loss": 0.08999919891357422, "step": 1431 }, { "epoch": 0.19954016581899253, "grad_norm": 1.8590315580368042, "learning_rate": 9.538184163251608e-06, "loss": 0.12167167663574219, "step": 1432 }, { "epoch": 0.19967950951020694, "grad_norm": 1.209007978439331, "learning_rate": 9.537194384267436e-06, "loss": 0.14074325561523438, "step": 1433 }, { "epoch": 0.19981885320142131, "grad_norm": 1.6991674900054932, "learning_rate": 9.536203597222259e-06, "loss": 0.11177444458007812, "step": 1434 }, { "epoch": 0.1999581968926357, "grad_norm": 1.379069209098816, "learning_rate": 9.535211802336204e-06, "loss": 0.08930397033691406, "step": 1435 }, { "epoch": 0.20009754058385007, "grad_norm": 1.774138331413269, "learning_rate": 9.534218999829627e-06, "loss": 0.11231231689453125, "step": 1436 }, { "epoch": 0.20023688427506445, "grad_norm": 0.982581615447998, "learning_rate": 9.533225189923107e-06, "loss": 0.09053421020507812, "step": 1437 }, { "epoch": 0.20037622796627882, "grad_norm": 2.2674050331115723, "learning_rate": 9.532230372837446e-06, "loss": 0.10453414916992188, "step": 1438 }, { "epoch": 0.2005155716574932, "grad_norm": 2.5735342502593994, "learning_rate": 9.531234548793667e-06, "loss": 0.158538818359375, "step": 1439 }, { "epoch": 0.20065491534870758, "grad_norm": 1.088998794555664, "learning_rate": 9.530237718013023e-06, "loss": 0.09945297241210938, "step": 1440 }, { "epoch": 0.20079425903992196, "grad_norm": 2.047879934310913, "learning_rate": 9.529239880716983e-06, "loss": 0.09705543518066406, "step": 1441 }, { "epoch": 0.20093360273113633, "grad_norm": 1.537980556488037, "learning_rate": 9.528241037127247e-06, "loss": 0.12973976135253906, "step": 1442 }, { "epoch": 0.20107294642235074, "grad_norm": 1.1905215978622437, "learning_rate": 9.527241187465735e-06, "loss": 0.08292007446289062, "step": 1443 }, { "epoch": 0.20121229011356512, "grad_norm": 3.027613639831543, "learning_rate": 9.526240331954589e-06, "loss": 0.14481353759765625, "step": 1444 }, { "epoch": 0.2013516338047795, "grad_norm": 1.6377190351486206, "learning_rate": 9.525238470816176e-06, "loss": 0.11445999145507812, "step": 1445 }, { "epoch": 0.20149097749599387, "grad_norm": 1.7620259523391724, "learning_rate": 9.524235604273088e-06, "loss": 0.11924934387207031, "step": 1446 }, { "epoch": 0.20163032118720825, "grad_norm": 0.8290073871612549, "learning_rate": 9.523231732548139e-06, "loss": 0.07655906677246094, "step": 1447 }, { "epoch": 0.20176966487842263, "grad_norm": 0.5263558626174927, "learning_rate": 9.522226855864366e-06, "loss": 0.07246589660644531, "step": 1448 }, { "epoch": 0.201909008569637, "grad_norm": 0.8088838458061218, "learning_rate": 9.521220974445032e-06, "loss": 0.07196235656738281, "step": 1449 }, { "epoch": 0.20204835226085138, "grad_norm": 1.1444482803344727, "learning_rate": 9.520214088513616e-06, "loss": 0.09911537170410156, "step": 1450 }, { "epoch": 0.20218769595206576, "grad_norm": 1.8983176946640015, "learning_rate": 9.519206198293828e-06, "loss": 0.11034584045410156, "step": 1451 }, { "epoch": 0.20232703964328014, "grad_norm": 1.262357234954834, "learning_rate": 9.5181973040096e-06, "loss": 0.09925651550292969, "step": 1452 }, { "epoch": 0.20246638333449454, "grad_norm": 1.3740696907043457, "learning_rate": 9.517187405885082e-06, "loss": 0.13965415954589844, "step": 1453 }, { "epoch": 0.20260572702570892, "grad_norm": 1.4891890287399292, "learning_rate": 9.516176504144652e-06, "loss": 0.10027503967285156, "step": 1454 }, { "epoch": 0.2027450707169233, "grad_norm": 2.416921854019165, "learning_rate": 9.515164599012908e-06, "loss": 0.08343124389648438, "step": 1455 }, { "epoch": 0.20288441440813768, "grad_norm": 1.095812201499939, "learning_rate": 9.514151690714672e-06, "loss": 0.102386474609375, "step": 1456 }, { "epoch": 0.20302375809935205, "grad_norm": 0.596342146396637, "learning_rate": 9.513137779474992e-06, "loss": 0.06286048889160156, "step": 1457 }, { "epoch": 0.20316310179056643, "grad_norm": 2.1044206619262695, "learning_rate": 9.512122865519135e-06, "loss": 0.1280984878540039, "step": 1458 }, { "epoch": 0.2033024454817808, "grad_norm": 1.5854228734970093, "learning_rate": 9.511106949072588e-06, "loss": 0.08957672119140625, "step": 1459 }, { "epoch": 0.20344178917299519, "grad_norm": 0.7488170266151428, "learning_rate": 9.51009003036107e-06, "loss": 0.07001256942749023, "step": 1460 }, { "epoch": 0.20358113286420956, "grad_norm": 3.304750919342041, "learning_rate": 9.509072109610514e-06, "loss": 0.10292625427246094, "step": 1461 }, { "epoch": 0.20372047655542394, "grad_norm": 0.9669604301452637, "learning_rate": 9.508053187047077e-06, "loss": 0.07283401489257812, "step": 1462 }, { "epoch": 0.20385982024663835, "grad_norm": 0.9503243565559387, "learning_rate": 9.507033262897142e-06, "loss": 0.08637237548828125, "step": 1463 }, { "epoch": 0.20399916393785272, "grad_norm": 0.471229612827301, "learning_rate": 9.506012337387315e-06, "loss": 0.07016563415527344, "step": 1464 }, { "epoch": 0.2041385076290671, "grad_norm": 1.2802735567092896, "learning_rate": 9.504990410744422e-06, "loss": 0.1190032958984375, "step": 1465 }, { "epoch": 0.20427785132028148, "grad_norm": 3.760833501815796, "learning_rate": 9.503967483195509e-06, "loss": 0.1507434844970703, "step": 1466 }, { "epoch": 0.20441719501149586, "grad_norm": 1.4359376430511475, "learning_rate": 9.502943554967848e-06, "loss": 0.09305000305175781, "step": 1467 }, { "epoch": 0.20455653870271023, "grad_norm": 1.07479989528656, "learning_rate": 9.501918626288935e-06, "loss": 0.10387611389160156, "step": 1468 }, { "epoch": 0.2046958823939246, "grad_norm": 2.4094297885894775, "learning_rate": 9.500892697386482e-06, "loss": 0.11750411987304688, "step": 1469 }, { "epoch": 0.204835226085139, "grad_norm": 1.6923578977584839, "learning_rate": 9.499865768488429e-06, "loss": 0.125213623046875, "step": 1470 }, { "epoch": 0.20497456977635337, "grad_norm": 2.6429877281188965, "learning_rate": 9.498837839822936e-06, "loss": 0.11548805236816406, "step": 1471 }, { "epoch": 0.20511391346756774, "grad_norm": 1.4507546424865723, "learning_rate": 9.497808911618385e-06, "loss": 0.13278961181640625, "step": 1472 }, { "epoch": 0.20525325715878215, "grad_norm": 1.227297067642212, "learning_rate": 9.496778984103381e-06, "loss": 0.12062454223632812, "step": 1473 }, { "epoch": 0.20539260084999653, "grad_norm": 1.3206292390823364, "learning_rate": 9.49574805750675e-06, "loss": 0.12315082550048828, "step": 1474 }, { "epoch": 0.2055319445412109, "grad_norm": 1.9812729358673096, "learning_rate": 9.49471613205754e-06, "loss": 0.11132431030273438, "step": 1475 }, { "epoch": 0.20567128823242528, "grad_norm": 2.352987289428711, "learning_rate": 9.493683207985022e-06, "loss": 0.1348114013671875, "step": 1476 }, { "epoch": 0.20581063192363966, "grad_norm": 1.8943636417388916, "learning_rate": 9.492649285518688e-06, "loss": 0.12561416625976562, "step": 1477 }, { "epoch": 0.20594997561485404, "grad_norm": 0.5734724402427673, "learning_rate": 9.49161436488825e-06, "loss": 0.07415008544921875, "step": 1478 }, { "epoch": 0.2060893193060684, "grad_norm": 1.4478847980499268, "learning_rate": 9.490578446323646e-06, "loss": 0.11959075927734375, "step": 1479 }, { "epoch": 0.2062286629972828, "grad_norm": 1.8126739263534546, "learning_rate": 9.489541530055034e-06, "loss": 0.10828590393066406, "step": 1480 }, { "epoch": 0.20636800668849717, "grad_norm": 1.4169654846191406, "learning_rate": 9.488503616312793e-06, "loss": 0.1402130126953125, "step": 1481 }, { "epoch": 0.20650735037971155, "grad_norm": 2.879258155822754, "learning_rate": 9.48746470532752e-06, "loss": 0.11876678466796875, "step": 1482 }, { "epoch": 0.20664669407092595, "grad_norm": 0.9008368849754333, "learning_rate": 9.48642479733004e-06, "loss": 0.09136772155761719, "step": 1483 }, { "epoch": 0.20678603776214033, "grad_norm": 2.370961904525757, "learning_rate": 9.4853838925514e-06, "loss": 0.11390113830566406, "step": 1484 }, { "epoch": 0.2069253814533547, "grad_norm": 1.2230815887451172, "learning_rate": 9.484341991222858e-06, "loss": 0.11555862426757812, "step": 1485 }, { "epoch": 0.20706472514456908, "grad_norm": 1.0500283241271973, "learning_rate": 9.483299093575909e-06, "loss": 0.09124183654785156, "step": 1486 }, { "epoch": 0.20720406883578346, "grad_norm": 2.1586074829101562, "learning_rate": 9.482255199842254e-06, "loss": 0.09299087524414062, "step": 1487 }, { "epoch": 0.20734341252699784, "grad_norm": 3.072868585586548, "learning_rate": 9.481210310253826e-06, "loss": 0.11739921569824219, "step": 1488 }, { "epoch": 0.20748275621821222, "grad_norm": 3.467500925064087, "learning_rate": 9.480164425042775e-06, "loss": 0.09548759460449219, "step": 1489 }, { "epoch": 0.2076220999094266, "grad_norm": 0.7984983325004578, "learning_rate": 9.479117544441472e-06, "loss": 0.06946563720703125, "step": 1490 }, { "epoch": 0.20776144360064097, "grad_norm": 1.5205286741256714, "learning_rate": 9.47806966868251e-06, "loss": 0.09842681884765625, "step": 1491 }, { "epoch": 0.20790078729185535, "grad_norm": 0.7293893694877625, "learning_rate": 9.477020797998707e-06, "loss": 0.07752418518066406, "step": 1492 }, { "epoch": 0.20804013098306975, "grad_norm": 0.6480579376220703, "learning_rate": 9.47597093262309e-06, "loss": 0.06247520446777344, "step": 1493 }, { "epoch": 0.20817947467428413, "grad_norm": 2.3479998111724854, "learning_rate": 9.474920072788925e-06, "loss": 0.1362762451171875, "step": 1494 }, { "epoch": 0.2083188183654985, "grad_norm": 0.9666648507118225, "learning_rate": 9.47386821872968e-06, "loss": 0.07034111022949219, "step": 1495 }, { "epoch": 0.2084581620567129, "grad_norm": 1.200432300567627, "learning_rate": 9.47281537067906e-06, "loss": 0.09501266479492188, "step": 1496 }, { "epoch": 0.20859750574792726, "grad_norm": 1.1498953104019165, "learning_rate": 9.471761528870978e-06, "loss": 0.10678482055664062, "step": 1497 }, { "epoch": 0.20873684943914164, "grad_norm": 0.5114059448242188, "learning_rate": 9.470706693539578e-06, "loss": 0.08069801330566406, "step": 1498 }, { "epoch": 0.20887619313035602, "grad_norm": 2.1306543350219727, "learning_rate": 9.469650864919217e-06, "loss": 0.11215782165527344, "step": 1499 }, { "epoch": 0.2090155368215704, "grad_norm": 1.554348349571228, "learning_rate": 9.46859404324448e-06, "loss": 0.12381744384765625, "step": 1500 }, { "epoch": 0.20915488051278477, "grad_norm": 1.4245953559875488, "learning_rate": 9.467536228750166e-06, "loss": 0.09197044372558594, "step": 1501 }, { "epoch": 0.20929422420399915, "grad_norm": 2.1319661140441895, "learning_rate": 9.466477421671296e-06, "loss": 0.10001754760742188, "step": 1502 }, { "epoch": 0.20943356789521356, "grad_norm": 1.6943762302398682, "learning_rate": 9.465417622243116e-06, "loss": 0.09728240966796875, "step": 1503 }, { "epoch": 0.20957291158642793, "grad_norm": 1.1123851537704468, "learning_rate": 9.464356830701086e-06, "loss": 0.09623527526855469, "step": 1504 }, { "epoch": 0.2097122552776423, "grad_norm": 1.8862817287445068, "learning_rate": 9.463295047280892e-06, "loss": 0.1253223419189453, "step": 1505 }, { "epoch": 0.2098515989688567, "grad_norm": 1.6294704675674438, "learning_rate": 9.462232272218437e-06, "loss": 0.11099624633789062, "step": 1506 }, { "epoch": 0.20999094266007107, "grad_norm": 2.213303327560425, "learning_rate": 9.461168505749847e-06, "loss": 0.1388683319091797, "step": 1507 }, { "epoch": 0.21013028635128544, "grad_norm": 0.9333174824714661, "learning_rate": 9.460103748111462e-06, "loss": 0.09784317016601562, "step": 1508 }, { "epoch": 0.21026963004249982, "grad_norm": 1.926177740097046, "learning_rate": 9.459037999539852e-06, "loss": 0.10472679138183594, "step": 1509 }, { "epoch": 0.2104089737337142, "grad_norm": 2.2276482582092285, "learning_rate": 9.4579712602718e-06, "loss": 0.16356658935546875, "step": 1510 }, { "epoch": 0.21054831742492858, "grad_norm": 1.4267802238464355, "learning_rate": 9.456903530544312e-06, "loss": 0.09262657165527344, "step": 1511 }, { "epoch": 0.21068766111614295, "grad_norm": 3.397228479385376, "learning_rate": 9.455834810594611e-06, "loss": 0.13512229919433594, "step": 1512 }, { "epoch": 0.21082700480735736, "grad_norm": 1.1855980157852173, "learning_rate": 9.454765100660144e-06, "loss": 0.08651542663574219, "step": 1513 }, { "epoch": 0.21096634849857174, "grad_norm": 0.7697134017944336, "learning_rate": 9.453694400978576e-06, "loss": 0.07770919799804688, "step": 1514 }, { "epoch": 0.21110569218978611, "grad_norm": 1.9885512590408325, "learning_rate": 9.452622711787793e-06, "loss": 0.09715461730957031, "step": 1515 }, { "epoch": 0.2112450358810005, "grad_norm": 1.0634506940841675, "learning_rate": 9.451550033325896e-06, "loss": 0.14888381958007812, "step": 1516 }, { "epoch": 0.21138437957221487, "grad_norm": 1.4431729316711426, "learning_rate": 9.450476365831214e-06, "loss": 0.178314208984375, "step": 1517 }, { "epoch": 0.21152372326342925, "grad_norm": 0.9164255857467651, "learning_rate": 9.449401709542289e-06, "loss": 0.09616470336914062, "step": 1518 }, { "epoch": 0.21166306695464362, "grad_norm": 1.2991732358932495, "learning_rate": 9.448326064697886e-06, "loss": 0.09123039245605469, "step": 1519 }, { "epoch": 0.211802410645858, "grad_norm": 0.39290276169776917, "learning_rate": 9.447249431536987e-06, "loss": 0.06719589233398438, "step": 1520 }, { "epoch": 0.21194175433707238, "grad_norm": 2.004169464111328, "learning_rate": 9.446171810298799e-06, "loss": 0.12768173217773438, "step": 1521 }, { "epoch": 0.21208109802828676, "grad_norm": 1.9128050804138184, "learning_rate": 9.44509320122274e-06, "loss": 0.10712814331054688, "step": 1522 }, { "epoch": 0.21222044171950116, "grad_norm": 1.3872367143630981, "learning_rate": 9.444013604548457e-06, "loss": 0.09460067749023438, "step": 1523 }, { "epoch": 0.21235978541071554, "grad_norm": 1.18509840965271, "learning_rate": 9.442933020515808e-06, "loss": 0.07114601135253906, "step": 1524 }, { "epoch": 0.21249912910192992, "grad_norm": 3.849945545196533, "learning_rate": 9.441851449364878e-06, "loss": 0.1353473663330078, "step": 1525 }, { "epoch": 0.2126384727931443, "grad_norm": 3.1182773113250732, "learning_rate": 9.440768891335962e-06, "loss": 0.09999465942382812, "step": 1526 }, { "epoch": 0.21277781648435867, "grad_norm": 1.4514895677566528, "learning_rate": 9.439685346669585e-06, "loss": 0.136383056640625, "step": 1527 }, { "epoch": 0.21291716017557305, "grad_norm": 1.5943679809570312, "learning_rate": 9.438600815606483e-06, "loss": 0.09419441223144531, "step": 1528 }, { "epoch": 0.21305650386678743, "grad_norm": 1.7391244173049927, "learning_rate": 9.437515298387617e-06, "loss": 0.08986282348632812, "step": 1529 }, { "epoch": 0.2131958475580018, "grad_norm": 1.3553844690322876, "learning_rate": 9.436428795254159e-06, "loss": 0.1248779296875, "step": 1530 }, { "epoch": 0.21333519124921618, "grad_norm": 0.7851797342300415, "learning_rate": 9.43534130644751e-06, "loss": 0.0820465087890625, "step": 1531 }, { "epoch": 0.21347453494043056, "grad_norm": 4.675093650817871, "learning_rate": 9.43425283220928e-06, "loss": 0.13877105712890625, "step": 1532 }, { "epoch": 0.21361387863164497, "grad_norm": 2.8102266788482666, "learning_rate": 9.43316337278131e-06, "loss": 0.11788702011108398, "step": 1533 }, { "epoch": 0.21375322232285934, "grad_norm": 1.3728299140930176, "learning_rate": 9.432072928405648e-06, "loss": 0.08423995971679688, "step": 1534 }, { "epoch": 0.21389256601407372, "grad_norm": 0.8776077032089233, "learning_rate": 9.430981499324567e-06, "loss": 0.09377479553222656, "step": 1535 }, { "epoch": 0.2140319097052881, "grad_norm": 1.3201202154159546, "learning_rate": 9.429889085780559e-06, "loss": 0.08826255798339844, "step": 1536 }, { "epoch": 0.21417125339650248, "grad_norm": 1.853030800819397, "learning_rate": 9.42879568801633e-06, "loss": 0.08786773681640625, "step": 1537 }, { "epoch": 0.21431059708771685, "grad_norm": 1.4265352487564087, "learning_rate": 9.427701306274812e-06, "loss": 0.1115264892578125, "step": 1538 }, { "epoch": 0.21444994077893123, "grad_norm": 1.9248921871185303, "learning_rate": 9.42660594079915e-06, "loss": 0.09314537048339844, "step": 1539 }, { "epoch": 0.2145892844701456, "grad_norm": 2.7239716053009033, "learning_rate": 9.42550959183271e-06, "loss": 0.08787918090820312, "step": 1540 }, { "epoch": 0.21472862816135999, "grad_norm": 1.6094738245010376, "learning_rate": 9.424412259619073e-06, "loss": 0.10169410705566406, "step": 1541 }, { "epoch": 0.21486797185257436, "grad_norm": 1.3145079612731934, "learning_rate": 9.423313944402043e-06, "loss": 0.09901237487792969, "step": 1542 }, { "epoch": 0.21500731554378877, "grad_norm": 2.267019748687744, "learning_rate": 9.422214646425641e-06, "loss": 0.10782241821289062, "step": 1543 }, { "epoch": 0.21514665923500315, "grad_norm": 1.4592335224151611, "learning_rate": 9.421114365934105e-06, "loss": 0.1319904327392578, "step": 1544 }, { "epoch": 0.21528600292621752, "grad_norm": 0.7489651441574097, "learning_rate": 9.420013103171893e-06, "loss": 0.07942771911621094, "step": 1545 }, { "epoch": 0.2154253466174319, "grad_norm": 2.83022403717041, "learning_rate": 9.418910858383681e-06, "loss": 0.1256561279296875, "step": 1546 }, { "epoch": 0.21556469030864628, "grad_norm": 0.5593822002410889, "learning_rate": 9.41780763181436e-06, "loss": 0.07887458801269531, "step": 1547 }, { "epoch": 0.21570403399986066, "grad_norm": 1.3192391395568848, "learning_rate": 9.416703423709044e-06, "loss": 0.11138725280761719, "step": 1548 }, { "epoch": 0.21584337769107503, "grad_norm": 0.5455822348594666, "learning_rate": 9.415598234313064e-06, "loss": 0.07921981811523438, "step": 1549 }, { "epoch": 0.2159827213822894, "grad_norm": 1.4986172914505005, "learning_rate": 9.414492063871964e-06, "loss": 0.12051010131835938, "step": 1550 }, { "epoch": 0.2161220650735038, "grad_norm": 1.3851656913757324, "learning_rate": 9.413384912631512e-06, "loss": 0.11636924743652344, "step": 1551 }, { "epoch": 0.21626140876471817, "grad_norm": 1.443562626838684, "learning_rate": 9.412276780837692e-06, "loss": 0.14691162109375, "step": 1552 }, { "epoch": 0.21640075245593257, "grad_norm": 0.7025742530822754, "learning_rate": 9.411167668736707e-06, "loss": 0.08977890014648438, "step": 1553 }, { "epoch": 0.21654009614714695, "grad_norm": 1.2798709869384766, "learning_rate": 9.410057576574974e-06, "loss": 0.09669685363769531, "step": 1554 }, { "epoch": 0.21667943983836133, "grad_norm": 1.7047947645187378, "learning_rate": 9.408946504599131e-06, "loss": 0.11164093017578125, "step": 1555 }, { "epoch": 0.2168187835295757, "grad_norm": 2.3328144550323486, "learning_rate": 9.40783445305603e-06, "loss": 0.11983585357666016, "step": 1556 }, { "epoch": 0.21695812722079008, "grad_norm": 1.9962234497070312, "learning_rate": 9.406721422192748e-06, "loss": 0.13907241821289062, "step": 1557 }, { "epoch": 0.21709747091200446, "grad_norm": 0.7681465148925781, "learning_rate": 9.405607412256573e-06, "loss": 0.08945274353027344, "step": 1558 }, { "epoch": 0.21723681460321884, "grad_norm": 1.3589235544204712, "learning_rate": 9.404492423495012e-06, "loss": 0.10952949523925781, "step": 1559 }, { "epoch": 0.2173761582944332, "grad_norm": 0.9898211359977722, "learning_rate": 9.403376456155792e-06, "loss": 0.10697364807128906, "step": 1560 }, { "epoch": 0.2175155019856476, "grad_norm": 2.3257036209106445, "learning_rate": 9.402259510486855e-06, "loss": 0.13796615600585938, "step": 1561 }, { "epoch": 0.21765484567686197, "grad_norm": 1.9850627183914185, "learning_rate": 9.401141586736359e-06, "loss": 0.11114501953125, "step": 1562 }, { "epoch": 0.21779418936807637, "grad_norm": 1.5774518251419067, "learning_rate": 9.400022685152683e-06, "loss": 0.18183135986328125, "step": 1563 }, { "epoch": 0.21793353305929075, "grad_norm": 1.3716251850128174, "learning_rate": 9.398902805984417e-06, "loss": 0.12571334838867188, "step": 1564 }, { "epoch": 0.21807287675050513, "grad_norm": 1.0918958187103271, "learning_rate": 9.397781949480381e-06, "loss": 0.1001882553100586, "step": 1565 }, { "epoch": 0.2182122204417195, "grad_norm": 1.2327957153320312, "learning_rate": 9.396660115889596e-06, "loss": 0.11102867126464844, "step": 1566 }, { "epoch": 0.21835156413293388, "grad_norm": 1.1927425861358643, "learning_rate": 9.395537305461312e-06, "loss": 0.1189422607421875, "step": 1567 }, { "epoch": 0.21849090782414826, "grad_norm": 1.091321349143982, "learning_rate": 9.394413518444989e-06, "loss": 0.08481788635253906, "step": 1568 }, { "epoch": 0.21863025151536264, "grad_norm": 1.8891550302505493, "learning_rate": 9.39328875509031e-06, "loss": 0.10857772827148438, "step": 1569 }, { "epoch": 0.21876959520657702, "grad_norm": 1.7986112833023071, "learning_rate": 9.39216301564717e-06, "loss": 0.1160888671875, "step": 1570 }, { "epoch": 0.2189089388977914, "grad_norm": 0.9586092829704285, "learning_rate": 9.391036300365681e-06, "loss": 0.09706306457519531, "step": 1571 }, { "epoch": 0.21904828258900577, "grad_norm": 1.8236552476882935, "learning_rate": 9.389908609496177e-06, "loss": 0.11907577514648438, "step": 1572 }, { "epoch": 0.21918762628022015, "grad_norm": 0.8784865140914917, "learning_rate": 9.388779943289204e-06, "loss": 0.07598495483398438, "step": 1573 }, { "epoch": 0.21932696997143455, "grad_norm": 2.3172101974487305, "learning_rate": 9.387650301995523e-06, "loss": 0.10402679443359375, "step": 1574 }, { "epoch": 0.21946631366264893, "grad_norm": 2.8548471927642822, "learning_rate": 9.386519685866117e-06, "loss": 0.10286140441894531, "step": 1575 }, { "epoch": 0.2196056573538633, "grad_norm": 1.325999140739441, "learning_rate": 9.385388095152184e-06, "loss": 0.10865020751953125, "step": 1576 }, { "epoch": 0.2197450010450777, "grad_norm": 1.771314024925232, "learning_rate": 9.384255530105136e-06, "loss": 0.15275955200195312, "step": 1577 }, { "epoch": 0.21988434473629206, "grad_norm": 1.2352505922317505, "learning_rate": 9.383121990976602e-06, "loss": 0.11501884460449219, "step": 1578 }, { "epoch": 0.22002368842750644, "grad_norm": 2.1449577808380127, "learning_rate": 9.381987478018431e-06, "loss": 0.08465576171875, "step": 1579 }, { "epoch": 0.22016303211872082, "grad_norm": 1.551053762435913, "learning_rate": 9.380851991482685e-06, "loss": 0.13165664672851562, "step": 1580 }, { "epoch": 0.2203023758099352, "grad_norm": 1.444352149963379, "learning_rate": 9.379715531621642e-06, "loss": 0.13681983947753906, "step": 1581 }, { "epoch": 0.22044171950114957, "grad_norm": 1.7737606763839722, "learning_rate": 9.3785780986878e-06, "loss": 0.13210487365722656, "step": 1582 }, { "epoch": 0.22058106319236395, "grad_norm": 0.8355474472045898, "learning_rate": 9.377439692933869e-06, "loss": 0.07354164123535156, "step": 1583 }, { "epoch": 0.22072040688357836, "grad_norm": 0.3746301233768463, "learning_rate": 9.376300314612775e-06, "loss": 0.07143211364746094, "step": 1584 }, { "epoch": 0.22085975057479273, "grad_norm": 1.785102367401123, "learning_rate": 9.375159963977668e-06, "loss": 0.1278209686279297, "step": 1585 }, { "epoch": 0.2209990942660071, "grad_norm": 2.0433504581451416, "learning_rate": 9.374018641281898e-06, "loss": 0.11345863342285156, "step": 1586 }, { "epoch": 0.2211384379572215, "grad_norm": 1.6296477317810059, "learning_rate": 9.37287634677905e-06, "loss": 0.08202552795410156, "step": 1587 }, { "epoch": 0.22127778164843587, "grad_norm": 0.6162410378456116, "learning_rate": 9.371733080722911e-06, "loss": 0.08197498321533203, "step": 1588 }, { "epoch": 0.22141712533965024, "grad_norm": 1.324569821357727, "learning_rate": 9.37058884336749e-06, "loss": 0.09532356262207031, "step": 1589 }, { "epoch": 0.22155646903086462, "grad_norm": 1.0317175388336182, "learning_rate": 9.36944363496701e-06, "loss": 0.09135627746582031, "step": 1590 }, { "epoch": 0.221695812722079, "grad_norm": 2.270353317260742, "learning_rate": 9.368297455775911e-06, "loss": 0.16811370849609375, "step": 1591 }, { "epoch": 0.22183515641329338, "grad_norm": 0.5626220107078552, "learning_rate": 9.367150306048847e-06, "loss": 0.06699180603027344, "step": 1592 }, { "epoch": 0.22197450010450775, "grad_norm": 0.8139947652816772, "learning_rate": 9.36600218604069e-06, "loss": 0.07612800598144531, "step": 1593 }, { "epoch": 0.22211384379572216, "grad_norm": 1.7687703371047974, "learning_rate": 9.364853096006523e-06, "loss": 0.12435150146484375, "step": 1594 }, { "epoch": 0.22225318748693654, "grad_norm": 1.140484094619751, "learning_rate": 9.36370303620165e-06, "loss": 0.122802734375, "step": 1595 }, { "epoch": 0.22239253117815092, "grad_norm": 0.45916134119033813, "learning_rate": 9.362552006881588e-06, "loss": 0.07425403594970703, "step": 1596 }, { "epoch": 0.2225318748693653, "grad_norm": 4.369224548339844, "learning_rate": 9.361400008302068e-06, "loss": 0.13373184204101562, "step": 1597 }, { "epoch": 0.22267121856057967, "grad_norm": 0.9578552842140198, "learning_rate": 9.36024704071904e-06, "loss": 0.07784461975097656, "step": 1598 }, { "epoch": 0.22281056225179405, "grad_norm": 1.5284415483474731, "learning_rate": 9.359093104388663e-06, "loss": 0.11660385131835938, "step": 1599 }, { "epoch": 0.22294990594300843, "grad_norm": 1.0828661918640137, "learning_rate": 9.35793819956732e-06, "loss": 0.10884857177734375, "step": 1600 }, { "epoch": 0.2230892496342228, "grad_norm": 1.323595404624939, "learning_rate": 9.356782326511602e-06, "loss": 0.16167449951171875, "step": 1601 }, { "epoch": 0.22322859332543718, "grad_norm": 2.8721578121185303, "learning_rate": 9.355625485478319e-06, "loss": 0.1677074432373047, "step": 1602 }, { "epoch": 0.22336793701665156, "grad_norm": 2.2807250022888184, "learning_rate": 9.354467676724491e-06, "loss": 0.1252899169921875, "step": 1603 }, { "epoch": 0.22350728070786596, "grad_norm": 2.139329671859741, "learning_rate": 9.353308900507361e-06, "loss": 0.10918426513671875, "step": 1604 }, { "epoch": 0.22364662439908034, "grad_norm": 2.925529718399048, "learning_rate": 9.352149157084383e-06, "loss": 0.15073394775390625, "step": 1605 }, { "epoch": 0.22378596809029472, "grad_norm": 3.704134225845337, "learning_rate": 9.350988446713221e-06, "loss": 0.11532974243164062, "step": 1606 }, { "epoch": 0.2239253117815091, "grad_norm": 1.353419303894043, "learning_rate": 9.349826769651762e-06, "loss": 0.09574127197265625, "step": 1607 }, { "epoch": 0.22406465547272347, "grad_norm": 1.4272894859313965, "learning_rate": 9.348664126158103e-06, "loss": 0.1407470703125, "step": 1608 }, { "epoch": 0.22420399916393785, "grad_norm": 0.45251521468162537, "learning_rate": 9.347500516490555e-06, "loss": 0.06984138488769531, "step": 1609 }, { "epoch": 0.22434334285515223, "grad_norm": 2.8880951404571533, "learning_rate": 9.346335940907648e-06, "loss": 0.11532020568847656, "step": 1610 }, { "epoch": 0.2244826865463666, "grad_norm": 1.7078443765640259, "learning_rate": 9.345170399668127e-06, "loss": 0.09656524658203125, "step": 1611 }, { "epoch": 0.22462203023758098, "grad_norm": 0.6364144086837769, "learning_rate": 9.344003893030942e-06, "loss": 0.060894012451171875, "step": 1612 }, { "epoch": 0.22476137392879536, "grad_norm": 0.49949976801872253, "learning_rate": 9.342836421255268e-06, "loss": 0.07738685607910156, "step": 1613 }, { "epoch": 0.22490071762000977, "grad_norm": 6.710173606872559, "learning_rate": 9.341667984600489e-06, "loss": 0.14514923095703125, "step": 1614 }, { "epoch": 0.22504006131122414, "grad_norm": 1.8636267185211182, "learning_rate": 9.340498583326208e-06, "loss": 0.10691070556640625, "step": 1615 }, { "epoch": 0.22517940500243852, "grad_norm": 0.6805217266082764, "learning_rate": 9.339328217692233e-06, "loss": 0.055393218994140625, "step": 1616 }, { "epoch": 0.2253187486936529, "grad_norm": 1.4126287698745728, "learning_rate": 9.3381568879586e-06, "loss": 0.11688613891601562, "step": 1617 }, { "epoch": 0.22545809238486728, "grad_norm": 1.5232429504394531, "learning_rate": 9.336984594385547e-06, "loss": 0.10313796997070312, "step": 1618 }, { "epoch": 0.22559743607608165, "grad_norm": 1.7585700750350952, "learning_rate": 9.335811337233533e-06, "loss": 0.09059524536132812, "step": 1619 }, { "epoch": 0.22573677976729603, "grad_norm": 0.8959484100341797, "learning_rate": 9.334637116763227e-06, "loss": 0.08831214904785156, "step": 1620 }, { "epoch": 0.2258761234585104, "grad_norm": 1.535442590713501, "learning_rate": 9.333461933235517e-06, "loss": 0.09344482421875, "step": 1621 }, { "epoch": 0.22601546714972479, "grad_norm": 2.011326313018799, "learning_rate": 9.332285786911498e-06, "loss": 0.11568641662597656, "step": 1622 }, { "epoch": 0.22615481084093916, "grad_norm": 0.8975304961204529, "learning_rate": 9.331108678052485e-06, "loss": 0.09638214111328125, "step": 1623 }, { "epoch": 0.22629415453215357, "grad_norm": 1.7438193559646606, "learning_rate": 9.329930606920005e-06, "loss": 0.12486648559570312, "step": 1624 }, { "epoch": 0.22643349822336795, "grad_norm": 1.7585535049438477, "learning_rate": 9.3287515737758e-06, "loss": 0.09982872009277344, "step": 1625 }, { "epoch": 0.22657284191458232, "grad_norm": 0.9384442567825317, "learning_rate": 9.32757157888182e-06, "loss": 0.06961441040039062, "step": 1626 }, { "epoch": 0.2267121856057967, "grad_norm": 1.476862907409668, "learning_rate": 9.326390622500236e-06, "loss": 0.10798454284667969, "step": 1627 }, { "epoch": 0.22685152929701108, "grad_norm": 0.515020489692688, "learning_rate": 9.32520870489343e-06, "loss": 0.07626724243164062, "step": 1628 }, { "epoch": 0.22699087298822546, "grad_norm": 1.6060264110565186, "learning_rate": 9.324025826323995e-06, "loss": 0.12512969970703125, "step": 1629 }, { "epoch": 0.22713021667943983, "grad_norm": 1.093331217765808, "learning_rate": 9.322841987054741e-06, "loss": 0.06621456146240234, "step": 1630 }, { "epoch": 0.2272695603706542, "grad_norm": 1.189266562461853, "learning_rate": 9.321657187348689e-06, "loss": 0.07067108154296875, "step": 1631 }, { "epoch": 0.2274089040618686, "grad_norm": 1.219838261604309, "learning_rate": 9.320471427469076e-06, "loss": 0.10618972778320312, "step": 1632 }, { "epoch": 0.22754824775308297, "grad_norm": 1.8008726835250854, "learning_rate": 9.319284707679348e-06, "loss": 0.10804367065429688, "step": 1633 }, { "epoch": 0.22768759144429737, "grad_norm": 2.31172251701355, "learning_rate": 9.31809702824317e-06, "loss": 0.1707620620727539, "step": 1634 }, { "epoch": 0.22782693513551175, "grad_norm": 2.186272144317627, "learning_rate": 9.316908389424416e-06, "loss": 0.09435272216796875, "step": 1635 }, { "epoch": 0.22796627882672613, "grad_norm": 3.5995023250579834, "learning_rate": 9.315718791487175e-06, "loss": 0.1595458984375, "step": 1636 }, { "epoch": 0.2281056225179405, "grad_norm": 2.099000930786133, "learning_rate": 9.314528234695747e-06, "loss": 0.10005378723144531, "step": 1637 }, { "epoch": 0.22824496620915488, "grad_norm": 3.3607466220855713, "learning_rate": 9.31333671931465e-06, "loss": 0.12052345275878906, "step": 1638 }, { "epoch": 0.22838430990036926, "grad_norm": 2.398569107055664, "learning_rate": 9.312144245608608e-06, "loss": 0.14010238647460938, "step": 1639 }, { "epoch": 0.22852365359158364, "grad_norm": 0.9166138172149658, "learning_rate": 9.31095081384256e-06, "loss": 0.125946044921875, "step": 1640 }, { "epoch": 0.22866299728279801, "grad_norm": 1.1273524761199951, "learning_rate": 9.309756424281664e-06, "loss": 0.10224533081054688, "step": 1641 }, { "epoch": 0.2288023409740124, "grad_norm": 1.9242297410964966, "learning_rate": 9.308561077191284e-06, "loss": 0.11240768432617188, "step": 1642 }, { "epoch": 0.22894168466522677, "grad_norm": 0.8166905641555786, "learning_rate": 9.307364772837e-06, "loss": 0.06946182250976562, "step": 1643 }, { "epoch": 0.22908102835644117, "grad_norm": 0.7225096821784973, "learning_rate": 9.306167511484601e-06, "loss": 0.0860910415649414, "step": 1644 }, { "epoch": 0.22922037204765555, "grad_norm": 1.0422028303146362, "learning_rate": 9.304969293400092e-06, "loss": 0.08449554443359375, "step": 1645 }, { "epoch": 0.22935971573886993, "grad_norm": 1.581206202507019, "learning_rate": 9.303770118849692e-06, "loss": 0.1503753662109375, "step": 1646 }, { "epoch": 0.2294990594300843, "grad_norm": 1.4856163263320923, "learning_rate": 9.302569988099825e-06, "loss": 0.10939788818359375, "step": 1647 }, { "epoch": 0.22963840312129868, "grad_norm": 1.627637505531311, "learning_rate": 9.301368901417138e-06, "loss": 0.13797760009765625, "step": 1648 }, { "epoch": 0.22977774681251306, "grad_norm": 2.440843105316162, "learning_rate": 9.300166859068482e-06, "loss": 0.10733604431152344, "step": 1649 }, { "epoch": 0.22991709050372744, "grad_norm": 1.6698108911514282, "learning_rate": 9.298963861320927e-06, "loss": 0.11138725280761719, "step": 1650 }, { "epoch": 0.23005643419494182, "grad_norm": 0.9459899663925171, "learning_rate": 9.297759908441747e-06, "loss": 0.07871055603027344, "step": 1651 }, { "epoch": 0.2301957778861562, "grad_norm": 1.6316320896148682, "learning_rate": 9.296555000698435e-06, "loss": 0.090240478515625, "step": 1652 }, { "epoch": 0.23033512157737057, "grad_norm": 1.2467890977859497, "learning_rate": 9.295349138358693e-06, "loss": 0.10487937927246094, "step": 1653 }, { "epoch": 0.23047446526858498, "grad_norm": 0.5154477953910828, "learning_rate": 9.294142321690438e-06, "loss": 0.09226417541503906, "step": 1654 }, { "epoch": 0.23061380895979935, "grad_norm": 1.068105697631836, "learning_rate": 9.292934550961796e-06, "loss": 0.0778961181640625, "step": 1655 }, { "epoch": 0.23075315265101373, "grad_norm": 2.541280508041382, "learning_rate": 9.291725826441107e-06, "loss": 0.16767311096191406, "step": 1656 }, { "epoch": 0.2308924963422281, "grad_norm": 2.457369804382324, "learning_rate": 9.29051614839692e-06, "loss": 0.09633159637451172, "step": 1657 }, { "epoch": 0.2310318400334425, "grad_norm": 0.6597248315811157, "learning_rate": 9.289305517098e-06, "loss": 0.0984954833984375, "step": 1658 }, { "epoch": 0.23117118372465686, "grad_norm": 1.4166737794876099, "learning_rate": 9.28809393281332e-06, "loss": 0.07991981506347656, "step": 1659 }, { "epoch": 0.23131052741587124, "grad_norm": 1.702810287475586, "learning_rate": 9.286881395812066e-06, "loss": 0.15488815307617188, "step": 1660 }, { "epoch": 0.23144987110708562, "grad_norm": 0.8761632442474365, "learning_rate": 9.285667906363637e-06, "loss": 0.08447074890136719, "step": 1661 }, { "epoch": 0.2315892147983, "grad_norm": 0.944878876209259, "learning_rate": 9.284453464737644e-06, "loss": 0.08221435546875, "step": 1662 }, { "epoch": 0.23172855848951437, "grad_norm": 3.329188585281372, "learning_rate": 9.283238071203907e-06, "loss": 0.10627937316894531, "step": 1663 }, { "epoch": 0.23186790218072878, "grad_norm": 3.0493719577789307, "learning_rate": 9.282021726032457e-06, "loss": 0.09693145751953125, "step": 1664 }, { "epoch": 0.23200724587194316, "grad_norm": 2.1401939392089844, "learning_rate": 9.280804429493542e-06, "loss": 0.1191253662109375, "step": 1665 }, { "epoch": 0.23214658956315753, "grad_norm": 3.676395893096924, "learning_rate": 9.279586181857613e-06, "loss": 0.11944961547851562, "step": 1666 }, { "epoch": 0.2322859332543719, "grad_norm": 2.484574794769287, "learning_rate": 9.278366983395341e-06, "loss": 0.09064483642578125, "step": 1667 }, { "epoch": 0.2324252769455863, "grad_norm": 2.2590415477752686, "learning_rate": 9.277146834377601e-06, "loss": 0.13689613342285156, "step": 1668 }, { "epoch": 0.23256462063680067, "grad_norm": 1.2250643968582153, "learning_rate": 9.275925735075484e-06, "loss": 0.08687496185302734, "step": 1669 }, { "epoch": 0.23270396432801504, "grad_norm": 0.8530566096305847, "learning_rate": 9.274703685760287e-06, "loss": 0.060616493225097656, "step": 1670 }, { "epoch": 0.23284330801922942, "grad_norm": 2.530149221420288, "learning_rate": 9.273480686703526e-06, "loss": 0.10058021545410156, "step": 1671 }, { "epoch": 0.2329826517104438, "grad_norm": 0.7407644987106323, "learning_rate": 9.272256738176924e-06, "loss": 0.06924819946289062, "step": 1672 }, { "epoch": 0.23312199540165818, "grad_norm": 2.016254425048828, "learning_rate": 9.271031840452409e-06, "loss": 0.2003650665283203, "step": 1673 }, { "epoch": 0.23326133909287258, "grad_norm": 1.1484466791152954, "learning_rate": 9.26980599380213e-06, "loss": 0.0854349136352539, "step": 1674 }, { "epoch": 0.23340068278408696, "grad_norm": 1.8101845979690552, "learning_rate": 9.268579198498438e-06, "loss": 0.12054061889648438, "step": 1675 }, { "epoch": 0.23354002647530134, "grad_norm": 0.9374971985816956, "learning_rate": 9.267351454813904e-06, "loss": 0.10516929626464844, "step": 1676 }, { "epoch": 0.23367937016651572, "grad_norm": 1.150597333908081, "learning_rate": 9.266122763021302e-06, "loss": 0.09988594055175781, "step": 1677 }, { "epoch": 0.2338187138577301, "grad_norm": 1.3502087593078613, "learning_rate": 9.264893123393618e-06, "loss": 0.11198616027832031, "step": 1678 }, { "epoch": 0.23395805754894447, "grad_norm": 0.8323330283164978, "learning_rate": 9.26366253620405e-06, "loss": 0.11626052856445312, "step": 1679 }, { "epoch": 0.23409740124015885, "grad_norm": 2.0808024406433105, "learning_rate": 9.26243100172601e-06, "loss": 0.11988258361816406, "step": 1680 }, { "epoch": 0.23423674493137323, "grad_norm": 2.107897996902466, "learning_rate": 9.261198520233113e-06, "loss": 0.126312255859375, "step": 1681 }, { "epoch": 0.2343760886225876, "grad_norm": 0.8464423418045044, "learning_rate": 9.25996509199919e-06, "loss": 0.08742523193359375, "step": 1682 }, { "epoch": 0.23451543231380198, "grad_norm": 1.187475323677063, "learning_rate": 9.258730717298281e-06, "loss": 0.08918571472167969, "step": 1683 }, { "epoch": 0.23465477600501639, "grad_norm": 1.7097424268722534, "learning_rate": 9.257495396404635e-06, "loss": 0.12475013732910156, "step": 1684 }, { "epoch": 0.23479411969623076, "grad_norm": 3.384294033050537, "learning_rate": 9.256259129592711e-06, "loss": 0.10417938232421875, "step": 1685 }, { "epoch": 0.23493346338744514, "grad_norm": 2.2324585914611816, "learning_rate": 9.255021917137181e-06, "loss": 0.10951805114746094, "step": 1686 }, { "epoch": 0.23507280707865952, "grad_norm": 1.439475655555725, "learning_rate": 9.253783759312924e-06, "loss": 0.11902046203613281, "step": 1687 }, { "epoch": 0.2352121507698739, "grad_norm": 1.950290560722351, "learning_rate": 9.252544656395033e-06, "loss": 0.09666824340820312, "step": 1688 }, { "epoch": 0.23535149446108827, "grad_norm": 0.7911676168441772, "learning_rate": 9.251304608658806e-06, "loss": 0.10423469543457031, "step": 1689 }, { "epoch": 0.23549083815230265, "grad_norm": 1.4599876403808594, "learning_rate": 9.250063616379754e-06, "loss": 0.129730224609375, "step": 1690 }, { "epoch": 0.23563018184351703, "grad_norm": 1.3013643026351929, "learning_rate": 9.248821679833596e-06, "loss": 0.1327362060546875, "step": 1691 }, { "epoch": 0.2357695255347314, "grad_norm": 1.3837902545928955, "learning_rate": 9.247578799296263e-06, "loss": 0.09362602233886719, "step": 1692 }, { "epoch": 0.23590886922594578, "grad_norm": 1.5624029636383057, "learning_rate": 9.246334975043896e-06, "loss": 0.11053752899169922, "step": 1693 }, { "epoch": 0.2360482129171602, "grad_norm": 1.6405926942825317, "learning_rate": 9.245090207352842e-06, "loss": 0.11254692077636719, "step": 1694 }, { "epoch": 0.23618755660837457, "grad_norm": 1.8343784809112549, "learning_rate": 9.243844496499661e-06, "loss": 0.11967658996582031, "step": 1695 }, { "epoch": 0.23632690029958894, "grad_norm": 1.2434450387954712, "learning_rate": 9.242597842761123e-06, "loss": 0.10386466979980469, "step": 1696 }, { "epoch": 0.23646624399080332, "grad_norm": 2.6804392337799072, "learning_rate": 9.241350246414203e-06, "loss": 0.1440258026123047, "step": 1697 }, { "epoch": 0.2366055876820177, "grad_norm": 1.807586669921875, "learning_rate": 9.24010170773609e-06, "loss": 0.10992050170898438, "step": 1698 }, { "epoch": 0.23674493137323208, "grad_norm": 3.747856616973877, "learning_rate": 9.23885222700418e-06, "loss": 0.13232994079589844, "step": 1699 }, { "epoch": 0.23688427506444645, "grad_norm": 1.5015519857406616, "learning_rate": 9.237601804496081e-06, "loss": 0.07676887512207031, "step": 1700 }, { "epoch": 0.23702361875566083, "grad_norm": 2.6207070350646973, "learning_rate": 9.236350440489608e-06, "loss": 0.12345504760742188, "step": 1701 }, { "epoch": 0.2371629624468752, "grad_norm": 0.7721652984619141, "learning_rate": 9.235098135262783e-06, "loss": 0.08983993530273438, "step": 1702 }, { "epoch": 0.23730230613808959, "grad_norm": 1.201410174369812, "learning_rate": 9.233844889093842e-06, "loss": 0.10291099548339844, "step": 1703 }, { "epoch": 0.237441649829304, "grad_norm": 1.1699730157852173, "learning_rate": 9.232590702261227e-06, "loss": 0.10898780822753906, "step": 1704 }, { "epoch": 0.23758099352051837, "grad_norm": 1.2576709985733032, "learning_rate": 9.23133557504359e-06, "loss": 0.10711097717285156, "step": 1705 }, { "epoch": 0.23772033721173275, "grad_norm": 2.346768379211426, "learning_rate": 9.23007950771979e-06, "loss": 0.12064743041992188, "step": 1706 }, { "epoch": 0.23785968090294712, "grad_norm": 1.349402666091919, "learning_rate": 9.228822500568898e-06, "loss": 0.11309242248535156, "step": 1707 }, { "epoch": 0.2379990245941615, "grad_norm": 1.0185155868530273, "learning_rate": 9.227564553870192e-06, "loss": 0.10869979858398438, "step": 1708 }, { "epoch": 0.23813836828537588, "grad_norm": 0.6905288100242615, "learning_rate": 9.226305667903159e-06, "loss": 0.11757850646972656, "step": 1709 }, { "epoch": 0.23827771197659026, "grad_norm": 0.7420839667320251, "learning_rate": 9.225045842947496e-06, "loss": 0.09010505676269531, "step": 1710 }, { "epoch": 0.23841705566780463, "grad_norm": 0.8632801175117493, "learning_rate": 9.223785079283106e-06, "loss": 0.07841873168945312, "step": 1711 }, { "epoch": 0.238556399359019, "grad_norm": 1.1588927507400513, "learning_rate": 9.2225233771901e-06, "loss": 0.09322118759155273, "step": 1712 }, { "epoch": 0.2386957430502334, "grad_norm": 1.0337774753570557, "learning_rate": 9.221260736948803e-06, "loss": 0.08963775634765625, "step": 1713 }, { "epoch": 0.2388350867414478, "grad_norm": 1.0910232067108154, "learning_rate": 9.219997158839743e-06, "loss": 0.09815216064453125, "step": 1714 }, { "epoch": 0.23897443043266217, "grad_norm": 1.0705934762954712, "learning_rate": 9.21873264314366e-06, "loss": 0.11175155639648438, "step": 1715 }, { "epoch": 0.23911377412387655, "grad_norm": 0.9757649898529053, "learning_rate": 9.217467190141498e-06, "loss": 0.07398223876953125, "step": 1716 }, { "epoch": 0.23925311781509093, "grad_norm": 0.9425495266914368, "learning_rate": 9.216200800114412e-06, "loss": 0.0914154052734375, "step": 1717 }, { "epoch": 0.2393924615063053, "grad_norm": 1.378273844718933, "learning_rate": 9.214933473343765e-06, "loss": 0.10976409912109375, "step": 1718 }, { "epoch": 0.23953180519751968, "grad_norm": 0.9774830341339111, "learning_rate": 9.213665210111131e-06, "loss": 0.11139297485351562, "step": 1719 }, { "epoch": 0.23967114888873406, "grad_norm": 1.1262882947921753, "learning_rate": 9.212396010698286e-06, "loss": 0.10485458374023438, "step": 1720 }, { "epoch": 0.23981049257994844, "grad_norm": 0.7427749037742615, "learning_rate": 9.211125875387217e-06, "loss": 0.06793403625488281, "step": 1721 }, { "epoch": 0.23994983627116281, "grad_norm": 0.6784161925315857, "learning_rate": 9.209854804460121e-06, "loss": 0.067718505859375, "step": 1722 }, { "epoch": 0.2400891799623772, "grad_norm": 4.0813446044921875, "learning_rate": 9.208582798199402e-06, "loss": 0.1517467498779297, "step": 1723 }, { "epoch": 0.2402285236535916, "grad_norm": 3.723716974258423, "learning_rate": 9.207309856887664e-06, "loss": 0.11745643615722656, "step": 1724 }, { "epoch": 0.24036786734480597, "grad_norm": 1.6425106525421143, "learning_rate": 9.206035980807734e-06, "loss": 0.12605857849121094, "step": 1725 }, { "epoch": 0.24050721103602035, "grad_norm": 1.1993446350097656, "learning_rate": 9.204761170242635e-06, "loss": 0.08905601501464844, "step": 1726 }, { "epoch": 0.24064655472723473, "grad_norm": 0.6697181463241577, "learning_rate": 9.203485425475598e-06, "loss": 0.08455657958984375, "step": 1727 }, { "epoch": 0.2407858984184491, "grad_norm": 3.030216693878174, "learning_rate": 9.202208746790069e-06, "loss": 0.0921783447265625, "step": 1728 }, { "epoch": 0.24092524210966348, "grad_norm": 4.726919651031494, "learning_rate": 9.200931134469692e-06, "loss": 0.10928916931152344, "step": 1729 }, { "epoch": 0.24106458580087786, "grad_norm": 0.9397567510604858, "learning_rate": 9.199652588798327e-06, "loss": 0.08933639526367188, "step": 1730 }, { "epoch": 0.24120392949209224, "grad_norm": 1.308463215827942, "learning_rate": 9.198373110060037e-06, "loss": 0.09709548950195312, "step": 1731 }, { "epoch": 0.24134327318330662, "grad_norm": 2.4024062156677246, "learning_rate": 9.197092698539092e-06, "loss": 0.14113998413085938, "step": 1732 }, { "epoch": 0.241482616874521, "grad_norm": 2.639132261276245, "learning_rate": 9.19581135451997e-06, "loss": 0.15449905395507812, "step": 1733 }, { "epoch": 0.2416219605657354, "grad_norm": 2.4321558475494385, "learning_rate": 9.194529078287358e-06, "loss": 0.11220741271972656, "step": 1734 }, { "epoch": 0.24176130425694978, "grad_norm": 1.237034559249878, "learning_rate": 9.193245870126147e-06, "loss": 0.10234355926513672, "step": 1735 }, { "epoch": 0.24190064794816415, "grad_norm": 1.5212939977645874, "learning_rate": 9.191961730321437e-06, "loss": 0.09677886962890625, "step": 1736 }, { "epoch": 0.24203999163937853, "grad_norm": 1.733559250831604, "learning_rate": 9.190676659158535e-06, "loss": 0.10144996643066406, "step": 1737 }, { "epoch": 0.2421793353305929, "grad_norm": 2.3258442878723145, "learning_rate": 9.189390656922955e-06, "loss": 0.08634567260742188, "step": 1738 }, { "epoch": 0.2423186790218073, "grad_norm": 2.209672451019287, "learning_rate": 9.188103723900414e-06, "loss": 0.12119865417480469, "step": 1739 }, { "epoch": 0.24245802271302166, "grad_norm": 1.38559091091156, "learning_rate": 9.186815860376843e-06, "loss": 0.09567451477050781, "step": 1740 }, { "epoch": 0.24259736640423604, "grad_norm": 1.4015096426010132, "learning_rate": 9.185527066638375e-06, "loss": 0.12428855895996094, "step": 1741 }, { "epoch": 0.24273671009545042, "grad_norm": 1.99052894115448, "learning_rate": 9.184237342971349e-06, "loss": 0.08995819091796875, "step": 1742 }, { "epoch": 0.2428760537866648, "grad_norm": 2.7128028869628906, "learning_rate": 9.182946689662314e-06, "loss": 0.125640869140625, "step": 1743 }, { "epoch": 0.2430153974778792, "grad_norm": 3.3809332847595215, "learning_rate": 9.181655106998023e-06, "loss": 0.161376953125, "step": 1744 }, { "epoch": 0.24315474116909358, "grad_norm": 0.49076616764068604, "learning_rate": 9.180362595265435e-06, "loss": 0.06482887268066406, "step": 1745 }, { "epoch": 0.24329408486030796, "grad_norm": 1.3536745309829712, "learning_rate": 9.179069154751718e-06, "loss": 0.10400962829589844, "step": 1746 }, { "epoch": 0.24343342855152234, "grad_norm": 1.0918325185775757, "learning_rate": 9.177774785744245e-06, "loss": 0.09191131591796875, "step": 1747 }, { "epoch": 0.2435727722427367, "grad_norm": 0.9617135524749756, "learning_rate": 9.176479488530594e-06, "loss": 0.08865165710449219, "step": 1748 }, { "epoch": 0.2437121159339511, "grad_norm": 1.4715749025344849, "learning_rate": 9.175183263398553e-06, "loss": 0.09898567199707031, "step": 1749 }, { "epoch": 0.24385145962516547, "grad_norm": 1.4477710723876953, "learning_rate": 9.17388611063611e-06, "loss": 0.08181571960449219, "step": 1750 }, { "epoch": 0.24399080331637985, "grad_norm": 1.411683201789856, "learning_rate": 9.172588030531467e-06, "loss": 0.11558151245117188, "step": 1751 }, { "epoch": 0.24413014700759422, "grad_norm": 1.4621968269348145, "learning_rate": 9.171289023373022e-06, "loss": 0.1378040313720703, "step": 1752 }, { "epoch": 0.2442694906988086, "grad_norm": 1.1109113693237305, "learning_rate": 9.16998908944939e-06, "loss": 0.11367034912109375, "step": 1753 }, { "epoch": 0.244408834390023, "grad_norm": 1.7136162519454956, "learning_rate": 9.168688229049386e-06, "loss": 0.08981513977050781, "step": 1754 }, { "epoch": 0.24454817808123738, "grad_norm": 1.6248486042022705, "learning_rate": 9.167386442462029e-06, "loss": 0.10851097106933594, "step": 1755 }, { "epoch": 0.24468752177245176, "grad_norm": 1.1223889589309692, "learning_rate": 9.166083729976547e-06, "loss": 0.0909271240234375, "step": 1756 }, { "epoch": 0.24482686546366614, "grad_norm": 1.3102573156356812, "learning_rate": 9.164780091882374e-06, "loss": 0.08616256713867188, "step": 1757 }, { "epoch": 0.24496620915488052, "grad_norm": 1.3224775791168213, "learning_rate": 9.163475528469148e-06, "loss": 0.09122848510742188, "step": 1758 }, { "epoch": 0.2451055528460949, "grad_norm": 0.6794072985649109, "learning_rate": 9.162170040026714e-06, "loss": 0.07804107666015625, "step": 1759 }, { "epoch": 0.24524489653730927, "grad_norm": 1.2313120365142822, "learning_rate": 9.16086362684512e-06, "loss": 0.12737274169921875, "step": 1760 }, { "epoch": 0.24538424022852365, "grad_norm": 2.375062942504883, "learning_rate": 9.159556289214623e-06, "loss": 0.11561775207519531, "step": 1761 }, { "epoch": 0.24552358391973803, "grad_norm": 1.4171160459518433, "learning_rate": 9.158248027425683e-06, "loss": 0.10225677490234375, "step": 1762 }, { "epoch": 0.2456629276109524, "grad_norm": 3.1119143962860107, "learning_rate": 9.156938841768965e-06, "loss": 0.1445789337158203, "step": 1763 }, { "epoch": 0.2458022713021668, "grad_norm": 1.4237351417541504, "learning_rate": 9.155628732535342e-06, "loss": 0.08463096618652344, "step": 1764 }, { "epoch": 0.24594161499338119, "grad_norm": 1.2046748399734497, "learning_rate": 9.15431770001589e-06, "loss": 0.10136985778808594, "step": 1765 }, { "epoch": 0.24608095868459556, "grad_norm": 1.2096055746078491, "learning_rate": 9.153005744501886e-06, "loss": 0.1225433349609375, "step": 1766 }, { "epoch": 0.24622030237580994, "grad_norm": 1.6218088865280151, "learning_rate": 9.151692866284824e-06, "loss": 0.14098739624023438, "step": 1767 }, { "epoch": 0.24635964606702432, "grad_norm": 0.9361887574195862, "learning_rate": 9.150379065656389e-06, "loss": 0.07372665405273438, "step": 1768 }, { "epoch": 0.2464989897582387, "grad_norm": 2.628002643585205, "learning_rate": 9.149064342908482e-06, "loss": 0.11157989501953125, "step": 1769 }, { "epoch": 0.24663833344945307, "grad_norm": 1.893249273300171, "learning_rate": 9.147748698333203e-06, "loss": 0.13138580322265625, "step": 1770 }, { "epoch": 0.24677767714066745, "grad_norm": 1.5241318941116333, "learning_rate": 9.146432132222858e-06, "loss": 0.11575508117675781, "step": 1771 }, { "epoch": 0.24691702083188183, "grad_norm": 0.9694388508796692, "learning_rate": 9.145114644869957e-06, "loss": 0.106201171875, "step": 1772 }, { "epoch": 0.2470563645230962, "grad_norm": 1.1504231691360474, "learning_rate": 9.143796236567218e-06, "loss": 0.14105606079101562, "step": 1773 }, { "epoch": 0.24719570821431058, "grad_norm": 1.1973505020141602, "learning_rate": 9.142476907607558e-06, "loss": 0.10606098175048828, "step": 1774 }, { "epoch": 0.247335051905525, "grad_norm": 1.3748987913131714, "learning_rate": 9.141156658284104e-06, "loss": 0.08311653137207031, "step": 1775 }, { "epoch": 0.24747439559673937, "grad_norm": 1.5408309698104858, "learning_rate": 9.139835488890186e-06, "loss": 0.09572601318359375, "step": 1776 }, { "epoch": 0.24761373928795374, "grad_norm": 2.7034196853637695, "learning_rate": 9.138513399719335e-06, "loss": 0.1306915283203125, "step": 1777 }, { "epoch": 0.24775308297916812, "grad_norm": 2.6373348236083984, "learning_rate": 9.13719039106529e-06, "loss": 0.12446212768554688, "step": 1778 }, { "epoch": 0.2478924266703825, "grad_norm": 1.1081151962280273, "learning_rate": 9.135866463221994e-06, "loss": 0.122344970703125, "step": 1779 }, { "epoch": 0.24803177036159688, "grad_norm": 2.510920524597168, "learning_rate": 9.134541616483594e-06, "loss": 0.13286781311035156, "step": 1780 }, { "epoch": 0.24817111405281125, "grad_norm": 1.5860799551010132, "learning_rate": 9.13321585114444e-06, "loss": 0.11869430541992188, "step": 1781 }, { "epoch": 0.24831045774402563, "grad_norm": 0.6289289593696594, "learning_rate": 9.131889167499086e-06, "loss": 0.08248329162597656, "step": 1782 }, { "epoch": 0.24844980143524, "grad_norm": 1.253448486328125, "learning_rate": 9.130561565842293e-06, "loss": 0.12022590637207031, "step": 1783 }, { "epoch": 0.24858914512645439, "grad_norm": 1.4077112674713135, "learning_rate": 9.129233046469021e-06, "loss": 0.09189224243164062, "step": 1784 }, { "epoch": 0.2487284888176688, "grad_norm": 0.9177370667457581, "learning_rate": 9.12790360967444e-06, "loss": 0.10833168029785156, "step": 1785 }, { "epoch": 0.24886783250888317, "grad_norm": 2.5588622093200684, "learning_rate": 9.126573255753917e-06, "loss": 0.12390613555908203, "step": 1786 }, { "epoch": 0.24900717620009755, "grad_norm": 2.2440927028656006, "learning_rate": 9.125241985003028e-06, "loss": 0.15847015380859375, "step": 1787 }, { "epoch": 0.24914651989131192, "grad_norm": 2.7141172885894775, "learning_rate": 9.123909797717551e-06, "loss": 0.1609344482421875, "step": 1788 }, { "epoch": 0.2492858635825263, "grad_norm": 1.5315619707107544, "learning_rate": 9.122576694193467e-06, "loss": 0.0953521728515625, "step": 1789 }, { "epoch": 0.24942520727374068, "grad_norm": 1.7012529373168945, "learning_rate": 9.121242674726962e-06, "loss": 0.1070098876953125, "step": 1790 }, { "epoch": 0.24956455096495506, "grad_norm": 1.0277824401855469, "learning_rate": 9.119907739614424e-06, "loss": 0.09138870239257812, "step": 1791 }, { "epoch": 0.24970389465616943, "grad_norm": 1.3875606060028076, "learning_rate": 9.118571889152445e-06, "loss": 0.11743736267089844, "step": 1792 }, { "epoch": 0.2498432383473838, "grad_norm": 0.8954291939735413, "learning_rate": 9.117235123637822e-06, "loss": 0.105987548828125, "step": 1793 }, { "epoch": 0.2499825820385982, "grad_norm": 0.43958190083503723, "learning_rate": 9.115897443367552e-06, "loss": 0.06092357635498047, "step": 1794 }, { "epoch": 0.25012192572981257, "grad_norm": 0.666200578212738, "learning_rate": 9.114558848638836e-06, "loss": 0.07728195190429688, "step": 1795 }, { "epoch": 0.25026126942102694, "grad_norm": 0.6824493408203125, "learning_rate": 9.113219339749084e-06, "loss": 0.09764289855957031, "step": 1796 }, { "epoch": 0.2504006131122413, "grad_norm": 0.6331687569618225, "learning_rate": 9.1118789169959e-06, "loss": 0.09376716613769531, "step": 1797 }, { "epoch": 0.2505399568034557, "grad_norm": 3.071763277053833, "learning_rate": 9.110537580677094e-06, "loss": 0.13203811645507812, "step": 1798 }, { "epoch": 0.2506793004946701, "grad_norm": 0.8340160250663757, "learning_rate": 9.109195331090685e-06, "loss": 0.1014862060546875, "step": 1799 }, { "epoch": 0.2508186441858845, "grad_norm": 1.2434887886047363, "learning_rate": 9.10785216853489e-06, "loss": 0.09128570556640625, "step": 1800 }, { "epoch": 0.2509579878770989, "grad_norm": 0.6175529956817627, "learning_rate": 9.106508093308123e-06, "loss": 0.08535003662109375, "step": 1801 }, { "epoch": 0.25109733156831326, "grad_norm": 1.285143256187439, "learning_rate": 9.105163105709011e-06, "loss": 0.09274101257324219, "step": 1802 }, { "epoch": 0.25123667525952764, "grad_norm": 1.4111216068267822, "learning_rate": 9.103817206036383e-06, "loss": 0.16399192810058594, "step": 1803 }, { "epoch": 0.251376018950742, "grad_norm": 0.4563996195793152, "learning_rate": 9.10247039458926e-06, "loss": 0.06763648986816406, "step": 1804 }, { "epoch": 0.2515153626419564, "grad_norm": 0.736782968044281, "learning_rate": 9.101122671666878e-06, "loss": 0.09151077270507812, "step": 1805 }, { "epoch": 0.2516547063331708, "grad_norm": 0.932953953742981, "learning_rate": 9.09977403756867e-06, "loss": 0.09952926635742188, "step": 1806 }, { "epoch": 0.25179405002438515, "grad_norm": 0.5773320198059082, "learning_rate": 9.098424492594268e-06, "loss": 0.07416152954101562, "step": 1807 }, { "epoch": 0.25193339371559953, "grad_norm": 1.3903281688690186, "learning_rate": 9.097074037043512e-06, "loss": 0.09075355529785156, "step": 1808 }, { "epoch": 0.2520727374068139, "grad_norm": 0.9314352869987488, "learning_rate": 9.095722671216443e-06, "loss": 0.11103630065917969, "step": 1809 }, { "epoch": 0.2522120810980283, "grad_norm": 3.0655019283294678, "learning_rate": 9.094370395413306e-06, "loss": 0.1875934600830078, "step": 1810 }, { "epoch": 0.25235142478924266, "grad_norm": 1.6376396417617798, "learning_rate": 9.09301720993454e-06, "loss": 0.16095733642578125, "step": 1811 }, { "epoch": 0.25249076848045704, "grad_norm": 1.6268724203109741, "learning_rate": 9.091663115080797e-06, "loss": 0.10204696655273438, "step": 1812 }, { "epoch": 0.2526301121716714, "grad_norm": 1.0174798965454102, "learning_rate": 9.090308111152924e-06, "loss": 0.08769607543945312, "step": 1813 }, { "epoch": 0.2527694558628858, "grad_norm": 1.6534556150436401, "learning_rate": 9.08895219845197e-06, "loss": 0.12287139892578125, "step": 1814 }, { "epoch": 0.25290879955410017, "grad_norm": 0.6771280765533447, "learning_rate": 9.087595377279192e-06, "loss": 0.0951690673828125, "step": 1815 }, { "epoch": 0.25304814324531455, "grad_norm": 1.1192026138305664, "learning_rate": 9.086237647936043e-06, "loss": 0.14423179626464844, "step": 1816 }, { "epoch": 0.2531874869365289, "grad_norm": 2.722320556640625, "learning_rate": 9.084879010724177e-06, "loss": 0.14861249923706055, "step": 1817 }, { "epoch": 0.2533268306277433, "grad_norm": 2.018537759780884, "learning_rate": 9.083519465945456e-06, "loss": 0.08835792541503906, "step": 1818 }, { "epoch": 0.2534661743189577, "grad_norm": 1.6478023529052734, "learning_rate": 9.082159013901937e-06, "loss": 0.10846138000488281, "step": 1819 }, { "epoch": 0.2536055180101721, "grad_norm": 1.506010890007019, "learning_rate": 9.080797654895883e-06, "loss": 0.14704513549804688, "step": 1820 }, { "epoch": 0.2537448617013865, "grad_norm": 1.9377961158752441, "learning_rate": 9.079435389229755e-06, "loss": 0.12740135192871094, "step": 1821 }, { "epoch": 0.25388420539260087, "grad_norm": 1.7321079969406128, "learning_rate": 9.07807221720622e-06, "loss": 0.10477733612060547, "step": 1822 }, { "epoch": 0.25402354908381525, "grad_norm": 0.9244603514671326, "learning_rate": 9.07670813912814e-06, "loss": 0.06731224060058594, "step": 1823 }, { "epoch": 0.2541628927750296, "grad_norm": 1.2304373979568481, "learning_rate": 9.075343155298589e-06, "loss": 0.10082530975341797, "step": 1824 }, { "epoch": 0.254302236466244, "grad_norm": 1.0041378736495972, "learning_rate": 9.073977266020826e-06, "loss": 0.08694648742675781, "step": 1825 }, { "epoch": 0.2544415801574584, "grad_norm": 1.6857155561447144, "learning_rate": 9.072610471598327e-06, "loss": 0.121734619140625, "step": 1826 }, { "epoch": 0.25458092384867276, "grad_norm": 0.5721675157546997, "learning_rate": 9.07124277233476e-06, "loss": 0.09222030639648438, "step": 1827 }, { "epoch": 0.25472026753988714, "grad_norm": 1.1421124935150146, "learning_rate": 9.069874168533996e-06, "loss": 0.1219482421875, "step": 1828 }, { "epoch": 0.2548596112311015, "grad_norm": 1.396429419517517, "learning_rate": 9.068504660500111e-06, "loss": 0.09272956848144531, "step": 1829 }, { "epoch": 0.2549989549223159, "grad_norm": 1.635254144668579, "learning_rate": 9.067134248537374e-06, "loss": 0.09113693237304688, "step": 1830 }, { "epoch": 0.25513829861353027, "grad_norm": 1.6289385557174683, "learning_rate": 9.065762932950262e-06, "loss": 0.1367816925048828, "step": 1831 }, { "epoch": 0.25527764230474465, "grad_norm": 1.305099606513977, "learning_rate": 9.06439071404345e-06, "loss": 0.09994125366210938, "step": 1832 }, { "epoch": 0.255416985995959, "grad_norm": 1.7049418687820435, "learning_rate": 9.063017592121812e-06, "loss": 0.08757162094116211, "step": 1833 }, { "epoch": 0.2555563296871734, "grad_norm": 1.2096233367919922, "learning_rate": 9.061643567490425e-06, "loss": 0.10314178466796875, "step": 1834 }, { "epoch": 0.2556956733783878, "grad_norm": 2.0083863735198975, "learning_rate": 9.060268640454565e-06, "loss": 0.144317626953125, "step": 1835 }, { "epoch": 0.25583501706960216, "grad_norm": 0.6510186791419983, "learning_rate": 9.058892811319713e-06, "loss": 0.09420585632324219, "step": 1836 }, { "epoch": 0.25597436076081653, "grad_norm": 1.185957431793213, "learning_rate": 9.057516080391544e-06, "loss": 0.10371971130371094, "step": 1837 }, { "epoch": 0.2561137044520309, "grad_norm": 0.9471873044967651, "learning_rate": 9.056138447975936e-06, "loss": 0.10918045043945312, "step": 1838 }, { "epoch": 0.2562530481432453, "grad_norm": 0.7760533690452576, "learning_rate": 9.05475991437897e-06, "loss": 0.08783531188964844, "step": 1839 }, { "epoch": 0.2563923918344597, "grad_norm": 1.6673046350479126, "learning_rate": 9.053380479906919e-06, "loss": 0.13378334045410156, "step": 1840 }, { "epoch": 0.2565317355256741, "grad_norm": 1.3903954029083252, "learning_rate": 9.052000144866269e-06, "loss": 0.08989143371582031, "step": 1841 }, { "epoch": 0.2566710792168885, "grad_norm": 1.0811781883239746, "learning_rate": 9.050618909563693e-06, "loss": 0.10190963745117188, "step": 1842 }, { "epoch": 0.25681042290810285, "grad_norm": 0.9270034432411194, "learning_rate": 9.049236774306073e-06, "loss": 0.069793701171875, "step": 1843 }, { "epoch": 0.25694976659931723, "grad_norm": 4.14654016494751, "learning_rate": 9.04785373940049e-06, "loss": 0.0938873291015625, "step": 1844 }, { "epoch": 0.2570891102905316, "grad_norm": 3.648742437362671, "learning_rate": 9.046469805154218e-06, "loss": 0.12440109252929688, "step": 1845 }, { "epoch": 0.257228453981746, "grad_norm": 3.2715046405792236, "learning_rate": 9.045084971874738e-06, "loss": 0.10841178894042969, "step": 1846 }, { "epoch": 0.25736779767296036, "grad_norm": 1.240277647972107, "learning_rate": 9.043699239869727e-06, "loss": 0.09416007995605469, "step": 1847 }, { "epoch": 0.25750714136417474, "grad_norm": 2.268416404724121, "learning_rate": 9.042312609447066e-06, "loss": 0.10871124267578125, "step": 1848 }, { "epoch": 0.2576464850553891, "grad_norm": 2.092728853225708, "learning_rate": 9.040925080914832e-06, "loss": 0.1187896728515625, "step": 1849 }, { "epoch": 0.2577858287466035, "grad_norm": 2.4340717792510986, "learning_rate": 9.039536654581297e-06, "loss": 0.1329784393310547, "step": 1850 }, { "epoch": 0.2579251724378179, "grad_norm": 1.4391217231750488, "learning_rate": 9.038147330754944e-06, "loss": 0.09682083129882812, "step": 1851 }, { "epoch": 0.25806451612903225, "grad_norm": 1.415557861328125, "learning_rate": 9.036757109744447e-06, "loss": 0.09072113037109375, "step": 1852 }, { "epoch": 0.25820385982024663, "grad_norm": 1.4742785692214966, "learning_rate": 9.035365991858679e-06, "loss": 0.091217041015625, "step": 1853 }, { "epoch": 0.258343203511461, "grad_norm": 1.9310691356658936, "learning_rate": 9.033973977406718e-06, "loss": 0.1378793716430664, "step": 1854 }, { "epoch": 0.2584825472026754, "grad_norm": 0.7957258224487305, "learning_rate": 9.032581066697836e-06, "loss": 0.08388900756835938, "step": 1855 }, { "epoch": 0.25862189089388976, "grad_norm": 1.3892862796783447, "learning_rate": 9.031187260041505e-06, "loss": 0.11836624145507812, "step": 1856 }, { "epoch": 0.25876123458510414, "grad_norm": 1.7409039735794067, "learning_rate": 9.0297925577474e-06, "loss": 0.09461212158203125, "step": 1857 }, { "epoch": 0.2589005782763185, "grad_norm": 1.1223766803741455, "learning_rate": 9.028396960125392e-06, "loss": 0.08438491821289062, "step": 1858 }, { "epoch": 0.2590399219675329, "grad_norm": 1.2248919010162354, "learning_rate": 9.027000467485547e-06, "loss": 0.10943794250488281, "step": 1859 }, { "epoch": 0.2591792656587473, "grad_norm": 1.0148628950119019, "learning_rate": 9.025603080138136e-06, "loss": 0.11799240112304688, "step": 1860 }, { "epoch": 0.2593186093499617, "grad_norm": 1.250918984413147, "learning_rate": 9.024204798393627e-06, "loss": 0.0961761474609375, "step": 1861 }, { "epoch": 0.2594579530411761, "grad_norm": 1.0903435945510864, "learning_rate": 9.022805622562687e-06, "loss": 0.10341835021972656, "step": 1862 }, { "epoch": 0.25959729673239046, "grad_norm": 0.6797084808349609, "learning_rate": 9.02140555295618e-06, "loss": 0.08785057067871094, "step": 1863 }, { "epoch": 0.25973664042360484, "grad_norm": 1.0775980949401855, "learning_rate": 9.020004589885167e-06, "loss": 0.09402275085449219, "step": 1864 }, { "epoch": 0.2598759841148192, "grad_norm": 1.6189380884170532, "learning_rate": 9.018602733660915e-06, "loss": 0.088958740234375, "step": 1865 }, { "epoch": 0.2600153278060336, "grad_norm": 0.8365020155906677, "learning_rate": 9.01719998459488e-06, "loss": 0.08501625061035156, "step": 1866 }, { "epoch": 0.26015467149724797, "grad_norm": 0.8047712445259094, "learning_rate": 9.015796342998724e-06, "loss": 0.06907272338867188, "step": 1867 }, { "epoch": 0.26029401518846235, "grad_norm": 1.6112056970596313, "learning_rate": 9.014391809184302e-06, "loss": 0.09986686706542969, "step": 1868 }, { "epoch": 0.2604333588796767, "grad_norm": 0.6151695847511292, "learning_rate": 9.01298638346367e-06, "loss": 0.08145332336425781, "step": 1869 }, { "epoch": 0.2605727025708911, "grad_norm": 1.552791953086853, "learning_rate": 9.011580066149081e-06, "loss": 0.08392906188964844, "step": 1870 }, { "epoch": 0.2607120462621055, "grad_norm": 5.303103446960449, "learning_rate": 9.010172857552989e-06, "loss": 0.1769542694091797, "step": 1871 }, { "epoch": 0.26085138995331986, "grad_norm": 1.355271816253662, "learning_rate": 9.008764757988042e-06, "loss": 0.07807064056396484, "step": 1872 }, { "epoch": 0.26099073364453423, "grad_norm": 1.9150551557540894, "learning_rate": 9.007355767767085e-06, "loss": 0.10395431518554688, "step": 1873 }, { "epoch": 0.2611300773357486, "grad_norm": 2.8745977878570557, "learning_rate": 9.005945887203167e-06, "loss": 0.1540699005126953, "step": 1874 }, { "epoch": 0.261269421026963, "grad_norm": 1.2129255533218384, "learning_rate": 9.004535116609532e-06, "loss": 0.1032876968383789, "step": 1875 }, { "epoch": 0.26140876471817737, "grad_norm": 2.098893404006958, "learning_rate": 9.003123456299617e-06, "loss": 0.14905548095703125, "step": 1876 }, { "epoch": 0.26154810840939174, "grad_norm": 2.504533529281616, "learning_rate": 9.001710906587064e-06, "loss": 0.1473217010498047, "step": 1877 }, { "epoch": 0.2616874521006061, "grad_norm": 1.3300690650939941, "learning_rate": 9.000297467785708e-06, "loss": 0.08445215225219727, "step": 1878 }, { "epoch": 0.2618267957918205, "grad_norm": 3.1815500259399414, "learning_rate": 8.998883140209582e-06, "loss": 0.14067459106445312, "step": 1879 }, { "epoch": 0.26196613948303493, "grad_norm": 1.3560103178024292, "learning_rate": 8.99746792417292e-06, "loss": 0.09051132202148438, "step": 1880 }, { "epoch": 0.2621054831742493, "grad_norm": 1.3331449031829834, "learning_rate": 8.996051819990148e-06, "loss": 0.12945175170898438, "step": 1881 }, { "epoch": 0.2622448268654637, "grad_norm": 0.8410905003547668, "learning_rate": 8.994634827975892e-06, "loss": 0.08153343200683594, "step": 1882 }, { "epoch": 0.26238417055667806, "grad_norm": 1.3053785562515259, "learning_rate": 8.993216948444978e-06, "loss": 0.10254287719726562, "step": 1883 }, { "epoch": 0.26252351424789244, "grad_norm": 0.6639305949211121, "learning_rate": 8.991798181712423e-06, "loss": 0.07761669158935547, "step": 1884 }, { "epoch": 0.2626628579391068, "grad_norm": 0.6375487446784973, "learning_rate": 8.99037852809345e-06, "loss": 0.06989288330078125, "step": 1885 }, { "epoch": 0.2628022016303212, "grad_norm": 0.7502957582473755, "learning_rate": 8.988957987903467e-06, "loss": 0.07715606689453125, "step": 1886 }, { "epoch": 0.2629415453215356, "grad_norm": 1.9561011791229248, "learning_rate": 8.987536561458088e-06, "loss": 0.103118896484375, "step": 1887 }, { "epoch": 0.26308088901274995, "grad_norm": 0.8038527369499207, "learning_rate": 8.986114249073122e-06, "loss": 0.08720207214355469, "step": 1888 }, { "epoch": 0.26322023270396433, "grad_norm": 0.5906953811645508, "learning_rate": 8.984691051064576e-06, "loss": 0.0729217529296875, "step": 1889 }, { "epoch": 0.2633595763951787, "grad_norm": 1.0270553827285767, "learning_rate": 8.98326696774865e-06, "loss": 0.09445762634277344, "step": 1890 }, { "epoch": 0.2634989200863931, "grad_norm": 0.6287045478820801, "learning_rate": 8.981841999441743e-06, "loss": 0.07621383666992188, "step": 1891 }, { "epoch": 0.26363826377760746, "grad_norm": 0.5614995360374451, "learning_rate": 8.980416146460452e-06, "loss": 0.07157707214355469, "step": 1892 }, { "epoch": 0.26377760746882184, "grad_norm": 0.5461083054542542, "learning_rate": 8.978989409121565e-06, "loss": 0.08571624755859375, "step": 1893 }, { "epoch": 0.2639169511600362, "grad_norm": 0.6976205706596375, "learning_rate": 8.977561787742074e-06, "loss": 0.06891441345214844, "step": 1894 }, { "epoch": 0.2640562948512506, "grad_norm": 1.0504497289657593, "learning_rate": 8.976133282639166e-06, "loss": 0.0930023193359375, "step": 1895 }, { "epoch": 0.264195638542465, "grad_norm": 1.0138412714004517, "learning_rate": 8.974703894130218e-06, "loss": 0.11748886108398438, "step": 1896 }, { "epoch": 0.26433498223367935, "grad_norm": 1.7370562553405762, "learning_rate": 8.973273622532806e-06, "loss": 0.1534271240234375, "step": 1897 }, { "epoch": 0.2644743259248937, "grad_norm": 1.0114198923110962, "learning_rate": 8.97184246816471e-06, "loss": 0.144805908203125, "step": 1898 }, { "epoch": 0.2646136696161081, "grad_norm": 2.7352232933044434, "learning_rate": 8.970410431343892e-06, "loss": 0.16678619384765625, "step": 1899 }, { "epoch": 0.26475301330732254, "grad_norm": 0.8970415592193604, "learning_rate": 8.968977512388524e-06, "loss": 0.07260799407958984, "step": 1900 }, { "epoch": 0.2648923569985369, "grad_norm": 1.809792399406433, "learning_rate": 8.967543711616968e-06, "loss": 0.15375518798828125, "step": 1901 }, { "epoch": 0.2650317006897513, "grad_norm": 0.8458457589149475, "learning_rate": 8.966109029347777e-06, "loss": 0.1284027099609375, "step": 1902 }, { "epoch": 0.26517104438096567, "grad_norm": 0.8866481184959412, "learning_rate": 8.96467346589971e-06, "loss": 0.09443283081054688, "step": 1903 }, { "epoch": 0.26531038807218005, "grad_norm": 1.1179777383804321, "learning_rate": 8.963237021591714e-06, "loss": 0.0980682373046875, "step": 1904 }, { "epoch": 0.2654497317633944, "grad_norm": 0.5376020669937134, "learning_rate": 8.961799696742933e-06, "loss": 0.07334327697753906, "step": 1905 }, { "epoch": 0.2655890754546088, "grad_norm": 1.4261685609817505, "learning_rate": 8.960361491672708e-06, "loss": 0.10724639892578125, "step": 1906 }, { "epoch": 0.2657284191458232, "grad_norm": 1.2659692764282227, "learning_rate": 8.958922406700578e-06, "loss": 0.11085891723632812, "step": 1907 }, { "epoch": 0.26586776283703756, "grad_norm": 0.8548563718795776, "learning_rate": 8.957482442146271e-06, "loss": 0.09950447082519531, "step": 1908 }, { "epoch": 0.26600710652825194, "grad_norm": 0.8339663743972778, "learning_rate": 8.956041598329716e-06, "loss": 0.09372138977050781, "step": 1909 }, { "epoch": 0.2661464502194663, "grad_norm": 1.5842301845550537, "learning_rate": 8.954599875571039e-06, "loss": 0.1466522216796875, "step": 1910 }, { "epoch": 0.2662857939106807, "grad_norm": 1.4855608940124512, "learning_rate": 8.953157274190552e-06, "loss": 0.12486457824707031, "step": 1911 }, { "epoch": 0.26642513760189507, "grad_norm": 0.6087223291397095, "learning_rate": 8.951713794508771e-06, "loss": 0.07673835754394531, "step": 1912 }, { "epoch": 0.26656448129310945, "grad_norm": 0.3923768103122711, "learning_rate": 8.950269436846405e-06, "loss": 0.06628990173339844, "step": 1913 }, { "epoch": 0.2667038249843238, "grad_norm": 0.9004413485527039, "learning_rate": 8.948824201524355e-06, "loss": 0.08336162567138672, "step": 1914 }, { "epoch": 0.2668431686755382, "grad_norm": 2.0934975147247314, "learning_rate": 8.947378088863722e-06, "loss": 0.1551494598388672, "step": 1915 }, { "epoch": 0.2669825123667526, "grad_norm": 1.6196311712265015, "learning_rate": 8.945931099185798e-06, "loss": 0.142974853515625, "step": 1916 }, { "epoch": 0.26712185605796696, "grad_norm": 2.6730384826660156, "learning_rate": 8.94448323281207e-06, "loss": 0.147247314453125, "step": 1917 }, { "epoch": 0.26726119974918133, "grad_norm": 0.6166942119598389, "learning_rate": 8.943034490064222e-06, "loss": 0.081024169921875, "step": 1918 }, { "epoch": 0.2674005434403957, "grad_norm": 0.8311205506324768, "learning_rate": 8.941584871264131e-06, "loss": 0.0809011459350586, "step": 1919 }, { "epoch": 0.26753988713161014, "grad_norm": 1.2029203176498413, "learning_rate": 8.940134376733869e-06, "loss": 0.11765480041503906, "step": 1920 }, { "epoch": 0.2676792308228245, "grad_norm": 0.6316922307014465, "learning_rate": 8.938683006795704e-06, "loss": 0.09202957153320312, "step": 1921 }, { "epoch": 0.2678185745140389, "grad_norm": 0.8444608449935913, "learning_rate": 8.937230761772098e-06, "loss": 0.09557723999023438, "step": 1922 }, { "epoch": 0.2679579182052533, "grad_norm": 2.4041807651519775, "learning_rate": 8.935777641985704e-06, "loss": 0.11413002014160156, "step": 1923 }, { "epoch": 0.26809726189646765, "grad_norm": 1.8388230800628662, "learning_rate": 8.934323647759373e-06, "loss": 0.13323593139648438, "step": 1924 }, { "epoch": 0.26823660558768203, "grad_norm": 0.7714658975601196, "learning_rate": 8.932868779416148e-06, "loss": 0.08943367004394531, "step": 1925 }, { "epoch": 0.2683759492788964, "grad_norm": 0.7345979809761047, "learning_rate": 8.931413037279271e-06, "loss": 0.09379959106445312, "step": 1926 }, { "epoch": 0.2685152929701108, "grad_norm": 1.0846829414367676, "learning_rate": 8.929956421672172e-06, "loss": 0.0917673110961914, "step": 1927 }, { "epoch": 0.26865463666132516, "grad_norm": 1.0510315895080566, "learning_rate": 8.92849893291848e-06, "loss": 0.10387611389160156, "step": 1928 }, { "epoch": 0.26879398035253954, "grad_norm": 0.5177178382873535, "learning_rate": 8.927040571342014e-06, "loss": 0.077606201171875, "step": 1929 }, { "epoch": 0.2689333240437539, "grad_norm": 1.0231589078903198, "learning_rate": 8.92558133726679e-06, "loss": 0.09763526916503906, "step": 1930 }, { "epoch": 0.2690726677349683, "grad_norm": 1.526546597480774, "learning_rate": 8.924121231017012e-06, "loss": 0.14705276489257812, "step": 1931 }, { "epoch": 0.2692120114261827, "grad_norm": 1.3750522136688232, "learning_rate": 8.922660252917088e-06, "loss": 0.1300525665283203, "step": 1932 }, { "epoch": 0.26935135511739705, "grad_norm": 1.5406895875930786, "learning_rate": 8.92119840329161e-06, "loss": 0.10787200927734375, "step": 1933 }, { "epoch": 0.26949069880861143, "grad_norm": 0.6154429316520691, "learning_rate": 8.919735682465372e-06, "loss": 0.08092880249023438, "step": 1934 }, { "epoch": 0.2696300424998258, "grad_norm": 1.6375961303710938, "learning_rate": 8.918272090763352e-06, "loss": 0.10542488098144531, "step": 1935 }, { "epoch": 0.2697693861910402, "grad_norm": 0.8513425588607788, "learning_rate": 8.91680762851073e-06, "loss": 0.09581375122070312, "step": 1936 }, { "epoch": 0.26990872988225456, "grad_norm": 1.6568562984466553, "learning_rate": 8.915342296032874e-06, "loss": 0.12232589721679688, "step": 1937 }, { "epoch": 0.27004807357346894, "grad_norm": 1.0259318351745605, "learning_rate": 8.913876093655351e-06, "loss": 0.11375617980957031, "step": 1938 }, { "epoch": 0.2701874172646833, "grad_norm": 2.4824471473693848, "learning_rate": 8.912409021703914e-06, "loss": 0.0817575454711914, "step": 1939 }, { "epoch": 0.27032676095589775, "grad_norm": 2.8475258350372314, "learning_rate": 8.910941080504514e-06, "loss": 0.10833549499511719, "step": 1940 }, { "epoch": 0.2704661046471121, "grad_norm": 1.2241653203964233, "learning_rate": 8.909472270383293e-06, "loss": 0.149658203125, "step": 1941 }, { "epoch": 0.2706054483383265, "grad_norm": 1.008413553237915, "learning_rate": 8.90800259166659e-06, "loss": 0.06477165222167969, "step": 1942 }, { "epoch": 0.2707447920295409, "grad_norm": 1.6531351804733276, "learning_rate": 8.906532044680933e-06, "loss": 0.10782241821289062, "step": 1943 }, { "epoch": 0.27088413572075526, "grad_norm": 1.520605206489563, "learning_rate": 8.905060629753041e-06, "loss": 0.11155128479003906, "step": 1944 }, { "epoch": 0.27102347941196964, "grad_norm": 0.7413604855537415, "learning_rate": 8.903588347209833e-06, "loss": 0.10414505004882812, "step": 1945 }, { "epoch": 0.271162823103184, "grad_norm": 2.084470510482788, "learning_rate": 8.902115197378414e-06, "loss": 0.1133270263671875, "step": 1946 }, { "epoch": 0.2713021667943984, "grad_norm": 1.7642236948013306, "learning_rate": 8.900641180586086e-06, "loss": 0.12908935546875, "step": 1947 }, { "epoch": 0.27144151048561277, "grad_norm": 0.8379514217376709, "learning_rate": 8.89916629716034e-06, "loss": 0.10439300537109375, "step": 1948 }, { "epoch": 0.27158085417682715, "grad_norm": 0.8657958507537842, "learning_rate": 8.897690547428861e-06, "loss": 0.10568046569824219, "step": 1949 }, { "epoch": 0.2717201978680415, "grad_norm": 1.5078179836273193, "learning_rate": 8.89621393171953e-06, "loss": 0.12174606323242188, "step": 1950 }, { "epoch": 0.2718595415592559, "grad_norm": 0.9853098392486572, "learning_rate": 8.894736450360415e-06, "loss": 0.07683944702148438, "step": 1951 }, { "epoch": 0.2719988852504703, "grad_norm": 1.5558342933654785, "learning_rate": 8.893258103679779e-06, "loss": 0.16225814819335938, "step": 1952 }, { "epoch": 0.27213822894168466, "grad_norm": 1.4672683477401733, "learning_rate": 8.891778892006077e-06, "loss": 0.12190437316894531, "step": 1953 }, { "epoch": 0.27227757263289903, "grad_norm": 2.2954766750335693, "learning_rate": 8.890298815667956e-06, "loss": 0.09727096557617188, "step": 1954 }, { "epoch": 0.2724169163241134, "grad_norm": 0.8300310969352722, "learning_rate": 8.888817874994254e-06, "loss": 0.07903289794921875, "step": 1955 }, { "epoch": 0.2725562600153278, "grad_norm": 0.8223693370819092, "learning_rate": 8.887336070314005e-06, "loss": 0.08286285400390625, "step": 1956 }, { "epoch": 0.27269560370654217, "grad_norm": 1.2268997430801392, "learning_rate": 8.88585340195643e-06, "loss": 0.10991477966308594, "step": 1957 }, { "epoch": 0.27283494739775654, "grad_norm": 0.9550989866256714, "learning_rate": 8.884369870250945e-06, "loss": 0.11276865005493164, "step": 1958 }, { "epoch": 0.2729742910889709, "grad_norm": 0.6568061113357544, "learning_rate": 8.882885475527156e-06, "loss": 0.06536865234375, "step": 1959 }, { "epoch": 0.27311363478018535, "grad_norm": 1.1974601745605469, "learning_rate": 8.881400218114861e-06, "loss": 0.14170265197753906, "step": 1960 }, { "epoch": 0.27325297847139973, "grad_norm": 1.2744566202163696, "learning_rate": 8.879914098344053e-06, "loss": 0.08325004577636719, "step": 1961 }, { "epoch": 0.2733923221626141, "grad_norm": 0.9666972160339355, "learning_rate": 8.878427116544912e-06, "loss": 0.10713577270507812, "step": 1962 }, { "epoch": 0.2735316658538285, "grad_norm": 1.596187710762024, "learning_rate": 8.876939273047813e-06, "loss": 0.11614227294921875, "step": 1963 }, { "epoch": 0.27367100954504286, "grad_norm": 1.1034897565841675, "learning_rate": 8.875450568183318e-06, "loss": 0.10671615600585938, "step": 1964 }, { "epoch": 0.27381035323625724, "grad_norm": 2.6979167461395264, "learning_rate": 8.873961002282185e-06, "loss": 0.12302780151367188, "step": 1965 }, { "epoch": 0.2739496969274716, "grad_norm": 0.7923979163169861, "learning_rate": 8.872470575675361e-06, "loss": 0.10718345642089844, "step": 1966 }, { "epoch": 0.274089040618686, "grad_norm": 0.9452114701271057, "learning_rate": 8.870979288693985e-06, "loss": 0.09156417846679688, "step": 1967 }, { "epoch": 0.2742283843099004, "grad_norm": 0.781556248664856, "learning_rate": 8.86948714166939e-06, "loss": 0.08133506774902344, "step": 1968 }, { "epoch": 0.27436772800111475, "grad_norm": 1.0441186428070068, "learning_rate": 8.86799413493309e-06, "loss": 0.09275054931640625, "step": 1969 }, { "epoch": 0.27450707169232913, "grad_norm": 0.8609143495559692, "learning_rate": 8.866500268816803e-06, "loss": 0.09581756591796875, "step": 1970 }, { "epoch": 0.2746464153835435, "grad_norm": 1.87991201877594, "learning_rate": 8.865005543652428e-06, "loss": 0.08322525024414062, "step": 1971 }, { "epoch": 0.2747857590747579, "grad_norm": 0.5422377586364746, "learning_rate": 8.863509959772064e-06, "loss": 0.07145500183105469, "step": 1972 }, { "epoch": 0.27492510276597226, "grad_norm": 1.4494773149490356, "learning_rate": 8.86201351750799e-06, "loss": 0.12996196746826172, "step": 1973 }, { "epoch": 0.27506444645718664, "grad_norm": 0.531000018119812, "learning_rate": 8.860516217192683e-06, "loss": 0.08033561706542969, "step": 1974 }, { "epoch": 0.275203790148401, "grad_norm": 1.2626272439956665, "learning_rate": 8.85901805915881e-06, "loss": 0.09345054626464844, "step": 1975 }, { "epoch": 0.2753431338396154, "grad_norm": 1.5554580688476562, "learning_rate": 8.85751904373923e-06, "loss": 0.10381698608398438, "step": 1976 }, { "epoch": 0.2754824775308298, "grad_norm": 0.5200096368789673, "learning_rate": 8.856019171266984e-06, "loss": 0.079193115234375, "step": 1977 }, { "epoch": 0.27562182122204415, "grad_norm": 1.6700490713119507, "learning_rate": 8.854518442075313e-06, "loss": 0.13105010986328125, "step": 1978 }, { "epoch": 0.2757611649132585, "grad_norm": 1.1631978750228882, "learning_rate": 8.853016856497646e-06, "loss": 0.1043701171875, "step": 1979 }, { "epoch": 0.2759005086044729, "grad_norm": 1.124624252319336, "learning_rate": 8.8515144148676e-06, "loss": 0.1247406005859375, "step": 1980 }, { "epoch": 0.27603985229568734, "grad_norm": 2.138389825820923, "learning_rate": 8.85001111751898e-06, "loss": 0.15157318115234375, "step": 1981 }, { "epoch": 0.2761791959869017, "grad_norm": 0.7509351968765259, "learning_rate": 8.848506964785789e-06, "loss": 0.0781402587890625, "step": 1982 }, { "epoch": 0.2763185396781161, "grad_norm": 1.1944843530654907, "learning_rate": 8.847001957002211e-06, "loss": 0.11547088623046875, "step": 1983 }, { "epoch": 0.27645788336933047, "grad_norm": 0.8784593343734741, "learning_rate": 8.845496094502628e-06, "loss": 0.1115264892578125, "step": 1984 }, { "epoch": 0.27659722706054485, "grad_norm": 0.9060272574424744, "learning_rate": 8.843989377621606e-06, "loss": 0.07942867279052734, "step": 1985 }, { "epoch": 0.2767365707517592, "grad_norm": 1.0324256420135498, "learning_rate": 8.842481806693906e-06, "loss": 0.08014869689941406, "step": 1986 }, { "epoch": 0.2768759144429736, "grad_norm": 0.814344584941864, "learning_rate": 8.840973382054472e-06, "loss": 0.09375953674316406, "step": 1987 }, { "epoch": 0.277015258134188, "grad_norm": 1.0914887189865112, "learning_rate": 8.839464104038445e-06, "loss": 0.10063552856445312, "step": 1988 }, { "epoch": 0.27715460182540236, "grad_norm": 0.6664043664932251, "learning_rate": 8.83795397298115e-06, "loss": 0.0918731689453125, "step": 1989 }, { "epoch": 0.27729394551661674, "grad_norm": 0.5040803551673889, "learning_rate": 8.836442989218104e-06, "loss": 0.06544303894042969, "step": 1990 }, { "epoch": 0.2774332892078311, "grad_norm": 1.048405408859253, "learning_rate": 8.834931153085014e-06, "loss": 0.09206390380859375, "step": 1991 }, { "epoch": 0.2775726328990455, "grad_norm": 0.8025798797607422, "learning_rate": 8.833418464917774e-06, "loss": 0.081787109375, "step": 1992 }, { "epoch": 0.27771197659025987, "grad_norm": 2.7516090869903564, "learning_rate": 8.831904925052468e-06, "loss": 0.1537914276123047, "step": 1993 }, { "epoch": 0.27785132028147425, "grad_norm": 0.8255273103713989, "learning_rate": 8.830390533825373e-06, "loss": 0.07728195190429688, "step": 1994 }, { "epoch": 0.2779906639726886, "grad_norm": 1.4076370000839233, "learning_rate": 8.828875291572951e-06, "loss": 0.12504959106445312, "step": 1995 }, { "epoch": 0.278130007663903, "grad_norm": 0.4591045379638672, "learning_rate": 8.827359198631854e-06, "loss": 0.07740020751953125, "step": 1996 }, { "epoch": 0.2782693513551174, "grad_norm": 1.1993097066879272, "learning_rate": 8.825842255338923e-06, "loss": 0.09710502624511719, "step": 1997 }, { "epoch": 0.27840869504633176, "grad_norm": 0.9002577662467957, "learning_rate": 8.824324462031189e-06, "loss": 0.08853721618652344, "step": 1998 }, { "epoch": 0.27854803873754613, "grad_norm": 2.8746979236602783, "learning_rate": 8.822805819045869e-06, "loss": 0.20244979858398438, "step": 1999 }, { "epoch": 0.2786873824287605, "grad_norm": 0.8804088234901428, "learning_rate": 8.821286326720372e-06, "loss": 0.09937477111816406, "step": 2000 }, { "epoch": 0.27882672611997494, "grad_norm": 1.388755440711975, "learning_rate": 8.819765985392297e-06, "loss": 0.1009979248046875, "step": 2001 }, { "epoch": 0.2789660698111893, "grad_norm": 2.757976770401001, "learning_rate": 8.818244795399425e-06, "loss": 0.109527587890625, "step": 2002 }, { "epoch": 0.2791054135024037, "grad_norm": 1.3168888092041016, "learning_rate": 8.81672275707973e-06, "loss": 0.13371658325195312, "step": 2003 }, { "epoch": 0.2792447571936181, "grad_norm": 0.8832052946090698, "learning_rate": 8.815199870771378e-06, "loss": 0.11378669738769531, "step": 2004 }, { "epoch": 0.27938410088483245, "grad_norm": 1.0877019166946411, "learning_rate": 8.813676136812717e-06, "loss": 0.06818008422851562, "step": 2005 }, { "epoch": 0.27952344457604683, "grad_norm": 0.8710251450538635, "learning_rate": 8.812151555542286e-06, "loss": 0.09987068176269531, "step": 2006 }, { "epoch": 0.2796627882672612, "grad_norm": 1.2643871307373047, "learning_rate": 8.81062612729881e-06, "loss": 0.136505126953125, "step": 2007 }, { "epoch": 0.2798021319584756, "grad_norm": 1.4937995672225952, "learning_rate": 8.80909985242121e-06, "loss": 0.08194160461425781, "step": 2008 }, { "epoch": 0.27994147564968996, "grad_norm": 1.30401611328125, "learning_rate": 8.807572731248583e-06, "loss": 0.07307815551757812, "step": 2009 }, { "epoch": 0.28008081934090434, "grad_norm": 0.8293116092681885, "learning_rate": 8.806044764120226e-06, "loss": 0.08686637878417969, "step": 2010 }, { "epoch": 0.2802201630321187, "grad_norm": 3.0448765754699707, "learning_rate": 8.804515951375615e-06, "loss": 0.15813255310058594, "step": 2011 }, { "epoch": 0.2803595067233331, "grad_norm": 1.9211512804031372, "learning_rate": 8.802986293354418e-06, "loss": 0.1121368408203125, "step": 2012 }, { "epoch": 0.2804988504145475, "grad_norm": 0.8089905977249146, "learning_rate": 8.80145579039649e-06, "loss": 0.09165477752685547, "step": 2013 }, { "epoch": 0.28063819410576185, "grad_norm": 0.5980757474899292, "learning_rate": 8.799924442841873e-06, "loss": 0.06435012817382812, "step": 2014 }, { "epoch": 0.28077753779697623, "grad_norm": 1.2429585456848145, "learning_rate": 8.798392251030801e-06, "loss": 0.10102462768554688, "step": 2015 }, { "epoch": 0.2809168814881906, "grad_norm": 1.985147476196289, "learning_rate": 8.796859215303688e-06, "loss": 0.07905769348144531, "step": 2016 }, { "epoch": 0.281056225179405, "grad_norm": 1.8895806074142456, "learning_rate": 8.795325336001143e-06, "loss": 0.07982254028320312, "step": 2017 }, { "epoch": 0.28119556887061936, "grad_norm": 0.6584905385971069, "learning_rate": 8.793790613463956e-06, "loss": 0.0958251953125, "step": 2018 }, { "epoch": 0.28133491256183374, "grad_norm": 1.2170671224594116, "learning_rate": 8.792255048033106e-06, "loss": 0.12908172607421875, "step": 2019 }, { "epoch": 0.2814742562530481, "grad_norm": 1.3567003011703491, "learning_rate": 8.790718640049767e-06, "loss": 0.09520339965820312, "step": 2020 }, { "epoch": 0.28161359994426255, "grad_norm": 1.1728906631469727, "learning_rate": 8.789181389855288e-06, "loss": 0.13274192810058594, "step": 2021 }, { "epoch": 0.2817529436354769, "grad_norm": 4.5142621994018555, "learning_rate": 8.787643297791214e-06, "loss": 0.16675567626953125, "step": 2022 }, { "epoch": 0.2818922873266913, "grad_norm": 1.423538088798523, "learning_rate": 8.78610436419927e-06, "loss": 0.09650230407714844, "step": 2023 }, { "epoch": 0.2820316310179057, "grad_norm": 1.0183614492416382, "learning_rate": 8.784564589421373e-06, "loss": 0.11596870422363281, "step": 2024 }, { "epoch": 0.28217097470912006, "grad_norm": 2.3526852130889893, "learning_rate": 8.783023973799632e-06, "loss": 0.12462997436523438, "step": 2025 }, { "epoch": 0.28231031840033444, "grad_norm": 1.2984504699707031, "learning_rate": 8.78148251767633e-06, "loss": 0.07509231567382812, "step": 2026 }, { "epoch": 0.2824496620915488, "grad_norm": 0.9962300658226013, "learning_rate": 8.779940221393946e-06, "loss": 0.10432052612304688, "step": 2027 }, { "epoch": 0.2825890057827632, "grad_norm": 1.2488714456558228, "learning_rate": 8.778397085295141e-06, "loss": 0.12222480773925781, "step": 2028 }, { "epoch": 0.28272834947397757, "grad_norm": 0.6695850491523743, "learning_rate": 8.776853109722765e-06, "loss": 0.0869598388671875, "step": 2029 }, { "epoch": 0.28286769316519195, "grad_norm": 0.4369499087333679, "learning_rate": 8.775308295019857e-06, "loss": 0.06398773193359375, "step": 2030 }, { "epoch": 0.2830070368564063, "grad_norm": 3.3467836380004883, "learning_rate": 8.773762641529637e-06, "loss": 0.1470947265625, "step": 2031 }, { "epoch": 0.2831463805476207, "grad_norm": 0.9330527186393738, "learning_rate": 8.772216149595515e-06, "loss": 0.11295223236083984, "step": 2032 }, { "epoch": 0.2832857242388351, "grad_norm": 1.1642858982086182, "learning_rate": 8.770668819561085e-06, "loss": 0.08220291137695312, "step": 2033 }, { "epoch": 0.28342506793004946, "grad_norm": 0.9654267430305481, "learning_rate": 8.769120651770128e-06, "loss": 0.1032705307006836, "step": 2034 }, { "epoch": 0.28356441162126383, "grad_norm": 0.9266795516014099, "learning_rate": 8.767571646566615e-06, "loss": 0.10532188415527344, "step": 2035 }, { "epoch": 0.2837037553124782, "grad_norm": 1.3263483047485352, "learning_rate": 8.766021804294697e-06, "loss": 0.087188720703125, "step": 2036 }, { "epoch": 0.2838430990036926, "grad_norm": 0.5338225364685059, "learning_rate": 8.764471125298712e-06, "loss": 0.07918548583984375, "step": 2037 }, { "epoch": 0.28398244269490697, "grad_norm": 0.8386817574501038, "learning_rate": 8.76291960992319e-06, "loss": 0.08725357055664062, "step": 2038 }, { "epoch": 0.28412178638612134, "grad_norm": 0.9425200819969177, "learning_rate": 8.761367258512838e-06, "loss": 0.09484672546386719, "step": 2039 }, { "epoch": 0.2842611300773357, "grad_norm": 2.160388231277466, "learning_rate": 8.759814071412554e-06, "loss": 0.1472949981689453, "step": 2040 }, { "epoch": 0.28440047376855015, "grad_norm": 1.3054630756378174, "learning_rate": 8.758260048967421e-06, "loss": 0.09638214111328125, "step": 2041 }, { "epoch": 0.28453981745976453, "grad_norm": 0.6129730939865112, "learning_rate": 8.75670519152271e-06, "loss": 0.06860828399658203, "step": 2042 }, { "epoch": 0.2846791611509789, "grad_norm": 1.2130074501037598, "learning_rate": 8.755149499423871e-06, "loss": 0.08915328979492188, "step": 2043 }, { "epoch": 0.2848185048421933, "grad_norm": 0.682859480381012, "learning_rate": 8.753592973016545e-06, "loss": 0.08290863037109375, "step": 2044 }, { "epoch": 0.28495784853340766, "grad_norm": 1.3932125568389893, "learning_rate": 8.752035612646557e-06, "loss": 0.17496871948242188, "step": 2045 }, { "epoch": 0.28509719222462204, "grad_norm": 1.936081886291504, "learning_rate": 8.750477418659914e-06, "loss": 0.10379409790039062, "step": 2046 }, { "epoch": 0.2852365359158364, "grad_norm": 1.5873713493347168, "learning_rate": 8.748918391402816e-06, "loss": 0.09826087951660156, "step": 2047 }, { "epoch": 0.2853758796070508, "grad_norm": 0.7050638794898987, "learning_rate": 8.74735853122164e-06, "loss": 0.0637664794921875, "step": 2048 }, { "epoch": 0.2855152232982652, "grad_norm": 1.4166136980056763, "learning_rate": 8.745797838462951e-06, "loss": 0.10528564453125, "step": 2049 }, { "epoch": 0.28565456698947955, "grad_norm": 0.852595329284668, "learning_rate": 8.7442363134735e-06, "loss": 0.12497329711914062, "step": 2050 }, { "epoch": 0.28579391068069393, "grad_norm": 1.1089458465576172, "learning_rate": 8.742673956600225e-06, "loss": 0.11477279663085938, "step": 2051 }, { "epoch": 0.2859332543719083, "grad_norm": 0.8125136494636536, "learning_rate": 8.741110768190242e-06, "loss": 0.0936737060546875, "step": 2052 }, { "epoch": 0.2860725980631227, "grad_norm": 0.6061564087867737, "learning_rate": 8.739546748590857e-06, "loss": 0.08184051513671875, "step": 2053 }, { "epoch": 0.28621194175433706, "grad_norm": 1.2785416841506958, "learning_rate": 8.73798189814956e-06, "loss": 0.08179664611816406, "step": 2054 }, { "epoch": 0.28635128544555144, "grad_norm": 0.9195134043693542, "learning_rate": 8.736416217214026e-06, "loss": 0.1011199951171875, "step": 2055 }, { "epoch": 0.2864906291367658, "grad_norm": 0.6270169615745544, "learning_rate": 8.734849706132112e-06, "loss": 0.08628463745117188, "step": 2056 }, { "epoch": 0.2866299728279802, "grad_norm": 0.7000659108161926, "learning_rate": 8.733282365251858e-06, "loss": 0.10091590881347656, "step": 2057 }, { "epoch": 0.2867693165191946, "grad_norm": 0.9263471364974976, "learning_rate": 8.731714194921498e-06, "loss": 0.0710601806640625, "step": 2058 }, { "epoch": 0.28690866021040895, "grad_norm": 2.5358357429504395, "learning_rate": 8.73014519548944e-06, "loss": 0.12241935729980469, "step": 2059 }, { "epoch": 0.2870480039016233, "grad_norm": 1.2466192245483398, "learning_rate": 8.72857536730428e-06, "loss": 0.12400150299072266, "step": 2060 }, { "epoch": 0.28718734759283776, "grad_norm": 1.2814325094223022, "learning_rate": 8.7270047107148e-06, "loss": 0.09675407409667969, "step": 2061 }, { "epoch": 0.28732669128405214, "grad_norm": 1.47586190700531, "learning_rate": 8.72543322606996e-06, "loss": 0.1099691390991211, "step": 2062 }, { "epoch": 0.2874660349752665, "grad_norm": 0.8035123348236084, "learning_rate": 8.72386091371891e-06, "loss": 0.10928916931152344, "step": 2063 }, { "epoch": 0.2876053786664809, "grad_norm": 1.0954418182373047, "learning_rate": 8.722287774010983e-06, "loss": 0.0755615234375, "step": 2064 }, { "epoch": 0.28774472235769527, "grad_norm": 1.9206620454788208, "learning_rate": 8.720713807295692e-06, "loss": 0.12173175811767578, "step": 2065 }, { "epoch": 0.28788406604890965, "grad_norm": 1.1693674325942993, "learning_rate": 8.71913901392274e-06, "loss": 0.10723114013671875, "step": 2066 }, { "epoch": 0.288023409740124, "grad_norm": 1.7798293828964233, "learning_rate": 8.71756339424201e-06, "loss": 0.12537765502929688, "step": 2067 }, { "epoch": 0.2881627534313384, "grad_norm": 1.2221122980117798, "learning_rate": 8.715986948603566e-06, "loss": 0.09234237670898438, "step": 2068 }, { "epoch": 0.2883020971225528, "grad_norm": 1.2851653099060059, "learning_rate": 8.71440967735766e-06, "loss": 0.125885009765625, "step": 2069 }, { "epoch": 0.28844144081376716, "grad_norm": 0.6939249634742737, "learning_rate": 8.712831580854724e-06, "loss": 0.07818984985351562, "step": 2070 }, { "epoch": 0.28858078450498154, "grad_norm": 2.7098047733306885, "learning_rate": 8.711252659445378e-06, "loss": 0.12860107421875, "step": 2071 }, { "epoch": 0.2887201281961959, "grad_norm": 1.8512442111968994, "learning_rate": 8.709672913480418e-06, "loss": 0.11288070678710938, "step": 2072 }, { "epoch": 0.2888594718874103, "grad_norm": 1.2672182321548462, "learning_rate": 8.70809234331083e-06, "loss": 0.12160682678222656, "step": 2073 }, { "epoch": 0.28899881557862467, "grad_norm": 1.2239421606063843, "learning_rate": 8.706510949287782e-06, "loss": 0.1363534927368164, "step": 2074 }, { "epoch": 0.28913815926983905, "grad_norm": 1.4103111028671265, "learning_rate": 8.70492873176262e-06, "loss": 0.0949087142944336, "step": 2075 }, { "epoch": 0.2892775029610534, "grad_norm": 0.7252928018569946, "learning_rate": 8.703345691086882e-06, "loss": 0.0774078369140625, "step": 2076 }, { "epoch": 0.2894168466522678, "grad_norm": 1.5263233184814453, "learning_rate": 8.701761827612278e-06, "loss": 0.09845733642578125, "step": 2077 }, { "epoch": 0.2895561903434822, "grad_norm": 1.1147204637527466, "learning_rate": 8.700177141690708e-06, "loss": 0.08705520629882812, "step": 2078 }, { "epoch": 0.28969553403469656, "grad_norm": 0.9217893481254578, "learning_rate": 8.698591633674256e-06, "loss": 0.09444427490234375, "step": 2079 }, { "epoch": 0.28983487772591093, "grad_norm": 1.4434231519699097, "learning_rate": 8.697005303915183e-06, "loss": 0.09065532684326172, "step": 2080 }, { "epoch": 0.28997422141712537, "grad_norm": 1.2541617155075073, "learning_rate": 8.695418152765933e-06, "loss": 0.10202312469482422, "step": 2081 }, { "epoch": 0.29011356510833974, "grad_norm": 1.2281363010406494, "learning_rate": 8.693830180579139e-06, "loss": 0.10656929016113281, "step": 2082 }, { "epoch": 0.2902529087995541, "grad_norm": 1.547706961631775, "learning_rate": 8.69224138770761e-06, "loss": 0.09525585174560547, "step": 2083 }, { "epoch": 0.2903922524907685, "grad_norm": 2.0484046936035156, "learning_rate": 8.69065177450434e-06, "loss": 0.12899303436279297, "step": 2084 }, { "epoch": 0.2905315961819829, "grad_norm": 1.2415937185287476, "learning_rate": 8.689061341322505e-06, "loss": 0.07655525207519531, "step": 2085 }, { "epoch": 0.29067093987319725, "grad_norm": 1.0615715980529785, "learning_rate": 8.687470088515464e-06, "loss": 0.08894920349121094, "step": 2086 }, { "epoch": 0.29081028356441163, "grad_norm": 1.1433513164520264, "learning_rate": 8.685878016436753e-06, "loss": 0.10393524169921875, "step": 2087 }, { "epoch": 0.290949627255626, "grad_norm": 0.7593168616294861, "learning_rate": 8.684285125440099e-06, "loss": 0.10215377807617188, "step": 2088 }, { "epoch": 0.2910889709468404, "grad_norm": 0.49320000410079956, "learning_rate": 8.682691415879402e-06, "loss": 0.057578086853027344, "step": 2089 }, { "epoch": 0.29122831463805476, "grad_norm": 1.5948134660720825, "learning_rate": 8.681096888108751e-06, "loss": 0.08769702911376953, "step": 2090 }, { "epoch": 0.29136765832926914, "grad_norm": 1.6264210939407349, "learning_rate": 8.679501542482412e-06, "loss": 0.0947418212890625, "step": 2091 }, { "epoch": 0.2915070020204835, "grad_norm": 1.8287581205368042, "learning_rate": 8.677905379354834e-06, "loss": 0.10593223571777344, "step": 2092 }, { "epoch": 0.2916463457116979, "grad_norm": 2.580226421356201, "learning_rate": 8.67630839908065e-06, "loss": 0.1544017791748047, "step": 2093 }, { "epoch": 0.2917856894029123, "grad_norm": 2.324923038482666, "learning_rate": 8.674710602014672e-06, "loss": 0.1345672607421875, "step": 2094 }, { "epoch": 0.29192503309412665, "grad_norm": 1.830374836921692, "learning_rate": 8.673111988511892e-06, "loss": 0.09112548828125, "step": 2095 }, { "epoch": 0.29206437678534103, "grad_norm": 1.382127285003662, "learning_rate": 8.671512558927483e-06, "loss": 0.07088947296142578, "step": 2096 }, { "epoch": 0.2922037204765554, "grad_norm": 2.273145914077759, "learning_rate": 8.669912313616811e-06, "loss": 0.14761734008789062, "step": 2097 }, { "epoch": 0.2923430641677698, "grad_norm": 0.9123716354370117, "learning_rate": 8.668311252935407e-06, "loss": 0.07930564880371094, "step": 2098 }, { "epoch": 0.29248240785898416, "grad_norm": 1.151667594909668, "learning_rate": 8.66670937723899e-06, "loss": 0.08314132690429688, "step": 2099 }, { "epoch": 0.29262175155019854, "grad_norm": 1.4384651184082031, "learning_rate": 8.665106686883461e-06, "loss": 0.1015777587890625, "step": 2100 }, { "epoch": 0.29276109524141297, "grad_norm": 1.222511887550354, "learning_rate": 8.663503182224906e-06, "loss": 0.09702682495117188, "step": 2101 }, { "epoch": 0.29290043893262735, "grad_norm": 1.1167112588882446, "learning_rate": 8.66189886361958e-06, "loss": 0.10015869140625, "step": 2102 }, { "epoch": 0.2930397826238417, "grad_norm": 2.334763526916504, "learning_rate": 8.660293731423929e-06, "loss": 0.1629180908203125, "step": 2103 }, { "epoch": 0.2931791263150561, "grad_norm": 1.1660003662109375, "learning_rate": 8.658687785994579e-06, "loss": 0.09131622314453125, "step": 2104 }, { "epoch": 0.2933184700062705, "grad_norm": 2.803689956665039, "learning_rate": 8.657081027688332e-06, "loss": 0.11013603210449219, "step": 2105 }, { "epoch": 0.29345781369748486, "grad_norm": 1.361438512802124, "learning_rate": 8.655473456862172e-06, "loss": 0.08187103271484375, "step": 2106 }, { "epoch": 0.29359715738869924, "grad_norm": 0.899986207485199, "learning_rate": 8.653865073873265e-06, "loss": 0.08583259582519531, "step": 2107 }, { "epoch": 0.2937365010799136, "grad_norm": 0.8039808869361877, "learning_rate": 8.652255879078959e-06, "loss": 0.08509254455566406, "step": 2108 }, { "epoch": 0.293875844771128, "grad_norm": 0.7530122995376587, "learning_rate": 8.650645872836779e-06, "loss": 0.09302425384521484, "step": 2109 }, { "epoch": 0.29401518846234237, "grad_norm": 0.8028215169906616, "learning_rate": 8.649035055504431e-06, "loss": 0.08583831787109375, "step": 2110 }, { "epoch": 0.29415453215355675, "grad_norm": 0.9497259259223938, "learning_rate": 8.647423427439804e-06, "loss": 0.07361793518066406, "step": 2111 }, { "epoch": 0.2942938758447711, "grad_norm": 1.0489705801010132, "learning_rate": 8.645810989000962e-06, "loss": 0.09101104736328125, "step": 2112 }, { "epoch": 0.2944332195359855, "grad_norm": 0.9495927095413208, "learning_rate": 8.644197740546153e-06, "loss": 0.09838104248046875, "step": 2113 }, { "epoch": 0.2945725632271999, "grad_norm": 0.7934648990631104, "learning_rate": 8.642583682433808e-06, "loss": 0.0863027572631836, "step": 2114 }, { "epoch": 0.29471190691841426, "grad_norm": 2.0641932487487793, "learning_rate": 8.640968815022529e-06, "loss": 0.10749053955078125, "step": 2115 }, { "epoch": 0.29485125060962863, "grad_norm": 1.4435571432113647, "learning_rate": 8.6393531386711e-06, "loss": 0.09774398803710938, "step": 2116 }, { "epoch": 0.294990594300843, "grad_norm": 0.9599229097366333, "learning_rate": 8.637736653738496e-06, "loss": 0.1125335693359375, "step": 2117 }, { "epoch": 0.2951299379920574, "grad_norm": 1.2020602226257324, "learning_rate": 8.636119360583857e-06, "loss": 0.11184883117675781, "step": 2118 }, { "epoch": 0.29526928168327177, "grad_norm": 1.5203289985656738, "learning_rate": 8.63450125956651e-06, "loss": 0.10055732727050781, "step": 2119 }, { "epoch": 0.29540862537448614, "grad_norm": 1.114607810974121, "learning_rate": 8.63288235104596e-06, "loss": 0.09414482116699219, "step": 2120 }, { "epoch": 0.2955479690657006, "grad_norm": 0.648865818977356, "learning_rate": 8.631262635381892e-06, "loss": 0.07556724548339844, "step": 2121 }, { "epoch": 0.29568731275691496, "grad_norm": 2.9648842811584473, "learning_rate": 8.629642112934169e-06, "loss": 0.07420730590820312, "step": 2122 }, { "epoch": 0.29582665644812933, "grad_norm": 0.9093860983848572, "learning_rate": 8.628020784062837e-06, "loss": 0.05992317199707031, "step": 2123 }, { "epoch": 0.2959660001393437, "grad_norm": 1.4804857969284058, "learning_rate": 8.626398649128113e-06, "loss": 0.09425163269042969, "step": 2124 }, { "epoch": 0.2961053438305581, "grad_norm": 0.7090494632720947, "learning_rate": 8.624775708490403e-06, "loss": 0.07298469543457031, "step": 2125 }, { "epoch": 0.29624468752177247, "grad_norm": 1.3354942798614502, "learning_rate": 8.623151962510284e-06, "loss": 0.10924339294433594, "step": 2126 }, { "epoch": 0.29638403121298684, "grad_norm": 1.7853269577026367, "learning_rate": 8.621527411548517e-06, "loss": 0.10206222534179688, "step": 2127 }, { "epoch": 0.2965233749042012, "grad_norm": 1.9342936277389526, "learning_rate": 8.619902055966043e-06, "loss": 0.16387367248535156, "step": 2128 }, { "epoch": 0.2966627185954156, "grad_norm": 2.1505610942840576, "learning_rate": 8.618275896123973e-06, "loss": 0.13204002380371094, "step": 2129 }, { "epoch": 0.29680206228663, "grad_norm": 1.9957427978515625, "learning_rate": 8.616648932383607e-06, "loss": 0.10746192932128906, "step": 2130 }, { "epoch": 0.29694140597784435, "grad_norm": 1.0656652450561523, "learning_rate": 8.615021165106415e-06, "loss": 0.09097862243652344, "step": 2131 }, { "epoch": 0.29708074966905873, "grad_norm": 1.0227540731430054, "learning_rate": 8.613392594654056e-06, "loss": 0.10247421264648438, "step": 2132 }, { "epoch": 0.2972200933602731, "grad_norm": 0.9382286071777344, "learning_rate": 8.611763221388356e-06, "loss": 0.07413291931152344, "step": 2133 }, { "epoch": 0.2973594370514875, "grad_norm": 1.0384635925292969, "learning_rate": 8.610133045671325e-06, "loss": 0.09461212158203125, "step": 2134 }, { "epoch": 0.29749878074270186, "grad_norm": 1.023695945739746, "learning_rate": 8.608502067865155e-06, "loss": 0.09906578063964844, "step": 2135 }, { "epoch": 0.29763812443391624, "grad_norm": 0.8605170845985413, "learning_rate": 8.606870288332206e-06, "loss": 0.09185218811035156, "step": 2136 }, { "epoch": 0.2977774681251306, "grad_norm": 2.795044183731079, "learning_rate": 8.605237707435028e-06, "loss": 0.10869979858398438, "step": 2137 }, { "epoch": 0.297916811816345, "grad_norm": 1.8346092700958252, "learning_rate": 8.603604325536338e-06, "loss": 0.08695411682128906, "step": 2138 }, { "epoch": 0.2980561555075594, "grad_norm": 1.784476399421692, "learning_rate": 8.60197014299904e-06, "loss": 0.09604263305664062, "step": 2139 }, { "epoch": 0.29819549919877375, "grad_norm": 2.0969038009643555, "learning_rate": 8.600335160186208e-06, "loss": 0.13623046875, "step": 2140 }, { "epoch": 0.2983348428899882, "grad_norm": 1.0840219259262085, "learning_rate": 8.598699377461104e-06, "loss": 0.09362602233886719, "step": 2141 }, { "epoch": 0.29847418658120256, "grad_norm": 0.8348666429519653, "learning_rate": 8.597062795187157e-06, "loss": 0.07263374328613281, "step": 2142 }, { "epoch": 0.29861353027241694, "grad_norm": 1.0849758386611938, "learning_rate": 8.595425413727979e-06, "loss": 0.1280975341796875, "step": 2143 }, { "epoch": 0.2987528739636313, "grad_norm": 0.8224836587905884, "learning_rate": 8.593787233447357e-06, "loss": 0.10315608978271484, "step": 2144 }, { "epoch": 0.2988922176548457, "grad_norm": 0.44468817114830017, "learning_rate": 8.592148254709262e-06, "loss": 0.07162857055664062, "step": 2145 }, { "epoch": 0.29903156134606007, "grad_norm": 2.48746919631958, "learning_rate": 8.590508477877834e-06, "loss": 0.11378097534179688, "step": 2146 }, { "epoch": 0.29917090503727445, "grad_norm": 2.982757091522217, "learning_rate": 8.588867903317395e-06, "loss": 0.14501571655273438, "step": 2147 }, { "epoch": 0.2993102487284888, "grad_norm": 1.8350030183792114, "learning_rate": 8.587226531392443e-06, "loss": 0.14233779907226562, "step": 2148 }, { "epoch": 0.2994495924197032, "grad_norm": 1.2869170904159546, "learning_rate": 8.585584362467652e-06, "loss": 0.15001487731933594, "step": 2149 }, { "epoch": 0.2995889361109176, "grad_norm": 1.3021810054779053, "learning_rate": 8.583941396907877e-06, "loss": 0.08682441711425781, "step": 2150 }, { "epoch": 0.29972827980213196, "grad_norm": 0.7869486808776855, "learning_rate": 8.582297635078149e-06, "loss": 0.0917205810546875, "step": 2151 }, { "epoch": 0.29986762349334634, "grad_norm": 1.0444923639297485, "learning_rate": 8.58065307734367e-06, "loss": 0.09225845336914062, "step": 2152 }, { "epoch": 0.3000069671845607, "grad_norm": 2.272752523422241, "learning_rate": 8.579007724069823e-06, "loss": 0.13938331604003906, "step": 2153 }, { "epoch": 0.3001463108757751, "grad_norm": 0.5887141227722168, "learning_rate": 8.577361575622171e-06, "loss": 0.05969810485839844, "step": 2154 }, { "epoch": 0.30028565456698947, "grad_norm": 1.2128931283950806, "learning_rate": 8.575714632366451e-06, "loss": 0.09988975524902344, "step": 2155 }, { "epoch": 0.30042499825820385, "grad_norm": 0.9186558127403259, "learning_rate": 8.574066894668573e-06, "loss": 0.10797500610351562, "step": 2156 }, { "epoch": 0.3005643419494182, "grad_norm": 1.8652443885803223, "learning_rate": 8.57241836289463e-06, "loss": 0.138702392578125, "step": 2157 }, { "epoch": 0.3007036856406326, "grad_norm": 1.8254530429840088, "learning_rate": 8.570769037410885e-06, "loss": 0.12150764465332031, "step": 2158 }, { "epoch": 0.300843029331847, "grad_norm": 3.166701555252075, "learning_rate": 8.56911891858378e-06, "loss": 0.1470317840576172, "step": 2159 }, { "epoch": 0.30098237302306136, "grad_norm": 0.977761447429657, "learning_rate": 8.56746800677994e-06, "loss": 0.12361717224121094, "step": 2160 }, { "epoch": 0.3011217167142758, "grad_norm": 1.0193543434143066, "learning_rate": 8.565816302366151e-06, "loss": 0.074920654296875, "step": 2161 }, { "epoch": 0.30126106040549017, "grad_norm": 1.0938241481781006, "learning_rate": 8.564163805709393e-06, "loss": 0.086944580078125, "step": 2162 }, { "epoch": 0.30140040409670454, "grad_norm": 0.7437418103218079, "learning_rate": 8.562510517176807e-06, "loss": 0.07579708099365234, "step": 2163 }, { "epoch": 0.3015397477879189, "grad_norm": 1.9725459814071655, "learning_rate": 8.560856437135716e-06, "loss": 0.12787437438964844, "step": 2164 }, { "epoch": 0.3016790914791333, "grad_norm": 1.2981699705123901, "learning_rate": 8.559201565953623e-06, "loss": 0.09594345092773438, "step": 2165 }, { "epoch": 0.3018184351703477, "grad_norm": 1.6634550094604492, "learning_rate": 8.557545903998197e-06, "loss": 0.10935211181640625, "step": 2166 }, { "epoch": 0.30195777886156205, "grad_norm": 3.109833002090454, "learning_rate": 8.555889451637294e-06, "loss": 0.12761259078979492, "step": 2167 }, { "epoch": 0.30209712255277643, "grad_norm": 1.6497424840927124, "learning_rate": 8.554232209238935e-06, "loss": 0.10636711120605469, "step": 2168 }, { "epoch": 0.3022364662439908, "grad_norm": 1.6585017442703247, "learning_rate": 8.552574177171326e-06, "loss": 0.08616828918457031, "step": 2169 }, { "epoch": 0.3023758099352052, "grad_norm": 0.6987817883491516, "learning_rate": 8.55091535580284e-06, "loss": 0.090362548828125, "step": 2170 }, { "epoch": 0.30251515362641956, "grad_norm": 1.2899656295776367, "learning_rate": 8.54925574550203e-06, "loss": 0.1264801025390625, "step": 2171 }, { "epoch": 0.30265449731763394, "grad_norm": 0.9279129505157471, "learning_rate": 8.547595346637624e-06, "loss": 0.09293365478515625, "step": 2172 }, { "epoch": 0.3027938410088483, "grad_norm": 0.7771095633506775, "learning_rate": 8.545934159578527e-06, "loss": 0.09982872009277344, "step": 2173 }, { "epoch": 0.3029331847000627, "grad_norm": 1.137837529182434, "learning_rate": 8.544272184693814e-06, "loss": 0.10568809509277344, "step": 2174 }, { "epoch": 0.3030725283912771, "grad_norm": 2.7032394409179688, "learning_rate": 8.542609422352738e-06, "loss": 0.11636924743652344, "step": 2175 }, { "epoch": 0.30321187208249145, "grad_norm": 1.8136671781539917, "learning_rate": 8.540945872924728e-06, "loss": 0.1208505630493164, "step": 2176 }, { "epoch": 0.30335121577370583, "grad_norm": 2.0728416442871094, "learning_rate": 8.539281536779388e-06, "loss": 0.10661888122558594, "step": 2177 }, { "epoch": 0.3034905594649202, "grad_norm": 0.9752723574638367, "learning_rate": 8.537616414286491e-06, "loss": 0.08057785034179688, "step": 2178 }, { "epoch": 0.3036299031561346, "grad_norm": 0.3883991539478302, "learning_rate": 8.535950505815993e-06, "loss": 0.06844711303710938, "step": 2179 }, { "epoch": 0.30376924684734896, "grad_norm": 1.2913641929626465, "learning_rate": 8.53428381173802e-06, "loss": 0.09827804565429688, "step": 2180 }, { "epoch": 0.30390859053856334, "grad_norm": 1.7086070775985718, "learning_rate": 8.532616332422872e-06, "loss": 0.0808868408203125, "step": 2181 }, { "epoch": 0.30404793422977777, "grad_norm": 0.6486015319824219, "learning_rate": 8.530948068241028e-06, "loss": 0.07472038269042969, "step": 2182 }, { "epoch": 0.30418727792099215, "grad_norm": 0.7910940051078796, "learning_rate": 8.529279019563133e-06, "loss": 0.08029556274414062, "step": 2183 }, { "epoch": 0.3043266216122065, "grad_norm": 1.0654646158218384, "learning_rate": 8.527609186760017e-06, "loss": 0.0919189453125, "step": 2184 }, { "epoch": 0.3044659653034209, "grad_norm": 1.9499804973602295, "learning_rate": 8.525938570202676e-06, "loss": 0.07866954803466797, "step": 2185 }, { "epoch": 0.3046053089946353, "grad_norm": 1.1612253189086914, "learning_rate": 8.524267170262283e-06, "loss": 0.1269817352294922, "step": 2186 }, { "epoch": 0.30474465268584966, "grad_norm": 0.5349484086036682, "learning_rate": 8.522594987310184e-06, "loss": 0.064910888671875, "step": 2187 }, { "epoch": 0.30488399637706404, "grad_norm": 1.2977129220962524, "learning_rate": 8.520922021717903e-06, "loss": 0.10455322265625, "step": 2188 }, { "epoch": 0.3050233400682784, "grad_norm": 1.3100656270980835, "learning_rate": 8.519248273857132e-06, "loss": 0.10770034790039062, "step": 2189 }, { "epoch": 0.3051626837594928, "grad_norm": 0.7437235713005066, "learning_rate": 8.51757374409974e-06, "loss": 0.10653495788574219, "step": 2190 }, { "epoch": 0.30530202745070717, "grad_norm": 2.4075562953948975, "learning_rate": 8.51589843281777e-06, "loss": 0.15001296997070312, "step": 2191 }, { "epoch": 0.30544137114192155, "grad_norm": 1.1382404565811157, "learning_rate": 8.514222340383438e-06, "loss": 0.09443855285644531, "step": 2192 }, { "epoch": 0.3055807148331359, "grad_norm": 1.671923041343689, "learning_rate": 8.512545467169133e-06, "loss": 0.10545921325683594, "step": 2193 }, { "epoch": 0.3057200585243503, "grad_norm": 1.0618908405303955, "learning_rate": 8.510867813547417e-06, "loss": 0.08933448791503906, "step": 2194 }, { "epoch": 0.3058594022155647, "grad_norm": 1.864598274230957, "learning_rate": 8.509189379891029e-06, "loss": 0.10234296321868896, "step": 2195 }, { "epoch": 0.30599874590677906, "grad_norm": 1.2621670961380005, "learning_rate": 8.507510166572875e-06, "loss": 0.10830307006835938, "step": 2196 }, { "epoch": 0.30613808959799343, "grad_norm": 2.4080464839935303, "learning_rate": 8.50583017396604e-06, "loss": 0.11383533477783203, "step": 2197 }, { "epoch": 0.3062774332892078, "grad_norm": 1.9520440101623535, "learning_rate": 8.504149402443782e-06, "loss": 0.09161567687988281, "step": 2198 }, { "epoch": 0.3064167769804222, "grad_norm": 2.1409409046173096, "learning_rate": 8.502467852379526e-06, "loss": 0.09454727172851562, "step": 2199 }, { "epoch": 0.30655612067163657, "grad_norm": 0.6907148361206055, "learning_rate": 8.500785524146875e-06, "loss": 0.06659412384033203, "step": 2200 }, { "epoch": 0.30669546436285094, "grad_norm": 1.4033888578414917, "learning_rate": 8.499102418119607e-06, "loss": 0.09119606018066406, "step": 2201 }, { "epoch": 0.3068348080540654, "grad_norm": 0.6393194198608398, "learning_rate": 8.497418534671666e-06, "loss": 0.08377838134765625, "step": 2202 }, { "epoch": 0.30697415174527976, "grad_norm": 1.2713793516159058, "learning_rate": 8.495733874177176e-06, "loss": 0.08742523193359375, "step": 2203 }, { "epoch": 0.30711349543649413, "grad_norm": 1.7613005638122559, "learning_rate": 8.494048437010427e-06, "loss": 0.105438232421875, "step": 2204 }, { "epoch": 0.3072528391277085, "grad_norm": 0.9357157945632935, "learning_rate": 8.492362223545884e-06, "loss": 0.09245491027832031, "step": 2205 }, { "epoch": 0.3073921828189229, "grad_norm": 0.886816143989563, "learning_rate": 8.49067523415819e-06, "loss": 0.08864402770996094, "step": 2206 }, { "epoch": 0.30753152651013727, "grad_norm": 1.20974600315094, "learning_rate": 8.48898746922215e-06, "loss": 0.10465621948242188, "step": 2207 }, { "epoch": 0.30767087020135164, "grad_norm": 1.5988534688949585, "learning_rate": 8.487298929112751e-06, "loss": 0.11030769348144531, "step": 2208 }, { "epoch": 0.307810213892566, "grad_norm": 1.6058870553970337, "learning_rate": 8.485609614205146e-06, "loss": 0.11908912658691406, "step": 2209 }, { "epoch": 0.3079495575837804, "grad_norm": 0.6463251709938049, "learning_rate": 8.483919524874661e-06, "loss": 0.07759857177734375, "step": 2210 }, { "epoch": 0.3080889012749948, "grad_norm": 2.223421335220337, "learning_rate": 8.482228661496797e-06, "loss": 0.10012245178222656, "step": 2211 }, { "epoch": 0.30822824496620915, "grad_norm": 2.0857291221618652, "learning_rate": 8.480537024447227e-06, "loss": 0.10840415954589844, "step": 2212 }, { "epoch": 0.30836758865742353, "grad_norm": 1.1434073448181152, "learning_rate": 8.478844614101792e-06, "loss": 0.09607696533203125, "step": 2213 }, { "epoch": 0.3085069323486379, "grad_norm": 0.6468687057495117, "learning_rate": 8.477151430836505e-06, "loss": 0.07260513305664062, "step": 2214 }, { "epoch": 0.3086462760398523, "grad_norm": 0.8967586755752563, "learning_rate": 8.475457475027555e-06, "loss": 0.0702972412109375, "step": 2215 }, { "epoch": 0.30878561973106666, "grad_norm": 2.177523136138916, "learning_rate": 8.473762747051302e-06, "loss": 0.10538864135742188, "step": 2216 }, { "epoch": 0.30892496342228104, "grad_norm": 0.7808054685592651, "learning_rate": 8.472067247284272e-06, "loss": 0.07447624206542969, "step": 2217 }, { "epoch": 0.3090643071134954, "grad_norm": 1.1902740001678467, "learning_rate": 8.470370976103171e-06, "loss": 0.0928640365600586, "step": 2218 }, { "epoch": 0.3092036508047098, "grad_norm": 0.9279667735099792, "learning_rate": 8.468673933884867e-06, "loss": 0.11698532104492188, "step": 2219 }, { "epoch": 0.3093429944959242, "grad_norm": 2.0213027000427246, "learning_rate": 8.466976121006407e-06, "loss": 0.09569358825683594, "step": 2220 }, { "epoch": 0.30948233818713855, "grad_norm": 2.9180922508239746, "learning_rate": 8.465277537845004e-06, "loss": 0.11650466918945312, "step": 2221 }, { "epoch": 0.309621681878353, "grad_norm": 1.7568081617355347, "learning_rate": 8.463578184778047e-06, "loss": 0.07752227783203125, "step": 2222 }, { "epoch": 0.30976102556956736, "grad_norm": 0.4567534327507019, "learning_rate": 8.461878062183092e-06, "loss": 0.0767669677734375, "step": 2223 }, { "epoch": 0.30990036926078174, "grad_norm": 1.4306422472000122, "learning_rate": 8.460177170437865e-06, "loss": 0.10972404479980469, "step": 2224 }, { "epoch": 0.3100397129519961, "grad_norm": 1.4343935251235962, "learning_rate": 8.458475509920272e-06, "loss": 0.09896469116210938, "step": 2225 }, { "epoch": 0.3101790566432105, "grad_norm": 0.7773798704147339, "learning_rate": 8.456773081008376e-06, "loss": 0.08411407470703125, "step": 2226 }, { "epoch": 0.31031840033442487, "grad_norm": 1.0332190990447998, "learning_rate": 8.455069884080422e-06, "loss": 0.09921073913574219, "step": 2227 }, { "epoch": 0.31045774402563925, "grad_norm": 1.6114089488983154, "learning_rate": 8.45336591951482e-06, "loss": 0.12183380126953125, "step": 2228 }, { "epoch": 0.3105970877168536, "grad_norm": 0.47751468420028687, "learning_rate": 8.451661187690154e-06, "loss": 0.06331062316894531, "step": 2229 }, { "epoch": 0.310736431408068, "grad_norm": 1.4931334257125854, "learning_rate": 8.449955688985174e-06, "loss": 0.09115791320800781, "step": 2230 }, { "epoch": 0.3108757750992824, "grad_norm": 1.011025309562683, "learning_rate": 8.448249423778802e-06, "loss": 0.09704971313476562, "step": 2231 }, { "epoch": 0.31101511879049676, "grad_norm": 1.7074335813522339, "learning_rate": 8.446542392450134e-06, "loss": 0.16095352172851562, "step": 2232 }, { "epoch": 0.31115446248171114, "grad_norm": 2.408289909362793, "learning_rate": 8.444834595378434e-06, "loss": 0.10661506652832031, "step": 2233 }, { "epoch": 0.3112938061729255, "grad_norm": 2.3533852100372314, "learning_rate": 8.443126032943132e-06, "loss": 0.10863494873046875, "step": 2234 }, { "epoch": 0.3114331498641399, "grad_norm": 3.212519645690918, "learning_rate": 8.441416705523834e-06, "loss": 0.10399627685546875, "step": 2235 }, { "epoch": 0.31157249355535427, "grad_norm": 1.5125548839569092, "learning_rate": 8.439706613500312e-06, "loss": 0.1126708984375, "step": 2236 }, { "epoch": 0.31171183724656865, "grad_norm": 1.7505080699920654, "learning_rate": 8.43799575725251e-06, "loss": 0.1518421173095703, "step": 2237 }, { "epoch": 0.311851180937783, "grad_norm": 4.792854309082031, "learning_rate": 8.436284137160544e-06, "loss": 0.12653732299804688, "step": 2238 }, { "epoch": 0.3119905246289974, "grad_norm": 5.842606544494629, "learning_rate": 8.434571753604693e-06, "loss": 0.12541770935058594, "step": 2239 }, { "epoch": 0.3121298683202118, "grad_norm": 4.75462532043457, "learning_rate": 8.432858606965411e-06, "loss": 0.10592842102050781, "step": 2240 }, { "epoch": 0.31226921201142616, "grad_norm": 1.40451979637146, "learning_rate": 8.43114469762332e-06, "loss": 0.08047103881835938, "step": 2241 }, { "epoch": 0.3124085557026406, "grad_norm": 0.7440071105957031, "learning_rate": 8.429430025959212e-06, "loss": 0.09406661987304688, "step": 2242 }, { "epoch": 0.31254789939385497, "grad_norm": 1.2475829124450684, "learning_rate": 8.427714592354046e-06, "loss": 0.13516616821289062, "step": 2243 }, { "epoch": 0.31268724308506934, "grad_norm": 1.0664056539535522, "learning_rate": 8.425998397188955e-06, "loss": 0.07686805725097656, "step": 2244 }, { "epoch": 0.3128265867762837, "grad_norm": 3.081840753555298, "learning_rate": 8.424281440845236e-06, "loss": 0.1283893585205078, "step": 2245 }, { "epoch": 0.3129659304674981, "grad_norm": 1.0785729885101318, "learning_rate": 8.42256372370436e-06, "loss": 0.11775970458984375, "step": 2246 }, { "epoch": 0.3131052741587125, "grad_norm": 0.6863513588905334, "learning_rate": 8.420845246147961e-06, "loss": 0.09130191802978516, "step": 2247 }, { "epoch": 0.31324461784992685, "grad_norm": 3.031174898147583, "learning_rate": 8.41912600855785e-06, "loss": 0.15790367126464844, "step": 2248 }, { "epoch": 0.31338396154114123, "grad_norm": 1.7819126844406128, "learning_rate": 8.417406011316e-06, "loss": 0.10620880126953125, "step": 2249 }, { "epoch": 0.3135233052323556, "grad_norm": 1.7160851955413818, "learning_rate": 8.415685254804552e-06, "loss": 0.09091567993164062, "step": 2250 }, { "epoch": 0.31366264892357, "grad_norm": 0.5503154993057251, "learning_rate": 8.413963739405824e-06, "loss": 0.07415962219238281, "step": 2251 }, { "epoch": 0.31380199261478436, "grad_norm": 0.9959557056427002, "learning_rate": 8.412241465502294e-06, "loss": 0.10115432739257812, "step": 2252 }, { "epoch": 0.31394133630599874, "grad_norm": 2.2964189052581787, "learning_rate": 8.410518433476613e-06, "loss": 0.13892555236816406, "step": 2253 }, { "epoch": 0.3140806799972131, "grad_norm": 1.2889890670776367, "learning_rate": 8.408794643711601e-06, "loss": 0.10821151733398438, "step": 2254 }, { "epoch": 0.3142200236884275, "grad_norm": 0.6610022783279419, "learning_rate": 8.407070096590243e-06, "loss": 0.07665157318115234, "step": 2255 }, { "epoch": 0.3143593673796419, "grad_norm": 0.8022118210792542, "learning_rate": 8.405344792495694e-06, "loss": 0.0975332260131836, "step": 2256 }, { "epoch": 0.31449871107085625, "grad_norm": 0.4942915737628937, "learning_rate": 8.403618731811277e-06, "loss": 0.06373977661132812, "step": 2257 }, { "epoch": 0.31463805476207063, "grad_norm": 2.285313844680786, "learning_rate": 8.401891914920483e-06, "loss": 0.11175918579101562, "step": 2258 }, { "epoch": 0.314777398453285, "grad_norm": 1.0385538339614868, "learning_rate": 8.400164342206973e-06, "loss": 0.10684776306152344, "step": 2259 }, { "epoch": 0.3149167421444994, "grad_norm": 0.47357243299484253, "learning_rate": 8.398436014054575e-06, "loss": 0.07458114624023438, "step": 2260 }, { "epoch": 0.31505608583571376, "grad_norm": 1.6092774868011475, "learning_rate": 8.39670693084728e-06, "loss": 0.11116886138916016, "step": 2261 }, { "epoch": 0.3151954295269282, "grad_norm": 0.5692644715309143, "learning_rate": 8.394977092969253e-06, "loss": 0.07047271728515625, "step": 2262 }, { "epoch": 0.31533477321814257, "grad_norm": 2.030021905899048, "learning_rate": 8.393246500804825e-06, "loss": 0.1147613525390625, "step": 2263 }, { "epoch": 0.31547411690935695, "grad_norm": 1.516716480255127, "learning_rate": 8.391515154738495e-06, "loss": 0.11577987670898438, "step": 2264 }, { "epoch": 0.3156134606005713, "grad_norm": 0.7228257656097412, "learning_rate": 8.389783055154925e-06, "loss": 0.097808837890625, "step": 2265 }, { "epoch": 0.3157528042917857, "grad_norm": 1.4544811248779297, "learning_rate": 8.388050202438952e-06, "loss": 0.09086990356445312, "step": 2266 }, { "epoch": 0.3158921479830001, "grad_norm": 0.48001182079315186, "learning_rate": 8.386316596975574e-06, "loss": 0.07048416137695312, "step": 2267 }, { "epoch": 0.31603149167421446, "grad_norm": 0.6262307167053223, "learning_rate": 8.38458223914996e-06, "loss": 0.08760261535644531, "step": 2268 }, { "epoch": 0.31617083536542884, "grad_norm": 1.0104295015335083, "learning_rate": 8.38284712934744e-06, "loss": 0.11310577392578125, "step": 2269 }, { "epoch": 0.3163101790566432, "grad_norm": 0.7285651564598083, "learning_rate": 8.381111267953523e-06, "loss": 0.09903144836425781, "step": 2270 }, { "epoch": 0.3164495227478576, "grad_norm": 0.9349532127380371, "learning_rate": 8.379374655353874e-06, "loss": 0.10173416137695312, "step": 2271 }, { "epoch": 0.31658886643907197, "grad_norm": 1.9064075946807861, "learning_rate": 8.377637291934329e-06, "loss": 0.12133979797363281, "step": 2272 }, { "epoch": 0.31672821013028635, "grad_norm": 1.2473236322402954, "learning_rate": 8.37589917808089e-06, "loss": 0.0917510986328125, "step": 2273 }, { "epoch": 0.3168675538215007, "grad_norm": 1.3085963726043701, "learning_rate": 8.374160314179727e-06, "loss": 0.12884521484375, "step": 2274 }, { "epoch": 0.3170068975127151, "grad_norm": 1.5291281938552856, "learning_rate": 8.372420700617176e-06, "loss": 0.08033370971679688, "step": 2275 }, { "epoch": 0.3171462412039295, "grad_norm": 1.9269373416900635, "learning_rate": 8.370680337779737e-06, "loss": 0.09707164764404297, "step": 2276 }, { "epoch": 0.31728558489514386, "grad_norm": 1.1744779348373413, "learning_rate": 8.368939226054083e-06, "loss": 0.08414649963378906, "step": 2277 }, { "epoch": 0.31742492858635823, "grad_norm": 1.379770278930664, "learning_rate": 8.367197365827047e-06, "loss": 0.12230300903320312, "step": 2278 }, { "epoch": 0.3175642722775726, "grad_norm": 2.2372865676879883, "learning_rate": 8.36545475748563e-06, "loss": 0.14892196655273438, "step": 2279 }, { "epoch": 0.317703615968787, "grad_norm": 1.7687333822250366, "learning_rate": 8.363711401417e-06, "loss": 0.11992263793945312, "step": 2280 }, { "epoch": 0.31784295966000137, "grad_norm": 1.8586912155151367, "learning_rate": 8.361967298008494e-06, "loss": 0.10260772705078125, "step": 2281 }, { "epoch": 0.3179823033512158, "grad_norm": 1.5486758947372437, "learning_rate": 8.360222447647606e-06, "loss": 0.09423542022705078, "step": 2282 }, { "epoch": 0.3181216470424302, "grad_norm": 1.0550175905227661, "learning_rate": 8.358476850722007e-06, "loss": 0.076080322265625, "step": 2283 }, { "epoch": 0.31826099073364456, "grad_norm": 2.2589128017425537, "learning_rate": 8.356730507619526e-06, "loss": 0.1040191650390625, "step": 2284 }, { "epoch": 0.31840033442485893, "grad_norm": 1.4508429765701294, "learning_rate": 8.354983418728165e-06, "loss": 0.07288932800292969, "step": 2285 }, { "epoch": 0.3185396781160733, "grad_norm": 5.037619590759277, "learning_rate": 8.353235584436082e-06, "loss": 0.1318378448486328, "step": 2286 }, { "epoch": 0.3186790218072877, "grad_norm": 0.9276213049888611, "learning_rate": 8.351487005131606e-06, "loss": 0.07124137878417969, "step": 2287 }, { "epoch": 0.31881836549850207, "grad_norm": 0.993743360042572, "learning_rate": 8.349737681203234e-06, "loss": 0.07740974426269531, "step": 2288 }, { "epoch": 0.31895770918971644, "grad_norm": 1.3436405658721924, "learning_rate": 8.347987613039626e-06, "loss": 0.12079048156738281, "step": 2289 }, { "epoch": 0.3190970528809308, "grad_norm": 1.3502867221832275, "learning_rate": 8.346236801029605e-06, "loss": 0.09432220458984375, "step": 2290 }, { "epoch": 0.3192363965721452, "grad_norm": 2.289473533630371, "learning_rate": 8.344485245562165e-06, "loss": 0.1271686553955078, "step": 2291 }, { "epoch": 0.3193757402633596, "grad_norm": 1.7953654527664185, "learning_rate": 8.342732947026457e-06, "loss": 0.09188461303710938, "step": 2292 }, { "epoch": 0.31951508395457395, "grad_norm": 1.407926082611084, "learning_rate": 8.340979905811805e-06, "loss": 0.09052467346191406, "step": 2293 }, { "epoch": 0.31965442764578833, "grad_norm": 0.937024712562561, "learning_rate": 8.339226122307696e-06, "loss": 0.09366989135742188, "step": 2294 }, { "epoch": 0.3197937713370027, "grad_norm": 0.5435752272605896, "learning_rate": 8.337471596903774e-06, "loss": 0.07494926452636719, "step": 2295 }, { "epoch": 0.3199331150282171, "grad_norm": 0.5347227454185486, "learning_rate": 8.335716329989863e-06, "loss": 0.07395744323730469, "step": 2296 }, { "epoch": 0.32007245871943146, "grad_norm": 2.2027876377105713, "learning_rate": 8.333960321955937e-06, "loss": 0.10854911804199219, "step": 2297 }, { "epoch": 0.32021180241064584, "grad_norm": 0.9182535409927368, "learning_rate": 8.332203573192143e-06, "loss": 0.08582496643066406, "step": 2298 }, { "epoch": 0.3203511461018602, "grad_norm": 0.7218563556671143, "learning_rate": 8.330446084088791e-06, "loss": 0.06610298156738281, "step": 2299 }, { "epoch": 0.3204904897930746, "grad_norm": 1.8310301303863525, "learning_rate": 8.328687855036355e-06, "loss": 0.10867881774902344, "step": 2300 }, { "epoch": 0.320629833484289, "grad_norm": 0.6067085862159729, "learning_rate": 8.326928886425471e-06, "loss": 0.08932876586914062, "step": 2301 }, { "epoch": 0.3207691771755034, "grad_norm": 1.1077409982681274, "learning_rate": 8.325169178646946e-06, "loss": 0.11242866516113281, "step": 2302 }, { "epoch": 0.3209085208667178, "grad_norm": 0.9637612104415894, "learning_rate": 8.323408732091743e-06, "loss": 0.09601402282714844, "step": 2303 }, { "epoch": 0.32104786455793216, "grad_norm": 1.5341174602508545, "learning_rate": 8.321647547150995e-06, "loss": 0.11345863342285156, "step": 2304 }, { "epoch": 0.32118720824914654, "grad_norm": 1.4408862590789795, "learning_rate": 8.319885624215996e-06, "loss": 0.13578414916992188, "step": 2305 }, { "epoch": 0.3213265519403609, "grad_norm": 0.86015784740448, "learning_rate": 8.318122963678206e-06, "loss": 0.10184478759765625, "step": 2306 }, { "epoch": 0.3214658956315753, "grad_norm": 0.7313867807388306, "learning_rate": 8.316359565929248e-06, "loss": 0.07052040100097656, "step": 2307 }, { "epoch": 0.32160523932278967, "grad_norm": 0.8139681220054626, "learning_rate": 8.314595431360906e-06, "loss": 0.09360027313232422, "step": 2308 }, { "epoch": 0.32174458301400405, "grad_norm": 0.5926975607872009, "learning_rate": 8.312830560365136e-06, "loss": 0.06243705749511719, "step": 2309 }, { "epoch": 0.3218839267052184, "grad_norm": 1.5418304204940796, "learning_rate": 8.311064953334046e-06, "loss": 0.11491012573242188, "step": 2310 }, { "epoch": 0.3220232703964328, "grad_norm": 1.5727869272232056, "learning_rate": 8.309298610659917e-06, "loss": 0.11864185333251953, "step": 2311 }, { "epoch": 0.3221626140876472, "grad_norm": 1.1163078546524048, "learning_rate": 8.307531532735188e-06, "loss": 0.1070098876953125, "step": 2312 }, { "epoch": 0.32230195777886156, "grad_norm": 1.4789296388626099, "learning_rate": 8.305763719952467e-06, "loss": 0.10852813720703125, "step": 2313 }, { "epoch": 0.32244130147007594, "grad_norm": 1.1517186164855957, "learning_rate": 8.303995172704519e-06, "loss": 0.078521728515625, "step": 2314 }, { "epoch": 0.3225806451612903, "grad_norm": 1.65707266330719, "learning_rate": 8.302225891384275e-06, "loss": 0.1179351806640625, "step": 2315 }, { "epoch": 0.3227199888525047, "grad_norm": 0.9455122351646423, "learning_rate": 8.300455876384827e-06, "loss": 0.07680892944335938, "step": 2316 }, { "epoch": 0.32285933254371907, "grad_norm": 1.3108958005905151, "learning_rate": 8.298685128099437e-06, "loss": 0.12338066101074219, "step": 2317 }, { "epoch": 0.32299867623493345, "grad_norm": 1.219027042388916, "learning_rate": 8.29691364692152e-06, "loss": 0.10832977294921875, "step": 2318 }, { "epoch": 0.3231380199261478, "grad_norm": 1.76201593875885, "learning_rate": 8.29514143324466e-06, "loss": 0.10112380981445312, "step": 2319 }, { "epoch": 0.3232773636173622, "grad_norm": 1.6205987930297852, "learning_rate": 8.293368487462604e-06, "loss": 0.13890838623046875, "step": 2320 }, { "epoch": 0.3234167073085766, "grad_norm": 0.7915484309196472, "learning_rate": 8.29159480996926e-06, "loss": 0.08218574523925781, "step": 2321 }, { "epoch": 0.323556050999791, "grad_norm": 0.8732283711433411, "learning_rate": 8.289820401158695e-06, "loss": 0.07818222045898438, "step": 2322 }, { "epoch": 0.3236953946910054, "grad_norm": 1.2069909572601318, "learning_rate": 8.288045261425146e-06, "loss": 0.08641815185546875, "step": 2323 }, { "epoch": 0.32383473838221977, "grad_norm": 2.314866065979004, "learning_rate": 8.286269391163006e-06, "loss": 0.10660934448242188, "step": 2324 }, { "epoch": 0.32397408207343414, "grad_norm": 2.0038325786590576, "learning_rate": 8.284492790766835e-06, "loss": 0.1499347686767578, "step": 2325 }, { "epoch": 0.3241134257646485, "grad_norm": 0.7189550399780273, "learning_rate": 8.282715460631354e-06, "loss": 0.0966339111328125, "step": 2326 }, { "epoch": 0.3242527694558629, "grad_norm": 3.2636358737945557, "learning_rate": 8.280937401151441e-06, "loss": 0.14772796630859375, "step": 2327 }, { "epoch": 0.3243921131470773, "grad_norm": 5.4806365966796875, "learning_rate": 8.279158612722145e-06, "loss": 0.1761474609375, "step": 2328 }, { "epoch": 0.32453145683829165, "grad_norm": 1.0627187490463257, "learning_rate": 8.277379095738668e-06, "loss": 0.10427284240722656, "step": 2329 }, { "epoch": 0.32467080052950603, "grad_norm": 0.842446506023407, "learning_rate": 8.27559885059638e-06, "loss": 0.07635307312011719, "step": 2330 }, { "epoch": 0.3248101442207204, "grad_norm": 1.0982393026351929, "learning_rate": 8.273817877690809e-06, "loss": 0.13838577270507812, "step": 2331 }, { "epoch": 0.3249494879119348, "grad_norm": 0.5319634675979614, "learning_rate": 8.272036177417649e-06, "loss": 0.0644073486328125, "step": 2332 }, { "epoch": 0.32508883160314916, "grad_norm": 1.167734146118164, "learning_rate": 8.270253750172754e-06, "loss": 0.09275197982788086, "step": 2333 }, { "epoch": 0.32522817529436354, "grad_norm": 0.8825164437294006, "learning_rate": 8.268470596352134e-06, "loss": 0.07169246673583984, "step": 2334 }, { "epoch": 0.3253675189855779, "grad_norm": 2.074735164642334, "learning_rate": 8.26668671635197e-06, "loss": 0.10906314849853516, "step": 2335 }, { "epoch": 0.3255068626767923, "grad_norm": 2.003065586090088, "learning_rate": 8.264902110568598e-06, "loss": 0.16430282592773438, "step": 2336 }, { "epoch": 0.3256462063680067, "grad_norm": 0.7390332818031311, "learning_rate": 8.263116779398514e-06, "loss": 0.077850341796875, "step": 2337 }, { "epoch": 0.32578555005922105, "grad_norm": 1.1837952136993408, "learning_rate": 8.261330723238381e-06, "loss": 0.10950088500976562, "step": 2338 }, { "epoch": 0.32592489375043543, "grad_norm": 0.9951052069664001, "learning_rate": 8.25954394248502e-06, "loss": 0.09175395965576172, "step": 2339 }, { "epoch": 0.3260642374416498, "grad_norm": 1.136976718902588, "learning_rate": 8.25775643753541e-06, "loss": 0.09958267211914062, "step": 2340 }, { "epoch": 0.3262035811328642, "grad_norm": 1.766823172569275, "learning_rate": 8.255968208786694e-06, "loss": 0.11538887023925781, "step": 2341 }, { "epoch": 0.3263429248240786, "grad_norm": 0.6685525178909302, "learning_rate": 8.25417925663618e-06, "loss": 0.06981945037841797, "step": 2342 }, { "epoch": 0.326482268515293, "grad_norm": 0.6408440470695496, "learning_rate": 8.252389581481328e-06, "loss": 0.07071685791015625, "step": 2343 }, { "epoch": 0.3266216122065074, "grad_norm": 0.9807579517364502, "learning_rate": 8.250599183719763e-06, "loss": 0.10450363159179688, "step": 2344 }, { "epoch": 0.32676095589772175, "grad_norm": 1.852003574371338, "learning_rate": 8.248808063749273e-06, "loss": 0.10614395141601562, "step": 2345 }, { "epoch": 0.3269002995889361, "grad_norm": 0.9741390943527222, "learning_rate": 8.247016221967802e-06, "loss": 0.085235595703125, "step": 2346 }, { "epoch": 0.3270396432801505, "grad_norm": 1.2106738090515137, "learning_rate": 8.245223658773459e-06, "loss": 0.0996856689453125, "step": 2347 }, { "epoch": 0.3271789869713649, "grad_norm": 1.368910312652588, "learning_rate": 8.243430374564507e-06, "loss": 0.12779617309570312, "step": 2348 }, { "epoch": 0.32731833066257926, "grad_norm": 1.2403666973114014, "learning_rate": 8.241636369739376e-06, "loss": 0.09025764465332031, "step": 2349 }, { "epoch": 0.32745767435379364, "grad_norm": 0.949012279510498, "learning_rate": 8.23984164469665e-06, "loss": 0.12286567687988281, "step": 2350 }, { "epoch": 0.327597018045008, "grad_norm": 1.3460973501205444, "learning_rate": 8.23804619983508e-06, "loss": 0.10917282104492188, "step": 2351 }, { "epoch": 0.3277363617362224, "grad_norm": 0.8728182911872864, "learning_rate": 8.236250035553569e-06, "loss": 0.09606170654296875, "step": 2352 }, { "epoch": 0.32787570542743677, "grad_norm": 0.6961316466331482, "learning_rate": 8.234453152251183e-06, "loss": 0.07979965209960938, "step": 2353 }, { "epoch": 0.32801504911865115, "grad_norm": 1.0698652267456055, "learning_rate": 8.23265555032715e-06, "loss": 0.08638572692871094, "step": 2354 }, { "epoch": 0.3281543928098655, "grad_norm": 1.3018534183502197, "learning_rate": 8.23085723018086e-06, "loss": 0.11106681823730469, "step": 2355 }, { "epoch": 0.3282937365010799, "grad_norm": 1.2937822341918945, "learning_rate": 8.229058192211851e-06, "loss": 0.07712173461914062, "step": 2356 }, { "epoch": 0.3284330801922943, "grad_norm": 1.2300959825515747, "learning_rate": 8.227258436819836e-06, "loss": 0.11435699462890625, "step": 2357 }, { "epoch": 0.32857242388350866, "grad_norm": 0.6396428942680359, "learning_rate": 8.225457964404675e-06, "loss": 0.09202194213867188, "step": 2358 }, { "epoch": 0.32871176757472303, "grad_norm": 1.1068828105926514, "learning_rate": 8.223656775366393e-06, "loss": 0.07221031188964844, "step": 2359 }, { "epoch": 0.3288511112659374, "grad_norm": 1.3541756868362427, "learning_rate": 8.221854870105172e-06, "loss": 0.09602928161621094, "step": 2360 }, { "epoch": 0.3289904549571518, "grad_norm": 1.1349815130233765, "learning_rate": 8.220052249021356e-06, "loss": 0.08991813659667969, "step": 2361 }, { "epoch": 0.32912979864836617, "grad_norm": 0.6571307182312012, "learning_rate": 8.218248912515443e-06, "loss": 0.07701683044433594, "step": 2362 }, { "epoch": 0.3292691423395806, "grad_norm": 1.2477684020996094, "learning_rate": 8.216444860988098e-06, "loss": 0.13922691345214844, "step": 2363 }, { "epoch": 0.329408486030795, "grad_norm": 0.8892494440078735, "learning_rate": 8.214640094840136e-06, "loss": 0.09946823120117188, "step": 2364 }, { "epoch": 0.32954782972200936, "grad_norm": 0.6544662714004517, "learning_rate": 8.212834614472538e-06, "loss": 0.10059547424316406, "step": 2365 }, { "epoch": 0.32968717341322373, "grad_norm": 0.7340983152389526, "learning_rate": 8.211028420286437e-06, "loss": 0.10333251953125, "step": 2366 }, { "epoch": 0.3298265171044381, "grad_norm": 0.7970487475395203, "learning_rate": 8.209221512683132e-06, "loss": 0.0860452651977539, "step": 2367 }, { "epoch": 0.3299658607956525, "grad_norm": 1.7046566009521484, "learning_rate": 8.207413892064073e-06, "loss": 0.13075828552246094, "step": 2368 }, { "epoch": 0.33010520448686687, "grad_norm": 1.231590747833252, "learning_rate": 8.205605558830873e-06, "loss": 0.10387325286865234, "step": 2369 }, { "epoch": 0.33024454817808124, "grad_norm": 1.0876376628875732, "learning_rate": 8.203796513385307e-06, "loss": 0.11605644226074219, "step": 2370 }, { "epoch": 0.3303838918692956, "grad_norm": 0.7550297975540161, "learning_rate": 8.201986756129297e-06, "loss": 0.07375335693359375, "step": 2371 }, { "epoch": 0.33052323556051, "grad_norm": 1.0208951234817505, "learning_rate": 8.200176287464931e-06, "loss": 0.09639215469360352, "step": 2372 }, { "epoch": 0.3306625792517244, "grad_norm": 0.9713460803031921, "learning_rate": 8.198365107794457e-06, "loss": 0.09945106506347656, "step": 2373 }, { "epoch": 0.33080192294293875, "grad_norm": 0.5714378356933594, "learning_rate": 8.196553217520275e-06, "loss": 0.08745384216308594, "step": 2374 }, { "epoch": 0.33094126663415313, "grad_norm": 0.7693279981613159, "learning_rate": 8.194740617044948e-06, "loss": 0.08680152893066406, "step": 2375 }, { "epoch": 0.3310806103253675, "grad_norm": 0.3266012966632843, "learning_rate": 8.192927306771193e-06, "loss": 0.06812858581542969, "step": 2376 }, { "epoch": 0.3312199540165819, "grad_norm": 1.094542145729065, "learning_rate": 8.191113287101884e-06, "loss": 0.09924888610839844, "step": 2377 }, { "epoch": 0.33135929770779626, "grad_norm": 0.9656255841255188, "learning_rate": 8.18929855844006e-06, "loss": 0.11257553100585938, "step": 2378 }, { "epoch": 0.33149864139901064, "grad_norm": 0.7751079797744751, "learning_rate": 8.187483121188908e-06, "loss": 0.10182857513427734, "step": 2379 }, { "epoch": 0.331637985090225, "grad_norm": 0.9176552891731262, "learning_rate": 8.185666975751778e-06, "loss": 0.09132671356201172, "step": 2380 }, { "epoch": 0.3317773287814394, "grad_norm": 0.5585100650787354, "learning_rate": 8.183850122532174e-06, "loss": 0.06924629211425781, "step": 2381 }, { "epoch": 0.3319166724726538, "grad_norm": 0.7221525311470032, "learning_rate": 8.182032561933764e-06, "loss": 0.10475540161132812, "step": 2382 }, { "epoch": 0.3320560161638682, "grad_norm": 1.1006571054458618, "learning_rate": 8.180214294360365e-06, "loss": 0.09964179992675781, "step": 2383 }, { "epoch": 0.3321953598550826, "grad_norm": 0.710578203201294, "learning_rate": 8.178395320215953e-06, "loss": 0.08131790161132812, "step": 2384 }, { "epoch": 0.33233470354629696, "grad_norm": 1.4885505437850952, "learning_rate": 8.176575639904668e-06, "loss": 0.13362884521484375, "step": 2385 }, { "epoch": 0.33247404723751134, "grad_norm": 1.1376725435256958, "learning_rate": 8.174755253830797e-06, "loss": 0.1279430389404297, "step": 2386 }, { "epoch": 0.3326133909287257, "grad_norm": 1.8129782676696777, "learning_rate": 8.17293416239879e-06, "loss": 0.11075592041015625, "step": 2387 }, { "epoch": 0.3327527346199401, "grad_norm": 1.517320156097412, "learning_rate": 8.171112366013252e-06, "loss": 0.10706329345703125, "step": 2388 }, { "epoch": 0.33289207831115447, "grad_norm": 1.7780040502548218, "learning_rate": 8.169289865078942e-06, "loss": 0.13457870483398438, "step": 2389 }, { "epoch": 0.33303142200236885, "grad_norm": 2.1330151557922363, "learning_rate": 8.167466660000781e-06, "loss": 0.15411758422851562, "step": 2390 }, { "epoch": 0.3331707656935832, "grad_norm": 1.4089076519012451, "learning_rate": 8.165642751183844e-06, "loss": 0.09728431701660156, "step": 2391 }, { "epoch": 0.3333101093847976, "grad_norm": 1.0696840286254883, "learning_rate": 8.163818139033359e-06, "loss": 0.0783538818359375, "step": 2392 }, { "epoch": 0.333449453076012, "grad_norm": 0.4504489600658417, "learning_rate": 8.161992823954715e-06, "loss": 0.07872962951660156, "step": 2393 }, { "epoch": 0.33358879676722636, "grad_norm": 0.9479125738143921, "learning_rate": 8.160166806353455e-06, "loss": 0.10843467712402344, "step": 2394 }, { "epoch": 0.33372814045844074, "grad_norm": 0.9680537581443787, "learning_rate": 8.15834008663528e-06, "loss": 0.09654617309570312, "step": 2395 }, { "epoch": 0.3338674841496551, "grad_norm": 0.9921311140060425, "learning_rate": 8.156512665206043e-06, "loss": 0.08444786071777344, "step": 2396 }, { "epoch": 0.3340068278408695, "grad_norm": 0.706591784954071, "learning_rate": 8.154684542471754e-06, "loss": 0.07336997985839844, "step": 2397 }, { "epoch": 0.33414617153208387, "grad_norm": 0.8084640502929688, "learning_rate": 8.152855718838583e-06, "loss": 0.07286834716796875, "step": 2398 }, { "epoch": 0.33428551522329825, "grad_norm": 1.0533324480056763, "learning_rate": 8.151026194712854e-06, "loss": 0.1179046630859375, "step": 2399 }, { "epoch": 0.3344248589145126, "grad_norm": 0.8159856200218201, "learning_rate": 8.149195970501043e-06, "loss": 0.08368587493896484, "step": 2400 }, { "epoch": 0.334564202605727, "grad_norm": 1.2761001586914062, "learning_rate": 8.147365046609786e-06, "loss": 0.08119010925292969, "step": 2401 }, { "epoch": 0.3347035462969414, "grad_norm": 0.9095315933227539, "learning_rate": 8.145533423445869e-06, "loss": 0.09299564361572266, "step": 2402 }, { "epoch": 0.3348428899881558, "grad_norm": 0.995787501335144, "learning_rate": 8.14370110141624e-06, "loss": 0.08441162109375, "step": 2403 }, { "epoch": 0.3349822336793702, "grad_norm": 0.5393896102905273, "learning_rate": 8.141868080927998e-06, "loss": 0.07260894775390625, "step": 2404 }, { "epoch": 0.33512157737058457, "grad_norm": 1.4588011503219604, "learning_rate": 8.140034362388398e-06, "loss": 0.1286754608154297, "step": 2405 }, { "epoch": 0.33526092106179894, "grad_norm": 0.7411159873008728, "learning_rate": 8.13819994620485e-06, "loss": 0.11254501342773438, "step": 2406 }, { "epoch": 0.3354002647530133, "grad_norm": 0.8156509399414062, "learning_rate": 8.136364832784923e-06, "loss": 0.0999288558959961, "step": 2407 }, { "epoch": 0.3355396084442277, "grad_norm": 1.3798987865447998, "learning_rate": 8.134529022536332e-06, "loss": 0.11838245391845703, "step": 2408 }, { "epoch": 0.3356789521354421, "grad_norm": 0.5318801999092102, "learning_rate": 8.132692515866959e-06, "loss": 0.07513141632080078, "step": 2409 }, { "epoch": 0.33581829582665645, "grad_norm": 0.9373680949211121, "learning_rate": 8.130855313184824e-06, "loss": 0.1075897216796875, "step": 2410 }, { "epoch": 0.33595763951787083, "grad_norm": 0.45901334285736084, "learning_rate": 8.129017414898121e-06, "loss": 0.07619667053222656, "step": 2411 }, { "epoch": 0.3360969832090852, "grad_norm": 0.9437028169631958, "learning_rate": 8.127178821415183e-06, "loss": 0.10165786743164062, "step": 2412 }, { "epoch": 0.3362363269002996, "grad_norm": 1.0662530660629272, "learning_rate": 8.125339533144507e-06, "loss": 0.11285018920898438, "step": 2413 }, { "epoch": 0.33637567059151396, "grad_norm": 0.5067846179008484, "learning_rate": 8.123499550494737e-06, "loss": 0.06778144836425781, "step": 2414 }, { "epoch": 0.33651501428272834, "grad_norm": 0.7032023668289185, "learning_rate": 8.12165887387468e-06, "loss": 0.09105682373046875, "step": 2415 }, { "epoch": 0.3366543579739427, "grad_norm": 0.46985509991645813, "learning_rate": 8.11981750369329e-06, "loss": 0.07125091552734375, "step": 2416 }, { "epoch": 0.3367937016651571, "grad_norm": 0.8002470135688782, "learning_rate": 8.117975440359677e-06, "loss": 0.09110260009765625, "step": 2417 }, { "epoch": 0.3369330453563715, "grad_norm": 0.851895272731781, "learning_rate": 8.116132684283104e-06, "loss": 0.0949249267578125, "step": 2418 }, { "epoch": 0.33707238904758585, "grad_norm": 0.7637735605239868, "learning_rate": 8.114289235872993e-06, "loss": 0.07961082458496094, "step": 2419 }, { "epoch": 0.33721173273880023, "grad_norm": 0.926108717918396, "learning_rate": 8.112445095538915e-06, "loss": 0.09411811828613281, "step": 2420 }, { "epoch": 0.3373510764300146, "grad_norm": 0.6428762078285217, "learning_rate": 8.110600263690592e-06, "loss": 0.083526611328125, "step": 2421 }, { "epoch": 0.337490420121229, "grad_norm": 0.7781724333763123, "learning_rate": 8.10875474073791e-06, "loss": 0.10384368896484375, "step": 2422 }, { "epoch": 0.3376297638124434, "grad_norm": 2.4685182571411133, "learning_rate": 8.106908527090895e-06, "loss": 0.1287555694580078, "step": 2423 }, { "epoch": 0.3377691075036578, "grad_norm": 1.10919189453125, "learning_rate": 8.10506162315974e-06, "loss": 0.09116172790527344, "step": 2424 }, { "epoch": 0.3379084511948722, "grad_norm": 1.3187936544418335, "learning_rate": 8.103214029354783e-06, "loss": 0.12462234497070312, "step": 2425 }, { "epoch": 0.33804779488608655, "grad_norm": 0.7739033699035645, "learning_rate": 8.101365746086514e-06, "loss": 0.09320640563964844, "step": 2426 }, { "epoch": 0.3381871385773009, "grad_norm": 0.8636177778244019, "learning_rate": 8.099516773765581e-06, "loss": 0.09852790832519531, "step": 2427 }, { "epoch": 0.3383264822685153, "grad_norm": 0.9415655732154846, "learning_rate": 8.097667112802784e-06, "loss": 0.0818166732788086, "step": 2428 }, { "epoch": 0.3384658259597297, "grad_norm": 1.6239463090896606, "learning_rate": 8.095816763609077e-06, "loss": 0.10031890869140625, "step": 2429 }, { "epoch": 0.33860516965094406, "grad_norm": 0.9062644243240356, "learning_rate": 8.093965726595565e-06, "loss": 0.09276199340820312, "step": 2430 }, { "epoch": 0.33874451334215844, "grad_norm": 0.982742428779602, "learning_rate": 8.092114002173503e-06, "loss": 0.10213088989257812, "step": 2431 }, { "epoch": 0.3388838570333728, "grad_norm": 1.1410466432571411, "learning_rate": 8.090261590754304e-06, "loss": 0.1476764678955078, "step": 2432 }, { "epoch": 0.3390232007245872, "grad_norm": 1.3902695178985596, "learning_rate": 8.088408492749534e-06, "loss": 0.09833145141601562, "step": 2433 }, { "epoch": 0.33916254441580157, "grad_norm": 1.160666823387146, "learning_rate": 8.086554708570901e-06, "loss": 0.08595085144042969, "step": 2434 }, { "epoch": 0.33930188810701595, "grad_norm": 1.7885642051696777, "learning_rate": 8.084700238630283e-06, "loss": 0.13071060180664062, "step": 2435 }, { "epoch": 0.3394412317982303, "grad_norm": 1.0017808675765991, "learning_rate": 8.082845083339698e-06, "loss": 0.12786483764648438, "step": 2436 }, { "epoch": 0.3395805754894447, "grad_norm": 1.1850652694702148, "learning_rate": 8.080989243111315e-06, "loss": 0.10738182067871094, "step": 2437 }, { "epoch": 0.3397199191806591, "grad_norm": 0.8695725798606873, "learning_rate": 8.079132718357465e-06, "loss": 0.08945655822753906, "step": 2438 }, { "epoch": 0.33985926287187346, "grad_norm": 0.7992113828659058, "learning_rate": 8.07727550949062e-06, "loss": 0.08982086181640625, "step": 2439 }, { "epoch": 0.33999860656308784, "grad_norm": 0.6821958422660828, "learning_rate": 8.075417616923413e-06, "loss": 0.07019615173339844, "step": 2440 }, { "epoch": 0.3401379502543022, "grad_norm": 1.342294454574585, "learning_rate": 8.073559041068626e-06, "loss": 0.08301830291748047, "step": 2441 }, { "epoch": 0.3402772939455166, "grad_norm": 0.6230244636535645, "learning_rate": 8.071699782339188e-06, "loss": 0.08755111694335938, "step": 2442 }, { "epoch": 0.340416637636731, "grad_norm": 1.006669521331787, "learning_rate": 8.06983984114819e-06, "loss": 0.11635780334472656, "step": 2443 }, { "epoch": 0.3405559813279454, "grad_norm": 1.975887417793274, "learning_rate": 8.067979217908864e-06, "loss": 0.15198135375976562, "step": 2444 }, { "epoch": 0.3406953250191598, "grad_norm": 1.0712716579437256, "learning_rate": 8.066117913034597e-06, "loss": 0.11379814147949219, "step": 2445 }, { "epoch": 0.34083466871037416, "grad_norm": 1.8543787002563477, "learning_rate": 8.06425592693893e-06, "loss": 0.10609245300292969, "step": 2446 }, { "epoch": 0.34097401240158853, "grad_norm": 1.2131563425064087, "learning_rate": 8.062393260035557e-06, "loss": 0.12449264526367188, "step": 2447 }, { "epoch": 0.3411133560928029, "grad_norm": 1.223634123802185, "learning_rate": 8.060529912738316e-06, "loss": 0.10676765441894531, "step": 2448 }, { "epoch": 0.3412526997840173, "grad_norm": 4.145670413970947, "learning_rate": 8.058665885461201e-06, "loss": 0.13136672973632812, "step": 2449 }, { "epoch": 0.34139204347523167, "grad_norm": 2.31964111328125, "learning_rate": 8.056801178618357e-06, "loss": 0.10746002197265625, "step": 2450 }, { "epoch": 0.34153138716644604, "grad_norm": 1.2100411653518677, "learning_rate": 8.05493579262408e-06, "loss": 0.08892631530761719, "step": 2451 }, { "epoch": 0.3416707308576604, "grad_norm": 1.385391116142273, "learning_rate": 8.053069727892813e-06, "loss": 0.13339996337890625, "step": 2452 }, { "epoch": 0.3418100745488748, "grad_norm": 1.0154677629470825, "learning_rate": 8.051202984839157e-06, "loss": 0.09896278381347656, "step": 2453 }, { "epoch": 0.3419494182400892, "grad_norm": 0.9385907649993896, "learning_rate": 8.049335563877858e-06, "loss": 0.10460567474365234, "step": 2454 }, { "epoch": 0.34208876193130355, "grad_norm": 0.723159670829773, "learning_rate": 8.047467465423813e-06, "loss": 0.08362197875976562, "step": 2455 }, { "epoch": 0.34222810562251793, "grad_norm": 1.9781486988067627, "learning_rate": 8.045598689892072e-06, "loss": 0.11586380004882812, "step": 2456 }, { "epoch": 0.3423674493137323, "grad_norm": 0.6241307854652405, "learning_rate": 8.043729237697835e-06, "loss": 0.10245800018310547, "step": 2457 }, { "epoch": 0.3425067930049467, "grad_norm": 0.33451664447784424, "learning_rate": 8.041859109256452e-06, "loss": 0.0550689697265625, "step": 2458 }, { "epoch": 0.34264613669616106, "grad_norm": 1.1797336339950562, "learning_rate": 8.03998830498342e-06, "loss": 0.11465072631835938, "step": 2459 }, { "epoch": 0.34278548038737544, "grad_norm": 1.958740234375, "learning_rate": 8.038116825294393e-06, "loss": 0.12375259399414062, "step": 2460 }, { "epoch": 0.3429248240785898, "grad_norm": 1.155096411705017, "learning_rate": 8.036244670605166e-06, "loss": 0.09614372253417969, "step": 2461 }, { "epoch": 0.3430641677698042, "grad_norm": 0.9067709445953369, "learning_rate": 8.034371841331693e-06, "loss": 0.1338958740234375, "step": 2462 }, { "epoch": 0.34320351146101863, "grad_norm": 1.6193598508834839, "learning_rate": 8.032498337890073e-06, "loss": 0.11301994323730469, "step": 2463 }, { "epoch": 0.343342855152233, "grad_norm": 1.2711126804351807, "learning_rate": 8.030624160696554e-06, "loss": 0.13879776000976562, "step": 2464 }, { "epoch": 0.3434821988434474, "grad_norm": 0.8150821924209595, "learning_rate": 8.02874931016754e-06, "loss": 0.08673810958862305, "step": 2465 }, { "epoch": 0.34362154253466176, "grad_norm": 0.6963856816291809, "learning_rate": 8.026873786719574e-06, "loss": 0.08222389221191406, "step": 2466 }, { "epoch": 0.34376088622587614, "grad_norm": 0.7507519125938416, "learning_rate": 8.024997590769359e-06, "loss": 0.08344650268554688, "step": 2467 }, { "epoch": 0.3439002299170905, "grad_norm": 3.05126953125, "learning_rate": 8.02312072273374e-06, "loss": 0.1558094024658203, "step": 2468 }, { "epoch": 0.3440395736083049, "grad_norm": 0.7051288485527039, "learning_rate": 8.021243183029715e-06, "loss": 0.09685420989990234, "step": 2469 }, { "epoch": 0.34417891729951927, "grad_norm": 0.895697832107544, "learning_rate": 8.019364972074432e-06, "loss": 0.08056068420410156, "step": 2470 }, { "epoch": 0.34431826099073365, "grad_norm": 0.7940415740013123, "learning_rate": 8.017486090285185e-06, "loss": 0.07422828674316406, "step": 2471 }, { "epoch": 0.344457604681948, "grad_norm": 0.9006580710411072, "learning_rate": 8.01560653807942e-06, "loss": 0.08166313171386719, "step": 2472 }, { "epoch": 0.3445969483731624, "grad_norm": 0.9526870846748352, "learning_rate": 8.013726315874729e-06, "loss": 0.08595085144042969, "step": 2473 }, { "epoch": 0.3447362920643768, "grad_norm": 0.9055771827697754, "learning_rate": 8.011845424088856e-06, "loss": 0.10300827026367188, "step": 2474 }, { "epoch": 0.34487563575559116, "grad_norm": 0.7730977535247803, "learning_rate": 8.009963863139689e-06, "loss": 0.08330535888671875, "step": 2475 }, { "epoch": 0.34501497944680554, "grad_norm": 0.8256775736808777, "learning_rate": 8.008081633445272e-06, "loss": 0.12973785400390625, "step": 2476 }, { "epoch": 0.3451543231380199, "grad_norm": 0.5701788663864136, "learning_rate": 8.00619873542379e-06, "loss": 0.06934833526611328, "step": 2477 }, { "epoch": 0.3452936668292343, "grad_norm": 1.4844738245010376, "learning_rate": 8.004315169493586e-06, "loss": 0.08999156951904297, "step": 2478 }, { "epoch": 0.34543301052044867, "grad_norm": 0.6253215670585632, "learning_rate": 8.002430936073137e-06, "loss": 0.07123756408691406, "step": 2479 }, { "epoch": 0.34557235421166305, "grad_norm": 0.941979169845581, "learning_rate": 8.000546035581083e-06, "loss": 0.10882568359375, "step": 2480 }, { "epoch": 0.3457116979028774, "grad_norm": 0.8493534922599792, "learning_rate": 7.998660468436202e-06, "loss": 0.08840751647949219, "step": 2481 }, { "epoch": 0.3458510415940918, "grad_norm": 1.200730562210083, "learning_rate": 7.996774235057425e-06, "loss": 0.11445045471191406, "step": 2482 }, { "epoch": 0.34599038528530623, "grad_norm": 0.8593319654464722, "learning_rate": 7.994887335863832e-06, "loss": 0.079925537109375, "step": 2483 }, { "epoch": 0.3461297289765206, "grad_norm": 0.5412774682044983, "learning_rate": 7.992999771274646e-06, "loss": 0.07991409301757812, "step": 2484 }, { "epoch": 0.346269072667735, "grad_norm": 0.9766780734062195, "learning_rate": 7.991111541709244e-06, "loss": 0.08479690551757812, "step": 2485 }, { "epoch": 0.34640841635894937, "grad_norm": 2.033601760864258, "learning_rate": 7.989222647587146e-06, "loss": 0.1016693115234375, "step": 2486 }, { "epoch": 0.34654776005016374, "grad_norm": 1.7602322101593018, "learning_rate": 7.987333089328018e-06, "loss": 0.11165428161621094, "step": 2487 }, { "epoch": 0.3466871037413781, "grad_norm": 0.7133159637451172, "learning_rate": 7.985442867351682e-06, "loss": 0.07168102264404297, "step": 2488 }, { "epoch": 0.3468264474325925, "grad_norm": 0.640943706035614, "learning_rate": 7.983551982078097e-06, "loss": 0.08912849426269531, "step": 2489 }, { "epoch": 0.3469657911238069, "grad_norm": 0.7554327249526978, "learning_rate": 7.98166043392738e-06, "loss": 0.09101295471191406, "step": 2490 }, { "epoch": 0.34710513481502125, "grad_norm": 0.7908673882484436, "learning_rate": 7.979768223319786e-06, "loss": 0.10592269897460938, "step": 2491 }, { "epoch": 0.34724447850623563, "grad_norm": 0.6691848635673523, "learning_rate": 7.977875350675721e-06, "loss": 0.07925033569335938, "step": 2492 }, { "epoch": 0.34738382219745, "grad_norm": 1.2787809371948242, "learning_rate": 7.975981816415741e-06, "loss": 0.10398197174072266, "step": 2493 }, { "epoch": 0.3475231658886644, "grad_norm": 1.3524993658065796, "learning_rate": 7.974087620960543e-06, "loss": 0.11260986328125, "step": 2494 }, { "epoch": 0.34766250957987876, "grad_norm": 3.6515073776245117, "learning_rate": 7.972192764730975e-06, "loss": 0.16890335083007812, "step": 2495 }, { "epoch": 0.34780185327109314, "grad_norm": 1.1265252828598022, "learning_rate": 7.970297248148033e-06, "loss": 0.13086318969726562, "step": 2496 }, { "epoch": 0.3479411969623075, "grad_norm": 0.6538437008857727, "learning_rate": 7.968401071632854e-06, "loss": 0.07283592224121094, "step": 2497 }, { "epoch": 0.3480805406535219, "grad_norm": 1.4303388595581055, "learning_rate": 7.966504235606726e-06, "loss": 0.11602401733398438, "step": 2498 }, { "epoch": 0.3482198843447363, "grad_norm": 1.083965539932251, "learning_rate": 7.964606740491085e-06, "loss": 0.09336280822753906, "step": 2499 }, { "epoch": 0.34835922803595065, "grad_norm": 0.8597456812858582, "learning_rate": 7.962708586707508e-06, "loss": 0.0805044174194336, "step": 2500 }, { "epoch": 0.34849857172716503, "grad_norm": 1.1187070608139038, "learning_rate": 7.960809774677722e-06, "loss": 0.10509490966796875, "step": 2501 }, { "epoch": 0.3486379154183794, "grad_norm": 0.7073162794113159, "learning_rate": 7.958910304823603e-06, "loss": 0.10370063781738281, "step": 2502 }, { "epoch": 0.34877725910959384, "grad_norm": 0.8511651754379272, "learning_rate": 7.957010177567167e-06, "loss": 0.10826683044433594, "step": 2503 }, { "epoch": 0.3489166028008082, "grad_norm": 0.8635648488998413, "learning_rate": 7.955109393330577e-06, "loss": 0.09896659851074219, "step": 2504 }, { "epoch": 0.3490559464920226, "grad_norm": 0.8159201741218567, "learning_rate": 7.953207952536147e-06, "loss": 0.09103012084960938, "step": 2505 }, { "epoch": 0.349195290183237, "grad_norm": 0.3875655233860016, "learning_rate": 7.951305855606333e-06, "loss": 0.06818962097167969, "step": 2506 }, { "epoch": 0.34933463387445135, "grad_norm": 0.9823933839797974, "learning_rate": 7.949403102963738e-06, "loss": 0.12135505676269531, "step": 2507 }, { "epoch": 0.3494739775656657, "grad_norm": 0.7094467878341675, "learning_rate": 7.947499695031108e-06, "loss": 0.08048629760742188, "step": 2508 }, { "epoch": 0.3496133212568801, "grad_norm": 0.7495715618133545, "learning_rate": 7.94559563223134e-06, "loss": 0.08504295349121094, "step": 2509 }, { "epoch": 0.3497526649480945, "grad_norm": 0.7869492769241333, "learning_rate": 7.943690914987472e-06, "loss": 0.08808517456054688, "step": 2510 }, { "epoch": 0.34989200863930886, "grad_norm": 1.1454601287841797, "learning_rate": 7.941785543722686e-06, "loss": 0.12071800231933594, "step": 2511 }, { "epoch": 0.35003135233052324, "grad_norm": 1.345692753791809, "learning_rate": 7.939879518860316e-06, "loss": 0.1013946533203125, "step": 2512 }, { "epoch": 0.3501706960217376, "grad_norm": 1.1145281791687012, "learning_rate": 7.937972840823836e-06, "loss": 0.10800552368164062, "step": 2513 }, { "epoch": 0.350310039712952, "grad_norm": 1.98015296459198, "learning_rate": 7.936065510036863e-06, "loss": 0.18168258666992188, "step": 2514 }, { "epoch": 0.35044938340416637, "grad_norm": 1.0327298641204834, "learning_rate": 7.934157526923167e-06, "loss": 0.08695602416992188, "step": 2515 }, { "epoch": 0.35058872709538075, "grad_norm": 0.4441479742527008, "learning_rate": 7.932248891906657e-06, "loss": 0.06367111206054688, "step": 2516 }, { "epoch": 0.3507280707865951, "grad_norm": 1.1828726530075073, "learning_rate": 7.930339605411387e-06, "loss": 0.10540580749511719, "step": 2517 }, { "epoch": 0.3508674144778095, "grad_norm": 1.9980117082595825, "learning_rate": 7.92842966786156e-06, "loss": 0.1156005859375, "step": 2518 }, { "epoch": 0.3510067581690239, "grad_norm": 1.0760133266448975, "learning_rate": 7.926519079681514e-06, "loss": 0.08944320678710938, "step": 2519 }, { "epoch": 0.35114610186023826, "grad_norm": 3.0053999423980713, "learning_rate": 7.924607841295744e-06, "loss": 0.15419769287109375, "step": 2520 }, { "epoch": 0.35128544555145264, "grad_norm": 0.8679763078689575, "learning_rate": 7.92269595312888e-06, "loss": 0.06687355041503906, "step": 2521 }, { "epoch": 0.351424789242667, "grad_norm": 1.052032470703125, "learning_rate": 7.920783415605703e-06, "loss": 0.09728240966796875, "step": 2522 }, { "epoch": 0.35156413293388145, "grad_norm": 1.257395625114441, "learning_rate": 7.918870229151134e-06, "loss": 0.10284805297851562, "step": 2523 }, { "epoch": 0.3517034766250958, "grad_norm": 0.6781352758407593, "learning_rate": 7.916956394190238e-06, "loss": 0.08642053604125977, "step": 2524 }, { "epoch": 0.3518428203163102, "grad_norm": 0.5075668692588806, "learning_rate": 7.915041911148229e-06, "loss": 0.07611083984375, "step": 2525 }, { "epoch": 0.3519821640075246, "grad_norm": 0.8846243023872375, "learning_rate": 7.913126780450455e-06, "loss": 0.09262847900390625, "step": 2526 }, { "epoch": 0.35212150769873896, "grad_norm": 1.154899001121521, "learning_rate": 7.911211002522422e-06, "loss": 0.09116363525390625, "step": 2527 }, { "epoch": 0.35226085138995333, "grad_norm": 0.6747080683708191, "learning_rate": 7.909294577789765e-06, "loss": 0.07762908935546875, "step": 2528 }, { "epoch": 0.3524001950811677, "grad_norm": 0.3118712604045868, "learning_rate": 7.907377506678274e-06, "loss": 0.05618572235107422, "step": 2529 }, { "epoch": 0.3525395387723821, "grad_norm": 1.581920862197876, "learning_rate": 7.905459789613878e-06, "loss": 0.09559822082519531, "step": 2530 }, { "epoch": 0.35267888246359647, "grad_norm": 1.6659706830978394, "learning_rate": 7.90354142702265e-06, "loss": 0.09758281707763672, "step": 2531 }, { "epoch": 0.35281822615481084, "grad_norm": 1.3690423965454102, "learning_rate": 7.901622419330805e-06, "loss": 0.0949850082397461, "step": 2532 }, { "epoch": 0.3529575698460252, "grad_norm": 0.4849301278591156, "learning_rate": 7.899702766964705e-06, "loss": 0.0801544189453125, "step": 2533 }, { "epoch": 0.3530969135372396, "grad_norm": 2.0833144187927246, "learning_rate": 7.89778247035085e-06, "loss": 0.0959930419921875, "step": 2534 }, { "epoch": 0.353236257228454, "grad_norm": 1.257278561592102, "learning_rate": 7.895861529915889e-06, "loss": 0.10481739044189453, "step": 2535 }, { "epoch": 0.35337560091966835, "grad_norm": 1.113379955291748, "learning_rate": 7.893939946086609e-06, "loss": 0.08855438232421875, "step": 2536 }, { "epoch": 0.35351494461088273, "grad_norm": 0.8385326266288757, "learning_rate": 7.892017719289941e-06, "loss": 0.08106040954589844, "step": 2537 }, { "epoch": 0.3536542883020971, "grad_norm": 0.9846506118774414, "learning_rate": 7.890094849952964e-06, "loss": 0.08665084838867188, "step": 2538 }, { "epoch": 0.3537936319933115, "grad_norm": 0.9594300985336304, "learning_rate": 7.888171338502893e-06, "loss": 0.10571670532226562, "step": 2539 }, { "epoch": 0.35393297568452586, "grad_norm": 0.8448732495307922, "learning_rate": 7.886247185367088e-06, "loss": 0.11776351928710938, "step": 2540 }, { "epoch": 0.35407231937574024, "grad_norm": 0.7302458882331848, "learning_rate": 7.884322390973053e-06, "loss": 0.07789039611816406, "step": 2541 }, { "epoch": 0.3542116630669546, "grad_norm": 0.9344356060028076, "learning_rate": 7.882396955748432e-06, "loss": 0.08267879486083984, "step": 2542 }, { "epoch": 0.35435100675816905, "grad_norm": 0.6471397280693054, "learning_rate": 7.880470880121015e-06, "loss": 0.07425498962402344, "step": 2543 }, { "epoch": 0.35449035044938343, "grad_norm": 1.3299769163131714, "learning_rate": 7.878544164518731e-06, "loss": 0.09575653076171875, "step": 2544 }, { "epoch": 0.3546296941405978, "grad_norm": 0.6842541098594666, "learning_rate": 7.87661680936965e-06, "loss": 0.07715320587158203, "step": 2545 }, { "epoch": 0.3547690378318122, "grad_norm": 1.1273325681686401, "learning_rate": 7.87468881510199e-06, "loss": 0.11245155334472656, "step": 2546 }, { "epoch": 0.35490838152302656, "grad_norm": 1.27675461769104, "learning_rate": 7.872760182144104e-06, "loss": 0.11237812042236328, "step": 2547 }, { "epoch": 0.35504772521424094, "grad_norm": 0.964769184589386, "learning_rate": 7.870830910924491e-06, "loss": 0.08182907104492188, "step": 2548 }, { "epoch": 0.3551870689054553, "grad_norm": 0.85909104347229, "learning_rate": 7.868901001871797e-06, "loss": 0.09904861450195312, "step": 2549 }, { "epoch": 0.3553264125966697, "grad_norm": 0.6734774708747864, "learning_rate": 7.866970455414793e-06, "loss": 0.107757568359375, "step": 2550 }, { "epoch": 0.35546575628788407, "grad_norm": 0.5159003734588623, "learning_rate": 7.86503927198241e-06, "loss": 0.06509590148925781, "step": 2551 }, { "epoch": 0.35560509997909845, "grad_norm": 0.9498351216316223, "learning_rate": 7.863107452003711e-06, "loss": 0.0932464599609375, "step": 2552 }, { "epoch": 0.3557444436703128, "grad_norm": 2.149906873703003, "learning_rate": 7.861174995907901e-06, "loss": 0.11307525634765625, "step": 2553 }, { "epoch": 0.3558837873615272, "grad_norm": 2.1189825534820557, "learning_rate": 7.85924190412433e-06, "loss": 0.11194229125976562, "step": 2554 }, { "epoch": 0.3560231310527416, "grad_norm": 1.6502957344055176, "learning_rate": 7.857308177082484e-06, "loss": 0.17005157470703125, "step": 2555 }, { "epoch": 0.35616247474395596, "grad_norm": 1.1179805994033813, "learning_rate": 7.855373815211995e-06, "loss": 0.0680856704711914, "step": 2556 }, { "epoch": 0.35630181843517034, "grad_norm": 0.5389050245285034, "learning_rate": 7.853438818942633e-06, "loss": 0.06667137145996094, "step": 2557 }, { "epoch": 0.3564411621263847, "grad_norm": 0.8081263899803162, "learning_rate": 7.851503188704312e-06, "loss": 0.12371110916137695, "step": 2558 }, { "epoch": 0.3565805058175991, "grad_norm": 1.7252013683319092, "learning_rate": 7.849566924927082e-06, "loss": 0.12300872802734375, "step": 2559 }, { "epoch": 0.35671984950881347, "grad_norm": 2.178910970687866, "learning_rate": 7.84763002804114e-06, "loss": 0.09327316284179688, "step": 2560 }, { "epoch": 0.35685919320002785, "grad_norm": 1.2993861436843872, "learning_rate": 7.845692498476816e-06, "loss": 0.10591888427734375, "step": 2561 }, { "epoch": 0.3569985368912422, "grad_norm": 0.8043264746665955, "learning_rate": 7.843754336664589e-06, "loss": 0.10063934326171875, "step": 2562 }, { "epoch": 0.3571378805824566, "grad_norm": 0.8988452553749084, "learning_rate": 7.84181554303507e-06, "loss": 0.10122156143188477, "step": 2563 }, { "epoch": 0.35727722427367103, "grad_norm": 1.3973681926727295, "learning_rate": 7.839876118019019e-06, "loss": 0.11832809448242188, "step": 2564 }, { "epoch": 0.3574165679648854, "grad_norm": 0.4164997637271881, "learning_rate": 7.837936062047329e-06, "loss": 0.08063125610351562, "step": 2565 }, { "epoch": 0.3575559116560998, "grad_norm": 1.3505971431732178, "learning_rate": 7.835995375551038e-06, "loss": 0.10748863220214844, "step": 2566 }, { "epoch": 0.35769525534731417, "grad_norm": 1.2846074104309082, "learning_rate": 7.83405405896132e-06, "loss": 0.09005165100097656, "step": 2567 }, { "epoch": 0.35783459903852854, "grad_norm": 0.37714189291000366, "learning_rate": 7.832112112709496e-06, "loss": 0.07845687866210938, "step": 2568 }, { "epoch": 0.3579739427297429, "grad_norm": 0.7663902640342712, "learning_rate": 7.830169537227015e-06, "loss": 0.06302261352539062, "step": 2569 }, { "epoch": 0.3581132864209573, "grad_norm": 1.8549461364746094, "learning_rate": 7.828226332945479e-06, "loss": 0.10482978820800781, "step": 2570 }, { "epoch": 0.3582526301121717, "grad_norm": 1.623060941696167, "learning_rate": 7.82628250029662e-06, "loss": 0.13002395629882812, "step": 2571 }, { "epoch": 0.35839197380338605, "grad_norm": 1.2297393083572388, "learning_rate": 7.824338039712316e-06, "loss": 0.15045166015625, "step": 2572 }, { "epoch": 0.35853131749460043, "grad_norm": 1.2039755582809448, "learning_rate": 7.82239295162458e-06, "loss": 0.09059906005859375, "step": 2573 }, { "epoch": 0.3586706611858148, "grad_norm": 1.7184832096099854, "learning_rate": 7.820447236465565e-06, "loss": 0.14076805114746094, "step": 2574 }, { "epoch": 0.3588100048770292, "grad_norm": 1.6395987272262573, "learning_rate": 7.818500894667566e-06, "loss": 0.15004730224609375, "step": 2575 }, { "epoch": 0.35894934856824356, "grad_norm": 1.2162929773330688, "learning_rate": 7.816553926663018e-06, "loss": 0.10434722900390625, "step": 2576 }, { "epoch": 0.35908869225945794, "grad_norm": 1.578371286392212, "learning_rate": 7.81460633288449e-06, "loss": 0.08609771728515625, "step": 2577 }, { "epoch": 0.3592280359506723, "grad_norm": 1.0303527116775513, "learning_rate": 7.812658113764691e-06, "loss": 0.10982131958007812, "step": 2578 }, { "epoch": 0.3593673796418867, "grad_norm": 1.305757999420166, "learning_rate": 7.810709269736476e-06, "loss": 0.10159683227539062, "step": 2579 }, { "epoch": 0.3595067233331011, "grad_norm": 0.9829119443893433, "learning_rate": 7.808759801232829e-06, "loss": 0.10290050506591797, "step": 2580 }, { "epoch": 0.35964606702431545, "grad_norm": 0.9621319770812988, "learning_rate": 7.80680970868688e-06, "loss": 0.08416557312011719, "step": 2581 }, { "epoch": 0.35978541071552983, "grad_norm": 1.8013635873794556, "learning_rate": 7.804858992531893e-06, "loss": 0.1238555908203125, "step": 2582 }, { "epoch": 0.3599247544067442, "grad_norm": 0.8679465055465698, "learning_rate": 7.802907653201275e-06, "loss": 0.08379077911376953, "step": 2583 }, { "epoch": 0.36006409809795864, "grad_norm": 1.0342127084732056, "learning_rate": 7.800955691128568e-06, "loss": 0.09406661987304688, "step": 2584 }, { "epoch": 0.360203441789173, "grad_norm": 1.1303013563156128, "learning_rate": 7.799003106747453e-06, "loss": 0.1775684356689453, "step": 2585 }, { "epoch": 0.3603427854803874, "grad_norm": 1.3834038972854614, "learning_rate": 7.79704990049175e-06, "loss": 0.10587120056152344, "step": 2586 }, { "epoch": 0.3604821291716018, "grad_norm": 1.3045299053192139, "learning_rate": 7.795096072795418e-06, "loss": 0.07615280151367188, "step": 2587 }, { "epoch": 0.36062147286281615, "grad_norm": 0.9153901934623718, "learning_rate": 7.793141624092551e-06, "loss": 0.07862472534179688, "step": 2588 }, { "epoch": 0.36076081655403053, "grad_norm": 0.9569130539894104, "learning_rate": 7.791186554817383e-06, "loss": 0.09778213500976562, "step": 2589 }, { "epoch": 0.3609001602452449, "grad_norm": 0.5944858193397522, "learning_rate": 7.789230865404287e-06, "loss": 0.07156944274902344, "step": 2590 }, { "epoch": 0.3610395039364593, "grad_norm": 1.0328375101089478, "learning_rate": 7.787274556287771e-06, "loss": 0.07726287841796875, "step": 2591 }, { "epoch": 0.36117884762767366, "grad_norm": 1.1205052137374878, "learning_rate": 7.785317627902484e-06, "loss": 0.1098337173461914, "step": 2592 }, { "epoch": 0.36131819131888804, "grad_norm": 0.6748964786529541, "learning_rate": 7.783360080683212e-06, "loss": 0.07024192810058594, "step": 2593 }, { "epoch": 0.3614575350101024, "grad_norm": 0.9788240194320679, "learning_rate": 7.781401915064873e-06, "loss": 0.0921478271484375, "step": 2594 }, { "epoch": 0.3615968787013168, "grad_norm": 0.8391798138618469, "learning_rate": 7.779443131482529e-06, "loss": 0.09285545349121094, "step": 2595 }, { "epoch": 0.36173622239253117, "grad_norm": 1.900832176208496, "learning_rate": 7.777483730371375e-06, "loss": 0.10034561157226562, "step": 2596 }, { "epoch": 0.36187556608374555, "grad_norm": 0.6644734740257263, "learning_rate": 7.77552371216675e-06, "loss": 0.09956932067871094, "step": 2597 }, { "epoch": 0.3620149097749599, "grad_norm": 0.981341540813446, "learning_rate": 7.773563077304123e-06, "loss": 0.08151531219482422, "step": 2598 }, { "epoch": 0.3621542534661743, "grad_norm": 0.2842082381248474, "learning_rate": 7.7716018262191e-06, "loss": 0.05117607116699219, "step": 2599 }, { "epoch": 0.3622935971573887, "grad_norm": 1.5985140800476074, "learning_rate": 7.769639959347428e-06, "loss": 0.12653160095214844, "step": 2600 }, { "epoch": 0.36243294084860306, "grad_norm": 0.947273313999176, "learning_rate": 7.767677477124988e-06, "loss": 0.09284591674804688, "step": 2601 }, { "epoch": 0.36257228453981744, "grad_norm": 1.400417685508728, "learning_rate": 7.765714379987804e-06, "loss": 0.11986446380615234, "step": 2602 }, { "epoch": 0.3627116282310318, "grad_norm": 1.7335790395736694, "learning_rate": 7.763750668372023e-06, "loss": 0.1332855224609375, "step": 2603 }, { "epoch": 0.36285097192224625, "grad_norm": 0.8849896192550659, "learning_rate": 7.761786342713941e-06, "loss": 0.08638572692871094, "step": 2604 }, { "epoch": 0.3629903156134606, "grad_norm": 1.1068960428237915, "learning_rate": 7.75982140344999e-06, "loss": 0.13786888122558594, "step": 2605 }, { "epoch": 0.363129659304675, "grad_norm": 0.652332603931427, "learning_rate": 7.757855851016727e-06, "loss": 0.07527828216552734, "step": 2606 }, { "epoch": 0.3632690029958894, "grad_norm": 1.0707674026489258, "learning_rate": 7.755889685850858e-06, "loss": 0.08463096618652344, "step": 2607 }, { "epoch": 0.36340834668710376, "grad_norm": 1.3505029678344727, "learning_rate": 7.75392290838922e-06, "loss": 0.12402153015136719, "step": 2608 }, { "epoch": 0.36354769037831813, "grad_norm": 1.1349002122879028, "learning_rate": 7.751955519068783e-06, "loss": 0.09587287902832031, "step": 2609 }, { "epoch": 0.3636870340695325, "grad_norm": 1.607585072517395, "learning_rate": 7.74998751832666e-06, "loss": 0.09595870971679688, "step": 2610 }, { "epoch": 0.3638263777607469, "grad_norm": 0.9698870778083801, "learning_rate": 7.748018906600092e-06, "loss": 0.10762405395507812, "step": 2611 }, { "epoch": 0.36396572145196127, "grad_norm": 0.9905197620391846, "learning_rate": 7.746049684326462e-06, "loss": 0.1064453125, "step": 2612 }, { "epoch": 0.36410506514317564, "grad_norm": 0.7759420275688171, "learning_rate": 7.744079851943286e-06, "loss": 0.09285163879394531, "step": 2613 }, { "epoch": 0.36424440883439, "grad_norm": 1.1738899946212769, "learning_rate": 7.742109409888213e-06, "loss": 0.09330177307128906, "step": 2614 }, { "epoch": 0.3643837525256044, "grad_norm": 0.42383286356925964, "learning_rate": 7.740138358599035e-06, "loss": 0.07508182525634766, "step": 2615 }, { "epoch": 0.3645230962168188, "grad_norm": 0.8585118651390076, "learning_rate": 7.73816669851367e-06, "loss": 0.09047508239746094, "step": 2616 }, { "epoch": 0.36466243990803315, "grad_norm": 1.0804872512817383, "learning_rate": 7.73619443007018e-06, "loss": 0.10293769836425781, "step": 2617 }, { "epoch": 0.36480178359924753, "grad_norm": 0.6532013416290283, "learning_rate": 7.734221553706756e-06, "loss": 0.08365249633789062, "step": 2618 }, { "epoch": 0.3649411272904619, "grad_norm": 0.6782917976379395, "learning_rate": 7.732248069861726e-06, "loss": 0.07615089416503906, "step": 2619 }, { "epoch": 0.3650804709816763, "grad_norm": 0.6773682832717896, "learning_rate": 7.730273978973552e-06, "loss": 0.08908462524414062, "step": 2620 }, { "epoch": 0.36521981467289066, "grad_norm": 1.0134902000427246, "learning_rate": 7.728299281480833e-06, "loss": 0.10834121704101562, "step": 2621 }, { "epoch": 0.36535915836410504, "grad_norm": 1.6635040044784546, "learning_rate": 7.726323977822304e-06, "loss": 0.12174320220947266, "step": 2622 }, { "epoch": 0.3654985020553194, "grad_norm": 0.45016294717788696, "learning_rate": 7.72434806843683e-06, "loss": 0.06529998779296875, "step": 2623 }, { "epoch": 0.36563784574653385, "grad_norm": 0.7585218548774719, "learning_rate": 7.72237155376341e-06, "loss": 0.0739593505859375, "step": 2624 }, { "epoch": 0.36577718943774823, "grad_norm": 0.9309367537498474, "learning_rate": 7.720394434241185e-06, "loss": 0.10880661010742188, "step": 2625 }, { "epoch": 0.3659165331289626, "grad_norm": 1.378732681274414, "learning_rate": 7.718416710309425e-06, "loss": 0.0998992919921875, "step": 2626 }, { "epoch": 0.366055876820177, "grad_norm": 1.4060934782028198, "learning_rate": 7.716438382407534e-06, "loss": 0.12285709381103516, "step": 2627 }, { "epoch": 0.36619522051139136, "grad_norm": 2.128500461578369, "learning_rate": 7.714459450975052e-06, "loss": 0.1453990936279297, "step": 2628 }, { "epoch": 0.36633456420260574, "grad_norm": 1.387202262878418, "learning_rate": 7.712479916451651e-06, "loss": 0.0895233154296875, "step": 2629 }, { "epoch": 0.3664739078938201, "grad_norm": 1.3430724143981934, "learning_rate": 7.710499779277141e-06, "loss": 0.0941009521484375, "step": 2630 }, { "epoch": 0.3666132515850345, "grad_norm": 0.8046894073486328, "learning_rate": 7.708519039891462e-06, "loss": 0.08544540405273438, "step": 2631 }, { "epoch": 0.36675259527624887, "grad_norm": 0.8685178160667419, "learning_rate": 7.70653769873469e-06, "loss": 0.09501266479492188, "step": 2632 }, { "epoch": 0.36689193896746325, "grad_norm": 0.997161865234375, "learning_rate": 7.70455575624703e-06, "loss": 0.1116943359375, "step": 2633 }, { "epoch": 0.3670312826586776, "grad_norm": 0.7455152273178101, "learning_rate": 7.702573212868827e-06, "loss": 0.12730026245117188, "step": 2634 }, { "epoch": 0.367170626349892, "grad_norm": 0.33344751596450806, "learning_rate": 7.70059006904056e-06, "loss": 0.055408477783203125, "step": 2635 }, { "epoch": 0.3673099700411064, "grad_norm": 2.263958215713501, "learning_rate": 7.698606325202832e-06, "loss": 0.1382007598876953, "step": 2636 }, { "epoch": 0.36744931373232076, "grad_norm": 1.0603986978530884, "learning_rate": 7.69662198179639e-06, "loss": 0.10074996948242188, "step": 2637 }, { "epoch": 0.36758865742353514, "grad_norm": 1.0386631488800049, "learning_rate": 7.694637039262109e-06, "loss": 0.11679458618164062, "step": 2638 }, { "epoch": 0.3677280011147495, "grad_norm": 0.7110656499862671, "learning_rate": 7.692651498040996e-06, "loss": 0.080230712890625, "step": 2639 }, { "epoch": 0.3678673448059639, "grad_norm": 0.9292808771133423, "learning_rate": 7.690665358574197e-06, "loss": 0.0961151123046875, "step": 2640 }, { "epoch": 0.36800668849717827, "grad_norm": 0.5771065950393677, "learning_rate": 7.688678621302981e-06, "loss": 0.07821369171142578, "step": 2641 }, { "epoch": 0.36814603218839265, "grad_norm": 0.8601033687591553, "learning_rate": 7.686691286668761e-06, "loss": 0.09186935424804688, "step": 2642 }, { "epoch": 0.368285375879607, "grad_norm": 1.3796544075012207, "learning_rate": 7.684703355113074e-06, "loss": 0.109832763671875, "step": 2643 }, { "epoch": 0.36842471957082146, "grad_norm": 0.623563826084137, "learning_rate": 7.682714827077595e-06, "loss": 0.06427955627441406, "step": 2644 }, { "epoch": 0.36856406326203583, "grad_norm": 0.8341049551963806, "learning_rate": 7.68072570300413e-06, "loss": 0.08236312866210938, "step": 2645 }, { "epoch": 0.3687034069532502, "grad_norm": 0.9715297818183899, "learning_rate": 7.678735983334615e-06, "loss": 0.09648513793945312, "step": 2646 }, { "epoch": 0.3688427506444646, "grad_norm": 0.6868723630905151, "learning_rate": 7.676745668511121e-06, "loss": 0.08786582946777344, "step": 2647 }, { "epoch": 0.36898209433567897, "grad_norm": 0.7465823292732239, "learning_rate": 7.67475475897585e-06, "loss": 0.0917825698852539, "step": 2648 }, { "epoch": 0.36912143802689334, "grad_norm": 0.8570188879966736, "learning_rate": 7.672763255171138e-06, "loss": 0.0966024398803711, "step": 2649 }, { "epoch": 0.3692607817181077, "grad_norm": 0.6561558246612549, "learning_rate": 7.67077115753945e-06, "loss": 0.08092212677001953, "step": 2650 }, { "epoch": 0.3694001254093221, "grad_norm": 0.8837922215461731, "learning_rate": 7.668778466523386e-06, "loss": 0.08923912048339844, "step": 2651 }, { "epoch": 0.3695394691005365, "grad_norm": 0.9425416588783264, "learning_rate": 7.666785182565676e-06, "loss": 0.08594703674316406, "step": 2652 }, { "epoch": 0.36967881279175085, "grad_norm": 0.7895795702934265, "learning_rate": 7.664791306109183e-06, "loss": 0.0997314453125, "step": 2653 }, { "epoch": 0.36981815648296523, "grad_norm": 0.8189703226089478, "learning_rate": 7.6627968375969e-06, "loss": 0.11336135864257812, "step": 2654 }, { "epoch": 0.3699575001741796, "grad_norm": 1.4393786191940308, "learning_rate": 7.660801777471951e-06, "loss": 0.14703369140625, "step": 2655 }, { "epoch": 0.370096843865394, "grad_norm": 0.4675108790397644, "learning_rate": 7.658806126177596e-06, "loss": 0.0822906494140625, "step": 2656 }, { "epoch": 0.37023618755660836, "grad_norm": 0.8737047910690308, "learning_rate": 7.65680988415722e-06, "loss": 0.1029052734375, "step": 2657 }, { "epoch": 0.37037553124782274, "grad_norm": 0.5724852681159973, "learning_rate": 7.654813051854345e-06, "loss": 0.06069183349609375, "step": 2658 }, { "epoch": 0.3705148749390371, "grad_norm": 1.6002198457717896, "learning_rate": 7.652815629712616e-06, "loss": 0.11154556274414062, "step": 2659 }, { "epoch": 0.3706542186302515, "grad_norm": 1.5330488681793213, "learning_rate": 7.650817618175824e-06, "loss": 0.1208038330078125, "step": 2660 }, { "epoch": 0.3707935623214659, "grad_norm": 0.46261534094810486, "learning_rate": 7.648819017687875e-06, "loss": 0.07334232330322266, "step": 2661 }, { "epoch": 0.37093290601268025, "grad_norm": 1.5377029180526733, "learning_rate": 7.646819828692813e-06, "loss": 0.08531379699707031, "step": 2662 }, { "epoch": 0.37107224970389463, "grad_norm": 0.9321056008338928, "learning_rate": 7.644820051634813e-06, "loss": 0.08771514892578125, "step": 2663 }, { "epoch": 0.37121159339510906, "grad_norm": 1.3717957735061646, "learning_rate": 7.64281968695818e-06, "loss": 0.11562728881835938, "step": 2664 }, { "epoch": 0.37135093708632344, "grad_norm": 0.8430754542350769, "learning_rate": 7.640818735107351e-06, "loss": 0.080902099609375, "step": 2665 }, { "epoch": 0.3714902807775378, "grad_norm": 1.0487089157104492, "learning_rate": 7.638817196526887e-06, "loss": 0.1336822509765625, "step": 2666 }, { "epoch": 0.3716296244687522, "grad_norm": 1.5999090671539307, "learning_rate": 7.636815071661488e-06, "loss": 0.0988311767578125, "step": 2667 }, { "epoch": 0.3717689681599666, "grad_norm": 0.6660745143890381, "learning_rate": 7.634812360955982e-06, "loss": 0.0925140380859375, "step": 2668 }, { "epoch": 0.37190831185118095, "grad_norm": 1.262190818786621, "learning_rate": 7.63280906485532e-06, "loss": 0.10933303833007812, "step": 2669 }, { "epoch": 0.37204765554239533, "grad_norm": 0.9789522290229797, "learning_rate": 7.630805183804593e-06, "loss": 0.0867462158203125, "step": 2670 }, { "epoch": 0.3721869992336097, "grad_norm": 1.2230029106140137, "learning_rate": 7.628800718249017e-06, "loss": 0.09474563598632812, "step": 2671 }, { "epoch": 0.3723263429248241, "grad_norm": 0.33032935857772827, "learning_rate": 7.626795668633938e-06, "loss": 0.06221771240234375, "step": 2672 }, { "epoch": 0.37246568661603846, "grad_norm": 2.102725028991699, "learning_rate": 7.624790035404831e-06, "loss": 0.09935760498046875, "step": 2673 }, { "epoch": 0.37260503030725284, "grad_norm": 0.8670265078544617, "learning_rate": 7.622783819007305e-06, "loss": 0.0786590576171875, "step": 2674 }, { "epoch": 0.3727443739984672, "grad_norm": 0.6316449046134949, "learning_rate": 7.620777019887091e-06, "loss": 0.09352493286132812, "step": 2675 }, { "epoch": 0.3728837176896816, "grad_norm": 0.8409988880157471, "learning_rate": 7.6187696384900585e-06, "loss": 0.09809303283691406, "step": 2676 }, { "epoch": 0.37302306138089597, "grad_norm": 2.1602933406829834, "learning_rate": 7.616761675262199e-06, "loss": 0.13886451721191406, "step": 2677 }, { "epoch": 0.37316240507211035, "grad_norm": 1.704835295677185, "learning_rate": 7.614753130649638e-06, "loss": 0.12755584716796875, "step": 2678 }, { "epoch": 0.3733017487633247, "grad_norm": 0.8761610984802246, "learning_rate": 7.612744005098625e-06, "loss": 0.07877445220947266, "step": 2679 }, { "epoch": 0.3734410924545391, "grad_norm": 0.45714524388313293, "learning_rate": 7.6107342990555466e-06, "loss": 0.06469345092773438, "step": 2680 }, { "epoch": 0.3735804361457535, "grad_norm": 0.9500712752342224, "learning_rate": 7.60872401296691e-06, "loss": 0.07209205627441406, "step": 2681 }, { "epoch": 0.37371977983696786, "grad_norm": 2.1427218914031982, "learning_rate": 7.606713147279356e-06, "loss": 0.11430549621582031, "step": 2682 }, { "epoch": 0.37385912352818224, "grad_norm": 1.5496026277542114, "learning_rate": 7.604701702439652e-06, "loss": 0.13257408142089844, "step": 2683 }, { "epoch": 0.37399846721939667, "grad_norm": 1.140069603919983, "learning_rate": 7.602689678894697e-06, "loss": 0.08836507797241211, "step": 2684 }, { "epoch": 0.37413781091061105, "grad_norm": 0.5229848027229309, "learning_rate": 7.6006770770915165e-06, "loss": 0.07672309875488281, "step": 2685 }, { "epoch": 0.3742771546018254, "grad_norm": 0.851206362247467, "learning_rate": 7.598663897477263e-06, "loss": 0.08873653411865234, "step": 2686 }, { "epoch": 0.3744164982930398, "grad_norm": 1.2564115524291992, "learning_rate": 7.59665014049922e-06, "loss": 0.11538124084472656, "step": 2687 }, { "epoch": 0.3745558419842542, "grad_norm": 1.3873119354248047, "learning_rate": 7.594635806604797e-06, "loss": 0.11807632446289062, "step": 2688 }, { "epoch": 0.37469518567546856, "grad_norm": 1.1367523670196533, "learning_rate": 7.592620896241536e-06, "loss": 0.07495689392089844, "step": 2689 }, { "epoch": 0.37483452936668293, "grad_norm": 1.2135981321334839, "learning_rate": 7.590605409857103e-06, "loss": 0.09852790832519531, "step": 2690 }, { "epoch": 0.3749738730578973, "grad_norm": 0.5426661968231201, "learning_rate": 7.58858934789929e-06, "loss": 0.09875106811523438, "step": 2691 }, { "epoch": 0.3751132167491117, "grad_norm": 1.6180075407028198, "learning_rate": 7.586572710816025e-06, "loss": 0.12324333190917969, "step": 2692 }, { "epoch": 0.37525256044032607, "grad_norm": 2.122147798538208, "learning_rate": 7.584555499055355e-06, "loss": 0.10907363891601562, "step": 2693 }, { "epoch": 0.37539190413154044, "grad_norm": 0.8908865451812744, "learning_rate": 7.58253771306546e-06, "loss": 0.10401153564453125, "step": 2694 }, { "epoch": 0.3755312478227548, "grad_norm": 0.7651374340057373, "learning_rate": 7.5805193532946445e-06, "loss": 0.07074737548828125, "step": 2695 }, { "epoch": 0.3756705915139692, "grad_norm": 1.7752858400344849, "learning_rate": 7.578500420191344e-06, "loss": 0.12226295471191406, "step": 2696 }, { "epoch": 0.3758099352051836, "grad_norm": 0.922805666923523, "learning_rate": 7.576480914204118e-06, "loss": 0.07891845703125, "step": 2697 }, { "epoch": 0.37594927889639795, "grad_norm": 1.704084873199463, "learning_rate": 7.574460835781654e-06, "loss": 0.09969902038574219, "step": 2698 }, { "epoch": 0.37608862258761233, "grad_norm": 0.6209998726844788, "learning_rate": 7.572440185372769e-06, "loss": 0.08217430114746094, "step": 2699 }, { "epoch": 0.3762279662788267, "grad_norm": 0.7250328660011292, "learning_rate": 7.570418963426405e-06, "loss": 0.08876228332519531, "step": 2700 }, { "epoch": 0.3763673099700411, "grad_norm": 1.3925824165344238, "learning_rate": 7.568397170391631e-06, "loss": 0.16161727905273438, "step": 2701 }, { "epoch": 0.37650665366125546, "grad_norm": 2.149243116378784, "learning_rate": 7.566374806717642e-06, "loss": 0.08498477935791016, "step": 2702 }, { "epoch": 0.37664599735246984, "grad_norm": 2.4570531845092773, "learning_rate": 7.564351872853763e-06, "loss": 0.11546516418457031, "step": 2703 }, { "epoch": 0.3767853410436843, "grad_norm": 0.9045449495315552, "learning_rate": 7.562328369249443e-06, "loss": 0.08735084533691406, "step": 2704 }, { "epoch": 0.37692468473489865, "grad_norm": 1.4899897575378418, "learning_rate": 7.560304296354259e-06, "loss": 0.10373306274414062, "step": 2705 }, { "epoch": 0.37706402842611303, "grad_norm": 0.8704450130462646, "learning_rate": 7.5582796546179125e-06, "loss": 0.09960746765136719, "step": 2706 }, { "epoch": 0.3772033721173274, "grad_norm": 1.0027211904525757, "learning_rate": 7.556254444490232e-06, "loss": 0.08464813232421875, "step": 2707 }, { "epoch": 0.3773427158085418, "grad_norm": 0.7640451192855835, "learning_rate": 7.554228666421176e-06, "loss": 0.09842872619628906, "step": 2708 }, { "epoch": 0.37748205949975616, "grad_norm": 0.6031020879745483, "learning_rate": 7.552202320860823e-06, "loss": 0.07926368713378906, "step": 2709 }, { "epoch": 0.37762140319097054, "grad_norm": 2.413330078125, "learning_rate": 7.550175408259383e-06, "loss": 0.12441825866699219, "step": 2710 }, { "epoch": 0.3777607468821849, "grad_norm": 1.4039173126220703, "learning_rate": 7.548147929067189e-06, "loss": 0.09520530700683594, "step": 2711 }, { "epoch": 0.3779000905733993, "grad_norm": 1.3648772239685059, "learning_rate": 7.546119883734699e-06, "loss": 0.10811138153076172, "step": 2712 }, { "epoch": 0.37803943426461367, "grad_norm": 1.7997660636901855, "learning_rate": 7.544091272712501e-06, "loss": 0.15881967544555664, "step": 2713 }, { "epoch": 0.37817877795582805, "grad_norm": 1.05487859249115, "learning_rate": 7.542062096451306e-06, "loss": 0.11419486999511719, "step": 2714 }, { "epoch": 0.3783181216470424, "grad_norm": 1.0135748386383057, "learning_rate": 7.540032355401948e-06, "loss": 0.07793807983398438, "step": 2715 }, { "epoch": 0.3784574653382568, "grad_norm": 1.6238149404525757, "learning_rate": 7.53800205001539e-06, "loss": 0.10307502746582031, "step": 2716 }, { "epoch": 0.3785968090294712, "grad_norm": 1.3936116695404053, "learning_rate": 7.53597118074272e-06, "loss": 0.09581184387207031, "step": 2717 }, { "epoch": 0.37873615272068556, "grad_norm": 0.8747954964637756, "learning_rate": 7.5339397480351525e-06, "loss": 0.10614013671875, "step": 2718 }, { "epoch": 0.37887549641189994, "grad_norm": 1.4901119470596313, "learning_rate": 7.531907752344023e-06, "loss": 0.11805915832519531, "step": 2719 }, { "epoch": 0.3790148401031143, "grad_norm": 1.9763730764389038, "learning_rate": 7.529875194120795e-06, "loss": 0.14119911193847656, "step": 2720 }, { "epoch": 0.3791541837943287, "grad_norm": 0.99687659740448, "learning_rate": 7.527842073817056e-06, "loss": 0.09323883056640625, "step": 2721 }, { "epoch": 0.37929352748554307, "grad_norm": 0.7172250151634216, "learning_rate": 7.525808391884521e-06, "loss": 0.07081794738769531, "step": 2722 }, { "epoch": 0.37943287117675745, "grad_norm": 1.0202380418777466, "learning_rate": 7.523774148775027e-06, "loss": 0.08795547485351562, "step": 2723 }, { "epoch": 0.3795722148679719, "grad_norm": 0.651542067527771, "learning_rate": 7.521739344940535e-06, "loss": 0.069488525390625, "step": 2724 }, { "epoch": 0.37971155855918626, "grad_norm": 1.1412200927734375, "learning_rate": 7.519703980833133e-06, "loss": 0.09177017211914062, "step": 2725 }, { "epoch": 0.37985090225040063, "grad_norm": 1.609694480895996, "learning_rate": 7.517668056905033e-06, "loss": 0.10470199584960938, "step": 2726 }, { "epoch": 0.379990245941615, "grad_norm": 1.3368706703186035, "learning_rate": 7.515631573608568e-06, "loss": 0.10950469970703125, "step": 2727 }, { "epoch": 0.3801295896328294, "grad_norm": 0.7091033458709717, "learning_rate": 7.513594531396202e-06, "loss": 0.10251235961914062, "step": 2728 }, { "epoch": 0.38026893332404377, "grad_norm": 3.1468265056610107, "learning_rate": 7.511556930720517e-06, "loss": 0.119964599609375, "step": 2729 }, { "epoch": 0.38040827701525814, "grad_norm": 1.20907461643219, "learning_rate": 7.5095187720342224e-06, "loss": 0.0798807144165039, "step": 2730 }, { "epoch": 0.3805476207064725, "grad_norm": 1.7622965574264526, "learning_rate": 7.50748005579015e-06, "loss": 0.10100269317626953, "step": 2731 }, { "epoch": 0.3806869643976869, "grad_norm": 0.9547238945960999, "learning_rate": 7.505440782441256e-06, "loss": 0.08672523498535156, "step": 2732 }, { "epoch": 0.3808263080889013, "grad_norm": 1.1725212335586548, "learning_rate": 7.503400952440618e-06, "loss": 0.10998916625976562, "step": 2733 }, { "epoch": 0.38096565178011566, "grad_norm": 1.0259273052215576, "learning_rate": 7.501360566241444e-06, "loss": 0.11072158813476562, "step": 2734 }, { "epoch": 0.38110499547133003, "grad_norm": 0.5225174427032471, "learning_rate": 7.499319624297059e-06, "loss": 0.0956716537475586, "step": 2735 }, { "epoch": 0.3812443391625444, "grad_norm": 0.9725214242935181, "learning_rate": 7.497278127060914e-06, "loss": 0.0845794677734375, "step": 2736 }, { "epoch": 0.3813836828537588, "grad_norm": 2.412216901779175, "learning_rate": 7.4952360749865825e-06, "loss": 0.1350088119506836, "step": 2737 }, { "epoch": 0.38152302654497317, "grad_norm": 4.4592814445495605, "learning_rate": 7.493193468527764e-06, "loss": 0.13622665405273438, "step": 2738 }, { "epoch": 0.38166237023618754, "grad_norm": 1.2576411962509155, "learning_rate": 7.491150308138275e-06, "loss": 0.13079833984375, "step": 2739 }, { "epoch": 0.3818017139274019, "grad_norm": 1.566916823387146, "learning_rate": 7.489106594272063e-06, "loss": 0.1307830810546875, "step": 2740 }, { "epoch": 0.3819410576186163, "grad_norm": 1.1752616167068481, "learning_rate": 7.487062327383192e-06, "loss": 0.07335853576660156, "step": 2741 }, { "epoch": 0.3820804013098307, "grad_norm": 2.106729507446289, "learning_rate": 7.485017507925853e-06, "loss": 0.1104278564453125, "step": 2742 }, { "epoch": 0.38221974500104505, "grad_norm": 1.2169950008392334, "learning_rate": 7.482972136354359e-06, "loss": 0.10726547241210938, "step": 2743 }, { "epoch": 0.3823590886922595, "grad_norm": 0.8464601039886475, "learning_rate": 7.480926213123142e-06, "loss": 0.10140037536621094, "step": 2744 }, { "epoch": 0.38249843238347386, "grad_norm": 0.4044667184352875, "learning_rate": 7.4788797386867596e-06, "loss": 0.06764602661132812, "step": 2745 }, { "epoch": 0.38263777607468824, "grad_norm": 0.8052082657814026, "learning_rate": 7.476832713499896e-06, "loss": 0.10320854187011719, "step": 2746 }, { "epoch": 0.3827771197659026, "grad_norm": 1.0014756917953491, "learning_rate": 7.474785138017349e-06, "loss": 0.09865379333496094, "step": 2747 }, { "epoch": 0.382916463457117, "grad_norm": 1.8332188129425049, "learning_rate": 7.472737012694045e-06, "loss": 0.12546348571777344, "step": 2748 }, { "epoch": 0.3830558071483314, "grad_norm": 0.9360504150390625, "learning_rate": 7.470688337985029e-06, "loss": 0.09827041625976562, "step": 2749 }, { "epoch": 0.38319515083954575, "grad_norm": 0.9596089720726013, "learning_rate": 7.468639114345473e-06, "loss": 0.10425281524658203, "step": 2750 }, { "epoch": 0.38333449453076013, "grad_norm": 1.1046366691589355, "learning_rate": 7.466589342230664e-06, "loss": 0.11086845397949219, "step": 2751 }, { "epoch": 0.3834738382219745, "grad_norm": 0.6011776328086853, "learning_rate": 7.464539022096018e-06, "loss": 0.06333255767822266, "step": 2752 }, { "epoch": 0.3836131819131889, "grad_norm": 2.0250515937805176, "learning_rate": 7.462488154397067e-06, "loss": 0.11240196228027344, "step": 2753 }, { "epoch": 0.38375252560440326, "grad_norm": 1.1617769002914429, "learning_rate": 7.460436739589467e-06, "loss": 0.13191986083984375, "step": 2754 }, { "epoch": 0.38389186929561764, "grad_norm": 0.7626031041145325, "learning_rate": 7.458384778128997e-06, "loss": 0.070465087890625, "step": 2755 }, { "epoch": 0.384031212986832, "grad_norm": 1.1206451654434204, "learning_rate": 7.4563322704715556e-06, "loss": 0.12818527221679688, "step": 2756 }, { "epoch": 0.3841705566780464, "grad_norm": 0.7049247622489929, "learning_rate": 7.45427921707316e-06, "loss": 0.07486534118652344, "step": 2757 }, { "epoch": 0.38430990036926077, "grad_norm": 1.563226342201233, "learning_rate": 7.452225618389959e-06, "loss": 0.12804603576660156, "step": 2758 }, { "epoch": 0.38444924406047515, "grad_norm": 0.5383937954902649, "learning_rate": 7.450171474878207e-06, "loss": 0.07350826263427734, "step": 2759 }, { "epoch": 0.3845885877516895, "grad_norm": 0.9208130240440369, "learning_rate": 7.4481167869942934e-06, "loss": 0.11795997619628906, "step": 2760 }, { "epoch": 0.3847279314429039, "grad_norm": 0.711803674697876, "learning_rate": 7.446061555194721e-06, "loss": 0.08341455459594727, "step": 2761 }, { "epoch": 0.3848672751341183, "grad_norm": 0.6205193400382996, "learning_rate": 7.4440057799361155e-06, "loss": 0.077239990234375, "step": 2762 }, { "epoch": 0.38500661882533266, "grad_norm": 0.8662476539611816, "learning_rate": 7.441949461675223e-06, "loss": 0.08988189697265625, "step": 2763 }, { "epoch": 0.38514596251654704, "grad_norm": 1.2202973365783691, "learning_rate": 7.439892600868911e-06, "loss": 0.13089942932128906, "step": 2764 }, { "epoch": 0.38528530620776147, "grad_norm": 0.40757593512535095, "learning_rate": 7.437835197974167e-06, "loss": 0.07526397705078125, "step": 2765 }, { "epoch": 0.38542464989897585, "grad_norm": 1.3476656675338745, "learning_rate": 7.435777253448099e-06, "loss": 0.12331581115722656, "step": 2766 }, { "epoch": 0.3855639935901902, "grad_norm": 0.8190451264381409, "learning_rate": 7.433718767747934e-06, "loss": 0.08985137939453125, "step": 2767 }, { "epoch": 0.3857033372814046, "grad_norm": 0.9023891687393188, "learning_rate": 7.431659741331022e-06, "loss": 0.07475948333740234, "step": 2768 }, { "epoch": 0.385842680972619, "grad_norm": 1.4769963026046753, "learning_rate": 7.429600174654832e-06, "loss": 0.11969757080078125, "step": 2769 }, { "epoch": 0.38598202466383336, "grad_norm": 0.9424033761024475, "learning_rate": 7.427540068176951e-06, "loss": 0.086273193359375, "step": 2770 }, { "epoch": 0.38612136835504773, "grad_norm": 0.9487465620040894, "learning_rate": 7.4254794223550885e-06, "loss": 0.08994865417480469, "step": 2771 }, { "epoch": 0.3862607120462621, "grad_norm": 1.8735674619674683, "learning_rate": 7.423418237647073e-06, "loss": 0.11609649658203125, "step": 2772 }, { "epoch": 0.3864000557374765, "grad_norm": 0.71442711353302, "learning_rate": 7.421356514510853e-06, "loss": 0.07481193542480469, "step": 2773 }, { "epoch": 0.38653939942869087, "grad_norm": 0.5170733332633972, "learning_rate": 7.419294253404497e-06, "loss": 0.07294750213623047, "step": 2774 }, { "epoch": 0.38667874311990524, "grad_norm": 1.5182075500488281, "learning_rate": 7.417231454786189e-06, "loss": 0.09654045104980469, "step": 2775 }, { "epoch": 0.3868180868111196, "grad_norm": 1.245602011680603, "learning_rate": 7.41516811911424e-06, "loss": 0.11224746704101562, "step": 2776 }, { "epoch": 0.386957430502334, "grad_norm": 1.2353674173355103, "learning_rate": 7.4131042468470725e-06, "loss": 0.11304092407226562, "step": 2777 }, { "epoch": 0.3870967741935484, "grad_norm": 2.809511661529541, "learning_rate": 7.411039838443234e-06, "loss": 0.09914779663085938, "step": 2778 }, { "epoch": 0.38723611788476275, "grad_norm": 1.2447707653045654, "learning_rate": 7.4089748943613895e-06, "loss": 0.07857322692871094, "step": 2779 }, { "epoch": 0.38737546157597713, "grad_norm": 1.0277462005615234, "learning_rate": 7.406909415060321e-06, "loss": 0.1190948486328125, "step": 2780 }, { "epoch": 0.3875148052671915, "grad_norm": 0.8876571655273438, "learning_rate": 7.404843400998931e-06, "loss": 0.10613441467285156, "step": 2781 }, { "epoch": 0.3876541489584059, "grad_norm": 1.7457334995269775, "learning_rate": 7.4027768526362395e-06, "loss": 0.11511802673339844, "step": 2782 }, { "epoch": 0.38779349264962026, "grad_norm": 1.3630317449569702, "learning_rate": 7.4007097704313894e-06, "loss": 0.1064300537109375, "step": 2783 }, { "epoch": 0.38793283634083464, "grad_norm": 1.5874310731887817, "learning_rate": 7.398642154843637e-06, "loss": 0.10643196105957031, "step": 2784 }, { "epoch": 0.3880721800320491, "grad_norm": 0.7969020009040833, "learning_rate": 7.39657400633236e-06, "loss": 0.07548141479492188, "step": 2785 }, { "epoch": 0.38821152372326345, "grad_norm": 0.7701838612556458, "learning_rate": 7.394505325357053e-06, "loss": 0.08335494995117188, "step": 2786 }, { "epoch": 0.38835086741447783, "grad_norm": 0.7254396080970764, "learning_rate": 7.392436112377331e-06, "loss": 0.09271812438964844, "step": 2787 }, { "epoch": 0.3884902111056922, "grad_norm": 1.5412023067474365, "learning_rate": 7.390366367852923e-06, "loss": 0.1008758544921875, "step": 2788 }, { "epoch": 0.3886295547969066, "grad_norm": 0.5092147588729858, "learning_rate": 7.388296092243683e-06, "loss": 0.06979751586914062, "step": 2789 }, { "epoch": 0.38876889848812096, "grad_norm": 0.38294097781181335, "learning_rate": 7.386225286009576e-06, "loss": 0.06377220153808594, "step": 2790 }, { "epoch": 0.38890824217933534, "grad_norm": 0.6573617458343506, "learning_rate": 7.384153949610689e-06, "loss": 0.07810688018798828, "step": 2791 }, { "epoch": 0.3890475858705497, "grad_norm": 0.9029906988143921, "learning_rate": 7.382082083507226e-06, "loss": 0.08032989501953125, "step": 2792 }, { "epoch": 0.3891869295617641, "grad_norm": 0.814305305480957, "learning_rate": 7.380009688159507e-06, "loss": 0.09347057342529297, "step": 2793 }, { "epoch": 0.38932627325297847, "grad_norm": 0.9147196412086487, "learning_rate": 7.377936764027973e-06, "loss": 0.11145782470703125, "step": 2794 }, { "epoch": 0.38946561694419285, "grad_norm": 0.9318459630012512, "learning_rate": 7.375863311573179e-06, "loss": 0.10149192810058594, "step": 2795 }, { "epoch": 0.3896049606354072, "grad_norm": 1.087348222732544, "learning_rate": 7.373789331255799e-06, "loss": 0.12082099914550781, "step": 2796 }, { "epoch": 0.3897443043266216, "grad_norm": 2.416243076324463, "learning_rate": 7.371714823536624e-06, "loss": 0.1404132843017578, "step": 2797 }, { "epoch": 0.389883648017836, "grad_norm": 1.4804506301879883, "learning_rate": 7.369639788876561e-06, "loss": 0.12264060974121094, "step": 2798 }, { "epoch": 0.39002299170905036, "grad_norm": 0.9950807690620422, "learning_rate": 7.367564227736639e-06, "loss": 0.12455177307128906, "step": 2799 }, { "epoch": 0.39016233540026474, "grad_norm": 0.8402647972106934, "learning_rate": 7.365488140577997e-06, "loss": 0.08763313293457031, "step": 2800 }, { "epoch": 0.3903016790914791, "grad_norm": 2.0878334045410156, "learning_rate": 7.3634115278618955e-06, "loss": 0.17906856536865234, "step": 2801 }, { "epoch": 0.3904410227826935, "grad_norm": 0.9839760065078735, "learning_rate": 7.36133439004971e-06, "loss": 0.10984039306640625, "step": 2802 }, { "epoch": 0.39058036647390787, "grad_norm": 1.4973078966140747, "learning_rate": 7.3592567276029336e-06, "loss": 0.1180419921875, "step": 2803 }, { "epoch": 0.39071971016512225, "grad_norm": 0.8186124563217163, "learning_rate": 7.357178540983174e-06, "loss": 0.08049201965332031, "step": 2804 }, { "epoch": 0.3908590538563367, "grad_norm": 1.1073150634765625, "learning_rate": 7.355099830652159e-06, "loss": 0.10425376892089844, "step": 2805 }, { "epoch": 0.39099839754755106, "grad_norm": 1.3039976358413696, "learning_rate": 7.353020597071729e-06, "loss": 0.08493995666503906, "step": 2806 }, { "epoch": 0.39113774123876544, "grad_norm": 0.9983201622962952, "learning_rate": 7.350940840703842e-06, "loss": 0.1188812255859375, "step": 2807 }, { "epoch": 0.3912770849299798, "grad_norm": 0.7417959570884705, "learning_rate": 7.348860562010574e-06, "loss": 0.0892333984375, "step": 2808 }, { "epoch": 0.3914164286211942, "grad_norm": 2.5353546142578125, "learning_rate": 7.346779761454113e-06, "loss": 0.13927841186523438, "step": 2809 }, { "epoch": 0.39155577231240857, "grad_norm": 1.3687549829483032, "learning_rate": 7.3446984394967705e-06, "loss": 0.14473915100097656, "step": 2810 }, { "epoch": 0.39169511600362295, "grad_norm": 1.1798118352890015, "learning_rate": 7.342616596600961e-06, "loss": 0.11455154418945312, "step": 2811 }, { "epoch": 0.3918344596948373, "grad_norm": 1.4229562282562256, "learning_rate": 7.3405342332292286e-06, "loss": 0.09488487243652344, "step": 2812 }, { "epoch": 0.3919738033860517, "grad_norm": 1.583497405052185, "learning_rate": 7.338451349844225e-06, "loss": 0.10467147827148438, "step": 2813 }, { "epoch": 0.3921131470772661, "grad_norm": 0.9112492203712463, "learning_rate": 7.336367946908718e-06, "loss": 0.10771369934082031, "step": 2814 }, { "epoch": 0.39225249076848046, "grad_norm": 1.1913164854049683, "learning_rate": 7.334284024885595e-06, "loss": 0.09684562683105469, "step": 2815 }, { "epoch": 0.39239183445969483, "grad_norm": 1.4289721250534058, "learning_rate": 7.332199584237854e-06, "loss": 0.09559917449951172, "step": 2816 }, { "epoch": 0.3925311781509092, "grad_norm": 0.8438602089881897, "learning_rate": 7.330114625428609e-06, "loss": 0.08637809753417969, "step": 2817 }, { "epoch": 0.3926705218421236, "grad_norm": 0.6458803415298462, "learning_rate": 7.328029148921093e-06, "loss": 0.06750679016113281, "step": 2818 }, { "epoch": 0.39280986553333797, "grad_norm": 0.9062337279319763, "learning_rate": 7.32594315517865e-06, "loss": 0.08816719055175781, "step": 2819 }, { "epoch": 0.39294920922455234, "grad_norm": 1.4983747005462646, "learning_rate": 7.32385664466474e-06, "loss": 0.0977325439453125, "step": 2820 }, { "epoch": 0.3930885529157667, "grad_norm": 0.6942716240882874, "learning_rate": 7.321769617842937e-06, "loss": 0.0727081298828125, "step": 2821 }, { "epoch": 0.3932278966069811, "grad_norm": 1.8900327682495117, "learning_rate": 7.319682075176932e-06, "loss": 0.12387847900390625, "step": 2822 }, { "epoch": 0.3933672402981955, "grad_norm": 1.0913857221603394, "learning_rate": 7.317594017130529e-06, "loss": 0.10368728637695312, "step": 2823 }, { "epoch": 0.39350658398940985, "grad_norm": 0.7303573489189148, "learning_rate": 7.3155054441676485e-06, "loss": 0.09252738952636719, "step": 2824 }, { "epoch": 0.3936459276806243, "grad_norm": 1.4005131721496582, "learning_rate": 7.313416356752321e-06, "loss": 0.09465217590332031, "step": 2825 }, { "epoch": 0.39378527137183866, "grad_norm": 1.6763367652893066, "learning_rate": 7.311326755348697e-06, "loss": 0.16651535034179688, "step": 2826 }, { "epoch": 0.39392461506305304, "grad_norm": 2.1439261436462402, "learning_rate": 7.309236640421033e-06, "loss": 0.14134597778320312, "step": 2827 }, { "epoch": 0.3940639587542674, "grad_norm": 0.7871216535568237, "learning_rate": 7.30714601243371e-06, "loss": 0.11655616760253906, "step": 2828 }, { "epoch": 0.3942033024454818, "grad_norm": 1.76210355758667, "learning_rate": 7.305054871851217e-06, "loss": 0.12522125244140625, "step": 2829 }, { "epoch": 0.3943426461366962, "grad_norm": 1.3140443563461304, "learning_rate": 7.302963219138156e-06, "loss": 0.10935211181640625, "step": 2830 }, { "epoch": 0.39448198982791055, "grad_norm": 2.7648260593414307, "learning_rate": 7.3008710547592465e-06, "loss": 0.12582111358642578, "step": 2831 }, { "epoch": 0.39462133351912493, "grad_norm": 1.2853647470474243, "learning_rate": 7.298778379179317e-06, "loss": 0.09466552734375, "step": 2832 }, { "epoch": 0.3947606772103393, "grad_norm": 1.0158677101135254, "learning_rate": 7.296685192863313e-06, "loss": 0.08890151977539062, "step": 2833 }, { "epoch": 0.3949000209015537, "grad_norm": 0.9794784188270569, "learning_rate": 7.2945914962762954e-06, "loss": 0.09069442749023438, "step": 2834 }, { "epoch": 0.39503936459276806, "grad_norm": 1.7342252731323242, "learning_rate": 7.292497289883432e-06, "loss": 0.12172317504882812, "step": 2835 }, { "epoch": 0.39517870828398244, "grad_norm": 1.1207438707351685, "learning_rate": 7.29040257415001e-06, "loss": 0.10068893432617188, "step": 2836 }, { "epoch": 0.3953180519751968, "grad_norm": 1.7166072130203247, "learning_rate": 7.288307349541427e-06, "loss": 0.08907604217529297, "step": 2837 }, { "epoch": 0.3954573956664112, "grad_norm": 0.7455818057060242, "learning_rate": 7.286211616523193e-06, "loss": 0.06540679931640625, "step": 2838 }, { "epoch": 0.39559673935762557, "grad_norm": 1.6405688524246216, "learning_rate": 7.284115375560934e-06, "loss": 0.11849594116210938, "step": 2839 }, { "epoch": 0.39573608304883995, "grad_norm": 0.8388352394104004, "learning_rate": 7.282018627120386e-06, "loss": 0.08108711242675781, "step": 2840 }, { "epoch": 0.3958754267400543, "grad_norm": 0.8558743596076965, "learning_rate": 7.279921371667397e-06, "loss": 0.08488845825195312, "step": 2841 }, { "epoch": 0.3960147704312687, "grad_norm": 0.6920443177223206, "learning_rate": 7.2778236096679325e-06, "loss": 0.09478950500488281, "step": 2842 }, { "epoch": 0.3961541141224831, "grad_norm": 1.1264653205871582, "learning_rate": 7.275725341588064e-06, "loss": 0.10549449920654297, "step": 2843 }, { "epoch": 0.39629345781369746, "grad_norm": 0.5088220834732056, "learning_rate": 7.27362656789398e-06, "loss": 0.0659017562866211, "step": 2844 }, { "epoch": 0.3964328015049119, "grad_norm": 0.7169369459152222, "learning_rate": 7.2715272890519815e-06, "loss": 0.09208297729492188, "step": 2845 }, { "epoch": 0.39657214519612627, "grad_norm": 1.3439853191375732, "learning_rate": 7.2694275055284795e-06, "loss": 0.12746429443359375, "step": 2846 }, { "epoch": 0.39671148888734065, "grad_norm": 1.9048656225204468, "learning_rate": 7.267327217789998e-06, "loss": 0.11641883850097656, "step": 2847 }, { "epoch": 0.396850832578555, "grad_norm": 1.4707962274551392, "learning_rate": 7.26522642630317e-06, "loss": 0.09962081909179688, "step": 2848 }, { "epoch": 0.3969901762697694, "grad_norm": 0.6035178899765015, "learning_rate": 7.263125131534749e-06, "loss": 0.07508277893066406, "step": 2849 }, { "epoch": 0.3971295199609838, "grad_norm": 0.7984126210212708, "learning_rate": 7.26102333395159e-06, "loss": 0.09100532531738281, "step": 2850 }, { "epoch": 0.39726886365219816, "grad_norm": 0.667176365852356, "learning_rate": 7.2589210340206675e-06, "loss": 0.07759284973144531, "step": 2851 }, { "epoch": 0.39740820734341253, "grad_norm": 1.498204231262207, "learning_rate": 7.256818232209062e-06, "loss": 0.09772682189941406, "step": 2852 }, { "epoch": 0.3975475510346269, "grad_norm": 1.224646806716919, "learning_rate": 7.25471492898397e-06, "loss": 0.08989334106445312, "step": 2853 }, { "epoch": 0.3976868947258413, "grad_norm": 0.8966752886772156, "learning_rate": 7.2526111248126976e-06, "loss": 0.13393402099609375, "step": 2854 }, { "epoch": 0.39782623841705567, "grad_norm": 1.367539405822754, "learning_rate": 7.250506820162661e-06, "loss": 0.11469268798828125, "step": 2855 }, { "epoch": 0.39796558210827004, "grad_norm": 1.4716556072235107, "learning_rate": 7.248402015501388e-06, "loss": 0.0953207015991211, "step": 2856 }, { "epoch": 0.3981049257994844, "grad_norm": 0.9242132306098938, "learning_rate": 7.246296711296519e-06, "loss": 0.09709739685058594, "step": 2857 }, { "epoch": 0.3982442694906988, "grad_norm": 0.7977060079574585, "learning_rate": 7.244190908015805e-06, "loss": 0.08585548400878906, "step": 2858 }, { "epoch": 0.3983836131819132, "grad_norm": 1.526614785194397, "learning_rate": 7.2420846061271065e-06, "loss": 0.12800979614257812, "step": 2859 }, { "epoch": 0.39852295687312755, "grad_norm": 0.580603837966919, "learning_rate": 7.239977806098398e-06, "loss": 0.07091617584228516, "step": 2860 }, { "epoch": 0.39866230056434193, "grad_norm": 1.0906919240951538, "learning_rate": 7.237870508397757e-06, "loss": 0.11487960815429688, "step": 2861 }, { "epoch": 0.3988016442555563, "grad_norm": 0.736953854560852, "learning_rate": 7.235762713493384e-06, "loss": 0.08519172668457031, "step": 2862 }, { "epoch": 0.3989409879467707, "grad_norm": 0.8075886368751526, "learning_rate": 7.2336544218535776e-06, "loss": 0.094879150390625, "step": 2863 }, { "epoch": 0.39908033163798506, "grad_norm": 1.5863120555877686, "learning_rate": 7.231545633946755e-06, "loss": 0.1080322265625, "step": 2864 }, { "epoch": 0.3992196753291995, "grad_norm": 2.0908820629119873, "learning_rate": 7.229436350241439e-06, "loss": 0.11655139923095703, "step": 2865 }, { "epoch": 0.3993590190204139, "grad_norm": 0.524876058101654, "learning_rate": 7.2273265712062646e-06, "loss": 0.07100296020507812, "step": 2866 }, { "epoch": 0.39949836271162825, "grad_norm": 0.6847808361053467, "learning_rate": 7.225216297309977e-06, "loss": 0.08309173583984375, "step": 2867 }, { "epoch": 0.39963770640284263, "grad_norm": 0.7635623812675476, "learning_rate": 7.22310552902143e-06, "loss": 0.08487987518310547, "step": 2868 }, { "epoch": 0.399777050094057, "grad_norm": 0.9191048741340637, "learning_rate": 7.220994266809591e-06, "loss": 0.07683181762695312, "step": 2869 }, { "epoch": 0.3999163937852714, "grad_norm": 0.8839223980903625, "learning_rate": 7.21888251114353e-06, "loss": 0.08075904846191406, "step": 2870 }, { "epoch": 0.40005573747648576, "grad_norm": 0.6946244239807129, "learning_rate": 7.2167702624924345e-06, "loss": 0.10128593444824219, "step": 2871 }, { "epoch": 0.40019508116770014, "grad_norm": 1.3291383981704712, "learning_rate": 7.2146575213255945e-06, "loss": 0.1302776336669922, "step": 2872 }, { "epoch": 0.4003344248589145, "grad_norm": 0.7857061624526978, "learning_rate": 7.212544288112415e-06, "loss": 0.08207130432128906, "step": 2873 }, { "epoch": 0.4004737685501289, "grad_norm": 0.37223055958747864, "learning_rate": 7.21043056332241e-06, "loss": 0.061026573181152344, "step": 2874 }, { "epoch": 0.40061311224134327, "grad_norm": 1.1545871496200562, "learning_rate": 7.208316347425197e-06, "loss": 0.1308746337890625, "step": 2875 }, { "epoch": 0.40075245593255765, "grad_norm": 1.4579797983169556, "learning_rate": 7.206201640890509e-06, "loss": 0.1262531280517578, "step": 2876 }, { "epoch": 0.400891799623772, "grad_norm": 1.2077782154083252, "learning_rate": 7.204086444188184e-06, "loss": 0.086517333984375, "step": 2877 }, { "epoch": 0.4010311433149864, "grad_norm": 0.9114617705345154, "learning_rate": 7.201970757788172e-06, "loss": 0.08617019653320312, "step": 2878 }, { "epoch": 0.4011704870062008, "grad_norm": 1.2728197574615479, "learning_rate": 7.199854582160529e-06, "loss": 0.12010574340820312, "step": 2879 }, { "epoch": 0.40130983069741516, "grad_norm": 1.0825895071029663, "learning_rate": 7.197737917775422e-06, "loss": 0.0988616943359375, "step": 2880 }, { "epoch": 0.40144917438862954, "grad_norm": 0.6934299468994141, "learning_rate": 7.1956207651031254e-06, "loss": 0.101104736328125, "step": 2881 }, { "epoch": 0.4015885180798439, "grad_norm": 1.0516258478164673, "learning_rate": 7.193503124614021e-06, "loss": 0.09938240051269531, "step": 2882 }, { "epoch": 0.4017278617710583, "grad_norm": 1.7590742111206055, "learning_rate": 7.191384996778601e-06, "loss": 0.11414718627929688, "step": 2883 }, { "epoch": 0.40186720546227267, "grad_norm": 0.7002395987510681, "learning_rate": 7.189266382067464e-06, "loss": 0.11032485961914062, "step": 2884 }, { "epoch": 0.4020065491534871, "grad_norm": 0.6572365760803223, "learning_rate": 7.1871472809513185e-06, "loss": 0.08056831359863281, "step": 2885 }, { "epoch": 0.4021458928447015, "grad_norm": 0.9494178295135498, "learning_rate": 7.185027693900982e-06, "loss": 0.11026954650878906, "step": 2886 }, { "epoch": 0.40228523653591586, "grad_norm": 1.281967043876648, "learning_rate": 7.182907621387376e-06, "loss": 0.1190948486328125, "step": 2887 }, { "epoch": 0.40242458022713024, "grad_norm": 0.524695634841919, "learning_rate": 7.180787063881534e-06, "loss": 0.07182121276855469, "step": 2888 }, { "epoch": 0.4025639239183446, "grad_norm": 1.7971256971359253, "learning_rate": 7.178666021854593e-06, "loss": 0.10129547119140625, "step": 2889 }, { "epoch": 0.402703267609559, "grad_norm": 1.1058554649353027, "learning_rate": 7.176544495777804e-06, "loss": 0.09153556823730469, "step": 2890 }, { "epoch": 0.40284261130077337, "grad_norm": 1.1188114881515503, "learning_rate": 7.174422486122517e-06, "loss": 0.09668159484863281, "step": 2891 }, { "epoch": 0.40298195499198775, "grad_norm": 0.5231913924217224, "learning_rate": 7.1722999933602e-06, "loss": 0.07364845275878906, "step": 2892 }, { "epoch": 0.4031212986832021, "grad_norm": 1.0624569654464722, "learning_rate": 7.170177017962415e-06, "loss": 0.11403083801269531, "step": 2893 }, { "epoch": 0.4032606423744165, "grad_norm": 1.4180710315704346, "learning_rate": 7.168053560400845e-06, "loss": 0.08739662170410156, "step": 2894 }, { "epoch": 0.4033999860656309, "grad_norm": 2.7913818359375, "learning_rate": 7.16592962114727e-06, "loss": 0.16301918029785156, "step": 2895 }, { "epoch": 0.40353932975684526, "grad_norm": 0.6664533615112305, "learning_rate": 7.163805200673584e-06, "loss": 0.09844589233398438, "step": 2896 }, { "epoch": 0.40367867344805963, "grad_norm": 0.8431658148765564, "learning_rate": 7.161680299451782e-06, "loss": 0.10503387451171875, "step": 2897 }, { "epoch": 0.403818017139274, "grad_norm": 1.222514271736145, "learning_rate": 7.159554917953968e-06, "loss": 0.07980537414550781, "step": 2898 }, { "epoch": 0.4039573608304884, "grad_norm": 1.9486442804336548, "learning_rate": 7.157429056652357e-06, "loss": 0.1162109375, "step": 2899 }, { "epoch": 0.40409670452170277, "grad_norm": 0.30739474296569824, "learning_rate": 7.155302716019263e-06, "loss": 0.06425857543945312, "step": 2900 }, { "epoch": 0.40423604821291714, "grad_norm": 1.7671278715133667, "learning_rate": 7.153175896527112e-06, "loss": 0.09937000274658203, "step": 2901 }, { "epoch": 0.4043753919041315, "grad_norm": 0.7325977087020874, "learning_rate": 7.151048598648436e-06, "loss": 0.07154083251953125, "step": 2902 }, { "epoch": 0.4045147355953459, "grad_norm": 1.2145799398422241, "learning_rate": 7.148920822855869e-06, "loss": 0.10834884643554688, "step": 2903 }, { "epoch": 0.4046540792865603, "grad_norm": 1.2056341171264648, "learning_rate": 7.146792569622157e-06, "loss": 0.10076665878295898, "step": 2904 }, { "epoch": 0.4047934229777747, "grad_norm": 1.4811519384384155, "learning_rate": 7.144663839420147e-06, "loss": 0.12008285522460938, "step": 2905 }, { "epoch": 0.4049327666689891, "grad_norm": 0.913270115852356, "learning_rate": 7.142534632722797e-06, "loss": 0.0975341796875, "step": 2906 }, { "epoch": 0.40507211036020346, "grad_norm": 1.218335509300232, "learning_rate": 7.140404950003164e-06, "loss": 0.09726715087890625, "step": 2907 }, { "epoch": 0.40521145405141784, "grad_norm": 1.5764853954315186, "learning_rate": 7.138274791734421e-06, "loss": 0.1281719207763672, "step": 2908 }, { "epoch": 0.4053507977426322, "grad_norm": 0.6594998240470886, "learning_rate": 7.136144158389834e-06, "loss": 0.08674049377441406, "step": 2909 }, { "epoch": 0.4054901414338466, "grad_norm": 0.9264422059059143, "learning_rate": 7.134013050442785e-06, "loss": 0.078948974609375, "step": 2910 }, { "epoch": 0.405629485125061, "grad_norm": 1.6294111013412476, "learning_rate": 7.1318814683667555e-06, "loss": 0.1530590057373047, "step": 2911 }, { "epoch": 0.40576882881627535, "grad_norm": 0.6041660308837891, "learning_rate": 7.129749412635337e-06, "loss": 0.07192039489746094, "step": 2912 }, { "epoch": 0.40590817250748973, "grad_norm": 0.5352585911750793, "learning_rate": 7.1276168837222215e-06, "loss": 0.06773948669433594, "step": 2913 }, { "epoch": 0.4060475161987041, "grad_norm": 0.6371232867240906, "learning_rate": 7.125483882101208e-06, "loss": 0.08753776550292969, "step": 2914 }, { "epoch": 0.4061868598899185, "grad_norm": 0.7276208996772766, "learning_rate": 7.123350408246203e-06, "loss": 0.10293197631835938, "step": 2915 }, { "epoch": 0.40632620358113286, "grad_norm": 0.7739971876144409, "learning_rate": 7.121216462631213e-06, "loss": 0.08570384979248047, "step": 2916 }, { "epoch": 0.40646554727234724, "grad_norm": 0.31530898809432983, "learning_rate": 7.1190820457303535e-06, "loss": 0.05947113037109375, "step": 2917 }, { "epoch": 0.4066048909635616, "grad_norm": 1.0221904516220093, "learning_rate": 7.116947158017842e-06, "loss": 0.1186065673828125, "step": 2918 }, { "epoch": 0.406744234654776, "grad_norm": 1.3691476583480835, "learning_rate": 7.114811799968005e-06, "loss": 0.11110877990722656, "step": 2919 }, { "epoch": 0.40688357834599037, "grad_norm": 0.9878392815589905, "learning_rate": 7.1126759720552665e-06, "loss": 0.08787059783935547, "step": 2920 }, { "epoch": 0.40702292203720475, "grad_norm": 0.6643319129943848, "learning_rate": 7.11053967475416e-06, "loss": 0.0736856460571289, "step": 2921 }, { "epoch": 0.4071622657284191, "grad_norm": 1.8627194166183472, "learning_rate": 7.108402908539323e-06, "loss": 0.10744667053222656, "step": 2922 }, { "epoch": 0.4073016094196335, "grad_norm": 0.6097662448883057, "learning_rate": 7.106265673885494e-06, "loss": 0.08152961730957031, "step": 2923 }, { "epoch": 0.4074409531108479, "grad_norm": 0.6001483201980591, "learning_rate": 7.104127971267521e-06, "loss": 0.07617759704589844, "step": 2924 }, { "epoch": 0.4075802968020623, "grad_norm": 0.7730124592781067, "learning_rate": 7.10198980116035e-06, "loss": 0.08722305297851562, "step": 2925 }, { "epoch": 0.4077196404932767, "grad_norm": 0.9822909235954285, "learning_rate": 7.099851164039035e-06, "loss": 0.09269332885742188, "step": 2926 }, { "epoch": 0.40785898418449107, "grad_norm": 0.7106360197067261, "learning_rate": 7.0977120603787296e-06, "loss": 0.08516120910644531, "step": 2927 }, { "epoch": 0.40799832787570545, "grad_norm": 2.381431818008423, "learning_rate": 7.095572490654698e-06, "loss": 0.13875389099121094, "step": 2928 }, { "epoch": 0.4081376715669198, "grad_norm": 1.3926811218261719, "learning_rate": 7.0934324553423015e-06, "loss": 0.095916748046875, "step": 2929 }, { "epoch": 0.4082770152581342, "grad_norm": 1.2871723175048828, "learning_rate": 7.091291954917007e-06, "loss": 0.12248420715332031, "step": 2930 }, { "epoch": 0.4084163589493486, "grad_norm": 1.6172192096710205, "learning_rate": 7.089150989854385e-06, "loss": 0.14163970947265625, "step": 2931 }, { "epoch": 0.40855570264056296, "grad_norm": 0.7433708906173706, "learning_rate": 7.0870095606301095e-06, "loss": 0.08408164978027344, "step": 2932 }, { "epoch": 0.40869504633177733, "grad_norm": 1.5578006505966187, "learning_rate": 7.084867667719957e-06, "loss": 0.11551856994628906, "step": 2933 }, { "epoch": 0.4088343900229917, "grad_norm": 1.8336646556854248, "learning_rate": 7.082725311599808e-06, "loss": 0.12077522277832031, "step": 2934 }, { "epoch": 0.4089737337142061, "grad_norm": 1.180413007736206, "learning_rate": 7.080582492745642e-06, "loss": 0.08811759948730469, "step": 2935 }, { "epoch": 0.40911307740542047, "grad_norm": 0.736437976360321, "learning_rate": 7.0784392116335475e-06, "loss": 0.12894630432128906, "step": 2936 }, { "epoch": 0.40925242109663484, "grad_norm": 1.5012696981430054, "learning_rate": 7.076295468739711e-06, "loss": 0.12112236022949219, "step": 2937 }, { "epoch": 0.4093917647878492, "grad_norm": 1.3072317838668823, "learning_rate": 7.074151264540425e-06, "loss": 0.13187408447265625, "step": 2938 }, { "epoch": 0.4095311084790636, "grad_norm": 1.023180603981018, "learning_rate": 7.0720065995120815e-06, "loss": 0.09916305541992188, "step": 2939 }, { "epoch": 0.409670452170278, "grad_norm": 1.5808688402175903, "learning_rate": 7.069861474131176e-06, "loss": 0.096160888671875, "step": 2940 }, { "epoch": 0.40980979586149235, "grad_norm": 0.8281257748603821, "learning_rate": 7.067715888874307e-06, "loss": 0.08753585815429688, "step": 2941 }, { "epoch": 0.40994913955270673, "grad_norm": 1.4226078987121582, "learning_rate": 7.065569844218175e-06, "loss": 0.09314727783203125, "step": 2942 }, { "epoch": 0.4100884832439211, "grad_norm": 1.3059943914413452, "learning_rate": 7.0634233406395806e-06, "loss": 0.07947015762329102, "step": 2943 }, { "epoch": 0.4102278269351355, "grad_norm": 0.9606468081474304, "learning_rate": 7.061276378615428e-06, "loss": 0.10863304138183594, "step": 2944 }, { "epoch": 0.4103671706263499, "grad_norm": 0.8553252220153809, "learning_rate": 7.059128958622725e-06, "loss": 0.11048126220703125, "step": 2945 }, { "epoch": 0.4105065143175643, "grad_norm": 1.0750707387924194, "learning_rate": 7.056981081138578e-06, "loss": 0.11648750305175781, "step": 2946 }, { "epoch": 0.4106458580087787, "grad_norm": 1.2945455312728882, "learning_rate": 7.054832746640196e-06, "loss": 0.12211227416992188, "step": 2947 }, { "epoch": 0.41078520169999305, "grad_norm": 0.9296450614929199, "learning_rate": 7.05268395560489e-06, "loss": 0.10971450805664062, "step": 2948 }, { "epoch": 0.41092454539120743, "grad_norm": 1.298449158668518, "learning_rate": 7.050534708510073e-06, "loss": 0.0931549072265625, "step": 2949 }, { "epoch": 0.4110638890824218, "grad_norm": 0.8966913223266602, "learning_rate": 7.048385005833258e-06, "loss": 0.07433891296386719, "step": 2950 }, { "epoch": 0.4112032327736362, "grad_norm": 0.9088122844696045, "learning_rate": 7.04623484805206e-06, "loss": 0.09000205993652344, "step": 2951 }, { "epoch": 0.41134257646485056, "grad_norm": 0.6080167889595032, "learning_rate": 7.044084235644196e-06, "loss": 0.08001899719238281, "step": 2952 }, { "epoch": 0.41148192015606494, "grad_norm": 0.7761051654815674, "learning_rate": 7.041933169087482e-06, "loss": 0.083831787109375, "step": 2953 }, { "epoch": 0.4116212638472793, "grad_norm": 0.9744357466697693, "learning_rate": 7.039781648859836e-06, "loss": 0.1258087158203125, "step": 2954 }, { "epoch": 0.4117606075384937, "grad_norm": 0.42166152596473694, "learning_rate": 7.037629675439276e-06, "loss": 0.0661468505859375, "step": 2955 }, { "epoch": 0.4118999512297081, "grad_norm": 1.0083000659942627, "learning_rate": 7.035477249303923e-06, "loss": 0.10171127319335938, "step": 2956 }, { "epoch": 0.41203929492092245, "grad_norm": 1.2881306409835815, "learning_rate": 7.033324370931993e-06, "loss": 0.080352783203125, "step": 2957 }, { "epoch": 0.4121786386121368, "grad_norm": 0.8776584267616272, "learning_rate": 7.031171040801813e-06, "loss": 0.08480072021484375, "step": 2958 }, { "epoch": 0.4123179823033512, "grad_norm": 1.324405312538147, "learning_rate": 7.029017259391797e-06, "loss": 0.0973958969116211, "step": 2959 }, { "epoch": 0.4124573259945656, "grad_norm": 1.3594647645950317, "learning_rate": 7.026863027180472e-06, "loss": 0.12166595458984375, "step": 2960 }, { "epoch": 0.41259666968577996, "grad_norm": 1.0442551374435425, "learning_rate": 7.024708344646455e-06, "loss": 0.111053466796875, "step": 2961 }, { "epoch": 0.41273601337699434, "grad_norm": 1.19435715675354, "learning_rate": 7.022553212268469e-06, "loss": 0.11005973815917969, "step": 2962 }, { "epoch": 0.4128753570682087, "grad_norm": 1.6720614433288574, "learning_rate": 7.020397630525336e-06, "loss": 0.10543251037597656, "step": 2963 }, { "epoch": 0.4130147007594231, "grad_norm": 0.9405527710914612, "learning_rate": 7.018241599895974e-06, "loss": 0.08877372741699219, "step": 2964 }, { "epoch": 0.41315404445063747, "grad_norm": 0.8995718955993652, "learning_rate": 7.016085120859406e-06, "loss": 0.1097869873046875, "step": 2965 }, { "epoch": 0.4132933881418519, "grad_norm": 0.7050274014472961, "learning_rate": 7.013928193894753e-06, "loss": 0.08198738098144531, "step": 2966 }, { "epoch": 0.4134327318330663, "grad_norm": 0.7602409720420837, "learning_rate": 7.011770819481234e-06, "loss": 0.08875083923339844, "step": 2967 }, { "epoch": 0.41357207552428066, "grad_norm": 0.9601369500160217, "learning_rate": 7.0096129980981674e-06, "loss": 0.08916091918945312, "step": 2968 }, { "epoch": 0.41371141921549504, "grad_norm": 1.2428662776947021, "learning_rate": 7.0074547302249755e-06, "loss": 0.10610008239746094, "step": 2969 }, { "epoch": 0.4138507629067094, "grad_norm": 0.3837394118309021, "learning_rate": 7.005296016341171e-06, "loss": 0.061321258544921875, "step": 2970 }, { "epoch": 0.4139901065979238, "grad_norm": 0.7127225995063782, "learning_rate": 7.003136856926374e-06, "loss": 0.08311653137207031, "step": 2971 }, { "epoch": 0.41412945028913817, "grad_norm": 0.739004373550415, "learning_rate": 7.0009772524603e-06, "loss": 0.08505058288574219, "step": 2972 }, { "epoch": 0.41426879398035255, "grad_norm": 0.6197059750556946, "learning_rate": 6.998817203422763e-06, "loss": 0.08281898498535156, "step": 2973 }, { "epoch": 0.4144081376715669, "grad_norm": 1.1635167598724365, "learning_rate": 6.996656710293679e-06, "loss": 0.09379005432128906, "step": 2974 }, { "epoch": 0.4145474813627813, "grad_norm": 0.7661646604537964, "learning_rate": 6.994495773553056e-06, "loss": 0.09855079650878906, "step": 2975 }, { "epoch": 0.4146868250539957, "grad_norm": 1.2632640600204468, "learning_rate": 6.992334393681008e-06, "loss": 0.0891275405883789, "step": 2976 }, { "epoch": 0.41482616874521006, "grad_norm": 0.47309744358062744, "learning_rate": 6.990172571157744e-06, "loss": 0.07502174377441406, "step": 2977 }, { "epoch": 0.41496551243642443, "grad_norm": 0.8861979842185974, "learning_rate": 6.988010306463571e-06, "loss": 0.12998008728027344, "step": 2978 }, { "epoch": 0.4151048561276388, "grad_norm": 1.3250377178192139, "learning_rate": 6.985847600078894e-06, "loss": 0.11785697937011719, "step": 2979 }, { "epoch": 0.4152441998188532, "grad_norm": 1.6023272275924683, "learning_rate": 6.98368445248422e-06, "loss": 0.11009788513183594, "step": 2980 }, { "epoch": 0.41538354351006757, "grad_norm": 0.8196327686309814, "learning_rate": 6.981520864160147e-06, "loss": 0.07997608184814453, "step": 2981 }, { "epoch": 0.41552288720128194, "grad_norm": 0.7466355562210083, "learning_rate": 6.979356835587377e-06, "loss": 0.09559917449951172, "step": 2982 }, { "epoch": 0.4156622308924963, "grad_norm": 0.9303371906280518, "learning_rate": 6.977192367246709e-06, "loss": 0.08302021026611328, "step": 2983 }, { "epoch": 0.4158015745837107, "grad_norm": 0.5224902629852295, "learning_rate": 6.9750274596190344e-06, "loss": 0.08148384094238281, "step": 2984 }, { "epoch": 0.4159409182749251, "grad_norm": 0.8953731656074524, "learning_rate": 6.972862113185353e-06, "loss": 0.1007843017578125, "step": 2985 }, { "epoch": 0.4160802619661395, "grad_norm": 0.6795132160186768, "learning_rate": 6.970696328426749e-06, "loss": 0.09694385528564453, "step": 2986 }, { "epoch": 0.4162196056573539, "grad_norm": 1.0907071828842163, "learning_rate": 6.968530105824413e-06, "loss": 0.08462715148925781, "step": 2987 }, { "epoch": 0.41635894934856826, "grad_norm": 0.6188135147094727, "learning_rate": 6.966363445859629e-06, "loss": 0.061733245849609375, "step": 2988 }, { "epoch": 0.41649829303978264, "grad_norm": 0.8132709860801697, "learning_rate": 6.96419634901378e-06, "loss": 0.07799720764160156, "step": 2989 }, { "epoch": 0.416637636730997, "grad_norm": 1.1371368169784546, "learning_rate": 6.962028815768347e-06, "loss": 0.09668922424316406, "step": 2990 }, { "epoch": 0.4167769804222114, "grad_norm": 1.2164162397384644, "learning_rate": 6.959860846604903e-06, "loss": 0.09996986389160156, "step": 2991 }, { "epoch": 0.4169163241134258, "grad_norm": 1.5427533388137817, "learning_rate": 6.957692442005126e-06, "loss": 0.11583518981933594, "step": 2992 }, { "epoch": 0.41705566780464015, "grad_norm": 0.9446366429328918, "learning_rate": 6.95552360245078e-06, "loss": 0.11015892028808594, "step": 2993 }, { "epoch": 0.41719501149585453, "grad_norm": 0.4972839951515198, "learning_rate": 6.953354328423737e-06, "loss": 0.061669349670410156, "step": 2994 }, { "epoch": 0.4173343551870689, "grad_norm": 0.999773383140564, "learning_rate": 6.951184620405958e-06, "loss": 0.09671401977539062, "step": 2995 }, { "epoch": 0.4174736988782833, "grad_norm": 1.3924733400344849, "learning_rate": 6.949014478879502e-06, "loss": 0.10216712951660156, "step": 2996 }, { "epoch": 0.41761304256949766, "grad_norm": 1.1959148645401, "learning_rate": 6.946843904326527e-06, "loss": 0.11322784423828125, "step": 2997 }, { "epoch": 0.41775238626071204, "grad_norm": 1.5114995241165161, "learning_rate": 6.944672897229282e-06, "loss": 0.08904266357421875, "step": 2998 }, { "epoch": 0.4178917299519264, "grad_norm": 0.7956535816192627, "learning_rate": 6.942501458070117e-06, "loss": 0.0985260009765625, "step": 2999 }, { "epoch": 0.4180310736431408, "grad_norm": 0.6480534076690674, "learning_rate": 6.940329587331477e-06, "loss": 0.07275676727294922, "step": 3000 }, { "epoch": 0.41817041733435517, "grad_norm": 0.742716372013092, "learning_rate": 6.938157285495901e-06, "loss": 0.07286262512207031, "step": 3001 }, { "epoch": 0.41830976102556955, "grad_norm": 0.41587674617767334, "learning_rate": 6.935984553046025e-06, "loss": 0.058349609375, "step": 3002 }, { "epoch": 0.4184491047167839, "grad_norm": 0.6608359813690186, "learning_rate": 6.93381139046458e-06, "loss": 0.08695411682128906, "step": 3003 }, { "epoch": 0.4185884484079983, "grad_norm": 0.31868451833724976, "learning_rate": 6.931637798234394e-06, "loss": 0.05985450744628906, "step": 3004 }, { "epoch": 0.4187277920992127, "grad_norm": 1.0210751295089722, "learning_rate": 6.929463776838389e-06, "loss": 0.09753990173339844, "step": 3005 }, { "epoch": 0.4188671357904271, "grad_norm": 0.7745292782783508, "learning_rate": 6.927289326759585e-06, "loss": 0.09672927856445312, "step": 3006 }, { "epoch": 0.4190064794816415, "grad_norm": 0.942366898059845, "learning_rate": 6.925114448481089e-06, "loss": 0.09616470336914062, "step": 3007 }, { "epoch": 0.41914582317285587, "grad_norm": 1.2701269388198853, "learning_rate": 6.922939142486118e-06, "loss": 0.11209487915039062, "step": 3008 }, { "epoch": 0.41928516686407025, "grad_norm": 1.3544906377792358, "learning_rate": 6.9207634092579686e-06, "loss": 0.09657859802246094, "step": 3009 }, { "epoch": 0.4194245105552846, "grad_norm": 1.4392309188842773, "learning_rate": 6.9185872492800434e-06, "loss": 0.09362030029296875, "step": 3010 }, { "epoch": 0.419563854246499, "grad_norm": 0.6814238429069519, "learning_rate": 6.916410663035832e-06, "loss": 0.10833740234375, "step": 3011 }, { "epoch": 0.4197031979377134, "grad_norm": 0.8626049160957336, "learning_rate": 6.9142336510089235e-06, "loss": 0.09232711791992188, "step": 3012 }, { "epoch": 0.41984254162892776, "grad_norm": 1.2815674543380737, "learning_rate": 6.912056213683001e-06, "loss": 0.10033798217773438, "step": 3013 }, { "epoch": 0.41998188532014213, "grad_norm": 0.40254542231559753, "learning_rate": 6.909878351541841e-06, "loss": 0.06935501098632812, "step": 3014 }, { "epoch": 0.4201212290113565, "grad_norm": 0.4129500985145569, "learning_rate": 6.907700065069315e-06, "loss": 0.07719612121582031, "step": 3015 }, { "epoch": 0.4202605727025709, "grad_norm": 0.703129231929779, "learning_rate": 6.905521354749387e-06, "loss": 0.09568405151367188, "step": 3016 }, { "epoch": 0.42039991639378527, "grad_norm": 0.5197794437408447, "learning_rate": 6.90334222106612e-06, "loss": 0.06962203979492188, "step": 3017 }, { "epoch": 0.42053926008499964, "grad_norm": 1.166296362876892, "learning_rate": 6.901162664503662e-06, "loss": 0.09388351440429688, "step": 3018 }, { "epoch": 0.420678603776214, "grad_norm": 0.5539061427116394, "learning_rate": 6.898982685546267e-06, "loss": 0.07936859130859375, "step": 3019 }, { "epoch": 0.4208179474674284, "grad_norm": 2.2291908264160156, "learning_rate": 6.896802284678273e-06, "loss": 0.13102340698242188, "step": 3020 }, { "epoch": 0.4209572911586428, "grad_norm": 2.5750977993011475, "learning_rate": 6.894621462384116e-06, "loss": 0.1202545166015625, "step": 3021 }, { "epoch": 0.42109663484985715, "grad_norm": 0.7434513568878174, "learning_rate": 6.8924402191483245e-06, "loss": 0.09169960021972656, "step": 3022 }, { "epoch": 0.42123597854107153, "grad_norm": 0.6430135369300842, "learning_rate": 6.890258555455521e-06, "loss": 0.09569358825683594, "step": 3023 }, { "epoch": 0.4213753222322859, "grad_norm": 1.4101582765579224, "learning_rate": 6.888076471790423e-06, "loss": 0.16951370239257812, "step": 3024 }, { "epoch": 0.4215146659235003, "grad_norm": 1.7766746282577515, "learning_rate": 6.8858939686378376e-06, "loss": 0.12355804443359375, "step": 3025 }, { "epoch": 0.4216540096147147, "grad_norm": 1.5463025569915771, "learning_rate": 6.8837110464826685e-06, "loss": 0.1057891845703125, "step": 3026 }, { "epoch": 0.4217933533059291, "grad_norm": 0.8265407681465149, "learning_rate": 6.881527705809912e-06, "loss": 0.11116600036621094, "step": 3027 }, { "epoch": 0.4219326969971435, "grad_norm": 1.184775471687317, "learning_rate": 6.879343947104653e-06, "loss": 0.11890316009521484, "step": 3028 }, { "epoch": 0.42207204068835785, "grad_norm": 0.656487762928009, "learning_rate": 6.8771597708520766e-06, "loss": 0.08837699890136719, "step": 3029 }, { "epoch": 0.42221138437957223, "grad_norm": 1.0868223905563354, "learning_rate": 6.874975177537455e-06, "loss": 0.10114288330078125, "step": 3030 }, { "epoch": 0.4223507280707866, "grad_norm": 1.625211477279663, "learning_rate": 6.872790167646155e-06, "loss": 0.1306476593017578, "step": 3031 }, { "epoch": 0.422490071762001, "grad_norm": 1.7032480239868164, "learning_rate": 6.870604741663638e-06, "loss": 0.12286186218261719, "step": 3032 }, { "epoch": 0.42262941545321536, "grad_norm": 1.2510508298873901, "learning_rate": 6.868418900075452e-06, "loss": 0.08862495422363281, "step": 3033 }, { "epoch": 0.42276875914442974, "grad_norm": 1.2122875452041626, "learning_rate": 6.866232643367243e-06, "loss": 0.08127212524414062, "step": 3034 }, { "epoch": 0.4229081028356441, "grad_norm": 0.7148032188415527, "learning_rate": 6.864045972024749e-06, "loss": 0.066650390625, "step": 3035 }, { "epoch": 0.4230474465268585, "grad_norm": 1.4850590229034424, "learning_rate": 6.861858886533796e-06, "loss": 0.0943450927734375, "step": 3036 }, { "epoch": 0.4231867902180729, "grad_norm": 2.2463319301605225, "learning_rate": 6.859671387380307e-06, "loss": 0.13215065002441406, "step": 3037 }, { "epoch": 0.42332613390928725, "grad_norm": 0.6498093605041504, "learning_rate": 6.85748347505029e-06, "loss": 0.07236099243164062, "step": 3038 }, { "epoch": 0.4234654776005016, "grad_norm": 0.8766641020774841, "learning_rate": 6.855295150029853e-06, "loss": 0.09007740020751953, "step": 3039 }, { "epoch": 0.423604821291716, "grad_norm": 0.5468968749046326, "learning_rate": 6.853106412805192e-06, "loss": 0.07827949523925781, "step": 3040 }, { "epoch": 0.4237441649829304, "grad_norm": 1.4007645845413208, "learning_rate": 6.850917263862591e-06, "loss": 0.12024068832397461, "step": 3041 }, { "epoch": 0.42388350867414476, "grad_norm": 0.9132962822914124, "learning_rate": 6.848727703688432e-06, "loss": 0.0876779556274414, "step": 3042 }, { "epoch": 0.42402285236535914, "grad_norm": 1.146604299545288, "learning_rate": 6.846537732769185e-06, "loss": 0.10318565368652344, "step": 3043 }, { "epoch": 0.4241621960565735, "grad_norm": 1.086350679397583, "learning_rate": 6.8443473515914105e-06, "loss": 0.11294364929199219, "step": 3044 }, { "epoch": 0.4243015397477879, "grad_norm": 1.0318999290466309, "learning_rate": 6.842156560641762e-06, "loss": 0.08623886108398438, "step": 3045 }, { "epoch": 0.4244408834390023, "grad_norm": 0.4759567677974701, "learning_rate": 6.839965360406983e-06, "loss": 0.07883644104003906, "step": 3046 }, { "epoch": 0.4245802271302167, "grad_norm": 0.9564563035964966, "learning_rate": 6.837773751373908e-06, "loss": 0.08910369873046875, "step": 3047 }, { "epoch": 0.4247195708214311, "grad_norm": 1.144313097000122, "learning_rate": 6.835581734029462e-06, "loss": 0.1020050048828125, "step": 3048 }, { "epoch": 0.42485891451264546, "grad_norm": 0.8910083770751953, "learning_rate": 6.833389308860662e-06, "loss": 0.08817386627197266, "step": 3049 }, { "epoch": 0.42499825820385984, "grad_norm": 0.6969534158706665, "learning_rate": 6.831196476354615e-06, "loss": 0.07978248596191406, "step": 3050 }, { "epoch": 0.4251376018950742, "grad_norm": 1.958369255065918, "learning_rate": 6.829003236998517e-06, "loss": 0.12706947326660156, "step": 3051 }, { "epoch": 0.4252769455862886, "grad_norm": 0.6442797183990479, "learning_rate": 6.8268095912796574e-06, "loss": 0.06746101379394531, "step": 3052 }, { "epoch": 0.42541628927750297, "grad_norm": 0.8472126126289368, "learning_rate": 6.824615539685413e-06, "loss": 0.07511520385742188, "step": 3053 }, { "epoch": 0.42555563296871735, "grad_norm": 0.8631493449211121, "learning_rate": 6.822421082703253e-06, "loss": 0.1095428466796875, "step": 3054 }, { "epoch": 0.4256949766599317, "grad_norm": 1.265178918838501, "learning_rate": 6.820226220820733e-06, "loss": 0.08818817138671875, "step": 3055 }, { "epoch": 0.4258343203511461, "grad_norm": 1.4213168621063232, "learning_rate": 6.818030954525505e-06, "loss": 0.10124015808105469, "step": 3056 }, { "epoch": 0.4259736640423605, "grad_norm": 1.2231391668319702, "learning_rate": 6.815835284305304e-06, "loss": 0.1002044677734375, "step": 3057 }, { "epoch": 0.42611300773357486, "grad_norm": 1.0587488412857056, "learning_rate": 6.8136392106479624e-06, "loss": 0.10007095336914062, "step": 3058 }, { "epoch": 0.42625235142478923, "grad_norm": 0.5746868252754211, "learning_rate": 6.81144273404139e-06, "loss": 0.07568550109863281, "step": 3059 }, { "epoch": 0.4263916951160036, "grad_norm": 0.5046036839485168, "learning_rate": 6.8092458549736e-06, "loss": 0.08017158508300781, "step": 3060 }, { "epoch": 0.426531038807218, "grad_norm": 0.6811658143997192, "learning_rate": 6.807048573932687e-06, "loss": 0.08577537536621094, "step": 3061 }, { "epoch": 0.42667038249843237, "grad_norm": 1.1318087577819824, "learning_rate": 6.8048508914068355e-06, "loss": 0.1248779296875, "step": 3062 }, { "epoch": 0.42680972618964674, "grad_norm": 1.0492210388183594, "learning_rate": 6.802652807884322e-06, "loss": 0.11278533935546875, "step": 3063 }, { "epoch": 0.4269490698808611, "grad_norm": 0.7284271121025085, "learning_rate": 6.80045432385351e-06, "loss": 0.09013938903808594, "step": 3064 }, { "epoch": 0.4270884135720755, "grad_norm": 0.5800374746322632, "learning_rate": 6.798255439802852e-06, "loss": 0.08647918701171875, "step": 3065 }, { "epoch": 0.42722775726328993, "grad_norm": 0.7750113010406494, "learning_rate": 6.796056156220892e-06, "loss": 0.08581352233886719, "step": 3066 }, { "epoch": 0.4273671009545043, "grad_norm": 1.6201361417770386, "learning_rate": 6.793856473596256e-06, "loss": 0.08975458145141602, "step": 3067 }, { "epoch": 0.4275064446457187, "grad_norm": 0.520975649356842, "learning_rate": 6.791656392417666e-06, "loss": 0.0822906494140625, "step": 3068 }, { "epoch": 0.42764578833693306, "grad_norm": 0.3375762701034546, "learning_rate": 6.789455913173933e-06, "loss": 0.056545257568359375, "step": 3069 }, { "epoch": 0.42778513202814744, "grad_norm": 1.6045759916305542, "learning_rate": 6.787255036353947e-06, "loss": 0.0992574691772461, "step": 3070 }, { "epoch": 0.4279244757193618, "grad_norm": 2.0817742347717285, "learning_rate": 6.785053762446696e-06, "loss": 0.10925674438476562, "step": 3071 }, { "epoch": 0.4280638194105762, "grad_norm": 0.5306879281997681, "learning_rate": 6.782852091941254e-06, "loss": 0.08272552490234375, "step": 3072 }, { "epoch": 0.4282031631017906, "grad_norm": 1.1482527256011963, "learning_rate": 6.780650025326778e-06, "loss": 0.11101341247558594, "step": 3073 }, { "epoch": 0.42834250679300495, "grad_norm": 0.7206487059593201, "learning_rate": 6.778447563092523e-06, "loss": 0.09915542602539062, "step": 3074 }, { "epoch": 0.42848185048421933, "grad_norm": 1.086918592453003, "learning_rate": 6.776244705727818e-06, "loss": 0.11507415771484375, "step": 3075 }, { "epoch": 0.4286211941754337, "grad_norm": 1.1288506984710693, "learning_rate": 6.774041453722093e-06, "loss": 0.1420459747314453, "step": 3076 }, { "epoch": 0.4287605378666481, "grad_norm": 0.5907233953475952, "learning_rate": 6.771837807564861e-06, "loss": 0.09478187561035156, "step": 3077 }, { "epoch": 0.42889988155786246, "grad_norm": 1.1310957670211792, "learning_rate": 6.769633767745718e-06, "loss": 0.0860137939453125, "step": 3078 }, { "epoch": 0.42903922524907684, "grad_norm": 0.9670785665512085, "learning_rate": 6.767429334754354e-06, "loss": 0.10710811614990234, "step": 3079 }, { "epoch": 0.4291785689402912, "grad_norm": 1.4579250812530518, "learning_rate": 6.7652245090805426e-06, "loss": 0.10672760009765625, "step": 3080 }, { "epoch": 0.4293179126315056, "grad_norm": 1.7854975461959839, "learning_rate": 6.763019291214146e-06, "loss": 0.09710311889648438, "step": 3081 }, { "epoch": 0.42945725632271997, "grad_norm": 0.9300206899642944, "learning_rate": 6.760813681645114e-06, "loss": 0.10845184326171875, "step": 3082 }, { "epoch": 0.42959660001393435, "grad_norm": 0.9551122188568115, "learning_rate": 6.758607680863481e-06, "loss": 0.10354804992675781, "step": 3083 }, { "epoch": 0.4297359437051487, "grad_norm": 1.3158482313156128, "learning_rate": 6.756401289359371e-06, "loss": 0.10352325439453125, "step": 3084 }, { "epoch": 0.4298752873963631, "grad_norm": 1.2484991550445557, "learning_rate": 6.754194507622995e-06, "loss": 0.10168647766113281, "step": 3085 }, { "epoch": 0.43001463108757754, "grad_norm": 0.8483933210372925, "learning_rate": 6.7519873361446475e-06, "loss": 0.09220123291015625, "step": 3086 }, { "epoch": 0.4301539747787919, "grad_norm": 1.1008448600769043, "learning_rate": 6.7497797754147134e-06, "loss": 0.09110450744628906, "step": 3087 }, { "epoch": 0.4302933184700063, "grad_norm": 1.0237922668457031, "learning_rate": 6.74757182592366e-06, "loss": 0.07846260070800781, "step": 3088 }, { "epoch": 0.43043266216122067, "grad_norm": 1.5035803318023682, "learning_rate": 6.7453634881620445e-06, "loss": 0.10501480102539062, "step": 3089 }, { "epoch": 0.43057200585243505, "grad_norm": 0.8230171203613281, "learning_rate": 6.743154762620511e-06, "loss": 0.08716583251953125, "step": 3090 }, { "epoch": 0.4307113495436494, "grad_norm": 0.7490379214286804, "learning_rate": 6.740945649789784e-06, "loss": 0.08880996704101562, "step": 3091 }, { "epoch": 0.4308506932348638, "grad_norm": 0.9697049260139465, "learning_rate": 6.738736150160681e-06, "loss": 0.11539840698242188, "step": 3092 }, { "epoch": 0.4309900369260782, "grad_norm": 0.9064434766769409, "learning_rate": 6.736526264224101e-06, "loss": 0.09260940551757812, "step": 3093 }, { "epoch": 0.43112938061729256, "grad_norm": 0.658707857131958, "learning_rate": 6.734315992471032e-06, "loss": 0.06594657897949219, "step": 3094 }, { "epoch": 0.43126872430850693, "grad_norm": 0.7012165188789368, "learning_rate": 6.7321053353925446e-06, "loss": 0.09490394592285156, "step": 3095 }, { "epoch": 0.4314080679997213, "grad_norm": 1.9025346040725708, "learning_rate": 6.729894293479795e-06, "loss": 0.09625625610351562, "step": 3096 }, { "epoch": 0.4315474116909357, "grad_norm": 0.8278725147247314, "learning_rate": 6.727682867224028e-06, "loss": 0.08091259002685547, "step": 3097 }, { "epoch": 0.43168675538215007, "grad_norm": 0.919255256652832, "learning_rate": 6.725471057116573e-06, "loss": 0.11510467529296875, "step": 3098 }, { "epoch": 0.43182609907336444, "grad_norm": 1.0515666007995605, "learning_rate": 6.723258863648841e-06, "loss": 0.12291145324707031, "step": 3099 }, { "epoch": 0.4319654427645788, "grad_norm": 2.6673061847686768, "learning_rate": 6.72104628731233e-06, "loss": 0.123199462890625, "step": 3100 }, { "epoch": 0.4321047864557932, "grad_norm": 0.98553067445755, "learning_rate": 6.718833328598629e-06, "loss": 0.0998697280883789, "step": 3101 }, { "epoch": 0.4322441301470076, "grad_norm": 1.0967888832092285, "learning_rate": 6.716619987999404e-06, "loss": 0.08808326721191406, "step": 3102 }, { "epoch": 0.43238347383822195, "grad_norm": 0.7472319602966309, "learning_rate": 6.714406266006408e-06, "loss": 0.08725547790527344, "step": 3103 }, { "epoch": 0.43252281752943633, "grad_norm": 2.375187635421753, "learning_rate": 6.712192163111481e-06, "loss": 0.15014934539794922, "step": 3104 }, { "epoch": 0.4326621612206507, "grad_norm": 1.0778474807739258, "learning_rate": 6.709977679806543e-06, "loss": 0.09688186645507812, "step": 3105 }, { "epoch": 0.43280150491186514, "grad_norm": 0.710385799407959, "learning_rate": 6.707762816583608e-06, "loss": 0.11174774169921875, "step": 3106 }, { "epoch": 0.4329408486030795, "grad_norm": 0.7214274406433105, "learning_rate": 6.705547573934759e-06, "loss": 0.0922555923461914, "step": 3107 }, { "epoch": 0.4330801922942939, "grad_norm": 1.293122410774231, "learning_rate": 6.703331952352181e-06, "loss": 0.10877799987792969, "step": 3108 }, { "epoch": 0.4332195359855083, "grad_norm": 1.449845552444458, "learning_rate": 6.70111595232813e-06, "loss": 0.106719970703125, "step": 3109 }, { "epoch": 0.43335887967672265, "grad_norm": 1.4313406944274902, "learning_rate": 6.6988995743549516e-06, "loss": 0.10454273223876953, "step": 3110 }, { "epoch": 0.43349822336793703, "grad_norm": 0.7905516028404236, "learning_rate": 6.696682818925074e-06, "loss": 0.0790252685546875, "step": 3111 }, { "epoch": 0.4336375670591514, "grad_norm": 0.9773372411727905, "learning_rate": 6.694465686531011e-06, "loss": 0.08688163757324219, "step": 3112 }, { "epoch": 0.4337769107503658, "grad_norm": 1.637200951576233, "learning_rate": 6.692248177665357e-06, "loss": 0.12139415740966797, "step": 3113 }, { "epoch": 0.43391625444158016, "grad_norm": 0.5455917716026306, "learning_rate": 6.690030292820792e-06, "loss": 0.06365013122558594, "step": 3114 }, { "epoch": 0.43405559813279454, "grad_norm": 1.6208388805389404, "learning_rate": 6.687812032490081e-06, "loss": 0.10938644409179688, "step": 3115 }, { "epoch": 0.4341949418240089, "grad_norm": 0.8771485090255737, "learning_rate": 6.685593397166069e-06, "loss": 0.09677314758300781, "step": 3116 }, { "epoch": 0.4343342855152233, "grad_norm": 0.42537274956703186, "learning_rate": 6.683374387341688e-06, "loss": 0.06628036499023438, "step": 3117 }, { "epoch": 0.4344736292064377, "grad_norm": 0.5151576399803162, "learning_rate": 6.681155003509949e-06, "loss": 0.07762718200683594, "step": 3118 }, { "epoch": 0.43461297289765205, "grad_norm": 1.006239652633667, "learning_rate": 6.67893524616395e-06, "loss": 0.1317424774169922, "step": 3119 }, { "epoch": 0.4347523165888664, "grad_norm": 0.7229102849960327, "learning_rate": 6.67671511579687e-06, "loss": 0.09320926666259766, "step": 3120 }, { "epoch": 0.4348916602800808, "grad_norm": 0.9252650141716003, "learning_rate": 6.67449461290197e-06, "loss": 0.10548591613769531, "step": 3121 }, { "epoch": 0.4350310039712952, "grad_norm": 0.4287383258342743, "learning_rate": 6.6722737379726e-06, "loss": 0.06416511535644531, "step": 3122 }, { "epoch": 0.43517034766250956, "grad_norm": 1.2376949787139893, "learning_rate": 6.670052491502182e-06, "loss": 0.10236358642578125, "step": 3123 }, { "epoch": 0.43530969135372394, "grad_norm": 1.2750616073608398, "learning_rate": 6.667830873984228e-06, "loss": 0.11559486389160156, "step": 3124 }, { "epoch": 0.4354490350449383, "grad_norm": 1.589106798171997, "learning_rate": 6.66560888591233e-06, "loss": 0.10345649719238281, "step": 3125 }, { "epoch": 0.43558837873615275, "grad_norm": 1.0013779401779175, "learning_rate": 6.663386527780166e-06, "loss": 0.10259056091308594, "step": 3126 }, { "epoch": 0.4357277224273671, "grad_norm": 0.8202335238456726, "learning_rate": 6.66116380008149e-06, "loss": 0.08813190460205078, "step": 3127 }, { "epoch": 0.4358670661185815, "grad_norm": 3.1896095275878906, "learning_rate": 6.6589407033101435e-06, "loss": 0.14628219604492188, "step": 3128 }, { "epoch": 0.4360064098097959, "grad_norm": 0.827866792678833, "learning_rate": 6.656717237960047e-06, "loss": 0.069427490234375, "step": 3129 }, { "epoch": 0.43614575350101026, "grad_norm": 0.703829824924469, "learning_rate": 6.654493404525204e-06, "loss": 0.08101463317871094, "step": 3130 }, { "epoch": 0.43628509719222464, "grad_norm": 1.2512580156326294, "learning_rate": 6.652269203499699e-06, "loss": 0.08588218688964844, "step": 3131 }, { "epoch": 0.436424440883439, "grad_norm": 0.7809702157974243, "learning_rate": 6.650044635377698e-06, "loss": 0.08693599700927734, "step": 3132 }, { "epoch": 0.4365637845746534, "grad_norm": 1.5217267274856567, "learning_rate": 6.64781970065345e-06, "loss": 0.09298324584960938, "step": 3133 }, { "epoch": 0.43670312826586777, "grad_norm": 0.7776961326599121, "learning_rate": 6.645594399821286e-06, "loss": 0.06733322143554688, "step": 3134 }, { "epoch": 0.43684247195708215, "grad_norm": 0.6390858292579651, "learning_rate": 6.6433687333756165e-06, "loss": 0.0709371566772461, "step": 3135 }, { "epoch": 0.4369818156482965, "grad_norm": 0.4366070330142975, "learning_rate": 6.641142701810932e-06, "loss": 0.06607818603515625, "step": 3136 }, { "epoch": 0.4371211593395109, "grad_norm": 0.49680668115615845, "learning_rate": 6.638916305621807e-06, "loss": 0.0898284912109375, "step": 3137 }, { "epoch": 0.4372605030307253, "grad_norm": 1.1609547138214111, "learning_rate": 6.636689545302898e-06, "loss": 0.10903739929199219, "step": 3138 }, { "epoch": 0.43739984672193966, "grad_norm": 0.9063841104507446, "learning_rate": 6.634462421348935e-06, "loss": 0.10341072082519531, "step": 3139 }, { "epoch": 0.43753919041315403, "grad_norm": 1.4966557025909424, "learning_rate": 6.63223493425474e-06, "loss": 0.14720535278320312, "step": 3140 }, { "epoch": 0.4376785341043684, "grad_norm": 0.847554087638855, "learning_rate": 6.630007084515205e-06, "loss": 0.08963584899902344, "step": 3141 }, { "epoch": 0.4378178777955828, "grad_norm": 0.46136003732681274, "learning_rate": 6.627778872625311e-06, "loss": 0.07761764526367188, "step": 3142 }, { "epoch": 0.43795722148679717, "grad_norm": 1.8665621280670166, "learning_rate": 6.625550299080115e-06, "loss": 0.10807228088378906, "step": 3143 }, { "epoch": 0.43809656517801154, "grad_norm": 0.32096028327941895, "learning_rate": 6.6233213643747525e-06, "loss": 0.05603218078613281, "step": 3144 }, { "epoch": 0.4382359088692259, "grad_norm": 0.8072521090507507, "learning_rate": 6.621092069004445e-06, "loss": 0.09607887268066406, "step": 3145 }, { "epoch": 0.4383752525604403, "grad_norm": 0.4516966640949249, "learning_rate": 6.618862413464491e-06, "loss": 0.07666397094726562, "step": 3146 }, { "epoch": 0.43851459625165473, "grad_norm": 1.2214245796203613, "learning_rate": 6.616632398250266e-06, "loss": 0.1106414794921875, "step": 3147 }, { "epoch": 0.4386539399428691, "grad_norm": 0.8242385387420654, "learning_rate": 6.614402023857231e-06, "loss": 0.08354949951171875, "step": 3148 }, { "epoch": 0.4387932836340835, "grad_norm": 0.6650422215461731, "learning_rate": 6.612171290780925e-06, "loss": 0.0778045654296875, "step": 3149 }, { "epoch": 0.43893262732529786, "grad_norm": 0.9295251965522766, "learning_rate": 6.6099401995169635e-06, "loss": 0.09825706481933594, "step": 3150 }, { "epoch": 0.43907197101651224, "grad_norm": 1.1801426410675049, "learning_rate": 6.607708750561046e-06, "loss": 0.10796165466308594, "step": 3151 }, { "epoch": 0.4392113147077266, "grad_norm": 0.709770679473877, "learning_rate": 6.605476944408948e-06, "loss": 0.08896636962890625, "step": 3152 }, { "epoch": 0.439350658398941, "grad_norm": 1.7602611780166626, "learning_rate": 6.603244781556527e-06, "loss": 0.12056350708007812, "step": 3153 }, { "epoch": 0.4394900020901554, "grad_norm": 1.246283769607544, "learning_rate": 6.601012262499718e-06, "loss": 0.11364173889160156, "step": 3154 }, { "epoch": 0.43962934578136975, "grad_norm": 1.190759301185608, "learning_rate": 6.598779387734535e-06, "loss": 0.14255905151367188, "step": 3155 }, { "epoch": 0.43976868947258413, "grad_norm": 1.46743643283844, "learning_rate": 6.596546157757075e-06, "loss": 0.10435295104980469, "step": 3156 }, { "epoch": 0.4399080331637985, "grad_norm": 0.9813275933265686, "learning_rate": 6.594312573063506e-06, "loss": 0.09832191467285156, "step": 3157 }, { "epoch": 0.4400473768550129, "grad_norm": 1.1463825702667236, "learning_rate": 6.592078634150084e-06, "loss": 0.10547447204589844, "step": 3158 }, { "epoch": 0.44018672054622726, "grad_norm": 0.8078344464302063, "learning_rate": 6.589844341513137e-06, "loss": 0.08147239685058594, "step": 3159 }, { "epoch": 0.44032606423744164, "grad_norm": 0.9010326266288757, "learning_rate": 6.587609695649073e-06, "loss": 0.06612300872802734, "step": 3160 }, { "epoch": 0.440465407928656, "grad_norm": 1.1047104597091675, "learning_rate": 6.585374697054382e-06, "loss": 0.1341075897216797, "step": 3161 }, { "epoch": 0.4406047516198704, "grad_norm": 0.7469981908798218, "learning_rate": 6.583139346225627e-06, "loss": 0.07870674133300781, "step": 3162 }, { "epoch": 0.44074409531108477, "grad_norm": 0.5715487599372864, "learning_rate": 6.580903643659453e-06, "loss": 0.07759666442871094, "step": 3163 }, { "epoch": 0.44088343900229915, "grad_norm": 0.7554451823234558, "learning_rate": 6.578667589852583e-06, "loss": 0.08979988098144531, "step": 3164 }, { "epoch": 0.4410227826935135, "grad_norm": 1.068941593170166, "learning_rate": 6.576431185301815e-06, "loss": 0.10141563415527344, "step": 3165 }, { "epoch": 0.4411621263847279, "grad_norm": 0.6923917531967163, "learning_rate": 6.574194430504027e-06, "loss": 0.08113574981689453, "step": 3166 }, { "epoch": 0.44130147007594234, "grad_norm": 0.9365276098251343, "learning_rate": 6.571957325956178e-06, "loss": 0.13990402221679688, "step": 3167 }, { "epoch": 0.4414408137671567, "grad_norm": 1.8556712865829468, "learning_rate": 6.569719872155299e-06, "loss": 0.12951087951660156, "step": 3168 }, { "epoch": 0.4415801574583711, "grad_norm": 1.208693027496338, "learning_rate": 6.567482069598503e-06, "loss": 0.09564018249511719, "step": 3169 }, { "epoch": 0.44171950114958547, "grad_norm": 1.0498473644256592, "learning_rate": 6.565243918782975e-06, "loss": 0.1133575439453125, "step": 3170 }, { "epoch": 0.44185884484079985, "grad_norm": 0.5760064125061035, "learning_rate": 6.563005420205984e-06, "loss": 0.06515216827392578, "step": 3171 }, { "epoch": 0.4419981885320142, "grad_norm": 0.861977219581604, "learning_rate": 6.560766574364874e-06, "loss": 0.08542442321777344, "step": 3172 }, { "epoch": 0.4421375322232286, "grad_norm": 1.0428144931793213, "learning_rate": 6.558527381757063e-06, "loss": 0.12207794189453125, "step": 3173 }, { "epoch": 0.442276875914443, "grad_norm": 0.6635869145393372, "learning_rate": 6.55628784288005e-06, "loss": 0.10173606872558594, "step": 3174 }, { "epoch": 0.44241621960565736, "grad_norm": 0.7191823124885559, "learning_rate": 6.5540479582314085e-06, "loss": 0.09441184997558594, "step": 3175 }, { "epoch": 0.44255556329687173, "grad_norm": 0.5570213198661804, "learning_rate": 6.55180772830879e-06, "loss": 0.06316757202148438, "step": 3176 }, { "epoch": 0.4426949069880861, "grad_norm": 1.2874747514724731, "learning_rate": 6.5495671536099235e-06, "loss": 0.08115959167480469, "step": 3177 }, { "epoch": 0.4428342506793005, "grad_norm": 0.5817548036575317, "learning_rate": 6.5473262346326125e-06, "loss": 0.09474754333496094, "step": 3178 }, { "epoch": 0.44297359437051487, "grad_norm": 0.7975959777832031, "learning_rate": 6.545084971874738e-06, "loss": 0.10606193542480469, "step": 3179 }, { "epoch": 0.44311293806172924, "grad_norm": 1.0993367433547974, "learning_rate": 6.542843365834257e-06, "loss": 0.12380218505859375, "step": 3180 }, { "epoch": 0.4432522817529436, "grad_norm": 0.48438677191734314, "learning_rate": 6.540601417009205e-06, "loss": 0.061981201171875, "step": 3181 }, { "epoch": 0.443391625444158, "grad_norm": 0.8917725086212158, "learning_rate": 6.538359125897691e-06, "loss": 0.11500263214111328, "step": 3182 }, { "epoch": 0.4435309691353724, "grad_norm": 1.0732979774475098, "learning_rate": 6.536116492997899e-06, "loss": 0.10655975341796875, "step": 3183 }, { "epoch": 0.44367031282658675, "grad_norm": 1.1453241109848022, "learning_rate": 6.5338735188080916e-06, "loss": 0.12381362915039062, "step": 3184 }, { "epoch": 0.44380965651780113, "grad_norm": 0.8013611435890198, "learning_rate": 6.53163020382661e-06, "loss": 0.10307502746582031, "step": 3185 }, { "epoch": 0.4439490002090155, "grad_norm": 0.9108036160469055, "learning_rate": 6.529386548551864e-06, "loss": 0.09304618835449219, "step": 3186 }, { "epoch": 0.44408834390022994, "grad_norm": 0.908025324344635, "learning_rate": 6.5271425534823415e-06, "loss": 0.07575225830078125, "step": 3187 }, { "epoch": 0.4442276875914443, "grad_norm": 0.3900138735771179, "learning_rate": 6.524898219116612e-06, "loss": 0.06712913513183594, "step": 3188 }, { "epoch": 0.4443670312826587, "grad_norm": 0.9620790481567383, "learning_rate": 6.522653545953309e-06, "loss": 0.09563636779785156, "step": 3189 }, { "epoch": 0.4445063749738731, "grad_norm": 0.5524144768714905, "learning_rate": 6.520408534491154e-06, "loss": 0.07597541809082031, "step": 3190 }, { "epoch": 0.44464571866508745, "grad_norm": 1.0062233209609985, "learning_rate": 6.518163185228932e-06, "loss": 0.10425186157226562, "step": 3191 }, { "epoch": 0.44478506235630183, "grad_norm": 1.1867729425430298, "learning_rate": 6.515917498665511e-06, "loss": 0.17680740356445312, "step": 3192 }, { "epoch": 0.4449244060475162, "grad_norm": 1.0034815073013306, "learning_rate": 6.51367147529983e-06, "loss": 0.08327865600585938, "step": 3193 }, { "epoch": 0.4450637497387306, "grad_norm": 0.5423995852470398, "learning_rate": 6.511425115630906e-06, "loss": 0.08684158325195312, "step": 3194 }, { "epoch": 0.44520309342994496, "grad_norm": 0.699979841709137, "learning_rate": 6.509178420157828e-06, "loss": 0.07283782958984375, "step": 3195 }, { "epoch": 0.44534243712115934, "grad_norm": 0.7273909449577332, "learning_rate": 6.506931389379759e-06, "loss": 0.09296417236328125, "step": 3196 }, { "epoch": 0.4454817808123737, "grad_norm": 0.7457610368728638, "learning_rate": 6.50468402379594e-06, "loss": 0.10185050964355469, "step": 3197 }, { "epoch": 0.4456211245035881, "grad_norm": 1.335027813911438, "learning_rate": 6.502436323905683e-06, "loss": 0.12169075012207031, "step": 3198 }, { "epoch": 0.4457604681948025, "grad_norm": 1.671728491783142, "learning_rate": 6.500188290208377e-06, "loss": 0.07667922973632812, "step": 3199 }, { "epoch": 0.44589981188601685, "grad_norm": 0.48090919852256775, "learning_rate": 6.49793992320348e-06, "loss": 0.09273338317871094, "step": 3200 }, { "epoch": 0.44603915557723123, "grad_norm": 1.205275535583496, "learning_rate": 6.495691223390534e-06, "loss": 0.11070632934570312, "step": 3201 }, { "epoch": 0.4461784992684456, "grad_norm": 0.65388423204422, "learning_rate": 6.4934421912691445e-06, "loss": 0.08406829833984375, "step": 3202 }, { "epoch": 0.44631784295966, "grad_norm": 1.2798669338226318, "learning_rate": 6.4911928273389946e-06, "loss": 0.13627243041992188, "step": 3203 }, { "epoch": 0.44645718665087436, "grad_norm": 0.5946604609489441, "learning_rate": 6.488943132099845e-06, "loss": 0.07951164245605469, "step": 3204 }, { "epoch": 0.44659653034208874, "grad_norm": 0.7097824811935425, "learning_rate": 6.486693106051523e-06, "loss": 0.11686134338378906, "step": 3205 }, { "epoch": 0.4467358740333031, "grad_norm": 0.6512244939804077, "learning_rate": 6.484442749693935e-06, "loss": 0.09120464324951172, "step": 3206 }, { "epoch": 0.44687521772451755, "grad_norm": 0.9155833125114441, "learning_rate": 6.482192063527058e-06, "loss": 0.10842514038085938, "step": 3207 }, { "epoch": 0.4470145614157319, "grad_norm": 0.8672296404838562, "learning_rate": 6.479941048050944e-06, "loss": 0.09071922302246094, "step": 3208 }, { "epoch": 0.4471539051069463, "grad_norm": 0.8147873878479004, "learning_rate": 6.477689703765717e-06, "loss": 0.10029792785644531, "step": 3209 }, { "epoch": 0.4472932487981607, "grad_norm": 0.44574180245399475, "learning_rate": 6.475438031171574e-06, "loss": 0.06601333618164062, "step": 3210 }, { "epoch": 0.44743259248937506, "grad_norm": 0.9721744060516357, "learning_rate": 6.4731860307687845e-06, "loss": 0.09491539001464844, "step": 3211 }, { "epoch": 0.44757193618058944, "grad_norm": 0.7705510258674622, "learning_rate": 6.470933703057693e-06, "loss": 0.10207557678222656, "step": 3212 }, { "epoch": 0.4477112798718038, "grad_norm": 0.6407455801963806, "learning_rate": 6.468681048538715e-06, "loss": 0.08218574523925781, "step": 3213 }, { "epoch": 0.4478506235630182, "grad_norm": 1.1844524145126343, "learning_rate": 6.4664280677123385e-06, "loss": 0.1300048828125, "step": 3214 }, { "epoch": 0.44798996725423257, "grad_norm": 0.6376287937164307, "learning_rate": 6.464174761079124e-06, "loss": 0.0853271484375, "step": 3215 }, { "epoch": 0.44812931094544695, "grad_norm": 1.1249395608901978, "learning_rate": 6.461921129139704e-06, "loss": 0.12227630615234375, "step": 3216 }, { "epoch": 0.4482686546366613, "grad_norm": 1.1849538087844849, "learning_rate": 6.459667172394788e-06, "loss": 0.08713817596435547, "step": 3217 }, { "epoch": 0.4484079983278757, "grad_norm": 0.9773209691047668, "learning_rate": 6.4574128913451495e-06, "loss": 0.1367340087890625, "step": 3218 }, { "epoch": 0.4485473420190901, "grad_norm": 2.028251886367798, "learning_rate": 6.455158286491641e-06, "loss": 0.16338348388671875, "step": 3219 }, { "epoch": 0.44868668571030446, "grad_norm": 1.1879178285598755, "learning_rate": 6.452903358335182e-06, "loss": 0.11079025268554688, "step": 3220 }, { "epoch": 0.44882602940151883, "grad_norm": 2.2423527240753174, "learning_rate": 6.450648107376767e-06, "loss": 0.16477584838867188, "step": 3221 }, { "epoch": 0.4489653730927332, "grad_norm": 0.8465657234191895, "learning_rate": 6.4483925341174625e-06, "loss": 0.08568859100341797, "step": 3222 }, { "epoch": 0.4491047167839476, "grad_norm": 1.1600370407104492, "learning_rate": 6.4461366390584025e-06, "loss": 0.10888290405273438, "step": 3223 }, { "epoch": 0.44924406047516197, "grad_norm": 0.7692615985870361, "learning_rate": 6.443880422700799e-06, "loss": 0.0902872085571289, "step": 3224 }, { "epoch": 0.44938340416637634, "grad_norm": 0.6210271716117859, "learning_rate": 6.441623885545929e-06, "loss": 0.069366455078125, "step": 3225 }, { "epoch": 0.4495227478575907, "grad_norm": 1.1062285900115967, "learning_rate": 6.439367028095145e-06, "loss": 0.10869979858398438, "step": 3226 }, { "epoch": 0.44966209154880515, "grad_norm": 0.6880283355712891, "learning_rate": 6.437109850849868e-06, "loss": 0.08829116821289062, "step": 3227 }, { "epoch": 0.44980143524001953, "grad_norm": 1.242253065109253, "learning_rate": 6.434852354311592e-06, "loss": 0.11919593811035156, "step": 3228 }, { "epoch": 0.4499407789312339, "grad_norm": 1.1200443506240845, "learning_rate": 6.432594538981881e-06, "loss": 0.11123371124267578, "step": 3229 }, { "epoch": 0.4500801226224483, "grad_norm": 1.3194098472595215, "learning_rate": 6.430336405362371e-06, "loss": 0.14660263061523438, "step": 3230 }, { "epoch": 0.45021946631366266, "grad_norm": 0.9247398376464844, "learning_rate": 6.428077953954766e-06, "loss": 0.08646392822265625, "step": 3231 }, { "epoch": 0.45035881000487704, "grad_norm": 0.5438258647918701, "learning_rate": 6.425819185260842e-06, "loss": 0.08541011810302734, "step": 3232 }, { "epoch": 0.4504981536960914, "grad_norm": 1.7231941223144531, "learning_rate": 6.42356009978245e-06, "loss": 0.15473556518554688, "step": 3233 }, { "epoch": 0.4506374973873058, "grad_norm": 1.1912317276000977, "learning_rate": 6.421300698021502e-06, "loss": 0.09359455108642578, "step": 3234 }, { "epoch": 0.4507768410785202, "grad_norm": 1.0143240690231323, "learning_rate": 6.419040980479989e-06, "loss": 0.09100914001464844, "step": 3235 }, { "epoch": 0.45091618476973455, "grad_norm": 1.4794694185256958, "learning_rate": 6.416780947659967e-06, "loss": 0.11746025085449219, "step": 3236 }, { "epoch": 0.45105552846094893, "grad_norm": 3.5227863788604736, "learning_rate": 6.4145206000635626e-06, "loss": 0.14813232421875, "step": 3237 }, { "epoch": 0.4511948721521633, "grad_norm": 0.5386263132095337, "learning_rate": 6.412259938192978e-06, "loss": 0.068206787109375, "step": 3238 }, { "epoch": 0.4513342158433777, "grad_norm": 0.9899174571037292, "learning_rate": 6.4099989625504756e-06, "loss": 0.11128044128417969, "step": 3239 }, { "epoch": 0.45147355953459206, "grad_norm": 0.6064162254333496, "learning_rate": 6.4077376736383954e-06, "loss": 0.08326148986816406, "step": 3240 }, { "epoch": 0.45161290322580644, "grad_norm": 0.8058703541755676, "learning_rate": 6.405476071959142e-06, "loss": 0.1270275115966797, "step": 3241 }, { "epoch": 0.4517522469170208, "grad_norm": 0.49748802185058594, "learning_rate": 6.403214158015194e-06, "loss": 0.07755565643310547, "step": 3242 }, { "epoch": 0.4518915906082352, "grad_norm": 1.1110981702804565, "learning_rate": 6.400951932309097e-06, "loss": 0.11175727844238281, "step": 3243 }, { "epoch": 0.45203093429944957, "grad_norm": 1.0712512731552124, "learning_rate": 6.3986893953434625e-06, "loss": 0.09116935729980469, "step": 3244 }, { "epoch": 0.45217027799066395, "grad_norm": 1.062434434890747, "learning_rate": 6.396426547620979e-06, "loss": 0.10681915283203125, "step": 3245 }, { "epoch": 0.4523096216818783, "grad_norm": 0.7274386882781982, "learning_rate": 6.394163389644397e-06, "loss": 0.11203575134277344, "step": 3246 }, { "epoch": 0.45244896537309276, "grad_norm": 0.6192157864570618, "learning_rate": 6.391899921916538e-06, "loss": 0.09506034851074219, "step": 3247 }, { "epoch": 0.45258830906430714, "grad_norm": 0.7967462539672852, "learning_rate": 6.389636144940294e-06, "loss": 0.08646202087402344, "step": 3248 }, { "epoch": 0.4527276527555215, "grad_norm": 0.818759560585022, "learning_rate": 6.387372059218626e-06, "loss": 0.07726669311523438, "step": 3249 }, { "epoch": 0.4528669964467359, "grad_norm": 1.8085963726043701, "learning_rate": 6.38510766525456e-06, "loss": 0.11324834823608398, "step": 3250 }, { "epoch": 0.45300634013795027, "grad_norm": 0.876104474067688, "learning_rate": 6.382842963551193e-06, "loss": 0.08128929138183594, "step": 3251 }, { "epoch": 0.45314568382916465, "grad_norm": 1.073166012763977, "learning_rate": 6.380577954611691e-06, "loss": 0.0922079086303711, "step": 3252 }, { "epoch": 0.453285027520379, "grad_norm": 1.650226354598999, "learning_rate": 6.378312638939286e-06, "loss": 0.1120452880859375, "step": 3253 }, { "epoch": 0.4534243712115934, "grad_norm": 1.5335313081741333, "learning_rate": 6.3760470170372815e-06, "loss": 0.17691516876220703, "step": 3254 }, { "epoch": 0.4535637149028078, "grad_norm": 0.6402930021286011, "learning_rate": 6.373781089409043e-06, "loss": 0.07904434204101562, "step": 3255 }, { "epoch": 0.45370305859402216, "grad_norm": 0.8184432983398438, "learning_rate": 6.371514856558013e-06, "loss": 0.09550762176513672, "step": 3256 }, { "epoch": 0.45384240228523653, "grad_norm": 0.8708903789520264, "learning_rate": 6.369248318987692e-06, "loss": 0.1283245086669922, "step": 3257 }, { "epoch": 0.4539817459764509, "grad_norm": 0.9069610238075256, "learning_rate": 6.3669814772016555e-06, "loss": 0.10837936401367188, "step": 3258 }, { "epoch": 0.4541210896676653, "grad_norm": 0.6853559613227844, "learning_rate": 6.3647143317035445e-06, "loss": 0.07577991485595703, "step": 3259 }, { "epoch": 0.45426043335887967, "grad_norm": 0.6762079000473022, "learning_rate": 6.362446882997064e-06, "loss": 0.10235786437988281, "step": 3260 }, { "epoch": 0.45439977705009404, "grad_norm": 0.7702606320381165, "learning_rate": 6.360179131585993e-06, "loss": 0.08069801330566406, "step": 3261 }, { "epoch": 0.4545391207413084, "grad_norm": 0.7308446168899536, "learning_rate": 6.357911077974173e-06, "loss": 0.1045379638671875, "step": 3262 }, { "epoch": 0.4546784644325228, "grad_norm": 0.7523708343505859, "learning_rate": 6.355642722665512e-06, "loss": 0.11104965209960938, "step": 3263 }, { "epoch": 0.4548178081237372, "grad_norm": 1.4470324516296387, "learning_rate": 6.353374066163988e-06, "loss": 0.11575889587402344, "step": 3264 }, { "epoch": 0.45495715181495155, "grad_norm": 2.1389241218566895, "learning_rate": 6.351105108973644e-06, "loss": 0.15002822875976562, "step": 3265 }, { "epoch": 0.45509649550616593, "grad_norm": 0.6525812745094299, "learning_rate": 6.34883585159859e-06, "loss": 0.0867767333984375, "step": 3266 }, { "epoch": 0.45523583919738037, "grad_norm": 1.137678623199463, "learning_rate": 6.346566294543008e-06, "loss": 0.08702278137207031, "step": 3267 }, { "epoch": 0.45537518288859474, "grad_norm": 1.4111932516098022, "learning_rate": 6.344296438311134e-06, "loss": 0.1409626007080078, "step": 3268 }, { "epoch": 0.4555145265798091, "grad_norm": 1.3777865171432495, "learning_rate": 6.342026283407286e-06, "loss": 0.10697174072265625, "step": 3269 }, { "epoch": 0.4556538702710235, "grad_norm": 0.7448800802230835, "learning_rate": 6.339755830335834e-06, "loss": 0.0782928466796875, "step": 3270 }, { "epoch": 0.4557932139622379, "grad_norm": 0.7120360136032104, "learning_rate": 6.337485079601224e-06, "loss": 0.060367584228515625, "step": 3271 }, { "epoch": 0.45593255765345225, "grad_norm": 3.045095205307007, "learning_rate": 6.335214031707966e-06, "loss": 0.10266304016113281, "step": 3272 }, { "epoch": 0.45607190134466663, "grad_norm": 2.3272218704223633, "learning_rate": 6.332942687160632e-06, "loss": 0.09620285034179688, "step": 3273 }, { "epoch": 0.456211245035881, "grad_norm": 1.571864128112793, "learning_rate": 6.3306710464638645e-06, "loss": 0.14051437377929688, "step": 3274 }, { "epoch": 0.4563505887270954, "grad_norm": 0.7774268388748169, "learning_rate": 6.328399110122371e-06, "loss": 0.09914398193359375, "step": 3275 }, { "epoch": 0.45648993241830976, "grad_norm": 0.6872128844261169, "learning_rate": 6.3261268786409225e-06, "loss": 0.08663177490234375, "step": 3276 }, { "epoch": 0.45662927610952414, "grad_norm": 0.8142517805099487, "learning_rate": 6.323854352524359e-06, "loss": 0.07268142700195312, "step": 3277 }, { "epoch": 0.4567686198007385, "grad_norm": 1.398407220840454, "learning_rate": 6.321581532277581e-06, "loss": 0.12692832946777344, "step": 3278 }, { "epoch": 0.4569079634919529, "grad_norm": 1.138433814048767, "learning_rate": 6.319308418405559e-06, "loss": 0.11179733276367188, "step": 3279 }, { "epoch": 0.4570473071831673, "grad_norm": 1.1890497207641602, "learning_rate": 6.317035011413327e-06, "loss": 0.09960746765136719, "step": 3280 }, { "epoch": 0.45718665087438165, "grad_norm": 0.5739812254905701, "learning_rate": 6.314761311805983e-06, "loss": 0.0789794921875, "step": 3281 }, { "epoch": 0.45732599456559603, "grad_norm": 1.4107930660247803, "learning_rate": 6.312487320088693e-06, "loss": 0.09980106353759766, "step": 3282 }, { "epoch": 0.4574653382568104, "grad_norm": 0.729939341545105, "learning_rate": 6.3102130367666855e-06, "loss": 0.10198116302490234, "step": 3283 }, { "epoch": 0.4576046819480248, "grad_norm": 1.7780829668045044, "learning_rate": 6.307938462345253e-06, "loss": 0.13692378997802734, "step": 3284 }, { "epoch": 0.45774402563923916, "grad_norm": 0.6377604603767395, "learning_rate": 6.305663597329756e-06, "loss": 0.12047958374023438, "step": 3285 }, { "epoch": 0.45788336933045354, "grad_norm": 0.6440433263778687, "learning_rate": 6.303388442225616e-06, "loss": 0.10817909240722656, "step": 3286 }, { "epoch": 0.45802271302166797, "grad_norm": 0.5347619652748108, "learning_rate": 6.30111299753832e-06, "loss": 0.08347892761230469, "step": 3287 }, { "epoch": 0.45816205671288235, "grad_norm": 0.5218093991279602, "learning_rate": 6.298837263773423e-06, "loss": 0.06805419921875, "step": 3288 }, { "epoch": 0.4583014004040967, "grad_norm": 0.6652559041976929, "learning_rate": 6.2965612414365365e-06, "loss": 0.07275390625, "step": 3289 }, { "epoch": 0.4584407440953111, "grad_norm": 0.3125375509262085, "learning_rate": 6.294284931033344e-06, "loss": 0.05499076843261719, "step": 3290 }, { "epoch": 0.4585800877865255, "grad_norm": 0.9514482617378235, "learning_rate": 6.292008333069589e-06, "loss": 0.11029624938964844, "step": 3291 }, { "epoch": 0.45871943147773986, "grad_norm": 0.5040321946144104, "learning_rate": 6.289731448051079e-06, "loss": 0.07922172546386719, "step": 3292 }, { "epoch": 0.45885877516895424, "grad_norm": 0.6858593225479126, "learning_rate": 6.287454276483687e-06, "loss": 0.08736991882324219, "step": 3293 }, { "epoch": 0.4589981188601686, "grad_norm": 0.7865546345710754, "learning_rate": 6.2851768188733485e-06, "loss": 0.11589813232421875, "step": 3294 }, { "epoch": 0.459137462551383, "grad_norm": 0.7426467537879944, "learning_rate": 6.282899075726061e-06, "loss": 0.08817100524902344, "step": 3295 }, { "epoch": 0.45927680624259737, "grad_norm": 0.5311557054519653, "learning_rate": 6.280621047547888e-06, "loss": 0.07881355285644531, "step": 3296 }, { "epoch": 0.45941614993381175, "grad_norm": 1.2642289400100708, "learning_rate": 6.278342734844955e-06, "loss": 0.10471153259277344, "step": 3297 }, { "epoch": 0.4595554936250261, "grad_norm": 0.6198820471763611, "learning_rate": 6.276064138123453e-06, "loss": 0.08771324157714844, "step": 3298 }, { "epoch": 0.4596948373162405, "grad_norm": 0.9296325445175171, "learning_rate": 6.27378525788963e-06, "loss": 0.09330558776855469, "step": 3299 }, { "epoch": 0.4598341810074549, "grad_norm": 1.4222092628479004, "learning_rate": 6.271506094649804e-06, "loss": 0.11575889587402344, "step": 3300 }, { "epoch": 0.45997352469866926, "grad_norm": 0.5179286003112793, "learning_rate": 6.269226648910356e-06, "loss": 0.06936359405517578, "step": 3301 }, { "epoch": 0.46011286838988363, "grad_norm": 1.6509913206100464, "learning_rate": 6.266946921177721e-06, "loss": 0.10863113403320312, "step": 3302 }, { "epoch": 0.460252212081098, "grad_norm": 0.7278928756713867, "learning_rate": 6.264666911958404e-06, "loss": 0.09373855590820312, "step": 3303 }, { "epoch": 0.4603915557723124, "grad_norm": 0.9477945566177368, "learning_rate": 6.262386621758975e-06, "loss": 0.08136749267578125, "step": 3304 }, { "epoch": 0.46053089946352677, "grad_norm": 1.2100499868392944, "learning_rate": 6.2601060510860565e-06, "loss": 0.10134696960449219, "step": 3305 }, { "epoch": 0.46067024315474114, "grad_norm": 0.9044459462165833, "learning_rate": 6.2578252004463436e-06, "loss": 0.09594535827636719, "step": 3306 }, { "epoch": 0.4608095868459556, "grad_norm": 0.5844743251800537, "learning_rate": 6.255544070346588e-06, "loss": 0.07663917541503906, "step": 3307 }, { "epoch": 0.46094893053716995, "grad_norm": 0.8406086564064026, "learning_rate": 6.2532626612936035e-06, "loss": 0.08928680419921875, "step": 3308 }, { "epoch": 0.46108827422838433, "grad_norm": 1.6368426084518433, "learning_rate": 6.250980973794268e-06, "loss": 0.10560035705566406, "step": 3309 }, { "epoch": 0.4612276179195987, "grad_norm": 1.0517981052398682, "learning_rate": 6.248699008355522e-06, "loss": 0.11008834838867188, "step": 3310 }, { "epoch": 0.4613669616108131, "grad_norm": 0.5880938172340393, "learning_rate": 6.2464167654843645e-06, "loss": 0.0802011489868164, "step": 3311 }, { "epoch": 0.46150630530202746, "grad_norm": 0.890955924987793, "learning_rate": 6.2441342456878565e-06, "loss": 0.12262105941772461, "step": 3312 }, { "epoch": 0.46164564899324184, "grad_norm": 0.9625645279884338, "learning_rate": 6.2418514494731245e-06, "loss": 0.08122825622558594, "step": 3313 }, { "epoch": 0.4617849926844562, "grad_norm": 1.0509727001190186, "learning_rate": 6.239568377347352e-06, "loss": 0.11742258071899414, "step": 3314 }, { "epoch": 0.4619243363756706, "grad_norm": 0.7277135848999023, "learning_rate": 6.237285029817786e-06, "loss": 0.0994415283203125, "step": 3315 }, { "epoch": 0.462063680066885, "grad_norm": 0.6444675922393799, "learning_rate": 6.235001407391732e-06, "loss": 0.08909320831298828, "step": 3316 }, { "epoch": 0.46220302375809935, "grad_norm": 1.1213346719741821, "learning_rate": 6.232717510576563e-06, "loss": 0.08488273620605469, "step": 3317 }, { "epoch": 0.46234236744931373, "grad_norm": 2.8661177158355713, "learning_rate": 6.230433339879706e-06, "loss": 0.11906623840332031, "step": 3318 }, { "epoch": 0.4624817111405281, "grad_norm": 0.7088794708251953, "learning_rate": 6.228148895808652e-06, "loss": 0.0831003189086914, "step": 3319 }, { "epoch": 0.4626210548317425, "grad_norm": 0.9400304555892944, "learning_rate": 6.225864178870954e-06, "loss": 0.10994148254394531, "step": 3320 }, { "epoch": 0.46276039852295686, "grad_norm": 0.6214890480041504, "learning_rate": 6.22357918957422e-06, "loss": 0.06322431564331055, "step": 3321 }, { "epoch": 0.46289974221417124, "grad_norm": 0.8164837956428528, "learning_rate": 6.221293928426128e-06, "loss": 0.08365631103515625, "step": 3322 }, { "epoch": 0.4630390859053856, "grad_norm": 0.8903563618659973, "learning_rate": 6.219008395934405e-06, "loss": 0.12766647338867188, "step": 3323 }, { "epoch": 0.4631784295966, "grad_norm": 1.0983550548553467, "learning_rate": 6.216722592606847e-06, "loss": 0.10289859771728516, "step": 3324 }, { "epoch": 0.46331777328781437, "grad_norm": 0.2958800792694092, "learning_rate": 6.214436518951308e-06, "loss": 0.05912017822265625, "step": 3325 }, { "epoch": 0.46345711697902875, "grad_norm": 2.043830394744873, "learning_rate": 6.212150175475701e-06, "loss": 0.1232147216796875, "step": 3326 }, { "epoch": 0.4635964606702432, "grad_norm": 1.2812793254852295, "learning_rate": 6.209863562687998e-06, "loss": 0.10471343994140625, "step": 3327 }, { "epoch": 0.46373580436145756, "grad_norm": 0.7407235503196716, "learning_rate": 6.207576681096233e-06, "loss": 0.08704566955566406, "step": 3328 }, { "epoch": 0.46387514805267194, "grad_norm": 0.6696612238883972, "learning_rate": 6.2052895312085e-06, "loss": 0.10296154022216797, "step": 3329 }, { "epoch": 0.4640144917438863, "grad_norm": 0.49303409457206726, "learning_rate": 6.203002113532949e-06, "loss": 0.08372116088867188, "step": 3330 }, { "epoch": 0.4641538354351007, "grad_norm": 0.7524163126945496, "learning_rate": 6.200714428577794e-06, "loss": 0.08034515380859375, "step": 3331 }, { "epoch": 0.46429317912631507, "grad_norm": 1.0633257627487183, "learning_rate": 6.198426476851305e-06, "loss": 0.1303234100341797, "step": 3332 }, { "epoch": 0.46443252281752945, "grad_norm": 0.4773024022579193, "learning_rate": 6.196138258861815e-06, "loss": 0.07700347900390625, "step": 3333 }, { "epoch": 0.4645718665087438, "grad_norm": 0.4073560833930969, "learning_rate": 6.193849775117709e-06, "loss": 0.07451820373535156, "step": 3334 }, { "epoch": 0.4647112101999582, "grad_norm": 0.6085910201072693, "learning_rate": 6.191561026127444e-06, "loss": 0.08561515808105469, "step": 3335 }, { "epoch": 0.4648505538911726, "grad_norm": 1.4385159015655518, "learning_rate": 6.18927201239952e-06, "loss": 0.11544132232666016, "step": 3336 }, { "epoch": 0.46498989758238696, "grad_norm": 0.815643310546875, "learning_rate": 6.186982734442505e-06, "loss": 0.0904245376586914, "step": 3337 }, { "epoch": 0.46512924127360133, "grad_norm": 1.8756096363067627, "learning_rate": 6.184693192765028e-06, "loss": 0.12747859954833984, "step": 3338 }, { "epoch": 0.4652685849648157, "grad_norm": 3.6210083961486816, "learning_rate": 6.1824033878757685e-06, "loss": 0.09552764892578125, "step": 3339 }, { "epoch": 0.4654079286560301, "grad_norm": 2.1711387634277344, "learning_rate": 6.180113320283473e-06, "loss": 0.11487102508544922, "step": 3340 }, { "epoch": 0.46554727234724447, "grad_norm": 0.79695725440979, "learning_rate": 6.177822990496939e-06, "loss": 0.11280441284179688, "step": 3341 }, { "epoch": 0.46568661603845884, "grad_norm": 0.759001612663269, "learning_rate": 6.175532399025027e-06, "loss": 0.09724044799804688, "step": 3342 }, { "epoch": 0.4658259597296732, "grad_norm": 1.3809661865234375, "learning_rate": 6.173241546376654e-06, "loss": 0.11312675476074219, "step": 3343 }, { "epoch": 0.4659653034208876, "grad_norm": 1.9222506284713745, "learning_rate": 6.170950433060795e-06, "loss": 0.15031051635742188, "step": 3344 }, { "epoch": 0.466104647112102, "grad_norm": 1.1555739641189575, "learning_rate": 6.168659059586483e-06, "loss": 0.08835220336914062, "step": 3345 }, { "epoch": 0.46624399080331635, "grad_norm": 0.786288857460022, "learning_rate": 6.166367426462808e-06, "loss": 0.09444713592529297, "step": 3346 }, { "epoch": 0.46638333449453073, "grad_norm": 0.5748496055603027, "learning_rate": 6.16407553419892e-06, "loss": 0.06637096405029297, "step": 3347 }, { "epoch": 0.46652267818574517, "grad_norm": 2.4979350566864014, "learning_rate": 6.161783383304024e-06, "loss": 0.11719131469726562, "step": 3348 }, { "epoch": 0.46666202187695954, "grad_norm": 2.0591542720794678, "learning_rate": 6.159490974287386e-06, "loss": 0.0959625244140625, "step": 3349 }, { "epoch": 0.4668013655681739, "grad_norm": 1.312384009361267, "learning_rate": 6.157198307658323e-06, "loss": 0.08984565734863281, "step": 3350 }, { "epoch": 0.4669407092593883, "grad_norm": 0.7795844674110413, "learning_rate": 6.154905383926218e-06, "loss": 0.1013188362121582, "step": 3351 }, { "epoch": 0.4670800529506027, "grad_norm": 0.9728420972824097, "learning_rate": 6.152612203600502e-06, "loss": 0.1277294158935547, "step": 3352 }, { "epoch": 0.46721939664181705, "grad_norm": 0.8881888389587402, "learning_rate": 6.150318767190668e-06, "loss": 0.06570243835449219, "step": 3353 }, { "epoch": 0.46735874033303143, "grad_norm": 1.0748257637023926, "learning_rate": 6.148025075206268e-06, "loss": 0.09820365905761719, "step": 3354 }, { "epoch": 0.4674980840242458, "grad_norm": 0.6513294577598572, "learning_rate": 6.145731128156904e-06, "loss": 0.08356475830078125, "step": 3355 }, { "epoch": 0.4676374277154602, "grad_norm": 0.897659182548523, "learning_rate": 6.143436926552242e-06, "loss": 0.09108543395996094, "step": 3356 }, { "epoch": 0.46777677140667456, "grad_norm": 1.134299635887146, "learning_rate": 6.141142470902001e-06, "loss": 0.10291671752929688, "step": 3357 }, { "epoch": 0.46791611509788894, "grad_norm": 0.9833476543426514, "learning_rate": 6.138847761715955e-06, "loss": 0.0805215835571289, "step": 3358 }, { "epoch": 0.4680554587891033, "grad_norm": 2.641042470932007, "learning_rate": 6.1365527995039366e-06, "loss": 0.09748458862304688, "step": 3359 }, { "epoch": 0.4681948024803177, "grad_norm": 0.43663275241851807, "learning_rate": 6.134257584775833e-06, "loss": 0.05917835235595703, "step": 3360 }, { "epoch": 0.4683341461715321, "grad_norm": 1.6267718076705933, "learning_rate": 6.131962118041591e-06, "loss": 0.10498619079589844, "step": 3361 }, { "epoch": 0.46847348986274645, "grad_norm": 0.7493172287940979, "learning_rate": 6.129666399811209e-06, "loss": 0.07992744445800781, "step": 3362 }, { "epoch": 0.46861283355396083, "grad_norm": 1.2141354084014893, "learning_rate": 6.127370430594745e-06, "loss": 0.11040019989013672, "step": 3363 }, { "epoch": 0.4687521772451752, "grad_norm": 0.802086353302002, "learning_rate": 6.125074210902307e-06, "loss": 0.08902740478515625, "step": 3364 }, { "epoch": 0.4688915209363896, "grad_norm": 0.9919483661651611, "learning_rate": 6.122777741244067e-06, "loss": 0.13258743286132812, "step": 3365 }, { "epoch": 0.46903086462760396, "grad_norm": 1.225511908531189, "learning_rate": 6.120481022130245e-06, "loss": 0.12927818298339844, "step": 3366 }, { "epoch": 0.46917020831881834, "grad_norm": 0.6012612581253052, "learning_rate": 6.118184054071124e-06, "loss": 0.06964302062988281, "step": 3367 }, { "epoch": 0.46930955201003277, "grad_norm": 0.7194647192955017, "learning_rate": 6.115886837577031e-06, "loss": 0.07234382629394531, "step": 3368 }, { "epoch": 0.46944889570124715, "grad_norm": 0.8274115324020386, "learning_rate": 6.113589373158361e-06, "loss": 0.0989065170288086, "step": 3369 }, { "epoch": 0.4695882393924615, "grad_norm": 1.533910870552063, "learning_rate": 6.111291661325556e-06, "loss": 0.11561393737792969, "step": 3370 }, { "epoch": 0.4697275830836759, "grad_norm": 0.9808705449104309, "learning_rate": 6.108993702589114e-06, "loss": 0.09202766418457031, "step": 3371 }, { "epoch": 0.4698669267748903, "grad_norm": 1.1129584312438965, "learning_rate": 6.106695497459591e-06, "loss": 0.0884866714477539, "step": 3372 }, { "epoch": 0.47000627046610466, "grad_norm": 1.0066311359405518, "learning_rate": 6.104397046447593e-06, "loss": 0.09113502502441406, "step": 3373 }, { "epoch": 0.47014561415731904, "grad_norm": 0.6425580978393555, "learning_rate": 6.102098350063786e-06, "loss": 0.0764760971069336, "step": 3374 }, { "epoch": 0.4702849578485334, "grad_norm": 0.731098473072052, "learning_rate": 6.099799408818889e-06, "loss": 0.09151458740234375, "step": 3375 }, { "epoch": 0.4704243015397478, "grad_norm": 0.739998459815979, "learning_rate": 6.097500223223669e-06, "loss": 0.10045337677001953, "step": 3376 }, { "epoch": 0.47056364523096217, "grad_norm": 0.8691949248313904, "learning_rate": 6.095200793788958e-06, "loss": 0.08979225158691406, "step": 3377 }, { "epoch": 0.47070298892217655, "grad_norm": 1.1473973989486694, "learning_rate": 6.092901121025634e-06, "loss": 0.0884866714477539, "step": 3378 }, { "epoch": 0.4708423326133909, "grad_norm": 1.2381806373596191, "learning_rate": 6.090601205444632e-06, "loss": 0.1067509651184082, "step": 3379 }, { "epoch": 0.4709816763046053, "grad_norm": 0.9903778433799744, "learning_rate": 6.088301047556942e-06, "loss": 0.1360797882080078, "step": 3380 }, { "epoch": 0.4711210199958197, "grad_norm": 0.7872458696365356, "learning_rate": 6.086000647873604e-06, "loss": 0.09325027465820312, "step": 3381 }, { "epoch": 0.47126036368703406, "grad_norm": 0.44178634881973267, "learning_rate": 6.083700006905715e-06, "loss": 0.07732009887695312, "step": 3382 }, { "epoch": 0.47139970737824843, "grad_norm": 0.5003454685211182, "learning_rate": 6.081399125164429e-06, "loss": 0.06753349304199219, "step": 3383 }, { "epoch": 0.4715390510694628, "grad_norm": 0.6707443594932556, "learning_rate": 6.079098003160943e-06, "loss": 0.09385490417480469, "step": 3384 }, { "epoch": 0.4716783947606772, "grad_norm": 0.5263277292251587, "learning_rate": 6.076796641406518e-06, "loss": 0.07420921325683594, "step": 3385 }, { "epoch": 0.47181773845189157, "grad_norm": 2.4121243953704834, "learning_rate": 6.074495040412465e-06, "loss": 0.1398468017578125, "step": 3386 }, { "epoch": 0.47195708214310594, "grad_norm": 0.6443471312522888, "learning_rate": 6.072193200690142e-06, "loss": 0.08577156066894531, "step": 3387 }, { "epoch": 0.4720964258343204, "grad_norm": 0.5440993309020996, "learning_rate": 6.069891122750971e-06, "loss": 0.06981277465820312, "step": 3388 }, { "epoch": 0.47223576952553475, "grad_norm": 1.4863964319229126, "learning_rate": 6.067588807106416e-06, "loss": 0.10833549499511719, "step": 3389 }, { "epoch": 0.47237511321674913, "grad_norm": 1.400130271911621, "learning_rate": 6.0652862542680034e-06, "loss": 0.11296653747558594, "step": 3390 }, { "epoch": 0.4725144569079635, "grad_norm": 1.9227118492126465, "learning_rate": 6.062983464747305e-06, "loss": 0.12976837158203125, "step": 3391 }, { "epoch": 0.4726538005991779, "grad_norm": 0.562820315361023, "learning_rate": 6.06068043905595e-06, "loss": 0.06840801239013672, "step": 3392 }, { "epoch": 0.47279314429039226, "grad_norm": 0.9121944308280945, "learning_rate": 6.0583771777056166e-06, "loss": 0.1107330322265625, "step": 3393 }, { "epoch": 0.47293248798160664, "grad_norm": 0.8843916058540344, "learning_rate": 6.056073681208038e-06, "loss": 0.10100364685058594, "step": 3394 }, { "epoch": 0.473071831672821, "grad_norm": 0.7558985948562622, "learning_rate": 6.053769950074997e-06, "loss": 0.09270286560058594, "step": 3395 }, { "epoch": 0.4732111753640354, "grad_norm": 0.5620368123054504, "learning_rate": 6.051465984818332e-06, "loss": 0.08552932739257812, "step": 3396 }, { "epoch": 0.4733505190552498, "grad_norm": 0.730693519115448, "learning_rate": 6.049161785949931e-06, "loss": 0.08527183532714844, "step": 3397 }, { "epoch": 0.47348986274646415, "grad_norm": 1.0548419952392578, "learning_rate": 6.046857353981732e-06, "loss": 0.09273004531860352, "step": 3398 }, { "epoch": 0.47362920643767853, "grad_norm": 0.6407318711280823, "learning_rate": 6.044552689425731e-06, "loss": 0.10655975341796875, "step": 3399 }, { "epoch": 0.4737685501288929, "grad_norm": 1.185056209564209, "learning_rate": 6.042247792793968e-06, "loss": 0.15840911865234375, "step": 3400 }, { "epoch": 0.4739078938201073, "grad_norm": 0.5714781880378723, "learning_rate": 6.0399426645985424e-06, "loss": 0.07733917236328125, "step": 3401 }, { "epoch": 0.47404723751132166, "grad_norm": 1.2286683320999146, "learning_rate": 6.037637305351599e-06, "loss": 0.11465263366699219, "step": 3402 }, { "epoch": 0.47418658120253604, "grad_norm": 2.466287612915039, "learning_rate": 6.035331715565333e-06, "loss": 0.1666431427001953, "step": 3403 }, { "epoch": 0.4743259248937504, "grad_norm": 0.7983382940292358, "learning_rate": 6.033025895752002e-06, "loss": 0.10039424896240234, "step": 3404 }, { "epoch": 0.4744652685849648, "grad_norm": 0.4652060270309448, "learning_rate": 6.030719846423897e-06, "loss": 0.07309913635253906, "step": 3405 }, { "epoch": 0.47460461227617917, "grad_norm": 0.9088165760040283, "learning_rate": 6.028413568093375e-06, "loss": 0.08818435668945312, "step": 3406 }, { "epoch": 0.47474395596739355, "grad_norm": 3.144831895828247, "learning_rate": 6.026107061272838e-06, "loss": 0.13792800903320312, "step": 3407 }, { "epoch": 0.474883299658608, "grad_norm": 0.8331008553504944, "learning_rate": 6.023800326474738e-06, "loss": 0.08378791809082031, "step": 3408 }, { "epoch": 0.47502264334982236, "grad_norm": 0.5085458159446716, "learning_rate": 6.0214933642115794e-06, "loss": 0.07199954986572266, "step": 3409 }, { "epoch": 0.47516198704103674, "grad_norm": 0.45154377818107605, "learning_rate": 6.019186174995916e-06, "loss": 0.070831298828125, "step": 3410 }, { "epoch": 0.4753013307322511, "grad_norm": 1.321340799331665, "learning_rate": 6.016878759340352e-06, "loss": 0.1078033447265625, "step": 3411 }, { "epoch": 0.4754406744234655, "grad_norm": 0.7050241827964783, "learning_rate": 6.014571117757545e-06, "loss": 0.07603263854980469, "step": 3412 }, { "epoch": 0.47558001811467987, "grad_norm": 1.7637512683868408, "learning_rate": 6.012263250760199e-06, "loss": 0.11039924621582031, "step": 3413 }, { "epoch": 0.47571936180589425, "grad_norm": 1.5738857984542847, "learning_rate": 6.009955158861066e-06, "loss": 0.13329315185546875, "step": 3414 }, { "epoch": 0.4758587054971086, "grad_norm": 1.188688039779663, "learning_rate": 6.007646842572959e-06, "loss": 0.08748626708984375, "step": 3415 }, { "epoch": 0.475998049188323, "grad_norm": 0.7608343958854675, "learning_rate": 6.005338302408724e-06, "loss": 0.09145736694335938, "step": 3416 }, { "epoch": 0.4761373928795374, "grad_norm": 0.6512467265129089, "learning_rate": 6.0030295388812736e-06, "loss": 0.09931182861328125, "step": 3417 }, { "epoch": 0.47627673657075176, "grad_norm": 0.8127191066741943, "learning_rate": 6.000720552503557e-06, "loss": 0.07550239562988281, "step": 3418 }, { "epoch": 0.47641608026196614, "grad_norm": 0.9212750792503357, "learning_rate": 5.998411343788582e-06, "loss": 0.10733413696289062, "step": 3419 }, { "epoch": 0.4765554239531805, "grad_norm": 0.4857236444950104, "learning_rate": 5.996101913249402e-06, "loss": 0.06436920166015625, "step": 3420 }, { "epoch": 0.4766947676443949, "grad_norm": 0.30280813574790955, "learning_rate": 5.993792261399115e-06, "loss": 0.059212684631347656, "step": 3421 }, { "epoch": 0.47683411133560927, "grad_norm": 0.556139349937439, "learning_rate": 5.991482388750878e-06, "loss": 0.08479881286621094, "step": 3422 }, { "epoch": 0.47697345502682365, "grad_norm": 0.9780042171478271, "learning_rate": 5.989172295817889e-06, "loss": 0.1038818359375, "step": 3423 }, { "epoch": 0.477112798718038, "grad_norm": 1.7819905281066895, "learning_rate": 5.9868619831134e-06, "loss": 0.11764144897460938, "step": 3424 }, { "epoch": 0.4772521424092524, "grad_norm": 1.0756043195724487, "learning_rate": 5.984551451150709e-06, "loss": 0.10085678100585938, "step": 3425 }, { "epoch": 0.4773914861004668, "grad_norm": 0.8524236679077148, "learning_rate": 5.9822407004431625e-06, "loss": 0.08650779724121094, "step": 3426 }, { "epoch": 0.47753082979168116, "grad_norm": 0.7342306971549988, "learning_rate": 5.979929731504158e-06, "loss": 0.11530494689941406, "step": 3427 }, { "epoch": 0.4776701734828956, "grad_norm": 1.559676170349121, "learning_rate": 5.977618544847139e-06, "loss": 0.19122886657714844, "step": 3428 }, { "epoch": 0.47780951717410997, "grad_norm": 0.7737494707107544, "learning_rate": 5.975307140985599e-06, "loss": 0.08263587951660156, "step": 3429 }, { "epoch": 0.47794886086532434, "grad_norm": 1.5195978879928589, "learning_rate": 5.972995520433078e-06, "loss": 0.1017293930053711, "step": 3430 }, { "epoch": 0.4780882045565387, "grad_norm": 2.581963300704956, "learning_rate": 5.970683683703168e-06, "loss": 0.10396003723144531, "step": 3431 }, { "epoch": 0.4782275482477531, "grad_norm": 0.29620596766471863, "learning_rate": 5.968371631309502e-06, "loss": 0.0628957748413086, "step": 3432 }, { "epoch": 0.4783668919389675, "grad_norm": 1.019711971282959, "learning_rate": 5.966059363765771e-06, "loss": 0.13601398468017578, "step": 3433 }, { "epoch": 0.47850623563018185, "grad_norm": 1.4383690357208252, "learning_rate": 5.9637468815857016e-06, "loss": 0.08932113647460938, "step": 3434 }, { "epoch": 0.47864557932139623, "grad_norm": 0.7617345452308655, "learning_rate": 5.961434185283079e-06, "loss": 0.06612777709960938, "step": 3435 }, { "epoch": 0.4787849230126106, "grad_norm": 1.253445029258728, "learning_rate": 5.959121275371732e-06, "loss": 0.09314918518066406, "step": 3436 }, { "epoch": 0.478924266703825, "grad_norm": 1.7689846754074097, "learning_rate": 5.956808152365532e-06, "loss": 0.12749481201171875, "step": 3437 }, { "epoch": 0.47906361039503936, "grad_norm": 0.6479687690734863, "learning_rate": 5.954494816778408e-06, "loss": 0.10323143005371094, "step": 3438 }, { "epoch": 0.47920295408625374, "grad_norm": 1.3610284328460693, "learning_rate": 5.952181269124324e-06, "loss": 0.10865974426269531, "step": 3439 }, { "epoch": 0.4793422977774681, "grad_norm": 1.2932384014129639, "learning_rate": 5.949867509917303e-06, "loss": 0.1255817413330078, "step": 3440 }, { "epoch": 0.4794816414686825, "grad_norm": 1.2708179950714111, "learning_rate": 5.9475535396714055e-06, "loss": 0.0978851318359375, "step": 3441 }, { "epoch": 0.4796209851598969, "grad_norm": 0.7757161855697632, "learning_rate": 5.945239358900746e-06, "loss": 0.08404731750488281, "step": 3442 }, { "epoch": 0.47976032885111125, "grad_norm": 0.6328369379043579, "learning_rate": 5.94292496811948e-06, "loss": 0.10523080825805664, "step": 3443 }, { "epoch": 0.47989967254232563, "grad_norm": 0.8705531358718872, "learning_rate": 5.940610367841815e-06, "loss": 0.08406734466552734, "step": 3444 }, { "epoch": 0.48003901623354, "grad_norm": 0.8122291564941406, "learning_rate": 5.938295558581999e-06, "loss": 0.10177230834960938, "step": 3445 }, { "epoch": 0.4801783599247544, "grad_norm": 2.455928325653076, "learning_rate": 5.935980540854332e-06, "loss": 0.1407184600830078, "step": 3446 }, { "epoch": 0.48031770361596876, "grad_norm": 0.8440749645233154, "learning_rate": 5.933665315173158e-06, "loss": 0.07628059387207031, "step": 3447 }, { "epoch": 0.4804570473071832, "grad_norm": 0.7123702168464661, "learning_rate": 5.931349882052866e-06, "loss": 0.07803153991699219, "step": 3448 }, { "epoch": 0.48059639099839757, "grad_norm": 1.647125482559204, "learning_rate": 5.929034242007895e-06, "loss": 0.0961456298828125, "step": 3449 }, { "epoch": 0.48073573468961195, "grad_norm": 1.1315312385559082, "learning_rate": 5.926718395552723e-06, "loss": 0.11034393310546875, "step": 3450 }, { "epoch": 0.4808750783808263, "grad_norm": 1.070287823677063, "learning_rate": 5.924402343201883e-06, "loss": 0.08835792541503906, "step": 3451 }, { "epoch": 0.4810144220720407, "grad_norm": 0.7921733260154724, "learning_rate": 5.922086085469947e-06, "loss": 0.08263969421386719, "step": 3452 }, { "epoch": 0.4811537657632551, "grad_norm": 0.6174148321151733, "learning_rate": 5.919769622871533e-06, "loss": 0.08214187622070312, "step": 3453 }, { "epoch": 0.48129310945446946, "grad_norm": 0.9073300361633301, "learning_rate": 5.917452955921309e-06, "loss": 0.09493255615234375, "step": 3454 }, { "epoch": 0.48143245314568384, "grad_norm": 1.1569548845291138, "learning_rate": 5.915136085133983e-06, "loss": 0.1350994110107422, "step": 3455 }, { "epoch": 0.4815717968368982, "grad_norm": 1.1402983665466309, "learning_rate": 5.9128190110243115e-06, "loss": 0.1104583740234375, "step": 3456 }, { "epoch": 0.4817111405281126, "grad_norm": 0.6382923722267151, "learning_rate": 5.910501734107097e-06, "loss": 0.07137298583984375, "step": 3457 }, { "epoch": 0.48185048421932697, "grad_norm": 0.9000476002693176, "learning_rate": 5.908184254897183e-06, "loss": 0.0786886215209961, "step": 3458 }, { "epoch": 0.48198982791054135, "grad_norm": 1.4491883516311646, "learning_rate": 5.905866573909462e-06, "loss": 0.11448192596435547, "step": 3459 }, { "epoch": 0.4821291716017557, "grad_norm": 1.092578649520874, "learning_rate": 5.9035486916588705e-06, "loss": 0.09421730041503906, "step": 3460 }, { "epoch": 0.4822685152929701, "grad_norm": 2.076416254043579, "learning_rate": 5.901230608660386e-06, "loss": 0.11243438720703125, "step": 3461 }, { "epoch": 0.4824078589841845, "grad_norm": 1.452184796333313, "learning_rate": 5.898912325429038e-06, "loss": 0.08867740631103516, "step": 3462 }, { "epoch": 0.48254720267539886, "grad_norm": 0.8772801756858826, "learning_rate": 5.896593842479893e-06, "loss": 0.09163570404052734, "step": 3463 }, { "epoch": 0.48268654636661323, "grad_norm": 0.8845012784004211, "learning_rate": 5.8942751603280645e-06, "loss": 0.10274696350097656, "step": 3464 }, { "epoch": 0.4828258900578276, "grad_norm": 0.5951111316680908, "learning_rate": 5.891956279488715e-06, "loss": 0.08360862731933594, "step": 3465 }, { "epoch": 0.482965233749042, "grad_norm": 0.6921608448028564, "learning_rate": 5.889637200477041e-06, "loss": 0.0927734375, "step": 3466 }, { "epoch": 0.48310457744025637, "grad_norm": 0.6971401572227478, "learning_rate": 5.887317923808294e-06, "loss": 0.11531639099121094, "step": 3467 }, { "epoch": 0.4832439211314708, "grad_norm": 0.8570870757102966, "learning_rate": 5.88499844999776e-06, "loss": 0.08969497680664062, "step": 3468 }, { "epoch": 0.4833832648226852, "grad_norm": 0.8553553223609924, "learning_rate": 5.882678779560776e-06, "loss": 0.10855865478515625, "step": 3469 }, { "epoch": 0.48352260851389955, "grad_norm": 0.5712223649024963, "learning_rate": 5.880358913012722e-06, "loss": 0.09207344055175781, "step": 3470 }, { "epoch": 0.48366195220511393, "grad_norm": 0.9901331663131714, "learning_rate": 5.878038850869012e-06, "loss": 0.08306884765625, "step": 3471 }, { "epoch": 0.4838012958963283, "grad_norm": 0.7426438927650452, "learning_rate": 5.875718593645118e-06, "loss": 0.07445907592773438, "step": 3472 }, { "epoch": 0.4839406395875427, "grad_norm": 0.7728192806243896, "learning_rate": 5.873398141856545e-06, "loss": 0.08490371704101562, "step": 3473 }, { "epoch": 0.48407998327875706, "grad_norm": 1.009385585784912, "learning_rate": 5.871077496018844e-06, "loss": 0.08725643157958984, "step": 3474 }, { "epoch": 0.48421932696997144, "grad_norm": 0.8280102014541626, "learning_rate": 5.868756656647611e-06, "loss": 0.09005928039550781, "step": 3475 }, { "epoch": 0.4843586706611858, "grad_norm": 0.7482296228408813, "learning_rate": 5.866435624258483e-06, "loss": 0.10413360595703125, "step": 3476 }, { "epoch": 0.4844980143524002, "grad_norm": 0.894905149936676, "learning_rate": 5.86411439936714e-06, "loss": 0.08003425598144531, "step": 3477 }, { "epoch": 0.4846373580436146, "grad_norm": 0.4732353091239929, "learning_rate": 5.861792982489306e-06, "loss": 0.07808780670166016, "step": 3478 }, { "epoch": 0.48477670173482895, "grad_norm": 1.0390983819961548, "learning_rate": 5.8594713741407465e-06, "loss": 0.11332511901855469, "step": 3479 }, { "epoch": 0.48491604542604333, "grad_norm": 1.0331178903579712, "learning_rate": 5.857149574837269e-06, "loss": 0.1122579574584961, "step": 3480 }, { "epoch": 0.4850553891172577, "grad_norm": 0.7219793796539307, "learning_rate": 5.854827585094725e-06, "loss": 0.10980224609375, "step": 3481 }, { "epoch": 0.4851947328084721, "grad_norm": 0.7681044340133667, "learning_rate": 5.852505405429007e-06, "loss": 0.08630180358886719, "step": 3482 }, { "epoch": 0.48533407649968646, "grad_norm": 0.9988455176353455, "learning_rate": 5.850183036356054e-06, "loss": 0.09206342697143555, "step": 3483 }, { "epoch": 0.48547342019090084, "grad_norm": 1.8504445552825928, "learning_rate": 5.847860478391838e-06, "loss": 0.1150674819946289, "step": 3484 }, { "epoch": 0.4856127638821152, "grad_norm": 0.434927761554718, "learning_rate": 5.845537732052381e-06, "loss": 0.0676727294921875, "step": 3485 }, { "epoch": 0.4857521075733296, "grad_norm": 2.5767641067504883, "learning_rate": 5.8432147978537444e-06, "loss": 0.20302581787109375, "step": 3486 }, { "epoch": 0.48589145126454397, "grad_norm": 0.5539565682411194, "learning_rate": 5.840891676312029e-06, "loss": 0.08562850952148438, "step": 3487 }, { "epoch": 0.4860307949557584, "grad_norm": 0.7180027365684509, "learning_rate": 5.838568367943383e-06, "loss": 0.100067138671875, "step": 3488 }, { "epoch": 0.4861701386469728, "grad_norm": 0.5224490165710449, "learning_rate": 5.836244873263989e-06, "loss": 0.07446479797363281, "step": 3489 }, { "epoch": 0.48630948233818716, "grad_norm": 0.7933628559112549, "learning_rate": 5.8339211927900776e-06, "loss": 0.07412242889404297, "step": 3490 }, { "epoch": 0.48644882602940154, "grad_norm": 0.94810950756073, "learning_rate": 5.831597327037914e-06, "loss": 0.10167407989501953, "step": 3491 }, { "epoch": 0.4865881697206159, "grad_norm": 1.4420721530914307, "learning_rate": 5.829273276523811e-06, "loss": 0.13825035095214844, "step": 3492 }, { "epoch": 0.4867275134118303, "grad_norm": 0.6966227293014526, "learning_rate": 5.82694904176412e-06, "loss": 0.07587432861328125, "step": 3493 }, { "epoch": 0.48686685710304467, "grad_norm": 0.6669252514839172, "learning_rate": 5.82462462327523e-06, "loss": 0.08675575256347656, "step": 3494 }, { "epoch": 0.48700620079425905, "grad_norm": 0.8390084505081177, "learning_rate": 5.822300021573574e-06, "loss": 0.11693954467773438, "step": 3495 }, { "epoch": 0.4871455444854734, "grad_norm": 1.0300793647766113, "learning_rate": 5.819975237175629e-06, "loss": 0.10140514373779297, "step": 3496 }, { "epoch": 0.4872848881766878, "grad_norm": 0.7767142653465271, "learning_rate": 5.817650270597906e-06, "loss": 0.09180641174316406, "step": 3497 }, { "epoch": 0.4874242318679022, "grad_norm": 1.5891587734222412, "learning_rate": 5.815325122356959e-06, "loss": 0.10684013366699219, "step": 3498 }, { "epoch": 0.48756357555911656, "grad_norm": 3.778167963027954, "learning_rate": 5.8129997929693845e-06, "loss": 0.1488323211669922, "step": 3499 }, { "epoch": 0.48770291925033094, "grad_norm": 0.5861527323722839, "learning_rate": 5.810674282951817e-06, "loss": 0.07528877258300781, "step": 3500 }, { "epoch": 0.4878422629415453, "grad_norm": 0.6812779307365417, "learning_rate": 5.808348592820932e-06, "loss": 0.09694480895996094, "step": 3501 }, { "epoch": 0.4879816066327597, "grad_norm": 1.231650948524475, "learning_rate": 5.806022723093445e-06, "loss": 0.10815238952636719, "step": 3502 }, { "epoch": 0.48812095032397407, "grad_norm": 1.3690305948257446, "learning_rate": 5.80369667428611e-06, "loss": 0.08878707885742188, "step": 3503 }, { "epoch": 0.48826029401518845, "grad_norm": 0.9310296773910522, "learning_rate": 5.801370446915724e-06, "loss": 0.12178230285644531, "step": 3504 }, { "epoch": 0.4883996377064028, "grad_norm": 1.0878078937530518, "learning_rate": 5.799044041499119e-06, "loss": 0.07891464233398438, "step": 3505 }, { "epoch": 0.4885389813976172, "grad_norm": 0.935511589050293, "learning_rate": 5.7967174585531705e-06, "loss": 0.09558486938476562, "step": 3506 }, { "epoch": 0.4886783250888316, "grad_norm": 0.5222743153572083, "learning_rate": 5.794390698594793e-06, "loss": 0.09366035461425781, "step": 3507 }, { "epoch": 0.488817668780046, "grad_norm": 0.8704900741577148, "learning_rate": 5.792063762140938e-06, "loss": 0.08343219757080078, "step": 3508 }, { "epoch": 0.4889570124712604, "grad_norm": 1.263551115989685, "learning_rate": 5.789736649708598e-06, "loss": 0.11536216735839844, "step": 3509 }, { "epoch": 0.48909635616247477, "grad_norm": 1.3187464475631714, "learning_rate": 5.787409361814805e-06, "loss": 0.1107330322265625, "step": 3510 }, { "epoch": 0.48923569985368914, "grad_norm": 0.5971213579177856, "learning_rate": 5.785081898976627e-06, "loss": 0.0821685791015625, "step": 3511 }, { "epoch": 0.4893750435449035, "grad_norm": 0.990889847278595, "learning_rate": 5.782754261711177e-06, "loss": 0.09687232971191406, "step": 3512 }, { "epoch": 0.4895143872361179, "grad_norm": 1.3302980661392212, "learning_rate": 5.7804264505356e-06, "loss": 0.1052694320678711, "step": 3513 }, { "epoch": 0.4896537309273323, "grad_norm": 0.8607335090637207, "learning_rate": 5.778098465967082e-06, "loss": 0.10973930358886719, "step": 3514 }, { "epoch": 0.48979307461854665, "grad_norm": 1.0419020652770996, "learning_rate": 5.7757703085228515e-06, "loss": 0.09630012512207031, "step": 3515 }, { "epoch": 0.48993241830976103, "grad_norm": 0.6730360984802246, "learning_rate": 5.773441978720167e-06, "loss": 0.08843326568603516, "step": 3516 }, { "epoch": 0.4900717620009754, "grad_norm": 0.5976685881614685, "learning_rate": 5.771113477076335e-06, "loss": 0.09555339813232422, "step": 3517 }, { "epoch": 0.4902111056921898, "grad_norm": 1.69373619556427, "learning_rate": 5.7687848041086905e-06, "loss": 0.15521621704101562, "step": 3518 }, { "epoch": 0.49035044938340416, "grad_norm": 0.8669997453689575, "learning_rate": 5.766455960334616e-06, "loss": 0.08600616455078125, "step": 3519 }, { "epoch": 0.49048979307461854, "grad_norm": 0.6040157079696655, "learning_rate": 5.764126946271526e-06, "loss": 0.06940889358520508, "step": 3520 }, { "epoch": 0.4906291367658329, "grad_norm": 0.41245996952056885, "learning_rate": 5.761797762436872e-06, "loss": 0.06449413299560547, "step": 3521 }, { "epoch": 0.4907684804570473, "grad_norm": 0.48041847348213196, "learning_rate": 5.759468409348149e-06, "loss": 0.06773948669433594, "step": 3522 }, { "epoch": 0.4909078241482617, "grad_norm": 0.9638617038726807, "learning_rate": 5.757138887522884e-06, "loss": 0.07654953002929688, "step": 3523 }, { "epoch": 0.49104716783947605, "grad_norm": 1.302079439163208, "learning_rate": 5.754809197478644e-06, "loss": 0.09423637390136719, "step": 3524 }, { "epoch": 0.49118651153069043, "grad_norm": 1.0558518171310425, "learning_rate": 5.752479339733033e-06, "loss": 0.13423728942871094, "step": 3525 }, { "epoch": 0.4913258552219048, "grad_norm": 1.4675168991088867, "learning_rate": 5.750149314803691e-06, "loss": 0.1390666961669922, "step": 3526 }, { "epoch": 0.4914651989131192, "grad_norm": 1.1681509017944336, "learning_rate": 5.747819123208299e-06, "loss": 0.11621284484863281, "step": 3527 }, { "epoch": 0.4916045426043336, "grad_norm": 0.8670096397399902, "learning_rate": 5.7454887654645706e-06, "loss": 0.11299896240234375, "step": 3528 }, { "epoch": 0.491743886295548, "grad_norm": 0.8285804986953735, "learning_rate": 5.7431582420902576e-06, "loss": 0.11477088928222656, "step": 3529 }, { "epoch": 0.49188322998676237, "grad_norm": 0.6435460448265076, "learning_rate": 5.740827553603149e-06, "loss": 0.10546875, "step": 3530 }, { "epoch": 0.49202257367797675, "grad_norm": 0.6830347180366516, "learning_rate": 5.738496700521073e-06, "loss": 0.08634567260742188, "step": 3531 }, { "epoch": 0.4921619173691911, "grad_norm": 0.8520460724830627, "learning_rate": 5.736165683361889e-06, "loss": 0.09766197204589844, "step": 3532 }, { "epoch": 0.4923012610604055, "grad_norm": 0.6951937079429626, "learning_rate": 5.7338345026434995e-06, "loss": 0.08975982666015625, "step": 3533 }, { "epoch": 0.4924406047516199, "grad_norm": 0.8652635812759399, "learning_rate": 5.731503158883835e-06, "loss": 0.10161209106445312, "step": 3534 }, { "epoch": 0.49257994844283426, "grad_norm": 1.0100737810134888, "learning_rate": 5.729171652600869e-06, "loss": 0.12387466430664062, "step": 3535 }, { "epoch": 0.49271929213404864, "grad_norm": 0.5286129117012024, "learning_rate": 5.726839984312611e-06, "loss": 0.08962631225585938, "step": 3536 }, { "epoch": 0.492858635825263, "grad_norm": 1.6512959003448486, "learning_rate": 5.724508154537101e-06, "loss": 0.07828140258789062, "step": 3537 }, { "epoch": 0.4929979795164774, "grad_norm": 1.807449221611023, "learning_rate": 5.72217616379242e-06, "loss": 0.10304450988769531, "step": 3538 }, { "epoch": 0.49313732320769177, "grad_norm": 0.4718913733959198, "learning_rate": 5.719844012596683e-06, "loss": 0.05541038513183594, "step": 3539 }, { "epoch": 0.49327666689890615, "grad_norm": 0.8409090042114258, "learning_rate": 5.7175117014680415e-06, "loss": 0.09660720825195312, "step": 3540 }, { "epoch": 0.4934160105901205, "grad_norm": 1.6794360876083374, "learning_rate": 5.71517923092468e-06, "loss": 0.1406402587890625, "step": 3541 }, { "epoch": 0.4935553542813349, "grad_norm": 1.524227499961853, "learning_rate": 5.712846601484822e-06, "loss": 0.1429290771484375, "step": 3542 }, { "epoch": 0.4936946979725493, "grad_norm": 1.114256501197815, "learning_rate": 5.710513813666722e-06, "loss": 0.10375213623046875, "step": 3543 }, { "epoch": 0.49383404166376366, "grad_norm": 1.0009926557540894, "learning_rate": 5.708180867988676e-06, "loss": 0.09181594848632812, "step": 3544 }, { "epoch": 0.49397338535497803, "grad_norm": 0.588932454586029, "learning_rate": 5.705847764969008e-06, "loss": 0.08665847778320312, "step": 3545 }, { "epoch": 0.4941127290461924, "grad_norm": 2.697685718536377, "learning_rate": 5.703514505126081e-06, "loss": 0.12556838989257812, "step": 3546 }, { "epoch": 0.4942520727374068, "grad_norm": 0.8114398717880249, "learning_rate": 5.701181088978295e-06, "loss": 0.10207366943359375, "step": 3547 }, { "epoch": 0.49439141642862117, "grad_norm": 1.6126325130462646, "learning_rate": 5.698847517044076e-06, "loss": 0.11813545227050781, "step": 3548 }, { "epoch": 0.4945307601198356, "grad_norm": 1.1091934442520142, "learning_rate": 5.696513789841897e-06, "loss": 0.11680412292480469, "step": 3549 }, { "epoch": 0.49467010381105, "grad_norm": 0.7887816429138184, "learning_rate": 5.6941799078902525e-06, "loss": 0.08611869812011719, "step": 3550 }, { "epoch": 0.49480944750226435, "grad_norm": 1.1157288551330566, "learning_rate": 5.691845871707682e-06, "loss": 0.107269287109375, "step": 3551 }, { "epoch": 0.49494879119347873, "grad_norm": 0.5014748573303223, "learning_rate": 5.689511681812755e-06, "loss": 0.07729148864746094, "step": 3552 }, { "epoch": 0.4950881348846931, "grad_norm": 0.6651365756988525, "learning_rate": 5.687177338724073e-06, "loss": 0.07813262939453125, "step": 3553 }, { "epoch": 0.4952274785759075, "grad_norm": 3.7655386924743652, "learning_rate": 5.684842842960276e-06, "loss": 0.17855262756347656, "step": 3554 }, { "epoch": 0.49536682226712186, "grad_norm": 1.9519790410995483, "learning_rate": 5.682508195040032e-06, "loss": 0.15268325805664062, "step": 3555 }, { "epoch": 0.49550616595833624, "grad_norm": 1.1253551244735718, "learning_rate": 5.68017339548205e-06, "loss": 0.13490676879882812, "step": 3556 }, { "epoch": 0.4956455096495506, "grad_norm": 1.0636683702468872, "learning_rate": 5.6778384448050694e-06, "loss": 0.10366630554199219, "step": 3557 }, { "epoch": 0.495784853340765, "grad_norm": 0.7243828773498535, "learning_rate": 5.675503343527861e-06, "loss": 0.11105632781982422, "step": 3558 }, { "epoch": 0.4959241970319794, "grad_norm": 0.7985156774520874, "learning_rate": 5.673168092169231e-06, "loss": 0.08109664916992188, "step": 3559 }, { "epoch": 0.49606354072319375, "grad_norm": 0.7016703486442566, "learning_rate": 5.670832691248021e-06, "loss": 0.07688570022583008, "step": 3560 }, { "epoch": 0.49620288441440813, "grad_norm": 0.8566029667854309, "learning_rate": 5.668497141283101e-06, "loss": 0.06675910949707031, "step": 3561 }, { "epoch": 0.4963422281056225, "grad_norm": 1.1154032945632935, "learning_rate": 5.66616144279338e-06, "loss": 0.10143661499023438, "step": 3562 }, { "epoch": 0.4964815717968369, "grad_norm": 1.1868549585342407, "learning_rate": 5.663825596297794e-06, "loss": 0.12277793884277344, "step": 3563 }, { "epoch": 0.49662091548805126, "grad_norm": 0.9242585897445679, "learning_rate": 5.661489602315314e-06, "loss": 0.11007118225097656, "step": 3564 }, { "epoch": 0.49676025917926564, "grad_norm": 0.5621718764305115, "learning_rate": 5.6591534613649505e-06, "loss": 0.07435798645019531, "step": 3565 }, { "epoch": 0.49689960287048, "grad_norm": 1.2413487434387207, "learning_rate": 5.656817173965733e-06, "loss": 0.08253669738769531, "step": 3566 }, { "epoch": 0.4970389465616944, "grad_norm": 0.6430429220199585, "learning_rate": 5.6544807406367365e-06, "loss": 0.06692314147949219, "step": 3567 }, { "epoch": 0.49717829025290877, "grad_norm": 0.9838617444038391, "learning_rate": 5.6521441618970605e-06, "loss": 0.08940696716308594, "step": 3568 }, { "epoch": 0.4973176339441232, "grad_norm": 0.757108747959137, "learning_rate": 5.649807438265842e-06, "loss": 0.10260963439941406, "step": 3569 }, { "epoch": 0.4974569776353376, "grad_norm": 1.0022064447402954, "learning_rate": 5.647470570262246e-06, "loss": 0.10067558288574219, "step": 3570 }, { "epoch": 0.49759632132655196, "grad_norm": 1.056845784187317, "learning_rate": 5.64513355840547e-06, "loss": 0.08495903015136719, "step": 3571 }, { "epoch": 0.49773566501776634, "grad_norm": 0.5705150365829468, "learning_rate": 5.642796403214747e-06, "loss": 0.0721898078918457, "step": 3572 }, { "epoch": 0.4978750087089807, "grad_norm": 1.4278697967529297, "learning_rate": 5.640459105209337e-06, "loss": 0.07318305969238281, "step": 3573 }, { "epoch": 0.4980143524001951, "grad_norm": 2.4776194095611572, "learning_rate": 5.638121664908537e-06, "loss": 0.1169281005859375, "step": 3574 }, { "epoch": 0.49815369609140947, "grad_norm": 0.7468923330307007, "learning_rate": 5.635784082831671e-06, "loss": 0.08640861511230469, "step": 3575 }, { "epoch": 0.49829303978262385, "grad_norm": 1.1453194618225098, "learning_rate": 5.633446359498098e-06, "loss": 0.09030914306640625, "step": 3576 }, { "epoch": 0.4984323834738382, "grad_norm": 1.6351470947265625, "learning_rate": 5.6311084954272055e-06, "loss": 0.12289047241210938, "step": 3577 }, { "epoch": 0.4985717271650526, "grad_norm": 0.6077507138252258, "learning_rate": 5.628770491138414e-06, "loss": 0.09051322937011719, "step": 3578 }, { "epoch": 0.498711070856267, "grad_norm": 0.7947307825088501, "learning_rate": 5.626432347151173e-06, "loss": 0.09427452087402344, "step": 3579 }, { "epoch": 0.49885041454748136, "grad_norm": 0.6763942837715149, "learning_rate": 5.624094063984967e-06, "loss": 0.09727764129638672, "step": 3580 }, { "epoch": 0.49898975823869574, "grad_norm": 0.8089797496795654, "learning_rate": 5.621755642159309e-06, "loss": 0.08648300170898438, "step": 3581 }, { "epoch": 0.4991291019299101, "grad_norm": 0.9932186007499695, "learning_rate": 5.61941708219374e-06, "loss": 0.09829235076904297, "step": 3582 }, { "epoch": 0.4992684456211245, "grad_norm": 0.8415701389312744, "learning_rate": 5.617078384607839e-06, "loss": 0.08713340759277344, "step": 3583 }, { "epoch": 0.49940778931233887, "grad_norm": 1.0273442268371582, "learning_rate": 5.614739549921208e-06, "loss": 0.12205314636230469, "step": 3584 }, { "epoch": 0.49954713300355325, "grad_norm": 0.7142120599746704, "learning_rate": 5.612400578653484e-06, "loss": 0.091949462890625, "step": 3585 }, { "epoch": 0.4996864766947676, "grad_norm": 1.8864425420761108, "learning_rate": 5.610061471324335e-06, "loss": 0.10850906372070312, "step": 3586 }, { "epoch": 0.499825820385982, "grad_norm": 1.2707806825637817, "learning_rate": 5.607722228453452e-06, "loss": 0.09391212463378906, "step": 3587 }, { "epoch": 0.4999651640771964, "grad_norm": 0.5782228112220764, "learning_rate": 5.605382850560565e-06, "loss": 0.08285713195800781, "step": 3588 }, { "epoch": 0.5001045077684108, "grad_norm": 1.0475566387176514, "learning_rate": 5.6030433381654305e-06, "loss": 0.1006174087524414, "step": 3589 }, { "epoch": 0.5002438514596251, "grad_norm": 1.1417888402938843, "learning_rate": 5.600703691787833e-06, "loss": 0.1058502197265625, "step": 3590 }, { "epoch": 0.5003831951508395, "grad_norm": 1.545357346534729, "learning_rate": 5.598363911947591e-06, "loss": 0.13747787475585938, "step": 3591 }, { "epoch": 0.5005225388420539, "grad_norm": 1.3954524993896484, "learning_rate": 5.596023999164547e-06, "loss": 0.09139823913574219, "step": 3592 }, { "epoch": 0.5006618825332683, "grad_norm": 0.49157410860061646, "learning_rate": 5.593683953958579e-06, "loss": 0.08749771118164062, "step": 3593 }, { "epoch": 0.5008012262244826, "grad_norm": 0.6305290460586548, "learning_rate": 5.591343776849591e-06, "loss": 0.09576416015625, "step": 3594 }, { "epoch": 0.500940569915697, "grad_norm": 0.5698922276496887, "learning_rate": 5.5890034683575145e-06, "loss": 0.08950996398925781, "step": 3595 }, { "epoch": 0.5010799136069114, "grad_norm": 1.3090637922286987, "learning_rate": 5.586663029002314e-06, "loss": 0.0911264419555664, "step": 3596 }, { "epoch": 0.5012192572981258, "grad_norm": 0.641037106513977, "learning_rate": 5.584322459303984e-06, "loss": 0.08056640625, "step": 3597 }, { "epoch": 0.5013586009893402, "grad_norm": 0.3429011106491089, "learning_rate": 5.581981759782543e-06, "loss": 0.056522369384765625, "step": 3598 }, { "epoch": 0.5014979446805546, "grad_norm": 1.0345219373703003, "learning_rate": 5.579640930958043e-06, "loss": 0.0838470458984375, "step": 3599 }, { "epoch": 0.501637288371769, "grad_norm": 1.0451244115829468, "learning_rate": 5.57729997335056e-06, "loss": 0.07712554931640625, "step": 3600 }, { "epoch": 0.5017766320629834, "grad_norm": 1.1906483173370361, "learning_rate": 5.5749588874802055e-06, "loss": 0.15821456909179688, "step": 3601 }, { "epoch": 0.5019159757541978, "grad_norm": 0.8517275452613831, "learning_rate": 5.572617673867111e-06, "loss": 0.08781814575195312, "step": 3602 }, { "epoch": 0.5020553194454122, "grad_norm": 0.9478939771652222, "learning_rate": 5.570276333031441e-06, "loss": 0.07505416870117188, "step": 3603 }, { "epoch": 0.5021946631366265, "grad_norm": 0.6063891649246216, "learning_rate": 5.567934865493392e-06, "loss": 0.08834266662597656, "step": 3604 }, { "epoch": 0.5023340068278409, "grad_norm": 1.1007312536239624, "learning_rate": 5.5655932717731805e-06, "loss": 0.10810661315917969, "step": 3605 }, { "epoch": 0.5024733505190553, "grad_norm": 0.8803755044937134, "learning_rate": 5.563251552391058e-06, "loss": 0.10617828369140625, "step": 3606 }, { "epoch": 0.5026126942102697, "grad_norm": 1.3158982992172241, "learning_rate": 5.560909707867299e-06, "loss": 0.10796165466308594, "step": 3607 }, { "epoch": 0.502752037901484, "grad_norm": 0.7782338261604309, "learning_rate": 5.558567738722208e-06, "loss": 0.09563064575195312, "step": 3608 }, { "epoch": 0.5028913815926984, "grad_norm": 0.6194863319396973, "learning_rate": 5.556225645476119e-06, "loss": 0.083160400390625, "step": 3609 }, { "epoch": 0.5030307252839128, "grad_norm": 1.1999038457870483, "learning_rate": 5.55388342864939e-06, "loss": 0.119964599609375, "step": 3610 }, { "epoch": 0.5031700689751272, "grad_norm": 0.5968078970909119, "learning_rate": 5.5515410887624085e-06, "loss": 0.08013153076171875, "step": 3611 }, { "epoch": 0.5033094126663415, "grad_norm": 0.9817954897880554, "learning_rate": 5.549198626335589e-06, "loss": 0.09661102294921875, "step": 3612 }, { "epoch": 0.5034487563575559, "grad_norm": 0.9098002910614014, "learning_rate": 5.546856041889374e-06, "loss": 0.11218643188476562, "step": 3613 }, { "epoch": 0.5035881000487703, "grad_norm": 0.7115742564201355, "learning_rate": 5.544513335944228e-06, "loss": 0.11153221130371094, "step": 3614 }, { "epoch": 0.5037274437399847, "grad_norm": 1.7466331720352173, "learning_rate": 5.542170509020655e-06, "loss": 0.1100921630859375, "step": 3615 }, { "epoch": 0.5038667874311991, "grad_norm": 1.055674433708191, "learning_rate": 5.539827561639169e-06, "loss": 0.09088516235351562, "step": 3616 }, { "epoch": 0.5040061311224134, "grad_norm": 0.686211347579956, "learning_rate": 5.537484494320324e-06, "loss": 0.09955978393554688, "step": 3617 }, { "epoch": 0.5041454748136278, "grad_norm": 0.4688786566257477, "learning_rate": 5.535141307584697e-06, "loss": 0.07604718208312988, "step": 3618 }, { "epoch": 0.5042848185048422, "grad_norm": 1.505163550376892, "learning_rate": 5.532798001952888e-06, "loss": 0.08087348937988281, "step": 3619 }, { "epoch": 0.5044241621960566, "grad_norm": 1.3191776275634766, "learning_rate": 5.530454577945529e-06, "loss": 0.08801841735839844, "step": 3620 }, { "epoch": 0.504563505887271, "grad_norm": 0.6872311234474182, "learning_rate": 5.52811103608327e-06, "loss": 0.0907135009765625, "step": 3621 }, { "epoch": 0.5047028495784853, "grad_norm": 0.43132883310317993, "learning_rate": 5.525767376886797e-06, "loss": 0.08791160583496094, "step": 3622 }, { "epoch": 0.5048421932696997, "grad_norm": 1.22054123878479, "learning_rate": 5.523423600876816e-06, "loss": 0.13653945922851562, "step": 3623 }, { "epoch": 0.5049815369609141, "grad_norm": 0.9072512984275818, "learning_rate": 5.521079708574062e-06, "loss": 0.08420372009277344, "step": 3624 }, { "epoch": 0.5051208806521285, "grad_norm": 1.155147671699524, "learning_rate": 5.5187357004992926e-06, "loss": 0.08759880065917969, "step": 3625 }, { "epoch": 0.5052602243433428, "grad_norm": 0.6881645917892456, "learning_rate": 5.516391577173293e-06, "loss": 0.08678817749023438, "step": 3626 }, { "epoch": 0.5053995680345572, "grad_norm": 0.516182005405426, "learning_rate": 5.514047339116874e-06, "loss": 0.07890510559082031, "step": 3627 }, { "epoch": 0.5055389117257716, "grad_norm": 0.7843391299247742, "learning_rate": 5.511702986850873e-06, "loss": 0.07591819763183594, "step": 3628 }, { "epoch": 0.505678255416986, "grad_norm": 1.3529839515686035, "learning_rate": 5.509358520896151e-06, "loss": 0.12393951416015625, "step": 3629 }, { "epoch": 0.5058175991082003, "grad_norm": 0.7539736032485962, "learning_rate": 5.507013941773593e-06, "loss": 0.08289146423339844, "step": 3630 }, { "epoch": 0.5059569427994147, "grad_norm": 1.0871257781982422, "learning_rate": 5.504669250004116e-06, "loss": 0.09198760986328125, "step": 3631 }, { "epoch": 0.5060962864906291, "grad_norm": 1.1839364767074585, "learning_rate": 5.502324446108649e-06, "loss": 0.10416412353515625, "step": 3632 }, { "epoch": 0.5062356301818435, "grad_norm": 1.3986942768096924, "learning_rate": 5.49997953060816e-06, "loss": 0.08853530883789062, "step": 3633 }, { "epoch": 0.5063749738730579, "grad_norm": 0.59627765417099, "learning_rate": 5.497634504023634e-06, "loss": 0.07536888122558594, "step": 3634 }, { "epoch": 0.5065143175642722, "grad_norm": 1.892973780632019, "learning_rate": 5.495289366876083e-06, "loss": 0.11208534240722656, "step": 3635 }, { "epoch": 0.5066536612554866, "grad_norm": 1.2369087934494019, "learning_rate": 5.492944119686544e-06, "loss": 0.12450981140136719, "step": 3636 }, { "epoch": 0.506793004946701, "grad_norm": 0.7073702216148376, "learning_rate": 5.4905987629760724e-06, "loss": 0.09191513061523438, "step": 3637 }, { "epoch": 0.5069323486379154, "grad_norm": 0.5835978984832764, "learning_rate": 5.488253297265757e-06, "loss": 0.07133293151855469, "step": 3638 }, { "epoch": 0.5070716923291299, "grad_norm": 0.8138412237167358, "learning_rate": 5.485907723076708e-06, "loss": 0.07561492919921875, "step": 3639 }, { "epoch": 0.5072110360203442, "grad_norm": 1.1627079248428345, "learning_rate": 5.483562040930055e-06, "loss": 0.10153007507324219, "step": 3640 }, { "epoch": 0.5073503797115586, "grad_norm": 0.845715343952179, "learning_rate": 5.481216251346956e-06, "loss": 0.08170700073242188, "step": 3641 }, { "epoch": 0.507489723402773, "grad_norm": 1.7387747764587402, "learning_rate": 5.478870354848593e-06, "loss": 0.13547420501708984, "step": 3642 }, { "epoch": 0.5076290670939874, "grad_norm": 1.3852691650390625, "learning_rate": 5.47652435195617e-06, "loss": 0.11530113220214844, "step": 3643 }, { "epoch": 0.5077684107852017, "grad_norm": 0.6940891146659851, "learning_rate": 5.4741782431909144e-06, "loss": 0.08092117309570312, "step": 3644 }, { "epoch": 0.5079077544764161, "grad_norm": 1.1733726263046265, "learning_rate": 5.471832029074079e-06, "loss": 0.1132659912109375, "step": 3645 }, { "epoch": 0.5080470981676305, "grad_norm": 0.8612898588180542, "learning_rate": 5.469485710126938e-06, "loss": 0.07826805114746094, "step": 3646 }, { "epoch": 0.5081864418588449, "grad_norm": 0.8103766441345215, "learning_rate": 5.467139286870794e-06, "loss": 0.10349845886230469, "step": 3647 }, { "epoch": 0.5083257855500593, "grad_norm": 1.0943511724472046, "learning_rate": 5.464792759826962e-06, "loss": 0.1348114013671875, "step": 3648 }, { "epoch": 0.5084651292412736, "grad_norm": 1.5913159847259521, "learning_rate": 5.462446129516793e-06, "loss": 0.12448501586914062, "step": 3649 }, { "epoch": 0.508604472932488, "grad_norm": 0.38456547260284424, "learning_rate": 5.460099396461649e-06, "loss": 0.06681251525878906, "step": 3650 }, { "epoch": 0.5087438166237024, "grad_norm": 1.1579136848449707, "learning_rate": 5.457752561182924e-06, "loss": 0.10672569274902344, "step": 3651 }, { "epoch": 0.5088831603149168, "grad_norm": 0.34042608737945557, "learning_rate": 5.455405624202032e-06, "loss": 0.0661773681640625, "step": 3652 }, { "epoch": 0.5090225040061311, "grad_norm": 1.0985894203186035, "learning_rate": 5.453058586040406e-06, "loss": 0.11318016052246094, "step": 3653 }, { "epoch": 0.5091618476973455, "grad_norm": 0.7163338661193848, "learning_rate": 5.450711447219507e-06, "loss": 0.09663200378417969, "step": 3654 }, { "epoch": 0.5093011913885599, "grad_norm": 0.8512552976608276, "learning_rate": 5.448364208260813e-06, "loss": 0.07034683227539062, "step": 3655 }, { "epoch": 0.5094405350797743, "grad_norm": 0.45578983426094055, "learning_rate": 5.446016869685829e-06, "loss": 0.06485748291015625, "step": 3656 }, { "epoch": 0.5095798787709886, "grad_norm": 0.9466754794120789, "learning_rate": 5.44366943201608e-06, "loss": 0.08558273315429688, "step": 3657 }, { "epoch": 0.509719222462203, "grad_norm": 0.5666577816009521, "learning_rate": 5.441321895773112e-06, "loss": 0.09176063537597656, "step": 3658 }, { "epoch": 0.5098585661534174, "grad_norm": 0.5132593512535095, "learning_rate": 5.438974261478494e-06, "loss": 0.08272552490234375, "step": 3659 }, { "epoch": 0.5099979098446318, "grad_norm": 0.792766809463501, "learning_rate": 5.436626529653817e-06, "loss": 0.08005237579345703, "step": 3660 }, { "epoch": 0.5101372535358462, "grad_norm": 1.2908154726028442, "learning_rate": 5.434278700820693e-06, "loss": 0.10441017150878906, "step": 3661 }, { "epoch": 0.5102765972270605, "grad_norm": 0.9050673842430115, "learning_rate": 5.431930775500756e-06, "loss": 0.1056976318359375, "step": 3662 }, { "epoch": 0.5104159409182749, "grad_norm": 0.8638351559638977, "learning_rate": 5.429582754215664e-06, "loss": 0.09189605712890625, "step": 3663 }, { "epoch": 0.5105552846094893, "grad_norm": 1.5793465375900269, "learning_rate": 5.4272346374870885e-06, "loss": 0.1310272216796875, "step": 3664 }, { "epoch": 0.5106946283007037, "grad_norm": 0.5450943112373352, "learning_rate": 5.424886425836734e-06, "loss": 0.0864105224609375, "step": 3665 }, { "epoch": 0.510833971991918, "grad_norm": 0.6850482225418091, "learning_rate": 5.4225381197863135e-06, "loss": 0.0791778564453125, "step": 3666 }, { "epoch": 0.5109733156831324, "grad_norm": 0.8402933478355408, "learning_rate": 5.420189719857571e-06, "loss": 0.08842658996582031, "step": 3667 }, { "epoch": 0.5111126593743468, "grad_norm": 0.5377743244171143, "learning_rate": 5.417841226572263e-06, "loss": 0.08717536926269531, "step": 3668 }, { "epoch": 0.5112520030655612, "grad_norm": 0.6069949865341187, "learning_rate": 5.415492640452177e-06, "loss": 0.07907485961914062, "step": 3669 }, { "epoch": 0.5113913467567756, "grad_norm": 0.4402421712875366, "learning_rate": 5.4131439620191115e-06, "loss": 0.06109046936035156, "step": 3670 }, { "epoch": 0.5115306904479899, "grad_norm": 2.173609972000122, "learning_rate": 5.4107951917948896e-06, "loss": 0.15056419372558594, "step": 3671 }, { "epoch": 0.5116700341392043, "grad_norm": 1.0276691913604736, "learning_rate": 5.408446330301355e-06, "loss": 0.0946807861328125, "step": 3672 }, { "epoch": 0.5118093778304187, "grad_norm": 0.4353816509246826, "learning_rate": 5.40609737806037e-06, "loss": 0.07221508026123047, "step": 3673 }, { "epoch": 0.5119487215216331, "grad_norm": 1.0845836400985718, "learning_rate": 5.403748335593819e-06, "loss": 0.09103870391845703, "step": 3674 }, { "epoch": 0.5120880652128474, "grad_norm": 0.9429849982261658, "learning_rate": 5.4013992034236065e-06, "loss": 0.07464027404785156, "step": 3675 }, { "epoch": 0.5122274089040618, "grad_norm": 0.838378369808197, "learning_rate": 5.3990499820716545e-06, "loss": 0.08720207214355469, "step": 3676 }, { "epoch": 0.5123667525952762, "grad_norm": 1.4066243171691895, "learning_rate": 5.396700672059907e-06, "loss": 0.13070201873779297, "step": 3677 }, { "epoch": 0.5125060962864906, "grad_norm": 0.659531831741333, "learning_rate": 5.394351273910327e-06, "loss": 0.08163261413574219, "step": 3678 }, { "epoch": 0.5126454399777051, "grad_norm": 1.3265666961669922, "learning_rate": 5.392001788144897e-06, "loss": 0.1159515380859375, "step": 3679 }, { "epoch": 0.5127847836689194, "grad_norm": 0.7147417068481445, "learning_rate": 5.389652215285618e-06, "loss": 0.10282325744628906, "step": 3680 }, { "epoch": 0.5129241273601338, "grad_norm": 1.010270118713379, "learning_rate": 5.387302555854516e-06, "loss": 0.09992218017578125, "step": 3681 }, { "epoch": 0.5130634710513482, "grad_norm": 0.9492425322532654, "learning_rate": 5.384952810373625e-06, "loss": 0.08658027648925781, "step": 3682 }, { "epoch": 0.5132028147425626, "grad_norm": 1.7539615631103516, "learning_rate": 5.382602979365009e-06, "loss": 0.08267593383789062, "step": 3683 }, { "epoch": 0.513342158433777, "grad_norm": 0.9900569319725037, "learning_rate": 5.380253063350747e-06, "loss": 0.08271026611328125, "step": 3684 }, { "epoch": 0.5134815021249913, "grad_norm": 0.8390770554542542, "learning_rate": 5.377903062852935e-06, "loss": 0.08411407470703125, "step": 3685 }, { "epoch": 0.5136208458162057, "grad_norm": 0.8664339184761047, "learning_rate": 5.375552978393691e-06, "loss": 0.10126686096191406, "step": 3686 }, { "epoch": 0.5137601895074201, "grad_norm": 0.9098791480064392, "learning_rate": 5.373202810495149e-06, "loss": 0.11723899841308594, "step": 3687 }, { "epoch": 0.5138995331986345, "grad_norm": 0.46171295642852783, "learning_rate": 5.370852559679461e-06, "loss": 0.06531906127929688, "step": 3688 }, { "epoch": 0.5140388768898488, "grad_norm": 0.8055291771888733, "learning_rate": 5.368502226468803e-06, "loss": 0.08572006225585938, "step": 3689 }, { "epoch": 0.5141782205810632, "grad_norm": 1.2825936079025269, "learning_rate": 5.366151811385363e-06, "loss": 0.1002655029296875, "step": 3690 }, { "epoch": 0.5143175642722776, "grad_norm": 0.909519374370575, "learning_rate": 5.363801314951349e-06, "loss": 0.11194419860839844, "step": 3691 }, { "epoch": 0.514456907963492, "grad_norm": 0.9293542504310608, "learning_rate": 5.361450737688989e-06, "loss": 0.09233283996582031, "step": 3692 }, { "epoch": 0.5145962516547063, "grad_norm": 0.9096304178237915, "learning_rate": 5.359100080120527e-06, "loss": 0.07300186157226562, "step": 3693 }, { "epoch": 0.5147355953459207, "grad_norm": 0.9639062881469727, "learning_rate": 5.356749342768226e-06, "loss": 0.11249160766601562, "step": 3694 }, { "epoch": 0.5148749390371351, "grad_norm": 0.7009774446487427, "learning_rate": 5.354398526154365e-06, "loss": 0.0829935073852539, "step": 3695 }, { "epoch": 0.5150142827283495, "grad_norm": 1.272674560546875, "learning_rate": 5.352047630801242e-06, "loss": 0.09863662719726562, "step": 3696 }, { "epoch": 0.5151536264195639, "grad_norm": 0.9533659219741821, "learning_rate": 5.349696657231176e-06, "loss": 0.10113906860351562, "step": 3697 }, { "epoch": 0.5152929701107782, "grad_norm": 1.4625164270401, "learning_rate": 5.347345605966493e-06, "loss": 0.1349163055419922, "step": 3698 }, { "epoch": 0.5154323138019926, "grad_norm": 0.3523915410041809, "learning_rate": 5.344994477529548e-06, "loss": 0.07020759582519531, "step": 3699 }, { "epoch": 0.515571657493207, "grad_norm": 1.1151126623153687, "learning_rate": 5.342643272442706e-06, "loss": 0.11315155029296875, "step": 3700 }, { "epoch": 0.5157110011844214, "grad_norm": 1.2659283876419067, "learning_rate": 5.340291991228352e-06, "loss": 0.11920928955078125, "step": 3701 }, { "epoch": 0.5158503448756357, "grad_norm": 0.2623414993286133, "learning_rate": 5.337940634408888e-06, "loss": 0.059444427490234375, "step": 3702 }, { "epoch": 0.5159896885668501, "grad_norm": 0.485500305891037, "learning_rate": 5.335589202506727e-06, "loss": 0.08522224426269531, "step": 3703 }, { "epoch": 0.5161290322580645, "grad_norm": 2.288973569869995, "learning_rate": 5.333237696044309e-06, "loss": 0.11989784240722656, "step": 3704 }, { "epoch": 0.5162683759492789, "grad_norm": 0.9977279305458069, "learning_rate": 5.330886115544081e-06, "loss": 0.09276485443115234, "step": 3705 }, { "epoch": 0.5164077196404933, "grad_norm": 0.8240256309509277, "learning_rate": 5.328534461528515e-06, "loss": 0.07817840576171875, "step": 3706 }, { "epoch": 0.5165470633317076, "grad_norm": 1.0821620225906372, "learning_rate": 5.326182734520091e-06, "loss": 0.11505126953125, "step": 3707 }, { "epoch": 0.516686407022922, "grad_norm": 1.6913576126098633, "learning_rate": 5.32383093504131e-06, "loss": 0.1375579833984375, "step": 3708 }, { "epoch": 0.5168257507141364, "grad_norm": 2.416957139968872, "learning_rate": 5.32147906361469e-06, "loss": 0.11888694763183594, "step": 3709 }, { "epoch": 0.5169650944053508, "grad_norm": 1.3361891508102417, "learning_rate": 5.31912712076276e-06, "loss": 0.10690689086914062, "step": 3710 }, { "epoch": 0.5171044380965651, "grad_norm": 0.5818532109260559, "learning_rate": 5.316775107008069e-06, "loss": 0.08999252319335938, "step": 3711 }, { "epoch": 0.5172437817877795, "grad_norm": 0.9265190362930298, "learning_rate": 5.314423022873181e-06, "loss": 0.08698844909667969, "step": 3712 }, { "epoch": 0.5173831254789939, "grad_norm": 0.5691593885421753, "learning_rate": 5.312070868880678e-06, "loss": 0.08345746994018555, "step": 3713 }, { "epoch": 0.5175224691702083, "grad_norm": 0.8467488288879395, "learning_rate": 5.3097186455531506e-06, "loss": 0.0994424819946289, "step": 3714 }, { "epoch": 0.5176618128614227, "grad_norm": 0.649223804473877, "learning_rate": 5.307366353413214e-06, "loss": 0.09058380126953125, "step": 3715 }, { "epoch": 0.517801156552637, "grad_norm": 1.4483000040054321, "learning_rate": 5.305013992983487e-06, "loss": 0.10166740417480469, "step": 3716 }, { "epoch": 0.5179405002438514, "grad_norm": 0.6718856692314148, "learning_rate": 5.302661564786617e-06, "loss": 0.08855056762695312, "step": 3717 }, { "epoch": 0.5180798439350658, "grad_norm": 0.5318115949630737, "learning_rate": 5.300309069345257e-06, "loss": 0.07718467712402344, "step": 3718 }, { "epoch": 0.5182191876262803, "grad_norm": 0.5435909032821655, "learning_rate": 5.297956507182077e-06, "loss": 0.07844066619873047, "step": 3719 }, { "epoch": 0.5183585313174947, "grad_norm": 1.0273622274398804, "learning_rate": 5.295603878819764e-06, "loss": 0.1223592758178711, "step": 3720 }, { "epoch": 0.518497875008709, "grad_norm": 0.706482470035553, "learning_rate": 5.2932511847810175e-06, "loss": 0.08892250061035156, "step": 3721 }, { "epoch": 0.5186372186999234, "grad_norm": 0.701344907283783, "learning_rate": 5.290898425588553e-06, "loss": 0.08290863037109375, "step": 3722 }, { "epoch": 0.5187765623911378, "grad_norm": 1.2769943475723267, "learning_rate": 5.2885456017651e-06, "loss": 0.10389328002929688, "step": 3723 }, { "epoch": 0.5189159060823522, "grad_norm": 1.0966628789901733, "learning_rate": 5.286192713833402e-06, "loss": 0.13339614868164062, "step": 3724 }, { "epoch": 0.5190552497735665, "grad_norm": 0.4998598098754883, "learning_rate": 5.283839762316217e-06, "loss": 0.08022689819335938, "step": 3725 }, { "epoch": 0.5191945934647809, "grad_norm": 0.9317511916160583, "learning_rate": 5.281486747736316e-06, "loss": 0.09180831909179688, "step": 3726 }, { "epoch": 0.5193339371559953, "grad_norm": 1.0814921855926514, "learning_rate": 5.279133670616488e-06, "loss": 0.11910057067871094, "step": 3727 }, { "epoch": 0.5194732808472097, "grad_norm": 1.1160595417022705, "learning_rate": 5.276780531479528e-06, "loss": 0.10182476043701172, "step": 3728 }, { "epoch": 0.519612624538424, "grad_norm": 0.8821375966072083, "learning_rate": 5.274427330848257e-06, "loss": 0.11697006225585938, "step": 3729 }, { "epoch": 0.5197519682296384, "grad_norm": 0.8413132429122925, "learning_rate": 5.2720740692454944e-06, "loss": 0.08835983276367188, "step": 3730 }, { "epoch": 0.5198913119208528, "grad_norm": 1.0471138954162598, "learning_rate": 5.269720747194088e-06, "loss": 0.12019157409667969, "step": 3731 }, { "epoch": 0.5200306556120672, "grad_norm": 1.3706748485565186, "learning_rate": 5.267367365216887e-06, "loss": 0.14229965209960938, "step": 3732 }, { "epoch": 0.5201699993032816, "grad_norm": 0.6344633102416992, "learning_rate": 5.265013923836763e-06, "loss": 0.09418487548828125, "step": 3733 }, { "epoch": 0.5203093429944959, "grad_norm": 0.5930453538894653, "learning_rate": 5.262660423576595e-06, "loss": 0.0917510986328125, "step": 3734 }, { "epoch": 0.5204486866857103, "grad_norm": 0.7439722418785095, "learning_rate": 5.260306864959278e-06, "loss": 0.07514190673828125, "step": 3735 }, { "epoch": 0.5205880303769247, "grad_norm": 1.2007710933685303, "learning_rate": 5.25795324850772e-06, "loss": 0.08959770202636719, "step": 3736 }, { "epoch": 0.5207273740681391, "grad_norm": 1.020255446434021, "learning_rate": 5.255599574744836e-06, "loss": 0.11610984802246094, "step": 3737 }, { "epoch": 0.5208667177593534, "grad_norm": 1.5269001722335815, "learning_rate": 5.253245844193564e-06, "loss": 0.11373710632324219, "step": 3738 }, { "epoch": 0.5210060614505678, "grad_norm": 0.7694551348686218, "learning_rate": 5.250892057376848e-06, "loss": 0.06650543212890625, "step": 3739 }, { "epoch": 0.5211454051417822, "grad_norm": 0.8944742679595947, "learning_rate": 5.248538214817642e-06, "loss": 0.09611320495605469, "step": 3740 }, { "epoch": 0.5212847488329966, "grad_norm": 0.6349077224731445, "learning_rate": 5.246184317038922e-06, "loss": 0.09211444854736328, "step": 3741 }, { "epoch": 0.521424092524211, "grad_norm": 0.8299018740653992, "learning_rate": 5.243830364563665e-06, "loss": 0.09340286254882812, "step": 3742 }, { "epoch": 0.5215634362154253, "grad_norm": 0.9082014560699463, "learning_rate": 5.241476357914869e-06, "loss": 0.08259963989257812, "step": 3743 }, { "epoch": 0.5217027799066397, "grad_norm": 0.6689212918281555, "learning_rate": 5.239122297615539e-06, "loss": 0.08295822143554688, "step": 3744 }, { "epoch": 0.5218421235978541, "grad_norm": 1.4428414106369019, "learning_rate": 5.236768184188693e-06, "loss": 0.14798259735107422, "step": 3745 }, { "epoch": 0.5219814672890685, "grad_norm": 1.2361469268798828, "learning_rate": 5.234414018157361e-06, "loss": 0.09642791748046875, "step": 3746 }, { "epoch": 0.5221208109802828, "grad_norm": 0.9637683629989624, "learning_rate": 5.232059800044589e-06, "loss": 0.08991432189941406, "step": 3747 }, { "epoch": 0.5222601546714972, "grad_norm": 1.4139606952667236, "learning_rate": 5.229705530373424e-06, "loss": 0.1258678436279297, "step": 3748 }, { "epoch": 0.5223994983627116, "grad_norm": 0.593676745891571, "learning_rate": 5.2273512096669364e-06, "loss": 0.08368492126464844, "step": 3749 }, { "epoch": 0.522538842053926, "grad_norm": 0.9895542860031128, "learning_rate": 5.2249968384482e-06, "loss": 0.09264183044433594, "step": 3750 }, { "epoch": 0.5226781857451404, "grad_norm": 0.8938314914703369, "learning_rate": 5.222642417240305e-06, "loss": 0.10034847259521484, "step": 3751 }, { "epoch": 0.5228175294363547, "grad_norm": 1.0196161270141602, "learning_rate": 5.220287946566347e-06, "loss": 0.09807014465332031, "step": 3752 }, { "epoch": 0.5229568731275691, "grad_norm": 0.4567791223526001, "learning_rate": 5.2179334269494345e-06, "loss": 0.0687255859375, "step": 3753 }, { "epoch": 0.5230962168187835, "grad_norm": 1.8589189052581787, "learning_rate": 5.215578858912691e-06, "loss": 0.10651397705078125, "step": 3754 }, { "epoch": 0.5232355605099979, "grad_norm": 0.5750988125801086, "learning_rate": 5.213224242979247e-06, "loss": 0.08782005310058594, "step": 3755 }, { "epoch": 0.5233749042012122, "grad_norm": 0.7203028798103333, "learning_rate": 5.2108695796722446e-06, "loss": 0.0919952392578125, "step": 3756 }, { "epoch": 0.5235142478924266, "grad_norm": 0.889830470085144, "learning_rate": 5.208514869514835e-06, "loss": 0.08275413513183594, "step": 3757 }, { "epoch": 0.523653591583641, "grad_norm": 1.7553999423980713, "learning_rate": 5.206160113030182e-06, "loss": 0.10333919525146484, "step": 3758 }, { "epoch": 0.5237929352748554, "grad_norm": 1.2498829364776611, "learning_rate": 5.203805310741459e-06, "loss": 0.0958108901977539, "step": 3759 }, { "epoch": 0.5239322789660699, "grad_norm": 0.5922291278839111, "learning_rate": 5.201450463171849e-06, "loss": 0.09794998168945312, "step": 3760 }, { "epoch": 0.5240716226572842, "grad_norm": 0.5941901803016663, "learning_rate": 5.199095570844546e-06, "loss": 0.08293724060058594, "step": 3761 }, { "epoch": 0.5242109663484986, "grad_norm": 1.0817201137542725, "learning_rate": 5.19674063428275e-06, "loss": 0.0883026123046875, "step": 3762 }, { "epoch": 0.524350310039713, "grad_norm": 0.7354008555412292, "learning_rate": 5.1943856540096795e-06, "loss": 0.08466911315917969, "step": 3763 }, { "epoch": 0.5244896537309274, "grad_norm": 1.1586624383926392, "learning_rate": 5.192030630548552e-06, "loss": 0.1351461410522461, "step": 3764 }, { "epoch": 0.5246289974221418, "grad_norm": 0.588100790977478, "learning_rate": 5.1896755644226046e-06, "loss": 0.07403945922851562, "step": 3765 }, { "epoch": 0.5247683411133561, "grad_norm": 0.5232214331626892, "learning_rate": 5.1873204561550764e-06, "loss": 0.07175064086914062, "step": 3766 }, { "epoch": 0.5249076848045705, "grad_norm": 0.6018058657646179, "learning_rate": 5.18496530626922e-06, "loss": 0.08378982543945312, "step": 3767 }, { "epoch": 0.5250470284957849, "grad_norm": 0.5853307247161865, "learning_rate": 5.182610115288296e-06, "loss": 0.09538650512695312, "step": 3768 }, { "epoch": 0.5251863721869993, "grad_norm": 1.4870833158493042, "learning_rate": 5.180254883735571e-06, "loss": 0.09820365905761719, "step": 3769 }, { "epoch": 0.5253257158782136, "grad_norm": 0.9588757157325745, "learning_rate": 5.1778996121343274e-06, "loss": 0.07880401611328125, "step": 3770 }, { "epoch": 0.525465059569428, "grad_norm": 1.2070233821868896, "learning_rate": 5.175544301007852e-06, "loss": 0.11617374420166016, "step": 3771 }, { "epoch": 0.5256044032606424, "grad_norm": 1.168869137763977, "learning_rate": 5.173188950879441e-06, "loss": 0.12087821960449219, "step": 3772 }, { "epoch": 0.5257437469518568, "grad_norm": 0.5730278491973877, "learning_rate": 5.170833562272398e-06, "loss": 0.06961250305175781, "step": 3773 }, { "epoch": 0.5258830906430711, "grad_norm": 1.1751081943511963, "learning_rate": 5.168478135710038e-06, "loss": 0.112945556640625, "step": 3774 }, { "epoch": 0.5260224343342855, "grad_norm": 1.803816795349121, "learning_rate": 5.166122671715683e-06, "loss": 0.13003158569335938, "step": 3775 }, { "epoch": 0.5261617780254999, "grad_norm": 1.130250334739685, "learning_rate": 5.163767170812663e-06, "loss": 0.09749555587768555, "step": 3776 }, { "epoch": 0.5263011217167143, "grad_norm": 1.4643725156784058, "learning_rate": 5.1614116335243155e-06, "loss": 0.10565567016601562, "step": 3777 }, { "epoch": 0.5264404654079287, "grad_norm": 1.8274779319763184, "learning_rate": 5.1590560603739885e-06, "loss": 0.0930023193359375, "step": 3778 }, { "epoch": 0.526579809099143, "grad_norm": 2.247077703475952, "learning_rate": 5.156700451885037e-06, "loss": 0.10180854797363281, "step": 3779 }, { "epoch": 0.5267191527903574, "grad_norm": 0.9895345568656921, "learning_rate": 5.154344808580821e-06, "loss": 0.08075141906738281, "step": 3780 }, { "epoch": 0.5268584964815718, "grad_norm": 1.1540637016296387, "learning_rate": 5.151989130984715e-06, "loss": 0.13906288146972656, "step": 3781 }, { "epoch": 0.5269978401727862, "grad_norm": 1.2063180208206177, "learning_rate": 5.149633419620092e-06, "loss": 0.1057281494140625, "step": 3782 }, { "epoch": 0.5271371838640005, "grad_norm": 0.774202823638916, "learning_rate": 5.147277675010339e-06, "loss": 0.08045005798339844, "step": 3783 }, { "epoch": 0.5272765275552149, "grad_norm": 0.5756198763847351, "learning_rate": 5.144921897678851e-06, "loss": 0.076568603515625, "step": 3784 }, { "epoch": 0.5274158712464293, "grad_norm": 1.4954463243484497, "learning_rate": 5.142566088149024e-06, "loss": 0.1273651123046875, "step": 3785 }, { "epoch": 0.5275552149376437, "grad_norm": 1.023632526397705, "learning_rate": 5.1402102469442686e-06, "loss": 0.09293365478515625, "step": 3786 }, { "epoch": 0.5276945586288581, "grad_norm": 1.4302136898040771, "learning_rate": 5.137854374587996e-06, "loss": 0.09376144409179688, "step": 3787 }, { "epoch": 0.5278339023200724, "grad_norm": 0.6582548022270203, "learning_rate": 5.135498471603629e-06, "loss": 0.07761955261230469, "step": 3788 }, { "epoch": 0.5279732460112868, "grad_norm": 1.3383920192718506, "learning_rate": 5.133142538514596e-06, "loss": 0.10949325561523438, "step": 3789 }, { "epoch": 0.5281125897025012, "grad_norm": 3.680168867111206, "learning_rate": 5.130786575844329e-06, "loss": 0.11912345886230469, "step": 3790 }, { "epoch": 0.5282519333937156, "grad_norm": 0.6962665915489197, "learning_rate": 5.128430584116273e-06, "loss": 0.07500267028808594, "step": 3791 }, { "epoch": 0.52839127708493, "grad_norm": 0.49362242221832275, "learning_rate": 5.126074563853872e-06, "loss": 0.07319259643554688, "step": 3792 }, { "epoch": 0.5285306207761443, "grad_norm": 0.9728458523750305, "learning_rate": 5.123718515580581e-06, "loss": 0.07659244537353516, "step": 3793 }, { "epoch": 0.5286699644673587, "grad_norm": 0.6508670449256897, "learning_rate": 5.1213624398198606e-06, "loss": 0.08276557922363281, "step": 3794 }, { "epoch": 0.5288093081585731, "grad_norm": 0.5787702798843384, "learning_rate": 5.119006337095178e-06, "loss": 0.0828399658203125, "step": 3795 }, { "epoch": 0.5289486518497875, "grad_norm": 1.1440526247024536, "learning_rate": 5.1166502079300015e-06, "loss": 0.13692283630371094, "step": 3796 }, { "epoch": 0.5290879955410018, "grad_norm": 1.2500840425491333, "learning_rate": 5.114294052847814e-06, "loss": 0.1394796371459961, "step": 3797 }, { "epoch": 0.5292273392322162, "grad_norm": 1.4950501918792725, "learning_rate": 5.111937872372097e-06, "loss": 0.13654136657714844, "step": 3798 }, { "epoch": 0.5293666829234306, "grad_norm": 0.8344799280166626, "learning_rate": 5.109581667026341e-06, "loss": 0.12459564208984375, "step": 3799 }, { "epoch": 0.5295060266146451, "grad_norm": 0.9788835644721985, "learning_rate": 5.107225437334039e-06, "loss": 0.12935829162597656, "step": 3800 }, { "epoch": 0.5296453703058595, "grad_norm": 1.049376368522644, "learning_rate": 5.1048691838186935e-06, "loss": 0.09449386596679688, "step": 3801 }, { "epoch": 0.5297847139970738, "grad_norm": 1.0025089979171753, "learning_rate": 5.102512907003812e-06, "loss": 0.10337066650390625, "step": 3802 }, { "epoch": 0.5299240576882882, "grad_norm": 0.6459885239601135, "learning_rate": 5.100156607412899e-06, "loss": 0.07900714874267578, "step": 3803 }, { "epoch": 0.5300634013795026, "grad_norm": 1.5202919244766235, "learning_rate": 5.097800285569476e-06, "loss": 0.10520553588867188, "step": 3804 }, { "epoch": 0.530202745070717, "grad_norm": 0.6716084480285645, "learning_rate": 5.095443941997062e-06, "loss": 0.07288932800292969, "step": 3805 }, { "epoch": 0.5303420887619313, "grad_norm": 0.6072712540626526, "learning_rate": 5.093087577219183e-06, "loss": 0.08303260803222656, "step": 3806 }, { "epoch": 0.5304814324531457, "grad_norm": 0.5432005524635315, "learning_rate": 5.090731191759371e-06, "loss": 0.06983470916748047, "step": 3807 }, { "epoch": 0.5306207761443601, "grad_norm": 0.5876508355140686, "learning_rate": 5.088374786141159e-06, "loss": 0.07031440734863281, "step": 3808 }, { "epoch": 0.5307601198355745, "grad_norm": 0.7155005931854248, "learning_rate": 5.086018360888087e-06, "loss": 0.08210563659667969, "step": 3809 }, { "epoch": 0.5308994635267889, "grad_norm": 1.2757561206817627, "learning_rate": 5.083661916523699e-06, "loss": 0.11329078674316406, "step": 3810 }, { "epoch": 0.5310388072180032, "grad_norm": 0.8449915647506714, "learning_rate": 5.081305453571543e-06, "loss": 0.10230827331542969, "step": 3811 }, { "epoch": 0.5311781509092176, "grad_norm": 0.8584681749343872, "learning_rate": 5.07894897255517e-06, "loss": 0.09139442443847656, "step": 3812 }, { "epoch": 0.531317494600432, "grad_norm": 0.7072046995162964, "learning_rate": 5.076592473998141e-06, "loss": 0.0773468017578125, "step": 3813 }, { "epoch": 0.5314568382916464, "grad_norm": 0.5815380811691284, "learning_rate": 5.07423595842401e-06, "loss": 0.07800102233886719, "step": 3814 }, { "epoch": 0.5315961819828607, "grad_norm": 0.43813085556030273, "learning_rate": 5.071879426356345e-06, "loss": 0.07392024993896484, "step": 3815 }, { "epoch": 0.5317355256740751, "grad_norm": 1.101898431777954, "learning_rate": 5.069522878318712e-06, "loss": 0.11473464965820312, "step": 3816 }, { "epoch": 0.5318748693652895, "grad_norm": 1.3244706392288208, "learning_rate": 5.067166314834684e-06, "loss": 0.11911964416503906, "step": 3817 }, { "epoch": 0.5320142130565039, "grad_norm": 1.0700496435165405, "learning_rate": 5.064809736427835e-06, "loss": 0.15758514404296875, "step": 3818 }, { "epoch": 0.5321535567477182, "grad_norm": 1.4579048156738281, "learning_rate": 5.062453143621739e-06, "loss": 0.115692138671875, "step": 3819 }, { "epoch": 0.5322929004389326, "grad_norm": 1.2769805192947388, "learning_rate": 5.060096536939982e-06, "loss": 0.09146690368652344, "step": 3820 }, { "epoch": 0.532432244130147, "grad_norm": 0.9813410043716431, "learning_rate": 5.057739916906147e-06, "loss": 0.09407997131347656, "step": 3821 }, { "epoch": 0.5325715878213614, "grad_norm": 0.8725226521492004, "learning_rate": 5.05538328404382e-06, "loss": 0.09971332550048828, "step": 3822 }, { "epoch": 0.5327109315125758, "grad_norm": 0.944149911403656, "learning_rate": 5.053026638876591e-06, "loss": 0.09847259521484375, "step": 3823 }, { "epoch": 0.5328502752037901, "grad_norm": 1.854398488998413, "learning_rate": 5.050669981928056e-06, "loss": 0.1423473358154297, "step": 3824 }, { "epoch": 0.5329896188950045, "grad_norm": 1.4281641244888306, "learning_rate": 5.048313313721806e-06, "loss": 0.09438705444335938, "step": 3825 }, { "epoch": 0.5331289625862189, "grad_norm": 0.8489946126937866, "learning_rate": 5.04595663478144e-06, "loss": 0.13805770874023438, "step": 3826 }, { "epoch": 0.5332683062774333, "grad_norm": 0.9051153659820557, "learning_rate": 5.0435999456305605e-06, "loss": 0.08784675598144531, "step": 3827 }, { "epoch": 0.5334076499686476, "grad_norm": 1.2740554809570312, "learning_rate": 5.0412432467927674e-06, "loss": 0.0911407470703125, "step": 3828 }, { "epoch": 0.533546993659862, "grad_norm": 0.7680608034133911, "learning_rate": 5.038886538791668e-06, "loss": 0.07306861877441406, "step": 3829 }, { "epoch": 0.5336863373510764, "grad_norm": 1.2592626810073853, "learning_rate": 5.036529822150865e-06, "loss": 0.09555435180664062, "step": 3830 }, { "epoch": 0.5338256810422908, "grad_norm": 0.4822078347206116, "learning_rate": 5.034173097393973e-06, "loss": 0.06100654602050781, "step": 3831 }, { "epoch": 0.5339650247335052, "grad_norm": 0.8478269577026367, "learning_rate": 5.031816365044595e-06, "loss": 0.08293628692626953, "step": 3832 }, { "epoch": 0.5341043684247195, "grad_norm": 0.6113080382347107, "learning_rate": 5.02945962562635e-06, "loss": 0.08519554138183594, "step": 3833 }, { "epoch": 0.5342437121159339, "grad_norm": 0.6494558453559875, "learning_rate": 5.027102879662847e-06, "loss": 0.07369041442871094, "step": 3834 }, { "epoch": 0.5343830558071483, "grad_norm": 1.1725163459777832, "learning_rate": 5.024746127677703e-06, "loss": 0.12369346618652344, "step": 3835 }, { "epoch": 0.5345223994983627, "grad_norm": 1.1380088329315186, "learning_rate": 5.022389370194536e-06, "loss": 0.12130355834960938, "step": 3836 }, { "epoch": 0.534661743189577, "grad_norm": 0.7473833560943604, "learning_rate": 5.020032607736961e-06, "loss": 0.07379817962646484, "step": 3837 }, { "epoch": 0.5348010868807914, "grad_norm": 0.9040931463241577, "learning_rate": 5.017675840828597e-06, "loss": 0.07839250564575195, "step": 3838 }, { "epoch": 0.5349404305720058, "grad_norm": 1.3010969161987305, "learning_rate": 5.015319069993066e-06, "loss": 0.12400436401367188, "step": 3839 }, { "epoch": 0.5350797742632203, "grad_norm": 0.5224570035934448, "learning_rate": 5.012962295753988e-06, "loss": 0.07738304138183594, "step": 3840 }, { "epoch": 0.5352191179544347, "grad_norm": 0.7854145169258118, "learning_rate": 5.010605518634982e-06, "loss": 0.07733726501464844, "step": 3841 }, { "epoch": 0.535358461645649, "grad_norm": 0.9536536931991577, "learning_rate": 5.008248739159674e-06, "loss": 0.14093780517578125, "step": 3842 }, { "epoch": 0.5354978053368634, "grad_norm": 1.796741247177124, "learning_rate": 5.005891957851683e-06, "loss": 0.106109619140625, "step": 3843 }, { "epoch": 0.5356371490280778, "grad_norm": 0.6619245409965515, "learning_rate": 5.003535175234633e-06, "loss": 0.08109092712402344, "step": 3844 }, { "epoch": 0.5357764927192922, "grad_norm": 1.326302170753479, "learning_rate": 5.001178391832149e-06, "loss": 0.0808868408203125, "step": 3845 }, { "epoch": 0.5359158364105066, "grad_norm": 1.0865286588668823, "learning_rate": 4.998821608167853e-06, "loss": 0.08757972717285156, "step": 3846 }, { "epoch": 0.5360551801017209, "grad_norm": 0.9529523253440857, "learning_rate": 4.996464824765369e-06, "loss": 0.09069442749023438, "step": 3847 }, { "epoch": 0.5361945237929353, "grad_norm": 2.4410688877105713, "learning_rate": 4.994108042148318e-06, "loss": 0.15006637573242188, "step": 3848 }, { "epoch": 0.5363338674841497, "grad_norm": 1.0628684759140015, "learning_rate": 4.991751260840328e-06, "loss": 0.07408905029296875, "step": 3849 }, { "epoch": 0.5364732111753641, "grad_norm": 0.710394024848938, "learning_rate": 4.9893944813650185e-06, "loss": 0.0913386344909668, "step": 3850 }, { "epoch": 0.5366125548665784, "grad_norm": 0.4008599519729614, "learning_rate": 4.987037704246015e-06, "loss": 0.07183265686035156, "step": 3851 }, { "epoch": 0.5367518985577928, "grad_norm": 0.5014761090278625, "learning_rate": 4.984680930006936e-06, "loss": 0.07602691650390625, "step": 3852 }, { "epoch": 0.5368912422490072, "grad_norm": 0.9745994806289673, "learning_rate": 4.982324159171404e-06, "loss": 0.07879638671875, "step": 3853 }, { "epoch": 0.5370305859402216, "grad_norm": 1.0599299669265747, "learning_rate": 4.979967392263041e-06, "loss": 0.09540748596191406, "step": 3854 }, { "epoch": 0.537169929631436, "grad_norm": 0.6598411798477173, "learning_rate": 4.977610629805465e-06, "loss": 0.0833892822265625, "step": 3855 }, { "epoch": 0.5373092733226503, "grad_norm": 1.1550815105438232, "learning_rate": 4.975253872322297e-06, "loss": 0.1288776397705078, "step": 3856 }, { "epoch": 0.5374486170138647, "grad_norm": 0.9737605452537537, "learning_rate": 4.972897120337155e-06, "loss": 0.1143646240234375, "step": 3857 }, { "epoch": 0.5375879607050791, "grad_norm": 1.176160454750061, "learning_rate": 4.970540374373653e-06, "loss": 0.1077728271484375, "step": 3858 }, { "epoch": 0.5377273043962935, "grad_norm": 1.0125771760940552, "learning_rate": 4.9681836349554064e-06, "loss": 0.13007259368896484, "step": 3859 }, { "epoch": 0.5378666480875078, "grad_norm": 0.40501657128334045, "learning_rate": 4.965826902606029e-06, "loss": 0.07094383239746094, "step": 3860 }, { "epoch": 0.5380059917787222, "grad_norm": 0.8513891100883484, "learning_rate": 4.963470177849135e-06, "loss": 0.09546470642089844, "step": 3861 }, { "epoch": 0.5381453354699366, "grad_norm": 0.7918998599052429, "learning_rate": 4.961113461208335e-06, "loss": 0.1043405532836914, "step": 3862 }, { "epoch": 0.538284679161151, "grad_norm": 1.0367395877838135, "learning_rate": 4.958756753207234e-06, "loss": 0.11187934875488281, "step": 3863 }, { "epoch": 0.5384240228523653, "grad_norm": 0.4031146466732025, "learning_rate": 4.956400054369441e-06, "loss": 0.07689094543457031, "step": 3864 }, { "epoch": 0.5385633665435797, "grad_norm": 0.3819757103919983, "learning_rate": 4.954043365218561e-06, "loss": 0.05839824676513672, "step": 3865 }, { "epoch": 0.5387027102347941, "grad_norm": 1.5016565322875977, "learning_rate": 4.951686686278195e-06, "loss": 0.13130855560302734, "step": 3866 }, { "epoch": 0.5388420539260085, "grad_norm": 1.123144268989563, "learning_rate": 4.949330018071947e-06, "loss": 0.10271644592285156, "step": 3867 }, { "epoch": 0.5389813976172229, "grad_norm": 0.4021300971508026, "learning_rate": 4.946973361123411e-06, "loss": 0.08158111572265625, "step": 3868 }, { "epoch": 0.5391207413084372, "grad_norm": 0.6505223512649536, "learning_rate": 4.9446167159561814e-06, "loss": 0.09450531005859375, "step": 3869 }, { "epoch": 0.5392600849996516, "grad_norm": 0.6083394885063171, "learning_rate": 4.942260083093854e-06, "loss": 0.09990310668945312, "step": 3870 }, { "epoch": 0.539399428690866, "grad_norm": 0.8686556816101074, "learning_rate": 4.939903463060018e-06, "loss": 0.09461212158203125, "step": 3871 }, { "epoch": 0.5395387723820804, "grad_norm": 0.50344318151474, "learning_rate": 4.937546856378263e-06, "loss": 0.08338117599487305, "step": 3872 }, { "epoch": 0.5396781160732947, "grad_norm": 0.793565571308136, "learning_rate": 4.935190263572168e-06, "loss": 0.07829475402832031, "step": 3873 }, { "epoch": 0.5398174597645091, "grad_norm": 0.9331004023551941, "learning_rate": 4.932833685165318e-06, "loss": 0.11272811889648438, "step": 3874 }, { "epoch": 0.5399568034557235, "grad_norm": 0.49384117126464844, "learning_rate": 4.930477121681289e-06, "loss": 0.07718849182128906, "step": 3875 }, { "epoch": 0.5400961471469379, "grad_norm": 0.5037315487861633, "learning_rate": 4.9281205736436555e-06, "loss": 0.0631103515625, "step": 3876 }, { "epoch": 0.5402354908381523, "grad_norm": 1.2385382652282715, "learning_rate": 4.925764041575991e-06, "loss": 0.11349678039550781, "step": 3877 }, { "epoch": 0.5403748345293666, "grad_norm": 0.7585515975952148, "learning_rate": 4.9234075260018615e-06, "loss": 0.09375190734863281, "step": 3878 }, { "epoch": 0.540514178220581, "grad_norm": 1.3752906322479248, "learning_rate": 4.921051027444831e-06, "loss": 0.11844635009765625, "step": 3879 }, { "epoch": 0.5406535219117955, "grad_norm": 0.7373446226119995, "learning_rate": 4.918694546428458e-06, "loss": 0.1078939437866211, "step": 3880 }, { "epoch": 0.5407928656030099, "grad_norm": 0.5030845999717712, "learning_rate": 4.916338083476303e-06, "loss": 0.08263683319091797, "step": 3881 }, { "epoch": 0.5409322092942243, "grad_norm": 0.46891552209854126, "learning_rate": 4.913981639111914e-06, "loss": 0.08107185363769531, "step": 3882 }, { "epoch": 0.5410715529854386, "grad_norm": 1.2999154329299927, "learning_rate": 4.9116252138588435e-06, "loss": 0.12234878540039062, "step": 3883 }, { "epoch": 0.541210896676653, "grad_norm": 1.2215067148208618, "learning_rate": 4.90926880824063e-06, "loss": 0.13666534423828125, "step": 3884 }, { "epoch": 0.5413502403678674, "grad_norm": 1.287726640701294, "learning_rate": 4.906912422780818e-06, "loss": 0.09940910339355469, "step": 3885 }, { "epoch": 0.5414895840590818, "grad_norm": 1.587903380393982, "learning_rate": 4.904556058002939e-06, "loss": 0.1265087127685547, "step": 3886 }, { "epoch": 0.5416289277502961, "grad_norm": 1.0867836475372314, "learning_rate": 4.902199714430525e-06, "loss": 0.08161163330078125, "step": 3887 }, { "epoch": 0.5417682714415105, "grad_norm": 1.4264039993286133, "learning_rate": 4.899843392587104e-06, "loss": 0.087646484375, "step": 3888 }, { "epoch": 0.5419076151327249, "grad_norm": 0.5731040239334106, "learning_rate": 4.8974870929961915e-06, "loss": 0.0888519287109375, "step": 3889 }, { "epoch": 0.5420469588239393, "grad_norm": 1.0887939929962158, "learning_rate": 4.895130816181307e-06, "loss": 0.10416793823242188, "step": 3890 }, { "epoch": 0.5421863025151537, "grad_norm": 1.6758478879928589, "learning_rate": 4.8927745626659625e-06, "loss": 0.09218406677246094, "step": 3891 }, { "epoch": 0.542325646206368, "grad_norm": 1.696738839149475, "learning_rate": 4.89041833297366e-06, "loss": 0.08920860290527344, "step": 3892 }, { "epoch": 0.5424649898975824, "grad_norm": 1.6189439296722412, "learning_rate": 4.888062127627904e-06, "loss": 0.11066818237304688, "step": 3893 }, { "epoch": 0.5426043335887968, "grad_norm": 1.816677451133728, "learning_rate": 4.885705947152187e-06, "loss": 0.11171150207519531, "step": 3894 }, { "epoch": 0.5427436772800112, "grad_norm": 0.8767626285552979, "learning_rate": 4.883349792069999e-06, "loss": 0.09045600891113281, "step": 3895 }, { "epoch": 0.5428830209712255, "grad_norm": 0.43212655186653137, "learning_rate": 4.880993662904824e-06, "loss": 0.07401180267333984, "step": 3896 }, { "epoch": 0.5430223646624399, "grad_norm": 0.7267751693725586, "learning_rate": 4.87863756018014e-06, "loss": 0.110992431640625, "step": 3897 }, { "epoch": 0.5431617083536543, "grad_norm": 5.1416401863098145, "learning_rate": 4.87628148441942e-06, "loss": 0.11362361907958984, "step": 3898 }, { "epoch": 0.5433010520448687, "grad_norm": 0.8617910146713257, "learning_rate": 4.8739254361461305e-06, "loss": 0.07433891296386719, "step": 3899 }, { "epoch": 0.543440395736083, "grad_norm": 1.1076325178146362, "learning_rate": 4.871569415883729e-06, "loss": 0.12603187561035156, "step": 3900 }, { "epoch": 0.5435797394272974, "grad_norm": 1.9246214628219604, "learning_rate": 4.869213424155671e-06, "loss": 0.09070014953613281, "step": 3901 }, { "epoch": 0.5437190831185118, "grad_norm": 1.8565568923950195, "learning_rate": 4.8668574614854055e-06, "loss": 0.08547401428222656, "step": 3902 }, { "epoch": 0.5438584268097262, "grad_norm": 1.40500009059906, "learning_rate": 4.864501528396371e-06, "loss": 0.0829620361328125, "step": 3903 }, { "epoch": 0.5439977705009406, "grad_norm": 0.6393983364105225, "learning_rate": 4.862145625412006e-06, "loss": 0.06205940246582031, "step": 3904 }, { "epoch": 0.5441371141921549, "grad_norm": 0.7701408863067627, "learning_rate": 4.859789753055734e-06, "loss": 0.08113861083984375, "step": 3905 }, { "epoch": 0.5442764578833693, "grad_norm": 0.850904643535614, "learning_rate": 4.857433911850977e-06, "loss": 0.10338020324707031, "step": 3906 }, { "epoch": 0.5444158015745837, "grad_norm": 1.2242194414138794, "learning_rate": 4.8550781023211516e-06, "loss": 0.14708518981933594, "step": 3907 }, { "epoch": 0.5445551452657981, "grad_norm": 1.4442397356033325, "learning_rate": 4.852722324989661e-06, "loss": 0.12061500549316406, "step": 3908 }, { "epoch": 0.5446944889570124, "grad_norm": 1.676162600517273, "learning_rate": 4.85036658037991e-06, "loss": 0.10350799560546875, "step": 3909 }, { "epoch": 0.5448338326482268, "grad_norm": 1.3301702737808228, "learning_rate": 4.848010869015288e-06, "loss": 0.09857559204101562, "step": 3910 }, { "epoch": 0.5449731763394412, "grad_norm": 0.7188757658004761, "learning_rate": 4.84565519141918e-06, "loss": 0.09069061279296875, "step": 3911 }, { "epoch": 0.5451125200306556, "grad_norm": 1.2385696172714233, "learning_rate": 4.843299548114964e-06, "loss": 0.10400772094726562, "step": 3912 }, { "epoch": 0.54525186372187, "grad_norm": 1.3860836029052734, "learning_rate": 4.840943939626012e-06, "loss": 0.09528350830078125, "step": 3913 }, { "epoch": 0.5453912074130843, "grad_norm": 1.6176588535308838, "learning_rate": 4.838588366475685e-06, "loss": 0.10754776000976562, "step": 3914 }, { "epoch": 0.5455305511042987, "grad_norm": 0.9214066863059998, "learning_rate": 4.83623282918734e-06, "loss": 0.07308769226074219, "step": 3915 }, { "epoch": 0.5456698947955131, "grad_norm": 0.7445971369743347, "learning_rate": 4.833877328284319e-06, "loss": 0.10336875915527344, "step": 3916 }, { "epoch": 0.5458092384867275, "grad_norm": 1.1982626914978027, "learning_rate": 4.831521864289964e-06, "loss": 0.11036872863769531, "step": 3917 }, { "epoch": 0.5459485821779418, "grad_norm": 0.9138396382331848, "learning_rate": 4.829166437727603e-06, "loss": 0.10763740539550781, "step": 3918 }, { "epoch": 0.5460879258691562, "grad_norm": 1.2926313877105713, "learning_rate": 4.82681104912056e-06, "loss": 0.12316131591796875, "step": 3919 }, { "epoch": 0.5462272695603707, "grad_norm": 1.1790165901184082, "learning_rate": 4.82445569899215e-06, "loss": 0.10935401916503906, "step": 3920 }, { "epoch": 0.5463666132515851, "grad_norm": 0.804954469203949, "learning_rate": 4.822100387865673e-06, "loss": 0.0832366943359375, "step": 3921 }, { "epoch": 0.5465059569427995, "grad_norm": 0.759903073310852, "learning_rate": 4.8197451162644305e-06, "loss": 0.09711551666259766, "step": 3922 }, { "epoch": 0.5466453006340138, "grad_norm": 0.652821958065033, "learning_rate": 4.817389884711706e-06, "loss": 0.07841300964355469, "step": 3923 }, { "epoch": 0.5467846443252282, "grad_norm": 1.3374172449111938, "learning_rate": 4.815034693730781e-06, "loss": 0.12430191040039062, "step": 3924 }, { "epoch": 0.5469239880164426, "grad_norm": 2.2288084030151367, "learning_rate": 4.812679543844924e-06, "loss": 0.11001396179199219, "step": 3925 }, { "epoch": 0.547063331707657, "grad_norm": 1.071874976158142, "learning_rate": 4.810324435577397e-06, "loss": 0.09099960327148438, "step": 3926 }, { "epoch": 0.5472026753988714, "grad_norm": 0.6687663197517395, "learning_rate": 4.807969369451449e-06, "loss": 0.07838821411132812, "step": 3927 }, { "epoch": 0.5473420190900857, "grad_norm": 1.5980814695358276, "learning_rate": 4.805614345990322e-06, "loss": 0.14415740966796875, "step": 3928 }, { "epoch": 0.5474813627813001, "grad_norm": 0.7667539119720459, "learning_rate": 4.803259365717251e-06, "loss": 0.09831809997558594, "step": 3929 }, { "epoch": 0.5476207064725145, "grad_norm": 0.36514216661453247, "learning_rate": 4.800904429155458e-06, "loss": 0.057338714599609375, "step": 3930 }, { "epoch": 0.5477600501637289, "grad_norm": 0.4783473312854767, "learning_rate": 4.7985495368281534e-06, "loss": 0.07420825958251953, "step": 3931 }, { "epoch": 0.5478993938549432, "grad_norm": 2.4284896850585938, "learning_rate": 4.796194689258542e-06, "loss": 0.1150970458984375, "step": 3932 }, { "epoch": 0.5480387375461576, "grad_norm": 0.9006457328796387, "learning_rate": 4.793839886969819e-06, "loss": 0.08595657348632812, "step": 3933 }, { "epoch": 0.548178081237372, "grad_norm": 0.8196830749511719, "learning_rate": 4.791485130485167e-06, "loss": 0.111297607421875, "step": 3934 }, { "epoch": 0.5483174249285864, "grad_norm": 1.0951379537582397, "learning_rate": 4.789130420327756e-06, "loss": 0.08621597290039062, "step": 3935 }, { "epoch": 0.5484567686198007, "grad_norm": 0.9271626472473145, "learning_rate": 4.786775757020755e-06, "loss": 0.09168052673339844, "step": 3936 }, { "epoch": 0.5485961123110151, "grad_norm": 0.99973464012146, "learning_rate": 4.784421141087311e-06, "loss": 0.11714553833007812, "step": 3937 }, { "epoch": 0.5487354560022295, "grad_norm": 0.5987095236778259, "learning_rate": 4.782066573050567e-06, "loss": 0.07578372955322266, "step": 3938 }, { "epoch": 0.5488747996934439, "grad_norm": 0.5922589898109436, "learning_rate": 4.779712053433655e-06, "loss": 0.07125091552734375, "step": 3939 }, { "epoch": 0.5490141433846583, "grad_norm": 0.7198578119277954, "learning_rate": 4.777357582759696e-06, "loss": 0.07889556884765625, "step": 3940 }, { "epoch": 0.5491534870758726, "grad_norm": 0.6862565279006958, "learning_rate": 4.7750031615518e-06, "loss": 0.08768081665039062, "step": 3941 }, { "epoch": 0.549292830767087, "grad_norm": 0.6433971524238586, "learning_rate": 4.772648790333065e-06, "loss": 0.08928298950195312, "step": 3942 }, { "epoch": 0.5494321744583014, "grad_norm": 0.42083215713500977, "learning_rate": 4.7702944696265766e-06, "loss": 0.07530879974365234, "step": 3943 }, { "epoch": 0.5495715181495158, "grad_norm": 0.9523960947990417, "learning_rate": 4.767940199955413e-06, "loss": 0.12494850158691406, "step": 3944 }, { "epoch": 0.5497108618407301, "grad_norm": 0.6906194090843201, "learning_rate": 4.765585981842639e-06, "loss": 0.07679557800292969, "step": 3945 }, { "epoch": 0.5498502055319445, "grad_norm": 0.44413670897483826, "learning_rate": 4.76323181581131e-06, "loss": 0.059539794921875, "step": 3946 }, { "epoch": 0.5499895492231589, "grad_norm": 0.5719175338745117, "learning_rate": 4.760877702384464e-06, "loss": 0.06993865966796875, "step": 3947 }, { "epoch": 0.5501288929143733, "grad_norm": 1.0484542846679688, "learning_rate": 4.758523642085133e-06, "loss": 0.09053230285644531, "step": 3948 }, { "epoch": 0.5502682366055877, "grad_norm": 1.3044748306274414, "learning_rate": 4.756169635436336e-06, "loss": 0.09696197509765625, "step": 3949 }, { "epoch": 0.550407580296802, "grad_norm": 0.39521074295043945, "learning_rate": 4.75381568296108e-06, "loss": 0.07103824615478516, "step": 3950 }, { "epoch": 0.5505469239880164, "grad_norm": 1.1264933347702026, "learning_rate": 4.751461785182358e-06, "loss": 0.11066150665283203, "step": 3951 }, { "epoch": 0.5506862676792308, "grad_norm": 1.3874733448028564, "learning_rate": 4.7491079426231556e-06, "loss": 0.10976028442382812, "step": 3952 }, { "epoch": 0.5508256113704452, "grad_norm": 0.8233670592308044, "learning_rate": 4.746754155806437e-06, "loss": 0.09841728210449219, "step": 3953 }, { "epoch": 0.5509649550616595, "grad_norm": 1.6272374391555786, "learning_rate": 4.744400425255165e-06, "loss": 0.12302589416503906, "step": 3954 }, { "epoch": 0.5511042987528739, "grad_norm": 0.6651246547698975, "learning_rate": 4.7420467514922815e-06, "loss": 0.06821441650390625, "step": 3955 }, { "epoch": 0.5512436424440883, "grad_norm": 0.9412693977355957, "learning_rate": 4.739693135040722e-06, "loss": 0.08115768432617188, "step": 3956 }, { "epoch": 0.5513829861353027, "grad_norm": 0.5512532591819763, "learning_rate": 4.737339576423406e-06, "loss": 0.059814453125, "step": 3957 }, { "epoch": 0.551522329826517, "grad_norm": 1.4521161317825317, "learning_rate": 4.734986076163238e-06, "loss": 0.13341331481933594, "step": 3958 }, { "epoch": 0.5516616735177314, "grad_norm": 0.95435631275177, "learning_rate": 4.732632634783114e-06, "loss": 0.091827392578125, "step": 3959 }, { "epoch": 0.5518010172089458, "grad_norm": 1.1086671352386475, "learning_rate": 4.730279252805914e-06, "loss": 0.1148080825805664, "step": 3960 }, { "epoch": 0.5519403609001603, "grad_norm": 1.5817941427230835, "learning_rate": 4.727925930754506e-06, "loss": 0.10220909118652344, "step": 3961 }, { "epoch": 0.5520797045913747, "grad_norm": 1.0455102920532227, "learning_rate": 4.725572669151747e-06, "loss": 0.09704208374023438, "step": 3962 }, { "epoch": 0.552219048282589, "grad_norm": 0.963230311870575, "learning_rate": 4.723219468520474e-06, "loss": 0.11974525451660156, "step": 3963 }, { "epoch": 0.5523583919738034, "grad_norm": 0.7927589416503906, "learning_rate": 4.720866329383514e-06, "loss": 0.08677101135253906, "step": 3964 }, { "epoch": 0.5524977356650178, "grad_norm": 0.8194003105163574, "learning_rate": 4.718513252263685e-06, "loss": 0.0792083740234375, "step": 3965 }, { "epoch": 0.5526370793562322, "grad_norm": 0.5878531336784363, "learning_rate": 4.716160237683785e-06, "loss": 0.08880996704101562, "step": 3966 }, { "epoch": 0.5527764230474466, "grad_norm": 0.5379693508148193, "learning_rate": 4.7138072861666e-06, "loss": 0.0723733901977539, "step": 3967 }, { "epoch": 0.5529157667386609, "grad_norm": 0.8436336517333984, "learning_rate": 4.711454398234902e-06, "loss": 0.09375953674316406, "step": 3968 }, { "epoch": 0.5530551104298753, "grad_norm": 0.7935565114021301, "learning_rate": 4.7091015744114475e-06, "loss": 0.09076309204101562, "step": 3969 }, { "epoch": 0.5531944541210897, "grad_norm": 0.6454147696495056, "learning_rate": 4.706748815218984e-06, "loss": 0.08078575134277344, "step": 3970 }, { "epoch": 0.5533337978123041, "grad_norm": 0.9911715984344482, "learning_rate": 4.704396121180237e-06, "loss": 0.10928916931152344, "step": 3971 }, { "epoch": 0.5534731415035185, "grad_norm": 0.7938311100006104, "learning_rate": 4.702043492817924e-06, "loss": 0.08135604858398438, "step": 3972 }, { "epoch": 0.5536124851947328, "grad_norm": 0.6329994797706604, "learning_rate": 4.6996909306547455e-06, "loss": 0.06920337677001953, "step": 3973 }, { "epoch": 0.5537518288859472, "grad_norm": 0.7561540603637695, "learning_rate": 4.697338435213385e-06, "loss": 0.07792091369628906, "step": 3974 }, { "epoch": 0.5538911725771616, "grad_norm": 1.1133332252502441, "learning_rate": 4.694986007016514e-06, "loss": 0.10336589813232422, "step": 3975 }, { "epoch": 0.554030516268376, "grad_norm": 1.3078261613845825, "learning_rate": 4.692633646586788e-06, "loss": 0.11868095397949219, "step": 3976 }, { "epoch": 0.5541698599595903, "grad_norm": 1.1276692152023315, "learning_rate": 4.690281354446849e-06, "loss": 0.09826087951660156, "step": 3977 }, { "epoch": 0.5543092036508047, "grad_norm": 1.3911023139953613, "learning_rate": 4.6879291311193244e-06, "loss": 0.14046096801757812, "step": 3978 }, { "epoch": 0.5544485473420191, "grad_norm": 1.3225958347320557, "learning_rate": 4.68557697712682e-06, "loss": 0.13048934936523438, "step": 3979 }, { "epoch": 0.5545878910332335, "grad_norm": 0.8270086050033569, "learning_rate": 4.683224892991932e-06, "loss": 0.07521629333496094, "step": 3980 }, { "epoch": 0.5547272347244478, "grad_norm": 0.8182215690612793, "learning_rate": 4.680872879237242e-06, "loss": 0.0995931625366211, "step": 3981 }, { "epoch": 0.5548665784156622, "grad_norm": 1.6553343534469604, "learning_rate": 4.678520936385313e-06, "loss": 0.10699081420898438, "step": 3982 }, { "epoch": 0.5550059221068766, "grad_norm": 0.5195104479789734, "learning_rate": 4.676169064958692e-06, "loss": 0.06927013397216797, "step": 3983 }, { "epoch": 0.555145265798091, "grad_norm": 0.5736615061759949, "learning_rate": 4.6738172654799105e-06, "loss": 0.0656118392944336, "step": 3984 }, { "epoch": 0.5552846094893054, "grad_norm": 0.7341468334197998, "learning_rate": 4.671465538471487e-06, "loss": 0.09099388122558594, "step": 3985 }, { "epoch": 0.5554239531805197, "grad_norm": 0.7049618363380432, "learning_rate": 4.66911388445592e-06, "loss": 0.06735610961914062, "step": 3986 }, { "epoch": 0.5555632968717341, "grad_norm": 0.4582832157611847, "learning_rate": 4.666762303955692e-06, "loss": 0.06194496154785156, "step": 3987 }, { "epoch": 0.5557026405629485, "grad_norm": 0.6106932163238525, "learning_rate": 4.664410797493275e-06, "loss": 0.08267974853515625, "step": 3988 }, { "epoch": 0.5558419842541629, "grad_norm": 0.8029181957244873, "learning_rate": 4.662059365591115e-06, "loss": 0.12790298461914062, "step": 3989 }, { "epoch": 0.5559813279453772, "grad_norm": 1.3322442770004272, "learning_rate": 4.6597080087716494e-06, "loss": 0.11147880554199219, "step": 3990 }, { "epoch": 0.5561206716365916, "grad_norm": 0.6900964379310608, "learning_rate": 4.657356727557295e-06, "loss": 0.08238983154296875, "step": 3991 }, { "epoch": 0.556260015327806, "grad_norm": 0.9439769387245178, "learning_rate": 4.655005522470453e-06, "loss": 0.09686660766601562, "step": 3992 }, { "epoch": 0.5563993590190204, "grad_norm": 0.7156833410263062, "learning_rate": 4.652654394033508e-06, "loss": 0.06919479370117188, "step": 3993 }, { "epoch": 0.5565387027102348, "grad_norm": 0.4876377582550049, "learning_rate": 4.650303342768827e-06, "loss": 0.08342361450195312, "step": 3994 }, { "epoch": 0.5566780464014491, "grad_norm": 0.7849727272987366, "learning_rate": 4.6479523691987585e-06, "loss": 0.08956146240234375, "step": 3995 }, { "epoch": 0.5568173900926635, "grad_norm": 1.3304879665374756, "learning_rate": 4.645601473845636e-06, "loss": 0.10634231567382812, "step": 3996 }, { "epoch": 0.5569567337838779, "grad_norm": 0.9872373342514038, "learning_rate": 4.6432506572317754e-06, "loss": 0.10681533813476562, "step": 3997 }, { "epoch": 0.5570960774750923, "grad_norm": 1.1692700386047363, "learning_rate": 4.6408999198794744e-06, "loss": 0.11354446411132812, "step": 3998 }, { "epoch": 0.5572354211663066, "grad_norm": 1.0047956705093384, "learning_rate": 4.6385492623110135e-06, "loss": 0.11027717590332031, "step": 3999 }, { "epoch": 0.557374764857521, "grad_norm": 0.5364369750022888, "learning_rate": 4.636198685048653e-06, "loss": 0.06665229797363281, "step": 4000 }, { "epoch": 0.5575141085487355, "grad_norm": 0.7514595985412598, "learning_rate": 4.633848188614639e-06, "loss": 0.10024642944335938, "step": 4001 }, { "epoch": 0.5576534522399499, "grad_norm": 0.47283825278282166, "learning_rate": 4.631497773531199e-06, "loss": 0.0785980224609375, "step": 4002 }, { "epoch": 0.5577927959311643, "grad_norm": 1.0749353170394897, "learning_rate": 4.629147440320539e-06, "loss": 0.10866737365722656, "step": 4003 }, { "epoch": 0.5579321396223786, "grad_norm": 1.006678819656372, "learning_rate": 4.626797189504855e-06, "loss": 0.08440017700195312, "step": 4004 }, { "epoch": 0.558071483313593, "grad_norm": 0.5400375723838806, "learning_rate": 4.624447021606311e-06, "loss": 0.06698131561279297, "step": 4005 }, { "epoch": 0.5582108270048074, "grad_norm": 1.0383027791976929, "learning_rate": 4.6220969371470665e-06, "loss": 0.11185836791992188, "step": 4006 }, { "epoch": 0.5583501706960218, "grad_norm": 0.45250147581100464, "learning_rate": 4.619746936649254e-06, "loss": 0.06287145614624023, "step": 4007 }, { "epoch": 0.5584895143872362, "grad_norm": 1.3886622190475464, "learning_rate": 4.617397020634991e-06, "loss": 0.10769367218017578, "step": 4008 }, { "epoch": 0.5586288580784505, "grad_norm": 0.9592447280883789, "learning_rate": 4.615047189626376e-06, "loss": 0.08444404602050781, "step": 4009 }, { "epoch": 0.5587682017696649, "grad_norm": 0.7054246664047241, "learning_rate": 4.612697444145487e-06, "loss": 0.06471633911132812, "step": 4010 }, { "epoch": 0.5589075454608793, "grad_norm": 0.6908400058746338, "learning_rate": 4.610347784714383e-06, "loss": 0.07426881790161133, "step": 4011 }, { "epoch": 0.5590468891520937, "grad_norm": 1.5038301944732666, "learning_rate": 4.6079982118551045e-06, "loss": 0.09021186828613281, "step": 4012 }, { "epoch": 0.559186232843308, "grad_norm": 1.2486261129379272, "learning_rate": 4.605648726089674e-06, "loss": 0.11238479614257812, "step": 4013 }, { "epoch": 0.5593255765345224, "grad_norm": 0.7497074604034424, "learning_rate": 4.603299327940094e-06, "loss": 0.08785057067871094, "step": 4014 }, { "epoch": 0.5594649202257368, "grad_norm": 1.0306986570358276, "learning_rate": 4.600950017928348e-06, "loss": 0.09393310546875, "step": 4015 }, { "epoch": 0.5596042639169512, "grad_norm": 0.8329711556434631, "learning_rate": 4.598600796576395e-06, "loss": 0.08992767333984375, "step": 4016 }, { "epoch": 0.5597436076081655, "grad_norm": 1.1293299198150635, "learning_rate": 4.596251664406182e-06, "loss": 0.1007390022277832, "step": 4017 }, { "epoch": 0.5598829512993799, "grad_norm": 1.26115083694458, "learning_rate": 4.593902621939632e-06, "loss": 0.11092376708984375, "step": 4018 }, { "epoch": 0.5600222949905943, "grad_norm": 0.8166880011558533, "learning_rate": 4.591553669698646e-06, "loss": 0.10976028442382812, "step": 4019 }, { "epoch": 0.5601616386818087, "grad_norm": 0.8682858347892761, "learning_rate": 4.589204808205113e-06, "loss": 0.10557937622070312, "step": 4020 }, { "epoch": 0.5603009823730231, "grad_norm": 1.2353565692901611, "learning_rate": 4.58685603798089e-06, "loss": 0.1322174072265625, "step": 4021 }, { "epoch": 0.5604403260642374, "grad_norm": 0.44333502650260925, "learning_rate": 4.5845073595478245e-06, "loss": 0.06135368347167969, "step": 4022 }, { "epoch": 0.5605796697554518, "grad_norm": 1.1575841903686523, "learning_rate": 4.5821587734277374e-06, "loss": 0.09376335144042969, "step": 4023 }, { "epoch": 0.5607190134466662, "grad_norm": 0.6707767844200134, "learning_rate": 4.57981028014243e-06, "loss": 0.07666015625, "step": 4024 }, { "epoch": 0.5608583571378806, "grad_norm": 2.0318963527679443, "learning_rate": 4.577461880213688e-06, "loss": 0.11756229400634766, "step": 4025 }, { "epoch": 0.560997700829095, "grad_norm": 1.187767505645752, "learning_rate": 4.575113574163269e-06, "loss": 0.14821624755859375, "step": 4026 }, { "epoch": 0.5611370445203093, "grad_norm": 0.7079771757125854, "learning_rate": 4.572765362512912e-06, "loss": 0.09909820556640625, "step": 4027 }, { "epoch": 0.5612763882115237, "grad_norm": 1.1006884574890137, "learning_rate": 4.570417245784337e-06, "loss": 0.08649826049804688, "step": 4028 }, { "epoch": 0.5614157319027381, "grad_norm": 1.3207130432128906, "learning_rate": 4.568069224499244e-06, "loss": 0.10101509094238281, "step": 4029 }, { "epoch": 0.5615550755939525, "grad_norm": 0.5094632506370544, "learning_rate": 4.565721299179308e-06, "loss": 0.07327461242675781, "step": 4030 }, { "epoch": 0.5616944192851668, "grad_norm": 0.6809772849082947, "learning_rate": 4.563373470346186e-06, "loss": 0.07770729064941406, "step": 4031 }, { "epoch": 0.5618337629763812, "grad_norm": 1.7154532670974731, "learning_rate": 4.561025738521508e-06, "loss": 0.11434745788574219, "step": 4032 }, { "epoch": 0.5619731066675956, "grad_norm": 1.187243103981018, "learning_rate": 4.55867810422689e-06, "loss": 0.1290454864501953, "step": 4033 }, { "epoch": 0.56211245035881, "grad_norm": 1.036806344985962, "learning_rate": 4.5563305679839214e-06, "loss": 0.09939956665039062, "step": 4034 }, { "epoch": 0.5622517940500243, "grad_norm": 1.3077958822250366, "learning_rate": 4.553983130314171e-06, "loss": 0.12899303436279297, "step": 4035 }, { "epoch": 0.5623911377412387, "grad_norm": 0.8756757378578186, "learning_rate": 4.551635791739188e-06, "loss": 0.10151863098144531, "step": 4036 }, { "epoch": 0.5625304814324531, "grad_norm": 0.38382819294929504, "learning_rate": 4.549288552780494e-06, "loss": 0.06885671615600586, "step": 4037 }, { "epoch": 0.5626698251236675, "grad_norm": 3.4759421348571777, "learning_rate": 4.546941413959595e-06, "loss": 0.15139389038085938, "step": 4038 }, { "epoch": 0.5628091688148819, "grad_norm": 1.5694446563720703, "learning_rate": 4.544594375797969e-06, "loss": 0.11815261840820312, "step": 4039 }, { "epoch": 0.5629485125060962, "grad_norm": 0.741830587387085, "learning_rate": 4.542247438817076e-06, "loss": 0.08726882934570312, "step": 4040 }, { "epoch": 0.5630878561973107, "grad_norm": 0.8412365913391113, "learning_rate": 4.539900603538352e-06, "loss": 0.08181190490722656, "step": 4041 }, { "epoch": 0.5632271998885251, "grad_norm": 0.565952718257904, "learning_rate": 4.53755387048321e-06, "loss": 0.07324600219726562, "step": 4042 }, { "epoch": 0.5633665435797395, "grad_norm": 0.9794660806655884, "learning_rate": 4.53520724017304e-06, "loss": 0.09179925918579102, "step": 4043 }, { "epoch": 0.5635058872709539, "grad_norm": 0.4254066050052643, "learning_rate": 4.532860713129208e-06, "loss": 0.07629776000976562, "step": 4044 }, { "epoch": 0.5636452309621682, "grad_norm": 0.8614820837974548, "learning_rate": 4.530514289873062e-06, "loss": 0.06949043273925781, "step": 4045 }, { "epoch": 0.5637845746533826, "grad_norm": 0.6074066162109375, "learning_rate": 4.528167970925922e-06, "loss": 0.08225250244140625, "step": 4046 }, { "epoch": 0.563923918344597, "grad_norm": 0.8152393102645874, "learning_rate": 4.525821756809088e-06, "loss": 0.09567451477050781, "step": 4047 }, { "epoch": 0.5640632620358114, "grad_norm": 1.3356554508209229, "learning_rate": 4.523475648043832e-06, "loss": 0.11378097534179688, "step": 4048 }, { "epoch": 0.5642026057270257, "grad_norm": 0.42433515191078186, "learning_rate": 4.5211296451514085e-06, "loss": 0.05995941162109375, "step": 4049 }, { "epoch": 0.5643419494182401, "grad_norm": 1.012851357460022, "learning_rate": 4.518783748653045e-06, "loss": 0.10188484191894531, "step": 4050 }, { "epoch": 0.5644812931094545, "grad_norm": 0.6971046328544617, "learning_rate": 4.516437959069946e-06, "loss": 0.08754348754882812, "step": 4051 }, { "epoch": 0.5646206368006689, "grad_norm": 1.5606584548950195, "learning_rate": 4.514092276923295e-06, "loss": 0.10636043548583984, "step": 4052 }, { "epoch": 0.5647599804918833, "grad_norm": 1.1614187955856323, "learning_rate": 4.5117467027342435e-06, "loss": 0.13516616821289062, "step": 4053 }, { "epoch": 0.5648993241830976, "grad_norm": 1.159551739692688, "learning_rate": 4.509401237023928e-06, "loss": 0.11067962646484375, "step": 4054 }, { "epoch": 0.565038667874312, "grad_norm": 0.9110174179077148, "learning_rate": 4.507055880313458e-06, "loss": 0.09567642211914062, "step": 4055 }, { "epoch": 0.5651780115655264, "grad_norm": 0.8377240896224976, "learning_rate": 4.504710633123917e-06, "loss": 0.09461593627929688, "step": 4056 }, { "epoch": 0.5653173552567408, "grad_norm": 1.1295212507247925, "learning_rate": 4.502365495976367e-06, "loss": 0.11483573913574219, "step": 4057 }, { "epoch": 0.5654566989479551, "grad_norm": 0.700167715549469, "learning_rate": 4.5000204693918405e-06, "loss": 0.08259963989257812, "step": 4058 }, { "epoch": 0.5655960426391695, "grad_norm": 0.7542256712913513, "learning_rate": 4.497675553891352e-06, "loss": 0.09292411804199219, "step": 4059 }, { "epoch": 0.5657353863303839, "grad_norm": 1.2655270099639893, "learning_rate": 4.495330749995887e-06, "loss": 0.10224533081054688, "step": 4060 }, { "epoch": 0.5658747300215983, "grad_norm": 1.14313542842865, "learning_rate": 4.492986058226407e-06, "loss": 0.103515625, "step": 4061 }, { "epoch": 0.5660140737128126, "grad_norm": 1.0835856199264526, "learning_rate": 4.490641479103851e-06, "loss": 0.11336135864257812, "step": 4062 }, { "epoch": 0.566153417404027, "grad_norm": 1.087712287902832, "learning_rate": 4.4882970131491286e-06, "loss": 0.10636138916015625, "step": 4063 }, { "epoch": 0.5662927610952414, "grad_norm": 1.0209821462631226, "learning_rate": 4.485952660883126e-06, "loss": 0.1313762664794922, "step": 4064 }, { "epoch": 0.5664321047864558, "grad_norm": 0.7504811882972717, "learning_rate": 4.483608422826708e-06, "loss": 0.11034965515136719, "step": 4065 }, { "epoch": 0.5665714484776702, "grad_norm": 0.7816259860992432, "learning_rate": 4.481264299500709e-06, "loss": 0.11413955688476562, "step": 4066 }, { "epoch": 0.5667107921688845, "grad_norm": 1.2304643392562866, "learning_rate": 4.478920291425939e-06, "loss": 0.10959815979003906, "step": 4067 }, { "epoch": 0.5668501358600989, "grad_norm": 0.785270094871521, "learning_rate": 4.476576399123187e-06, "loss": 0.09001350402832031, "step": 4068 }, { "epoch": 0.5669894795513133, "grad_norm": 0.8945786356925964, "learning_rate": 4.474232623113204e-06, "loss": 0.08034896850585938, "step": 4069 }, { "epoch": 0.5671288232425277, "grad_norm": 1.835556983947754, "learning_rate": 4.471888963916732e-06, "loss": 0.10420417785644531, "step": 4070 }, { "epoch": 0.567268166933742, "grad_norm": 1.323665738105774, "learning_rate": 4.4695454220544735e-06, "loss": 0.11922073364257812, "step": 4071 }, { "epoch": 0.5674075106249564, "grad_norm": 0.667069137096405, "learning_rate": 4.467201998047112e-06, "loss": 0.06851577758789062, "step": 4072 }, { "epoch": 0.5675468543161708, "grad_norm": 1.1718798875808716, "learning_rate": 4.464858692415304e-06, "loss": 0.0981597900390625, "step": 4073 }, { "epoch": 0.5676861980073852, "grad_norm": 0.623988151550293, "learning_rate": 4.462515505679677e-06, "loss": 0.08834075927734375, "step": 4074 }, { "epoch": 0.5678255416985996, "grad_norm": 0.8788135051727295, "learning_rate": 4.460172438360832e-06, "loss": 0.0978851318359375, "step": 4075 }, { "epoch": 0.5679648853898139, "grad_norm": 1.1759573221206665, "learning_rate": 4.457829490979347e-06, "loss": 0.11576080322265625, "step": 4076 }, { "epoch": 0.5681042290810283, "grad_norm": 1.4560480117797852, "learning_rate": 4.455486664055772e-06, "loss": 0.09751129150390625, "step": 4077 }, { "epoch": 0.5682435727722427, "grad_norm": 0.9947416186332703, "learning_rate": 4.4531439581106295e-06, "loss": 0.08749771118164062, "step": 4078 }, { "epoch": 0.5683829164634571, "grad_norm": 0.649425745010376, "learning_rate": 4.450801373664413e-06, "loss": 0.06525039672851562, "step": 4079 }, { "epoch": 0.5685222601546714, "grad_norm": 0.7174168825149536, "learning_rate": 4.448458911237593e-06, "loss": 0.07745933532714844, "step": 4080 }, { "epoch": 0.5686616038458859, "grad_norm": 2.958223581314087, "learning_rate": 4.446116571350611e-06, "loss": 0.15102005004882812, "step": 4081 }, { "epoch": 0.5688009475371003, "grad_norm": 0.6003392338752747, "learning_rate": 4.443774354523883e-06, "loss": 0.07139968872070312, "step": 4082 }, { "epoch": 0.5689402912283147, "grad_norm": 0.5808599591255188, "learning_rate": 4.441432261277794e-06, "loss": 0.0738525390625, "step": 4083 }, { "epoch": 0.5690796349195291, "grad_norm": 1.6163309812545776, "learning_rate": 4.4390902921327025e-06, "loss": 0.12166643142700195, "step": 4084 }, { "epoch": 0.5692189786107434, "grad_norm": 0.8310291767120361, "learning_rate": 4.436748447608944e-06, "loss": 0.09060287475585938, "step": 4085 }, { "epoch": 0.5693583223019578, "grad_norm": 0.5087378025054932, "learning_rate": 4.43440672822682e-06, "loss": 0.07395362854003906, "step": 4086 }, { "epoch": 0.5694976659931722, "grad_norm": 0.48974353075027466, "learning_rate": 4.432065134506608e-06, "loss": 0.056626319885253906, "step": 4087 }, { "epoch": 0.5696370096843866, "grad_norm": 1.579438328742981, "learning_rate": 4.429723666968559e-06, "loss": 0.106658935546875, "step": 4088 }, { "epoch": 0.569776353375601, "grad_norm": 0.8773099780082703, "learning_rate": 4.427382326132892e-06, "loss": 0.10071754455566406, "step": 4089 }, { "epoch": 0.5699156970668153, "grad_norm": 0.5561714172363281, "learning_rate": 4.425041112519797e-06, "loss": 0.06853866577148438, "step": 4090 }, { "epoch": 0.5700550407580297, "grad_norm": 1.5551981925964355, "learning_rate": 4.42270002664944e-06, "loss": 0.1324443817138672, "step": 4091 }, { "epoch": 0.5701943844492441, "grad_norm": 0.8716766238212585, "learning_rate": 4.4203590690419575e-06, "loss": 0.08273887634277344, "step": 4092 }, { "epoch": 0.5703337281404585, "grad_norm": 0.7863575220108032, "learning_rate": 4.418018240217457e-06, "loss": 0.09329032897949219, "step": 4093 }, { "epoch": 0.5704730718316728, "grad_norm": 0.7099878191947937, "learning_rate": 4.415677540696017e-06, "loss": 0.09073448181152344, "step": 4094 }, { "epoch": 0.5706124155228872, "grad_norm": 0.5670595169067383, "learning_rate": 4.413336970997687e-06, "loss": 0.06613540649414062, "step": 4095 }, { "epoch": 0.5707517592141016, "grad_norm": 0.6105392575263977, "learning_rate": 4.410996531642487e-06, "loss": 0.06636810302734375, "step": 4096 }, { "epoch": 0.570891102905316, "grad_norm": 0.9909316301345825, "learning_rate": 4.408656223150412e-06, "loss": 0.0971221923828125, "step": 4097 }, { "epoch": 0.5710304465965303, "grad_norm": 1.7641077041625977, "learning_rate": 4.406316046041423e-06, "loss": 0.1382160186767578, "step": 4098 }, { "epoch": 0.5711697902877447, "grad_norm": 0.6059736013412476, "learning_rate": 4.4039760008354556e-06, "loss": 0.07605743408203125, "step": 4099 }, { "epoch": 0.5713091339789591, "grad_norm": 0.47523581981658936, "learning_rate": 4.401636088052411e-06, "loss": 0.07036209106445312, "step": 4100 }, { "epoch": 0.5714484776701735, "grad_norm": 0.8524162173271179, "learning_rate": 4.399296308212168e-06, "loss": 0.10267829895019531, "step": 4101 }, { "epoch": 0.5715878213613879, "grad_norm": 0.5743864178657532, "learning_rate": 4.396956661834571e-06, "loss": 0.08153724670410156, "step": 4102 }, { "epoch": 0.5717271650526022, "grad_norm": 0.9654628038406372, "learning_rate": 4.394617149439435e-06, "loss": 0.08641910552978516, "step": 4103 }, { "epoch": 0.5718665087438166, "grad_norm": 0.9603514671325684, "learning_rate": 4.392277771546549e-06, "loss": 0.10042190551757812, "step": 4104 }, { "epoch": 0.572005852435031, "grad_norm": 1.610740065574646, "learning_rate": 4.389938528675668e-06, "loss": 0.11101341247558594, "step": 4105 }, { "epoch": 0.5721451961262454, "grad_norm": 0.6434781551361084, "learning_rate": 4.387599421346517e-06, "loss": 0.08690738677978516, "step": 4106 }, { "epoch": 0.5722845398174597, "grad_norm": 0.6508153676986694, "learning_rate": 4.385260450078793e-06, "loss": 0.08400726318359375, "step": 4107 }, { "epoch": 0.5724238835086741, "grad_norm": 1.336958408355713, "learning_rate": 4.382921615392162e-06, "loss": 0.10203742980957031, "step": 4108 }, { "epoch": 0.5725632271998885, "grad_norm": 1.2579466104507446, "learning_rate": 4.38058291780626e-06, "loss": 0.1339244842529297, "step": 4109 }, { "epoch": 0.5727025708911029, "grad_norm": 0.8550846576690674, "learning_rate": 4.378244357840694e-06, "loss": 0.07151985168457031, "step": 4110 }, { "epoch": 0.5728419145823173, "grad_norm": 0.7438938021659851, "learning_rate": 4.375905936015035e-06, "loss": 0.09517478942871094, "step": 4111 }, { "epoch": 0.5729812582735316, "grad_norm": 0.5578249096870422, "learning_rate": 4.373567652848828e-06, "loss": 0.06775856018066406, "step": 4112 }, { "epoch": 0.573120601964746, "grad_norm": 0.7348350882530212, "learning_rate": 4.371229508861588e-06, "loss": 0.07534980773925781, "step": 4113 }, { "epoch": 0.5732599456559604, "grad_norm": 0.8538760542869568, "learning_rate": 4.368891504572796e-06, "loss": 0.10509681701660156, "step": 4114 }, { "epoch": 0.5733992893471748, "grad_norm": 0.7279375791549683, "learning_rate": 4.3665536405019045e-06, "loss": 0.08370018005371094, "step": 4115 }, { "epoch": 0.5735386330383891, "grad_norm": 1.5616258382797241, "learning_rate": 4.36421591716833e-06, "loss": 0.12166786193847656, "step": 4116 }, { "epoch": 0.5736779767296035, "grad_norm": 1.0221244096755981, "learning_rate": 4.361878335091464e-06, "loss": 0.09376335144042969, "step": 4117 }, { "epoch": 0.5738173204208179, "grad_norm": 0.6279402375221252, "learning_rate": 4.3595408947906644e-06, "loss": 0.08337974548339844, "step": 4118 }, { "epoch": 0.5739566641120323, "grad_norm": 0.6445189118385315, "learning_rate": 4.357203596785254e-06, "loss": 0.08345794677734375, "step": 4119 }, { "epoch": 0.5740960078032467, "grad_norm": 1.5192166566848755, "learning_rate": 4.3548664415945326e-06, "loss": 0.1053934097290039, "step": 4120 }, { "epoch": 0.5742353514944611, "grad_norm": 0.944985032081604, "learning_rate": 4.3525294297377566e-06, "loss": 0.1157388687133789, "step": 4121 }, { "epoch": 0.5743746951856755, "grad_norm": 1.0006135702133179, "learning_rate": 4.35019256173416e-06, "loss": 0.09043693542480469, "step": 4122 }, { "epoch": 0.5745140388768899, "grad_norm": 1.0215866565704346, "learning_rate": 4.34785583810294e-06, "loss": 0.08830642700195312, "step": 4123 }, { "epoch": 0.5746533825681043, "grad_norm": 0.6014605164527893, "learning_rate": 4.345519259363264e-06, "loss": 0.07591629028320312, "step": 4124 }, { "epoch": 0.5747927262593187, "grad_norm": 0.6894569993019104, "learning_rate": 4.343182826034268e-06, "loss": 0.07471656799316406, "step": 4125 }, { "epoch": 0.574932069950533, "grad_norm": 0.8023510575294495, "learning_rate": 4.340846538635053e-06, "loss": 0.07601356506347656, "step": 4126 }, { "epoch": 0.5750714136417474, "grad_norm": 0.9071303606033325, "learning_rate": 4.338510397684687e-06, "loss": 0.08030509948730469, "step": 4127 }, { "epoch": 0.5752107573329618, "grad_norm": 0.7262620329856873, "learning_rate": 4.336174403702208e-06, "loss": 0.07864570617675781, "step": 4128 }, { "epoch": 0.5753501010241762, "grad_norm": 0.6877728700637817, "learning_rate": 4.333838557206623e-06, "loss": 0.0810689926147461, "step": 4129 }, { "epoch": 0.5754894447153905, "grad_norm": 0.6931685209274292, "learning_rate": 4.3315028587169e-06, "loss": 0.10281753540039062, "step": 4130 }, { "epoch": 0.5756287884066049, "grad_norm": 0.6703829169273376, "learning_rate": 4.329167308751982e-06, "loss": 0.08284187316894531, "step": 4131 }, { "epoch": 0.5757681320978193, "grad_norm": 0.8378818035125732, "learning_rate": 4.3268319078307695e-06, "loss": 0.0761423110961914, "step": 4132 }, { "epoch": 0.5759074757890337, "grad_norm": 1.4073970317840576, "learning_rate": 4.324496656472141e-06, "loss": 0.10151958465576172, "step": 4133 }, { "epoch": 0.576046819480248, "grad_norm": 0.45945265889167786, "learning_rate": 4.322161555194932e-06, "loss": 0.07373428344726562, "step": 4134 }, { "epoch": 0.5761861631714624, "grad_norm": 0.635475754737854, "learning_rate": 4.31982660451795e-06, "loss": 0.07990169525146484, "step": 4135 }, { "epoch": 0.5763255068626768, "grad_norm": 0.6422591209411621, "learning_rate": 4.3174918049599705e-06, "loss": 0.09856605529785156, "step": 4136 }, { "epoch": 0.5764648505538912, "grad_norm": 0.5832564234733582, "learning_rate": 4.315157157039727e-06, "loss": 0.07550048828125, "step": 4137 }, { "epoch": 0.5766041942451056, "grad_norm": 0.5465685725212097, "learning_rate": 4.312822661275929e-06, "loss": 0.08142280578613281, "step": 4138 }, { "epoch": 0.5767435379363199, "grad_norm": 1.1996403932571411, "learning_rate": 4.310488318187247e-06, "loss": 0.10824966430664062, "step": 4139 }, { "epoch": 0.5768828816275343, "grad_norm": 0.6611541509628296, "learning_rate": 4.308154128292318e-06, "loss": 0.08258056640625, "step": 4140 }, { "epoch": 0.5770222253187487, "grad_norm": 0.5757229328155518, "learning_rate": 4.305820092109748e-06, "loss": 0.08427238464355469, "step": 4141 }, { "epoch": 0.5771615690099631, "grad_norm": 2.152920961380005, "learning_rate": 4.303486210158106e-06, "loss": 0.14479637145996094, "step": 4142 }, { "epoch": 0.5773009127011774, "grad_norm": 1.9271057844161987, "learning_rate": 4.301152482955926e-06, "loss": 0.1510934829711914, "step": 4143 }, { "epoch": 0.5774402563923918, "grad_norm": 0.7121643424034119, "learning_rate": 4.298818911021707e-06, "loss": 0.0831146240234375, "step": 4144 }, { "epoch": 0.5775796000836062, "grad_norm": 0.3571421802043915, "learning_rate": 4.296485494873919e-06, "loss": 0.06387042999267578, "step": 4145 }, { "epoch": 0.5777189437748206, "grad_norm": 0.5552216172218323, "learning_rate": 4.294152235030993e-06, "loss": 0.0758371353149414, "step": 4146 }, { "epoch": 0.577858287466035, "grad_norm": 0.5618678331375122, "learning_rate": 4.291819132011327e-06, "loss": 0.08025741577148438, "step": 4147 }, { "epoch": 0.5779976311572493, "grad_norm": 1.0630816221237183, "learning_rate": 4.2894861863332785e-06, "loss": 0.09186744689941406, "step": 4148 }, { "epoch": 0.5781369748484637, "grad_norm": 1.1711220741271973, "learning_rate": 4.28715339851518e-06, "loss": 0.1304149627685547, "step": 4149 }, { "epoch": 0.5782763185396781, "grad_norm": 1.5510635375976562, "learning_rate": 4.284820769075322e-06, "loss": 0.12677955627441406, "step": 4150 }, { "epoch": 0.5784156622308925, "grad_norm": 0.3930652439594269, "learning_rate": 4.282488298531959e-06, "loss": 0.057891845703125, "step": 4151 }, { "epoch": 0.5785550059221068, "grad_norm": 0.7555586695671082, "learning_rate": 4.28015598740332e-06, "loss": 0.08563995361328125, "step": 4152 }, { "epoch": 0.5786943496133212, "grad_norm": 0.6003113389015198, "learning_rate": 4.277823836207581e-06, "loss": 0.08235740661621094, "step": 4153 }, { "epoch": 0.5788336933045356, "grad_norm": 0.8144310712814331, "learning_rate": 4.275491845462901e-06, "loss": 0.08924674987792969, "step": 4154 }, { "epoch": 0.57897303699575, "grad_norm": 0.9851505756378174, "learning_rate": 4.27316001568739e-06, "loss": 0.1152048110961914, "step": 4155 }, { "epoch": 0.5791123806869644, "grad_norm": 1.6289018392562866, "learning_rate": 4.270828347399131e-06, "loss": 0.11452960968017578, "step": 4156 }, { "epoch": 0.5792517243781787, "grad_norm": 0.9488279819488525, "learning_rate": 4.268496841116166e-06, "loss": 0.113311767578125, "step": 4157 }, { "epoch": 0.5793910680693931, "grad_norm": 0.5304964184761047, "learning_rate": 4.266165497356503e-06, "loss": 0.07060050964355469, "step": 4158 }, { "epoch": 0.5795304117606075, "grad_norm": 0.6161766648292542, "learning_rate": 4.2638343166381115e-06, "loss": 0.08022689819335938, "step": 4159 }, { "epoch": 0.5796697554518219, "grad_norm": 0.47967758774757385, "learning_rate": 4.261503299478928e-06, "loss": 0.08062171936035156, "step": 4160 }, { "epoch": 0.5798090991430362, "grad_norm": 0.3208214044570923, "learning_rate": 4.259172446396851e-06, "loss": 0.06694602966308594, "step": 4161 }, { "epoch": 0.5799484428342507, "grad_norm": 0.5037426352500916, "learning_rate": 4.256841757909744e-06, "loss": 0.08043670654296875, "step": 4162 }, { "epoch": 0.5800877865254651, "grad_norm": 0.6643304824829102, "learning_rate": 4.254511234535432e-06, "loss": 0.07901287078857422, "step": 4163 }, { "epoch": 0.5802271302166795, "grad_norm": 0.7012732625007629, "learning_rate": 4.2521808767917024e-06, "loss": 0.09073448181152344, "step": 4164 }, { "epoch": 0.5803664739078939, "grad_norm": 0.7801777124404907, "learning_rate": 4.2498506851963095e-06, "loss": 0.08429908752441406, "step": 4165 }, { "epoch": 0.5805058175991082, "grad_norm": 0.7579885721206665, "learning_rate": 4.247520660266969e-06, "loss": 0.09327125549316406, "step": 4166 }, { "epoch": 0.5806451612903226, "grad_norm": 0.7804591655731201, "learning_rate": 4.245190802521356e-06, "loss": 0.098419189453125, "step": 4167 }, { "epoch": 0.580784504981537, "grad_norm": 0.8238499164581299, "learning_rate": 4.2428611124771184e-06, "loss": 0.08732414245605469, "step": 4168 }, { "epoch": 0.5809238486727514, "grad_norm": 0.6198453307151794, "learning_rate": 4.240531590651853e-06, "loss": 0.08785247802734375, "step": 4169 }, { "epoch": 0.5810631923639658, "grad_norm": 0.8012208938598633, "learning_rate": 4.238202237563129e-06, "loss": 0.09406852722167969, "step": 4170 }, { "epoch": 0.5812025360551801, "grad_norm": 0.9362679719924927, "learning_rate": 4.235873053728475e-06, "loss": 0.09840965270996094, "step": 4171 }, { "epoch": 0.5813418797463945, "grad_norm": 0.9513877034187317, "learning_rate": 4.233544039665385e-06, "loss": 0.137847900390625, "step": 4172 }, { "epoch": 0.5814812234376089, "grad_norm": 0.7580530643463135, "learning_rate": 4.231215195891311e-06, "loss": 0.09015274047851562, "step": 4173 }, { "epoch": 0.5816205671288233, "grad_norm": 0.6591736674308777, "learning_rate": 4.228886522923668e-06, "loss": 0.11438179016113281, "step": 4174 }, { "epoch": 0.5817599108200376, "grad_norm": 0.8041442036628723, "learning_rate": 4.2265580212798355e-06, "loss": 0.08340930938720703, "step": 4175 }, { "epoch": 0.581899254511252, "grad_norm": 0.6803694367408752, "learning_rate": 4.224229691477151e-06, "loss": 0.10489082336425781, "step": 4176 }, { "epoch": 0.5820385982024664, "grad_norm": 0.6832919716835022, "learning_rate": 4.221901534032918e-06, "loss": 0.09191513061523438, "step": 4177 }, { "epoch": 0.5821779418936808, "grad_norm": 0.8680355548858643, "learning_rate": 4.219573549464403e-06, "loss": 0.09848594665527344, "step": 4178 }, { "epoch": 0.5823172855848952, "grad_norm": 0.3353879451751709, "learning_rate": 4.217245738288825e-06, "loss": 0.05828857421875, "step": 4179 }, { "epoch": 0.5824566292761095, "grad_norm": 0.7911186218261719, "learning_rate": 4.2149181010233734e-06, "loss": 0.08815574645996094, "step": 4180 }, { "epoch": 0.5825959729673239, "grad_norm": 0.8102871179580688, "learning_rate": 4.212590638185196e-06, "loss": 0.08092212677001953, "step": 4181 }, { "epoch": 0.5827353166585383, "grad_norm": 0.5505996346473694, "learning_rate": 4.2102633502914035e-06, "loss": 0.06441879272460938, "step": 4182 }, { "epoch": 0.5828746603497527, "grad_norm": 0.7471832633018494, "learning_rate": 4.2079362378590625e-06, "loss": 0.09594917297363281, "step": 4183 }, { "epoch": 0.583014004040967, "grad_norm": 0.5805506706237793, "learning_rate": 4.2056093014052085e-06, "loss": 0.08696556091308594, "step": 4184 }, { "epoch": 0.5831533477321814, "grad_norm": 0.4090520143508911, "learning_rate": 4.20328254144683e-06, "loss": 0.0658726692199707, "step": 4185 }, { "epoch": 0.5832926914233958, "grad_norm": 0.9475260376930237, "learning_rate": 4.2009559585008826e-06, "loss": 0.10115432739257812, "step": 4186 }, { "epoch": 0.5834320351146102, "grad_norm": 0.7363665699958801, "learning_rate": 4.198629553084277e-06, "loss": 0.09754753112792969, "step": 4187 }, { "epoch": 0.5835713788058245, "grad_norm": 0.5219995975494385, "learning_rate": 4.1963033257138904e-06, "loss": 0.0874786376953125, "step": 4188 }, { "epoch": 0.5837107224970389, "grad_norm": 0.8245421051979065, "learning_rate": 4.193977276906557e-06, "loss": 0.09967041015625, "step": 4189 }, { "epoch": 0.5838500661882533, "grad_norm": 0.45007866621017456, "learning_rate": 4.191651407179069e-06, "loss": 0.05875968933105469, "step": 4190 }, { "epoch": 0.5839894098794677, "grad_norm": 1.7091013193130493, "learning_rate": 4.189325717048185e-06, "loss": 0.14281558990478516, "step": 4191 }, { "epoch": 0.5841287535706821, "grad_norm": 0.8240340948104858, "learning_rate": 4.187000207030616e-06, "loss": 0.10678243637084961, "step": 4192 }, { "epoch": 0.5842680972618964, "grad_norm": 1.0307371616363525, "learning_rate": 4.184674877643042e-06, "loss": 0.10541534423828125, "step": 4193 }, { "epoch": 0.5844074409531108, "grad_norm": 1.1728519201278687, "learning_rate": 4.182349729402097e-06, "loss": 0.11365509033203125, "step": 4194 }, { "epoch": 0.5845467846443252, "grad_norm": 1.3684660196304321, "learning_rate": 4.180024762824374e-06, "loss": 0.13165664672851562, "step": 4195 }, { "epoch": 0.5846861283355396, "grad_norm": 0.4818660318851471, "learning_rate": 4.177699978426426e-06, "loss": 0.07504940032958984, "step": 4196 }, { "epoch": 0.584825472026754, "grad_norm": 1.1660393476486206, "learning_rate": 4.175375376724772e-06, "loss": 0.0883169174194336, "step": 4197 }, { "epoch": 0.5849648157179683, "grad_norm": 1.0435616970062256, "learning_rate": 4.173050958235882e-06, "loss": 0.08098602294921875, "step": 4198 }, { "epoch": 0.5851041594091827, "grad_norm": 0.6417640447616577, "learning_rate": 4.170726723476189e-06, "loss": 0.0785980224609375, "step": 4199 }, { "epoch": 0.5852435031003971, "grad_norm": 0.45920488238334656, "learning_rate": 4.168402672962086e-06, "loss": 0.06692695617675781, "step": 4200 }, { "epoch": 0.5853828467916115, "grad_norm": 0.42414194345474243, "learning_rate": 4.166078807209924e-06, "loss": 0.06888008117675781, "step": 4201 }, { "epoch": 0.5855221904828259, "grad_norm": 0.6978867650032043, "learning_rate": 4.163755126736011e-06, "loss": 0.09842491149902344, "step": 4202 }, { "epoch": 0.5856615341740403, "grad_norm": 1.8486281633377075, "learning_rate": 4.1614316320566174e-06, "loss": 0.08513641357421875, "step": 4203 }, { "epoch": 0.5858008778652547, "grad_norm": 1.079641342163086, "learning_rate": 4.159108323687971e-06, "loss": 0.10498428344726562, "step": 4204 }, { "epoch": 0.5859402215564691, "grad_norm": 1.2158674001693726, "learning_rate": 4.156785202146257e-06, "loss": 0.0958566665649414, "step": 4205 }, { "epoch": 0.5860795652476835, "grad_norm": 1.0379959344863892, "learning_rate": 4.154462267947621e-06, "loss": 0.08727645874023438, "step": 4206 }, { "epoch": 0.5862189089388978, "grad_norm": 0.9260666966438293, "learning_rate": 4.152139521608164e-06, "loss": 0.08658790588378906, "step": 4207 }, { "epoch": 0.5863582526301122, "grad_norm": 0.7149448990821838, "learning_rate": 4.149816963643947e-06, "loss": 0.07177352905273438, "step": 4208 }, { "epoch": 0.5864975963213266, "grad_norm": 0.6784573793411255, "learning_rate": 4.147494594570992e-06, "loss": 0.07443046569824219, "step": 4209 }, { "epoch": 0.586636940012541, "grad_norm": 1.5752556324005127, "learning_rate": 4.1451724149052764e-06, "loss": 0.12151718139648438, "step": 4210 }, { "epoch": 0.5867762837037553, "grad_norm": 0.8613414168357849, "learning_rate": 4.1428504251627335e-06, "loss": 0.07621192932128906, "step": 4211 }, { "epoch": 0.5869156273949697, "grad_norm": 1.522270679473877, "learning_rate": 4.140528625859254e-06, "loss": 0.07187652587890625, "step": 4212 }, { "epoch": 0.5870549710861841, "grad_norm": 1.1322507858276367, "learning_rate": 4.138207017510696e-06, "loss": 0.08889007568359375, "step": 4213 }, { "epoch": 0.5871943147773985, "grad_norm": 1.0394313335418701, "learning_rate": 4.1358856006328614e-06, "loss": 0.10757064819335938, "step": 4214 }, { "epoch": 0.5873336584686129, "grad_norm": 1.1939880847930908, "learning_rate": 4.1335643757415195e-06, "loss": 0.1105051040649414, "step": 4215 }, { "epoch": 0.5874730021598272, "grad_norm": 0.38674765825271606, "learning_rate": 4.131243343352391e-06, "loss": 0.0600738525390625, "step": 4216 }, { "epoch": 0.5876123458510416, "grad_norm": 0.6084223985671997, "learning_rate": 4.128922503981158e-06, "loss": 0.08533668518066406, "step": 4217 }, { "epoch": 0.587751689542256, "grad_norm": 0.9170710444450378, "learning_rate": 4.126601858143457e-06, "loss": 0.10926055908203125, "step": 4218 }, { "epoch": 0.5878910332334704, "grad_norm": 0.7607752680778503, "learning_rate": 4.124281406354883e-06, "loss": 0.07142448425292969, "step": 4219 }, { "epoch": 0.5880303769246847, "grad_norm": 1.1973240375518799, "learning_rate": 4.121961149130989e-06, "loss": 0.12434005737304688, "step": 4220 }, { "epoch": 0.5881697206158991, "grad_norm": 0.8296911120414734, "learning_rate": 4.119641086987282e-06, "loss": 0.1058502197265625, "step": 4221 }, { "epoch": 0.5883090643071135, "grad_norm": 0.539405107498169, "learning_rate": 4.1173212204392245e-06, "loss": 0.07495880126953125, "step": 4222 }, { "epoch": 0.5884484079983279, "grad_norm": 0.6246073842048645, "learning_rate": 4.115001550002241e-06, "loss": 0.07088279724121094, "step": 4223 }, { "epoch": 0.5885877516895422, "grad_norm": 1.4294133186340332, "learning_rate": 4.1126820761917075e-06, "loss": 0.0791015625, "step": 4224 }, { "epoch": 0.5887270953807566, "grad_norm": 0.9147610068321228, "learning_rate": 4.11036279952296e-06, "loss": 0.10268783569335938, "step": 4225 }, { "epoch": 0.588866439071971, "grad_norm": 1.095183253288269, "learning_rate": 4.108043720511287e-06, "loss": 0.08438587188720703, "step": 4226 }, { "epoch": 0.5890057827631854, "grad_norm": 0.44535285234451294, "learning_rate": 4.105724839671936e-06, "loss": 0.0725412368774414, "step": 4227 }, { "epoch": 0.5891451264543998, "grad_norm": 0.46527230739593506, "learning_rate": 4.103406157520108e-06, "loss": 0.07088088989257812, "step": 4228 }, { "epoch": 0.5892844701456141, "grad_norm": 0.6611407399177551, "learning_rate": 4.101087674570963e-06, "loss": 0.08452129364013672, "step": 4229 }, { "epoch": 0.5894238138368285, "grad_norm": 1.9753844738006592, "learning_rate": 4.0987693913396145e-06, "loss": 0.1635150909423828, "step": 4230 }, { "epoch": 0.5895631575280429, "grad_norm": 0.6758061647415161, "learning_rate": 4.096451308341132e-06, "loss": 0.10409164428710938, "step": 4231 }, { "epoch": 0.5897025012192573, "grad_norm": 0.6112347841262817, "learning_rate": 4.094133426090539e-06, "loss": 0.08701896667480469, "step": 4232 }, { "epoch": 0.5898418449104716, "grad_norm": 0.7137689590454102, "learning_rate": 4.091815745102818e-06, "loss": 0.09321022033691406, "step": 4233 }, { "epoch": 0.589981188601686, "grad_norm": 0.7671059370040894, "learning_rate": 4.089498265892905e-06, "loss": 0.09479427337646484, "step": 4234 }, { "epoch": 0.5901205322929004, "grad_norm": 0.7200010418891907, "learning_rate": 4.0871809889756884e-06, "loss": 0.066802978515625, "step": 4235 }, { "epoch": 0.5902598759841148, "grad_norm": 1.4233208894729614, "learning_rate": 4.084863914866018e-06, "loss": 0.11697578430175781, "step": 4236 }, { "epoch": 0.5903992196753292, "grad_norm": 0.9535250663757324, "learning_rate": 4.082547044078693e-06, "loss": 0.09723663330078125, "step": 4237 }, { "epoch": 0.5905385633665435, "grad_norm": 0.589360237121582, "learning_rate": 4.0802303771284685e-06, "loss": 0.07963752746582031, "step": 4238 }, { "epoch": 0.5906779070577579, "grad_norm": 1.3761910200119019, "learning_rate": 4.0779139145300536e-06, "loss": 0.10210800170898438, "step": 4239 }, { "epoch": 0.5908172507489723, "grad_norm": 1.1261318922042847, "learning_rate": 4.075597656798117e-06, "loss": 0.11666488647460938, "step": 4240 }, { "epoch": 0.5909565944401867, "grad_norm": 0.6837795376777649, "learning_rate": 4.073281604447277e-06, "loss": 0.06808185577392578, "step": 4241 }, { "epoch": 0.5910959381314012, "grad_norm": 1.6373571157455444, "learning_rate": 4.0709657579921075e-06, "loss": 0.11910247802734375, "step": 4242 }, { "epoch": 0.5912352818226155, "grad_norm": 1.1708029508590698, "learning_rate": 4.068650117947135e-06, "loss": 0.11069107055664062, "step": 4243 }, { "epoch": 0.5913746255138299, "grad_norm": 1.1434277296066284, "learning_rate": 4.0663346848268435e-06, "loss": 0.12032318115234375, "step": 4244 }, { "epoch": 0.5915139692050443, "grad_norm": 1.21369469165802, "learning_rate": 4.064019459145669e-06, "loss": 0.11731719970703125, "step": 4245 }, { "epoch": 0.5916533128962587, "grad_norm": 0.9061928391456604, "learning_rate": 4.061704441418002e-06, "loss": 0.09362411499023438, "step": 4246 }, { "epoch": 0.591792656587473, "grad_norm": 0.7046627402305603, "learning_rate": 4.059389632158189e-06, "loss": 0.09018898010253906, "step": 4247 }, { "epoch": 0.5919320002786874, "grad_norm": 0.8415338397026062, "learning_rate": 4.057075031880521e-06, "loss": 0.09239768981933594, "step": 4248 }, { "epoch": 0.5920713439699018, "grad_norm": 0.4098670780658722, "learning_rate": 4.054760641099256e-06, "loss": 0.07966232299804688, "step": 4249 }, { "epoch": 0.5922106876611162, "grad_norm": 0.5646622180938721, "learning_rate": 4.052446460328595e-06, "loss": 0.09630012512207031, "step": 4250 }, { "epoch": 0.5923500313523306, "grad_norm": 0.7363912463188171, "learning_rate": 4.050132490082698e-06, "loss": 0.1075124740600586, "step": 4251 }, { "epoch": 0.5924893750435449, "grad_norm": 0.37218692898750305, "learning_rate": 4.0478187308756775e-06, "loss": 0.07340431213378906, "step": 4252 }, { "epoch": 0.5926287187347593, "grad_norm": 1.7869659662246704, "learning_rate": 4.045505183221594e-06, "loss": 0.11101913452148438, "step": 4253 }, { "epoch": 0.5927680624259737, "grad_norm": 0.4255489706993103, "learning_rate": 4.043191847634469e-06, "loss": 0.06653022766113281, "step": 4254 }, { "epoch": 0.5929074061171881, "grad_norm": 1.553056001663208, "learning_rate": 4.040878724628269e-06, "loss": 0.13536643981933594, "step": 4255 }, { "epoch": 0.5930467498084024, "grad_norm": 0.5980584621429443, "learning_rate": 4.038565814716921e-06, "loss": 0.07833290100097656, "step": 4256 }, { "epoch": 0.5931860934996168, "grad_norm": 1.0066534280776978, "learning_rate": 4.036253118414299e-06, "loss": 0.1137857437133789, "step": 4257 }, { "epoch": 0.5933254371908312, "grad_norm": 0.6300830841064453, "learning_rate": 4.033940636234233e-06, "loss": 0.09140396118164062, "step": 4258 }, { "epoch": 0.5934647808820456, "grad_norm": 0.5558249354362488, "learning_rate": 4.0316283686905e-06, "loss": 0.08557701110839844, "step": 4259 }, { "epoch": 0.59360412457326, "grad_norm": 0.6609942317008972, "learning_rate": 4.029316316296834e-06, "loss": 0.10686302185058594, "step": 4260 }, { "epoch": 0.5937434682644743, "grad_norm": 0.7300080060958862, "learning_rate": 4.027004479566923e-06, "loss": 0.07845497131347656, "step": 4261 }, { "epoch": 0.5938828119556887, "grad_norm": 0.9765761494636536, "learning_rate": 4.024692859014403e-06, "loss": 0.0975332260131836, "step": 4262 }, { "epoch": 0.5940221556469031, "grad_norm": 0.48097094893455505, "learning_rate": 4.022381455152863e-06, "loss": 0.07389640808105469, "step": 4263 }, { "epoch": 0.5941614993381175, "grad_norm": 0.7741380333900452, "learning_rate": 4.020070268495844e-06, "loss": 0.12303543090820312, "step": 4264 }, { "epoch": 0.5943008430293318, "grad_norm": 0.6040611863136292, "learning_rate": 4.017759299556838e-06, "loss": 0.09207534790039062, "step": 4265 }, { "epoch": 0.5944401867205462, "grad_norm": 0.976622462272644, "learning_rate": 4.015448548849293e-06, "loss": 0.09790420532226562, "step": 4266 }, { "epoch": 0.5945795304117606, "grad_norm": 1.0114415884017944, "learning_rate": 4.0131380168866e-06, "loss": 0.08514976501464844, "step": 4267 }, { "epoch": 0.594718874102975, "grad_norm": 0.5282579064369202, "learning_rate": 4.010827704182113e-06, "loss": 0.06435966491699219, "step": 4268 }, { "epoch": 0.5948582177941893, "grad_norm": 0.94781893491745, "learning_rate": 4.0085176112491245e-06, "loss": 0.09236621856689453, "step": 4269 }, { "epoch": 0.5949975614854037, "grad_norm": 0.5395175218582153, "learning_rate": 4.006207738600887e-06, "loss": 0.07409858703613281, "step": 4270 }, { "epoch": 0.5951369051766181, "grad_norm": 0.4667659401893616, "learning_rate": 4.0038980867506e-06, "loss": 0.07322311401367188, "step": 4271 }, { "epoch": 0.5952762488678325, "grad_norm": 0.9574400782585144, "learning_rate": 4.001588656211418e-06, "loss": 0.09772682189941406, "step": 4272 }, { "epoch": 0.5954155925590469, "grad_norm": 1.0630128383636475, "learning_rate": 3.999279447496444e-06, "loss": 0.0825490951538086, "step": 4273 }, { "epoch": 0.5955549362502612, "grad_norm": 0.8296915292739868, "learning_rate": 3.996970461118729e-06, "loss": 0.08999443054199219, "step": 4274 }, { "epoch": 0.5956942799414756, "grad_norm": 0.743564248085022, "learning_rate": 3.994661697591278e-06, "loss": 0.08907508850097656, "step": 4275 }, { "epoch": 0.59583362363269, "grad_norm": 1.5603033304214478, "learning_rate": 3.992353157427044e-06, "loss": 0.09533119201660156, "step": 4276 }, { "epoch": 0.5959729673239044, "grad_norm": 2.0956766605377197, "learning_rate": 3.990044841138934e-06, "loss": 0.10386180877685547, "step": 4277 }, { "epoch": 0.5961123110151187, "grad_norm": 1.2703394889831543, "learning_rate": 3.987736749239804e-06, "loss": 0.1503276824951172, "step": 4278 }, { "epoch": 0.5962516547063331, "grad_norm": 0.693534791469574, "learning_rate": 3.985428882242458e-06, "loss": 0.0936431884765625, "step": 4279 }, { "epoch": 0.5963909983975475, "grad_norm": 1.3452260494232178, "learning_rate": 3.983121240659649e-06, "loss": 0.13611221313476562, "step": 4280 }, { "epoch": 0.5965303420887619, "grad_norm": 0.6121253371238708, "learning_rate": 3.980813825004086e-06, "loss": 0.09107637405395508, "step": 4281 }, { "epoch": 0.5966696857799764, "grad_norm": 1.4522297382354736, "learning_rate": 3.978506635788423e-06, "loss": 0.10064506530761719, "step": 4282 }, { "epoch": 0.5968090294711907, "grad_norm": 0.9043034911155701, "learning_rate": 3.976199673525263e-06, "loss": 0.10557270050048828, "step": 4283 }, { "epoch": 0.5969483731624051, "grad_norm": 0.9286361932754517, "learning_rate": 3.973892938727164e-06, "loss": 0.08576107025146484, "step": 4284 }, { "epoch": 0.5970877168536195, "grad_norm": 0.6284470558166504, "learning_rate": 3.971586431906627e-06, "loss": 0.08971595764160156, "step": 4285 }, { "epoch": 0.5972270605448339, "grad_norm": 0.4720170795917511, "learning_rate": 3.969280153576105e-06, "loss": 0.08664703369140625, "step": 4286 }, { "epoch": 0.5973664042360483, "grad_norm": 1.4350694417953491, "learning_rate": 3.966974104248001e-06, "loss": 0.11305809020996094, "step": 4287 }, { "epoch": 0.5975057479272626, "grad_norm": 0.9937059879302979, "learning_rate": 3.964668284434666e-06, "loss": 0.08825111389160156, "step": 4288 }, { "epoch": 0.597645091618477, "grad_norm": 0.721869945526123, "learning_rate": 3.962362694648404e-06, "loss": 0.08430099487304688, "step": 4289 }, { "epoch": 0.5977844353096914, "grad_norm": 0.6338101029396057, "learning_rate": 3.960057335401459e-06, "loss": 0.07898139953613281, "step": 4290 }, { "epoch": 0.5979237790009058, "grad_norm": 1.0130032300949097, "learning_rate": 3.9577522072060336e-06, "loss": 0.10698890686035156, "step": 4291 }, { "epoch": 0.5980631226921201, "grad_norm": 1.2985703945159912, "learning_rate": 3.95544731057427e-06, "loss": 0.12068367004394531, "step": 4292 }, { "epoch": 0.5982024663833345, "grad_norm": 0.862210214138031, "learning_rate": 3.953142646018269e-06, "loss": 0.06962394714355469, "step": 4293 }, { "epoch": 0.5983418100745489, "grad_norm": 0.5473278760910034, "learning_rate": 3.95083821405007e-06, "loss": 0.08395957946777344, "step": 4294 }, { "epoch": 0.5984811537657633, "grad_norm": 1.0541430711746216, "learning_rate": 3.948534015181671e-06, "loss": 0.1080465316772461, "step": 4295 }, { "epoch": 0.5986204974569777, "grad_norm": 0.9279611706733704, "learning_rate": 3.946230049925004e-06, "loss": 0.09058666229248047, "step": 4296 }, { "epoch": 0.598759841148192, "grad_norm": 0.6633728742599487, "learning_rate": 3.9439263187919635e-06, "loss": 0.08921623229980469, "step": 4297 }, { "epoch": 0.5988991848394064, "grad_norm": 0.760134756565094, "learning_rate": 3.941622822294385e-06, "loss": 0.09189891815185547, "step": 4298 }, { "epoch": 0.5990385285306208, "grad_norm": 0.7986801862716675, "learning_rate": 3.939319560944051e-06, "loss": 0.09051513671875, "step": 4299 }, { "epoch": 0.5991778722218352, "grad_norm": 0.35520702600479126, "learning_rate": 3.937016535252696e-06, "loss": 0.05291938781738281, "step": 4300 }, { "epoch": 0.5993172159130495, "grad_norm": 0.9961345791816711, "learning_rate": 3.934713745731998e-06, "loss": 0.0858001708984375, "step": 4301 }, { "epoch": 0.5994565596042639, "grad_norm": 1.1655610799789429, "learning_rate": 3.932411192893586e-06, "loss": 0.08620452880859375, "step": 4302 }, { "epoch": 0.5995959032954783, "grad_norm": 0.6457646489143372, "learning_rate": 3.93010887724903e-06, "loss": 0.06993293762207031, "step": 4303 }, { "epoch": 0.5997352469866927, "grad_norm": 1.431958556175232, "learning_rate": 3.927806799309859e-06, "loss": 0.09249114990234375, "step": 4304 }, { "epoch": 0.599874590677907, "grad_norm": 0.6977927684783936, "learning_rate": 3.925504959587538e-06, "loss": 0.06919574737548828, "step": 4305 }, { "epoch": 0.6000139343691214, "grad_norm": 0.8272653222084045, "learning_rate": 3.9232033585934835e-06, "loss": 0.08392715454101562, "step": 4306 }, { "epoch": 0.6001532780603358, "grad_norm": 0.8961278200149536, "learning_rate": 3.920901996839059e-06, "loss": 0.08709335327148438, "step": 4307 }, { "epoch": 0.6002926217515502, "grad_norm": 1.4728517532348633, "learning_rate": 3.918600874835573e-06, "loss": 0.10437202453613281, "step": 4308 }, { "epoch": 0.6004319654427646, "grad_norm": 1.8897193670272827, "learning_rate": 3.916299993094285e-06, "loss": 0.09507369995117188, "step": 4309 }, { "epoch": 0.6005713091339789, "grad_norm": 0.9907543063163757, "learning_rate": 3.913999352126399e-06, "loss": 0.0739889144897461, "step": 4310 }, { "epoch": 0.6007106528251933, "grad_norm": 1.1216109991073608, "learning_rate": 3.9116989524430615e-06, "loss": 0.12454605102539062, "step": 4311 }, { "epoch": 0.6008499965164077, "grad_norm": 1.2089283466339111, "learning_rate": 3.90939879455537e-06, "loss": 0.07976150512695312, "step": 4312 }, { "epoch": 0.6009893402076221, "grad_norm": 0.7038972973823547, "learning_rate": 3.907098878974367e-06, "loss": 0.08300971984863281, "step": 4313 }, { "epoch": 0.6011286838988364, "grad_norm": 1.4070346355438232, "learning_rate": 3.9047992062110435e-06, "loss": 0.09948539733886719, "step": 4314 }, { "epoch": 0.6012680275900508, "grad_norm": 0.5134490132331848, "learning_rate": 3.902499776776331e-06, "loss": 0.09378242492675781, "step": 4315 }, { "epoch": 0.6014073712812652, "grad_norm": 0.9747257232666016, "learning_rate": 3.900200591181114e-06, "loss": 0.11225700378417969, "step": 4316 }, { "epoch": 0.6015467149724796, "grad_norm": 0.5333184599876404, "learning_rate": 3.897901649936215e-06, "loss": 0.06451797485351562, "step": 4317 }, { "epoch": 0.601686058663694, "grad_norm": 0.4314073324203491, "learning_rate": 3.895602953552408e-06, "loss": 0.0714273452758789, "step": 4318 }, { "epoch": 0.6018254023549083, "grad_norm": 0.6721124649047852, "learning_rate": 3.8933045025404105e-06, "loss": 0.09821701049804688, "step": 4319 }, { "epoch": 0.6019647460461227, "grad_norm": 0.5433871150016785, "learning_rate": 3.891006297410887e-06, "loss": 0.07602691650390625, "step": 4320 }, { "epoch": 0.6021040897373371, "grad_norm": 1.0301188230514526, "learning_rate": 3.888708338674447e-06, "loss": 0.093994140625, "step": 4321 }, { "epoch": 0.6022434334285516, "grad_norm": 0.5676934123039246, "learning_rate": 3.8864106268416416e-06, "loss": 0.09347343444824219, "step": 4322 }, { "epoch": 0.602382777119766, "grad_norm": 0.3889131546020508, "learning_rate": 3.884113162422971e-06, "loss": 0.06483840942382812, "step": 4323 }, { "epoch": 0.6025221208109803, "grad_norm": 0.7689726948738098, "learning_rate": 3.881815945928879e-06, "loss": 0.08496952056884766, "step": 4324 }, { "epoch": 0.6026614645021947, "grad_norm": 0.8973390460014343, "learning_rate": 3.879518977869755e-06, "loss": 0.11704635620117188, "step": 4325 }, { "epoch": 0.6028008081934091, "grad_norm": 0.4714604616165161, "learning_rate": 3.8772222587559345e-06, "loss": 0.08059930801391602, "step": 4326 }, { "epoch": 0.6029401518846235, "grad_norm": 1.1840635538101196, "learning_rate": 3.874925789097695e-06, "loss": 0.09795188903808594, "step": 4327 }, { "epoch": 0.6030794955758378, "grad_norm": 0.7916499376296997, "learning_rate": 3.872629569405257e-06, "loss": 0.07891845703125, "step": 4328 }, { "epoch": 0.6032188392670522, "grad_norm": 0.6610833406448364, "learning_rate": 3.870333600188792e-06, "loss": 0.09130859375, "step": 4329 }, { "epoch": 0.6033581829582666, "grad_norm": 0.9531504511833191, "learning_rate": 3.86803788195841e-06, "loss": 0.10376930236816406, "step": 4330 }, { "epoch": 0.603497526649481, "grad_norm": 0.811346173286438, "learning_rate": 3.865742415224169e-06, "loss": 0.10413742065429688, "step": 4331 }, { "epoch": 0.6036368703406954, "grad_norm": 0.792317271232605, "learning_rate": 3.863447200496065e-06, "loss": 0.11913108825683594, "step": 4332 }, { "epoch": 0.6037762140319097, "grad_norm": 1.2846393585205078, "learning_rate": 3.8611522382840476e-06, "loss": 0.104278564453125, "step": 4333 }, { "epoch": 0.6039155577231241, "grad_norm": 1.669973611831665, "learning_rate": 3.858857529098001e-06, "loss": 0.10458660125732422, "step": 4334 }, { "epoch": 0.6040549014143385, "grad_norm": 1.278027057647705, "learning_rate": 3.8565630734477575e-06, "loss": 0.09716033935546875, "step": 4335 }, { "epoch": 0.6041942451055529, "grad_norm": 0.8745903968811035, "learning_rate": 3.854268871843096e-06, "loss": 0.11998558044433594, "step": 4336 }, { "epoch": 0.6043335887967672, "grad_norm": 0.6692991852760315, "learning_rate": 3.851974924793734e-06, "loss": 0.09132003784179688, "step": 4337 }, { "epoch": 0.6044729324879816, "grad_norm": 1.7974072694778442, "learning_rate": 3.8496812328093335e-06, "loss": 0.10179519653320312, "step": 4338 }, { "epoch": 0.604612276179196, "grad_norm": 0.7910085320472717, "learning_rate": 3.8473877963995e-06, "loss": 0.08632278442382812, "step": 4339 }, { "epoch": 0.6047516198704104, "grad_norm": 0.5850790739059448, "learning_rate": 3.845094616073783e-06, "loss": 0.0672607421875, "step": 4340 }, { "epoch": 0.6048909635616248, "grad_norm": 0.4407216012477875, "learning_rate": 3.8428016923416775e-06, "loss": 0.05830955505371094, "step": 4341 }, { "epoch": 0.6050303072528391, "grad_norm": 0.6002865433692932, "learning_rate": 3.840509025712616e-06, "loss": 0.08436393737792969, "step": 4342 }, { "epoch": 0.6051696509440535, "grad_norm": 1.008945345878601, "learning_rate": 3.838216616695977e-06, "loss": 0.10539054870605469, "step": 4343 }, { "epoch": 0.6053089946352679, "grad_norm": 0.6314991116523743, "learning_rate": 3.835924465801081e-06, "loss": 0.07353019714355469, "step": 4344 }, { "epoch": 0.6054483383264823, "grad_norm": 1.079193115234375, "learning_rate": 3.833632573537193e-06, "loss": 0.1322784423828125, "step": 4345 }, { "epoch": 0.6055876820176966, "grad_norm": 0.7585672736167908, "learning_rate": 3.831340940413519e-06, "loss": 0.0836935043334961, "step": 4346 }, { "epoch": 0.605727025708911, "grad_norm": 1.108338713645935, "learning_rate": 3.8290495669392085e-06, "loss": 0.08988189697265625, "step": 4347 }, { "epoch": 0.6058663694001254, "grad_norm": 1.0760464668273926, "learning_rate": 3.826758453623348e-06, "loss": 0.14255428314208984, "step": 4348 }, { "epoch": 0.6060057130913398, "grad_norm": 0.9772223234176636, "learning_rate": 3.8244676009749745e-06, "loss": 0.12599754333496094, "step": 4349 }, { "epoch": 0.6061450567825541, "grad_norm": 0.576438844203949, "learning_rate": 3.8221770095030625e-06, "loss": 0.0652322769165039, "step": 4350 }, { "epoch": 0.6062844004737685, "grad_norm": 0.7365687489509583, "learning_rate": 3.819886679716528e-06, "loss": 0.10146713256835938, "step": 4351 }, { "epoch": 0.6064237441649829, "grad_norm": 0.5223211646080017, "learning_rate": 3.8175966121242314e-06, "loss": 0.06772804260253906, "step": 4352 }, { "epoch": 0.6065630878561973, "grad_norm": 1.5206400156021118, "learning_rate": 3.815306807234974e-06, "loss": 0.15499305725097656, "step": 4353 }, { "epoch": 0.6067024315474117, "grad_norm": 0.984269917011261, "learning_rate": 3.8130172655574963e-06, "loss": 0.09422683715820312, "step": 4354 }, { "epoch": 0.606841775238626, "grad_norm": 0.7399469614028931, "learning_rate": 3.810727987600482e-06, "loss": 0.0921173095703125, "step": 4355 }, { "epoch": 0.6069811189298404, "grad_norm": 0.45897433161735535, "learning_rate": 3.808438973872558e-06, "loss": 0.07097053527832031, "step": 4356 }, { "epoch": 0.6071204626210548, "grad_norm": 0.44559940695762634, "learning_rate": 3.80615022488229e-06, "loss": 0.08533859252929688, "step": 4357 }, { "epoch": 0.6072598063122692, "grad_norm": 0.6752280592918396, "learning_rate": 3.8038617411381876e-06, "loss": 0.08395004272460938, "step": 4358 }, { "epoch": 0.6073991500034835, "grad_norm": 0.7932080030441284, "learning_rate": 3.8015735231486974e-06, "loss": 0.1124725341796875, "step": 4359 }, { "epoch": 0.6075384936946979, "grad_norm": 0.886142909526825, "learning_rate": 3.799285571422208e-06, "loss": 0.08385181427001953, "step": 4360 }, { "epoch": 0.6076778373859123, "grad_norm": 0.5239256024360657, "learning_rate": 3.7969978864670527e-06, "loss": 0.07633590698242188, "step": 4361 }, { "epoch": 0.6078171810771267, "grad_norm": 0.9458755850791931, "learning_rate": 3.794710468791502e-06, "loss": 0.09450054168701172, "step": 4362 }, { "epoch": 0.6079565247683412, "grad_norm": 0.49718332290649414, "learning_rate": 3.7924233189037697e-06, "loss": 0.06609344482421875, "step": 4363 }, { "epoch": 0.6080958684595555, "grad_norm": 0.8717616200447083, "learning_rate": 3.7901364373120036e-06, "loss": 0.10149002075195312, "step": 4364 }, { "epoch": 0.6082352121507699, "grad_norm": 0.8649433255195618, "learning_rate": 3.787849824524301e-06, "loss": 0.08230781555175781, "step": 4365 }, { "epoch": 0.6083745558419843, "grad_norm": 0.6922891139984131, "learning_rate": 3.7855634810486936e-06, "loss": 0.0747833251953125, "step": 4366 }, { "epoch": 0.6085138995331987, "grad_norm": 1.625116229057312, "learning_rate": 3.7832774073931535e-06, "loss": 0.09115982055664062, "step": 4367 }, { "epoch": 0.608653243224413, "grad_norm": 1.0009983777999878, "learning_rate": 3.780991604065598e-06, "loss": 0.09274768829345703, "step": 4368 }, { "epoch": 0.6087925869156274, "grad_norm": 0.8450549840927124, "learning_rate": 3.778706071573875e-06, "loss": 0.09739112854003906, "step": 4369 }, { "epoch": 0.6089319306068418, "grad_norm": 1.0554171800613403, "learning_rate": 3.776420810425781e-06, "loss": 0.11092567443847656, "step": 4370 }, { "epoch": 0.6090712742980562, "grad_norm": 1.1038801670074463, "learning_rate": 3.774135821129047e-06, "loss": 0.09906005859375, "step": 4371 }, { "epoch": 0.6092106179892706, "grad_norm": 0.40921100974082947, "learning_rate": 3.771851104191348e-06, "loss": 0.06086540222167969, "step": 4372 }, { "epoch": 0.6093499616804849, "grad_norm": 0.6561086773872375, "learning_rate": 3.7695666601202944e-06, "loss": 0.10749626159667969, "step": 4373 }, { "epoch": 0.6094893053716993, "grad_norm": 0.9761524796485901, "learning_rate": 3.7672824894234388e-06, "loss": 0.07639122009277344, "step": 4374 }, { "epoch": 0.6096286490629137, "grad_norm": 0.8457097411155701, "learning_rate": 3.7649985926082695e-06, "loss": 0.08267402648925781, "step": 4375 }, { "epoch": 0.6097679927541281, "grad_norm": 0.933195173740387, "learning_rate": 3.762714970182216e-06, "loss": 0.10860347747802734, "step": 4376 }, { "epoch": 0.6099073364453425, "grad_norm": 0.524017333984375, "learning_rate": 3.76043162265265e-06, "loss": 0.0781087875366211, "step": 4377 }, { "epoch": 0.6100466801365568, "grad_norm": 1.4406994581222534, "learning_rate": 3.758148550526877e-06, "loss": 0.12240791320800781, "step": 4378 }, { "epoch": 0.6101860238277712, "grad_norm": 0.6118319034576416, "learning_rate": 3.7558657543121456e-06, "loss": 0.06679153442382812, "step": 4379 }, { "epoch": 0.6103253675189856, "grad_norm": 0.8554168343544006, "learning_rate": 3.7535832345156376e-06, "loss": 0.08784866333007812, "step": 4380 }, { "epoch": 0.6104647112102, "grad_norm": 3.18925142288208, "learning_rate": 3.7513009916444797e-06, "loss": 0.17141342163085938, "step": 4381 }, { "epoch": 0.6106040549014143, "grad_norm": 0.5568420886993408, "learning_rate": 3.7490190262057322e-06, "loss": 0.08019161224365234, "step": 4382 }, { "epoch": 0.6107433985926287, "grad_norm": 0.3763359785079956, "learning_rate": 3.7467373387063973e-06, "loss": 0.06843948364257812, "step": 4383 }, { "epoch": 0.6108827422838431, "grad_norm": 0.9471192955970764, "learning_rate": 3.7444559296534144e-06, "loss": 0.11816787719726562, "step": 4384 }, { "epoch": 0.6110220859750575, "grad_norm": 0.8910694718360901, "learning_rate": 3.7421747995536585e-06, "loss": 0.11928749084472656, "step": 4385 }, { "epoch": 0.6111614296662718, "grad_norm": 0.4844025671482086, "learning_rate": 3.739893948913945e-06, "loss": 0.08749008178710938, "step": 4386 }, { "epoch": 0.6113007733574862, "grad_norm": 1.0129188299179077, "learning_rate": 3.7376133782410275e-06, "loss": 0.10216522216796875, "step": 4387 }, { "epoch": 0.6114401170487006, "grad_norm": 0.5737795829772949, "learning_rate": 3.7353330880415963e-06, "loss": 0.07504081726074219, "step": 4388 }, { "epoch": 0.611579460739915, "grad_norm": 0.6235918998718262, "learning_rate": 3.7330530788222807e-06, "loss": 0.08941459655761719, "step": 4389 }, { "epoch": 0.6117188044311294, "grad_norm": 0.404185950756073, "learning_rate": 3.730773351089647e-06, "loss": 0.06211662292480469, "step": 4390 }, { "epoch": 0.6118581481223437, "grad_norm": 0.9264029264450073, "learning_rate": 3.7284939053501966e-06, "loss": 0.08273887634277344, "step": 4391 }, { "epoch": 0.6119974918135581, "grad_norm": 0.975281834602356, "learning_rate": 3.7262147421103713e-06, "loss": 0.09283447265625, "step": 4392 }, { "epoch": 0.6121368355047725, "grad_norm": 1.3062210083007812, "learning_rate": 3.723935861876549e-06, "loss": 0.11268997192382812, "step": 4393 }, { "epoch": 0.6122761791959869, "grad_norm": 0.993001401424408, "learning_rate": 3.7216572651550453e-06, "loss": 0.07944679260253906, "step": 4394 }, { "epoch": 0.6124155228872012, "grad_norm": 0.35760506987571716, "learning_rate": 3.7193789524521146e-06, "loss": 0.06302070617675781, "step": 4395 }, { "epoch": 0.6125548665784156, "grad_norm": 0.45904913544654846, "learning_rate": 3.717100924273941e-06, "loss": 0.07707023620605469, "step": 4396 }, { "epoch": 0.61269421026963, "grad_norm": 0.6353238224983215, "learning_rate": 3.714823181126653e-06, "loss": 0.1002044677734375, "step": 4397 }, { "epoch": 0.6128335539608444, "grad_norm": 1.0206400156021118, "learning_rate": 3.7125457235163144e-06, "loss": 0.08205413818359375, "step": 4398 }, { "epoch": 0.6129728976520588, "grad_norm": 1.8807151317596436, "learning_rate": 3.710268551948921e-06, "loss": 0.11491584777832031, "step": 4399 }, { "epoch": 0.6131122413432731, "grad_norm": 1.076349139213562, "learning_rate": 3.7079916669304127e-06, "loss": 0.10691642761230469, "step": 4400 }, { "epoch": 0.6132515850344875, "grad_norm": 0.5760291218757629, "learning_rate": 3.7057150689666577e-06, "loss": 0.07891082763671875, "step": 4401 }, { "epoch": 0.6133909287257019, "grad_norm": 0.4855266511440277, "learning_rate": 3.7034387585634656e-06, "loss": 0.06803321838378906, "step": 4402 }, { "epoch": 0.6135302724169164, "grad_norm": 1.0673350095748901, "learning_rate": 3.701162736226579e-06, "loss": 0.10468673706054688, "step": 4403 }, { "epoch": 0.6136696161081308, "grad_norm": 0.6857905983924866, "learning_rate": 3.6988870024616807e-06, "loss": 0.0769968032836914, "step": 4404 }, { "epoch": 0.6138089597993451, "grad_norm": 1.6134004592895508, "learning_rate": 3.6966115577743865e-06, "loss": 0.12476730346679688, "step": 4405 }, { "epoch": 0.6139483034905595, "grad_norm": 0.46746906638145447, "learning_rate": 3.6943364026702466e-06, "loss": 0.07452678680419922, "step": 4406 }, { "epoch": 0.6140876471817739, "grad_norm": 0.8324674367904663, "learning_rate": 3.6920615376547487e-06, "loss": 0.08028221130371094, "step": 4407 }, { "epoch": 0.6142269908729883, "grad_norm": 2.2088623046875, "learning_rate": 3.6897869632333157e-06, "loss": 0.17969131469726562, "step": 4408 }, { "epoch": 0.6143663345642026, "grad_norm": 0.9697964191436768, "learning_rate": 3.687512679911307e-06, "loss": 0.08271980285644531, "step": 4409 }, { "epoch": 0.614505678255417, "grad_norm": 0.6272424459457397, "learning_rate": 3.685238688194016e-06, "loss": 0.08148479461669922, "step": 4410 }, { "epoch": 0.6146450219466314, "grad_norm": 0.7526939511299133, "learning_rate": 3.682964988586675e-06, "loss": 0.08689308166503906, "step": 4411 }, { "epoch": 0.6147843656378458, "grad_norm": 1.1617817878723145, "learning_rate": 3.6806915815944422e-06, "loss": 0.13058853149414062, "step": 4412 }, { "epoch": 0.6149237093290602, "grad_norm": 0.3540203273296356, "learning_rate": 3.6784184677224204e-06, "loss": 0.05708503723144531, "step": 4413 }, { "epoch": 0.6150630530202745, "grad_norm": 0.5129426121711731, "learning_rate": 3.676145647475643e-06, "loss": 0.07755279541015625, "step": 4414 }, { "epoch": 0.6152023967114889, "grad_norm": 0.870883584022522, "learning_rate": 3.673873121359077e-06, "loss": 0.08451175689697266, "step": 4415 }, { "epoch": 0.6153417404027033, "grad_norm": 0.8520779013633728, "learning_rate": 3.6716008898776306e-06, "loss": 0.07947731018066406, "step": 4416 }, { "epoch": 0.6154810840939177, "grad_norm": 0.8051789402961731, "learning_rate": 3.669328953536137e-06, "loss": 0.08138465881347656, "step": 4417 }, { "epoch": 0.615620427785132, "grad_norm": 0.7881433367729187, "learning_rate": 3.6670573128393704e-06, "loss": 0.1064615249633789, "step": 4418 }, { "epoch": 0.6157597714763464, "grad_norm": 1.267985463142395, "learning_rate": 3.664785968292036e-06, "loss": 0.09200572967529297, "step": 4419 }, { "epoch": 0.6158991151675608, "grad_norm": 0.7762444615364075, "learning_rate": 3.662514920398777e-06, "loss": 0.0946207046508789, "step": 4420 }, { "epoch": 0.6160384588587752, "grad_norm": 0.7751940488815308, "learning_rate": 3.6602441696641684e-06, "loss": 0.09205055236816406, "step": 4421 }, { "epoch": 0.6161778025499896, "grad_norm": 1.6561031341552734, "learning_rate": 3.6579737165927176e-06, "loss": 0.13092422485351562, "step": 4422 }, { "epoch": 0.6163171462412039, "grad_norm": 1.0755923986434937, "learning_rate": 3.655703561688867e-06, "loss": 0.09753990173339844, "step": 4423 }, { "epoch": 0.6164564899324183, "grad_norm": 0.8598778247833252, "learning_rate": 3.653433705456994e-06, "loss": 0.10216140747070312, "step": 4424 }, { "epoch": 0.6165958336236327, "grad_norm": 0.5109290480613708, "learning_rate": 3.651164148401409e-06, "loss": 0.0797128677368164, "step": 4425 }, { "epoch": 0.6167351773148471, "grad_norm": 0.6959465146064758, "learning_rate": 3.648894891026358e-06, "loss": 0.07337188720703125, "step": 4426 }, { "epoch": 0.6168745210060614, "grad_norm": 2.180238962173462, "learning_rate": 3.646625933836015e-06, "loss": 0.1159515380859375, "step": 4427 }, { "epoch": 0.6170138646972758, "grad_norm": 0.6770450472831726, "learning_rate": 3.64435727733449e-06, "loss": 0.07030677795410156, "step": 4428 }, { "epoch": 0.6171532083884902, "grad_norm": 1.1467458009719849, "learning_rate": 3.6420889220258295e-06, "loss": 0.1349925994873047, "step": 4429 }, { "epoch": 0.6172925520797046, "grad_norm": 0.6740655899047852, "learning_rate": 3.639820868414008e-06, "loss": 0.09943962097167969, "step": 4430 }, { "epoch": 0.617431895770919, "grad_norm": 0.8725491166114807, "learning_rate": 3.6375531170029356e-06, "loss": 0.11168479919433594, "step": 4431 }, { "epoch": 0.6175712394621333, "grad_norm": 0.9050360321998596, "learning_rate": 3.6352856682964576e-06, "loss": 0.10172271728515625, "step": 4432 }, { "epoch": 0.6177105831533477, "grad_norm": 0.5541929006576538, "learning_rate": 3.633018522798346e-06, "loss": 0.07400321960449219, "step": 4433 }, { "epoch": 0.6178499268445621, "grad_norm": 0.9755771160125732, "learning_rate": 3.6307516810123095e-06, "loss": 0.10078716278076172, "step": 4434 }, { "epoch": 0.6179892705357765, "grad_norm": 0.547038197517395, "learning_rate": 3.6284851434419886e-06, "loss": 0.058162689208984375, "step": 4435 }, { "epoch": 0.6181286142269908, "grad_norm": 0.8407055735588074, "learning_rate": 3.6262189105909574e-06, "loss": 0.07286453247070312, "step": 4436 }, { "epoch": 0.6182679579182052, "grad_norm": 1.0963512659072876, "learning_rate": 3.6239529829627214e-06, "loss": 0.15149688720703125, "step": 4437 }, { "epoch": 0.6184073016094196, "grad_norm": 0.7378605604171753, "learning_rate": 3.6216873610607155e-06, "loss": 0.11273193359375, "step": 4438 }, { "epoch": 0.618546645300634, "grad_norm": 0.6585099697113037, "learning_rate": 3.61942204538831e-06, "loss": 0.08867454528808594, "step": 4439 }, { "epoch": 0.6186859889918483, "grad_norm": 0.7842193841934204, "learning_rate": 3.6171570364488075e-06, "loss": 0.08734321594238281, "step": 4440 }, { "epoch": 0.6188253326830627, "grad_norm": 0.5310879945755005, "learning_rate": 3.6148923347454413e-06, "loss": 0.06924819946289062, "step": 4441 }, { "epoch": 0.6189646763742771, "grad_norm": 0.7425654530525208, "learning_rate": 3.6126279407813765e-06, "loss": 0.10165786743164062, "step": 4442 }, { "epoch": 0.6191040200654916, "grad_norm": 1.426845908164978, "learning_rate": 3.6103638550597074e-06, "loss": 0.12108230590820312, "step": 4443 }, { "epoch": 0.619243363756706, "grad_norm": 0.840258002281189, "learning_rate": 3.6081000780834635e-06, "loss": 0.1060943603515625, "step": 4444 }, { "epoch": 0.6193827074479203, "grad_norm": 1.1497464179992676, "learning_rate": 3.6058366103556055e-06, "loss": 0.09619426727294922, "step": 4445 }, { "epoch": 0.6195220511391347, "grad_norm": 1.3340212106704712, "learning_rate": 3.6035734523790235e-06, "loss": 0.11147499084472656, "step": 4446 }, { "epoch": 0.6196613948303491, "grad_norm": 0.46318519115448, "learning_rate": 3.6013106046565383e-06, "loss": 0.07732200622558594, "step": 4447 }, { "epoch": 0.6198007385215635, "grad_norm": 1.2977112531661987, "learning_rate": 3.5990480676909055e-06, "loss": 0.09898567199707031, "step": 4448 }, { "epoch": 0.6199400822127779, "grad_norm": 0.9092416763305664, "learning_rate": 3.5967858419848077e-06, "loss": 0.08093643188476562, "step": 4449 }, { "epoch": 0.6200794259039922, "grad_norm": 0.6497290730476379, "learning_rate": 3.5945239280408596e-06, "loss": 0.07962274551391602, "step": 4450 }, { "epoch": 0.6202187695952066, "grad_norm": 1.5177383422851562, "learning_rate": 3.592262326361606e-06, "loss": 0.10141754150390625, "step": 4451 }, { "epoch": 0.620358113286421, "grad_norm": 0.615293562412262, "learning_rate": 3.5900010374495252e-06, "loss": 0.09060287475585938, "step": 4452 }, { "epoch": 0.6204974569776354, "grad_norm": 1.294450044631958, "learning_rate": 3.587740061807024e-06, "loss": 0.09570884704589844, "step": 4453 }, { "epoch": 0.6206368006688497, "grad_norm": 0.5884562730789185, "learning_rate": 3.585479399936438e-06, "loss": 0.08672142028808594, "step": 4454 }, { "epoch": 0.6207761443600641, "grad_norm": 0.4386233985424042, "learning_rate": 3.583219052340034e-06, "loss": 0.07245063781738281, "step": 4455 }, { "epoch": 0.6209154880512785, "grad_norm": 0.7266510725021362, "learning_rate": 3.5809590195200115e-06, "loss": 0.10675239562988281, "step": 4456 }, { "epoch": 0.6210548317424929, "grad_norm": 0.696344792842865, "learning_rate": 3.578699301978499e-06, "loss": 0.08214187622070312, "step": 4457 }, { "epoch": 0.6211941754337073, "grad_norm": 0.602878212928772, "learning_rate": 3.576439900217552e-06, "loss": 0.07999992370605469, "step": 4458 }, { "epoch": 0.6213335191249216, "grad_norm": 1.600625991821289, "learning_rate": 3.5741808147391587e-06, "loss": 0.12485313415527344, "step": 4459 }, { "epoch": 0.621472862816136, "grad_norm": 1.3800617456436157, "learning_rate": 3.571922046045235e-06, "loss": 0.11339759826660156, "step": 4460 }, { "epoch": 0.6216122065073504, "grad_norm": 0.5736028552055359, "learning_rate": 3.5696635946376305e-06, "loss": 0.09364128112792969, "step": 4461 }, { "epoch": 0.6217515501985648, "grad_norm": 0.760587751865387, "learning_rate": 3.5674054610181203e-06, "loss": 0.09037208557128906, "step": 4462 }, { "epoch": 0.6218908938897791, "grad_norm": 0.5804054141044617, "learning_rate": 3.5651476456884103e-06, "loss": 0.0811309814453125, "step": 4463 }, { "epoch": 0.6220302375809935, "grad_norm": 0.609815776348114, "learning_rate": 3.562890149150134e-06, "loss": 0.11310005187988281, "step": 4464 }, { "epoch": 0.6221695812722079, "grad_norm": 0.45204970240592957, "learning_rate": 3.560632971904857e-06, "loss": 0.06884765625, "step": 4465 }, { "epoch": 0.6223089249634223, "grad_norm": 1.15963876247406, "learning_rate": 3.558376114454073e-06, "loss": 0.09859085083007812, "step": 4466 }, { "epoch": 0.6224482686546366, "grad_norm": 0.8843235373497009, "learning_rate": 3.556119577299202e-06, "loss": 0.08100128173828125, "step": 4467 }, { "epoch": 0.622587612345851, "grad_norm": 1.1420973539352417, "learning_rate": 3.553863360941598e-06, "loss": 0.09459495544433594, "step": 4468 }, { "epoch": 0.6227269560370654, "grad_norm": 0.7658562064170837, "learning_rate": 3.55160746588254e-06, "loss": 0.0806427001953125, "step": 4469 }, { "epoch": 0.6228662997282798, "grad_norm": 0.5851583480834961, "learning_rate": 3.5493518926232352e-06, "loss": 0.08404731750488281, "step": 4470 }, { "epoch": 0.6230056434194942, "grad_norm": 1.210903286933899, "learning_rate": 3.547096641664819e-06, "loss": 0.10976028442382812, "step": 4471 }, { "epoch": 0.6231449871107085, "grad_norm": 0.7631639242172241, "learning_rate": 3.5448417135083603e-06, "loss": 0.09755706787109375, "step": 4472 }, { "epoch": 0.6232843308019229, "grad_norm": 0.7582495808601379, "learning_rate": 3.5425871086548513e-06, "loss": 0.07175445556640625, "step": 4473 }, { "epoch": 0.6234236744931373, "grad_norm": 0.34599363803863525, "learning_rate": 3.540332827605214e-06, "loss": 0.05870532989501953, "step": 4474 }, { "epoch": 0.6235630181843517, "grad_norm": 1.356798768043518, "learning_rate": 3.538078870860297e-06, "loss": 0.11049842834472656, "step": 4475 }, { "epoch": 0.623702361875566, "grad_norm": 0.6443419456481934, "learning_rate": 3.5358252389208777e-06, "loss": 0.0919647216796875, "step": 4476 }, { "epoch": 0.6238417055667804, "grad_norm": 0.6624502539634705, "learning_rate": 3.533571932287663e-06, "loss": 0.10401535034179688, "step": 4477 }, { "epoch": 0.6239810492579948, "grad_norm": 0.5309792160987854, "learning_rate": 3.5313189514612867e-06, "loss": 0.06769466400146484, "step": 4478 }, { "epoch": 0.6241203929492092, "grad_norm": 0.6455239653587341, "learning_rate": 3.5290662969423097e-06, "loss": 0.11218070983886719, "step": 4479 }, { "epoch": 0.6242597366404236, "grad_norm": 0.8085891604423523, "learning_rate": 3.5268139692312163e-06, "loss": 0.09199905395507812, "step": 4480 }, { "epoch": 0.6243990803316379, "grad_norm": 0.7175710797309875, "learning_rate": 3.5245619688284277e-06, "loss": 0.09014129638671875, "step": 4481 }, { "epoch": 0.6245384240228523, "grad_norm": 0.9829919338226318, "learning_rate": 3.522310296234285e-06, "loss": 0.09663772583007812, "step": 4482 }, { "epoch": 0.6246777677140668, "grad_norm": 0.6614023447036743, "learning_rate": 3.520058951949056e-06, "loss": 0.0908498764038086, "step": 4483 }, { "epoch": 0.6248171114052812, "grad_norm": 1.931065320968628, "learning_rate": 3.517807936472942e-06, "loss": 0.10782814025878906, "step": 4484 }, { "epoch": 0.6249564550964956, "grad_norm": 1.5271457433700562, "learning_rate": 3.515557250306067e-06, "loss": 0.1131744384765625, "step": 4485 }, { "epoch": 0.6250957987877099, "grad_norm": 0.9282026886940002, "learning_rate": 3.5133068939484793e-06, "loss": 0.10064315795898438, "step": 4486 }, { "epoch": 0.6252351424789243, "grad_norm": 1.751636028289795, "learning_rate": 3.511056867900157e-06, "loss": 0.13364028930664062, "step": 4487 }, { "epoch": 0.6253744861701387, "grad_norm": 0.7598699331283569, "learning_rate": 3.508807172661006e-06, "loss": 0.09369277954101562, "step": 4488 }, { "epoch": 0.6255138298613531, "grad_norm": 1.7441822290420532, "learning_rate": 3.506557808730857e-06, "loss": 0.11202621459960938, "step": 4489 }, { "epoch": 0.6256531735525674, "grad_norm": 1.1503249406814575, "learning_rate": 3.504308776609468e-06, "loss": 0.0793294906616211, "step": 4490 }, { "epoch": 0.6257925172437818, "grad_norm": 0.8078253865242004, "learning_rate": 3.502060076796521e-06, "loss": 0.1007070541381836, "step": 4491 }, { "epoch": 0.6259318609349962, "grad_norm": 1.0567067861557007, "learning_rate": 3.4998117097916247e-06, "loss": 0.07744789123535156, "step": 4492 }, { "epoch": 0.6260712046262106, "grad_norm": 1.0833358764648438, "learning_rate": 3.4975636760943177e-06, "loss": 0.08460521697998047, "step": 4493 }, { "epoch": 0.626210548317425, "grad_norm": 1.0391925573349, "learning_rate": 3.49531597620406e-06, "loss": 0.1002187728881836, "step": 4494 }, { "epoch": 0.6263498920086393, "grad_norm": 1.0621626377105713, "learning_rate": 3.4930686106202428e-06, "loss": 0.09947681427001953, "step": 4495 }, { "epoch": 0.6264892356998537, "grad_norm": 1.3493471145629883, "learning_rate": 3.4908215798421737e-06, "loss": 0.09395217895507812, "step": 4496 }, { "epoch": 0.6266285793910681, "grad_norm": 2.0694894790649414, "learning_rate": 3.488574884369095e-06, "loss": 0.08626270294189453, "step": 4497 }, { "epoch": 0.6267679230822825, "grad_norm": 0.7152244448661804, "learning_rate": 3.486328524700171e-06, "loss": 0.07662582397460938, "step": 4498 }, { "epoch": 0.6269072667734968, "grad_norm": 0.939436137676239, "learning_rate": 3.4840825013344897e-06, "loss": 0.10838127136230469, "step": 4499 }, { "epoch": 0.6270466104647112, "grad_norm": 0.5809099674224854, "learning_rate": 3.48183681477107e-06, "loss": 0.07575798034667969, "step": 4500 }, { "epoch": 0.6271859541559256, "grad_norm": 1.0770831108093262, "learning_rate": 3.4795914655088486e-06, "loss": 0.07628440856933594, "step": 4501 }, { "epoch": 0.62732529784714, "grad_norm": 0.994560182094574, "learning_rate": 3.4773464540466917e-06, "loss": 0.0865936279296875, "step": 4502 }, { "epoch": 0.6274646415383544, "grad_norm": 0.6873100399971008, "learning_rate": 3.47510178088339e-06, "loss": 0.07379913330078125, "step": 4503 }, { "epoch": 0.6276039852295687, "grad_norm": 0.5174708962440491, "learning_rate": 3.4728574465176585e-06, "loss": 0.07904720306396484, "step": 4504 }, { "epoch": 0.6277433289207831, "grad_norm": 0.7545915246009827, "learning_rate": 3.4706134514481372e-06, "loss": 0.11308860778808594, "step": 4505 }, { "epoch": 0.6278826726119975, "grad_norm": 0.7569705247879028, "learning_rate": 3.468369796173392e-06, "loss": 0.09020614624023438, "step": 4506 }, { "epoch": 0.6280220163032119, "grad_norm": 1.1964681148529053, "learning_rate": 3.4661264811919093e-06, "loss": 0.11006546020507812, "step": 4507 }, { "epoch": 0.6281613599944262, "grad_norm": 0.5689071416854858, "learning_rate": 3.4638835070021027e-06, "loss": 0.07135963439941406, "step": 4508 }, { "epoch": 0.6283007036856406, "grad_norm": 0.4463250935077667, "learning_rate": 3.4616408741023113e-06, "loss": 0.06348609924316406, "step": 4509 }, { "epoch": 0.628440047376855, "grad_norm": 0.4419902265071869, "learning_rate": 3.459398582990795e-06, "loss": 0.056171417236328125, "step": 4510 }, { "epoch": 0.6285793910680694, "grad_norm": 1.2317900657653809, "learning_rate": 3.4571566341657446e-06, "loss": 0.11491012573242188, "step": 4511 }, { "epoch": 0.6287187347592837, "grad_norm": 0.6290752291679382, "learning_rate": 3.4549150281252635e-06, "loss": 0.06799125671386719, "step": 4512 }, { "epoch": 0.6288580784504981, "grad_norm": 0.6090077757835388, "learning_rate": 3.452673765367389e-06, "loss": 0.08188438415527344, "step": 4513 }, { "epoch": 0.6289974221417125, "grad_norm": 0.8193356394767761, "learning_rate": 3.450432846390078e-06, "loss": 0.09892845153808594, "step": 4514 }, { "epoch": 0.6291367658329269, "grad_norm": 1.362317681312561, "learning_rate": 3.4481922716912097e-06, "loss": 0.10945701599121094, "step": 4515 }, { "epoch": 0.6292761095241413, "grad_norm": 2.045860528945923, "learning_rate": 3.445952041768593e-06, "loss": 0.14242172241210938, "step": 4516 }, { "epoch": 0.6294154532153556, "grad_norm": 0.527769923210144, "learning_rate": 3.443712157119952e-06, "loss": 0.06523704528808594, "step": 4517 }, { "epoch": 0.62955479690657, "grad_norm": 0.9041419625282288, "learning_rate": 3.4414726182429388e-06, "loss": 0.10205459594726562, "step": 4518 }, { "epoch": 0.6296941405977844, "grad_norm": 2.273747205734253, "learning_rate": 3.4392334256351265e-06, "loss": 0.15852069854736328, "step": 4519 }, { "epoch": 0.6298334842889988, "grad_norm": 1.3815324306488037, "learning_rate": 3.436994579794016e-06, "loss": 0.1373748779296875, "step": 4520 }, { "epoch": 0.6299728279802131, "grad_norm": 0.5338550806045532, "learning_rate": 3.4347560812170267e-06, "loss": 0.08269405364990234, "step": 4521 }, { "epoch": 0.6301121716714275, "grad_norm": 0.6881833076477051, "learning_rate": 3.4325179304014997e-06, "loss": 0.08204460144042969, "step": 4522 }, { "epoch": 0.6302515153626419, "grad_norm": 1.0961004495620728, "learning_rate": 3.4302801278447028e-06, "loss": 0.11699867248535156, "step": 4523 }, { "epoch": 0.6303908590538564, "grad_norm": 1.6203807592391968, "learning_rate": 3.428042674043822e-06, "loss": 0.10700798034667969, "step": 4524 }, { "epoch": 0.6305302027450708, "grad_norm": 0.9207046627998352, "learning_rate": 3.425805569495973e-06, "loss": 0.09647178649902344, "step": 4525 }, { "epoch": 0.6306695464362851, "grad_norm": 0.9158530235290527, "learning_rate": 3.4235688146981854e-06, "loss": 0.11779403686523438, "step": 4526 }, { "epoch": 0.6308088901274995, "grad_norm": 1.111548900604248, "learning_rate": 3.42133241014742e-06, "loss": 0.10389137268066406, "step": 4527 }, { "epoch": 0.6309482338187139, "grad_norm": 0.6890766620635986, "learning_rate": 3.4190963563405482e-06, "loss": 0.11286544799804688, "step": 4528 }, { "epoch": 0.6310875775099283, "grad_norm": 0.9696862697601318, "learning_rate": 3.416860653774374e-06, "loss": 0.09770774841308594, "step": 4529 }, { "epoch": 0.6312269212011427, "grad_norm": 1.6757488250732422, "learning_rate": 3.4146253029456195e-06, "loss": 0.11371040344238281, "step": 4530 }, { "epoch": 0.631366264892357, "grad_norm": 1.748837947845459, "learning_rate": 3.4123903043509267e-06, "loss": 0.11204147338867188, "step": 4531 }, { "epoch": 0.6315056085835714, "grad_norm": 0.9085609316825867, "learning_rate": 3.4101556584868646e-06, "loss": 0.07874488830566406, "step": 4532 }, { "epoch": 0.6316449522747858, "grad_norm": 0.7213195562362671, "learning_rate": 3.407921365849917e-06, "loss": 0.0995635986328125, "step": 4533 }, { "epoch": 0.6317842959660002, "grad_norm": 0.9737873077392578, "learning_rate": 3.4056874269364946e-06, "loss": 0.10092735290527344, "step": 4534 }, { "epoch": 0.6319236396572145, "grad_norm": 0.6644216179847717, "learning_rate": 3.4034538422429263e-06, "loss": 0.10491180419921875, "step": 4535 }, { "epoch": 0.6320629833484289, "grad_norm": 1.235543966293335, "learning_rate": 3.401220612265465e-06, "loss": 0.1342029571533203, "step": 4536 }, { "epoch": 0.6322023270396433, "grad_norm": 0.989254891872406, "learning_rate": 3.3989877375002846e-06, "loss": 0.09068107604980469, "step": 4537 }, { "epoch": 0.6323416707308577, "grad_norm": 0.9016674757003784, "learning_rate": 3.3967552184434753e-06, "loss": 0.0911092758178711, "step": 4538 }, { "epoch": 0.632481014422072, "grad_norm": 0.658644437789917, "learning_rate": 3.3945230555910534e-06, "loss": 0.0711212158203125, "step": 4539 }, { "epoch": 0.6326203581132864, "grad_norm": 1.2262084484100342, "learning_rate": 3.3922912494389554e-06, "loss": 0.127288818359375, "step": 4540 }, { "epoch": 0.6327597018045008, "grad_norm": 0.6980158686637878, "learning_rate": 3.3900598004830377e-06, "loss": 0.0803079605102539, "step": 4541 }, { "epoch": 0.6328990454957152, "grad_norm": 1.149953007698059, "learning_rate": 3.387828709219075e-06, "loss": 0.10464096069335938, "step": 4542 }, { "epoch": 0.6330383891869296, "grad_norm": 0.6929759383201599, "learning_rate": 3.3855979761427705e-06, "loss": 0.09324169158935547, "step": 4543 }, { "epoch": 0.6331777328781439, "grad_norm": 0.4160417318344116, "learning_rate": 3.3833676017497353e-06, "loss": 0.07017898559570312, "step": 4544 }, { "epoch": 0.6333170765693583, "grad_norm": 0.6139534711837769, "learning_rate": 3.381137586535511e-06, "loss": 0.0704660415649414, "step": 4545 }, { "epoch": 0.6334564202605727, "grad_norm": 0.9390152096748352, "learning_rate": 3.3789079309955556e-06, "loss": 0.10177421569824219, "step": 4546 }, { "epoch": 0.6335957639517871, "grad_norm": 0.709061861038208, "learning_rate": 3.3766786356252466e-06, "loss": 0.08785820007324219, "step": 4547 }, { "epoch": 0.6337351076430014, "grad_norm": 0.6906203031539917, "learning_rate": 3.374449700919887e-06, "loss": 0.09955024719238281, "step": 4548 }, { "epoch": 0.6338744513342158, "grad_norm": 0.667145848274231, "learning_rate": 3.37222112737469e-06, "loss": 0.08044672012329102, "step": 4549 }, { "epoch": 0.6340137950254302, "grad_norm": 0.6479272842407227, "learning_rate": 3.3699929154847957e-06, "loss": 0.09703636169433594, "step": 4550 }, { "epoch": 0.6341531387166446, "grad_norm": 1.4775142669677734, "learning_rate": 3.367765065745261e-06, "loss": 0.10408592224121094, "step": 4551 }, { "epoch": 0.634292482407859, "grad_norm": 0.814644455909729, "learning_rate": 3.365537578651065e-06, "loss": 0.0822296142578125, "step": 4552 }, { "epoch": 0.6344318260990733, "grad_norm": 0.7503966093063354, "learning_rate": 3.3633104546971052e-06, "loss": 0.0905609130859375, "step": 4553 }, { "epoch": 0.6345711697902877, "grad_norm": 0.9768809080123901, "learning_rate": 3.3610836943781945e-06, "loss": 0.10157394409179688, "step": 4554 }, { "epoch": 0.6347105134815021, "grad_norm": 0.6261598467826843, "learning_rate": 3.358857298189069e-06, "loss": 0.079681396484375, "step": 4555 }, { "epoch": 0.6348498571727165, "grad_norm": 1.0731266736984253, "learning_rate": 3.356631266624385e-06, "loss": 0.11245536804199219, "step": 4556 }, { "epoch": 0.6349892008639308, "grad_norm": 1.1148664951324463, "learning_rate": 3.3544056001787146e-06, "loss": 0.11219978332519531, "step": 4557 }, { "epoch": 0.6351285445551452, "grad_norm": 0.8331814408302307, "learning_rate": 3.3521802993465513e-06, "loss": 0.09557914733886719, "step": 4558 }, { "epoch": 0.6352678882463596, "grad_norm": 0.5717902183532715, "learning_rate": 3.3499553646223037e-06, "loss": 0.08027839660644531, "step": 4559 }, { "epoch": 0.635407231937574, "grad_norm": 0.6231586337089539, "learning_rate": 3.3477307965003026e-06, "loss": 0.08863449096679688, "step": 4560 }, { "epoch": 0.6355465756287884, "grad_norm": 2.8812520503997803, "learning_rate": 3.345506595474798e-06, "loss": 0.12989425659179688, "step": 4561 }, { "epoch": 0.6356859193200027, "grad_norm": 0.8642901182174683, "learning_rate": 3.3432827620399543e-06, "loss": 0.10856437683105469, "step": 4562 }, { "epoch": 0.6358252630112171, "grad_norm": 0.4751153886318207, "learning_rate": 3.3410592966898565e-06, "loss": 0.07672882080078125, "step": 4563 }, { "epoch": 0.6359646067024316, "grad_norm": 0.6567915081977844, "learning_rate": 3.3388361999185105e-06, "loss": 0.08612346649169922, "step": 4564 }, { "epoch": 0.636103950393646, "grad_norm": 1.5720300674438477, "learning_rate": 3.3366134722198352e-06, "loss": 0.115203857421875, "step": 4565 }, { "epoch": 0.6362432940848604, "grad_norm": 0.7206675410270691, "learning_rate": 3.3343911140876704e-06, "loss": 0.103118896484375, "step": 4566 }, { "epoch": 0.6363826377760747, "grad_norm": 0.5911450982093811, "learning_rate": 3.332169126015773e-06, "loss": 0.06638145446777344, "step": 4567 }, { "epoch": 0.6365219814672891, "grad_norm": 1.0502567291259766, "learning_rate": 3.3299475084978195e-06, "loss": 0.12084579467773438, "step": 4568 }, { "epoch": 0.6366613251585035, "grad_norm": 1.2466931343078613, "learning_rate": 3.3277262620274025e-06, "loss": 0.12195587158203125, "step": 4569 }, { "epoch": 0.6368006688497179, "grad_norm": 1.047380805015564, "learning_rate": 3.3255053870980304e-06, "loss": 0.099761962890625, "step": 4570 }, { "epoch": 0.6369400125409322, "grad_norm": 0.7213295102119446, "learning_rate": 3.3232848842031306e-06, "loss": 0.0863485336303711, "step": 4571 }, { "epoch": 0.6370793562321466, "grad_norm": 1.307336449623108, "learning_rate": 3.3210647538360514e-06, "loss": 0.10228252410888672, "step": 4572 }, { "epoch": 0.637218699923361, "grad_norm": 1.5142645835876465, "learning_rate": 3.3188449964900527e-06, "loss": 0.12808799743652344, "step": 4573 }, { "epoch": 0.6373580436145754, "grad_norm": 0.2835998833179474, "learning_rate": 3.316625612658315e-06, "loss": 0.05730247497558594, "step": 4574 }, { "epoch": 0.6374973873057898, "grad_norm": 0.5272022485733032, "learning_rate": 3.314406602833933e-06, "loss": 0.09215354919433594, "step": 4575 }, { "epoch": 0.6376367309970041, "grad_norm": 1.33045494556427, "learning_rate": 3.3121879675099205e-06, "loss": 0.117584228515625, "step": 4576 }, { "epoch": 0.6377760746882185, "grad_norm": 0.7289847135543823, "learning_rate": 3.3099697071792093e-06, "loss": 0.0978240966796875, "step": 4577 }, { "epoch": 0.6379154183794329, "grad_norm": 1.227479100227356, "learning_rate": 3.3077518223346448e-06, "loss": 0.09471702575683594, "step": 4578 }, { "epoch": 0.6380547620706473, "grad_norm": 0.4620774984359741, "learning_rate": 3.30553431346899e-06, "loss": 0.07267570495605469, "step": 4579 }, { "epoch": 0.6381941057618616, "grad_norm": 0.9587869048118591, "learning_rate": 3.3033171810749274e-06, "loss": 0.10303115844726562, "step": 4580 }, { "epoch": 0.638333449453076, "grad_norm": 0.6529108881950378, "learning_rate": 3.3011004256450497e-06, "loss": 0.08368110656738281, "step": 4581 }, { "epoch": 0.6384727931442904, "grad_norm": 0.7551344633102417, "learning_rate": 3.2988840476718713e-06, "loss": 0.09065437316894531, "step": 4582 }, { "epoch": 0.6386121368355048, "grad_norm": 0.4374924898147583, "learning_rate": 3.2966680476478196e-06, "loss": 0.07089042663574219, "step": 4583 }, { "epoch": 0.6387514805267192, "grad_norm": 0.9487583041191101, "learning_rate": 3.294452426065241e-06, "loss": 0.1009674072265625, "step": 4584 }, { "epoch": 0.6388908242179335, "grad_norm": 0.4766983091831207, "learning_rate": 3.2922371834163958e-06, "loss": 0.08078575134277344, "step": 4585 }, { "epoch": 0.6390301679091479, "grad_norm": 1.1063820123672485, "learning_rate": 3.2900223201934584e-06, "loss": 0.13385391235351562, "step": 4586 }, { "epoch": 0.6391695116003623, "grad_norm": 0.6889409422874451, "learning_rate": 3.287807836888521e-06, "loss": 0.08208560943603516, "step": 4587 }, { "epoch": 0.6393088552915767, "grad_norm": 0.5675349831581116, "learning_rate": 3.2855937339935933e-06, "loss": 0.08269882202148438, "step": 4588 }, { "epoch": 0.639448198982791, "grad_norm": 0.40339869260787964, "learning_rate": 3.2833800120005977e-06, "loss": 0.062458038330078125, "step": 4589 }, { "epoch": 0.6395875426740054, "grad_norm": 0.7060455083847046, "learning_rate": 3.2811666714013724e-06, "loss": 0.1083536148071289, "step": 4590 }, { "epoch": 0.6397268863652198, "grad_norm": 0.5596547722816467, "learning_rate": 3.2789537126876714e-06, "loss": 0.0699605941772461, "step": 4591 }, { "epoch": 0.6398662300564342, "grad_norm": 0.7004932761192322, "learning_rate": 3.2767411363511613e-06, "loss": 0.08836936950683594, "step": 4592 }, { "epoch": 0.6400055737476485, "grad_norm": 0.771403968334198, "learning_rate": 3.2745289428834294e-06, "loss": 0.1013031005859375, "step": 4593 }, { "epoch": 0.6401449174388629, "grad_norm": 0.5950875878334045, "learning_rate": 3.272317132775972e-06, "loss": 0.07748222351074219, "step": 4594 }, { "epoch": 0.6402842611300773, "grad_norm": 0.7521244883537292, "learning_rate": 3.270105706520207e-06, "loss": 0.09175491333007812, "step": 4595 }, { "epoch": 0.6404236048212917, "grad_norm": 0.6871645450592041, "learning_rate": 3.267894664607457e-06, "loss": 0.10100173950195312, "step": 4596 }, { "epoch": 0.6405629485125061, "grad_norm": 0.9528211951255798, "learning_rate": 3.265684007528969e-06, "loss": 0.0803518295288086, "step": 4597 }, { "epoch": 0.6407022922037204, "grad_norm": 1.3384027481079102, "learning_rate": 3.2634737357758994e-06, "loss": 0.10014915466308594, "step": 4598 }, { "epoch": 0.6408416358949348, "grad_norm": 0.8921104669570923, "learning_rate": 3.261263849839319e-06, "loss": 0.09718132019042969, "step": 4599 }, { "epoch": 0.6409809795861492, "grad_norm": 1.8532991409301758, "learning_rate": 3.2590543502102163e-06, "loss": 0.1046457290649414, "step": 4600 }, { "epoch": 0.6411203232773636, "grad_norm": 0.7033703923225403, "learning_rate": 3.256845237379491e-06, "loss": 0.07036590576171875, "step": 4601 }, { "epoch": 0.641259666968578, "grad_norm": 0.8395472764968872, "learning_rate": 3.254636511837957e-06, "loss": 0.0823211669921875, "step": 4602 }, { "epoch": 0.6413990106597923, "grad_norm": 0.7338293194770813, "learning_rate": 3.252428174076341e-06, "loss": 0.0903472900390625, "step": 4603 }, { "epoch": 0.6415383543510068, "grad_norm": 0.5421583652496338, "learning_rate": 3.2502202245852887e-06, "loss": 0.08674812316894531, "step": 4604 }, { "epoch": 0.6416776980422212, "grad_norm": 1.0352917909622192, "learning_rate": 3.2480126638553533e-06, "loss": 0.0901031494140625, "step": 4605 }, { "epoch": 0.6418170417334356, "grad_norm": 0.6772106885910034, "learning_rate": 3.245805492377007e-06, "loss": 0.08737754821777344, "step": 4606 }, { "epoch": 0.64195638542465, "grad_norm": 1.0575637817382812, "learning_rate": 3.243598710640631e-06, "loss": 0.11973953247070312, "step": 4607 }, { "epoch": 0.6420957291158643, "grad_norm": 0.7183886766433716, "learning_rate": 3.2413923191365203e-06, "loss": 0.09469223022460938, "step": 4608 }, { "epoch": 0.6422350728070787, "grad_norm": 0.7153064012527466, "learning_rate": 3.2391863183548877e-06, "loss": 0.09765815734863281, "step": 4609 }, { "epoch": 0.6423744164982931, "grad_norm": 0.7201479077339172, "learning_rate": 3.236980708785854e-06, "loss": 0.09603309631347656, "step": 4610 }, { "epoch": 0.6425137601895075, "grad_norm": 0.4895358085632324, "learning_rate": 3.2347754909194595e-06, "loss": 0.07863235473632812, "step": 4611 }, { "epoch": 0.6426531038807218, "grad_norm": 1.042862057685852, "learning_rate": 3.232570665245648e-06, "loss": 0.11170196533203125, "step": 4612 }, { "epoch": 0.6427924475719362, "grad_norm": 0.9433963894844055, "learning_rate": 3.2303662322542835e-06, "loss": 0.14160919189453125, "step": 4613 }, { "epoch": 0.6429317912631506, "grad_norm": 1.2201517820358276, "learning_rate": 3.2281621924351407e-06, "loss": 0.115631103515625, "step": 4614 }, { "epoch": 0.643071134954365, "grad_norm": 0.7539862394332886, "learning_rate": 3.2259585462779063e-06, "loss": 0.08335590362548828, "step": 4615 }, { "epoch": 0.6432104786455793, "grad_norm": 1.197329044342041, "learning_rate": 3.2237552942721832e-06, "loss": 0.12092399597167969, "step": 4616 }, { "epoch": 0.6433498223367937, "grad_norm": 1.335817813873291, "learning_rate": 3.2215524369074802e-06, "loss": 0.1301126480102539, "step": 4617 }, { "epoch": 0.6434891660280081, "grad_norm": 0.6263457536697388, "learning_rate": 3.219349974673223e-06, "loss": 0.09009552001953125, "step": 4618 }, { "epoch": 0.6436285097192225, "grad_norm": 1.0174669027328491, "learning_rate": 3.2171479080587475e-06, "loss": 0.10465431213378906, "step": 4619 }, { "epoch": 0.6437678534104369, "grad_norm": 0.8921168446540833, "learning_rate": 3.2149462375533046e-06, "loss": 0.09957504272460938, "step": 4620 }, { "epoch": 0.6439071971016512, "grad_norm": 0.6361935138702393, "learning_rate": 3.212744963646054e-06, "loss": 0.09157562255859375, "step": 4621 }, { "epoch": 0.6440465407928656, "grad_norm": 1.0721070766448975, "learning_rate": 3.2105440868260706e-06, "loss": 0.09707832336425781, "step": 4622 }, { "epoch": 0.64418588448408, "grad_norm": 0.6730546951293945, "learning_rate": 3.2083436075823353e-06, "loss": 0.08948516845703125, "step": 4623 }, { "epoch": 0.6443252281752944, "grad_norm": 0.36917534470558167, "learning_rate": 3.2061435264037457e-06, "loss": 0.05974769592285156, "step": 4624 }, { "epoch": 0.6444645718665087, "grad_norm": 0.41217881441116333, "learning_rate": 3.2039438437791105e-06, "loss": 0.07049942016601562, "step": 4625 }, { "epoch": 0.6446039155577231, "grad_norm": 0.6398956179618835, "learning_rate": 3.2017445601971474e-06, "loss": 0.0929718017578125, "step": 4626 }, { "epoch": 0.6447432592489375, "grad_norm": 0.36505454778671265, "learning_rate": 3.199545676146492e-06, "loss": 0.061344146728515625, "step": 4627 }, { "epoch": 0.6448826029401519, "grad_norm": 0.4920269250869751, "learning_rate": 3.197347192115679e-06, "loss": 0.06994247436523438, "step": 4628 }, { "epoch": 0.6450219466313662, "grad_norm": 0.9362406134605408, "learning_rate": 3.1951491085931657e-06, "loss": 0.09203338623046875, "step": 4629 }, { "epoch": 0.6451612903225806, "grad_norm": 0.6516214609146118, "learning_rate": 3.1929514260673145e-06, "loss": 0.0898752212524414, "step": 4630 }, { "epoch": 0.645300634013795, "grad_norm": 0.5420569777488708, "learning_rate": 3.1907541450264003e-06, "loss": 0.08534431457519531, "step": 4631 }, { "epoch": 0.6454399777050094, "grad_norm": 1.369884729385376, "learning_rate": 3.188557265958612e-06, "loss": 0.11434173583984375, "step": 4632 }, { "epoch": 0.6455793213962238, "grad_norm": 0.8108819127082825, "learning_rate": 3.186360789352041e-06, "loss": 0.08153915405273438, "step": 4633 }, { "epoch": 0.6457186650874381, "grad_norm": 1.3785868883132935, "learning_rate": 3.184164715694697e-06, "loss": 0.11089038848876953, "step": 4634 }, { "epoch": 0.6458580087786525, "grad_norm": 0.4894010126590729, "learning_rate": 3.1819690454744956e-06, "loss": 0.07515907287597656, "step": 4635 }, { "epoch": 0.6459973524698669, "grad_norm": 1.7380282878875732, "learning_rate": 3.1797737791792672e-06, "loss": 0.12972068786621094, "step": 4636 }, { "epoch": 0.6461366961610813, "grad_norm": 0.7913181185722351, "learning_rate": 3.1775789172967486e-06, "loss": 0.09023284912109375, "step": 4637 }, { "epoch": 0.6462760398522956, "grad_norm": 0.774307906627655, "learning_rate": 3.1753844603145894e-06, "loss": 0.09472084045410156, "step": 4638 }, { "epoch": 0.64641538354351, "grad_norm": 1.5998281240463257, "learning_rate": 3.1731904087203442e-06, "loss": 0.13487815856933594, "step": 4639 }, { "epoch": 0.6465547272347244, "grad_norm": 0.4478793740272522, "learning_rate": 3.1709967630014844e-06, "loss": 0.06723403930664062, "step": 4640 }, { "epoch": 0.6466940709259388, "grad_norm": 1.1478010416030884, "learning_rate": 3.168803523645387e-06, "loss": 0.08829402923583984, "step": 4641 }, { "epoch": 0.6468334146171532, "grad_norm": 1.115396499633789, "learning_rate": 3.166610691139338e-06, "loss": 0.12759971618652344, "step": 4642 }, { "epoch": 0.6469727583083675, "grad_norm": 0.5103621482849121, "learning_rate": 3.1644182659705403e-06, "loss": 0.07664966583251953, "step": 4643 }, { "epoch": 0.647112101999582, "grad_norm": 2.222656011581421, "learning_rate": 3.1622262486260936e-06, "loss": 0.13108348846435547, "step": 4644 }, { "epoch": 0.6472514456907964, "grad_norm": 0.5974005460739136, "learning_rate": 3.160034639593018e-06, "loss": 0.09815216064453125, "step": 4645 }, { "epoch": 0.6473907893820108, "grad_norm": 0.7063329815864563, "learning_rate": 3.1578434393582392e-06, "loss": 0.08809947967529297, "step": 4646 }, { "epoch": 0.6475301330732252, "grad_norm": 0.681898295879364, "learning_rate": 3.155652648408589e-06, "loss": 0.1049346923828125, "step": 4647 }, { "epoch": 0.6476694767644395, "grad_norm": 0.9780024886131287, "learning_rate": 3.1534622672308165e-06, "loss": 0.11458778381347656, "step": 4648 }, { "epoch": 0.6478088204556539, "grad_norm": 0.46377262473106384, "learning_rate": 3.1512722963115693e-06, "loss": 0.06864166259765625, "step": 4649 }, { "epoch": 0.6479481641468683, "grad_norm": 1.4759719371795654, "learning_rate": 3.1490827361374105e-06, "loss": 0.11160445213317871, "step": 4650 }, { "epoch": 0.6480875078380827, "grad_norm": 2.5557572841644287, "learning_rate": 3.1468935871948096e-06, "loss": 0.1303691864013672, "step": 4651 }, { "epoch": 0.648226851529297, "grad_norm": 1.254496455192566, "learning_rate": 3.1447048499701478e-06, "loss": 0.12026786804199219, "step": 4652 }, { "epoch": 0.6483661952205114, "grad_norm": 1.6255868673324585, "learning_rate": 3.1425165249497118e-06, "loss": 0.115692138671875, "step": 4653 }, { "epoch": 0.6485055389117258, "grad_norm": 0.9394548535346985, "learning_rate": 3.1403286126196963e-06, "loss": 0.11884498596191406, "step": 4654 }, { "epoch": 0.6486448826029402, "grad_norm": 1.3085020780563354, "learning_rate": 3.138141113466205e-06, "loss": 0.11490440368652344, "step": 4655 }, { "epoch": 0.6487842262941546, "grad_norm": 1.2664754390716553, "learning_rate": 3.135954027975252e-06, "loss": 0.08292961120605469, "step": 4656 }, { "epoch": 0.6489235699853689, "grad_norm": 1.1393675804138184, "learning_rate": 3.1337673566327575e-06, "loss": 0.08249664306640625, "step": 4657 }, { "epoch": 0.6490629136765833, "grad_norm": 0.6636370420455933, "learning_rate": 3.1315810999245483e-06, "loss": 0.08447551727294922, "step": 4658 }, { "epoch": 0.6492022573677977, "grad_norm": 1.6731998920440674, "learning_rate": 3.1293952583363653e-06, "loss": 0.11104393005371094, "step": 4659 }, { "epoch": 0.6493416010590121, "grad_norm": 0.7513255476951599, "learning_rate": 3.127209832353846e-06, "loss": 0.07841682434082031, "step": 4660 }, { "epoch": 0.6494809447502264, "grad_norm": 1.2977542877197266, "learning_rate": 3.1250248224625463e-06, "loss": 0.10435867309570312, "step": 4661 }, { "epoch": 0.6496202884414408, "grad_norm": 1.0014936923980713, "learning_rate": 3.1228402291479243e-06, "loss": 0.11480331420898438, "step": 4662 }, { "epoch": 0.6497596321326552, "grad_norm": 1.7565538883209229, "learning_rate": 3.1206560528953467e-06, "loss": 0.09508705139160156, "step": 4663 }, { "epoch": 0.6498989758238696, "grad_norm": 1.19558846950531, "learning_rate": 3.1184722941900902e-06, "loss": 0.1102752685546875, "step": 4664 }, { "epoch": 0.650038319515084, "grad_norm": 0.4553973972797394, "learning_rate": 3.1162889535173323e-06, "loss": 0.07006263732910156, "step": 4665 }, { "epoch": 0.6501776632062983, "grad_norm": 0.5753689408302307, "learning_rate": 3.1141060313621637e-06, "loss": 0.08533096313476562, "step": 4666 }, { "epoch": 0.6503170068975127, "grad_norm": 0.7496270537376404, "learning_rate": 3.111923528209577e-06, "loss": 0.1159820556640625, "step": 4667 }, { "epoch": 0.6504563505887271, "grad_norm": 0.7305957674980164, "learning_rate": 3.1097414445444796e-06, "loss": 0.09840965270996094, "step": 4668 }, { "epoch": 0.6505956942799415, "grad_norm": 1.1479570865631104, "learning_rate": 3.1075597808516776e-06, "loss": 0.09328556060791016, "step": 4669 }, { "epoch": 0.6507350379711558, "grad_norm": 1.3441888093948364, "learning_rate": 3.1053785376158865e-06, "loss": 0.1390705108642578, "step": 4670 }, { "epoch": 0.6508743816623702, "grad_norm": 1.2211933135986328, "learning_rate": 3.1031977153217286e-06, "loss": 0.12164878845214844, "step": 4671 }, { "epoch": 0.6510137253535846, "grad_norm": 1.1178443431854248, "learning_rate": 3.1010173144537348e-06, "loss": 0.12164306640625, "step": 4672 }, { "epoch": 0.651153069044799, "grad_norm": 1.1463439464569092, "learning_rate": 3.0988373354963387e-06, "loss": 0.11072349548339844, "step": 4673 }, { "epoch": 0.6512924127360133, "grad_norm": 0.6625497937202454, "learning_rate": 3.0966577789338812e-06, "loss": 0.09052658081054688, "step": 4674 }, { "epoch": 0.6514317564272277, "grad_norm": 0.8021824359893799, "learning_rate": 3.0944786452506147e-06, "loss": 0.09759521484375, "step": 4675 }, { "epoch": 0.6515711001184421, "grad_norm": 0.8879271745681763, "learning_rate": 3.092299934930686e-06, "loss": 0.08875846862792969, "step": 4676 }, { "epoch": 0.6517104438096565, "grad_norm": 0.7206394076347351, "learning_rate": 3.0901216484581597e-06, "loss": 0.09284782409667969, "step": 4677 }, { "epoch": 0.6518497875008709, "grad_norm": 1.6939268112182617, "learning_rate": 3.087943786316999e-06, "loss": 0.13701629638671875, "step": 4678 }, { "epoch": 0.6519891311920852, "grad_norm": 0.4577694237232208, "learning_rate": 3.085766348991076e-06, "loss": 0.06558418273925781, "step": 4679 }, { "epoch": 0.6521284748832996, "grad_norm": 0.540330171585083, "learning_rate": 3.0835893369641694e-06, "loss": 0.07479095458984375, "step": 4680 }, { "epoch": 0.652267818574514, "grad_norm": 0.8942210078239441, "learning_rate": 3.0814127507199587e-06, "loss": 0.11735153198242188, "step": 4681 }, { "epoch": 0.6524071622657284, "grad_norm": 0.7385797500610352, "learning_rate": 3.0792365907420323e-06, "loss": 0.08393287658691406, "step": 4682 }, { "epoch": 0.6525465059569427, "grad_norm": 0.8879309892654419, "learning_rate": 3.0770608575138825e-06, "loss": 0.09224414825439453, "step": 4683 }, { "epoch": 0.6526858496481572, "grad_norm": 0.6295105814933777, "learning_rate": 3.0748855515189104e-06, "loss": 0.076995849609375, "step": 4684 }, { "epoch": 0.6528251933393716, "grad_norm": 0.29920485615730286, "learning_rate": 3.0727106732404183e-06, "loss": 0.06294822692871094, "step": 4685 }, { "epoch": 0.652964537030586, "grad_norm": 0.5007673501968384, "learning_rate": 3.0705362231616133e-06, "loss": 0.07527828216552734, "step": 4686 }, { "epoch": 0.6531038807218004, "grad_norm": 0.7572353482246399, "learning_rate": 3.0683622017656074e-06, "loss": 0.08060359954833984, "step": 4687 }, { "epoch": 0.6532432244130147, "grad_norm": 0.39502525329589844, "learning_rate": 3.066188609535421e-06, "loss": 0.06489086151123047, "step": 4688 }, { "epoch": 0.6533825681042291, "grad_norm": 0.4714740514755249, "learning_rate": 3.064015446953977e-06, "loss": 0.07942581176757812, "step": 4689 }, { "epoch": 0.6535219117954435, "grad_norm": 0.927311360836029, "learning_rate": 3.0618427145041017e-06, "loss": 0.08634185791015625, "step": 4690 }, { "epoch": 0.6536612554866579, "grad_norm": 1.5949325561523438, "learning_rate": 3.059670412668525e-06, "loss": 0.12372016906738281, "step": 4691 }, { "epoch": 0.6538005991778723, "grad_norm": 0.47615113854408264, "learning_rate": 3.0574985419298843e-06, "loss": 0.06116676330566406, "step": 4692 }, { "epoch": 0.6539399428690866, "grad_norm": 1.0710675716400146, "learning_rate": 3.055327102770719e-06, "loss": 0.11292266845703125, "step": 4693 }, { "epoch": 0.654079286560301, "grad_norm": 0.7073004841804504, "learning_rate": 3.053156095673474e-06, "loss": 0.10607337951660156, "step": 4694 }, { "epoch": 0.6542186302515154, "grad_norm": 0.784407913684845, "learning_rate": 3.0509855211204976e-06, "loss": 0.10425567626953125, "step": 4695 }, { "epoch": 0.6543579739427298, "grad_norm": 0.641082227230072, "learning_rate": 3.048815379594043e-06, "loss": 0.06744766235351562, "step": 4696 }, { "epoch": 0.6544973176339441, "grad_norm": 0.6870379447937012, "learning_rate": 3.046645671576264e-06, "loss": 0.09983444213867188, "step": 4697 }, { "epoch": 0.6546366613251585, "grad_norm": 0.621989369392395, "learning_rate": 3.044476397549221e-06, "loss": 0.07926559448242188, "step": 4698 }, { "epoch": 0.6547760050163729, "grad_norm": 0.599571943283081, "learning_rate": 3.0423075579948756e-06, "loss": 0.0833282470703125, "step": 4699 }, { "epoch": 0.6549153487075873, "grad_norm": 0.7003766894340515, "learning_rate": 3.0401391533950976e-06, "loss": 0.09557533264160156, "step": 4700 }, { "epoch": 0.6550546923988017, "grad_norm": 0.9373446106910706, "learning_rate": 3.037971184231655e-06, "loss": 0.11372184753417969, "step": 4701 }, { "epoch": 0.655194036090016, "grad_norm": 0.4543534219264984, "learning_rate": 3.035803650986222e-06, "loss": 0.06996679306030273, "step": 4702 }, { "epoch": 0.6553333797812304, "grad_norm": 0.8449156284332275, "learning_rate": 3.0336365541403723e-06, "loss": 0.09893989562988281, "step": 4703 }, { "epoch": 0.6554727234724448, "grad_norm": 0.644910454750061, "learning_rate": 3.0314698941755886e-06, "loss": 0.08953094482421875, "step": 4704 }, { "epoch": 0.6556120671636592, "grad_norm": 0.2656462490558624, "learning_rate": 3.0293036715732527e-06, "loss": 0.05027580261230469, "step": 4705 }, { "epoch": 0.6557514108548735, "grad_norm": 0.5400695204734802, "learning_rate": 3.0271378868146494e-06, "loss": 0.07495498657226562, "step": 4706 }, { "epoch": 0.6558907545460879, "grad_norm": 0.8552642464637756, "learning_rate": 3.024972540380966e-06, "loss": 0.11305046081542969, "step": 4707 }, { "epoch": 0.6560300982373023, "grad_norm": 0.5432556867599487, "learning_rate": 3.0228076327532925e-06, "loss": 0.08538818359375, "step": 4708 }, { "epoch": 0.6561694419285167, "grad_norm": 1.5374828577041626, "learning_rate": 3.0206431644126234e-06, "loss": 0.10196304321289062, "step": 4709 }, { "epoch": 0.656308785619731, "grad_norm": 0.9708107113838196, "learning_rate": 3.0184791358398537e-06, "loss": 0.10035514831542969, "step": 4710 }, { "epoch": 0.6564481293109454, "grad_norm": 0.3474626839160919, "learning_rate": 3.016315547515783e-06, "loss": 0.06448554992675781, "step": 4711 }, { "epoch": 0.6565874730021598, "grad_norm": 0.8820134401321411, "learning_rate": 3.0141523999211065e-06, "loss": 0.10756492614746094, "step": 4712 }, { "epoch": 0.6567268166933742, "grad_norm": 0.8876789212226868, "learning_rate": 3.0119896935364305e-06, "loss": 0.09217262268066406, "step": 4713 }, { "epoch": 0.6568661603845886, "grad_norm": 0.9960759282112122, "learning_rate": 3.009827428842258e-06, "loss": 0.08810043334960938, "step": 4714 }, { "epoch": 0.6570055040758029, "grad_norm": 0.7761502861976624, "learning_rate": 3.0076656063189926e-06, "loss": 0.10334014892578125, "step": 4715 }, { "epoch": 0.6571448477670173, "grad_norm": 2.242791175842285, "learning_rate": 3.0055042264469447e-06, "loss": 0.11852645874023438, "step": 4716 }, { "epoch": 0.6572841914582317, "grad_norm": 1.0808905363082886, "learning_rate": 3.003343289706324e-06, "loss": 0.11098289489746094, "step": 4717 }, { "epoch": 0.6574235351494461, "grad_norm": 1.1309775114059448, "learning_rate": 3.001182796577239e-06, "loss": 0.09761619567871094, "step": 4718 }, { "epoch": 0.6575628788406604, "grad_norm": 1.0852450132369995, "learning_rate": 2.999022747539701e-06, "loss": 0.08282470703125, "step": 4719 }, { "epoch": 0.6577022225318748, "grad_norm": 0.6308801174163818, "learning_rate": 2.9968631430736274e-06, "loss": 0.06317329406738281, "step": 4720 }, { "epoch": 0.6578415662230892, "grad_norm": 0.3648965358734131, "learning_rate": 2.99470398365883e-06, "loss": 0.06536102294921875, "step": 4721 }, { "epoch": 0.6579809099143036, "grad_norm": 0.9912438988685608, "learning_rate": 2.9925452697750275e-06, "loss": 0.08490562438964844, "step": 4722 }, { "epoch": 0.658120253605518, "grad_norm": 1.5233863592147827, "learning_rate": 2.990387001901834e-06, "loss": 0.13100814819335938, "step": 4723 }, { "epoch": 0.6582595972967323, "grad_norm": 0.8461006283760071, "learning_rate": 2.988229180518767e-06, "loss": 0.07372283935546875, "step": 4724 }, { "epoch": 0.6583989409879468, "grad_norm": 1.2942301034927368, "learning_rate": 2.9860718061052478e-06, "loss": 0.1334972381591797, "step": 4725 }, { "epoch": 0.6585382846791612, "grad_norm": 0.7035929560661316, "learning_rate": 2.9839148791405937e-06, "loss": 0.08423423767089844, "step": 4726 }, { "epoch": 0.6586776283703756, "grad_norm": 0.5705187320709229, "learning_rate": 2.981758400104028e-06, "loss": 0.0770721435546875, "step": 4727 }, { "epoch": 0.65881697206159, "grad_norm": 0.769984781742096, "learning_rate": 2.979602369474667e-06, "loss": 0.09398460388183594, "step": 4728 }, { "epoch": 0.6589563157528043, "grad_norm": 1.4363378286361694, "learning_rate": 2.977446787731532e-06, "loss": 0.133026123046875, "step": 4729 }, { "epoch": 0.6590956594440187, "grad_norm": 0.7027722597122192, "learning_rate": 2.975291655353546e-06, "loss": 0.07688331604003906, "step": 4730 }, { "epoch": 0.6592350031352331, "grad_norm": 1.065352201461792, "learning_rate": 2.9731369728195288e-06, "loss": 0.15423965454101562, "step": 4731 }, { "epoch": 0.6593743468264475, "grad_norm": 2.2857718467712402, "learning_rate": 2.9709827406082028e-06, "loss": 0.17110061645507812, "step": 4732 }, { "epoch": 0.6595136905176618, "grad_norm": 0.82207852602005, "learning_rate": 2.9688289591981887e-06, "loss": 0.10679054260253906, "step": 4733 }, { "epoch": 0.6596530342088762, "grad_norm": 0.5923954844474792, "learning_rate": 2.9666756290680078e-06, "loss": 0.07962226867675781, "step": 4734 }, { "epoch": 0.6597923779000906, "grad_norm": 1.739918828010559, "learning_rate": 2.964522750696079e-06, "loss": 0.1292591094970703, "step": 4735 }, { "epoch": 0.659931721591305, "grad_norm": 0.5785806179046631, "learning_rate": 2.962370324560725e-06, "loss": 0.07135391235351562, "step": 4736 }, { "epoch": 0.6600710652825194, "grad_norm": 0.818994402885437, "learning_rate": 2.9602183511401656e-06, "loss": 0.07843685150146484, "step": 4737 }, { "epoch": 0.6602104089737337, "grad_norm": 0.9518700242042542, "learning_rate": 2.9580668309125203e-06, "loss": 0.10823631286621094, "step": 4738 }, { "epoch": 0.6603497526649481, "grad_norm": 0.756278395652771, "learning_rate": 2.9559157643558046e-06, "loss": 0.08285140991210938, "step": 4739 }, { "epoch": 0.6604890963561625, "grad_norm": 0.944288432598114, "learning_rate": 2.9537651519479403e-06, "loss": 0.09114646911621094, "step": 4740 }, { "epoch": 0.6606284400473769, "grad_norm": 0.5135129690170288, "learning_rate": 2.951614994166743e-06, "loss": 0.07297897338867188, "step": 4741 }, { "epoch": 0.6607677837385912, "grad_norm": 0.8384546637535095, "learning_rate": 2.9494652914899267e-06, "loss": 0.0830078125, "step": 4742 }, { "epoch": 0.6609071274298056, "grad_norm": 0.5572512149810791, "learning_rate": 2.947316044395112e-06, "loss": 0.0834665298461914, "step": 4743 }, { "epoch": 0.66104647112102, "grad_norm": 0.7563642859458923, "learning_rate": 2.945167253359806e-06, "loss": 0.07952308654785156, "step": 4744 }, { "epoch": 0.6611858148122344, "grad_norm": 0.834784746170044, "learning_rate": 2.943018918861424e-06, "loss": 0.09600830078125, "step": 4745 }, { "epoch": 0.6613251585034488, "grad_norm": 0.42042210698127747, "learning_rate": 2.940871041377277e-06, "loss": 0.071533203125, "step": 4746 }, { "epoch": 0.6614645021946631, "grad_norm": 0.5135504007339478, "learning_rate": 2.938723621384572e-06, "loss": 0.07153034210205078, "step": 4747 }, { "epoch": 0.6616038458858775, "grad_norm": 0.8135823011398315, "learning_rate": 2.936576659360421e-06, "loss": 0.08835506439208984, "step": 4748 }, { "epoch": 0.6617431895770919, "grad_norm": 1.2466048002243042, "learning_rate": 2.9344301557818267e-06, "loss": 0.10341835021972656, "step": 4749 }, { "epoch": 0.6618825332683063, "grad_norm": 1.618283987045288, "learning_rate": 2.9322841111256937e-06, "loss": 0.08537483215332031, "step": 4750 }, { "epoch": 0.6620218769595206, "grad_norm": 1.6441413164138794, "learning_rate": 2.930138525868824e-06, "loss": 0.11366462707519531, "step": 4751 }, { "epoch": 0.662161220650735, "grad_norm": 0.7446433305740356, "learning_rate": 2.927993400487919e-06, "loss": 0.09136962890625, "step": 4752 }, { "epoch": 0.6623005643419494, "grad_norm": 0.7676959037780762, "learning_rate": 2.9258487354595754e-06, "loss": 0.08273506164550781, "step": 4753 }, { "epoch": 0.6624399080331638, "grad_norm": 1.519458293914795, "learning_rate": 2.9237045312602908e-06, "loss": 0.10042285919189453, "step": 4754 }, { "epoch": 0.6625792517243781, "grad_norm": 0.45711326599121094, "learning_rate": 2.921560788366454e-06, "loss": 0.07684326171875, "step": 4755 }, { "epoch": 0.6627185954155925, "grad_norm": 0.6125630140304565, "learning_rate": 2.9194175072543594e-06, "loss": 0.09619712829589844, "step": 4756 }, { "epoch": 0.6628579391068069, "grad_norm": 1.3009812831878662, "learning_rate": 2.9172746884001944e-06, "loss": 0.09812736511230469, "step": 4757 }, { "epoch": 0.6629972827980213, "grad_norm": 1.7865358591079712, "learning_rate": 2.9151323322800433e-06, "loss": 0.1266803741455078, "step": 4758 }, { "epoch": 0.6631366264892357, "grad_norm": 0.9123466610908508, "learning_rate": 2.9129904393698917e-06, "loss": 0.09645652770996094, "step": 4759 }, { "epoch": 0.66327597018045, "grad_norm": 0.6533408164978027, "learning_rate": 2.910849010145617e-06, "loss": 0.09238243103027344, "step": 4760 }, { "epoch": 0.6634153138716644, "grad_norm": 0.7964163422584534, "learning_rate": 2.908708045082994e-06, "loss": 0.08317756652832031, "step": 4761 }, { "epoch": 0.6635546575628788, "grad_norm": 0.7910414338111877, "learning_rate": 2.906567544657699e-06, "loss": 0.11670684814453125, "step": 4762 }, { "epoch": 0.6636940012540932, "grad_norm": 1.1521793603897095, "learning_rate": 2.9044275093453034e-06, "loss": 0.11070060729980469, "step": 4763 }, { "epoch": 0.6638333449453075, "grad_norm": 1.6660915613174438, "learning_rate": 2.902287939621272e-06, "loss": 0.12483978271484375, "step": 4764 }, { "epoch": 0.663972688636522, "grad_norm": 0.9028393626213074, "learning_rate": 2.9001488359609676e-06, "loss": 0.08618927001953125, "step": 4765 }, { "epoch": 0.6641120323277364, "grad_norm": 1.1174274682998657, "learning_rate": 2.898010198839651e-06, "loss": 0.14258956909179688, "step": 4766 }, { "epoch": 0.6642513760189508, "grad_norm": 0.6746264696121216, "learning_rate": 2.895872028732481e-06, "loss": 0.09804725646972656, "step": 4767 }, { "epoch": 0.6643907197101652, "grad_norm": 0.576452910900116, "learning_rate": 2.893734326114506e-06, "loss": 0.08038139343261719, "step": 4768 }, { "epoch": 0.6645300634013795, "grad_norm": 0.42509952187538147, "learning_rate": 2.8915970914606793e-06, "loss": 0.0684976577758789, "step": 4769 }, { "epoch": 0.6646694070925939, "grad_norm": 0.31513601541519165, "learning_rate": 2.8894603252458407e-06, "loss": 0.05723381042480469, "step": 4770 }, { "epoch": 0.6648087507838083, "grad_norm": 0.6048224568367004, "learning_rate": 2.8873240279447355e-06, "loss": 0.08136367797851562, "step": 4771 }, { "epoch": 0.6649480944750227, "grad_norm": 0.7138516306877136, "learning_rate": 2.8851882000319966e-06, "loss": 0.09760093688964844, "step": 4772 }, { "epoch": 0.665087438166237, "grad_norm": 0.6828570365905762, "learning_rate": 2.883052841982157e-06, "loss": 0.07766914367675781, "step": 4773 }, { "epoch": 0.6652267818574514, "grad_norm": 0.7712494730949402, "learning_rate": 2.8809179542696474e-06, "loss": 0.06341743469238281, "step": 4774 }, { "epoch": 0.6653661255486658, "grad_norm": 0.3066035807132721, "learning_rate": 2.878783537368789e-06, "loss": 0.053363800048828125, "step": 4775 }, { "epoch": 0.6655054692398802, "grad_norm": 1.4441039562225342, "learning_rate": 2.8766495917537985e-06, "loss": 0.10373115539550781, "step": 4776 }, { "epoch": 0.6656448129310946, "grad_norm": 0.6013437509536743, "learning_rate": 2.874516117898792e-06, "loss": 0.07220649719238281, "step": 4777 }, { "epoch": 0.6657841566223089, "grad_norm": 1.0156726837158203, "learning_rate": 2.8723831162777806e-06, "loss": 0.10346221923828125, "step": 4778 }, { "epoch": 0.6659235003135233, "grad_norm": 0.6404106616973877, "learning_rate": 2.8702505873646636e-06, "loss": 0.08525276184082031, "step": 4779 }, { "epoch": 0.6660628440047377, "grad_norm": 1.1624895334243774, "learning_rate": 2.8681185316332453e-06, "loss": 0.11228561401367188, "step": 4780 }, { "epoch": 0.6662021876959521, "grad_norm": 0.3383373022079468, "learning_rate": 2.865986949557218e-06, "loss": 0.05665302276611328, "step": 4781 }, { "epoch": 0.6663415313871665, "grad_norm": 1.6587635278701782, "learning_rate": 2.8638558416101683e-06, "loss": 0.10909605026245117, "step": 4782 }, { "epoch": 0.6664808750783808, "grad_norm": 0.9352493286132812, "learning_rate": 2.8617252082655813e-06, "loss": 0.08488082885742188, "step": 4783 }, { "epoch": 0.6666202187695952, "grad_norm": 0.7884294390678406, "learning_rate": 2.8595950499968352e-06, "loss": 0.08880805969238281, "step": 4784 }, { "epoch": 0.6667595624608096, "grad_norm": 0.985385000705719, "learning_rate": 2.8574653672772068e-06, "loss": 0.09012079238891602, "step": 4785 }, { "epoch": 0.666898906152024, "grad_norm": 1.9411952495574951, "learning_rate": 2.8553361605798545e-06, "loss": 0.11449623107910156, "step": 4786 }, { "epoch": 0.6670382498432383, "grad_norm": 0.7258504629135132, "learning_rate": 2.8532074303778446e-06, "loss": 0.08672428131103516, "step": 4787 }, { "epoch": 0.6671775935344527, "grad_norm": 1.0854904651641846, "learning_rate": 2.8510791771441327e-06, "loss": 0.11011695861816406, "step": 4788 }, { "epoch": 0.6673169372256671, "grad_norm": 0.454956591129303, "learning_rate": 2.8489514013515656e-06, "loss": 0.06454086303710938, "step": 4789 }, { "epoch": 0.6674562809168815, "grad_norm": 0.4540470838546753, "learning_rate": 2.8468241034728878e-06, "loss": 0.0703277587890625, "step": 4790 }, { "epoch": 0.6675956246080959, "grad_norm": 0.8543364405632019, "learning_rate": 2.8446972839807384e-06, "loss": 0.08442306518554688, "step": 4791 }, { "epoch": 0.6677349682993102, "grad_norm": 0.9565549492835999, "learning_rate": 2.8425709433476455e-06, "loss": 0.11561965942382812, "step": 4792 }, { "epoch": 0.6678743119905246, "grad_norm": 0.8850557804107666, "learning_rate": 2.8404450820460326e-06, "loss": 0.09357452392578125, "step": 4793 }, { "epoch": 0.668013655681739, "grad_norm": 0.6043317914009094, "learning_rate": 2.8383197005482187e-06, "loss": 0.08818912506103516, "step": 4794 }, { "epoch": 0.6681529993729534, "grad_norm": 0.5787788033485413, "learning_rate": 2.8361947993264185e-06, "loss": 0.08196640014648438, "step": 4795 }, { "epoch": 0.6682923430641677, "grad_norm": 1.4491320848464966, "learning_rate": 2.834070378852732e-06, "loss": 0.12901687622070312, "step": 4796 }, { "epoch": 0.6684316867553821, "grad_norm": 1.428146481513977, "learning_rate": 2.8319464395991567e-06, "loss": 0.1169729232788086, "step": 4797 }, { "epoch": 0.6685710304465965, "grad_norm": 0.5967636704444885, "learning_rate": 2.829822982037585e-06, "loss": 0.08183097839355469, "step": 4798 }, { "epoch": 0.6687103741378109, "grad_norm": 0.8519421219825745, "learning_rate": 2.8277000066398032e-06, "loss": 0.10699081420898438, "step": 4799 }, { "epoch": 0.6688497178290252, "grad_norm": 0.9343273043632507, "learning_rate": 2.8255775138774827e-06, "loss": 0.08817100524902344, "step": 4800 }, { "epoch": 0.6689890615202396, "grad_norm": 1.1249842643737793, "learning_rate": 2.823455504222198e-06, "loss": 0.11327171325683594, "step": 4801 }, { "epoch": 0.669128405211454, "grad_norm": 0.8920124769210815, "learning_rate": 2.821333978145407e-06, "loss": 0.10587215423583984, "step": 4802 }, { "epoch": 0.6692677489026684, "grad_norm": 0.5253137350082397, "learning_rate": 2.8192129361184685e-06, "loss": 0.07322311401367188, "step": 4803 }, { "epoch": 0.6694070925938828, "grad_norm": 0.6977540254592896, "learning_rate": 2.817092378612625e-06, "loss": 0.08030319213867188, "step": 4804 }, { "epoch": 0.6695464362850972, "grad_norm": 0.529238224029541, "learning_rate": 2.814972306099018e-06, "loss": 0.08096122741699219, "step": 4805 }, { "epoch": 0.6696857799763116, "grad_norm": 1.2090513706207275, "learning_rate": 2.8128527190486823e-06, "loss": 0.12175750732421875, "step": 4806 }, { "epoch": 0.669825123667526, "grad_norm": 0.663022518157959, "learning_rate": 2.8107336179325383e-06, "loss": 0.08791637420654297, "step": 4807 }, { "epoch": 0.6699644673587404, "grad_norm": 1.1191449165344238, "learning_rate": 2.808615003221401e-06, "loss": 0.10379791259765625, "step": 4808 }, { "epoch": 0.6701038110499548, "grad_norm": 0.9050287008285522, "learning_rate": 2.80649687538598e-06, "loss": 0.09360218048095703, "step": 4809 }, { "epoch": 0.6702431547411691, "grad_norm": 0.6915168166160583, "learning_rate": 2.8043792348968767e-06, "loss": 0.09372425079345703, "step": 4810 }, { "epoch": 0.6703824984323835, "grad_norm": 0.5338101387023926, "learning_rate": 2.8022620822245782e-06, "loss": 0.06912040710449219, "step": 4811 }, { "epoch": 0.6705218421235979, "grad_norm": 0.669274628162384, "learning_rate": 2.8001454178394715e-06, "loss": 0.09791755676269531, "step": 4812 }, { "epoch": 0.6706611858148123, "grad_norm": 0.8667890429496765, "learning_rate": 2.7980292422118282e-06, "loss": 0.09391021728515625, "step": 4813 }, { "epoch": 0.6708005295060266, "grad_norm": 1.246782898902893, "learning_rate": 2.795913555811817e-06, "loss": 0.13842391967773438, "step": 4814 }, { "epoch": 0.670939873197241, "grad_norm": 1.3568227291107178, "learning_rate": 2.793798359109492e-06, "loss": 0.11986732482910156, "step": 4815 }, { "epoch": 0.6710792168884554, "grad_norm": 0.7302311658859253, "learning_rate": 2.7916836525748024e-06, "loss": 0.09238815307617188, "step": 4816 }, { "epoch": 0.6712185605796698, "grad_norm": 0.8245944380760193, "learning_rate": 2.7895694366775934e-06, "loss": 0.10763740539550781, "step": 4817 }, { "epoch": 0.6713579042708842, "grad_norm": 0.8425164222717285, "learning_rate": 2.7874557118875863e-06, "loss": 0.11360740661621094, "step": 4818 }, { "epoch": 0.6714972479620985, "grad_norm": 1.2464371919631958, "learning_rate": 2.7853424786744068e-06, "loss": 0.12813949584960938, "step": 4819 }, { "epoch": 0.6716365916533129, "grad_norm": 0.41778644919395447, "learning_rate": 2.7832297375075685e-06, "loss": 0.07376861572265625, "step": 4820 }, { "epoch": 0.6717759353445273, "grad_norm": 0.4582722783088684, "learning_rate": 2.7811174888564713e-06, "loss": 0.06854629516601562, "step": 4821 }, { "epoch": 0.6719152790357417, "grad_norm": 0.9458609819412231, "learning_rate": 2.779005733190412e-06, "loss": 0.1035003662109375, "step": 4822 }, { "epoch": 0.672054622726956, "grad_norm": 0.6470925211906433, "learning_rate": 2.7768944709785705e-06, "loss": 0.0845937728881836, "step": 4823 }, { "epoch": 0.6721939664181704, "grad_norm": 1.3163971900939941, "learning_rate": 2.774783702690025e-06, "loss": 0.08981132507324219, "step": 4824 }, { "epoch": 0.6723333101093848, "grad_norm": 0.6491605043411255, "learning_rate": 2.7726734287937367e-06, "loss": 0.08749675750732422, "step": 4825 }, { "epoch": 0.6724726538005992, "grad_norm": 1.4418766498565674, "learning_rate": 2.770563649758562e-06, "loss": 0.11719894409179688, "step": 4826 }, { "epoch": 0.6726119974918136, "grad_norm": 0.8123962879180908, "learning_rate": 2.768454366053247e-06, "loss": 0.07720470428466797, "step": 4827 }, { "epoch": 0.6727513411830279, "grad_norm": 1.19899582862854, "learning_rate": 2.7663455781464245e-06, "loss": 0.08669281005859375, "step": 4828 }, { "epoch": 0.6728906848742423, "grad_norm": 0.9468966126441956, "learning_rate": 2.764237286506618e-06, "loss": 0.08114433288574219, "step": 4829 }, { "epoch": 0.6730300285654567, "grad_norm": 0.8268569707870483, "learning_rate": 2.7621294916022423e-06, "loss": 0.076629638671875, "step": 4830 }, { "epoch": 0.6731693722566711, "grad_norm": 1.317325472831726, "learning_rate": 2.760022193901605e-06, "loss": 0.09566307067871094, "step": 4831 }, { "epoch": 0.6733087159478854, "grad_norm": 0.9756562113761902, "learning_rate": 2.7579153938728943e-06, "loss": 0.10851573944091797, "step": 4832 }, { "epoch": 0.6734480596390998, "grad_norm": 0.8978959321975708, "learning_rate": 2.7558090919841972e-06, "loss": 0.10559654235839844, "step": 4833 }, { "epoch": 0.6735874033303142, "grad_norm": 0.9850003719329834, "learning_rate": 2.753703288703482e-06, "loss": 0.08271217346191406, "step": 4834 }, { "epoch": 0.6737267470215286, "grad_norm": 0.8053244948387146, "learning_rate": 2.7515979844986148e-06, "loss": 0.11854934692382812, "step": 4835 }, { "epoch": 0.673866090712743, "grad_norm": 0.8326334357261658, "learning_rate": 2.749493179837341e-06, "loss": 0.10269355773925781, "step": 4836 }, { "epoch": 0.6740054344039573, "grad_norm": 0.6919323205947876, "learning_rate": 2.747388875187303e-06, "loss": 0.09783554077148438, "step": 4837 }, { "epoch": 0.6741447780951717, "grad_norm": 0.7482173442840576, "learning_rate": 2.7452850710160305e-06, "loss": 0.07235908508300781, "step": 4838 }, { "epoch": 0.6742841217863861, "grad_norm": 0.7391013503074646, "learning_rate": 2.74318176779094e-06, "loss": 0.08195686340332031, "step": 4839 }, { "epoch": 0.6744234654776005, "grad_norm": 0.598052442073822, "learning_rate": 2.741078965979334e-06, "loss": 0.07777690887451172, "step": 4840 }, { "epoch": 0.6745628091688148, "grad_norm": 0.9068569540977478, "learning_rate": 2.7389766660484103e-06, "loss": 0.1204833984375, "step": 4841 }, { "epoch": 0.6747021528600292, "grad_norm": 1.0717644691467285, "learning_rate": 2.736874868465253e-06, "loss": 0.12575531005859375, "step": 4842 }, { "epoch": 0.6748414965512436, "grad_norm": 0.45943090319633484, "learning_rate": 2.7347735736968318e-06, "loss": 0.0756063461303711, "step": 4843 }, { "epoch": 0.674980840242458, "grad_norm": 0.9058017730712891, "learning_rate": 2.7326727822100047e-06, "loss": 0.08831977844238281, "step": 4844 }, { "epoch": 0.6751201839336725, "grad_norm": 0.773091733455658, "learning_rate": 2.7305724944715218e-06, "loss": 0.09439849853515625, "step": 4845 }, { "epoch": 0.6752595276248868, "grad_norm": 0.8381521105766296, "learning_rate": 2.72847271094802e-06, "loss": 0.08481788635253906, "step": 4846 }, { "epoch": 0.6753988713161012, "grad_norm": 1.367621898651123, "learning_rate": 2.7263734321060198e-06, "loss": 0.09213542938232422, "step": 4847 }, { "epoch": 0.6755382150073156, "grad_norm": 0.9281383156776428, "learning_rate": 2.7242746584119364e-06, "loss": 0.09428596496582031, "step": 4848 }, { "epoch": 0.67567755869853, "grad_norm": 0.9523243308067322, "learning_rate": 2.722176390332071e-06, "loss": 0.08506202697753906, "step": 4849 }, { "epoch": 0.6758169023897443, "grad_norm": 1.3910274505615234, "learning_rate": 2.720078628332605e-06, "loss": 0.1060791015625, "step": 4850 }, { "epoch": 0.6759562460809587, "grad_norm": 1.2556185722351074, "learning_rate": 2.7179813728796156e-06, "loss": 0.08488082885742188, "step": 4851 }, { "epoch": 0.6760955897721731, "grad_norm": 0.7347831726074219, "learning_rate": 2.7158846244390657e-06, "loss": 0.0919189453125, "step": 4852 }, { "epoch": 0.6762349334633875, "grad_norm": 0.5280786752700806, "learning_rate": 2.7137883834768076e-06, "loss": 0.08063507080078125, "step": 4853 }, { "epoch": 0.6763742771546019, "grad_norm": 0.5548141002655029, "learning_rate": 2.7116926504585756e-06, "loss": 0.07814216613769531, "step": 4854 }, { "epoch": 0.6765136208458162, "grad_norm": 0.5815032720565796, "learning_rate": 2.7095974258499914e-06, "loss": 0.08601951599121094, "step": 4855 }, { "epoch": 0.6766529645370306, "grad_norm": 0.8434306383132935, "learning_rate": 2.7075027101165706e-06, "loss": 0.08471393585205078, "step": 4856 }, { "epoch": 0.676792308228245, "grad_norm": 0.40128040313720703, "learning_rate": 2.7054085037237066e-06, "loss": 0.06995391845703125, "step": 4857 }, { "epoch": 0.6769316519194594, "grad_norm": 0.986789345741272, "learning_rate": 2.7033148071366866e-06, "loss": 0.1290416717529297, "step": 4858 }, { "epoch": 0.6770709956106737, "grad_norm": 0.8090910315513611, "learning_rate": 2.701221620820685e-06, "loss": 0.08688926696777344, "step": 4859 }, { "epoch": 0.6772103393018881, "grad_norm": 0.7125062346458435, "learning_rate": 2.6991289452407564e-06, "loss": 0.08444881439208984, "step": 4860 }, { "epoch": 0.6773496829931025, "grad_norm": 0.9453529715538025, "learning_rate": 2.697036780861845e-06, "loss": 0.10524559020996094, "step": 4861 }, { "epoch": 0.6774890266843169, "grad_norm": 0.9194634556770325, "learning_rate": 2.694945128148784e-06, "loss": 0.09344100952148438, "step": 4862 }, { "epoch": 0.6776283703755313, "grad_norm": 0.6666544079780579, "learning_rate": 2.692853987566291e-06, "loss": 0.08400917053222656, "step": 4863 }, { "epoch": 0.6777677140667456, "grad_norm": 1.0617913007736206, "learning_rate": 2.690763359578969e-06, "loss": 0.11961936950683594, "step": 4864 }, { "epoch": 0.67790705775796, "grad_norm": 1.744156002998352, "learning_rate": 2.6886732446513066e-06, "loss": 0.14528656005859375, "step": 4865 }, { "epoch": 0.6780464014491744, "grad_norm": 0.6291934251785278, "learning_rate": 2.68658364324768e-06, "loss": 0.07809066772460938, "step": 4866 }, { "epoch": 0.6781857451403888, "grad_norm": 0.9507735371589661, "learning_rate": 2.684494555832353e-06, "loss": 0.08049392700195312, "step": 4867 }, { "epoch": 0.6783250888316031, "grad_norm": 1.1454229354858398, "learning_rate": 2.6824059828694715e-06, "loss": 0.11737060546875, "step": 4868 }, { "epoch": 0.6784644325228175, "grad_norm": 1.139288306236267, "learning_rate": 2.680317924823068e-06, "loss": 0.088531494140625, "step": 4869 }, { "epoch": 0.6786037762140319, "grad_norm": 1.9863158464431763, "learning_rate": 2.6782303821570644e-06, "loss": 0.1004037857055664, "step": 4870 }, { "epoch": 0.6787431199052463, "grad_norm": 1.461610198020935, "learning_rate": 2.676143355335263e-06, "loss": 0.1124725341796875, "step": 4871 }, { "epoch": 0.6788824635964607, "grad_norm": 0.7863550186157227, "learning_rate": 2.6740568448213523e-06, "loss": 0.08798027038574219, "step": 4872 }, { "epoch": 0.679021807287675, "grad_norm": 0.6482836008071899, "learning_rate": 2.6719708510789077e-06, "loss": 0.09808540344238281, "step": 4873 }, { "epoch": 0.6791611509788894, "grad_norm": 0.8616598844528198, "learning_rate": 2.669885374571392e-06, "loss": 0.1130218505859375, "step": 4874 }, { "epoch": 0.6793004946701038, "grad_norm": 0.7991536259651184, "learning_rate": 2.667800415762149e-06, "loss": 0.12185859680175781, "step": 4875 }, { "epoch": 0.6794398383613182, "grad_norm": 0.7310281991958618, "learning_rate": 2.665715975114407e-06, "loss": 0.0830841064453125, "step": 4876 }, { "epoch": 0.6795791820525325, "grad_norm": 0.7052139639854431, "learning_rate": 2.6636320530912817e-06, "loss": 0.08011817932128906, "step": 4877 }, { "epoch": 0.6797185257437469, "grad_norm": 0.9332994222640991, "learning_rate": 2.6615486501557765e-06, "loss": 0.09555435180664062, "step": 4878 }, { "epoch": 0.6798578694349613, "grad_norm": 1.7477976083755493, "learning_rate": 2.659465766770772e-06, "loss": 0.14669227600097656, "step": 4879 }, { "epoch": 0.6799972131261757, "grad_norm": 0.569505512714386, "learning_rate": 2.6573834033990404e-06, "loss": 0.08265876770019531, "step": 4880 }, { "epoch": 0.68013655681739, "grad_norm": 0.699120044708252, "learning_rate": 2.655301560503234e-06, "loss": 0.083587646484375, "step": 4881 }, { "epoch": 0.6802759005086044, "grad_norm": 0.5418126583099365, "learning_rate": 2.6532202385458875e-06, "loss": 0.07695293426513672, "step": 4882 }, { "epoch": 0.6804152441998188, "grad_norm": 0.7032243609428406, "learning_rate": 2.6511394379894274e-06, "loss": 0.08862686157226562, "step": 4883 }, { "epoch": 0.6805545878910332, "grad_norm": 1.0873680114746094, "learning_rate": 2.649059159296158e-06, "loss": 0.10976123809814453, "step": 4884 }, { "epoch": 0.6806939315822477, "grad_norm": 1.0109727382659912, "learning_rate": 2.6469794029282726e-06, "loss": 0.08924293518066406, "step": 4885 }, { "epoch": 0.680833275273462, "grad_norm": 0.5908765196800232, "learning_rate": 2.6449001693478438e-06, "loss": 0.07377243041992188, "step": 4886 }, { "epoch": 0.6809726189646764, "grad_norm": 1.8175169229507446, "learning_rate": 2.642821459016827e-06, "loss": 0.1403636932373047, "step": 4887 }, { "epoch": 0.6811119626558908, "grad_norm": 0.7824096083641052, "learning_rate": 2.6407432723970694e-06, "loss": 0.08083534240722656, "step": 4888 }, { "epoch": 0.6812513063471052, "grad_norm": 0.9495210647583008, "learning_rate": 2.6386656099502917e-06, "loss": 0.09253311157226562, "step": 4889 }, { "epoch": 0.6813906500383196, "grad_norm": 0.46549826860427856, "learning_rate": 2.6365884721381045e-06, "loss": 0.07090473175048828, "step": 4890 }, { "epoch": 0.6815299937295339, "grad_norm": 0.43383997678756714, "learning_rate": 2.6345118594220044e-06, "loss": 0.08213138580322266, "step": 4891 }, { "epoch": 0.6816693374207483, "grad_norm": 0.6675127744674683, "learning_rate": 2.632435772263363e-06, "loss": 0.07434463500976562, "step": 4892 }, { "epoch": 0.6818086811119627, "grad_norm": 0.8252811431884766, "learning_rate": 2.6303602111234394e-06, "loss": 0.0945730209350586, "step": 4893 }, { "epoch": 0.6819480248031771, "grad_norm": 0.7861129641532898, "learning_rate": 2.6282851764633765e-06, "loss": 0.09316444396972656, "step": 4894 }, { "epoch": 0.6820873684943914, "grad_norm": 0.604897141456604, "learning_rate": 2.626210668744203e-06, "loss": 0.09289932250976562, "step": 4895 }, { "epoch": 0.6822267121856058, "grad_norm": 0.6582683324813843, "learning_rate": 2.624136688426824e-06, "loss": 0.07454681396484375, "step": 4896 }, { "epoch": 0.6823660558768202, "grad_norm": 0.7638270854949951, "learning_rate": 2.6220632359720287e-06, "loss": 0.09719276428222656, "step": 4897 }, { "epoch": 0.6825053995680346, "grad_norm": 0.6273645162582397, "learning_rate": 2.6199903118404934e-06, "loss": 0.08830642700195312, "step": 4898 }, { "epoch": 0.682644743259249, "grad_norm": 0.3732198476791382, "learning_rate": 2.617917916492776e-06, "loss": 0.06409263610839844, "step": 4899 }, { "epoch": 0.6827840869504633, "grad_norm": 0.7326235771179199, "learning_rate": 2.615846050389312e-06, "loss": 0.0877227783203125, "step": 4900 }, { "epoch": 0.6829234306416777, "grad_norm": 0.677246630191803, "learning_rate": 2.6137747139904262e-06, "loss": 0.087127685546875, "step": 4901 }, { "epoch": 0.6830627743328921, "grad_norm": 0.7425593733787537, "learning_rate": 2.611703907756319e-06, "loss": 0.09570503234863281, "step": 4902 }, { "epoch": 0.6832021180241065, "grad_norm": 0.731720507144928, "learning_rate": 2.6096336321470796e-06, "loss": 0.08262157440185547, "step": 4903 }, { "epoch": 0.6833414617153208, "grad_norm": 0.562707245349884, "learning_rate": 2.6075638876226715e-06, "loss": 0.07214546203613281, "step": 4904 }, { "epoch": 0.6834808054065352, "grad_norm": 0.5783421993255615, "learning_rate": 2.605494674642948e-06, "loss": 0.07075309753417969, "step": 4905 }, { "epoch": 0.6836201490977496, "grad_norm": 0.7518441677093506, "learning_rate": 2.603425993667642e-06, "loss": 0.1015167236328125, "step": 4906 }, { "epoch": 0.683759492788964, "grad_norm": 0.7395201921463013, "learning_rate": 2.6013578451563653e-06, "loss": 0.08829402923583984, "step": 4907 }, { "epoch": 0.6838988364801784, "grad_norm": 0.42543572187423706, "learning_rate": 2.599290229568612e-06, "loss": 0.062152862548828125, "step": 4908 }, { "epoch": 0.6840381801713927, "grad_norm": 0.685438334941864, "learning_rate": 2.59722314736376e-06, "loss": 0.085174560546875, "step": 4909 }, { "epoch": 0.6841775238626071, "grad_norm": 1.2556304931640625, "learning_rate": 2.5951565990010706e-06, "loss": 0.11131668090820312, "step": 4910 }, { "epoch": 0.6843168675538215, "grad_norm": 0.39622998237609863, "learning_rate": 2.5930905849396792e-06, "loss": 0.058495521545410156, "step": 4911 }, { "epoch": 0.6844562112450359, "grad_norm": 1.2602699995040894, "learning_rate": 2.5910251056386113e-06, "loss": 0.1051025390625, "step": 4912 }, { "epoch": 0.6845955549362502, "grad_norm": 1.2307716608047485, "learning_rate": 2.5889601615567657e-06, "loss": 0.14907264709472656, "step": 4913 }, { "epoch": 0.6847348986274646, "grad_norm": 0.4627978801727295, "learning_rate": 2.5868957531529283e-06, "loss": 0.06547260284423828, "step": 4914 }, { "epoch": 0.684874242318679, "grad_norm": 1.1658315658569336, "learning_rate": 2.584831880885761e-06, "loss": 0.13507843017578125, "step": 4915 }, { "epoch": 0.6850135860098934, "grad_norm": 2.086185932159424, "learning_rate": 2.582768545213811e-06, "loss": 0.13948440551757812, "step": 4916 }, { "epoch": 0.6851529297011077, "grad_norm": 0.6180437803268433, "learning_rate": 2.5807057465955065e-06, "loss": 0.09046745300292969, "step": 4917 }, { "epoch": 0.6852922733923221, "grad_norm": 0.556489884853363, "learning_rate": 2.5786434854891482e-06, "loss": 0.07906913757324219, "step": 4918 }, { "epoch": 0.6854316170835365, "grad_norm": 0.33118903636932373, "learning_rate": 2.576581762352928e-06, "loss": 0.06758880615234375, "step": 4919 }, { "epoch": 0.6855709607747509, "grad_norm": 0.4765889346599579, "learning_rate": 2.574520577644913e-06, "loss": 0.0757293701171875, "step": 4920 }, { "epoch": 0.6857103044659653, "grad_norm": 1.4520190954208374, "learning_rate": 2.5724599318230504e-06, "loss": 0.0939016342163086, "step": 4921 }, { "epoch": 0.6858496481571796, "grad_norm": 0.4850568473339081, "learning_rate": 2.570399825345169e-06, "loss": 0.06742286682128906, "step": 4922 }, { "epoch": 0.685988991848394, "grad_norm": 1.0115488767623901, "learning_rate": 2.5683402586689788e-06, "loss": 0.08514595031738281, "step": 4923 }, { "epoch": 0.6861283355396084, "grad_norm": 0.6644041538238525, "learning_rate": 2.566281232252068e-06, "loss": 0.0893707275390625, "step": 4924 }, { "epoch": 0.6862676792308228, "grad_norm": 0.7775734663009644, "learning_rate": 2.564222746551903e-06, "loss": 0.0960988998413086, "step": 4925 }, { "epoch": 0.6864070229220373, "grad_norm": 1.333435297012329, "learning_rate": 2.562164802025834e-06, "loss": 0.11841964721679688, "step": 4926 }, { "epoch": 0.6865463666132516, "grad_norm": 1.4793559312820435, "learning_rate": 2.5601073991310903e-06, "loss": 0.11116600036621094, "step": 4927 }, { "epoch": 0.686685710304466, "grad_norm": 0.8104956150054932, "learning_rate": 2.5580505383247796e-06, "loss": 0.08358001708984375, "step": 4928 }, { "epoch": 0.6868250539956804, "grad_norm": 1.1661444902420044, "learning_rate": 2.5559942200638866e-06, "loss": 0.11224746704101562, "step": 4929 }, { "epoch": 0.6869643976868948, "grad_norm": 1.0963927507400513, "learning_rate": 2.5539384448052797e-06, "loss": 0.08185482025146484, "step": 4930 }, { "epoch": 0.6871037413781091, "grad_norm": 0.8783205151557922, "learning_rate": 2.5518832130057082e-06, "loss": 0.09033203125, "step": 4931 }, { "epoch": 0.6872430850693235, "grad_norm": 0.7792655825614929, "learning_rate": 2.5498285251217938e-06, "loss": 0.08570575714111328, "step": 4932 }, { "epoch": 0.6873824287605379, "grad_norm": 2.099876642227173, "learning_rate": 2.5477743816100443e-06, "loss": 0.13087749481201172, "step": 4933 }, { "epoch": 0.6875217724517523, "grad_norm": 1.6808431148529053, "learning_rate": 2.5457207829268394e-06, "loss": 0.08675384521484375, "step": 4934 }, { "epoch": 0.6876611161429667, "grad_norm": 2.4423117637634277, "learning_rate": 2.5436677295284474e-06, "loss": 0.11910629272460938, "step": 4935 }, { "epoch": 0.687800459834181, "grad_norm": 1.0564205646514893, "learning_rate": 2.5416152218710044e-06, "loss": 0.08564567565917969, "step": 4936 }, { "epoch": 0.6879398035253954, "grad_norm": 0.7503150701522827, "learning_rate": 2.539563260410533e-06, "loss": 0.09080886840820312, "step": 4937 }, { "epoch": 0.6880791472166098, "grad_norm": 0.8772290945053101, "learning_rate": 2.5375118456029345e-06, "loss": 0.09179496765136719, "step": 4938 }, { "epoch": 0.6882184909078242, "grad_norm": 1.4153661727905273, "learning_rate": 2.5354609779039844e-06, "loss": 0.10867500305175781, "step": 4939 }, { "epoch": 0.6883578345990385, "grad_norm": 1.4380228519439697, "learning_rate": 2.533410657769337e-06, "loss": 0.1267547607421875, "step": 4940 }, { "epoch": 0.6884971782902529, "grad_norm": 1.2612251043319702, "learning_rate": 2.531360885654528e-06, "loss": 0.09645843505859375, "step": 4941 }, { "epoch": 0.6886365219814673, "grad_norm": 0.5271649956703186, "learning_rate": 2.529311662014972e-06, "loss": 0.08286094665527344, "step": 4942 }, { "epoch": 0.6887758656726817, "grad_norm": 0.932640016078949, "learning_rate": 2.5272629873059564e-06, "loss": 0.08944320678710938, "step": 4943 }, { "epoch": 0.688915209363896, "grad_norm": 0.9565929770469666, "learning_rate": 2.5252148619826535e-06, "loss": 0.06742286682128906, "step": 4944 }, { "epoch": 0.6890545530551104, "grad_norm": 0.7364131808280945, "learning_rate": 2.5231672865001056e-06, "loss": 0.09723472595214844, "step": 4945 }, { "epoch": 0.6891938967463248, "grad_norm": 0.48223716020584106, "learning_rate": 2.5211202613132413e-06, "loss": 0.06611442565917969, "step": 4946 }, { "epoch": 0.6893332404375392, "grad_norm": 1.395226001739502, "learning_rate": 2.5190737868768592e-06, "loss": 0.1139373779296875, "step": 4947 }, { "epoch": 0.6894725841287536, "grad_norm": 0.6748273968696594, "learning_rate": 2.5170278636456413e-06, "loss": 0.10214805603027344, "step": 4948 }, { "epoch": 0.6896119278199679, "grad_norm": 0.6655899882316589, "learning_rate": 2.5149824920741493e-06, "loss": 0.08542156219482422, "step": 4949 }, { "epoch": 0.6897512715111823, "grad_norm": 0.6007693409919739, "learning_rate": 2.51293767261681e-06, "loss": 0.08535385131835938, "step": 4950 }, { "epoch": 0.6898906152023967, "grad_norm": 0.718933641910553, "learning_rate": 2.5108934057279376e-06, "loss": 0.08695220947265625, "step": 4951 }, { "epoch": 0.6900299588936111, "grad_norm": 0.6877939701080322, "learning_rate": 2.5088496918617243e-06, "loss": 0.06530380249023438, "step": 4952 }, { "epoch": 0.6901693025848255, "grad_norm": 0.44536924362182617, "learning_rate": 2.5068065314722378e-06, "loss": 0.07214927673339844, "step": 4953 }, { "epoch": 0.6903086462760398, "grad_norm": 0.482807993888855, "learning_rate": 2.504763925013419e-06, "loss": 0.06805229187011719, "step": 4954 }, { "epoch": 0.6904479899672542, "grad_norm": 1.4450145959854126, "learning_rate": 2.5027218729390867e-06, "loss": 0.10350990295410156, "step": 4955 }, { "epoch": 0.6905873336584686, "grad_norm": 0.9201472401618958, "learning_rate": 2.500680375702943e-06, "loss": 0.0856475830078125, "step": 4956 }, { "epoch": 0.690726677349683, "grad_norm": 0.8034936189651489, "learning_rate": 2.498639433758557e-06, "loss": 0.08782196044921875, "step": 4957 }, { "epoch": 0.6908660210408973, "grad_norm": 0.431333065032959, "learning_rate": 2.4965990475593814e-06, "loss": 0.06488990783691406, "step": 4958 }, { "epoch": 0.6910053647321117, "grad_norm": 1.2362797260284424, "learning_rate": 2.494559217558746e-06, "loss": 0.11938858032226562, "step": 4959 }, { "epoch": 0.6911447084233261, "grad_norm": 0.5537431836128235, "learning_rate": 2.492519944209853e-06, "loss": 0.07487010955810547, "step": 4960 }, { "epoch": 0.6912840521145405, "grad_norm": 0.5638784766197205, "learning_rate": 2.4904812279657792e-06, "loss": 0.08153152465820312, "step": 4961 }, { "epoch": 0.6914233958057548, "grad_norm": 1.3836572170257568, "learning_rate": 2.488443069279483e-06, "loss": 0.1349620819091797, "step": 4962 }, { "epoch": 0.6915627394969692, "grad_norm": 0.5449133515357971, "learning_rate": 2.4864054686037993e-06, "loss": 0.07941818237304688, "step": 4963 }, { "epoch": 0.6917020831881836, "grad_norm": 0.37328392267227173, "learning_rate": 2.484368426391432e-06, "loss": 0.06599998474121094, "step": 4964 }, { "epoch": 0.691841426879398, "grad_norm": 0.8601976037025452, "learning_rate": 2.482331943094969e-06, "loss": 0.10688209533691406, "step": 4965 }, { "epoch": 0.6919807705706125, "grad_norm": 1.09879732131958, "learning_rate": 2.480296019166868e-06, "loss": 0.11005210876464844, "step": 4966 }, { "epoch": 0.6921201142618268, "grad_norm": 0.4744446575641632, "learning_rate": 2.478260655059467e-06, "loss": 0.06387901306152344, "step": 4967 }, { "epoch": 0.6922594579530412, "grad_norm": 0.9280557036399841, "learning_rate": 2.4762258512249745e-06, "loss": 0.12471580505371094, "step": 4968 }, { "epoch": 0.6923988016442556, "grad_norm": 0.32965558767318726, "learning_rate": 2.4741916081154786e-06, "loss": 0.05703544616699219, "step": 4969 }, { "epoch": 0.69253814533547, "grad_norm": 0.7049430012702942, "learning_rate": 2.472157926182945e-06, "loss": 0.08662033081054688, "step": 4970 }, { "epoch": 0.6926774890266844, "grad_norm": 0.6615204215049744, "learning_rate": 2.470124805879208e-06, "loss": 0.06768131256103516, "step": 4971 }, { "epoch": 0.6928168327178987, "grad_norm": 1.047497272491455, "learning_rate": 2.468092247655979e-06, "loss": 0.10082626342773438, "step": 4972 }, { "epoch": 0.6929561764091131, "grad_norm": 1.2424119710922241, "learning_rate": 2.466060251964848e-06, "loss": 0.09906578063964844, "step": 4973 }, { "epoch": 0.6930955201003275, "grad_norm": 1.537980079650879, "learning_rate": 2.464028819257281e-06, "loss": 0.08895492553710938, "step": 4974 }, { "epoch": 0.6932348637915419, "grad_norm": 0.603426992893219, "learning_rate": 2.4619979499846127e-06, "loss": 0.09921455383300781, "step": 4975 }, { "epoch": 0.6933742074827562, "grad_norm": 0.5634564161300659, "learning_rate": 2.459967644598054e-06, "loss": 0.08388328552246094, "step": 4976 }, { "epoch": 0.6935135511739706, "grad_norm": 0.7145993709564209, "learning_rate": 2.457937903548695e-06, "loss": 0.09768486022949219, "step": 4977 }, { "epoch": 0.693652894865185, "grad_norm": 0.719183087348938, "learning_rate": 2.4559087272875e-06, "loss": 0.08734130859375, "step": 4978 }, { "epoch": 0.6937922385563994, "grad_norm": 0.3717139959335327, "learning_rate": 2.4538801162653002e-06, "loss": 0.06197643280029297, "step": 4979 }, { "epoch": 0.6939315822476138, "grad_norm": 1.1658236980438232, "learning_rate": 2.451852070932811e-06, "loss": 0.09628105163574219, "step": 4980 }, { "epoch": 0.6940709259388281, "grad_norm": 0.6189127564430237, "learning_rate": 2.4498245917406195e-06, "loss": 0.07534122467041016, "step": 4981 }, { "epoch": 0.6942102696300425, "grad_norm": 0.8106676340103149, "learning_rate": 2.4477976791391784e-06, "loss": 0.09098434448242188, "step": 4982 }, { "epoch": 0.6943496133212569, "grad_norm": 1.5778454542160034, "learning_rate": 2.445771333578825e-06, "loss": 0.11426734924316406, "step": 4983 }, { "epoch": 0.6944889570124713, "grad_norm": 0.879702627658844, "learning_rate": 2.443745555509768e-06, "loss": 0.09824371337890625, "step": 4984 }, { "epoch": 0.6946283007036856, "grad_norm": 0.590299665927887, "learning_rate": 2.4417203453820892e-06, "loss": 0.08037567138671875, "step": 4985 }, { "epoch": 0.6947676443949, "grad_norm": 0.8189776539802551, "learning_rate": 2.4396957036457443e-06, "loss": 0.08878135681152344, "step": 4986 }, { "epoch": 0.6949069880861144, "grad_norm": 0.7393333315849304, "learning_rate": 2.437671630750558e-06, "loss": 0.08381462097167969, "step": 4987 }, { "epoch": 0.6950463317773288, "grad_norm": 0.9540377259254456, "learning_rate": 2.4356481271462396e-06, "loss": 0.08040332794189453, "step": 4988 }, { "epoch": 0.6951856754685432, "grad_norm": 0.42229515314102173, "learning_rate": 2.4336251932823594e-06, "loss": 0.06859111785888672, "step": 4989 }, { "epoch": 0.6953250191597575, "grad_norm": 1.5834615230560303, "learning_rate": 2.4316028296083705e-06, "loss": 0.1155548095703125, "step": 4990 }, { "epoch": 0.6954643628509719, "grad_norm": 0.40606099367141724, "learning_rate": 2.4295810365735974e-06, "loss": 0.06864166259765625, "step": 4991 }, { "epoch": 0.6956037065421863, "grad_norm": 0.4538690745830536, "learning_rate": 2.427559814627234e-06, "loss": 0.08537101745605469, "step": 4992 }, { "epoch": 0.6957430502334007, "grad_norm": 0.9207727313041687, "learning_rate": 2.425539164218348e-06, "loss": 0.11437606811523438, "step": 4993 }, { "epoch": 0.695882393924615, "grad_norm": 0.6320446729660034, "learning_rate": 2.4235190857958834e-06, "loss": 0.08827400207519531, "step": 4994 }, { "epoch": 0.6960217376158294, "grad_norm": 1.4698179960250854, "learning_rate": 2.4214995798086584e-06, "loss": 0.09047126770019531, "step": 4995 }, { "epoch": 0.6961610813070438, "grad_norm": 2.3340656757354736, "learning_rate": 2.4194806467053584e-06, "loss": 0.18459510803222656, "step": 4996 }, { "epoch": 0.6963004249982582, "grad_norm": 0.8429393768310547, "learning_rate": 2.417462286934543e-06, "loss": 0.10172080993652344, "step": 4997 }, { "epoch": 0.6964397686894725, "grad_norm": 0.2907165586948395, "learning_rate": 2.4154445009446457e-06, "loss": 0.057417869567871094, "step": 4998 }, { "epoch": 0.6965791123806869, "grad_norm": 1.2220509052276611, "learning_rate": 2.413427289183977e-06, "loss": 0.11089801788330078, "step": 4999 }, { "epoch": 0.6967184560719013, "grad_norm": 0.8702083230018616, "learning_rate": 2.41141065210071e-06, "loss": 0.11381149291992188, "step": 5000 }, { "epoch": 0.6968577997631157, "grad_norm": 1.1824312210083008, "learning_rate": 2.4093945901428977e-06, "loss": 0.11737060546875, "step": 5001 }, { "epoch": 0.6969971434543301, "grad_norm": 0.7002254724502563, "learning_rate": 2.4073791037584648e-06, "loss": 0.06575202941894531, "step": 5002 }, { "epoch": 0.6971364871455444, "grad_norm": 0.7351788878440857, "learning_rate": 2.4053641933952043e-06, "loss": 0.07583999633789062, "step": 5003 }, { "epoch": 0.6972758308367588, "grad_norm": 1.1048165559768677, "learning_rate": 2.403349859500782e-06, "loss": 0.10335159301757812, "step": 5004 }, { "epoch": 0.6974151745279732, "grad_norm": 0.7558220028877258, "learning_rate": 2.4013361025227384e-06, "loss": 0.07575798034667969, "step": 5005 }, { "epoch": 0.6975545182191877, "grad_norm": 1.0064090490341187, "learning_rate": 2.3993229229084856e-06, "loss": 0.10323143005371094, "step": 5006 }, { "epoch": 0.6976938619104021, "grad_norm": 0.4954807162284851, "learning_rate": 2.3973103211053052e-06, "loss": 0.07344245910644531, "step": 5007 }, { "epoch": 0.6978332056016164, "grad_norm": 0.6984603404998779, "learning_rate": 2.3952982975603494e-06, "loss": 0.09528732299804688, "step": 5008 }, { "epoch": 0.6979725492928308, "grad_norm": 0.7050738334655762, "learning_rate": 2.393286852720645e-06, "loss": 0.09552955627441406, "step": 5009 }, { "epoch": 0.6981118929840452, "grad_norm": 0.7796118259429932, "learning_rate": 2.391275987033092e-06, "loss": 0.08836174011230469, "step": 5010 }, { "epoch": 0.6982512366752596, "grad_norm": 0.650277316570282, "learning_rate": 2.3892657009444543e-06, "loss": 0.07511520385742188, "step": 5011 }, { "epoch": 0.698390580366474, "grad_norm": 0.5035979151725769, "learning_rate": 2.387255994901376e-06, "loss": 0.07013130187988281, "step": 5012 }, { "epoch": 0.6985299240576883, "grad_norm": 0.7851938605308533, "learning_rate": 2.3852468693503635e-06, "loss": 0.08135604858398438, "step": 5013 }, { "epoch": 0.6986692677489027, "grad_norm": 0.9863877892494202, "learning_rate": 2.3832383247378025e-06, "loss": 0.08626556396484375, "step": 5014 }, { "epoch": 0.6988086114401171, "grad_norm": 0.8321183323860168, "learning_rate": 2.3812303615099423e-06, "loss": 0.07726860046386719, "step": 5015 }, { "epoch": 0.6989479551313315, "grad_norm": 0.951352596282959, "learning_rate": 2.3792229801129086e-06, "loss": 0.11095046997070312, "step": 5016 }, { "epoch": 0.6990872988225458, "grad_norm": 0.9004004597663879, "learning_rate": 2.3772161809926973e-06, "loss": 0.09151077270507812, "step": 5017 }, { "epoch": 0.6992266425137602, "grad_norm": 0.7672640085220337, "learning_rate": 2.375209964595171e-06, "loss": 0.08631372451782227, "step": 5018 }, { "epoch": 0.6993659862049746, "grad_norm": 0.9354745745658875, "learning_rate": 2.373204331366064e-06, "loss": 0.09110450744628906, "step": 5019 }, { "epoch": 0.699505329896189, "grad_norm": 1.0499955415725708, "learning_rate": 2.3711992817509854e-06, "loss": 0.09551239013671875, "step": 5020 }, { "epoch": 0.6996446735874033, "grad_norm": 0.4189526438713074, "learning_rate": 2.3691948161954083e-06, "loss": 0.06505966186523438, "step": 5021 }, { "epoch": 0.6997840172786177, "grad_norm": 0.8748196959495544, "learning_rate": 2.3671909351446802e-06, "loss": 0.07543754577636719, "step": 5022 }, { "epoch": 0.6999233609698321, "grad_norm": 2.1418826580047607, "learning_rate": 2.365187639044021e-06, "loss": 0.10402774810791016, "step": 5023 }, { "epoch": 0.7000627046610465, "grad_norm": 0.7548609972000122, "learning_rate": 2.363184928338514e-06, "loss": 0.09225845336914062, "step": 5024 }, { "epoch": 0.7002020483522609, "grad_norm": 1.2059814929962158, "learning_rate": 2.3611828034731144e-06, "loss": 0.11017417907714844, "step": 5025 }, { "epoch": 0.7003413920434752, "grad_norm": 1.423546314239502, "learning_rate": 2.359181264892651e-06, "loss": 0.08801078796386719, "step": 5026 }, { "epoch": 0.7004807357346896, "grad_norm": 0.8672583103179932, "learning_rate": 2.3571803130418215e-06, "loss": 0.10976028442382812, "step": 5027 }, { "epoch": 0.700620079425904, "grad_norm": 2.1373188495635986, "learning_rate": 2.3551799483651894e-06, "loss": 0.1470966339111328, "step": 5028 }, { "epoch": 0.7007594231171184, "grad_norm": 1.029261589050293, "learning_rate": 2.3531801713071887e-06, "loss": 0.12253570556640625, "step": 5029 }, { "epoch": 0.7008987668083327, "grad_norm": 0.6655247807502747, "learning_rate": 2.351180982312127e-06, "loss": 0.0775289535522461, "step": 5030 }, { "epoch": 0.7010381104995471, "grad_norm": 1.434198021888733, "learning_rate": 2.349182381824178e-06, "loss": 0.09345054626464844, "step": 5031 }, { "epoch": 0.7011774541907615, "grad_norm": 0.5566481947898865, "learning_rate": 2.3471843702873835e-06, "loss": 0.06783103942871094, "step": 5032 }, { "epoch": 0.7013167978819759, "grad_norm": 0.7307983040809631, "learning_rate": 2.345186948145659e-06, "loss": 0.07930374145507812, "step": 5033 }, { "epoch": 0.7014561415731903, "grad_norm": 0.7704582810401917, "learning_rate": 2.343190115842782e-06, "loss": 0.09730339050292969, "step": 5034 }, { "epoch": 0.7015954852644046, "grad_norm": 1.1352519989013672, "learning_rate": 2.341193873822407e-06, "loss": 0.09885120391845703, "step": 5035 }, { "epoch": 0.701734828955619, "grad_norm": 0.6511200070381165, "learning_rate": 2.33919822252805e-06, "loss": 0.08132743835449219, "step": 5036 }, { "epoch": 0.7018741726468334, "grad_norm": 1.2822527885437012, "learning_rate": 2.337203162403101e-06, "loss": 0.09883928298950195, "step": 5037 }, { "epoch": 0.7020135163380478, "grad_norm": 0.9443262815475464, "learning_rate": 2.335208693890819e-06, "loss": 0.11308479309082031, "step": 5038 }, { "epoch": 0.7021528600292621, "grad_norm": 0.9654844403266907, "learning_rate": 2.3332148174343257e-06, "loss": 0.0950469970703125, "step": 5039 }, { "epoch": 0.7022922037204765, "grad_norm": 0.7549152970314026, "learning_rate": 2.331221533476615e-06, "loss": 0.07228851318359375, "step": 5040 }, { "epoch": 0.7024315474116909, "grad_norm": 0.9885104298591614, "learning_rate": 2.3292288424605503e-06, "loss": 0.07684135437011719, "step": 5041 }, { "epoch": 0.7025708911029053, "grad_norm": 0.6917967200279236, "learning_rate": 2.327236744828864e-06, "loss": 0.08290672302246094, "step": 5042 }, { "epoch": 0.7027102347941196, "grad_norm": 0.8283496499061584, "learning_rate": 2.325245241024151e-06, "loss": 0.07598495483398438, "step": 5043 }, { "epoch": 0.702849578485334, "grad_norm": 1.0502452850341797, "learning_rate": 2.323254331488881e-06, "loss": 0.08062744140625, "step": 5044 }, { "epoch": 0.7029889221765484, "grad_norm": 0.985828161239624, "learning_rate": 2.3212640166653868e-06, "loss": 0.0966482162475586, "step": 5045 }, { "epoch": 0.7031282658677629, "grad_norm": 0.46123725175857544, "learning_rate": 2.319274296995872e-06, "loss": 0.07500934600830078, "step": 5046 }, { "epoch": 0.7032676095589773, "grad_norm": 0.8440926671028137, "learning_rate": 2.3172851729224056e-06, "loss": 0.09749603271484375, "step": 5047 }, { "epoch": 0.7034069532501916, "grad_norm": 0.9278401136398315, "learning_rate": 2.315296644886926e-06, "loss": 0.0936899185180664, "step": 5048 }, { "epoch": 0.703546296941406, "grad_norm": 1.2596038579940796, "learning_rate": 2.313308713331242e-06, "loss": 0.12987709045410156, "step": 5049 }, { "epoch": 0.7036856406326204, "grad_norm": 2.103389024734497, "learning_rate": 2.3113213786970205e-06, "loss": 0.10523605346679688, "step": 5050 }, { "epoch": 0.7038249843238348, "grad_norm": 1.1424753665924072, "learning_rate": 2.3093346414258054e-06, "loss": 0.12636566162109375, "step": 5051 }, { "epoch": 0.7039643280150492, "grad_norm": 1.2354451417922974, "learning_rate": 2.3073485019590043e-06, "loss": 0.10692405700683594, "step": 5052 }, { "epoch": 0.7041036717062635, "grad_norm": 0.76384437084198, "learning_rate": 2.305362960737893e-06, "loss": 0.10122108459472656, "step": 5053 }, { "epoch": 0.7042430153974779, "grad_norm": 0.8498771786689758, "learning_rate": 2.3033780182036127e-06, "loss": 0.12047386169433594, "step": 5054 }, { "epoch": 0.7043823590886923, "grad_norm": 0.754266619682312, "learning_rate": 2.301393674797169e-06, "loss": 0.09636688232421875, "step": 5055 }, { "epoch": 0.7045217027799067, "grad_norm": 0.6483745574951172, "learning_rate": 2.2994099309594437e-06, "loss": 0.08213615417480469, "step": 5056 }, { "epoch": 0.704661046471121, "grad_norm": 0.6875750422477722, "learning_rate": 2.297426787131174e-06, "loss": 0.06949615478515625, "step": 5057 }, { "epoch": 0.7048003901623354, "grad_norm": 1.0357415676116943, "learning_rate": 2.2954442437529705e-06, "loss": 0.11544227600097656, "step": 5058 }, { "epoch": 0.7049397338535498, "grad_norm": 0.5352867245674133, "learning_rate": 2.293462301265313e-06, "loss": 0.07347869873046875, "step": 5059 }, { "epoch": 0.7050790775447642, "grad_norm": 0.3634214997291565, "learning_rate": 2.2914809601085405e-06, "loss": 0.06327056884765625, "step": 5060 }, { "epoch": 0.7052184212359786, "grad_norm": 0.9526922106742859, "learning_rate": 2.28950022072286e-06, "loss": 0.13184165954589844, "step": 5061 }, { "epoch": 0.7053577649271929, "grad_norm": 0.7224952578544617, "learning_rate": 2.2875200835483486e-06, "loss": 0.09439659118652344, "step": 5062 }, { "epoch": 0.7054971086184073, "grad_norm": 0.5351418256759644, "learning_rate": 2.2855405490249498e-06, "loss": 0.07657241821289062, "step": 5063 }, { "epoch": 0.7056364523096217, "grad_norm": 1.1155680418014526, "learning_rate": 2.283561617592467e-06, "loss": 0.09415721893310547, "step": 5064 }, { "epoch": 0.7057757960008361, "grad_norm": 0.6283812522888184, "learning_rate": 2.2815832896905772e-06, "loss": 0.07811737060546875, "step": 5065 }, { "epoch": 0.7059151396920504, "grad_norm": 0.6501659750938416, "learning_rate": 2.279605565758816e-06, "loss": 0.07601737976074219, "step": 5066 }, { "epoch": 0.7060544833832648, "grad_norm": 0.6718569397926331, "learning_rate": 2.277628446236592e-06, "loss": 0.07800006866455078, "step": 5067 }, { "epoch": 0.7061938270744792, "grad_norm": 0.8719935417175293, "learning_rate": 2.275651931563173e-06, "loss": 0.07459640502929688, "step": 5068 }, { "epoch": 0.7063331707656936, "grad_norm": 0.8260694742202759, "learning_rate": 2.273676022177697e-06, "loss": 0.11057472229003906, "step": 5069 }, { "epoch": 0.706472514456908, "grad_norm": 0.8375729322433472, "learning_rate": 2.2717007185191673e-06, "loss": 0.08108329772949219, "step": 5070 }, { "epoch": 0.7066118581481223, "grad_norm": 1.0112985372543335, "learning_rate": 2.2697260210264506e-06, "loss": 0.10943984985351562, "step": 5071 }, { "epoch": 0.7067512018393367, "grad_norm": 1.2209482192993164, "learning_rate": 2.267751930138276e-06, "loss": 0.10003852844238281, "step": 5072 }, { "epoch": 0.7068905455305511, "grad_norm": 0.7776097655296326, "learning_rate": 2.265778446293245e-06, "loss": 0.08107376098632812, "step": 5073 }, { "epoch": 0.7070298892217655, "grad_norm": 0.5138954520225525, "learning_rate": 2.263805569929821e-06, "loss": 0.07063484191894531, "step": 5074 }, { "epoch": 0.7071692329129798, "grad_norm": 1.2360997200012207, "learning_rate": 2.2618333014863296e-06, "loss": 0.12751007080078125, "step": 5075 }, { "epoch": 0.7073085766041942, "grad_norm": 0.8611602187156677, "learning_rate": 2.259861641400967e-06, "loss": 0.08976364135742188, "step": 5076 }, { "epoch": 0.7074479202954086, "grad_norm": 0.9401355981826782, "learning_rate": 2.2578905901117876e-06, "loss": 0.09481048583984375, "step": 5077 }, { "epoch": 0.707587263986623, "grad_norm": 0.8832383155822754, "learning_rate": 2.255920148056717e-06, "loss": 0.07096004486083984, "step": 5078 }, { "epoch": 0.7077266076778373, "grad_norm": 0.5850106477737427, "learning_rate": 2.2539503156735392e-06, "loss": 0.09055328369140625, "step": 5079 }, { "epoch": 0.7078659513690517, "grad_norm": 0.787654459476471, "learning_rate": 2.2519810933999085e-06, "loss": 0.08971214294433594, "step": 5080 }, { "epoch": 0.7080052950602661, "grad_norm": 0.5546157956123352, "learning_rate": 2.2500124816733437e-06, "loss": 0.07063865661621094, "step": 5081 }, { "epoch": 0.7081446387514805, "grad_norm": 1.185052752494812, "learning_rate": 2.248044480931219e-06, "loss": 0.11652374267578125, "step": 5082 }, { "epoch": 0.7082839824426949, "grad_norm": 1.31234610080719, "learning_rate": 2.2460770916107823e-06, "loss": 0.11188316345214844, "step": 5083 }, { "epoch": 0.7084233261339092, "grad_norm": 0.9327336549758911, "learning_rate": 2.2441103141491424e-06, "loss": 0.08673095703125, "step": 5084 }, { "epoch": 0.7085626698251236, "grad_norm": 0.648948609828949, "learning_rate": 2.2421441489832745e-06, "loss": 0.08021259307861328, "step": 5085 }, { "epoch": 0.7087020135163381, "grad_norm": 0.73154616355896, "learning_rate": 2.240178596550014e-06, "loss": 0.08231544494628906, "step": 5086 }, { "epoch": 0.7088413572075525, "grad_norm": 0.7842297554016113, "learning_rate": 2.23821365728606e-06, "loss": 0.09965133666992188, "step": 5087 }, { "epoch": 0.7089807008987669, "grad_norm": 0.5625466704368591, "learning_rate": 2.23624933162798e-06, "loss": 0.08343982696533203, "step": 5088 }, { "epoch": 0.7091200445899812, "grad_norm": 1.2044672966003418, "learning_rate": 2.2342856200121993e-06, "loss": 0.08381080627441406, "step": 5089 }, { "epoch": 0.7092593882811956, "grad_norm": 0.8488390445709229, "learning_rate": 2.2323225228750113e-06, "loss": 0.09629631042480469, "step": 5090 }, { "epoch": 0.70939873197241, "grad_norm": 0.9703565835952759, "learning_rate": 2.230360040652574e-06, "loss": 0.08140850067138672, "step": 5091 }, { "epoch": 0.7095380756636244, "grad_norm": 0.5848873853683472, "learning_rate": 2.228398173780903e-06, "loss": 0.07948112487792969, "step": 5092 }, { "epoch": 0.7096774193548387, "grad_norm": 0.7300907969474792, "learning_rate": 2.2264369226958794e-06, "loss": 0.08669662475585938, "step": 5093 }, { "epoch": 0.7098167630460531, "grad_norm": 0.5643593072891235, "learning_rate": 2.2244762878332506e-06, "loss": 0.07861328125, "step": 5094 }, { "epoch": 0.7099561067372675, "grad_norm": 0.7772735953330994, "learning_rate": 2.222516269628626e-06, "loss": 0.07858085632324219, "step": 5095 }, { "epoch": 0.7100954504284819, "grad_norm": 0.7830649614334106, "learning_rate": 2.220556868517473e-06, "loss": 0.07999610900878906, "step": 5096 }, { "epoch": 0.7102347941196963, "grad_norm": 0.8460818529129028, "learning_rate": 2.2185980849351295e-06, "loss": 0.09587383270263672, "step": 5097 }, { "epoch": 0.7103741378109106, "grad_norm": 1.2641445398330688, "learning_rate": 2.2166399193167905e-06, "loss": 0.12807846069335938, "step": 5098 }, { "epoch": 0.710513481502125, "grad_norm": 0.5671942830085754, "learning_rate": 2.214682372097517e-06, "loss": 0.07084083557128906, "step": 5099 }, { "epoch": 0.7106528251933394, "grad_norm": 0.7952759265899658, "learning_rate": 2.212725443712229e-06, "loss": 0.10462760925292969, "step": 5100 }, { "epoch": 0.7107921688845538, "grad_norm": 0.9262940287590027, "learning_rate": 2.2107691345957133e-06, "loss": 0.07519721984863281, "step": 5101 }, { "epoch": 0.7109315125757681, "grad_norm": 1.040286898612976, "learning_rate": 2.208813445182618e-06, "loss": 0.08714675903320312, "step": 5102 }, { "epoch": 0.7110708562669825, "grad_norm": 0.6775789260864258, "learning_rate": 2.2068583759074513e-06, "loss": 0.09341621398925781, "step": 5103 }, { "epoch": 0.7112101999581969, "grad_norm": 1.5532437562942505, "learning_rate": 2.2049039272045837e-06, "loss": 0.110595703125, "step": 5104 }, { "epoch": 0.7113495436494113, "grad_norm": 0.7003015279769897, "learning_rate": 2.2029500995082497e-06, "loss": 0.08153772354125977, "step": 5105 }, { "epoch": 0.7114888873406257, "grad_norm": 0.3608596920967102, "learning_rate": 2.2009968932525478e-06, "loss": 0.06473731994628906, "step": 5106 }, { "epoch": 0.71162823103184, "grad_norm": 1.263828992843628, "learning_rate": 2.199044308871434e-06, "loss": 0.10298538208007812, "step": 5107 }, { "epoch": 0.7117675747230544, "grad_norm": 0.7369030714035034, "learning_rate": 2.197092346798726e-06, "loss": 0.10342597961425781, "step": 5108 }, { "epoch": 0.7119069184142688, "grad_norm": 0.8469061255455017, "learning_rate": 2.1951410074681074e-06, "loss": 0.09889602661132812, "step": 5109 }, { "epoch": 0.7120462621054832, "grad_norm": 0.4196474254131317, "learning_rate": 2.193190291313122e-06, "loss": 0.0677337646484375, "step": 5110 }, { "epoch": 0.7121856057966975, "grad_norm": 0.6703079342842102, "learning_rate": 2.1912401987671724e-06, "loss": 0.07064628601074219, "step": 5111 }, { "epoch": 0.7123249494879119, "grad_norm": 1.044179916381836, "learning_rate": 2.1892907302635246e-06, "loss": 0.1090855598449707, "step": 5112 }, { "epoch": 0.7124642931791263, "grad_norm": 0.9104196429252625, "learning_rate": 2.1873418862353095e-06, "loss": 0.09014892578125, "step": 5113 }, { "epoch": 0.7126036368703407, "grad_norm": 0.5631582140922546, "learning_rate": 2.185393667115513e-06, "loss": 0.07807540893554688, "step": 5114 }, { "epoch": 0.712742980561555, "grad_norm": 1.6199983358383179, "learning_rate": 2.1834460733369835e-06, "loss": 0.12845802307128906, "step": 5115 }, { "epoch": 0.7128823242527694, "grad_norm": 0.44659653306007385, "learning_rate": 2.181499105332433e-06, "loss": 0.08337688446044922, "step": 5116 }, { "epoch": 0.7130216679439838, "grad_norm": 0.8705210089683533, "learning_rate": 2.179552763534436e-06, "loss": 0.07617950439453125, "step": 5117 }, { "epoch": 0.7131610116351982, "grad_norm": 0.8382517695426941, "learning_rate": 2.177607048375423e-06, "loss": 0.10319900512695312, "step": 5118 }, { "epoch": 0.7133003553264126, "grad_norm": 1.0182161331176758, "learning_rate": 2.1756619602876857e-06, "loss": 0.082000732421875, "step": 5119 }, { "epoch": 0.7134396990176269, "grad_norm": 1.6294779777526855, "learning_rate": 2.1737174997033818e-06, "loss": 0.08620166778564453, "step": 5120 }, { "epoch": 0.7135790427088413, "grad_norm": 1.0541179180145264, "learning_rate": 2.1717736670545226e-06, "loss": 0.07744598388671875, "step": 5121 }, { "epoch": 0.7137183864000557, "grad_norm": 0.6007778644561768, "learning_rate": 2.169830462772985e-06, "loss": 0.08262825012207031, "step": 5122 }, { "epoch": 0.7138577300912701, "grad_norm": 1.6164698600769043, "learning_rate": 2.1678878872905063e-06, "loss": 0.13355255126953125, "step": 5123 }, { "epoch": 0.7139970737824844, "grad_norm": 0.6599476337432861, "learning_rate": 2.1659459410386814e-06, "loss": 0.07745742797851562, "step": 5124 }, { "epoch": 0.7141364174736988, "grad_norm": 1.9921514987945557, "learning_rate": 2.1640046244489637e-06, "loss": 0.11357498168945312, "step": 5125 }, { "epoch": 0.7142757611649132, "grad_norm": 1.516560673713684, "learning_rate": 2.1620639379526715e-06, "loss": 0.09256172180175781, "step": 5126 }, { "epoch": 0.7144151048561277, "grad_norm": 0.35921400785446167, "learning_rate": 2.1601238819809827e-06, "loss": 0.06064033508300781, "step": 5127 }, { "epoch": 0.7145544485473421, "grad_norm": 0.7743819952011108, "learning_rate": 2.158184456964932e-06, "loss": 0.09450721740722656, "step": 5128 }, { "epoch": 0.7146937922385564, "grad_norm": 0.8730310797691345, "learning_rate": 2.156245663335414e-06, "loss": 0.09342479705810547, "step": 5129 }, { "epoch": 0.7148331359297708, "grad_norm": 0.8108557462692261, "learning_rate": 2.154307501523185e-06, "loss": 0.08469390869140625, "step": 5130 }, { "epoch": 0.7149724796209852, "grad_norm": 1.2889018058776855, "learning_rate": 2.1523699719588633e-06, "loss": 0.11017799377441406, "step": 5131 }, { "epoch": 0.7151118233121996, "grad_norm": 0.5935595631599426, "learning_rate": 2.1504330750729185e-06, "loss": 0.07825183868408203, "step": 5132 }, { "epoch": 0.715251167003414, "grad_norm": 1.071433424949646, "learning_rate": 2.1484968112956884e-06, "loss": 0.12572288513183594, "step": 5133 }, { "epoch": 0.7153905106946283, "grad_norm": 0.9190468192100525, "learning_rate": 2.146561181057368e-06, "loss": 0.11704063415527344, "step": 5134 }, { "epoch": 0.7155298543858427, "grad_norm": 0.5186915993690491, "learning_rate": 2.1446261847880073e-06, "loss": 0.0797414779663086, "step": 5135 }, { "epoch": 0.7156691980770571, "grad_norm": 0.9065159559249878, "learning_rate": 2.1426918229175175e-06, "loss": 0.07155227661132812, "step": 5136 }, { "epoch": 0.7158085417682715, "grad_norm": 1.555821180343628, "learning_rate": 2.140758095875671e-06, "loss": 0.12986373901367188, "step": 5137 }, { "epoch": 0.7159478854594858, "grad_norm": 2.9028267860412598, "learning_rate": 2.1388250040921007e-06, "loss": 0.15229463577270508, "step": 5138 }, { "epoch": 0.7160872291507002, "grad_norm": 0.6207892894744873, "learning_rate": 2.136892547996292e-06, "loss": 0.08057594299316406, "step": 5139 }, { "epoch": 0.7162265728419146, "grad_norm": 0.7717511653900146, "learning_rate": 2.1349607280175918e-06, "loss": 0.08801651000976562, "step": 5140 }, { "epoch": 0.716365916533129, "grad_norm": 0.9076723456382751, "learning_rate": 2.133029544585207e-06, "loss": 0.0743093490600586, "step": 5141 }, { "epoch": 0.7165052602243434, "grad_norm": 1.0612982511520386, "learning_rate": 2.1310989981282067e-06, "loss": 0.0973358154296875, "step": 5142 }, { "epoch": 0.7166446039155577, "grad_norm": 0.9053005576133728, "learning_rate": 2.1291690890755078e-06, "loss": 0.08316230773925781, "step": 5143 }, { "epoch": 0.7167839476067721, "grad_norm": 1.0495063066482544, "learning_rate": 2.127239817855897e-06, "loss": 0.09741783142089844, "step": 5144 }, { "epoch": 0.7169232912979865, "grad_norm": 1.3480478525161743, "learning_rate": 2.1253111848980113e-06, "loss": 0.1118927001953125, "step": 5145 }, { "epoch": 0.7170626349892009, "grad_norm": 1.267420768737793, "learning_rate": 2.1233831906303514e-06, "loss": 0.10676443576812744, "step": 5146 }, { "epoch": 0.7172019786804152, "grad_norm": 0.6807815432548523, "learning_rate": 2.121455835481271e-06, "loss": 0.07930564880371094, "step": 5147 }, { "epoch": 0.7173413223716296, "grad_norm": 0.8052094578742981, "learning_rate": 2.119529119878985e-06, "loss": 0.08860969543457031, "step": 5148 }, { "epoch": 0.717480666062844, "grad_norm": 1.1460905075073242, "learning_rate": 2.1176030442515704e-06, "loss": 0.11646080017089844, "step": 5149 }, { "epoch": 0.7176200097540584, "grad_norm": 1.0670113563537598, "learning_rate": 2.115677609026949e-06, "loss": 0.10769081115722656, "step": 5150 }, { "epoch": 0.7177593534452728, "grad_norm": 0.9422207474708557, "learning_rate": 2.1137528146329133e-06, "loss": 0.09261226654052734, "step": 5151 }, { "epoch": 0.7178986971364871, "grad_norm": 0.8079156875610352, "learning_rate": 2.1118286614971075e-06, "loss": 0.08232593536376953, "step": 5152 }, { "epoch": 0.7180380408277015, "grad_norm": 3.0853676795959473, "learning_rate": 2.1099051500470368e-06, "loss": 0.15977096557617188, "step": 5153 }, { "epoch": 0.7181773845189159, "grad_norm": 1.1263478994369507, "learning_rate": 2.1079822807100585e-06, "loss": 0.11688041687011719, "step": 5154 }, { "epoch": 0.7183167282101303, "grad_norm": 0.6672700047492981, "learning_rate": 2.1060600539133928e-06, "loss": 0.08426856994628906, "step": 5155 }, { "epoch": 0.7184560719013446, "grad_norm": 0.8550943732261658, "learning_rate": 2.104138470084114e-06, "loss": 0.09337997436523438, "step": 5156 }, { "epoch": 0.718595415592559, "grad_norm": 0.8587205410003662, "learning_rate": 2.1022175296491516e-06, "loss": 0.08825302124023438, "step": 5157 }, { "epoch": 0.7187347592837734, "grad_norm": 0.8588348627090454, "learning_rate": 2.100297233035296e-06, "loss": 0.12321853637695312, "step": 5158 }, { "epoch": 0.7188741029749878, "grad_norm": 0.46775737404823303, "learning_rate": 2.098377580669196e-06, "loss": 0.07443428039550781, "step": 5159 }, { "epoch": 0.7190134466662021, "grad_norm": 0.9855735898017883, "learning_rate": 2.096458572977352e-06, "loss": 0.10112190246582031, "step": 5160 }, { "epoch": 0.7191527903574165, "grad_norm": 0.642131507396698, "learning_rate": 2.0945402103861233e-06, "loss": 0.06964778900146484, "step": 5161 }, { "epoch": 0.7192921340486309, "grad_norm": 0.9155319929122925, "learning_rate": 2.0926224933217267e-06, "loss": 0.09531593322753906, "step": 5162 }, { "epoch": 0.7194314777398453, "grad_norm": 1.1475472450256348, "learning_rate": 2.0907054222102367e-06, "loss": 0.12331962585449219, "step": 5163 }, { "epoch": 0.7195708214310597, "grad_norm": 0.8287627696990967, "learning_rate": 2.0887889974775805e-06, "loss": 0.11712646484375, "step": 5164 }, { "epoch": 0.719710165122274, "grad_norm": 0.7059844136238098, "learning_rate": 2.0868732195495463e-06, "loss": 0.08089160919189453, "step": 5165 }, { "epoch": 0.7198495088134884, "grad_norm": 0.8847388625144958, "learning_rate": 2.0849580888517733e-06, "loss": 0.07901954650878906, "step": 5166 }, { "epoch": 0.7199888525047029, "grad_norm": 0.4865313768386841, "learning_rate": 2.083043605809763e-06, "loss": 0.07899141311645508, "step": 5167 }, { "epoch": 0.7201281961959173, "grad_norm": 1.5224024057388306, "learning_rate": 2.081129770848867e-06, "loss": 0.11385965347290039, "step": 5168 }, { "epoch": 0.7202675398871317, "grad_norm": 0.6607574820518494, "learning_rate": 2.0792165843942963e-06, "loss": 0.07612133026123047, "step": 5169 }, { "epoch": 0.720406883578346, "grad_norm": 1.0640982389450073, "learning_rate": 2.0773040468711205e-06, "loss": 0.12339019775390625, "step": 5170 }, { "epoch": 0.7205462272695604, "grad_norm": 0.7450610995292664, "learning_rate": 2.0753921587042586e-06, "loss": 0.09824371337890625, "step": 5171 }, { "epoch": 0.7206855709607748, "grad_norm": 0.37891390919685364, "learning_rate": 2.0734809203184873e-06, "loss": 0.06151771545410156, "step": 5172 }, { "epoch": 0.7208249146519892, "grad_norm": 1.6402900218963623, "learning_rate": 2.071570332138442e-06, "loss": 0.13814544677734375, "step": 5173 }, { "epoch": 0.7209642583432035, "grad_norm": 0.599624752998352, "learning_rate": 2.0696603945886133e-06, "loss": 0.07589912414550781, "step": 5174 }, { "epoch": 0.7211036020344179, "grad_norm": 0.8425595164299011, "learning_rate": 2.067751108093343e-06, "loss": 0.08943367004394531, "step": 5175 }, { "epoch": 0.7212429457256323, "grad_norm": 0.6189008355140686, "learning_rate": 2.0658424730768335e-06, "loss": 0.08160400390625, "step": 5176 }, { "epoch": 0.7213822894168467, "grad_norm": 0.8922195434570312, "learning_rate": 2.063934489963137e-06, "loss": 0.10165786743164062, "step": 5177 }, { "epoch": 0.7215216331080611, "grad_norm": 1.0419610738754272, "learning_rate": 2.0620271591761666e-06, "loss": 0.11163902282714844, "step": 5178 }, { "epoch": 0.7216609767992754, "grad_norm": 1.0784903764724731, "learning_rate": 2.0601204811396847e-06, "loss": 0.12493515014648438, "step": 5179 }, { "epoch": 0.7218003204904898, "grad_norm": 0.9113585948944092, "learning_rate": 2.058214456277314e-06, "loss": 0.0916757583618164, "step": 5180 }, { "epoch": 0.7219396641817042, "grad_norm": 0.7771918177604675, "learning_rate": 2.0563090850125318e-06, "loss": 0.0818023681640625, "step": 5181 }, { "epoch": 0.7220790078729186, "grad_norm": 0.875211238861084, "learning_rate": 2.054404367768662e-06, "loss": 0.09787368774414062, "step": 5182 }, { "epoch": 0.7222183515641329, "grad_norm": 1.0440232753753662, "learning_rate": 2.0525003049688923e-06, "loss": 0.087432861328125, "step": 5183 }, { "epoch": 0.7223576952553473, "grad_norm": 0.5978320837020874, "learning_rate": 2.0505968970362627e-06, "loss": 0.08621406555175781, "step": 5184 }, { "epoch": 0.7224970389465617, "grad_norm": 0.7006909251213074, "learning_rate": 2.048694144393668e-06, "loss": 0.077850341796875, "step": 5185 }, { "epoch": 0.7226363826377761, "grad_norm": 0.9775253534317017, "learning_rate": 2.0467920474638552e-06, "loss": 0.11275386810302734, "step": 5186 }, { "epoch": 0.7227757263289905, "grad_norm": 0.6162028312683105, "learning_rate": 2.0448906066694247e-06, "loss": 0.0736689567565918, "step": 5187 }, { "epoch": 0.7229150700202048, "grad_norm": 0.8260387778282166, "learning_rate": 2.042989822432837e-06, "loss": 0.07001113891601562, "step": 5188 }, { "epoch": 0.7230544137114192, "grad_norm": 1.0232053995132446, "learning_rate": 2.041089695176399e-06, "loss": 0.09536552429199219, "step": 5189 }, { "epoch": 0.7231937574026336, "grad_norm": 1.066112995147705, "learning_rate": 2.0391902253222777e-06, "loss": 0.09939765930175781, "step": 5190 }, { "epoch": 0.723333101093848, "grad_norm": 0.7250447273254395, "learning_rate": 2.037291413292494e-06, "loss": 0.09525680541992188, "step": 5191 }, { "epoch": 0.7234724447850623, "grad_norm": 2.2351737022399902, "learning_rate": 2.035393259508919e-06, "loss": 0.10393905639648438, "step": 5192 }, { "epoch": 0.7236117884762767, "grad_norm": 2.1335527896881104, "learning_rate": 2.0334957643932757e-06, "loss": 0.1534271240234375, "step": 5193 }, { "epoch": 0.7237511321674911, "grad_norm": 0.7573909163475037, "learning_rate": 2.0315989283671474e-06, "loss": 0.10495758056640625, "step": 5194 }, { "epoch": 0.7238904758587055, "grad_norm": 0.4559195935726166, "learning_rate": 2.0297027518519696e-06, "loss": 0.06598663330078125, "step": 5195 }, { "epoch": 0.7240298195499199, "grad_norm": 0.8331352472305298, "learning_rate": 2.0278072352690253e-06, "loss": 0.07946205139160156, "step": 5196 }, { "epoch": 0.7241691632411342, "grad_norm": 0.7049586772918701, "learning_rate": 2.0259123790394587e-06, "loss": 0.08407974243164062, "step": 5197 }, { "epoch": 0.7243085069323486, "grad_norm": 0.806969940662384, "learning_rate": 2.0240181835842605e-06, "loss": 0.08721923828125, "step": 5198 }, { "epoch": 0.724447850623563, "grad_norm": 2.143646001815796, "learning_rate": 2.0221246493242802e-06, "loss": 0.1157684326171875, "step": 5199 }, { "epoch": 0.7245871943147774, "grad_norm": 0.4098610579967499, "learning_rate": 2.0202317766802155e-06, "loss": 0.06512451171875, "step": 5200 }, { "epoch": 0.7247265380059917, "grad_norm": 0.6709553003311157, "learning_rate": 2.0183395660726208e-06, "loss": 0.08466720581054688, "step": 5201 }, { "epoch": 0.7248658816972061, "grad_norm": 0.9068776369094849, "learning_rate": 2.0164480179219038e-06, "loss": 0.10187530517578125, "step": 5202 }, { "epoch": 0.7250052253884205, "grad_norm": 0.5563416481018066, "learning_rate": 2.014557132648321e-06, "loss": 0.0839681625366211, "step": 5203 }, { "epoch": 0.7251445690796349, "grad_norm": 1.4155783653259277, "learning_rate": 2.0126669106719833e-06, "loss": 0.11857223510742188, "step": 5204 }, { "epoch": 0.7252839127708492, "grad_norm": 0.8968859314918518, "learning_rate": 2.010777352412856e-06, "loss": 0.09392929077148438, "step": 5205 }, { "epoch": 0.7254232564620636, "grad_norm": 1.1308966875076294, "learning_rate": 2.0088884582907574e-06, "loss": 0.08616924285888672, "step": 5206 }, { "epoch": 0.7255626001532781, "grad_norm": 1.0277024507522583, "learning_rate": 2.0070002287253554e-06, "loss": 0.10242843627929688, "step": 5207 }, { "epoch": 0.7257019438444925, "grad_norm": 0.6834669709205627, "learning_rate": 2.0051126641361697e-06, "loss": 0.1006169319152832, "step": 5208 }, { "epoch": 0.7258412875357069, "grad_norm": 1.2362291812896729, "learning_rate": 2.0032257649425753e-06, "loss": 0.09362602233886719, "step": 5209 }, { "epoch": 0.7259806312269212, "grad_norm": 0.7883133292198181, "learning_rate": 2.0013395315637997e-06, "loss": 0.07610893249511719, "step": 5210 }, { "epoch": 0.7261199749181356, "grad_norm": 0.7672985792160034, "learning_rate": 1.9994539644189183e-06, "loss": 0.07336616516113281, "step": 5211 }, { "epoch": 0.72625931860935, "grad_norm": 1.3274582624435425, "learning_rate": 1.9975690639268623e-06, "loss": 0.13653898239135742, "step": 5212 }, { "epoch": 0.7263986623005644, "grad_norm": 0.9950271844863892, "learning_rate": 1.9956848305064156e-06, "loss": 0.08096122741699219, "step": 5213 }, { "epoch": 0.7265380059917788, "grad_norm": 0.4489187002182007, "learning_rate": 1.99380126457621e-06, "loss": 0.07046699523925781, "step": 5214 }, { "epoch": 0.7266773496829931, "grad_norm": 1.0094093084335327, "learning_rate": 1.9919183665547285e-06, "loss": 0.11480903625488281, "step": 5215 }, { "epoch": 0.7268166933742075, "grad_norm": 0.6675369739532471, "learning_rate": 1.9900361368603104e-06, "loss": 0.08613777160644531, "step": 5216 }, { "epoch": 0.7269560370654219, "grad_norm": 0.6932039856910706, "learning_rate": 1.988154575911146e-06, "loss": 0.09663009643554688, "step": 5217 }, { "epoch": 0.7270953807566363, "grad_norm": 0.5389742851257324, "learning_rate": 1.9862736841252734e-06, "loss": 0.07056999206542969, "step": 5218 }, { "epoch": 0.7272347244478506, "grad_norm": 0.42732250690460205, "learning_rate": 1.984393461920581e-06, "loss": 0.07135200500488281, "step": 5219 }, { "epoch": 0.727374068139065, "grad_norm": 0.8548200130462646, "learning_rate": 1.9825139097148166e-06, "loss": 0.10204696655273438, "step": 5220 }, { "epoch": 0.7275134118302794, "grad_norm": 0.8480073809623718, "learning_rate": 1.980635027925569e-06, "loss": 0.10742950439453125, "step": 5221 }, { "epoch": 0.7276527555214938, "grad_norm": 0.8278719782829285, "learning_rate": 1.9787568169702848e-06, "loss": 0.10508155822753906, "step": 5222 }, { "epoch": 0.7277920992127082, "grad_norm": 1.123093843460083, "learning_rate": 1.9768792772662616e-06, "loss": 0.09594345092773438, "step": 5223 }, { "epoch": 0.7279314429039225, "grad_norm": 0.3952956795692444, "learning_rate": 1.975002409230644e-06, "loss": 0.0681915283203125, "step": 5224 }, { "epoch": 0.7280707865951369, "grad_norm": 0.9091848731040955, "learning_rate": 1.9731262132804275e-06, "loss": 0.1012420654296875, "step": 5225 }, { "epoch": 0.7282101302863513, "grad_norm": 1.027465581893921, "learning_rate": 1.9712506898324613e-06, "loss": 0.10902214050292969, "step": 5226 }, { "epoch": 0.7283494739775657, "grad_norm": 0.924568235874176, "learning_rate": 1.969375839303447e-06, "loss": 0.08242034912109375, "step": 5227 }, { "epoch": 0.72848881766878, "grad_norm": 0.893608808517456, "learning_rate": 1.967501662109928e-06, "loss": 0.12188339233398438, "step": 5228 }, { "epoch": 0.7286281613599944, "grad_norm": 0.4950302839279175, "learning_rate": 1.965628158668309e-06, "loss": 0.07645225524902344, "step": 5229 }, { "epoch": 0.7287675050512088, "grad_norm": 0.41830798983573914, "learning_rate": 1.9637553293948353e-06, "loss": 0.06106758117675781, "step": 5230 }, { "epoch": 0.7289068487424232, "grad_norm": 0.7033432126045227, "learning_rate": 1.9618831747056106e-06, "loss": 0.08354949951171875, "step": 5231 }, { "epoch": 0.7290461924336376, "grad_norm": 0.6391546130180359, "learning_rate": 1.960011695016581e-06, "loss": 0.07654571533203125, "step": 5232 }, { "epoch": 0.7291855361248519, "grad_norm": 0.7021375298500061, "learning_rate": 1.958140890743549e-06, "loss": 0.09296035766601562, "step": 5233 }, { "epoch": 0.7293248798160663, "grad_norm": 0.8452174067497253, "learning_rate": 1.956270762302166e-06, "loss": 0.09101486206054688, "step": 5234 }, { "epoch": 0.7294642235072807, "grad_norm": 0.8079817891120911, "learning_rate": 1.9544013101079295e-06, "loss": 0.10210609436035156, "step": 5235 }, { "epoch": 0.7296035671984951, "grad_norm": 0.737819492816925, "learning_rate": 1.9525325345761887e-06, "loss": 0.08420276641845703, "step": 5236 }, { "epoch": 0.7297429108897094, "grad_norm": 0.7897840142250061, "learning_rate": 1.950664436122144e-06, "loss": 0.09705352783203125, "step": 5237 }, { "epoch": 0.7298822545809238, "grad_norm": 1.1009290218353271, "learning_rate": 1.948797015160845e-06, "loss": 0.11600685119628906, "step": 5238 }, { "epoch": 0.7300215982721382, "grad_norm": 0.4237394332885742, "learning_rate": 1.94693027210719e-06, "loss": 0.06990528106689453, "step": 5239 }, { "epoch": 0.7301609419633526, "grad_norm": 0.6148760318756104, "learning_rate": 1.945064207375923e-06, "loss": 0.08111572265625, "step": 5240 }, { "epoch": 0.730300285654567, "grad_norm": 0.6210387945175171, "learning_rate": 1.9431988213816444e-06, "loss": 0.06860542297363281, "step": 5241 }, { "epoch": 0.7304396293457813, "grad_norm": 1.3224496841430664, "learning_rate": 1.9413341145388013e-06, "loss": 0.12921905517578125, "step": 5242 }, { "epoch": 0.7305789730369957, "grad_norm": 0.7608392834663391, "learning_rate": 1.9394700872616856e-06, "loss": 0.08424568176269531, "step": 5243 }, { "epoch": 0.7307183167282101, "grad_norm": 0.9352702498435974, "learning_rate": 1.9376067399644456e-06, "loss": 0.11129474639892578, "step": 5244 }, { "epoch": 0.7308576604194245, "grad_norm": 1.0360465049743652, "learning_rate": 1.93574407306107e-06, "loss": 0.09228134155273438, "step": 5245 }, { "epoch": 0.7309970041106388, "grad_norm": 1.3689249753952026, "learning_rate": 1.9338820869654056e-06, "loss": 0.12127304077148438, "step": 5246 }, { "epoch": 0.7311363478018533, "grad_norm": 0.7441880702972412, "learning_rate": 1.9320207820911387e-06, "loss": 0.09173774719238281, "step": 5247 }, { "epoch": 0.7312756914930677, "grad_norm": 0.9384174346923828, "learning_rate": 1.930160158851811e-06, "loss": 0.09791183471679688, "step": 5248 }, { "epoch": 0.7314150351842821, "grad_norm": 0.4312819838523865, "learning_rate": 1.9283002176608116e-06, "loss": 0.07051849365234375, "step": 5249 }, { "epoch": 0.7315543788754965, "grad_norm": 1.1201516389846802, "learning_rate": 1.9264409589313767e-06, "loss": 0.1409587860107422, "step": 5250 }, { "epoch": 0.7316937225667108, "grad_norm": 0.8650641441345215, "learning_rate": 1.9245823830765874e-06, "loss": 0.12196540832519531, "step": 5251 }, { "epoch": 0.7318330662579252, "grad_norm": 1.0727818012237549, "learning_rate": 1.92272449050938e-06, "loss": 0.12475872039794922, "step": 5252 }, { "epoch": 0.7319724099491396, "grad_norm": 0.9161220788955688, "learning_rate": 1.920867281642538e-06, "loss": 0.10989570617675781, "step": 5253 }, { "epoch": 0.732111753640354, "grad_norm": 1.7137728929519653, "learning_rate": 1.919010756888685e-06, "loss": 0.10669422149658203, "step": 5254 }, { "epoch": 0.7322510973315683, "grad_norm": 1.7382724285125732, "learning_rate": 1.917154916660304e-06, "loss": 0.12253141403198242, "step": 5255 }, { "epoch": 0.7323904410227827, "grad_norm": 0.5351114273071289, "learning_rate": 1.9152997613697184e-06, "loss": 0.08155250549316406, "step": 5256 }, { "epoch": 0.7325297847139971, "grad_norm": 0.40346840023994446, "learning_rate": 1.913445291429099e-06, "loss": 0.05562591552734375, "step": 5257 }, { "epoch": 0.7326691284052115, "grad_norm": 0.9480301141738892, "learning_rate": 1.9115915072504683e-06, "loss": 0.07134246826171875, "step": 5258 }, { "epoch": 0.7328084720964259, "grad_norm": 1.6954872608184814, "learning_rate": 1.909738409245697e-06, "loss": 0.12172317504882812, "step": 5259 }, { "epoch": 0.7329478157876402, "grad_norm": 1.2573332786560059, "learning_rate": 1.9078859978264995e-06, "loss": 0.10015106201171875, "step": 5260 }, { "epoch": 0.7330871594788546, "grad_norm": 0.9001079201698303, "learning_rate": 1.9060342734044374e-06, "loss": 0.0843353271484375, "step": 5261 }, { "epoch": 0.733226503170069, "grad_norm": 0.7169435024261475, "learning_rate": 1.904183236390923e-06, "loss": 0.0843353271484375, "step": 5262 }, { "epoch": 0.7333658468612834, "grad_norm": 0.9510001540184021, "learning_rate": 1.9023328871972163e-06, "loss": 0.09322357177734375, "step": 5263 }, { "epoch": 0.7335051905524977, "grad_norm": 0.9594963788986206, "learning_rate": 1.9004832262344197e-06, "loss": 0.10058784484863281, "step": 5264 }, { "epoch": 0.7336445342437121, "grad_norm": 1.6316791772842407, "learning_rate": 1.8986342539134873e-06, "loss": 0.10175895690917969, "step": 5265 }, { "epoch": 0.7337838779349265, "grad_norm": 0.9781923294067383, "learning_rate": 1.8967859706452196e-06, "loss": 0.08597183227539062, "step": 5266 }, { "epoch": 0.7339232216261409, "grad_norm": 0.5193711519241333, "learning_rate": 1.894938376840262e-06, "loss": 0.07575416564941406, "step": 5267 }, { "epoch": 0.7340625653173553, "grad_norm": 0.6362305879592896, "learning_rate": 1.8930914729091055e-06, "loss": 0.07687664031982422, "step": 5268 }, { "epoch": 0.7342019090085696, "grad_norm": 1.5691771507263184, "learning_rate": 1.8912452592620916e-06, "loss": 0.11225128173828125, "step": 5269 }, { "epoch": 0.734341252699784, "grad_norm": 0.6325458884239197, "learning_rate": 1.8893997363094086e-06, "loss": 0.09940719604492188, "step": 5270 }, { "epoch": 0.7344805963909984, "grad_norm": 0.8119149208068848, "learning_rate": 1.8875549044610886e-06, "loss": 0.07717418670654297, "step": 5271 }, { "epoch": 0.7346199400822128, "grad_norm": 1.1861642599105835, "learning_rate": 1.8857107641270084e-06, "loss": 0.14923095703125, "step": 5272 }, { "epoch": 0.7347592837734271, "grad_norm": 0.3234228789806366, "learning_rate": 1.8838673157168956e-06, "loss": 0.0630950927734375, "step": 5273 }, { "epoch": 0.7348986274646415, "grad_norm": 0.7811183333396912, "learning_rate": 1.8820245596403253e-06, "loss": 0.09055519104003906, "step": 5274 }, { "epoch": 0.7350379711558559, "grad_norm": 0.3106610178947449, "learning_rate": 1.8801824963067105e-06, "loss": 0.07109642028808594, "step": 5275 }, { "epoch": 0.7351773148470703, "grad_norm": 0.678915798664093, "learning_rate": 1.8783411261253208e-06, "loss": 0.09521865844726562, "step": 5276 }, { "epoch": 0.7353166585382847, "grad_norm": 1.0828187465667725, "learning_rate": 1.8765004495052623e-06, "loss": 0.09689712524414062, "step": 5277 }, { "epoch": 0.735456002229499, "grad_norm": 0.46864983439445496, "learning_rate": 1.8746604668554952e-06, "loss": 0.08044624328613281, "step": 5278 }, { "epoch": 0.7355953459207134, "grad_norm": 0.5689334869384766, "learning_rate": 1.8728211785848176e-06, "loss": 0.07897186279296875, "step": 5279 }, { "epoch": 0.7357346896119278, "grad_norm": 0.7589076161384583, "learning_rate": 1.8709825851018798e-06, "loss": 0.08384227752685547, "step": 5280 }, { "epoch": 0.7358740333031422, "grad_norm": 0.7720663547515869, "learning_rate": 1.869144686815178e-06, "loss": 0.10235786437988281, "step": 5281 }, { "epoch": 0.7360133769943565, "grad_norm": 0.8306959271430969, "learning_rate": 1.8673074841330447e-06, "loss": 0.09022140502929688, "step": 5282 }, { "epoch": 0.7361527206855709, "grad_norm": 1.3837916851043701, "learning_rate": 1.8654709774636676e-06, "loss": 0.12152099609375, "step": 5283 }, { "epoch": 0.7362920643767853, "grad_norm": 0.581948459148407, "learning_rate": 1.8636351672150771e-06, "loss": 0.08531570434570312, "step": 5284 }, { "epoch": 0.7364314080679997, "grad_norm": 0.34726467728614807, "learning_rate": 1.8618000537951496e-06, "loss": 0.06553268432617188, "step": 5285 }, { "epoch": 0.736570751759214, "grad_norm": 0.553432047367096, "learning_rate": 1.8599656376116026e-06, "loss": 0.08859825134277344, "step": 5286 }, { "epoch": 0.7367100954504285, "grad_norm": 0.9722211360931396, "learning_rate": 1.8581319190720038e-06, "loss": 0.1411151885986328, "step": 5287 }, { "epoch": 0.7368494391416429, "grad_norm": 0.7618229985237122, "learning_rate": 1.8562988985837632e-06, "loss": 0.09891319274902344, "step": 5288 }, { "epoch": 0.7369887828328573, "grad_norm": 0.36590731143951416, "learning_rate": 1.854466576554133e-06, "loss": 0.061496734619140625, "step": 5289 }, { "epoch": 0.7371281265240717, "grad_norm": 0.7786855697631836, "learning_rate": 1.8526349533902161e-06, "loss": 0.07927513122558594, "step": 5290 }, { "epoch": 0.737267470215286, "grad_norm": 0.8620100617408752, "learning_rate": 1.8508040294989588e-06, "loss": 0.09538459777832031, "step": 5291 }, { "epoch": 0.7374068139065004, "grad_norm": 0.8748332858085632, "learning_rate": 1.8489738052871486e-06, "loss": 0.09715747833251953, "step": 5292 }, { "epoch": 0.7375461575977148, "grad_norm": 1.4012558460235596, "learning_rate": 1.8471442811614177e-06, "loss": 0.1369161605834961, "step": 5293 }, { "epoch": 0.7376855012889292, "grad_norm": 0.5680184960365295, "learning_rate": 1.8453154575282472e-06, "loss": 0.06651973724365234, "step": 5294 }, { "epoch": 0.7378248449801436, "grad_norm": 0.6700828671455383, "learning_rate": 1.8434873347939608e-06, "loss": 0.08251762390136719, "step": 5295 }, { "epoch": 0.7379641886713579, "grad_norm": 1.3577505350112915, "learning_rate": 1.8416599133647223e-06, "loss": 0.13010787963867188, "step": 5296 }, { "epoch": 0.7381035323625723, "grad_norm": 1.1494413614273071, "learning_rate": 1.839833193646547e-06, "loss": 0.0983734130859375, "step": 5297 }, { "epoch": 0.7382428760537867, "grad_norm": 0.9015903472900391, "learning_rate": 1.8380071760452862e-06, "loss": 0.08919715881347656, "step": 5298 }, { "epoch": 0.7383822197450011, "grad_norm": 0.545279860496521, "learning_rate": 1.8361818609666433e-06, "loss": 0.07405853271484375, "step": 5299 }, { "epoch": 0.7385215634362154, "grad_norm": 0.39086514711380005, "learning_rate": 1.8343572488161576e-06, "loss": 0.06999969482421875, "step": 5300 }, { "epoch": 0.7386609071274298, "grad_norm": 1.3350118398666382, "learning_rate": 1.832533339999219e-06, "loss": 0.11247062683105469, "step": 5301 }, { "epoch": 0.7388002508186442, "grad_norm": 0.7146146893501282, "learning_rate": 1.8307101349210588e-06, "loss": 0.1012725830078125, "step": 5302 }, { "epoch": 0.7389395945098586, "grad_norm": 1.0296804904937744, "learning_rate": 1.8288876339867511e-06, "loss": 0.08829116821289062, "step": 5303 }, { "epoch": 0.739078938201073, "grad_norm": 0.9425792694091797, "learning_rate": 1.8270658376012112e-06, "loss": 0.1225748062133789, "step": 5304 }, { "epoch": 0.7392182818922873, "grad_norm": 0.9015663862228394, "learning_rate": 1.8252447461692029e-06, "loss": 0.11060810089111328, "step": 5305 }, { "epoch": 0.7393576255835017, "grad_norm": 0.7964217066764832, "learning_rate": 1.8234243600953334e-06, "loss": 0.10930633544921875, "step": 5306 }, { "epoch": 0.7394969692747161, "grad_norm": 0.6314270496368408, "learning_rate": 1.8216046797840465e-06, "loss": 0.07750797271728516, "step": 5307 }, { "epoch": 0.7396363129659305, "grad_norm": 0.9723740816116333, "learning_rate": 1.8197857056396372e-06, "loss": 0.09420013427734375, "step": 5308 }, { "epoch": 0.7397756566571448, "grad_norm": 1.23246431350708, "learning_rate": 1.8179674380662372e-06, "loss": 0.11060047149658203, "step": 5309 }, { "epoch": 0.7399150003483592, "grad_norm": 0.8185647130012512, "learning_rate": 1.8161498774678271e-06, "loss": 0.12771224975585938, "step": 5310 }, { "epoch": 0.7400543440395736, "grad_norm": 0.975026547908783, "learning_rate": 1.8143330242482244e-06, "loss": 0.10401344299316406, "step": 5311 }, { "epoch": 0.740193687730788, "grad_norm": 0.6103703379631042, "learning_rate": 1.8125168788110932e-06, "loss": 0.08621025085449219, "step": 5312 }, { "epoch": 0.7403330314220024, "grad_norm": 1.0753685235977173, "learning_rate": 1.8107014415599416e-06, "loss": 0.0778350830078125, "step": 5313 }, { "epoch": 0.7404723751132167, "grad_norm": 0.6684495806694031, "learning_rate": 1.808886712898117e-06, "loss": 0.08403491973876953, "step": 5314 }, { "epoch": 0.7406117188044311, "grad_norm": 1.4140770435333252, "learning_rate": 1.8070726932288086e-06, "loss": 0.1268787384033203, "step": 5315 }, { "epoch": 0.7407510624956455, "grad_norm": 0.32575342059135437, "learning_rate": 1.8052593829550525e-06, "loss": 0.05536460876464844, "step": 5316 }, { "epoch": 0.7408904061868599, "grad_norm": 1.8798874616622925, "learning_rate": 1.8034467824797252e-06, "loss": 0.11342620849609375, "step": 5317 }, { "epoch": 0.7410297498780742, "grad_norm": 0.7770450115203857, "learning_rate": 1.8016348922055448e-06, "loss": 0.07431602478027344, "step": 5318 }, { "epoch": 0.7411690935692886, "grad_norm": 0.7976288199424744, "learning_rate": 1.7998237125350698e-06, "loss": 0.08332252502441406, "step": 5319 }, { "epoch": 0.741308437260503, "grad_norm": 0.9832298159599304, "learning_rate": 1.7980132438707059e-06, "loss": 0.09070014953613281, "step": 5320 }, { "epoch": 0.7414477809517174, "grad_norm": 1.1890368461608887, "learning_rate": 1.7962034866146954e-06, "loss": 0.13544464111328125, "step": 5321 }, { "epoch": 0.7415871246429317, "grad_norm": 0.6610950827598572, "learning_rate": 1.794394441169126e-06, "loss": 0.09099769592285156, "step": 5322 }, { "epoch": 0.7417264683341461, "grad_norm": 0.9650443196296692, "learning_rate": 1.7925861079359268e-06, "loss": 0.1070094108581543, "step": 5323 }, { "epoch": 0.7418658120253605, "grad_norm": 1.0724159479141235, "learning_rate": 1.790778487316871e-06, "loss": 0.09749221801757812, "step": 5324 }, { "epoch": 0.7420051557165749, "grad_norm": 1.0057004690170288, "learning_rate": 1.7889715797135643e-06, "loss": 0.10318183898925781, "step": 5325 }, { "epoch": 0.7421444994077893, "grad_norm": 0.753519594669342, "learning_rate": 1.7871653855274634e-06, "loss": 0.1038360595703125, "step": 5326 }, { "epoch": 0.7422838430990036, "grad_norm": 0.7593231201171875, "learning_rate": 1.7853599051598658e-06, "loss": 0.08726119995117188, "step": 5327 }, { "epoch": 0.7424231867902181, "grad_norm": 0.6798810362815857, "learning_rate": 1.7835551390119033e-06, "loss": 0.09475135803222656, "step": 5328 }, { "epoch": 0.7425625304814325, "grad_norm": 0.7144794464111328, "learning_rate": 1.7817510874845585e-06, "loss": 0.09998893737792969, "step": 5329 }, { "epoch": 0.7427018741726469, "grad_norm": 0.8336343765258789, "learning_rate": 1.779947750978646e-06, "loss": 0.09072113037109375, "step": 5330 }, { "epoch": 0.7428412178638613, "grad_norm": 0.5368489027023315, "learning_rate": 1.7781451298948305e-06, "loss": 0.08919143676757812, "step": 5331 }, { "epoch": 0.7429805615550756, "grad_norm": 0.6739060878753662, "learning_rate": 1.7763432246336087e-06, "loss": 0.101226806640625, "step": 5332 }, { "epoch": 0.74311990524629, "grad_norm": 0.6101112961769104, "learning_rate": 1.7745420355953253e-06, "loss": 0.09202957153320312, "step": 5333 }, { "epoch": 0.7432592489375044, "grad_norm": 0.6126689314842224, "learning_rate": 1.7727415631801648e-06, "loss": 0.07265853881835938, "step": 5334 }, { "epoch": 0.7433985926287188, "grad_norm": 1.176163673400879, "learning_rate": 1.7709418077881495e-06, "loss": 0.09452056884765625, "step": 5335 }, { "epoch": 0.7435379363199331, "grad_norm": 1.0870904922485352, "learning_rate": 1.7691427698191422e-06, "loss": 0.10834884643554688, "step": 5336 }, { "epoch": 0.7436772800111475, "grad_norm": 1.4852747917175293, "learning_rate": 1.7673444496728493e-06, "loss": 0.1101083755493164, "step": 5337 }, { "epoch": 0.7438166237023619, "grad_norm": 0.9493586421012878, "learning_rate": 1.7655468477488191e-06, "loss": 0.11075782775878906, "step": 5338 }, { "epoch": 0.7439559673935763, "grad_norm": 1.377183437347412, "learning_rate": 1.763749964446435e-06, "loss": 0.11799240112304688, "step": 5339 }, { "epoch": 0.7440953110847907, "grad_norm": 0.8056616187095642, "learning_rate": 1.7619538001649228e-06, "loss": 0.0846710205078125, "step": 5340 }, { "epoch": 0.744234654776005, "grad_norm": 1.267351746559143, "learning_rate": 1.7601583553033502e-06, "loss": 0.11300468444824219, "step": 5341 }, { "epoch": 0.7443739984672194, "grad_norm": 0.8614610433578491, "learning_rate": 1.7583636302606254e-06, "loss": 0.09548377990722656, "step": 5342 }, { "epoch": 0.7445133421584338, "grad_norm": 1.1139081716537476, "learning_rate": 1.756569625435493e-06, "loss": 0.10924720764160156, "step": 5343 }, { "epoch": 0.7446526858496482, "grad_norm": 1.6049326658248901, "learning_rate": 1.7547763412265412e-06, "loss": 0.12920570373535156, "step": 5344 }, { "epoch": 0.7447920295408625, "grad_norm": 1.7079614400863647, "learning_rate": 1.7529837780321979e-06, "loss": 0.13689041137695312, "step": 5345 }, { "epoch": 0.7449313732320769, "grad_norm": 0.38030895590782166, "learning_rate": 1.751191936250729e-06, "loss": 0.06818771362304688, "step": 5346 }, { "epoch": 0.7450707169232913, "grad_norm": 1.239558458328247, "learning_rate": 1.7494008162802378e-06, "loss": 0.11045265197753906, "step": 5347 }, { "epoch": 0.7452100606145057, "grad_norm": 1.3588374853134155, "learning_rate": 1.7476104185186737e-06, "loss": 0.13154029846191406, "step": 5348 }, { "epoch": 0.74534940430572, "grad_norm": 0.99973064661026, "learning_rate": 1.7458207433638225e-06, "loss": 0.0979766845703125, "step": 5349 }, { "epoch": 0.7454887479969344, "grad_norm": 1.4470545053482056, "learning_rate": 1.7440317912133076e-06, "loss": 0.10342216491699219, "step": 5350 }, { "epoch": 0.7456280916881488, "grad_norm": 1.2484625577926636, "learning_rate": 1.7422435624645928e-06, "loss": 0.10358047485351562, "step": 5351 }, { "epoch": 0.7457674353793632, "grad_norm": 0.7681310176849365, "learning_rate": 1.7404560575149821e-06, "loss": 0.09066009521484375, "step": 5352 }, { "epoch": 0.7459067790705776, "grad_norm": 0.49550992250442505, "learning_rate": 1.7386692767616204e-06, "loss": 0.07820892333984375, "step": 5353 }, { "epoch": 0.7460461227617919, "grad_norm": 1.1105304956436157, "learning_rate": 1.7368832206014863e-06, "loss": 0.11348342895507812, "step": 5354 }, { "epoch": 0.7461854664530063, "grad_norm": 0.6713313460350037, "learning_rate": 1.735097889431404e-06, "loss": 0.0746011734008789, "step": 5355 }, { "epoch": 0.7463248101442207, "grad_norm": 0.959757924079895, "learning_rate": 1.733313283648032e-06, "loss": 0.09707450866699219, "step": 5356 }, { "epoch": 0.7464641538354351, "grad_norm": 0.7767303586006165, "learning_rate": 1.7315294036478664e-06, "loss": 0.09124565124511719, "step": 5357 }, { "epoch": 0.7466034975266495, "grad_norm": 0.7416813969612122, "learning_rate": 1.7297462498272476e-06, "loss": 0.10146522521972656, "step": 5358 }, { "epoch": 0.7467428412178638, "grad_norm": 0.8318707346916199, "learning_rate": 1.727963822582352e-06, "loss": 0.09490585327148438, "step": 5359 }, { "epoch": 0.7468821849090782, "grad_norm": 1.2283968925476074, "learning_rate": 1.7261821223091918e-06, "loss": 0.11171436309814453, "step": 5360 }, { "epoch": 0.7470215286002926, "grad_norm": 1.0124337673187256, "learning_rate": 1.7244011494036228e-06, "loss": 0.11637115478515625, "step": 5361 }, { "epoch": 0.747160872291507, "grad_norm": 1.2139263153076172, "learning_rate": 1.722620904261334e-06, "loss": 0.09758377075195312, "step": 5362 }, { "epoch": 0.7473002159827213, "grad_norm": 1.084000587463379, "learning_rate": 1.720841387277858e-06, "loss": 0.11930274963378906, "step": 5363 }, { "epoch": 0.7474395596739357, "grad_norm": 0.7295516729354858, "learning_rate": 1.7190625988485593e-06, "loss": 0.0776214599609375, "step": 5364 }, { "epoch": 0.7475789033651501, "grad_norm": 1.6768863201141357, "learning_rate": 1.7172845393686465e-06, "loss": 0.13538742065429688, "step": 5365 }, { "epoch": 0.7477182470563645, "grad_norm": 0.41037169098854065, "learning_rate": 1.7155072092331648e-06, "loss": 0.07081794738769531, "step": 5366 }, { "epoch": 0.7478575907475788, "grad_norm": 0.8585652709007263, "learning_rate": 1.7137306088369948e-06, "loss": 0.11037254333496094, "step": 5367 }, { "epoch": 0.7479969344387933, "grad_norm": 2.686375617980957, "learning_rate": 1.7119547385748552e-06, "loss": 0.17756271362304688, "step": 5368 }, { "epoch": 0.7481362781300077, "grad_norm": 1.4900089502334595, "learning_rate": 1.7101795988413056e-06, "loss": 0.10959911346435547, "step": 5369 }, { "epoch": 0.7482756218212221, "grad_norm": 0.7843888998031616, "learning_rate": 1.708405190030743e-06, "loss": 0.07401275634765625, "step": 5370 }, { "epoch": 0.7484149655124365, "grad_norm": 0.732657253742218, "learning_rate": 1.7066315125373984e-06, "loss": 0.11419868469238281, "step": 5371 }, { "epoch": 0.7485543092036508, "grad_norm": 1.013064980506897, "learning_rate": 1.7048585667553414e-06, "loss": 0.09157943725585938, "step": 5372 }, { "epoch": 0.7486936528948652, "grad_norm": 0.8174221515655518, "learning_rate": 1.7030863530784814e-06, "loss": 0.10547828674316406, "step": 5373 }, { "epoch": 0.7488329965860796, "grad_norm": 0.6628208160400391, "learning_rate": 1.7013148719005652e-06, "loss": 0.08314704895019531, "step": 5374 }, { "epoch": 0.748972340277294, "grad_norm": 0.6385287046432495, "learning_rate": 1.6995441236151732e-06, "loss": 0.09206771850585938, "step": 5375 }, { "epoch": 0.7491116839685084, "grad_norm": 0.838553249835968, "learning_rate": 1.6977741086157273e-06, "loss": 0.09335899353027344, "step": 5376 }, { "epoch": 0.7492510276597227, "grad_norm": 0.3371921181678772, "learning_rate": 1.6960048272954821e-06, "loss": 0.05524873733520508, "step": 5377 }, { "epoch": 0.7493903713509371, "grad_norm": 1.3580540418624878, "learning_rate": 1.6942362800475343e-06, "loss": 0.09428977966308594, "step": 5378 }, { "epoch": 0.7495297150421515, "grad_norm": 0.5332575440406799, "learning_rate": 1.6924684672648117e-06, "loss": 0.07366180419921875, "step": 5379 }, { "epoch": 0.7496690587333659, "grad_norm": 1.0779242515563965, "learning_rate": 1.6907013893400838e-06, "loss": 0.11074066162109375, "step": 5380 }, { "epoch": 0.7498084024245802, "grad_norm": 0.44665607810020447, "learning_rate": 1.6889350466659554e-06, "loss": 0.070068359375, "step": 5381 }, { "epoch": 0.7499477461157946, "grad_norm": 0.7247703671455383, "learning_rate": 1.687169439634867e-06, "loss": 0.08498668670654297, "step": 5382 }, { "epoch": 0.750087089807009, "grad_norm": 1.7624409198760986, "learning_rate": 1.6854045686390947e-06, "loss": 0.16098785400390625, "step": 5383 }, { "epoch": 0.7502264334982234, "grad_norm": 0.9724414348602295, "learning_rate": 1.6836404340707535e-06, "loss": 0.08379936218261719, "step": 5384 }, { "epoch": 0.7503657771894378, "grad_norm": 0.7421772480010986, "learning_rate": 1.6818770363217957e-06, "loss": 0.07422637939453125, "step": 5385 }, { "epoch": 0.7505051208806521, "grad_norm": 1.2032305002212524, "learning_rate": 1.6801143757840043e-06, "loss": 0.09521102905273438, "step": 5386 }, { "epoch": 0.7506444645718665, "grad_norm": 0.502497136592865, "learning_rate": 1.678352452849007e-06, "loss": 0.06769752502441406, "step": 5387 }, { "epoch": 0.7507838082630809, "grad_norm": 0.8246985077857971, "learning_rate": 1.6765912679082592e-06, "loss": 0.08379840850830078, "step": 5388 }, { "epoch": 0.7509231519542953, "grad_norm": 1.804659128189087, "learning_rate": 1.6748308213530555e-06, "loss": 0.10778617858886719, "step": 5389 }, { "epoch": 0.7510624956455096, "grad_norm": 0.9757303595542908, "learning_rate": 1.6730711135745287e-06, "loss": 0.08704376220703125, "step": 5390 }, { "epoch": 0.751201839336724, "grad_norm": 0.42289504408836365, "learning_rate": 1.6713121449636471e-06, "loss": 0.07113838195800781, "step": 5391 }, { "epoch": 0.7513411830279384, "grad_norm": 1.4925047159194946, "learning_rate": 1.6695539159112112e-06, "loss": 0.1518878936767578, "step": 5392 }, { "epoch": 0.7514805267191528, "grad_norm": 0.8749409914016724, "learning_rate": 1.6677964268078584e-06, "loss": 0.08867645263671875, "step": 5393 }, { "epoch": 0.7516198704103672, "grad_norm": 0.4283216893672943, "learning_rate": 1.666039678044064e-06, "loss": 0.07566261291503906, "step": 5394 }, { "epoch": 0.7517592141015815, "grad_norm": 0.8120799660682678, "learning_rate": 1.6642836700101396e-06, "loss": 0.09807586669921875, "step": 5395 }, { "epoch": 0.7518985577927959, "grad_norm": 0.6478936076164246, "learning_rate": 1.6625284030962257e-06, "loss": 0.0800628662109375, "step": 5396 }, { "epoch": 0.7520379014840103, "grad_norm": 1.1308578252792358, "learning_rate": 1.6607738776923072e-06, "loss": 0.09593963623046875, "step": 5397 }, { "epoch": 0.7521772451752247, "grad_norm": 1.3617417812347412, "learning_rate": 1.659020094188195e-06, "loss": 0.1226339340209961, "step": 5398 }, { "epoch": 0.752316588866439, "grad_norm": 0.5389484167098999, "learning_rate": 1.657267052973544e-06, "loss": 0.06878852844238281, "step": 5399 }, { "epoch": 0.7524559325576534, "grad_norm": 0.7586172223091125, "learning_rate": 1.6555147544378364e-06, "loss": 0.10009574890136719, "step": 5400 }, { "epoch": 0.7525952762488678, "grad_norm": 0.7381013631820679, "learning_rate": 1.653763198970394e-06, "loss": 0.08375835418701172, "step": 5401 }, { "epoch": 0.7527346199400822, "grad_norm": 0.8769290447235107, "learning_rate": 1.652012386960375e-06, "loss": 0.10600090026855469, "step": 5402 }, { "epoch": 0.7528739636312965, "grad_norm": 0.7055816054344177, "learning_rate": 1.6502623187967675e-06, "loss": 0.070220947265625, "step": 5403 }, { "epoch": 0.7530133073225109, "grad_norm": 0.9969320297241211, "learning_rate": 1.6485129948683954e-06, "loss": 0.09353065490722656, "step": 5404 }, { "epoch": 0.7531526510137253, "grad_norm": 1.1025124788284302, "learning_rate": 1.64676441556392e-06, "loss": 0.08372688293457031, "step": 5405 }, { "epoch": 0.7532919947049397, "grad_norm": 0.576245903968811, "learning_rate": 1.6450165812718377e-06, "loss": 0.0858917236328125, "step": 5406 }, { "epoch": 0.7534313383961541, "grad_norm": 0.6182284355163574, "learning_rate": 1.643269492380473e-06, "loss": 0.09709739685058594, "step": 5407 }, { "epoch": 0.7535706820873685, "grad_norm": 0.4386487603187561, "learning_rate": 1.6415231492779942e-06, "loss": 0.06860542297363281, "step": 5408 }, { "epoch": 0.7537100257785829, "grad_norm": 0.6935343146324158, "learning_rate": 1.6397775523523946e-06, "loss": 0.07787322998046875, "step": 5409 }, { "epoch": 0.7538493694697973, "grad_norm": 0.8797628283500671, "learning_rate": 1.6380327019915088e-06, "loss": 0.08823585510253906, "step": 5410 }, { "epoch": 0.7539887131610117, "grad_norm": 1.2650768756866455, "learning_rate": 1.6362885985830001e-06, "loss": 0.1016845703125, "step": 5411 }, { "epoch": 0.7541280568522261, "grad_norm": 0.6499858498573303, "learning_rate": 1.6345452425143705e-06, "loss": 0.07074356079101562, "step": 5412 }, { "epoch": 0.7542674005434404, "grad_norm": 1.005212426185608, "learning_rate": 1.6328026341729547e-06, "loss": 0.10367012023925781, "step": 5413 }, { "epoch": 0.7544067442346548, "grad_norm": 0.9682257175445557, "learning_rate": 1.6310607739459188e-06, "loss": 0.10587882995605469, "step": 5414 }, { "epoch": 0.7545460879258692, "grad_norm": 1.8394047021865845, "learning_rate": 1.6293196622202635e-06, "loss": 0.14856719970703125, "step": 5415 }, { "epoch": 0.7546854316170836, "grad_norm": 1.6533085107803345, "learning_rate": 1.6275792993828249e-06, "loss": 0.1047210693359375, "step": 5416 }, { "epoch": 0.754824775308298, "grad_norm": 0.7311619520187378, "learning_rate": 1.6258396858202746e-06, "loss": 0.09606742858886719, "step": 5417 }, { "epoch": 0.7549641189995123, "grad_norm": 0.41806846857070923, "learning_rate": 1.6241008219191107e-06, "loss": 0.06737613677978516, "step": 5418 }, { "epoch": 0.7551034626907267, "grad_norm": 0.6869854927062988, "learning_rate": 1.622362708065673e-06, "loss": 0.07767486572265625, "step": 5419 }, { "epoch": 0.7552428063819411, "grad_norm": 1.2493972778320312, "learning_rate": 1.6206253446461278e-06, "loss": 0.11208534240722656, "step": 5420 }, { "epoch": 0.7553821500731555, "grad_norm": 0.7289895415306091, "learning_rate": 1.618888732046478e-06, "loss": 0.0860757827758789, "step": 5421 }, { "epoch": 0.7555214937643698, "grad_norm": 1.3830054998397827, "learning_rate": 1.6171528706525596e-06, "loss": 0.11406326293945312, "step": 5422 }, { "epoch": 0.7556608374555842, "grad_norm": 0.5152040719985962, "learning_rate": 1.6154177608500415e-06, "loss": 0.07373619079589844, "step": 5423 }, { "epoch": 0.7558001811467986, "grad_norm": 1.4573668241500854, "learning_rate": 1.6136834030244292e-06, "loss": 0.1355733871459961, "step": 5424 }, { "epoch": 0.755939524838013, "grad_norm": 1.0065234899520874, "learning_rate": 1.61194979756105e-06, "loss": 0.10677719116210938, "step": 5425 }, { "epoch": 0.7560788685292273, "grad_norm": 0.5993707180023193, "learning_rate": 1.6102169448450756e-06, "loss": 0.09101295471191406, "step": 5426 }, { "epoch": 0.7562182122204417, "grad_norm": 0.48354119062423706, "learning_rate": 1.6084848452615076e-06, "loss": 0.06707382202148438, "step": 5427 }, { "epoch": 0.7563575559116561, "grad_norm": 0.6357414126396179, "learning_rate": 1.6067534991951754e-06, "loss": 0.08536338806152344, "step": 5428 }, { "epoch": 0.7564968996028705, "grad_norm": 1.0894298553466797, "learning_rate": 1.6050229070307488e-06, "loss": 0.11353492736816406, "step": 5429 }, { "epoch": 0.7566362432940849, "grad_norm": 1.2499582767486572, "learning_rate": 1.6032930691527214e-06, "loss": 0.10374164581298828, "step": 5430 }, { "epoch": 0.7567755869852992, "grad_norm": 1.0264135599136353, "learning_rate": 1.6015639859454278e-06, "loss": 0.09646224975585938, "step": 5431 }, { "epoch": 0.7569149306765136, "grad_norm": 1.0734357833862305, "learning_rate": 1.5998356577930274e-06, "loss": 0.10529518127441406, "step": 5432 }, { "epoch": 0.757054274367728, "grad_norm": 1.50846266746521, "learning_rate": 1.5981080850795171e-06, "loss": 0.12991905212402344, "step": 5433 }, { "epoch": 0.7571936180589424, "grad_norm": 0.601259171962738, "learning_rate": 1.5963812681887248e-06, "loss": 0.08218765258789062, "step": 5434 }, { "epoch": 0.7573329617501567, "grad_norm": 0.6500915884971619, "learning_rate": 1.5946552075043092e-06, "loss": 0.08340835571289062, "step": 5435 }, { "epoch": 0.7574723054413711, "grad_norm": 0.9857394099235535, "learning_rate": 1.592929903409759e-06, "loss": 0.11584854125976562, "step": 5436 }, { "epoch": 0.7576116491325855, "grad_norm": 1.9855384826660156, "learning_rate": 1.5912053562884e-06, "loss": 0.14066314697265625, "step": 5437 }, { "epoch": 0.7577509928237999, "grad_norm": 0.9623340964317322, "learning_rate": 1.589481566523388e-06, "loss": 0.10153388977050781, "step": 5438 }, { "epoch": 0.7578903365150143, "grad_norm": 0.6862382292747498, "learning_rate": 1.587758534497707e-06, "loss": 0.08093070983886719, "step": 5439 }, { "epoch": 0.7580296802062286, "grad_norm": 0.4706345200538635, "learning_rate": 1.5860362605941788e-06, "loss": 0.05980873107910156, "step": 5440 }, { "epoch": 0.758169023897443, "grad_norm": 1.1014152765274048, "learning_rate": 1.5843147451954493e-06, "loss": 0.10384941101074219, "step": 5441 }, { "epoch": 0.7583083675886574, "grad_norm": 1.106798529624939, "learning_rate": 1.5825939886840036e-06, "loss": 0.14406967163085938, "step": 5442 }, { "epoch": 0.7584477112798718, "grad_norm": 0.8048821091651917, "learning_rate": 1.5808739914421512e-06, "loss": 0.11635398864746094, "step": 5443 }, { "epoch": 0.7585870549710861, "grad_norm": 1.0160887241363525, "learning_rate": 1.5791547538520386e-06, "loss": 0.0850229263305664, "step": 5444 }, { "epoch": 0.7587263986623005, "grad_norm": 0.4835474491119385, "learning_rate": 1.5774362762956414e-06, "loss": 0.07576370239257812, "step": 5445 }, { "epoch": 0.7588657423535149, "grad_norm": 1.552894949913025, "learning_rate": 1.5757185591547653e-06, "loss": 0.11892318725585938, "step": 5446 }, { "epoch": 0.7590050860447293, "grad_norm": 0.8699231147766113, "learning_rate": 1.574001602811046e-06, "loss": 0.07613945007324219, "step": 5447 }, { "epoch": 0.7591444297359438, "grad_norm": 0.3903055489063263, "learning_rate": 1.5722854076459538e-06, "loss": 0.064361572265625, "step": 5448 }, { "epoch": 0.7592837734271581, "grad_norm": 1.2148369550704956, "learning_rate": 1.57056997404079e-06, "loss": 0.10884857177734375, "step": 5449 }, { "epoch": 0.7594231171183725, "grad_norm": 1.1126123666763306, "learning_rate": 1.5688553023766823e-06, "loss": 0.12281227111816406, "step": 5450 }, { "epoch": 0.7595624608095869, "grad_norm": 1.0495283603668213, "learning_rate": 1.5671413930345902e-06, "loss": 0.09147834777832031, "step": 5451 }, { "epoch": 0.7597018045008013, "grad_norm": 1.0609420537948608, "learning_rate": 1.5654282463953074e-06, "loss": 0.10765647888183594, "step": 5452 }, { "epoch": 0.7598411481920156, "grad_norm": 1.2320431470870972, "learning_rate": 1.5637158628394572e-06, "loss": 0.11595726013183594, "step": 5453 }, { "epoch": 0.75998049188323, "grad_norm": 2.6980652809143066, "learning_rate": 1.5620042427474892e-06, "loss": 0.14202499389648438, "step": 5454 }, { "epoch": 0.7601198355744444, "grad_norm": 0.9019707441329956, "learning_rate": 1.5602933864996872e-06, "loss": 0.11375808715820312, "step": 5455 }, { "epoch": 0.7602591792656588, "grad_norm": 0.43940332531929016, "learning_rate": 1.5585832944761686e-06, "loss": 0.072906494140625, "step": 5456 }, { "epoch": 0.7603985229568732, "grad_norm": 0.8503431677818298, "learning_rate": 1.5568739670568693e-06, "loss": 0.10375022888183594, "step": 5457 }, { "epoch": 0.7605378666480875, "grad_norm": 0.46168872714042664, "learning_rate": 1.555165404621567e-06, "loss": 0.06317138671875, "step": 5458 }, { "epoch": 0.7606772103393019, "grad_norm": 0.7011496424674988, "learning_rate": 1.5534576075498664e-06, "loss": 0.08354759216308594, "step": 5459 }, { "epoch": 0.7608165540305163, "grad_norm": 0.4908219277858734, "learning_rate": 1.5517505762211982e-06, "loss": 0.07127904891967773, "step": 5460 }, { "epoch": 0.7609558977217307, "grad_norm": 0.45828700065612793, "learning_rate": 1.5500443110148283e-06, "loss": 0.07832527160644531, "step": 5461 }, { "epoch": 0.761095241412945, "grad_norm": 0.9930283427238464, "learning_rate": 1.5483388123098474e-06, "loss": 0.12021636962890625, "step": 5462 }, { "epoch": 0.7612345851041594, "grad_norm": 0.972962498664856, "learning_rate": 1.546634080485181e-06, "loss": 0.08769798278808594, "step": 5463 }, { "epoch": 0.7613739287953738, "grad_norm": 0.8129028677940369, "learning_rate": 1.5449301159195785e-06, "loss": 0.10512542724609375, "step": 5464 }, { "epoch": 0.7615132724865882, "grad_norm": 0.847463846206665, "learning_rate": 1.5432269189916237e-06, "loss": 0.07881355285644531, "step": 5465 }, { "epoch": 0.7616526161778026, "grad_norm": 0.5594891905784607, "learning_rate": 1.54152449007973e-06, "loss": 0.06702423095703125, "step": 5466 }, { "epoch": 0.7617919598690169, "grad_norm": 0.7244643568992615, "learning_rate": 1.539822829562136e-06, "loss": 0.07637214660644531, "step": 5467 }, { "epoch": 0.7619313035602313, "grad_norm": 0.8799853920936584, "learning_rate": 1.5381219378169103e-06, "loss": 0.08585357666015625, "step": 5468 }, { "epoch": 0.7620706472514457, "grad_norm": 1.3514212369918823, "learning_rate": 1.5364218152219545e-06, "loss": 0.13495445251464844, "step": 5469 }, { "epoch": 0.7622099909426601, "grad_norm": 0.6872397661209106, "learning_rate": 1.5347224621549978e-06, "loss": 0.07487869262695312, "step": 5470 }, { "epoch": 0.7623493346338744, "grad_norm": 1.1263136863708496, "learning_rate": 1.5330238789935963e-06, "loss": 0.10529327392578125, "step": 5471 }, { "epoch": 0.7624886783250888, "grad_norm": 0.8139039874076843, "learning_rate": 1.5313260661151352e-06, "loss": 0.08182811737060547, "step": 5472 }, { "epoch": 0.7626280220163032, "grad_norm": 0.568324625492096, "learning_rate": 1.5296290238968303e-06, "loss": 0.07968807220458984, "step": 5473 }, { "epoch": 0.7627673657075176, "grad_norm": 0.700274646282196, "learning_rate": 1.5279327527157289e-06, "loss": 0.087677001953125, "step": 5474 }, { "epoch": 0.762906709398732, "grad_norm": 0.7365020513534546, "learning_rate": 1.526237252948699e-06, "loss": 0.08681678771972656, "step": 5475 }, { "epoch": 0.7630460530899463, "grad_norm": 1.2590696811676025, "learning_rate": 1.5245425249724443e-06, "loss": 0.11998939514160156, "step": 5476 }, { "epoch": 0.7631853967811607, "grad_norm": 1.0049984455108643, "learning_rate": 1.5228485691634964e-06, "loss": 0.08215951919555664, "step": 5477 }, { "epoch": 0.7633247404723751, "grad_norm": 1.1076706647872925, "learning_rate": 1.5211553858982115e-06, "loss": 0.1003265380859375, "step": 5478 }, { "epoch": 0.7634640841635895, "grad_norm": 0.7213104963302612, "learning_rate": 1.5194629755527746e-06, "loss": 0.09783744812011719, "step": 5479 }, { "epoch": 0.7636034278548038, "grad_norm": 0.7350333333015442, "learning_rate": 1.517771338503203e-06, "loss": 0.07852363586425781, "step": 5480 }, { "epoch": 0.7637427715460182, "grad_norm": 0.892251193523407, "learning_rate": 1.5160804751253405e-06, "loss": 0.08670234680175781, "step": 5481 }, { "epoch": 0.7638821152372326, "grad_norm": 0.8004680871963501, "learning_rate": 1.5143903857948572e-06, "loss": 0.09433174133300781, "step": 5482 }, { "epoch": 0.764021458928447, "grad_norm": 0.7669534683227539, "learning_rate": 1.5127010708872513e-06, "loss": 0.0965728759765625, "step": 5483 }, { "epoch": 0.7641608026196614, "grad_norm": 0.5892196893692017, "learning_rate": 1.5110125307778506e-06, "loss": 0.08372688293457031, "step": 5484 }, { "epoch": 0.7643001463108757, "grad_norm": 0.6679248213768005, "learning_rate": 1.5093247658418125e-06, "loss": 0.07973480224609375, "step": 5485 }, { "epoch": 0.7644394900020901, "grad_norm": 1.15636146068573, "learning_rate": 1.5076377764541162e-06, "loss": 0.10927391052246094, "step": 5486 }, { "epoch": 0.7645788336933045, "grad_norm": 0.8897743225097656, "learning_rate": 1.5059515629895754e-06, "loss": 0.10618019104003906, "step": 5487 }, { "epoch": 0.764718177384519, "grad_norm": 0.7096830010414124, "learning_rate": 1.5042661258228268e-06, "loss": 0.10196876525878906, "step": 5488 }, { "epoch": 0.7648575210757333, "grad_norm": 1.2973713874816895, "learning_rate": 1.502581465328335e-06, "loss": 0.11402511596679688, "step": 5489 }, { "epoch": 0.7649968647669477, "grad_norm": 1.4986166954040527, "learning_rate": 1.5008975818803939e-06, "loss": 0.10010242462158203, "step": 5490 }, { "epoch": 0.7651362084581621, "grad_norm": 0.6324976086616516, "learning_rate": 1.4992144758531257e-06, "loss": 0.07213401794433594, "step": 5491 }, { "epoch": 0.7652755521493765, "grad_norm": 1.0775635242462158, "learning_rate": 1.4975321476204767e-06, "loss": 0.08790779113769531, "step": 5492 }, { "epoch": 0.7654148958405909, "grad_norm": 0.5055960416793823, "learning_rate": 1.4958505975562205e-06, "loss": 0.081787109375, "step": 5493 }, { "epoch": 0.7655542395318052, "grad_norm": 0.8231300115585327, "learning_rate": 1.49416982603396e-06, "loss": 0.08276176452636719, "step": 5494 }, { "epoch": 0.7656935832230196, "grad_norm": 0.967821478843689, "learning_rate": 1.4924898334271265e-06, "loss": 0.08168220520019531, "step": 5495 }, { "epoch": 0.765832926914234, "grad_norm": 0.8216500878334045, "learning_rate": 1.4908106201089722e-06, "loss": 0.097015380859375, "step": 5496 }, { "epoch": 0.7659722706054484, "grad_norm": 0.6730252504348755, "learning_rate": 1.4891321864525826e-06, "loss": 0.09708213806152344, "step": 5497 }, { "epoch": 0.7661116142966627, "grad_norm": 0.6914403438568115, "learning_rate": 1.4874545328308681e-06, "loss": 0.08867549896240234, "step": 5498 }, { "epoch": 0.7662509579878771, "grad_norm": 0.7530841827392578, "learning_rate": 1.4857776596165635e-06, "loss": 0.08846473693847656, "step": 5499 }, { "epoch": 0.7663903016790915, "grad_norm": 0.5703619122505188, "learning_rate": 1.4841015671822306e-06, "loss": 0.06656646728515625, "step": 5500 }, { "epoch": 0.7665296453703059, "grad_norm": 1.2171090841293335, "learning_rate": 1.4824262559002595e-06, "loss": 0.10319042205810547, "step": 5501 }, { "epoch": 0.7666689890615203, "grad_norm": 1.1510170698165894, "learning_rate": 1.480751726142869e-06, "loss": 0.12730026245117188, "step": 5502 }, { "epoch": 0.7668083327527346, "grad_norm": 0.32383182644844055, "learning_rate": 1.4790779782820991e-06, "loss": 0.05948066711425781, "step": 5503 }, { "epoch": 0.766947676443949, "grad_norm": 0.9556418657302856, "learning_rate": 1.4774050126898164e-06, "loss": 0.11659431457519531, "step": 5504 }, { "epoch": 0.7670870201351634, "grad_norm": 0.6057087182998657, "learning_rate": 1.4757328297377177e-06, "loss": 0.07503700256347656, "step": 5505 }, { "epoch": 0.7672263638263778, "grad_norm": 1.0557941198349, "learning_rate": 1.474061429797326e-06, "loss": 0.08807563781738281, "step": 5506 }, { "epoch": 0.7673657075175921, "grad_norm": 0.6485729217529297, "learning_rate": 1.4723908132399838e-06, "loss": 0.0823211669921875, "step": 5507 }, { "epoch": 0.7675050512088065, "grad_norm": 0.5722631216049194, "learning_rate": 1.4707209804368683e-06, "loss": 0.07328987121582031, "step": 5508 }, { "epoch": 0.7676443949000209, "grad_norm": 0.4298166334629059, "learning_rate": 1.4690519317589742e-06, "loss": 0.06587696075439453, "step": 5509 }, { "epoch": 0.7677837385912353, "grad_norm": 0.614833652973175, "learning_rate": 1.4673836675771298e-06, "loss": 0.09412574768066406, "step": 5510 }, { "epoch": 0.7679230822824497, "grad_norm": 0.8309850692749023, "learning_rate": 1.4657161882619814e-06, "loss": 0.07724761962890625, "step": 5511 }, { "epoch": 0.768062425973664, "grad_norm": 0.9127821326255798, "learning_rate": 1.4640494941840072e-06, "loss": 0.10749435424804688, "step": 5512 }, { "epoch": 0.7682017696648784, "grad_norm": 1.1100696325302124, "learning_rate": 1.4623835857135099e-06, "loss": 0.09894084930419922, "step": 5513 }, { "epoch": 0.7683411133560928, "grad_norm": 1.083152413368225, "learning_rate": 1.460718463220615e-06, "loss": 0.11925601959228516, "step": 5514 }, { "epoch": 0.7684804570473072, "grad_norm": 1.1768956184387207, "learning_rate": 1.4590541270752723e-06, "loss": 0.10203266143798828, "step": 5515 }, { "epoch": 0.7686198007385215, "grad_norm": 0.574420690536499, "learning_rate": 1.457390577647262e-06, "loss": 0.07454967498779297, "step": 5516 }, { "epoch": 0.7687591444297359, "grad_norm": 0.45370790362358093, "learning_rate": 1.455727815306187e-06, "loss": 0.06433486938476562, "step": 5517 }, { "epoch": 0.7688984881209503, "grad_norm": 0.8980273008346558, "learning_rate": 1.454065840421473e-06, "loss": 0.09533262252807617, "step": 5518 }, { "epoch": 0.7690378318121647, "grad_norm": 1.2638646364212036, "learning_rate": 1.4524046533623758e-06, "loss": 0.10701465606689453, "step": 5519 }, { "epoch": 0.769177175503379, "grad_norm": 1.0012753009796143, "learning_rate": 1.450744254497972e-06, "loss": 0.09804725646972656, "step": 5520 }, { "epoch": 0.7693165191945934, "grad_norm": 0.8193982243537903, "learning_rate": 1.4490846441971624e-06, "loss": 0.09635162353515625, "step": 5521 }, { "epoch": 0.7694558628858078, "grad_norm": 1.0420712232589722, "learning_rate": 1.4474258228286758e-06, "loss": 0.11462593078613281, "step": 5522 }, { "epoch": 0.7695952065770222, "grad_norm": 0.4110924303531647, "learning_rate": 1.4457677907610646e-06, "loss": 0.07232093811035156, "step": 5523 }, { "epoch": 0.7697345502682366, "grad_norm": 0.7269861698150635, "learning_rate": 1.4441105483627088e-06, "loss": 0.08271312713623047, "step": 5524 }, { "epoch": 0.7698738939594509, "grad_norm": 0.5953211188316345, "learning_rate": 1.442454096001804e-06, "loss": 0.07338762283325195, "step": 5525 }, { "epoch": 0.7700132376506653, "grad_norm": 1.4972463846206665, "learning_rate": 1.4407984340463794e-06, "loss": 0.1479334831237793, "step": 5526 }, { "epoch": 0.7701525813418797, "grad_norm": 0.9784947633743286, "learning_rate": 1.4391435628642853e-06, "loss": 0.09186887741088867, "step": 5527 }, { "epoch": 0.7702919250330941, "grad_norm": 0.8988018035888672, "learning_rate": 1.437489482823195e-06, "loss": 0.09755897521972656, "step": 5528 }, { "epoch": 0.7704312687243086, "grad_norm": 1.0757200717926025, "learning_rate": 1.4358361942906097e-06, "loss": 0.09662055969238281, "step": 5529 }, { "epoch": 0.7705706124155229, "grad_norm": 1.1391220092773438, "learning_rate": 1.4341836976338485e-06, "loss": 0.10678863525390625, "step": 5530 }, { "epoch": 0.7707099561067373, "grad_norm": 1.0850940942764282, "learning_rate": 1.4325319932200631e-06, "loss": 0.08594226837158203, "step": 5531 }, { "epoch": 0.7708492997979517, "grad_norm": 0.9387881755828857, "learning_rate": 1.43088108141622e-06, "loss": 0.10865974426269531, "step": 5532 }, { "epoch": 0.7709886434891661, "grad_norm": 0.5384645462036133, "learning_rate": 1.4292309625891166e-06, "loss": 0.08658027648925781, "step": 5533 }, { "epoch": 0.7711279871803804, "grad_norm": 0.8594463467597961, "learning_rate": 1.4275816371053725e-06, "loss": 0.09524345397949219, "step": 5534 }, { "epoch": 0.7712673308715948, "grad_norm": 0.4832078516483307, "learning_rate": 1.425933105331429e-06, "loss": 0.07425117492675781, "step": 5535 }, { "epoch": 0.7714066745628092, "grad_norm": 1.5909250974655151, "learning_rate": 1.424285367633551e-06, "loss": 0.11996269226074219, "step": 5536 }, { "epoch": 0.7715460182540236, "grad_norm": 0.9216828346252441, "learning_rate": 1.422638424377829e-06, "loss": 0.11264801025390625, "step": 5537 }, { "epoch": 0.771685361945238, "grad_norm": 1.1515952348709106, "learning_rate": 1.420992275930178e-06, "loss": 0.09068679809570312, "step": 5538 }, { "epoch": 0.7718247056364523, "grad_norm": 1.0356301069259644, "learning_rate": 1.4193469226563322e-06, "loss": 0.11877632141113281, "step": 5539 }, { "epoch": 0.7719640493276667, "grad_norm": 0.6677746176719666, "learning_rate": 1.4177023649218536e-06, "loss": 0.07951736450195312, "step": 5540 }, { "epoch": 0.7721033930188811, "grad_norm": 0.37164306640625, "learning_rate": 1.4160586030921224e-06, "loss": 0.06481170654296875, "step": 5541 }, { "epoch": 0.7722427367100955, "grad_norm": 0.7623107433319092, "learning_rate": 1.4144156375323486e-06, "loss": 0.09049034118652344, "step": 5542 }, { "epoch": 0.7723820804013098, "grad_norm": 0.6740528345108032, "learning_rate": 1.4127734686075589e-06, "loss": 0.10253524780273438, "step": 5543 }, { "epoch": 0.7725214240925242, "grad_norm": 0.7049416899681091, "learning_rate": 1.411132096682606e-06, "loss": 0.08676338195800781, "step": 5544 }, { "epoch": 0.7726607677837386, "grad_norm": 0.9326992630958557, "learning_rate": 1.4094915221221677e-06, "loss": 0.09440994262695312, "step": 5545 }, { "epoch": 0.772800111474953, "grad_norm": 1.3041750192642212, "learning_rate": 1.4078517452907403e-06, "loss": 0.10755729675292969, "step": 5546 }, { "epoch": 0.7729394551661674, "grad_norm": 0.8310760259628296, "learning_rate": 1.4062127665526438e-06, "loss": 0.07688713073730469, "step": 5547 }, { "epoch": 0.7730787988573817, "grad_norm": 0.6371056437492371, "learning_rate": 1.4045745862720227e-06, "loss": 0.08991622924804688, "step": 5548 }, { "epoch": 0.7732181425485961, "grad_norm": 1.233929991722107, "learning_rate": 1.4029372048128454e-06, "loss": 0.12261962890625, "step": 5549 }, { "epoch": 0.7733574862398105, "grad_norm": 0.7964253425598145, "learning_rate": 1.401300622538897e-06, "loss": 0.08049774169921875, "step": 5550 }, { "epoch": 0.7734968299310249, "grad_norm": 1.02915620803833, "learning_rate": 1.3996648398137924e-06, "loss": 0.11044502258300781, "step": 5551 }, { "epoch": 0.7736361736222392, "grad_norm": 0.4672423005104065, "learning_rate": 1.398029857000962e-06, "loss": 0.07648658752441406, "step": 5552 }, { "epoch": 0.7737755173134536, "grad_norm": 1.0204323530197144, "learning_rate": 1.3963956744636642e-06, "loss": 0.10951995849609375, "step": 5553 }, { "epoch": 0.773914861004668, "grad_norm": 0.505446195602417, "learning_rate": 1.394762292564974e-06, "loss": 0.07403182983398438, "step": 5554 }, { "epoch": 0.7740542046958824, "grad_norm": 0.8453208208084106, "learning_rate": 1.393129711667794e-06, "loss": 0.08240604400634766, "step": 5555 }, { "epoch": 0.7741935483870968, "grad_norm": 0.5492239594459534, "learning_rate": 1.3914979321348488e-06, "loss": 0.07923603057861328, "step": 5556 }, { "epoch": 0.7743328920783111, "grad_norm": 0.6926100850105286, "learning_rate": 1.3898669543286763e-06, "loss": 0.08744049072265625, "step": 5557 }, { "epoch": 0.7744722357695255, "grad_norm": 0.9330621957778931, "learning_rate": 1.3882367786116458e-06, "loss": 0.08180999755859375, "step": 5558 }, { "epoch": 0.7746115794607399, "grad_norm": 0.8905483484268188, "learning_rate": 1.3866074053459465e-06, "loss": 0.090484619140625, "step": 5559 }, { "epoch": 0.7747509231519543, "grad_norm": 0.7935736775398254, "learning_rate": 1.3849788348935856e-06, "loss": 0.0871419906616211, "step": 5560 }, { "epoch": 0.7748902668431686, "grad_norm": 0.560562789440155, "learning_rate": 1.3833510676163963e-06, "loss": 0.07825469970703125, "step": 5561 }, { "epoch": 0.775029610534383, "grad_norm": 0.791007399559021, "learning_rate": 1.3817241038760287e-06, "loss": 0.08854389190673828, "step": 5562 }, { "epoch": 0.7751689542255974, "grad_norm": 0.6230867505073547, "learning_rate": 1.3800979440339602e-06, "loss": 0.08942890167236328, "step": 5563 }, { "epoch": 0.7753082979168118, "grad_norm": 0.8888378739356995, "learning_rate": 1.3784725884514833e-06, "loss": 0.09690666198730469, "step": 5564 }, { "epoch": 0.7754476416080262, "grad_norm": 1.8410804271697998, "learning_rate": 1.3768480374897163e-06, "loss": 0.14726829528808594, "step": 5565 }, { "epoch": 0.7755869852992405, "grad_norm": 1.566748023033142, "learning_rate": 1.3752242915095993e-06, "loss": 0.11635589599609375, "step": 5566 }, { "epoch": 0.7757263289904549, "grad_norm": 0.6299608945846558, "learning_rate": 1.3736013508718892e-06, "loss": 0.08461952209472656, "step": 5567 }, { "epoch": 0.7758656726816693, "grad_norm": 0.7028539180755615, "learning_rate": 1.371979215937166e-06, "loss": 0.07588577270507812, "step": 5568 }, { "epoch": 0.7760050163728838, "grad_norm": 1.060711145401001, "learning_rate": 1.3703578870658312e-06, "loss": 0.11596298217773438, "step": 5569 }, { "epoch": 0.7761443600640981, "grad_norm": 0.7645989656448364, "learning_rate": 1.3687373646181095e-06, "loss": 0.08367729187011719, "step": 5570 }, { "epoch": 0.7762837037553125, "grad_norm": 0.6372326612472534, "learning_rate": 1.3671176489540406e-06, "loss": 0.07866668701171875, "step": 5571 }, { "epoch": 0.7764230474465269, "grad_norm": 1.2547041177749634, "learning_rate": 1.3654987404334917e-06, "loss": 0.1029367446899414, "step": 5572 }, { "epoch": 0.7765623911377413, "grad_norm": 0.46801936626434326, "learning_rate": 1.363880639416144e-06, "loss": 0.0662689208984375, "step": 5573 }, { "epoch": 0.7767017348289557, "grad_norm": 1.2286072969436646, "learning_rate": 1.3622633462615058e-06, "loss": 0.09460067749023438, "step": 5574 }, { "epoch": 0.77684107852017, "grad_norm": 1.1521553993225098, "learning_rate": 1.3606468613288997e-06, "loss": 0.08486175537109375, "step": 5575 }, { "epoch": 0.7769804222113844, "grad_norm": 0.758061945438385, "learning_rate": 1.359031184977473e-06, "loss": 0.09352302551269531, "step": 5576 }, { "epoch": 0.7771197659025988, "grad_norm": 0.7435169816017151, "learning_rate": 1.3574163175661936e-06, "loss": 0.10305309295654297, "step": 5577 }, { "epoch": 0.7772591095938132, "grad_norm": 0.722200870513916, "learning_rate": 1.3558022594538473e-06, "loss": 0.091888427734375, "step": 5578 }, { "epoch": 0.7773984532850275, "grad_norm": 1.6470867395401, "learning_rate": 1.3541890109990386e-06, "loss": 0.13208770751953125, "step": 5579 }, { "epoch": 0.7775377969762419, "grad_norm": 0.7321572303771973, "learning_rate": 1.3525765725601964e-06, "loss": 0.08547210693359375, "step": 5580 }, { "epoch": 0.7776771406674563, "grad_norm": 0.9540656208992004, "learning_rate": 1.3509649444955697e-06, "loss": 0.11142921447753906, "step": 5581 }, { "epoch": 0.7778164843586707, "grad_norm": 2.034698486328125, "learning_rate": 1.3493541271632227e-06, "loss": 0.1302013397216797, "step": 5582 }, { "epoch": 0.7779558280498851, "grad_norm": 0.9053849577903748, "learning_rate": 1.3477441209210418e-06, "loss": 0.09232330322265625, "step": 5583 }, { "epoch": 0.7780951717410994, "grad_norm": 2.095492124557495, "learning_rate": 1.3461349261267347e-06, "loss": 0.10498809814453125, "step": 5584 }, { "epoch": 0.7782345154323138, "grad_norm": 0.6332420706748962, "learning_rate": 1.3445265431378297e-06, "loss": 0.07900428771972656, "step": 5585 }, { "epoch": 0.7783738591235282, "grad_norm": 0.910902738571167, "learning_rate": 1.3429189723116693e-06, "loss": 0.09696006774902344, "step": 5586 }, { "epoch": 0.7785132028147426, "grad_norm": 1.4934289455413818, "learning_rate": 1.3413122140054219e-06, "loss": 0.08469009399414062, "step": 5587 }, { "epoch": 0.7786525465059569, "grad_norm": 0.5571666955947876, "learning_rate": 1.3397062685760715e-06, "loss": 0.084503173828125, "step": 5588 }, { "epoch": 0.7787918901971713, "grad_norm": 0.6887211799621582, "learning_rate": 1.3381011363804208e-06, "loss": 0.08513164520263672, "step": 5589 }, { "epoch": 0.7789312338883857, "grad_norm": 0.9317470192909241, "learning_rate": 1.3364968177750953e-06, "loss": 0.09041404724121094, "step": 5590 }, { "epoch": 0.7790705775796001, "grad_norm": 0.6599379181861877, "learning_rate": 1.3348933131165387e-06, "loss": 0.07337570190429688, "step": 5591 }, { "epoch": 0.7792099212708145, "grad_norm": 0.730413019657135, "learning_rate": 1.333290622761011e-06, "loss": 0.07659339904785156, "step": 5592 }, { "epoch": 0.7793492649620288, "grad_norm": 0.7663999795913696, "learning_rate": 1.3316887470645956e-06, "loss": 0.10039710998535156, "step": 5593 }, { "epoch": 0.7794886086532432, "grad_norm": 0.3937087059020996, "learning_rate": 1.3300876863831903e-06, "loss": 0.06804275512695312, "step": 5594 }, { "epoch": 0.7796279523444576, "grad_norm": 0.959327757358551, "learning_rate": 1.3284874410725174e-06, "loss": 0.10321617126464844, "step": 5595 }, { "epoch": 0.779767296035672, "grad_norm": 0.6759490966796875, "learning_rate": 1.3268880114881112e-06, "loss": 0.09140920639038086, "step": 5596 }, { "epoch": 0.7799066397268863, "grad_norm": 0.49751168489456177, "learning_rate": 1.3252893979853304e-06, "loss": 0.0738067626953125, "step": 5597 }, { "epoch": 0.7800459834181007, "grad_norm": 0.6798591017723083, "learning_rate": 1.3236916009193517e-06, "loss": 0.09089851379394531, "step": 5598 }, { "epoch": 0.7801853271093151, "grad_norm": 0.9646654725074768, "learning_rate": 1.3220946206451678e-06, "loss": 0.09967803955078125, "step": 5599 }, { "epoch": 0.7803246708005295, "grad_norm": 0.6710277795791626, "learning_rate": 1.3204984575175893e-06, "loss": 0.08704948425292969, "step": 5600 }, { "epoch": 0.7804640144917439, "grad_norm": 0.7057552933692932, "learning_rate": 1.31890311189125e-06, "loss": 0.08986568450927734, "step": 5601 }, { "epoch": 0.7806033581829582, "grad_norm": 0.7069456577301025, "learning_rate": 1.317308584120599e-06, "loss": 0.1013031005859375, "step": 5602 }, { "epoch": 0.7807427018741726, "grad_norm": 0.9484853148460388, "learning_rate": 1.3157148745599035e-06, "loss": 0.102386474609375, "step": 5603 }, { "epoch": 0.780882045565387, "grad_norm": 1.4998337030410767, "learning_rate": 1.314121983563248e-06, "loss": 0.11840057373046875, "step": 5604 }, { "epoch": 0.7810213892566014, "grad_norm": 1.1108105182647705, "learning_rate": 1.3125299114845375e-06, "loss": 0.12018966674804688, "step": 5605 }, { "epoch": 0.7811607329478157, "grad_norm": 0.656019926071167, "learning_rate": 1.3109386586774958e-06, "loss": 0.08215904235839844, "step": 5606 }, { "epoch": 0.7813000766390301, "grad_norm": 0.5619896650314331, "learning_rate": 1.3093482254956602e-06, "loss": 0.08661079406738281, "step": 5607 }, { "epoch": 0.7814394203302445, "grad_norm": 0.5372594594955444, "learning_rate": 1.3077586122923896e-06, "loss": 0.0642232894897461, "step": 5608 }, { "epoch": 0.781578764021459, "grad_norm": 0.9687103033065796, "learning_rate": 1.3061698194208616e-06, "loss": 0.11195564270019531, "step": 5609 }, { "epoch": 0.7817181077126734, "grad_norm": 1.1468092203140259, "learning_rate": 1.3045818472340683e-06, "loss": 0.10495185852050781, "step": 5610 }, { "epoch": 0.7818574514038877, "grad_norm": 0.5002973675727844, "learning_rate": 1.3029946960848188e-06, "loss": 0.05782032012939453, "step": 5611 }, { "epoch": 0.7819967950951021, "grad_norm": 1.5202094316482544, "learning_rate": 1.3014083663257443e-06, "loss": 0.12146186828613281, "step": 5612 }, { "epoch": 0.7821361387863165, "grad_norm": 1.1657943725585938, "learning_rate": 1.299822858309292e-06, "loss": 0.09262657165527344, "step": 5613 }, { "epoch": 0.7822754824775309, "grad_norm": 0.8369202613830566, "learning_rate": 1.2982381723877235e-06, "loss": 0.08718585968017578, "step": 5614 }, { "epoch": 0.7824148261687452, "grad_norm": 0.40880879759788513, "learning_rate": 1.2966543089131196e-06, "loss": 0.06775569915771484, "step": 5615 }, { "epoch": 0.7825541698599596, "grad_norm": 0.6894071102142334, "learning_rate": 1.295071268237379e-06, "loss": 0.09561729431152344, "step": 5616 }, { "epoch": 0.782693513551174, "grad_norm": 0.3122846484184265, "learning_rate": 1.2934890507122195e-06, "loss": 0.05875396728515625, "step": 5617 }, { "epoch": 0.7828328572423884, "grad_norm": 1.1145215034484863, "learning_rate": 1.2919076566891703e-06, "loss": 0.1219482421875, "step": 5618 }, { "epoch": 0.7829722009336028, "grad_norm": 1.3970085382461548, "learning_rate": 1.2903270865195837e-06, "loss": 0.1259613037109375, "step": 5619 }, { "epoch": 0.7831115446248171, "grad_norm": 0.7724071145057678, "learning_rate": 1.2887473405546254e-06, "loss": 0.07878684997558594, "step": 5620 }, { "epoch": 0.7832508883160315, "grad_norm": 1.3072816133499146, "learning_rate": 1.2871684191452772e-06, "loss": 0.10082530975341797, "step": 5621 }, { "epoch": 0.7833902320072459, "grad_norm": 0.9105129241943359, "learning_rate": 1.2855903226423412e-06, "loss": 0.09540462493896484, "step": 5622 }, { "epoch": 0.7835295756984603, "grad_norm": 0.9425021409988403, "learning_rate": 1.2840130513964338e-06, "loss": 0.08777427673339844, "step": 5623 }, { "epoch": 0.7836689193896746, "grad_norm": 0.6311429142951965, "learning_rate": 1.2824366057579917e-06, "loss": 0.0799407958984375, "step": 5624 }, { "epoch": 0.783808263080889, "grad_norm": 1.042182207107544, "learning_rate": 1.2808609860772598e-06, "loss": 0.09140586853027344, "step": 5625 }, { "epoch": 0.7839476067721034, "grad_norm": 2.312488317489624, "learning_rate": 1.2792861927043071e-06, "loss": 0.15569114685058594, "step": 5626 }, { "epoch": 0.7840869504633178, "grad_norm": 0.8123263120651245, "learning_rate": 1.277712225989019e-06, "loss": 0.07830238342285156, "step": 5627 }, { "epoch": 0.7842262941545322, "grad_norm": 0.8580288290977478, "learning_rate": 1.2761390862810907e-06, "loss": 0.09965896606445312, "step": 5628 }, { "epoch": 0.7843656378457465, "grad_norm": 0.5690065622329712, "learning_rate": 1.274566773930041e-06, "loss": 0.07768821716308594, "step": 5629 }, { "epoch": 0.7845049815369609, "grad_norm": 0.6410097479820251, "learning_rate": 1.272995289285202e-06, "loss": 0.09545707702636719, "step": 5630 }, { "epoch": 0.7846443252281753, "grad_norm": 0.4628840684890747, "learning_rate": 1.2714246326957213e-06, "loss": 0.05960845947265625, "step": 5631 }, { "epoch": 0.7847836689193897, "grad_norm": 0.8635421991348267, "learning_rate": 1.2698548045105608e-06, "loss": 0.07445335388183594, "step": 5632 }, { "epoch": 0.784923012610604, "grad_norm": 0.7933197021484375, "learning_rate": 1.2682858050785018e-06, "loss": 0.07549858093261719, "step": 5633 }, { "epoch": 0.7850623563018184, "grad_norm": 0.4540497362613678, "learning_rate": 1.266717634748142e-06, "loss": 0.07178878784179688, "step": 5634 }, { "epoch": 0.7852016999930328, "grad_norm": 1.2238818407058716, "learning_rate": 1.2651502938678917e-06, "loss": 0.10118865966796875, "step": 5635 }, { "epoch": 0.7853410436842472, "grad_norm": 1.1605165004730225, "learning_rate": 1.2635837827859766e-06, "loss": 0.12107276916503906, "step": 5636 }, { "epoch": 0.7854803873754616, "grad_norm": 0.6089059114456177, "learning_rate": 1.2620181018504406e-06, "loss": 0.08773040771484375, "step": 5637 }, { "epoch": 0.7856197310666759, "grad_norm": 1.5914390087127686, "learning_rate": 1.2604532514091444e-06, "loss": 0.09963417053222656, "step": 5638 }, { "epoch": 0.7857590747578903, "grad_norm": 0.9302395582199097, "learning_rate": 1.258889231809759e-06, "loss": 0.08352327346801758, "step": 5639 }, { "epoch": 0.7858984184491047, "grad_norm": 0.7990577816963196, "learning_rate": 1.2573260433997768e-06, "loss": 0.08467292785644531, "step": 5640 }, { "epoch": 0.7860377621403191, "grad_norm": 0.9801071286201477, "learning_rate": 1.2557636865265e-06, "loss": 0.12579917907714844, "step": 5641 }, { "epoch": 0.7861771058315334, "grad_norm": 0.645262598991394, "learning_rate": 1.254202161537051e-06, "loss": 0.09833717346191406, "step": 5642 }, { "epoch": 0.7863164495227478, "grad_norm": 0.5167416930198669, "learning_rate": 1.2526414687783616e-06, "loss": 0.06807899475097656, "step": 5643 }, { "epoch": 0.7864557932139622, "grad_norm": 1.4638932943344116, "learning_rate": 1.2510816085971849e-06, "loss": 0.13088035583496094, "step": 5644 }, { "epoch": 0.7865951369051766, "grad_norm": 0.5298093557357788, "learning_rate": 1.2495225813400864e-06, "loss": 0.07702827453613281, "step": 5645 }, { "epoch": 0.786734480596391, "grad_norm": 0.4614655673503876, "learning_rate": 1.247964387353446e-06, "loss": 0.06868934631347656, "step": 5646 }, { "epoch": 0.7868738242876053, "grad_norm": 1.0398744344711304, "learning_rate": 1.2464070269834566e-06, "loss": 0.095001220703125, "step": 5647 }, { "epoch": 0.7870131679788197, "grad_norm": 0.749260663986206, "learning_rate": 1.2448505005761297e-06, "loss": 0.08971595764160156, "step": 5648 }, { "epoch": 0.7871525116700342, "grad_norm": 0.6929985284805298, "learning_rate": 1.2432948084772917e-06, "loss": 0.07494163513183594, "step": 5649 }, { "epoch": 0.7872918553612486, "grad_norm": 0.5719811320304871, "learning_rate": 1.2417399510325785e-06, "loss": 0.07827568054199219, "step": 5650 }, { "epoch": 0.787431199052463, "grad_norm": 0.6603270769119263, "learning_rate": 1.2401859285874474e-06, "loss": 0.09626007080078125, "step": 5651 }, { "epoch": 0.7875705427436773, "grad_norm": 0.7446125745773315, "learning_rate": 1.2386327414871635e-06, "loss": 0.08609390258789062, "step": 5652 }, { "epoch": 0.7877098864348917, "grad_norm": 0.45890939235687256, "learning_rate": 1.237080390076812e-06, "loss": 0.07527923583984375, "step": 5653 }, { "epoch": 0.7878492301261061, "grad_norm": 0.546072244644165, "learning_rate": 1.2355288747012878e-06, "loss": 0.06937217712402344, "step": 5654 }, { "epoch": 0.7879885738173205, "grad_norm": 0.3613264262676239, "learning_rate": 1.2339781957053031e-06, "loss": 0.055110931396484375, "step": 5655 }, { "epoch": 0.7881279175085348, "grad_norm": 0.35469987988471985, "learning_rate": 1.232428353433387e-06, "loss": 0.06193351745605469, "step": 5656 }, { "epoch": 0.7882672611997492, "grad_norm": 0.44111281633377075, "learning_rate": 1.2308793482298724e-06, "loss": 0.06740760803222656, "step": 5657 }, { "epoch": 0.7884066048909636, "grad_norm": 0.9549940228462219, "learning_rate": 1.2293311804389162e-06, "loss": 0.08994865417480469, "step": 5658 }, { "epoch": 0.788545948582178, "grad_norm": 1.140805959701538, "learning_rate": 1.227783850404487e-06, "loss": 0.10348224639892578, "step": 5659 }, { "epoch": 0.7886852922733923, "grad_norm": 0.6880522966384888, "learning_rate": 1.2262373584703642e-06, "loss": 0.09248161315917969, "step": 5660 }, { "epoch": 0.7888246359646067, "grad_norm": 1.0077661275863647, "learning_rate": 1.2246917049801449e-06, "loss": 0.11325359344482422, "step": 5661 }, { "epoch": 0.7889639796558211, "grad_norm": 0.29663747549057007, "learning_rate": 1.2231468902772354e-06, "loss": 0.052603721618652344, "step": 5662 }, { "epoch": 0.7891033233470355, "grad_norm": 1.4181360006332397, "learning_rate": 1.221602914704862e-06, "loss": 0.1081094741821289, "step": 5663 }, { "epoch": 0.7892426670382499, "grad_norm": 1.2583526372909546, "learning_rate": 1.2200597786060565e-06, "loss": 0.12610149383544922, "step": 5664 }, { "epoch": 0.7893820107294642, "grad_norm": 0.8120214343070984, "learning_rate": 1.2185174823236711e-06, "loss": 0.10255241394042969, "step": 5665 }, { "epoch": 0.7895213544206786, "grad_norm": 0.8411095142364502, "learning_rate": 1.2169760262003693e-06, "loss": 0.09015655517578125, "step": 5666 }, { "epoch": 0.789660698111893, "grad_norm": 0.705756425857544, "learning_rate": 1.2154354105786276e-06, "loss": 0.07358169555664062, "step": 5667 }, { "epoch": 0.7898000418031074, "grad_norm": 0.7959482669830322, "learning_rate": 1.2138956358007325e-06, "loss": 0.08983612060546875, "step": 5668 }, { "epoch": 0.7899393854943217, "grad_norm": 0.48002275824546814, "learning_rate": 1.212356702208789e-06, "loss": 0.06008148193359375, "step": 5669 }, { "epoch": 0.7900787291855361, "grad_norm": 0.6071628332138062, "learning_rate": 1.210818610144714e-06, "loss": 0.09310722351074219, "step": 5670 }, { "epoch": 0.7902180728767505, "grad_norm": 0.5061182379722595, "learning_rate": 1.209281359950234e-06, "loss": 0.07959461212158203, "step": 5671 }, { "epoch": 0.7903574165679649, "grad_norm": 1.076187252998352, "learning_rate": 1.2077449519668943e-06, "loss": 0.10267257690429688, "step": 5672 }, { "epoch": 0.7904967602591793, "grad_norm": 0.7020587921142578, "learning_rate": 1.2062093865360458e-06, "loss": 0.0790262222290039, "step": 5673 }, { "epoch": 0.7906361039503936, "grad_norm": 0.8370577096939087, "learning_rate": 1.2046746639988593e-06, "loss": 0.09699630737304688, "step": 5674 }, { "epoch": 0.790775447641608, "grad_norm": 0.8062165379524231, "learning_rate": 1.2031407846963122e-06, "loss": 0.11121559143066406, "step": 5675 }, { "epoch": 0.7909147913328224, "grad_norm": 0.6023891568183899, "learning_rate": 1.201607748969199e-06, "loss": 0.08383369445800781, "step": 5676 }, { "epoch": 0.7910541350240368, "grad_norm": 0.47372353076934814, "learning_rate": 1.2000755571581263e-06, "loss": 0.06237220764160156, "step": 5677 }, { "epoch": 0.7911934787152511, "grad_norm": 0.6084104180335999, "learning_rate": 1.1985442096035116e-06, "loss": 0.07130622863769531, "step": 5678 }, { "epoch": 0.7913328224064655, "grad_norm": 0.5323822498321533, "learning_rate": 1.1970137066455834e-06, "loss": 0.07190513610839844, "step": 5679 }, { "epoch": 0.7914721660976799, "grad_norm": 1.1742157936096191, "learning_rate": 1.1954840486243857e-06, "loss": 0.08815479278564453, "step": 5680 }, { "epoch": 0.7916115097888943, "grad_norm": 1.017078161239624, "learning_rate": 1.193955235879775e-06, "loss": 0.1098480224609375, "step": 5681 }, { "epoch": 0.7917508534801087, "grad_norm": 0.6812211871147156, "learning_rate": 1.1924272687514182e-06, "loss": 0.0760507583618164, "step": 5682 }, { "epoch": 0.791890197171323, "grad_norm": 1.3275880813598633, "learning_rate": 1.1909001475787917e-06, "loss": 0.09814643859863281, "step": 5683 }, { "epoch": 0.7920295408625374, "grad_norm": 0.7331492900848389, "learning_rate": 1.1893738727011894e-06, "loss": 0.09026908874511719, "step": 5684 }, { "epoch": 0.7921688845537518, "grad_norm": 1.0983729362487793, "learning_rate": 1.187848444457716e-06, "loss": 0.13261795043945312, "step": 5685 }, { "epoch": 0.7923082282449662, "grad_norm": 1.3460299968719482, "learning_rate": 1.1863238631872843e-06, "loss": 0.108062744140625, "step": 5686 }, { "epoch": 0.7924475719361805, "grad_norm": 0.8921791315078735, "learning_rate": 1.184800129228622e-06, "loss": 0.08456993103027344, "step": 5687 }, { "epoch": 0.7925869156273949, "grad_norm": 0.7468177080154419, "learning_rate": 1.1832772429202716e-06, "loss": 0.10009193420410156, "step": 5688 }, { "epoch": 0.7927262593186094, "grad_norm": 1.0895885229110718, "learning_rate": 1.1817552046005777e-06, "loss": 0.09889030456542969, "step": 5689 }, { "epoch": 0.7928656030098238, "grad_norm": 0.5854409337043762, "learning_rate": 1.1802340146077045e-06, "loss": 0.0725564956665039, "step": 5690 }, { "epoch": 0.7930049467010382, "grad_norm": 1.0597161054611206, "learning_rate": 1.1787136732796289e-06, "loss": 0.10058403015136719, "step": 5691 }, { "epoch": 0.7931442903922525, "grad_norm": 0.5384039282798767, "learning_rate": 1.177194180954132e-06, "loss": 0.07107734680175781, "step": 5692 }, { "epoch": 0.7932836340834669, "grad_norm": 0.5839450359344482, "learning_rate": 1.1756755379688133e-06, "loss": 0.09129905700683594, "step": 5693 }, { "epoch": 0.7934229777746813, "grad_norm": 1.0168697834014893, "learning_rate": 1.174157744661078e-06, "loss": 0.096282958984375, "step": 5694 }, { "epoch": 0.7935623214658957, "grad_norm": 1.1955440044403076, "learning_rate": 1.1726408013681473e-06, "loss": 0.10676383972167969, "step": 5695 }, { "epoch": 0.79370166515711, "grad_norm": 0.8848345279693604, "learning_rate": 1.1711247084270494e-06, "loss": 0.12926673889160156, "step": 5696 }, { "epoch": 0.7938410088483244, "grad_norm": 0.49289020895957947, "learning_rate": 1.1696094661746267e-06, "loss": 0.07705116271972656, "step": 5697 }, { "epoch": 0.7939803525395388, "grad_norm": 0.8717136979103088, "learning_rate": 1.1680950749475328e-06, "loss": 0.08618545532226562, "step": 5698 }, { "epoch": 0.7941196962307532, "grad_norm": 0.45624101161956787, "learning_rate": 1.1665815350822291e-06, "loss": 0.0682830810546875, "step": 5699 }, { "epoch": 0.7942590399219676, "grad_norm": 0.63190096616745, "learning_rate": 1.1650688469149884e-06, "loss": 0.0774383544921875, "step": 5700 }, { "epoch": 0.7943983836131819, "grad_norm": 0.6766651272773743, "learning_rate": 1.1635570107818973e-06, "loss": 0.07636260986328125, "step": 5701 }, { "epoch": 0.7945377273043963, "grad_norm": 0.8924010396003723, "learning_rate": 1.1620460270188516e-06, "loss": 0.10263252258300781, "step": 5702 }, { "epoch": 0.7946770709956107, "grad_norm": 0.7462461590766907, "learning_rate": 1.1605358959615559e-06, "loss": 0.09386634826660156, "step": 5703 }, { "epoch": 0.7948164146868251, "grad_norm": 1.218381643295288, "learning_rate": 1.159026617945529e-06, "loss": 0.09439849853515625, "step": 5704 }, { "epoch": 0.7949557583780394, "grad_norm": 0.6913918256759644, "learning_rate": 1.1575181933060952e-06, "loss": 0.08808183670043945, "step": 5705 }, { "epoch": 0.7950951020692538, "grad_norm": 0.9104912281036377, "learning_rate": 1.156010622378395e-06, "loss": 0.08565425872802734, "step": 5706 }, { "epoch": 0.7952344457604682, "grad_norm": 0.48343104124069214, "learning_rate": 1.1545039054973733e-06, "loss": 0.05928611755371094, "step": 5707 }, { "epoch": 0.7953737894516826, "grad_norm": 0.6878095269203186, "learning_rate": 1.1529980429977899e-06, "loss": 0.08832740783691406, "step": 5708 }, { "epoch": 0.795513133142897, "grad_norm": 0.6607019305229187, "learning_rate": 1.151493035214214e-06, "loss": 0.08579444885253906, "step": 5709 }, { "epoch": 0.7956524768341113, "grad_norm": 0.9849503636360168, "learning_rate": 1.1499888824810223e-06, "loss": 0.09992408752441406, "step": 5710 }, { "epoch": 0.7957918205253257, "grad_norm": 0.5346410274505615, "learning_rate": 1.148485585132403e-06, "loss": 0.0767822265625, "step": 5711 }, { "epoch": 0.7959311642165401, "grad_norm": 0.9439733028411865, "learning_rate": 1.1469831435023542e-06, "loss": 0.12614059448242188, "step": 5712 }, { "epoch": 0.7960705079077545, "grad_norm": 1.0369999408721924, "learning_rate": 1.1454815579246874e-06, "loss": 0.11561203002929688, "step": 5713 }, { "epoch": 0.7962098515989688, "grad_norm": 0.7214659452438354, "learning_rate": 1.143980828733018e-06, "loss": 0.10802459716796875, "step": 5714 }, { "epoch": 0.7963491952901832, "grad_norm": 0.4770350754261017, "learning_rate": 1.1424809562607725e-06, "loss": 0.07224464416503906, "step": 5715 }, { "epoch": 0.7964885389813976, "grad_norm": 0.6690024137496948, "learning_rate": 1.1409819408411898e-06, "loss": 0.08472824096679688, "step": 5716 }, { "epoch": 0.796627882672612, "grad_norm": 1.3623523712158203, "learning_rate": 1.1394837828073184e-06, "loss": 0.12474632263183594, "step": 5717 }, { "epoch": 0.7967672263638264, "grad_norm": 1.0754773616790771, "learning_rate": 1.1379864824920116e-06, "loss": 0.12653350830078125, "step": 5718 }, { "epoch": 0.7969065700550407, "grad_norm": 0.6695522665977478, "learning_rate": 1.1364900402279394e-06, "loss": 0.09308624267578125, "step": 5719 }, { "epoch": 0.7970459137462551, "grad_norm": 1.0425724983215332, "learning_rate": 1.134994456347574e-06, "loss": 0.08113288879394531, "step": 5720 }, { "epoch": 0.7971852574374695, "grad_norm": 0.559383749961853, "learning_rate": 1.1334997311832003e-06, "loss": 0.08207130432128906, "step": 5721 }, { "epoch": 0.7973246011286839, "grad_norm": 0.6010262966156006, "learning_rate": 1.132005865066912e-06, "loss": 0.06859397888183594, "step": 5722 }, { "epoch": 0.7974639448198982, "grad_norm": 1.2389708757400513, "learning_rate": 1.1305128583306125e-06, "loss": 0.1552734375, "step": 5723 }, { "epoch": 0.7976032885111126, "grad_norm": 0.576616644859314, "learning_rate": 1.1290207113060158e-06, "loss": 0.09759807586669922, "step": 5724 }, { "epoch": 0.797742632202327, "grad_norm": 0.7847219705581665, "learning_rate": 1.127529424324641e-06, "loss": 0.08739280700683594, "step": 5725 }, { "epoch": 0.7978819758935414, "grad_norm": 0.7601884007453918, "learning_rate": 1.1260389977178166e-06, "loss": 0.08066272735595703, "step": 5726 }, { "epoch": 0.7980213195847558, "grad_norm": 0.5229741930961609, "learning_rate": 1.1245494318166844e-06, "loss": 0.07242774963378906, "step": 5727 }, { "epoch": 0.7981606632759701, "grad_norm": 0.8928104043006897, "learning_rate": 1.1230607269521886e-06, "loss": 0.09083747863769531, "step": 5728 }, { "epoch": 0.7983000069671845, "grad_norm": 0.6525951027870178, "learning_rate": 1.1215728834550877e-06, "loss": 0.08123397827148438, "step": 5729 }, { "epoch": 0.798439350658399, "grad_norm": 0.45536115765571594, "learning_rate": 1.1200859016559473e-06, "loss": 0.08064079284667969, "step": 5730 }, { "epoch": 0.7985786943496134, "grad_norm": 0.7336733937263489, "learning_rate": 1.1185997818851402e-06, "loss": 0.07128143310546875, "step": 5731 }, { "epoch": 0.7987180380408277, "grad_norm": 0.7695613503456116, "learning_rate": 1.1171145244728454e-06, "loss": 0.08073234558105469, "step": 5732 }, { "epoch": 0.7988573817320421, "grad_norm": 0.9460793733596802, "learning_rate": 1.1156301297490563e-06, "loss": 0.1275177001953125, "step": 5733 }, { "epoch": 0.7989967254232565, "grad_norm": 0.8878351449966431, "learning_rate": 1.1141465980435713e-06, "loss": 0.0770578384399414, "step": 5734 }, { "epoch": 0.7991360691144709, "grad_norm": 1.117342472076416, "learning_rate": 1.112663929685997e-06, "loss": 0.09813880920410156, "step": 5735 }, { "epoch": 0.7992754128056853, "grad_norm": 0.6618159413337708, "learning_rate": 1.111182125005747e-06, "loss": 0.07876205444335938, "step": 5736 }, { "epoch": 0.7994147564968996, "grad_norm": 1.0013318061828613, "learning_rate": 1.1097011843320454e-06, "loss": 0.10719680786132812, "step": 5737 }, { "epoch": 0.799554100188114, "grad_norm": 0.8028843402862549, "learning_rate": 1.1082211079939248e-06, "loss": 0.08787727355957031, "step": 5738 }, { "epoch": 0.7996934438793284, "grad_norm": 0.9267011284828186, "learning_rate": 1.106741896320222e-06, "loss": 0.08173274993896484, "step": 5739 }, { "epoch": 0.7998327875705428, "grad_norm": 0.7622369527816772, "learning_rate": 1.1052635496395864e-06, "loss": 0.08270645141601562, "step": 5740 }, { "epoch": 0.7999721312617571, "grad_norm": 1.7466825246810913, "learning_rate": 1.1037860682804708e-06, "loss": 0.10618972778320312, "step": 5741 }, { "epoch": 0.8001114749529715, "grad_norm": 1.4652653932571411, "learning_rate": 1.1023094525711397e-06, "loss": 0.1124258041381836, "step": 5742 }, { "epoch": 0.8002508186441859, "grad_norm": 1.7614614963531494, "learning_rate": 1.1008337028396616e-06, "loss": 0.09474372863769531, "step": 5743 }, { "epoch": 0.8003901623354003, "grad_norm": 0.5617880821228027, "learning_rate": 1.099358819413915e-06, "loss": 0.06202220916748047, "step": 5744 }, { "epoch": 0.8005295060266147, "grad_norm": 0.5741675496101379, "learning_rate": 1.0978848026215865e-06, "loss": 0.0813283920288086, "step": 5745 }, { "epoch": 0.800668849717829, "grad_norm": 0.9303640723228455, "learning_rate": 1.0964116527901686e-06, "loss": 0.11433601379394531, "step": 5746 }, { "epoch": 0.8008081934090434, "grad_norm": 0.8512147068977356, "learning_rate": 1.094939370246959e-06, "loss": 0.0881195068359375, "step": 5747 }, { "epoch": 0.8009475371002578, "grad_norm": 0.7999793291091919, "learning_rate": 1.093467955319068e-06, "loss": 0.08539009094238281, "step": 5748 }, { "epoch": 0.8010868807914722, "grad_norm": 0.6771866679191589, "learning_rate": 1.0919974083334106e-06, "loss": 0.07251262664794922, "step": 5749 }, { "epoch": 0.8012262244826865, "grad_norm": 1.504473090171814, "learning_rate": 1.0905277296167066e-06, "loss": 0.11203384399414062, "step": 5750 }, { "epoch": 0.8013655681739009, "grad_norm": 0.6987079381942749, "learning_rate": 1.089058919495488e-06, "loss": 0.0850229263305664, "step": 5751 }, { "epoch": 0.8015049118651153, "grad_norm": 1.1961498260498047, "learning_rate": 1.0875909782960887e-06, "loss": 0.1175088882446289, "step": 5752 }, { "epoch": 0.8016442555563297, "grad_norm": 0.8372869491577148, "learning_rate": 1.0861239063446511e-06, "loss": 0.10525703430175781, "step": 5753 }, { "epoch": 0.801783599247544, "grad_norm": 0.8157066702842712, "learning_rate": 1.0846577039671263e-06, "loss": 0.09329605102539062, "step": 5754 }, { "epoch": 0.8019229429387584, "grad_norm": 0.754289448261261, "learning_rate": 1.0831923714892706e-06, "loss": 0.08263015747070312, "step": 5755 }, { "epoch": 0.8020622866299728, "grad_norm": 1.0341778993606567, "learning_rate": 1.0817279092366507e-06, "loss": 0.08366775512695312, "step": 5756 }, { "epoch": 0.8022016303211872, "grad_norm": 0.9823777079582214, "learning_rate": 1.0802643175346312e-06, "loss": 0.09059715270996094, "step": 5757 }, { "epoch": 0.8023409740124016, "grad_norm": 0.5284303426742554, "learning_rate": 1.0788015967083904e-06, "loss": 0.06806564331054688, "step": 5758 }, { "epoch": 0.8024803177036159, "grad_norm": 1.7960492372512817, "learning_rate": 1.0773397470829145e-06, "loss": 0.1262044906616211, "step": 5759 }, { "epoch": 0.8026196613948303, "grad_norm": 0.8378269076347351, "learning_rate": 1.0758787689829891e-06, "loss": 0.08989238739013672, "step": 5760 }, { "epoch": 0.8027590050860447, "grad_norm": 1.1117916107177734, "learning_rate": 1.074418662733212e-06, "loss": 0.1056375503540039, "step": 5761 }, { "epoch": 0.8028983487772591, "grad_norm": 0.6458374261856079, "learning_rate": 1.0729594286579876e-06, "loss": 0.07837486267089844, "step": 5762 }, { "epoch": 0.8030376924684735, "grad_norm": 0.6146727204322815, "learning_rate": 1.0715010670815212e-06, "loss": 0.07419586181640625, "step": 5763 }, { "epoch": 0.8031770361596878, "grad_norm": 1.0729179382324219, "learning_rate": 1.0700435783278278e-06, "loss": 0.09479713439941406, "step": 5764 }, { "epoch": 0.8033163798509022, "grad_norm": 1.0815354585647583, "learning_rate": 1.068586962720729e-06, "loss": 0.09812545776367188, "step": 5765 }, { "epoch": 0.8034557235421166, "grad_norm": 0.3502616882324219, "learning_rate": 1.0671312205838525e-06, "loss": 0.06421661376953125, "step": 5766 }, { "epoch": 0.803595067233331, "grad_norm": 0.6671962141990662, "learning_rate": 1.06567635224063e-06, "loss": 0.07861900329589844, "step": 5767 }, { "epoch": 0.8037344109245453, "grad_norm": 0.6383435130119324, "learning_rate": 1.0642223580142985e-06, "loss": 0.09171009063720703, "step": 5768 }, { "epoch": 0.8038737546157597, "grad_norm": 1.1814002990722656, "learning_rate": 1.0627692382279038e-06, "loss": 0.1031646728515625, "step": 5769 }, { "epoch": 0.8040130983069742, "grad_norm": 1.4645763635635376, "learning_rate": 1.0613169932042972e-06, "loss": 0.09846878051757812, "step": 5770 }, { "epoch": 0.8041524419981886, "grad_norm": 0.9040818214416504, "learning_rate": 1.0598656232661313e-06, "loss": 0.08579826354980469, "step": 5771 }, { "epoch": 0.804291785689403, "grad_norm": 0.6218895316123962, "learning_rate": 1.0584151287358708e-06, "loss": 0.08108711242675781, "step": 5772 }, { "epoch": 0.8044311293806173, "grad_norm": 1.2507855892181396, "learning_rate": 1.0569655099357795e-06, "loss": 0.10501289367675781, "step": 5773 }, { "epoch": 0.8045704730718317, "grad_norm": 1.3259916305541992, "learning_rate": 1.0555167671879319e-06, "loss": 0.09302425384521484, "step": 5774 }, { "epoch": 0.8047098167630461, "grad_norm": 0.42598918080329895, "learning_rate": 1.0540689008142035e-06, "loss": 0.06878089904785156, "step": 5775 }, { "epoch": 0.8048491604542605, "grad_norm": 1.2180826663970947, "learning_rate": 1.052621911136278e-06, "loss": 0.10538303852081299, "step": 5776 }, { "epoch": 0.8049885041454748, "grad_norm": 0.6317790746688843, "learning_rate": 1.0511757984756455e-06, "loss": 0.091949462890625, "step": 5777 }, { "epoch": 0.8051278478366892, "grad_norm": 0.7387763261795044, "learning_rate": 1.049730563153597e-06, "loss": 0.10538101196289062, "step": 5778 }, { "epoch": 0.8052671915279036, "grad_norm": 1.3498486280441284, "learning_rate": 1.0482862054912296e-06, "loss": 0.08842277526855469, "step": 5779 }, { "epoch": 0.805406535219118, "grad_norm": 0.5514281988143921, "learning_rate": 1.0468427258094481e-06, "loss": 0.07382965087890625, "step": 5780 }, { "epoch": 0.8055458789103324, "grad_norm": 0.8005538582801819, "learning_rate": 1.045400124428963e-06, "loss": 0.08577537536621094, "step": 5781 }, { "epoch": 0.8056852226015467, "grad_norm": 0.5595508217811584, "learning_rate": 1.043958401670283e-06, "loss": 0.06964302062988281, "step": 5782 }, { "epoch": 0.8058245662927611, "grad_norm": 0.7421627044677734, "learning_rate": 1.04251755785373e-06, "loss": 0.08328056335449219, "step": 5783 }, { "epoch": 0.8059639099839755, "grad_norm": 0.42175108194351196, "learning_rate": 1.0410775932994232e-06, "loss": 0.0696258544921875, "step": 5784 }, { "epoch": 0.8061032536751899, "grad_norm": 0.7018191814422607, "learning_rate": 1.039638508327293e-06, "loss": 0.09615898132324219, "step": 5785 }, { "epoch": 0.8062425973664042, "grad_norm": 0.5364245176315308, "learning_rate": 1.0382003032570682e-06, "loss": 0.07865524291992188, "step": 5786 }, { "epoch": 0.8063819410576186, "grad_norm": 0.42707306146621704, "learning_rate": 1.0367629784082867e-06, "loss": 0.06726264953613281, "step": 5787 }, { "epoch": 0.806521284748833, "grad_norm": 0.8699250817298889, "learning_rate": 1.0353265341002916e-06, "loss": 0.10303878784179688, "step": 5788 }, { "epoch": 0.8066606284400474, "grad_norm": 0.48625561594963074, "learning_rate": 1.0338909706522232e-06, "loss": 0.07785797119140625, "step": 5789 }, { "epoch": 0.8067999721312618, "grad_norm": 0.7649532556533813, "learning_rate": 1.032456288383033e-06, "loss": 0.10072517395019531, "step": 5790 }, { "epoch": 0.8069393158224761, "grad_norm": 0.5949563980102539, "learning_rate": 1.0310224876114766e-06, "loss": 0.07735633850097656, "step": 5791 }, { "epoch": 0.8070786595136905, "grad_norm": 0.7156592607498169, "learning_rate": 1.0295895686561087e-06, "loss": 0.08892440795898438, "step": 5792 }, { "epoch": 0.8072180032049049, "grad_norm": 0.7894830107688904, "learning_rate": 1.0281575318352937e-06, "loss": 0.07379817962646484, "step": 5793 }, { "epoch": 0.8073573468961193, "grad_norm": 0.49201512336730957, "learning_rate": 1.0267263774671953e-06, "loss": 0.08637237548828125, "step": 5794 }, { "epoch": 0.8074966905873336, "grad_norm": 0.8022938966751099, "learning_rate": 1.0252961058697858e-06, "loss": 0.09160423278808594, "step": 5795 }, { "epoch": 0.807636034278548, "grad_norm": 1.037520408630371, "learning_rate": 1.0238667173608364e-06, "loss": 0.10206031799316406, "step": 5796 }, { "epoch": 0.8077753779697624, "grad_norm": 0.7484109401702881, "learning_rate": 1.0224382122579256e-06, "loss": 0.08302688598632812, "step": 5797 }, { "epoch": 0.8079147216609768, "grad_norm": 0.5998749136924744, "learning_rate": 1.0210105908784362e-06, "loss": 0.0848236083984375, "step": 5798 }, { "epoch": 0.8080540653521912, "grad_norm": 0.7991771697998047, "learning_rate": 1.0195838535395514e-06, "loss": 0.10811805725097656, "step": 5799 }, { "epoch": 0.8081934090434055, "grad_norm": 0.9268032312393188, "learning_rate": 1.0181580005582586e-06, "loss": 0.09699821472167969, "step": 5800 }, { "epoch": 0.8083327527346199, "grad_norm": 0.679787814617157, "learning_rate": 1.0167330322513508e-06, "loss": 0.10339927673339844, "step": 5801 }, { "epoch": 0.8084720964258343, "grad_norm": 0.4157421588897705, "learning_rate": 1.0153089489354256e-06, "loss": 0.06517696380615234, "step": 5802 }, { "epoch": 0.8086114401170487, "grad_norm": 1.0149177312850952, "learning_rate": 1.0138857509268784e-06, "loss": 0.11972236633300781, "step": 5803 }, { "epoch": 0.808750783808263, "grad_norm": 0.3790348768234253, "learning_rate": 1.012463438541914e-06, "loss": 0.06545066833496094, "step": 5804 }, { "epoch": 0.8088901274994774, "grad_norm": 0.7120288610458374, "learning_rate": 1.0110420120965354e-06, "loss": 0.09689903259277344, "step": 5805 }, { "epoch": 0.8090294711906918, "grad_norm": 0.3631022572517395, "learning_rate": 1.0096214719065534e-06, "loss": 0.06524944305419922, "step": 5806 }, { "epoch": 0.8091688148819062, "grad_norm": 1.817570686340332, "learning_rate": 1.008201818287577e-06, "loss": 0.1498432159423828, "step": 5807 }, { "epoch": 0.8093081585731206, "grad_norm": 1.0253156423568726, "learning_rate": 1.0067830515550224e-06, "loss": 0.09629058837890625, "step": 5808 }, { "epoch": 0.8094475022643349, "grad_norm": 1.8744175434112549, "learning_rate": 1.0053651720241087e-06, "loss": 0.1797771453857422, "step": 5809 }, { "epoch": 0.8095868459555494, "grad_norm": 0.8125008940696716, "learning_rate": 1.0039481800098545e-06, "loss": 0.0998849868774414, "step": 5810 }, { "epoch": 0.8097261896467638, "grad_norm": 1.6419028043746948, "learning_rate": 1.0025320758270819e-06, "loss": 0.10509681701660156, "step": 5811 }, { "epoch": 0.8098655333379782, "grad_norm": 0.7736353874206543, "learning_rate": 1.001116859790418e-06, "loss": 0.08681297302246094, "step": 5812 }, { "epoch": 0.8100048770291925, "grad_norm": 1.2070949077606201, "learning_rate": 9.997025322142934e-07, "loss": 0.095062255859375, "step": 5813 }, { "epoch": 0.8101442207204069, "grad_norm": 0.7817686200141907, "learning_rate": 9.98289093412938e-07, "loss": 0.074249267578125, "step": 5814 }, { "epoch": 0.8102835644116213, "grad_norm": 0.4253198802471161, "learning_rate": 9.96876543700384e-07, "loss": 0.07501983642578125, "step": 5815 }, { "epoch": 0.8104229081028357, "grad_norm": 0.9075488448143005, "learning_rate": 9.95464883390469e-07, "loss": 0.10004711151123047, "step": 5816 }, { "epoch": 0.8105622517940501, "grad_norm": 0.5555988550186157, "learning_rate": 9.940541127968335e-07, "loss": 0.07494926452636719, "step": 5817 }, { "epoch": 0.8107015954852644, "grad_norm": 0.641688346862793, "learning_rate": 9.92644232232915e-07, "loss": 0.08353996276855469, "step": 5818 }, { "epoch": 0.8108409391764788, "grad_norm": 0.7751086354255676, "learning_rate": 9.912352420119587e-07, "loss": 0.07155609130859375, "step": 5819 }, { "epoch": 0.8109802828676932, "grad_norm": 0.7722707390785217, "learning_rate": 9.89827142447013e-07, "loss": 0.08295249938964844, "step": 5820 }, { "epoch": 0.8111196265589076, "grad_norm": 0.36063864827156067, "learning_rate": 9.884199338509193e-07, "loss": 0.05193519592285156, "step": 5821 }, { "epoch": 0.811258970250122, "grad_norm": 0.36758530139923096, "learning_rate": 9.87013616536331e-07, "loss": 0.06142425537109375, "step": 5822 }, { "epoch": 0.8113983139413363, "grad_norm": 0.5832239389419556, "learning_rate": 9.856081908156984e-07, "loss": 0.0884389877319336, "step": 5823 }, { "epoch": 0.8115376576325507, "grad_norm": 1.3621293306350708, "learning_rate": 9.842036570012776e-07, "loss": 0.10148143768310547, "step": 5824 }, { "epoch": 0.8116770013237651, "grad_norm": 1.1474101543426514, "learning_rate": 9.828000154051216e-07, "loss": 0.09546279907226562, "step": 5825 }, { "epoch": 0.8118163450149795, "grad_norm": 0.795616626739502, "learning_rate": 9.813972663390864e-07, "loss": 0.10762214660644531, "step": 5826 }, { "epoch": 0.8119556887061938, "grad_norm": 0.7794660329818726, "learning_rate": 9.79995410114834e-07, "loss": 0.09363555908203125, "step": 5827 }, { "epoch": 0.8120950323974082, "grad_norm": 0.4230771064758301, "learning_rate": 9.785944470438218e-07, "loss": 0.06713104248046875, "step": 5828 }, { "epoch": 0.8122343760886226, "grad_norm": 1.139506220817566, "learning_rate": 9.771943774373138e-07, "loss": 0.11404609680175781, "step": 5829 }, { "epoch": 0.812373719779837, "grad_norm": 0.5972505807876587, "learning_rate": 9.757952016063738e-07, "loss": 0.07591819763183594, "step": 5830 }, { "epoch": 0.8125130634710513, "grad_norm": 0.6171560883522034, "learning_rate": 9.743969198618659e-07, "loss": 0.08224010467529297, "step": 5831 }, { "epoch": 0.8126524071622657, "grad_norm": 0.9506023526191711, "learning_rate": 9.729995325144548e-07, "loss": 0.0782470703125, "step": 5832 }, { "epoch": 0.8127917508534801, "grad_norm": 1.0895142555236816, "learning_rate": 9.716030398746096e-07, "loss": 0.1028127670288086, "step": 5833 }, { "epoch": 0.8129310945446945, "grad_norm": 0.37275606393814087, "learning_rate": 9.702074422526004e-07, "loss": 0.06340599060058594, "step": 5834 }, { "epoch": 0.8130704382359089, "grad_norm": 1.3203051090240479, "learning_rate": 9.688127399584956e-07, "loss": 0.14240646362304688, "step": 5835 }, { "epoch": 0.8132097819271232, "grad_norm": 0.3062257766723633, "learning_rate": 9.674189333021655e-07, "loss": 0.05575752258300781, "step": 5836 }, { "epoch": 0.8133491256183376, "grad_norm": 0.5086067318916321, "learning_rate": 9.660260225932834e-07, "loss": 0.07915306091308594, "step": 5837 }, { "epoch": 0.813488469309552, "grad_norm": 0.6845689415931702, "learning_rate": 9.646340081413225e-07, "loss": 0.0948638916015625, "step": 5838 }, { "epoch": 0.8136278130007664, "grad_norm": 0.8128687739372253, "learning_rate": 9.632428902555546e-07, "loss": 0.08076953887939453, "step": 5839 }, { "epoch": 0.8137671566919807, "grad_norm": 0.7575756907463074, "learning_rate": 9.618526692450564e-07, "loss": 0.102020263671875, "step": 5840 }, { "epoch": 0.8139065003831951, "grad_norm": 0.6877959966659546, "learning_rate": 9.604633454187035e-07, "loss": 0.0979156494140625, "step": 5841 }, { "epoch": 0.8140458440744095, "grad_norm": 1.2586075067520142, "learning_rate": 9.59074919085171e-07, "loss": 0.08897972106933594, "step": 5842 }, { "epoch": 0.8141851877656239, "grad_norm": 0.8076151013374329, "learning_rate": 9.57687390552935e-07, "loss": 0.09291648864746094, "step": 5843 }, { "epoch": 0.8143245314568383, "grad_norm": 1.0061314105987549, "learning_rate": 9.563007601302727e-07, "loss": 0.10185718536376953, "step": 5844 }, { "epoch": 0.8144638751480526, "grad_norm": 0.9661710262298584, "learning_rate": 9.549150281252633e-07, "loss": 0.12274360656738281, "step": 5845 }, { "epoch": 0.814603218839267, "grad_norm": 0.7395631670951843, "learning_rate": 9.535301948457842e-07, "loss": 0.0987539291381836, "step": 5846 }, { "epoch": 0.8147425625304814, "grad_norm": 1.442618727684021, "learning_rate": 9.521462605995119e-07, "loss": 0.10083770751953125, "step": 5847 }, { "epoch": 0.8148819062216958, "grad_norm": 0.6876887083053589, "learning_rate": 9.507632256939264e-07, "loss": 0.10528182983398438, "step": 5848 }, { "epoch": 0.8150212499129101, "grad_norm": 0.9755474328994751, "learning_rate": 9.493810904363077e-07, "loss": 0.11518096923828125, "step": 5849 }, { "epoch": 0.8151605936041246, "grad_norm": 1.3475022315979004, "learning_rate": 9.479998551337322e-07, "loss": 0.1278219223022461, "step": 5850 }, { "epoch": 0.815299937295339, "grad_norm": 0.8923541307449341, "learning_rate": 9.466195200930817e-07, "loss": 0.11484527587890625, "step": 5851 }, { "epoch": 0.8154392809865534, "grad_norm": 0.8103158473968506, "learning_rate": 9.452400856210337e-07, "loss": 0.10404014587402344, "step": 5852 }, { "epoch": 0.8155786246777678, "grad_norm": 0.4734945297241211, "learning_rate": 9.438615520240651e-07, "loss": 0.07023143768310547, "step": 5853 }, { "epoch": 0.8157179683689821, "grad_norm": 0.7393940687179565, "learning_rate": 9.424839196084568e-07, "loss": 0.0735940933227539, "step": 5854 }, { "epoch": 0.8158573120601965, "grad_norm": 0.8951472043991089, "learning_rate": 9.411071886802869e-07, "loss": 0.10202789306640625, "step": 5855 }, { "epoch": 0.8159966557514109, "grad_norm": 0.5405364632606506, "learning_rate": 9.397313595454349e-07, "loss": 0.08032798767089844, "step": 5856 }, { "epoch": 0.8161359994426253, "grad_norm": 1.0500856637954712, "learning_rate": 9.383564325095767e-07, "loss": 0.09874963760375977, "step": 5857 }, { "epoch": 0.8162753431338396, "grad_norm": 0.506389319896698, "learning_rate": 9.369824078781897e-07, "loss": 0.07417678833007812, "step": 5858 }, { "epoch": 0.816414686825054, "grad_norm": 0.6675388813018799, "learning_rate": 9.356092859565524e-07, "loss": 0.08894824981689453, "step": 5859 }, { "epoch": 0.8165540305162684, "grad_norm": 0.6330224275588989, "learning_rate": 9.342370670497391e-07, "loss": 0.07537460327148438, "step": 5860 }, { "epoch": 0.8166933742074828, "grad_norm": 0.7390345931053162, "learning_rate": 9.328657514626266e-07, "loss": 0.08110237121582031, "step": 5861 }, { "epoch": 0.8168327178986972, "grad_norm": 0.9401358962059021, "learning_rate": 9.314953394998905e-07, "loss": 0.09258270263671875, "step": 5862 }, { "epoch": 0.8169720615899115, "grad_norm": 0.7845829129219055, "learning_rate": 9.30125831466005e-07, "loss": 0.08786964416503906, "step": 5863 }, { "epoch": 0.8171114052811259, "grad_norm": 1.2611656188964844, "learning_rate": 9.287572276652417e-07, "loss": 0.10469436645507812, "step": 5864 }, { "epoch": 0.8172507489723403, "grad_norm": 0.7303304076194763, "learning_rate": 9.273895284016743e-07, "loss": 0.07167243957519531, "step": 5865 }, { "epoch": 0.8173900926635547, "grad_norm": 1.6378872394561768, "learning_rate": 9.260227339791755e-07, "loss": 0.09555339813232422, "step": 5866 }, { "epoch": 0.817529436354769, "grad_norm": 0.6999822854995728, "learning_rate": 9.246568447014148e-07, "loss": 0.09684371948242188, "step": 5867 }, { "epoch": 0.8176687800459834, "grad_norm": 0.620296061038971, "learning_rate": 9.232918608718599e-07, "loss": 0.06714248657226562, "step": 5868 }, { "epoch": 0.8178081237371978, "grad_norm": 0.6487634181976318, "learning_rate": 9.219277827937811e-07, "loss": 0.09244632720947266, "step": 5869 }, { "epoch": 0.8179474674284122, "grad_norm": 0.5278477668762207, "learning_rate": 9.205646107702465e-07, "loss": 0.08729743957519531, "step": 5870 }, { "epoch": 0.8180868111196266, "grad_norm": 1.1867955923080444, "learning_rate": 9.192023451041187e-07, "loss": 0.10625267028808594, "step": 5871 }, { "epoch": 0.8182261548108409, "grad_norm": 0.770518958568573, "learning_rate": 9.178409860980648e-07, "loss": 0.09042644500732422, "step": 5872 }, { "epoch": 0.8183654985020553, "grad_norm": 0.5566033720970154, "learning_rate": 9.164805340545457e-07, "loss": 0.07231426239013672, "step": 5873 }, { "epoch": 0.8185048421932697, "grad_norm": 0.6092151403427124, "learning_rate": 9.151209892758245e-07, "loss": 0.09847450256347656, "step": 5874 }, { "epoch": 0.8186441858844841, "grad_norm": 0.9643042087554932, "learning_rate": 9.137623520639588e-07, "loss": 0.11063671112060547, "step": 5875 }, { "epoch": 0.8187835295756984, "grad_norm": 0.6241855621337891, "learning_rate": 9.124046227208083e-07, "loss": 0.08856010437011719, "step": 5876 }, { "epoch": 0.8189228732669128, "grad_norm": 1.0564448833465576, "learning_rate": 9.110478015480301e-07, "loss": 0.13212013244628906, "step": 5877 }, { "epoch": 0.8190622169581272, "grad_norm": 0.7146324515342712, "learning_rate": 9.096918888470785e-07, "loss": 0.07601737976074219, "step": 5878 }, { "epoch": 0.8192015606493416, "grad_norm": 1.3014644384384155, "learning_rate": 9.083368849192042e-07, "loss": 0.10606956481933594, "step": 5879 }, { "epoch": 0.819340904340556, "grad_norm": 0.7053472995758057, "learning_rate": 9.069827900654604e-07, "loss": 0.09630584716796875, "step": 5880 }, { "epoch": 0.8194802480317703, "grad_norm": 0.5935311317443848, "learning_rate": 9.056296045866964e-07, "loss": 0.08238601684570312, "step": 5881 }, { "epoch": 0.8196195917229847, "grad_norm": 0.632820188999176, "learning_rate": 9.042773287835566e-07, "loss": 0.08288002014160156, "step": 5882 }, { "epoch": 0.8197589354141991, "grad_norm": 0.8083436489105225, "learning_rate": 9.02925962956489e-07, "loss": 0.08330631256103516, "step": 5883 }, { "epoch": 0.8198982791054135, "grad_norm": 1.3187085390090942, "learning_rate": 9.015755074057336e-07, "loss": 0.11694908142089844, "step": 5884 }, { "epoch": 0.8200376227966278, "grad_norm": 0.6442153453826904, "learning_rate": 9.002259624313325e-07, "loss": 0.0802755355834961, "step": 5885 }, { "epoch": 0.8201769664878422, "grad_norm": 1.0899540185928345, "learning_rate": 8.98877328333122e-07, "loss": 0.06699275970458984, "step": 5886 }, { "epoch": 0.8203163101790566, "grad_norm": 1.3211896419525146, "learning_rate": 8.975296054107396e-07, "loss": 0.1186065673828125, "step": 5887 }, { "epoch": 0.820455653870271, "grad_norm": 0.6669188737869263, "learning_rate": 8.961827939636198e-07, "loss": 0.07822418212890625, "step": 5888 }, { "epoch": 0.8205949975614854, "grad_norm": 0.5453914403915405, "learning_rate": 8.948368942909891e-07, "loss": 0.078704833984375, "step": 5889 }, { "epoch": 0.8207343412526998, "grad_norm": 0.6285053491592407, "learning_rate": 8.934919066918779e-07, "loss": 0.08691596984863281, "step": 5890 }, { "epoch": 0.8208736849439142, "grad_norm": 0.8647305965423584, "learning_rate": 8.921478314651133e-07, "loss": 0.10145854949951172, "step": 5891 }, { "epoch": 0.8210130286351286, "grad_norm": 0.8742812871932983, "learning_rate": 8.908046689093153e-07, "loss": 0.10553359985351562, "step": 5892 }, { "epoch": 0.821152372326343, "grad_norm": 1.084209680557251, "learning_rate": 8.894624193229051e-07, "loss": 0.12201309204101562, "step": 5893 }, { "epoch": 0.8212917160175573, "grad_norm": 0.5631493330001831, "learning_rate": 8.88121083004102e-07, "loss": 0.07637596130371094, "step": 5894 }, { "epoch": 0.8214310597087717, "grad_norm": 0.9405952095985413, "learning_rate": 8.867806602509177e-07, "loss": 0.11290264129638672, "step": 5895 }, { "epoch": 0.8215704033999861, "grad_norm": 0.825855016708374, "learning_rate": 8.854411513611638e-07, "loss": 0.09877204895019531, "step": 5896 }, { "epoch": 0.8217097470912005, "grad_norm": 1.210314393043518, "learning_rate": 8.841025566324485e-07, "loss": 0.09744453430175781, "step": 5897 }, { "epoch": 0.8218490907824149, "grad_norm": 0.4725269377231598, "learning_rate": 8.827648763621793e-07, "loss": 0.06433773040771484, "step": 5898 }, { "epoch": 0.8219884344736292, "grad_norm": 0.7226461172103882, "learning_rate": 8.814281108475565e-07, "loss": 0.090606689453125, "step": 5899 }, { "epoch": 0.8221277781648436, "grad_norm": 0.5475625395774841, "learning_rate": 8.800922603855772e-07, "loss": 0.09012031555175781, "step": 5900 }, { "epoch": 0.822267121856058, "grad_norm": 0.7391207814216614, "learning_rate": 8.787573252730386e-07, "loss": 0.08535003662109375, "step": 5901 }, { "epoch": 0.8224064655472724, "grad_norm": 1.012999176979065, "learning_rate": 8.774233058065346e-07, "loss": 0.09395790100097656, "step": 5902 }, { "epoch": 0.8225458092384867, "grad_norm": 0.6839848756790161, "learning_rate": 8.760902022824502e-07, "loss": 0.07358741760253906, "step": 5903 }, { "epoch": 0.8226851529297011, "grad_norm": 1.2276843786239624, "learning_rate": 8.747580149969737e-07, "loss": 0.0999603271484375, "step": 5904 }, { "epoch": 0.8228244966209155, "grad_norm": 0.5579260587692261, "learning_rate": 8.734267442460842e-07, "loss": 0.06366920471191406, "step": 5905 }, { "epoch": 0.8229638403121299, "grad_norm": 1.5510286092758179, "learning_rate": 8.720963903255619e-07, "loss": 0.12177276611328125, "step": 5906 }, { "epoch": 0.8231031840033443, "grad_norm": 0.8694924712181091, "learning_rate": 8.707669535309793e-07, "loss": 0.09760475158691406, "step": 5907 }, { "epoch": 0.8232425276945586, "grad_norm": 0.6742076277732849, "learning_rate": 8.694384341577072e-07, "loss": 0.09421920776367188, "step": 5908 }, { "epoch": 0.823381871385773, "grad_norm": 0.5586492419242859, "learning_rate": 8.681108325009141e-07, "loss": 0.07784461975097656, "step": 5909 }, { "epoch": 0.8235212150769874, "grad_norm": 0.6263794302940369, "learning_rate": 8.667841488555617e-07, "loss": 0.08038520812988281, "step": 5910 }, { "epoch": 0.8236605587682018, "grad_norm": 1.3912886381149292, "learning_rate": 8.654583835164066e-07, "loss": 0.1278972625732422, "step": 5911 }, { "epoch": 0.8237999024594161, "grad_norm": 0.451850950717926, "learning_rate": 8.641335367780057e-07, "loss": 0.057610511779785156, "step": 5912 }, { "epoch": 0.8239392461506305, "grad_norm": 0.7306058406829834, "learning_rate": 8.62809608934711e-07, "loss": 0.08572578430175781, "step": 5913 }, { "epoch": 0.8240785898418449, "grad_norm": 0.4847070276737213, "learning_rate": 8.614866002806665e-07, "loss": 0.07353782653808594, "step": 5914 }, { "epoch": 0.8242179335330593, "grad_norm": 1.8877722024917603, "learning_rate": 8.601645111098162e-07, "loss": 0.13709259033203125, "step": 5915 }, { "epoch": 0.8243572772242737, "grad_norm": 1.2911409139633179, "learning_rate": 8.588433417158965e-07, "loss": 0.103515625, "step": 5916 }, { "epoch": 0.824496620915488, "grad_norm": 0.5883973240852356, "learning_rate": 8.575230923924432e-07, "loss": 0.0901641845703125, "step": 5917 }, { "epoch": 0.8246359646067024, "grad_norm": 2.017284631729126, "learning_rate": 8.562037634327836e-07, "loss": 0.1571025848388672, "step": 5918 }, { "epoch": 0.8247753082979168, "grad_norm": 1.4229642152786255, "learning_rate": 8.548853551300429e-07, "loss": 0.1307544708251953, "step": 5919 }, { "epoch": 0.8249146519891312, "grad_norm": 0.9412460327148438, "learning_rate": 8.535678677771441e-07, "loss": 0.09421730041503906, "step": 5920 }, { "epoch": 0.8250539956803455, "grad_norm": 0.622073233127594, "learning_rate": 8.522513016667982e-07, "loss": 0.07194900512695312, "step": 5921 }, { "epoch": 0.8251933393715599, "grad_norm": 0.7012416124343872, "learning_rate": 8.509356570915184e-07, "loss": 0.0845947265625, "step": 5922 }, { "epoch": 0.8253326830627743, "grad_norm": 1.2486193180084229, "learning_rate": 8.496209343436101e-07, "loss": 0.0926971435546875, "step": 5923 }, { "epoch": 0.8254720267539887, "grad_norm": 1.088849425315857, "learning_rate": 8.483071337151777e-07, "loss": 0.08088493347167969, "step": 5924 }, { "epoch": 0.825611370445203, "grad_norm": 0.7630278468132019, "learning_rate": 8.469942554981148e-07, "loss": 0.08248138427734375, "step": 5925 }, { "epoch": 0.8257507141364174, "grad_norm": 0.5637840628623962, "learning_rate": 8.456822999841125e-07, "loss": 0.072021484375, "step": 5926 }, { "epoch": 0.8258900578276318, "grad_norm": 1.2923487424850464, "learning_rate": 8.443712674646598e-07, "loss": 0.11122894287109375, "step": 5927 }, { "epoch": 0.8260294015188462, "grad_norm": 0.7496253252029419, "learning_rate": 8.430611582310355e-07, "loss": 0.08016014099121094, "step": 5928 }, { "epoch": 0.8261687452100606, "grad_norm": 0.9170098304748535, "learning_rate": 8.417519725743173e-07, "loss": 0.11747550964355469, "step": 5929 }, { "epoch": 0.8263080889012749, "grad_norm": 0.7879952788352966, "learning_rate": 8.40443710785378e-07, "loss": 0.09671592712402344, "step": 5930 }, { "epoch": 0.8264474325924894, "grad_norm": 0.8534653782844543, "learning_rate": 8.391363731548813e-07, "loss": 0.0880126953125, "step": 5931 }, { "epoch": 0.8265867762837038, "grad_norm": 0.7035395503044128, "learning_rate": 8.378299599732875e-07, "loss": 0.0828714370727539, "step": 5932 }, { "epoch": 0.8267261199749182, "grad_norm": 0.8639432191848755, "learning_rate": 8.365244715308524e-07, "loss": 0.09805679321289062, "step": 5933 }, { "epoch": 0.8268654636661326, "grad_norm": 0.5343711972236633, "learning_rate": 8.352199081176271e-07, "loss": 0.07077217102050781, "step": 5934 }, { "epoch": 0.8270048073573469, "grad_norm": 1.0096834897994995, "learning_rate": 8.339162700234537e-07, "loss": 0.12130355834960938, "step": 5935 }, { "epoch": 0.8271441510485613, "grad_norm": 0.6872014999389648, "learning_rate": 8.326135575379729e-07, "loss": 0.07730484008789062, "step": 5936 }, { "epoch": 0.8272834947397757, "grad_norm": 0.7351813316345215, "learning_rate": 8.313117709506158e-07, "loss": 0.08788108825683594, "step": 5937 }, { "epoch": 0.8274228384309901, "grad_norm": 0.4087059497833252, "learning_rate": 8.30010910550611e-07, "loss": 0.06576919555664062, "step": 5938 }, { "epoch": 0.8275621821222044, "grad_norm": 0.6794036626815796, "learning_rate": 8.287109766269786e-07, "loss": 0.0851449966430664, "step": 5939 }, { "epoch": 0.8277015258134188, "grad_norm": 0.52797532081604, "learning_rate": 8.274119694685345e-07, "loss": 0.07570457458496094, "step": 5940 }, { "epoch": 0.8278408695046332, "grad_norm": 1.313839077949524, "learning_rate": 8.26113889363891e-07, "loss": 0.11541938781738281, "step": 5941 }, { "epoch": 0.8279802131958476, "grad_norm": 0.5235917568206787, "learning_rate": 8.248167366014493e-07, "loss": 0.08057785034179688, "step": 5942 }, { "epoch": 0.828119556887062, "grad_norm": 0.4672568738460541, "learning_rate": 8.235205114694067e-07, "loss": 0.08168411254882812, "step": 5943 }, { "epoch": 0.8282589005782763, "grad_norm": 0.8194131255149841, "learning_rate": 8.222252142557557e-07, "loss": 0.09563064575195312, "step": 5944 }, { "epoch": 0.8283982442694907, "grad_norm": 0.46748554706573486, "learning_rate": 8.209308452482829e-07, "loss": 0.07430648803710938, "step": 5945 }, { "epoch": 0.8285375879607051, "grad_norm": 0.6669378280639648, "learning_rate": 8.196374047345668e-07, "loss": 0.09601020812988281, "step": 5946 }, { "epoch": 0.8286769316519195, "grad_norm": 1.1405341625213623, "learning_rate": 8.183448930019783e-07, "loss": 0.10434913635253906, "step": 5947 }, { "epoch": 0.8288162753431338, "grad_norm": 0.5683647990226746, "learning_rate": 8.170533103376865e-07, "loss": 0.09298324584960938, "step": 5948 }, { "epoch": 0.8289556190343482, "grad_norm": 0.5350258946418762, "learning_rate": 8.157626570286515e-07, "loss": 0.07248306274414062, "step": 5949 }, { "epoch": 0.8290949627255626, "grad_norm": 0.5297756195068359, "learning_rate": 8.144729333616259e-07, "loss": 0.08176994323730469, "step": 5950 }, { "epoch": 0.829234306416777, "grad_norm": 0.6938969492912292, "learning_rate": 8.131841396231566e-07, "loss": 0.09693050384521484, "step": 5951 }, { "epoch": 0.8293736501079914, "grad_norm": 0.7831054925918579, "learning_rate": 8.118962760995874e-07, "loss": 0.10298633575439453, "step": 5952 }, { "epoch": 0.8295129937992057, "grad_norm": 1.3374009132385254, "learning_rate": 8.106093430770473e-07, "loss": 0.14255142211914062, "step": 5953 }, { "epoch": 0.8296523374904201, "grad_norm": 0.5217306017875671, "learning_rate": 8.093233408414658e-07, "loss": 0.07215499877929688, "step": 5954 }, { "epoch": 0.8297916811816345, "grad_norm": 0.6482180953025818, "learning_rate": 8.080382696785627e-07, "loss": 0.0829458236694336, "step": 5955 }, { "epoch": 0.8299310248728489, "grad_norm": 0.42920535802841187, "learning_rate": 8.067541298738535e-07, "loss": 0.06401348114013672, "step": 5956 }, { "epoch": 0.8300703685640632, "grad_norm": 1.060102105140686, "learning_rate": 8.054709217126433e-07, "loss": 0.10470771789550781, "step": 5957 }, { "epoch": 0.8302097122552776, "grad_norm": 0.4272494912147522, "learning_rate": 8.041886454800307e-07, "loss": 0.06807899475097656, "step": 5958 }, { "epoch": 0.830349055946492, "grad_norm": 1.6512513160705566, "learning_rate": 8.029073014609096e-07, "loss": 0.11665725708007812, "step": 5959 }, { "epoch": 0.8304883996377064, "grad_norm": 1.6971572637557983, "learning_rate": 8.016268899399643e-07, "loss": 0.1271495819091797, "step": 5960 }, { "epoch": 0.8306277433289208, "grad_norm": 0.38874468207359314, "learning_rate": 8.00347411201673e-07, "loss": 0.06404495239257812, "step": 5961 }, { "epoch": 0.8307670870201351, "grad_norm": 0.4108019769191742, "learning_rate": 7.990688655303086e-07, "loss": 0.06763172149658203, "step": 5962 }, { "epoch": 0.8309064307113495, "grad_norm": 0.5766010284423828, "learning_rate": 7.977912532099336e-07, "loss": 0.09728050231933594, "step": 5963 }, { "epoch": 0.8310457744025639, "grad_norm": 1.0497311353683472, "learning_rate": 7.965145745244029e-07, "loss": 0.09139823913574219, "step": 5964 }, { "epoch": 0.8311851180937783, "grad_norm": 0.7246831655502319, "learning_rate": 7.95238829757366e-07, "loss": 0.09316253662109375, "step": 5965 }, { "epoch": 0.8313244617849926, "grad_norm": 0.8580150604248047, "learning_rate": 7.939640191922665e-07, "loss": 0.07923698425292969, "step": 5966 }, { "epoch": 0.831463805476207, "grad_norm": 0.7249826788902283, "learning_rate": 7.926901431123362e-07, "loss": 0.1143951416015625, "step": 5967 }, { "epoch": 0.8316031491674214, "grad_norm": 0.8400806784629822, "learning_rate": 7.914172018006006e-07, "loss": 0.10101795196533203, "step": 5968 }, { "epoch": 0.8317424928586358, "grad_norm": 0.8880516886711121, "learning_rate": 7.901451955398792e-07, "loss": 0.09539604187011719, "step": 5969 }, { "epoch": 0.8318818365498502, "grad_norm": 0.6090980768203735, "learning_rate": 7.88874124612784e-07, "loss": 0.07312202453613281, "step": 5970 }, { "epoch": 0.8320211802410646, "grad_norm": 2.254594326019287, "learning_rate": 7.876039893017151e-07, "loss": 0.12640953063964844, "step": 5971 }, { "epoch": 0.832160523932279, "grad_norm": 1.4101539850234985, "learning_rate": 7.863347898888696e-07, "loss": 0.15080642700195312, "step": 5972 }, { "epoch": 0.8322998676234934, "grad_norm": 0.7239387631416321, "learning_rate": 7.850665266562352e-07, "loss": 0.08872413635253906, "step": 5973 }, { "epoch": 0.8324392113147078, "grad_norm": 0.545496940612793, "learning_rate": 7.837991998855899e-07, "loss": 0.07124710083007812, "step": 5974 }, { "epoch": 0.8325785550059221, "grad_norm": 1.0769840478897095, "learning_rate": 7.825328098585039e-07, "loss": 0.14737701416015625, "step": 5975 }, { "epoch": 0.8327178986971365, "grad_norm": 1.3100831508636475, "learning_rate": 7.812673568563406e-07, "loss": 0.11822128295898438, "step": 5976 }, { "epoch": 0.8328572423883509, "grad_norm": 0.9652345180511475, "learning_rate": 7.800028411602572e-07, "loss": 0.13260269165039062, "step": 5977 }, { "epoch": 0.8329965860795653, "grad_norm": 1.194395661354065, "learning_rate": 7.78739263051198e-07, "loss": 0.09244918823242188, "step": 5978 }, { "epoch": 0.8331359297707797, "grad_norm": 0.8737658262252808, "learning_rate": 7.774766228099001e-07, "loss": 0.1043996810913086, "step": 5979 }, { "epoch": 0.833275273461994, "grad_norm": 0.8058817386627197, "learning_rate": 7.762149207168951e-07, "loss": 0.08231830596923828, "step": 5980 }, { "epoch": 0.8334146171532084, "grad_norm": 0.7293906211853027, "learning_rate": 7.749541570525054e-07, "loss": 0.07701873779296875, "step": 5981 }, { "epoch": 0.8335539608444228, "grad_norm": 0.5983412861824036, "learning_rate": 7.736943320968409e-07, "loss": 0.07059669494628906, "step": 5982 }, { "epoch": 0.8336933045356372, "grad_norm": 0.4862145781517029, "learning_rate": 7.724354461298089e-07, "loss": 0.07348823547363281, "step": 5983 }, { "epoch": 0.8338326482268515, "grad_norm": 0.3180941641330719, "learning_rate": 7.711774994311027e-07, "loss": 0.05599021911621094, "step": 5984 }, { "epoch": 0.8339719919180659, "grad_norm": 1.3156936168670654, "learning_rate": 7.699204922802123e-07, "loss": 0.0914926528930664, "step": 5985 }, { "epoch": 0.8341113356092803, "grad_norm": 2.0563442707061768, "learning_rate": 7.686644249564124e-07, "loss": 0.10832405090332031, "step": 5986 }, { "epoch": 0.8342506793004947, "grad_norm": 0.48877450823783875, "learning_rate": 7.674092977387737e-07, "loss": 0.08252525329589844, "step": 5987 }, { "epoch": 0.8343900229917091, "grad_norm": 0.8815303444862366, "learning_rate": 7.661551109061593e-07, "loss": 0.11313819885253906, "step": 5988 }, { "epoch": 0.8345293666829234, "grad_norm": 0.5779299139976501, "learning_rate": 7.649018647372186e-07, "loss": 0.06113433837890625, "step": 5989 }, { "epoch": 0.8346687103741378, "grad_norm": 0.853289008140564, "learning_rate": 7.636495595103938e-07, "loss": 0.08983993530273438, "step": 5990 }, { "epoch": 0.8348080540653522, "grad_norm": 0.9220898747444153, "learning_rate": 7.6239819550392e-07, "loss": 0.09478044509887695, "step": 5991 }, { "epoch": 0.8349473977565666, "grad_norm": 0.5510091781616211, "learning_rate": 7.611477729958205e-07, "loss": 0.06972503662109375, "step": 5992 }, { "epoch": 0.835086741447781, "grad_norm": 0.6637252569198608, "learning_rate": 7.598982922639109e-07, "loss": 0.07622528076171875, "step": 5993 }, { "epoch": 0.8352260851389953, "grad_norm": 0.6589205265045166, "learning_rate": 7.586497535857984e-07, "loss": 0.07707595825195312, "step": 5994 }, { "epoch": 0.8353654288302097, "grad_norm": 1.01239812374115, "learning_rate": 7.574021572388795e-07, "loss": 0.11328125, "step": 5995 }, { "epoch": 0.8355047725214241, "grad_norm": 0.592911958694458, "learning_rate": 7.561555035003398e-07, "loss": 0.06910324096679688, "step": 5996 }, { "epoch": 0.8356441162126385, "grad_norm": 1.008838176727295, "learning_rate": 7.549097926471583e-07, "loss": 0.08479881286621094, "step": 5997 }, { "epoch": 0.8357834599038528, "grad_norm": 0.40979522466659546, "learning_rate": 7.536650249561056e-07, "loss": 0.07294082641601562, "step": 5998 }, { "epoch": 0.8359228035950672, "grad_norm": 0.9438984990119934, "learning_rate": 7.524212007037385e-07, "loss": 0.1253032684326172, "step": 5999 }, { "epoch": 0.8360621472862816, "grad_norm": 1.4067397117614746, "learning_rate": 7.511783201664053e-07, "loss": 0.11650943756103516, "step": 6000 }, { "epoch": 0.836201490977496, "grad_norm": 0.7620235681533813, "learning_rate": 7.499363836202472e-07, "loss": 0.07837772369384766, "step": 6001 }, { "epoch": 0.8363408346687103, "grad_norm": 0.6788490414619446, "learning_rate": 7.486953913411954e-07, "loss": 0.08060836791992188, "step": 6002 }, { "epoch": 0.8364801783599247, "grad_norm": 0.5051229596138, "learning_rate": 7.474553436049675e-07, "loss": 0.08034324645996094, "step": 6003 }, { "epoch": 0.8366195220511391, "grad_norm": 0.696119487285614, "learning_rate": 7.462162406870766e-07, "loss": 0.09325408935546875, "step": 6004 }, { "epoch": 0.8367588657423535, "grad_norm": 0.8789159059524536, "learning_rate": 7.4497808286282e-07, "loss": 0.12119102478027344, "step": 6005 }, { "epoch": 0.8368982094335679, "grad_norm": 0.7863568067550659, "learning_rate": 7.437408704072907e-07, "loss": 0.09052658081054688, "step": 6006 }, { "epoch": 0.8370375531247822, "grad_norm": 0.6361302137374878, "learning_rate": 7.425046035953665e-07, "loss": 0.08339977264404297, "step": 6007 }, { "epoch": 0.8371768968159966, "grad_norm": 1.1601306200027466, "learning_rate": 7.412692827017193e-07, "loss": 0.10281753540039062, "step": 6008 }, { "epoch": 0.837316240507211, "grad_norm": 0.7933894395828247, "learning_rate": 7.400349080008107e-07, "loss": 0.09631919860839844, "step": 6009 }, { "epoch": 0.8374555841984254, "grad_norm": 0.4844433069229126, "learning_rate": 7.38801479766888e-07, "loss": 0.07900428771972656, "step": 6010 }, { "epoch": 0.8375949278896399, "grad_norm": 0.7171069383621216, "learning_rate": 7.375689982739915e-07, "loss": 0.07839202880859375, "step": 6011 }, { "epoch": 0.8377342715808542, "grad_norm": 0.6165214776992798, "learning_rate": 7.363374637959498e-07, "loss": 0.07419681549072266, "step": 6012 }, { "epoch": 0.8378736152720686, "grad_norm": 1.0096794366836548, "learning_rate": 7.35106876606384e-07, "loss": 0.10971832275390625, "step": 6013 }, { "epoch": 0.838012958963283, "grad_norm": 0.5015798211097717, "learning_rate": 7.338772369787001e-07, "loss": 0.07140922546386719, "step": 6014 }, { "epoch": 0.8381523026544974, "grad_norm": 0.9639085531234741, "learning_rate": 7.326485451860976e-07, "loss": 0.09650516510009766, "step": 6015 }, { "epoch": 0.8382916463457117, "grad_norm": 0.9024426341056824, "learning_rate": 7.314208015015623e-07, "loss": 0.07818031311035156, "step": 6016 }, { "epoch": 0.8384309900369261, "grad_norm": 1.0425562858581543, "learning_rate": 7.301940061978724e-07, "loss": 0.109832763671875, "step": 6017 }, { "epoch": 0.8385703337281405, "grad_norm": 0.6138182878494263, "learning_rate": 7.289681595475922e-07, "loss": 0.07306098937988281, "step": 6018 }, { "epoch": 0.8387096774193549, "grad_norm": 1.0262609720230103, "learning_rate": 7.277432618230773e-07, "loss": 0.11241531372070312, "step": 6019 }, { "epoch": 0.8388490211105692, "grad_norm": 0.5177237391471863, "learning_rate": 7.265193132964749e-07, "loss": 0.07703208923339844, "step": 6020 }, { "epoch": 0.8389883648017836, "grad_norm": 0.5101404190063477, "learning_rate": 7.252963142397134e-07, "loss": 0.08329391479492188, "step": 6021 }, { "epoch": 0.839127708492998, "grad_norm": 0.7880303263664246, "learning_rate": 7.24074264924518e-07, "loss": 0.08083629608154297, "step": 6022 }, { "epoch": 0.8392670521842124, "grad_norm": 1.1341519355773926, "learning_rate": 7.228531656223997e-07, "loss": 0.1307048797607422, "step": 6023 }, { "epoch": 0.8394063958754268, "grad_norm": 1.0395731925964355, "learning_rate": 7.216330166046603e-07, "loss": 0.09884834289550781, "step": 6024 }, { "epoch": 0.8395457395666411, "grad_norm": 0.43177756667137146, "learning_rate": 7.204138181423881e-07, "loss": 0.06987190246582031, "step": 6025 }, { "epoch": 0.8396850832578555, "grad_norm": 0.7436912655830383, "learning_rate": 7.191955705064591e-07, "loss": 0.09747838973999023, "step": 6026 }, { "epoch": 0.8398244269490699, "grad_norm": 0.5724114775657654, "learning_rate": 7.179782739675434e-07, "loss": 0.07247734069824219, "step": 6027 }, { "epoch": 0.8399637706402843, "grad_norm": 0.8331990838050842, "learning_rate": 7.167619287960942e-07, "loss": 0.09394645690917969, "step": 6028 }, { "epoch": 0.8401031143314986, "grad_norm": 0.8449060320854187, "learning_rate": 7.155465352623559e-07, "loss": 0.08942127227783203, "step": 6029 }, { "epoch": 0.840242458022713, "grad_norm": 0.8055474758148193, "learning_rate": 7.143320936363629e-07, "loss": 0.09640693664550781, "step": 6030 }, { "epoch": 0.8403818017139274, "grad_norm": 0.9252771139144897, "learning_rate": 7.131186041879357e-07, "loss": 0.08651351928710938, "step": 6031 }, { "epoch": 0.8405211454051418, "grad_norm": 1.507381796836853, "learning_rate": 7.119060671866817e-07, "loss": 0.1283111572265625, "step": 6032 }, { "epoch": 0.8406604890963562, "grad_norm": 0.6914828419685364, "learning_rate": 7.106944829020013e-07, "loss": 0.08956718444824219, "step": 6033 }, { "epoch": 0.8407998327875705, "grad_norm": 0.7559006810188293, "learning_rate": 7.094838516030811e-07, "loss": 0.09940338134765625, "step": 6034 }, { "epoch": 0.8409391764787849, "grad_norm": 1.5120861530303955, "learning_rate": 7.082741735588938e-07, "loss": 0.11804008483886719, "step": 6035 }, { "epoch": 0.8410785201699993, "grad_norm": 0.8720502853393555, "learning_rate": 7.070654490382045e-07, "loss": 0.06872367858886719, "step": 6036 }, { "epoch": 0.8412178638612137, "grad_norm": 1.441402792930603, "learning_rate": 7.058576783095622e-07, "loss": 0.09597969055175781, "step": 6037 }, { "epoch": 0.841357207552428, "grad_norm": 0.8516719937324524, "learning_rate": 7.046508616413078e-07, "loss": 0.07381248474121094, "step": 6038 }, { "epoch": 0.8414965512436424, "grad_norm": 0.8876279592514038, "learning_rate": 7.034449993015663e-07, "loss": 0.09537315368652344, "step": 6039 }, { "epoch": 0.8416358949348568, "grad_norm": 0.9215595722198486, "learning_rate": 7.022400915582539e-07, "loss": 0.09324073791503906, "step": 6040 }, { "epoch": 0.8417752386260712, "grad_norm": 2.53952956199646, "learning_rate": 7.010361386790748e-07, "loss": 0.13573455810546875, "step": 6041 }, { "epoch": 0.8419145823172856, "grad_norm": 0.6488248705863953, "learning_rate": 6.998331409315184e-07, "loss": 0.087188720703125, "step": 6042 }, { "epoch": 0.8420539260084999, "grad_norm": 1.1280287504196167, "learning_rate": 6.986310985828626e-07, "loss": 0.10068321228027344, "step": 6043 }, { "epoch": 0.8421932696997143, "grad_norm": 0.529554545879364, "learning_rate": 6.974300119001754e-07, "loss": 0.08011245727539062, "step": 6044 }, { "epoch": 0.8423326133909287, "grad_norm": 1.2853288650512695, "learning_rate": 6.962298811503104e-07, "loss": 0.09914779663085938, "step": 6045 }, { "epoch": 0.8424719570821431, "grad_norm": 0.42886051535606384, "learning_rate": 6.950307065999085e-07, "loss": 0.06459999084472656, "step": 6046 }, { "epoch": 0.8426113007733574, "grad_norm": 0.8569791913032532, "learning_rate": 6.938324885154007e-07, "loss": 0.08578205108642578, "step": 6047 }, { "epoch": 0.8427506444645718, "grad_norm": 0.5469284653663635, "learning_rate": 6.92635227163001e-07, "loss": 0.08167171478271484, "step": 6048 }, { "epoch": 0.8428899881557862, "grad_norm": 0.9510036110877991, "learning_rate": 6.914389228087165e-07, "loss": 0.11950397491455078, "step": 6049 }, { "epoch": 0.8430293318470006, "grad_norm": 0.7516045570373535, "learning_rate": 6.902435757183357e-07, "loss": 0.07744598388671875, "step": 6050 }, { "epoch": 0.8431686755382151, "grad_norm": 1.189374566078186, "learning_rate": 6.890491861574389e-07, "loss": 0.09424591064453125, "step": 6051 }, { "epoch": 0.8433080192294294, "grad_norm": 1.06661057472229, "learning_rate": 6.87855754391395e-07, "loss": 0.12686729431152344, "step": 6052 }, { "epoch": 0.8434473629206438, "grad_norm": 1.7350358963012695, "learning_rate": 6.866632806853518e-07, "loss": 0.1143045425415039, "step": 6053 }, { "epoch": 0.8435867066118582, "grad_norm": 0.4206884503364563, "learning_rate": 6.854717653042531e-07, "loss": 0.06799507141113281, "step": 6054 }, { "epoch": 0.8437260503030726, "grad_norm": 0.9209513068199158, "learning_rate": 6.842812085128253e-07, "loss": 0.1159210205078125, "step": 6055 }, { "epoch": 0.843865393994287, "grad_norm": 0.6383757591247559, "learning_rate": 6.830916105755847e-07, "loss": 0.09222412109375, "step": 6056 }, { "epoch": 0.8440047376855013, "grad_norm": 0.5622556209564209, "learning_rate": 6.819029717568315e-07, "loss": 0.087982177734375, "step": 6057 }, { "epoch": 0.8441440813767157, "grad_norm": 0.6672676205635071, "learning_rate": 6.807152923206528e-07, "loss": 0.09094429016113281, "step": 6058 }, { "epoch": 0.8442834250679301, "grad_norm": 1.1119014024734497, "learning_rate": 6.795285725309269e-07, "loss": 0.1234292984008789, "step": 6059 }, { "epoch": 0.8444227687591445, "grad_norm": 0.954748809337616, "learning_rate": 6.783428126513125e-07, "loss": 0.13370132446289062, "step": 6060 }, { "epoch": 0.8445621124503588, "grad_norm": 0.8491211533546448, "learning_rate": 6.771580129452604e-07, "loss": 0.091888427734375, "step": 6061 }, { "epoch": 0.8447014561415732, "grad_norm": 1.1031070947647095, "learning_rate": 6.759741736760062e-07, "loss": 0.13112640380859375, "step": 6062 }, { "epoch": 0.8448407998327876, "grad_norm": 0.7429043650627136, "learning_rate": 6.747912951065722e-07, "loss": 0.08692264556884766, "step": 6063 }, { "epoch": 0.844980143524002, "grad_norm": 0.5585903525352478, "learning_rate": 6.736093774997643e-07, "loss": 0.07184791564941406, "step": 6064 }, { "epoch": 0.8451194872152163, "grad_norm": 0.9555863738059998, "learning_rate": 6.724284211181803e-07, "loss": 0.09183979034423828, "step": 6065 }, { "epoch": 0.8452588309064307, "grad_norm": 0.4840509593486786, "learning_rate": 6.712484262242014e-07, "loss": 0.07025718688964844, "step": 6066 }, { "epoch": 0.8453981745976451, "grad_norm": 0.699157178401947, "learning_rate": 6.700693930799945e-07, "loss": 0.08272743225097656, "step": 6067 }, { "epoch": 0.8455375182888595, "grad_norm": 0.3881455361843109, "learning_rate": 6.688913219475158e-07, "loss": 0.06767845153808594, "step": 6068 }, { "epoch": 0.8456768619800739, "grad_norm": 1.012094497680664, "learning_rate": 6.677142130885028e-07, "loss": 0.09470176696777344, "step": 6069 }, { "epoch": 0.8458162056712882, "grad_norm": 0.3971185088157654, "learning_rate": 6.665380667644849e-07, "loss": 0.0699920654296875, "step": 6070 }, { "epoch": 0.8459555493625026, "grad_norm": 0.45286110043525696, "learning_rate": 6.653628832367731e-07, "loss": 0.06939506530761719, "step": 6071 }, { "epoch": 0.846094893053717, "grad_norm": 0.4697556495666504, "learning_rate": 6.641886627664673e-07, "loss": 0.06167411804199219, "step": 6072 }, { "epoch": 0.8462342367449314, "grad_norm": 0.4768863618373871, "learning_rate": 6.630154056144533e-07, "loss": 0.07806587219238281, "step": 6073 }, { "epoch": 0.8463735804361457, "grad_norm": 1.207564353942871, "learning_rate": 6.618431120414015e-07, "loss": 0.11859703063964844, "step": 6074 }, { "epoch": 0.8465129241273601, "grad_norm": 0.9239991903305054, "learning_rate": 6.606717823077669e-07, "loss": 0.07804489135742188, "step": 6075 }, { "epoch": 0.8466522678185745, "grad_norm": 1.2237892150878906, "learning_rate": 6.59501416673794e-07, "loss": 0.10213661193847656, "step": 6076 }, { "epoch": 0.8467916115097889, "grad_norm": 0.8032836318016052, "learning_rate": 6.583320153995121e-07, "loss": 0.09523391723632812, "step": 6077 }, { "epoch": 0.8469309552010033, "grad_norm": 1.2579509019851685, "learning_rate": 6.571635787447339e-07, "loss": 0.12167739868164062, "step": 6078 }, { "epoch": 0.8470702988922176, "grad_norm": 0.8611698150634766, "learning_rate": 6.559961069690596e-07, "loss": 0.11035537719726562, "step": 6079 }, { "epoch": 0.847209642583432, "grad_norm": 1.482499599456787, "learning_rate": 6.548296003318744e-07, "loss": 0.13419151306152344, "step": 6080 }, { "epoch": 0.8473489862746464, "grad_norm": 1.0080907344818115, "learning_rate": 6.536640590923515e-07, "loss": 0.09243965148925781, "step": 6081 }, { "epoch": 0.8474883299658608, "grad_norm": 1.2477585077285767, "learning_rate": 6.52499483509445e-07, "loss": 0.11320686340332031, "step": 6082 }, { "epoch": 0.8476276736570751, "grad_norm": 0.6195218563079834, "learning_rate": 6.51335873841899e-07, "loss": 0.09008407592773438, "step": 6083 }, { "epoch": 0.8477670173482895, "grad_norm": 0.7885045409202576, "learning_rate": 6.501732303482394e-07, "loss": 0.11474609375, "step": 6084 }, { "epoch": 0.8479063610395039, "grad_norm": 0.8079976439476013, "learning_rate": 6.490115532867808e-07, "loss": 0.08643150329589844, "step": 6085 }, { "epoch": 0.8480457047307183, "grad_norm": 1.0306316614151, "learning_rate": 6.478508429156189e-07, "loss": 0.09366512298583984, "step": 6086 }, { "epoch": 0.8481850484219327, "grad_norm": 0.48611870408058167, "learning_rate": 6.466910994926384e-07, "loss": 0.070526123046875, "step": 6087 }, { "epoch": 0.848324392113147, "grad_norm": 0.8410348296165466, "learning_rate": 6.455323232755095e-07, "loss": 0.07555770874023438, "step": 6088 }, { "epoch": 0.8484637358043614, "grad_norm": 0.6755993366241455, "learning_rate": 6.44374514521684e-07, "loss": 0.086212158203125, "step": 6089 }, { "epoch": 0.8486030794955758, "grad_norm": 0.6770431399345398, "learning_rate": 6.432176734883994e-07, "loss": 0.07610130310058594, "step": 6090 }, { "epoch": 0.8487424231867903, "grad_norm": 0.44006675481796265, "learning_rate": 6.420618004326818e-07, "loss": 0.08167743682861328, "step": 6091 }, { "epoch": 0.8488817668780047, "grad_norm": 0.8163611888885498, "learning_rate": 6.409068956113379e-07, "loss": 0.0824594497680664, "step": 6092 }, { "epoch": 0.849021110569219, "grad_norm": 0.864422619342804, "learning_rate": 6.397529592809615e-07, "loss": 0.10586166381835938, "step": 6093 }, { "epoch": 0.8491604542604334, "grad_norm": 1.0354382991790771, "learning_rate": 6.38599991697933e-07, "loss": 0.1152048110961914, "step": 6094 }, { "epoch": 0.8492997979516478, "grad_norm": 0.48516154289245605, "learning_rate": 6.374479931184141e-07, "loss": 0.07487010955810547, "step": 6095 }, { "epoch": 0.8494391416428622, "grad_norm": 0.820529580116272, "learning_rate": 6.362969637983507e-07, "loss": 0.11693191528320312, "step": 6096 }, { "epoch": 0.8495784853340765, "grad_norm": 0.6210238337516785, "learning_rate": 6.351469039934771e-07, "loss": 0.09444999694824219, "step": 6097 }, { "epoch": 0.8497178290252909, "grad_norm": 1.038676142692566, "learning_rate": 6.339978139593117e-07, "loss": 0.12800216674804688, "step": 6098 }, { "epoch": 0.8498571727165053, "grad_norm": 0.7168063521385193, "learning_rate": 6.328496939511541e-07, "loss": 0.10297393798828125, "step": 6099 }, { "epoch": 0.8499965164077197, "grad_norm": 0.9943262934684753, "learning_rate": 6.317025442240893e-07, "loss": 0.09366607666015625, "step": 6100 }, { "epoch": 0.850135860098934, "grad_norm": 0.9153313040733337, "learning_rate": 6.305563650329899e-07, "loss": 0.09396934509277344, "step": 6101 }, { "epoch": 0.8502752037901484, "grad_norm": 0.6978869438171387, "learning_rate": 6.294111566325106e-07, "loss": 0.07348442077636719, "step": 6102 }, { "epoch": 0.8504145474813628, "grad_norm": 1.0674656629562378, "learning_rate": 6.282669192770896e-07, "loss": 0.09833717346191406, "step": 6103 }, { "epoch": 0.8505538911725772, "grad_norm": 0.8541334271430969, "learning_rate": 6.271236532209502e-07, "loss": 0.08901405334472656, "step": 6104 }, { "epoch": 0.8506932348637916, "grad_norm": 1.0674567222595215, "learning_rate": 6.259813587181024e-07, "loss": 0.12608718872070312, "step": 6105 }, { "epoch": 0.8508325785550059, "grad_norm": 1.0019248723983765, "learning_rate": 6.248400360223355e-07, "loss": 0.08820915222167969, "step": 6106 }, { "epoch": 0.8509719222462203, "grad_norm": 0.5409902334213257, "learning_rate": 6.236996853872251e-07, "loss": 0.08069419860839844, "step": 6107 }, { "epoch": 0.8511112659374347, "grad_norm": 0.9371684193611145, "learning_rate": 6.225603070661318e-07, "loss": 0.10022354125976562, "step": 6108 }, { "epoch": 0.8512506096286491, "grad_norm": 0.8353139758110046, "learning_rate": 6.214219013122008e-07, "loss": 0.09275245666503906, "step": 6109 }, { "epoch": 0.8513899533198634, "grad_norm": 0.5845342874526978, "learning_rate": 6.202844683783587e-07, "loss": 0.08056259155273438, "step": 6110 }, { "epoch": 0.8515292970110778, "grad_norm": 0.9624623656272888, "learning_rate": 6.191480085173163e-07, "loss": 0.10636138916015625, "step": 6111 }, { "epoch": 0.8516686407022922, "grad_norm": 0.46140286326408386, "learning_rate": 6.180125219815697e-07, "loss": 0.06258392333984375, "step": 6112 }, { "epoch": 0.8518079843935066, "grad_norm": 1.1368882656097412, "learning_rate": 6.168780090233994e-07, "loss": 0.08879470825195312, "step": 6113 }, { "epoch": 0.851947328084721, "grad_norm": 1.424544095993042, "learning_rate": 6.157444698948656e-07, "loss": 0.1110076904296875, "step": 6114 }, { "epoch": 0.8520866717759353, "grad_norm": 0.870483934879303, "learning_rate": 6.146119048478177e-07, "loss": 0.11237716674804688, "step": 6115 }, { "epoch": 0.8522260154671497, "grad_norm": 0.9254414439201355, "learning_rate": 6.134803141338835e-07, "loss": 0.12775230407714844, "step": 6116 }, { "epoch": 0.8523653591583641, "grad_norm": 0.9763825535774231, "learning_rate": 6.123496980044785e-07, "loss": 0.09919548034667969, "step": 6117 }, { "epoch": 0.8525047028495785, "grad_norm": 1.292262077331543, "learning_rate": 6.112200567107978e-07, "loss": 0.10840606689453125, "step": 6118 }, { "epoch": 0.8526440465407928, "grad_norm": 0.33534669876098633, "learning_rate": 6.10091390503823e-07, "loss": 0.0643463134765625, "step": 6119 }, { "epoch": 0.8527833902320072, "grad_norm": 0.6288948655128479, "learning_rate": 6.089636996343202e-07, "loss": 0.08033370971679688, "step": 6120 }, { "epoch": 0.8529227339232216, "grad_norm": 1.0315539836883545, "learning_rate": 6.07836984352832e-07, "loss": 0.10676383972167969, "step": 6121 }, { "epoch": 0.853062077614436, "grad_norm": 0.7361795902252197, "learning_rate": 6.067112449096907e-07, "loss": 0.09422588348388672, "step": 6122 }, { "epoch": 0.8532014213056504, "grad_norm": 0.9963939785957336, "learning_rate": 6.055864815550106e-07, "loss": 0.09387779235839844, "step": 6123 }, { "epoch": 0.8533407649968647, "grad_norm": 0.7397588491439819, "learning_rate": 6.044626945386894e-07, "loss": 0.08500480651855469, "step": 6124 }, { "epoch": 0.8534801086880791, "grad_norm": 0.3032810688018799, "learning_rate": 6.033398841104043e-07, "loss": 0.053859710693359375, "step": 6125 }, { "epoch": 0.8536194523792935, "grad_norm": 0.528245210647583, "learning_rate": 6.022180505196207e-07, "loss": 0.07789897918701172, "step": 6126 }, { "epoch": 0.8537587960705079, "grad_norm": 0.7069056630134583, "learning_rate": 6.01097194015583e-07, "loss": 0.09374618530273438, "step": 6127 }, { "epoch": 0.8538981397617222, "grad_norm": 0.8864181041717529, "learning_rate": 5.999773148473193e-07, "loss": 0.09430694580078125, "step": 6128 }, { "epoch": 0.8540374834529366, "grad_norm": 0.7259133458137512, "learning_rate": 5.988584132636421e-07, "loss": 0.08091115951538086, "step": 6129 }, { "epoch": 0.854176827144151, "grad_norm": 1.7395867109298706, "learning_rate": 5.977404895131467e-07, "loss": 0.17102718353271484, "step": 6130 }, { "epoch": 0.8543161708353654, "grad_norm": 0.43871569633483887, "learning_rate": 5.966235438442086e-07, "loss": 0.07599449157714844, "step": 6131 }, { "epoch": 0.8544555145265799, "grad_norm": 1.1962307691574097, "learning_rate": 5.955075765049878e-07, "loss": 0.09991836547851562, "step": 6132 }, { "epoch": 0.8545948582177942, "grad_norm": 1.1440359354019165, "learning_rate": 5.943925877434276e-07, "loss": 0.104248046875, "step": 6133 }, { "epoch": 0.8547342019090086, "grad_norm": 0.9505505561828613, "learning_rate": 5.932785778072531e-07, "loss": 0.08531570434570312, "step": 6134 }, { "epoch": 0.854873545600223, "grad_norm": 0.9083396792411804, "learning_rate": 5.921655469439708e-07, "loss": 0.11999893188476562, "step": 6135 }, { "epoch": 0.8550128892914374, "grad_norm": 0.918753445148468, "learning_rate": 5.910534954008718e-07, "loss": 0.09228897094726562, "step": 6136 }, { "epoch": 0.8551522329826517, "grad_norm": 0.8325721025466919, "learning_rate": 5.899424234250278e-07, "loss": 0.10328197479248047, "step": 6137 }, { "epoch": 0.8552915766738661, "grad_norm": 0.5663749575614929, "learning_rate": 5.888323312632948e-07, "loss": 0.08249378204345703, "step": 6138 }, { "epoch": 0.8554309203650805, "grad_norm": 1.1628235578536987, "learning_rate": 5.877232191623078e-07, "loss": 0.08520221710205078, "step": 6139 }, { "epoch": 0.8555702640562949, "grad_norm": 0.9665324091911316, "learning_rate": 5.866150873684878e-07, "loss": 0.11302375793457031, "step": 6140 }, { "epoch": 0.8557096077475093, "grad_norm": 1.7994024753570557, "learning_rate": 5.855079361280374e-07, "loss": 0.1278066635131836, "step": 6141 }, { "epoch": 0.8558489514387236, "grad_norm": 0.8804750442504883, "learning_rate": 5.844017656869389e-07, "loss": 0.09401702880859375, "step": 6142 }, { "epoch": 0.855988295129938, "grad_norm": 0.3748350441455841, "learning_rate": 5.83296576290957e-07, "loss": 0.05793571472167969, "step": 6143 }, { "epoch": 0.8561276388211524, "grad_norm": 1.034155249595642, "learning_rate": 5.821923681856406e-07, "loss": 0.10022354125976562, "step": 6144 }, { "epoch": 0.8562669825123668, "grad_norm": 0.4914131164550781, "learning_rate": 5.810891416163211e-07, "loss": 0.07281875610351562, "step": 6145 }, { "epoch": 0.8564063262035811, "grad_norm": 1.1822130680084229, "learning_rate": 5.799868968281075e-07, "loss": 0.12749290466308594, "step": 6146 }, { "epoch": 0.8565456698947955, "grad_norm": 0.7327971458435059, "learning_rate": 5.788856340658966e-07, "loss": 0.08629035949707031, "step": 6147 }, { "epoch": 0.8566850135860099, "grad_norm": 0.656723141670227, "learning_rate": 5.777853535743605e-07, "loss": 0.06343841552734375, "step": 6148 }, { "epoch": 0.8568243572772243, "grad_norm": 0.458786278963089, "learning_rate": 5.766860555979586e-07, "loss": 0.06713294982910156, "step": 6149 }, { "epoch": 0.8569637009684387, "grad_norm": 1.1511142253875732, "learning_rate": 5.755877403809284e-07, "loss": 0.10737228393554688, "step": 6150 }, { "epoch": 0.857103044659653, "grad_norm": 0.39058902859687805, "learning_rate": 5.744904081672914e-07, "loss": 0.06744575500488281, "step": 6151 }, { "epoch": 0.8572423883508674, "grad_norm": 0.5016586780548096, "learning_rate": 5.733940592008519e-07, "loss": 0.06527900695800781, "step": 6152 }, { "epoch": 0.8573817320420818, "grad_norm": 0.5902200937271118, "learning_rate": 5.72298693725189e-07, "loss": 0.08379554748535156, "step": 6153 }, { "epoch": 0.8575210757332962, "grad_norm": 0.563361406326294, "learning_rate": 5.712043119836702e-07, "loss": 0.06975364685058594, "step": 6154 }, { "epoch": 0.8576604194245105, "grad_norm": 1.1875340938568115, "learning_rate": 5.701109142194422e-07, "loss": 0.128509521484375, "step": 6155 }, { "epoch": 0.8577997631157249, "grad_norm": 0.6928291916847229, "learning_rate": 5.69018500675434e-07, "loss": 0.08121109008789062, "step": 6156 }, { "epoch": 0.8579391068069393, "grad_norm": 0.7882460951805115, "learning_rate": 5.679270715943535e-07, "loss": 0.07488441467285156, "step": 6157 }, { "epoch": 0.8580784504981537, "grad_norm": 0.9474771022796631, "learning_rate": 5.668366272186915e-07, "loss": 0.09579658508300781, "step": 6158 }, { "epoch": 0.858217794189368, "grad_norm": 0.32353708148002625, "learning_rate": 5.657471677907205e-07, "loss": 0.05804252624511719, "step": 6159 }, { "epoch": 0.8583571378805824, "grad_norm": 1.3547574281692505, "learning_rate": 5.646586935524922e-07, "loss": 0.0693206787109375, "step": 6160 }, { "epoch": 0.8584964815717968, "grad_norm": 2.379732131958008, "learning_rate": 5.635712047458419e-07, "loss": 0.10625076293945312, "step": 6161 }, { "epoch": 0.8586358252630112, "grad_norm": 0.46318256855010986, "learning_rate": 5.624847016123847e-07, "loss": 0.06851387023925781, "step": 6162 }, { "epoch": 0.8587751689542256, "grad_norm": 1.2546683549880981, "learning_rate": 5.613991843935179e-07, "loss": 0.10333633422851562, "step": 6163 }, { "epoch": 0.8589145126454399, "grad_norm": 0.45660313963890076, "learning_rate": 5.60314653330416e-07, "loss": 0.07933235168457031, "step": 6164 }, { "epoch": 0.8590538563366543, "grad_norm": 1.4712989330291748, "learning_rate": 5.592311086640379e-07, "loss": 0.11529731750488281, "step": 6165 }, { "epoch": 0.8591932000278687, "grad_norm": 1.2141832113265991, "learning_rate": 5.581485506351242e-07, "loss": 0.09928131103515625, "step": 6166 }, { "epoch": 0.8593325437190831, "grad_norm": 1.001444697380066, "learning_rate": 5.570669794841921e-07, "loss": 0.1216888427734375, "step": 6167 }, { "epoch": 0.8594718874102975, "grad_norm": 0.6318986415863037, "learning_rate": 5.559863954515448e-07, "loss": 0.0792083740234375, "step": 6168 }, { "epoch": 0.8596112311015118, "grad_norm": 1.6686811447143555, "learning_rate": 5.549067987772605e-07, "loss": 0.11197471618652344, "step": 6169 }, { "epoch": 0.8597505747927262, "grad_norm": 0.6490387320518494, "learning_rate": 5.538281897012032e-07, "loss": 0.08924293518066406, "step": 6170 }, { "epoch": 0.8598899184839406, "grad_norm": 0.8478994369506836, "learning_rate": 5.527505684630136e-07, "loss": 0.1094818115234375, "step": 6171 }, { "epoch": 0.8600292621751551, "grad_norm": 0.582647979259491, "learning_rate": 5.51673935302115e-07, "loss": 0.08346366882324219, "step": 6172 }, { "epoch": 0.8601686058663695, "grad_norm": 0.8082969784736633, "learning_rate": 5.505982904577123e-07, "loss": 0.09448432922363281, "step": 6173 }, { "epoch": 0.8603079495575838, "grad_norm": 0.54469895362854, "learning_rate": 5.495236341687876e-07, "loss": 0.08983039855957031, "step": 6174 }, { "epoch": 0.8604472932487982, "grad_norm": 2.097780704498291, "learning_rate": 5.484499666741044e-07, "loss": 0.15949440002441406, "step": 6175 }, { "epoch": 0.8605866369400126, "grad_norm": 0.9539492726325989, "learning_rate": 5.47377288212208e-07, "loss": 0.1128082275390625, "step": 6176 }, { "epoch": 0.860725980631227, "grad_norm": 0.6490043997764587, "learning_rate": 5.463055990214245e-07, "loss": 0.07562398910522461, "step": 6177 }, { "epoch": 0.8608653243224413, "grad_norm": 0.6814627647399902, "learning_rate": 5.452348993398566e-07, "loss": 0.08099937438964844, "step": 6178 }, { "epoch": 0.8610046680136557, "grad_norm": 0.4030105173587799, "learning_rate": 5.441651894053895e-07, "loss": 0.060962677001953125, "step": 6179 }, { "epoch": 0.8611440117048701, "grad_norm": 0.40438976883888245, "learning_rate": 5.430964694556884e-07, "loss": 0.06398391723632812, "step": 6180 }, { "epoch": 0.8612833553960845, "grad_norm": 1.0078474283218384, "learning_rate": 5.420287397282004e-07, "loss": 0.09308433532714844, "step": 6181 }, { "epoch": 0.8614226990872988, "grad_norm": 1.2867552042007446, "learning_rate": 5.409620004601479e-07, "loss": 0.10526847839355469, "step": 6182 }, { "epoch": 0.8615620427785132, "grad_norm": 0.40041494369506836, "learning_rate": 5.398962518885375e-07, "loss": 0.06284523010253906, "step": 6183 }, { "epoch": 0.8617013864697276, "grad_norm": 0.5178385376930237, "learning_rate": 5.388314942501549e-07, "loss": 0.07586479187011719, "step": 6184 }, { "epoch": 0.861840730160942, "grad_norm": 1.1256582736968994, "learning_rate": 5.377677277815646e-07, "loss": 0.1036977767944336, "step": 6185 }, { "epoch": 0.8619800738521564, "grad_norm": 1.2177987098693848, "learning_rate": 5.367049527191093e-07, "loss": 0.11770439147949219, "step": 6186 }, { "epoch": 0.8621194175433707, "grad_norm": 0.4364108443260193, "learning_rate": 5.356431692989144e-07, "loss": 0.07426071166992188, "step": 6187 }, { "epoch": 0.8622587612345851, "grad_norm": 1.4897600412368774, "learning_rate": 5.345823777568859e-07, "loss": 0.1356668472290039, "step": 6188 }, { "epoch": 0.8623981049257995, "grad_norm": 1.5478969812393188, "learning_rate": 5.335225783287051e-07, "loss": 0.16530418395996094, "step": 6189 }, { "epoch": 0.8625374486170139, "grad_norm": 0.7330582141876221, "learning_rate": 5.324637712498359e-07, "loss": 0.07819938659667969, "step": 6190 }, { "epoch": 0.8626767923082282, "grad_norm": 0.8235487341880798, "learning_rate": 5.314059567555213e-07, "loss": 0.10176849365234375, "step": 6191 }, { "epoch": 0.8628161359994426, "grad_norm": 0.7030858397483826, "learning_rate": 5.303491350807832e-07, "loss": 0.10934829711914062, "step": 6192 }, { "epoch": 0.862955479690657, "grad_norm": 1.010646104812622, "learning_rate": 5.292933064604228e-07, "loss": 0.1179342269897461, "step": 6193 }, { "epoch": 0.8630948233818714, "grad_norm": 0.4512905776500702, "learning_rate": 5.282384711290228e-07, "loss": 0.06899833679199219, "step": 6194 }, { "epoch": 0.8632341670730858, "grad_norm": 0.3571833074092865, "learning_rate": 5.271846293209426e-07, "loss": 0.06747627258300781, "step": 6195 }, { "epoch": 0.8633735107643001, "grad_norm": 1.3319281339645386, "learning_rate": 5.261317812703204e-07, "loss": 0.12278556823730469, "step": 6196 }, { "epoch": 0.8635128544555145, "grad_norm": 1.2010565996170044, "learning_rate": 5.250799272110768e-07, "loss": 0.121795654296875, "step": 6197 }, { "epoch": 0.8636521981467289, "grad_norm": 0.9747064113616943, "learning_rate": 5.240290673769099e-07, "loss": 0.11640548706054688, "step": 6198 }, { "epoch": 0.8637915418379433, "grad_norm": 0.8790909647941589, "learning_rate": 5.229792020012947e-07, "loss": 0.11374855041503906, "step": 6199 }, { "epoch": 0.8639308855291576, "grad_norm": 1.0834423303604126, "learning_rate": 5.2193033131749e-07, "loss": 0.09773063659667969, "step": 6200 }, { "epoch": 0.864070229220372, "grad_norm": 0.6494634747505188, "learning_rate": 5.20882455558529e-07, "loss": 0.09914016723632812, "step": 6201 }, { "epoch": 0.8642095729115864, "grad_norm": 0.8233022689819336, "learning_rate": 5.19835574957227e-07, "loss": 0.08061027526855469, "step": 6202 }, { "epoch": 0.8643489166028008, "grad_norm": 1.1688647270202637, "learning_rate": 5.187896897461752e-07, "loss": 0.08294105529785156, "step": 6203 }, { "epoch": 0.8644882602940152, "grad_norm": 0.964493989944458, "learning_rate": 5.177448001577468e-07, "loss": 0.08584785461425781, "step": 6204 }, { "epoch": 0.8646276039852295, "grad_norm": 1.079789638519287, "learning_rate": 5.167009064240936e-07, "loss": 0.11867713928222656, "step": 6205 }, { "epoch": 0.8647669476764439, "grad_norm": 0.4013625979423523, "learning_rate": 5.156580087771429e-07, "loss": 0.06450462341308594, "step": 6206 }, { "epoch": 0.8649062913676583, "grad_norm": 0.6281247138977051, "learning_rate": 5.146161074486022e-07, "loss": 0.07231426239013672, "step": 6207 }, { "epoch": 0.8650456350588727, "grad_norm": 0.7374158501625061, "learning_rate": 5.135752026699597e-07, "loss": 0.08885383605957031, "step": 6208 }, { "epoch": 0.865184978750087, "grad_norm": 0.5820049047470093, "learning_rate": 5.125352946724816e-07, "loss": 0.0726165771484375, "step": 6209 }, { "epoch": 0.8653243224413014, "grad_norm": 0.46699395775794983, "learning_rate": 5.114963836872105e-07, "loss": 0.07706260681152344, "step": 6210 }, { "epoch": 0.8654636661325158, "grad_norm": 0.5247236490249634, "learning_rate": 5.104584699449671e-07, "loss": 0.08410263061523438, "step": 6211 }, { "epoch": 0.8656030098237303, "grad_norm": 0.7064671516418457, "learning_rate": 5.094215536763541e-07, "loss": 0.10457992553710938, "step": 6212 }, { "epoch": 0.8657423535149447, "grad_norm": 0.6706658601760864, "learning_rate": 5.083856351117511e-07, "loss": 0.09038162231445312, "step": 6213 }, { "epoch": 0.865881697206159, "grad_norm": 0.6725912690162659, "learning_rate": 5.073507144813139e-07, "loss": 0.08565330505371094, "step": 6214 }, { "epoch": 0.8660210408973734, "grad_norm": 0.598831295967102, "learning_rate": 5.063167920149797e-07, "loss": 0.072601318359375, "step": 6215 }, { "epoch": 0.8661603845885878, "grad_norm": 0.4799025058746338, "learning_rate": 5.052838679424609e-07, "loss": 0.07007026672363281, "step": 6216 }, { "epoch": 0.8662997282798022, "grad_norm": 0.4666634798049927, "learning_rate": 5.042519424932512e-07, "loss": 0.07622909545898438, "step": 6217 }, { "epoch": 0.8664390719710166, "grad_norm": 0.8003259897232056, "learning_rate": 5.0322101589662e-07, "loss": 0.08241844177246094, "step": 6218 }, { "epoch": 0.8665784156622309, "grad_norm": 1.0528910160064697, "learning_rate": 5.02191088381615e-07, "loss": 0.10173320770263672, "step": 6219 }, { "epoch": 0.8667177593534453, "grad_norm": 0.38331618905067444, "learning_rate": 5.01162160177065e-07, "loss": 0.06047248840332031, "step": 6220 }, { "epoch": 0.8668571030446597, "grad_norm": 1.3797475099563599, "learning_rate": 5.001342315115726e-07, "loss": 0.09653472900390625, "step": 6221 }, { "epoch": 0.8669964467358741, "grad_norm": 1.8628623485565186, "learning_rate": 4.991073026135196e-07, "loss": 0.1352996826171875, "step": 6222 }, { "epoch": 0.8671357904270884, "grad_norm": 1.708479642868042, "learning_rate": 4.980813737110662e-07, "loss": 0.12829971313476562, "step": 6223 }, { "epoch": 0.8672751341183028, "grad_norm": 0.5527269244194031, "learning_rate": 4.970564450321525e-07, "loss": 0.06321907043457031, "step": 6224 }, { "epoch": 0.8674144778095172, "grad_norm": 1.355141520500183, "learning_rate": 4.960325168044916e-07, "loss": 0.10342025756835938, "step": 6225 }, { "epoch": 0.8675538215007316, "grad_norm": 1.2058115005493164, "learning_rate": 4.950095892555789e-07, "loss": 0.12062454223632812, "step": 6226 }, { "epoch": 0.867693165191946, "grad_norm": 0.4274204671382904, "learning_rate": 4.93987662612685e-07, "loss": 0.06764030456542969, "step": 6227 }, { "epoch": 0.8678325088831603, "grad_norm": 0.7698872089385986, "learning_rate": 4.929667371028579e-07, "loss": 0.07983970642089844, "step": 6228 }, { "epoch": 0.8679718525743747, "grad_norm": 0.545981228351593, "learning_rate": 4.919468129529237e-07, "loss": 0.07384490966796875, "step": 6229 }, { "epoch": 0.8681111962655891, "grad_norm": 0.7212803363800049, "learning_rate": 4.909278903894887e-07, "loss": 0.08999824523925781, "step": 6230 }, { "epoch": 0.8682505399568035, "grad_norm": 1.103687047958374, "learning_rate": 4.89909969638932e-07, "loss": 0.1067819595336914, "step": 6231 }, { "epoch": 0.8683898836480178, "grad_norm": 0.754253625869751, "learning_rate": 4.888930509274125e-07, "loss": 0.08765220642089844, "step": 6232 }, { "epoch": 0.8685292273392322, "grad_norm": 0.9328787326812744, "learning_rate": 4.878771344808664e-07, "loss": 0.08175849914550781, "step": 6233 }, { "epoch": 0.8686685710304466, "grad_norm": 0.7267076969146729, "learning_rate": 4.868622205250089e-07, "loss": 0.08390426635742188, "step": 6234 }, { "epoch": 0.868807914721661, "grad_norm": 0.8184004426002502, "learning_rate": 4.858483092853278e-07, "loss": 0.10529327392578125, "step": 6235 }, { "epoch": 0.8689472584128753, "grad_norm": 0.5717834234237671, "learning_rate": 4.848354009870931e-07, "loss": 0.06624031066894531, "step": 6236 }, { "epoch": 0.8690866021040897, "grad_norm": 1.1555393934249878, "learning_rate": 4.838234958553501e-07, "loss": 0.10602378845214844, "step": 6237 }, { "epoch": 0.8692259457953041, "grad_norm": 1.0852746963500977, "learning_rate": 4.828125941149197e-07, "loss": 0.14029502868652344, "step": 6238 }, { "epoch": 0.8693652894865185, "grad_norm": 0.6592853665351868, "learning_rate": 4.818026959904016e-07, "loss": 0.09800529479980469, "step": 6239 }, { "epoch": 0.8695046331777329, "grad_norm": 0.6138036847114563, "learning_rate": 4.80793801706172e-07, "loss": 0.08561325073242188, "step": 6240 }, { "epoch": 0.8696439768689472, "grad_norm": 1.0476677417755127, "learning_rate": 4.797859114863851e-07, "loss": 0.09634876251220703, "step": 6241 }, { "epoch": 0.8697833205601616, "grad_norm": 1.2849377393722534, "learning_rate": 4.787790255549707e-07, "loss": 0.12212944030761719, "step": 6242 }, { "epoch": 0.869922664251376, "grad_norm": 0.8394008874893188, "learning_rate": 4.777731441356342e-07, "loss": 0.09798240661621094, "step": 6243 }, { "epoch": 0.8700620079425904, "grad_norm": 0.6995416879653931, "learning_rate": 4.7676826745186144e-07, "loss": 0.09620094299316406, "step": 6244 }, { "epoch": 0.8702013516338047, "grad_norm": 0.7089188694953918, "learning_rate": 4.757643957269131e-07, "loss": 0.09792423248291016, "step": 6245 }, { "epoch": 0.8703406953250191, "grad_norm": 0.5412864685058594, "learning_rate": 4.7476152918382535e-07, "loss": 0.06645727157592773, "step": 6246 }, { "epoch": 0.8704800390162335, "grad_norm": 0.6363603472709656, "learning_rate": 4.737596680454137e-07, "loss": 0.08747100830078125, "step": 6247 }, { "epoch": 0.8706193827074479, "grad_norm": 0.6813156008720398, "learning_rate": 4.727588125342669e-07, "loss": 0.07964324951171875, "step": 6248 }, { "epoch": 0.8707587263986623, "grad_norm": 0.5084677934646606, "learning_rate": 4.7175896287275424e-07, "loss": 0.0794607400894165, "step": 6249 }, { "epoch": 0.8708980700898766, "grad_norm": 0.4760318696498871, "learning_rate": 4.7076011928301803e-07, "loss": 0.06332778930664062, "step": 6250 }, { "epoch": 0.871037413781091, "grad_norm": 0.8152291178703308, "learning_rate": 4.6976228198697847e-07, "loss": 0.08617591857910156, "step": 6251 }, { "epoch": 0.8711767574723055, "grad_norm": 0.595417320728302, "learning_rate": 4.687654512063344e-07, "loss": 0.08612442016601562, "step": 6252 }, { "epoch": 0.8713161011635199, "grad_norm": 0.4696880578994751, "learning_rate": 4.6776962716255593e-07, "loss": 0.06468009948730469, "step": 6253 }, { "epoch": 0.8714554448547343, "grad_norm": 0.846623957157135, "learning_rate": 4.667748100768937e-07, "loss": 0.09981536865234375, "step": 6254 }, { "epoch": 0.8715947885459486, "grad_norm": 0.39019063115119934, "learning_rate": 4.657810001703733e-07, "loss": 0.06559371948242188, "step": 6255 }, { "epoch": 0.871734132237163, "grad_norm": 1.134462594985962, "learning_rate": 4.647881976637975e-07, "loss": 0.13476943969726562, "step": 6256 }, { "epoch": 0.8718734759283774, "grad_norm": 0.7895445227622986, "learning_rate": 4.637964027777425e-07, "loss": 0.10488700866699219, "step": 6257 }, { "epoch": 0.8720128196195918, "grad_norm": 0.9387763738632202, "learning_rate": 4.62805615732565e-07, "loss": 0.1258525848388672, "step": 6258 }, { "epoch": 0.8721521633108061, "grad_norm": 1.0676078796386719, "learning_rate": 4.6181583674839323e-07, "loss": 0.10037612915039062, "step": 6259 }, { "epoch": 0.8722915070020205, "grad_norm": 0.777059018611908, "learning_rate": 4.6082706604513307e-07, "loss": 0.09963607788085938, "step": 6260 }, { "epoch": 0.8724308506932349, "grad_norm": 0.820003867149353, "learning_rate": 4.598393038424681e-07, "loss": 0.09716415405273438, "step": 6261 }, { "epoch": 0.8725701943844493, "grad_norm": 0.841123640537262, "learning_rate": 4.5885255035985675e-07, "loss": 0.08146095275878906, "step": 6262 }, { "epoch": 0.8727095380756636, "grad_norm": 0.8439874053001404, "learning_rate": 4.578668058165325e-07, "loss": 0.12831687927246094, "step": 6263 }, { "epoch": 0.872848881766878, "grad_norm": 0.9471628665924072, "learning_rate": 4.5688207043150467e-07, "loss": 0.08811378479003906, "step": 6264 }, { "epoch": 0.8729882254580924, "grad_norm": 1.007371187210083, "learning_rate": 4.5589834442355986e-07, "loss": 0.13338851928710938, "step": 6265 }, { "epoch": 0.8731275691493068, "grad_norm": 1.3803504705429077, "learning_rate": 4.549156280112599e-07, "loss": 0.09776687622070312, "step": 6266 }, { "epoch": 0.8732669128405212, "grad_norm": 0.4715639650821686, "learning_rate": 4.5393392141294066e-07, "loss": 0.06560325622558594, "step": 6267 }, { "epoch": 0.8734062565317355, "grad_norm": 0.47906923294067383, "learning_rate": 4.5295322484671667e-07, "loss": 0.07437801361083984, "step": 6268 }, { "epoch": 0.8735456002229499, "grad_norm": 0.4535812437534332, "learning_rate": 4.519735385304741e-07, "loss": 0.07088851928710938, "step": 6269 }, { "epoch": 0.8736849439141643, "grad_norm": 0.7652782797813416, "learning_rate": 4.509948626818789e-07, "loss": 0.11125946044921875, "step": 6270 }, { "epoch": 0.8738242876053787, "grad_norm": 0.936931848526001, "learning_rate": 4.500171975183687e-07, "loss": 0.09385871887207031, "step": 6271 }, { "epoch": 0.873963631296593, "grad_norm": 0.7871087193489075, "learning_rate": 4.4904054325715927e-07, "loss": 0.08756065368652344, "step": 6272 }, { "epoch": 0.8741029749878074, "grad_norm": 0.7576916813850403, "learning_rate": 4.4806490011524205e-07, "loss": 0.07320976257324219, "step": 6273 }, { "epoch": 0.8742423186790218, "grad_norm": 1.0797126293182373, "learning_rate": 4.4709026830938194e-07, "loss": 0.10586357116699219, "step": 6274 }, { "epoch": 0.8743816623702362, "grad_norm": 0.8692947030067444, "learning_rate": 4.46116648056118e-07, "loss": 0.10339927673339844, "step": 6275 }, { "epoch": 0.8745210060614506, "grad_norm": 0.798154354095459, "learning_rate": 4.451440395717682e-07, "loss": 0.09227180480957031, "step": 6276 }, { "epoch": 0.8746603497526649, "grad_norm": 0.6224346160888672, "learning_rate": 4.441724430724248e-07, "loss": 0.08006477355957031, "step": 6277 }, { "epoch": 0.8747996934438793, "grad_norm": 1.0016758441925049, "learning_rate": 4.432018587739517e-07, "loss": 0.07721805572509766, "step": 6278 }, { "epoch": 0.8749390371350937, "grad_norm": 0.6296698451042175, "learning_rate": 4.422322868919937e-07, "loss": 0.10465240478515625, "step": 6279 }, { "epoch": 0.8750783808263081, "grad_norm": 0.4946000874042511, "learning_rate": 4.4126372764196457e-07, "loss": 0.07769489288330078, "step": 6280 }, { "epoch": 0.8752177245175224, "grad_norm": 0.8006879687309265, "learning_rate": 4.402961812390588e-07, "loss": 0.1053924560546875, "step": 6281 }, { "epoch": 0.8753570682087368, "grad_norm": 1.1261248588562012, "learning_rate": 4.3932964789824064e-07, "loss": 0.08962631225585938, "step": 6282 }, { "epoch": 0.8754964118999512, "grad_norm": 0.8135372996330261, "learning_rate": 4.3836412783425265e-07, "loss": 0.10242080688476562, "step": 6283 }, { "epoch": 0.8756357555911656, "grad_norm": 0.3826376497745514, "learning_rate": 4.3739962126161273e-07, "loss": 0.06926155090332031, "step": 6284 }, { "epoch": 0.87577509928238, "grad_norm": 1.01958429813385, "learning_rate": 4.3643612839461057e-07, "loss": 0.08799934387207031, "step": 6285 }, { "epoch": 0.8759144429735943, "grad_norm": 0.9182413220405579, "learning_rate": 4.354736494473122e-07, "loss": 0.1135406494140625, "step": 6286 }, { "epoch": 0.8760537866648087, "grad_norm": 0.673734724521637, "learning_rate": 4.345121846335593e-07, "loss": 0.07251167297363281, "step": 6287 }, { "epoch": 0.8761931303560231, "grad_norm": 1.1172527074813843, "learning_rate": 4.335517341669676e-07, "loss": 0.12024402618408203, "step": 6288 }, { "epoch": 0.8763324740472375, "grad_norm": 0.8197587728500366, "learning_rate": 4.3259229826092655e-07, "loss": 0.09890365600585938, "step": 6289 }, { "epoch": 0.8764718177384518, "grad_norm": 0.3900660574436188, "learning_rate": 4.316338771286005e-07, "loss": 0.06674385070800781, "step": 6290 }, { "epoch": 0.8766111614296662, "grad_norm": 1.1531386375427246, "learning_rate": 4.3067647098293033e-07, "loss": 0.13399362564086914, "step": 6291 }, { "epoch": 0.8767505051208806, "grad_norm": 0.5465724468231201, "learning_rate": 4.29720080036628e-07, "loss": 0.07957649230957031, "step": 6292 }, { "epoch": 0.8768898488120951, "grad_norm": 0.8193186521530151, "learning_rate": 4.2876470450218254e-07, "loss": 0.11377906799316406, "step": 6293 }, { "epoch": 0.8770291925033095, "grad_norm": 0.983187198638916, "learning_rate": 4.278103445918569e-07, "loss": 0.0900421142578125, "step": 6294 }, { "epoch": 0.8771685361945238, "grad_norm": 0.444541871547699, "learning_rate": 4.268570005176892e-07, "loss": 0.06307792663574219, "step": 6295 }, { "epoch": 0.8773078798857382, "grad_norm": 0.709065854549408, "learning_rate": 4.259046724914878e-07, "loss": 0.07330322265625, "step": 6296 }, { "epoch": 0.8774472235769526, "grad_norm": 0.7039164304733276, "learning_rate": 4.2495336072484015e-07, "loss": 0.09248065948486328, "step": 6297 }, { "epoch": 0.877586567268167, "grad_norm": 0.5291371941566467, "learning_rate": 4.240030654291061e-07, "loss": 0.07013797760009766, "step": 6298 }, { "epoch": 0.8777259109593814, "grad_norm": 0.971062958240509, "learning_rate": 4.2305378681541833e-07, "loss": 0.08955669403076172, "step": 6299 }, { "epoch": 0.8778652546505957, "grad_norm": 0.6316004395484924, "learning_rate": 4.221055250946865e-07, "loss": 0.077880859375, "step": 6300 }, { "epoch": 0.8780045983418101, "grad_norm": 1.8750823736190796, "learning_rate": 4.21158280477591e-07, "loss": 0.12328815460205078, "step": 6301 }, { "epoch": 0.8781439420330245, "grad_norm": 0.6276256442070007, "learning_rate": 4.202120531745896e-07, "loss": 0.08211135864257812, "step": 6302 }, { "epoch": 0.8782832857242389, "grad_norm": 0.7868905663490295, "learning_rate": 4.192668433959113e-07, "loss": 0.08613204956054688, "step": 6303 }, { "epoch": 0.8784226294154532, "grad_norm": 0.7124183773994446, "learning_rate": 4.183226513515598e-07, "loss": 0.0948934555053711, "step": 6304 }, { "epoch": 0.8785619731066676, "grad_norm": 0.753463089466095, "learning_rate": 4.173794772513151e-07, "loss": 0.07700920104980469, "step": 6305 }, { "epoch": 0.878701316797882, "grad_norm": 0.8698002696037292, "learning_rate": 4.1643732130472737e-07, "loss": 0.07258224487304688, "step": 6306 }, { "epoch": 0.8788406604890964, "grad_norm": 1.2003772258758545, "learning_rate": 4.1549618372112135e-07, "loss": 0.1376323699951172, "step": 6307 }, { "epoch": 0.8789800041803107, "grad_norm": 1.3838129043579102, "learning_rate": 4.1455606470959755e-07, "loss": 0.12624168395996094, "step": 6308 }, { "epoch": 0.8791193478715251, "grad_norm": 0.5736704468727112, "learning_rate": 4.1361696447902944e-07, "loss": 0.08201789855957031, "step": 6309 }, { "epoch": 0.8792586915627395, "grad_norm": 1.155089259147644, "learning_rate": 4.1267888323806294e-07, "loss": 0.13156604766845703, "step": 6310 }, { "epoch": 0.8793980352539539, "grad_norm": 0.8739388585090637, "learning_rate": 4.117418211951174e-07, "loss": 0.11524581909179688, "step": 6311 }, { "epoch": 0.8795373789451683, "grad_norm": 0.5934416651725769, "learning_rate": 4.1080577855838746e-07, "loss": 0.06198310852050781, "step": 6312 }, { "epoch": 0.8796767226363826, "grad_norm": 0.8086888194084167, "learning_rate": 4.098707555358411e-07, "loss": 0.08604621887207031, "step": 6313 }, { "epoch": 0.879816066327597, "grad_norm": 1.3741928339004517, "learning_rate": 4.0893675233521777e-07, "loss": 0.1562938690185547, "step": 6314 }, { "epoch": 0.8799554100188114, "grad_norm": 1.0574325323104858, "learning_rate": 4.080037691640321e-07, "loss": 0.10138320922851562, "step": 6315 }, { "epoch": 0.8800947537100258, "grad_norm": 0.599210798740387, "learning_rate": 4.070718062295731e-07, "loss": 0.08812332153320312, "step": 6316 }, { "epoch": 0.8802340974012401, "grad_norm": 0.46848851442337036, "learning_rate": 4.0614086373890026e-07, "loss": 0.06940269470214844, "step": 6317 }, { "epoch": 0.8803734410924545, "grad_norm": 0.5955063104629517, "learning_rate": 4.05210941898847e-07, "loss": 0.07570457458496094, "step": 6318 }, { "epoch": 0.8805127847836689, "grad_norm": 0.8014842867851257, "learning_rate": 4.042820409160214e-07, "loss": 0.10937118530273438, "step": 6319 }, { "epoch": 0.8806521284748833, "grad_norm": 0.6545407772064209, "learning_rate": 4.033541609968056e-07, "loss": 0.07734870910644531, "step": 6320 }, { "epoch": 0.8807914721660977, "grad_norm": 1.2742234468460083, "learning_rate": 4.0242730234735184e-07, "loss": 0.10250473022460938, "step": 6321 }, { "epoch": 0.880930815857312, "grad_norm": 0.7914719581604004, "learning_rate": 4.01501465173586e-07, "loss": 0.07482242584228516, "step": 6322 }, { "epoch": 0.8810701595485264, "grad_norm": 0.6245109438896179, "learning_rate": 4.005766496812097e-07, "loss": 0.07520484924316406, "step": 6323 }, { "epoch": 0.8812095032397408, "grad_norm": 0.602569580078125, "learning_rate": 3.9965285607569573e-07, "loss": 0.07313346862792969, "step": 6324 }, { "epoch": 0.8813488469309552, "grad_norm": 0.7940371632575989, "learning_rate": 3.987300845622882e-07, "loss": 0.08728790283203125, "step": 6325 }, { "epoch": 0.8814881906221695, "grad_norm": 0.608064591884613, "learning_rate": 3.978083353460083e-07, "loss": 0.07981681823730469, "step": 6326 }, { "epoch": 0.8816275343133839, "grad_norm": 0.7775339484214783, "learning_rate": 3.96887608631647e-07, "loss": 0.08770942687988281, "step": 6327 }, { "epoch": 0.8817668780045983, "grad_norm": 0.4588792026042938, "learning_rate": 3.959679046237663e-07, "loss": 0.0708303451538086, "step": 6328 }, { "epoch": 0.8819062216958127, "grad_norm": 0.9720008373260498, "learning_rate": 3.950492235267062e-07, "loss": 0.07990741729736328, "step": 6329 }, { "epoch": 0.882045565387027, "grad_norm": 0.5993717312812805, "learning_rate": 3.9413156554457655e-07, "loss": 0.09229660034179688, "step": 6330 }, { "epoch": 0.8821849090782414, "grad_norm": 0.725059449672699, "learning_rate": 3.9321493088125774e-07, "loss": 0.09865379333496094, "step": 6331 }, { "epoch": 0.8823242527694558, "grad_norm": 0.44590702652931213, "learning_rate": 3.9229931974040844e-07, "loss": 0.06254196166992188, "step": 6332 }, { "epoch": 0.8824635964606703, "grad_norm": 1.3430622816085815, "learning_rate": 3.9138473232545326e-07, "loss": 0.08765029907226562, "step": 6333 }, { "epoch": 0.8826029401518847, "grad_norm": 1.015535831451416, "learning_rate": 3.9047116883959513e-07, "loss": 0.0820760726928711, "step": 6334 }, { "epoch": 0.882742283843099, "grad_norm": 0.7439335584640503, "learning_rate": 3.895586294858045e-07, "loss": 0.08810615539550781, "step": 6335 }, { "epoch": 0.8828816275343134, "grad_norm": 0.602085292339325, "learning_rate": 3.886471144668291e-07, "loss": 0.08715057373046875, "step": 6336 }, { "epoch": 0.8830209712255278, "grad_norm": 1.0745083093643188, "learning_rate": 3.8773662398518596e-07, "loss": 0.10243606567382812, "step": 6337 }, { "epoch": 0.8831603149167422, "grad_norm": 0.8213453888893127, "learning_rate": 3.8682715824316594e-07, "loss": 0.09950637817382812, "step": 6338 }, { "epoch": 0.8832996586079566, "grad_norm": 0.5723115801811218, "learning_rate": 3.8591871744282973e-07, "loss": 0.0853118896484375, "step": 6339 }, { "epoch": 0.8834390022991709, "grad_norm": 0.8813232183456421, "learning_rate": 3.85011301786013e-07, "loss": 0.09187793731689453, "step": 6340 }, { "epoch": 0.8835783459903853, "grad_norm": 0.49059468507766724, "learning_rate": 3.841049114743239e-07, "loss": 0.07408332824707031, "step": 6341 }, { "epoch": 0.8837176896815997, "grad_norm": 0.8124544620513916, "learning_rate": 3.8319954670914094e-07, "loss": 0.10776138305664062, "step": 6342 }, { "epoch": 0.8838570333728141, "grad_norm": 0.9540410041809082, "learning_rate": 3.8229520769161474e-07, "loss": 0.09145545959472656, "step": 6343 }, { "epoch": 0.8839963770640284, "grad_norm": 0.47898069024086, "learning_rate": 3.813918946226691e-07, "loss": 0.07608795166015625, "step": 6344 }, { "epoch": 0.8841357207552428, "grad_norm": 1.312544822692871, "learning_rate": 3.804896077030007e-07, "loss": 0.11110115051269531, "step": 6345 }, { "epoch": 0.8842750644464572, "grad_norm": 0.6839637756347656, "learning_rate": 3.7958834713307524e-07, "loss": 0.07180404663085938, "step": 6346 }, { "epoch": 0.8844144081376716, "grad_norm": 0.5242893099784851, "learning_rate": 3.786881131131348e-07, "loss": 0.07255935668945312, "step": 6347 }, { "epoch": 0.884553751828886, "grad_norm": 0.5442134737968445, "learning_rate": 3.7778890584318773e-07, "loss": 0.07280254364013672, "step": 6348 }, { "epoch": 0.8846930955201003, "grad_norm": 0.6917513012886047, "learning_rate": 3.7689072552301973e-07, "loss": 0.09898757934570312, "step": 6349 }, { "epoch": 0.8848324392113147, "grad_norm": 1.2972279787063599, "learning_rate": 3.759935723521846e-07, "loss": 0.10987472534179688, "step": 6350 }, { "epoch": 0.8849717829025291, "grad_norm": 0.6825981736183167, "learning_rate": 3.7509744653001e-07, "loss": 0.08008050918579102, "step": 6351 }, { "epoch": 0.8851111265937435, "grad_norm": 0.8534606099128723, "learning_rate": 3.742023482555951e-07, "loss": 0.1038055419921875, "step": 6352 }, { "epoch": 0.8852504702849578, "grad_norm": 0.5187929272651672, "learning_rate": 3.7330827772780967e-07, "loss": 0.07514286041259766, "step": 6353 }, { "epoch": 0.8853898139761722, "grad_norm": 0.9096397161483765, "learning_rate": 3.7241523514529476e-07, "loss": 0.08052635192871094, "step": 6354 }, { "epoch": 0.8855291576673866, "grad_norm": 0.843700110912323, "learning_rate": 3.715232207064651e-07, "loss": 0.09935760498046875, "step": 6355 }, { "epoch": 0.885668501358601, "grad_norm": 0.7471686601638794, "learning_rate": 3.7063223460950705e-07, "loss": 0.07968330383300781, "step": 6356 }, { "epoch": 0.8858078450498154, "grad_norm": 0.36538922786712646, "learning_rate": 3.697422770523751e-07, "loss": 0.07074356079101562, "step": 6357 }, { "epoch": 0.8859471887410297, "grad_norm": 1.093117117881775, "learning_rate": 3.688533482327994e-07, "loss": 0.11688613891601562, "step": 6358 }, { "epoch": 0.8860865324322441, "grad_norm": 1.0213220119476318, "learning_rate": 3.6796544834827865e-07, "loss": 0.08986473083496094, "step": 6359 }, { "epoch": 0.8862258761234585, "grad_norm": 0.5107007622718811, "learning_rate": 3.670785775960839e-07, "loss": 0.06428337097167969, "step": 6360 }, { "epoch": 0.8863652198146729, "grad_norm": 0.4968658983707428, "learning_rate": 3.66192736173257e-07, "loss": 0.07576990127563477, "step": 6361 }, { "epoch": 0.8865045635058872, "grad_norm": 0.49985310435295105, "learning_rate": 3.653079242766139e-07, "loss": 0.0569915771484375, "step": 6362 }, { "epoch": 0.8866439071971016, "grad_norm": 1.6022213697433472, "learning_rate": 3.6442414210273834e-07, "loss": 0.15447616577148438, "step": 6363 }, { "epoch": 0.886783250888316, "grad_norm": 0.686221718788147, "learning_rate": 3.6354138984798506e-07, "loss": 0.10357284545898438, "step": 6364 }, { "epoch": 0.8869225945795304, "grad_norm": 1.146530270576477, "learning_rate": 3.6265966770848314e-07, "loss": 0.1054534912109375, "step": 6365 }, { "epoch": 0.8870619382707448, "grad_norm": 0.7935132384300232, "learning_rate": 3.6177897588013154e-07, "loss": 0.10478591918945312, "step": 6366 }, { "epoch": 0.8872012819619591, "grad_norm": 0.7438606023788452, "learning_rate": 3.608993145585987e-07, "loss": 0.09589195251464844, "step": 6367 }, { "epoch": 0.8873406256531735, "grad_norm": 0.5295684933662415, "learning_rate": 3.600206839393261e-07, "loss": 0.08916854858398438, "step": 6368 }, { "epoch": 0.8874799693443879, "grad_norm": 0.8817967176437378, "learning_rate": 3.591430842175242e-07, "loss": 0.08728790283203125, "step": 6369 }, { "epoch": 0.8876193130356023, "grad_norm": 0.37724342942237854, "learning_rate": 3.5826651558817703e-07, "loss": 0.061995506286621094, "step": 6370 }, { "epoch": 0.8877586567268166, "grad_norm": 0.6582083106040955, "learning_rate": 3.5739097824603665e-07, "loss": 0.0836639404296875, "step": 6371 }, { "epoch": 0.887898000418031, "grad_norm": 0.5774344205856323, "learning_rate": 3.5651647238562904e-07, "loss": 0.07199764251708984, "step": 6372 }, { "epoch": 0.8880373441092455, "grad_norm": 0.6640492677688599, "learning_rate": 3.5564299820124883e-07, "loss": 0.08397865295410156, "step": 6373 }, { "epoch": 0.8881766878004599, "grad_norm": 0.8508729338645935, "learning_rate": 3.547705558869624e-07, "loss": 0.09421348571777344, "step": 6374 }, { "epoch": 0.8883160314916743, "grad_norm": 0.5249964594841003, "learning_rate": 3.5389914563660475e-07, "loss": 0.08222579956054688, "step": 6375 }, { "epoch": 0.8884553751828886, "grad_norm": 0.45352330803871155, "learning_rate": 3.530287676437849e-07, "loss": 0.07271575927734375, "step": 6376 }, { "epoch": 0.888594718874103, "grad_norm": 0.3746854364871979, "learning_rate": 3.5215942210188204e-07, "loss": 0.05597496032714844, "step": 6377 }, { "epoch": 0.8887340625653174, "grad_norm": 0.5433764457702637, "learning_rate": 3.512911092040422e-07, "loss": 0.07007026672363281, "step": 6378 }, { "epoch": 0.8888734062565318, "grad_norm": 0.8071838617324829, "learning_rate": 3.5042382914318716e-07, "loss": 0.08619880676269531, "step": 6379 }, { "epoch": 0.8890127499477462, "grad_norm": 0.5957617163658142, "learning_rate": 3.495575821120045e-07, "loss": 0.07527732849121094, "step": 6380 }, { "epoch": 0.8891520936389605, "grad_norm": 1.3345603942871094, "learning_rate": 3.4869236830295695e-07, "loss": 0.09328269958496094, "step": 6381 }, { "epoch": 0.8892914373301749, "grad_norm": 0.7830173373222351, "learning_rate": 3.478281879082729e-07, "loss": 0.0772552490234375, "step": 6382 }, { "epoch": 0.8894307810213893, "grad_norm": 0.8840118646621704, "learning_rate": 3.469650411199543e-07, "loss": 0.10677719116210938, "step": 6383 }, { "epoch": 0.8895701247126037, "grad_norm": 0.6600843071937561, "learning_rate": 3.4610292812977454e-07, "loss": 0.07925605773925781, "step": 6384 }, { "epoch": 0.889709468403818, "grad_norm": 0.6655957698822021, "learning_rate": 3.452418491292731e-07, "loss": 0.07320976257324219, "step": 6385 }, { "epoch": 0.8898488120950324, "grad_norm": 1.3838289976119995, "learning_rate": 3.4438180430976243e-07, "loss": 0.13237380981445312, "step": 6386 }, { "epoch": 0.8899881557862468, "grad_norm": 0.8984883427619934, "learning_rate": 3.4352279386232535e-07, "loss": 0.09374427795410156, "step": 6387 }, { "epoch": 0.8901274994774612, "grad_norm": 0.652869462966919, "learning_rate": 3.426648179778147e-07, "loss": 0.0857696533203125, "step": 6388 }, { "epoch": 0.8902668431686755, "grad_norm": 0.6317785382270813, "learning_rate": 3.4180787684685246e-07, "loss": 0.09179306030273438, "step": 6389 }, { "epoch": 0.8904061868598899, "grad_norm": 0.9791358113288879, "learning_rate": 3.409519706598324e-07, "loss": 0.08841705322265625, "step": 6390 }, { "epoch": 0.8905455305511043, "grad_norm": 0.732467770576477, "learning_rate": 3.400970996069164e-07, "loss": 0.07573127746582031, "step": 6391 }, { "epoch": 0.8906848742423187, "grad_norm": 0.46116945147514343, "learning_rate": 3.392432638780363e-07, "loss": 0.06547927856445312, "step": 6392 }, { "epoch": 0.8908242179335331, "grad_norm": 0.49308088421821594, "learning_rate": 3.383904636628965e-07, "loss": 0.07744979858398438, "step": 6393 }, { "epoch": 0.8909635616247474, "grad_norm": 0.89007169008255, "learning_rate": 3.3753869915096936e-07, "loss": 0.10680294036865234, "step": 6394 }, { "epoch": 0.8911029053159618, "grad_norm": 0.7245925068855286, "learning_rate": 3.3668797053149907e-07, "loss": 0.09182548522949219, "step": 6395 }, { "epoch": 0.8912422490071762, "grad_norm": 0.7794055342674255, "learning_rate": 3.3583827799349486e-07, "loss": 0.08762550354003906, "step": 6396 }, { "epoch": 0.8913815926983906, "grad_norm": 1.408156156539917, "learning_rate": 3.3498962172574033e-07, "loss": 0.13064193725585938, "step": 6397 }, { "epoch": 0.891520936389605, "grad_norm": 0.7173792123794556, "learning_rate": 3.3414200191678903e-07, "loss": 0.07944774627685547, "step": 6398 }, { "epoch": 0.8916602800808193, "grad_norm": 0.6212553977966309, "learning_rate": 3.332954187549603e-07, "loss": 0.08231544494628906, "step": 6399 }, { "epoch": 0.8917996237720337, "grad_norm": 0.7000163793563843, "learning_rate": 3.3244987242834816e-07, "loss": 0.07908821105957031, "step": 6400 }, { "epoch": 0.8919389674632481, "grad_norm": 0.46133726835250854, "learning_rate": 3.3160536312481174e-07, "loss": 0.07129287719726562, "step": 6401 }, { "epoch": 0.8920783111544625, "grad_norm": 0.6236714124679565, "learning_rate": 3.3076189103198265e-07, "loss": 0.0902252197265625, "step": 6402 }, { "epoch": 0.8922176548456768, "grad_norm": 0.6934467554092407, "learning_rate": 3.299194563372604e-07, "loss": 0.088165283203125, "step": 6403 }, { "epoch": 0.8923569985368912, "grad_norm": 0.9630333781242371, "learning_rate": 3.290780592278148e-07, "loss": 0.11362838745117188, "step": 6404 }, { "epoch": 0.8924963422281056, "grad_norm": 0.6675233840942383, "learning_rate": 3.2823769989058674e-07, "loss": 0.08893585205078125, "step": 6405 }, { "epoch": 0.89263568591932, "grad_norm": 0.5102248191833496, "learning_rate": 3.2739837851228306e-07, "loss": 0.07764530181884766, "step": 6406 }, { "epoch": 0.8927750296105343, "grad_norm": 0.5570409297943115, "learning_rate": 3.265600952793818e-07, "loss": 0.07448959350585938, "step": 6407 }, { "epoch": 0.8929143733017487, "grad_norm": 0.4350820779800415, "learning_rate": 3.2572285037813123e-07, "loss": 0.06984424591064453, "step": 6408 }, { "epoch": 0.8930537169929631, "grad_norm": 1.3545206785202026, "learning_rate": 3.248866439945486e-07, "loss": 0.10473251342773438, "step": 6409 }, { "epoch": 0.8931930606841775, "grad_norm": 0.6056035757064819, "learning_rate": 3.2405147631441757e-07, "loss": 0.06817817687988281, "step": 6410 }, { "epoch": 0.8933324043753919, "grad_norm": 0.8227357268333435, "learning_rate": 3.232173475232964e-07, "loss": 0.08980751037597656, "step": 6411 }, { "epoch": 0.8934717480666062, "grad_norm": 0.5779579281806946, "learning_rate": 3.2238425780650617e-07, "loss": 0.08357620239257812, "step": 6412 }, { "epoch": 0.8936110917578207, "grad_norm": 0.6403419375419617, "learning_rate": 3.215522073491434e-07, "loss": 0.08056640625, "step": 6413 }, { "epoch": 0.8937504354490351, "grad_norm": 0.6109372973442078, "learning_rate": 3.2072119633606845e-07, "loss": 0.07900047302246094, "step": 6414 }, { "epoch": 0.8938897791402495, "grad_norm": 0.5389840602874756, "learning_rate": 3.198912249519143e-07, "loss": 0.0892791748046875, "step": 6415 }, { "epoch": 0.8940291228314639, "grad_norm": 0.46750178933143616, "learning_rate": 3.190622933810816e-07, "loss": 0.06327390670776367, "step": 6416 }, { "epoch": 0.8941684665226782, "grad_norm": 0.6227248311042786, "learning_rate": 3.182344018077399e-07, "loss": 0.07177162170410156, "step": 6417 }, { "epoch": 0.8943078102138926, "grad_norm": 0.572967529296875, "learning_rate": 3.1740755041582694e-07, "loss": 0.07461690902709961, "step": 6418 }, { "epoch": 0.894447153905107, "grad_norm": 0.9755306243896484, "learning_rate": 3.1658173938905023e-07, "loss": 0.07901954650878906, "step": 6419 }, { "epoch": 0.8945864975963214, "grad_norm": 1.2428395748138428, "learning_rate": 3.1575696891088804e-07, "loss": 0.10898971557617188, "step": 6420 }, { "epoch": 0.8947258412875357, "grad_norm": 0.7766479849815369, "learning_rate": 3.149332391645843e-07, "loss": 0.08104705810546875, "step": 6421 }, { "epoch": 0.8948651849787501, "grad_norm": 1.9654852151870728, "learning_rate": 3.1411055033315207e-07, "loss": 0.12103652954101562, "step": 6422 }, { "epoch": 0.8950045286699645, "grad_norm": 0.6523290872573853, "learning_rate": 3.132889025993746e-07, "loss": 0.08254051208496094, "step": 6423 }, { "epoch": 0.8951438723611789, "grad_norm": 0.8270828723907471, "learning_rate": 3.1246829614580476e-07, "loss": 0.09195613861083984, "step": 6424 }, { "epoch": 0.8952832160523932, "grad_norm": 0.7761591672897339, "learning_rate": 3.1164873115476056e-07, "loss": 0.08134841918945312, "step": 6425 }, { "epoch": 0.8954225597436076, "grad_norm": 0.797170877456665, "learning_rate": 3.1083020780833137e-07, "loss": 0.08983039855957031, "step": 6426 }, { "epoch": 0.895561903434822, "grad_norm": 1.1324281692504883, "learning_rate": 3.1001272628837565e-07, "loss": 0.11748790740966797, "step": 6427 }, { "epoch": 0.8957012471260364, "grad_norm": 0.5568913221359253, "learning_rate": 3.0919628677651636e-07, "loss": 0.07832145690917969, "step": 6428 }, { "epoch": 0.8958405908172508, "grad_norm": 0.5349634885787964, "learning_rate": 3.083808894541496e-07, "loss": 0.07726860046386719, "step": 6429 }, { "epoch": 0.8959799345084651, "grad_norm": 0.8078449964523315, "learning_rate": 3.075665345024387e-07, "loss": 0.10628128051757812, "step": 6430 }, { "epoch": 0.8961192781996795, "grad_norm": 0.7101252675056458, "learning_rate": 3.0675322210231227e-07, "loss": 0.08854007720947266, "step": 6431 }, { "epoch": 0.8962586218908939, "grad_norm": 1.1284078359603882, "learning_rate": 3.0594095243447254e-07, "loss": 0.10851860046386719, "step": 6432 }, { "epoch": 0.8963979655821083, "grad_norm": 0.6402690410614014, "learning_rate": 3.0512972567938505e-07, "loss": 0.08703231811523438, "step": 6433 }, { "epoch": 0.8965373092733226, "grad_norm": 1.3152750730514526, "learning_rate": 3.043195420172879e-07, "loss": 0.10731887817382812, "step": 6434 }, { "epoch": 0.896676652964537, "grad_norm": 0.8254266381263733, "learning_rate": 3.035104016281831e-07, "loss": 0.10019302368164062, "step": 6435 }, { "epoch": 0.8968159966557514, "grad_norm": 0.8635796904563904, "learning_rate": 3.027023046918448e-07, "loss": 0.10496330261230469, "step": 6436 }, { "epoch": 0.8969553403469658, "grad_norm": 1.2667725086212158, "learning_rate": 3.018952513878137e-07, "loss": 0.10710525512695312, "step": 6437 }, { "epoch": 0.8970946840381802, "grad_norm": 0.6960008144378662, "learning_rate": 3.010892418953981e-07, "loss": 0.07705497741699219, "step": 6438 }, { "epoch": 0.8972340277293945, "grad_norm": 0.9059591293334961, "learning_rate": 3.0028427639367475e-07, "loss": 0.09179210662841797, "step": 6439 }, { "epoch": 0.8973733714206089, "grad_norm": 1.9209864139556885, "learning_rate": 2.994803550614883e-07, "loss": 0.13691139221191406, "step": 6440 }, { "epoch": 0.8975127151118233, "grad_norm": 1.0248441696166992, "learning_rate": 2.9867747807745315e-07, "loss": 0.11865901947021484, "step": 6441 }, { "epoch": 0.8976520588030377, "grad_norm": 0.7846607565879822, "learning_rate": 2.978756456199494e-07, "loss": 0.09611892700195312, "step": 6442 }, { "epoch": 0.897791402494252, "grad_norm": 0.807433009147644, "learning_rate": 2.970748578671251e-07, "loss": 0.08841133117675781, "step": 6443 }, { "epoch": 0.8979307461854664, "grad_norm": 0.413652241230011, "learning_rate": 2.9627511499689787e-07, "loss": 0.06132698059082031, "step": 6444 }, { "epoch": 0.8980700898766808, "grad_norm": 0.6152486205101013, "learning_rate": 2.9547641718695285e-07, "loss": 0.08732414245605469, "step": 6445 }, { "epoch": 0.8982094335678952, "grad_norm": 0.93519127368927, "learning_rate": 2.946787646147414e-07, "loss": 0.10607719421386719, "step": 6446 }, { "epoch": 0.8983487772591096, "grad_norm": 1.0852488279342651, "learning_rate": 2.9388215745748347e-07, "loss": 0.08821964263916016, "step": 6447 }, { "epoch": 0.8984881209503239, "grad_norm": 0.8311220407485962, "learning_rate": 2.9308659589216913e-07, "loss": 0.09240531921386719, "step": 6448 }, { "epoch": 0.8986274646415383, "grad_norm": 0.7983461618423462, "learning_rate": 2.92292080095552e-07, "loss": 0.09633445739746094, "step": 6449 }, { "epoch": 0.8987668083327527, "grad_norm": 1.148481011390686, "learning_rate": 2.9149861024415526e-07, "loss": 0.08968448638916016, "step": 6450 }, { "epoch": 0.8989061520239671, "grad_norm": 0.7349345684051514, "learning_rate": 2.9070618651427073e-07, "loss": 0.07344818115234375, "step": 6451 }, { "epoch": 0.8990454957151814, "grad_norm": 0.9710511565208435, "learning_rate": 2.89914809081957e-07, "loss": 0.08272171020507812, "step": 6452 }, { "epoch": 0.8991848394063959, "grad_norm": 0.4644010066986084, "learning_rate": 2.8912447812303956e-07, "loss": 0.06961345672607422, "step": 6453 }, { "epoch": 0.8993241830976103, "grad_norm": 1.4670642614364624, "learning_rate": 2.8833519381311127e-07, "loss": 0.11824607849121094, "step": 6454 }, { "epoch": 0.8994635267888247, "grad_norm": 1.2004151344299316, "learning_rate": 2.8754695632753406e-07, "loss": 0.12759017944335938, "step": 6455 }, { "epoch": 0.8996028704800391, "grad_norm": 1.1720812320709229, "learning_rate": 2.867597658414367e-07, "loss": 0.101287841796875, "step": 6456 }, { "epoch": 0.8997422141712534, "grad_norm": 0.745904803276062, "learning_rate": 2.859736225297133e-07, "loss": 0.09418964385986328, "step": 6457 }, { "epoch": 0.8998815578624678, "grad_norm": 1.0238558053970337, "learning_rate": 2.8518852656702845e-07, "loss": 0.08054924011230469, "step": 6458 }, { "epoch": 0.9000209015536822, "grad_norm": 1.5899286270141602, "learning_rate": 2.844044781278127e-07, "loss": 0.14681625366210938, "step": 6459 }, { "epoch": 0.9001602452448966, "grad_norm": 1.051811933517456, "learning_rate": 2.836214773862617e-07, "loss": 0.10259056091308594, "step": 6460 }, { "epoch": 0.900299588936111, "grad_norm": 1.4464114904403687, "learning_rate": 2.828395245163418e-07, "loss": 0.11355304718017578, "step": 6461 }, { "epoch": 0.9004389326273253, "grad_norm": 1.3770703077316284, "learning_rate": 2.820586196917857e-07, "loss": 0.12932395935058594, "step": 6462 }, { "epoch": 0.9005782763185397, "grad_norm": 0.6949123740196228, "learning_rate": 2.812787630860919e-07, "loss": 0.07952690124511719, "step": 6463 }, { "epoch": 0.9007176200097541, "grad_norm": 0.6498902440071106, "learning_rate": 2.8049995487252625e-07, "loss": 0.10001564025878906, "step": 6464 }, { "epoch": 0.9008569637009685, "grad_norm": 0.46097835898399353, "learning_rate": 2.7972219522412194e-07, "loss": 0.05635643005371094, "step": 6465 }, { "epoch": 0.9009963073921828, "grad_norm": 0.9976030588150024, "learning_rate": 2.789454843136813e-07, "loss": 0.09589576721191406, "step": 6466 }, { "epoch": 0.9011356510833972, "grad_norm": 0.9897968173027039, "learning_rate": 2.7816982231376964e-07, "loss": 0.0775918960571289, "step": 6467 }, { "epoch": 0.9012749947746116, "grad_norm": 0.9404992461204529, "learning_rate": 2.773952093967225e-07, "loss": 0.12137794494628906, "step": 6468 }, { "epoch": 0.901414338465826, "grad_norm": 0.7665221691131592, "learning_rate": 2.7662164573464156e-07, "loss": 0.08866310119628906, "step": 6469 }, { "epoch": 0.9015536821570403, "grad_norm": 0.8224163055419922, "learning_rate": 2.758491314993944e-07, "loss": 0.08008003234863281, "step": 6470 }, { "epoch": 0.9016930258482547, "grad_norm": 0.6472164392471313, "learning_rate": 2.750776668626148e-07, "loss": 0.09618568420410156, "step": 6471 }, { "epoch": 0.9018323695394691, "grad_norm": 0.9473525881767273, "learning_rate": 2.743072519957063e-07, "loss": 0.08704566955566406, "step": 6472 }, { "epoch": 0.9019717132306835, "grad_norm": 0.36008012294769287, "learning_rate": 2.73537887069838e-07, "loss": 0.0707082748413086, "step": 6473 }, { "epoch": 0.9021110569218979, "grad_norm": 0.39605432748794556, "learning_rate": 2.7276957225594367e-07, "loss": 0.05628013610839844, "step": 6474 }, { "epoch": 0.9022504006131122, "grad_norm": 0.928551435470581, "learning_rate": 2.7200230772472526e-07, "loss": 0.11164093017578125, "step": 6475 }, { "epoch": 0.9023897443043266, "grad_norm": 0.7667192220687866, "learning_rate": 2.712360936466524e-07, "loss": 0.09589385986328125, "step": 6476 }, { "epoch": 0.902529087995541, "grad_norm": 0.7235963344573975, "learning_rate": 2.704709301919606e-07, "loss": 0.08437156677246094, "step": 6477 }, { "epoch": 0.9026684316867554, "grad_norm": 0.8463969826698303, "learning_rate": 2.6970681753065e-07, "loss": 0.10663604736328125, "step": 6478 }, { "epoch": 0.9028077753779697, "grad_norm": 0.7480518817901611, "learning_rate": 2.6894375583249144e-07, "loss": 0.09450531005859375, "step": 6479 }, { "epoch": 0.9029471190691841, "grad_norm": 0.4292130470275879, "learning_rate": 2.681817452670171e-07, "loss": 0.0758819580078125, "step": 6480 }, { "epoch": 0.9030864627603985, "grad_norm": 0.6841022968292236, "learning_rate": 2.6742078600353106e-07, "loss": 0.07212638854980469, "step": 6481 }, { "epoch": 0.9032258064516129, "grad_norm": 0.8949876427650452, "learning_rate": 2.6666087821109855e-07, "loss": 0.12092208862304688, "step": 6482 }, { "epoch": 0.9033651501428273, "grad_norm": 0.6786472797393799, "learning_rate": 2.6590202205855506e-07, "loss": 0.08174514770507812, "step": 6483 }, { "epoch": 0.9035044938340416, "grad_norm": 1.096672534942627, "learning_rate": 2.6514421771450194e-07, "loss": 0.11727523803710938, "step": 6484 }, { "epoch": 0.903643837525256, "grad_norm": 0.7006496787071228, "learning_rate": 2.6438746534730497e-07, "loss": 0.08749771118164062, "step": 6485 }, { "epoch": 0.9037831812164704, "grad_norm": 1.2135789394378662, "learning_rate": 2.6363176512509637e-07, "loss": 0.11335563659667969, "step": 6486 }, { "epoch": 0.9039225249076848, "grad_norm": 0.8369923233985901, "learning_rate": 2.628771172157768e-07, "loss": 0.11016464233398438, "step": 6487 }, { "epoch": 0.9040618685988991, "grad_norm": 0.7302773594856262, "learning_rate": 2.621235217870116e-07, "loss": 0.0815887451171875, "step": 6488 }, { "epoch": 0.9042012122901135, "grad_norm": 0.5890854001045227, "learning_rate": 2.6137097900623185e-07, "loss": 0.08598518371582031, "step": 6489 }, { "epoch": 0.9043405559813279, "grad_norm": 0.9378361701965332, "learning_rate": 2.6061948904063663e-07, "loss": 0.12072563171386719, "step": 6490 }, { "epoch": 0.9044798996725423, "grad_norm": 0.681951642036438, "learning_rate": 2.598690520571889e-07, "loss": 0.08934593200683594, "step": 6491 }, { "epoch": 0.9046192433637567, "grad_norm": 0.45315051078796387, "learning_rate": 2.591196682226182e-07, "loss": 0.07592582702636719, "step": 6492 }, { "epoch": 0.904758587054971, "grad_norm": 0.7896424531936646, "learning_rate": 2.5837133770342135e-07, "loss": 0.09052467346191406, "step": 6493 }, { "epoch": 0.9048979307461855, "grad_norm": 1.0745999813079834, "learning_rate": 2.5762406066585976e-07, "loss": 0.08747482299804688, "step": 6494 }, { "epoch": 0.9050372744373999, "grad_norm": 0.4911971986293793, "learning_rate": 2.568778372759628e-07, "loss": 0.06004524230957031, "step": 6495 }, { "epoch": 0.9051766181286143, "grad_norm": 0.782589852809906, "learning_rate": 2.5613266769952183e-07, "loss": 0.08912277221679688, "step": 6496 }, { "epoch": 0.9053159618198287, "grad_norm": 0.5812814831733704, "learning_rate": 2.5538855210209823e-07, "loss": 0.0691823959350586, "step": 6497 }, { "epoch": 0.905455305511043, "grad_norm": 1.3047335147857666, "learning_rate": 2.54645490649017e-07, "loss": 0.12566566467285156, "step": 6498 }, { "epoch": 0.9055946492022574, "grad_norm": 0.5136925578117371, "learning_rate": 2.5390348350536887e-07, "loss": 0.08577346801757812, "step": 6499 }, { "epoch": 0.9057339928934718, "grad_norm": 0.7467120289802551, "learning_rate": 2.531625308360125e-07, "loss": 0.09174728393554688, "step": 6500 }, { "epoch": 0.9058733365846862, "grad_norm": 1.1307634115219116, "learning_rate": 2.52422632805569e-07, "loss": 0.16311264038085938, "step": 6501 }, { "epoch": 0.9060126802759005, "grad_norm": 0.507568895816803, "learning_rate": 2.5168378957842797e-07, "loss": 0.07526206970214844, "step": 6502 }, { "epoch": 0.9061520239671149, "grad_norm": 0.5598406195640564, "learning_rate": 2.5094600131874205e-07, "loss": 0.0767669677734375, "step": 6503 }, { "epoch": 0.9062913676583293, "grad_norm": 0.5694454908370972, "learning_rate": 2.5020926819043223e-07, "loss": 0.06635284423828125, "step": 6504 }, { "epoch": 0.9064307113495437, "grad_norm": 0.6029976606369019, "learning_rate": 2.4947359035718434e-07, "loss": 0.07998466491699219, "step": 6505 }, { "epoch": 0.906570055040758, "grad_norm": 0.6989917159080505, "learning_rate": 2.487389679824481e-07, "loss": 0.08209037780761719, "step": 6506 }, { "epoch": 0.9067093987319724, "grad_norm": 0.6339115500450134, "learning_rate": 2.4800540122943915e-07, "loss": 0.08065986633300781, "step": 6507 }, { "epoch": 0.9068487424231868, "grad_norm": 0.5253869891166687, "learning_rate": 2.4727289026114043e-07, "loss": 0.07940483093261719, "step": 6508 }, { "epoch": 0.9069880861144012, "grad_norm": 1.184716820716858, "learning_rate": 2.4654143524029896e-07, "loss": 0.08827018737792969, "step": 6509 }, { "epoch": 0.9071274298056156, "grad_norm": 0.8736333250999451, "learning_rate": 2.4581103632942747e-07, "loss": 0.10011482238769531, "step": 6510 }, { "epoch": 0.9072667734968299, "grad_norm": 0.46125271916389465, "learning_rate": 2.4508169369080404e-07, "loss": 0.05651664733886719, "step": 6511 }, { "epoch": 0.9074061171880443, "grad_norm": 0.39574697613716125, "learning_rate": 2.443534074864706e-07, "loss": 0.06511783599853516, "step": 6512 }, { "epoch": 0.9075454608792587, "grad_norm": 0.4399123787879944, "learning_rate": 2.436261778782378e-07, "loss": 0.07824897766113281, "step": 6513 }, { "epoch": 0.9076848045704731, "grad_norm": 0.5148511528968811, "learning_rate": 2.4290000502767755e-07, "loss": 0.07855606079101562, "step": 6514 }, { "epoch": 0.9078241482616874, "grad_norm": 0.8216776847839355, "learning_rate": 2.421748890961301e-07, "loss": 0.10356521606445312, "step": 6515 }, { "epoch": 0.9079634919529018, "grad_norm": 1.1319698095321655, "learning_rate": 2.4145083024469996e-07, "loss": 0.12082862854003906, "step": 6516 }, { "epoch": 0.9081028356441162, "grad_norm": 0.5206557512283325, "learning_rate": 2.407278286342557e-07, "loss": 0.0824899673461914, "step": 6517 }, { "epoch": 0.9082421793353306, "grad_norm": 1.3515242338180542, "learning_rate": 2.40005884425431e-07, "loss": 0.13928699493408203, "step": 6518 }, { "epoch": 0.908381523026545, "grad_norm": 0.4071400463581085, "learning_rate": 2.39284997778626e-07, "loss": 0.06641769409179688, "step": 6519 }, { "epoch": 0.9085208667177593, "grad_norm": 0.5919241309165955, "learning_rate": 2.3856516885400693e-07, "loss": 0.07479667663574219, "step": 6520 }, { "epoch": 0.9086602104089737, "grad_norm": 0.9015988111495972, "learning_rate": 2.3784639781150143e-07, "loss": 0.09149742126464844, "step": 6521 }, { "epoch": 0.9087995541001881, "grad_norm": 1.0575686693191528, "learning_rate": 2.3712868481080397e-07, "loss": 0.08976364135742188, "step": 6522 }, { "epoch": 0.9089388977914025, "grad_norm": 0.9868388772010803, "learning_rate": 2.364120300113748e-07, "loss": 0.10019493103027344, "step": 6523 }, { "epoch": 0.9090782414826168, "grad_norm": 0.8362367749214172, "learning_rate": 2.356964335724382e-07, "loss": 0.09728050231933594, "step": 6524 }, { "epoch": 0.9092175851738312, "grad_norm": 0.5225020051002502, "learning_rate": 2.3498189565298312e-07, "loss": 0.0774698257446289, "step": 6525 }, { "epoch": 0.9093569288650456, "grad_norm": 0.5753639340400696, "learning_rate": 2.3426841641176311e-07, "loss": 0.09235382080078125, "step": 6526 }, { "epoch": 0.90949627255626, "grad_norm": 0.33991745114326477, "learning_rate": 2.3355599600729916e-07, "loss": 0.0628957748413086, "step": 6527 }, { "epoch": 0.9096356162474744, "grad_norm": 0.4791218340396881, "learning_rate": 2.328446345978713e-07, "loss": 0.07436180114746094, "step": 6528 }, { "epoch": 0.9097749599386887, "grad_norm": 0.621284544467926, "learning_rate": 2.3213433234152982e-07, "loss": 0.07798194885253906, "step": 6529 }, { "epoch": 0.9099143036299031, "grad_norm": 0.7644511461257935, "learning_rate": 2.3142508939608844e-07, "loss": 0.09615421295166016, "step": 6530 }, { "epoch": 0.9100536473211175, "grad_norm": 1.5645463466644287, "learning_rate": 2.3071690591912277e-07, "loss": 0.11884212493896484, "step": 6531 }, { "epoch": 0.9101929910123319, "grad_norm": 0.5807003974914551, "learning_rate": 2.3000978206797697e-07, "loss": 0.07462501525878906, "step": 6532 }, { "epoch": 0.9103323347035462, "grad_norm": 0.6262759566307068, "learning_rate": 2.2930371799975593e-07, "loss": 0.08106231689453125, "step": 6533 }, { "epoch": 0.9104716783947607, "grad_norm": 0.7793316841125488, "learning_rate": 2.2859871387133248e-07, "loss": 0.10144615173339844, "step": 6534 }, { "epoch": 0.9106110220859751, "grad_norm": 0.8372783660888672, "learning_rate": 2.2789476983934133e-07, "loss": 0.10935020446777344, "step": 6535 }, { "epoch": 0.9107503657771895, "grad_norm": 0.6566019654273987, "learning_rate": 2.271918860601835e-07, "loss": 0.08769989013671875, "step": 6536 }, { "epoch": 0.9108897094684039, "grad_norm": 1.1406279802322388, "learning_rate": 2.2649006269002406e-07, "loss": 0.11395454406738281, "step": 6537 }, { "epoch": 0.9110290531596182, "grad_norm": 0.7298685908317566, "learning_rate": 2.257892998847916e-07, "loss": 0.09649848937988281, "step": 6538 }, { "epoch": 0.9111683968508326, "grad_norm": 0.9754316210746765, "learning_rate": 2.250895978001788e-07, "loss": 0.10803604125976562, "step": 6539 }, { "epoch": 0.911307740542047, "grad_norm": 0.6169140338897705, "learning_rate": 2.2439095659164467e-07, "loss": 0.09675025939941406, "step": 6540 }, { "epoch": 0.9114470842332614, "grad_norm": 0.6486262083053589, "learning_rate": 2.236933764144117e-07, "loss": 0.09417724609375, "step": 6541 }, { "epoch": 0.9115864279244758, "grad_norm": 0.8711140155792236, "learning_rate": 2.2299685742346423e-07, "loss": 0.09937477111816406, "step": 6542 }, { "epoch": 0.9117257716156901, "grad_norm": 0.5297939777374268, "learning_rate": 2.223013997735557e-07, "loss": 0.08149147033691406, "step": 6543 }, { "epoch": 0.9118651153069045, "grad_norm": 0.6712208390235901, "learning_rate": 2.2160700361919807e-07, "loss": 0.08027076721191406, "step": 6544 }, { "epoch": 0.9120044589981189, "grad_norm": 0.9693924784660339, "learning_rate": 2.2091366911467238e-07, "loss": 0.08340263366699219, "step": 6545 }, { "epoch": 0.9121438026893333, "grad_norm": 0.9032512307167053, "learning_rate": 2.2022139641402095e-07, "loss": 0.10491371154785156, "step": 6546 }, { "epoch": 0.9122831463805476, "grad_norm": 0.5083120465278625, "learning_rate": 2.1953018567105078e-07, "loss": 0.06783580780029297, "step": 6547 }, { "epoch": 0.912422490071762, "grad_norm": 1.75643789768219, "learning_rate": 2.1884003703933343e-07, "loss": 0.08489799499511719, "step": 6548 }, { "epoch": 0.9125618337629764, "grad_norm": 0.6755703687667847, "learning_rate": 2.181509506722046e-07, "loss": 0.10198402404785156, "step": 6549 }, { "epoch": 0.9127011774541908, "grad_norm": 0.4570816457271576, "learning_rate": 2.1746292672276238e-07, "loss": 0.07205772399902344, "step": 6550 }, { "epoch": 0.9128405211454051, "grad_norm": 0.9329178929328918, "learning_rate": 2.1677596534387114e-07, "loss": 0.11023712158203125, "step": 6551 }, { "epoch": 0.9129798648366195, "grad_norm": 0.738453209400177, "learning_rate": 2.1609006668815768e-07, "loss": 0.08978843688964844, "step": 6552 }, { "epoch": 0.9131192085278339, "grad_norm": 0.9121342897415161, "learning_rate": 2.1540523090801292e-07, "loss": 0.08325576782226562, "step": 6553 }, { "epoch": 0.9132585522190483, "grad_norm": 0.517034649848938, "learning_rate": 2.1472145815559064e-07, "loss": 0.06079387664794922, "step": 6554 }, { "epoch": 0.9133978959102627, "grad_norm": 0.9185873866081238, "learning_rate": 2.1403874858281104e-07, "loss": 0.106048583984375, "step": 6555 }, { "epoch": 0.913537239601477, "grad_norm": 0.6174396872520447, "learning_rate": 2.133571023413572e-07, "loss": 0.08379554748535156, "step": 6556 }, { "epoch": 0.9136765832926914, "grad_norm": 0.9499533772468567, "learning_rate": 2.1267651958267298e-07, "loss": 0.08260774612426758, "step": 6557 }, { "epoch": 0.9138159269839058, "grad_norm": 0.3788938820362091, "learning_rate": 2.1199700045797077e-07, "loss": 0.05988597869873047, "step": 6558 }, { "epoch": 0.9139552706751202, "grad_norm": 0.6999072432518005, "learning_rate": 2.113185451182226e-07, "loss": 0.08973884582519531, "step": 6559 }, { "epoch": 0.9140946143663345, "grad_norm": 0.8657020926475525, "learning_rate": 2.106411537141656e-07, "loss": 0.09498023986816406, "step": 6560 }, { "epoch": 0.9142339580575489, "grad_norm": 1.2415753602981567, "learning_rate": 2.0996482639630167e-07, "loss": 0.10057830810546875, "step": 6561 }, { "epoch": 0.9143733017487633, "grad_norm": 0.6350250244140625, "learning_rate": 2.0928956331489558e-07, "loss": 0.0662384033203125, "step": 6562 }, { "epoch": 0.9145126454399777, "grad_norm": 1.109322428703308, "learning_rate": 2.08615364619974e-07, "loss": 0.0987091064453125, "step": 6563 }, { "epoch": 0.9146519891311921, "grad_norm": 1.2619202136993408, "learning_rate": 2.079422304613299e-07, "loss": 0.1058807373046875, "step": 6564 }, { "epoch": 0.9147913328224064, "grad_norm": 0.7706087827682495, "learning_rate": 2.0727016098851694e-07, "loss": 0.08653831481933594, "step": 6565 }, { "epoch": 0.9149306765136208, "grad_norm": 0.47035902738571167, "learning_rate": 2.0659915635085515e-07, "loss": 0.060332298278808594, "step": 6566 }, { "epoch": 0.9150700202048352, "grad_norm": 0.6090660095214844, "learning_rate": 2.0592921669742528e-07, "loss": 0.08445167541503906, "step": 6567 }, { "epoch": 0.9152093638960496, "grad_norm": 0.46968603134155273, "learning_rate": 2.0526034217707213e-07, "loss": 0.06891250610351562, "step": 6568 }, { "epoch": 0.9153487075872639, "grad_norm": 0.41454148292541504, "learning_rate": 2.0459253293840632e-07, "loss": 0.05995464324951172, "step": 6569 }, { "epoch": 0.9154880512784783, "grad_norm": 0.6716922521591187, "learning_rate": 2.0392578912979853e-07, "loss": 0.08317184448242188, "step": 6570 }, { "epoch": 0.9156273949696927, "grad_norm": 0.9161880612373352, "learning_rate": 2.032601108993837e-07, "loss": 0.10399532318115234, "step": 6571 }, { "epoch": 0.9157667386609071, "grad_norm": 0.6054226756095886, "learning_rate": 2.0259549839506064e-07, "loss": 0.09120368957519531, "step": 6572 }, { "epoch": 0.9159060823521215, "grad_norm": 1.767232060432434, "learning_rate": 2.0193195176449188e-07, "loss": 0.130889892578125, "step": 6573 }, { "epoch": 0.9160454260433359, "grad_norm": 0.9041648507118225, "learning_rate": 2.0126947115510165e-07, "loss": 0.09046173095703125, "step": 6574 }, { "epoch": 0.9161847697345503, "grad_norm": 1.5835314989089966, "learning_rate": 2.006080567140778e-07, "loss": 0.1853313446044922, "step": 6575 }, { "epoch": 0.9163241134257647, "grad_norm": 1.4034589529037476, "learning_rate": 1.999477085883711e-07, "loss": 0.09122467041015625, "step": 6576 }, { "epoch": 0.9164634571169791, "grad_norm": 1.170926570892334, "learning_rate": 1.9928842692469752e-07, "loss": 0.10038375854492188, "step": 6577 }, { "epoch": 0.9166028008081935, "grad_norm": 0.4768672287464142, "learning_rate": 1.9863021186953268e-07, "loss": 0.06465721130371094, "step": 6578 }, { "epoch": 0.9167421444994078, "grad_norm": 0.7397958636283875, "learning_rate": 1.9797306356911793e-07, "loss": 0.09496688842773438, "step": 6579 }, { "epoch": 0.9168814881906222, "grad_norm": 0.8644639253616333, "learning_rate": 1.973169821694565e-07, "loss": 0.08361434936523438, "step": 6580 }, { "epoch": 0.9170208318818366, "grad_norm": 0.8187204003334045, "learning_rate": 1.9666196781631453e-07, "loss": 0.11458873748779297, "step": 6581 }, { "epoch": 0.917160175573051, "grad_norm": 1.2856255769729614, "learning_rate": 1.9600802065522063e-07, "loss": 0.12664222717285156, "step": 6582 }, { "epoch": 0.9172995192642653, "grad_norm": 0.5037311315536499, "learning_rate": 1.95355140831468e-07, "loss": 0.06342697143554688, "step": 6583 }, { "epoch": 0.9174388629554797, "grad_norm": 0.6597398519515991, "learning_rate": 1.947033284901112e-07, "loss": 0.09408760070800781, "step": 6584 }, { "epoch": 0.9175782066466941, "grad_norm": 0.3587104082107544, "learning_rate": 1.9405258377596825e-07, "loss": 0.060962677001953125, "step": 6585 }, { "epoch": 0.9177175503379085, "grad_norm": 0.7251691818237305, "learning_rate": 1.9340290683361907e-07, "loss": 0.07869529724121094, "step": 6586 }, { "epoch": 0.9178568940291228, "grad_norm": 1.01286780834198, "learning_rate": 1.9275429780740763e-07, "loss": 0.09073829650878906, "step": 6587 }, { "epoch": 0.9179962377203372, "grad_norm": 1.1117827892303467, "learning_rate": 1.921067568414403e-07, "loss": 0.09762191772460938, "step": 6588 }, { "epoch": 0.9181355814115516, "grad_norm": 0.8822376728057861, "learning_rate": 1.9146028407958483e-07, "loss": 0.12764263153076172, "step": 6589 }, { "epoch": 0.918274925102766, "grad_norm": 0.7784180045127869, "learning_rate": 1.9081487966547407e-07, "loss": 0.10255241394042969, "step": 6590 }, { "epoch": 0.9184142687939804, "grad_norm": 1.0145443677902222, "learning_rate": 1.9017054374250111e-07, "loss": 0.09181594848632812, "step": 6591 }, { "epoch": 0.9185536124851947, "grad_norm": 0.5642242431640625, "learning_rate": 1.8952727645382307e-07, "loss": 0.06996345520019531, "step": 6592 }, { "epoch": 0.9186929561764091, "grad_norm": 0.5563076734542847, "learning_rate": 1.88885077942359e-07, "loss": 0.08998489379882812, "step": 6593 }, { "epoch": 0.9188322998676235, "grad_norm": 0.6000143885612488, "learning_rate": 1.8824394835079086e-07, "loss": 0.08814239501953125, "step": 6594 }, { "epoch": 0.9189716435588379, "grad_norm": 1.4213993549346924, "learning_rate": 1.8760388782156468e-07, "loss": 0.12044143676757812, "step": 6595 }, { "epoch": 0.9191109872500522, "grad_norm": 0.711509108543396, "learning_rate": 1.8696489649688454e-07, "loss": 0.09060955047607422, "step": 6596 }, { "epoch": 0.9192503309412666, "grad_norm": 0.8203651905059814, "learning_rate": 1.8632697451872074e-07, "loss": 0.10109376907348633, "step": 6597 }, { "epoch": 0.919389674632481, "grad_norm": 0.840366005897522, "learning_rate": 1.8569012202880599e-07, "loss": 0.08641624450683594, "step": 6598 }, { "epoch": 0.9195290183236954, "grad_norm": 1.353654146194458, "learning_rate": 1.850543391686327e-07, "loss": 0.13185882568359375, "step": 6599 }, { "epoch": 0.9196683620149098, "grad_norm": 0.571768581867218, "learning_rate": 1.8441962607945786e-07, "loss": 0.06768417358398438, "step": 6600 }, { "epoch": 0.9198077057061241, "grad_norm": 0.7265860438346863, "learning_rate": 1.83785982902302e-07, "loss": 0.08166313171386719, "step": 6601 }, { "epoch": 0.9199470493973385, "grad_norm": 0.45877528190612793, "learning_rate": 1.8315340977794415e-07, "loss": 0.07248878479003906, "step": 6602 }, { "epoch": 0.9200863930885529, "grad_norm": 0.711276650428772, "learning_rate": 1.825219068469275e-07, "loss": 0.08547782897949219, "step": 6603 }, { "epoch": 0.9202257367797673, "grad_norm": 0.45026421546936035, "learning_rate": 1.818914742495581e-07, "loss": 0.0740365982055664, "step": 6604 }, { "epoch": 0.9203650804709816, "grad_norm": 0.9071954488754272, "learning_rate": 1.8126211212590505e-07, "loss": 0.09702110290527344, "step": 6605 }, { "epoch": 0.920504424162196, "grad_norm": 0.5122121572494507, "learning_rate": 1.8063382061579648e-07, "loss": 0.06550025939941406, "step": 6606 }, { "epoch": 0.9206437678534104, "grad_norm": 0.83022141456604, "learning_rate": 1.8000659985882463e-07, "loss": 0.08818626403808594, "step": 6607 }, { "epoch": 0.9207831115446248, "grad_norm": 0.569475531578064, "learning_rate": 1.7938044999434412e-07, "loss": 0.07022857666015625, "step": 6608 }, { "epoch": 0.9209224552358392, "grad_norm": 1.021173119544983, "learning_rate": 1.7875537116147146e-07, "loss": 0.08286762237548828, "step": 6609 }, { "epoch": 0.9210617989270535, "grad_norm": 1.0611498355865479, "learning_rate": 1.781313634990839e-07, "loss": 0.11374473571777344, "step": 6610 }, { "epoch": 0.9212011426182679, "grad_norm": 0.3362915813922882, "learning_rate": 1.7750842714582272e-07, "loss": 0.05774879455566406, "step": 6611 }, { "epoch": 0.9213404863094823, "grad_norm": 0.6133601069450378, "learning_rate": 1.7688656224008893e-07, "loss": 0.09201812744140625, "step": 6612 }, { "epoch": 0.9214798300006967, "grad_norm": 0.9213498830795288, "learning_rate": 1.762657689200481e-07, "loss": 0.09086370468139648, "step": 6613 }, { "epoch": 0.9216191736919112, "grad_norm": 0.6811490058898926, "learning_rate": 1.7564604732362545e-07, "loss": 0.07531929016113281, "step": 6614 }, { "epoch": 0.9217585173831255, "grad_norm": 0.33189278841018677, "learning_rate": 1.7502739758850863e-07, "loss": 0.05919170379638672, "step": 6615 }, { "epoch": 0.9218978610743399, "grad_norm": 1.1379326581954956, "learning_rate": 1.7440981985214933e-07, "loss": 0.11728096008300781, "step": 6616 }, { "epoch": 0.9220372047655543, "grad_norm": 0.5753011107444763, "learning_rate": 1.7379331425175728e-07, "loss": 0.08460807800292969, "step": 6617 }, { "epoch": 0.9221765484567687, "grad_norm": 1.1420081853866577, "learning_rate": 1.7317788092430676e-07, "loss": 0.11317253112792969, "step": 6618 }, { "epoch": 0.922315892147983, "grad_norm": 1.0505788326263428, "learning_rate": 1.725635200065323e-07, "loss": 0.1230916976928711, "step": 6619 }, { "epoch": 0.9224552358391974, "grad_norm": 0.678439736366272, "learning_rate": 1.7195023163493253e-07, "loss": 0.08401107788085938, "step": 6620 }, { "epoch": 0.9225945795304118, "grad_norm": 0.9988895654678345, "learning_rate": 1.7133801594576393e-07, "loss": 0.1217498779296875, "step": 6621 }, { "epoch": 0.9227339232216262, "grad_norm": 1.1652709245681763, "learning_rate": 1.7072687307504887e-07, "loss": 0.1280803680419922, "step": 6622 }, { "epoch": 0.9228732669128406, "grad_norm": 0.6733852028846741, "learning_rate": 1.701168031585676e-07, "loss": 0.09728813171386719, "step": 6623 }, { "epoch": 0.9230126106040549, "grad_norm": 0.6852744817733765, "learning_rate": 1.695078063318656e-07, "loss": 0.08804607391357422, "step": 6624 }, { "epoch": 0.9231519542952693, "grad_norm": 0.9367015361785889, "learning_rate": 1.6889988273024627e-07, "loss": 0.10061073303222656, "step": 6625 }, { "epoch": 0.9232912979864837, "grad_norm": 0.5621427893638611, "learning_rate": 1.682930324887766e-07, "loss": 0.06910896301269531, "step": 6626 }, { "epoch": 0.9234306416776981, "grad_norm": 0.608925461769104, "learning_rate": 1.6768725574228706e-07, "loss": 0.07106924057006836, "step": 6627 }, { "epoch": 0.9235699853689124, "grad_norm": 0.7005331516265869, "learning_rate": 1.6708255262536443e-07, "loss": 0.08487892150878906, "step": 6628 }, { "epoch": 0.9237093290601268, "grad_norm": 0.7599420547485352, "learning_rate": 1.6647892327236125e-07, "loss": 0.09543228149414062, "step": 6629 }, { "epoch": 0.9238486727513412, "grad_norm": 0.35149267315864563, "learning_rate": 1.658763678173908e-07, "loss": 0.05564308166503906, "step": 6630 }, { "epoch": 0.9239880164425556, "grad_norm": 0.6096779108047485, "learning_rate": 1.6527488639432543e-07, "loss": 0.07958602905273438, "step": 6631 }, { "epoch": 0.92412736013377, "grad_norm": 1.2452837228775024, "learning_rate": 1.6467447913680268e-07, "loss": 0.11198806762695312, "step": 6632 }, { "epoch": 0.9242667038249843, "grad_norm": 1.0346404314041138, "learning_rate": 1.6407514617821752e-07, "loss": 0.10502433776855469, "step": 6633 }, { "epoch": 0.9244060475161987, "grad_norm": 0.679426908493042, "learning_rate": 1.6347688765172953e-07, "loss": 0.09036827087402344, "step": 6634 }, { "epoch": 0.9245453912074131, "grad_norm": 0.9082434773445129, "learning_rate": 1.6287970369025686e-07, "loss": 0.10919857025146484, "step": 6635 }, { "epoch": 0.9246847348986275, "grad_norm": 0.8223417401313782, "learning_rate": 1.6228359442648112e-07, "loss": 0.12314605712890625, "step": 6636 }, { "epoch": 0.9248240785898418, "grad_norm": 0.5971925258636475, "learning_rate": 1.616885599928436e-07, "loss": 0.09030914306640625, "step": 6637 }, { "epoch": 0.9249634222810562, "grad_norm": 0.7694409489631653, "learning_rate": 1.6109460052154802e-07, "loss": 0.08598899841308594, "step": 6638 }, { "epoch": 0.9251027659722706, "grad_norm": 0.7981800436973572, "learning_rate": 1.6050171614455712e-07, "loss": 0.08365917205810547, "step": 6639 }, { "epoch": 0.925242109663485, "grad_norm": 0.865278959274292, "learning_rate": 1.5990990699359777e-07, "loss": 0.10442543029785156, "step": 6640 }, { "epoch": 0.9253814533546993, "grad_norm": 0.6788645386695862, "learning_rate": 1.593191732001559e-07, "loss": 0.0810699462890625, "step": 6641 }, { "epoch": 0.9255207970459137, "grad_norm": 0.9269324541091919, "learning_rate": 1.5872951489547926e-07, "loss": 0.08490371704101562, "step": 6642 }, { "epoch": 0.9256601407371281, "grad_norm": 0.531960129737854, "learning_rate": 1.5814093221057647e-07, "loss": 0.08441543579101562, "step": 6643 }, { "epoch": 0.9257994844283425, "grad_norm": 0.6916802525520325, "learning_rate": 1.575534252762162e-07, "loss": 0.0916748046875, "step": 6644 }, { "epoch": 0.9259388281195569, "grad_norm": 0.5495800971984863, "learning_rate": 1.5696699422293072e-07, "loss": 0.07234954833984375, "step": 6645 }, { "epoch": 0.9260781718107712, "grad_norm": 0.710411548614502, "learning_rate": 1.5638163918101024e-07, "loss": 0.09037494659423828, "step": 6646 }, { "epoch": 0.9262175155019856, "grad_norm": 1.0242385864257812, "learning_rate": 1.5579736028050797e-07, "loss": 0.10394096374511719, "step": 6647 }, { "epoch": 0.9263568591932, "grad_norm": 1.13351571559906, "learning_rate": 1.5521415765123783e-07, "loss": 0.09953498840332031, "step": 6648 }, { "epoch": 0.9264962028844144, "grad_norm": 0.8503966927528381, "learning_rate": 1.546320314227734e-07, "loss": 0.10027885437011719, "step": 6649 }, { "epoch": 0.9266355465756287, "grad_norm": 0.3682258129119873, "learning_rate": 1.5405098172444954e-07, "loss": 0.061573028564453125, "step": 6650 }, { "epoch": 0.9267748902668431, "grad_norm": 0.929460346698761, "learning_rate": 1.5347100868536246e-07, "loss": 0.11228179931640625, "step": 6651 }, { "epoch": 0.9269142339580575, "grad_norm": 0.6535914540290833, "learning_rate": 1.5289211243436964e-07, "loss": 0.08567237854003906, "step": 6652 }, { "epoch": 0.9270535776492719, "grad_norm": 0.628764271736145, "learning_rate": 1.5231429310008817e-07, "loss": 0.08149337768554688, "step": 6653 }, { "epoch": 0.9271929213404864, "grad_norm": 0.6390331983566284, "learning_rate": 1.5173755081089536e-07, "loss": 0.08817100524902344, "step": 6654 }, { "epoch": 0.9273322650317007, "grad_norm": 0.4625324308872223, "learning_rate": 1.511618856949315e-07, "loss": 0.07574844360351562, "step": 6655 }, { "epoch": 0.9274716087229151, "grad_norm": 1.2026753425598145, "learning_rate": 1.5058729788009597e-07, "loss": 0.10975837707519531, "step": 6656 }, { "epoch": 0.9276109524141295, "grad_norm": 0.7112432718276978, "learning_rate": 1.5001378749404883e-07, "loss": 0.082672119140625, "step": 6657 }, { "epoch": 0.9277502961053439, "grad_norm": 1.353652000427246, "learning_rate": 1.4944135466421095e-07, "loss": 0.12674522399902344, "step": 6658 }, { "epoch": 0.9278896397965583, "grad_norm": 0.6033484935760498, "learning_rate": 1.4886999951776448e-07, "loss": 0.06904888153076172, "step": 6659 }, { "epoch": 0.9280289834877726, "grad_norm": 0.9323314428329468, "learning_rate": 1.4829972218165013e-07, "loss": 0.08052587509155273, "step": 6660 }, { "epoch": 0.928168327178987, "grad_norm": 0.8956069350242615, "learning_rate": 1.477305227825715e-07, "loss": 0.09437751770019531, "step": 6661 }, { "epoch": 0.9283076708702014, "grad_norm": 0.826565146446228, "learning_rate": 1.471624014469919e-07, "loss": 0.10936164855957031, "step": 6662 }, { "epoch": 0.9284470145614158, "grad_norm": 0.9414736032485962, "learning_rate": 1.4659535830113368e-07, "loss": 0.09078598022460938, "step": 6663 }, { "epoch": 0.9285863582526301, "grad_norm": 0.9155384302139282, "learning_rate": 1.4602939347098278e-07, "loss": 0.10192489624023438, "step": 6664 }, { "epoch": 0.9287257019438445, "grad_norm": 0.722347617149353, "learning_rate": 1.454645070822819e-07, "loss": 0.09902381896972656, "step": 6665 }, { "epoch": 0.9288650456350589, "grad_norm": 0.61469966173172, "learning_rate": 1.449006992605373e-07, "loss": 0.09182453155517578, "step": 6666 }, { "epoch": 0.9290043893262733, "grad_norm": 0.6343265175819397, "learning_rate": 1.443379701310127e-07, "loss": 0.08533859252929688, "step": 6667 }, { "epoch": 0.9291437330174876, "grad_norm": 0.5778338313102722, "learning_rate": 1.4377631981873474e-07, "loss": 0.07450103759765625, "step": 6668 }, { "epoch": 0.929283076708702, "grad_norm": 0.5946862697601318, "learning_rate": 1.432157484484892e-07, "loss": 0.08020973205566406, "step": 6669 }, { "epoch": 0.9294224203999164, "grad_norm": 1.078290581703186, "learning_rate": 1.4265625614482247e-07, "loss": 0.08245849609375, "step": 6670 }, { "epoch": 0.9295617640911308, "grad_norm": 0.5710664987564087, "learning_rate": 1.4209784303203965e-07, "loss": 0.0829010009765625, "step": 6671 }, { "epoch": 0.9297011077823452, "grad_norm": 0.9497199058532715, "learning_rate": 1.415405092342087e-07, "loss": 0.1002492904663086, "step": 6672 }, { "epoch": 0.9298404514735595, "grad_norm": 0.8312626481056213, "learning_rate": 1.4098425487515665e-07, "loss": 0.0922393798828125, "step": 6673 }, { "epoch": 0.9299797951647739, "grad_norm": 1.569978952407837, "learning_rate": 1.4042908007846912e-07, "loss": 0.12539291381835938, "step": 6674 }, { "epoch": 0.9301191388559883, "grad_norm": 0.907586395740509, "learning_rate": 1.3987498496749463e-07, "loss": 0.08296394348144531, "step": 6675 }, { "epoch": 0.9302584825472027, "grad_norm": 0.47600656747817993, "learning_rate": 1.3932196966533972e-07, "loss": 0.06879234313964844, "step": 6676 }, { "epoch": 0.930397826238417, "grad_norm": 1.1133062839508057, "learning_rate": 1.3877003429487224e-07, "loss": 0.11379003524780273, "step": 6677 }, { "epoch": 0.9305371699296314, "grad_norm": 0.6807987093925476, "learning_rate": 1.3821917897871905e-07, "loss": 0.09240150451660156, "step": 6678 }, { "epoch": 0.9306765136208458, "grad_norm": 0.5717045664787292, "learning_rate": 1.3766940383926785e-07, "loss": 0.079803466796875, "step": 6679 }, { "epoch": 0.9308158573120602, "grad_norm": 0.6348443031311035, "learning_rate": 1.3712070899866704e-07, "loss": 0.09597015380859375, "step": 6680 }, { "epoch": 0.9309552010032746, "grad_norm": 0.4522729814052582, "learning_rate": 1.3657309457882294e-07, "loss": 0.0701751708984375, "step": 6681 }, { "epoch": 0.9310945446944889, "grad_norm": 0.4880298376083374, "learning_rate": 1.3602656070140275e-07, "loss": 0.08072853088378906, "step": 6682 }, { "epoch": 0.9312338883857033, "grad_norm": 0.4419649541378021, "learning_rate": 1.3548110748783426e-07, "loss": 0.06827068328857422, "step": 6683 }, { "epoch": 0.9313732320769177, "grad_norm": 0.4796788990497589, "learning_rate": 1.349367350593056e-07, "loss": 0.06814765930175781, "step": 6684 }, { "epoch": 0.9315125757681321, "grad_norm": 1.2855793237686157, "learning_rate": 1.3439344353676276e-07, "loss": 0.09820556640625, "step": 6685 }, { "epoch": 0.9316519194593464, "grad_norm": 0.3836301267147064, "learning_rate": 1.3385123304091306e-07, "loss": 0.055362701416015625, "step": 6686 }, { "epoch": 0.9317912631505608, "grad_norm": 0.8089998960494995, "learning_rate": 1.3331010369222298e-07, "loss": 0.08760929107666016, "step": 6687 }, { "epoch": 0.9319306068417752, "grad_norm": 0.7574322819709778, "learning_rate": 1.3277005561092016e-07, "loss": 0.0942535400390625, "step": 6688 }, { "epoch": 0.9320699505329896, "grad_norm": 0.9428072571754456, "learning_rate": 1.3223108891698976e-07, "loss": 0.09232139587402344, "step": 6689 }, { "epoch": 0.932209294224204, "grad_norm": 1.331215739250183, "learning_rate": 1.316932037301788e-07, "loss": 0.12390708923339844, "step": 6690 }, { "epoch": 0.9323486379154183, "grad_norm": 1.3770136833190918, "learning_rate": 1.3115640016999222e-07, "loss": 0.11195945739746094, "step": 6691 }, { "epoch": 0.9324879816066327, "grad_norm": 0.8076065182685852, "learning_rate": 1.3062067835569625e-07, "loss": 0.0753774642944336, "step": 6692 }, { "epoch": 0.9326273252978471, "grad_norm": 0.7635619640350342, "learning_rate": 1.3008603840631516e-07, "loss": 0.08867073059082031, "step": 6693 }, { "epoch": 0.9327666689890615, "grad_norm": 0.8299214243888855, "learning_rate": 1.2955248044063452e-07, "loss": 0.10869216918945312, "step": 6694 }, { "epoch": 0.932906012680276, "grad_norm": 0.9137520790100098, "learning_rate": 1.2902000457719886e-07, "loss": 0.07617950439453125, "step": 6695 }, { "epoch": 0.9330453563714903, "grad_norm": 0.48782262206077576, "learning_rate": 1.2848861093431143e-07, "loss": 0.06494522094726562, "step": 6696 }, { "epoch": 0.9331847000627047, "grad_norm": 0.506926953792572, "learning_rate": 1.2795829963003604e-07, "loss": 0.085662841796875, "step": 6697 }, { "epoch": 0.9333240437539191, "grad_norm": 0.6929637789726257, "learning_rate": 1.274290707821968e-07, "loss": 0.0856027603149414, "step": 6698 }, { "epoch": 0.9334633874451335, "grad_norm": 1.5525685548782349, "learning_rate": 1.269009245083741e-07, "loss": 0.09131240844726562, "step": 6699 }, { "epoch": 0.9336027311363478, "grad_norm": 0.5294057726860046, "learning_rate": 1.2637386092591187e-07, "loss": 0.07821273803710938, "step": 6700 }, { "epoch": 0.9337420748275622, "grad_norm": 0.6653882265090942, "learning_rate": 1.258478801519114e-07, "loss": 0.07777214050292969, "step": 6701 }, { "epoch": 0.9338814185187766, "grad_norm": 0.9720170497894287, "learning_rate": 1.2532298230323258e-07, "loss": 0.10248374938964844, "step": 6702 }, { "epoch": 0.934020762209991, "grad_norm": 0.8748880624771118, "learning_rate": 1.2479916749649657e-07, "loss": 0.09381866455078125, "step": 6703 }, { "epoch": 0.9341601059012054, "grad_norm": 1.0592859983444214, "learning_rate": 1.2427643584808246e-07, "loss": 0.10190200805664062, "step": 6704 }, { "epoch": 0.9342994495924197, "grad_norm": 0.8123784065246582, "learning_rate": 1.2375478747413017e-07, "loss": 0.08069801330566406, "step": 6705 }, { "epoch": 0.9344387932836341, "grad_norm": 0.7378441095352173, "learning_rate": 1.2323422249053696e-07, "loss": 0.08131980895996094, "step": 6706 }, { "epoch": 0.9345781369748485, "grad_norm": 0.7098504304885864, "learning_rate": 1.2271474101296144e-07, "loss": 0.09447288513183594, "step": 6707 }, { "epoch": 0.9347174806660629, "grad_norm": 1.3195215463638306, "learning_rate": 1.2219634315681962e-07, "loss": 0.13886451721191406, "step": 6708 }, { "epoch": 0.9348568243572772, "grad_norm": 0.7498605251312256, "learning_rate": 1.2167902903728879e-07, "loss": 0.08312225341796875, "step": 6709 }, { "epoch": 0.9349961680484916, "grad_norm": 0.372226357460022, "learning_rate": 1.211627987693037e-07, "loss": 0.06493759155273438, "step": 6710 }, { "epoch": 0.935135511739706, "grad_norm": 0.735066831111908, "learning_rate": 1.206476524675587e-07, "loss": 0.10005760192871094, "step": 6711 }, { "epoch": 0.9352748554309204, "grad_norm": 0.6286857724189758, "learning_rate": 1.2013359024650785e-07, "loss": 0.0971221923828125, "step": 6712 }, { "epoch": 0.9354141991221347, "grad_norm": 0.5104736685752869, "learning_rate": 1.196206122203647e-07, "loss": 0.0734710693359375, "step": 6713 }, { "epoch": 0.9355535428133491, "grad_norm": 0.6346911191940308, "learning_rate": 1.1910871850309979e-07, "loss": 0.09551811218261719, "step": 6714 }, { "epoch": 0.9356928865045635, "grad_norm": 1.2270376682281494, "learning_rate": 1.1859790920844494e-07, "loss": 0.11871623992919922, "step": 6715 }, { "epoch": 0.9358322301957779, "grad_norm": 1.17112135887146, "learning_rate": 1.1808818444989046e-07, "loss": 0.13508033752441406, "step": 6716 }, { "epoch": 0.9359715738869923, "grad_norm": 1.3577831983566284, "learning_rate": 1.1757954434068574e-07, "loss": 0.10356521606445312, "step": 6717 }, { "epoch": 0.9361109175782066, "grad_norm": 0.40741145610809326, "learning_rate": 1.1707198899383875e-07, "loss": 0.07034873962402344, "step": 6718 }, { "epoch": 0.936250261269421, "grad_norm": 0.48636189103126526, "learning_rate": 1.1656551852211595e-07, "loss": 0.06963729858398438, "step": 6719 }, { "epoch": 0.9363896049606354, "grad_norm": 1.865810513496399, "learning_rate": 1.1606013303804508e-07, "loss": 0.11740875244140625, "step": 6720 }, { "epoch": 0.9365289486518498, "grad_norm": 1.4263495206832886, "learning_rate": 1.1555583265390968e-07, "loss": 0.09454154968261719, "step": 6721 }, { "epoch": 0.9366682923430641, "grad_norm": 0.8829201459884644, "learning_rate": 1.1505261748175512e-07, "loss": 0.08385658264160156, "step": 6722 }, { "epoch": 0.9368076360342785, "grad_norm": 0.6151916980743408, "learning_rate": 1.1455048763338361e-07, "loss": 0.08040237426757812, "step": 6723 }, { "epoch": 0.9369469797254929, "grad_norm": 0.3958021104335785, "learning_rate": 1.1404944322035705e-07, "loss": 0.05913734436035156, "step": 6724 }, { "epoch": 0.9370863234167073, "grad_norm": 1.278457522392273, "learning_rate": 1.1354948435399582e-07, "loss": 0.10847663879394531, "step": 6725 }, { "epoch": 0.9372256671079217, "grad_norm": 0.4782831370830536, "learning_rate": 1.130506111453794e-07, "loss": 0.06739187240600586, "step": 6726 }, { "epoch": 0.937365010799136, "grad_norm": 0.5211597681045532, "learning_rate": 1.1255282370534748e-07, "loss": 0.07039833068847656, "step": 6727 }, { "epoch": 0.9375043544903504, "grad_norm": 0.5844599604606628, "learning_rate": 1.1205612214449434e-07, "loss": 0.08557510375976562, "step": 6728 }, { "epoch": 0.9376436981815648, "grad_norm": 0.854241669178009, "learning_rate": 1.1156050657317785e-07, "loss": 0.10581207275390625, "step": 6729 }, { "epoch": 0.9377830418727792, "grad_norm": 0.4115937054157257, "learning_rate": 1.1106597710151157e-07, "loss": 0.05882835388183594, "step": 6730 }, { "epoch": 0.9379223855639935, "grad_norm": 0.7076199650764465, "learning_rate": 1.1057253383936928e-07, "loss": 0.08683013916015625, "step": 6731 }, { "epoch": 0.9380617292552079, "grad_norm": 0.8997673392295837, "learning_rate": 1.1008017689638162e-07, "loss": 0.10125398635864258, "step": 6732 }, { "epoch": 0.9382010729464223, "grad_norm": 0.8899293541908264, "learning_rate": 1.0958890638194108e-07, "loss": 0.09833145141601562, "step": 6733 }, { "epoch": 0.9383404166376367, "grad_norm": 1.5107835531234741, "learning_rate": 1.0909872240519481e-07, "loss": 0.1184091567993164, "step": 6734 }, { "epoch": 0.9384797603288512, "grad_norm": 0.607815682888031, "learning_rate": 1.0860962507505124e-07, "loss": 0.07662200927734375, "step": 6735 }, { "epoch": 0.9386191040200655, "grad_norm": 0.8456151485443115, "learning_rate": 1.0812161450017678e-07, "loss": 0.10291862487792969, "step": 6736 }, { "epoch": 0.9387584477112799, "grad_norm": 1.0176100730895996, "learning_rate": 1.0763469078899635e-07, "loss": 0.10197257995605469, "step": 6737 }, { "epoch": 0.9388977914024943, "grad_norm": 0.7928478121757507, "learning_rate": 1.0714885404969288e-07, "loss": 0.08056449890136719, "step": 6738 }, { "epoch": 0.9390371350937087, "grad_norm": 0.5163343548774719, "learning_rate": 1.0666410439020836e-07, "loss": 0.09602546691894531, "step": 6739 }, { "epoch": 0.939176478784923, "grad_norm": 1.102597951889038, "learning_rate": 1.0618044191824273e-07, "loss": 0.12597084045410156, "step": 6740 }, { "epoch": 0.9393158224761374, "grad_norm": 0.4212413430213928, "learning_rate": 1.056978667412556e-07, "loss": 0.06297588348388672, "step": 6741 }, { "epoch": 0.9394551661673518, "grad_norm": 1.4559335708618164, "learning_rate": 1.0521637896646286e-07, "loss": 0.1439228057861328, "step": 6742 }, { "epoch": 0.9395945098585662, "grad_norm": 0.8634052276611328, "learning_rate": 1.0473597870084174e-07, "loss": 0.10852813720703125, "step": 6743 }, { "epoch": 0.9397338535497806, "grad_norm": 0.29860416054725647, "learning_rate": 1.0425666605112516e-07, "loss": 0.054924964904785156, "step": 6744 }, { "epoch": 0.9398731972409949, "grad_norm": 0.7651384472846985, "learning_rate": 1.0377844112380575e-07, "loss": 0.09241676330566406, "step": 6745 }, { "epoch": 0.9400125409322093, "grad_norm": 0.7762443423271179, "learning_rate": 1.0330130402513406e-07, "loss": 0.08878517150878906, "step": 6746 }, { "epoch": 0.9401518846234237, "grad_norm": 0.9455567598342896, "learning_rate": 1.028252548611186e-07, "loss": 0.12511634826660156, "step": 6747 }, { "epoch": 0.9402912283146381, "grad_norm": 0.624835729598999, "learning_rate": 1.0235029373752758e-07, "loss": 0.0705118179321289, "step": 6748 }, { "epoch": 0.9404305720058524, "grad_norm": 0.36895209550857544, "learning_rate": 1.0187642075988602e-07, "loss": 0.059478759765625, "step": 6749 }, { "epoch": 0.9405699156970668, "grad_norm": 0.6577364802360535, "learning_rate": 1.0140363603347747e-07, "loss": 0.08178138732910156, "step": 6750 }, { "epoch": 0.9407092593882812, "grad_norm": 0.5023090243339539, "learning_rate": 1.0093193966334403e-07, "loss": 0.07120609283447266, "step": 6751 }, { "epoch": 0.9408486030794956, "grad_norm": 0.42229804396629333, "learning_rate": 1.0046133175428685e-07, "loss": 0.06904983520507812, "step": 6752 }, { "epoch": 0.94098794677071, "grad_norm": 0.37385863065719604, "learning_rate": 9.999181241086231e-08, "loss": 0.0521392822265625, "step": 6753 }, { "epoch": 0.9411272904619243, "grad_norm": 1.5291922092437744, "learning_rate": 9.952338173738862e-08, "loss": 0.12200927734375, "step": 6754 }, { "epoch": 0.9412666341531387, "grad_norm": 1.1421505212783813, "learning_rate": 9.905603983793921e-08, "loss": 0.11986160278320312, "step": 6755 }, { "epoch": 0.9414059778443531, "grad_norm": 0.39756521582603455, "learning_rate": 9.858978681634823e-08, "loss": 0.06840896606445312, "step": 6756 }, { "epoch": 0.9415453215355675, "grad_norm": 0.5519850254058838, "learning_rate": 9.81246227762045e-08, "loss": 0.07016372680664062, "step": 6757 }, { "epoch": 0.9416846652267818, "grad_norm": 0.5216733813285828, "learning_rate": 9.76605478208581e-08, "loss": 0.08426284790039062, "step": 6758 }, { "epoch": 0.9418240089179962, "grad_norm": 0.8723834156990051, "learning_rate": 9.719756205341658e-08, "loss": 0.09850883483886719, "step": 6759 }, { "epoch": 0.9419633526092106, "grad_norm": 0.7423604130744934, "learning_rate": 9.673566557674263e-08, "loss": 0.09323692321777344, "step": 6760 }, { "epoch": 0.942102696300425, "grad_norm": 0.4561028480529785, "learning_rate": 9.627485849346085e-08, "loss": 0.0810546875, "step": 6761 }, { "epoch": 0.9422420399916394, "grad_norm": 0.8535152673721313, "learning_rate": 9.581514090595212e-08, "loss": 0.09387969970703125, "step": 6762 }, { "epoch": 0.9423813836828537, "grad_norm": 0.5326577425003052, "learning_rate": 9.535651291635362e-08, "loss": 0.07700729370117188, "step": 6763 }, { "epoch": 0.9425207273740681, "grad_norm": 0.8724589943885803, "learning_rate": 9.489897462656383e-08, "loss": 0.08081245422363281, "step": 6764 }, { "epoch": 0.9426600710652825, "grad_norm": 0.9830670356750488, "learning_rate": 9.44425261382359e-08, "loss": 0.08734798431396484, "step": 6765 }, { "epoch": 0.9427994147564969, "grad_norm": 0.5718317627906799, "learning_rate": 9.39871675527837e-08, "loss": 0.07975006103515625, "step": 6766 }, { "epoch": 0.9429387584477112, "grad_norm": 0.5135818719863892, "learning_rate": 9.353289897137574e-08, "loss": 0.072967529296875, "step": 6767 }, { "epoch": 0.9430781021389256, "grad_norm": 0.6909454464912415, "learning_rate": 9.30797204949413e-08, "loss": 0.0851430892944336, "step": 6768 }, { "epoch": 0.94321744583014, "grad_norm": 0.8258058428764343, "learning_rate": 9.262763222416649e-08, "loss": 0.08571243286132812, "step": 6769 }, { "epoch": 0.9433567895213544, "grad_norm": 0.3739544749259949, "learning_rate": 9.217663425949486e-08, "loss": 0.06659889221191406, "step": 6770 }, { "epoch": 0.9434961332125688, "grad_norm": 0.49104365706443787, "learning_rate": 9.172672670112681e-08, "loss": 0.07779884338378906, "step": 6771 }, { "epoch": 0.9436354769037831, "grad_norm": 0.8823704123497009, "learning_rate": 9.127790964902239e-08, "loss": 0.08545494079589844, "step": 6772 }, { "epoch": 0.9437748205949975, "grad_norm": 0.7180679440498352, "learning_rate": 9.083018320289849e-08, "loss": 0.060013771057128906, "step": 6773 }, { "epoch": 0.9439141642862119, "grad_norm": 0.736919641494751, "learning_rate": 9.038354746222999e-08, "loss": 0.08751296997070312, "step": 6774 }, { "epoch": 0.9440535079774264, "grad_norm": 0.4909234046936035, "learning_rate": 8.993800252624863e-08, "loss": 0.06816864013671875, "step": 6775 }, { "epoch": 0.9441928516686408, "grad_norm": 0.8984291553497314, "learning_rate": 8.94935484939441e-08, "loss": 0.08715152740478516, "step": 6776 }, { "epoch": 0.9443321953598551, "grad_norm": 0.6269862651824951, "learning_rate": 8.905018546406519e-08, "loss": 0.10059928894042969, "step": 6777 }, { "epoch": 0.9444715390510695, "grad_norm": 0.6450772285461426, "learning_rate": 8.860791353511532e-08, "loss": 0.08228111267089844, "step": 6778 }, { "epoch": 0.9446108827422839, "grad_norm": 0.6465370059013367, "learning_rate": 8.816673280535815e-08, "loss": 0.07944107055664062, "step": 6779 }, { "epoch": 0.9447502264334983, "grad_norm": 0.6413435935974121, "learning_rate": 8.772664337281412e-08, "loss": 0.08019828796386719, "step": 6780 }, { "epoch": 0.9448895701247126, "grad_norm": 0.8024011850357056, "learning_rate": 8.728764533526112e-08, "loss": 0.10602188110351562, "step": 6781 }, { "epoch": 0.945028913815927, "grad_norm": 0.941605806350708, "learning_rate": 8.684973879023395e-08, "loss": 0.09965872764587402, "step": 6782 }, { "epoch": 0.9451682575071414, "grad_norm": 0.3936874270439148, "learning_rate": 8.641292383502531e-08, "loss": 0.06783485412597656, "step": 6783 }, { "epoch": 0.9453076011983558, "grad_norm": 1.153045415878296, "learning_rate": 8.597720056668646e-08, "loss": 0.08196640014648438, "step": 6784 }, { "epoch": 0.9454469448895702, "grad_norm": 0.6648853421211243, "learning_rate": 8.55425690820244e-08, "loss": 0.09085655212402344, "step": 6785 }, { "epoch": 0.9455862885807845, "grad_norm": 0.6187095046043396, "learning_rate": 8.510902947760469e-08, "loss": 0.07651901245117188, "step": 6786 }, { "epoch": 0.9457256322719989, "grad_norm": 0.3652861416339874, "learning_rate": 8.467658184974914e-08, "loss": 0.06449127197265625, "step": 6787 }, { "epoch": 0.9458649759632133, "grad_norm": 0.6682625412940979, "learning_rate": 8.424522629453924e-08, "loss": 0.08642387390136719, "step": 6788 }, { "epoch": 0.9460043196544277, "grad_norm": 1.1666195392608643, "learning_rate": 8.381496290781055e-08, "loss": 0.14615631103515625, "step": 6789 }, { "epoch": 0.946143663345642, "grad_norm": 0.8491275310516357, "learning_rate": 8.338579178515882e-08, "loss": 0.09816932678222656, "step": 6790 }, { "epoch": 0.9462830070368564, "grad_norm": 0.4577428996562958, "learning_rate": 8.295771302193723e-08, "loss": 0.07308769226074219, "step": 6791 }, { "epoch": 0.9464223507280708, "grad_norm": 0.7853921055793762, "learning_rate": 8.253072671325246e-08, "loss": 0.10166358947753906, "step": 6792 }, { "epoch": 0.9465616944192852, "grad_norm": 0.6316817998886108, "learning_rate": 8.210483295397309e-08, "loss": 0.07739448547363281, "step": 6793 }, { "epoch": 0.9467010381104995, "grad_norm": 0.984271764755249, "learning_rate": 8.168003183872175e-08, "loss": 0.09244346618652344, "step": 6794 }, { "epoch": 0.9468403818017139, "grad_norm": 0.30798181891441345, "learning_rate": 8.125632346188073e-08, "loss": 0.06142234802246094, "step": 6795 }, { "epoch": 0.9469797254929283, "grad_norm": 0.6482940316200256, "learning_rate": 8.083370791758804e-08, "loss": 0.08242321014404297, "step": 6796 }, { "epoch": 0.9471190691841427, "grad_norm": 0.9285650849342346, "learning_rate": 8.04121852997386e-08, "loss": 0.09196949005126953, "step": 6797 }, { "epoch": 0.9472584128753571, "grad_norm": 0.531266987323761, "learning_rate": 7.999175570198526e-08, "loss": 0.07453632354736328, "step": 6798 }, { "epoch": 0.9473977565665714, "grad_norm": 0.7649562954902649, "learning_rate": 7.957241921773828e-08, "loss": 0.11594772338867188, "step": 6799 }, { "epoch": 0.9475371002577858, "grad_norm": 0.6453039646148682, "learning_rate": 7.915417594016428e-08, "loss": 0.08996963500976562, "step": 6800 }, { "epoch": 0.9476764439490002, "grad_norm": 1.5141643285751343, "learning_rate": 7.873702596218836e-08, "loss": 0.14200401306152344, "step": 6801 }, { "epoch": 0.9478157876402146, "grad_norm": 0.5919872522354126, "learning_rate": 7.83209693764908e-08, "loss": 0.08458232879638672, "step": 6802 }, { "epoch": 0.947955131331429, "grad_norm": 1.4499952793121338, "learning_rate": 7.790600627550937e-08, "loss": 0.12653541564941406, "step": 6803 }, { "epoch": 0.9480944750226433, "grad_norm": 0.6006331443786621, "learning_rate": 7.749213675143974e-08, "loss": 0.08693885803222656, "step": 6804 }, { "epoch": 0.9482338187138577, "grad_norm": 0.9637106657028198, "learning_rate": 7.707936089623558e-08, "loss": 0.08995819091796875, "step": 6805 }, { "epoch": 0.9483731624050721, "grad_norm": 1.1747742891311646, "learning_rate": 7.666767880160464e-08, "loss": 0.12083816528320312, "step": 6806 }, { "epoch": 0.9485125060962865, "grad_norm": 0.5271254181861877, "learning_rate": 7.625709055901375e-08, "loss": 0.07855224609375, "step": 6807 }, { "epoch": 0.9486518497875008, "grad_norm": 0.5684513449668884, "learning_rate": 7.584759625968663e-08, "loss": 0.07773208618164062, "step": 6808 }, { "epoch": 0.9487911934787152, "grad_norm": 0.6656180024147034, "learning_rate": 7.543919599460325e-08, "loss": 0.08413124084472656, "step": 6809 }, { "epoch": 0.9489305371699296, "grad_norm": 0.5321371555328369, "learning_rate": 7.503188985450105e-08, "loss": 0.0690927505493164, "step": 6810 }, { "epoch": 0.949069880861144, "grad_norm": 0.5859333276748657, "learning_rate": 7.462567792987374e-08, "loss": 0.07776832580566406, "step": 6811 }, { "epoch": 0.9492092245523583, "grad_norm": 1.0264453887939453, "learning_rate": 7.422056031097302e-08, "loss": 0.09277153015136719, "step": 6812 }, { "epoch": 0.9493485682435727, "grad_norm": 0.7680577635765076, "learning_rate": 7.381653708780578e-08, "loss": 0.08795547485351562, "step": 6813 }, { "epoch": 0.9494879119347871, "grad_norm": 0.48979079723358154, "learning_rate": 7.341360835013745e-08, "loss": 0.05623626708984375, "step": 6814 }, { "epoch": 0.9496272556260016, "grad_norm": 0.7833986282348633, "learning_rate": 7.301177418748973e-08, "loss": 0.08746719360351562, "step": 6815 }, { "epoch": 0.949766599317216, "grad_norm": 0.676363468170166, "learning_rate": 7.261103468914066e-08, "loss": 0.08115577697753906, "step": 6816 }, { "epoch": 0.9499059430084303, "grad_norm": 0.7414114475250244, "learning_rate": 7.221138994412569e-08, "loss": 0.09857559204101562, "step": 6817 }, { "epoch": 0.9500452866996447, "grad_norm": 0.6649263501167297, "learning_rate": 7.181284004123601e-08, "loss": 0.0861358642578125, "step": 6818 }, { "epoch": 0.9501846303908591, "grad_norm": 0.4219898283481598, "learning_rate": 7.14153850690208e-08, "loss": 0.07496833801269531, "step": 6819 }, { "epoch": 0.9503239740820735, "grad_norm": 0.7293485999107361, "learning_rate": 7.101902511578606e-08, "loss": 0.07506752014160156, "step": 6820 }, { "epoch": 0.9504633177732879, "grad_norm": 0.6484253406524658, "learning_rate": 7.062376026959305e-08, "loss": 0.0758209228515625, "step": 6821 }, { "epoch": 0.9506026614645022, "grad_norm": 0.7310644388198853, "learning_rate": 7.022959061826151e-08, "loss": 0.08929920196533203, "step": 6822 }, { "epoch": 0.9507420051557166, "grad_norm": 0.693108320236206, "learning_rate": 6.983651624936527e-08, "loss": 0.08709335327148438, "step": 6823 }, { "epoch": 0.950881348846931, "grad_norm": 0.9957515001296997, "learning_rate": 6.944453725023836e-08, "loss": 0.10588836669921875, "step": 6824 }, { "epoch": 0.9510206925381454, "grad_norm": 0.6905865669250488, "learning_rate": 6.905365370796891e-08, "loss": 0.08345603942871094, "step": 6825 }, { "epoch": 0.9511600362293597, "grad_norm": 0.8990439772605896, "learning_rate": 6.866386570940132e-08, "loss": 0.09649276733398438, "step": 6826 }, { "epoch": 0.9512993799205741, "grad_norm": 0.48668432235717773, "learning_rate": 6.827517334113965e-08, "loss": 0.08065414428710938, "step": 6827 }, { "epoch": 0.9514387236117885, "grad_norm": 0.43928274512290955, "learning_rate": 6.788757668954038e-08, "loss": 0.065338134765625, "step": 6828 }, { "epoch": 0.9515780673030029, "grad_norm": 0.4258774220943451, "learning_rate": 6.750107584071964e-08, "loss": 0.0760812759399414, "step": 6829 }, { "epoch": 0.9517174109942172, "grad_norm": 1.024205207824707, "learning_rate": 6.711567088054927e-08, "loss": 0.10737228393554688, "step": 6830 }, { "epoch": 0.9518567546854316, "grad_norm": 1.1468091011047363, "learning_rate": 6.67313618946569e-08, "loss": 0.12235069274902344, "step": 6831 }, { "epoch": 0.951996098376646, "grad_norm": 0.39216917753219604, "learning_rate": 6.634814896842757e-08, "loss": 0.07401466369628906, "step": 6832 }, { "epoch": 0.9521354420678604, "grad_norm": 0.9721818566322327, "learning_rate": 6.59660321870026e-08, "loss": 0.10714149475097656, "step": 6833 }, { "epoch": 0.9522747857590748, "grad_norm": 0.9114884734153748, "learning_rate": 6.558501163527964e-08, "loss": 0.10164642333984375, "step": 6834 }, { "epoch": 0.9524141294502891, "grad_norm": 1.1627038717269897, "learning_rate": 6.520508739791153e-08, "loss": 0.12183570861816406, "step": 6835 }, { "epoch": 0.9525534731415035, "grad_norm": 0.7089784741401672, "learning_rate": 6.482625955931022e-08, "loss": 0.10181045532226562, "step": 6836 }, { "epoch": 0.9526928168327179, "grad_norm": 1.7249246835708618, "learning_rate": 6.444852820364222e-08, "loss": 0.12794876098632812, "step": 6837 }, { "epoch": 0.9528321605239323, "grad_norm": 1.2599741220474243, "learning_rate": 6.407189341483044e-08, "loss": 0.11341094970703125, "step": 6838 }, { "epoch": 0.9529715042151466, "grad_norm": 0.8336100578308105, "learning_rate": 6.369635527655515e-08, "loss": 0.08127403259277344, "step": 6839 }, { "epoch": 0.953110847906361, "grad_norm": 0.4833526909351349, "learning_rate": 6.332191387225128e-08, "loss": 0.06343269348144531, "step": 6840 }, { "epoch": 0.9532501915975754, "grad_norm": 0.9512216448783875, "learning_rate": 6.294856928511284e-08, "loss": 0.08817481994628906, "step": 6841 }, { "epoch": 0.9533895352887898, "grad_norm": 1.200229287147522, "learning_rate": 6.257632159808679e-08, "loss": 0.08839607238769531, "step": 6842 }, { "epoch": 0.9535288789800042, "grad_norm": 0.7591498494148254, "learning_rate": 6.220517089387867e-08, "loss": 0.08005285263061523, "step": 6843 }, { "epoch": 0.9536682226712185, "grad_norm": 0.8057992458343506, "learning_rate": 6.183511725495028e-08, "loss": 0.11512565612792969, "step": 6844 }, { "epoch": 0.9538075663624329, "grad_norm": 0.9877325296401978, "learning_rate": 6.146616076351864e-08, "loss": 0.13439178466796875, "step": 6845 }, { "epoch": 0.9539469100536473, "grad_norm": 1.2979496717453003, "learning_rate": 6.109830150155705e-08, "loss": 0.10712337493896484, "step": 6846 }, { "epoch": 0.9540862537448617, "grad_norm": 0.788402795791626, "learning_rate": 6.07315395507957e-08, "loss": 0.1085357666015625, "step": 6847 }, { "epoch": 0.954225597436076, "grad_norm": 0.8278927206993103, "learning_rate": 6.036587499272161e-08, "loss": 0.10487937927246094, "step": 6848 }, { "epoch": 0.9543649411272904, "grad_norm": 0.5148045420646667, "learning_rate": 6.000130790857595e-08, "loss": 0.07379341125488281, "step": 6849 }, { "epoch": 0.9545042848185048, "grad_norm": 0.6822431087493896, "learning_rate": 5.963783837935722e-08, "loss": 0.09679985046386719, "step": 6850 }, { "epoch": 0.9546436285097192, "grad_norm": 0.6349693536758423, "learning_rate": 5.927546648582083e-08, "loss": 0.0819549560546875, "step": 6851 }, { "epoch": 0.9547829722009336, "grad_norm": 0.6195026636123657, "learning_rate": 5.8914192308476835e-08, "loss": 0.07736396789550781, "step": 6852 }, { "epoch": 0.9549223158921479, "grad_norm": 1.2458410263061523, "learning_rate": 5.855401592759269e-08, "loss": 0.08454513549804688, "step": 6853 }, { "epoch": 0.9550616595833623, "grad_norm": 0.9349880814552307, "learning_rate": 5.8194937423191043e-08, "loss": 0.11117172241210938, "step": 6854 }, { "epoch": 0.9552010032745768, "grad_norm": 0.6498242020606995, "learning_rate": 5.783695687505087e-08, "loss": 0.0915374755859375, "step": 6855 }, { "epoch": 0.9553403469657912, "grad_norm": 0.579339325428009, "learning_rate": 5.7480074362707415e-08, "loss": 0.07482337951660156, "step": 6856 }, { "epoch": 0.9554796906570056, "grad_norm": 0.49369561672210693, "learning_rate": 5.712428996545172e-08, "loss": 0.071441650390625, "step": 6857 }, { "epoch": 0.9556190343482199, "grad_norm": 1.4504964351654053, "learning_rate": 5.6769603762331096e-08, "loss": 0.15613746643066406, "step": 6858 }, { "epoch": 0.9557583780394343, "grad_norm": 1.1400827169418335, "learning_rate": 5.641601583214862e-08, "loss": 0.12842655181884766, "step": 6859 }, { "epoch": 0.9558977217306487, "grad_norm": 0.8849841952323914, "learning_rate": 5.606352625346368e-08, "loss": 0.08115005493164062, "step": 6860 }, { "epoch": 0.9560370654218631, "grad_norm": 1.2735731601715088, "learning_rate": 5.571213510459084e-08, "loss": 0.12196731567382812, "step": 6861 }, { "epoch": 0.9561764091130774, "grad_norm": 0.5794918537139893, "learning_rate": 5.53618424636021e-08, "loss": 0.08607292175292969, "step": 6862 }, { "epoch": 0.9563157528042918, "grad_norm": 0.8719363212585449, "learning_rate": 5.501264840832299e-08, "loss": 0.08886146545410156, "step": 6863 }, { "epoch": 0.9564550964955062, "grad_norm": 0.8918839693069458, "learning_rate": 5.466455301633811e-08, "loss": 0.1103515625, "step": 6864 }, { "epoch": 0.9565944401867206, "grad_norm": 1.2996011972427368, "learning_rate": 5.431755636498559e-08, "loss": 0.10135841369628906, "step": 6865 }, { "epoch": 0.956733783877935, "grad_norm": 0.8754323720932007, "learning_rate": 5.3971658531360436e-08, "loss": 0.0936431884765625, "step": 6866 }, { "epoch": 0.9568731275691493, "grad_norm": 1.5856117010116577, "learning_rate": 5.362685959231284e-08, "loss": 0.12551307678222656, "step": 6867 }, { "epoch": 0.9570124712603637, "grad_norm": 0.5771937370300293, "learning_rate": 5.3283159624448745e-08, "loss": 0.07071304321289062, "step": 6868 }, { "epoch": 0.9571518149515781, "grad_norm": 0.6278083920478821, "learning_rate": 5.294055870413206e-08, "loss": 0.0930633544921875, "step": 6869 }, { "epoch": 0.9572911586427925, "grad_norm": 1.3378429412841797, "learning_rate": 5.2599056907479685e-08, "loss": 0.11212539672851562, "step": 6870 }, { "epoch": 0.9574305023340068, "grad_norm": 0.5572197437286377, "learning_rate": 5.2258654310365366e-08, "loss": 0.08257293701171875, "step": 6871 }, { "epoch": 0.9575698460252212, "grad_norm": 0.3567609488964081, "learning_rate": 5.1919350988419716e-08, "loss": 0.06601905822753906, "step": 6872 }, { "epoch": 0.9577091897164356, "grad_norm": 1.360999584197998, "learning_rate": 5.1581147017027434e-08, "loss": 0.13450050354003906, "step": 6873 }, { "epoch": 0.95784853340765, "grad_norm": 0.7712132930755615, "learning_rate": 5.124404247133008e-08, "loss": 0.08266258239746094, "step": 6874 }, { "epoch": 0.9579878770988643, "grad_norm": 0.7796860337257385, "learning_rate": 5.090803742622441e-08, "loss": 0.08991050720214844, "step": 6875 }, { "epoch": 0.9581272207900787, "grad_norm": 0.5422692894935608, "learning_rate": 5.057313195636293e-08, "loss": 0.0737009048461914, "step": 6876 }, { "epoch": 0.9582665644812931, "grad_norm": 1.3015509843826294, "learning_rate": 5.0239326136154454e-08, "loss": 0.11120414733886719, "step": 6877 }, { "epoch": 0.9584059081725075, "grad_norm": 0.9659198522567749, "learning_rate": 4.990662003976243e-08, "loss": 0.11145687103271484, "step": 6878 }, { "epoch": 0.9585452518637219, "grad_norm": 1.1030619144439697, "learning_rate": 4.957501374110718e-08, "loss": 0.1267099380493164, "step": 6879 }, { "epoch": 0.9586845955549362, "grad_norm": 0.7456552982330322, "learning_rate": 4.924450731386365e-08, "loss": 0.10089302062988281, "step": 6880 }, { "epoch": 0.9588239392461506, "grad_norm": 0.7111863493919373, "learning_rate": 4.8915100831463116e-08, "loss": 0.10369110107421875, "step": 6881 }, { "epoch": 0.958963282937365, "grad_norm": 0.4725085496902466, "learning_rate": 4.858679436709201e-08, "loss": 0.07193374633789062, "step": 6882 }, { "epoch": 0.9591026266285794, "grad_norm": 0.8349010348320007, "learning_rate": 4.825958799369201e-08, "loss": 0.09141731262207031, "step": 6883 }, { "epoch": 0.9592419703197937, "grad_norm": 1.0692561864852905, "learning_rate": 4.7933481783961624e-08, "loss": 0.11024284362792969, "step": 6884 }, { "epoch": 0.9593813140110081, "grad_norm": 0.5132582187652588, "learning_rate": 4.760847581035399e-08, "loss": 0.08109855651855469, "step": 6885 }, { "epoch": 0.9595206577022225, "grad_norm": 0.6772329807281494, "learning_rate": 4.728457014507859e-08, "loss": 0.09170341491699219, "step": 6886 }, { "epoch": 0.9596600013934369, "grad_norm": 0.7178216576576233, "learning_rate": 4.69617648600984e-08, "loss": 0.0803375244140625, "step": 6887 }, { "epoch": 0.9597993450846513, "grad_norm": 0.3919197618961334, "learning_rate": 4.664006002713495e-08, "loss": 0.0625457763671875, "step": 6888 }, { "epoch": 0.9599386887758656, "grad_norm": 1.2970820665359497, "learning_rate": 4.631945571766272e-08, "loss": 0.11273956298828125, "step": 6889 }, { "epoch": 0.96007803246708, "grad_norm": 0.64552241563797, "learning_rate": 4.5999952002912516e-08, "loss": 0.07755851745605469, "step": 6890 }, { "epoch": 0.9602173761582944, "grad_norm": 0.44945454597473145, "learning_rate": 4.5681548953872555e-08, "loss": 0.06511306762695312, "step": 6891 }, { "epoch": 0.9603567198495088, "grad_norm": 0.6724942326545715, "learning_rate": 4.536424664128236e-08, "loss": 0.08403968811035156, "step": 6892 }, { "epoch": 0.9604960635407231, "grad_norm": 0.691336989402771, "learning_rate": 4.504804513564054e-08, "loss": 0.08015632629394531, "step": 6893 }, { "epoch": 0.9606354072319375, "grad_norm": 0.5132117867469788, "learning_rate": 4.473294450719923e-08, "loss": 0.07036590576171875, "step": 6894 }, { "epoch": 0.9607747509231519, "grad_norm": 1.0459647178649902, "learning_rate": 4.441894482596743e-08, "loss": 0.08835601806640625, "step": 6895 }, { "epoch": 0.9609140946143664, "grad_norm": 0.4785706400871277, "learning_rate": 4.410604616170822e-08, "loss": 0.07653617858886719, "step": 6896 }, { "epoch": 0.9610534383055808, "grad_norm": 0.5179362297058105, "learning_rate": 4.379424858394043e-08, "loss": 0.07592964172363281, "step": 6897 }, { "epoch": 0.9611927819967951, "grad_norm": 0.5857897400856018, "learning_rate": 4.348355216193867e-08, "loss": 0.0870823860168457, "step": 6898 }, { "epoch": 0.9613321256880095, "grad_norm": 0.8445968627929688, "learning_rate": 4.3173956964732145e-08, "loss": 0.11390304565429688, "step": 6899 }, { "epoch": 0.9614714693792239, "grad_norm": 0.5483893752098083, "learning_rate": 4.286546306110639e-08, "loss": 0.06665992736816406, "step": 6900 }, { "epoch": 0.9616108130704383, "grad_norm": 0.8599899411201477, "learning_rate": 4.2558070519601594e-08, "loss": 0.10345268249511719, "step": 6901 }, { "epoch": 0.9617501567616527, "grad_norm": 0.6556399464607239, "learning_rate": 4.2251779408513104e-08, "loss": 0.09272003173828125, "step": 6902 }, { "epoch": 0.961889500452867, "grad_norm": 1.036983847618103, "learning_rate": 4.19465897958915e-08, "loss": 0.12169837951660156, "step": 6903 }, { "epoch": 0.9620288441440814, "grad_norm": 0.8368129134178162, "learning_rate": 4.164250174954365e-08, "loss": 0.08523178100585938, "step": 6904 }, { "epoch": 0.9621681878352958, "grad_norm": 0.7144729495048523, "learning_rate": 4.133951533703107e-08, "loss": 0.06376075744628906, "step": 6905 }, { "epoch": 0.9623075315265102, "grad_norm": 0.5714385509490967, "learning_rate": 4.1037630625669345e-08, "loss": 0.07092857360839844, "step": 6906 }, { "epoch": 0.9624468752177245, "grad_norm": 0.8161686062812805, "learning_rate": 4.07368476825315e-08, "loss": 0.09361648559570312, "step": 6907 }, { "epoch": 0.9625862189089389, "grad_norm": 0.924811065196991, "learning_rate": 4.043716657444407e-08, "loss": 0.09486007690429688, "step": 6908 }, { "epoch": 0.9627255626001533, "grad_norm": 0.6116845607757568, "learning_rate": 4.0138587367989365e-08, "loss": 0.09090805053710938, "step": 6909 }, { "epoch": 0.9628649062913677, "grad_norm": 0.5872064232826233, "learning_rate": 3.984111012950487e-08, "loss": 0.074310302734375, "step": 6910 }, { "epoch": 0.963004249982582, "grad_norm": 0.974449634552002, "learning_rate": 3.9544734925083264e-08, "loss": 0.09553909301757812, "step": 6911 }, { "epoch": 0.9631435936737964, "grad_norm": 0.7130177617073059, "learning_rate": 3.924946182057299e-08, "loss": 0.09020662307739258, "step": 6912 }, { "epoch": 0.9632829373650108, "grad_norm": 0.8483078479766846, "learning_rate": 3.8955290881576566e-08, "loss": 0.09252357482910156, "step": 6913 }, { "epoch": 0.9634222810562252, "grad_norm": 0.7453019022941589, "learning_rate": 3.866222217345117e-08, "loss": 0.08929443359375, "step": 6914 }, { "epoch": 0.9635616247474396, "grad_norm": 1.4047267436981201, "learning_rate": 3.837025576131137e-08, "loss": 0.1341562271118164, "step": 6915 }, { "epoch": 0.9637009684386539, "grad_norm": 0.7698964476585388, "learning_rate": 3.807939171002473e-08, "loss": 0.08441448211669922, "step": 6916 }, { "epoch": 0.9638403121298683, "grad_norm": 0.6681036949157715, "learning_rate": 3.778963008421455e-08, "loss": 0.08310508728027344, "step": 6917 }, { "epoch": 0.9639796558210827, "grad_norm": 0.6092724800109863, "learning_rate": 3.750097094825933e-08, "loss": 0.0725250244140625, "step": 6918 }, { "epoch": 0.9641189995122971, "grad_norm": 0.7336450219154358, "learning_rate": 3.721341436629222e-08, "loss": 0.08010101318359375, "step": 6919 }, { "epoch": 0.9642583432035114, "grad_norm": 0.7546849846839905, "learning_rate": 3.6926960402202674e-08, "loss": 0.07221412658691406, "step": 6920 }, { "epoch": 0.9643976868947258, "grad_norm": 1.5718879699707031, "learning_rate": 3.66416091196331e-08, "loss": 0.11194515228271484, "step": 6921 }, { "epoch": 0.9645370305859402, "grad_norm": 1.0272488594055176, "learning_rate": 3.63573605819828e-08, "loss": 0.12512969970703125, "step": 6922 }, { "epoch": 0.9646763742771546, "grad_norm": 0.982380211353302, "learning_rate": 3.6074214852405695e-08, "loss": 0.12227630615234375, "step": 6923 }, { "epoch": 0.964815717968369, "grad_norm": 1.0876317024230957, "learning_rate": 3.5792171993809244e-08, "loss": 0.10976982116699219, "step": 6924 }, { "epoch": 0.9649550616595833, "grad_norm": 0.3587639033794403, "learning_rate": 3.55112320688572e-08, "loss": 0.06259822845458984, "step": 6925 }, { "epoch": 0.9650944053507977, "grad_norm": 0.7947112321853638, "learning_rate": 3.523139513996798e-08, "loss": 0.10488128662109375, "step": 6926 }, { "epoch": 0.9652337490420121, "grad_norm": 0.5238937139511108, "learning_rate": 3.495266126931574e-08, "loss": 0.07625675201416016, "step": 6927 }, { "epoch": 0.9653730927332265, "grad_norm": 0.5059053301811218, "learning_rate": 3.467503051882815e-08, "loss": 0.06501960754394531, "step": 6928 }, { "epoch": 0.9655124364244408, "grad_norm": 0.9271664619445801, "learning_rate": 3.4398502950188096e-08, "loss": 0.08112525939941406, "step": 6929 }, { "epoch": 0.9656517801156552, "grad_norm": 1.1010574102401733, "learning_rate": 3.4123078624834214e-08, "loss": 0.14633941650390625, "step": 6930 }, { "epoch": 0.9657911238068696, "grad_norm": 0.7177765965461731, "learning_rate": 3.384875760395978e-08, "loss": 0.09337425231933594, "step": 6931 }, { "epoch": 0.965930467498084, "grad_norm": 0.7753693461418152, "learning_rate": 3.3575539948511595e-08, "loss": 0.105499267578125, "step": 6932 }, { "epoch": 0.9660698111892984, "grad_norm": 1.3021841049194336, "learning_rate": 3.330342571919332e-08, "loss": 0.11463165283203125, "step": 6933 }, { "epoch": 0.9662091548805127, "grad_norm": 0.6954248547554016, "learning_rate": 3.30324149764627e-08, "loss": 0.1052703857421875, "step": 6934 }, { "epoch": 0.9663484985717271, "grad_norm": 1.1224772930145264, "learning_rate": 3.2762507780531026e-08, "loss": 0.09143829345703125, "step": 6935 }, { "epoch": 0.9664878422629416, "grad_norm": 0.8414345979690552, "learning_rate": 3.249370419136644e-08, "loss": 0.11778450012207031, "step": 6936 }, { "epoch": 0.966627185954156, "grad_norm": 1.153344988822937, "learning_rate": 3.2226004268690605e-08, "loss": 0.12622451782226562, "step": 6937 }, { "epoch": 0.9667665296453704, "grad_norm": 0.9546419978141785, "learning_rate": 3.195940807198039e-08, "loss": 0.0948190689086914, "step": 6938 }, { "epoch": 0.9669058733365847, "grad_norm": 1.326406717300415, "learning_rate": 3.169391566046731e-08, "loss": 0.12396049499511719, "step": 6939 }, { "epoch": 0.9670452170277991, "grad_norm": 0.3992061913013458, "learning_rate": 3.142952709313807e-08, "loss": 0.06042337417602539, "step": 6940 }, { "epoch": 0.9671845607190135, "grad_norm": 0.8179708123207092, "learning_rate": 3.116624242873345e-08, "loss": 0.08469009399414062, "step": 6941 }, { "epoch": 0.9673239044102279, "grad_norm": 0.6887122392654419, "learning_rate": 3.090406172574889e-08, "loss": 0.09372901916503906, "step": 6942 }, { "epoch": 0.9674632481014422, "grad_norm": 0.751213014125824, "learning_rate": 3.064298504243612e-08, "loss": 0.09104156494140625, "step": 6943 }, { "epoch": 0.9676025917926566, "grad_norm": 0.9222365617752075, "learning_rate": 3.0383012436799306e-08, "loss": 0.06993579864501953, "step": 6944 }, { "epoch": 0.967741935483871, "grad_norm": 0.7163318991661072, "learning_rate": 3.0124143966599464e-08, "loss": 0.07554817199707031, "step": 6945 }, { "epoch": 0.9678812791750854, "grad_norm": 1.0364458560943604, "learning_rate": 2.9866379689350024e-08, "loss": 0.08027267456054688, "step": 6946 }, { "epoch": 0.9680206228662998, "grad_norm": 1.4350978136062622, "learning_rate": 2.9609719662320735e-08, "loss": 0.11433267593383789, "step": 6947 }, { "epoch": 0.9681599665575141, "grad_norm": 0.7372121810913086, "learning_rate": 2.9354163942535983e-08, "loss": 0.0782012939453125, "step": 6948 }, { "epoch": 0.9682993102487285, "grad_norm": 1.2425594329833984, "learning_rate": 2.90997125867748e-08, "loss": 0.11361503601074219, "step": 6949 }, { "epoch": 0.9684386539399429, "grad_norm": 0.8993390798568726, "learning_rate": 2.8846365651569175e-08, "loss": 0.10361766815185547, "step": 6950 }, { "epoch": 0.9685779976311573, "grad_norm": 1.1460340023040771, "learning_rate": 2.8594123193207978e-08, "loss": 0.10834789276123047, "step": 6951 }, { "epoch": 0.9687173413223716, "grad_norm": 0.4331074655056, "learning_rate": 2.83429852677336e-08, "loss": 0.05743885040283203, "step": 6952 }, { "epoch": 0.968856685013586, "grad_norm": 0.4935552775859833, "learning_rate": 2.809295193094308e-08, "loss": 0.07543087005615234, "step": 6953 }, { "epoch": 0.9689960287048004, "grad_norm": 0.9736174941062927, "learning_rate": 2.7844023238388084e-08, "loss": 0.10740470886230469, "step": 6954 }, { "epoch": 0.9691353723960148, "grad_norm": 1.2711883783340454, "learning_rate": 2.759619924537438e-08, "loss": 0.0909423828125, "step": 6955 }, { "epoch": 0.9692747160872291, "grad_norm": 0.44883257150650024, "learning_rate": 2.7349480006964023e-08, "loss": 0.0764312744140625, "step": 6956 }, { "epoch": 0.9694140597784435, "grad_norm": 0.7679431438446045, "learning_rate": 2.7103865577970955e-08, "loss": 0.09897232055664062, "step": 6957 }, { "epoch": 0.9695534034696579, "grad_norm": 0.6743978261947632, "learning_rate": 2.6859356012965964e-08, "loss": 0.07907485961914062, "step": 6958 }, { "epoch": 0.9696927471608723, "grad_norm": 0.849498987197876, "learning_rate": 2.661595136627393e-08, "loss": 0.09492874145507812, "step": 6959 }, { "epoch": 0.9698320908520867, "grad_norm": 0.8932749032974243, "learning_rate": 2.63736516919727e-08, "loss": 0.08839702606201172, "step": 6960 }, { "epoch": 0.969971434543301, "grad_norm": 0.6870079636573792, "learning_rate": 2.6132457043896442e-08, "loss": 0.08525276184082031, "step": 6961 }, { "epoch": 0.9701107782345154, "grad_norm": 0.6244803667068481, "learning_rate": 2.589236747563284e-08, "loss": 0.07901906967163086, "step": 6962 }, { "epoch": 0.9702501219257298, "grad_norm": 0.39538702368736267, "learning_rate": 2.5653383040524228e-08, "loss": 0.06543540954589844, "step": 6963 }, { "epoch": 0.9703894656169442, "grad_norm": 0.8335169553756714, "learning_rate": 2.5415503791667573e-08, "loss": 0.09360122680664062, "step": 6964 }, { "epoch": 0.9705288093081585, "grad_norm": 0.8737971186637878, "learning_rate": 2.5178729781915046e-08, "loss": 0.0940704345703125, "step": 6965 }, { "epoch": 0.9706681529993729, "grad_norm": 1.036914348602295, "learning_rate": 2.4943061063870678e-08, "loss": 0.10273933410644531, "step": 6966 }, { "epoch": 0.9708074966905873, "grad_norm": 0.5140239596366882, "learning_rate": 2.4708497689896472e-08, "loss": 0.08123588562011719, "step": 6967 }, { "epoch": 0.9709468403818017, "grad_norm": 0.49494442343711853, "learning_rate": 2.4475039712105742e-08, "loss": 0.07425308227539062, "step": 6968 }, { "epoch": 0.9710861840730161, "grad_norm": 0.45705437660217285, "learning_rate": 2.4242687182368106e-08, "loss": 0.06390094757080078, "step": 6969 }, { "epoch": 0.9712255277642304, "grad_norm": 0.848262369632721, "learning_rate": 2.401144015230672e-08, "loss": 0.09015178680419922, "step": 6970 }, { "epoch": 0.9713648714554448, "grad_norm": 0.9067292213439941, "learning_rate": 2.3781298673299924e-08, "loss": 0.09714698791503906, "step": 6971 }, { "epoch": 0.9715042151466592, "grad_norm": 0.6733032464981079, "learning_rate": 2.3552262796479042e-08, "loss": 0.08220481872558594, "step": 6972 }, { "epoch": 0.9716435588378736, "grad_norm": 0.46832069754600525, "learning_rate": 2.33243325727317e-08, "loss": 0.07728910446166992, "step": 6973 }, { "epoch": 0.9717829025290879, "grad_norm": 0.4226600229740143, "learning_rate": 2.3097508052697948e-08, "loss": 0.06753349304199219, "step": 6974 }, { "epoch": 0.9719222462203023, "grad_norm": 0.7696413993835449, "learning_rate": 2.2871789286773582e-08, "loss": 0.07946395874023438, "step": 6975 }, { "epoch": 0.9720615899115168, "grad_norm": 1.0251203775405884, "learning_rate": 2.264717632510738e-08, "loss": 0.09759712219238281, "step": 6976 }, { "epoch": 0.9722009336027312, "grad_norm": 0.9804015755653381, "learning_rate": 2.2423669217604415e-08, "loss": 0.11968612670898438, "step": 6977 }, { "epoch": 0.9723402772939456, "grad_norm": 0.7547821402549744, "learning_rate": 2.220126801392164e-08, "loss": 0.09961128234863281, "step": 6978 }, { "epoch": 0.9724796209851599, "grad_norm": 0.7269533276557922, "learning_rate": 2.1979972763471747e-08, "loss": 0.08259010314941406, "step": 6979 }, { "epoch": 0.9726189646763743, "grad_norm": 0.9362820386886597, "learning_rate": 2.1759783515422074e-08, "loss": 0.13497543334960938, "step": 6980 }, { "epoch": 0.9727583083675887, "grad_norm": 1.3470745086669922, "learning_rate": 2.1540700318693487e-08, "loss": 0.11382675170898438, "step": 6981 }, { "epoch": 0.9728976520588031, "grad_norm": 1.52419912815094, "learning_rate": 2.132272322196094e-08, "loss": 0.09410285949707031, "step": 6982 }, { "epoch": 0.9730369957500175, "grad_norm": 1.1078945398330688, "learning_rate": 2.110585227365458e-08, "loss": 0.10717201232910156, "step": 6983 }, { "epoch": 0.9731763394412318, "grad_norm": 0.7363792657852173, "learning_rate": 2.0890087521957536e-08, "loss": 0.08381843566894531, "step": 6984 }, { "epoch": 0.9733156831324462, "grad_norm": 0.8926404714584351, "learning_rate": 2.0675429014807568e-08, "loss": 0.07818889617919922, "step": 6985 }, { "epoch": 0.9734550268236606, "grad_norm": 1.0020580291748047, "learning_rate": 2.0461876799898196e-08, "loss": 0.09192085266113281, "step": 6986 }, { "epoch": 0.973594370514875, "grad_norm": 0.7929787039756775, "learning_rate": 2.024943092467424e-08, "loss": 0.09904098510742188, "step": 6987 }, { "epoch": 0.9737337142060893, "grad_norm": 0.6482223868370056, "learning_rate": 2.0038091436337392e-08, "loss": 0.08881378173828125, "step": 6988 }, { "epoch": 0.9738730578973037, "grad_norm": 0.7310747504234314, "learning_rate": 1.9827858381842312e-08, "loss": 0.10197639465332031, "step": 6989 }, { "epoch": 0.9740124015885181, "grad_norm": 0.9437008500099182, "learning_rate": 1.961873180789775e-08, "loss": 0.07380390167236328, "step": 6990 }, { "epoch": 0.9741517452797325, "grad_norm": 0.8263393640518188, "learning_rate": 1.9410711760967092e-08, "loss": 0.08027076721191406, "step": 6991 }, { "epoch": 0.9742910889709469, "grad_norm": 0.397605836391449, "learning_rate": 1.920379828726726e-08, "loss": 0.06842327117919922, "step": 6992 }, { "epoch": 0.9744304326621612, "grad_norm": 0.9430391788482666, "learning_rate": 1.8997991432769812e-08, "loss": 0.10279083251953125, "step": 6993 }, { "epoch": 0.9745697763533756, "grad_norm": 0.5930535793304443, "learning_rate": 1.8793291243200396e-08, "loss": 0.0802469253540039, "step": 6994 }, { "epoch": 0.97470912004459, "grad_norm": 0.6228017807006836, "learning_rate": 1.8589697764039295e-08, "loss": 0.08641433715820312, "step": 6995 }, { "epoch": 0.9748484637358044, "grad_norm": 1.1160913705825806, "learning_rate": 1.8387211040519216e-08, "loss": 0.11009979248046875, "step": 6996 }, { "epoch": 0.9749878074270187, "grad_norm": 0.615450382232666, "learning_rate": 1.818583111762917e-08, "loss": 0.06256675720214844, "step": 6997 }, { "epoch": 0.9751271511182331, "grad_norm": 1.1067078113555908, "learning_rate": 1.7985558040110594e-08, "loss": 0.10637474060058594, "step": 6998 }, { "epoch": 0.9752664948094475, "grad_norm": 0.7226575613021851, "learning_rate": 1.778639185245956e-08, "loss": 0.09715652465820312, "step": 6999 }, { "epoch": 0.9754058385006619, "grad_norm": 0.6849076151847839, "learning_rate": 1.758833259892623e-08, "loss": 0.08258438110351562, "step": 7000 }, { "epoch": 0.9755451821918762, "grad_norm": 0.8845155239105225, "learning_rate": 1.7391380323515395e-08, "loss": 0.08946800231933594, "step": 7001 }, { "epoch": 0.9756845258830906, "grad_norm": 1.289367437362671, "learning_rate": 1.7195535069984838e-08, "loss": 0.10545921325683594, "step": 7002 }, { "epoch": 0.975823869574305, "grad_norm": 0.73397296667099, "learning_rate": 1.700079688184697e-08, "loss": 0.10626983642578125, "step": 7003 }, { "epoch": 0.9759632132655194, "grad_norm": 0.9645708203315735, "learning_rate": 1.6807165802368297e-08, "loss": 0.10767555236816406, "step": 7004 }, { "epoch": 0.9761025569567338, "grad_norm": 0.6763968467712402, "learning_rate": 1.661464187456885e-08, "loss": 0.07717704772949219, "step": 7005 }, { "epoch": 0.9762419006479481, "grad_norm": 0.9028957486152649, "learning_rate": 1.6423225141223854e-08, "loss": 0.10978889465332031, "step": 7006 }, { "epoch": 0.9763812443391625, "grad_norm": 0.7951579689979553, "learning_rate": 1.623291564486096e-08, "loss": 0.07509708404541016, "step": 7007 }, { "epoch": 0.9765205880303769, "grad_norm": 0.859541654586792, "learning_rate": 1.604371342776301e-08, "loss": 0.07907485961914062, "step": 7008 }, { "epoch": 0.9766599317215913, "grad_norm": 0.47197848558425903, "learning_rate": 1.585561853196582e-08, "loss": 0.072235107421875, "step": 7009 }, { "epoch": 0.9767992754128056, "grad_norm": 0.9843536019325256, "learning_rate": 1.5668630999260968e-08, "loss": 0.09526538848876953, "step": 7010 }, { "epoch": 0.97693861910402, "grad_norm": 0.5896551012992859, "learning_rate": 1.5482750871191333e-08, "loss": 0.06244659423828125, "step": 7011 }, { "epoch": 0.9770779627952344, "grad_norm": 1.1334093809127808, "learning_rate": 1.529797818905665e-08, "loss": 0.12310218811035156, "step": 7012 }, { "epoch": 0.9772173064864488, "grad_norm": 0.6493598222732544, "learning_rate": 1.5114312993908532e-08, "loss": 0.0737767219543457, "step": 7013 }, { "epoch": 0.9773566501776632, "grad_norm": 0.8221216797828674, "learning_rate": 1.4931755326552667e-08, "loss": 0.101409912109375, "step": 7014 }, { "epoch": 0.9774959938688775, "grad_norm": 0.964944064617157, "learning_rate": 1.4750305227549943e-08, "loss": 0.10716915130615234, "step": 7015 }, { "epoch": 0.977635337560092, "grad_norm": 1.151052713394165, "learning_rate": 1.4569962737214228e-08, "loss": 0.11777877807617188, "step": 7016 }, { "epoch": 0.9777746812513064, "grad_norm": 0.6541749238967896, "learning_rate": 1.4390727895613465e-08, "loss": 0.08729171752929688, "step": 7017 }, { "epoch": 0.9779140249425208, "grad_norm": 1.1931053400039673, "learning_rate": 1.4212600742569694e-08, "loss": 0.1013946533203125, "step": 7018 }, { "epoch": 0.9780533686337352, "grad_norm": 0.5630456805229187, "learning_rate": 1.4035581317658476e-08, "loss": 0.072296142578125, "step": 7019 }, { "epoch": 0.9781927123249495, "grad_norm": 1.3467321395874023, "learning_rate": 1.3859669660209463e-08, "loss": 0.14226150512695312, "step": 7020 }, { "epoch": 0.9783320560161639, "grad_norm": 0.5659570693969727, "learning_rate": 1.368486580930639e-08, "loss": 0.08168792724609375, "step": 7021 }, { "epoch": 0.9784713997073783, "grad_norm": 0.5668736696243286, "learning_rate": 1.3511169803786527e-08, "loss": 0.0865936279296875, "step": 7022 }, { "epoch": 0.9786107433985927, "grad_norm": 0.7574331164360046, "learning_rate": 1.333858168224178e-08, "loss": 0.10008430480957031, "step": 7023 }, { "epoch": 0.978750087089807, "grad_norm": 0.6221814155578613, "learning_rate": 1.3167101483016476e-08, "loss": 0.07824325561523438, "step": 7024 }, { "epoch": 0.9788894307810214, "grad_norm": 0.5572243928909302, "learning_rate": 1.2996729244209583e-08, "loss": 0.06711578369140625, "step": 7025 }, { "epoch": 0.9790287744722358, "grad_norm": 0.4251595437526703, "learning_rate": 1.282746500367471e-08, "loss": 0.06768226623535156, "step": 7026 }, { "epoch": 0.9791681181634502, "grad_norm": 1.2519887685775757, "learning_rate": 1.2659308799017889e-08, "loss": 0.1333599090576172, "step": 7027 }, { "epoch": 0.9793074618546646, "grad_norm": 0.4966178834438324, "learning_rate": 1.2492260667599232e-08, "loss": 0.06989479064941406, "step": 7028 }, { "epoch": 0.9794468055458789, "grad_norm": 0.7427707314491272, "learning_rate": 1.2326320646534051e-08, "loss": 0.08576774597167969, "step": 7029 }, { "epoch": 0.9795861492370933, "grad_norm": 1.1143219470977783, "learning_rate": 1.2161488772690077e-08, "loss": 0.10370826721191406, "step": 7030 }, { "epoch": 0.9797254929283077, "grad_norm": 0.8615458011627197, "learning_rate": 1.1997765082688573e-08, "loss": 0.0969696044921875, "step": 7031 }, { "epoch": 0.9798648366195221, "grad_norm": 0.6372317671775818, "learning_rate": 1.1835149612905438e-08, "loss": 0.08777999877929688, "step": 7032 }, { "epoch": 0.9800041803107364, "grad_norm": 0.479191392660141, "learning_rate": 1.1673642399470663e-08, "loss": 0.07712745666503906, "step": 7033 }, { "epoch": 0.9801435240019508, "grad_norm": 0.42858073115348816, "learning_rate": 1.1513243478267211e-08, "loss": 0.0724935531616211, "step": 7034 }, { "epoch": 0.9802828676931652, "grad_norm": 0.690813422203064, "learning_rate": 1.135395288493213e-08, "loss": 0.09622764587402344, "step": 7035 }, { "epoch": 0.9804222113843796, "grad_norm": 0.4570613503456116, "learning_rate": 1.1195770654855443e-08, "loss": 0.07172775268554688, "step": 7036 }, { "epoch": 0.980561555075594, "grad_norm": 0.6349123120307922, "learning_rate": 1.1038696823182372e-08, "loss": 0.08044815063476562, "step": 7037 }, { "epoch": 0.9807008987668083, "grad_norm": 0.6452863216400146, "learning_rate": 1.088273142481111e-08, "loss": 0.07652091979980469, "step": 7038 }, { "epoch": 0.9808402424580227, "grad_norm": 0.6628211736679077, "learning_rate": 1.0727874494393386e-08, "loss": 0.07785606384277344, "step": 7039 }, { "epoch": 0.9809795861492371, "grad_norm": 1.2272061109542847, "learning_rate": 1.0574126066335011e-08, "loss": 0.09360599517822266, "step": 7040 }, { "epoch": 0.9811189298404515, "grad_norm": 0.8383199572563171, "learning_rate": 1.0421486174795326e-08, "loss": 0.09041786193847656, "step": 7041 }, { "epoch": 0.9812582735316658, "grad_norm": 0.5226139426231384, "learning_rate": 1.0269954853687202e-08, "loss": 0.07885360717773438, "step": 7042 }, { "epoch": 0.9813976172228802, "grad_norm": 0.5321347117424011, "learning_rate": 1.01195321366776e-08, "loss": 0.06902027130126953, "step": 7043 }, { "epoch": 0.9815369609140946, "grad_norm": 0.7037310004234314, "learning_rate": 9.970218057187009e-09, "loss": 0.07598495483398438, "step": 7044 }, { "epoch": 0.981676304605309, "grad_norm": 0.8321195244789124, "learning_rate": 9.82201264839e-09, "loss": 0.09591388702392578, "step": 7045 }, { "epoch": 0.9818156482965233, "grad_norm": 0.4460492730140686, "learning_rate": 9.67491594321357e-09, "loss": 0.07325172424316406, "step": 7046 }, { "epoch": 0.9819549919877377, "grad_norm": 0.6636959910392761, "learning_rate": 9.528927974339908e-09, "loss": 0.0744028091430664, "step": 7047 }, { "epoch": 0.9820943356789521, "grad_norm": 0.9938888549804688, "learning_rate": 9.38404877420418e-09, "loss": 0.10747909545898438, "step": 7048 }, { "epoch": 0.9822336793701665, "grad_norm": 0.3189326524734497, "learning_rate": 9.240278374995637e-09, "loss": 0.0577850341796875, "step": 7049 }, { "epoch": 0.9823730230613809, "grad_norm": 0.8515139222145081, "learning_rate": 9.097616808655396e-09, "loss": 0.10699176788330078, "step": 7050 }, { "epoch": 0.9825123667525952, "grad_norm": 0.5432673096656799, "learning_rate": 8.95606410688088e-09, "loss": 0.0965871810913086, "step": 7051 }, { "epoch": 0.9826517104438096, "grad_norm": 0.6895435452461243, "learning_rate": 8.815620301121375e-09, "loss": 0.08440017700195312, "step": 7052 }, { "epoch": 0.982791054135024, "grad_norm": 0.8605464100837708, "learning_rate": 8.676285422580255e-09, "loss": 0.08754158020019531, "step": 7053 }, { "epoch": 0.9829303978262384, "grad_norm": 0.4560835063457489, "learning_rate": 8.538059502214979e-09, "loss": 0.0761251449584961, "step": 7054 }, { "epoch": 0.9830697415174527, "grad_norm": 0.5302451848983765, "learning_rate": 8.400942570735427e-09, "loss": 0.08742332458496094, "step": 7055 }, { "epoch": 0.9832090852086672, "grad_norm": 1.3357703685760498, "learning_rate": 8.264934658606672e-09, "loss": 0.09009742736816406, "step": 7056 }, { "epoch": 0.9833484288998816, "grad_norm": 0.6167078018188477, "learning_rate": 8.13003579604621e-09, "loss": 0.07796669006347656, "step": 7057 }, { "epoch": 0.983487772591096, "grad_norm": 0.612398087978363, "learning_rate": 7.996246013025067e-09, "loss": 0.08025884628295898, "step": 7058 }, { "epoch": 0.9836271162823104, "grad_norm": 0.8811931610107422, "learning_rate": 7.863565339268908e-09, "loss": 0.09348297119140625, "step": 7059 }, { "epoch": 0.9837664599735247, "grad_norm": 0.45422112941741943, "learning_rate": 7.731993804256378e-09, "loss": 0.07649993896484375, "step": 7060 }, { "epoch": 0.9839058036647391, "grad_norm": 0.49077093601226807, "learning_rate": 7.60153143721909e-09, "loss": 0.06572723388671875, "step": 7061 }, { "epoch": 0.9840451473559535, "grad_norm": 1.0366864204406738, "learning_rate": 7.472178267143304e-09, "loss": 0.09517478942871094, "step": 7062 }, { "epoch": 0.9841844910471679, "grad_norm": 1.1760421991348267, "learning_rate": 7.343934322767699e-09, "loss": 0.12604713439941406, "step": 7063 }, { "epoch": 0.9843238347383823, "grad_norm": 0.8061408996582031, "learning_rate": 7.216799632586147e-09, "loss": 0.09443092346191406, "step": 7064 }, { "epoch": 0.9844631784295966, "grad_norm": 0.4353530704975128, "learning_rate": 7.0907742248443875e-09, "loss": 0.05804252624511719, "step": 7065 }, { "epoch": 0.984602522120811, "grad_norm": 0.5937380790710449, "learning_rate": 6.965858127542247e-09, "loss": 0.07758903503417969, "step": 7066 }, { "epoch": 0.9847418658120254, "grad_norm": 0.8233128786087036, "learning_rate": 6.842051368433633e-09, "loss": 0.10089874267578125, "step": 7067 }, { "epoch": 0.9848812095032398, "grad_norm": 0.8986954092979431, "learning_rate": 6.719353975025989e-09, "loss": 0.0816650390625, "step": 7068 }, { "epoch": 0.9850205531944541, "grad_norm": 0.6605030298233032, "learning_rate": 6.5977659745786185e-09, "loss": 0.08933639526367188, "step": 7069 }, { "epoch": 0.9851598968856685, "grad_norm": 0.9717810750007629, "learning_rate": 6.477287394107134e-09, "loss": 0.10745048522949219, "step": 7070 }, { "epoch": 0.9852992405768829, "grad_norm": 2.026381492614746, "learning_rate": 6.357918260377349e-09, "loss": 0.13467025756835938, "step": 7071 }, { "epoch": 0.9854385842680973, "grad_norm": 0.4730115234851837, "learning_rate": 6.239658599911935e-09, "loss": 0.0729982852935791, "step": 7072 }, { "epoch": 0.9855779279593117, "grad_norm": 0.7307688593864441, "learning_rate": 6.122508438984875e-09, "loss": 0.07136154174804688, "step": 7073 }, { "epoch": 0.985717271650526, "grad_norm": 1.2961463928222656, "learning_rate": 6.0064678036242385e-09, "loss": 0.12688350677490234, "step": 7074 }, { "epoch": 0.9858566153417404, "grad_norm": 0.6997078657150269, "learning_rate": 5.891536719611624e-09, "loss": 0.08519649505615234, "step": 7075 }, { "epoch": 0.9859959590329548, "grad_norm": 1.0115678310394287, "learning_rate": 5.77771521248216e-09, "loss": 0.11094379425048828, "step": 7076 }, { "epoch": 0.9861353027241692, "grad_norm": 0.7769637107849121, "learning_rate": 5.665003307524508e-09, "loss": 0.10552787780761719, "step": 7077 }, { "epoch": 0.9862746464153835, "grad_norm": 0.45661649107933044, "learning_rate": 5.5534010297803034e-09, "loss": 0.06548690795898438, "step": 7078 }, { "epoch": 0.9864139901065979, "grad_norm": 0.9305257797241211, "learning_rate": 5.4429084040452665e-09, "loss": 0.10107612609863281, "step": 7079 }, { "epoch": 0.9865533337978123, "grad_norm": 0.40940842032432556, "learning_rate": 5.333525454868094e-09, "loss": 0.07415008544921875, "step": 7080 }, { "epoch": 0.9866926774890267, "grad_norm": 0.8120054602622986, "learning_rate": 5.225252206551568e-09, "loss": 0.07533836364746094, "step": 7081 }, { "epoch": 0.986832021180241, "grad_norm": 1.0443311929702759, "learning_rate": 5.118088683151445e-09, "loss": 0.1077117919921875, "step": 7082 }, { "epoch": 0.9869713648714554, "grad_norm": 0.7624261379241943, "learning_rate": 5.01203490847646e-09, "loss": 0.09784317016601562, "step": 7083 }, { "epoch": 0.9871107085626698, "grad_norm": 0.7542126178741455, "learning_rate": 4.907090906090539e-09, "loss": 0.08936882019042969, "step": 7084 }, { "epoch": 0.9872500522538842, "grad_norm": 0.6522624492645264, "learning_rate": 4.803256699308923e-09, "loss": 0.0766754150390625, "step": 7085 }, { "epoch": 0.9873893959450986, "grad_norm": 0.5591641664505005, "learning_rate": 4.700532311200934e-09, "loss": 0.08740615844726562, "step": 7086 }, { "epoch": 0.9875287396363129, "grad_norm": 0.7526242733001709, "learning_rate": 4.598917764590538e-09, "loss": 0.10594749450683594, "step": 7087 }, { "epoch": 0.9876680833275273, "grad_norm": 0.797370970249176, "learning_rate": 4.498413082053566e-09, "loss": 0.06959056854248047, "step": 7088 }, { "epoch": 0.9878074270187417, "grad_norm": 1.4806652069091797, "learning_rate": 4.399018285919376e-09, "loss": 0.1138916015625, "step": 7089 }, { "epoch": 0.9879467707099561, "grad_norm": 1.041988492012024, "learning_rate": 4.300733398272528e-09, "loss": 0.09145736694335938, "step": 7090 }, { "epoch": 0.9880861144011704, "grad_norm": 0.8755418658256531, "learning_rate": 4.203558440948885e-09, "loss": 0.10593128204345703, "step": 7091 }, { "epoch": 0.9882254580923848, "grad_norm": 0.5652458667755127, "learning_rate": 4.1074934355384015e-09, "loss": 0.08513069152832031, "step": 7092 }, { "epoch": 0.9883648017835992, "grad_norm": 0.7299976944923401, "learning_rate": 4.0125384033845586e-09, "loss": 0.07551956176757812, "step": 7093 }, { "epoch": 0.9885041454748136, "grad_norm": 1.011225700378418, "learning_rate": 3.91869336558437e-09, "loss": 0.12079429626464844, "step": 7094 }, { "epoch": 0.988643489166028, "grad_norm": 0.600803017616272, "learning_rate": 3.8259583429883785e-09, "loss": 0.08413505554199219, "step": 7095 }, { "epoch": 0.9887828328572423, "grad_norm": 0.6169756054878235, "learning_rate": 3.734333356199548e-09, "loss": 0.07680702209472656, "step": 7096 }, { "epoch": 0.9889221765484568, "grad_norm": 0.42319613695144653, "learning_rate": 3.643818425575485e-09, "loss": 0.07709789276123047, "step": 7097 }, { "epoch": 0.9890615202396712, "grad_norm": 0.8842945098876953, "learning_rate": 3.5544135712262116e-09, "loss": 0.09791374206542969, "step": 7098 }, { "epoch": 0.9892008639308856, "grad_norm": 0.8796135187149048, "learning_rate": 3.4661188130147295e-09, "loss": 0.09747314453125, "step": 7099 }, { "epoch": 0.9893402076221, "grad_norm": 0.711597740650177, "learning_rate": 3.378934170559789e-09, "loss": 0.10215568542480469, "step": 7100 }, { "epoch": 0.9894795513133143, "grad_norm": 0.9444085955619812, "learning_rate": 3.292859663230341e-09, "loss": 0.08955574035644531, "step": 7101 }, { "epoch": 0.9896188950045287, "grad_norm": 1.0436512231826782, "learning_rate": 3.207895310150533e-09, "loss": 0.09430694580078125, "step": 7102 }, { "epoch": 0.9897582386957431, "grad_norm": 1.3529603481292725, "learning_rate": 3.1240411301980413e-09, "loss": 0.13432693481445312, "step": 7103 }, { "epoch": 0.9898975823869575, "grad_norm": 0.569800078868866, "learning_rate": 3.0412971420029636e-09, "loss": 0.08498954772949219, "step": 7104 }, { "epoch": 0.9900369260781718, "grad_norm": 1.0087289810180664, "learning_rate": 2.959663363949483e-09, "loss": 0.0864877700805664, "step": 7105 }, { "epoch": 0.9901762697693862, "grad_norm": 0.5763134360313416, "learning_rate": 2.8791398141736484e-09, "loss": 0.08817386627197266, "step": 7106 }, { "epoch": 0.9903156134606006, "grad_norm": 1.0112745761871338, "learning_rate": 2.799726510567258e-09, "loss": 0.1196298599243164, "step": 7107 }, { "epoch": 0.990454957151815, "grad_norm": 0.5208508372306824, "learning_rate": 2.721423470773421e-09, "loss": 0.06596755981445312, "step": 7108 }, { "epoch": 0.9905943008430294, "grad_norm": 0.771122395992279, "learning_rate": 2.644230712189888e-09, "loss": 0.07648372650146484, "step": 7109 }, { "epoch": 0.9907336445342437, "grad_norm": 0.5061673521995544, "learning_rate": 2.5681482519662736e-09, "loss": 0.07416152954101562, "step": 7110 }, { "epoch": 0.9908729882254581, "grad_norm": 0.573085606098175, "learning_rate": 2.493176107006834e-09, "loss": 0.07870101928710938, "step": 7111 }, { "epoch": 0.9910123319166725, "grad_norm": 0.676785409450531, "learning_rate": 2.4193142939687996e-09, "loss": 0.08646011352539062, "step": 7112 }, { "epoch": 0.9911516756078869, "grad_norm": 0.836001455783844, "learning_rate": 2.3465628292623776e-09, "loss": 0.0866098403930664, "step": 7113 }, { "epoch": 0.9912910192991012, "grad_norm": 0.8345834612846375, "learning_rate": 2.2749217290513048e-09, "loss": 0.10691642761230469, "step": 7114 }, { "epoch": 0.9914303629903156, "grad_norm": 0.5392050743103027, "learning_rate": 2.2043910092522935e-09, "loss": 0.09264373779296875, "step": 7115 }, { "epoch": 0.99156970668153, "grad_norm": 1.0321967601776123, "learning_rate": 2.134970685536697e-09, "loss": 0.1334228515625, "step": 7116 }, { "epoch": 0.9917090503727444, "grad_norm": 0.5054229497909546, "learning_rate": 2.066660773326623e-09, "loss": 0.07532501220703125, "step": 7117 }, { "epoch": 0.9918483940639587, "grad_norm": 0.8185880184173584, "learning_rate": 1.999461287800486e-09, "loss": 0.0960540771484375, "step": 7118 }, { "epoch": 0.9919877377551731, "grad_norm": 1.0752118825912476, "learning_rate": 1.9333722438874548e-09, "loss": 0.08437442779541016, "step": 7119 }, { "epoch": 0.9921270814463875, "grad_norm": 0.5374956130981445, "learning_rate": 1.868393656271339e-09, "loss": 0.0755777359008789, "step": 7120 }, { "epoch": 0.9922664251376019, "grad_norm": 1.0852715969085693, "learning_rate": 1.8045255393889238e-09, "loss": 0.10752105712890625, "step": 7121 }, { "epoch": 0.9924057688288163, "grad_norm": 0.6398861408233643, "learning_rate": 1.7417679074299698e-09, "loss": 0.07811832427978516, "step": 7122 }, { "epoch": 0.9925451125200306, "grad_norm": 0.9517158269882202, "learning_rate": 1.680120774338323e-09, "loss": 0.08537435531616211, "step": 7123 }, { "epoch": 0.992684456211245, "grad_norm": 0.743435800075531, "learning_rate": 1.6195841538096947e-09, "loss": 0.08952522277832031, "step": 7124 }, { "epoch": 0.9928237999024594, "grad_norm": 0.6796303987503052, "learning_rate": 1.5601580592949916e-09, "loss": 0.08238983154296875, "step": 7125 }, { "epoch": 0.9929631435936738, "grad_norm": 0.6787546873092651, "learning_rate": 1.5018425039969864e-09, "loss": 0.08163261413574219, "step": 7126 }, { "epoch": 0.9931024872848881, "grad_norm": 0.7028850317001343, "learning_rate": 1.4446375008714264e-09, "loss": 0.08557891845703125, "step": 7127 }, { "epoch": 0.9932418309761025, "grad_norm": 0.7698436975479126, "learning_rate": 1.3885430626287e-09, "loss": 0.09356880187988281, "step": 7128 }, { "epoch": 0.9933811746673169, "grad_norm": 0.7276813387870789, "learning_rate": 1.3335592017316156e-09, "loss": 0.09911346435546875, "step": 7129 }, { "epoch": 0.9935205183585313, "grad_norm": 0.6571632623672485, "learning_rate": 1.2796859303959575e-09, "loss": 0.07279682159423828, "step": 7130 }, { "epoch": 0.9936598620497457, "grad_norm": 0.3950362503528595, "learning_rate": 1.2269232605915948e-09, "loss": 0.06231689453125, "step": 7131 }, { "epoch": 0.99379920574096, "grad_norm": 1.2500144243240356, "learning_rate": 1.1752712040408176e-09, "loss": 0.11310577392578125, "step": 7132 }, { "epoch": 0.9939385494321744, "grad_norm": 0.6180011034011841, "learning_rate": 1.124729772219446e-09, "loss": 0.08819961547851562, "step": 7133 }, { "epoch": 0.9940778931233888, "grad_norm": 1.0690141916275024, "learning_rate": 1.075298976356831e-09, "loss": 0.09407234191894531, "step": 7134 }, { "epoch": 0.9942172368146032, "grad_norm": 1.0636507272720337, "learning_rate": 1.026978827435854e-09, "loss": 0.12105751037597656, "step": 7135 }, { "epoch": 0.9943565805058175, "grad_norm": 0.46588316559791565, "learning_rate": 9.797693361912607e-10, "loss": 0.07201385498046875, "step": 7136 }, { "epoch": 0.994495924197032, "grad_norm": 1.0136581659317017, "learning_rate": 9.33670513112439e-10, "loss": 0.11342620849609375, "step": 7137 }, { "epoch": 0.9946352678882464, "grad_norm": 1.0118143558502197, "learning_rate": 8.886823684417512e-10, "loss": 0.10509300231933594, "step": 7138 }, { "epoch": 0.9947746115794608, "grad_norm": 0.7133252620697021, "learning_rate": 8.448049121739798e-10, "loss": 0.09563255310058594, "step": 7139 }, { "epoch": 0.9949139552706752, "grad_norm": 0.9925166368484497, "learning_rate": 8.020381540579936e-10, "loss": 0.12166213989257812, "step": 7140 }, { "epoch": 0.9950532989618895, "grad_norm": 1.0537376403808594, "learning_rate": 7.603821035950809e-10, "loss": 0.09594917297363281, "step": 7141 }, { "epoch": 0.9951926426531039, "grad_norm": 0.9251006841659546, "learning_rate": 7.198367700411712e-10, "loss": 0.08883380889892578, "step": 7142 }, { "epoch": 0.9953319863443183, "grad_norm": 1.0057358741760254, "learning_rate": 6.80402162403504e-10, "loss": 0.09448051452636719, "step": 7143 }, { "epoch": 0.9954713300355327, "grad_norm": 1.1703524589538574, "learning_rate": 6.420782894445144e-10, "loss": 0.10370063781738281, "step": 7144 }, { "epoch": 0.995610673726747, "grad_norm": 0.5568293929100037, "learning_rate": 6.048651596785027e-10, "loss": 0.06656360626220703, "step": 7145 }, { "epoch": 0.9957500174179614, "grad_norm": 0.5805951356887817, "learning_rate": 5.687627813727448e-10, "loss": 0.07762432098388672, "step": 7146 }, { "epoch": 0.9958893611091758, "grad_norm": 0.5683309435844421, "learning_rate": 5.337711625497122e-10, "loss": 0.07901763916015625, "step": 7147 }, { "epoch": 0.9960287048003902, "grad_norm": 0.8345100283622742, "learning_rate": 4.998903109826314e-10, "loss": 0.0701608657836914, "step": 7148 }, { "epoch": 0.9961680484916046, "grad_norm": 1.470834732055664, "learning_rate": 4.671202341993697e-10, "loss": 0.13637924194335938, "step": 7149 }, { "epoch": 0.9963073921828189, "grad_norm": 1.0252994298934937, "learning_rate": 4.354609394802145e-10, "loss": 0.13611602783203125, "step": 7150 }, { "epoch": 0.9964467358740333, "grad_norm": 0.7363699674606323, "learning_rate": 4.0491243386009403e-10, "loss": 0.08699417114257812, "step": 7151 }, { "epoch": 0.9965860795652477, "grad_norm": 0.5029408931732178, "learning_rate": 3.7547472412580167e-10, "loss": 0.07819938659667969, "step": 7152 }, { "epoch": 0.9967254232564621, "grad_norm": 0.5955779552459717, "learning_rate": 3.471478168176612e-10, "loss": 0.08417320251464844, "step": 7153 }, { "epoch": 0.9968647669476765, "grad_norm": 0.5387664437294006, "learning_rate": 3.19931718229527e-10, "loss": 0.08239173889160156, "step": 7154 }, { "epoch": 0.9970041106388908, "grad_norm": 0.6764245629310608, "learning_rate": 2.9382643440767354e-10, "loss": 0.09382820129394531, "step": 7155 }, { "epoch": 0.9971434543301052, "grad_norm": 0.7701941728591919, "learning_rate": 2.6883197115190606e-10, "loss": 0.10859012603759766, "step": 7156 }, { "epoch": 0.9972827980213196, "grad_norm": 0.45134228467941284, "learning_rate": 2.4494833401667027e-10, "loss": 0.07152557373046875, "step": 7157 }, { "epoch": 0.997422141712534, "grad_norm": 0.6265767812728882, "learning_rate": 2.2217552830716693e-10, "loss": 0.09081840515136719, "step": 7158 }, { "epoch": 0.9975614854037483, "grad_norm": 0.8978511095046997, "learning_rate": 2.0051355908323743e-10, "loss": 0.10479927062988281, "step": 7159 }, { "epoch": 0.9977008290949627, "grad_norm": 0.8257019519805908, "learning_rate": 1.7996243115769863e-10, "loss": 0.10752677917480469, "step": 7160 }, { "epoch": 0.9978401727861771, "grad_norm": 0.7821223139762878, "learning_rate": 1.605221490968978e-10, "loss": 0.07930469512939453, "step": 7161 }, { "epoch": 0.9979795164773915, "grad_norm": 1.2605535984039307, "learning_rate": 1.421927172201576e-10, "loss": 0.10813045501708984, "step": 7162 }, { "epoch": 0.9981188601686058, "grad_norm": 0.8725650906562805, "learning_rate": 1.24974139599221e-10, "loss": 0.10878801345825195, "step": 7163 }, { "epoch": 0.9982582038598202, "grad_norm": 0.8800066113471985, "learning_rate": 1.0886642005991654e-10, "loss": 0.09224510192871094, "step": 7164 }, { "epoch": 0.9983975475510346, "grad_norm": 0.32711949944496155, "learning_rate": 9.386956218104815e-11, "loss": 0.05911064147949219, "step": 7165 }, { "epoch": 0.998536891242249, "grad_norm": 0.6822524666786194, "learning_rate": 7.998356929439511e-11, "loss": 0.08124160766601562, "step": 7166 }, { "epoch": 0.9986762349334634, "grad_norm": 0.657299816608429, "learning_rate": 6.72084444852672e-11, "loss": 0.07547950744628906, "step": 7167 }, { "epoch": 0.9988155786246777, "grad_norm": 0.3161132037639618, "learning_rate": 5.554419059250471e-11, "loss": 0.05511283874511719, "step": 7168 }, { "epoch": 0.9989549223158921, "grad_norm": 1.1874725818634033, "learning_rate": 4.499081020681306e-11, "loss": 0.1514263153076172, "step": 7169 }, { "epoch": 0.9990942660071065, "grad_norm": 1.2515686750411987, "learning_rate": 3.554830567298328e-11, "loss": 0.1448535919189453, "step": 7170 }, { "epoch": 0.9992336096983209, "grad_norm": 0.5818399786949158, "learning_rate": 2.7216679089892008e-11, "loss": 0.08704376220703125, "step": 7171 }, { "epoch": 0.9993729533895352, "grad_norm": 0.4809263050556183, "learning_rate": 1.9995932307170783e-11, "loss": 0.06825637817382812, "step": 7172 }, { "epoch": 0.9995122970807496, "grad_norm": 1.011396884918213, "learning_rate": 1.3886066930202113e-11, "loss": 0.11175537109375, "step": 7173 }, { "epoch": 0.999651640771964, "grad_norm": 0.6320770382881165, "learning_rate": 8.88708431623364e-12, "loss": 0.08298015594482422, "step": 7174 }, { "epoch": 0.9997909844631784, "grad_norm": 0.8537821173667908, "learning_rate": 4.998985576043503e-12, "loss": 0.0964508056640625, "step": 7175 }, { "epoch": 0.9999303281543928, "grad_norm": 0.8895832300186157, "learning_rate": 2.2217715728301003e-12, "loss": 0.10613059997558594, "step": 7176 }, { "epoch": 1.0, "grad_norm": 2.0245578289031982, "learning_rate": 5.554429238774361e-13, "loss": 0.15132904052734375, "step": 7177 }, { "epoch": 1.0, "step": 7177, "total_flos": 5.326107668090192e+19, "train_loss": 0.0, "train_runtime": 1.3638, "train_samples_per_second": 1347069.866, "train_steps_per_second": 5262.43 } ], "logging_steps": 1.0, "max_steps": 7177, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5.326107668090192e+19, "train_batch_size": 4, "trial_name": null, "trial_params": null }