{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 7397, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0001351922264469793, "grad_norm": 113.21495056152344, "learning_rate": 0.0, "loss": 0.748291015625, "step": 1 }, { "epoch": 0.0002703844528939586, "grad_norm": 108.36476135253906, "learning_rate": 5.859375e-08, "loss": 0.74407958984375, "step": 2 }, { "epoch": 0.0004055766793409379, "grad_norm": 106.5140609741211, "learning_rate": 1.171875e-07, "loss": 0.74310302734375, "step": 3 }, { "epoch": 0.0005407689057879172, "grad_norm": 107.39302825927734, "learning_rate": 1.7578125e-07, "loss": 0.74542236328125, "step": 4 }, { "epoch": 0.0006759611322348965, "grad_norm": 103.53854370117188, "learning_rate": 2.34375e-07, "loss": 0.74053955078125, "step": 5 }, { "epoch": 0.0008111533586818758, "grad_norm": 113.02201080322266, "learning_rate": 2.9296875000000003e-07, "loss": 0.75213623046875, "step": 6 }, { "epoch": 0.0009463455851288551, "grad_norm": 107.06986999511719, "learning_rate": 3.515625e-07, "loss": 0.7452392578125, "step": 7 }, { "epoch": 0.0010815378115758344, "grad_norm": 107.04864501953125, "learning_rate": 4.1015625e-07, "loss": 0.741455078125, "step": 8 }, { "epoch": 0.0012167300380228137, "grad_norm": 107.50235748291016, "learning_rate": 4.6875e-07, "loss": 0.74114990234375, "step": 9 }, { "epoch": 0.001351922264469793, "grad_norm": 110.0961685180664, "learning_rate": 5.2734375e-07, "loss": 0.742431640625, "step": 10 }, { "epoch": 0.0014871144909167722, "grad_norm": 107.9870376586914, "learning_rate": 5.859375000000001e-07, "loss": 0.74407958984375, "step": 11 }, { "epoch": 0.0016223067173637515, "grad_norm": 106.07730102539062, "learning_rate": 6.4453125e-07, "loss": 0.73370361328125, "step": 12 }, { "epoch": 0.0017574989438107309, "grad_norm": 111.1282730102539, "learning_rate": 7.03125e-07, "loss": 0.735595703125, "step": 13 }, { "epoch": 0.0018926911702577102, "grad_norm": 108.10362243652344, "learning_rate": 7.6171875e-07, "loss": 0.7298583984375, "step": 14 }, { "epoch": 0.0020278833967046894, "grad_norm": 112.204833984375, "learning_rate": 8.203125e-07, "loss": 0.73345947265625, "step": 15 }, { "epoch": 0.0021630756231516687, "grad_norm": 110.4712905883789, "learning_rate": 8.7890625e-07, "loss": 0.714111328125, "step": 16 }, { "epoch": 0.002298267849598648, "grad_norm": 105.55673217773438, "learning_rate": 9.375e-07, "loss": 0.7093505859375, "step": 17 }, { "epoch": 0.0024334600760456274, "grad_norm": 92.90567779541016, "learning_rate": 9.9609375e-07, "loss": 0.68585205078125, "step": 18 }, { "epoch": 0.0025686523024926067, "grad_norm": 105.00279235839844, "learning_rate": 1.0546875e-06, "loss": 0.6875, "step": 19 }, { "epoch": 0.002703844528939586, "grad_norm": 96.80097198486328, "learning_rate": 1.11328125e-06, "loss": 0.68011474609375, "step": 20 }, { "epoch": 0.0028390367553865654, "grad_norm": 92.1880111694336, "learning_rate": 1.1718750000000001e-06, "loss": 0.6771240234375, "step": 21 }, { "epoch": 0.0029742289818335444, "grad_norm": 98.32208251953125, "learning_rate": 1.23046875e-06, "loss": 0.6190185546875, "step": 22 }, { "epoch": 0.0031094212082805237, "grad_norm": 94.18339538574219, "learning_rate": 1.2890625e-06, "loss": 0.611083984375, "step": 23 }, { "epoch": 0.003244613434727503, "grad_norm": 92.8238525390625, "learning_rate": 1.34765625e-06, "loss": 0.6107177734375, "step": 24 }, { "epoch": 0.0033798056611744824, "grad_norm": 84.07279205322266, "learning_rate": 1.40625e-06, "loss": 0.56585693359375, "step": 25 }, { "epoch": 0.0035149978876214617, "grad_norm": 82.3076400756836, "learning_rate": 1.46484375e-06, "loss": 0.55584716796875, "step": 26 }, { "epoch": 0.003650190114068441, "grad_norm": 81.03410339355469, "learning_rate": 1.5234375e-06, "loss": 0.55029296875, "step": 27 }, { "epoch": 0.0037853823405154204, "grad_norm": 84.79804229736328, "learning_rate": 1.5820312500000001e-06, "loss": 0.54022216796875, "step": 28 }, { "epoch": 0.003920574566962399, "grad_norm": 74.59902954101562, "learning_rate": 1.640625e-06, "loss": 0.54278564453125, "step": 29 }, { "epoch": 0.004055766793409379, "grad_norm": 58.30272674560547, "learning_rate": 1.69921875e-06, "loss": 0.419342041015625, "step": 30 }, { "epoch": 0.004190959019856358, "grad_norm": 62.59189224243164, "learning_rate": 1.7578125e-06, "loss": 0.3858642578125, "step": 31 }, { "epoch": 0.004326151246303337, "grad_norm": 58.085750579833984, "learning_rate": 1.81640625e-06, "loss": 0.390350341796875, "step": 32 }, { "epoch": 0.004461343472750317, "grad_norm": 57.22941970825195, "learning_rate": 1.875e-06, "loss": 0.3809814453125, "step": 33 }, { "epoch": 0.004596535699197296, "grad_norm": 50.55696487426758, "learning_rate": 1.93359375e-06, "loss": 0.371734619140625, "step": 34 }, { "epoch": 0.0047317279256442754, "grad_norm": 51.165592193603516, "learning_rate": 1.9921875e-06, "loss": 0.346527099609375, "step": 35 }, { "epoch": 0.004866920152091255, "grad_norm": 48.22037124633789, "learning_rate": 2.05078125e-06, "loss": 0.338653564453125, "step": 36 }, { "epoch": 0.005002112378538234, "grad_norm": 40.0262565612793, "learning_rate": 2.109375e-06, "loss": 0.355743408203125, "step": 37 }, { "epoch": 0.0051373046049852135, "grad_norm": 38.58692932128906, "learning_rate": 2.16796875e-06, "loss": 0.340789794921875, "step": 38 }, { "epoch": 0.005272496831432193, "grad_norm": 32.50050735473633, "learning_rate": 2.2265625e-06, "loss": 0.33416748046875, "step": 39 }, { "epoch": 0.005407689057879172, "grad_norm": 22.643756866455078, "learning_rate": 2.28515625e-06, "loss": 0.3324127197265625, "step": 40 }, { "epoch": 0.0055428812843261515, "grad_norm": 20.40488624572754, "learning_rate": 2.3437500000000002e-06, "loss": 0.2858123779296875, "step": 41 }, { "epoch": 0.005678073510773131, "grad_norm": 17.645462036132812, "learning_rate": 2.40234375e-06, "loss": 0.262664794921875, "step": 42 }, { "epoch": 0.00581326573722011, "grad_norm": 13.637259483337402, "learning_rate": 2.4609375e-06, "loss": 0.2672119140625, "step": 43 }, { "epoch": 0.005948457963667089, "grad_norm": 15.169034004211426, "learning_rate": 2.5195312500000003e-06, "loss": 0.23325347900390625, "step": 44 }, { "epoch": 0.006083650190114068, "grad_norm": 14.01146411895752, "learning_rate": 2.578125e-06, "loss": 0.2244873046875, "step": 45 }, { "epoch": 0.006218842416561047, "grad_norm": 9.481780052185059, "learning_rate": 2.63671875e-06, "loss": 0.24816131591796875, "step": 46 }, { "epoch": 0.006354034643008027, "grad_norm": 10.433836936950684, "learning_rate": 2.6953125e-06, "loss": 0.21952056884765625, "step": 47 }, { "epoch": 0.006489226869455006, "grad_norm": 8.479501724243164, "learning_rate": 2.75390625e-06, "loss": 0.21712493896484375, "step": 48 }, { "epoch": 0.0066244190959019855, "grad_norm": 2.557535171508789, "learning_rate": 2.8125e-06, "loss": 0.259521484375, "step": 49 }, { "epoch": 0.006759611322348965, "grad_norm": 2.974193572998047, "learning_rate": 2.87109375e-06, "loss": 0.2401885986328125, "step": 50 }, { "epoch": 0.006894803548795944, "grad_norm": 2.2221181392669678, "learning_rate": 2.9296875e-06, "loss": 0.26718902587890625, "step": 51 }, { "epoch": 0.0070299957752429235, "grad_norm": 1.9755792617797852, "learning_rate": 2.9882812500000002e-06, "loss": 0.23004150390625, "step": 52 }, { "epoch": 0.007165188001689903, "grad_norm": 2.6573526859283447, "learning_rate": 3.046875e-06, "loss": 0.21501922607421875, "step": 53 }, { "epoch": 0.007300380228136882, "grad_norm": 3.352895975112915, "learning_rate": 3.10546875e-06, "loss": 0.20934295654296875, "step": 54 }, { "epoch": 0.0074355724545838615, "grad_norm": 2.83560848236084, "learning_rate": 3.1640625000000003e-06, "loss": 0.22216796875, "step": 55 }, { "epoch": 0.007570764681030841, "grad_norm": 5.9283127784729, "learning_rate": 3.22265625e-06, "loss": 0.15530014038085938, "step": 56 }, { "epoch": 0.00770595690747782, "grad_norm": 3.6991169452667236, "learning_rate": 3.28125e-06, "loss": 0.1821136474609375, "step": 57 }, { "epoch": 0.007841149133924799, "grad_norm": 4.471269607543945, "learning_rate": 3.3398437500000003e-06, "loss": 0.232086181640625, "step": 58 }, { "epoch": 0.007976341360371779, "grad_norm": 5.5343217849731445, "learning_rate": 3.3984375e-06, "loss": 0.23752593994140625, "step": 59 }, { "epoch": 0.008111533586818757, "grad_norm": 3.6134774684906006, "learning_rate": 3.45703125e-06, "loss": 0.2616615295410156, "step": 60 }, { "epoch": 0.008246725813265738, "grad_norm": 5.2135910987854, "learning_rate": 3.515625e-06, "loss": 0.145904541015625, "step": 61 }, { "epoch": 0.008381918039712716, "grad_norm": 4.801625728607178, "learning_rate": 3.57421875e-06, "loss": 0.15020751953125, "step": 62 }, { "epoch": 0.008517110266159696, "grad_norm": 4.166983127593994, "learning_rate": 3.6328125e-06, "loss": 0.19959259033203125, "step": 63 }, { "epoch": 0.008652302492606675, "grad_norm": 4.064243793487549, "learning_rate": 3.69140625e-06, "loss": 0.13083839416503906, "step": 64 }, { "epoch": 0.008787494719053655, "grad_norm": 7.325190544128418, "learning_rate": 3.75e-06, "loss": 0.16757583618164062, "step": 65 }, { "epoch": 0.008922686945500634, "grad_norm": 9.485154151916504, "learning_rate": 3.8085937500000002e-06, "loss": 0.22097015380859375, "step": 66 }, { "epoch": 0.009057879171947614, "grad_norm": 9.843076705932617, "learning_rate": 3.8671875e-06, "loss": 0.13802337646484375, "step": 67 }, { "epoch": 0.009193071398394592, "grad_norm": 8.675374031066895, "learning_rate": 3.92578125e-06, "loss": 0.1808319091796875, "step": 68 }, { "epoch": 0.009328263624841572, "grad_norm": 3.483614921569824, "learning_rate": 3.984375e-06, "loss": 0.10939407348632812, "step": 69 }, { "epoch": 0.009463455851288551, "grad_norm": 3.5544822216033936, "learning_rate": 4.0429687500000004e-06, "loss": 0.13950729370117188, "step": 70 }, { "epoch": 0.00959864807773553, "grad_norm": 3.999521017074585, "learning_rate": 4.1015625e-06, "loss": 0.17206954956054688, "step": 71 }, { "epoch": 0.00973384030418251, "grad_norm": 9.9075927734375, "learning_rate": 4.16015625e-06, "loss": 0.22394180297851562, "step": 72 }, { "epoch": 0.009869032530629488, "grad_norm": 3.8207085132598877, "learning_rate": 4.21875e-06, "loss": 0.12192153930664062, "step": 73 }, { "epoch": 0.010004224757076468, "grad_norm": 8.090127944946289, "learning_rate": 4.27734375e-06, "loss": 0.21240997314453125, "step": 74 }, { "epoch": 0.010139416983523447, "grad_norm": 6.701347827911377, "learning_rate": 4.3359375e-06, "loss": 0.13440704345703125, "step": 75 }, { "epoch": 0.010274609209970427, "grad_norm": 2.4383575916290283, "learning_rate": 4.3945312500000005e-06, "loss": 0.13167572021484375, "step": 76 }, { "epoch": 0.010409801436417405, "grad_norm": 3.738039493560791, "learning_rate": 4.453125e-06, "loss": 0.19110107421875, "step": 77 }, { "epoch": 0.010544993662864386, "grad_norm": 7.909547328948975, "learning_rate": 4.51171875e-06, "loss": 0.13581275939941406, "step": 78 }, { "epoch": 0.010680185889311364, "grad_norm": 2.761085271835327, "learning_rate": 4.5703125e-06, "loss": 0.1324901580810547, "step": 79 }, { "epoch": 0.010815378115758344, "grad_norm": 11.46563720703125, "learning_rate": 4.62890625e-06, "loss": 0.1734781265258789, "step": 80 }, { "epoch": 0.010950570342205323, "grad_norm": 11.595213890075684, "learning_rate": 4.6875000000000004e-06, "loss": 0.22270774841308594, "step": 81 }, { "epoch": 0.011085762568652303, "grad_norm": 2.31451153755188, "learning_rate": 4.74609375e-06, "loss": 0.13615036010742188, "step": 82 }, { "epoch": 0.011220954795099282, "grad_norm": 6.179954528808594, "learning_rate": 4.8046875e-06, "loss": 0.23222732543945312, "step": 83 }, { "epoch": 0.011356147021546262, "grad_norm": 6.969549179077148, "learning_rate": 4.86328125e-06, "loss": 0.12405014038085938, "step": 84 }, { "epoch": 0.01149133924799324, "grad_norm": 4.883032321929932, "learning_rate": 4.921875e-06, "loss": 0.18102645874023438, "step": 85 }, { "epoch": 0.01162653147444022, "grad_norm": 3.645406484603882, "learning_rate": 4.98046875e-06, "loss": 0.12834930419921875, "step": 86 }, { "epoch": 0.011761723700887199, "grad_norm": 2.0353078842163086, "learning_rate": 5.0390625000000005e-06, "loss": 0.062042236328125, "step": 87 }, { "epoch": 0.011896915927334177, "grad_norm": 8.733626365661621, "learning_rate": 5.09765625e-06, "loss": 0.16191673278808594, "step": 88 }, { "epoch": 0.012032108153781158, "grad_norm": 7.236554145812988, "learning_rate": 5.15625e-06, "loss": 0.1753101348876953, "step": 89 }, { "epoch": 0.012167300380228136, "grad_norm": 5.710174083709717, "learning_rate": 5.21484375e-06, "loss": 0.18563079833984375, "step": 90 }, { "epoch": 0.012302492606675116, "grad_norm": 2.9105305671691895, "learning_rate": 5.2734375e-06, "loss": 0.1434478759765625, "step": 91 }, { "epoch": 0.012437684833122095, "grad_norm": 8.332087516784668, "learning_rate": 5.3320312500000004e-06, "loss": 0.10818862915039062, "step": 92 }, { "epoch": 0.012572877059569075, "grad_norm": 2.86965012550354, "learning_rate": 5.390625e-06, "loss": 0.16751861572265625, "step": 93 }, { "epoch": 0.012708069286016054, "grad_norm": 6.664602756500244, "learning_rate": 5.44921875e-06, "loss": 0.13586997985839844, "step": 94 }, { "epoch": 0.012843261512463034, "grad_norm": 3.1009819507598877, "learning_rate": 5.5078125e-06, "loss": 0.12674665451049805, "step": 95 }, { "epoch": 0.012978453738910012, "grad_norm": 8.342328071594238, "learning_rate": 5.56640625e-06, "loss": 0.14958953857421875, "step": 96 }, { "epoch": 0.013113645965356992, "grad_norm": 10.464272499084473, "learning_rate": 5.625e-06, "loss": 0.14819717407226562, "step": 97 }, { "epoch": 0.013248838191803971, "grad_norm": 6.0037736892700195, "learning_rate": 5.6835937500000005e-06, "loss": 0.10096168518066406, "step": 98 }, { "epoch": 0.013384030418250951, "grad_norm": 2.2893693447113037, "learning_rate": 5.7421875e-06, "loss": 0.13230514526367188, "step": 99 }, { "epoch": 0.01351922264469793, "grad_norm": 7.123652935028076, "learning_rate": 5.80078125e-06, "loss": 0.11539888381958008, "step": 100 }, { "epoch": 0.01365441487114491, "grad_norm": 10.961258888244629, "learning_rate": 5.859375e-06, "loss": 0.1784381866455078, "step": 101 }, { "epoch": 0.013789607097591888, "grad_norm": 4.8647990226745605, "learning_rate": 5.91796875e-06, "loss": 0.11674118041992188, "step": 102 }, { "epoch": 0.013924799324038869, "grad_norm": 7.338796615600586, "learning_rate": 5.9765625000000004e-06, "loss": 0.13860511779785156, "step": 103 }, { "epoch": 0.014059991550485847, "grad_norm": 8.011110305786133, "learning_rate": 6.03515625e-06, "loss": 0.16212081909179688, "step": 104 }, { "epoch": 0.014195183776932827, "grad_norm": 5.4134440422058105, "learning_rate": 6.09375e-06, "loss": 0.10045814514160156, "step": 105 }, { "epoch": 0.014330376003379806, "grad_norm": 2.763641834259033, "learning_rate": 6.15234375e-06, "loss": 0.1777801513671875, "step": 106 }, { "epoch": 0.014465568229826784, "grad_norm": 1.190101146697998, "learning_rate": 6.2109375e-06, "loss": 0.07335090637207031, "step": 107 }, { "epoch": 0.014600760456273764, "grad_norm": 8.644311904907227, "learning_rate": 6.26953125e-06, "loss": 0.1813502311706543, "step": 108 }, { "epoch": 0.014735952682720743, "grad_norm": 4.509277820587158, "learning_rate": 6.3281250000000005e-06, "loss": 0.1643209457397461, "step": 109 }, { "epoch": 0.014871144909167723, "grad_norm": 1.0009852647781372, "learning_rate": 6.38671875e-06, "loss": 0.08414649963378906, "step": 110 }, { "epoch": 0.015006337135614702, "grad_norm": 2.962554693222046, "learning_rate": 6.4453125e-06, "loss": 0.1264801025390625, "step": 111 }, { "epoch": 0.015141529362061682, "grad_norm": 2.0783417224884033, "learning_rate": 6.50390625e-06, "loss": 0.11137199401855469, "step": 112 }, { "epoch": 0.01527672158850866, "grad_norm": 4.764652729034424, "learning_rate": 6.5625e-06, "loss": 0.08538532257080078, "step": 113 }, { "epoch": 0.01541191381495564, "grad_norm": 2.847435474395752, "learning_rate": 6.6210937500000004e-06, "loss": 0.18856430053710938, "step": 114 }, { "epoch": 0.015547106041402619, "grad_norm": 2.094693660736084, "learning_rate": 6.679687500000001e-06, "loss": 0.1180562973022461, "step": 115 }, { "epoch": 0.015682298267849597, "grad_norm": 4.287889003753662, "learning_rate": 6.73828125e-06, "loss": 0.121612548828125, "step": 116 }, { "epoch": 0.01581749049429658, "grad_norm": 2.730125665664673, "learning_rate": 6.796875e-06, "loss": 0.10434532165527344, "step": 117 }, { "epoch": 0.015952682720743558, "grad_norm": 2.53700852394104, "learning_rate": 6.85546875e-06, "loss": 0.1632843017578125, "step": 118 }, { "epoch": 0.016087874947190536, "grad_norm": 8.600790977478027, "learning_rate": 6.9140625e-06, "loss": 0.12584304809570312, "step": 119 }, { "epoch": 0.016223067173637515, "grad_norm": 8.225876808166504, "learning_rate": 6.9726562500000005e-06, "loss": 0.16274023056030273, "step": 120 }, { "epoch": 0.016358259400084497, "grad_norm": 7.000145435333252, "learning_rate": 7.03125e-06, "loss": 0.1355915069580078, "step": 121 }, { "epoch": 0.016493451626531475, "grad_norm": 1.2663960456848145, "learning_rate": 7.08984375e-06, "loss": 0.10964584350585938, "step": 122 }, { "epoch": 0.016628643852978454, "grad_norm": 4.407687187194824, "learning_rate": 7.1484375e-06, "loss": 0.1642303466796875, "step": 123 }, { "epoch": 0.016763836079425432, "grad_norm": 4.05107307434082, "learning_rate": 7.20703125e-06, "loss": 0.1561412811279297, "step": 124 }, { "epoch": 0.01689902830587241, "grad_norm": 4.977163791656494, "learning_rate": 7.265625e-06, "loss": 0.12038421630859375, "step": 125 }, { "epoch": 0.017034220532319393, "grad_norm": 2.574216604232788, "learning_rate": 7.3242187500000006e-06, "loss": 0.18793296813964844, "step": 126 }, { "epoch": 0.01716941275876637, "grad_norm": 1.0781961679458618, "learning_rate": 7.3828125e-06, "loss": 0.12640953063964844, "step": 127 }, { "epoch": 0.01730460498521335, "grad_norm": 1.6773957014083862, "learning_rate": 7.44140625e-06, "loss": 0.0811920166015625, "step": 128 }, { "epoch": 0.017439797211660328, "grad_norm": 0.7164351344108582, "learning_rate": 7.5e-06, "loss": 0.06787109375, "step": 129 }, { "epoch": 0.01757498943810731, "grad_norm": 3.4869437217712402, "learning_rate": 7.55859375e-06, "loss": 0.11167430877685547, "step": 130 }, { "epoch": 0.01771018166455429, "grad_norm": 1.7625919580459595, "learning_rate": 7.6171875000000005e-06, "loss": 0.16495513916015625, "step": 131 }, { "epoch": 0.017845373891001267, "grad_norm": 0.7602922320365906, "learning_rate": 7.67578125e-06, "loss": 0.13075828552246094, "step": 132 }, { "epoch": 0.017980566117448245, "grad_norm": 2.4049386978149414, "learning_rate": 7.734375e-06, "loss": 0.15265655517578125, "step": 133 }, { "epoch": 0.018115758343895227, "grad_norm": 1.0546514987945557, "learning_rate": 7.792968750000001e-06, "loss": 0.14289093017578125, "step": 134 }, { "epoch": 0.018250950570342206, "grad_norm": 1.0986353158950806, "learning_rate": 7.8515625e-06, "loss": 0.14336681365966797, "step": 135 }, { "epoch": 0.018386142796789184, "grad_norm": 3.7656426429748535, "learning_rate": 7.91015625e-06, "loss": 0.1464376449584961, "step": 136 }, { "epoch": 0.018521335023236163, "grad_norm": 1.7089734077453613, "learning_rate": 7.96875e-06, "loss": 0.12820720672607422, "step": 137 }, { "epoch": 0.018656527249683145, "grad_norm": 1.987181305885315, "learning_rate": 8.02734375e-06, "loss": 0.11230659484863281, "step": 138 }, { "epoch": 0.018791719476130123, "grad_norm": 0.8318939208984375, "learning_rate": 8.085937500000001e-06, "loss": 0.09685516357421875, "step": 139 }, { "epoch": 0.018926911702577102, "grad_norm": 1.2632231712341309, "learning_rate": 8.14453125e-06, "loss": 0.09587240219116211, "step": 140 }, { "epoch": 0.01906210392902408, "grad_norm": 1.4492000341415405, "learning_rate": 8.203125e-06, "loss": 0.12637901306152344, "step": 141 }, { "epoch": 0.01919729615547106, "grad_norm": 3.6630232334136963, "learning_rate": 8.26171875e-06, "loss": 0.1568431854248047, "step": 142 }, { "epoch": 0.01933248838191804, "grad_norm": 2.887187957763672, "learning_rate": 8.3203125e-06, "loss": 0.12235832214355469, "step": 143 }, { "epoch": 0.01946768060836502, "grad_norm": 2.018618583679199, "learning_rate": 8.37890625e-06, "loss": 0.08384561538696289, "step": 144 }, { "epoch": 0.019602872834811998, "grad_norm": 2.8993093967437744, "learning_rate": 8.4375e-06, "loss": 0.13108062744140625, "step": 145 }, { "epoch": 0.019738065061258976, "grad_norm": 3.5131022930145264, "learning_rate": 8.49609375e-06, "loss": 0.17909622192382812, "step": 146 }, { "epoch": 0.019873257287705958, "grad_norm": 2.9853014945983887, "learning_rate": 8.5546875e-06, "loss": 0.08828163146972656, "step": 147 }, { "epoch": 0.020008449514152937, "grad_norm": 4.798372745513916, "learning_rate": 8.61328125e-06, "loss": 0.14518165588378906, "step": 148 }, { "epoch": 0.020143641740599915, "grad_norm": 4.102869033813477, "learning_rate": 8.671875e-06, "loss": 0.12208366394042969, "step": 149 }, { "epoch": 0.020278833967046894, "grad_norm": 1.452130675315857, "learning_rate": 8.73046875e-06, "loss": 0.09453487396240234, "step": 150 }, { "epoch": 0.020414026193493875, "grad_norm": 3.580275535583496, "learning_rate": 8.789062500000001e-06, "loss": 0.11167144775390625, "step": 151 }, { "epoch": 0.020549218419940854, "grad_norm": 2.5855484008789062, "learning_rate": 8.84765625e-06, "loss": 0.08226871490478516, "step": 152 }, { "epoch": 0.020684410646387832, "grad_norm": 3.079184055328369, "learning_rate": 8.90625e-06, "loss": 0.08919048309326172, "step": 153 }, { "epoch": 0.02081960287283481, "grad_norm": 1.7718374729156494, "learning_rate": 8.96484375e-06, "loss": 0.1257932186126709, "step": 154 }, { "epoch": 0.020954795099281793, "grad_norm": 1.7795010805130005, "learning_rate": 9.0234375e-06, "loss": 0.10280036926269531, "step": 155 }, { "epoch": 0.02108998732572877, "grad_norm": 6.808331489562988, "learning_rate": 9.082031250000001e-06, "loss": 0.15737152099609375, "step": 156 }, { "epoch": 0.02122517955217575, "grad_norm": 1.3567724227905273, "learning_rate": 9.140625e-06, "loss": 0.15021324157714844, "step": 157 }, { "epoch": 0.02136037177862273, "grad_norm": 1.3221532106399536, "learning_rate": 9.19921875e-06, "loss": 0.07552814483642578, "step": 158 }, { "epoch": 0.021495564005069707, "grad_norm": 1.1455117464065552, "learning_rate": 9.2578125e-06, "loss": 0.10880470275878906, "step": 159 }, { "epoch": 0.02163075623151669, "grad_norm": 1.018518090248108, "learning_rate": 9.31640625e-06, "loss": 0.06482505798339844, "step": 160 }, { "epoch": 0.021765948457963667, "grad_norm": 1.9412482976913452, "learning_rate": 9.375000000000001e-06, "loss": 0.06850290298461914, "step": 161 }, { "epoch": 0.021901140684410646, "grad_norm": 3.4968411922454834, "learning_rate": 9.43359375e-06, "loss": 0.12122619152069092, "step": 162 }, { "epoch": 0.022036332910857624, "grad_norm": 4.4286417961120605, "learning_rate": 9.4921875e-06, "loss": 0.1243133544921875, "step": 163 }, { "epoch": 0.022171525137304606, "grad_norm": 5.524990558624268, "learning_rate": 9.55078125e-06, "loss": 0.14089202880859375, "step": 164 }, { "epoch": 0.022306717363751585, "grad_norm": 3.6802821159362793, "learning_rate": 9.609375e-06, "loss": 0.08651351928710938, "step": 165 }, { "epoch": 0.022441909590198563, "grad_norm": 3.1190967559814453, "learning_rate": 9.66796875e-06, "loss": 0.08302116394042969, "step": 166 }, { "epoch": 0.02257710181664554, "grad_norm": 4.092504024505615, "learning_rate": 9.7265625e-06, "loss": 0.13624954223632812, "step": 167 }, { "epoch": 0.022712294043092524, "grad_norm": 2.8123655319213867, "learning_rate": 9.78515625e-06, "loss": 0.09840965270996094, "step": 168 }, { "epoch": 0.022847486269539502, "grad_norm": 2.038686513900757, "learning_rate": 9.84375e-06, "loss": 0.13901329040527344, "step": 169 }, { "epoch": 0.02298267849598648, "grad_norm": 1.851515293121338, "learning_rate": 9.90234375e-06, "loss": 0.1355876922607422, "step": 170 }, { "epoch": 0.02311787072243346, "grad_norm": 4.631291389465332, "learning_rate": 9.9609375e-06, "loss": 0.181976318359375, "step": 171 }, { "epoch": 0.02325306294888044, "grad_norm": 7.575974941253662, "learning_rate": 1.001953125e-05, "loss": 0.2035970687866211, "step": 172 }, { "epoch": 0.02338825517532742, "grad_norm": 9.929818153381348, "learning_rate": 1.0078125000000001e-05, "loss": 0.13532638549804688, "step": 173 }, { "epoch": 0.023523447401774398, "grad_norm": 1.531161904335022, "learning_rate": 1.013671875e-05, "loss": 0.11621952056884766, "step": 174 }, { "epoch": 0.023658639628221376, "grad_norm": 2.7926032543182373, "learning_rate": 1.01953125e-05, "loss": 0.08643817901611328, "step": 175 }, { "epoch": 0.023793831854668355, "grad_norm": 6.364810466766357, "learning_rate": 1.025390625e-05, "loss": 0.16801691055297852, "step": 176 }, { "epoch": 0.023929024081115337, "grad_norm": 4.558557033538818, "learning_rate": 1.03125e-05, "loss": 0.12172222137451172, "step": 177 }, { "epoch": 0.024064216307562315, "grad_norm": 13.089761734008789, "learning_rate": 1.0371093750000001e-05, "loss": 0.1399097442626953, "step": 178 }, { "epoch": 0.024199408534009294, "grad_norm": 12.028462409973145, "learning_rate": 1.04296875e-05, "loss": 0.11944293975830078, "step": 179 }, { "epoch": 0.024334600760456272, "grad_norm": 7.456510543823242, "learning_rate": 1.048828125e-05, "loss": 0.09870719909667969, "step": 180 }, { "epoch": 0.024469792986903254, "grad_norm": 1.4857101440429688, "learning_rate": 1.0546875e-05, "loss": 0.09162235260009766, "step": 181 }, { "epoch": 0.024604985213350233, "grad_norm": 4.810478687286377, "learning_rate": 1.060546875e-05, "loss": 0.1052408218383789, "step": 182 }, { "epoch": 0.02474017743979721, "grad_norm": 4.571746826171875, "learning_rate": 1.0664062500000001e-05, "loss": 0.12206459045410156, "step": 183 }, { "epoch": 0.02487536966624419, "grad_norm": 4.826739311218262, "learning_rate": 1.072265625e-05, "loss": 0.14027976989746094, "step": 184 }, { "epoch": 0.02501056189269117, "grad_norm": 1.68911874294281, "learning_rate": 1.078125e-05, "loss": 0.13982009887695312, "step": 185 }, { "epoch": 0.02514575411913815, "grad_norm": 5.710495948791504, "learning_rate": 1.083984375e-05, "loss": 0.11030769348144531, "step": 186 }, { "epoch": 0.02528094634558513, "grad_norm": 2.2882609367370605, "learning_rate": 1.08984375e-05, "loss": 0.09025359153747559, "step": 187 }, { "epoch": 0.025416138572032107, "grad_norm": 3.9504992961883545, "learning_rate": 1.095703125e-05, "loss": 0.13927841186523438, "step": 188 }, { "epoch": 0.02555133079847909, "grad_norm": 2.2836692333221436, "learning_rate": 1.1015625e-05, "loss": 0.1269536018371582, "step": 189 }, { "epoch": 0.025686523024926067, "grad_norm": 2.6040592193603516, "learning_rate": 1.1074218750000001e-05, "loss": 0.11598014831542969, "step": 190 }, { "epoch": 0.025821715251373046, "grad_norm": 5.9299092292785645, "learning_rate": 1.11328125e-05, "loss": 0.14548873901367188, "step": 191 }, { "epoch": 0.025956907477820024, "grad_norm": 2.255737066268921, "learning_rate": 1.119140625e-05, "loss": 0.11804676055908203, "step": 192 }, { "epoch": 0.026092099704267006, "grad_norm": 1.8716999292373657, "learning_rate": 1.125e-05, "loss": 0.06808090209960938, "step": 193 }, { "epoch": 0.026227291930713985, "grad_norm": 2.497042655944824, "learning_rate": 1.130859375e-05, "loss": 0.10706710815429688, "step": 194 }, { "epoch": 0.026362484157160963, "grad_norm": 3.482166051864624, "learning_rate": 1.1367187500000001e-05, "loss": 0.15346050262451172, "step": 195 }, { "epoch": 0.026497676383607942, "grad_norm": 4.400440216064453, "learning_rate": 1.142578125e-05, "loss": 0.10824441909790039, "step": 196 }, { "epoch": 0.02663286861005492, "grad_norm": 6.114097595214844, "learning_rate": 1.1484375e-05, "loss": 0.11834430694580078, "step": 197 }, { "epoch": 0.026768060836501902, "grad_norm": 2.279799699783325, "learning_rate": 1.154296875e-05, "loss": 0.12312889099121094, "step": 198 }, { "epoch": 0.02690325306294888, "grad_norm": 0.9420679211616516, "learning_rate": 1.16015625e-05, "loss": 0.06853199005126953, "step": 199 }, { "epoch": 0.02703844528939586, "grad_norm": 1.9888229370117188, "learning_rate": 1.1660156250000001e-05, "loss": 0.0968637466430664, "step": 200 }, { "epoch": 0.027173637515842838, "grad_norm": 1.231702208518982, "learning_rate": 1.171875e-05, "loss": 0.10464668273925781, "step": 201 }, { "epoch": 0.02730882974228982, "grad_norm": 1.6797163486480713, "learning_rate": 1.177734375e-05, "loss": 0.11948394775390625, "step": 202 }, { "epoch": 0.027444021968736798, "grad_norm": 2.0518569946289062, "learning_rate": 1.18359375e-05, "loss": 0.09188961982727051, "step": 203 }, { "epoch": 0.027579214195183777, "grad_norm": 2.415738821029663, "learning_rate": 1.189453125e-05, "loss": 0.09293174743652344, "step": 204 }, { "epoch": 0.027714406421630755, "grad_norm": 0.9479292035102844, "learning_rate": 1.1953125000000001e-05, "loss": 0.10342216491699219, "step": 205 }, { "epoch": 0.027849598648077737, "grad_norm": 0.7605688571929932, "learning_rate": 1.201171875e-05, "loss": 0.06887245178222656, "step": 206 }, { "epoch": 0.027984790874524716, "grad_norm": 1.1083003282546997, "learning_rate": 1.20703125e-05, "loss": 0.08589363098144531, "step": 207 }, { "epoch": 0.028119983100971694, "grad_norm": 3.79036283493042, "learning_rate": 1.212890625e-05, "loss": 0.19014549255371094, "step": 208 }, { "epoch": 0.028255175327418672, "grad_norm": 3.4152753353118896, "learning_rate": 1.21875e-05, "loss": 0.12618637084960938, "step": 209 }, { "epoch": 0.028390367553865654, "grad_norm": 2.4498088359832764, "learning_rate": 1.224609375e-05, "loss": 0.13806629180908203, "step": 210 }, { "epoch": 0.028525559780312633, "grad_norm": 5.081950664520264, "learning_rate": 1.23046875e-05, "loss": 0.14824390411376953, "step": 211 }, { "epoch": 0.02866075200675961, "grad_norm": 3.4185314178466797, "learning_rate": 1.2363281250000001e-05, "loss": 0.1659564971923828, "step": 212 }, { "epoch": 0.02879594423320659, "grad_norm": 5.166901588439941, "learning_rate": 1.2421875e-05, "loss": 0.08841896057128906, "step": 213 }, { "epoch": 0.02893113645965357, "grad_norm": 4.72791862487793, "learning_rate": 1.248046875e-05, "loss": 0.11452770233154297, "step": 214 }, { "epoch": 0.02906632868610055, "grad_norm": 5.573087692260742, "learning_rate": 1.25390625e-05, "loss": 0.08337831497192383, "step": 215 }, { "epoch": 0.02920152091254753, "grad_norm": 4.106476783752441, "learning_rate": 1.259765625e-05, "loss": 0.07535362243652344, "step": 216 }, { "epoch": 0.029336713138994507, "grad_norm": 2.1564137935638428, "learning_rate": 1.2656250000000001e-05, "loss": 0.08741140365600586, "step": 217 }, { "epoch": 0.029471905365441486, "grad_norm": 1.2396667003631592, "learning_rate": 1.271484375e-05, "loss": 0.1266641616821289, "step": 218 }, { "epoch": 0.029607097591888468, "grad_norm": 2.104222536087036, "learning_rate": 1.27734375e-05, "loss": 0.14081764221191406, "step": 219 }, { "epoch": 0.029742289818335446, "grad_norm": 2.055009365081787, "learning_rate": 1.283203125e-05, "loss": 0.1470928192138672, "step": 220 }, { "epoch": 0.029877482044782425, "grad_norm": 1.493439793586731, "learning_rate": 1.2890625e-05, "loss": 0.1262054443359375, "step": 221 }, { "epoch": 0.030012674271229403, "grad_norm": 0.9046214818954468, "learning_rate": 1.2949218750000001e-05, "loss": 0.0701284408569336, "step": 222 }, { "epoch": 0.030147866497676385, "grad_norm": 2.089538097381592, "learning_rate": 1.30078125e-05, "loss": 0.10072708129882812, "step": 223 }, { "epoch": 0.030283058724123364, "grad_norm": 3.5991060733795166, "learning_rate": 1.306640625e-05, "loss": 0.11789846420288086, "step": 224 }, { "epoch": 0.030418250950570342, "grad_norm": 3.6529898643493652, "learning_rate": 1.3125e-05, "loss": 0.10924339294433594, "step": 225 }, { "epoch": 0.03055344317701732, "grad_norm": 8.038374900817871, "learning_rate": 1.318359375e-05, "loss": 0.1462249755859375, "step": 226 }, { "epoch": 0.030688635403464302, "grad_norm": 5.628714084625244, "learning_rate": 1.3242187500000001e-05, "loss": 0.10403728485107422, "step": 227 }, { "epoch": 0.03082382762991128, "grad_norm": 2.750640630722046, "learning_rate": 1.330078125e-05, "loss": 0.13383102416992188, "step": 228 }, { "epoch": 0.03095901985635826, "grad_norm": 2.91054630279541, "learning_rate": 1.3359375000000001e-05, "loss": 0.11275577545166016, "step": 229 }, { "epoch": 0.031094212082805238, "grad_norm": 2.669840097427368, "learning_rate": 1.341796875e-05, "loss": 0.0904231071472168, "step": 230 }, { "epoch": 0.031229404309252216, "grad_norm": 2.7783708572387695, "learning_rate": 1.34765625e-05, "loss": 0.10542774200439453, "step": 231 }, { "epoch": 0.031364596535699195, "grad_norm": 1.4299713373184204, "learning_rate": 1.353515625e-05, "loss": 0.0928964614868164, "step": 232 }, { "epoch": 0.03149978876214617, "grad_norm": 2.016174554824829, "learning_rate": 1.359375e-05, "loss": 0.114776611328125, "step": 233 }, { "epoch": 0.03163498098859316, "grad_norm": 4.8079423904418945, "learning_rate": 1.3652343750000001e-05, "loss": 0.11942386627197266, "step": 234 }, { "epoch": 0.03177017321504014, "grad_norm": 1.7228970527648926, "learning_rate": 1.37109375e-05, "loss": 0.0829782485961914, "step": 235 }, { "epoch": 0.031905365441487116, "grad_norm": 0.42919689416885376, "learning_rate": 1.376953125e-05, "loss": 0.04460620880126953, "step": 236 }, { "epoch": 0.032040557667934094, "grad_norm": 3.3028643131256104, "learning_rate": 1.3828125e-05, "loss": 0.09788990020751953, "step": 237 }, { "epoch": 0.03217574989438107, "grad_norm": 2.778446912765503, "learning_rate": 1.388671875e-05, "loss": 0.10048770904541016, "step": 238 }, { "epoch": 0.03231094212082805, "grad_norm": 1.2364140748977661, "learning_rate": 1.3945312500000001e-05, "loss": 0.08328843116760254, "step": 239 }, { "epoch": 0.03244613434727503, "grad_norm": 1.6592155694961548, "learning_rate": 1.400390625e-05, "loss": 0.10396099090576172, "step": 240 }, { "epoch": 0.03258132657372201, "grad_norm": 0.6079725623130798, "learning_rate": 1.40625e-05, "loss": 0.060886383056640625, "step": 241 }, { "epoch": 0.032716518800168994, "grad_norm": 5.930914878845215, "learning_rate": 1.412109375e-05, "loss": 0.11059761047363281, "step": 242 }, { "epoch": 0.03285171102661597, "grad_norm": 4.0349626541137695, "learning_rate": 1.41796875e-05, "loss": 0.11272239685058594, "step": 243 }, { "epoch": 0.03298690325306295, "grad_norm": 1.2537180185317993, "learning_rate": 1.4238281250000001e-05, "loss": 0.10306453704833984, "step": 244 }, { "epoch": 0.03312209547950993, "grad_norm": 1.9081717729568481, "learning_rate": 1.4296875e-05, "loss": 0.13697052001953125, "step": 245 }, { "epoch": 0.03325728770595691, "grad_norm": 1.496750831604004, "learning_rate": 1.435546875e-05, "loss": 0.08673858642578125, "step": 246 }, { "epoch": 0.033392479932403886, "grad_norm": 2.706200361251831, "learning_rate": 1.44140625e-05, "loss": 0.1148829460144043, "step": 247 }, { "epoch": 0.033527672158850864, "grad_norm": 1.9627279043197632, "learning_rate": 1.447265625e-05, "loss": 0.08620643615722656, "step": 248 }, { "epoch": 0.03366286438529784, "grad_norm": 1.650659203529358, "learning_rate": 1.453125e-05, "loss": 0.12858200073242188, "step": 249 }, { "epoch": 0.03379805661174482, "grad_norm": 1.805548906326294, "learning_rate": 1.458984375e-05, "loss": 0.09543418884277344, "step": 250 }, { "epoch": 0.03393324883819181, "grad_norm": 2.7181215286254883, "learning_rate": 1.4648437500000001e-05, "loss": 0.07841825485229492, "step": 251 }, { "epoch": 0.034068441064638785, "grad_norm": 2.5783114433288574, "learning_rate": 1.470703125e-05, "loss": 0.12348270416259766, "step": 252 }, { "epoch": 0.034203633291085764, "grad_norm": 0.9260610938072205, "learning_rate": 1.4765625e-05, "loss": 0.06962871551513672, "step": 253 }, { "epoch": 0.03433882551753274, "grad_norm": 1.6076881885528564, "learning_rate": 1.482421875e-05, "loss": 0.11204147338867188, "step": 254 }, { "epoch": 0.03447401774397972, "grad_norm": 2.5134031772613525, "learning_rate": 1.48828125e-05, "loss": 0.11007165908813477, "step": 255 }, { "epoch": 0.0346092099704267, "grad_norm": 1.9173743724822998, "learning_rate": 1.4941406250000001e-05, "loss": 0.11146736145019531, "step": 256 }, { "epoch": 0.03474440219687368, "grad_norm": 1.3722951412200928, "learning_rate": 1.5e-05, "loss": 0.09109020233154297, "step": 257 }, { "epoch": 0.034879594423320656, "grad_norm": 5.6338629722595215, "learning_rate": 1.505859375e-05, "loss": 0.14918041229248047, "step": 258 }, { "epoch": 0.03501478664976764, "grad_norm": 3.58321213722229, "learning_rate": 1.51171875e-05, "loss": 0.1257801055908203, "step": 259 }, { "epoch": 0.03514997887621462, "grad_norm": 2.2501776218414307, "learning_rate": 1.517578125e-05, "loss": 0.1175689697265625, "step": 260 }, { "epoch": 0.0352851711026616, "grad_norm": 3.7236227989196777, "learning_rate": 1.5234375000000001e-05, "loss": 0.12977886199951172, "step": 261 }, { "epoch": 0.03542036332910858, "grad_norm": 2.549156427383423, "learning_rate": 1.529296875e-05, "loss": 0.10477447509765625, "step": 262 }, { "epoch": 0.035555555555555556, "grad_norm": 1.5814710855484009, "learning_rate": 1.53515625e-05, "loss": 0.07183074951171875, "step": 263 }, { "epoch": 0.035690747782002534, "grad_norm": 0.9356978535652161, "learning_rate": 1.541015625e-05, "loss": 0.08452892303466797, "step": 264 }, { "epoch": 0.03582594000844951, "grad_norm": 1.1948210000991821, "learning_rate": 1.546875e-05, "loss": 0.11107063293457031, "step": 265 }, { "epoch": 0.03596113223489649, "grad_norm": 0.8266691565513611, "learning_rate": 1.552734375e-05, "loss": 0.09385013580322266, "step": 266 }, { "epoch": 0.03609632446134347, "grad_norm": 5.6417646408081055, "learning_rate": 1.5585937500000002e-05, "loss": 0.11809539794921875, "step": 267 }, { "epoch": 0.036231516687790455, "grad_norm": 1.6528130769729614, "learning_rate": 1.564453125e-05, "loss": 0.05729341506958008, "step": 268 }, { "epoch": 0.03636670891423743, "grad_norm": 3.9556565284729004, "learning_rate": 1.5703125e-05, "loss": 0.09425687789916992, "step": 269 }, { "epoch": 0.03650190114068441, "grad_norm": 4.515077590942383, "learning_rate": 1.576171875e-05, "loss": 0.1512136459350586, "step": 270 }, { "epoch": 0.03663709336713139, "grad_norm": 1.4441699981689453, "learning_rate": 1.58203125e-05, "loss": 0.12055397033691406, "step": 271 }, { "epoch": 0.03677228559357837, "grad_norm": 1.241927981376648, "learning_rate": 1.587890625e-05, "loss": 0.1313619613647461, "step": 272 }, { "epoch": 0.03690747782002535, "grad_norm": 4.403429985046387, "learning_rate": 1.59375e-05, "loss": 0.16307449340820312, "step": 273 }, { "epoch": 0.037042670046472326, "grad_norm": 2.729262351989746, "learning_rate": 1.599609375e-05, "loss": 0.08340573310852051, "step": 274 }, { "epoch": 0.037177862272919304, "grad_norm": 1.3819665908813477, "learning_rate": 1.60546875e-05, "loss": 0.16694259643554688, "step": 275 }, { "epoch": 0.03731305449936629, "grad_norm": 1.756886601448059, "learning_rate": 1.611328125e-05, "loss": 0.11464977264404297, "step": 276 }, { "epoch": 0.03744824672581327, "grad_norm": 0.8777771592140198, "learning_rate": 1.6171875000000002e-05, "loss": 0.08952999114990234, "step": 277 }, { "epoch": 0.03758343895226025, "grad_norm": 2.6637778282165527, "learning_rate": 1.623046875e-05, "loss": 0.1071014404296875, "step": 278 }, { "epoch": 0.037718631178707225, "grad_norm": 0.8831402063369751, "learning_rate": 1.62890625e-05, "loss": 0.09461021423339844, "step": 279 }, { "epoch": 0.037853823405154204, "grad_norm": 2.507668972015381, "learning_rate": 1.634765625e-05, "loss": 0.12186670303344727, "step": 280 }, { "epoch": 0.03798901563160118, "grad_norm": 2.0636186599731445, "learning_rate": 1.640625e-05, "loss": 0.10665130615234375, "step": 281 }, { "epoch": 0.03812420785804816, "grad_norm": 3.099966287612915, "learning_rate": 1.646484375e-05, "loss": 0.11927509307861328, "step": 282 }, { "epoch": 0.03825940008449514, "grad_norm": 1.2251938581466675, "learning_rate": 1.65234375e-05, "loss": 0.10716438293457031, "step": 283 }, { "epoch": 0.03839459231094212, "grad_norm": 1.5043450593948364, "learning_rate": 1.6582031250000002e-05, "loss": 0.0943303108215332, "step": 284 }, { "epoch": 0.0385297845373891, "grad_norm": 1.2747001647949219, "learning_rate": 1.6640625e-05, "loss": 0.08355045318603516, "step": 285 }, { "epoch": 0.03866497676383608, "grad_norm": 2.7686452865600586, "learning_rate": 1.669921875e-05, "loss": 0.09137535095214844, "step": 286 }, { "epoch": 0.03880016899028306, "grad_norm": 1.7852996587753296, "learning_rate": 1.67578125e-05, "loss": 0.09190750122070312, "step": 287 }, { "epoch": 0.03893536121673004, "grad_norm": 1.1456475257873535, "learning_rate": 1.681640625e-05, "loss": 0.1054677963256836, "step": 288 }, { "epoch": 0.03907055344317702, "grad_norm": 0.7634332180023193, "learning_rate": 1.6875e-05, "loss": 0.06901717185974121, "step": 289 }, { "epoch": 0.039205745669623995, "grad_norm": 2.010342836380005, "learning_rate": 1.693359375e-05, "loss": 0.06561470031738281, "step": 290 }, { "epoch": 0.039340937896070974, "grad_norm": 3.22692608833313, "learning_rate": 1.69921875e-05, "loss": 0.07588434219360352, "step": 291 }, { "epoch": 0.03947613012251795, "grad_norm": 2.1595005989074707, "learning_rate": 1.705078125e-05, "loss": 0.14582252502441406, "step": 292 }, { "epoch": 0.03961132234896494, "grad_norm": 1.0591042041778564, "learning_rate": 1.7109375e-05, "loss": 0.09330081939697266, "step": 293 }, { "epoch": 0.039746514575411916, "grad_norm": 1.6329455375671387, "learning_rate": 1.7167968750000002e-05, "loss": 0.09622001647949219, "step": 294 }, { "epoch": 0.039881706801858895, "grad_norm": 1.439279556274414, "learning_rate": 1.72265625e-05, "loss": 0.12204265594482422, "step": 295 }, { "epoch": 0.04001689902830587, "grad_norm": 1.1472564935684204, "learning_rate": 1.728515625e-05, "loss": 0.048249244689941406, "step": 296 }, { "epoch": 0.04015209125475285, "grad_norm": 1.2087730169296265, "learning_rate": 1.734375e-05, "loss": 0.11009597778320312, "step": 297 }, { "epoch": 0.04028728348119983, "grad_norm": 1.881333827972412, "learning_rate": 1.740234375e-05, "loss": 0.1336355209350586, "step": 298 }, { "epoch": 0.04042247570764681, "grad_norm": 1.5699728727340698, "learning_rate": 1.74609375e-05, "loss": 0.11317634582519531, "step": 299 }, { "epoch": 0.04055766793409379, "grad_norm": 3.4574596881866455, "learning_rate": 1.751953125e-05, "loss": 0.09913825988769531, "step": 300 }, { "epoch": 0.040692860160540766, "grad_norm": 2.1715574264526367, "learning_rate": 1.7578125000000002e-05, "loss": 0.09168434143066406, "step": 301 }, { "epoch": 0.04082805238698775, "grad_norm": 0.5499640107154846, "learning_rate": 1.763671875e-05, "loss": 0.0647573471069336, "step": 302 }, { "epoch": 0.04096324461343473, "grad_norm": 1.5822196006774902, "learning_rate": 1.76953125e-05, "loss": 0.06601619720458984, "step": 303 }, { "epoch": 0.04109843683988171, "grad_norm": 0.46376851201057434, "learning_rate": 1.775390625e-05, "loss": 0.08853340148925781, "step": 304 }, { "epoch": 0.041233629066328686, "grad_norm": 1.0824228525161743, "learning_rate": 1.78125e-05, "loss": 0.1126103401184082, "step": 305 }, { "epoch": 0.041368821292775665, "grad_norm": 1.072993278503418, "learning_rate": 1.787109375e-05, "loss": 0.12059402465820312, "step": 306 }, { "epoch": 0.04150401351922264, "grad_norm": 1.2036006450653076, "learning_rate": 1.79296875e-05, "loss": 0.061367034912109375, "step": 307 }, { "epoch": 0.04163920574566962, "grad_norm": 0.7556990385055542, "learning_rate": 1.798828125e-05, "loss": 0.10070037841796875, "step": 308 }, { "epoch": 0.0417743979721166, "grad_norm": 1.9353309869766235, "learning_rate": 1.8046875e-05, "loss": 0.07542753219604492, "step": 309 }, { "epoch": 0.041909590198563586, "grad_norm": 1.5867691040039062, "learning_rate": 1.810546875e-05, "loss": 0.08946514129638672, "step": 310 }, { "epoch": 0.042044782425010564, "grad_norm": 1.0343576669692993, "learning_rate": 1.8164062500000002e-05, "loss": 0.15060901641845703, "step": 311 }, { "epoch": 0.04217997465145754, "grad_norm": 1.1521289348602295, "learning_rate": 1.822265625e-05, "loss": 0.10712528228759766, "step": 312 }, { "epoch": 0.04231516687790452, "grad_norm": 1.2042758464813232, "learning_rate": 1.828125e-05, "loss": 0.11998486518859863, "step": 313 }, { "epoch": 0.0424503591043515, "grad_norm": 1.1514405012130737, "learning_rate": 1.833984375e-05, "loss": 0.0639185905456543, "step": 314 }, { "epoch": 0.04258555133079848, "grad_norm": 0.6923871636390686, "learning_rate": 1.83984375e-05, "loss": 0.09173774719238281, "step": 315 }, { "epoch": 0.04272074355724546, "grad_norm": 0.998264491558075, "learning_rate": 1.845703125e-05, "loss": 0.12207412719726562, "step": 316 }, { "epoch": 0.042855935783692435, "grad_norm": 2.2990736961364746, "learning_rate": 1.8515625e-05, "loss": 0.07476043701171875, "step": 317 }, { "epoch": 0.042991128010139414, "grad_norm": 1.482649564743042, "learning_rate": 1.8574218750000002e-05, "loss": 0.108489990234375, "step": 318 }, { "epoch": 0.0431263202365864, "grad_norm": 2.9330074787139893, "learning_rate": 1.86328125e-05, "loss": 0.06729316711425781, "step": 319 }, { "epoch": 0.04326151246303338, "grad_norm": 3.1757407188415527, "learning_rate": 1.869140625e-05, "loss": 0.07111549377441406, "step": 320 }, { "epoch": 0.043396704689480356, "grad_norm": 1.5996159315109253, "learning_rate": 1.8750000000000002e-05, "loss": 0.10639572143554688, "step": 321 }, { "epoch": 0.043531896915927334, "grad_norm": 1.336880087852478, "learning_rate": 1.880859375e-05, "loss": 0.07693672180175781, "step": 322 }, { "epoch": 0.04366708914237431, "grad_norm": 0.7539777755737305, "learning_rate": 1.88671875e-05, "loss": 0.07776546478271484, "step": 323 }, { "epoch": 0.04380228136882129, "grad_norm": 1.1080620288848877, "learning_rate": 1.892578125e-05, "loss": 0.09150028228759766, "step": 324 }, { "epoch": 0.04393747359526827, "grad_norm": 1.0383034944534302, "learning_rate": 1.8984375e-05, "loss": 0.08949756622314453, "step": 325 }, { "epoch": 0.04407266582171525, "grad_norm": 1.2968395948410034, "learning_rate": 1.904296875e-05, "loss": 0.0734872817993164, "step": 326 }, { "epoch": 0.044207858048162234, "grad_norm": 1.0070537328720093, "learning_rate": 1.91015625e-05, "loss": 0.06596803665161133, "step": 327 }, { "epoch": 0.04434305027460921, "grad_norm": 0.8868251442909241, "learning_rate": 1.9160156250000002e-05, "loss": 0.10872650146484375, "step": 328 }, { "epoch": 0.04447824250105619, "grad_norm": 1.2364088296890259, "learning_rate": 1.921875e-05, "loss": 0.08606529235839844, "step": 329 }, { "epoch": 0.04461343472750317, "grad_norm": 0.915959894657135, "learning_rate": 1.927734375e-05, "loss": 0.091278076171875, "step": 330 }, { "epoch": 0.04474862695395015, "grad_norm": 1.4230494499206543, "learning_rate": 1.93359375e-05, "loss": 0.10262727737426758, "step": 331 }, { "epoch": 0.044883819180397126, "grad_norm": 2.434359550476074, "learning_rate": 1.939453125e-05, "loss": 0.10689163208007812, "step": 332 }, { "epoch": 0.045019011406844105, "grad_norm": 0.5046936273574829, "learning_rate": 1.9453125e-05, "loss": 0.05563926696777344, "step": 333 }, { "epoch": 0.04515420363329108, "grad_norm": 1.49765944480896, "learning_rate": 1.951171875e-05, "loss": 0.11042022705078125, "step": 334 }, { "epoch": 0.04528939585973806, "grad_norm": 1.485113501548767, "learning_rate": 1.95703125e-05, "loss": 0.06636238098144531, "step": 335 }, { "epoch": 0.04542458808618505, "grad_norm": 3.4412622451782227, "learning_rate": 1.962890625e-05, "loss": 0.11137104034423828, "step": 336 }, { "epoch": 0.045559780312632026, "grad_norm": 1.7870159149169922, "learning_rate": 1.96875e-05, "loss": 0.06279373168945312, "step": 337 }, { "epoch": 0.045694972539079004, "grad_norm": 1.9660131931304932, "learning_rate": 1.9746093750000002e-05, "loss": 0.13500404357910156, "step": 338 }, { "epoch": 0.04583016476552598, "grad_norm": 1.4462882280349731, "learning_rate": 1.98046875e-05, "loss": 0.08622026443481445, "step": 339 }, { "epoch": 0.04596535699197296, "grad_norm": 0.8645960092544556, "learning_rate": 1.986328125e-05, "loss": 0.034657955169677734, "step": 340 }, { "epoch": 0.04610054921841994, "grad_norm": 9.027908325195312, "learning_rate": 1.9921875e-05, "loss": 0.1717844009399414, "step": 341 }, { "epoch": 0.04623574144486692, "grad_norm": 2.525975227355957, "learning_rate": 1.998046875e-05, "loss": 0.0699930191040039, "step": 342 }, { "epoch": 0.046370933671313896, "grad_norm": 1.6707751750946045, "learning_rate": 2.00390625e-05, "loss": 0.08284282684326172, "step": 343 }, { "epoch": 0.04650612589776088, "grad_norm": 0.4815549850463867, "learning_rate": 2.009765625e-05, "loss": 0.06775760650634766, "step": 344 }, { "epoch": 0.04664131812420786, "grad_norm": 1.3863520622253418, "learning_rate": 2.0156250000000002e-05, "loss": 0.07233905792236328, "step": 345 }, { "epoch": 0.04677651035065484, "grad_norm": 1.3405689001083374, "learning_rate": 2.021484375e-05, "loss": 0.06769132614135742, "step": 346 }, { "epoch": 0.04691170257710182, "grad_norm": 0.4060302972793579, "learning_rate": 2.02734375e-05, "loss": 0.057675838470458984, "step": 347 }, { "epoch": 0.047046894803548796, "grad_norm": 0.8686118721961975, "learning_rate": 2.033203125e-05, "loss": 0.12314987182617188, "step": 348 }, { "epoch": 0.047182087029995774, "grad_norm": 1.619500994682312, "learning_rate": 2.0390625e-05, "loss": 0.11477994918823242, "step": 349 }, { "epoch": 0.04731727925644275, "grad_norm": 0.8877691626548767, "learning_rate": 2.044921875e-05, "loss": 0.11084270477294922, "step": 350 }, { "epoch": 0.04745247148288973, "grad_norm": 1.370432734489441, "learning_rate": 2.05078125e-05, "loss": 0.08461380004882812, "step": 351 }, { "epoch": 0.04758766370933671, "grad_norm": 0.5697827935218811, "learning_rate": 2.056640625e-05, "loss": 0.07113170623779297, "step": 352 }, { "epoch": 0.047722855935783695, "grad_norm": 0.863343358039856, "learning_rate": 2.0625e-05, "loss": 0.06659460067749023, "step": 353 }, { "epoch": 0.047858048162230674, "grad_norm": 0.5409537553787231, "learning_rate": 2.068359375e-05, "loss": 0.06585693359375, "step": 354 }, { "epoch": 0.04799324038867765, "grad_norm": 1.2570327520370483, "learning_rate": 2.0742187500000002e-05, "loss": 0.06066417694091797, "step": 355 }, { "epoch": 0.04812843261512463, "grad_norm": 0.9887579083442688, "learning_rate": 2.080078125e-05, "loss": 0.07993316650390625, "step": 356 }, { "epoch": 0.04826362484157161, "grad_norm": 1.390712857246399, "learning_rate": 2.0859375e-05, "loss": 0.1002206802368164, "step": 357 }, { "epoch": 0.04839881706801859, "grad_norm": 2.349412441253662, "learning_rate": 2.091796875e-05, "loss": 0.08269977569580078, "step": 358 }, { "epoch": 0.048534009294465566, "grad_norm": 2.8065319061279297, "learning_rate": 2.09765625e-05, "loss": 0.1237192153930664, "step": 359 }, { "epoch": 0.048669201520912544, "grad_norm": 0.7555651068687439, "learning_rate": 2.103515625e-05, "loss": 0.06234538555145264, "step": 360 }, { "epoch": 0.04880439374735953, "grad_norm": 2.7890522480010986, "learning_rate": 2.109375e-05, "loss": 0.11326980590820312, "step": 361 }, { "epoch": 0.04893958597380651, "grad_norm": 2.958678722381592, "learning_rate": 2.1152343750000002e-05, "loss": 0.10531044006347656, "step": 362 }, { "epoch": 0.04907477820025349, "grad_norm": 0.7495773434638977, "learning_rate": 2.12109375e-05, "loss": 0.09909248352050781, "step": 363 }, { "epoch": 0.049209970426700465, "grad_norm": 1.170852541923523, "learning_rate": 2.126953125e-05, "loss": 0.118499755859375, "step": 364 }, { "epoch": 0.049345162653147444, "grad_norm": 1.6964222192764282, "learning_rate": 2.1328125000000002e-05, "loss": 0.09187841415405273, "step": 365 }, { "epoch": 0.04948035487959442, "grad_norm": 2.094663381576538, "learning_rate": 2.138671875e-05, "loss": 0.1099853515625, "step": 366 }, { "epoch": 0.0496155471060414, "grad_norm": 0.7523356080055237, "learning_rate": 2.14453125e-05, "loss": 0.11729049682617188, "step": 367 }, { "epoch": 0.04975073933248838, "grad_norm": 2.4578635692596436, "learning_rate": 2.150390625e-05, "loss": 0.10619497299194336, "step": 368 }, { "epoch": 0.049885931558935365, "grad_norm": 3.7947957515716553, "learning_rate": 2.15625e-05, "loss": 0.12723445892333984, "step": 369 }, { "epoch": 0.05002112378538234, "grad_norm": 1.2074638605117798, "learning_rate": 2.162109375e-05, "loss": 0.04551982879638672, "step": 370 }, { "epoch": 0.05015631601182932, "grad_norm": 0.4961264431476593, "learning_rate": 2.16796875e-05, "loss": 0.06826496124267578, "step": 371 }, { "epoch": 0.0502915082382763, "grad_norm": 0.8231317400932312, "learning_rate": 2.1738281250000002e-05, "loss": 0.07170963287353516, "step": 372 }, { "epoch": 0.05042670046472328, "grad_norm": 2.801870822906494, "learning_rate": 2.1796875e-05, "loss": 0.12439346313476562, "step": 373 }, { "epoch": 0.05056189269117026, "grad_norm": 1.2951346635818481, "learning_rate": 2.185546875e-05, "loss": 0.093963623046875, "step": 374 }, { "epoch": 0.050697084917617236, "grad_norm": 0.5000646710395813, "learning_rate": 2.19140625e-05, "loss": 0.08770179748535156, "step": 375 }, { "epoch": 0.050832277144064214, "grad_norm": 0.9898878931999207, "learning_rate": 2.197265625e-05, "loss": 0.11335372924804688, "step": 376 }, { "epoch": 0.05096746937051119, "grad_norm": 2.055985927581787, "learning_rate": 2.203125e-05, "loss": 0.1132669448852539, "step": 377 }, { "epoch": 0.05110266159695818, "grad_norm": 1.0124309062957764, "learning_rate": 2.208984375e-05, "loss": 0.11417198181152344, "step": 378 }, { "epoch": 0.051237853823405156, "grad_norm": 1.9621731042861938, "learning_rate": 2.2148437500000002e-05, "loss": 0.07587289810180664, "step": 379 }, { "epoch": 0.051373046049852135, "grad_norm": 3.2625293731689453, "learning_rate": 2.220703125e-05, "loss": 0.1237955093383789, "step": 380 }, { "epoch": 0.05150823827629911, "grad_norm": 1.7194373607635498, "learning_rate": 2.2265625e-05, "loss": 0.12086820602416992, "step": 381 }, { "epoch": 0.05164343050274609, "grad_norm": 1.6917966604232788, "learning_rate": 2.2324218750000002e-05, "loss": 0.08769416809082031, "step": 382 }, { "epoch": 0.05177862272919307, "grad_norm": 1.7035692930221558, "learning_rate": 2.23828125e-05, "loss": 0.13475990295410156, "step": 383 }, { "epoch": 0.05191381495564005, "grad_norm": 1.4051333665847778, "learning_rate": 2.244140625e-05, "loss": 0.06197834014892578, "step": 384 }, { "epoch": 0.05204900718208703, "grad_norm": 0.9884541034698486, "learning_rate": 2.25e-05, "loss": 0.09939861297607422, "step": 385 }, { "epoch": 0.05218419940853401, "grad_norm": 0.5881649851799011, "learning_rate": 2.255859375e-05, "loss": 0.06579351425170898, "step": 386 }, { "epoch": 0.05231939163498099, "grad_norm": 0.9547070264816284, "learning_rate": 2.26171875e-05, "loss": 0.07893943786621094, "step": 387 }, { "epoch": 0.05245458386142797, "grad_norm": 0.5487326383590698, "learning_rate": 2.267578125e-05, "loss": 0.08156633377075195, "step": 388 }, { "epoch": 0.05258977608787495, "grad_norm": 0.8872190713882446, "learning_rate": 2.2734375000000002e-05, "loss": 0.07341861724853516, "step": 389 }, { "epoch": 0.05272496831432193, "grad_norm": 1.4447367191314697, "learning_rate": 2.279296875e-05, "loss": 0.11373615264892578, "step": 390 }, { "epoch": 0.052860160540768905, "grad_norm": 1.5204302072525024, "learning_rate": 2.28515625e-05, "loss": 0.05935955047607422, "step": 391 }, { "epoch": 0.052995352767215884, "grad_norm": 1.0304832458496094, "learning_rate": 2.291015625e-05, "loss": 0.07056713104248047, "step": 392 }, { "epoch": 0.05313054499366286, "grad_norm": 1.544775128364563, "learning_rate": 2.296875e-05, "loss": 0.05887651443481445, "step": 393 }, { "epoch": 0.05326573722010984, "grad_norm": 0.6929005980491638, "learning_rate": 2.302734375e-05, "loss": 0.06090068817138672, "step": 394 }, { "epoch": 0.053400929446556826, "grad_norm": 0.5169737935066223, "learning_rate": 2.30859375e-05, "loss": 0.08879148960113525, "step": 395 }, { "epoch": 0.053536121673003804, "grad_norm": 0.952409565448761, "learning_rate": 2.3144531250000002e-05, "loss": 0.07410049438476562, "step": 396 }, { "epoch": 0.05367131389945078, "grad_norm": 0.9403892159461975, "learning_rate": 2.3203125e-05, "loss": 0.1253814697265625, "step": 397 }, { "epoch": 0.05380650612589776, "grad_norm": 0.6398392915725708, "learning_rate": 2.326171875e-05, "loss": 0.12230050563812256, "step": 398 }, { "epoch": 0.05394169835234474, "grad_norm": 2.4542503356933594, "learning_rate": 2.3320312500000002e-05, "loss": 0.08579373359680176, "step": 399 }, { "epoch": 0.05407689057879172, "grad_norm": 1.6023656129837036, "learning_rate": 2.337890625e-05, "loss": 0.14700698852539062, "step": 400 }, { "epoch": 0.0542120828052387, "grad_norm": 1.851426601409912, "learning_rate": 2.34375e-05, "loss": 0.09523773193359375, "step": 401 }, { "epoch": 0.054347275031685675, "grad_norm": 0.9876133799552917, "learning_rate": 2.349609375e-05, "loss": 0.09573745727539062, "step": 402 }, { "epoch": 0.05448246725813266, "grad_norm": 0.7136473655700684, "learning_rate": 2.35546875e-05, "loss": 0.07299518585205078, "step": 403 }, { "epoch": 0.05461765948457964, "grad_norm": 1.0516611337661743, "learning_rate": 2.361328125e-05, "loss": 0.09270524978637695, "step": 404 }, { "epoch": 0.05475285171102662, "grad_norm": 0.8308907747268677, "learning_rate": 2.3671875e-05, "loss": 0.05745649337768555, "step": 405 }, { "epoch": 0.054888043937473596, "grad_norm": 0.47025609016418457, "learning_rate": 2.3730468750000002e-05, "loss": 0.08206653594970703, "step": 406 }, { "epoch": 0.055023236163920575, "grad_norm": 0.8204769492149353, "learning_rate": 2.37890625e-05, "loss": 0.06442022323608398, "step": 407 }, { "epoch": 0.05515842839036755, "grad_norm": 0.8910301923751831, "learning_rate": 2.384765625e-05, "loss": 0.08228254318237305, "step": 408 }, { "epoch": 0.05529362061681453, "grad_norm": 1.1042304039001465, "learning_rate": 2.3906250000000002e-05, "loss": 0.10942554473876953, "step": 409 }, { "epoch": 0.05542881284326151, "grad_norm": 1.2192918062210083, "learning_rate": 2.396484375e-05, "loss": 0.08187246322631836, "step": 410 }, { "epoch": 0.05556400506970849, "grad_norm": 1.1344430446624756, "learning_rate": 2.40234375e-05, "loss": 0.1009821891784668, "step": 411 }, { "epoch": 0.055699197296155474, "grad_norm": 0.9855709671974182, "learning_rate": 2.408203125e-05, "loss": 0.09816837310791016, "step": 412 }, { "epoch": 0.05583438952260245, "grad_norm": 1.6689993143081665, "learning_rate": 2.4140625e-05, "loss": 0.09561729431152344, "step": 413 }, { "epoch": 0.05596958174904943, "grad_norm": 3.0833699703216553, "learning_rate": 2.419921875e-05, "loss": 0.10691452026367188, "step": 414 }, { "epoch": 0.05610477397549641, "grad_norm": 0.7255855202674866, "learning_rate": 2.42578125e-05, "loss": 0.04642772674560547, "step": 415 }, { "epoch": 0.05623996620194339, "grad_norm": 1.850696325302124, "learning_rate": 2.4316406250000002e-05, "loss": 0.1008908748626709, "step": 416 }, { "epoch": 0.056375158428390366, "grad_norm": 2.2940292358398438, "learning_rate": 2.4375e-05, "loss": 0.11898565292358398, "step": 417 }, { "epoch": 0.056510350654837345, "grad_norm": 0.6170001029968262, "learning_rate": 2.443359375e-05, "loss": 0.059403419494628906, "step": 418 }, { "epoch": 0.05664554288128432, "grad_norm": 4.38313102722168, "learning_rate": 2.44921875e-05, "loss": 0.1190958023071289, "step": 419 }, { "epoch": 0.05678073510773131, "grad_norm": 1.159766435623169, "learning_rate": 2.455078125e-05, "loss": 0.04065656661987305, "step": 420 }, { "epoch": 0.05691592733417829, "grad_norm": 2.5286099910736084, "learning_rate": 2.4609375e-05, "loss": 0.07746124267578125, "step": 421 }, { "epoch": 0.057051119560625266, "grad_norm": 1.0204311609268188, "learning_rate": 2.466796875e-05, "loss": 0.11473560333251953, "step": 422 }, { "epoch": 0.057186311787072244, "grad_norm": 2.9297640323638916, "learning_rate": 2.4726562500000002e-05, "loss": 0.10918998718261719, "step": 423 }, { "epoch": 0.05732150401351922, "grad_norm": 2.293121576309204, "learning_rate": 2.478515625e-05, "loss": 0.0850973129272461, "step": 424 }, { "epoch": 0.0574566962399662, "grad_norm": 2.8647897243499756, "learning_rate": 2.484375e-05, "loss": 0.07971739768981934, "step": 425 }, { "epoch": 0.05759188846641318, "grad_norm": 1.8048826456069946, "learning_rate": 2.4902343750000002e-05, "loss": 0.10360240936279297, "step": 426 }, { "epoch": 0.05772708069286016, "grad_norm": 1.325268268585205, "learning_rate": 2.49609375e-05, "loss": 0.0766916275024414, "step": 427 }, { "epoch": 0.05786227291930714, "grad_norm": 0.8526578545570374, "learning_rate": 2.501953125e-05, "loss": 0.10811042785644531, "step": 428 }, { "epoch": 0.05799746514575412, "grad_norm": 0.8991419076919556, "learning_rate": 2.5078125e-05, "loss": 0.06367301940917969, "step": 429 }, { "epoch": 0.0581326573722011, "grad_norm": 1.5593284368515015, "learning_rate": 2.513671875e-05, "loss": 0.09201622009277344, "step": 430 }, { "epoch": 0.05826784959864808, "grad_norm": 0.803339421749115, "learning_rate": 2.51953125e-05, "loss": 0.06955718994140625, "step": 431 }, { "epoch": 0.05840304182509506, "grad_norm": 1.2454171180725098, "learning_rate": 2.525390625e-05, "loss": 0.058101654052734375, "step": 432 }, { "epoch": 0.058538234051542036, "grad_norm": 1.0445276498794556, "learning_rate": 2.5312500000000002e-05, "loss": 0.09754657745361328, "step": 433 }, { "epoch": 0.058673426277989014, "grad_norm": 1.1726139783859253, "learning_rate": 2.537109375e-05, "loss": 0.0903177261352539, "step": 434 }, { "epoch": 0.05880861850443599, "grad_norm": 2.145634174346924, "learning_rate": 2.54296875e-05, "loss": 0.0947575569152832, "step": 435 }, { "epoch": 0.05894381073088297, "grad_norm": 2.7240688800811768, "learning_rate": 2.548828125e-05, "loss": 0.09523487091064453, "step": 436 }, { "epoch": 0.05907900295732996, "grad_norm": 1.097942590713501, "learning_rate": 2.5546875e-05, "loss": 0.06391668319702148, "step": 437 }, { "epoch": 0.059214195183776935, "grad_norm": 2.3833532333374023, "learning_rate": 2.560546875e-05, "loss": 0.10647773742675781, "step": 438 }, { "epoch": 0.059349387410223914, "grad_norm": 3.94865083694458, "learning_rate": 2.56640625e-05, "loss": 0.11064720153808594, "step": 439 }, { "epoch": 0.05948457963667089, "grad_norm": 2.3660669326782227, "learning_rate": 2.5722656250000002e-05, "loss": 0.10022509098052979, "step": 440 }, { "epoch": 0.05961977186311787, "grad_norm": 1.014296531677246, "learning_rate": 2.578125e-05, "loss": 0.06762075424194336, "step": 441 }, { "epoch": 0.05975496408956485, "grad_norm": 0.9329538941383362, "learning_rate": 2.583984375e-05, "loss": 0.07561779022216797, "step": 442 }, { "epoch": 0.05989015631601183, "grad_norm": 2.7418301105499268, "learning_rate": 2.5898437500000002e-05, "loss": 0.10692977905273438, "step": 443 }, { "epoch": 0.060025348542458806, "grad_norm": 1.9461236000061035, "learning_rate": 2.595703125e-05, "loss": 0.08919239044189453, "step": 444 }, { "epoch": 0.060160540768905785, "grad_norm": 0.5753958821296692, "learning_rate": 2.6015625e-05, "loss": 0.1025991439819336, "step": 445 }, { "epoch": 0.06029573299535277, "grad_norm": 0.7839550375938416, "learning_rate": 2.607421875e-05, "loss": 0.06124305725097656, "step": 446 }, { "epoch": 0.06043092522179975, "grad_norm": 1.8102830648422241, "learning_rate": 2.61328125e-05, "loss": 0.12053298950195312, "step": 447 }, { "epoch": 0.06056611744824673, "grad_norm": 1.0977753400802612, "learning_rate": 2.619140625e-05, "loss": 0.07920074462890625, "step": 448 }, { "epoch": 0.060701309674693706, "grad_norm": 0.3934282064437866, "learning_rate": 2.625e-05, "loss": 0.09472990036010742, "step": 449 }, { "epoch": 0.060836501901140684, "grad_norm": 3.4103548526763916, "learning_rate": 2.6308593750000002e-05, "loss": 0.08573007583618164, "step": 450 }, { "epoch": 0.06097169412758766, "grad_norm": 4.015721321105957, "learning_rate": 2.63671875e-05, "loss": 0.09718036651611328, "step": 451 }, { "epoch": 0.06110688635403464, "grad_norm": 0.9889941215515137, "learning_rate": 2.642578125e-05, "loss": 0.06290102005004883, "step": 452 }, { "epoch": 0.06124207858048162, "grad_norm": 2.233210563659668, "learning_rate": 2.6484375000000002e-05, "loss": 0.11217117309570312, "step": 453 }, { "epoch": 0.061377270806928605, "grad_norm": 0.9601654410362244, "learning_rate": 2.654296875e-05, "loss": 0.07436752319335938, "step": 454 }, { "epoch": 0.06151246303337558, "grad_norm": 1.0864660739898682, "learning_rate": 2.66015625e-05, "loss": 0.10283088684082031, "step": 455 }, { "epoch": 0.06164765525982256, "grad_norm": 0.8739877939224243, "learning_rate": 2.666015625e-05, "loss": 0.09707832336425781, "step": 456 }, { "epoch": 0.06178284748626954, "grad_norm": 1.828110694885254, "learning_rate": 2.6718750000000002e-05, "loss": 0.09204626083374023, "step": 457 }, { "epoch": 0.06191803971271652, "grad_norm": 1.8099445104599, "learning_rate": 2.677734375e-05, "loss": 0.084808349609375, "step": 458 }, { "epoch": 0.0620532319391635, "grad_norm": 1.5391427278518677, "learning_rate": 2.68359375e-05, "loss": 0.06592464447021484, "step": 459 }, { "epoch": 0.062188424165610476, "grad_norm": 1.0284428596496582, "learning_rate": 2.6894531250000002e-05, "loss": 0.0708761215209961, "step": 460 }, { "epoch": 0.062323616392057454, "grad_norm": 1.2352813482284546, "learning_rate": 2.6953125e-05, "loss": 0.10225677490234375, "step": 461 }, { "epoch": 0.06245880861850443, "grad_norm": 0.6165651679039001, "learning_rate": 2.701171875e-05, "loss": 0.06885337829589844, "step": 462 }, { "epoch": 0.06259400084495141, "grad_norm": 1.559402346611023, "learning_rate": 2.70703125e-05, "loss": 0.11022090911865234, "step": 463 }, { "epoch": 0.06272919307139839, "grad_norm": 0.6769663691520691, "learning_rate": 2.712890625e-05, "loss": 0.0871734619140625, "step": 464 }, { "epoch": 0.06286438529784537, "grad_norm": 1.1884297132492065, "learning_rate": 2.71875e-05, "loss": 0.12218093872070312, "step": 465 }, { "epoch": 0.06299957752429235, "grad_norm": 1.8552953004837036, "learning_rate": 2.724609375e-05, "loss": 0.09126853942871094, "step": 466 }, { "epoch": 0.06313476975073934, "grad_norm": 3.1213207244873047, "learning_rate": 2.7304687500000002e-05, "loss": 0.12496566772460938, "step": 467 }, { "epoch": 0.06326996197718632, "grad_norm": 0.7156252861022949, "learning_rate": 2.736328125e-05, "loss": 0.07934856414794922, "step": 468 }, { "epoch": 0.0634051542036333, "grad_norm": 0.7846072912216187, "learning_rate": 2.7421875e-05, "loss": 0.08756685256958008, "step": 469 }, { "epoch": 0.06354034643008027, "grad_norm": 2.3148536682128906, "learning_rate": 2.7480468750000002e-05, "loss": 0.07439613342285156, "step": 470 }, { "epoch": 0.06367553865652725, "grad_norm": 5.831628799438477, "learning_rate": 2.75390625e-05, "loss": 0.11340999603271484, "step": 471 }, { "epoch": 0.06381073088297423, "grad_norm": 1.986263632774353, "learning_rate": 2.759765625e-05, "loss": 0.06171226501464844, "step": 472 }, { "epoch": 0.06394592310942121, "grad_norm": 3.016879081726074, "learning_rate": 2.765625e-05, "loss": 0.08681917190551758, "step": 473 }, { "epoch": 0.06408111533586819, "grad_norm": 0.9517513513565063, "learning_rate": 2.7714843750000002e-05, "loss": 0.07973384857177734, "step": 474 }, { "epoch": 0.06421630756231517, "grad_norm": 1.4492366313934326, "learning_rate": 2.77734375e-05, "loss": 0.057143211364746094, "step": 475 }, { "epoch": 0.06435149978876215, "grad_norm": 2.5386431217193604, "learning_rate": 2.783203125e-05, "loss": 0.0702970027923584, "step": 476 }, { "epoch": 0.06448669201520912, "grad_norm": 2.8083841800689697, "learning_rate": 2.7890625000000002e-05, "loss": 0.09427070617675781, "step": 477 }, { "epoch": 0.0646218842416561, "grad_norm": 2.863194227218628, "learning_rate": 2.794921875e-05, "loss": 0.1324458122253418, "step": 478 }, { "epoch": 0.06475707646810308, "grad_norm": 1.1017186641693115, "learning_rate": 2.80078125e-05, "loss": 0.07815027236938477, "step": 479 }, { "epoch": 0.06489226869455006, "grad_norm": 1.798361897468567, "learning_rate": 2.806640625e-05, "loss": 0.09364700317382812, "step": 480 }, { "epoch": 0.06502746092099704, "grad_norm": 0.5833501815795898, "learning_rate": 2.8125e-05, "loss": 0.058249473571777344, "step": 481 }, { "epoch": 0.06516265314744402, "grad_norm": 2.1234350204467773, "learning_rate": 2.818359375e-05, "loss": 0.08389091491699219, "step": 482 }, { "epoch": 0.065297845373891, "grad_norm": 2.4928934574127197, "learning_rate": 2.82421875e-05, "loss": 0.13441944122314453, "step": 483 }, { "epoch": 0.06543303760033799, "grad_norm": 2.3466925621032715, "learning_rate": 2.8300781250000002e-05, "loss": 0.15199065208435059, "step": 484 }, { "epoch": 0.06556822982678497, "grad_norm": 1.0683932304382324, "learning_rate": 2.8359375e-05, "loss": 0.12136125564575195, "step": 485 }, { "epoch": 0.06570342205323194, "grad_norm": 1.038776159286499, "learning_rate": 2.841796875e-05, "loss": 0.07250213623046875, "step": 486 }, { "epoch": 0.06583861427967892, "grad_norm": 1.1282142400741577, "learning_rate": 2.8476562500000002e-05, "loss": 0.09813117980957031, "step": 487 }, { "epoch": 0.0659738065061259, "grad_norm": 2.1990718841552734, "learning_rate": 2.853515625e-05, "loss": 0.1062326431274414, "step": 488 }, { "epoch": 0.06610899873257288, "grad_norm": 1.2486003637313843, "learning_rate": 2.859375e-05, "loss": 0.08194351196289062, "step": 489 }, { "epoch": 0.06624419095901986, "grad_norm": 1.2207564115524292, "learning_rate": 2.865234375e-05, "loss": 0.08218860626220703, "step": 490 }, { "epoch": 0.06637938318546684, "grad_norm": 0.8832204937934875, "learning_rate": 2.87109375e-05, "loss": 0.1184835433959961, "step": 491 }, { "epoch": 0.06651457541191381, "grad_norm": 0.8516805171966553, "learning_rate": 2.876953125e-05, "loss": 0.06719732284545898, "step": 492 }, { "epoch": 0.0666497676383608, "grad_norm": 3.5853073596954346, "learning_rate": 2.8828125e-05, "loss": 0.09199237823486328, "step": 493 }, { "epoch": 0.06678495986480777, "grad_norm": 2.824306011199951, "learning_rate": 2.8886718750000002e-05, "loss": 0.07332611083984375, "step": 494 }, { "epoch": 0.06692015209125475, "grad_norm": 2.6137425899505615, "learning_rate": 2.89453125e-05, "loss": 0.08916282653808594, "step": 495 }, { "epoch": 0.06705534431770173, "grad_norm": 0.6165325045585632, "learning_rate": 2.900390625e-05, "loss": 0.0583348274230957, "step": 496 }, { "epoch": 0.06719053654414871, "grad_norm": 1.457873821258545, "learning_rate": 2.90625e-05, "loss": 0.1300978660583496, "step": 497 }, { "epoch": 0.06732572877059569, "grad_norm": 0.6833505630493164, "learning_rate": 2.912109375e-05, "loss": 0.05837535858154297, "step": 498 }, { "epoch": 0.06746092099704266, "grad_norm": 0.8221961855888367, "learning_rate": 2.91796875e-05, "loss": 0.07027196884155273, "step": 499 }, { "epoch": 0.06759611322348964, "grad_norm": 0.7736613154411316, "learning_rate": 2.923828125e-05, "loss": 0.08208274841308594, "step": 500 }, { "epoch": 0.06773130544993664, "grad_norm": 0.4670673608779907, "learning_rate": 2.9296875000000002e-05, "loss": 0.0636606216430664, "step": 501 }, { "epoch": 0.06786649767638361, "grad_norm": 1.6846320629119873, "learning_rate": 2.935546875e-05, "loss": 0.07015705108642578, "step": 502 }, { "epoch": 0.06800168990283059, "grad_norm": 1.551141381263733, "learning_rate": 2.94140625e-05, "loss": 0.06168341636657715, "step": 503 }, { "epoch": 0.06813688212927757, "grad_norm": 0.6758229732513428, "learning_rate": 2.9472656250000002e-05, "loss": 0.09338092803955078, "step": 504 }, { "epoch": 0.06827207435572455, "grad_norm": 0.5300410985946655, "learning_rate": 2.953125e-05, "loss": 0.07401514053344727, "step": 505 }, { "epoch": 0.06840726658217153, "grad_norm": 1.1444934606552124, "learning_rate": 2.958984375e-05, "loss": 0.0963125228881836, "step": 506 }, { "epoch": 0.0685424588086185, "grad_norm": 0.7125914096832275, "learning_rate": 2.96484375e-05, "loss": 0.051430702209472656, "step": 507 }, { "epoch": 0.06867765103506548, "grad_norm": 0.7984012961387634, "learning_rate": 2.970703125e-05, "loss": 0.06642436981201172, "step": 508 }, { "epoch": 0.06881284326151246, "grad_norm": 1.0330177545547485, "learning_rate": 2.9765625e-05, "loss": 0.08015632629394531, "step": 509 }, { "epoch": 0.06894803548795944, "grad_norm": 0.9897464513778687, "learning_rate": 2.982421875e-05, "loss": 0.05432319641113281, "step": 510 }, { "epoch": 0.06908322771440642, "grad_norm": 1.3147339820861816, "learning_rate": 2.9882812500000002e-05, "loss": 0.09183740615844727, "step": 511 }, { "epoch": 0.0692184199408534, "grad_norm": 2.923064708709717, "learning_rate": 2.994140625e-05, "loss": 0.0987863540649414, "step": 512 }, { "epoch": 0.06935361216730038, "grad_norm": 2.062232732772827, "learning_rate": 3e-05, "loss": 0.06314706802368164, "step": 513 }, { "epoch": 0.06948880439374736, "grad_norm": 3.0970215797424316, "learning_rate": 2.9999998438460004e-05, "loss": 0.10694122314453125, "step": 514 }, { "epoch": 0.06962399662019433, "grad_norm": 0.9646059274673462, "learning_rate": 2.9999993753840344e-05, "loss": 0.04927396774291992, "step": 515 }, { "epoch": 0.06975918884664131, "grad_norm": 1.4721053838729858, "learning_rate": 2.9999985946141995e-05, "loss": 0.09446072578430176, "step": 516 }, { "epoch": 0.06989438107308829, "grad_norm": 1.6983895301818848, "learning_rate": 2.9999975015366586e-05, "loss": 0.07980012893676758, "step": 517 }, { "epoch": 0.07002957329953528, "grad_norm": 0.6937089562416077, "learning_rate": 2.9999960961516384e-05, "loss": 0.09464740753173828, "step": 518 }, { "epoch": 0.07016476552598226, "grad_norm": 1.892868995666504, "learning_rate": 2.9999943784594325e-05, "loss": 0.07539939880371094, "step": 519 }, { "epoch": 0.07029995775242924, "grad_norm": 3.4821112155914307, "learning_rate": 2.9999923484603975e-05, "loss": 0.08524513244628906, "step": 520 }, { "epoch": 0.07043514997887622, "grad_norm": 1.4239543676376343, "learning_rate": 2.999990006154957e-05, "loss": 0.09918570518493652, "step": 521 }, { "epoch": 0.0705703422053232, "grad_norm": 1.6605324745178223, "learning_rate": 2.9999873515435977e-05, "loss": 0.08107471466064453, "step": 522 }, { "epoch": 0.07070553443177018, "grad_norm": 3.454179286956787, "learning_rate": 2.9999843846268735e-05, "loss": 0.11911773681640625, "step": 523 }, { "epoch": 0.07084072665821715, "grad_norm": 3.45165753364563, "learning_rate": 2.9999811054054018e-05, "loss": 0.11735010147094727, "step": 524 }, { "epoch": 0.07097591888466413, "grad_norm": 4.04960298538208, "learning_rate": 2.9999775138798646e-05, "loss": 0.13319826126098633, "step": 525 }, { "epoch": 0.07111111111111111, "grad_norm": 1.6464530229568481, "learning_rate": 2.99997361005101e-05, "loss": 0.0993800163269043, "step": 526 }, { "epoch": 0.07124630333755809, "grad_norm": 1.0972402095794678, "learning_rate": 2.9999693939196513e-05, "loss": 0.09639739990234375, "step": 527 }, { "epoch": 0.07138149556400507, "grad_norm": 0.29994040727615356, "learning_rate": 2.999964865486666e-05, "loss": 0.04704856872558594, "step": 528 }, { "epoch": 0.07151668779045205, "grad_norm": 1.53493332862854, "learning_rate": 2.999960024752997e-05, "loss": 0.12227725982666016, "step": 529 }, { "epoch": 0.07165188001689902, "grad_norm": 1.67615807056427, "learning_rate": 2.9999548717196516e-05, "loss": 0.10790061950683594, "step": 530 }, { "epoch": 0.071787072243346, "grad_norm": 0.4506801962852478, "learning_rate": 2.999949406387703e-05, "loss": 0.06563091278076172, "step": 531 }, { "epoch": 0.07192226446979298, "grad_norm": 0.9371463060379028, "learning_rate": 2.9999436287582903e-05, "loss": 0.10554313659667969, "step": 532 }, { "epoch": 0.07205745669623996, "grad_norm": 2.043168783187866, "learning_rate": 2.9999375388326145e-05, "loss": 0.09876775741577148, "step": 533 }, { "epoch": 0.07219264892268694, "grad_norm": 1.5399434566497803, "learning_rate": 2.9999311366119447e-05, "loss": 0.057539939880371094, "step": 534 }, { "epoch": 0.07232784114913393, "grad_norm": 2.689091920852661, "learning_rate": 2.9999244220976137e-05, "loss": 0.11822700500488281, "step": 535 }, { "epoch": 0.07246303337558091, "grad_norm": 2.3212358951568604, "learning_rate": 2.9999173952910197e-05, "loss": 0.12038135528564453, "step": 536 }, { "epoch": 0.07259822560202789, "grad_norm": 1.0485259294509888, "learning_rate": 2.9999100561936252e-05, "loss": 0.10058116912841797, "step": 537 }, { "epoch": 0.07273341782847487, "grad_norm": 1.9497294425964355, "learning_rate": 2.9999024048069585e-05, "loss": 0.05536365509033203, "step": 538 }, { "epoch": 0.07286861005492185, "grad_norm": 2.2156689167022705, "learning_rate": 2.9998944411326127e-05, "loss": 0.07976913452148438, "step": 539 }, { "epoch": 0.07300380228136882, "grad_norm": 1.8322675228118896, "learning_rate": 2.999886165172246e-05, "loss": 0.07038617134094238, "step": 540 }, { "epoch": 0.0731389945078158, "grad_norm": 2.59919810295105, "learning_rate": 2.9998775769275814e-05, "loss": 0.1098175048828125, "step": 541 }, { "epoch": 0.07327418673426278, "grad_norm": 0.9037202000617981, "learning_rate": 2.9998686764004067e-05, "loss": 0.10444879531860352, "step": 542 }, { "epoch": 0.07340937896070976, "grad_norm": 1.077059030532837, "learning_rate": 2.9998594635925755e-05, "loss": 0.07860016822814941, "step": 543 }, { "epoch": 0.07354457118715674, "grad_norm": 1.3018779754638672, "learning_rate": 2.999849938506005e-05, "loss": 0.0791769027709961, "step": 544 }, { "epoch": 0.07367976341360372, "grad_norm": 0.6867576241493225, "learning_rate": 2.99984010114268e-05, "loss": 0.07667970657348633, "step": 545 }, { "epoch": 0.0738149556400507, "grad_norm": 0.7375957369804382, "learning_rate": 2.9998299515046475e-05, "loss": 0.07518553733825684, "step": 546 }, { "epoch": 0.07395014786649767, "grad_norm": 0.5696337223052979, "learning_rate": 2.9998194895940213e-05, "loss": 0.09038829803466797, "step": 547 }, { "epoch": 0.07408534009294465, "grad_norm": 0.7057362794876099, "learning_rate": 2.9998087154129792e-05, "loss": 0.06936836242675781, "step": 548 }, { "epoch": 0.07422053231939163, "grad_norm": 2.4675979614257812, "learning_rate": 2.9997976289637645e-05, "loss": 0.08766460418701172, "step": 549 }, { "epoch": 0.07435572454583861, "grad_norm": 0.5624940395355225, "learning_rate": 2.9997862302486855e-05, "loss": 0.06978332996368408, "step": 550 }, { "epoch": 0.07449091677228559, "grad_norm": 0.6041243672370911, "learning_rate": 2.9997745192701153e-05, "loss": 0.07013988494873047, "step": 551 }, { "epoch": 0.07462610899873258, "grad_norm": 0.5061952471733093, "learning_rate": 2.9997624960304926e-05, "loss": 0.061765193939208984, "step": 552 }, { "epoch": 0.07476130122517956, "grad_norm": 0.9293647408485413, "learning_rate": 2.9997501605323214e-05, "loss": 0.052285194396972656, "step": 553 }, { "epoch": 0.07489649345162654, "grad_norm": 2.0838630199432373, "learning_rate": 2.999737512778168e-05, "loss": 0.08160591125488281, "step": 554 }, { "epoch": 0.07503168567807351, "grad_norm": 0.5081800818443298, "learning_rate": 2.9997245527706674e-05, "loss": 0.07192039489746094, "step": 555 }, { "epoch": 0.0751668779045205, "grad_norm": 0.5362699031829834, "learning_rate": 2.999711280512517e-05, "loss": 0.03781700134277344, "step": 556 }, { "epoch": 0.07530207013096747, "grad_norm": 0.8696369528770447, "learning_rate": 2.9996976960064807e-05, "loss": 0.11155176162719727, "step": 557 }, { "epoch": 0.07543726235741445, "grad_norm": 0.7809555530548096, "learning_rate": 2.999683799255387e-05, "loss": 0.09868621826171875, "step": 558 }, { "epoch": 0.07557245458386143, "grad_norm": 0.7617474794387817, "learning_rate": 2.999669590262129e-05, "loss": 0.06615638732910156, "step": 559 }, { "epoch": 0.07570764681030841, "grad_norm": 1.0787569284439087, "learning_rate": 2.999655069029665e-05, "loss": 0.0860823392868042, "step": 560 }, { "epoch": 0.07584283903675539, "grad_norm": 0.6820898056030273, "learning_rate": 2.9996402355610183e-05, "loss": 0.068115234375, "step": 561 }, { "epoch": 0.07597803126320236, "grad_norm": 0.7910268306732178, "learning_rate": 2.9996250898592777e-05, "loss": 0.15956687927246094, "step": 562 }, { "epoch": 0.07611322348964934, "grad_norm": 2.779169797897339, "learning_rate": 2.9996096319275962e-05, "loss": 0.10142803192138672, "step": 563 }, { "epoch": 0.07624841571609632, "grad_norm": 2.24088454246521, "learning_rate": 2.9995938617691925e-05, "loss": 0.10528564453125, "step": 564 }, { "epoch": 0.0763836079425433, "grad_norm": 1.1034523248672485, "learning_rate": 2.9995777793873504e-05, "loss": 0.062438011169433594, "step": 565 }, { "epoch": 0.07651880016899028, "grad_norm": 0.7434017658233643, "learning_rate": 2.9995613847854176e-05, "loss": 0.08405876159667969, "step": 566 }, { "epoch": 0.07665399239543726, "grad_norm": 1.4872692823410034, "learning_rate": 2.9995446779668078e-05, "loss": 0.07397747039794922, "step": 567 }, { "epoch": 0.07678918462188423, "grad_norm": 0.9276036620140076, "learning_rate": 2.9995276589349992e-05, "loss": 0.08028507232666016, "step": 568 }, { "epoch": 0.07692437684833123, "grad_norm": 2.156606912612915, "learning_rate": 2.9995103276935357e-05, "loss": 0.11402225494384766, "step": 569 }, { "epoch": 0.0770595690747782, "grad_norm": 0.6237265467643738, "learning_rate": 2.9994926842460258e-05, "loss": 0.062169551849365234, "step": 570 }, { "epoch": 0.07719476130122518, "grad_norm": 1.6970484256744385, "learning_rate": 2.9994747285961428e-05, "loss": 0.15056133270263672, "step": 571 }, { "epoch": 0.07732995352767216, "grad_norm": 0.6036470532417297, "learning_rate": 2.9994564607476255e-05, "loss": 0.06957483291625977, "step": 572 }, { "epoch": 0.07746514575411914, "grad_norm": 1.0580317974090576, "learning_rate": 2.9994378807042762e-05, "loss": 0.07123374938964844, "step": 573 }, { "epoch": 0.07760033798056612, "grad_norm": 1.689844012260437, "learning_rate": 2.9994189884699647e-05, "loss": 0.0945281982421875, "step": 574 }, { "epoch": 0.0777355302070131, "grad_norm": 1.0948611497879028, "learning_rate": 2.9993997840486233e-05, "loss": 0.07328128814697266, "step": 575 }, { "epoch": 0.07787072243346008, "grad_norm": 2.237011432647705, "learning_rate": 2.9993802674442516e-05, "loss": 0.0851888656616211, "step": 576 }, { "epoch": 0.07800591465990706, "grad_norm": 0.6065527200698853, "learning_rate": 2.999360438660913e-05, "loss": 0.10452127456665039, "step": 577 }, { "epoch": 0.07814110688635403, "grad_norm": 1.1571671962738037, "learning_rate": 2.9993402977027346e-05, "loss": 0.09570884704589844, "step": 578 }, { "epoch": 0.07827629911280101, "grad_norm": 1.6818926334381104, "learning_rate": 2.999319844573911e-05, "loss": 0.10058212280273438, "step": 579 }, { "epoch": 0.07841149133924799, "grad_norm": 1.6151068210601807, "learning_rate": 2.9992990792787007e-05, "loss": 0.10038471221923828, "step": 580 }, { "epoch": 0.07854668356569497, "grad_norm": 0.8078140616416931, "learning_rate": 2.999278001821427e-05, "loss": 0.08383822441101074, "step": 581 }, { "epoch": 0.07868187579214195, "grad_norm": 1.0977928638458252, "learning_rate": 2.9992566122064775e-05, "loss": 0.10461854934692383, "step": 582 }, { "epoch": 0.07881706801858893, "grad_norm": 0.49909019470214844, "learning_rate": 2.999234910438307e-05, "loss": 0.0581972599029541, "step": 583 }, { "epoch": 0.0789522602450359, "grad_norm": 1.8760097026824951, "learning_rate": 2.999212896521433e-05, "loss": 0.08929443359375, "step": 584 }, { "epoch": 0.07908745247148288, "grad_norm": 2.0165154933929443, "learning_rate": 2.999190570460439e-05, "loss": 0.10365009307861328, "step": 585 }, { "epoch": 0.07922264469792988, "grad_norm": 0.3590303957462311, "learning_rate": 2.9991679322599734e-05, "loss": 0.0414276123046875, "step": 586 }, { "epoch": 0.07935783692437685, "grad_norm": 1.8026360273361206, "learning_rate": 2.9991449819247505e-05, "loss": 0.08381783962249756, "step": 587 }, { "epoch": 0.07949302915082383, "grad_norm": 1.1399129629135132, "learning_rate": 2.9991217194595474e-05, "loss": 0.07933759689331055, "step": 588 }, { "epoch": 0.07962822137727081, "grad_norm": 1.8184621334075928, "learning_rate": 2.9990981448692078e-05, "loss": 0.08505058288574219, "step": 589 }, { "epoch": 0.07976341360371779, "grad_norm": 0.7170165777206421, "learning_rate": 2.999074258158641e-05, "loss": 0.08453178405761719, "step": 590 }, { "epoch": 0.07989860583016477, "grad_norm": 1.4948357343673706, "learning_rate": 2.9990500593328192e-05, "loss": 0.08392477035522461, "step": 591 }, { "epoch": 0.08003379805661175, "grad_norm": 2.0151801109313965, "learning_rate": 2.999025548396781e-05, "loss": 0.06732606887817383, "step": 592 }, { "epoch": 0.08016899028305872, "grad_norm": 1.5872349739074707, "learning_rate": 2.9990007253556302e-05, "loss": 0.08662223815917969, "step": 593 }, { "epoch": 0.0803041825095057, "grad_norm": 0.9635038375854492, "learning_rate": 2.9989755902145345e-05, "loss": 0.08410263061523438, "step": 594 }, { "epoch": 0.08043937473595268, "grad_norm": 0.6852844953536987, "learning_rate": 2.9989501429787273e-05, "loss": 0.07874822616577148, "step": 595 }, { "epoch": 0.08057456696239966, "grad_norm": 0.37786686420440674, "learning_rate": 2.9989243836535073e-05, "loss": 0.0660257339477539, "step": 596 }, { "epoch": 0.08070975918884664, "grad_norm": 0.45019659399986267, "learning_rate": 2.998898312244237e-05, "loss": 0.07022261619567871, "step": 597 }, { "epoch": 0.08084495141529362, "grad_norm": 0.33326104283332825, "learning_rate": 2.9988719287563452e-05, "loss": 0.062285423278808594, "step": 598 }, { "epoch": 0.0809801436417406, "grad_norm": 0.47920557856559753, "learning_rate": 2.998845233195325e-05, "loss": 0.06540918350219727, "step": 599 }, { "epoch": 0.08111533586818757, "grad_norm": 0.6676343083381653, "learning_rate": 2.998818225566734e-05, "loss": 0.10930967330932617, "step": 600 }, { "epoch": 0.08125052809463455, "grad_norm": 1.1839637756347656, "learning_rate": 2.998790905876196e-05, "loss": 0.12560462951660156, "step": 601 }, { "epoch": 0.08138572032108153, "grad_norm": 0.7003291845321655, "learning_rate": 2.9987632741293987e-05, "loss": 0.07976245880126953, "step": 602 }, { "epoch": 0.08152091254752852, "grad_norm": 0.6434202194213867, "learning_rate": 2.998735330332096e-05, "loss": 0.09475851058959961, "step": 603 }, { "epoch": 0.0816561047739755, "grad_norm": 1.0487202405929565, "learning_rate": 2.9987070744901046e-05, "loss": 0.08417701721191406, "step": 604 }, { "epoch": 0.08179129700042248, "grad_norm": 1.0852630138397217, "learning_rate": 2.9986785066093084e-05, "loss": 0.10083460807800293, "step": 605 }, { "epoch": 0.08192648922686946, "grad_norm": 0.45663532614707947, "learning_rate": 2.9986496266956556e-05, "loss": 0.06810188293457031, "step": 606 }, { "epoch": 0.08206168145331644, "grad_norm": 1.3907688856124878, "learning_rate": 2.9986204347551583e-05, "loss": 0.11536788940429688, "step": 607 }, { "epoch": 0.08219687367976342, "grad_norm": 0.9038923382759094, "learning_rate": 2.9985909307938948e-05, "loss": 0.06625890731811523, "step": 608 }, { "epoch": 0.0823320659062104, "grad_norm": 0.8186825513839722, "learning_rate": 2.9985611148180082e-05, "loss": 0.08992385864257812, "step": 609 }, { "epoch": 0.08246725813265737, "grad_norm": 0.49454689025878906, "learning_rate": 2.9985309868337063e-05, "loss": 0.08014965057373047, "step": 610 }, { "epoch": 0.08260245035910435, "grad_norm": 0.7636528015136719, "learning_rate": 2.9985005468472617e-05, "loss": 0.06519794464111328, "step": 611 }, { "epoch": 0.08273764258555133, "grad_norm": 0.5517367720603943, "learning_rate": 2.9984697948650124e-05, "loss": 0.0753469467163086, "step": 612 }, { "epoch": 0.08287283481199831, "grad_norm": 1.6862266063690186, "learning_rate": 2.998438730893361e-05, "loss": 0.14296340942382812, "step": 613 }, { "epoch": 0.08300802703844529, "grad_norm": 1.0018746852874756, "learning_rate": 2.9984073549387747e-05, "loss": 0.09372568130493164, "step": 614 }, { "epoch": 0.08314321926489227, "grad_norm": 0.5324258804321289, "learning_rate": 2.998375667007787e-05, "loss": 0.10657310485839844, "step": 615 }, { "epoch": 0.08327841149133924, "grad_norm": 0.5407150983810425, "learning_rate": 2.998343667106995e-05, "loss": 0.06703996658325195, "step": 616 }, { "epoch": 0.08341360371778622, "grad_norm": 0.7302781343460083, "learning_rate": 2.9983113552430616e-05, "loss": 0.08555793762207031, "step": 617 }, { "epoch": 0.0835487959442332, "grad_norm": 1.0875376462936401, "learning_rate": 2.9982787314227134e-05, "loss": 0.0678713321685791, "step": 618 }, { "epoch": 0.08368398817068018, "grad_norm": 0.9090947508811951, "learning_rate": 2.998245795652744e-05, "loss": 0.06933879852294922, "step": 619 }, { "epoch": 0.08381918039712717, "grad_norm": 1.000343680381775, "learning_rate": 2.9982125479400106e-05, "loss": 0.07668018341064453, "step": 620 }, { "epoch": 0.08395437262357415, "grad_norm": 1.2728577852249146, "learning_rate": 2.9981789882914352e-05, "loss": 0.05016660690307617, "step": 621 }, { "epoch": 0.08408956485002113, "grad_norm": 0.5821818709373474, "learning_rate": 2.9981451167140048e-05, "loss": 0.07403802871704102, "step": 622 }, { "epoch": 0.08422475707646811, "grad_norm": 0.7069630026817322, "learning_rate": 2.9981109332147722e-05, "loss": 0.09298419952392578, "step": 623 }, { "epoch": 0.08435994930291509, "grad_norm": 2.1008784770965576, "learning_rate": 2.9980764378008545e-05, "loss": 0.09131908416748047, "step": 624 }, { "epoch": 0.08449514152936206, "grad_norm": 0.5936189293861389, "learning_rate": 2.9980416304794332e-05, "loss": 0.07242202758789062, "step": 625 }, { "epoch": 0.08463033375580904, "grad_norm": 0.5418726801872253, "learning_rate": 2.9980065112577565e-05, "loss": 0.07906627655029297, "step": 626 }, { "epoch": 0.08476552598225602, "grad_norm": 1.0324081182479858, "learning_rate": 2.9979710801431357e-05, "loss": 0.07719612121582031, "step": 627 }, { "epoch": 0.084900718208703, "grad_norm": 0.7520241737365723, "learning_rate": 2.997935337142948e-05, "loss": 0.06949472427368164, "step": 628 }, { "epoch": 0.08503591043514998, "grad_norm": 0.7859668135643005, "learning_rate": 2.9978992822646347e-05, "loss": 0.08112668991088867, "step": 629 }, { "epoch": 0.08517110266159696, "grad_norm": 0.7527902126312256, "learning_rate": 2.9978629155157036e-05, "loss": 0.08897209167480469, "step": 630 }, { "epoch": 0.08530629488804393, "grad_norm": 1.6249278783798218, "learning_rate": 2.9978262369037252e-05, "loss": 0.10718822479248047, "step": 631 }, { "epoch": 0.08544148711449091, "grad_norm": 1.6737232208251953, "learning_rate": 2.9977892464363375e-05, "loss": 0.09795284271240234, "step": 632 }, { "epoch": 0.08557667934093789, "grad_norm": 0.856467604637146, "learning_rate": 2.9977519441212412e-05, "loss": 0.12327003479003906, "step": 633 }, { "epoch": 0.08571187156738487, "grad_norm": 1.5771981477737427, "learning_rate": 2.9977143299662034e-05, "loss": 0.08191108703613281, "step": 634 }, { "epoch": 0.08584706379383185, "grad_norm": 1.770072102546692, "learning_rate": 2.997676403979055e-05, "loss": 0.11481571197509766, "step": 635 }, { "epoch": 0.08598225602027883, "grad_norm": 1.2036272287368774, "learning_rate": 2.997638166167693e-05, "loss": 0.10878849029541016, "step": 636 }, { "epoch": 0.08611744824672582, "grad_norm": 0.8349308967590332, "learning_rate": 2.9975996165400786e-05, "loss": 0.061860084533691406, "step": 637 }, { "epoch": 0.0862526404731728, "grad_norm": 1.7418475151062012, "learning_rate": 2.9975607551042373e-05, "loss": 0.06311798095703125, "step": 638 }, { "epoch": 0.08638783269961978, "grad_norm": 1.278281569480896, "learning_rate": 2.9975215818682607e-05, "loss": 0.09191131591796875, "step": 639 }, { "epoch": 0.08652302492606675, "grad_norm": 1.303808569908142, "learning_rate": 2.9974820968403056e-05, "loss": 0.1275162696838379, "step": 640 }, { "epoch": 0.08665821715251373, "grad_norm": 3.023681640625, "learning_rate": 2.9974423000285923e-05, "loss": 0.12473678588867188, "step": 641 }, { "epoch": 0.08679340937896071, "grad_norm": 2.977800130844116, "learning_rate": 2.9974021914414068e-05, "loss": 0.07272624969482422, "step": 642 }, { "epoch": 0.08692860160540769, "grad_norm": 3.0921504497528076, "learning_rate": 2.9973617710871e-05, "loss": 0.14291000366210938, "step": 643 }, { "epoch": 0.08706379383185467, "grad_norm": 0.6507948637008667, "learning_rate": 2.997321038974087e-05, "loss": 0.093414306640625, "step": 644 }, { "epoch": 0.08719898605830165, "grad_norm": 1.0436996221542358, "learning_rate": 2.997279995110849e-05, "loss": 0.10811042785644531, "step": 645 }, { "epoch": 0.08733417828474863, "grad_norm": 0.9280104041099548, "learning_rate": 2.997238639505932e-05, "loss": 0.08973407745361328, "step": 646 }, { "epoch": 0.0874693705111956, "grad_norm": 0.880110502243042, "learning_rate": 2.997196972167946e-05, "loss": 0.06839847564697266, "step": 647 }, { "epoch": 0.08760456273764258, "grad_norm": 0.6661954522132874, "learning_rate": 2.9971549931055665e-05, "loss": 0.09742164611816406, "step": 648 }, { "epoch": 0.08773975496408956, "grad_norm": 0.5180718302726746, "learning_rate": 2.997112702327533e-05, "loss": 0.0813608169555664, "step": 649 }, { "epoch": 0.08787494719053654, "grad_norm": 0.32702475786209106, "learning_rate": 2.9970700998426518e-05, "loss": 0.05808305740356445, "step": 650 }, { "epoch": 0.08801013941698352, "grad_norm": 2.2169876098632812, "learning_rate": 2.9970271856597925e-05, "loss": 0.12061214447021484, "step": 651 }, { "epoch": 0.0881453316434305, "grad_norm": 0.3961057662963867, "learning_rate": 2.9969839597878896e-05, "loss": 0.04576587677001953, "step": 652 }, { "epoch": 0.08828052386987748, "grad_norm": 0.618942141532898, "learning_rate": 2.9969404222359436e-05, "loss": 0.08178424835205078, "step": 653 }, { "epoch": 0.08841571609632447, "grad_norm": 0.7832603454589844, "learning_rate": 2.9968965730130188e-05, "loss": 0.06732845306396484, "step": 654 }, { "epoch": 0.08855090832277145, "grad_norm": 1.4064054489135742, "learning_rate": 2.9968524121282455e-05, "loss": 0.09905624389648438, "step": 655 }, { "epoch": 0.08868610054921842, "grad_norm": 0.720722496509552, "learning_rate": 2.9968079395908178e-05, "loss": 0.04322624206542969, "step": 656 }, { "epoch": 0.0888212927756654, "grad_norm": 0.9421080350875854, "learning_rate": 2.9967631554099947e-05, "loss": 0.059384822845458984, "step": 657 }, { "epoch": 0.08895648500211238, "grad_norm": 1.0023547410964966, "learning_rate": 2.996718059595101e-05, "loss": 0.11681890487670898, "step": 658 }, { "epoch": 0.08909167722855936, "grad_norm": 0.48367545008659363, "learning_rate": 2.9966726521555265e-05, "loss": 0.053379058837890625, "step": 659 }, { "epoch": 0.08922686945500634, "grad_norm": 0.7467041611671448, "learning_rate": 2.996626933100724e-05, "loss": 0.09331703186035156, "step": 660 }, { "epoch": 0.08936206168145332, "grad_norm": 0.46111801266670227, "learning_rate": 2.996580902440213e-05, "loss": 0.05901908874511719, "step": 661 }, { "epoch": 0.0894972539079003, "grad_norm": 0.40190592408180237, "learning_rate": 2.9965345601835773e-05, "loss": 0.05935859680175781, "step": 662 }, { "epoch": 0.08963244613434727, "grad_norm": 1.0780490636825562, "learning_rate": 2.996487906340466e-05, "loss": 0.08236527442932129, "step": 663 }, { "epoch": 0.08976763836079425, "grad_norm": 0.5780622959136963, "learning_rate": 2.996440940920592e-05, "loss": 0.04762125015258789, "step": 664 }, { "epoch": 0.08990283058724123, "grad_norm": 0.5727117657661438, "learning_rate": 2.996393663933735e-05, "loss": 0.08030319213867188, "step": 665 }, { "epoch": 0.09003802281368821, "grad_norm": 1.2621744871139526, "learning_rate": 2.9963460753897364e-05, "loss": 0.08369064331054688, "step": 666 }, { "epoch": 0.09017321504013519, "grad_norm": 1.0983774662017822, "learning_rate": 2.996298175298506e-05, "loss": 0.0863642692565918, "step": 667 }, { "epoch": 0.09030840726658217, "grad_norm": 0.49387016892433167, "learning_rate": 2.996249963670016e-05, "loss": 0.09246349334716797, "step": 668 }, { "epoch": 0.09044359949302914, "grad_norm": 0.6467683911323547, "learning_rate": 2.9962014405143042e-05, "loss": 0.09410476684570312, "step": 669 }, { "epoch": 0.09057879171947612, "grad_norm": 1.23124361038208, "learning_rate": 2.9961526058414745e-05, "loss": 0.09699106216430664, "step": 670 }, { "epoch": 0.09071398394592312, "grad_norm": 0.7838457822799683, "learning_rate": 2.9961034596616936e-05, "loss": 0.0928945541381836, "step": 671 }, { "epoch": 0.0908491761723701, "grad_norm": 0.22324122488498688, "learning_rate": 2.996054001985194e-05, "loss": 0.03599286079406738, "step": 672 }, { "epoch": 0.09098436839881707, "grad_norm": 0.8104805946350098, "learning_rate": 2.9960042328222732e-05, "loss": 0.0686802864074707, "step": 673 }, { "epoch": 0.09111956062526405, "grad_norm": 0.5143696665763855, "learning_rate": 2.995954152183294e-05, "loss": 0.04830813407897949, "step": 674 }, { "epoch": 0.09125475285171103, "grad_norm": 1.3214099407196045, "learning_rate": 2.9959037600786822e-05, "loss": 0.10090303421020508, "step": 675 }, { "epoch": 0.09138994507815801, "grad_norm": 0.6124051809310913, "learning_rate": 2.9958530565189307e-05, "loss": 0.07726764678955078, "step": 676 }, { "epoch": 0.09152513730460499, "grad_norm": 1.5081795454025269, "learning_rate": 2.995802041514596e-05, "loss": 0.08885860443115234, "step": 677 }, { "epoch": 0.09166032953105197, "grad_norm": 0.8456224799156189, "learning_rate": 2.9957507150762996e-05, "loss": 0.06396722793579102, "step": 678 }, { "epoch": 0.09179552175749894, "grad_norm": 2.4486069679260254, "learning_rate": 2.9956990772147283e-05, "loss": 0.09366321563720703, "step": 679 }, { "epoch": 0.09193071398394592, "grad_norm": 0.7520796060562134, "learning_rate": 2.9956471279406324e-05, "loss": 0.08934783935546875, "step": 680 }, { "epoch": 0.0920659062103929, "grad_norm": 1.2322204113006592, "learning_rate": 2.9955948672648298e-05, "loss": 0.06775808334350586, "step": 681 }, { "epoch": 0.09220109843683988, "grad_norm": 2.079535961151123, "learning_rate": 2.9955422951981994e-05, "loss": 0.09503650665283203, "step": 682 }, { "epoch": 0.09233629066328686, "grad_norm": 2.127537250518799, "learning_rate": 2.995489411751688e-05, "loss": 0.08817386627197266, "step": 683 }, { "epoch": 0.09247148288973384, "grad_norm": 1.3528122901916504, "learning_rate": 2.9954362169363064e-05, "loss": 0.17700481414794922, "step": 684 }, { "epoch": 0.09260667511618081, "grad_norm": 2.128573179244995, "learning_rate": 2.99538271076313e-05, "loss": 0.10328292846679688, "step": 685 }, { "epoch": 0.09274186734262779, "grad_norm": 0.7017504572868347, "learning_rate": 2.9953288932432985e-05, "loss": 0.08335733413696289, "step": 686 }, { "epoch": 0.09287705956907477, "grad_norm": 1.8741538524627686, "learning_rate": 2.995274764388018e-05, "loss": 0.09028053283691406, "step": 687 }, { "epoch": 0.09301225179552176, "grad_norm": 0.9078563451766968, "learning_rate": 2.9952203242085566e-05, "loss": 0.06004953384399414, "step": 688 }, { "epoch": 0.09314744402196874, "grad_norm": 1.0448877811431885, "learning_rate": 2.995165572716251e-05, "loss": 0.09534072875976562, "step": 689 }, { "epoch": 0.09328263624841572, "grad_norm": 0.7695907950401306, "learning_rate": 2.9951105099225003e-05, "loss": 0.08654117584228516, "step": 690 }, { "epoch": 0.0934178284748627, "grad_norm": 0.4433549642562866, "learning_rate": 2.995055135838768e-05, "loss": 0.05588960647583008, "step": 691 }, { "epoch": 0.09355302070130968, "grad_norm": 0.5443503856658936, "learning_rate": 2.994999450476584e-05, "loss": 0.10817337036132812, "step": 692 }, { "epoch": 0.09368821292775666, "grad_norm": 0.6996601223945618, "learning_rate": 2.9949434538475414e-05, "loss": 0.08685922622680664, "step": 693 }, { "epoch": 0.09382340515420363, "grad_norm": 0.4138985574245453, "learning_rate": 2.9948871459633008e-05, "loss": 0.07134532928466797, "step": 694 }, { "epoch": 0.09395859738065061, "grad_norm": 0.5843410491943359, "learning_rate": 2.994830526835584e-05, "loss": 0.09716653823852539, "step": 695 }, { "epoch": 0.09409378960709759, "grad_norm": 0.43319714069366455, "learning_rate": 2.9947735964761803e-05, "loss": 0.08893775939941406, "step": 696 }, { "epoch": 0.09422898183354457, "grad_norm": 0.7768867015838623, "learning_rate": 2.9947163548969428e-05, "loss": 0.08929061889648438, "step": 697 }, { "epoch": 0.09436417405999155, "grad_norm": 0.18085744976997375, "learning_rate": 2.9946588021097893e-05, "loss": 0.03959512710571289, "step": 698 }, { "epoch": 0.09449936628643853, "grad_norm": 0.6254941821098328, "learning_rate": 2.9946009381267028e-05, "loss": 0.03760814666748047, "step": 699 }, { "epoch": 0.0946345585128855, "grad_norm": 0.48299524188041687, "learning_rate": 2.9945427629597306e-05, "loss": 0.0595393180847168, "step": 700 }, { "epoch": 0.09476975073933248, "grad_norm": 1.3369132280349731, "learning_rate": 2.9944842766209853e-05, "loss": 0.08656501770019531, "step": 701 }, { "epoch": 0.09490494296577946, "grad_norm": 0.4847789704799652, "learning_rate": 2.9944254791226444e-05, "loss": 0.09263086318969727, "step": 702 }, { "epoch": 0.09504013519222644, "grad_norm": 0.9162623882293701, "learning_rate": 2.994366370476949e-05, "loss": 0.11437129974365234, "step": 703 }, { "epoch": 0.09517532741867342, "grad_norm": 1.4687793254852295, "learning_rate": 2.9943069506962067e-05, "loss": 0.09958362579345703, "step": 704 }, { "epoch": 0.09531051964512041, "grad_norm": 0.5143060684204102, "learning_rate": 2.9942472197927886e-05, "loss": 0.05807340145111084, "step": 705 }, { "epoch": 0.09544571187156739, "grad_norm": 1.066678524017334, "learning_rate": 2.994187177779131e-05, "loss": 0.11996746063232422, "step": 706 }, { "epoch": 0.09558090409801437, "grad_norm": 0.4829036593437195, "learning_rate": 2.9941268246677353e-05, "loss": 0.05228614807128906, "step": 707 }, { "epoch": 0.09571609632446135, "grad_norm": 0.8587409853935242, "learning_rate": 2.9940661604711664e-05, "loss": 0.0664682388305664, "step": 708 }, { "epoch": 0.09585128855090833, "grad_norm": 1.2800363302230835, "learning_rate": 2.994005185202056e-05, "loss": 0.07017230987548828, "step": 709 }, { "epoch": 0.0959864807773553, "grad_norm": 1.7786747217178345, "learning_rate": 2.9939438988730986e-05, "loss": 0.08867454528808594, "step": 710 }, { "epoch": 0.09612167300380228, "grad_norm": 0.5550428628921509, "learning_rate": 2.9938823014970553e-05, "loss": 0.08253812789916992, "step": 711 }, { "epoch": 0.09625686523024926, "grad_norm": 3.2433102130889893, "learning_rate": 2.99382039308675e-05, "loss": 0.10310506820678711, "step": 712 }, { "epoch": 0.09639205745669624, "grad_norm": 2.4590277671813965, "learning_rate": 2.993758173655073e-05, "loss": 0.12313079833984375, "step": 713 }, { "epoch": 0.09652724968314322, "grad_norm": 1.180315375328064, "learning_rate": 2.993695643214979e-05, "loss": 0.05230712890625, "step": 714 }, { "epoch": 0.0966624419095902, "grad_norm": 1.1300855875015259, "learning_rate": 2.9936328017794864e-05, "loss": 0.09466934204101562, "step": 715 }, { "epoch": 0.09679763413603718, "grad_norm": 1.2884372472763062, "learning_rate": 2.9935696493616796e-05, "loss": 0.08497238159179688, "step": 716 }, { "epoch": 0.09693282636248415, "grad_norm": 1.4714183807373047, "learning_rate": 2.9935061859747065e-05, "loss": 0.08114802837371826, "step": 717 }, { "epoch": 0.09706801858893113, "grad_norm": 1.5180643796920776, "learning_rate": 2.993442411631782e-05, "loss": 0.06940174102783203, "step": 718 }, { "epoch": 0.09720321081537811, "grad_norm": 1.6803812980651855, "learning_rate": 2.9933783263461827e-05, "loss": 0.0589139461517334, "step": 719 }, { "epoch": 0.09733840304182509, "grad_norm": 1.2122961282730103, "learning_rate": 2.9933139301312526e-05, "loss": 0.0832834243774414, "step": 720 }, { "epoch": 0.09747359526827207, "grad_norm": 0.6643701791763306, "learning_rate": 2.9932492230003984e-05, "loss": 0.051471710205078125, "step": 721 }, { "epoch": 0.09760878749471906, "grad_norm": 0.9927834868431091, "learning_rate": 2.993184204967094e-05, "loss": 0.0924386978149414, "step": 722 }, { "epoch": 0.09774397972116604, "grad_norm": 0.8113033771514893, "learning_rate": 2.9931188760448748e-05, "loss": 0.0583806037902832, "step": 723 }, { "epoch": 0.09787917194761302, "grad_norm": 1.3883392810821533, "learning_rate": 2.9930532362473433e-05, "loss": 0.09034204483032227, "step": 724 }, { "epoch": 0.09801436417406, "grad_norm": 0.8473360538482666, "learning_rate": 2.9929872855881663e-05, "loss": 0.06917667388916016, "step": 725 }, { "epoch": 0.09814955640050697, "grad_norm": 0.36346328258514404, "learning_rate": 2.9929210240810744e-05, "loss": 0.08127927780151367, "step": 726 }, { "epoch": 0.09828474862695395, "grad_norm": 0.8365683555603027, "learning_rate": 2.9928544517398644e-05, "loss": 0.0607752799987793, "step": 727 }, { "epoch": 0.09841994085340093, "grad_norm": 0.430542916059494, "learning_rate": 2.9927875685783966e-05, "loss": 0.06436395645141602, "step": 728 }, { "epoch": 0.09855513307984791, "grad_norm": 0.6196367740631104, "learning_rate": 2.9927203746105968e-05, "loss": 0.07642745971679688, "step": 729 }, { "epoch": 0.09869032530629489, "grad_norm": 2.0761096477508545, "learning_rate": 2.9926528698504546e-05, "loss": 0.10896492004394531, "step": 730 }, { "epoch": 0.09882551753274187, "grad_norm": 0.8039914965629578, "learning_rate": 2.992585054312025e-05, "loss": 0.09295988082885742, "step": 731 }, { "epoch": 0.09896070975918884, "grad_norm": 0.9510341286659241, "learning_rate": 2.9925169280094278e-05, "loss": 0.1031789779663086, "step": 732 }, { "epoch": 0.09909590198563582, "grad_norm": 0.2380317896604538, "learning_rate": 2.9924484909568472e-05, "loss": 0.034404754638671875, "step": 733 }, { "epoch": 0.0992310942120828, "grad_norm": 0.2911379933357239, "learning_rate": 2.9923797431685322e-05, "loss": 0.08263731002807617, "step": 734 }, { "epoch": 0.09936628643852978, "grad_norm": 0.30056285858154297, "learning_rate": 2.992310684658796e-05, "loss": 0.057534217834472656, "step": 735 }, { "epoch": 0.09950147866497676, "grad_norm": 0.9018310308456421, "learning_rate": 2.9922413154420173e-05, "loss": 0.0929574966430664, "step": 736 }, { "epoch": 0.09963667089142374, "grad_norm": 0.37030431628227234, "learning_rate": 2.9921716355326393e-05, "loss": 0.06572103500366211, "step": 737 }, { "epoch": 0.09977186311787073, "grad_norm": 0.59752357006073, "learning_rate": 2.9921016449451695e-05, "loss": 0.08301687240600586, "step": 738 }, { "epoch": 0.09990705534431771, "grad_norm": 1.3171252012252808, "learning_rate": 2.9920313436941805e-05, "loss": 0.0990591049194336, "step": 739 }, { "epoch": 0.10004224757076469, "grad_norm": 0.27812743186950684, "learning_rate": 2.991960731794309e-05, "loss": 0.055756568908691406, "step": 740 }, { "epoch": 0.10017743979721166, "grad_norm": 0.3967611789703369, "learning_rate": 2.991889809260257e-05, "loss": 0.07887506484985352, "step": 741 }, { "epoch": 0.10031263202365864, "grad_norm": 0.5462791323661804, "learning_rate": 2.9918185761067912e-05, "loss": 0.08826637268066406, "step": 742 }, { "epoch": 0.10044782425010562, "grad_norm": 0.430637389421463, "learning_rate": 2.9917470323487423e-05, "loss": 0.08333444595336914, "step": 743 }, { "epoch": 0.1005830164765526, "grad_norm": 0.7959412932395935, "learning_rate": 2.9916751780010063e-05, "loss": 0.05894875526428223, "step": 744 }, { "epoch": 0.10071820870299958, "grad_norm": 0.6657803058624268, "learning_rate": 2.9916030130785436e-05, "loss": 0.0435028076171875, "step": 745 }, { "epoch": 0.10085340092944656, "grad_norm": 0.5945706963539124, "learning_rate": 2.99153053759638e-05, "loss": 0.10613536834716797, "step": 746 }, { "epoch": 0.10098859315589354, "grad_norm": 0.40436336398124695, "learning_rate": 2.991457751569604e-05, "loss": 0.06596684455871582, "step": 747 }, { "epoch": 0.10112378538234051, "grad_norm": 1.403603196144104, "learning_rate": 2.991384655013371e-05, "loss": 0.1388988494873047, "step": 748 }, { "epoch": 0.10125897760878749, "grad_norm": 0.8612577319145203, "learning_rate": 2.9913112479429e-05, "loss": 0.10822081565856934, "step": 749 }, { "epoch": 0.10139416983523447, "grad_norm": 0.4296482801437378, "learning_rate": 2.991237530373474e-05, "loss": 0.08353328704833984, "step": 750 }, { "epoch": 0.10152936206168145, "grad_norm": 1.9338592290878296, "learning_rate": 2.9911635023204423e-05, "loss": 0.12401151657104492, "step": 751 }, { "epoch": 0.10166455428812843, "grad_norm": 0.8141088485717773, "learning_rate": 2.9910891637992172e-05, "loss": 0.08863067626953125, "step": 752 }, { "epoch": 0.1017997465145754, "grad_norm": 0.4315641224384308, "learning_rate": 2.991014514825277e-05, "loss": 0.08157825469970703, "step": 753 }, { "epoch": 0.10193493874102239, "grad_norm": 0.6997880339622498, "learning_rate": 2.9909395554141638e-05, "loss": 0.06783151626586914, "step": 754 }, { "epoch": 0.10207013096746938, "grad_norm": 1.4606138467788696, "learning_rate": 2.9908642855814844e-05, "loss": 0.08960342407226562, "step": 755 }, { "epoch": 0.10220532319391636, "grad_norm": 0.4545760452747345, "learning_rate": 2.9907887053429107e-05, "loss": 0.0668487548828125, "step": 756 }, { "epoch": 0.10234051542036333, "grad_norm": 0.4696957767009735, "learning_rate": 2.9907128147141783e-05, "loss": 0.07184481620788574, "step": 757 }, { "epoch": 0.10247570764681031, "grad_norm": 0.5667471885681152, "learning_rate": 2.990636613711089e-05, "loss": 0.08581137657165527, "step": 758 }, { "epoch": 0.10261089987325729, "grad_norm": 0.37433671951293945, "learning_rate": 2.990560102349507e-05, "loss": 0.03728783130645752, "step": 759 }, { "epoch": 0.10274609209970427, "grad_norm": 1.2046866416931152, "learning_rate": 2.9904832806453635e-05, "loss": 0.07507181167602539, "step": 760 }, { "epoch": 0.10288128432615125, "grad_norm": 0.5006322860717773, "learning_rate": 2.9904061486146524e-05, "loss": 0.0571751594543457, "step": 761 }, { "epoch": 0.10301647655259823, "grad_norm": 1.884344220161438, "learning_rate": 2.9903287062734333e-05, "loss": 0.10209131240844727, "step": 762 }, { "epoch": 0.1031516687790452, "grad_norm": 0.3833383321762085, "learning_rate": 2.990250953637831e-05, "loss": 0.07235002517700195, "step": 763 }, { "epoch": 0.10328686100549218, "grad_norm": 1.8122638463974, "learning_rate": 2.9901728907240326e-05, "loss": 0.07389068603515625, "step": 764 }, { "epoch": 0.10342205323193916, "grad_norm": 1.4250800609588623, "learning_rate": 2.9900945175482916e-05, "loss": 0.07757329940795898, "step": 765 }, { "epoch": 0.10355724545838614, "grad_norm": 1.2942662239074707, "learning_rate": 2.990015834126926e-05, "loss": 0.1118459701538086, "step": 766 }, { "epoch": 0.10369243768483312, "grad_norm": 0.535193145275116, "learning_rate": 2.989936840476318e-05, "loss": 0.07560348510742188, "step": 767 }, { "epoch": 0.1038276299112801, "grad_norm": 1.116136908531189, "learning_rate": 2.9898575366129145e-05, "loss": 0.07685685157775879, "step": 768 }, { "epoch": 0.10396282213772708, "grad_norm": 1.636549949645996, "learning_rate": 2.9897779225532273e-05, "loss": 0.06873607635498047, "step": 769 }, { "epoch": 0.10409801436417405, "grad_norm": 0.851189374923706, "learning_rate": 2.989697998313832e-05, "loss": 0.07567405700683594, "step": 770 }, { "epoch": 0.10423320659062103, "grad_norm": 0.8681046962738037, "learning_rate": 2.989617763911369e-05, "loss": 0.08973884582519531, "step": 771 }, { "epoch": 0.10436839881706803, "grad_norm": 0.6807007193565369, "learning_rate": 2.9895372193625442e-05, "loss": 0.09481048583984375, "step": 772 }, { "epoch": 0.104503591043515, "grad_norm": 1.2062404155731201, "learning_rate": 2.9894563646841273e-05, "loss": 0.07929611206054688, "step": 773 }, { "epoch": 0.10463878326996198, "grad_norm": 1.149382472038269, "learning_rate": 2.9893751998929523e-05, "loss": 0.08654499053955078, "step": 774 }, { "epoch": 0.10477397549640896, "grad_norm": 1.5561285018920898, "learning_rate": 2.9892937250059187e-05, "loss": 0.09778118133544922, "step": 775 }, { "epoch": 0.10490916772285594, "grad_norm": 0.5485138297080994, "learning_rate": 2.9892119400399894e-05, "loss": 0.07680368423461914, "step": 776 }, { "epoch": 0.10504435994930292, "grad_norm": 0.43881773948669434, "learning_rate": 2.989129845012193e-05, "loss": 0.07778644561767578, "step": 777 }, { "epoch": 0.1051795521757499, "grad_norm": 1.080625057220459, "learning_rate": 2.989047439939621e-05, "loss": 0.08548808097839355, "step": 778 }, { "epoch": 0.10531474440219687, "grad_norm": 0.38331812620162964, "learning_rate": 2.9889647248394324e-05, "loss": 0.08388328552246094, "step": 779 }, { "epoch": 0.10544993662864385, "grad_norm": 1.2832900285720825, "learning_rate": 2.9888816997288475e-05, "loss": 0.09711027145385742, "step": 780 }, { "epoch": 0.10558512885509083, "grad_norm": 0.751434862613678, "learning_rate": 2.988798364625153e-05, "loss": 0.08174657821655273, "step": 781 }, { "epoch": 0.10572032108153781, "grad_norm": 1.0784751176834106, "learning_rate": 2.9887147195457002e-05, "loss": 0.11003780364990234, "step": 782 }, { "epoch": 0.10585551330798479, "grad_norm": 0.450376957654953, "learning_rate": 2.9886307645079037e-05, "loss": 0.09079742431640625, "step": 783 }, { "epoch": 0.10599070553443177, "grad_norm": 1.3440659046173096, "learning_rate": 2.9885464995292436e-05, "loss": 0.1103525161743164, "step": 784 }, { "epoch": 0.10612589776087875, "grad_norm": 0.4448350667953491, "learning_rate": 2.9884619246272648e-05, "loss": 0.07683658599853516, "step": 785 }, { "epoch": 0.10626108998732572, "grad_norm": 1.2078944444656372, "learning_rate": 2.988377039819575e-05, "loss": 0.06465387344360352, "step": 786 }, { "epoch": 0.1063962822137727, "grad_norm": 1.4633591175079346, "learning_rate": 2.9882918451238494e-05, "loss": 0.049219608306884766, "step": 787 }, { "epoch": 0.10653147444021968, "grad_norm": 1.1847048997879028, "learning_rate": 2.988206340557825e-05, "loss": 0.04823780059814453, "step": 788 }, { "epoch": 0.10666666666666667, "grad_norm": 0.5670963525772095, "learning_rate": 2.9881205261393037e-05, "loss": 0.10341835021972656, "step": 789 }, { "epoch": 0.10680185889311365, "grad_norm": 0.8065640926361084, "learning_rate": 2.988034401886154e-05, "loss": 0.08946609497070312, "step": 790 }, { "epoch": 0.10693705111956063, "grad_norm": 1.5516135692596436, "learning_rate": 2.9879479678163065e-05, "loss": 0.15784835815429688, "step": 791 }, { "epoch": 0.10707224334600761, "grad_norm": 0.7580604553222656, "learning_rate": 2.9878612239477568e-05, "loss": 0.08668923377990723, "step": 792 }, { "epoch": 0.10720743557245459, "grad_norm": 1.5237256288528442, "learning_rate": 2.9877741702985666e-05, "loss": 0.11019420623779297, "step": 793 }, { "epoch": 0.10734262779890157, "grad_norm": 0.6032150387763977, "learning_rate": 2.98768680688686e-05, "loss": 0.07812643051147461, "step": 794 }, { "epoch": 0.10747782002534854, "grad_norm": 0.8539298176765442, "learning_rate": 2.9875991337308274e-05, "loss": 0.07448577880859375, "step": 795 }, { "epoch": 0.10761301225179552, "grad_norm": 0.5719842314720154, "learning_rate": 2.987511150848722e-05, "loss": 0.05310940742492676, "step": 796 }, { "epoch": 0.1077482044782425, "grad_norm": 0.7173001170158386, "learning_rate": 2.9874228582588627e-05, "loss": 0.07994794845581055, "step": 797 }, { "epoch": 0.10788339670468948, "grad_norm": 0.686671257019043, "learning_rate": 2.9873342559796325e-05, "loss": 0.09230518341064453, "step": 798 }, { "epoch": 0.10801858893113646, "grad_norm": 0.7101694345474243, "learning_rate": 2.9872453440294787e-05, "loss": 0.07753467559814453, "step": 799 }, { "epoch": 0.10815378115758344, "grad_norm": 1.979806900024414, "learning_rate": 2.9871561224269134e-05, "loss": 0.13216018676757812, "step": 800 }, { "epoch": 0.10828897338403042, "grad_norm": 0.41649532318115234, "learning_rate": 2.9870665911905127e-05, "loss": 0.06540203094482422, "step": 801 }, { "epoch": 0.1084241656104774, "grad_norm": 0.6844872236251831, "learning_rate": 2.9869767503389176e-05, "loss": 0.07187485694885254, "step": 802 }, { "epoch": 0.10855935783692437, "grad_norm": 0.8601712584495544, "learning_rate": 2.986886599890834e-05, "loss": 0.08647775650024414, "step": 803 }, { "epoch": 0.10869455006337135, "grad_norm": 1.345866084098816, "learning_rate": 2.9867961398650306e-05, "loss": 0.06264054775238037, "step": 804 }, { "epoch": 0.10882974228981833, "grad_norm": 1.1698988676071167, "learning_rate": 2.9867053702803425e-05, "loss": 0.07759284973144531, "step": 805 }, { "epoch": 0.10896493451626532, "grad_norm": 0.3186224400997162, "learning_rate": 2.9866142911556685e-05, "loss": 0.07211780548095703, "step": 806 }, { "epoch": 0.1091001267427123, "grad_norm": 1.0271692276000977, "learning_rate": 2.9865229025099713e-05, "loss": 0.08307266235351562, "step": 807 }, { "epoch": 0.10923531896915928, "grad_norm": 0.8040181398391724, "learning_rate": 2.986431204362279e-05, "loss": 0.062419891357421875, "step": 808 }, { "epoch": 0.10937051119560626, "grad_norm": 1.5733109712600708, "learning_rate": 2.9863391967316835e-05, "loss": 0.08540821075439453, "step": 809 }, { "epoch": 0.10950570342205324, "grad_norm": 1.0525751113891602, "learning_rate": 2.9862468796373404e-05, "loss": 0.09246659278869629, "step": 810 }, { "epoch": 0.10964089564850021, "grad_norm": 1.5251206159591675, "learning_rate": 2.9861542530984718e-05, "loss": 0.0996556282043457, "step": 811 }, { "epoch": 0.10977608787494719, "grad_norm": 0.38562726974487305, "learning_rate": 2.9860613171343624e-05, "loss": 0.07490324974060059, "step": 812 }, { "epoch": 0.10991128010139417, "grad_norm": 0.6965151429176331, "learning_rate": 2.9859680717643623e-05, "loss": 0.08292293548583984, "step": 813 }, { "epoch": 0.11004647232784115, "grad_norm": 0.5826792120933533, "learning_rate": 2.985874517007885e-05, "loss": 0.06980037689208984, "step": 814 }, { "epoch": 0.11018166455428813, "grad_norm": 0.6408787369728088, "learning_rate": 2.98578065288441e-05, "loss": 0.0714254379272461, "step": 815 }, { "epoch": 0.1103168567807351, "grad_norm": 0.33082664012908936, "learning_rate": 2.9856864794134798e-05, "loss": 0.07348203659057617, "step": 816 }, { "epoch": 0.11045204900718208, "grad_norm": 0.5304411053657532, "learning_rate": 2.9855919966147025e-05, "loss": 0.0860593318939209, "step": 817 }, { "epoch": 0.11058724123362906, "grad_norm": 0.653801441192627, "learning_rate": 2.9854972045077485e-05, "loss": 0.0687417984008789, "step": 818 }, { "epoch": 0.11072243346007604, "grad_norm": 0.3759799003601074, "learning_rate": 2.9854021031123555e-05, "loss": 0.06639289855957031, "step": 819 }, { "epoch": 0.11085762568652302, "grad_norm": 1.0197070837020874, "learning_rate": 2.9853066924483232e-05, "loss": 0.06349563598632812, "step": 820 }, { "epoch": 0.11099281791297, "grad_norm": 0.19180253148078918, "learning_rate": 2.9852109725355173e-05, "loss": 0.039182186126708984, "step": 821 }, { "epoch": 0.11112801013941698, "grad_norm": 0.3551379442214966, "learning_rate": 2.9851149433938662e-05, "loss": 0.050806283950805664, "step": 822 }, { "epoch": 0.11126320236586397, "grad_norm": 0.4571462869644165, "learning_rate": 2.9850186050433645e-05, "loss": 0.0558934211730957, "step": 823 }, { "epoch": 0.11139839459231095, "grad_norm": 0.4020529091358185, "learning_rate": 2.9849219575040708e-05, "loss": 0.06947755813598633, "step": 824 }, { "epoch": 0.11153358681875793, "grad_norm": 0.38608673214912415, "learning_rate": 2.984825000796106e-05, "loss": 0.06527233123779297, "step": 825 }, { "epoch": 0.1116687790452049, "grad_norm": 0.6543492078781128, "learning_rate": 2.9847277349396586e-05, "loss": 0.12175655364990234, "step": 826 }, { "epoch": 0.11180397127165188, "grad_norm": 0.39084070920944214, "learning_rate": 2.984630159954979e-05, "loss": 0.07764720916748047, "step": 827 }, { "epoch": 0.11193916349809886, "grad_norm": 0.6771588325500488, "learning_rate": 2.9845322758623833e-05, "loss": 0.11553192138671875, "step": 828 }, { "epoch": 0.11207435572454584, "grad_norm": 1.0467925071716309, "learning_rate": 2.984434082682251e-05, "loss": 0.11504268646240234, "step": 829 }, { "epoch": 0.11220954795099282, "grad_norm": 0.32621562480926514, "learning_rate": 2.984335580435027e-05, "loss": 0.06376457214355469, "step": 830 }, { "epoch": 0.1123447401774398, "grad_norm": 0.9813115000724792, "learning_rate": 2.9842367691412192e-05, "loss": 0.0780191421508789, "step": 831 }, { "epoch": 0.11247993240388678, "grad_norm": 0.4412747025489807, "learning_rate": 2.9841376488214015e-05, "loss": 0.05402421951293945, "step": 832 }, { "epoch": 0.11261512463033375, "grad_norm": 0.511574387550354, "learning_rate": 2.984038219496211e-05, "loss": 0.1097269058227539, "step": 833 }, { "epoch": 0.11275031685678073, "grad_norm": 0.6203243136405945, "learning_rate": 2.9839384811863493e-05, "loss": 0.0794076919555664, "step": 834 }, { "epoch": 0.11288550908322771, "grad_norm": 0.48098886013031006, "learning_rate": 2.9838384339125824e-05, "loss": 0.06277823448181152, "step": 835 }, { "epoch": 0.11302070130967469, "grad_norm": 0.30106276273727417, "learning_rate": 2.9837380776957405e-05, "loss": 0.07796001434326172, "step": 836 }, { "epoch": 0.11315589353612167, "grad_norm": 0.6713687777519226, "learning_rate": 2.9836374125567193e-05, "loss": 0.11228370666503906, "step": 837 }, { "epoch": 0.11329108576256865, "grad_norm": 0.8277329802513123, "learning_rate": 2.9835364385164764e-05, "loss": 0.057244300842285156, "step": 838 }, { "epoch": 0.11342627798901563, "grad_norm": 0.5808928608894348, "learning_rate": 2.983435155596036e-05, "loss": 0.09830093383789062, "step": 839 }, { "epoch": 0.11356147021546262, "grad_norm": 0.46376940608024597, "learning_rate": 2.9833335638164858e-05, "loss": 0.08458471298217773, "step": 840 }, { "epoch": 0.1136966624419096, "grad_norm": 0.3028194010257721, "learning_rate": 2.9832316631989774e-05, "loss": 0.06242990493774414, "step": 841 }, { "epoch": 0.11383185466835657, "grad_norm": 0.3183048665523529, "learning_rate": 2.9831294537647272e-05, "loss": 0.07565617561340332, "step": 842 }, { "epoch": 0.11396704689480355, "grad_norm": 1.0704361200332642, "learning_rate": 2.9830269355350155e-05, "loss": 0.07597064971923828, "step": 843 }, { "epoch": 0.11410223912125053, "grad_norm": 0.7910330891609192, "learning_rate": 2.9829241085311872e-05, "loss": 0.06637001037597656, "step": 844 }, { "epoch": 0.11423743134769751, "grad_norm": 0.4403345286846161, "learning_rate": 2.9828209727746522e-05, "loss": 0.08481502532958984, "step": 845 }, { "epoch": 0.11437262357414449, "grad_norm": 0.9092098474502563, "learning_rate": 2.982717528286883e-05, "loss": 0.09525442123413086, "step": 846 }, { "epoch": 0.11450781580059147, "grad_norm": 1.2730127573013306, "learning_rate": 2.9826137750894176e-05, "loss": 0.10476016998291016, "step": 847 }, { "epoch": 0.11464300802703845, "grad_norm": 0.3732256293296814, "learning_rate": 2.9825097132038578e-05, "loss": 0.05859994888305664, "step": 848 }, { "epoch": 0.11477820025348542, "grad_norm": 0.41927069425582886, "learning_rate": 2.9824053426518703e-05, "loss": 0.09224319458007812, "step": 849 }, { "epoch": 0.1149133924799324, "grad_norm": 1.1012687683105469, "learning_rate": 2.9823006634551848e-05, "loss": 0.0856013298034668, "step": 850 }, { "epoch": 0.11504858470637938, "grad_norm": 0.5237995386123657, "learning_rate": 2.9821956756355973e-05, "loss": 0.07183837890625, "step": 851 }, { "epoch": 0.11518377693282636, "grad_norm": 0.3141275942325592, "learning_rate": 2.9820903792149653e-05, "loss": 0.04247856140136719, "step": 852 }, { "epoch": 0.11531896915927334, "grad_norm": 0.8342028260231018, "learning_rate": 2.981984774215214e-05, "loss": 0.0800313949584961, "step": 853 }, { "epoch": 0.11545416138572032, "grad_norm": 0.6760586500167847, "learning_rate": 2.9818788606583286e-05, "loss": 0.09170246124267578, "step": 854 }, { "epoch": 0.1155893536121673, "grad_norm": 0.7063131332397461, "learning_rate": 2.9817726385663627e-05, "loss": 0.09348559379577637, "step": 855 }, { "epoch": 0.11572454583861427, "grad_norm": 0.3852894604206085, "learning_rate": 2.9816661079614316e-05, "loss": 0.0653526782989502, "step": 856 }, { "epoch": 0.11585973806506127, "grad_norm": 0.7683351635932922, "learning_rate": 2.9815592688657154e-05, "loss": 0.0905923843383789, "step": 857 }, { "epoch": 0.11599493029150824, "grad_norm": 0.5725550651550293, "learning_rate": 2.9814521213014588e-05, "loss": 0.09342670440673828, "step": 858 }, { "epoch": 0.11613012251795522, "grad_norm": 0.7682245969772339, "learning_rate": 2.9813446652909707e-05, "loss": 0.08133316040039062, "step": 859 }, { "epoch": 0.1162653147444022, "grad_norm": 0.7258708477020264, "learning_rate": 2.981236900856624e-05, "loss": 0.10254907608032227, "step": 860 }, { "epoch": 0.11640050697084918, "grad_norm": 0.7408928871154785, "learning_rate": 2.9811288280208552e-05, "loss": 0.0868387222290039, "step": 861 }, { "epoch": 0.11653569919729616, "grad_norm": 0.8647645711898804, "learning_rate": 2.9810204468061664e-05, "loss": 0.09180545806884766, "step": 862 }, { "epoch": 0.11667089142374314, "grad_norm": 0.5420779585838318, "learning_rate": 2.9809117572351223e-05, "loss": 0.06743431091308594, "step": 863 }, { "epoch": 0.11680608365019012, "grad_norm": 0.6346800327301025, "learning_rate": 2.9808027593303537e-05, "loss": 0.06043863296508789, "step": 864 }, { "epoch": 0.1169412758766371, "grad_norm": 1.154919147491455, "learning_rate": 2.980693453114554e-05, "loss": 0.08947134017944336, "step": 865 }, { "epoch": 0.11707646810308407, "grad_norm": 0.7358927726745605, "learning_rate": 2.980583838610481e-05, "loss": 0.055572509765625, "step": 866 }, { "epoch": 0.11721166032953105, "grad_norm": 0.6965184807777405, "learning_rate": 2.980473915840957e-05, "loss": 0.07071423530578613, "step": 867 }, { "epoch": 0.11734685255597803, "grad_norm": 2.4694793224334717, "learning_rate": 2.9803636848288696e-05, "loss": 0.11431694030761719, "step": 868 }, { "epoch": 0.11748204478242501, "grad_norm": 1.4147696495056152, "learning_rate": 2.9802531455971686e-05, "loss": 0.07610034942626953, "step": 869 }, { "epoch": 0.11761723700887199, "grad_norm": 1.7168290615081787, "learning_rate": 2.980142298168869e-05, "loss": 0.09935760498046875, "step": 870 }, { "epoch": 0.11775242923531896, "grad_norm": 1.0734306573867798, "learning_rate": 2.9800311425670495e-05, "loss": 0.09269189834594727, "step": 871 }, { "epoch": 0.11788762146176594, "grad_norm": 0.946284294128418, "learning_rate": 2.9799196788148538e-05, "loss": 0.07281208038330078, "step": 872 }, { "epoch": 0.11802281368821292, "grad_norm": 1.427246332168579, "learning_rate": 2.9798079069354893e-05, "loss": 0.11725389957427979, "step": 873 }, { "epoch": 0.11815800591465991, "grad_norm": 1.4104865789413452, "learning_rate": 2.9796958269522273e-05, "loss": 0.06522738933563232, "step": 874 }, { "epoch": 0.11829319814110689, "grad_norm": 0.8461236953735352, "learning_rate": 2.9795834388884034e-05, "loss": 0.04470396041870117, "step": 875 }, { "epoch": 0.11842839036755387, "grad_norm": 0.7273390293121338, "learning_rate": 2.979470742767417e-05, "loss": 0.09412288665771484, "step": 876 }, { "epoch": 0.11856358259400085, "grad_norm": 0.4011588394641876, "learning_rate": 2.9793577386127327e-05, "loss": 0.07372426986694336, "step": 877 }, { "epoch": 0.11869877482044783, "grad_norm": 0.4491732716560364, "learning_rate": 2.9792444264478784e-05, "loss": 0.08272123336791992, "step": 878 }, { "epoch": 0.1188339670468948, "grad_norm": 0.5146609544754028, "learning_rate": 2.979130806296446e-05, "loss": 0.08355045318603516, "step": 879 }, { "epoch": 0.11896915927334178, "grad_norm": 0.8118272423744202, "learning_rate": 2.9790168781820925e-05, "loss": 0.1037912368774414, "step": 880 }, { "epoch": 0.11910435149978876, "grad_norm": 1.625578761100769, "learning_rate": 2.9789026421285375e-05, "loss": 0.073211669921875, "step": 881 }, { "epoch": 0.11923954372623574, "grad_norm": 0.3579215705394745, "learning_rate": 2.9787880981595663e-05, "loss": 0.04483389854431152, "step": 882 }, { "epoch": 0.11937473595268272, "grad_norm": 0.6862708926200867, "learning_rate": 2.9786732462990267e-05, "loss": 0.07068490982055664, "step": 883 }, { "epoch": 0.1195099281791297, "grad_norm": 0.37091052532196045, "learning_rate": 2.9785580865708323e-05, "loss": 0.09660053253173828, "step": 884 }, { "epoch": 0.11964512040557668, "grad_norm": 0.5847275257110596, "learning_rate": 2.97844261899896e-05, "loss": 0.11170244216918945, "step": 885 }, { "epoch": 0.11978031263202366, "grad_norm": 0.4958404302597046, "learning_rate": 2.9783268436074495e-05, "loss": 0.06901073455810547, "step": 886 }, { "epoch": 0.11991550485847063, "grad_norm": 1.4115339517593384, "learning_rate": 2.978210760420407e-05, "loss": 0.10162115097045898, "step": 887 }, { "epoch": 0.12005069708491761, "grad_norm": 0.8564049601554871, "learning_rate": 2.978094369462002e-05, "loss": 0.05538368225097656, "step": 888 }, { "epoch": 0.12018588931136459, "grad_norm": 0.5000080466270447, "learning_rate": 2.977977670756467e-05, "loss": 0.0643768310546875, "step": 889 }, { "epoch": 0.12032108153781157, "grad_norm": 0.5024836659431458, "learning_rate": 2.9778606643280987e-05, "loss": 0.07504701614379883, "step": 890 }, { "epoch": 0.12045627376425856, "grad_norm": 0.6458346843719482, "learning_rate": 2.97774335020126e-05, "loss": 0.09175586700439453, "step": 891 }, { "epoch": 0.12059146599070554, "grad_norm": 0.6736217737197876, "learning_rate": 2.9776257284003748e-05, "loss": 0.07339859008789062, "step": 892 }, { "epoch": 0.12072665821715252, "grad_norm": 0.8182859420776367, "learning_rate": 2.9775077989499338e-05, "loss": 0.1093292236328125, "step": 893 }, { "epoch": 0.1208618504435995, "grad_norm": 0.5426833033561707, "learning_rate": 2.97738956187449e-05, "loss": 0.09246015548706055, "step": 894 }, { "epoch": 0.12099704267004648, "grad_norm": 0.6667600870132446, "learning_rate": 2.9772710171986605e-05, "loss": 0.054932594299316406, "step": 895 }, { "epoch": 0.12113223489649345, "grad_norm": 0.4842645525932312, "learning_rate": 2.977152164947128e-05, "loss": 0.0806894302368164, "step": 896 }, { "epoch": 0.12126742712294043, "grad_norm": 1.1293233633041382, "learning_rate": 2.9770330051446373e-05, "loss": 0.09871912002563477, "step": 897 }, { "epoch": 0.12140261934938741, "grad_norm": 0.521162211894989, "learning_rate": 2.976913537815999e-05, "loss": 0.07866871356964111, "step": 898 }, { "epoch": 0.12153781157583439, "grad_norm": 0.48777225613594055, "learning_rate": 2.9767937629860853e-05, "loss": 0.06045722961425781, "step": 899 }, { "epoch": 0.12167300380228137, "grad_norm": 0.654708206653595, "learning_rate": 2.9766736806798353e-05, "loss": 0.08138012886047363, "step": 900 }, { "epoch": 0.12180819602872835, "grad_norm": 0.6570261716842651, "learning_rate": 2.9765532909222512e-05, "loss": 0.07046079635620117, "step": 901 }, { "epoch": 0.12194338825517533, "grad_norm": 0.8348051309585571, "learning_rate": 2.976432593738397e-05, "loss": 0.06657648086547852, "step": 902 }, { "epoch": 0.1220785804816223, "grad_norm": 0.42024993896484375, "learning_rate": 2.9763115891534036e-05, "loss": 0.08733034133911133, "step": 903 }, { "epoch": 0.12221377270806928, "grad_norm": 0.9708475470542908, "learning_rate": 2.9761902771924648e-05, "loss": 0.07796192169189453, "step": 904 }, { "epoch": 0.12234896493451626, "grad_norm": 0.6434724926948547, "learning_rate": 2.9760686578808387e-05, "loss": 0.0627903938293457, "step": 905 }, { "epoch": 0.12248415716096324, "grad_norm": 0.8117854595184326, "learning_rate": 2.9759467312438462e-05, "loss": 0.0890951156616211, "step": 906 }, { "epoch": 0.12261934938741022, "grad_norm": 0.44805821776390076, "learning_rate": 2.975824497306874e-05, "loss": 0.0842752456665039, "step": 907 }, { "epoch": 0.12275454161385721, "grad_norm": 0.8622244000434875, "learning_rate": 2.9757019560953707e-05, "loss": 0.05887746810913086, "step": 908 }, { "epoch": 0.12288973384030419, "grad_norm": 0.5789123177528381, "learning_rate": 2.9755791076348517e-05, "loss": 0.07620429992675781, "step": 909 }, { "epoch": 0.12302492606675117, "grad_norm": 1.0534026622772217, "learning_rate": 2.9754559519508924e-05, "loss": 0.07729911804199219, "step": 910 }, { "epoch": 0.12316011829319815, "grad_norm": 1.0735238790512085, "learning_rate": 2.975332489069137e-05, "loss": 0.07388782501220703, "step": 911 }, { "epoch": 0.12329531051964512, "grad_norm": 1.1803566217422485, "learning_rate": 2.9752087190152893e-05, "loss": 0.10381555557250977, "step": 912 }, { "epoch": 0.1234305027460921, "grad_norm": 1.0864958763122559, "learning_rate": 2.97508464181512e-05, "loss": 0.07347965240478516, "step": 913 }, { "epoch": 0.12356569497253908, "grad_norm": 0.71308833360672, "learning_rate": 2.9749602574944615e-05, "loss": 0.05932760238647461, "step": 914 }, { "epoch": 0.12370088719898606, "grad_norm": 0.8423773646354675, "learning_rate": 2.9748355660792125e-05, "loss": 0.07395792007446289, "step": 915 }, { "epoch": 0.12383607942543304, "grad_norm": 0.5618091225624084, "learning_rate": 2.9747105675953338e-05, "loss": 0.09624385833740234, "step": 916 }, { "epoch": 0.12397127165188002, "grad_norm": 0.3507370054721832, "learning_rate": 2.9745852620688506e-05, "loss": 0.0835719108581543, "step": 917 }, { "epoch": 0.124106463878327, "grad_norm": 0.8579305410385132, "learning_rate": 2.974459649525853e-05, "loss": 0.10347938537597656, "step": 918 }, { "epoch": 0.12424165610477397, "grad_norm": 1.5583200454711914, "learning_rate": 2.9743337299924925e-05, "loss": 0.08157157897949219, "step": 919 }, { "epoch": 0.12437684833122095, "grad_norm": 1.6886459589004517, "learning_rate": 2.9742075034949883e-05, "loss": 0.13440799713134766, "step": 920 }, { "epoch": 0.12451204055766793, "grad_norm": 0.4353358745574951, "learning_rate": 2.97408097005962e-05, "loss": 0.06841325759887695, "step": 921 }, { "epoch": 0.12464723278411491, "grad_norm": 0.9028810858726501, "learning_rate": 2.973954129712733e-05, "loss": 0.10115814208984375, "step": 922 }, { "epoch": 0.12478242501056189, "grad_norm": 0.4330396056175232, "learning_rate": 2.973826982480736e-05, "loss": 0.07902836799621582, "step": 923 }, { "epoch": 0.12491761723700887, "grad_norm": 1.4390037059783936, "learning_rate": 2.9736995283901022e-05, "loss": 0.07700300216674805, "step": 924 }, { "epoch": 0.12505280946345584, "grad_norm": 0.6106219291687012, "learning_rate": 2.9735717674673676e-05, "loss": 0.09856510162353516, "step": 925 }, { "epoch": 0.12518800168990282, "grad_norm": 0.40409108996391296, "learning_rate": 2.973443699739133e-05, "loss": 0.08372306823730469, "step": 926 }, { "epoch": 0.1253231939163498, "grad_norm": 0.5649853944778442, "learning_rate": 2.973315325232063e-05, "loss": 0.05322265625, "step": 927 }, { "epoch": 0.12545838614279678, "grad_norm": 0.33068177103996277, "learning_rate": 2.9731866439728853e-05, "loss": 0.053058624267578125, "step": 928 }, { "epoch": 0.12559357836924376, "grad_norm": 0.5357032418251038, "learning_rate": 2.9730576559883924e-05, "loss": 0.07047724723815918, "step": 929 }, { "epoch": 0.12572877059569074, "grad_norm": 1.169585108757019, "learning_rate": 2.97292836130544e-05, "loss": 0.09200382232666016, "step": 930 }, { "epoch": 0.12586396282213771, "grad_norm": 0.6226010918617249, "learning_rate": 2.9727987599509485e-05, "loss": 0.07495355606079102, "step": 931 }, { "epoch": 0.1259991550485847, "grad_norm": 0.34719395637512207, "learning_rate": 2.972668851951901e-05, "loss": 0.061835527420043945, "step": 932 }, { "epoch": 0.12613434727503167, "grad_norm": 0.43001851439476013, "learning_rate": 2.9725386373353455e-05, "loss": 0.09643840789794922, "step": 933 }, { "epoch": 0.12626953950147868, "grad_norm": 0.632045328617096, "learning_rate": 2.972408116128393e-05, "loss": 0.07325077056884766, "step": 934 }, { "epoch": 0.12640473172792566, "grad_norm": 0.6281790733337402, "learning_rate": 2.972277288358219e-05, "loss": 0.05544567108154297, "step": 935 }, { "epoch": 0.12653992395437264, "grad_norm": 0.5434910655021667, "learning_rate": 2.9721461540520628e-05, "loss": 0.0962514877319336, "step": 936 }, { "epoch": 0.1266751161808196, "grad_norm": 0.338144451379776, "learning_rate": 2.9720147132372265e-05, "loss": 0.07651424407958984, "step": 937 }, { "epoch": 0.1268103084072666, "grad_norm": 0.3626541793346405, "learning_rate": 2.9718829659410772e-05, "loss": 0.059327125549316406, "step": 938 }, { "epoch": 0.12694550063371357, "grad_norm": 0.7962621450424194, "learning_rate": 2.9717509121910453e-05, "loss": 0.07543087005615234, "step": 939 }, { "epoch": 0.12708069286016055, "grad_norm": 0.3682408630847931, "learning_rate": 2.971618552014625e-05, "loss": 0.0697183609008789, "step": 940 }, { "epoch": 0.12721588508660753, "grad_norm": 0.334225058555603, "learning_rate": 2.971485885439375e-05, "loss": 0.04205179214477539, "step": 941 }, { "epoch": 0.1273510773130545, "grad_norm": 0.7661436200141907, "learning_rate": 2.9713529124929163e-05, "loss": 0.10788249969482422, "step": 942 }, { "epoch": 0.12748626953950148, "grad_norm": 0.4361717700958252, "learning_rate": 2.9712196332029352e-05, "loss": 0.060984134674072266, "step": 943 }, { "epoch": 0.12762146176594846, "grad_norm": 0.5988253951072693, "learning_rate": 2.971086047597181e-05, "loss": 0.1024322509765625, "step": 944 }, { "epoch": 0.12775665399239544, "grad_norm": 0.4751938283443451, "learning_rate": 2.9709521557034668e-05, "loss": 0.0825343132019043, "step": 945 }, { "epoch": 0.12789184621884242, "grad_norm": 0.26907584071159363, "learning_rate": 2.9708179575496696e-05, "loss": 0.05469369888305664, "step": 946 }, { "epoch": 0.1280270384452894, "grad_norm": 0.2958700954914093, "learning_rate": 2.9706834531637303e-05, "loss": 0.06627678871154785, "step": 947 }, { "epoch": 0.12816223067173638, "grad_norm": 0.3210330009460449, "learning_rate": 2.9705486425736537e-05, "loss": 0.06341791152954102, "step": 948 }, { "epoch": 0.12829742289818336, "grad_norm": 0.46173107624053955, "learning_rate": 2.9704135258075077e-05, "loss": 0.07384729385375977, "step": 949 }, { "epoch": 0.12843261512463033, "grad_norm": 0.48776519298553467, "learning_rate": 2.970278102893424e-05, "loss": 0.05780982971191406, "step": 950 }, { "epoch": 0.1285678073510773, "grad_norm": 0.23254428803920746, "learning_rate": 2.9701423738595992e-05, "loss": 0.037735939025878906, "step": 951 }, { "epoch": 0.1287029995775243, "grad_norm": 0.31969594955444336, "learning_rate": 2.9700063387342925e-05, "loss": 0.07977175712585449, "step": 952 }, { "epoch": 0.12883819180397127, "grad_norm": 0.27571967244148254, "learning_rate": 2.969869997545827e-05, "loss": 0.059992313385009766, "step": 953 }, { "epoch": 0.12897338403041825, "grad_norm": 0.734204888343811, "learning_rate": 2.9697333503225897e-05, "loss": 0.08205127716064453, "step": 954 }, { "epoch": 0.12910857625686523, "grad_norm": 0.296566367149353, "learning_rate": 2.969596397093031e-05, "loss": 0.06202888488769531, "step": 955 }, { "epoch": 0.1292437684833122, "grad_norm": 0.5508058667182922, "learning_rate": 2.969459137885666e-05, "loss": 0.1157083511352539, "step": 956 }, { "epoch": 0.12937896070975918, "grad_norm": 0.7532540559768677, "learning_rate": 2.969321572729072e-05, "loss": 0.07169175148010254, "step": 957 }, { "epoch": 0.12951415293620616, "grad_norm": 0.40385115146636963, "learning_rate": 2.9691837016518915e-05, "loss": 0.05229759216308594, "step": 958 }, { "epoch": 0.12964934516265314, "grad_norm": 0.44017350673675537, "learning_rate": 2.9690455246828294e-05, "loss": 0.06218385696411133, "step": 959 }, { "epoch": 0.12978453738910012, "grad_norm": 1.5186437368392944, "learning_rate": 2.968907041850655e-05, "loss": 0.07077431678771973, "step": 960 }, { "epoch": 0.1299197296155471, "grad_norm": 0.28694963455200195, "learning_rate": 2.968768253184202e-05, "loss": 0.05970430374145508, "step": 961 }, { "epoch": 0.13005492184199408, "grad_norm": 1.0450797080993652, "learning_rate": 2.9686291587123655e-05, "loss": 0.09966850280761719, "step": 962 }, { "epoch": 0.13019011406844105, "grad_norm": 0.2038547694683075, "learning_rate": 2.968489758464107e-05, "loss": 0.0590057373046875, "step": 963 }, { "epoch": 0.13032530629488803, "grad_norm": 0.43276289105415344, "learning_rate": 2.9683500524684494e-05, "loss": 0.08506965637207031, "step": 964 }, { "epoch": 0.130460498521335, "grad_norm": 0.5809456706047058, "learning_rate": 2.9682100407544812e-05, "loss": 0.05703449249267578, "step": 965 }, { "epoch": 0.130595690747782, "grad_norm": 0.41581490635871887, "learning_rate": 2.9680697233513526e-05, "loss": 0.07561635971069336, "step": 966 }, { "epoch": 0.13073088297422897, "grad_norm": 0.473766028881073, "learning_rate": 2.9679291002882793e-05, "loss": 0.06410503387451172, "step": 967 }, { "epoch": 0.13086607520067597, "grad_norm": 0.35944005846977234, "learning_rate": 2.967788171594539e-05, "loss": 0.05506134033203125, "step": 968 }, { "epoch": 0.13100126742712295, "grad_norm": 0.3487021028995514, "learning_rate": 2.967646937299474e-05, "loss": 0.059519290924072266, "step": 969 }, { "epoch": 0.13113645965356993, "grad_norm": 0.7498253583908081, "learning_rate": 2.9675053974324907e-05, "loss": 0.1223001480102539, "step": 970 }, { "epoch": 0.1312716518800169, "grad_norm": 0.3644426763057709, "learning_rate": 2.9673635520230576e-05, "loss": 0.06449270248413086, "step": 971 }, { "epoch": 0.1314068441064639, "grad_norm": 0.3195389211177826, "learning_rate": 2.9672214011007087e-05, "loss": 0.06031656265258789, "step": 972 }, { "epoch": 0.13154203633291087, "grad_norm": 0.9045985341072083, "learning_rate": 2.9670789446950396e-05, "loss": 0.1047515869140625, "step": 973 }, { "epoch": 0.13167722855935785, "grad_norm": 0.521141767501831, "learning_rate": 2.9669361828357105e-05, "loss": 0.07935333251953125, "step": 974 }, { "epoch": 0.13181242078580482, "grad_norm": 0.6143845319747925, "learning_rate": 2.9667931155524454e-05, "loss": 0.06447744369506836, "step": 975 }, { "epoch": 0.1319476130122518, "grad_norm": 0.5948580503463745, "learning_rate": 2.966649742875032e-05, "loss": 0.07799911499023438, "step": 976 }, { "epoch": 0.13208280523869878, "grad_norm": 0.6051185131072998, "learning_rate": 2.9665060648333206e-05, "loss": 0.05740940570831299, "step": 977 }, { "epoch": 0.13221799746514576, "grad_norm": 0.34990349411964417, "learning_rate": 2.9663620814572266e-05, "loss": 0.05348944664001465, "step": 978 }, { "epoch": 0.13235318969159274, "grad_norm": 0.9583558440208435, "learning_rate": 2.966217792776728e-05, "loss": 0.0702972412109375, "step": 979 }, { "epoch": 0.13248838191803972, "grad_norm": 0.45510542392730713, "learning_rate": 2.9660731988218652e-05, "loss": 0.05802106857299805, "step": 980 }, { "epoch": 0.1326235741444867, "grad_norm": 0.821110188961029, "learning_rate": 2.965928299622745e-05, "loss": 0.06091594696044922, "step": 981 }, { "epoch": 0.13275876637093367, "grad_norm": 0.4617513120174408, "learning_rate": 2.965783095209535e-05, "loss": 0.08968257904052734, "step": 982 }, { "epoch": 0.13289395859738065, "grad_norm": 1.1345301866531372, "learning_rate": 2.965637585612469e-05, "loss": 0.0957021713256836, "step": 983 }, { "epoch": 0.13302915082382763, "grad_norm": 1.1611295938491821, "learning_rate": 2.965491770861841e-05, "loss": 0.08506393432617188, "step": 984 }, { "epoch": 0.1331643430502746, "grad_norm": 0.5205171704292297, "learning_rate": 2.965345650988012e-05, "loss": 0.08159351348876953, "step": 985 }, { "epoch": 0.1332995352767216, "grad_norm": 0.4934007227420807, "learning_rate": 2.9651992260214035e-05, "loss": 0.07059884071350098, "step": 986 }, { "epoch": 0.13343472750316857, "grad_norm": 0.580374538898468, "learning_rate": 2.9650524959925037e-05, "loss": 0.1042940616607666, "step": 987 }, { "epoch": 0.13356991972961554, "grad_norm": 0.6456085443496704, "learning_rate": 2.9649054609318607e-05, "loss": 0.08425045013427734, "step": 988 }, { "epoch": 0.13370511195606252, "grad_norm": 0.5819563269615173, "learning_rate": 2.9647581208700894e-05, "loss": 0.12133598327636719, "step": 989 }, { "epoch": 0.1338403041825095, "grad_norm": 0.6495242714881897, "learning_rate": 2.9646104758378666e-05, "loss": 0.08918619155883789, "step": 990 }, { "epoch": 0.13397549640895648, "grad_norm": 0.5270083546638489, "learning_rate": 2.964462525865932e-05, "loss": 0.07123589515686035, "step": 991 }, { "epoch": 0.13411068863540346, "grad_norm": 0.361344575881958, "learning_rate": 2.96431427098509e-05, "loss": 0.06732702255249023, "step": 992 }, { "epoch": 0.13424588086185044, "grad_norm": 1.39292311668396, "learning_rate": 2.9641657112262084e-05, "loss": 0.12557554244995117, "step": 993 }, { "epoch": 0.13438107308829741, "grad_norm": 0.5411080121994019, "learning_rate": 2.9640168466202174e-05, "loss": 0.05614137649536133, "step": 994 }, { "epoch": 0.1345162653147444, "grad_norm": 0.24641117453575134, "learning_rate": 2.9638676771981124e-05, "loss": 0.03980302810668945, "step": 995 }, { "epoch": 0.13465145754119137, "grad_norm": 0.4695040285587311, "learning_rate": 2.9637182029909508e-05, "loss": 0.06638145446777344, "step": 996 }, { "epoch": 0.13478664976763835, "grad_norm": 0.8481863737106323, "learning_rate": 2.9635684240298532e-05, "loss": 0.09592628479003906, "step": 997 }, { "epoch": 0.13492184199408533, "grad_norm": 0.4733298122882843, "learning_rate": 2.9634183403460053e-05, "loss": 0.06079983711242676, "step": 998 }, { "epoch": 0.1350570342205323, "grad_norm": 0.7947366237640381, "learning_rate": 2.9632679519706553e-05, "loss": 0.06237435340881348, "step": 999 }, { "epoch": 0.13519222644697929, "grad_norm": 0.41676557064056396, "learning_rate": 2.9631172589351137e-05, "loss": 0.10515213012695312, "step": 1000 }, { "epoch": 0.1353274186734263, "grad_norm": 1.2089643478393555, "learning_rate": 2.962966261270758e-05, "loss": 0.11839675903320312, "step": 1001 }, { "epoch": 0.13546261089987327, "grad_norm": 0.3339465856552124, "learning_rate": 2.962814959009024e-05, "loss": 0.04536879062652588, "step": 1002 }, { "epoch": 0.13559780312632025, "grad_norm": 0.6421874761581421, "learning_rate": 2.962663352181415e-05, "loss": 0.08237600326538086, "step": 1003 }, { "epoch": 0.13573299535276723, "grad_norm": 0.8473460078239441, "learning_rate": 2.9625114408194966e-05, "loss": 0.08166265487670898, "step": 1004 }, { "epoch": 0.1358681875792142, "grad_norm": 0.45747676491737366, "learning_rate": 2.962359224954897e-05, "loss": 0.054253578186035156, "step": 1005 }, { "epoch": 0.13600337980566118, "grad_norm": 0.5543858408927917, "learning_rate": 2.9622067046193086e-05, "loss": 0.06447505950927734, "step": 1006 }, { "epoch": 0.13613857203210816, "grad_norm": 0.41753020882606506, "learning_rate": 2.9620538798444867e-05, "loss": 0.060455322265625, "step": 1007 }, { "epoch": 0.13627376425855514, "grad_norm": 0.5391937494277954, "learning_rate": 2.9619007506622506e-05, "loss": 0.12227058410644531, "step": 1008 }, { "epoch": 0.13640895648500212, "grad_norm": 0.2970508337020874, "learning_rate": 2.961747317104482e-05, "loss": 0.05994606018066406, "step": 1009 }, { "epoch": 0.1365441487114491, "grad_norm": 0.5311251878738403, "learning_rate": 2.9615935792031274e-05, "loss": 0.07413721084594727, "step": 1010 }, { "epoch": 0.13667934093789608, "grad_norm": 0.6452791690826416, "learning_rate": 2.9614395369901953e-05, "loss": 0.10640430450439453, "step": 1011 }, { "epoch": 0.13681453316434306, "grad_norm": 0.8512495160102844, "learning_rate": 2.9612851904977582e-05, "loss": 0.09710979461669922, "step": 1012 }, { "epoch": 0.13694972539079003, "grad_norm": 0.396820068359375, "learning_rate": 2.9611305397579518e-05, "loss": 0.10082101821899414, "step": 1013 }, { "epoch": 0.137084917617237, "grad_norm": 0.4907481372356415, "learning_rate": 2.9609755848029755e-05, "loss": 0.0582963228225708, "step": 1014 }, { "epoch": 0.137220109843684, "grad_norm": 0.48300814628601074, "learning_rate": 2.9608203256650916e-05, "loss": 0.07283365726470947, "step": 1015 }, { "epoch": 0.13735530207013097, "grad_norm": 0.401167094707489, "learning_rate": 2.9606647623766257e-05, "loss": 0.06013607978820801, "step": 1016 }, { "epoch": 0.13749049429657795, "grad_norm": 1.058401107788086, "learning_rate": 2.9605088949699672e-05, "loss": 0.08579373359680176, "step": 1017 }, { "epoch": 0.13762568652302493, "grad_norm": 0.44358688592910767, "learning_rate": 2.9603527234775682e-05, "loss": 0.08293008804321289, "step": 1018 }, { "epoch": 0.1377608787494719, "grad_norm": 0.3435921370983124, "learning_rate": 2.960196247931945e-05, "loss": 0.06587696075439453, "step": 1019 }, { "epoch": 0.13789607097591888, "grad_norm": 0.5250937342643738, "learning_rate": 2.960039468365676e-05, "loss": 0.09907770156860352, "step": 1020 }, { "epoch": 0.13803126320236586, "grad_norm": 0.4432773292064667, "learning_rate": 2.959882384811404e-05, "loss": 0.07782220840454102, "step": 1021 }, { "epoch": 0.13816645542881284, "grad_norm": 0.3793444335460663, "learning_rate": 2.9597249973018343e-05, "loss": 0.07200241088867188, "step": 1022 }, { "epoch": 0.13830164765525982, "grad_norm": 0.7196708917617798, "learning_rate": 2.959567305869736e-05, "loss": 0.07615351676940918, "step": 1023 }, { "epoch": 0.1384368398817068, "grad_norm": 0.5667684674263, "learning_rate": 2.9594093105479413e-05, "loss": 0.0909280776977539, "step": 1024 }, { "epoch": 0.13857203210815378, "grad_norm": 0.42288902401924133, "learning_rate": 2.959251011369345e-05, "loss": 0.05932807922363281, "step": 1025 }, { "epoch": 0.13870722433460075, "grad_norm": 0.4394952058792114, "learning_rate": 2.959092408366907e-05, "loss": 0.05600166320800781, "step": 1026 }, { "epoch": 0.13884241656104773, "grad_norm": 1.555326223373413, "learning_rate": 2.958933501573649e-05, "loss": 0.10719490051269531, "step": 1027 }, { "epoch": 0.1389776087874947, "grad_norm": 0.6938232183456421, "learning_rate": 2.9587742910226555e-05, "loss": 0.0561060905456543, "step": 1028 }, { "epoch": 0.1391128010139417, "grad_norm": 0.2859027087688446, "learning_rate": 2.958614776747076e-05, "loss": 0.05535411834716797, "step": 1029 }, { "epoch": 0.13924799324038867, "grad_norm": 0.5560369491577148, "learning_rate": 2.9584549587801213e-05, "loss": 0.105712890625, "step": 1030 }, { "epoch": 0.13938318546683565, "grad_norm": 0.6921489238739014, "learning_rate": 2.958294837155067e-05, "loss": 0.05954265594482422, "step": 1031 }, { "epoch": 0.13951837769328262, "grad_norm": 0.28092825412750244, "learning_rate": 2.9581344119052508e-05, "loss": 0.023319005966186523, "step": 1032 }, { "epoch": 0.1396535699197296, "grad_norm": 1.4917162656784058, "learning_rate": 2.957973683064074e-05, "loss": 0.11414146423339844, "step": 1033 }, { "epoch": 0.13978876214617658, "grad_norm": 0.5399127006530762, "learning_rate": 2.957812650665002e-05, "loss": 0.09021472930908203, "step": 1034 }, { "epoch": 0.1399239543726236, "grad_norm": 0.3196600675582886, "learning_rate": 2.957651314741562e-05, "loss": 0.06321144104003906, "step": 1035 }, { "epoch": 0.14005914659907057, "grad_norm": 0.5471014380455017, "learning_rate": 2.9574896753273454e-05, "loss": 0.07691097259521484, "step": 1036 }, { "epoch": 0.14019433882551754, "grad_norm": 0.7765745520591736, "learning_rate": 2.9573277324560058e-05, "loss": 0.10283279418945312, "step": 1037 }, { "epoch": 0.14032953105196452, "grad_norm": 0.5622662901878357, "learning_rate": 2.9571654861612608e-05, "loss": 0.04420614242553711, "step": 1038 }, { "epoch": 0.1404647232784115, "grad_norm": 0.3749459683895111, "learning_rate": 2.957002936476891e-05, "loss": 0.08757567405700684, "step": 1039 }, { "epoch": 0.14059991550485848, "grad_norm": 0.4691568911075592, "learning_rate": 2.9568400834367406e-05, "loss": 0.07784152030944824, "step": 1040 }, { "epoch": 0.14073510773130546, "grad_norm": 0.6638574600219727, "learning_rate": 2.9566769270747158e-05, "loss": 0.07189655303955078, "step": 1041 }, { "epoch": 0.14087029995775244, "grad_norm": 0.31720519065856934, "learning_rate": 2.9565134674247864e-05, "loss": 0.06950515508651733, "step": 1042 }, { "epoch": 0.14100549218419942, "grad_norm": 0.25245657563209534, "learning_rate": 2.9563497045209866e-05, "loss": 0.05792236328125, "step": 1043 }, { "epoch": 0.1411406844106464, "grad_norm": 0.7169446349143982, "learning_rate": 2.9561856383974118e-05, "loss": 0.10790157318115234, "step": 1044 }, { "epoch": 0.14127587663709337, "grad_norm": 0.22160176932811737, "learning_rate": 2.9560212690882218e-05, "loss": 0.05505990982055664, "step": 1045 }, { "epoch": 0.14141106886354035, "grad_norm": 0.5142950415611267, "learning_rate": 2.9558565966276395e-05, "loss": 0.07321357727050781, "step": 1046 }, { "epoch": 0.14154626108998733, "grad_norm": 0.5482933521270752, "learning_rate": 2.9556916210499497e-05, "loss": 0.08965778350830078, "step": 1047 }, { "epoch": 0.1416814533164343, "grad_norm": 1.486504077911377, "learning_rate": 2.9555263423895016e-05, "loss": 0.08441925048828125, "step": 1048 }, { "epoch": 0.1418166455428813, "grad_norm": 0.40179309248924255, "learning_rate": 2.955360760680708e-05, "loss": 0.07248306274414062, "step": 1049 }, { "epoch": 0.14195183776932827, "grad_norm": 0.19929270446300507, "learning_rate": 2.9551948759580423e-05, "loss": 0.04222297668457031, "step": 1050 }, { "epoch": 0.14208702999577524, "grad_norm": 1.3443044424057007, "learning_rate": 2.9550286882560435e-05, "loss": 0.08512163162231445, "step": 1051 }, { "epoch": 0.14222222222222222, "grad_norm": 0.6884334683418274, "learning_rate": 2.9548621976093126e-05, "loss": 0.08371925354003906, "step": 1052 }, { "epoch": 0.1423574144486692, "grad_norm": 0.5100288391113281, "learning_rate": 2.9546954040525144e-05, "loss": 0.07264518737792969, "step": 1053 }, { "epoch": 0.14249260667511618, "grad_norm": 0.3990485370159149, "learning_rate": 2.9545283076203753e-05, "loss": 0.060115814208984375, "step": 1054 }, { "epoch": 0.14262779890156316, "grad_norm": 1.017004132270813, "learning_rate": 2.954360908347686e-05, "loss": 0.09311389923095703, "step": 1055 }, { "epoch": 0.14276299112801014, "grad_norm": 0.6060172319412231, "learning_rate": 2.9541932062693e-05, "loss": 0.05374288558959961, "step": 1056 }, { "epoch": 0.14289818335445711, "grad_norm": 1.6509504318237305, "learning_rate": 2.954025201420134e-05, "loss": 0.0938262939453125, "step": 1057 }, { "epoch": 0.1430333755809041, "grad_norm": 1.1378108263015747, "learning_rate": 2.9538568938351672e-05, "loss": 0.07957077026367188, "step": 1058 }, { "epoch": 0.14316856780735107, "grad_norm": 0.31916308403015137, "learning_rate": 2.953688283549442e-05, "loss": 0.05724906921386719, "step": 1059 }, { "epoch": 0.14330376003379805, "grad_norm": 0.3913947343826294, "learning_rate": 2.9535193705980642e-05, "loss": 0.05498838424682617, "step": 1060 }, { "epoch": 0.14343895226024503, "grad_norm": 0.7348978519439697, "learning_rate": 2.9533501550162028e-05, "loss": 0.0771627426147461, "step": 1061 }, { "epoch": 0.143574144486692, "grad_norm": 0.5150848627090454, "learning_rate": 2.9531806368390882e-05, "loss": 0.07370626926422119, "step": 1062 }, { "epoch": 0.14370933671313899, "grad_norm": 1.1939650774002075, "learning_rate": 2.953010816102016e-05, "loss": 0.06557035446166992, "step": 1063 }, { "epoch": 0.14384452893958596, "grad_norm": 0.4618939757347107, "learning_rate": 2.952840692840343e-05, "loss": 0.09060502052307129, "step": 1064 }, { "epoch": 0.14397972116603294, "grad_norm": 0.4631308913230896, "learning_rate": 2.9526702670894914e-05, "loss": 0.09955406188964844, "step": 1065 }, { "epoch": 0.14411491339247992, "grad_norm": 0.4638875722885132, "learning_rate": 2.952499538884943e-05, "loss": 0.07295036315917969, "step": 1066 }, { "epoch": 0.1442501056189269, "grad_norm": 0.2289997935295105, "learning_rate": 2.9523285082622448e-05, "loss": 0.05170297622680664, "step": 1067 }, { "epoch": 0.14438529784537388, "grad_norm": 0.28436052799224854, "learning_rate": 2.9521571752570064e-05, "loss": 0.05417752265930176, "step": 1068 }, { "epoch": 0.14452049007182088, "grad_norm": 0.5871800184249878, "learning_rate": 2.9519855399049004e-05, "loss": 0.060787200927734375, "step": 1069 }, { "epoch": 0.14465568229826786, "grad_norm": 0.8053838014602661, "learning_rate": 2.951813602241662e-05, "loss": 0.09525346755981445, "step": 1070 }, { "epoch": 0.14479087452471484, "grad_norm": 0.5935882329940796, "learning_rate": 2.9516413623030896e-05, "loss": 0.09336566925048828, "step": 1071 }, { "epoch": 0.14492606675116182, "grad_norm": 0.7615500092506409, "learning_rate": 2.951468820125045e-05, "loss": 0.07540607452392578, "step": 1072 }, { "epoch": 0.1450612589776088, "grad_norm": 0.8269580006599426, "learning_rate": 2.9512959757434508e-05, "loss": 0.056622982025146484, "step": 1073 }, { "epoch": 0.14519645120405578, "grad_norm": 0.5052419304847717, "learning_rate": 2.951122829194296e-05, "loss": 0.08120441436767578, "step": 1074 }, { "epoch": 0.14533164343050275, "grad_norm": 0.4648810625076294, "learning_rate": 2.9509493805136296e-05, "loss": 0.07573890686035156, "step": 1075 }, { "epoch": 0.14546683565694973, "grad_norm": 0.43185341358184814, "learning_rate": 2.9507756297375648e-05, "loss": 0.10009193420410156, "step": 1076 }, { "epoch": 0.1456020278833967, "grad_norm": 0.5906679034233093, "learning_rate": 2.9506015769022778e-05, "loss": 0.1013031005859375, "step": 1077 }, { "epoch": 0.1457372201098437, "grad_norm": 0.5084944367408752, "learning_rate": 2.950427222044006e-05, "loss": 0.08063554763793945, "step": 1078 }, { "epoch": 0.14587241233629067, "grad_norm": 0.5056231021881104, "learning_rate": 2.9502525651990525e-05, "loss": 0.1089935302734375, "step": 1079 }, { "epoch": 0.14600760456273765, "grad_norm": 0.4640476703643799, "learning_rate": 2.9500776064037813e-05, "loss": 0.05266237258911133, "step": 1080 }, { "epoch": 0.14614279678918463, "grad_norm": 0.37210893630981445, "learning_rate": 2.9499023456946194e-05, "loss": 0.06466007232666016, "step": 1081 }, { "epoch": 0.1462779890156316, "grad_norm": 0.5744715929031372, "learning_rate": 2.9497267831080575e-05, "loss": 0.065887451171875, "step": 1082 }, { "epoch": 0.14641318124207858, "grad_norm": 0.5396444201469421, "learning_rate": 2.949550918680649e-05, "loss": 0.07608509063720703, "step": 1083 }, { "epoch": 0.14654837346852556, "grad_norm": 0.3733367919921875, "learning_rate": 2.9493747524490086e-05, "loss": 0.07193183898925781, "step": 1084 }, { "epoch": 0.14668356569497254, "grad_norm": 0.3441978693008423, "learning_rate": 2.9491982844498156e-05, "loss": 0.06608152389526367, "step": 1085 }, { "epoch": 0.14681875792141952, "grad_norm": 0.49852749705314636, "learning_rate": 2.949021514719812e-05, "loss": 0.08067667484283447, "step": 1086 }, { "epoch": 0.1469539501478665, "grad_norm": 0.4173051118850708, "learning_rate": 2.948844443295802e-05, "loss": 0.0680074691772461, "step": 1087 }, { "epoch": 0.14708914237431348, "grad_norm": 0.7929953336715698, "learning_rate": 2.9486670702146526e-05, "loss": 0.11616706848144531, "step": 1088 }, { "epoch": 0.14722433460076045, "grad_norm": 0.8731603026390076, "learning_rate": 2.948489395513294e-05, "loss": 0.0901632308959961, "step": 1089 }, { "epoch": 0.14735952682720743, "grad_norm": 0.7319706678390503, "learning_rate": 2.948311419228719e-05, "loss": 0.11249923706054688, "step": 1090 }, { "epoch": 0.1474947190536544, "grad_norm": 0.907683789730072, "learning_rate": 2.948133141397983e-05, "loss": 0.0464324951171875, "step": 1091 }, { "epoch": 0.1476299112801014, "grad_norm": 0.5078043937683105, "learning_rate": 2.9479545620582047e-05, "loss": 0.0528569221496582, "step": 1092 }, { "epoch": 0.14776510350654837, "grad_norm": 0.421255886554718, "learning_rate": 2.9477756812465652e-05, "loss": 0.08293342590332031, "step": 1093 }, { "epoch": 0.14790029573299535, "grad_norm": 0.40994036197662354, "learning_rate": 2.9475964990003085e-05, "loss": 0.03383135795593262, "step": 1094 }, { "epoch": 0.14803548795944232, "grad_norm": 0.4158625900745392, "learning_rate": 2.9474170153567406e-05, "loss": 0.07805156707763672, "step": 1095 }, { "epoch": 0.1481706801858893, "grad_norm": 0.7317113876342773, "learning_rate": 2.947237230353232e-05, "loss": 0.1040964126586914, "step": 1096 }, { "epoch": 0.14830587241233628, "grad_norm": 0.23181234300136566, "learning_rate": 2.9470571440272147e-05, "loss": 0.05652332305908203, "step": 1097 }, { "epoch": 0.14844106463878326, "grad_norm": 1.2110581398010254, "learning_rate": 2.946876756416183e-05, "loss": 0.06962728500366211, "step": 1098 }, { "epoch": 0.14857625686523024, "grad_norm": 0.9232327342033386, "learning_rate": 2.946696067557695e-05, "loss": 0.07483863830566406, "step": 1099 }, { "epoch": 0.14871144909167722, "grad_norm": 0.2956840395927429, "learning_rate": 2.9465150774893706e-05, "loss": 0.06692075729370117, "step": 1100 }, { "epoch": 0.1488466413181242, "grad_norm": 0.21126961708068848, "learning_rate": 2.9463337862488938e-05, "loss": 0.04665088653564453, "step": 1101 }, { "epoch": 0.14898183354457117, "grad_norm": 1.7077643871307373, "learning_rate": 2.9461521938740096e-05, "loss": 0.09197998046875, "step": 1102 }, { "epoch": 0.14911702577101818, "grad_norm": 0.6724783182144165, "learning_rate": 2.9459703004025273e-05, "loss": 0.08422660827636719, "step": 1103 }, { "epoch": 0.14925221799746516, "grad_norm": 1.316791296005249, "learning_rate": 2.9457881058723174e-05, "loss": 0.10529279708862305, "step": 1104 }, { "epoch": 0.14938741022391214, "grad_norm": 0.42805215716362, "learning_rate": 2.9456056103213137e-05, "loss": 0.09313654899597168, "step": 1105 }, { "epoch": 0.14952260245035912, "grad_norm": 0.657561182975769, "learning_rate": 2.945422813787513e-05, "loss": 0.052576541900634766, "step": 1106 }, { "epoch": 0.1496577946768061, "grad_norm": 0.41958561539649963, "learning_rate": 2.9452397163089748e-05, "loss": 0.04102897644042969, "step": 1107 }, { "epoch": 0.14979298690325307, "grad_norm": 0.6558903455734253, "learning_rate": 2.9450563179238207e-05, "loss": 0.0634920597076416, "step": 1108 }, { "epoch": 0.14992817912970005, "grad_norm": 0.9969330430030823, "learning_rate": 2.9448726186702354e-05, "loss": 0.07057714462280273, "step": 1109 }, { "epoch": 0.15006337135614703, "grad_norm": 0.4252939224243164, "learning_rate": 2.9446886185864652e-05, "loss": 0.0529322624206543, "step": 1110 }, { "epoch": 0.150198563582594, "grad_norm": 0.46789953112602234, "learning_rate": 2.944504317710821e-05, "loss": 0.07893943786621094, "step": 1111 }, { "epoch": 0.150333755809041, "grad_norm": 1.2787641286849976, "learning_rate": 2.944319716081675e-05, "loss": 0.08886337280273438, "step": 1112 }, { "epoch": 0.15046894803548796, "grad_norm": 0.5518364906311035, "learning_rate": 2.944134813737462e-05, "loss": 0.08701801300048828, "step": 1113 }, { "epoch": 0.15060414026193494, "grad_norm": 0.2912185490131378, "learning_rate": 2.9439496107166796e-05, "loss": 0.0686025619506836, "step": 1114 }, { "epoch": 0.15073933248838192, "grad_norm": 0.4945691227912903, "learning_rate": 2.943764107057888e-05, "loss": 0.10154247283935547, "step": 1115 }, { "epoch": 0.1508745247148289, "grad_norm": 0.4553638696670532, "learning_rate": 2.9435783027997106e-05, "loss": 0.09839916229248047, "step": 1116 }, { "epoch": 0.15100971694127588, "grad_norm": 0.43696364760398865, "learning_rate": 2.9433921979808323e-05, "loss": 0.0523834228515625, "step": 1117 }, { "epoch": 0.15114490916772286, "grad_norm": 0.49646279215812683, "learning_rate": 2.9432057926400014e-05, "loss": 0.06965899467468262, "step": 1118 }, { "epoch": 0.15128010139416984, "grad_norm": 0.37695229053497314, "learning_rate": 2.943019086816028e-05, "loss": 0.06361579895019531, "step": 1119 }, { "epoch": 0.15141529362061681, "grad_norm": 0.34943926334381104, "learning_rate": 2.9428320805477855e-05, "loss": 0.059767723083496094, "step": 1120 }, { "epoch": 0.1515504858470638, "grad_norm": 0.72733074426651, "learning_rate": 2.9426447738742104e-05, "loss": 0.09514474868774414, "step": 1121 }, { "epoch": 0.15168567807351077, "grad_norm": 0.23378986120224, "learning_rate": 2.9424571668343e-05, "loss": 0.05317068099975586, "step": 1122 }, { "epoch": 0.15182087029995775, "grad_norm": 0.3431641757488251, "learning_rate": 2.942269259467115e-05, "loss": 0.07488346099853516, "step": 1123 }, { "epoch": 0.15195606252640473, "grad_norm": 0.5362439751625061, "learning_rate": 2.9420810518117794e-05, "loss": 0.04477691650390625, "step": 1124 }, { "epoch": 0.1520912547528517, "grad_norm": 0.5341426134109497, "learning_rate": 2.9418925439074784e-05, "loss": 0.09751033782958984, "step": 1125 }, { "epoch": 0.15222644697929869, "grad_norm": 0.393439918756485, "learning_rate": 2.9417037357934606e-05, "loss": 0.0592045783996582, "step": 1126 }, { "epoch": 0.15236163920574566, "grad_norm": 0.8095723390579224, "learning_rate": 2.9415146275090373e-05, "loss": 0.0764150619506836, "step": 1127 }, { "epoch": 0.15249683143219264, "grad_norm": 0.9617822170257568, "learning_rate": 2.9413252190935813e-05, "loss": 0.07028436660766602, "step": 1128 }, { "epoch": 0.15263202365863962, "grad_norm": 0.29926449060440063, "learning_rate": 2.9411355105865286e-05, "loss": 0.07842016220092773, "step": 1129 }, { "epoch": 0.1527672158850866, "grad_norm": 0.4743107259273529, "learning_rate": 2.9409455020273775e-05, "loss": 0.10325860977172852, "step": 1130 }, { "epoch": 0.15290240811153358, "grad_norm": 0.37752461433410645, "learning_rate": 2.940755193455689e-05, "loss": 0.06924009323120117, "step": 1131 }, { "epoch": 0.15303760033798056, "grad_norm": 0.901729166507721, "learning_rate": 2.940564584911086e-05, "loss": 0.07530999183654785, "step": 1132 }, { "epoch": 0.15317279256442753, "grad_norm": 0.4585990011692047, "learning_rate": 2.9403736764332543e-05, "loss": 0.06424522399902344, "step": 1133 }, { "epoch": 0.1533079847908745, "grad_norm": 0.5100941061973572, "learning_rate": 2.9401824680619423e-05, "loss": 0.09717559814453125, "step": 1134 }, { "epoch": 0.1534431770173215, "grad_norm": 0.38628143072128296, "learning_rate": 2.9399909598369604e-05, "loss": 0.07665491104125977, "step": 1135 }, { "epoch": 0.15357836924376847, "grad_norm": 0.6921217441558838, "learning_rate": 2.939799151798182e-05, "loss": 0.07894515991210938, "step": 1136 }, { "epoch": 0.15371356147021548, "grad_norm": 0.510297417640686, "learning_rate": 2.9396070439855417e-05, "loss": 0.09485530853271484, "step": 1137 }, { "epoch": 0.15384875369666245, "grad_norm": 0.42599210143089294, "learning_rate": 2.9394146364390382e-05, "loss": 0.08612918853759766, "step": 1138 }, { "epoch": 0.15398394592310943, "grad_norm": 0.4523150622844696, "learning_rate": 2.9392219291987315e-05, "loss": 0.08692288398742676, "step": 1139 }, { "epoch": 0.1541191381495564, "grad_norm": 0.7739888429641724, "learning_rate": 2.939028922304744e-05, "loss": 0.06455564498901367, "step": 1140 }, { "epoch": 0.1542543303760034, "grad_norm": 0.7813996076583862, "learning_rate": 2.9388356157972615e-05, "loss": 0.07198441028594971, "step": 1141 }, { "epoch": 0.15438952260245037, "grad_norm": 0.3456525206565857, "learning_rate": 2.938642009716531e-05, "loss": 0.0829610824584961, "step": 1142 }, { "epoch": 0.15452471482889735, "grad_norm": 0.3205811381340027, "learning_rate": 2.938448104102862e-05, "loss": 0.06907987594604492, "step": 1143 }, { "epoch": 0.15465990705534433, "grad_norm": 0.3885345160961151, "learning_rate": 2.9382538989966267e-05, "loss": 0.08256053924560547, "step": 1144 }, { "epoch": 0.1547950992817913, "grad_norm": 0.29711881279945374, "learning_rate": 2.9380593944382605e-05, "loss": 0.03909134864807129, "step": 1145 }, { "epoch": 0.15493029150823828, "grad_norm": 0.47773250937461853, "learning_rate": 2.9378645904682596e-05, "loss": 0.05340099334716797, "step": 1146 }, { "epoch": 0.15506548373468526, "grad_norm": 0.7147215604782104, "learning_rate": 2.937669487127183e-05, "loss": 0.07366132736206055, "step": 1147 }, { "epoch": 0.15520067596113224, "grad_norm": 0.7375534176826477, "learning_rate": 2.9374740844556532e-05, "loss": 0.08477926254272461, "step": 1148 }, { "epoch": 0.15533586818757922, "grad_norm": 0.4000887870788574, "learning_rate": 2.937278382494353e-05, "loss": 0.08346080780029297, "step": 1149 }, { "epoch": 0.1554710604140262, "grad_norm": 0.3509191870689392, "learning_rate": 2.9370823812840287e-05, "loss": 0.08596360683441162, "step": 1150 }, { "epoch": 0.15560625264047317, "grad_norm": 0.8168505430221558, "learning_rate": 2.93688608086549e-05, "loss": 0.07916259765625, "step": 1151 }, { "epoch": 0.15574144486692015, "grad_norm": 0.22475913166999817, "learning_rate": 2.9366894812796064e-05, "loss": 0.06238365173339844, "step": 1152 }, { "epoch": 0.15587663709336713, "grad_norm": 0.5967375040054321, "learning_rate": 2.9364925825673117e-05, "loss": 0.06840991973876953, "step": 1153 }, { "epoch": 0.1560118293198141, "grad_norm": 0.6725037693977356, "learning_rate": 2.9362953847696006e-05, "loss": 0.0830678939819336, "step": 1154 }, { "epoch": 0.1561470215462611, "grad_norm": 0.9402245283126831, "learning_rate": 2.9360978879275313e-05, "loss": 0.08952546119689941, "step": 1155 }, { "epoch": 0.15628221377270807, "grad_norm": 0.571772038936615, "learning_rate": 2.9359000920822237e-05, "loss": 0.06865835189819336, "step": 1156 }, { "epoch": 0.15641740599915505, "grad_norm": 0.5571675300598145, "learning_rate": 2.9357019972748594e-05, "loss": 0.06963920593261719, "step": 1157 }, { "epoch": 0.15655259822560202, "grad_norm": 0.4392727017402649, "learning_rate": 2.9355036035466836e-05, "loss": 0.0725712776184082, "step": 1158 }, { "epoch": 0.156687790452049, "grad_norm": 0.9154953956604004, "learning_rate": 2.935304910939002e-05, "loss": 0.08074283599853516, "step": 1159 }, { "epoch": 0.15682298267849598, "grad_norm": 1.5962560176849365, "learning_rate": 2.935105919493184e-05, "loss": 0.09226322174072266, "step": 1160 }, { "epoch": 0.15695817490494296, "grad_norm": 0.28233760595321655, "learning_rate": 2.9349066292506613e-05, "loss": 0.07190513610839844, "step": 1161 }, { "epoch": 0.15709336713138994, "grad_norm": 0.43321600556373596, "learning_rate": 2.934707040252926e-05, "loss": 0.08964920043945312, "step": 1162 }, { "epoch": 0.15722855935783692, "grad_norm": 0.37806591391563416, "learning_rate": 2.9345071525415342e-05, "loss": 0.10108518600463867, "step": 1163 }, { "epoch": 0.1573637515842839, "grad_norm": 0.6121034026145935, "learning_rate": 2.9343069661581035e-05, "loss": 0.0862569808959961, "step": 1164 }, { "epoch": 0.15749894381073087, "grad_norm": 1.3534986972808838, "learning_rate": 2.9341064811443138e-05, "loss": 0.10041403770446777, "step": 1165 }, { "epoch": 0.15763413603717785, "grad_norm": 0.6392109394073486, "learning_rate": 2.9339056975419078e-05, "loss": 0.0789952278137207, "step": 1166 }, { "epoch": 0.15776932826362483, "grad_norm": 1.1749188899993896, "learning_rate": 2.9337046153926882e-05, "loss": 0.10968422889709473, "step": 1167 }, { "epoch": 0.1579045204900718, "grad_norm": 0.2618032693862915, "learning_rate": 2.9335032347385224e-05, "loss": 0.04763603210449219, "step": 1168 }, { "epoch": 0.1580397127165188, "grad_norm": 0.8895334005355835, "learning_rate": 2.933301555621339e-05, "loss": 0.10931873321533203, "step": 1169 }, { "epoch": 0.15817490494296577, "grad_norm": 1.1451064348220825, "learning_rate": 2.933099578083128e-05, "loss": 0.08714914321899414, "step": 1170 }, { "epoch": 0.15831009716941277, "grad_norm": 1.0426045656204224, "learning_rate": 2.932897302165943e-05, "loss": 0.12275981903076172, "step": 1171 }, { "epoch": 0.15844528939585975, "grad_norm": 1.392737627029419, "learning_rate": 2.9326947279118983e-05, "loss": 0.08675193786621094, "step": 1172 }, { "epoch": 0.15858048162230673, "grad_norm": 0.6134676337242126, "learning_rate": 2.9324918553631716e-05, "loss": 0.06651067733764648, "step": 1173 }, { "epoch": 0.1587156738487537, "grad_norm": 0.4304104447364807, "learning_rate": 2.9322886845620013e-05, "loss": 0.07193470001220703, "step": 1174 }, { "epoch": 0.1588508660752007, "grad_norm": 0.47915732860565186, "learning_rate": 2.932085215550689e-05, "loss": 0.06539511680603027, "step": 1175 }, { "epoch": 0.15898605830164766, "grad_norm": 0.35044535994529724, "learning_rate": 2.9318814483715982e-05, "loss": 0.09577655792236328, "step": 1176 }, { "epoch": 0.15912125052809464, "grad_norm": 0.5289446115493774, "learning_rate": 2.9316773830671537e-05, "loss": 0.05567789077758789, "step": 1177 }, { "epoch": 0.15925644275454162, "grad_norm": 1.16676926612854, "learning_rate": 2.9314730196798437e-05, "loss": 0.07228612899780273, "step": 1178 }, { "epoch": 0.1593916349809886, "grad_norm": 0.33163681626319885, "learning_rate": 2.9312683582522178e-05, "loss": 0.06267356872558594, "step": 1179 }, { "epoch": 0.15952682720743558, "grad_norm": 0.47903668880462646, "learning_rate": 2.9310633988268868e-05, "loss": 0.07154417037963867, "step": 1180 }, { "epoch": 0.15966201943388256, "grad_norm": 0.445639431476593, "learning_rate": 2.9308581414465246e-05, "loss": 0.05153632164001465, "step": 1181 }, { "epoch": 0.15979721166032954, "grad_norm": 0.2278568148612976, "learning_rate": 2.9306525861538674e-05, "loss": 0.06707310676574707, "step": 1182 }, { "epoch": 0.15993240388677651, "grad_norm": 0.36722877621650696, "learning_rate": 2.9304467329917127e-05, "loss": 0.06810903549194336, "step": 1183 }, { "epoch": 0.1600675961132235, "grad_norm": 0.5402987599372864, "learning_rate": 2.9302405820029198e-05, "loss": 0.07645750045776367, "step": 1184 }, { "epoch": 0.16020278833967047, "grad_norm": 0.7956507205963135, "learning_rate": 2.9300341332304114e-05, "loss": 0.102630615234375, "step": 1185 }, { "epoch": 0.16033798056611745, "grad_norm": 0.47227296233177185, "learning_rate": 2.9298273867171697e-05, "loss": 0.0795888900756836, "step": 1186 }, { "epoch": 0.16047317279256443, "grad_norm": 0.48212334513664246, "learning_rate": 2.929620342506242e-05, "loss": 0.07121658325195312, "step": 1187 }, { "epoch": 0.1606083650190114, "grad_norm": 0.5086870789527893, "learning_rate": 2.929413000640735e-05, "loss": 0.07348346710205078, "step": 1188 }, { "epoch": 0.16074355724545838, "grad_norm": 0.40089017152786255, "learning_rate": 2.9292053611638187e-05, "loss": 0.06230783462524414, "step": 1189 }, { "epoch": 0.16087874947190536, "grad_norm": 0.7533932328224182, "learning_rate": 2.928997424118725e-05, "loss": 0.08992528915405273, "step": 1190 }, { "epoch": 0.16101394169835234, "grad_norm": 0.37295597791671753, "learning_rate": 2.928789189548747e-05, "loss": 0.09594249725341797, "step": 1191 }, { "epoch": 0.16114913392479932, "grad_norm": 0.5290022492408752, "learning_rate": 2.9285806574972405e-05, "loss": 0.08588194847106934, "step": 1192 }, { "epoch": 0.1612843261512463, "grad_norm": 0.3387167155742645, "learning_rate": 2.928371828007623e-05, "loss": 0.04946088790893555, "step": 1193 }, { "epoch": 0.16141951837769328, "grad_norm": 0.3090049624443054, "learning_rate": 2.928162701123374e-05, "loss": 0.06363749504089355, "step": 1194 }, { "epoch": 0.16155471060414026, "grad_norm": 0.7510243654251099, "learning_rate": 2.9279532768880345e-05, "loss": 0.08713436126708984, "step": 1195 }, { "epoch": 0.16168990283058723, "grad_norm": 0.2118089497089386, "learning_rate": 2.9277435553452084e-05, "loss": 0.04293251037597656, "step": 1196 }, { "epoch": 0.1618250950570342, "grad_norm": 0.2888762056827545, "learning_rate": 2.9275335365385602e-05, "loss": 0.07376575469970703, "step": 1197 }, { "epoch": 0.1619602872834812, "grad_norm": 0.5952282547950745, "learning_rate": 2.927323220511817e-05, "loss": 0.08271503448486328, "step": 1198 }, { "epoch": 0.16209547950992817, "grad_norm": 0.39375627040863037, "learning_rate": 2.9271126073087684e-05, "loss": 0.0668494701385498, "step": 1199 }, { "epoch": 0.16223067173637515, "grad_norm": 0.591256856918335, "learning_rate": 2.926901696973264e-05, "loss": 0.0479205846786499, "step": 1200 }, { "epoch": 0.16236586396282213, "grad_norm": 0.5941855311393738, "learning_rate": 2.9266904895492177e-05, "loss": 0.07880735397338867, "step": 1201 }, { "epoch": 0.1625010561892691, "grad_norm": 0.5759038329124451, "learning_rate": 2.926478985080603e-05, "loss": 0.07346010208129883, "step": 1202 }, { "epoch": 0.16263624841571608, "grad_norm": 0.39038926362991333, "learning_rate": 2.9262671836114568e-05, "loss": 0.05943942070007324, "step": 1203 }, { "epoch": 0.16277144064216306, "grad_norm": 0.5903465747833252, "learning_rate": 2.9260550851858774e-05, "loss": 0.07378315925598145, "step": 1204 }, { "epoch": 0.16290663286861007, "grad_norm": 0.5197759866714478, "learning_rate": 2.9258426898480243e-05, "loss": 0.09259724617004395, "step": 1205 }, { "epoch": 0.16304182509505705, "grad_norm": 0.46826356649398804, "learning_rate": 2.9256299976421198e-05, "loss": 0.07935118675231934, "step": 1206 }, { "epoch": 0.16317701732150403, "grad_norm": 0.39055630564689636, "learning_rate": 2.9254170086124474e-05, "loss": 0.07779121398925781, "step": 1207 }, { "epoch": 0.163312209547951, "grad_norm": 0.5419468283653259, "learning_rate": 2.9252037228033526e-05, "loss": 0.1074373722076416, "step": 1208 }, { "epoch": 0.16344740177439798, "grad_norm": 0.4405940771102905, "learning_rate": 2.9249901402592424e-05, "loss": 0.0735478401184082, "step": 1209 }, { "epoch": 0.16358259400084496, "grad_norm": 0.7484593987464905, "learning_rate": 2.9247762610245863e-05, "loss": 0.08356046676635742, "step": 1210 }, { "epoch": 0.16371778622729194, "grad_norm": 0.418267160654068, "learning_rate": 2.9245620851439146e-05, "loss": 0.04102802276611328, "step": 1211 }, { "epoch": 0.16385297845373892, "grad_norm": 0.3406740427017212, "learning_rate": 2.92434761266182e-05, "loss": 0.07135295867919922, "step": 1212 }, { "epoch": 0.1639881706801859, "grad_norm": 0.4251808524131775, "learning_rate": 2.924132843622957e-05, "loss": 0.07227134704589844, "step": 1213 }, { "epoch": 0.16412336290663287, "grad_norm": 0.8093040585517883, "learning_rate": 2.9239177780720418e-05, "loss": 0.079986572265625, "step": 1214 }, { "epoch": 0.16425855513307985, "grad_norm": 0.4330202639102936, "learning_rate": 2.923702416053852e-05, "loss": 0.08249950408935547, "step": 1215 }, { "epoch": 0.16439374735952683, "grad_norm": 0.2535877525806427, "learning_rate": 2.9234867576132268e-05, "loss": 0.0570220947265625, "step": 1216 }, { "epoch": 0.1645289395859738, "grad_norm": 0.5611778497695923, "learning_rate": 2.923270802795068e-05, "loss": 0.10200023651123047, "step": 1217 }, { "epoch": 0.1646641318124208, "grad_norm": 0.2409171760082245, "learning_rate": 2.9230545516443378e-05, "loss": 0.059731483459472656, "step": 1218 }, { "epoch": 0.16479932403886777, "grad_norm": 0.22265183925628662, "learning_rate": 2.9228380042060615e-05, "loss": 0.06401443481445312, "step": 1219 }, { "epoch": 0.16493451626531475, "grad_norm": 0.18647074699401855, "learning_rate": 2.9226211605253252e-05, "loss": 0.036158084869384766, "step": 1220 }, { "epoch": 0.16506970849176172, "grad_norm": 0.6751832365989685, "learning_rate": 2.922404020647277e-05, "loss": 0.055684566497802734, "step": 1221 }, { "epoch": 0.1652049007182087, "grad_norm": 0.5978150367736816, "learning_rate": 2.9221865846171264e-05, "loss": 0.07187366485595703, "step": 1222 }, { "epoch": 0.16534009294465568, "grad_norm": 0.23081234097480774, "learning_rate": 2.9219688524801446e-05, "loss": 0.04249095916748047, "step": 1223 }, { "epoch": 0.16547528517110266, "grad_norm": 0.2462223619222641, "learning_rate": 2.9217508242816653e-05, "loss": 0.05764305591583252, "step": 1224 }, { "epoch": 0.16561047739754964, "grad_norm": 0.2645401954650879, "learning_rate": 2.921532500067083e-05, "loss": 0.04351234436035156, "step": 1225 }, { "epoch": 0.16574566962399662, "grad_norm": 0.30472224950790405, "learning_rate": 2.9213138798818528e-05, "loss": 0.05962371826171875, "step": 1226 }, { "epoch": 0.1658808618504436, "grad_norm": 0.7413991093635559, "learning_rate": 2.921094963771494e-05, "loss": 0.08212137222290039, "step": 1227 }, { "epoch": 0.16601605407689057, "grad_norm": 0.5718117356300354, "learning_rate": 2.9208757517815855e-05, "loss": 0.07232332229614258, "step": 1228 }, { "epoch": 0.16615124630333755, "grad_norm": 0.5255526900291443, "learning_rate": 2.9206562439577684e-05, "loss": 0.06885433197021484, "step": 1229 }, { "epoch": 0.16628643852978453, "grad_norm": 0.2876903712749481, "learning_rate": 2.9204364403457452e-05, "loss": 0.07418346405029297, "step": 1230 }, { "epoch": 0.1664216307562315, "grad_norm": 0.577975332736969, "learning_rate": 2.9202163409912808e-05, "loss": 0.06223487854003906, "step": 1231 }, { "epoch": 0.1665568229826785, "grad_norm": 1.10220205783844, "learning_rate": 2.9199959459402003e-05, "loss": 0.08066082000732422, "step": 1232 }, { "epoch": 0.16669201520912547, "grad_norm": 0.407647967338562, "learning_rate": 2.919775255238392e-05, "loss": 0.07311725616455078, "step": 1233 }, { "epoch": 0.16682720743557244, "grad_norm": 0.6401172280311584, "learning_rate": 2.919554268931804e-05, "loss": 0.0740213394165039, "step": 1234 }, { "epoch": 0.16696239966201942, "grad_norm": 0.5067819356918335, "learning_rate": 2.9193329870664475e-05, "loss": 0.07566213607788086, "step": 1235 }, { "epoch": 0.1670975918884664, "grad_norm": 0.45964089035987854, "learning_rate": 2.9191114096883938e-05, "loss": 0.07619571685791016, "step": 1236 }, { "epoch": 0.16723278411491338, "grad_norm": 0.9318496584892273, "learning_rate": 2.9188895368437774e-05, "loss": 0.0842432975769043, "step": 1237 }, { "epoch": 0.16736797634136036, "grad_norm": 0.8267168998718262, "learning_rate": 2.9186673685787926e-05, "loss": 0.09326457977294922, "step": 1238 }, { "epoch": 0.16750316856780736, "grad_norm": 0.5304529666900635, "learning_rate": 2.918444904939697e-05, "loss": 0.08791971206665039, "step": 1239 }, { "epoch": 0.16763836079425434, "grad_norm": 0.4358448386192322, "learning_rate": 2.9182221459728078e-05, "loss": 0.07210993766784668, "step": 1240 }, { "epoch": 0.16777355302070132, "grad_norm": 0.5428539514541626, "learning_rate": 2.917999091724505e-05, "loss": 0.08456897735595703, "step": 1241 }, { "epoch": 0.1679087452471483, "grad_norm": 0.6845792531967163, "learning_rate": 2.9177757422412294e-05, "loss": 0.07977914810180664, "step": 1242 }, { "epoch": 0.16804393747359528, "grad_norm": 0.8218401670455933, "learning_rate": 2.917552097569484e-05, "loss": 0.09357595443725586, "step": 1243 }, { "epoch": 0.16817912970004226, "grad_norm": 0.5126569867134094, "learning_rate": 2.917328157755832e-05, "loss": 0.07925701141357422, "step": 1244 }, { "epoch": 0.16831432192648924, "grad_norm": 0.41536298394203186, "learning_rate": 2.9171039228469003e-05, "loss": 0.06977081298828125, "step": 1245 }, { "epoch": 0.16844951415293621, "grad_norm": 0.4484061598777771, "learning_rate": 2.9168793928893747e-05, "loss": 0.07013273239135742, "step": 1246 }, { "epoch": 0.1685847063793832, "grad_norm": 0.34745410084724426, "learning_rate": 2.9166545679300036e-05, "loss": 0.05951499938964844, "step": 1247 }, { "epoch": 0.16871989860583017, "grad_norm": 0.9694186449050903, "learning_rate": 2.9164294480155966e-05, "loss": 0.09905719757080078, "step": 1248 }, { "epoch": 0.16885509083227715, "grad_norm": 0.38170325756073, "learning_rate": 2.9162040331930256e-05, "loss": 0.0772395133972168, "step": 1249 }, { "epoch": 0.16899028305872413, "grad_norm": 0.47763752937316895, "learning_rate": 2.915978323509223e-05, "loss": 0.08076238632202148, "step": 1250 }, { "epoch": 0.1691254752851711, "grad_norm": 0.3137807250022888, "learning_rate": 2.915752319011182e-05, "loss": 0.07265853881835938, "step": 1251 }, { "epoch": 0.16926066751161808, "grad_norm": 0.22357763350009918, "learning_rate": 2.9155260197459588e-05, "loss": 0.049147605895996094, "step": 1252 }, { "epoch": 0.16939585973806506, "grad_norm": 0.29378217458724976, "learning_rate": 2.91529942576067e-05, "loss": 0.05040168762207031, "step": 1253 }, { "epoch": 0.16953105196451204, "grad_norm": 0.7124695777893066, "learning_rate": 2.915072537102493e-05, "loss": 0.09065961837768555, "step": 1254 }, { "epoch": 0.16966624419095902, "grad_norm": 0.44418132305145264, "learning_rate": 2.914845353818668e-05, "loss": 0.08211135864257812, "step": 1255 }, { "epoch": 0.169801436417406, "grad_norm": 0.3166307806968689, "learning_rate": 2.9146178759564953e-05, "loss": 0.07910871505737305, "step": 1256 }, { "epoch": 0.16993662864385298, "grad_norm": 0.42782777547836304, "learning_rate": 2.914390103563337e-05, "loss": 0.0905752182006836, "step": 1257 }, { "epoch": 0.17007182087029996, "grad_norm": 0.6515711545944214, "learning_rate": 2.914162036686617e-05, "loss": 0.11740922927856445, "step": 1258 }, { "epoch": 0.17020701309674693, "grad_norm": 0.44100797176361084, "learning_rate": 2.9139336753738196e-05, "loss": 0.06583881378173828, "step": 1259 }, { "epoch": 0.1703422053231939, "grad_norm": 0.6195662021636963, "learning_rate": 2.913705019672491e-05, "loss": 0.09023380279541016, "step": 1260 }, { "epoch": 0.1704773975496409, "grad_norm": 0.3901897370815277, "learning_rate": 2.9134760696302386e-05, "loss": 0.048100948333740234, "step": 1261 }, { "epoch": 0.17061258977608787, "grad_norm": 0.6318403482437134, "learning_rate": 2.9132468252947306e-05, "loss": 0.08879256248474121, "step": 1262 }, { "epoch": 0.17074778200253485, "grad_norm": 0.27793169021606445, "learning_rate": 2.9130172867136974e-05, "loss": 0.04077959060668945, "step": 1263 }, { "epoch": 0.17088297422898183, "grad_norm": 1.3737980127334595, "learning_rate": 2.91278745393493e-05, "loss": 0.11382007598876953, "step": 1264 }, { "epoch": 0.1710181664554288, "grad_norm": 0.8151664733886719, "learning_rate": 2.9125573270062812e-05, "loss": 0.0523991584777832, "step": 1265 }, { "epoch": 0.17115335868187578, "grad_norm": 0.40451499819755554, "learning_rate": 2.9123269059756634e-05, "loss": 0.04644775390625, "step": 1266 }, { "epoch": 0.17128855090832276, "grad_norm": 0.4913591742515564, "learning_rate": 2.9120961908910528e-05, "loss": 0.10492610931396484, "step": 1267 }, { "epoch": 0.17142374313476974, "grad_norm": 0.45436978340148926, "learning_rate": 2.911865181800485e-05, "loss": 0.05088472366333008, "step": 1268 }, { "epoch": 0.17155893536121672, "grad_norm": 0.5139414072036743, "learning_rate": 2.9116338787520577e-05, "loss": 0.059630393981933594, "step": 1269 }, { "epoch": 0.1716941275876637, "grad_norm": 1.0088963508605957, "learning_rate": 2.9114022817939283e-05, "loss": 0.0697479248046875, "step": 1270 }, { "epoch": 0.17182931981411068, "grad_norm": 0.6361703276634216, "learning_rate": 2.911170390974318e-05, "loss": 0.05156564712524414, "step": 1271 }, { "epoch": 0.17196451204055765, "grad_norm": 0.5383647084236145, "learning_rate": 2.9109382063415067e-05, "loss": 0.07959222793579102, "step": 1272 }, { "epoch": 0.17209970426700466, "grad_norm": 0.2548418641090393, "learning_rate": 2.9107057279438372e-05, "loss": 0.05634641647338867, "step": 1273 }, { "epoch": 0.17223489649345164, "grad_norm": 0.7601280808448792, "learning_rate": 2.910472955829712e-05, "loss": 0.07485723495483398, "step": 1274 }, { "epoch": 0.17237008871989862, "grad_norm": 0.819226086139679, "learning_rate": 2.9102398900475958e-05, "loss": 0.0662851333618164, "step": 1275 }, { "epoch": 0.1725052809463456, "grad_norm": 0.4801798462867737, "learning_rate": 2.910006530646014e-05, "loss": 0.06519842147827148, "step": 1276 }, { "epoch": 0.17264047317279257, "grad_norm": 0.7828125953674316, "learning_rate": 2.909772877673554e-05, "loss": 0.05161428451538086, "step": 1277 }, { "epoch": 0.17277566539923955, "grad_norm": 0.5340220332145691, "learning_rate": 2.9095389311788626e-05, "loss": 0.08268213272094727, "step": 1278 }, { "epoch": 0.17291085762568653, "grad_norm": 0.644839346408844, "learning_rate": 2.9093046912106494e-05, "loss": 0.1143331527709961, "step": 1279 }, { "epoch": 0.1730460498521335, "grad_norm": 0.6397328972816467, "learning_rate": 2.909070157817684e-05, "loss": 0.05923748016357422, "step": 1280 }, { "epoch": 0.1731812420785805, "grad_norm": 0.6796171069145203, "learning_rate": 2.9088353310487976e-05, "loss": 0.053072452545166016, "step": 1281 }, { "epoch": 0.17331643430502747, "grad_norm": 1.5975652933120728, "learning_rate": 2.9086002109528825e-05, "loss": 0.09544229507446289, "step": 1282 }, { "epoch": 0.17345162653147445, "grad_norm": 0.5910240411758423, "learning_rate": 2.908364797578892e-05, "loss": 0.04752826690673828, "step": 1283 }, { "epoch": 0.17358681875792142, "grad_norm": 0.37046805024147034, "learning_rate": 2.9081290909758405e-05, "loss": 0.06883811950683594, "step": 1284 }, { "epoch": 0.1737220109843684, "grad_norm": 0.9193475246429443, "learning_rate": 2.9078930911928033e-05, "loss": 0.09998798370361328, "step": 1285 }, { "epoch": 0.17385720321081538, "grad_norm": 0.3985954523086548, "learning_rate": 2.907656798278916e-05, "loss": 0.09383535385131836, "step": 1286 }, { "epoch": 0.17399239543726236, "grad_norm": 0.4473966956138611, "learning_rate": 2.9074202122833773e-05, "loss": 0.07605195045471191, "step": 1287 }, { "epoch": 0.17412758766370934, "grad_norm": 0.4351816177368164, "learning_rate": 2.907183333255445e-05, "loss": 0.09464550018310547, "step": 1288 }, { "epoch": 0.17426277989015632, "grad_norm": 1.3260295391082764, "learning_rate": 2.9069461612444384e-05, "loss": 0.1009683609008789, "step": 1289 }, { "epoch": 0.1743979721166033, "grad_norm": 1.2182968854904175, "learning_rate": 2.9067086962997385e-05, "loss": 0.07427072525024414, "step": 1290 }, { "epoch": 0.17453316434305027, "grad_norm": 0.37077826261520386, "learning_rate": 2.9064709384707868e-05, "loss": 0.056021690368652344, "step": 1291 }, { "epoch": 0.17466835656949725, "grad_norm": 1.2460367679595947, "learning_rate": 2.9062328878070855e-05, "loss": 0.15209388732910156, "step": 1292 }, { "epoch": 0.17480354879594423, "grad_norm": 0.9635239839553833, "learning_rate": 2.905994544358198e-05, "loss": 0.07224225997924805, "step": 1293 }, { "epoch": 0.1749387410223912, "grad_norm": 0.28591299057006836, "learning_rate": 2.9057559081737482e-05, "loss": 0.06308269500732422, "step": 1294 }, { "epoch": 0.1750739332488382, "grad_norm": 0.6258271932601929, "learning_rate": 2.9055169793034225e-05, "loss": 0.09959697723388672, "step": 1295 }, { "epoch": 0.17520912547528517, "grad_norm": 0.4155055284500122, "learning_rate": 2.9052777577969656e-05, "loss": 0.08299922943115234, "step": 1296 }, { "epoch": 0.17534431770173214, "grad_norm": 0.22354209423065186, "learning_rate": 2.9050382437041868e-05, "loss": 0.04102611541748047, "step": 1297 }, { "epoch": 0.17547950992817912, "grad_norm": 0.3546565771102905, "learning_rate": 2.9047984370749526e-05, "loss": 0.09758901596069336, "step": 1298 }, { "epoch": 0.1756147021546261, "grad_norm": 0.34732240438461304, "learning_rate": 2.9045583379591925e-05, "loss": 0.058268070220947266, "step": 1299 }, { "epoch": 0.17574989438107308, "grad_norm": 0.4467600882053375, "learning_rate": 2.9043179464068965e-05, "loss": 0.09261894226074219, "step": 1300 }, { "epoch": 0.17588508660752006, "grad_norm": 0.2954292297363281, "learning_rate": 2.9040772624681152e-05, "loss": 0.06338787078857422, "step": 1301 }, { "epoch": 0.17602027883396704, "grad_norm": 0.49611571431159973, "learning_rate": 2.9038362861929603e-05, "loss": 0.0854954719543457, "step": 1302 }, { "epoch": 0.17615547106041402, "grad_norm": 0.506494402885437, "learning_rate": 2.903595017631605e-05, "loss": 0.06277728080749512, "step": 1303 }, { "epoch": 0.176290663286861, "grad_norm": 0.20389805734157562, "learning_rate": 2.903353456834282e-05, "loss": 0.0464324951171875, "step": 1304 }, { "epoch": 0.17642585551330797, "grad_norm": 0.25398895144462585, "learning_rate": 2.903111603851285e-05, "loss": 0.06668472290039062, "step": 1305 }, { "epoch": 0.17656104773975495, "grad_norm": 0.2984888255596161, "learning_rate": 2.9028694587329704e-05, "loss": 0.07360184192657471, "step": 1306 }, { "epoch": 0.17669623996620196, "grad_norm": 0.14982306957244873, "learning_rate": 2.902627021529753e-05, "loss": 0.04499363899230957, "step": 1307 }, { "epoch": 0.17683143219264894, "grad_norm": 0.24880695343017578, "learning_rate": 2.9023842922921105e-05, "loss": 0.06262731552124023, "step": 1308 }, { "epoch": 0.1769666244190959, "grad_norm": 0.24707214534282684, "learning_rate": 2.90214127107058e-05, "loss": 0.07372760772705078, "step": 1309 }, { "epoch": 0.1771018166455429, "grad_norm": 0.5720853209495544, "learning_rate": 2.9018979579157592e-05, "loss": 0.09578704833984375, "step": 1310 }, { "epoch": 0.17723700887198987, "grad_norm": 0.3586277961730957, "learning_rate": 2.901654352878308e-05, "loss": 0.06875896453857422, "step": 1311 }, { "epoch": 0.17737220109843685, "grad_norm": 0.23985342681407928, "learning_rate": 2.9014104560089462e-05, "loss": 0.036717891693115234, "step": 1312 }, { "epoch": 0.17750739332488383, "grad_norm": 0.6026805639266968, "learning_rate": 2.9011662673584538e-05, "loss": 0.08232450485229492, "step": 1313 }, { "epoch": 0.1776425855513308, "grad_norm": 0.25450122356414795, "learning_rate": 2.900921786977673e-05, "loss": 0.07907867431640625, "step": 1314 }, { "epoch": 0.17777777777777778, "grad_norm": 0.6614646911621094, "learning_rate": 2.900677014917505e-05, "loss": 0.07966995239257812, "step": 1315 }, { "epoch": 0.17791297000422476, "grad_norm": 0.2466992437839508, "learning_rate": 2.9004319512289136e-05, "loss": 0.06562530994415283, "step": 1316 }, { "epoch": 0.17804816223067174, "grad_norm": 0.4602111279964447, "learning_rate": 2.9001865959629222e-05, "loss": 0.0880885124206543, "step": 1317 }, { "epoch": 0.17818335445711872, "grad_norm": 0.8172971606254578, "learning_rate": 2.8999409491706143e-05, "loss": 0.0840153694152832, "step": 1318 }, { "epoch": 0.1783185466835657, "grad_norm": 0.4828501343727112, "learning_rate": 2.8996950109031355e-05, "loss": 0.07391500473022461, "step": 1319 }, { "epoch": 0.17845373891001268, "grad_norm": 0.44382625818252563, "learning_rate": 2.8994487812116917e-05, "loss": 0.05877399444580078, "step": 1320 }, { "epoch": 0.17858893113645966, "grad_norm": 0.43092894554138184, "learning_rate": 2.8992022601475483e-05, "loss": 0.08002662658691406, "step": 1321 }, { "epoch": 0.17872412336290663, "grad_norm": 0.5200045704841614, "learning_rate": 2.8989554477620332e-05, "loss": 0.05827474594116211, "step": 1322 }, { "epoch": 0.1788593155893536, "grad_norm": 0.2672748863697052, "learning_rate": 2.8987083441065335e-05, "loss": 0.07176971435546875, "step": 1323 }, { "epoch": 0.1789945078158006, "grad_norm": 0.42366379499435425, "learning_rate": 2.8984609492324983e-05, "loss": 0.06212615966796875, "step": 1324 }, { "epoch": 0.17912970004224757, "grad_norm": 0.6650789976119995, "learning_rate": 2.8982132631914357e-05, "loss": 0.1201324462890625, "step": 1325 }, { "epoch": 0.17926489226869455, "grad_norm": 0.6314013004302979, "learning_rate": 2.8979652860349154e-05, "loss": 0.1195363998413086, "step": 1326 }, { "epoch": 0.17940008449514153, "grad_norm": 0.37290480732917786, "learning_rate": 2.8977170178145675e-05, "loss": 0.0666818618774414, "step": 1327 }, { "epoch": 0.1795352767215885, "grad_norm": 0.14347536861896515, "learning_rate": 2.8974684585820833e-05, "loss": 0.03762531280517578, "step": 1328 }, { "epoch": 0.17967046894803548, "grad_norm": 0.33331525325775146, "learning_rate": 2.8972196083892138e-05, "loss": 0.06098461151123047, "step": 1329 }, { "epoch": 0.17980566117448246, "grad_norm": 0.36663979291915894, "learning_rate": 2.8969704672877707e-05, "loss": 0.059617042541503906, "step": 1330 }, { "epoch": 0.17994085340092944, "grad_norm": 0.4675559997558594, "learning_rate": 2.896721035329627e-05, "loss": 0.07706928253173828, "step": 1331 }, { "epoch": 0.18007604562737642, "grad_norm": 0.23798486590385437, "learning_rate": 2.8964713125667153e-05, "loss": 0.03920292854309082, "step": 1332 }, { "epoch": 0.1802112378538234, "grad_norm": 0.643490731716156, "learning_rate": 2.8962212990510294e-05, "loss": 0.08657097816467285, "step": 1333 }, { "epoch": 0.18034643008027038, "grad_norm": 0.4887504577636719, "learning_rate": 2.8959709948346237e-05, "loss": 0.08656644821166992, "step": 1334 }, { "epoch": 0.18048162230671735, "grad_norm": 0.27658864855766296, "learning_rate": 2.8957203999696124e-05, "loss": 0.05988359451293945, "step": 1335 }, { "epoch": 0.18061681453316433, "grad_norm": 0.5200914740562439, "learning_rate": 2.8954695145081713e-05, "loss": 0.055371761322021484, "step": 1336 }, { "epoch": 0.1807520067596113, "grad_norm": 0.3747258484363556, "learning_rate": 2.8952183385025356e-05, "loss": 0.07856559753417969, "step": 1337 }, { "epoch": 0.1808871989860583, "grad_norm": 0.8470179438591003, "learning_rate": 2.8949668720050014e-05, "loss": 0.10687398910522461, "step": 1338 }, { "epoch": 0.18102239121250527, "grad_norm": 0.38438788056373596, "learning_rate": 2.8947151150679256e-05, "loss": 0.0556793212890625, "step": 1339 }, { "epoch": 0.18115758343895225, "grad_norm": 0.40534600615501404, "learning_rate": 2.8944630677437255e-05, "loss": 0.06749582290649414, "step": 1340 }, { "epoch": 0.18129277566539925, "grad_norm": 0.3961862325668335, "learning_rate": 2.8942107300848784e-05, "loss": 0.04695749282836914, "step": 1341 }, { "epoch": 0.18142796789184623, "grad_norm": 1.1106945276260376, "learning_rate": 2.8939581021439225e-05, "loss": 0.09221553802490234, "step": 1342 }, { "epoch": 0.1815631601182932, "grad_norm": 0.3141094446182251, "learning_rate": 2.8937051839734563e-05, "loss": 0.07662510871887207, "step": 1343 }, { "epoch": 0.1816983523447402, "grad_norm": 0.7614315152168274, "learning_rate": 2.8934519756261384e-05, "loss": 0.09467506408691406, "step": 1344 }, { "epoch": 0.18183354457118717, "grad_norm": 0.27528730034828186, "learning_rate": 2.8931984771546885e-05, "loss": 0.03645491600036621, "step": 1345 }, { "epoch": 0.18196873679763415, "grad_norm": 0.30013570189476013, "learning_rate": 2.8929446886118866e-05, "loss": 0.06853103637695312, "step": 1346 }, { "epoch": 0.18210392902408112, "grad_norm": 0.6001962423324585, "learning_rate": 2.892690610050572e-05, "loss": 0.08995652198791504, "step": 1347 }, { "epoch": 0.1822391212505281, "grad_norm": 0.2631484568119049, "learning_rate": 2.892436241523646e-05, "loss": 0.05412912368774414, "step": 1348 }, { "epoch": 0.18237431347697508, "grad_norm": 0.25234776735305786, "learning_rate": 2.8921815830840685e-05, "loss": 0.0598912239074707, "step": 1349 }, { "epoch": 0.18250950570342206, "grad_norm": 0.2799901068210602, "learning_rate": 2.891926634784862e-05, "loss": 0.07377505302429199, "step": 1350 }, { "epoch": 0.18264469792986904, "grad_norm": 0.33253398537635803, "learning_rate": 2.8916713966791076e-05, "loss": 0.0797414779663086, "step": 1351 }, { "epoch": 0.18277989015631602, "grad_norm": 0.3313521146774292, "learning_rate": 2.8914158688199464e-05, "loss": 0.0764913558959961, "step": 1352 }, { "epoch": 0.182915082382763, "grad_norm": 1.5569920539855957, "learning_rate": 2.891160051260582e-05, "loss": 0.08131265640258789, "step": 1353 }, { "epoch": 0.18305027460920997, "grad_norm": 0.662760317325592, "learning_rate": 2.8909039440542758e-05, "loss": 0.09003162384033203, "step": 1354 }, { "epoch": 0.18318546683565695, "grad_norm": 0.42132049798965454, "learning_rate": 2.890647547254352e-05, "loss": 0.0610203742980957, "step": 1355 }, { "epoch": 0.18332065906210393, "grad_norm": 0.369578093290329, "learning_rate": 2.8903908609141923e-05, "loss": 0.07361698150634766, "step": 1356 }, { "epoch": 0.1834558512885509, "grad_norm": 0.6707038283348083, "learning_rate": 2.8901338850872413e-05, "loss": 0.09276676177978516, "step": 1357 }, { "epoch": 0.1835910435149979, "grad_norm": 0.6248319149017334, "learning_rate": 2.8898766198270022e-05, "loss": 0.09302616119384766, "step": 1358 }, { "epoch": 0.18372623574144487, "grad_norm": 0.5640145540237427, "learning_rate": 2.8896190651870392e-05, "loss": 0.06710171699523926, "step": 1359 }, { "epoch": 0.18386142796789184, "grad_norm": 0.5199601650238037, "learning_rate": 2.8893612212209763e-05, "loss": 0.08800101280212402, "step": 1360 }, { "epoch": 0.18399662019433882, "grad_norm": 0.6198127269744873, "learning_rate": 2.8891030879824985e-05, "loss": 0.05260324478149414, "step": 1361 }, { "epoch": 0.1841318124207858, "grad_norm": 0.33992907404899597, "learning_rate": 2.88884466552535e-05, "loss": 0.05054807662963867, "step": 1362 }, { "epoch": 0.18426700464723278, "grad_norm": 0.43843114376068115, "learning_rate": 2.888585953903336e-05, "loss": 0.10338830947875977, "step": 1363 }, { "epoch": 0.18440219687367976, "grad_norm": 0.40042394399642944, "learning_rate": 2.888326953170321e-05, "loss": 0.06149578094482422, "step": 1364 }, { "epoch": 0.18453738910012674, "grad_norm": 0.8685281276702881, "learning_rate": 2.8880676633802314e-05, "loss": 0.07832813262939453, "step": 1365 }, { "epoch": 0.18467258132657371, "grad_norm": 0.1962835192680359, "learning_rate": 2.8878080845870522e-05, "loss": 0.04717904329299927, "step": 1366 }, { "epoch": 0.1848077735530207, "grad_norm": 0.3657790422439575, "learning_rate": 2.887548216844829e-05, "loss": 0.0577082633972168, "step": 1367 }, { "epoch": 0.18494296577946767, "grad_norm": 0.27183419466018677, "learning_rate": 2.8872880602076675e-05, "loss": 0.054390907287597656, "step": 1368 }, { "epoch": 0.18507815800591465, "grad_norm": 0.6341437697410583, "learning_rate": 2.8870276147297344e-05, "loss": 0.10355615615844727, "step": 1369 }, { "epoch": 0.18521335023236163, "grad_norm": 0.5695599317550659, "learning_rate": 2.8867668804652552e-05, "loss": 0.07115697860717773, "step": 1370 }, { "epoch": 0.1853485424588086, "grad_norm": 0.35412582755088806, "learning_rate": 2.886505857468516e-05, "loss": 0.06946957111358643, "step": 1371 }, { "epoch": 0.18548373468525559, "grad_norm": 0.6484407782554626, "learning_rate": 2.8862445457938642e-05, "loss": 0.08343744277954102, "step": 1372 }, { "epoch": 0.18561892691170256, "grad_norm": 0.2549974322319031, "learning_rate": 2.8859829454957053e-05, "loss": 0.08511996269226074, "step": 1373 }, { "epoch": 0.18575411913814954, "grad_norm": 0.24268680810928345, "learning_rate": 2.8857210566285062e-05, "loss": 0.0642235279083252, "step": 1374 }, { "epoch": 0.18588931136459655, "grad_norm": 0.3865174949169159, "learning_rate": 2.8854588792467932e-05, "loss": 0.08867883682250977, "step": 1375 }, { "epoch": 0.18602450359104353, "grad_norm": 1.0227934122085571, "learning_rate": 2.8851964134051535e-05, "loss": 0.08764886856079102, "step": 1376 }, { "epoch": 0.1861596958174905, "grad_norm": 0.9363678097724915, "learning_rate": 2.884933659158234e-05, "loss": 0.11895275115966797, "step": 1377 }, { "epoch": 0.18629488804393748, "grad_norm": 0.6783412098884583, "learning_rate": 2.8846706165607415e-05, "loss": 0.06989693641662598, "step": 1378 }, { "epoch": 0.18643008027038446, "grad_norm": 0.49864137172698975, "learning_rate": 2.8844072856674422e-05, "loss": 0.07854223251342773, "step": 1379 }, { "epoch": 0.18656527249683144, "grad_norm": 0.8806300759315491, "learning_rate": 2.8841436665331634e-05, "loss": 0.07428121566772461, "step": 1380 }, { "epoch": 0.18670046472327842, "grad_norm": 0.36095812916755676, "learning_rate": 2.8838797592127927e-05, "loss": 0.057088375091552734, "step": 1381 }, { "epoch": 0.1868356569497254, "grad_norm": 0.21287067234516144, "learning_rate": 2.883615563761276e-05, "loss": 0.056174278259277344, "step": 1382 }, { "epoch": 0.18697084917617238, "grad_norm": 0.7511873245239258, "learning_rate": 2.8833510802336206e-05, "loss": 0.1345386505126953, "step": 1383 }, { "epoch": 0.18710604140261936, "grad_norm": 0.5694673657417297, "learning_rate": 2.883086308684893e-05, "loss": 0.0859684944152832, "step": 1384 }, { "epoch": 0.18724123362906633, "grad_norm": 0.32220032811164856, "learning_rate": 2.882821249170221e-05, "loss": 0.09302234649658203, "step": 1385 }, { "epoch": 0.1873764258555133, "grad_norm": 0.6958578824996948, "learning_rate": 2.8825559017447905e-05, "loss": 0.08498334884643555, "step": 1386 }, { "epoch": 0.1875116180819603, "grad_norm": 0.43010157346725464, "learning_rate": 2.8822902664638487e-05, "loss": 0.08757972717285156, "step": 1387 }, { "epoch": 0.18764681030840727, "grad_norm": 0.5990293622016907, "learning_rate": 2.882024343382702e-05, "loss": 0.06763720512390137, "step": 1388 }, { "epoch": 0.18778200253485425, "grad_norm": 0.3788200616836548, "learning_rate": 2.8817581325567174e-05, "loss": 0.09396004676818848, "step": 1389 }, { "epoch": 0.18791719476130123, "grad_norm": 0.25358206033706665, "learning_rate": 2.8814916340413205e-05, "loss": 0.05852937698364258, "step": 1390 }, { "epoch": 0.1880523869877482, "grad_norm": 0.3131154179573059, "learning_rate": 2.881224847891999e-05, "loss": 0.0705723762512207, "step": 1391 }, { "epoch": 0.18818757921419518, "grad_norm": 0.3784100413322449, "learning_rate": 2.8809577741642987e-05, "loss": 0.09511137008666992, "step": 1392 }, { "epoch": 0.18832277144064216, "grad_norm": 0.22725166380405426, "learning_rate": 2.8806904129138255e-05, "loss": 0.0523834228515625, "step": 1393 }, { "epoch": 0.18845796366708914, "grad_norm": 0.49162107706069946, "learning_rate": 2.8804227641962457e-05, "loss": 0.08653831481933594, "step": 1394 }, { "epoch": 0.18859315589353612, "grad_norm": 0.30303943157196045, "learning_rate": 2.8801548280672847e-05, "loss": 0.04696464538574219, "step": 1395 }, { "epoch": 0.1887283481199831, "grad_norm": 0.2844439744949341, "learning_rate": 2.8798866045827288e-05, "loss": 0.061730384826660156, "step": 1396 }, { "epoch": 0.18886354034643008, "grad_norm": 0.21750833094120026, "learning_rate": 2.8796180937984234e-05, "loss": 0.04741954803466797, "step": 1397 }, { "epoch": 0.18899873257287705, "grad_norm": 0.359749436378479, "learning_rate": 2.8793492957702738e-05, "loss": 0.05258369445800781, "step": 1398 }, { "epoch": 0.18913392479932403, "grad_norm": 0.4648842215538025, "learning_rate": 2.8790802105542454e-05, "loss": 0.05172157287597656, "step": 1399 }, { "epoch": 0.189269117025771, "grad_norm": 0.6724797487258911, "learning_rate": 2.8788108382063628e-05, "loss": 0.08913755416870117, "step": 1400 }, { "epoch": 0.189404309252218, "grad_norm": 0.2939850687980652, "learning_rate": 2.878541178782711e-05, "loss": 0.06687545776367188, "step": 1401 }, { "epoch": 0.18953950147866497, "grad_norm": 1.0107295513153076, "learning_rate": 2.8782712323394344e-05, "loss": 0.10088801383972168, "step": 1402 }, { "epoch": 0.18967469370511195, "grad_norm": 0.5084041357040405, "learning_rate": 2.878000998932738e-05, "loss": 0.06899881362915039, "step": 1403 }, { "epoch": 0.18980988593155892, "grad_norm": 0.23239442706108093, "learning_rate": 2.8777304786188847e-05, "loss": 0.04545164108276367, "step": 1404 }, { "epoch": 0.1899450781580059, "grad_norm": 0.5677239894866943, "learning_rate": 2.8774596714541988e-05, "loss": 0.08278656005859375, "step": 1405 }, { "epoch": 0.19008027038445288, "grad_norm": 0.598884105682373, "learning_rate": 2.8771885774950637e-05, "loss": 0.05105018615722656, "step": 1406 }, { "epoch": 0.19021546261089986, "grad_norm": 0.4362331032752991, "learning_rate": 2.876917196797923e-05, "loss": 0.08715939521789551, "step": 1407 }, { "epoch": 0.19035065483734684, "grad_norm": 0.4212772846221924, "learning_rate": 2.876645529419279e-05, "loss": 0.10063362121582031, "step": 1408 }, { "epoch": 0.19048584706379385, "grad_norm": 0.28369805216789246, "learning_rate": 2.876373575415695e-05, "loss": 0.03939962387084961, "step": 1409 }, { "epoch": 0.19062103929024082, "grad_norm": 0.47209009528160095, "learning_rate": 2.8761013348437926e-05, "loss": 0.07767105102539062, "step": 1410 }, { "epoch": 0.1907562315166878, "grad_norm": 0.3788244426250458, "learning_rate": 2.875828807760254e-05, "loss": 0.09695816040039062, "step": 1411 }, { "epoch": 0.19089142374313478, "grad_norm": 0.39608728885650635, "learning_rate": 2.875555994221821e-05, "loss": 0.08797359466552734, "step": 1412 }, { "epoch": 0.19102661596958176, "grad_norm": 0.27540138363838196, "learning_rate": 2.8752828942852943e-05, "loss": 0.05115818977355957, "step": 1413 }, { "epoch": 0.19116180819602874, "grad_norm": 0.23669497668743134, "learning_rate": 2.875009508007535e-05, "loss": 0.05493497848510742, "step": 1414 }, { "epoch": 0.19129700042247572, "grad_norm": 0.40007221698760986, "learning_rate": 2.8747358354454642e-05, "loss": 0.07413768768310547, "step": 1415 }, { "epoch": 0.1914321926489227, "grad_norm": 0.4296001195907593, "learning_rate": 2.8744618766560614e-05, "loss": 0.10266685485839844, "step": 1416 }, { "epoch": 0.19156738487536967, "grad_norm": 0.500477135181427, "learning_rate": 2.8741876316963664e-05, "loss": 0.11459064483642578, "step": 1417 }, { "epoch": 0.19170257710181665, "grad_norm": 0.3222072124481201, "learning_rate": 2.873913100623478e-05, "loss": 0.06590056419372559, "step": 1418 }, { "epoch": 0.19183776932826363, "grad_norm": 0.5025795698165894, "learning_rate": 2.873638283494556e-05, "loss": 0.09156131744384766, "step": 1419 }, { "epoch": 0.1919729615547106, "grad_norm": 0.26444897055625916, "learning_rate": 2.8733631803668178e-05, "loss": 0.050261080265045166, "step": 1420 }, { "epoch": 0.1921081537811576, "grad_norm": 0.24569398164749146, "learning_rate": 2.8730877912975418e-05, "loss": 0.055072784423828125, "step": 1421 }, { "epoch": 0.19224334600760457, "grad_norm": 0.3167657256126404, "learning_rate": 2.8728121163440656e-05, "loss": 0.0781550407409668, "step": 1422 }, { "epoch": 0.19237853823405154, "grad_norm": 0.661824643611908, "learning_rate": 2.8725361555637863e-05, "loss": 0.06928849220275879, "step": 1423 }, { "epoch": 0.19251373046049852, "grad_norm": 0.373063325881958, "learning_rate": 2.8722599090141598e-05, "loss": 0.07135629653930664, "step": 1424 }, { "epoch": 0.1926489226869455, "grad_norm": 0.4678906500339508, "learning_rate": 2.8719833767527026e-05, "loss": 0.07494425773620605, "step": 1425 }, { "epoch": 0.19278411491339248, "grad_norm": 0.47001031041145325, "learning_rate": 2.8717065588369896e-05, "loss": 0.09882116317749023, "step": 1426 }, { "epoch": 0.19291930713983946, "grad_norm": 0.3049338757991791, "learning_rate": 2.871429455324657e-05, "loss": 0.04781341552734375, "step": 1427 }, { "epoch": 0.19305449936628644, "grad_norm": 0.5737128257751465, "learning_rate": 2.871152066273398e-05, "loss": 0.11860418319702148, "step": 1428 }, { "epoch": 0.19318969159273341, "grad_norm": 0.272352397441864, "learning_rate": 2.870874391740967e-05, "loss": 0.06917500495910645, "step": 1429 }, { "epoch": 0.1933248838191804, "grad_norm": 0.3731553852558136, "learning_rate": 2.8705964317851774e-05, "loss": 0.08531379699707031, "step": 1430 }, { "epoch": 0.19346007604562737, "grad_norm": 0.5229251980781555, "learning_rate": 2.8703181864639013e-05, "loss": 0.08930301666259766, "step": 1431 }, { "epoch": 0.19359526827207435, "grad_norm": 0.24590151011943817, "learning_rate": 2.870039655835072e-05, "loss": 0.06226539611816406, "step": 1432 }, { "epoch": 0.19373046049852133, "grad_norm": 0.4491427540779114, "learning_rate": 2.8697608399566796e-05, "loss": 0.060721397399902344, "step": 1433 }, { "epoch": 0.1938656527249683, "grad_norm": 0.4435080587863922, "learning_rate": 2.869481738886777e-05, "loss": 0.0764169692993164, "step": 1434 }, { "epoch": 0.19400084495141529, "grad_norm": 0.2470765858888626, "learning_rate": 2.8692023526834725e-05, "loss": 0.05698108673095703, "step": 1435 }, { "epoch": 0.19413603717786226, "grad_norm": 0.27826058864593506, "learning_rate": 2.8689226814049367e-05, "loss": 0.06575632095336914, "step": 1436 }, { "epoch": 0.19427122940430924, "grad_norm": 0.7404970526695251, "learning_rate": 2.868642725109399e-05, "loss": 0.1010732650756836, "step": 1437 }, { "epoch": 0.19440642163075622, "grad_norm": 0.648781955242157, "learning_rate": 2.868362483855147e-05, "loss": 0.09947490692138672, "step": 1438 }, { "epoch": 0.1945416138572032, "grad_norm": 0.4128603935241699, "learning_rate": 2.8680819577005295e-05, "loss": 0.07432889938354492, "step": 1439 }, { "epoch": 0.19467680608365018, "grad_norm": 0.5470665097236633, "learning_rate": 2.8678011467039526e-05, "loss": 0.0687570571899414, "step": 1440 }, { "epoch": 0.19481199831009716, "grad_norm": 0.32682204246520996, "learning_rate": 2.867520050923883e-05, "loss": 0.06057882308959961, "step": 1441 }, { "epoch": 0.19494719053654413, "grad_norm": 0.3395327627658844, "learning_rate": 2.8672386704188466e-05, "loss": 0.040506839752197266, "step": 1442 }, { "epoch": 0.19508238276299114, "grad_norm": 0.42658281326293945, "learning_rate": 2.8669570052474273e-05, "loss": 0.07293081283569336, "step": 1443 }, { "epoch": 0.19521757498943812, "grad_norm": 0.823521614074707, "learning_rate": 2.86667505546827e-05, "loss": 0.07235860824584961, "step": 1444 }, { "epoch": 0.1953527672158851, "grad_norm": 0.7458866238594055, "learning_rate": 2.866392821140079e-05, "loss": 0.08974456787109375, "step": 1445 }, { "epoch": 0.19548795944233208, "grad_norm": 0.37601667642593384, "learning_rate": 2.8661103023216154e-05, "loss": 0.061406612396240234, "step": 1446 }, { "epoch": 0.19562315166877906, "grad_norm": 0.333290696144104, "learning_rate": 2.8658274990717018e-05, "loss": 0.040827274322509766, "step": 1447 }, { "epoch": 0.19575834389522603, "grad_norm": 0.3625883162021637, "learning_rate": 2.86554441144922e-05, "loss": 0.08270835876464844, "step": 1448 }, { "epoch": 0.195893536121673, "grad_norm": 0.9148176312446594, "learning_rate": 2.8652610395131097e-05, "loss": 0.08868408203125, "step": 1449 }, { "epoch": 0.19602872834812, "grad_norm": 0.32081353664398193, "learning_rate": 2.8649773833223702e-05, "loss": 0.08452033996582031, "step": 1450 }, { "epoch": 0.19616392057456697, "grad_norm": 0.18311938643455505, "learning_rate": 2.8646934429360606e-05, "loss": 0.04708433151245117, "step": 1451 }, { "epoch": 0.19629911280101395, "grad_norm": 0.7025923728942871, "learning_rate": 2.8644092184132986e-05, "loss": 0.07661104202270508, "step": 1452 }, { "epoch": 0.19643430502746093, "grad_norm": 0.6821320652961731, "learning_rate": 2.864124709813262e-05, "loss": 0.1329793930053711, "step": 1453 }, { "epoch": 0.1965694972539079, "grad_norm": 0.19413632154464722, "learning_rate": 2.8638399171951856e-05, "loss": 0.04824209213256836, "step": 1454 }, { "epoch": 0.19670468948035488, "grad_norm": 0.36810097098350525, "learning_rate": 2.8635548406183664e-05, "loss": 0.06572818756103516, "step": 1455 }, { "epoch": 0.19683988170680186, "grad_norm": 0.35278984904289246, "learning_rate": 2.8632694801421576e-05, "loss": 0.055222511291503906, "step": 1456 }, { "epoch": 0.19697507393324884, "grad_norm": 0.7974708676338196, "learning_rate": 2.862983835825973e-05, "loss": 0.08074188232421875, "step": 1457 }, { "epoch": 0.19711026615969582, "grad_norm": 0.6072102785110474, "learning_rate": 2.8626979077292856e-05, "loss": 0.07314491271972656, "step": 1458 }, { "epoch": 0.1972454583861428, "grad_norm": 0.43032896518707275, "learning_rate": 2.862411695911627e-05, "loss": 0.07599592208862305, "step": 1459 }, { "epoch": 0.19738065061258978, "grad_norm": 0.17891408503055573, "learning_rate": 2.862125200432588e-05, "loss": 0.04436969757080078, "step": 1460 }, { "epoch": 0.19751584283903675, "grad_norm": 0.5310320258140564, "learning_rate": 2.8618384213518188e-05, "loss": 0.06681764125823975, "step": 1461 }, { "epoch": 0.19765103506548373, "grad_norm": 0.2765582799911499, "learning_rate": 2.861551358729028e-05, "loss": 0.06185102462768555, "step": 1462 }, { "epoch": 0.1977862272919307, "grad_norm": 0.4849787950515747, "learning_rate": 2.8612640126239836e-05, "loss": 0.05442094802856445, "step": 1463 }, { "epoch": 0.1979214195183777, "grad_norm": 0.937890350818634, "learning_rate": 2.8609763830965126e-05, "loss": 0.09646177291870117, "step": 1464 }, { "epoch": 0.19805661174482467, "grad_norm": 0.5472166538238525, "learning_rate": 2.860688470206501e-05, "loss": 0.08281135559082031, "step": 1465 }, { "epoch": 0.19819180397127165, "grad_norm": 0.4535694420337677, "learning_rate": 2.8604002740138936e-05, "loss": 0.10628128051757812, "step": 1466 }, { "epoch": 0.19832699619771862, "grad_norm": 0.3947764039039612, "learning_rate": 2.860111794578695e-05, "loss": 0.05362558364868164, "step": 1467 }, { "epoch": 0.1984621884241656, "grad_norm": 0.35460296273231506, "learning_rate": 2.8598230319609677e-05, "loss": 0.0586855411529541, "step": 1468 }, { "epoch": 0.19859738065061258, "grad_norm": 0.43101951479911804, "learning_rate": 2.8595339862208336e-05, "loss": 0.06691217422485352, "step": 1469 }, { "epoch": 0.19873257287705956, "grad_norm": 0.529268205165863, "learning_rate": 2.8592446574184733e-05, "loss": 0.0815577507019043, "step": 1470 }, { "epoch": 0.19886776510350654, "grad_norm": 0.6762465238571167, "learning_rate": 2.8589550456141274e-05, "loss": 0.11527824401855469, "step": 1471 }, { "epoch": 0.19900295732995352, "grad_norm": 0.661712110042572, "learning_rate": 2.8586651508680942e-05, "loss": 0.08480548858642578, "step": 1472 }, { "epoch": 0.1991381495564005, "grad_norm": 0.4358825385570526, "learning_rate": 2.8583749732407312e-05, "loss": 0.07483959197998047, "step": 1473 }, { "epoch": 0.19927334178284747, "grad_norm": 0.359188973903656, "learning_rate": 2.8580845127924546e-05, "loss": 0.08134269714355469, "step": 1474 }, { "epoch": 0.19940853400929445, "grad_norm": 0.35140374302864075, "learning_rate": 2.8577937695837408e-05, "loss": 0.05597877502441406, "step": 1475 }, { "epoch": 0.19954372623574146, "grad_norm": 0.7786426544189453, "learning_rate": 2.8575027436751235e-05, "loss": 0.06444978713989258, "step": 1476 }, { "epoch": 0.19967891846218844, "grad_norm": 0.2603908181190491, "learning_rate": 2.8572114351271955e-05, "loss": 0.06761407852172852, "step": 1477 }, { "epoch": 0.19981411068863542, "grad_norm": 0.7338619232177734, "learning_rate": 2.85691984400061e-05, "loss": 0.07612419128417969, "step": 1478 }, { "epoch": 0.1999493029150824, "grad_norm": 0.2574705183506012, "learning_rate": 2.8566279703560762e-05, "loss": 0.0436396598815918, "step": 1479 }, { "epoch": 0.20008449514152937, "grad_norm": 0.2783486545085907, "learning_rate": 2.8563358142543648e-05, "loss": 0.06040048599243164, "step": 1480 }, { "epoch": 0.20021968736797635, "grad_norm": 0.5714578628540039, "learning_rate": 2.856043375756304e-05, "loss": 0.08622550964355469, "step": 1481 }, { "epoch": 0.20035487959442333, "grad_norm": 0.6847553253173828, "learning_rate": 2.855750654922781e-05, "loss": 0.10153627395629883, "step": 1482 }, { "epoch": 0.2004900718208703, "grad_norm": 0.328132688999176, "learning_rate": 2.855457651814742e-05, "loss": 0.0824136734008789, "step": 1483 }, { "epoch": 0.2006252640473173, "grad_norm": 0.346091628074646, "learning_rate": 2.8551643664931916e-05, "loss": 0.08436298370361328, "step": 1484 }, { "epoch": 0.20076045627376427, "grad_norm": 0.47722601890563965, "learning_rate": 2.8548707990191933e-05, "loss": 0.10326790809631348, "step": 1485 }, { "epoch": 0.20089564850021124, "grad_norm": 0.41707944869995117, "learning_rate": 2.8545769494538698e-05, "loss": 0.06589221954345703, "step": 1486 }, { "epoch": 0.20103084072665822, "grad_norm": 0.5870721340179443, "learning_rate": 2.854282817858402e-05, "loss": 0.06289863586425781, "step": 1487 }, { "epoch": 0.2011660329531052, "grad_norm": 0.5807158350944519, "learning_rate": 2.85398840429403e-05, "loss": 0.11984395980834961, "step": 1488 }, { "epoch": 0.20130122517955218, "grad_norm": 0.4396728575229645, "learning_rate": 2.853693708822051e-05, "loss": 0.0967855453491211, "step": 1489 }, { "epoch": 0.20143641740599916, "grad_norm": 0.5051938891410828, "learning_rate": 2.8533987315038234e-05, "loss": 0.07933425903320312, "step": 1490 }, { "epoch": 0.20157160963244614, "grad_norm": 0.31764644384384155, "learning_rate": 2.8531034724007627e-05, "loss": 0.0799555778503418, "step": 1491 }, { "epoch": 0.20170680185889311, "grad_norm": 0.17717859148979187, "learning_rate": 2.8528079315743435e-05, "loss": 0.03476667404174805, "step": 1492 }, { "epoch": 0.2018419940853401, "grad_norm": 0.3987855613231659, "learning_rate": 2.852512109086099e-05, "loss": 0.06544375419616699, "step": 1493 }, { "epoch": 0.20197718631178707, "grad_norm": 0.6518162488937378, "learning_rate": 2.8522160049976208e-05, "loss": 0.0998842716217041, "step": 1494 }, { "epoch": 0.20211237853823405, "grad_norm": 0.5000519156455994, "learning_rate": 2.8519196193705595e-05, "loss": 0.05768442153930664, "step": 1495 }, { "epoch": 0.20224757076468103, "grad_norm": 0.6162581443786621, "learning_rate": 2.8516229522666243e-05, "loss": 0.0803002417087555, "step": 1496 }, { "epoch": 0.202382762991128, "grad_norm": 0.4996272623538971, "learning_rate": 2.8513260037475825e-05, "loss": 0.05600053071975708, "step": 1497 }, { "epoch": 0.20251795521757499, "grad_norm": 0.44334933161735535, "learning_rate": 2.8510287738752604e-05, "loss": 0.09145450592041016, "step": 1498 }, { "epoch": 0.20265314744402196, "grad_norm": 0.4339718520641327, "learning_rate": 2.8507312627115435e-05, "loss": 0.06299620866775513, "step": 1499 }, { "epoch": 0.20278833967046894, "grad_norm": 0.41716277599334717, "learning_rate": 2.850433470318374e-05, "loss": 0.052942752838134766, "step": 1500 }, { "epoch": 0.20292353189691592, "grad_norm": 0.4254150986671448, "learning_rate": 2.8501353967577556e-05, "loss": 0.07092094421386719, "step": 1501 }, { "epoch": 0.2030587241233629, "grad_norm": 0.314262330532074, "learning_rate": 2.8498370420917468e-05, "loss": 0.06371545791625977, "step": 1502 }, { "epoch": 0.20319391634980988, "grad_norm": 0.41127675771713257, "learning_rate": 2.8495384063824683e-05, "loss": 0.07239151000976562, "step": 1503 }, { "epoch": 0.20332910857625686, "grad_norm": 0.23433437943458557, "learning_rate": 2.8492394896920964e-05, "loss": 0.05965924263000488, "step": 1504 }, { "epoch": 0.20346430080270383, "grad_norm": 0.3009444773197174, "learning_rate": 2.848940292082868e-05, "loss": 0.06430244445800781, "step": 1505 }, { "epoch": 0.2035994930291508, "grad_norm": 0.48559582233428955, "learning_rate": 2.8486408136170772e-05, "loss": 0.0673685073852539, "step": 1506 }, { "epoch": 0.2037346852555978, "grad_norm": 0.443024218082428, "learning_rate": 2.8483410543570776e-05, "loss": 0.07785987854003906, "step": 1507 }, { "epoch": 0.20386987748204477, "grad_norm": 0.5127843022346497, "learning_rate": 2.8480410143652803e-05, "loss": 0.08401298522949219, "step": 1508 }, { "epoch": 0.20400506970849175, "grad_norm": 1.0754501819610596, "learning_rate": 2.8477406937041547e-05, "loss": 0.08812069892883301, "step": 1509 }, { "epoch": 0.20414026193493875, "grad_norm": 0.4283447563648224, "learning_rate": 2.8474400924362298e-05, "loss": 0.1021566390991211, "step": 1510 }, { "epoch": 0.20427545416138573, "grad_norm": 0.4467059373855591, "learning_rate": 2.847139210624092e-05, "loss": 0.05289793014526367, "step": 1511 }, { "epoch": 0.2044106463878327, "grad_norm": 0.46175262331962585, "learning_rate": 2.8468380483303873e-05, "loss": 0.08680331707000732, "step": 1512 }, { "epoch": 0.2045458386142797, "grad_norm": 0.7817302942276001, "learning_rate": 2.8465366056178183e-05, "loss": 0.056456804275512695, "step": 1513 }, { "epoch": 0.20468103084072667, "grad_norm": 0.6416090130805969, "learning_rate": 2.8462348825491475e-05, "loss": 0.06924200057983398, "step": 1514 }, { "epoch": 0.20481622306717365, "grad_norm": 0.4183678925037384, "learning_rate": 2.8459328791871953e-05, "loss": 0.10460662841796875, "step": 1515 }, { "epoch": 0.20495141529362063, "grad_norm": 0.4831472635269165, "learning_rate": 2.8456305955948402e-05, "loss": 0.06530570983886719, "step": 1516 }, { "epoch": 0.2050866075200676, "grad_norm": 0.5936373472213745, "learning_rate": 2.845328031835019e-05, "loss": 0.08108949661254883, "step": 1517 }, { "epoch": 0.20522179974651458, "grad_norm": 0.21675732731819153, "learning_rate": 2.8450251879707277e-05, "loss": 0.06800222396850586, "step": 1518 }, { "epoch": 0.20535699197296156, "grad_norm": 0.6820784211158752, "learning_rate": 2.8447220640650194e-05, "loss": 0.06974172592163086, "step": 1519 }, { "epoch": 0.20549218419940854, "grad_norm": 0.42916199564933777, "learning_rate": 2.8444186601810068e-05, "loss": 0.077606201171875, "step": 1520 }, { "epoch": 0.20562737642585552, "grad_norm": 0.5398802757263184, "learning_rate": 2.84411497638186e-05, "loss": 0.06264090538024902, "step": 1521 }, { "epoch": 0.2057625686523025, "grad_norm": 0.574612557888031, "learning_rate": 2.843811012730807e-05, "loss": 0.0909883975982666, "step": 1522 }, { "epoch": 0.20589776087874948, "grad_norm": 0.6658186912536621, "learning_rate": 2.8435067692911353e-05, "loss": 0.07992124557495117, "step": 1523 }, { "epoch": 0.20603295310519645, "grad_norm": 0.8445116877555847, "learning_rate": 2.8432022461261897e-05, "loss": 0.07997274398803711, "step": 1524 }, { "epoch": 0.20616814533164343, "grad_norm": 0.35964757204055786, "learning_rate": 2.8428974432993736e-05, "loss": 0.07648134231567383, "step": 1525 }, { "epoch": 0.2063033375580904, "grad_norm": 0.4669344127178192, "learning_rate": 2.8425923608741486e-05, "loss": 0.08966922760009766, "step": 1526 }, { "epoch": 0.2064385297845374, "grad_norm": 0.5233417749404907, "learning_rate": 2.8422869989140343e-05, "loss": 0.06952023506164551, "step": 1527 }, { "epoch": 0.20657372201098437, "grad_norm": 0.3246438205242157, "learning_rate": 2.8419813574826093e-05, "loss": 0.04368925094604492, "step": 1528 }, { "epoch": 0.20670891423743135, "grad_norm": 0.6084328889846802, "learning_rate": 2.8416754366435092e-05, "loss": 0.0888967514038086, "step": 1529 }, { "epoch": 0.20684410646387832, "grad_norm": 0.4416901767253876, "learning_rate": 2.8413692364604285e-05, "loss": 0.07677769660949707, "step": 1530 }, { "epoch": 0.2069792986903253, "grad_norm": 1.4339102506637573, "learning_rate": 2.8410627569971197e-05, "loss": 0.12319564819335938, "step": 1531 }, { "epoch": 0.20711449091677228, "grad_norm": 0.6371877193450928, "learning_rate": 2.8407559983173934e-05, "loss": 0.1146383285522461, "step": 1532 }, { "epoch": 0.20724968314321926, "grad_norm": 0.3475244641304016, "learning_rate": 2.8404489604851186e-05, "loss": 0.07754826545715332, "step": 1533 }, { "epoch": 0.20738487536966624, "grad_norm": 0.3506496250629425, "learning_rate": 2.840141643564222e-05, "loss": 0.055328369140625, "step": 1534 }, { "epoch": 0.20752006759611322, "grad_norm": 0.6898812055587769, "learning_rate": 2.8398340476186885e-05, "loss": 0.08753299713134766, "step": 1535 }, { "epoch": 0.2076552598225602, "grad_norm": 0.6379316449165344, "learning_rate": 2.8395261727125617e-05, "loss": 0.08071374893188477, "step": 1536 }, { "epoch": 0.20779045204900717, "grad_norm": 1.1470060348510742, "learning_rate": 2.8392180189099425e-05, "loss": 0.09624767303466797, "step": 1537 }, { "epoch": 0.20792564427545415, "grad_norm": 0.7213916182518005, "learning_rate": 2.83890958627499e-05, "loss": 0.07874691486358643, "step": 1538 }, { "epoch": 0.20806083650190113, "grad_norm": 0.25094074010849, "learning_rate": 2.8386008748719216e-05, "loss": 0.04964780807495117, "step": 1539 }, { "epoch": 0.2081960287283481, "grad_norm": 0.29679808020591736, "learning_rate": 2.838291884765013e-05, "loss": 0.07548999786376953, "step": 1540 }, { "epoch": 0.2083312209547951, "grad_norm": 0.6929604411125183, "learning_rate": 2.8379826160185975e-05, "loss": 0.09263134002685547, "step": 1541 }, { "epoch": 0.20846641318124207, "grad_norm": 0.5786704421043396, "learning_rate": 2.8376730686970664e-05, "loss": 0.0851593017578125, "step": 1542 }, { "epoch": 0.20860160540768904, "grad_norm": 0.1921105831861496, "learning_rate": 2.8373632428648683e-05, "loss": 0.054320335388183594, "step": 1543 }, { "epoch": 0.20873679763413605, "grad_norm": 0.3362789750099182, "learning_rate": 2.8370531385865124e-05, "loss": 0.05617380142211914, "step": 1544 }, { "epoch": 0.20887198986058303, "grad_norm": 0.5695424675941467, "learning_rate": 2.8367427559265622e-05, "loss": 0.05769157409667969, "step": 1545 }, { "epoch": 0.20900718208703, "grad_norm": 0.3748445212841034, "learning_rate": 2.836432094949642e-05, "loss": 0.08294010162353516, "step": 1546 }, { "epoch": 0.209142374313477, "grad_norm": 0.3538089096546173, "learning_rate": 2.836121155720433e-05, "loss": 0.06462764739990234, "step": 1547 }, { "epoch": 0.20927756653992396, "grad_norm": 0.42894431948661804, "learning_rate": 2.8358099383036745e-05, "loss": 0.0638885498046875, "step": 1548 }, { "epoch": 0.20941275876637094, "grad_norm": 0.47183895111083984, "learning_rate": 2.8354984427641634e-05, "loss": 0.06286382675170898, "step": 1549 }, { "epoch": 0.20954795099281792, "grad_norm": 0.2746015191078186, "learning_rate": 2.8351866691667544e-05, "loss": 0.0701746940612793, "step": 1550 }, { "epoch": 0.2096831432192649, "grad_norm": 0.28003767132759094, "learning_rate": 2.8348746175763613e-05, "loss": 0.0535731315612793, "step": 1551 }, { "epoch": 0.20981833544571188, "grad_norm": 0.4062836468219757, "learning_rate": 2.8345622880579537e-05, "loss": 0.07574129104614258, "step": 1552 }, { "epoch": 0.20995352767215886, "grad_norm": 0.6867547035217285, "learning_rate": 2.8342496806765615e-05, "loss": 0.08504390716552734, "step": 1553 }, { "epoch": 0.21008871989860584, "grad_norm": 0.14522002637386322, "learning_rate": 2.833936795497271e-05, "loss": 0.047537803649902344, "step": 1554 }, { "epoch": 0.21022391212505281, "grad_norm": 0.47608858346939087, "learning_rate": 2.8336236325852257e-05, "loss": 0.07120323181152344, "step": 1555 }, { "epoch": 0.2103591043514998, "grad_norm": 0.4264245927333832, "learning_rate": 2.8333101920056285e-05, "loss": 0.055731892585754395, "step": 1556 }, { "epoch": 0.21049429657794677, "grad_norm": 0.6420731544494629, "learning_rate": 2.8329964738237392e-05, "loss": 0.08244848251342773, "step": 1557 }, { "epoch": 0.21062948880439375, "grad_norm": 0.534919023513794, "learning_rate": 2.8326824781048756e-05, "loss": 0.09705448150634766, "step": 1558 }, { "epoch": 0.21076468103084073, "grad_norm": 0.9707580804824829, "learning_rate": 2.8323682049144135e-05, "loss": 0.09867000579833984, "step": 1559 }, { "epoch": 0.2108998732572877, "grad_norm": 0.37556371092796326, "learning_rate": 2.832053654317786e-05, "loss": 0.06475949287414551, "step": 1560 }, { "epoch": 0.21103506548373469, "grad_norm": 0.7855553030967712, "learning_rate": 2.8317388263804842e-05, "loss": 0.11359071731567383, "step": 1561 }, { "epoch": 0.21117025771018166, "grad_norm": 0.3888871669769287, "learning_rate": 2.8314237211680573e-05, "loss": 0.0841970443725586, "step": 1562 }, { "epoch": 0.21130544993662864, "grad_norm": 0.495535671710968, "learning_rate": 2.8311083387461118e-05, "loss": 0.07243537902832031, "step": 1563 }, { "epoch": 0.21144064216307562, "grad_norm": 0.18253490328788757, "learning_rate": 2.8307926791803114e-05, "loss": 0.05575847625732422, "step": 1564 }, { "epoch": 0.2115758343895226, "grad_norm": 0.5124557614326477, "learning_rate": 2.8304767425363785e-05, "loss": 0.07570600509643555, "step": 1565 }, { "epoch": 0.21171102661596958, "grad_norm": 0.28501805663108826, "learning_rate": 2.830160528880093e-05, "loss": 0.0680689811706543, "step": 1566 }, { "epoch": 0.21184621884241656, "grad_norm": 0.3124721646308899, "learning_rate": 2.829844038277292e-05, "loss": 0.07054710388183594, "step": 1567 }, { "epoch": 0.21198141106886353, "grad_norm": 0.35767754912376404, "learning_rate": 2.8295272707938706e-05, "loss": 0.07143974304199219, "step": 1568 }, { "epoch": 0.2121166032953105, "grad_norm": 0.6448807716369629, "learning_rate": 2.8292102264957817e-05, "loss": 0.0975027084350586, "step": 1569 }, { "epoch": 0.2122517955217575, "grad_norm": 0.5385596752166748, "learning_rate": 2.8288929054490357e-05, "loss": 0.08779716491699219, "step": 1570 }, { "epoch": 0.21238698774820447, "grad_norm": 0.35227349400520325, "learning_rate": 2.8285753077196998e-05, "loss": 0.0719599723815918, "step": 1571 }, { "epoch": 0.21252217997465145, "grad_norm": 0.4539269208908081, "learning_rate": 2.8282574333739006e-05, "loss": 0.06339693069458008, "step": 1572 }, { "epoch": 0.21265737220109843, "grad_norm": 0.8397480249404907, "learning_rate": 2.8279392824778197e-05, "loss": 0.0697946548461914, "step": 1573 }, { "epoch": 0.2127925644275454, "grad_norm": 0.6460810899734497, "learning_rate": 2.8276208550976993e-05, "loss": 0.06089729070663452, "step": 1574 }, { "epoch": 0.21292775665399238, "grad_norm": 0.4288603961467743, "learning_rate": 2.8273021512998372e-05, "loss": 0.087799072265625, "step": 1575 }, { "epoch": 0.21306294888043936, "grad_norm": 0.8934928774833679, "learning_rate": 2.826983171150589e-05, "loss": 0.07850122451782227, "step": 1576 }, { "epoch": 0.21319814110688634, "grad_norm": 0.47060516476631165, "learning_rate": 2.826663914716368e-05, "loss": 0.0952754020690918, "step": 1577 }, { "epoch": 0.21333333333333335, "grad_norm": 0.39937278628349304, "learning_rate": 2.826344382063646e-05, "loss": 0.07137489318847656, "step": 1578 }, { "epoch": 0.21346852555978033, "grad_norm": 0.8838233351707458, "learning_rate": 2.8260245732589503e-05, "loss": 0.08221101760864258, "step": 1579 }, { "epoch": 0.2136037177862273, "grad_norm": 0.6140885353088379, "learning_rate": 2.8257044883688672e-05, "loss": 0.06524944305419922, "step": 1580 }, { "epoch": 0.21373891001267428, "grad_norm": 0.33716484904289246, "learning_rate": 2.82538412746004e-05, "loss": 0.06624317169189453, "step": 1581 }, { "epoch": 0.21387410223912126, "grad_norm": 0.35295796394348145, "learning_rate": 2.8250634905991695e-05, "loss": 0.0657663345336914, "step": 1582 }, { "epoch": 0.21400929446556824, "grad_norm": 0.2585761845111847, "learning_rate": 2.824742577853015e-05, "loss": 0.046559810638427734, "step": 1583 }, { "epoch": 0.21414448669201522, "grad_norm": 0.34351542592048645, "learning_rate": 2.8244213892883907e-05, "loss": 0.07097244262695312, "step": 1584 }, { "epoch": 0.2142796789184622, "grad_norm": 0.20833387970924377, "learning_rate": 2.82409992497217e-05, "loss": 0.045954227447509766, "step": 1585 }, { "epoch": 0.21441487114490917, "grad_norm": 0.46430692076683044, "learning_rate": 2.8237781849712852e-05, "loss": 0.07614660263061523, "step": 1586 }, { "epoch": 0.21455006337135615, "grad_norm": 0.27918508648872375, "learning_rate": 2.8234561693527222e-05, "loss": 0.054819583892822266, "step": 1587 }, { "epoch": 0.21468525559780313, "grad_norm": 0.412960410118103, "learning_rate": 2.8231338781835275e-05, "loss": 0.07755041122436523, "step": 1588 }, { "epoch": 0.2148204478242501, "grad_norm": 0.4582512378692627, "learning_rate": 2.8228113115308032e-05, "loss": 0.10426759719848633, "step": 1589 }, { "epoch": 0.2149556400506971, "grad_norm": 0.3591464161872864, "learning_rate": 2.82248846946171e-05, "loss": 0.06746578216552734, "step": 1590 }, { "epoch": 0.21509083227714407, "grad_norm": 0.5530175566673279, "learning_rate": 2.822165352043465e-05, "loss": 0.06784582138061523, "step": 1591 }, { "epoch": 0.21522602450359105, "grad_norm": 0.8045269846916199, "learning_rate": 2.8218419593433437e-05, "loss": 0.08627581596374512, "step": 1592 }, { "epoch": 0.21536121673003802, "grad_norm": 0.20450256764888763, "learning_rate": 2.8215182914286768e-05, "loss": 0.0470733642578125, "step": 1593 }, { "epoch": 0.215496408956485, "grad_norm": 0.38736405968666077, "learning_rate": 2.8211943483668546e-05, "loss": 0.08690547943115234, "step": 1594 }, { "epoch": 0.21563160118293198, "grad_norm": 0.15070442855358124, "learning_rate": 2.8208701302253237e-05, "loss": 0.04215669631958008, "step": 1595 }, { "epoch": 0.21576679340937896, "grad_norm": 0.4521065354347229, "learning_rate": 2.820545637071588e-05, "loss": 0.08721351623535156, "step": 1596 }, { "epoch": 0.21590198563582594, "grad_norm": 0.3432156443595886, "learning_rate": 2.8202208689732083e-05, "loss": 0.056870460510253906, "step": 1597 }, { "epoch": 0.21603717786227292, "grad_norm": 0.39810627698898315, "learning_rate": 2.819895825997804e-05, "loss": 0.03982114791870117, "step": 1598 }, { "epoch": 0.2161723700887199, "grad_norm": 0.4973376393318176, "learning_rate": 2.81957050821305e-05, "loss": 0.07609415054321289, "step": 1599 }, { "epoch": 0.21630756231516687, "grad_norm": 0.6641979217529297, "learning_rate": 2.8192449156866787e-05, "loss": 0.09238433837890625, "step": 1600 }, { "epoch": 0.21644275454161385, "grad_norm": 0.7539771795272827, "learning_rate": 2.8189190484864814e-05, "loss": 0.09517097473144531, "step": 1601 }, { "epoch": 0.21657794676806083, "grad_norm": 0.3724718987941742, "learning_rate": 2.8185929066803052e-05, "loss": 0.05844688415527344, "step": 1602 }, { "epoch": 0.2167131389945078, "grad_norm": 0.37707340717315674, "learning_rate": 2.818266490336054e-05, "loss": 0.06653022766113281, "step": 1603 }, { "epoch": 0.2168483312209548, "grad_norm": 0.3917069435119629, "learning_rate": 2.817939799521689e-05, "loss": 0.07715415954589844, "step": 1604 }, { "epoch": 0.21698352344740177, "grad_norm": 0.4703545570373535, "learning_rate": 2.8176128343052304e-05, "loss": 0.07887458801269531, "step": 1605 }, { "epoch": 0.21711871567384874, "grad_norm": 0.5951199531555176, "learning_rate": 2.817285594754753e-05, "loss": 0.10241222381591797, "step": 1606 }, { "epoch": 0.21725390790029572, "grad_norm": 0.28272005915641785, "learning_rate": 2.8169580809383902e-05, "loss": 0.06273269653320312, "step": 1607 }, { "epoch": 0.2173891001267427, "grad_norm": 0.3859010636806488, "learning_rate": 2.8166302929243326e-05, "loss": 0.07463669776916504, "step": 1608 }, { "epoch": 0.21752429235318968, "grad_norm": 0.4278121888637543, "learning_rate": 2.8163022307808264e-05, "loss": 0.05964255332946777, "step": 1609 }, { "epoch": 0.21765948457963666, "grad_norm": 0.8931379318237305, "learning_rate": 2.8159738945761764e-05, "loss": 0.09549093246459961, "step": 1610 }, { "epoch": 0.21779467680608364, "grad_norm": 0.41075560450553894, "learning_rate": 2.8156452843787438e-05, "loss": 0.06782150268554688, "step": 1611 }, { "epoch": 0.21792986903253064, "grad_norm": 0.3972751498222351, "learning_rate": 2.815316400256947e-05, "loss": 0.10162878036499023, "step": 1612 }, { "epoch": 0.21806506125897762, "grad_norm": 0.5238962173461914, "learning_rate": 2.814987242279262e-05, "loss": 0.08918380737304688, "step": 1613 }, { "epoch": 0.2182002534854246, "grad_norm": 0.5222808122634888, "learning_rate": 2.8146578105142202e-05, "loss": 0.09671163558959961, "step": 1614 }, { "epoch": 0.21833544571187158, "grad_norm": 0.3641374111175537, "learning_rate": 2.814328105030412e-05, "loss": 0.06192207336425781, "step": 1615 }, { "epoch": 0.21847063793831856, "grad_norm": 0.2877354621887207, "learning_rate": 2.8139981258964836e-05, "loss": 0.0652461051940918, "step": 1616 }, { "epoch": 0.21860583016476554, "grad_norm": 0.4902604818344116, "learning_rate": 2.8136678731811385e-05, "loss": 0.053981781005859375, "step": 1617 }, { "epoch": 0.21874102239121251, "grad_norm": 0.6506996750831604, "learning_rate": 2.8133373469531362e-05, "loss": 0.0872354507446289, "step": 1618 }, { "epoch": 0.2188762146176595, "grad_norm": 0.22775883972644806, "learning_rate": 2.8130065472812952e-05, "loss": 0.0394824743270874, "step": 1619 }, { "epoch": 0.21901140684410647, "grad_norm": 0.20165197551250458, "learning_rate": 2.812675474234489e-05, "loss": 0.04616832733154297, "step": 1620 }, { "epoch": 0.21914659907055345, "grad_norm": 0.6121368408203125, "learning_rate": 2.812344127881649e-05, "loss": 0.1114950180053711, "step": 1621 }, { "epoch": 0.21928179129700043, "grad_norm": 0.844108521938324, "learning_rate": 2.8120125082917638e-05, "loss": 0.07841968536376953, "step": 1622 }, { "epoch": 0.2194169835234474, "grad_norm": 0.48570796847343445, "learning_rate": 2.8116806155338773e-05, "loss": 0.07401180267333984, "step": 1623 }, { "epoch": 0.21955217574989438, "grad_norm": 0.6665063500404358, "learning_rate": 2.8113484496770923e-05, "loss": 0.05527907609939575, "step": 1624 }, { "epoch": 0.21968736797634136, "grad_norm": 1.430572748184204, "learning_rate": 2.811016010790567e-05, "loss": 0.095458984375, "step": 1625 }, { "epoch": 0.21982256020278834, "grad_norm": 0.8118380308151245, "learning_rate": 2.8106832989435165e-05, "loss": 0.0848240852355957, "step": 1626 }, { "epoch": 0.21995775242923532, "grad_norm": 0.3135979175567627, "learning_rate": 2.8103503142052146e-05, "loss": 0.0822296142578125, "step": 1627 }, { "epoch": 0.2200929446556823, "grad_norm": 0.32224681973457336, "learning_rate": 2.8100170566449892e-05, "loss": 0.06378364562988281, "step": 1628 }, { "epoch": 0.22022813688212928, "grad_norm": 0.4593490660190582, "learning_rate": 2.8096835263322266e-05, "loss": 0.0670623779296875, "step": 1629 }, { "epoch": 0.22036332910857626, "grad_norm": 0.3958438038825989, "learning_rate": 2.8093497233363702e-05, "loss": 0.0702054500579834, "step": 1630 }, { "epoch": 0.22049852133502323, "grad_norm": 0.44892239570617676, "learning_rate": 2.8090156477269185e-05, "loss": 0.054297685623168945, "step": 1631 }, { "epoch": 0.2206337135614702, "grad_norm": 0.6927587985992432, "learning_rate": 2.808681299573429e-05, "loss": 0.0662841796875, "step": 1632 }, { "epoch": 0.2207689057879172, "grad_norm": 0.444137305021286, "learning_rate": 2.8083466789455137e-05, "loss": 0.07687711715698242, "step": 1633 }, { "epoch": 0.22090409801436417, "grad_norm": 0.39730507135391235, "learning_rate": 2.808011785912843e-05, "loss": 0.0796651840209961, "step": 1634 }, { "epoch": 0.22103929024081115, "grad_norm": 0.2972898483276367, "learning_rate": 2.8076766205451435e-05, "loss": 0.055123090744018555, "step": 1635 }, { "epoch": 0.22117448246725813, "grad_norm": 0.18792884051799774, "learning_rate": 2.8073411829121983e-05, "loss": 0.0420079231262207, "step": 1636 }, { "epoch": 0.2213096746937051, "grad_norm": 0.36996304988861084, "learning_rate": 2.8070054730838467e-05, "loss": 0.0798807144165039, "step": 1637 }, { "epoch": 0.22144486692015208, "grad_norm": 0.4978063702583313, "learning_rate": 2.8066694911299865e-05, "loss": 0.1259927749633789, "step": 1638 }, { "epoch": 0.22158005914659906, "grad_norm": 0.5838686227798462, "learning_rate": 2.8063332371205698e-05, "loss": 0.08549308776855469, "step": 1639 }, { "epoch": 0.22171525137304604, "grad_norm": 0.36738187074661255, "learning_rate": 2.8059967111256072e-05, "loss": 0.05120038986206055, "step": 1640 }, { "epoch": 0.22185044359949302, "grad_norm": 0.2991965413093567, "learning_rate": 2.8056599132151647e-05, "loss": 0.07931041717529297, "step": 1641 }, { "epoch": 0.22198563582594, "grad_norm": 0.4698663055896759, "learning_rate": 2.8053228434593656e-05, "loss": 0.06342899799346924, "step": 1642 }, { "epoch": 0.22212082805238698, "grad_norm": 0.4911721348762512, "learning_rate": 2.8049855019283895e-05, "loss": 0.09208106994628906, "step": 1643 }, { "epoch": 0.22225602027883395, "grad_norm": 0.4793623089790344, "learning_rate": 2.8046478886924736e-05, "loss": 0.06871199607849121, "step": 1644 }, { "epoch": 0.22239121250528093, "grad_norm": 0.28812339901924133, "learning_rate": 2.804310003821909e-05, "loss": 0.0816950798034668, "step": 1645 }, { "epoch": 0.22252640473172794, "grad_norm": 0.515812337398529, "learning_rate": 2.8039718473870473e-05, "loss": 0.09521055221557617, "step": 1646 }, { "epoch": 0.22266159695817492, "grad_norm": 0.3579423725605011, "learning_rate": 2.8036334194582924e-05, "loss": 0.06319046020507812, "step": 1647 }, { "epoch": 0.2227967891846219, "grad_norm": 0.2613706588745117, "learning_rate": 2.8032947201061084e-05, "loss": 0.052468299865722656, "step": 1648 }, { "epoch": 0.22293198141106887, "grad_norm": 0.54111647605896, "learning_rate": 2.8029557494010132e-05, "loss": 0.0862126350402832, "step": 1649 }, { "epoch": 0.22306717363751585, "grad_norm": 0.5427489876747131, "learning_rate": 2.802616507413583e-05, "loss": 0.07244467735290527, "step": 1650 }, { "epoch": 0.22320236586396283, "grad_norm": 0.3109368681907654, "learning_rate": 2.8022769942144492e-05, "loss": 0.08296823501586914, "step": 1651 }, { "epoch": 0.2233375580904098, "grad_norm": 0.3801068961620331, "learning_rate": 2.801937209874301e-05, "loss": 0.08475017547607422, "step": 1652 }, { "epoch": 0.2234727503168568, "grad_norm": 0.30102646350860596, "learning_rate": 2.8015971544638832e-05, "loss": 0.05987656116485596, "step": 1653 }, { "epoch": 0.22360794254330377, "grad_norm": 0.42254638671875, "learning_rate": 2.8012568280539964e-05, "loss": 0.0904998779296875, "step": 1654 }, { "epoch": 0.22374313476975075, "grad_norm": 0.22949378192424774, "learning_rate": 2.800916230715499e-05, "loss": 0.0606616735458374, "step": 1655 }, { "epoch": 0.22387832699619772, "grad_norm": 0.9997631907463074, "learning_rate": 2.800575362519305e-05, "loss": 0.08878564834594727, "step": 1656 }, { "epoch": 0.2240135192226447, "grad_norm": 0.28981316089630127, "learning_rate": 2.800234223536385e-05, "loss": 0.04744911193847656, "step": 1657 }, { "epoch": 0.22414871144909168, "grad_norm": 0.8237676620483398, "learning_rate": 2.799892813837766e-05, "loss": 0.09437572956085205, "step": 1658 }, { "epoch": 0.22428390367553866, "grad_norm": 0.45472365617752075, "learning_rate": 2.7995511334945315e-05, "loss": 0.08826065063476562, "step": 1659 }, { "epoch": 0.22441909590198564, "grad_norm": 0.4593025743961334, "learning_rate": 2.7992091825778202e-05, "loss": 0.06944799423217773, "step": 1660 }, { "epoch": 0.22455428812843262, "grad_norm": 0.7557271122932434, "learning_rate": 2.7988669611588295e-05, "loss": 0.0856008529663086, "step": 1661 }, { "epoch": 0.2246894803548796, "grad_norm": 0.29140231013298035, "learning_rate": 2.7985244693088112e-05, "loss": 0.028855204582214355, "step": 1662 }, { "epoch": 0.22482467258132657, "grad_norm": 0.5522841811180115, "learning_rate": 2.7981817070990736e-05, "loss": 0.07449054718017578, "step": 1663 }, { "epoch": 0.22495986480777355, "grad_norm": 0.3269520401954651, "learning_rate": 2.7978386746009813e-05, "loss": 0.060117244720458984, "step": 1664 }, { "epoch": 0.22509505703422053, "grad_norm": 0.7490774989128113, "learning_rate": 2.797495371885957e-05, "loss": 0.08272123336791992, "step": 1665 }, { "epoch": 0.2252302492606675, "grad_norm": 0.5157455205917358, "learning_rate": 2.7971517990254768e-05, "loss": 0.06467342376708984, "step": 1666 }, { "epoch": 0.2253654414871145, "grad_norm": 0.6056908965110779, "learning_rate": 2.7968079560910744e-05, "loss": 0.0678703784942627, "step": 1667 }, { "epoch": 0.22550063371356147, "grad_norm": 0.8360289931297302, "learning_rate": 2.7964638431543402e-05, "loss": 0.062014102935791016, "step": 1668 }, { "epoch": 0.22563582594000844, "grad_norm": 0.650890588760376, "learning_rate": 2.7961194602869208e-05, "loss": 0.07659244537353516, "step": 1669 }, { "epoch": 0.22577101816645542, "grad_norm": 0.4710063636302948, "learning_rate": 2.7957748075605178e-05, "loss": 0.107208251953125, "step": 1670 }, { "epoch": 0.2259062103929024, "grad_norm": 0.3793172836303711, "learning_rate": 2.7954298850468898e-05, "loss": 0.07379865646362305, "step": 1671 }, { "epoch": 0.22604140261934938, "grad_norm": 0.5283216238021851, "learning_rate": 2.7950846928178517e-05, "loss": 0.09037399291992188, "step": 1672 }, { "epoch": 0.22617659484579636, "grad_norm": 0.5233229994773865, "learning_rate": 2.7947392309452744e-05, "loss": 0.06802845001220703, "step": 1673 }, { "epoch": 0.22631178707224334, "grad_norm": 0.5740306973457336, "learning_rate": 2.7943934995010845e-05, "loss": 0.07895135879516602, "step": 1674 }, { "epoch": 0.22644697929869032, "grad_norm": 0.22923940420150757, "learning_rate": 2.7940474985572657e-05, "loss": 0.04120969772338867, "step": 1675 }, { "epoch": 0.2265821715251373, "grad_norm": 0.8615206480026245, "learning_rate": 2.793701228185857e-05, "loss": 0.05845475196838379, "step": 1676 }, { "epoch": 0.22671736375158427, "grad_norm": 0.25676557421684265, "learning_rate": 2.7933546884589536e-05, "loss": 0.053670406341552734, "step": 1677 }, { "epoch": 0.22685255597803125, "grad_norm": 0.4690135419368744, "learning_rate": 2.7930078794487077e-05, "loss": 0.0667567253112793, "step": 1678 }, { "epoch": 0.22698774820447823, "grad_norm": 0.3204026222229004, "learning_rate": 2.7926608012273253e-05, "loss": 0.06004142761230469, "step": 1679 }, { "epoch": 0.22712294043092524, "grad_norm": 0.6366397738456726, "learning_rate": 2.7923134538670715e-05, "loss": 0.08652496337890625, "step": 1680 }, { "epoch": 0.2272581326573722, "grad_norm": 0.31071799993515015, "learning_rate": 2.7919658374402645e-05, "loss": 0.04079151153564453, "step": 1681 }, { "epoch": 0.2273933248838192, "grad_norm": 0.3043774664402008, "learning_rate": 2.7916179520192807e-05, "loss": 0.06686258316040039, "step": 1682 }, { "epoch": 0.22752851711026617, "grad_norm": 0.26399528980255127, "learning_rate": 2.7912697976765516e-05, "loss": 0.07759618759155273, "step": 1683 }, { "epoch": 0.22766370933671315, "grad_norm": 0.34320852160453796, "learning_rate": 2.790921374484565e-05, "loss": 0.06002044677734375, "step": 1684 }, { "epoch": 0.22779890156316013, "grad_norm": 0.2710177004337311, "learning_rate": 2.7905726825158637e-05, "loss": 0.08067035675048828, "step": 1685 }, { "epoch": 0.2279340937896071, "grad_norm": 0.32556334137916565, "learning_rate": 2.7902237218430485e-05, "loss": 0.05906534194946289, "step": 1686 }, { "epoch": 0.22806928601605408, "grad_norm": 0.27595052123069763, "learning_rate": 2.7898744925387735e-05, "loss": 0.061810970306396484, "step": 1687 }, { "epoch": 0.22820447824250106, "grad_norm": 0.2853275537490845, "learning_rate": 2.7895249946757505e-05, "loss": 0.0636434555053711, "step": 1688 }, { "epoch": 0.22833967046894804, "grad_norm": 0.5397588014602661, "learning_rate": 2.7891752283267474e-05, "loss": 0.09965991973876953, "step": 1689 }, { "epoch": 0.22847486269539502, "grad_norm": 0.1745423674583435, "learning_rate": 2.788825193564587e-05, "loss": 0.03982114791870117, "step": 1690 }, { "epoch": 0.228610054921842, "grad_norm": 0.13676410913467407, "learning_rate": 2.7884748904621483e-05, "loss": 0.027382850646972656, "step": 1691 }, { "epoch": 0.22874524714828898, "grad_norm": 0.2747132480144501, "learning_rate": 2.7881243190923667e-05, "loss": 0.06270027160644531, "step": 1692 }, { "epoch": 0.22888043937473596, "grad_norm": 0.7449238300323486, "learning_rate": 2.7877734795282326e-05, "loss": 0.07269144058227539, "step": 1693 }, { "epoch": 0.22901563160118293, "grad_norm": 0.67307448387146, "learning_rate": 2.7874223718427926e-05, "loss": 0.09056568145751953, "step": 1694 }, { "epoch": 0.2291508238276299, "grad_norm": 0.3201822340488434, "learning_rate": 2.78707099610915e-05, "loss": 0.04983329772949219, "step": 1695 }, { "epoch": 0.2292860160540769, "grad_norm": 0.3325496017932892, "learning_rate": 2.7867193524004618e-05, "loss": 0.06076478958129883, "step": 1696 }, { "epoch": 0.22942120828052387, "grad_norm": 0.4683951437473297, "learning_rate": 2.786367440789943e-05, "loss": 0.05465984344482422, "step": 1697 }, { "epoch": 0.22955640050697085, "grad_norm": 0.3375021815299988, "learning_rate": 2.7860152613508634e-05, "loss": 0.06065797805786133, "step": 1698 }, { "epoch": 0.22969159273341783, "grad_norm": 1.1951357126235962, "learning_rate": 2.7856628141565484e-05, "loss": 0.10079479217529297, "step": 1699 }, { "epoch": 0.2298267849598648, "grad_norm": 0.1562008112668991, "learning_rate": 2.7853100992803797e-05, "loss": 0.05537271499633789, "step": 1700 }, { "epoch": 0.22996197718631178, "grad_norm": 0.3340827524662018, "learning_rate": 2.7849571167957942e-05, "loss": 0.08322334289550781, "step": 1701 }, { "epoch": 0.23009716941275876, "grad_norm": 0.4469181001186371, "learning_rate": 2.784603866776285e-05, "loss": 0.10549259185791016, "step": 1702 }, { "epoch": 0.23023236163920574, "grad_norm": 0.42036375403404236, "learning_rate": 2.7842503492953996e-05, "loss": 0.07880759239196777, "step": 1703 }, { "epoch": 0.23036755386565272, "grad_norm": 0.4152294099330902, "learning_rate": 2.7838965644267435e-05, "loss": 0.07308101654052734, "step": 1704 }, { "epoch": 0.2305027460920997, "grad_norm": 0.30345314741134644, "learning_rate": 2.7835425122439764e-05, "loss": 0.050995826721191406, "step": 1705 }, { "epoch": 0.23063793831854668, "grad_norm": 0.5056906342506409, "learning_rate": 2.7831881928208128e-05, "loss": 0.06304597854614258, "step": 1706 }, { "epoch": 0.23077313054499365, "grad_norm": 0.32819733023643494, "learning_rate": 2.7828336062310252e-05, "loss": 0.07190895080566406, "step": 1707 }, { "epoch": 0.23090832277144063, "grad_norm": 0.5538195371627808, "learning_rate": 2.7824787525484403e-05, "loss": 0.07466936111450195, "step": 1708 }, { "epoch": 0.2310435149978876, "grad_norm": 0.5046731233596802, "learning_rate": 2.7821236318469395e-05, "loss": 0.08994722366333008, "step": 1709 }, { "epoch": 0.2311787072243346, "grad_norm": 0.400930792093277, "learning_rate": 2.7817682442004615e-05, "loss": 0.08697342872619629, "step": 1710 }, { "epoch": 0.23131389945078157, "grad_norm": 0.3092827796936035, "learning_rate": 2.781412589683e-05, "loss": 0.07714653015136719, "step": 1711 }, { "epoch": 0.23144909167722855, "grad_norm": 0.4910641312599182, "learning_rate": 2.781056668368604e-05, "loss": 0.06731009483337402, "step": 1712 }, { "epoch": 0.23158428390367553, "grad_norm": 0.3656303286552429, "learning_rate": 2.780700480331378e-05, "loss": 0.06447601318359375, "step": 1713 }, { "epoch": 0.23171947613012253, "grad_norm": 0.3311465382575989, "learning_rate": 2.7803440256454825e-05, "loss": 0.07804012298583984, "step": 1714 }, { "epoch": 0.2318546683565695, "grad_norm": 0.6129467487335205, "learning_rate": 2.7799873043851337e-05, "loss": 0.059590816497802734, "step": 1715 }, { "epoch": 0.2319898605830165, "grad_norm": 0.7785227298736572, "learning_rate": 2.7796303166246016e-05, "loss": 0.08037972450256348, "step": 1716 }, { "epoch": 0.23212505280946347, "grad_norm": 0.7429837584495544, "learning_rate": 2.7792730624382142e-05, "loss": 0.09062862396240234, "step": 1717 }, { "epoch": 0.23226024503591045, "grad_norm": 0.42086148262023926, "learning_rate": 2.778915541900353e-05, "loss": 0.10192680358886719, "step": 1718 }, { "epoch": 0.23239543726235742, "grad_norm": 0.3895472586154938, "learning_rate": 2.7785577550854566e-05, "loss": 0.07087564468383789, "step": 1719 }, { "epoch": 0.2325306294888044, "grad_norm": 0.9704498648643494, "learning_rate": 2.778199702068017e-05, "loss": 0.07492542266845703, "step": 1720 }, { "epoch": 0.23266582171525138, "grad_norm": 0.6616615653038025, "learning_rate": 2.777841382922583e-05, "loss": 0.07393407821655273, "step": 1721 }, { "epoch": 0.23280101394169836, "grad_norm": 0.6751999258995056, "learning_rate": 2.7774827977237596e-05, "loss": 0.067596435546875, "step": 1722 }, { "epoch": 0.23293620616814534, "grad_norm": 0.22478896379470825, "learning_rate": 2.777123946546205e-05, "loss": 0.05893135070800781, "step": 1723 }, { "epoch": 0.23307139839459232, "grad_norm": 0.4378553628921509, "learning_rate": 2.776764829464634e-05, "loss": 0.06891918182373047, "step": 1724 }, { "epoch": 0.2332065906210393, "grad_norm": 0.5406183004379272, "learning_rate": 2.7764054465538173e-05, "loss": 0.0537571907043457, "step": 1725 }, { "epoch": 0.23334178284748627, "grad_norm": 0.5965280532836914, "learning_rate": 2.7760457978885794e-05, "loss": 0.06615829467773438, "step": 1726 }, { "epoch": 0.23347697507393325, "grad_norm": 0.4570900499820709, "learning_rate": 2.7756858835438022e-05, "loss": 0.07481217384338379, "step": 1727 }, { "epoch": 0.23361216730038023, "grad_norm": 0.4667648673057556, "learning_rate": 2.7753257035944216e-05, "loss": 0.07844877243041992, "step": 1728 }, { "epoch": 0.2337473595268272, "grad_norm": 0.2384541779756546, "learning_rate": 2.7749652581154277e-05, "loss": 0.047064781188964844, "step": 1729 }, { "epoch": 0.2338825517532742, "grad_norm": 0.2297324538230896, "learning_rate": 2.7746045471818685e-05, "loss": 0.049700260162353516, "step": 1730 }, { "epoch": 0.23401774397972117, "grad_norm": 0.4015132784843445, "learning_rate": 2.7742435708688458e-05, "loss": 0.06686007976531982, "step": 1731 }, { "epoch": 0.23415293620616814, "grad_norm": 0.6935897469520569, "learning_rate": 2.7738823292515167e-05, "loss": 0.09031033515930176, "step": 1732 }, { "epoch": 0.23428812843261512, "grad_norm": 1.1761062145233154, "learning_rate": 2.773520822405093e-05, "loss": 0.09777450561523438, "step": 1733 }, { "epoch": 0.2344233206590621, "grad_norm": 0.39272940158843994, "learning_rate": 2.7731590504048433e-05, "loss": 0.06181478500366211, "step": 1734 }, { "epoch": 0.23455851288550908, "grad_norm": 0.33699238300323486, "learning_rate": 2.7727970133260896e-05, "loss": 0.07762861251831055, "step": 1735 }, { "epoch": 0.23469370511195606, "grad_norm": 0.2577589452266693, "learning_rate": 2.7724347112442106e-05, "loss": 0.04999685287475586, "step": 1736 }, { "epoch": 0.23482889733840304, "grad_norm": 0.4214746654033661, "learning_rate": 2.772072144234639e-05, "loss": 0.05493927001953125, "step": 1737 }, { "epoch": 0.23496408956485001, "grad_norm": 0.23358161747455597, "learning_rate": 2.7717093123728634e-05, "loss": 0.06423664093017578, "step": 1738 }, { "epoch": 0.235099281791297, "grad_norm": 0.4160080552101135, "learning_rate": 2.771346215734428e-05, "loss": 0.07363462448120117, "step": 1739 }, { "epoch": 0.23523447401774397, "grad_norm": 0.5243673324584961, "learning_rate": 2.7709828543949302e-05, "loss": 0.07423591613769531, "step": 1740 }, { "epoch": 0.23536966624419095, "grad_norm": 0.5465552806854248, "learning_rate": 2.770619228430025e-05, "loss": 0.0896914005279541, "step": 1741 }, { "epoch": 0.23550485847063793, "grad_norm": 0.4041135013103485, "learning_rate": 2.77025533791542e-05, "loss": 0.06969207525253296, "step": 1742 }, { "epoch": 0.2356400506970849, "grad_norm": 0.33528199791908264, "learning_rate": 2.76989118292688e-05, "loss": 0.055374860763549805, "step": 1743 }, { "epoch": 0.23577524292353189, "grad_norm": 0.37491998076438904, "learning_rate": 2.7695267635402242e-05, "loss": 0.08498287200927734, "step": 1744 }, { "epoch": 0.23591043514997886, "grad_norm": 0.5534172058105469, "learning_rate": 2.7691620798313258e-05, "loss": 0.05818986892700195, "step": 1745 }, { "epoch": 0.23604562737642584, "grad_norm": 0.5367288589477539, "learning_rate": 2.7687971318761145e-05, "loss": 0.054492950439453125, "step": 1746 }, { "epoch": 0.23618081960287282, "grad_norm": 0.2770763039588928, "learning_rate": 2.7684319197505746e-05, "loss": 0.060950636863708496, "step": 1747 }, { "epoch": 0.23631601182931983, "grad_norm": 0.20724846422672272, "learning_rate": 2.7680664435307446e-05, "loss": 0.04057741165161133, "step": 1748 }, { "epoch": 0.2364512040557668, "grad_norm": 0.587720513343811, "learning_rate": 2.767700703292719e-05, "loss": 0.059874534606933594, "step": 1749 }, { "epoch": 0.23658639628221378, "grad_norm": 0.4023610055446625, "learning_rate": 2.767334699112647e-05, "loss": 0.09053611755371094, "step": 1750 }, { "epoch": 0.23672158850866076, "grad_norm": 0.4142555892467499, "learning_rate": 2.7669684310667318e-05, "loss": 0.038106441497802734, "step": 1751 }, { "epoch": 0.23685678073510774, "grad_norm": 0.2784874439239502, "learning_rate": 2.7666018992312333e-05, "loss": 0.06513595581054688, "step": 1752 }, { "epoch": 0.23699197296155472, "grad_norm": 0.3611937463283539, "learning_rate": 2.7662351036824653e-05, "loss": 0.07321715354919434, "step": 1753 }, { "epoch": 0.2371271651880017, "grad_norm": 0.3308567702770233, "learning_rate": 2.7658680444967964e-05, "loss": 0.06647872924804688, "step": 1754 }, { "epoch": 0.23726235741444868, "grad_norm": 0.4900350272655487, "learning_rate": 2.76550072175065e-05, "loss": 0.08567094802856445, "step": 1755 }, { "epoch": 0.23739754964089566, "grad_norm": 0.30941900610923767, "learning_rate": 2.7651331355205044e-05, "loss": 0.07367181777954102, "step": 1756 }, { "epoch": 0.23753274186734263, "grad_norm": 0.39354702830314636, "learning_rate": 2.7647652858828936e-05, "loss": 0.06630945205688477, "step": 1757 }, { "epoch": 0.2376679340937896, "grad_norm": 0.41852644085884094, "learning_rate": 2.764397172914406e-05, "loss": 0.09297752380371094, "step": 1758 }, { "epoch": 0.2378031263202366, "grad_norm": 0.2813112139701843, "learning_rate": 2.7640287966916845e-05, "loss": 0.05391216278076172, "step": 1759 }, { "epoch": 0.23793831854668357, "grad_norm": 0.3069011867046356, "learning_rate": 2.7636601572914266e-05, "loss": 0.06717586517333984, "step": 1760 }, { "epoch": 0.23807351077313055, "grad_norm": 0.3833242952823639, "learning_rate": 2.7632912547903855e-05, "loss": 0.05843085050582886, "step": 1761 }, { "epoch": 0.23820870299957753, "grad_norm": 0.3839249610900879, "learning_rate": 2.7629220892653685e-05, "loss": 0.06429481506347656, "step": 1762 }, { "epoch": 0.2383438952260245, "grad_norm": 0.22615458071231842, "learning_rate": 2.7625526607932378e-05, "loss": 0.05566549301147461, "step": 1763 }, { "epoch": 0.23847908745247148, "grad_norm": 0.5173673033714294, "learning_rate": 2.76218296945091e-05, "loss": 0.06610107421875, "step": 1764 }, { "epoch": 0.23861427967891846, "grad_norm": 0.25685936212539673, "learning_rate": 2.7618130153153577e-05, "loss": 0.056848883628845215, "step": 1765 }, { "epoch": 0.23874947190536544, "grad_norm": 0.1981397122144699, "learning_rate": 2.7614427984636063e-05, "loss": 0.040857791900634766, "step": 1766 }, { "epoch": 0.23888466413181242, "grad_norm": 0.25202375650405884, "learning_rate": 2.7610723189727377e-05, "loss": 0.05035686492919922, "step": 1767 }, { "epoch": 0.2390198563582594, "grad_norm": 0.47765684127807617, "learning_rate": 2.760701576919888e-05, "loss": 0.06260061264038086, "step": 1768 }, { "epoch": 0.23915504858470638, "grad_norm": 0.29305171966552734, "learning_rate": 2.760330572382246e-05, "loss": 0.07577896118164062, "step": 1769 }, { "epoch": 0.23929024081115335, "grad_norm": 0.21195778250694275, "learning_rate": 2.7599593054370584e-05, "loss": 0.050594329833984375, "step": 1770 }, { "epoch": 0.23942543303760033, "grad_norm": 0.31517109274864197, "learning_rate": 2.7595877761616246e-05, "loss": 0.07395100593566895, "step": 1771 }, { "epoch": 0.2395606252640473, "grad_norm": 0.42024487257003784, "learning_rate": 2.759215984633299e-05, "loss": 0.06496191024780273, "step": 1772 }, { "epoch": 0.2396958174904943, "grad_norm": 0.4478231966495514, "learning_rate": 2.7588439309294902e-05, "loss": 0.051871299743652344, "step": 1773 }, { "epoch": 0.23983100971694127, "grad_norm": 0.4402880072593689, "learning_rate": 2.7584716151276623e-05, "loss": 0.08018779754638672, "step": 1774 }, { "epoch": 0.23996620194338825, "grad_norm": 0.26607251167297363, "learning_rate": 2.7580990373053325e-05, "loss": 0.056462764739990234, "step": 1775 }, { "epoch": 0.24010139416983522, "grad_norm": 0.3490714132785797, "learning_rate": 2.7577261975400747e-05, "loss": 0.0739583969116211, "step": 1776 }, { "epoch": 0.2402365863962822, "grad_norm": 0.5762818455696106, "learning_rate": 2.7573530959095154e-05, "loss": 0.06903731822967529, "step": 1777 }, { "epoch": 0.24037177862272918, "grad_norm": 0.3279045820236206, "learning_rate": 2.756979732491336e-05, "loss": 0.06824302673339844, "step": 1778 }, { "epoch": 0.24050697084917616, "grad_norm": 0.3041938245296478, "learning_rate": 2.756606107363274e-05, "loss": 0.042186737060546875, "step": 1779 }, { "epoch": 0.24064216307562314, "grad_norm": 0.22868023812770844, "learning_rate": 2.7562322206031192e-05, "loss": 0.05701124668121338, "step": 1780 }, { "epoch": 0.24077735530207012, "grad_norm": 0.5037597417831421, "learning_rate": 2.7558580722887166e-05, "loss": 0.07582688331604004, "step": 1781 }, { "epoch": 0.24091254752851712, "grad_norm": 0.24145066738128662, "learning_rate": 2.7554836624979666e-05, "loss": 0.054518699645996094, "step": 1782 }, { "epoch": 0.2410477397549641, "grad_norm": 0.6626120209693909, "learning_rate": 2.7551089913088233e-05, "loss": 0.07623720169067383, "step": 1783 }, { "epoch": 0.24118293198141108, "grad_norm": 0.5188962817192078, "learning_rate": 2.7547340587992948e-05, "loss": 0.08408737182617188, "step": 1784 }, { "epoch": 0.24131812420785806, "grad_norm": 0.4455700218677521, "learning_rate": 2.754358865047444e-05, "loss": 0.06111133098602295, "step": 1785 }, { "epoch": 0.24145331643430504, "grad_norm": 0.5123838186264038, "learning_rate": 2.7539834101313885e-05, "loss": 0.086700439453125, "step": 1786 }, { "epoch": 0.24158850866075202, "grad_norm": 0.9112415909767151, "learning_rate": 2.7536076941293003e-05, "loss": 0.0907973051071167, "step": 1787 }, { "epoch": 0.241723700887199, "grad_norm": 0.20850281417369843, "learning_rate": 2.753231717119405e-05, "loss": 0.04787421226501465, "step": 1788 }, { "epoch": 0.24185889311364597, "grad_norm": 0.24106447398662567, "learning_rate": 2.7528554791799826e-05, "loss": 0.04145240783691406, "step": 1789 }, { "epoch": 0.24199408534009295, "grad_norm": 0.19539408385753632, "learning_rate": 2.7524789803893686e-05, "loss": 0.04960012435913086, "step": 1790 }, { "epoch": 0.24212927756653993, "grad_norm": 0.44446125626564026, "learning_rate": 2.7521022208259526e-05, "loss": 0.0738372802734375, "step": 1791 }, { "epoch": 0.2422644697929869, "grad_norm": 0.5379305481910706, "learning_rate": 2.7517252005681762e-05, "loss": 0.08936834335327148, "step": 1792 }, { "epoch": 0.2423996620194339, "grad_norm": 0.4252738952636719, "learning_rate": 2.7513479196945385e-05, "loss": 0.0784444808959961, "step": 1793 }, { "epoch": 0.24253485424588087, "grad_norm": 0.32151463627815247, "learning_rate": 2.750970378283591e-05, "loss": 0.07025814056396484, "step": 1794 }, { "epoch": 0.24267004647232784, "grad_norm": 0.3168907165527344, "learning_rate": 2.7505925764139398e-05, "loss": 0.05860328674316406, "step": 1795 }, { "epoch": 0.24280523869877482, "grad_norm": 0.18452592194080353, "learning_rate": 2.7502145141642447e-05, "loss": 0.03277397155761719, "step": 1796 }, { "epoch": 0.2429404309252218, "grad_norm": 0.5366207361221313, "learning_rate": 2.7498361916132212e-05, "loss": 0.06469535827636719, "step": 1797 }, { "epoch": 0.24307562315166878, "grad_norm": 0.3622032105922699, "learning_rate": 2.7494576088396376e-05, "loss": 0.06041765213012695, "step": 1798 }, { "epoch": 0.24321081537811576, "grad_norm": 0.5622307062149048, "learning_rate": 2.749078765922317e-05, "loss": 0.08621740341186523, "step": 1799 }, { "epoch": 0.24334600760456274, "grad_norm": 0.47309064865112305, "learning_rate": 2.7486996629401366e-05, "loss": 0.07869148254394531, "step": 1800 }, { "epoch": 0.24348119983100971, "grad_norm": 0.4613445997238159, "learning_rate": 2.7483202999720272e-05, "loss": 0.0908050537109375, "step": 1801 }, { "epoch": 0.2436163920574567, "grad_norm": 0.4424324035644531, "learning_rate": 2.7479406770969747e-05, "loss": 0.06970596313476562, "step": 1802 }, { "epoch": 0.24375158428390367, "grad_norm": 0.2568359375, "learning_rate": 2.7475607943940182e-05, "loss": 0.05686187744140625, "step": 1803 }, { "epoch": 0.24388677651035065, "grad_norm": 0.3203972280025482, "learning_rate": 2.7471806519422514e-05, "loss": 0.08163166046142578, "step": 1804 }, { "epoch": 0.24402196873679763, "grad_norm": 0.2624284625053406, "learning_rate": 2.746800249820822e-05, "loss": 0.038188934326171875, "step": 1805 }, { "epoch": 0.2441571609632446, "grad_norm": 0.7388049960136414, "learning_rate": 2.7464195881089323e-05, "loss": 0.10936880111694336, "step": 1806 }, { "epoch": 0.24429235318969159, "grad_norm": 0.41418957710266113, "learning_rate": 2.746038666885837e-05, "loss": 0.06546401977539062, "step": 1807 }, { "epoch": 0.24442754541613856, "grad_norm": 0.3561664819717407, "learning_rate": 2.7456574862308474e-05, "loss": 0.12682628631591797, "step": 1808 }, { "epoch": 0.24456273764258554, "grad_norm": 0.2145920991897583, "learning_rate": 2.745276046223326e-05, "loss": 0.041588783264160156, "step": 1809 }, { "epoch": 0.24469792986903252, "grad_norm": 0.8844074010848999, "learning_rate": 2.744894346942691e-05, "loss": 0.07974708080291748, "step": 1810 }, { "epoch": 0.2448331220954795, "grad_norm": 0.38096532225608826, "learning_rate": 2.744512388468415e-05, "loss": 0.07446169853210449, "step": 1811 }, { "epoch": 0.24496831432192648, "grad_norm": 0.3055894374847412, "learning_rate": 2.7441301708800227e-05, "loss": 0.058235883712768555, "step": 1812 }, { "epoch": 0.24510350654837346, "grad_norm": 0.23488426208496094, "learning_rate": 2.7437476942570942e-05, "loss": 0.048816680908203125, "step": 1813 }, { "epoch": 0.24523869877482044, "grad_norm": 0.316966712474823, "learning_rate": 2.7433649586792637e-05, "loss": 0.03723907470703125, "step": 1814 }, { "epoch": 0.2453738910012674, "grad_norm": 0.5726296305656433, "learning_rate": 2.7429819642262178e-05, "loss": 0.05982494354248047, "step": 1815 }, { "epoch": 0.24550908322771442, "grad_norm": 0.21773973107337952, "learning_rate": 2.7425987109776994e-05, "loss": 0.056328773498535156, "step": 1816 }, { "epoch": 0.2456442754541614, "grad_norm": 1.0662094354629517, "learning_rate": 2.7422151990135022e-05, "loss": 0.08037757873535156, "step": 1817 }, { "epoch": 0.24577946768060838, "grad_norm": 0.6202515959739685, "learning_rate": 2.741831428413477e-05, "loss": 0.09058094024658203, "step": 1818 }, { "epoch": 0.24591465990705536, "grad_norm": 0.29102861881256104, "learning_rate": 2.7414473992575257e-05, "loss": 0.06721258163452148, "step": 1819 }, { "epoch": 0.24604985213350233, "grad_norm": 0.5228894352912903, "learning_rate": 2.7410631116256054e-05, "loss": 0.07520627975463867, "step": 1820 }, { "epoch": 0.2461850443599493, "grad_norm": 0.7071523666381836, "learning_rate": 2.7406785655977275e-05, "loss": 0.06624984741210938, "step": 1821 }, { "epoch": 0.2463202365863963, "grad_norm": 0.6517912745475769, "learning_rate": 2.7402937612539563e-05, "loss": 0.08530855178833008, "step": 1822 }, { "epoch": 0.24645542881284327, "grad_norm": 0.2140028029680252, "learning_rate": 2.7399086986744095e-05, "loss": 0.058829307556152344, "step": 1823 }, { "epoch": 0.24659062103929025, "grad_norm": 0.24932214617729187, "learning_rate": 2.7395233779392598e-05, "loss": 0.056301116943359375, "step": 1824 }, { "epoch": 0.24672581326573723, "grad_norm": 1.0421650409698486, "learning_rate": 2.739137799128733e-05, "loss": 0.07622814178466797, "step": 1825 }, { "epoch": 0.2468610054921842, "grad_norm": 0.2780892550945282, "learning_rate": 2.7387519623231085e-05, "loss": 0.05814766883850098, "step": 1826 }, { "epoch": 0.24699619771863118, "grad_norm": 0.29341742396354675, "learning_rate": 2.7383658676027195e-05, "loss": 0.06454992294311523, "step": 1827 }, { "epoch": 0.24713138994507816, "grad_norm": 0.3836059868335724, "learning_rate": 2.7379795150479535e-05, "loss": 0.06870555877685547, "step": 1828 }, { "epoch": 0.24726658217152514, "grad_norm": 0.20684897899627686, "learning_rate": 2.73759290473925e-05, "loss": 0.053259849548339844, "step": 1829 }, { "epoch": 0.24740177439797212, "grad_norm": 0.6141616702079773, "learning_rate": 2.7372060367571044e-05, "loss": 0.09223604202270508, "step": 1830 }, { "epoch": 0.2475369666244191, "grad_norm": 0.5665993094444275, "learning_rate": 2.7368189111820648e-05, "loss": 0.0918726921081543, "step": 1831 }, { "epoch": 0.24767215885086608, "grad_norm": 0.5302583575248718, "learning_rate": 2.736431528094732e-05, "loss": 0.05847883224487305, "step": 1832 }, { "epoch": 0.24780735107731305, "grad_norm": 0.508675754070282, "learning_rate": 2.7360438875757614e-05, "loss": 0.07937335968017578, "step": 1833 }, { "epoch": 0.24794254330376003, "grad_norm": 0.39748477935791016, "learning_rate": 2.7356559897058624e-05, "loss": 0.07612800598144531, "step": 1834 }, { "epoch": 0.248077735530207, "grad_norm": 0.3022492229938507, "learning_rate": 2.735267834565797e-05, "loss": 0.052533626556396484, "step": 1835 }, { "epoch": 0.248212927756654, "grad_norm": 0.8229854702949524, "learning_rate": 2.734879422236381e-05, "loss": 0.07980060577392578, "step": 1836 }, { "epoch": 0.24834811998310097, "grad_norm": 0.8079227209091187, "learning_rate": 2.734490752798484e-05, "loss": 0.11629676818847656, "step": 1837 }, { "epoch": 0.24848331220954795, "grad_norm": 0.24918903410434723, "learning_rate": 2.7341018263330296e-05, "loss": 0.04086637496948242, "step": 1838 }, { "epoch": 0.24861850443599492, "grad_norm": 0.30910560488700867, "learning_rate": 2.7337126429209935e-05, "loss": 0.06302356719970703, "step": 1839 }, { "epoch": 0.2487536966624419, "grad_norm": 0.5225250124931335, "learning_rate": 2.7333232026434064e-05, "loss": 0.07401227951049805, "step": 1840 }, { "epoch": 0.24888888888888888, "grad_norm": 0.2577703297138214, "learning_rate": 2.7329335055813517e-05, "loss": 0.06687498092651367, "step": 1841 }, { "epoch": 0.24902408111533586, "grad_norm": 0.18020059168338776, "learning_rate": 2.732543551815966e-05, "loss": 0.049753665924072266, "step": 1842 }, { "epoch": 0.24915927334178284, "grad_norm": 0.8789290189743042, "learning_rate": 2.7321533414284404e-05, "loss": 0.06753873825073242, "step": 1843 }, { "epoch": 0.24929446556822982, "grad_norm": 0.3964804708957672, "learning_rate": 2.731762874500018e-05, "loss": 0.09026479721069336, "step": 1844 }, { "epoch": 0.2494296577946768, "grad_norm": 0.7734335660934448, "learning_rate": 2.7313721511119972e-05, "loss": 0.06596851348876953, "step": 1845 }, { "epoch": 0.24956485002112377, "grad_norm": 0.5335857272148132, "learning_rate": 2.7309811713457275e-05, "loss": 0.06571030616760254, "step": 1846 }, { "epoch": 0.24970004224757075, "grad_norm": 0.3945905566215515, "learning_rate": 2.730589935282614e-05, "loss": 0.06314945220947266, "step": 1847 }, { "epoch": 0.24983523447401773, "grad_norm": 0.7017887234687805, "learning_rate": 2.7301984430041135e-05, "loss": 0.06937360763549805, "step": 1848 }, { "epoch": 0.2499704267004647, "grad_norm": 0.31867197155952454, "learning_rate": 2.7298066945917368e-05, "loss": 0.0736551284790039, "step": 1849 }, { "epoch": 0.2501056189269117, "grad_norm": 0.31929993629455566, "learning_rate": 2.7294146901270482e-05, "loss": 0.06351780891418457, "step": 1850 }, { "epoch": 0.25024081115335867, "grad_norm": 0.41168221831321716, "learning_rate": 2.7290224296916653e-05, "loss": 0.07644474506378174, "step": 1851 }, { "epoch": 0.25037600337980565, "grad_norm": 0.38052400946617126, "learning_rate": 2.7286299133672584e-05, "loss": 0.060194969177246094, "step": 1852 }, { "epoch": 0.2505111956062526, "grad_norm": 0.7387686967849731, "learning_rate": 2.728237141235552e-05, "loss": 0.05945873260498047, "step": 1853 }, { "epoch": 0.2506463878326996, "grad_norm": 0.5313815474510193, "learning_rate": 2.727844113378322e-05, "loss": 0.06591439247131348, "step": 1854 }, { "epoch": 0.2507815800591466, "grad_norm": 0.5547090172767639, "learning_rate": 2.7274508298774013e-05, "loss": 0.06976318359375, "step": 1855 }, { "epoch": 0.25091677228559356, "grad_norm": 0.3434223234653473, "learning_rate": 2.727057290814672e-05, "loss": 0.08040809631347656, "step": 1856 }, { "epoch": 0.25105196451204054, "grad_norm": 0.4341026246547699, "learning_rate": 2.7266634962720704e-05, "loss": 0.04680442810058594, "step": 1857 }, { "epoch": 0.2511871567384875, "grad_norm": 0.4787321984767914, "learning_rate": 2.726269446331588e-05, "loss": 0.07925605773925781, "step": 1858 }, { "epoch": 0.2513223489649345, "grad_norm": 0.3505917489528656, "learning_rate": 2.7258751410752676e-05, "loss": 0.07482719421386719, "step": 1859 }, { "epoch": 0.2514575411913815, "grad_norm": 0.27366140484809875, "learning_rate": 2.725480580585206e-05, "loss": 0.05466175079345703, "step": 1860 }, { "epoch": 0.25159273341782845, "grad_norm": 0.4962812662124634, "learning_rate": 2.7250857649435522e-05, "loss": 0.07026433944702148, "step": 1861 }, { "epoch": 0.25172792564427543, "grad_norm": 0.41084927320480347, "learning_rate": 2.724690694232509e-05, "loss": 0.05806446075439453, "step": 1862 }, { "epoch": 0.2518631178707224, "grad_norm": 0.5985238552093506, "learning_rate": 2.7242953685343327e-05, "loss": 0.08927154541015625, "step": 1863 }, { "epoch": 0.2519983100971694, "grad_norm": 0.6761572360992432, "learning_rate": 2.723899787931332e-05, "loss": 0.07208061218261719, "step": 1864 }, { "epoch": 0.25213350232361637, "grad_norm": 0.29676154255867004, "learning_rate": 2.7235039525058684e-05, "loss": 0.06667649745941162, "step": 1865 }, { "epoch": 0.25226869455006334, "grad_norm": 0.33624520897865295, "learning_rate": 2.7231078623403575e-05, "loss": 0.05866742134094238, "step": 1866 }, { "epoch": 0.2524038867765104, "grad_norm": 0.3481168746948242, "learning_rate": 2.722711517517267e-05, "loss": 0.07056522369384766, "step": 1867 }, { "epoch": 0.25253907900295736, "grad_norm": 0.9190338850021362, "learning_rate": 2.7223149181191187e-05, "loss": 0.10958647727966309, "step": 1868 }, { "epoch": 0.25267427122940433, "grad_norm": 0.6682137250900269, "learning_rate": 2.7219180642284864e-05, "loss": 0.08075964450836182, "step": 1869 }, { "epoch": 0.2528094634558513, "grad_norm": 0.5249190926551819, "learning_rate": 2.721520955927997e-05, "loss": 0.07635307312011719, "step": 1870 }, { "epoch": 0.2529446556822983, "grad_norm": 0.583173394203186, "learning_rate": 2.7211235933003302e-05, "loss": 0.09204816818237305, "step": 1871 }, { "epoch": 0.25307984790874527, "grad_norm": 0.29399970173835754, "learning_rate": 2.72072597642822e-05, "loss": 0.044519901275634766, "step": 1872 }, { "epoch": 0.25321504013519225, "grad_norm": 0.4802243709564209, "learning_rate": 2.7203281053944512e-05, "loss": 0.09960746765136719, "step": 1873 }, { "epoch": 0.2533502323616392, "grad_norm": 0.3301074206829071, "learning_rate": 2.719929980281864e-05, "loss": 0.06789875030517578, "step": 1874 }, { "epoch": 0.2534854245880862, "grad_norm": 0.5058059692382812, "learning_rate": 2.719531601173349e-05, "loss": 0.08098983764648438, "step": 1875 }, { "epoch": 0.2536206168145332, "grad_norm": 0.3935385048389435, "learning_rate": 2.7191329681518512e-05, "loss": 0.0812234878540039, "step": 1876 }, { "epoch": 0.25375580904098016, "grad_norm": 0.3673516511917114, "learning_rate": 2.7187340813003682e-05, "loss": 0.04439401626586914, "step": 1877 }, { "epoch": 0.25389100126742714, "grad_norm": 0.31120064854621887, "learning_rate": 2.718334940701951e-05, "loss": 0.05800420045852661, "step": 1878 }, { "epoch": 0.2540261934938741, "grad_norm": 0.2556016743183136, "learning_rate": 2.7179355464397014e-05, "loss": 0.033266305923461914, "step": 1879 }, { "epoch": 0.2541613857203211, "grad_norm": 0.24700996279716492, "learning_rate": 2.7175358985967763e-05, "loss": 0.08549118041992188, "step": 1880 }, { "epoch": 0.2542965779467681, "grad_norm": 0.2749456763267517, "learning_rate": 2.717135997256385e-05, "loss": 0.060967206954956055, "step": 1881 }, { "epoch": 0.25443177017321505, "grad_norm": 0.6183525323867798, "learning_rate": 2.7167358425017882e-05, "loss": 0.07579755783081055, "step": 1882 }, { "epoch": 0.25456696239966203, "grad_norm": 0.21796010434627533, "learning_rate": 2.7163354344163004e-05, "loss": 0.06002235412597656, "step": 1883 }, { "epoch": 0.254702154626109, "grad_norm": 0.24536041915416718, "learning_rate": 2.715934773083289e-05, "loss": 0.04146528244018555, "step": 1884 }, { "epoch": 0.254837346852556, "grad_norm": 0.3926338851451874, "learning_rate": 2.715533858586174e-05, "loss": 0.08445155620574951, "step": 1885 }, { "epoch": 0.25497253907900297, "grad_norm": 0.1608278751373291, "learning_rate": 2.715132691008427e-05, "loss": 0.04503297805786133, "step": 1886 }, { "epoch": 0.25510773130544995, "grad_norm": 0.31084445118904114, "learning_rate": 2.714731270433574e-05, "loss": 0.07880449295043945, "step": 1887 }, { "epoch": 0.2552429235318969, "grad_norm": 0.5509818196296692, "learning_rate": 2.7143295969451933e-05, "loss": 0.05958402156829834, "step": 1888 }, { "epoch": 0.2553781157583439, "grad_norm": 0.3491239845752716, "learning_rate": 2.7139276706269147e-05, "loss": 0.04583573341369629, "step": 1889 }, { "epoch": 0.2555133079847909, "grad_norm": 0.3660595417022705, "learning_rate": 2.7135254915624213e-05, "loss": 0.05745553970336914, "step": 1890 }, { "epoch": 0.25564850021123786, "grad_norm": 0.3180834650993347, "learning_rate": 2.7131230598354497e-05, "loss": 0.07939624786376953, "step": 1891 }, { "epoch": 0.25578369243768484, "grad_norm": 0.3096720576286316, "learning_rate": 2.712720375529787e-05, "loss": 0.06689834594726562, "step": 1892 }, { "epoch": 0.2559188846641318, "grad_norm": 0.8833802938461304, "learning_rate": 2.7123174387292758e-05, "loss": 0.10140687227249146, "step": 1893 }, { "epoch": 0.2560540768905788, "grad_norm": 0.5502578616142273, "learning_rate": 2.7119142495178088e-05, "loss": 0.09019899368286133, "step": 1894 }, { "epoch": 0.2561892691170258, "grad_norm": 0.2593752443790436, "learning_rate": 2.711510807979333e-05, "loss": 0.052825212478637695, "step": 1895 }, { "epoch": 0.25632446134347275, "grad_norm": 0.5093520283699036, "learning_rate": 2.7111071141978452e-05, "loss": 0.06151437759399414, "step": 1896 }, { "epoch": 0.25645965356991973, "grad_norm": 0.3877309262752533, "learning_rate": 2.7107031682573987e-05, "loss": 0.043610453605651855, "step": 1897 }, { "epoch": 0.2565948457963667, "grad_norm": 0.3071913719177246, "learning_rate": 2.710298970242096e-05, "loss": 0.07933568954467773, "step": 1898 }, { "epoch": 0.2567300380228137, "grad_norm": 0.2128777951002121, "learning_rate": 2.7098945202360937e-05, "loss": 0.059468746185302734, "step": 1899 }, { "epoch": 0.25686523024926067, "grad_norm": 0.40854331851005554, "learning_rate": 2.7094898183236e-05, "loss": 0.093597412109375, "step": 1900 }, { "epoch": 0.25700042247570765, "grad_norm": 0.48837989568710327, "learning_rate": 2.709084864588877e-05, "loss": 0.08818244934082031, "step": 1901 }, { "epoch": 0.2571356147021546, "grad_norm": 0.22727102041244507, "learning_rate": 2.708679659116237e-05, "loss": 0.034447431564331055, "step": 1902 }, { "epoch": 0.2572708069286016, "grad_norm": 0.2908400595188141, "learning_rate": 2.708274201990047e-05, "loss": 0.06956076622009277, "step": 1903 }, { "epoch": 0.2574059991550486, "grad_norm": 0.29992619156837463, "learning_rate": 2.7078684932947247e-05, "loss": 0.07156085968017578, "step": 1904 }, { "epoch": 0.25754119138149556, "grad_norm": 0.490669846534729, "learning_rate": 2.7074625331147407e-05, "loss": 0.09055781364440918, "step": 1905 }, { "epoch": 0.25767638360794254, "grad_norm": 0.474657267332077, "learning_rate": 2.7070563215346184e-05, "loss": 0.0897374153137207, "step": 1906 }, { "epoch": 0.2578115758343895, "grad_norm": 0.4891012907028198, "learning_rate": 2.7066498586389332e-05, "loss": 0.10694503784179688, "step": 1907 }, { "epoch": 0.2579467680608365, "grad_norm": 0.6720229983329773, "learning_rate": 2.7062431445123127e-05, "loss": 0.10859489440917969, "step": 1908 }, { "epoch": 0.2580819602872835, "grad_norm": 0.163092240691185, "learning_rate": 2.705836179239437e-05, "loss": 0.03721427917480469, "step": 1909 }, { "epoch": 0.25821715251373045, "grad_norm": 0.2246158868074417, "learning_rate": 2.705428962905039e-05, "loss": 0.05305147171020508, "step": 1910 }, { "epoch": 0.25835234474017743, "grad_norm": 0.4187816381454468, "learning_rate": 2.705021495593902e-05, "loss": 0.06217479705810547, "step": 1911 }, { "epoch": 0.2584875369666244, "grad_norm": 0.23612040281295776, "learning_rate": 2.704613777390864e-05, "loss": 0.05178356170654297, "step": 1912 }, { "epoch": 0.2586227291930714, "grad_norm": 0.2466345876455307, "learning_rate": 2.7042058083808135e-05, "loss": 0.06723594665527344, "step": 1913 }, { "epoch": 0.25875792141951837, "grad_norm": 0.3143467605113983, "learning_rate": 2.7037975886486928e-05, "loss": 0.07501935958862305, "step": 1914 }, { "epoch": 0.25889311364596534, "grad_norm": 0.7473633885383606, "learning_rate": 2.7033891182794942e-05, "loss": 0.07051324844360352, "step": 1915 }, { "epoch": 0.2590283058724123, "grad_norm": 0.3036815822124481, "learning_rate": 2.7029803973582642e-05, "loss": 0.09438610076904297, "step": 1916 }, { "epoch": 0.2591634980988593, "grad_norm": 0.7471302151679993, "learning_rate": 2.7025714259701e-05, "loss": 0.11596965789794922, "step": 1917 }, { "epoch": 0.2592986903253063, "grad_norm": 0.2975316345691681, "learning_rate": 2.7021622042001524e-05, "loss": 0.060982704162597656, "step": 1918 }, { "epoch": 0.25943388255175326, "grad_norm": 0.2315310388803482, "learning_rate": 2.701752732133623e-05, "loss": 0.06364107131958008, "step": 1919 }, { "epoch": 0.25956907477820024, "grad_norm": 0.36252063512802124, "learning_rate": 2.7013430098557664e-05, "loss": 0.07845568656921387, "step": 1920 }, { "epoch": 0.2597042670046472, "grad_norm": 0.29441624879837036, "learning_rate": 2.7009330374518885e-05, "loss": 0.07388079166412354, "step": 1921 }, { "epoch": 0.2598394592310942, "grad_norm": 0.3996450901031494, "learning_rate": 2.7005228150073483e-05, "loss": 0.06785202026367188, "step": 1922 }, { "epoch": 0.2599746514575412, "grad_norm": 0.42293208837509155, "learning_rate": 2.7001123426075558e-05, "loss": 0.07725763320922852, "step": 1923 }, { "epoch": 0.26010984368398815, "grad_norm": 0.36804306507110596, "learning_rate": 2.699701620337974e-05, "loss": 0.08327007293701172, "step": 1924 }, { "epoch": 0.26024503591043513, "grad_norm": 0.20941795408725739, "learning_rate": 2.699290648284117e-05, "loss": 0.04230928421020508, "step": 1925 }, { "epoch": 0.2603802281368821, "grad_norm": 0.41908788681030273, "learning_rate": 2.6988794265315522e-05, "loss": 0.07233428955078125, "step": 1926 }, { "epoch": 0.2605154203633291, "grad_norm": 0.648202121257782, "learning_rate": 2.698467955165897e-05, "loss": 0.07257938385009766, "step": 1927 }, { "epoch": 0.26065061258977607, "grad_norm": 0.6043091416358948, "learning_rate": 2.6980562342728226e-05, "loss": 0.07048463821411133, "step": 1928 }, { "epoch": 0.26078580481622304, "grad_norm": 0.27515342831611633, "learning_rate": 2.6976442639380516e-05, "loss": 0.05468153953552246, "step": 1929 }, { "epoch": 0.26092099704267, "grad_norm": 0.39143988490104675, "learning_rate": 2.6972320442473583e-05, "loss": 0.11225557327270508, "step": 1930 }, { "epoch": 0.261056189269117, "grad_norm": 0.34398409724235535, "learning_rate": 2.6968195752865686e-05, "loss": 0.06856608390808105, "step": 1931 }, { "epoch": 0.261191381495564, "grad_norm": 0.5425191521644592, "learning_rate": 2.6964068571415613e-05, "loss": 0.05328011512756348, "step": 1932 }, { "epoch": 0.26132657372201096, "grad_norm": 0.5468816161155701, "learning_rate": 2.6959938898982667e-05, "loss": 0.05895841121673584, "step": 1933 }, { "epoch": 0.26146176594845794, "grad_norm": 0.7372881770133972, "learning_rate": 2.6955806736426657e-05, "loss": 0.055188655853271484, "step": 1934 }, { "epoch": 0.26159695817490497, "grad_norm": 0.6510387659072876, "learning_rate": 2.6951672084607937e-05, "loss": 0.07172107696533203, "step": 1935 }, { "epoch": 0.26173215040135195, "grad_norm": 0.4357663094997406, "learning_rate": 2.694753494438735e-05, "loss": 0.08475422859191895, "step": 1936 }, { "epoch": 0.2618673426277989, "grad_norm": 0.36257466673851013, "learning_rate": 2.6943395316626272e-05, "loss": 0.07384920120239258, "step": 1937 }, { "epoch": 0.2620025348542459, "grad_norm": 0.6099448800086975, "learning_rate": 2.69392532021866e-05, "loss": 0.06450843811035156, "step": 1938 }, { "epoch": 0.2621377270806929, "grad_norm": 0.5269374847412109, "learning_rate": 2.693510860193075e-05, "loss": 0.04517483711242676, "step": 1939 }, { "epoch": 0.26227291930713986, "grad_norm": 0.3653620779514313, "learning_rate": 2.6930961516721638e-05, "loss": 0.07281732559204102, "step": 1940 }, { "epoch": 0.26240811153358684, "grad_norm": 0.34565868973731995, "learning_rate": 2.6926811947422717e-05, "loss": 0.07845878601074219, "step": 1941 }, { "epoch": 0.2625433037600338, "grad_norm": 0.4030977487564087, "learning_rate": 2.6922659894897946e-05, "loss": 0.09230232238769531, "step": 1942 }, { "epoch": 0.2626784959864808, "grad_norm": 0.1322583258152008, "learning_rate": 2.6918505360011805e-05, "loss": 0.04181957244873047, "step": 1943 }, { "epoch": 0.2628136882129278, "grad_norm": 0.3881249725818634, "learning_rate": 2.6914348343629292e-05, "loss": 0.06250715255737305, "step": 1944 }, { "epoch": 0.26294888043937475, "grad_norm": 0.3524782955646515, "learning_rate": 2.6910188846615918e-05, "loss": 0.06326961517333984, "step": 1945 }, { "epoch": 0.26308407266582173, "grad_norm": 0.18269595503807068, "learning_rate": 2.6906026869837714e-05, "loss": 0.05874431133270264, "step": 1946 }, { "epoch": 0.2632192648922687, "grad_norm": 0.2870374917984009, "learning_rate": 2.6901862414161222e-05, "loss": 0.07396650314331055, "step": 1947 }, { "epoch": 0.2633544571187157, "grad_norm": 0.2652686834335327, "learning_rate": 2.689769548045351e-05, "loss": 0.05239880084991455, "step": 1948 }, { "epoch": 0.26348964934516267, "grad_norm": 0.4712505340576172, "learning_rate": 2.6893526069582154e-05, "loss": 0.067535400390625, "step": 1949 }, { "epoch": 0.26362484157160965, "grad_norm": 0.2714921236038208, "learning_rate": 2.6889354182415245e-05, "loss": 0.04259657859802246, "step": 1950 }, { "epoch": 0.2637600337980566, "grad_norm": 0.21372658014297485, "learning_rate": 2.688517981982139e-05, "loss": 0.06403589248657227, "step": 1951 }, { "epoch": 0.2638952260245036, "grad_norm": 0.45280420780181885, "learning_rate": 2.6881002982669723e-05, "loss": 0.06479430198669434, "step": 1952 }, { "epoch": 0.2640304182509506, "grad_norm": 0.3762657642364502, "learning_rate": 2.6876823671829874e-05, "loss": 0.06491422653198242, "step": 1953 }, { "epoch": 0.26416561047739756, "grad_norm": 0.33864468336105347, "learning_rate": 2.6872641888172e-05, "loss": 0.0740060806274414, "step": 1954 }, { "epoch": 0.26430080270384454, "grad_norm": 0.3459004759788513, "learning_rate": 2.6868457632566774e-05, "loss": 0.08530616760253906, "step": 1955 }, { "epoch": 0.2644359949302915, "grad_norm": 0.3440328538417816, "learning_rate": 2.6864270905885377e-05, "loss": 0.07538318634033203, "step": 1956 }, { "epoch": 0.2645711871567385, "grad_norm": 0.5170190334320068, "learning_rate": 2.6860081708999515e-05, "loss": 0.05870771408081055, "step": 1957 }, { "epoch": 0.2647063793831855, "grad_norm": 0.4916151762008667, "learning_rate": 2.685589004278139e-05, "loss": 0.1157674789428711, "step": 1958 }, { "epoch": 0.26484157160963245, "grad_norm": 0.31486639380455017, "learning_rate": 2.6851695908103737e-05, "loss": 0.07006168365478516, "step": 1959 }, { "epoch": 0.26497676383607943, "grad_norm": 0.7468628287315369, "learning_rate": 2.6847499305839796e-05, "loss": 0.07650637626647949, "step": 1960 }, { "epoch": 0.2651119560625264, "grad_norm": 0.2336750477552414, "learning_rate": 2.684330023686332e-05, "loss": 0.05476212501525879, "step": 1961 }, { "epoch": 0.2652471482889734, "grad_norm": 0.2948932945728302, "learning_rate": 2.6839098702048577e-05, "loss": 0.05624723434448242, "step": 1962 }, { "epoch": 0.26538234051542037, "grad_norm": 0.46952325105667114, "learning_rate": 2.683489470227035e-05, "loss": 0.09190154075622559, "step": 1963 }, { "epoch": 0.26551753274186735, "grad_norm": 0.3077831566333771, "learning_rate": 2.6830688238403936e-05, "loss": 0.05613231658935547, "step": 1964 }, { "epoch": 0.2656527249683143, "grad_norm": 0.20866359770298004, "learning_rate": 2.682647931132514e-05, "loss": 0.04815483093261719, "step": 1965 }, { "epoch": 0.2657879171947613, "grad_norm": 0.33155784010887146, "learning_rate": 2.682226792191029e-05, "loss": 0.05470585823059082, "step": 1966 }, { "epoch": 0.2659231094212083, "grad_norm": 0.35803017020225525, "learning_rate": 2.681805407103621e-05, "loss": 0.057668209075927734, "step": 1967 }, { "epoch": 0.26605830164765526, "grad_norm": 0.2577706575393677, "learning_rate": 2.6813837759580253e-05, "loss": 0.04786479473114014, "step": 1968 }, { "epoch": 0.26619349387410224, "grad_norm": 0.7822843194007874, "learning_rate": 2.6809618988420274e-05, "loss": 0.08090686798095703, "step": 1969 }, { "epoch": 0.2663286861005492, "grad_norm": 0.39716994762420654, "learning_rate": 2.6805397758434647e-05, "loss": 0.04407310485839844, "step": 1970 }, { "epoch": 0.2664638783269962, "grad_norm": 0.3969775438308716, "learning_rate": 2.6801174070502248e-05, "loss": 0.06796550750732422, "step": 1971 }, { "epoch": 0.2665990705534432, "grad_norm": 0.5066125392913818, "learning_rate": 2.679694792550248e-05, "loss": 0.1039915680885315, "step": 1972 }, { "epoch": 0.26673426277989015, "grad_norm": 0.3619740605354309, "learning_rate": 2.6792719324315248e-05, "loss": 0.07608604431152344, "step": 1973 }, { "epoch": 0.26686945500633713, "grad_norm": 0.46979114413261414, "learning_rate": 2.678848826782096e-05, "loss": 0.07393312454223633, "step": 1974 }, { "epoch": 0.2670046472327841, "grad_norm": 0.2395251840353012, "learning_rate": 2.678425475690055e-05, "loss": 0.06704425811767578, "step": 1975 }, { "epoch": 0.2671398394592311, "grad_norm": 0.3646530508995056, "learning_rate": 2.6780018792435464e-05, "loss": 0.05572080612182617, "step": 1976 }, { "epoch": 0.26727503168567807, "grad_norm": 0.5919400453567505, "learning_rate": 2.6775780375307645e-05, "loss": 0.08342146873474121, "step": 1977 }, { "epoch": 0.26741022391212504, "grad_norm": 0.5902851819992065, "learning_rate": 2.6771539506399555e-05, "loss": 0.09327661991119385, "step": 1978 }, { "epoch": 0.267545416138572, "grad_norm": 0.19895702600479126, "learning_rate": 2.6767296186594165e-05, "loss": 0.03602182865142822, "step": 1979 }, { "epoch": 0.267680608365019, "grad_norm": 0.27372804284095764, "learning_rate": 2.676305041677496e-05, "loss": 0.04723072052001953, "step": 1980 }, { "epoch": 0.267815800591466, "grad_norm": 0.3940640985965729, "learning_rate": 2.675880219782593e-05, "loss": 0.07594537734985352, "step": 1981 }, { "epoch": 0.26795099281791296, "grad_norm": 0.512906551361084, "learning_rate": 2.6754551530631575e-05, "loss": 0.046251535415649414, "step": 1982 }, { "epoch": 0.26808618504435994, "grad_norm": 0.2687968313694, "learning_rate": 2.6750298416076907e-05, "loss": 0.05890655517578125, "step": 1983 }, { "epoch": 0.2682213772708069, "grad_norm": 0.3124563992023468, "learning_rate": 2.674604285504745e-05, "loss": 0.07229232788085938, "step": 1984 }, { "epoch": 0.2683565694972539, "grad_norm": 0.29689836502075195, "learning_rate": 2.6741784848429235e-05, "loss": 0.07830238342285156, "step": 1985 }, { "epoch": 0.2684917617237009, "grad_norm": 0.41927099227905273, "learning_rate": 2.67375243971088e-05, "loss": 0.08188128471374512, "step": 1986 }, { "epoch": 0.26862695395014785, "grad_norm": 0.36072978377342224, "learning_rate": 2.6733261501973192e-05, "loss": 0.06334662437438965, "step": 1987 }, { "epoch": 0.26876214617659483, "grad_norm": 0.26369619369506836, "learning_rate": 2.672899616390997e-05, "loss": 0.045501708984375, "step": 1988 }, { "epoch": 0.2688973384030418, "grad_norm": 0.3635765314102173, "learning_rate": 2.67247283838072e-05, "loss": 0.07266116142272949, "step": 1989 }, { "epoch": 0.2690325306294888, "grad_norm": 0.43948882818222046, "learning_rate": 2.6720458162553457e-05, "loss": 0.07158470153808594, "step": 1990 }, { "epoch": 0.26916772285593576, "grad_norm": 0.31330564618110657, "learning_rate": 2.6716185501037822e-05, "loss": 0.05783557891845703, "step": 1991 }, { "epoch": 0.26930291508238274, "grad_norm": 0.35045677423477173, "learning_rate": 2.671191040014989e-05, "loss": 0.05690479278564453, "step": 1992 }, { "epoch": 0.2694381073088297, "grad_norm": 0.6701129674911499, "learning_rate": 2.6707632860779756e-05, "loss": 0.06578874588012695, "step": 1993 }, { "epoch": 0.2695732995352767, "grad_norm": 0.2643255889415741, "learning_rate": 2.6703352883818024e-05, "loss": 0.07078218460083008, "step": 1994 }, { "epoch": 0.2697084917617237, "grad_norm": 0.2178814560174942, "learning_rate": 2.6699070470155816e-05, "loss": 0.059894561767578125, "step": 1995 }, { "epoch": 0.26984368398817066, "grad_norm": 0.33343926072120667, "learning_rate": 2.669478562068475e-05, "loss": 0.0846400260925293, "step": 1996 }, { "epoch": 0.26997887621461764, "grad_norm": 0.2248917669057846, "learning_rate": 2.6690498336296955e-05, "loss": 0.05446124076843262, "step": 1997 }, { "epoch": 0.2701140684410646, "grad_norm": 0.5515278577804565, "learning_rate": 2.6686208617885057e-05, "loss": 0.09082603454589844, "step": 1998 }, { "epoch": 0.2702492606675116, "grad_norm": 0.20593951642513275, "learning_rate": 2.668191646634221e-05, "loss": 0.02307724952697754, "step": 1999 }, { "epoch": 0.27038445289395857, "grad_norm": 0.270003080368042, "learning_rate": 2.667762188256206e-05, "loss": 0.065643310546875, "step": 2000 }, { "epoch": 0.27051964512040555, "grad_norm": 0.21622775495052338, "learning_rate": 2.6673324867438764e-05, "loss": 0.05423140525817871, "step": 2001 }, { "epoch": 0.2706548373468526, "grad_norm": 0.351639986038208, "learning_rate": 2.666902542186698e-05, "loss": 0.054004669189453125, "step": 2002 }, { "epoch": 0.27079002957329956, "grad_norm": 0.47563913464546204, "learning_rate": 2.666472354674187e-05, "loss": 0.12000083923339844, "step": 2003 }, { "epoch": 0.27092522179974654, "grad_norm": 0.4176982045173645, "learning_rate": 2.666041924295912e-05, "loss": 0.0824594497680664, "step": 2004 }, { "epoch": 0.2710604140261935, "grad_norm": 0.3982656002044678, "learning_rate": 2.6656112511414902e-05, "loss": 0.08656144142150879, "step": 2005 }, { "epoch": 0.2711956062526405, "grad_norm": 0.5034632086753845, "learning_rate": 2.6651803353005896e-05, "loss": 0.05389070510864258, "step": 2006 }, { "epoch": 0.2713307984790875, "grad_norm": 0.5947905778884888, "learning_rate": 2.66474917686293e-05, "loss": 0.09485912322998047, "step": 2007 }, { "epoch": 0.27146599070553445, "grad_norm": 0.6628316044807434, "learning_rate": 2.664317775918281e-05, "loss": 0.06086158752441406, "step": 2008 }, { "epoch": 0.27160118293198143, "grad_norm": 0.31264886260032654, "learning_rate": 2.6638861325564615e-05, "loss": 0.058119773864746094, "step": 2009 }, { "epoch": 0.2717363751584284, "grad_norm": 0.3556406795978546, "learning_rate": 2.6634542468673432e-05, "loss": 0.0729360580444336, "step": 2010 }, { "epoch": 0.2718715673848754, "grad_norm": 0.4376427233219147, "learning_rate": 2.663022118940846e-05, "loss": 0.07712364196777344, "step": 2011 }, { "epoch": 0.27200675961132237, "grad_norm": 0.49218839406967163, "learning_rate": 2.662589748866942e-05, "loss": 0.06293153762817383, "step": 2012 }, { "epoch": 0.27214195183776935, "grad_norm": 0.5261367559432983, "learning_rate": 2.6621571367356522e-05, "loss": 0.057858943939208984, "step": 2013 }, { "epoch": 0.2722771440642163, "grad_norm": 0.455219030380249, "learning_rate": 2.6617242826370495e-05, "loss": 0.08683300018310547, "step": 2014 }, { "epoch": 0.2724123362906633, "grad_norm": 0.24125950038433075, "learning_rate": 2.661291186661256e-05, "loss": 0.05467796325683594, "step": 2015 }, { "epoch": 0.2725475285171103, "grad_norm": 1.090306043624878, "learning_rate": 2.6608578488984444e-05, "loss": 0.09870195388793945, "step": 2016 }, { "epoch": 0.27268272074355726, "grad_norm": 0.6628405451774597, "learning_rate": 2.6604242694388388e-05, "loss": 0.1032710075378418, "step": 2017 }, { "epoch": 0.27281791297000424, "grad_norm": 0.5847178101539612, "learning_rate": 2.6599904483727116e-05, "loss": 0.05669689178466797, "step": 2018 }, { "epoch": 0.2729531051964512, "grad_norm": 0.3156111240386963, "learning_rate": 2.6595563857903872e-05, "loss": 0.0486445426940918, "step": 2019 }, { "epoch": 0.2730882974228982, "grad_norm": 0.45537737011909485, "learning_rate": 2.6591220817822405e-05, "loss": 0.08964300155639648, "step": 2020 }, { "epoch": 0.2732234896493452, "grad_norm": 0.6203433871269226, "learning_rate": 2.658687536438694e-05, "loss": 0.07988929748535156, "step": 2021 }, { "epoch": 0.27335868187579215, "grad_norm": 0.25784873962402344, "learning_rate": 2.6582527498502243e-05, "loss": 0.04907023906707764, "step": 2022 }, { "epoch": 0.27349387410223913, "grad_norm": 0.6224722266197205, "learning_rate": 2.6578177221073556e-05, "loss": 0.07737493515014648, "step": 2023 }, { "epoch": 0.2736290663286861, "grad_norm": 0.2951950132846832, "learning_rate": 2.6573824533006628e-05, "loss": 0.08441162109375, "step": 2024 }, { "epoch": 0.2737642585551331, "grad_norm": 0.2897282838821411, "learning_rate": 2.6569469435207712e-05, "loss": 0.06387567520141602, "step": 2025 }, { "epoch": 0.27389945078158007, "grad_norm": 0.47775185108184814, "learning_rate": 2.656511192858356e-05, "loss": 0.04537701606750488, "step": 2026 }, { "epoch": 0.27403464300802705, "grad_norm": 0.587962806224823, "learning_rate": 2.6560752014041438e-05, "loss": 0.062053680419921875, "step": 2027 }, { "epoch": 0.274169835234474, "grad_norm": 0.3327188491821289, "learning_rate": 2.6556389692489098e-05, "loss": 0.07045269012451172, "step": 2028 }, { "epoch": 0.274305027460921, "grad_norm": 0.25019848346710205, "learning_rate": 2.6552024964834795e-05, "loss": 0.08115196228027344, "step": 2029 }, { "epoch": 0.274440219687368, "grad_norm": 0.5601955056190491, "learning_rate": 2.6547657831987286e-05, "loss": 0.09872913360595703, "step": 2030 }, { "epoch": 0.27457541191381496, "grad_norm": 0.5355115532875061, "learning_rate": 2.6543288294855843e-05, "loss": 0.07593822479248047, "step": 2031 }, { "epoch": 0.27471060414026194, "grad_norm": 0.37519311904907227, "learning_rate": 2.653891635435022e-05, "loss": 0.07274961471557617, "step": 2032 }, { "epoch": 0.2748457963667089, "grad_norm": 0.2885375916957855, "learning_rate": 2.653454201138068e-05, "loss": 0.06522369384765625, "step": 2033 }, { "epoch": 0.2749809885931559, "grad_norm": 0.504676342010498, "learning_rate": 2.653016526685798e-05, "loss": 0.08700704574584961, "step": 2034 }, { "epoch": 0.2751161808196029, "grad_norm": 0.4324076175689697, "learning_rate": 2.6525786121693387e-05, "loss": 0.0872201919555664, "step": 2035 }, { "epoch": 0.27525137304604985, "grad_norm": 0.6423711776733398, "learning_rate": 2.652140457679866e-05, "loss": 0.08132648468017578, "step": 2036 }, { "epoch": 0.27538656527249683, "grad_norm": 0.24215656518936157, "learning_rate": 2.6517020633086064e-05, "loss": 0.059365272521972656, "step": 2037 }, { "epoch": 0.2755217574989438, "grad_norm": 0.30316051840782166, "learning_rate": 2.6512634291468354e-05, "loss": 0.07519912719726562, "step": 2038 }, { "epoch": 0.2756569497253908, "grad_norm": 0.16837528347969055, "learning_rate": 2.6508245552858792e-05, "loss": 0.04826211929321289, "step": 2039 }, { "epoch": 0.27579214195183777, "grad_norm": 0.25674694776535034, "learning_rate": 2.6503854418171133e-05, "loss": 0.05576038360595703, "step": 2040 }, { "epoch": 0.27592733417828474, "grad_norm": 0.30849340558052063, "learning_rate": 2.6499460888319644e-05, "loss": 0.03331947326660156, "step": 2041 }, { "epoch": 0.2760625264047317, "grad_norm": 0.7548207640647888, "learning_rate": 2.6495064964219073e-05, "loss": 0.08566570281982422, "step": 2042 }, { "epoch": 0.2761977186311787, "grad_norm": 0.4818129539489746, "learning_rate": 2.649066664678467e-05, "loss": 0.06218671798706055, "step": 2043 }, { "epoch": 0.2763329108576257, "grad_norm": 0.2774481177330017, "learning_rate": 2.6486265936932205e-05, "loss": 0.06703591346740723, "step": 2044 }, { "epoch": 0.27646810308407266, "grad_norm": 0.21266894042491913, "learning_rate": 2.6481862835577915e-05, "loss": 0.05988788604736328, "step": 2045 }, { "epoch": 0.27660329531051964, "grad_norm": 0.47143039107322693, "learning_rate": 2.6477457343638557e-05, "loss": 0.0818624496459961, "step": 2046 }, { "epoch": 0.2767384875369666, "grad_norm": 1.2861460447311401, "learning_rate": 2.647304946203137e-05, "loss": 0.10152912139892578, "step": 2047 }, { "epoch": 0.2768736797634136, "grad_norm": 0.23386774957180023, "learning_rate": 2.6468639191674106e-05, "loss": 0.04874753952026367, "step": 2048 }, { "epoch": 0.2770088719898606, "grad_norm": 0.3961261808872223, "learning_rate": 2.6464226533485007e-05, "loss": 0.08870506286621094, "step": 2049 }, { "epoch": 0.27714406421630755, "grad_norm": 0.2394411414861679, "learning_rate": 2.6459811488382806e-05, "loss": 0.059814453125, "step": 2050 }, { "epoch": 0.27727925644275453, "grad_norm": 0.35150033235549927, "learning_rate": 2.645539405728674e-05, "loss": 0.07271122932434082, "step": 2051 }, { "epoch": 0.2774144486692015, "grad_norm": 0.21934662759304047, "learning_rate": 2.6450974241116545e-05, "loss": 0.04949212074279785, "step": 2052 }, { "epoch": 0.2775496408956485, "grad_norm": 0.33216625452041626, "learning_rate": 2.644655204079245e-05, "loss": 0.056897759437561035, "step": 2053 }, { "epoch": 0.27768483312209546, "grad_norm": 0.2515887916088104, "learning_rate": 2.6442127457235177e-05, "loss": 0.051683902740478516, "step": 2054 }, { "epoch": 0.27782002534854244, "grad_norm": 0.4746667742729187, "learning_rate": 2.6437700491365957e-05, "loss": 0.06813335418701172, "step": 2055 }, { "epoch": 0.2779552175749894, "grad_norm": 0.4989183843135834, "learning_rate": 2.6433271144106495e-05, "loss": 0.053537845611572266, "step": 2056 }, { "epoch": 0.2780904098014364, "grad_norm": 1.0948442220687866, "learning_rate": 2.6428839416379015e-05, "loss": 0.07752418518066406, "step": 2057 }, { "epoch": 0.2782256020278834, "grad_norm": 0.5345621705055237, "learning_rate": 2.642440530910622e-05, "loss": 0.06610107421875, "step": 2058 }, { "epoch": 0.27836079425433036, "grad_norm": 0.6258991956710815, "learning_rate": 2.6419968823211318e-05, "loss": 0.09168887138366699, "step": 2059 }, { "epoch": 0.27849598648077734, "grad_norm": 0.42434990406036377, "learning_rate": 2.641552995961801e-05, "loss": 0.057613372802734375, "step": 2060 }, { "epoch": 0.2786311787072243, "grad_norm": 0.3588142693042755, "learning_rate": 2.6411088719250484e-05, "loss": 0.06316852569580078, "step": 2061 }, { "epoch": 0.2787663709336713, "grad_norm": 0.2978154420852661, "learning_rate": 2.6406645103033442e-05, "loss": 0.0728154182434082, "step": 2062 }, { "epoch": 0.27890156316011827, "grad_norm": 0.35572904348373413, "learning_rate": 2.640219911189206e-05, "loss": 0.046875953674316406, "step": 2063 }, { "epoch": 0.27903675538656525, "grad_norm": 0.22884875535964966, "learning_rate": 2.6397750746752015e-05, "loss": 0.0498356819152832, "step": 2064 }, { "epoch": 0.27917194761301223, "grad_norm": 0.7761141061782837, "learning_rate": 2.6393300008539488e-05, "loss": 0.08547496795654297, "step": 2065 }, { "epoch": 0.2793071398394592, "grad_norm": 0.38458916544914246, "learning_rate": 2.6388846898181143e-05, "loss": 0.10342693328857422, "step": 2066 }, { "epoch": 0.2794423320659062, "grad_norm": 0.43140697479248047, "learning_rate": 2.6384391416604142e-05, "loss": 0.09862852096557617, "step": 2067 }, { "epoch": 0.27957752429235316, "grad_norm": 0.3888213634490967, "learning_rate": 2.6379933564736136e-05, "loss": 0.059099674224853516, "step": 2068 }, { "epoch": 0.27971271651880014, "grad_norm": 0.24982337653636932, "learning_rate": 2.637547334350528e-05, "loss": 0.06424570083618164, "step": 2069 }, { "epoch": 0.2798479087452472, "grad_norm": 0.27125102281570435, "learning_rate": 2.637101075384021e-05, "loss": 0.06197500228881836, "step": 2070 }, { "epoch": 0.27998310097169415, "grad_norm": 0.31710943579673767, "learning_rate": 2.636654579667006e-05, "loss": 0.06935620307922363, "step": 2071 }, { "epoch": 0.28011829319814113, "grad_norm": 0.4451105296611786, "learning_rate": 2.6362078472924467e-05, "loss": 0.1120138168334961, "step": 2072 }, { "epoch": 0.2802534854245881, "grad_norm": 0.6225810647010803, "learning_rate": 2.6357608783533545e-05, "loss": 0.10464096069335938, "step": 2073 }, { "epoch": 0.2803886776510351, "grad_norm": 0.4876011312007904, "learning_rate": 2.6353136729427907e-05, "loss": 0.11539840698242188, "step": 2074 }, { "epoch": 0.28052386987748207, "grad_norm": 0.8319481015205383, "learning_rate": 2.6348662311538657e-05, "loss": 0.10995841026306152, "step": 2075 }, { "epoch": 0.28065906210392905, "grad_norm": 0.3177359104156494, "learning_rate": 2.6344185530797398e-05, "loss": 0.08544445037841797, "step": 2076 }, { "epoch": 0.280794254330376, "grad_norm": 0.4108535051345825, "learning_rate": 2.633970638813622e-05, "loss": 0.06264829635620117, "step": 2077 }, { "epoch": 0.280929446556823, "grad_norm": 0.8796353340148926, "learning_rate": 2.6335224884487698e-05, "loss": 0.10674858093261719, "step": 2078 }, { "epoch": 0.28106463878327, "grad_norm": 0.6604017615318298, "learning_rate": 2.6330741020784905e-05, "loss": 0.08148258924484253, "step": 2079 }, { "epoch": 0.28119983100971696, "grad_norm": 0.3304895758628845, "learning_rate": 2.6326254797961415e-05, "loss": 0.07968354225158691, "step": 2080 }, { "epoch": 0.28133502323616394, "grad_norm": 0.28452926874160767, "learning_rate": 2.6321766216951273e-05, "loss": 0.04527163505554199, "step": 2081 }, { "epoch": 0.2814702154626109, "grad_norm": 0.18396013975143433, "learning_rate": 2.631727527868903e-05, "loss": 0.040405988693237305, "step": 2082 }, { "epoch": 0.2816054076890579, "grad_norm": 0.4389074146747589, "learning_rate": 2.6312781984109727e-05, "loss": 0.0720205307006836, "step": 2083 }, { "epoch": 0.2817405999155049, "grad_norm": 0.4166303277015686, "learning_rate": 2.6308286334148882e-05, "loss": 0.06545305252075195, "step": 2084 }, { "epoch": 0.28187579214195185, "grad_norm": 0.5256975889205933, "learning_rate": 2.630378832974252e-05, "loss": 0.07265925407409668, "step": 2085 }, { "epoch": 0.28201098436839883, "grad_norm": 0.4502474367618561, "learning_rate": 2.6299287971827154e-05, "loss": 0.0744485855102539, "step": 2086 }, { "epoch": 0.2821461765948458, "grad_norm": 0.4261792004108429, "learning_rate": 2.629478526133977e-05, "loss": 0.061431884765625, "step": 2087 }, { "epoch": 0.2822813688212928, "grad_norm": 0.6339582204818726, "learning_rate": 2.6290280199217867e-05, "loss": 0.09616327285766602, "step": 2088 }, { "epoch": 0.28241656104773977, "grad_norm": 0.6434690952301025, "learning_rate": 2.6285772786399424e-05, "loss": 0.06148338317871094, "step": 2089 }, { "epoch": 0.28255175327418675, "grad_norm": 0.30254992842674255, "learning_rate": 2.6281263023822894e-05, "loss": 0.06952857971191406, "step": 2090 }, { "epoch": 0.2826869455006337, "grad_norm": 0.37189915776252747, "learning_rate": 2.627675091242725e-05, "loss": 0.06276750564575195, "step": 2091 }, { "epoch": 0.2828221377270807, "grad_norm": 0.29637306928634644, "learning_rate": 2.627223645315193e-05, "loss": 0.06892728805541992, "step": 2092 }, { "epoch": 0.2829573299535277, "grad_norm": 0.3086879253387451, "learning_rate": 2.6267719646936868e-05, "loss": 0.05177867412567139, "step": 2093 }, { "epoch": 0.28309252217997466, "grad_norm": 0.23939448595046997, "learning_rate": 2.626320049472249e-05, "loss": 0.06682491302490234, "step": 2094 }, { "epoch": 0.28322771440642164, "grad_norm": 0.5124935507774353, "learning_rate": 2.6258678997449705e-05, "loss": 0.08545827865600586, "step": 2095 }, { "epoch": 0.2833629066328686, "grad_norm": 0.5773860216140747, "learning_rate": 2.6254155156059912e-05, "loss": 0.09173870086669922, "step": 2096 }, { "epoch": 0.2834980988593156, "grad_norm": 0.46175697445869446, "learning_rate": 2.6249628971495006e-05, "loss": 0.0669870376586914, "step": 2097 }, { "epoch": 0.2836332910857626, "grad_norm": 0.2535857856273651, "learning_rate": 2.6245100444697353e-05, "loss": 0.07016611099243164, "step": 2098 }, { "epoch": 0.28376848331220955, "grad_norm": 0.20684558153152466, "learning_rate": 2.6240569576609824e-05, "loss": 0.04796886444091797, "step": 2099 }, { "epoch": 0.28390367553865653, "grad_norm": 0.20866382122039795, "learning_rate": 2.623603636817577e-05, "loss": 0.0501713752746582, "step": 2100 }, { "epoch": 0.2840388677651035, "grad_norm": 0.2257915437221527, "learning_rate": 2.6231500820339024e-05, "loss": 0.05210685729980469, "step": 2101 }, { "epoch": 0.2841740599915505, "grad_norm": 0.30343684554100037, "learning_rate": 2.6226962934043913e-05, "loss": 0.06421279907226562, "step": 2102 }, { "epoch": 0.28430925221799747, "grad_norm": 0.3582315742969513, "learning_rate": 2.622242271023525e-05, "loss": 0.054036617279052734, "step": 2103 }, { "epoch": 0.28444444444444444, "grad_norm": 0.523270845413208, "learning_rate": 2.6217880149858333e-05, "loss": 0.07139182090759277, "step": 2104 }, { "epoch": 0.2845796366708914, "grad_norm": 0.23183846473693848, "learning_rate": 2.621333525385895e-05, "loss": 0.06073260307312012, "step": 2105 }, { "epoch": 0.2847148288973384, "grad_norm": 0.30376967787742615, "learning_rate": 2.6208788023183366e-05, "loss": 0.08838939666748047, "step": 2106 }, { "epoch": 0.2848500211237854, "grad_norm": 0.5577009916305542, "learning_rate": 2.6204238458778346e-05, "loss": 0.09938955307006836, "step": 2107 }, { "epoch": 0.28498521335023236, "grad_norm": 0.3254562020301819, "learning_rate": 2.619968656159113e-05, "loss": 0.04650545120239258, "step": 2108 }, { "epoch": 0.28512040557667934, "grad_norm": 0.3568468391895294, "learning_rate": 2.6195132332569445e-05, "loss": 0.09248828887939453, "step": 2109 }, { "epoch": 0.2852555978031263, "grad_norm": 0.2698529064655304, "learning_rate": 2.619057577266151e-05, "loss": 0.04743695259094238, "step": 2110 }, { "epoch": 0.2853907900295733, "grad_norm": 0.3744509220123291, "learning_rate": 2.6186016882816027e-05, "loss": 0.08064508438110352, "step": 2111 }, { "epoch": 0.28552598225602027, "grad_norm": 0.31511595845222473, "learning_rate": 2.6181455663982175e-05, "loss": 0.06336402893066406, "step": 2112 }, { "epoch": 0.28566117448246725, "grad_norm": 0.7368386387825012, "learning_rate": 2.6176892117109628e-05, "loss": 0.08360576629638672, "step": 2113 }, { "epoch": 0.28579636670891423, "grad_norm": 0.2987616956233978, "learning_rate": 2.617232624314854e-05, "loss": 0.07652616500854492, "step": 2114 }, { "epoch": 0.2859315589353612, "grad_norm": 0.2696145176887512, "learning_rate": 2.616775804304955e-05, "loss": 0.05968737602233887, "step": 2115 }, { "epoch": 0.2860667511618082, "grad_norm": 0.23673371970653534, "learning_rate": 2.616318751776378e-05, "loss": 0.03470277786254883, "step": 2116 }, { "epoch": 0.28620194338825516, "grad_norm": 0.3378492295742035, "learning_rate": 2.615861466824284e-05, "loss": 0.04506063461303711, "step": 2117 }, { "epoch": 0.28633713561470214, "grad_norm": 0.5445701479911804, "learning_rate": 2.6154039495438825e-05, "loss": 0.058304548263549805, "step": 2118 }, { "epoch": 0.2864723278411491, "grad_norm": 0.31142300367355347, "learning_rate": 2.6149462000304302e-05, "loss": 0.06822836399078369, "step": 2119 }, { "epoch": 0.2866075200675961, "grad_norm": 0.14689652621746063, "learning_rate": 2.6144882183792335e-05, "loss": 0.04671072959899902, "step": 2120 }, { "epoch": 0.2867427122940431, "grad_norm": 0.29540175199508667, "learning_rate": 2.6140300046856468e-05, "loss": 0.06613868474960327, "step": 2121 }, { "epoch": 0.28687790452049006, "grad_norm": 0.43784645199775696, "learning_rate": 2.6135715590450722e-05, "loss": 0.07046675682067871, "step": 2122 }, { "epoch": 0.28701309674693704, "grad_norm": 0.34985587000846863, "learning_rate": 2.6131128815529608e-05, "loss": 0.07567620277404785, "step": 2123 }, { "epoch": 0.287148288973384, "grad_norm": 0.32104727625846863, "learning_rate": 2.6126539723048115e-05, "loss": 0.0802912712097168, "step": 2124 }, { "epoch": 0.287283481199831, "grad_norm": 0.33987459540367126, "learning_rate": 2.612194831396172e-05, "loss": 0.05086708068847656, "step": 2125 }, { "epoch": 0.28741867342627797, "grad_norm": 0.2595984935760498, "learning_rate": 2.611735458922637e-05, "loss": 0.05791115760803223, "step": 2126 }, { "epoch": 0.28755386565272495, "grad_norm": 0.7451924085617065, "learning_rate": 2.6112758549798515e-05, "loss": 0.0922541618347168, "step": 2127 }, { "epoch": 0.2876890578791719, "grad_norm": 0.3935089409351349, "learning_rate": 2.610816019663507e-05, "loss": 0.09931564331054688, "step": 2128 }, { "epoch": 0.2878242501056189, "grad_norm": 0.4284669756889343, "learning_rate": 2.6103559530693436e-05, "loss": 0.0782618522644043, "step": 2129 }, { "epoch": 0.2879594423320659, "grad_norm": 0.2335512638092041, "learning_rate": 2.6098956552931495e-05, "loss": 0.0554194450378418, "step": 2130 }, { "epoch": 0.28809463455851286, "grad_norm": 0.4409209191799164, "learning_rate": 2.6094351264307613e-05, "loss": 0.09137964248657227, "step": 2131 }, { "epoch": 0.28822982678495984, "grad_norm": 0.33390122652053833, "learning_rate": 2.6089743665780635e-05, "loss": 0.07697200775146484, "step": 2132 }, { "epoch": 0.2883650190114068, "grad_norm": 0.31019923090934753, "learning_rate": 2.6085133758309887e-05, "loss": 0.08348321914672852, "step": 2133 }, { "epoch": 0.2885002112378538, "grad_norm": 0.24304573237895966, "learning_rate": 2.6080521542855182e-05, "loss": 0.06161618232727051, "step": 2134 }, { "epoch": 0.2886354034643008, "grad_norm": 0.505469560623169, "learning_rate": 2.60759070203768e-05, "loss": 0.06316828727722168, "step": 2135 }, { "epoch": 0.28877059569074776, "grad_norm": 0.14189660549163818, "learning_rate": 2.607129019183551e-05, "loss": 0.03790163993835449, "step": 2136 }, { "epoch": 0.28890578791719473, "grad_norm": 0.4363943636417389, "learning_rate": 2.6066671058192566e-05, "loss": 0.0781412124633789, "step": 2137 }, { "epoch": 0.28904098014364177, "grad_norm": 0.4986313283443451, "learning_rate": 2.606204962040969e-05, "loss": 0.11483097076416016, "step": 2138 }, { "epoch": 0.28917617237008875, "grad_norm": 0.37120503187179565, "learning_rate": 2.6057425879449095e-05, "loss": 0.06394124031066895, "step": 2139 }, { "epoch": 0.2893113645965357, "grad_norm": 0.5296469330787659, "learning_rate": 2.605279983627347e-05, "loss": 0.05107307434082031, "step": 2140 }, { "epoch": 0.2894465568229827, "grad_norm": 0.85414057970047, "learning_rate": 2.6048171491845974e-05, "loss": 0.09617280960083008, "step": 2141 }, { "epoch": 0.2895817490494297, "grad_norm": 0.6802937388420105, "learning_rate": 2.604354084713026e-05, "loss": 0.06146836280822754, "step": 2142 }, { "epoch": 0.28971694127587666, "grad_norm": 0.1778324544429779, "learning_rate": 2.6038907903090446e-05, "loss": 0.04071521759033203, "step": 2143 }, { "epoch": 0.28985213350232364, "grad_norm": 0.44937044382095337, "learning_rate": 2.6034272660691143e-05, "loss": 0.08974599838256836, "step": 2144 }, { "epoch": 0.2899873257287706, "grad_norm": 0.3087543249130249, "learning_rate": 2.6029635120897434e-05, "loss": 0.05942583084106445, "step": 2145 }, { "epoch": 0.2901225179552176, "grad_norm": 0.587215781211853, "learning_rate": 2.6024995284674867e-05, "loss": 0.0678091049194336, "step": 2146 }, { "epoch": 0.2902577101816646, "grad_norm": 0.6224071979522705, "learning_rate": 2.6020353152989496e-05, "loss": 0.06561803817749023, "step": 2147 }, { "epoch": 0.29039290240811155, "grad_norm": 0.29002130031585693, "learning_rate": 2.601570872680783e-05, "loss": 0.05462241172790527, "step": 2148 }, { "epoch": 0.29052809463455853, "grad_norm": 0.25732851028442383, "learning_rate": 2.6011062007096857e-05, "loss": 0.050579190254211426, "step": 2149 }, { "epoch": 0.2906632868610055, "grad_norm": 0.25392258167266846, "learning_rate": 2.6006412994824067e-05, "loss": 0.08182525634765625, "step": 2150 }, { "epoch": 0.2907984790874525, "grad_norm": 0.3276512622833252, "learning_rate": 2.6001761690957388e-05, "loss": 0.058895111083984375, "step": 2151 }, { "epoch": 0.29093367131389947, "grad_norm": 0.27798473834991455, "learning_rate": 2.5997108096465263e-05, "loss": 0.05332183837890625, "step": 2152 }, { "epoch": 0.29106886354034645, "grad_norm": 0.6182574033737183, "learning_rate": 2.599245221231659e-05, "loss": 0.10705089569091797, "step": 2153 }, { "epoch": 0.2912040557667934, "grad_norm": 0.4207114279270172, "learning_rate": 2.5987794039480743e-05, "loss": 0.07284879684448242, "step": 2154 }, { "epoch": 0.2913392479932404, "grad_norm": 0.28705453872680664, "learning_rate": 2.5983133578927584e-05, "loss": 0.08143019676208496, "step": 2155 }, { "epoch": 0.2914744402196874, "grad_norm": 0.31432029604911804, "learning_rate": 2.5978470831627444e-05, "loss": 0.062462806701660156, "step": 2156 }, { "epoch": 0.29160963244613436, "grad_norm": 0.327834814786911, "learning_rate": 2.597380579855113e-05, "loss": 0.05445361137390137, "step": 2157 }, { "epoch": 0.29174482467258134, "grad_norm": 0.19253234565258026, "learning_rate": 2.5969138480669936e-05, "loss": 0.04073488712310791, "step": 2158 }, { "epoch": 0.2918800168990283, "grad_norm": 0.35155487060546875, "learning_rate": 2.5964468878955614e-05, "loss": 0.09739065170288086, "step": 2159 }, { "epoch": 0.2920152091254753, "grad_norm": 0.8452580571174622, "learning_rate": 2.5959796994380397e-05, "loss": 0.09544754028320312, "step": 2160 }, { "epoch": 0.2921504013519223, "grad_norm": 0.5045964121818542, "learning_rate": 2.5955122827917004e-05, "loss": 0.06953799724578857, "step": 2161 }, { "epoch": 0.29228559357836925, "grad_norm": 0.28637591004371643, "learning_rate": 2.595044638053862e-05, "loss": 0.06774139404296875, "step": 2162 }, { "epoch": 0.29242078580481623, "grad_norm": 0.358574241399765, "learning_rate": 2.59457676532189e-05, "loss": 0.07538914680480957, "step": 2163 }, { "epoch": 0.2925559780312632, "grad_norm": 0.18713060021400452, "learning_rate": 2.594108664693199e-05, "loss": 0.04306435585021973, "step": 2164 }, { "epoch": 0.2926911702577102, "grad_norm": 0.2859300374984741, "learning_rate": 2.5936403362652494e-05, "loss": 0.05262899398803711, "step": 2165 }, { "epoch": 0.29282636248415717, "grad_norm": 0.22714892029762268, "learning_rate": 2.5931717801355497e-05, "loss": 0.057476043701171875, "step": 2166 }, { "epoch": 0.29296155471060414, "grad_norm": 0.4381961524486542, "learning_rate": 2.5927029964016556e-05, "loss": 0.08054375648498535, "step": 2167 }, { "epoch": 0.2930967469370511, "grad_norm": 0.5504139065742493, "learning_rate": 2.592233985161171e-05, "loss": 0.11686086654663086, "step": 2168 }, { "epoch": 0.2932319391634981, "grad_norm": 0.44838497042655945, "learning_rate": 2.5917647465117463e-05, "loss": 0.05600118637084961, "step": 2169 }, { "epoch": 0.2933671313899451, "grad_norm": 0.3687882721424103, "learning_rate": 2.591295280551079e-05, "loss": 0.09234809875488281, "step": 2170 }, { "epoch": 0.29350232361639206, "grad_norm": 0.3056941330432892, "learning_rate": 2.590825587376915e-05, "loss": 0.07508683204650879, "step": 2171 }, { "epoch": 0.29363751584283904, "grad_norm": 0.49831512570381165, "learning_rate": 2.5903556670870464e-05, "loss": 0.08393239974975586, "step": 2172 }, { "epoch": 0.293772708069286, "grad_norm": 0.40404629707336426, "learning_rate": 2.589885519779314e-05, "loss": 0.07346916198730469, "step": 2173 }, { "epoch": 0.293907900295733, "grad_norm": 0.40110450983047485, "learning_rate": 2.5894151455516043e-05, "loss": 0.048647165298461914, "step": 2174 }, { "epoch": 0.29404309252217997, "grad_norm": 0.8366751074790955, "learning_rate": 2.5889445445018513e-05, "loss": 0.08992195129394531, "step": 2175 }, { "epoch": 0.29417828474862695, "grad_norm": 0.5584591031074524, "learning_rate": 2.5884737167280375e-05, "loss": 0.08277320861816406, "step": 2176 }, { "epoch": 0.29431347697507393, "grad_norm": 0.3746296465396881, "learning_rate": 2.5880026623281914e-05, "loss": 0.051294565200805664, "step": 2177 }, { "epoch": 0.2944486692015209, "grad_norm": 0.6029298901557922, "learning_rate": 2.5875313814003892e-05, "loss": 0.09947824478149414, "step": 2178 }, { "epoch": 0.2945838614279679, "grad_norm": 0.3427838683128357, "learning_rate": 2.587059874042754e-05, "loss": 0.04492020606994629, "step": 2179 }, { "epoch": 0.29471905365441486, "grad_norm": 0.325602650642395, "learning_rate": 2.5865881403534557e-05, "loss": 0.05062389373779297, "step": 2180 }, { "epoch": 0.29485424588086184, "grad_norm": 0.5873908400535583, "learning_rate": 2.5861161804307124e-05, "loss": 0.07400941848754883, "step": 2181 }, { "epoch": 0.2949894381073088, "grad_norm": 0.5059689879417419, "learning_rate": 2.5856439943727886e-05, "loss": 0.07167625427246094, "step": 2182 }, { "epoch": 0.2951246303337558, "grad_norm": 0.5450960993766785, "learning_rate": 2.5851715822779954e-05, "loss": 0.06877422332763672, "step": 2183 }, { "epoch": 0.2952598225602028, "grad_norm": 0.6907166242599487, "learning_rate": 2.5846989442446926e-05, "loss": 0.08760929107666016, "step": 2184 }, { "epoch": 0.29539501478664976, "grad_norm": 0.43607136607170105, "learning_rate": 2.584226080371285e-05, "loss": 0.06807327270507812, "step": 2185 }, { "epoch": 0.29553020701309674, "grad_norm": 0.43293964862823486, "learning_rate": 2.5837529907562258e-05, "loss": 0.07244682312011719, "step": 2186 }, { "epoch": 0.2956653992395437, "grad_norm": 0.4194972515106201, "learning_rate": 2.5832796754980138e-05, "loss": 0.06484603881835938, "step": 2187 }, { "epoch": 0.2958005914659907, "grad_norm": 0.8278330564498901, "learning_rate": 2.5828061346951974e-05, "loss": 0.08581256866455078, "step": 2188 }, { "epoch": 0.29593578369243767, "grad_norm": 0.37330037355422974, "learning_rate": 2.5823323684463693e-05, "loss": 0.07082855701446533, "step": 2189 }, { "epoch": 0.29607097591888465, "grad_norm": 0.2774165868759155, "learning_rate": 2.5818583768501708e-05, "loss": 0.06768918037414551, "step": 2190 }, { "epoch": 0.2962061681453316, "grad_norm": 0.23821696639060974, "learning_rate": 2.5813841600052887e-05, "loss": 0.060115814208984375, "step": 2191 }, { "epoch": 0.2963413603717786, "grad_norm": 0.4634384512901306, "learning_rate": 2.580909718010458e-05, "loss": 0.08556747436523438, "step": 2192 }, { "epoch": 0.2964765525982256, "grad_norm": 0.3078383803367615, "learning_rate": 2.58043505096446e-05, "loss": 0.07528328895568848, "step": 2193 }, { "epoch": 0.29661174482467256, "grad_norm": 0.4035947620868683, "learning_rate": 2.5799601589661223e-05, "loss": 0.09781312942504883, "step": 2194 }, { "epoch": 0.29674693705111954, "grad_norm": 0.21481718122959137, "learning_rate": 2.579485042114321e-05, "loss": 0.032877445220947266, "step": 2195 }, { "epoch": 0.2968821292775665, "grad_norm": 0.19529657065868378, "learning_rate": 2.5790097005079766e-05, "loss": 0.04868650436401367, "step": 2196 }, { "epoch": 0.2970173215040135, "grad_norm": 0.37735462188720703, "learning_rate": 2.5785341342460595e-05, "loss": 0.06742596626281738, "step": 2197 }, { "epoch": 0.2971525137304605, "grad_norm": 0.634418249130249, "learning_rate": 2.5780583434275837e-05, "loss": 0.06396782398223877, "step": 2198 }, { "epoch": 0.29728770595690746, "grad_norm": 0.2229696363210678, "learning_rate": 2.577582328151612e-05, "loss": 0.05273914337158203, "step": 2199 }, { "epoch": 0.29742289818335443, "grad_norm": 0.3070399761199951, "learning_rate": 2.5771060885172532e-05, "loss": 0.048244476318359375, "step": 2200 }, { "epoch": 0.2975580904098014, "grad_norm": 0.4507209062576294, "learning_rate": 2.5766296246236628e-05, "loss": 0.07697296142578125, "step": 2201 }, { "epoch": 0.2976932826362484, "grad_norm": 0.3317039906978607, "learning_rate": 2.5761529365700437e-05, "loss": 0.06859683990478516, "step": 2202 }, { "epoch": 0.29782847486269537, "grad_norm": 0.24445976316928864, "learning_rate": 2.5756760244556445e-05, "loss": 0.06505942344665527, "step": 2203 }, { "epoch": 0.29796366708914235, "grad_norm": 0.3230881094932556, "learning_rate": 2.5751988883797603e-05, "loss": 0.045443058013916016, "step": 2204 }, { "epoch": 0.2980988593155893, "grad_norm": 0.5358401536941528, "learning_rate": 2.574721528441734e-05, "loss": 0.08615493774414062, "step": 2205 }, { "epoch": 0.29823405154203636, "grad_norm": 0.5425900816917419, "learning_rate": 2.5742439447409545e-05, "loss": 0.0880136489868164, "step": 2206 }, { "epoch": 0.29836924376848334, "grad_norm": 0.29647156596183777, "learning_rate": 2.5737661373768568e-05, "loss": 0.07267105579376221, "step": 2207 }, { "epoch": 0.2985044359949303, "grad_norm": 0.20283657312393188, "learning_rate": 2.5732881064489237e-05, "loss": 0.05887603759765625, "step": 2208 }, { "epoch": 0.2986396282213773, "grad_norm": 0.31360796093940735, "learning_rate": 2.572809852056683e-05, "loss": 0.06426763534545898, "step": 2209 }, { "epoch": 0.2987748204478243, "grad_norm": 0.32841983437538147, "learning_rate": 2.572331374299711e-05, "loss": 0.08529233932495117, "step": 2210 }, { "epoch": 0.29891001267427125, "grad_norm": 0.5919017791748047, "learning_rate": 2.5718526732776276e-05, "loss": 0.10047054290771484, "step": 2211 }, { "epoch": 0.29904520490071823, "grad_norm": 0.4126860797405243, "learning_rate": 2.5713737490901023e-05, "loss": 0.10191154479980469, "step": 2212 }, { "epoch": 0.2991803971271652, "grad_norm": 0.3165905475616455, "learning_rate": 2.570894601836849e-05, "loss": 0.07066822052001953, "step": 2213 }, { "epoch": 0.2993155893536122, "grad_norm": 0.6350536942481995, "learning_rate": 2.5704152316176287e-05, "loss": 0.08958101272583008, "step": 2214 }, { "epoch": 0.29945078158005917, "grad_norm": 0.2643260657787323, "learning_rate": 2.5699356385322487e-05, "loss": 0.09143209457397461, "step": 2215 }, { "epoch": 0.29958597380650615, "grad_norm": 0.22539664804935455, "learning_rate": 2.5694558226805643e-05, "loss": 0.04373311996459961, "step": 2216 }, { "epoch": 0.2997211660329531, "grad_norm": 0.3142715394496918, "learning_rate": 2.568975784162474e-05, "loss": 0.06304371356964111, "step": 2217 }, { "epoch": 0.2998563582594001, "grad_norm": 0.13800500333309174, "learning_rate": 2.5684955230779245e-05, "loss": 0.037569522857666016, "step": 2218 }, { "epoch": 0.2999915504858471, "grad_norm": 0.343159019947052, "learning_rate": 2.5680150395269096e-05, "loss": 0.08579206466674805, "step": 2219 }, { "epoch": 0.30012674271229406, "grad_norm": 0.32813340425491333, "learning_rate": 2.5675343336094683e-05, "loss": 0.0595088005065918, "step": 2220 }, { "epoch": 0.30026193493874104, "grad_norm": 0.2609620988368988, "learning_rate": 2.5670534054256855e-05, "loss": 0.057581186294555664, "step": 2221 }, { "epoch": 0.300397127165188, "grad_norm": 0.16500750184059143, "learning_rate": 2.5665722550756937e-05, "loss": 0.0329890251159668, "step": 2222 }, { "epoch": 0.300532319391635, "grad_norm": 0.2694440484046936, "learning_rate": 2.5660908826596707e-05, "loss": 0.06969094276428223, "step": 2223 }, { "epoch": 0.300667511618082, "grad_norm": 0.3647930324077606, "learning_rate": 2.5656092882778413e-05, "loss": 0.07144689559936523, "step": 2224 }, { "epoch": 0.30080270384452895, "grad_norm": 0.3648439645767212, "learning_rate": 2.565127472030475e-05, "loss": 0.09749078750610352, "step": 2225 }, { "epoch": 0.30093789607097593, "grad_norm": 0.189004048705101, "learning_rate": 2.5646454340178894e-05, "loss": 0.034863948822021484, "step": 2226 }, { "epoch": 0.3010730882974229, "grad_norm": 0.2944033741950989, "learning_rate": 2.564163174340447e-05, "loss": 0.046622276306152344, "step": 2227 }, { "epoch": 0.3012082805238699, "grad_norm": 0.4640463590621948, "learning_rate": 2.5636806930985565e-05, "loss": 0.10419654846191406, "step": 2228 }, { "epoch": 0.30134347275031687, "grad_norm": 0.44318172335624695, "learning_rate": 2.5631979903926738e-05, "loss": 0.05919647216796875, "step": 2229 }, { "epoch": 0.30147866497676384, "grad_norm": 0.6915941834449768, "learning_rate": 2.5627150663233e-05, "loss": 0.0716242790222168, "step": 2230 }, { "epoch": 0.3016138572032108, "grad_norm": 0.23209555447101593, "learning_rate": 2.5622319209909817e-05, "loss": 0.06306028366088867, "step": 2231 }, { "epoch": 0.3017490494296578, "grad_norm": 0.3597882091999054, "learning_rate": 2.5617485544963135e-05, "loss": 0.06644511222839355, "step": 2232 }, { "epoch": 0.3018842416561048, "grad_norm": 0.32142817974090576, "learning_rate": 2.561264966939934e-05, "loss": 0.0655069351196289, "step": 2233 }, { "epoch": 0.30201943388255176, "grad_norm": 0.5331870913505554, "learning_rate": 2.5607811584225294e-05, "loss": 0.07327508926391602, "step": 2234 }, { "epoch": 0.30215462610899874, "grad_norm": 0.38085871934890747, "learning_rate": 2.5602971290448305e-05, "loss": 0.05700969696044922, "step": 2235 }, { "epoch": 0.3022898183354457, "grad_norm": 0.5730758309364319, "learning_rate": 2.5598128789076152e-05, "loss": 0.09220600128173828, "step": 2236 }, { "epoch": 0.3024250105618927, "grad_norm": 0.24458611011505127, "learning_rate": 2.559328408111707e-05, "loss": 0.06153547763824463, "step": 2237 }, { "epoch": 0.30256020278833967, "grad_norm": 0.24315804243087769, "learning_rate": 2.5588437167579755e-05, "loss": 0.06522560119628906, "step": 2238 }, { "epoch": 0.30269539501478665, "grad_norm": 0.49711737036705017, "learning_rate": 2.558358804947335e-05, "loss": 0.0958859920501709, "step": 2239 }, { "epoch": 0.30283058724123363, "grad_norm": 0.1731814742088318, "learning_rate": 2.557873672780748e-05, "loss": 0.044841468334198, "step": 2240 }, { "epoch": 0.3029657794676806, "grad_norm": 0.40775710344314575, "learning_rate": 2.557388320359221e-05, "loss": 0.0728074312210083, "step": 2241 }, { "epoch": 0.3031009716941276, "grad_norm": 0.35549649596214294, "learning_rate": 2.5569027477838068e-05, "loss": 0.08945083618164062, "step": 2242 }, { "epoch": 0.30323616392057456, "grad_norm": 0.6588951349258423, "learning_rate": 2.5564169551556044e-05, "loss": 0.06209409236907959, "step": 2243 }, { "epoch": 0.30337135614702154, "grad_norm": 0.5763364434242249, "learning_rate": 2.5559309425757586e-05, "loss": 0.08461761474609375, "step": 2244 }, { "epoch": 0.3035065483734685, "grad_norm": 0.5224930047988892, "learning_rate": 2.5554447101454597e-05, "loss": 0.07148313522338867, "step": 2245 }, { "epoch": 0.3036417405999155, "grad_norm": 0.22069016098976135, "learning_rate": 2.554958257965944e-05, "loss": 0.05002164840698242, "step": 2246 }, { "epoch": 0.3037769328263625, "grad_norm": 0.2523531913757324, "learning_rate": 2.554471586138493e-05, "loss": 0.04245567321777344, "step": 2247 }, { "epoch": 0.30391212505280946, "grad_norm": 0.5251622796058655, "learning_rate": 2.5539846947644342e-05, "loss": 0.05673050880432129, "step": 2248 }, { "epoch": 0.30404731727925643, "grad_norm": 0.7554552555084229, "learning_rate": 2.5534975839451416e-05, "loss": 0.07118797302246094, "step": 2249 }, { "epoch": 0.3041825095057034, "grad_norm": 0.39777493476867676, "learning_rate": 2.5530102537820348e-05, "loss": 0.07718515396118164, "step": 2250 }, { "epoch": 0.3043177017321504, "grad_norm": 0.32851290702819824, "learning_rate": 2.5525227043765774e-05, "loss": 0.07964611053466797, "step": 2251 }, { "epoch": 0.30445289395859737, "grad_norm": 0.47780588269233704, "learning_rate": 2.55203493583028e-05, "loss": 0.05550628900527954, "step": 2252 }, { "epoch": 0.30458808618504435, "grad_norm": 0.217255637049675, "learning_rate": 2.551546948244699e-05, "loss": 0.05336284637451172, "step": 2253 }, { "epoch": 0.3047232784114913, "grad_norm": 0.5100780129432678, "learning_rate": 2.551058741721436e-05, "loss": 0.08109474182128906, "step": 2254 }, { "epoch": 0.3048584706379383, "grad_norm": 0.4553520083427429, "learning_rate": 2.550570316362138e-05, "loss": 0.07550406455993652, "step": 2255 }, { "epoch": 0.3049936628643853, "grad_norm": 0.3899535536766052, "learning_rate": 2.5500816722684975e-05, "loss": 0.05846214294433594, "step": 2256 }, { "epoch": 0.30512885509083226, "grad_norm": 0.4410453736782074, "learning_rate": 2.549592809542253e-05, "loss": 0.06292164325714111, "step": 2257 }, { "epoch": 0.30526404731727924, "grad_norm": 0.3141182065010071, "learning_rate": 2.549103728285189e-05, "loss": 0.07396173477172852, "step": 2258 }, { "epoch": 0.3053992395437262, "grad_norm": 0.4648004472255707, "learning_rate": 2.548614428599134e-05, "loss": 0.08410477638244629, "step": 2259 }, { "epoch": 0.3055344317701732, "grad_norm": 0.3898724615573883, "learning_rate": 2.5481249105859633e-05, "loss": 0.05596303939819336, "step": 2260 }, { "epoch": 0.3056696239966202, "grad_norm": 0.3171229660511017, "learning_rate": 2.5476351743475964e-05, "loss": 0.06770706176757812, "step": 2261 }, { "epoch": 0.30580481622306716, "grad_norm": 0.3378964364528656, "learning_rate": 2.547145219986e-05, "loss": 0.06552982330322266, "step": 2262 }, { "epoch": 0.30594000844951413, "grad_norm": 0.288042277097702, "learning_rate": 2.5466550476031846e-05, "loss": 0.057799339294433594, "step": 2263 }, { "epoch": 0.3060752006759611, "grad_norm": 0.8326659202575684, "learning_rate": 2.5461646573012072e-05, "loss": 0.09737968444824219, "step": 2264 }, { "epoch": 0.3062103929024081, "grad_norm": 0.615618109703064, "learning_rate": 2.5456740491821687e-05, "loss": 0.0564122200012207, "step": 2265 }, { "epoch": 0.30634558512885507, "grad_norm": 0.278624027967453, "learning_rate": 2.5451832233482172e-05, "loss": 0.07700729370117188, "step": 2266 }, { "epoch": 0.30648077735530205, "grad_norm": 0.7243728637695312, "learning_rate": 2.544692179901545e-05, "loss": 0.08532524108886719, "step": 2267 }, { "epoch": 0.306615969581749, "grad_norm": 0.31695494055747986, "learning_rate": 2.5442009189443902e-05, "loss": 0.04893636703491211, "step": 2268 }, { "epoch": 0.306751161808196, "grad_norm": 0.41937071084976196, "learning_rate": 2.5437094405790355e-05, "loss": 0.050414085388183594, "step": 2269 }, { "epoch": 0.306886354034643, "grad_norm": 0.7431758642196655, "learning_rate": 2.5432177449078096e-05, "loss": 0.08933520317077637, "step": 2270 }, { "epoch": 0.30702154626108996, "grad_norm": 0.32833048701286316, "learning_rate": 2.5427258320330857e-05, "loss": 0.03657245635986328, "step": 2271 }, { "epoch": 0.30715673848753694, "grad_norm": 0.326952189207077, "learning_rate": 2.5422337020572835e-05, "loss": 0.057470083236694336, "step": 2272 }, { "epoch": 0.3072919307139839, "grad_norm": 0.21216987073421478, "learning_rate": 2.5417413550828667e-05, "loss": 0.052736759185791016, "step": 2273 }, { "epoch": 0.30742712294043095, "grad_norm": 0.2672852575778961, "learning_rate": 2.5412487912123444e-05, "loss": 0.06456470489501953, "step": 2274 }, { "epoch": 0.30756231516687793, "grad_norm": 0.2392459213733673, "learning_rate": 2.5407560105482708e-05, "loss": 0.06494379043579102, "step": 2275 }, { "epoch": 0.3076975073933249, "grad_norm": 0.20050475001335144, "learning_rate": 2.540263013193246e-05, "loss": 0.0528712272644043, "step": 2276 }, { "epoch": 0.3078326996197719, "grad_norm": 0.5630680322647095, "learning_rate": 2.539769799249915e-05, "loss": 0.0738229751586914, "step": 2277 }, { "epoch": 0.30796789184621887, "grad_norm": 0.27651989459991455, "learning_rate": 2.5392763688209666e-05, "loss": 0.06695342063903809, "step": 2278 }, { "epoch": 0.30810308407266584, "grad_norm": 0.5245217084884644, "learning_rate": 2.5387827220091362e-05, "loss": 0.07639026641845703, "step": 2279 }, { "epoch": 0.3082382762991128, "grad_norm": 0.5639858245849609, "learning_rate": 2.538288858917204e-05, "loss": 0.0779733657836914, "step": 2280 }, { "epoch": 0.3083734685255598, "grad_norm": 50.24871826171875, "learning_rate": 2.5377947796479936e-05, "loss": 0.07575416564941406, "step": 2281 }, { "epoch": 0.3085086607520068, "grad_norm": 0.19175417721271515, "learning_rate": 2.537300484304377e-05, "loss": 0.049895405769348145, "step": 2282 }, { "epoch": 0.30864385297845376, "grad_norm": 0.5713838934898376, "learning_rate": 2.536805972989267e-05, "loss": 0.05373960733413696, "step": 2283 }, { "epoch": 0.30877904520490074, "grad_norm": 0.43302640318870544, "learning_rate": 2.5363112458056252e-05, "loss": 0.061408042907714844, "step": 2284 }, { "epoch": 0.3089142374313477, "grad_norm": 0.41834351420402527, "learning_rate": 2.5358163028564552e-05, "loss": 0.09498310089111328, "step": 2285 }, { "epoch": 0.3090494296577947, "grad_norm": 0.1747867912054062, "learning_rate": 2.535321144244808e-05, "loss": 0.053493499755859375, "step": 2286 }, { "epoch": 0.3091846218842417, "grad_norm": 0.26045501232147217, "learning_rate": 2.534825770073777e-05, "loss": 0.06392765045166016, "step": 2287 }, { "epoch": 0.30931981411068865, "grad_norm": 0.43064865469932556, "learning_rate": 2.5343301804465026e-05, "loss": 0.08998680114746094, "step": 2288 }, { "epoch": 0.30945500633713563, "grad_norm": 0.46299099922180176, "learning_rate": 2.533834375466169e-05, "loss": 0.08238840103149414, "step": 2289 }, { "epoch": 0.3095901985635826, "grad_norm": 0.37767377495765686, "learning_rate": 2.533338355236005e-05, "loss": 0.06478118896484375, "step": 2290 }, { "epoch": 0.3097253907900296, "grad_norm": 0.31063926219940186, "learning_rate": 2.532842119859285e-05, "loss": 0.06270742416381836, "step": 2291 }, { "epoch": 0.30986058301647657, "grad_norm": 0.7849839925765991, "learning_rate": 2.532345669439328e-05, "loss": 0.09780025482177734, "step": 2292 }, { "epoch": 0.30999577524292354, "grad_norm": 0.24539461731910706, "learning_rate": 2.5318490040794975e-05, "loss": 0.04785728454589844, "step": 2293 }, { "epoch": 0.3101309674693705, "grad_norm": 0.3790227472782135, "learning_rate": 2.531352123883202e-05, "loss": 0.07446098327636719, "step": 2294 }, { "epoch": 0.3102661596958175, "grad_norm": 0.33788323402404785, "learning_rate": 2.530855028953894e-05, "loss": 0.07399749755859375, "step": 2295 }, { "epoch": 0.3104013519222645, "grad_norm": 0.3270670473575592, "learning_rate": 2.5303577193950724e-05, "loss": 0.058358192443847656, "step": 2296 }, { "epoch": 0.31053654414871146, "grad_norm": 0.4766222834587097, "learning_rate": 2.5298601953102785e-05, "loss": 0.0890495777130127, "step": 2297 }, { "epoch": 0.31067173637515844, "grad_norm": 0.5018221139907837, "learning_rate": 2.5293624568031008e-05, "loss": 0.06609797477722168, "step": 2298 }, { "epoch": 0.3108069286016054, "grad_norm": 0.4079321622848511, "learning_rate": 2.5288645039771697e-05, "loss": 0.05998802185058594, "step": 2299 }, { "epoch": 0.3109421208280524, "grad_norm": 0.39015671610832214, "learning_rate": 2.5283663369361624e-05, "loss": 0.04808831214904785, "step": 2300 }, { "epoch": 0.31107731305449937, "grad_norm": 0.28397712111473083, "learning_rate": 2.5278679557837998e-05, "loss": 0.04781508445739746, "step": 2301 }, { "epoch": 0.31121250528094635, "grad_norm": 0.8658705949783325, "learning_rate": 2.5273693606238474e-05, "loss": 0.09870147705078125, "step": 2302 }, { "epoch": 0.31134769750739333, "grad_norm": 0.43585294485092163, "learning_rate": 2.5268705515601164e-05, "loss": 0.07146501541137695, "step": 2303 }, { "epoch": 0.3114828897338403, "grad_norm": 0.20357875525951385, "learning_rate": 2.5263715286964596e-05, "loss": 0.04984736442565918, "step": 2304 }, { "epoch": 0.3116180819602873, "grad_norm": 0.3360837996006012, "learning_rate": 2.525872292136778e-05, "loss": 0.07769489288330078, "step": 2305 }, { "epoch": 0.31175327418673426, "grad_norm": 0.30891481041908264, "learning_rate": 2.525372841985014e-05, "loss": 0.0885767936706543, "step": 2306 }, { "epoch": 0.31188846641318124, "grad_norm": 0.19803576171398163, "learning_rate": 2.5248731783451567e-05, "loss": 0.04516786336898804, "step": 2307 }, { "epoch": 0.3120236586396282, "grad_norm": 0.33781781792640686, "learning_rate": 2.524373301321238e-05, "loss": 0.07804155349731445, "step": 2308 }, { "epoch": 0.3121588508660752, "grad_norm": 0.21870151162147522, "learning_rate": 2.5238732110173356e-05, "loss": 0.06848716735839844, "step": 2309 }, { "epoch": 0.3122940430925222, "grad_norm": 0.30236539244651794, "learning_rate": 2.5233729075375708e-05, "loss": 0.06928348541259766, "step": 2310 }, { "epoch": 0.31242923531896916, "grad_norm": 0.350146621465683, "learning_rate": 2.522872390986109e-05, "loss": 0.0879526138305664, "step": 2311 }, { "epoch": 0.31256442754541613, "grad_norm": 0.34718042612075806, "learning_rate": 2.522371661467161e-05, "loss": 0.03924298286437988, "step": 2312 }, { "epoch": 0.3126996197718631, "grad_norm": 0.39739125967025757, "learning_rate": 2.521870719084981e-05, "loss": 0.08159017562866211, "step": 2313 }, { "epoch": 0.3128348119983101, "grad_norm": 0.5475897789001465, "learning_rate": 2.5213695639438686e-05, "loss": 0.07092475891113281, "step": 2314 }, { "epoch": 0.31297000422475707, "grad_norm": 0.6640769839286804, "learning_rate": 2.5208681961481657e-05, "loss": 0.10393238067626953, "step": 2315 }, { "epoch": 0.31310519645120405, "grad_norm": 0.28634682297706604, "learning_rate": 2.5203666158022607e-05, "loss": 0.04479193687438965, "step": 2316 }, { "epoch": 0.313240388677651, "grad_norm": 0.238839790225029, "learning_rate": 2.519864823010585e-05, "loss": 0.05104637145996094, "step": 2317 }, { "epoch": 0.313375580904098, "grad_norm": 0.20142219960689545, "learning_rate": 2.5193628178776148e-05, "loss": 0.04235410690307617, "step": 2318 }, { "epoch": 0.313510773130545, "grad_norm": 0.2510052025318146, "learning_rate": 2.5188606005078695e-05, "loss": 0.07171773910522461, "step": 2319 }, { "epoch": 0.31364596535699196, "grad_norm": 0.23331953585147858, "learning_rate": 2.518358171005914e-05, "loss": 0.0742340087890625, "step": 2320 }, { "epoch": 0.31378115758343894, "grad_norm": 0.5179891586303711, "learning_rate": 2.517855529476357e-05, "loss": 0.09337329864501953, "step": 2321 }, { "epoch": 0.3139163498098859, "grad_norm": 0.3570202589035034, "learning_rate": 2.517352676023851e-05, "loss": 0.06017327308654785, "step": 2322 }, { "epoch": 0.3140515420363329, "grad_norm": 0.5075674653053284, "learning_rate": 2.5168496107530925e-05, "loss": 0.06083083152770996, "step": 2323 }, { "epoch": 0.3141867342627799, "grad_norm": 0.3785947859287262, "learning_rate": 2.5163463337688224e-05, "loss": 0.06925487518310547, "step": 2324 }, { "epoch": 0.31432192648922685, "grad_norm": 0.35839200019836426, "learning_rate": 2.515842845175826e-05, "loss": 0.05210590362548828, "step": 2325 }, { "epoch": 0.31445711871567383, "grad_norm": 0.24263793230056763, "learning_rate": 2.5153391450789326e-05, "loss": 0.04543876647949219, "step": 2326 }, { "epoch": 0.3145923109421208, "grad_norm": 0.2730138301849365, "learning_rate": 2.514835233583014e-05, "loss": 0.0621342658996582, "step": 2327 }, { "epoch": 0.3147275031685678, "grad_norm": 0.2266179472208023, "learning_rate": 2.514331110792988e-05, "loss": 0.054215431213378906, "step": 2328 }, { "epoch": 0.31486269539501477, "grad_norm": 0.18004214763641357, "learning_rate": 2.513826776813816e-05, "loss": 0.060683250427246094, "step": 2329 }, { "epoch": 0.31499788762146175, "grad_norm": 0.3396877944469452, "learning_rate": 2.5133222317505024e-05, "loss": 0.07338428497314453, "step": 2330 }, { "epoch": 0.3151330798479087, "grad_norm": 0.28569263219833374, "learning_rate": 2.5128174757080965e-05, "loss": 0.07358694076538086, "step": 2331 }, { "epoch": 0.3152682720743557, "grad_norm": 0.5643778443336487, "learning_rate": 2.5123125087916916e-05, "loss": 0.06362366676330566, "step": 2332 }, { "epoch": 0.3154034643008027, "grad_norm": 0.54227614402771, "learning_rate": 2.5118073311064236e-05, "loss": 0.06790781021118164, "step": 2333 }, { "epoch": 0.31553865652724966, "grad_norm": 0.39101892709732056, "learning_rate": 2.5113019427574734e-05, "loss": 0.06030726432800293, "step": 2334 }, { "epoch": 0.31567384875369664, "grad_norm": 0.44047442078590393, "learning_rate": 2.5107963438500666e-05, "loss": 0.0663461685180664, "step": 2335 }, { "epoch": 0.3158090409801436, "grad_norm": 0.21675944328308105, "learning_rate": 2.51029053448947e-05, "loss": 0.05660057067871094, "step": 2336 }, { "epoch": 0.3159442332065906, "grad_norm": 0.3622889816761017, "learning_rate": 2.509784514780997e-05, "loss": 0.057483673095703125, "step": 2337 }, { "epoch": 0.3160794254330376, "grad_norm": 0.5427830815315247, "learning_rate": 2.5092782848300033e-05, "loss": 0.0858011245727539, "step": 2338 }, { "epoch": 0.31621461765948455, "grad_norm": 0.7293141484260559, "learning_rate": 2.5087718447418886e-05, "loss": 0.07434439659118652, "step": 2339 }, { "epoch": 0.31634980988593153, "grad_norm": 0.22585107386112213, "learning_rate": 2.5082651946220958e-05, "loss": 0.07416868209838867, "step": 2340 }, { "epoch": 0.3164850021123785, "grad_norm": 0.2874718904495239, "learning_rate": 2.507758334576113e-05, "loss": 0.06458187103271484, "step": 2341 }, { "epoch": 0.31662019433882554, "grad_norm": 0.40999042987823486, "learning_rate": 2.5072512647094713e-05, "loss": 0.07197344303131104, "step": 2342 }, { "epoch": 0.3167553865652725, "grad_norm": 0.2622562348842621, "learning_rate": 2.506743985127745e-05, "loss": 0.08288097381591797, "step": 2343 }, { "epoch": 0.3168905787917195, "grad_norm": 0.3014708459377289, "learning_rate": 2.506236495936552e-05, "loss": 0.05206465721130371, "step": 2344 }, { "epoch": 0.3170257710181665, "grad_norm": 0.22873209416866302, "learning_rate": 2.5057287972415547e-05, "loss": 0.06554126739501953, "step": 2345 }, { "epoch": 0.31716096324461346, "grad_norm": 0.2425178438425064, "learning_rate": 2.5052208891484588e-05, "loss": 0.06679439544677734, "step": 2346 }, { "epoch": 0.31729615547106044, "grad_norm": 0.22180858254432678, "learning_rate": 2.504712771763013e-05, "loss": 0.059993743896484375, "step": 2347 }, { "epoch": 0.3174313476975074, "grad_norm": 0.31056347489356995, "learning_rate": 2.5042044451910108e-05, "loss": 0.08017396926879883, "step": 2348 }, { "epoch": 0.3175665399239544, "grad_norm": 0.5533975958824158, "learning_rate": 2.5036959095382875e-05, "loss": 0.0951075553894043, "step": 2349 }, { "epoch": 0.3177017321504014, "grad_norm": 0.26910534501075745, "learning_rate": 2.5031871649107233e-05, "loss": 0.050804853439331055, "step": 2350 }, { "epoch": 0.31783692437684835, "grad_norm": 0.4528810679912567, "learning_rate": 2.5026782114142426e-05, "loss": 0.08442306518554688, "step": 2351 }, { "epoch": 0.31797211660329533, "grad_norm": 0.6761053204536438, "learning_rate": 2.5021690491548107e-05, "loss": 0.10964083671569824, "step": 2352 }, { "epoch": 0.3181073088297423, "grad_norm": 0.2603733241558075, "learning_rate": 2.5016596782384387e-05, "loss": 0.04936408996582031, "step": 2353 }, { "epoch": 0.3182425010561893, "grad_norm": 0.4187442362308502, "learning_rate": 2.5011500987711804e-05, "loss": 0.09982156753540039, "step": 2354 }, { "epoch": 0.31837769328263626, "grad_norm": 0.3205567002296448, "learning_rate": 2.5006403108591325e-05, "loss": 0.07063543796539307, "step": 2355 }, { "epoch": 0.31851288550908324, "grad_norm": 0.35416197776794434, "learning_rate": 2.500130314608436e-05, "loss": 0.07037878036499023, "step": 2356 }, { "epoch": 0.3186480777355302, "grad_norm": 0.3131636381149292, "learning_rate": 2.4996201101252742e-05, "loss": 0.06651270389556885, "step": 2357 }, { "epoch": 0.3187832699619772, "grad_norm": 1.0636065006256104, "learning_rate": 2.4991096975158757e-05, "loss": 0.07877635955810547, "step": 2358 }, { "epoch": 0.3189184621884242, "grad_norm": 0.706203818321228, "learning_rate": 2.4985990768865095e-05, "loss": 0.07495427131652832, "step": 2359 }, { "epoch": 0.31905365441487116, "grad_norm": 0.3577408194541931, "learning_rate": 2.4980882483434904e-05, "loss": 0.06362533569335938, "step": 2360 }, { "epoch": 0.31918884664131814, "grad_norm": 0.2690145671367645, "learning_rate": 2.497577211993176e-05, "loss": 0.07851815223693848, "step": 2361 }, { "epoch": 0.3193240388677651, "grad_norm": 0.2870801091194153, "learning_rate": 2.4970659679419658e-05, "loss": 0.06454581022262573, "step": 2362 }, { "epoch": 0.3194592310942121, "grad_norm": 0.5231418013572693, "learning_rate": 2.496554516296304e-05, "loss": 0.08087480068206787, "step": 2363 }, { "epoch": 0.31959442332065907, "grad_norm": 0.5498780608177185, "learning_rate": 2.4960428571626784e-05, "loss": 0.08754825592041016, "step": 2364 }, { "epoch": 0.31972961554710605, "grad_norm": 0.290738046169281, "learning_rate": 2.4955309906476177e-05, "loss": 0.07848024368286133, "step": 2365 }, { "epoch": 0.31986480777355303, "grad_norm": 0.21460847556591034, "learning_rate": 2.495018916857696e-05, "loss": 0.06312370300292969, "step": 2366 }, { "epoch": 0.32, "grad_norm": 0.6291492581367493, "learning_rate": 2.4945066358995304e-05, "loss": 0.07329463958740234, "step": 2367 }, { "epoch": 0.320135192226447, "grad_norm": 0.21784338355064392, "learning_rate": 2.493994147879779e-05, "loss": 0.056948184967041016, "step": 2368 }, { "epoch": 0.32027038445289396, "grad_norm": 0.6110391616821289, "learning_rate": 2.4934814529051458e-05, "loss": 0.0796196460723877, "step": 2369 }, { "epoch": 0.32040557667934094, "grad_norm": 0.31653475761413574, "learning_rate": 2.4929685510823763e-05, "loss": 0.0922393798828125, "step": 2370 }, { "epoch": 0.3205407689057879, "grad_norm": 0.44890958070755005, "learning_rate": 2.492455442518259e-05, "loss": 0.08061408996582031, "step": 2371 }, { "epoch": 0.3206759611322349, "grad_norm": 0.3351346552371979, "learning_rate": 2.4919421273196262e-05, "loss": 0.058281898498535156, "step": 2372 }, { "epoch": 0.3208111533586819, "grad_norm": 0.2324884682893753, "learning_rate": 2.4914286055933527e-05, "loss": 0.064056396484375, "step": 2373 }, { "epoch": 0.32094634558512886, "grad_norm": 0.4020441472530365, "learning_rate": 2.4909148774463572e-05, "loss": 0.09259605407714844, "step": 2374 }, { "epoch": 0.32108153781157583, "grad_norm": 0.30229443311691284, "learning_rate": 2.4904009429855992e-05, "loss": 0.046872496604919434, "step": 2375 }, { "epoch": 0.3212167300380228, "grad_norm": 0.43443962931632996, "learning_rate": 2.4898868023180844e-05, "loss": 0.06595730781555176, "step": 2376 }, { "epoch": 0.3213519222644698, "grad_norm": 0.39085280895233154, "learning_rate": 2.4893724555508575e-05, "loss": 0.10126161575317383, "step": 2377 }, { "epoch": 0.32148711449091677, "grad_norm": 0.25762200355529785, "learning_rate": 2.4888579027910105e-05, "loss": 0.07214164733886719, "step": 2378 }, { "epoch": 0.32162230671736375, "grad_norm": 0.2287529706954956, "learning_rate": 2.4883431441456738e-05, "loss": 0.07157754898071289, "step": 2379 }, { "epoch": 0.3217574989438107, "grad_norm": 0.3518243432044983, "learning_rate": 2.4878281797220244e-05, "loss": 0.06694567203521729, "step": 2380 }, { "epoch": 0.3218926911702577, "grad_norm": 0.4456416070461273, "learning_rate": 2.4873130096272805e-05, "loss": 0.07938957214355469, "step": 2381 }, { "epoch": 0.3220278833967047, "grad_norm": 0.26300668716430664, "learning_rate": 2.4867976339687026e-05, "loss": 0.06231880187988281, "step": 2382 }, { "epoch": 0.32216307562315166, "grad_norm": 0.8203449249267578, "learning_rate": 2.4862820528535955e-05, "loss": 0.0864250659942627, "step": 2383 }, { "epoch": 0.32229826784959864, "grad_norm": 0.43552136421203613, "learning_rate": 2.4857662663893054e-05, "loss": 0.0946817398071289, "step": 2384 }, { "epoch": 0.3224334600760456, "grad_norm": 0.18367503583431244, "learning_rate": 2.485250274683222e-05, "loss": 0.02898305654525757, "step": 2385 }, { "epoch": 0.3225686523024926, "grad_norm": 0.18898211419582367, "learning_rate": 2.4847340778427772e-05, "loss": 0.05438041687011719, "step": 2386 }, { "epoch": 0.3227038445289396, "grad_norm": 0.33147570490837097, "learning_rate": 2.484217675975446e-05, "loss": 0.05866193771362305, "step": 2387 }, { "epoch": 0.32283903675538655, "grad_norm": 0.9009811282157898, "learning_rate": 2.4837010691887466e-05, "loss": 0.09663963317871094, "step": 2388 }, { "epoch": 0.32297422898183353, "grad_norm": 0.7079839706420898, "learning_rate": 2.4831842575902383e-05, "loss": 0.08978748321533203, "step": 2389 }, { "epoch": 0.3231094212082805, "grad_norm": 0.27973049879074097, "learning_rate": 2.482667241287525e-05, "loss": 0.0802769660949707, "step": 2390 }, { "epoch": 0.3232446134347275, "grad_norm": 0.21611611545085907, "learning_rate": 2.4821500203882517e-05, "loss": 0.051239967346191406, "step": 2391 }, { "epoch": 0.32337980566117447, "grad_norm": 0.4308067858219147, "learning_rate": 2.4816325950001067e-05, "loss": 0.08449077606201172, "step": 2392 }, { "epoch": 0.32351499788762145, "grad_norm": 0.18825387954711914, "learning_rate": 2.4811149652308205e-05, "loss": 0.06111717224121094, "step": 2393 }, { "epoch": 0.3236501901140684, "grad_norm": 0.42325976490974426, "learning_rate": 2.480597131188167e-05, "loss": 0.09814834594726562, "step": 2394 }, { "epoch": 0.3237853823405154, "grad_norm": 0.27803319692611694, "learning_rate": 2.4800790929799614e-05, "loss": 0.07898426055908203, "step": 2395 }, { "epoch": 0.3239205745669624, "grad_norm": 0.3575000464916229, "learning_rate": 2.4795608507140623e-05, "loss": 0.07029938697814941, "step": 2396 }, { "epoch": 0.32405576679340936, "grad_norm": 0.5440062880516052, "learning_rate": 2.4790424044983705e-05, "loss": 0.09047508239746094, "step": 2397 }, { "epoch": 0.32419095901985634, "grad_norm": 0.30727916955947876, "learning_rate": 2.4785237544408288e-05, "loss": 0.09341716766357422, "step": 2398 }, { "epoch": 0.3243261512463033, "grad_norm": 0.3869057297706604, "learning_rate": 2.478004900649424e-05, "loss": 0.07088875770568848, "step": 2399 }, { "epoch": 0.3244613434727503, "grad_norm": 0.3073582351207733, "learning_rate": 2.477485843232183e-05, "loss": 0.05738997459411621, "step": 2400 }, { "epoch": 0.3245965356991973, "grad_norm": 0.26189202070236206, "learning_rate": 2.476966582297177e-05, "loss": 0.08462309837341309, "step": 2401 }, { "epoch": 0.32473172792564425, "grad_norm": 0.4069730341434479, "learning_rate": 2.4764471179525188e-05, "loss": 0.08410358428955078, "step": 2402 }, { "epoch": 0.32486692015209123, "grad_norm": 0.3748754858970642, "learning_rate": 2.4759274503063632e-05, "loss": 0.09998178482055664, "step": 2403 }, { "epoch": 0.3250021123785382, "grad_norm": 0.760499894618988, "learning_rate": 2.4754075794669088e-05, "loss": 0.0803825855255127, "step": 2404 }, { "epoch": 0.3251373046049852, "grad_norm": 0.3767063319683075, "learning_rate": 2.4748875055423942e-05, "loss": 0.08749151229858398, "step": 2405 }, { "epoch": 0.32527249683143217, "grad_norm": 0.5090206265449524, "learning_rate": 2.4743672286411027e-05, "loss": 0.07190060615539551, "step": 2406 }, { "epoch": 0.32540768905787915, "grad_norm": 0.4415806531906128, "learning_rate": 2.4738467488713582e-05, "loss": 0.08155441284179688, "step": 2407 }, { "epoch": 0.3255428812843261, "grad_norm": 0.5361815690994263, "learning_rate": 2.473326066341527e-05, "loss": 0.08111095428466797, "step": 2408 }, { "epoch": 0.3256780735107731, "grad_norm": 0.246195986866951, "learning_rate": 2.4728051811600184e-05, "loss": 0.06524133682250977, "step": 2409 }, { "epoch": 0.32581326573722014, "grad_norm": 0.4579744338989258, "learning_rate": 2.4722840934352838e-05, "loss": 0.09024429321289062, "step": 2410 }, { "epoch": 0.3259484579636671, "grad_norm": 0.4055515229701996, "learning_rate": 2.471762803275816e-05, "loss": 0.059591054916381836, "step": 2411 }, { "epoch": 0.3260836501901141, "grad_norm": 0.4381348490715027, "learning_rate": 2.4712413107901504e-05, "loss": 0.07091140747070312, "step": 2412 }, { "epoch": 0.3262188424165611, "grad_norm": 0.30810537934303284, "learning_rate": 2.470719616086865e-05, "loss": 0.0596085786819458, "step": 2413 }, { "epoch": 0.32635403464300805, "grad_norm": 0.34375977516174316, "learning_rate": 2.4701977192745785e-05, "loss": 0.0819559097290039, "step": 2414 }, { "epoch": 0.32648922686945503, "grad_norm": 0.3275398313999176, "learning_rate": 2.4696756204619535e-05, "loss": 0.07708930969238281, "step": 2415 }, { "epoch": 0.326624419095902, "grad_norm": 0.5567411184310913, "learning_rate": 2.469153319757693e-05, "loss": 0.10406804084777832, "step": 2416 }, { "epoch": 0.326759611322349, "grad_norm": 0.2049816995859146, "learning_rate": 2.4686308172705433e-05, "loss": 0.03955411911010742, "step": 2417 }, { "epoch": 0.32689480354879596, "grad_norm": 0.18643677234649658, "learning_rate": 2.4681081131092926e-05, "loss": 0.054799556732177734, "step": 2418 }, { "epoch": 0.32702999577524294, "grad_norm": 0.29502829909324646, "learning_rate": 2.467585207382769e-05, "loss": 0.07020759582519531, "step": 2419 }, { "epoch": 0.3271651880016899, "grad_norm": 0.2715286612510681, "learning_rate": 2.4670621001998467e-05, "loss": 0.05517768859863281, "step": 2420 }, { "epoch": 0.3273003802281369, "grad_norm": 0.30594679713249207, "learning_rate": 2.466538791669437e-05, "loss": 0.06382042169570923, "step": 2421 }, { "epoch": 0.3274355724545839, "grad_norm": 0.4444860517978668, "learning_rate": 2.4660152819004973e-05, "loss": 0.08150219917297363, "step": 2422 }, { "epoch": 0.32757076468103086, "grad_norm": 0.4667988419532776, "learning_rate": 2.4654915710020246e-05, "loss": 0.05817985534667969, "step": 2423 }, { "epoch": 0.32770595690747784, "grad_norm": 0.42448729276657104, "learning_rate": 2.464967659083058e-05, "loss": 0.07563543319702148, "step": 2424 }, { "epoch": 0.3278411491339248, "grad_norm": 0.32104459404945374, "learning_rate": 2.464443546252679e-05, "loss": 0.06878280639648438, "step": 2425 }, { "epoch": 0.3279763413603718, "grad_norm": 0.3184334635734558, "learning_rate": 2.4639192326200104e-05, "loss": 0.07930433750152588, "step": 2426 }, { "epoch": 0.32811153358681877, "grad_norm": 0.1668136864900589, "learning_rate": 2.463394718294218e-05, "loss": 0.030389070510864258, "step": 2427 }, { "epoch": 0.32824672581326575, "grad_norm": 0.43874040246009827, "learning_rate": 2.4628700033845072e-05, "loss": 0.07626056671142578, "step": 2428 }, { "epoch": 0.32838191803971273, "grad_norm": 0.6310276985168457, "learning_rate": 2.4623450880001268e-05, "loss": 0.0951528549194336, "step": 2429 }, { "epoch": 0.3285171102661597, "grad_norm": 0.21510225534439087, "learning_rate": 2.4618199722503676e-05, "loss": 0.04945993423461914, "step": 2430 }, { "epoch": 0.3286523024926067, "grad_norm": 0.6062254905700684, "learning_rate": 2.4612946562445613e-05, "loss": 0.0645909309387207, "step": 2431 }, { "epoch": 0.32878749471905366, "grad_norm": 0.3155181407928467, "learning_rate": 2.460769140092081e-05, "loss": 0.07437801361083984, "step": 2432 }, { "epoch": 0.32892268694550064, "grad_norm": 0.2749468684196472, "learning_rate": 2.460243423902342e-05, "loss": 0.06017649173736572, "step": 2433 }, { "epoch": 0.3290578791719476, "grad_norm": 0.3019416034221649, "learning_rate": 2.459717507784802e-05, "loss": 0.07065987586975098, "step": 2434 }, { "epoch": 0.3291930713983946, "grad_norm": 0.4691430926322937, "learning_rate": 2.459191391848959e-05, "loss": 0.06434822082519531, "step": 2435 }, { "epoch": 0.3293282636248416, "grad_norm": 0.3791032135486603, "learning_rate": 2.4586650762043538e-05, "loss": 0.07087516784667969, "step": 2436 }, { "epoch": 0.32946345585128856, "grad_norm": 0.6622600555419922, "learning_rate": 2.4581385609605665e-05, "loss": 0.0833359956741333, "step": 2437 }, { "epoch": 0.32959864807773553, "grad_norm": 0.46688854694366455, "learning_rate": 2.4576118462272218e-05, "loss": 0.08251190185546875, "step": 2438 }, { "epoch": 0.3297338403041825, "grad_norm": 0.3251646161079407, "learning_rate": 2.4570849321139836e-05, "loss": 0.09423828125, "step": 2439 }, { "epoch": 0.3298690325306295, "grad_norm": 0.27186331152915955, "learning_rate": 2.4565578187305596e-05, "loss": 0.06013679504394531, "step": 2440 }, { "epoch": 0.33000422475707647, "grad_norm": 0.23624005913734436, "learning_rate": 2.456030506186696e-05, "loss": 0.05769085884094238, "step": 2441 }, { "epoch": 0.33013941698352345, "grad_norm": 0.20653310418128967, "learning_rate": 2.4555029945921832e-05, "loss": 0.049443721771240234, "step": 2442 }, { "epoch": 0.3302746092099704, "grad_norm": 0.3757876455783844, "learning_rate": 2.4549752840568516e-05, "loss": 0.0664370059967041, "step": 2443 }, { "epoch": 0.3304098014364174, "grad_norm": 0.3367663621902466, "learning_rate": 2.4544473746905733e-05, "loss": 0.07474899291992188, "step": 2444 }, { "epoch": 0.3305449936628644, "grad_norm": 0.23202677071094513, "learning_rate": 2.4539192666032617e-05, "loss": 0.06258010864257812, "step": 2445 }, { "epoch": 0.33068018588931136, "grad_norm": 0.1665327548980713, "learning_rate": 2.4533909599048718e-05, "loss": 0.03667253255844116, "step": 2446 }, { "epoch": 0.33081537811575834, "grad_norm": 0.42714065313339233, "learning_rate": 2.4528624547054003e-05, "loss": 0.058437228202819824, "step": 2447 }, { "epoch": 0.3309505703422053, "grad_norm": 0.4131702482700348, "learning_rate": 2.4523337511148843e-05, "loss": 0.058091163635253906, "step": 2448 }, { "epoch": 0.3310857625686523, "grad_norm": 0.5783988833427429, "learning_rate": 2.4518048492434028e-05, "loss": 0.0949258804321289, "step": 2449 }, { "epoch": 0.3312209547950993, "grad_norm": 0.24023276567459106, "learning_rate": 2.4512757492010762e-05, "loss": 0.0540919303894043, "step": 2450 }, { "epoch": 0.33135614702154625, "grad_norm": 0.24015502631664276, "learning_rate": 2.4507464510980652e-05, "loss": 0.07108378410339355, "step": 2451 }, { "epoch": 0.33149133924799323, "grad_norm": 0.2364378720521927, "learning_rate": 2.450216955044574e-05, "loss": 0.06290721893310547, "step": 2452 }, { "epoch": 0.3316265314744402, "grad_norm": 0.5855490565299988, "learning_rate": 2.449687261150845e-05, "loss": 0.10780525207519531, "step": 2453 }, { "epoch": 0.3317617237008872, "grad_norm": 0.6197882294654846, "learning_rate": 2.449157369527164e-05, "loss": 0.09061050415039062, "step": 2454 }, { "epoch": 0.33189691592733417, "grad_norm": 0.33625009655952454, "learning_rate": 2.448627280283857e-05, "loss": 0.06360673904418945, "step": 2455 }, { "epoch": 0.33203210815378115, "grad_norm": 0.3676844537258148, "learning_rate": 2.4480969935312917e-05, "loss": 0.07337427139282227, "step": 2456 }, { "epoch": 0.3321673003802281, "grad_norm": 0.20737534761428833, "learning_rate": 2.4475665093798766e-05, "loss": 0.059857845306396484, "step": 2457 }, { "epoch": 0.3323024926066751, "grad_norm": 0.6505117416381836, "learning_rate": 2.447035827940061e-05, "loss": 0.07700157165527344, "step": 2458 }, { "epoch": 0.3324376848331221, "grad_norm": 0.4976421594619751, "learning_rate": 2.4465049493223356e-05, "loss": 0.0570683479309082, "step": 2459 }, { "epoch": 0.33257287705956906, "grad_norm": 0.3306278884410858, "learning_rate": 2.4459738736372327e-05, "loss": 0.0829916000366211, "step": 2460 }, { "epoch": 0.33270806928601604, "grad_norm": 0.3618042469024658, "learning_rate": 2.4454426009953252e-05, "loss": 0.04579496383666992, "step": 2461 }, { "epoch": 0.332843261512463, "grad_norm": 0.31327348947525024, "learning_rate": 2.4449111315072254e-05, "loss": 0.07779788970947266, "step": 2462 }, { "epoch": 0.33297845373891, "grad_norm": 0.5885433554649353, "learning_rate": 2.44437946528359e-05, "loss": 0.09245491027832031, "step": 2463 }, { "epoch": 0.333113645965357, "grad_norm": 0.38821515440940857, "learning_rate": 2.4438476024351138e-05, "loss": 0.06781506538391113, "step": 2464 }, { "epoch": 0.33324883819180395, "grad_norm": 0.3372174799442291, "learning_rate": 2.4433155430725333e-05, "loss": 0.07224130630493164, "step": 2465 }, { "epoch": 0.33338403041825093, "grad_norm": 0.34986966848373413, "learning_rate": 2.4427832873066262e-05, "loss": 0.07378792762756348, "step": 2466 }, { "epoch": 0.3335192226446979, "grad_norm": 0.291404664516449, "learning_rate": 2.4422508352482113e-05, "loss": 0.05615878105163574, "step": 2467 }, { "epoch": 0.3336544148711449, "grad_norm": 0.4609013795852661, "learning_rate": 2.441718187008148e-05, "loss": 0.06363534927368164, "step": 2468 }, { "epoch": 0.33378960709759187, "grad_norm": 0.18457138538360596, "learning_rate": 2.441185342697336e-05, "loss": 0.04251909255981445, "step": 2469 }, { "epoch": 0.33392479932403885, "grad_norm": 0.2803070545196533, "learning_rate": 2.440652302426717e-05, "loss": 0.055945396423339844, "step": 2470 }, { "epoch": 0.3340599915504858, "grad_norm": 0.45884576439857483, "learning_rate": 2.440119066307272e-05, "loss": 0.08475303649902344, "step": 2471 }, { "epoch": 0.3341951837769328, "grad_norm": 0.28993961215019226, "learning_rate": 2.4395856344500244e-05, "loss": 0.07215166091918945, "step": 2472 }, { "epoch": 0.3343303760033798, "grad_norm": 0.4510860741138458, "learning_rate": 2.4390520069660377e-05, "loss": 0.07471084594726562, "step": 2473 }, { "epoch": 0.33446556822982676, "grad_norm": 0.33531898260116577, "learning_rate": 2.4385181839664146e-05, "loss": 0.06544113159179688, "step": 2474 }, { "epoch": 0.33460076045627374, "grad_norm": 0.4310208559036255, "learning_rate": 2.437984165562301e-05, "loss": 0.09584617614746094, "step": 2475 }, { "epoch": 0.3347359526827207, "grad_norm": 0.33662885427474976, "learning_rate": 2.4374499518648827e-05, "loss": 0.052356719970703125, "step": 2476 }, { "epoch": 0.33487114490916775, "grad_norm": 0.2527439296245575, "learning_rate": 2.436915542985385e-05, "loss": 0.05551505088806152, "step": 2477 }, { "epoch": 0.33500633713561473, "grad_norm": 0.3310180604457855, "learning_rate": 2.436380939035075e-05, "loss": 0.08473348617553711, "step": 2478 }, { "epoch": 0.3351415293620617, "grad_norm": 0.2368757724761963, "learning_rate": 2.43584614012526e-05, "loss": 0.042256832122802734, "step": 2479 }, { "epoch": 0.3352767215885087, "grad_norm": 0.17077018320560455, "learning_rate": 2.4353111463672882e-05, "loss": 0.03796041011810303, "step": 2480 }, { "epoch": 0.33541191381495566, "grad_norm": 0.5071596503257751, "learning_rate": 2.4347759578725482e-05, "loss": 0.08613014221191406, "step": 2481 }, { "epoch": 0.33554710604140264, "grad_norm": 0.4547046720981598, "learning_rate": 2.4342405747524685e-05, "loss": 0.08476781845092773, "step": 2482 }, { "epoch": 0.3356822982678496, "grad_norm": 0.46177658438682556, "learning_rate": 2.4337049971185194e-05, "loss": 0.1083369255065918, "step": 2483 }, { "epoch": 0.3358174904942966, "grad_norm": 0.36410853266716003, "learning_rate": 2.433169225082211e-05, "loss": 0.07090163230895996, "step": 2484 }, { "epoch": 0.3359526827207436, "grad_norm": 0.20836199820041656, "learning_rate": 2.432633258755093e-05, "loss": 0.05080842971801758, "step": 2485 }, { "epoch": 0.33608787494719056, "grad_norm": 0.3155006170272827, "learning_rate": 2.432097098248758e-05, "loss": 0.06565451622009277, "step": 2486 }, { "epoch": 0.33622306717363754, "grad_norm": 0.40902048349380493, "learning_rate": 2.4315607436748362e-05, "loss": 0.0543973445892334, "step": 2487 }, { "epoch": 0.3363582594000845, "grad_norm": 0.6134740710258484, "learning_rate": 2.4310241951449997e-05, "loss": 0.060318946838378906, "step": 2488 }, { "epoch": 0.3364934516265315, "grad_norm": 0.3207819163799286, "learning_rate": 2.4304874527709614e-05, "loss": 0.06946825981140137, "step": 2489 }, { "epoch": 0.33662864385297847, "grad_norm": 0.31709375977516174, "learning_rate": 2.429950516664473e-05, "loss": 0.06220436096191406, "step": 2490 }, { "epoch": 0.33676383607942545, "grad_norm": 0.24454593658447266, "learning_rate": 2.4294133869373284e-05, "loss": 0.05053210258483887, "step": 2491 }, { "epoch": 0.33689902830587243, "grad_norm": 0.4360541105270386, "learning_rate": 2.42887606370136e-05, "loss": 0.06767749786376953, "step": 2492 }, { "epoch": 0.3370342205323194, "grad_norm": 0.25928908586502075, "learning_rate": 2.428338547068442e-05, "loss": 0.05216515064239502, "step": 2493 }, { "epoch": 0.3371694127587664, "grad_norm": 0.9997990727424622, "learning_rate": 2.427800837150488e-05, "loss": 0.07999038696289062, "step": 2494 }, { "epoch": 0.33730460498521336, "grad_norm": 0.4855858385562897, "learning_rate": 2.4272629340594518e-05, "loss": 0.0854107141494751, "step": 2495 }, { "epoch": 0.33743979721166034, "grad_norm": 0.3172101676464081, "learning_rate": 2.426724837907328e-05, "loss": 0.06275415420532227, "step": 2496 }, { "epoch": 0.3375749894381073, "grad_norm": 0.45540010929107666, "learning_rate": 2.4261865488061512e-05, "loss": 0.06249189376831055, "step": 2497 }, { "epoch": 0.3377101816645543, "grad_norm": 0.5258225202560425, "learning_rate": 2.4256480668679958e-05, "loss": 0.0672459602355957, "step": 2498 }, { "epoch": 0.3378453738910013, "grad_norm": 0.22932447493076324, "learning_rate": 2.4251093922049766e-05, "loss": 0.07059991359710693, "step": 2499 }, { "epoch": 0.33798056611744826, "grad_norm": 0.2417266070842743, "learning_rate": 2.4245705249292494e-05, "loss": 0.05587363243103027, "step": 2500 }, { "epoch": 0.33811575834389523, "grad_norm": 0.5009039044380188, "learning_rate": 2.4240314651530073e-05, "loss": 0.0823812484741211, "step": 2501 }, { "epoch": 0.3382509505703422, "grad_norm": 0.4212474524974823, "learning_rate": 2.4234922129884873e-05, "loss": 0.06021285057067871, "step": 2502 }, { "epoch": 0.3383861427967892, "grad_norm": 0.5608205199241638, "learning_rate": 2.4229527685479644e-05, "loss": 0.08831501007080078, "step": 2503 }, { "epoch": 0.33852133502323617, "grad_norm": 0.28817611932754517, "learning_rate": 2.4224131319437523e-05, "loss": 0.05916786193847656, "step": 2504 }, { "epoch": 0.33865652724968315, "grad_norm": 0.37260687351226807, "learning_rate": 2.421873303288208e-05, "loss": 0.07591724395751953, "step": 2505 }, { "epoch": 0.3387917194761301, "grad_norm": 0.7916498780250549, "learning_rate": 2.4213332826937255e-05, "loss": 0.10970115661621094, "step": 2506 }, { "epoch": 0.3389269117025771, "grad_norm": 0.17081116139888763, "learning_rate": 2.4207930702727404e-05, "loss": 0.05327153205871582, "step": 2507 }, { "epoch": 0.3390621039290241, "grad_norm": 0.3214297592639923, "learning_rate": 2.420252666137728e-05, "loss": 0.05479860305786133, "step": 2508 }, { "epoch": 0.33919729615547106, "grad_norm": 0.23765519261360168, "learning_rate": 2.419712070401203e-05, "loss": 0.06476879119873047, "step": 2509 }, { "epoch": 0.33933248838191804, "grad_norm": 0.493564635515213, "learning_rate": 2.4191712831757203e-05, "loss": 0.08979177474975586, "step": 2510 }, { "epoch": 0.339467680608365, "grad_norm": 0.35108911991119385, "learning_rate": 2.418630304573875e-05, "loss": 0.06929492950439453, "step": 2511 }, { "epoch": 0.339602872834812, "grad_norm": 0.2620317339897156, "learning_rate": 2.418089134708302e-05, "loss": 0.06982278823852539, "step": 2512 }, { "epoch": 0.339738065061259, "grad_norm": 0.3687685430049896, "learning_rate": 2.4175477736916743e-05, "loss": 0.10542488098144531, "step": 2513 }, { "epoch": 0.33987325728770595, "grad_norm": 0.7251862287521362, "learning_rate": 2.4170062216367082e-05, "loss": 0.08716773986816406, "step": 2514 }, { "epoch": 0.34000844951415293, "grad_norm": 0.7256414890289307, "learning_rate": 2.416464478656156e-05, "loss": 0.08104228973388672, "step": 2515 }, { "epoch": 0.3401436417405999, "grad_norm": 0.3080953061580658, "learning_rate": 2.4159225448628123e-05, "loss": 0.058271169662475586, "step": 2516 }, { "epoch": 0.3402788339670469, "grad_norm": 0.5783313512802124, "learning_rate": 2.4153804203695103e-05, "loss": 0.08752673864364624, "step": 2517 }, { "epoch": 0.34041402619349387, "grad_norm": 0.6522753834724426, "learning_rate": 2.4148381052891236e-05, "loss": 0.06243085861206055, "step": 2518 }, { "epoch": 0.34054921841994085, "grad_norm": 0.30998751521110535, "learning_rate": 2.4142955997345648e-05, "loss": 0.0700232982635498, "step": 2519 }, { "epoch": 0.3406844106463878, "grad_norm": 0.21559259295463562, "learning_rate": 2.4137529038187864e-05, "loss": 0.06367969512939453, "step": 2520 }, { "epoch": 0.3408196028728348, "grad_norm": 0.35696348547935486, "learning_rate": 2.413210017654781e-05, "loss": 0.05223369598388672, "step": 2521 }, { "epoch": 0.3409547950992818, "grad_norm": 0.45169007778167725, "learning_rate": 2.4126669413555802e-05, "loss": 0.08095884323120117, "step": 2522 }, { "epoch": 0.34108998732572876, "grad_norm": 0.6773762106895447, "learning_rate": 2.4121236750342548e-05, "loss": 0.0945596694946289, "step": 2523 }, { "epoch": 0.34122517955217574, "grad_norm": 0.2786770462989807, "learning_rate": 2.4115802188039165e-05, "loss": 0.07101106643676758, "step": 2524 }, { "epoch": 0.3413603717786227, "grad_norm": 0.3666849434375763, "learning_rate": 2.4110365727777156e-05, "loss": 0.06577682495117188, "step": 2525 }, { "epoch": 0.3414955640050697, "grad_norm": 0.3651229739189148, "learning_rate": 2.410492737068842e-05, "loss": 0.06389713287353516, "step": 2526 }, { "epoch": 0.3416307562315167, "grad_norm": 0.3447984755039215, "learning_rate": 2.409948711790525e-05, "loss": 0.09207653999328613, "step": 2527 }, { "epoch": 0.34176594845796365, "grad_norm": 0.5862815380096436, "learning_rate": 2.4094044970560336e-05, "loss": 0.09639883041381836, "step": 2528 }, { "epoch": 0.34190114068441063, "grad_norm": 0.3143575191497803, "learning_rate": 2.4088600929786767e-05, "loss": 0.06098484992980957, "step": 2529 }, { "epoch": 0.3420363329108576, "grad_norm": 0.5567917823791504, "learning_rate": 2.408315499671802e-05, "loss": 0.07166743278503418, "step": 2530 }, { "epoch": 0.3421715251373046, "grad_norm": 0.24091428518295288, "learning_rate": 2.407770717248796e-05, "loss": 0.05601072311401367, "step": 2531 }, { "epoch": 0.34230671736375157, "grad_norm": 0.4519946277141571, "learning_rate": 2.407225745823086e-05, "loss": 0.07663154602050781, "step": 2532 }, { "epoch": 0.34244190959019855, "grad_norm": 0.43207940459251404, "learning_rate": 2.4066805855081378e-05, "loss": 0.06264829635620117, "step": 2533 }, { "epoch": 0.3425771018166455, "grad_norm": 0.4547562897205353, "learning_rate": 2.406135236417457e-05, "loss": 0.10232162475585938, "step": 2534 }, { "epoch": 0.3427122940430925, "grad_norm": 0.5358670949935913, "learning_rate": 2.4055896986645875e-05, "loss": 0.08292484283447266, "step": 2535 }, { "epoch": 0.3428474862695395, "grad_norm": 0.1853201687335968, "learning_rate": 2.4050439723631136e-05, "loss": 0.050823330879211426, "step": 2536 }, { "epoch": 0.34298267849598646, "grad_norm": 0.19585396349430084, "learning_rate": 2.404498057626659e-05, "loss": 0.05597734451293945, "step": 2537 }, { "epoch": 0.34311787072243344, "grad_norm": 0.33710625767707825, "learning_rate": 2.4039519545688848e-05, "loss": 0.08285927772521973, "step": 2538 }, { "epoch": 0.3432530629488804, "grad_norm": 0.7918868660926819, "learning_rate": 2.4034056633034932e-05, "loss": 0.08617329597473145, "step": 2539 }, { "epoch": 0.3433882551753274, "grad_norm": 0.3767209053039551, "learning_rate": 2.402859183944225e-05, "loss": 0.08967018127441406, "step": 2540 }, { "epoch": 0.3435234474017744, "grad_norm": 0.26449450850486755, "learning_rate": 2.4023125166048597e-05, "loss": 0.056416988372802734, "step": 2541 }, { "epoch": 0.34365863962822135, "grad_norm": 0.35602036118507385, "learning_rate": 2.401765661399218e-05, "loss": 0.06450843811035156, "step": 2542 }, { "epoch": 0.34379383185466833, "grad_norm": 0.4066449999809265, "learning_rate": 2.4012186184411556e-05, "loss": 0.08829712867736816, "step": 2543 }, { "epoch": 0.3439290240811153, "grad_norm": 0.3011881113052368, "learning_rate": 2.400671387844571e-05, "loss": 0.06794929504394531, "step": 2544 }, { "epoch": 0.34406421630756234, "grad_norm": 0.43036797642707825, "learning_rate": 2.4001239697234008e-05, "loss": 0.0652916431427002, "step": 2545 }, { "epoch": 0.3441994085340093, "grad_norm": 0.1733134686946869, "learning_rate": 2.3995763641916205e-05, "loss": 0.040465593338012695, "step": 2546 }, { "epoch": 0.3443346007604563, "grad_norm": 0.28372931480407715, "learning_rate": 2.3990285713632436e-05, "loss": 0.06408858299255371, "step": 2547 }, { "epoch": 0.3444697929869033, "grad_norm": 0.31761035323143005, "learning_rate": 2.398480591352324e-05, "loss": 0.060512423515319824, "step": 2548 }, { "epoch": 0.34460498521335026, "grad_norm": 0.1383335292339325, "learning_rate": 2.3979324242729537e-05, "loss": 0.043040931224823, "step": 2549 }, { "epoch": 0.34474017743979724, "grad_norm": 0.26285022497177124, "learning_rate": 2.3973840702392646e-05, "loss": 0.07085132598876953, "step": 2550 }, { "epoch": 0.3448753696662442, "grad_norm": 0.12210826575756073, "learning_rate": 2.3968355293654267e-05, "loss": 0.0397639274597168, "step": 2551 }, { "epoch": 0.3450105618926912, "grad_norm": 0.18168649077415466, "learning_rate": 2.396286801765649e-05, "loss": 0.047493934631347656, "step": 2552 }, { "epoch": 0.34514575411913817, "grad_norm": 0.31178680062294006, "learning_rate": 2.3957378875541795e-05, "loss": 0.06428849697113037, "step": 2553 }, { "epoch": 0.34528094634558515, "grad_norm": 0.32699131965637207, "learning_rate": 2.395188786845305e-05, "loss": 0.0743408203125, "step": 2554 }, { "epoch": 0.3454161385720321, "grad_norm": 0.23083823919296265, "learning_rate": 2.3946394997533516e-05, "loss": 0.06639689207077026, "step": 2555 }, { "epoch": 0.3455513307984791, "grad_norm": 0.38130173087120056, "learning_rate": 2.3940900263926833e-05, "loss": 0.06683635711669922, "step": 2556 }, { "epoch": 0.3456865230249261, "grad_norm": 0.43939515948295593, "learning_rate": 2.393540366877704e-05, "loss": 0.09607124328613281, "step": 2557 }, { "epoch": 0.34582171525137306, "grad_norm": 0.6057755947113037, "learning_rate": 2.392990521322855e-05, "loss": 0.06909465789794922, "step": 2558 }, { "epoch": 0.34595690747782004, "grad_norm": 0.6586816906929016, "learning_rate": 2.392440489842618e-05, "loss": 0.09536075592041016, "step": 2559 }, { "epoch": 0.346092099704267, "grad_norm": 0.3285783529281616, "learning_rate": 2.3918902725515118e-05, "loss": 0.0948324203491211, "step": 2560 }, { "epoch": 0.346227291930714, "grad_norm": 0.2755489945411682, "learning_rate": 2.391339869564094e-05, "loss": 0.07570266723632812, "step": 2561 }, { "epoch": 0.346362484157161, "grad_norm": 0.25667473673820496, "learning_rate": 2.3907892809949628e-05, "loss": 0.038190245628356934, "step": 2562 }, { "epoch": 0.34649767638360796, "grad_norm": 0.2906736731529236, "learning_rate": 2.390238506958753e-05, "loss": 0.06618642807006836, "step": 2563 }, { "epoch": 0.34663286861005493, "grad_norm": 0.5905830264091492, "learning_rate": 2.3896875475701387e-05, "loss": 0.09708178043365479, "step": 2564 }, { "epoch": 0.3467680608365019, "grad_norm": 0.2868516743183136, "learning_rate": 2.3891364029438323e-05, "loss": 0.0561293363571167, "step": 2565 }, { "epoch": 0.3469032530629489, "grad_norm": 0.29824212193489075, "learning_rate": 2.3885850731945857e-05, "loss": 0.06819438934326172, "step": 2566 }, { "epoch": 0.34703844528939587, "grad_norm": 0.49924513697624207, "learning_rate": 2.3880335584371884e-05, "loss": 0.0761265754699707, "step": 2567 }, { "epoch": 0.34717363751584285, "grad_norm": 0.5289023518562317, "learning_rate": 2.387481858786468e-05, "loss": 0.08414745330810547, "step": 2568 }, { "epoch": 0.3473088297422898, "grad_norm": 0.3754660487174988, "learning_rate": 2.386929974357293e-05, "loss": 0.05096358060836792, "step": 2569 }, { "epoch": 0.3474440219687368, "grad_norm": 0.29868337512016296, "learning_rate": 2.386377905264567e-05, "loss": 0.0711207389831543, "step": 2570 }, { "epoch": 0.3475792141951838, "grad_norm": 0.4866524040699005, "learning_rate": 2.3858256516232346e-05, "loss": 0.10391855239868164, "step": 2571 }, { "epoch": 0.34771440642163076, "grad_norm": 0.2132396697998047, "learning_rate": 2.3852732135482775e-05, "loss": 0.060329437255859375, "step": 2572 }, { "epoch": 0.34784959864807774, "grad_norm": 0.3209385573863983, "learning_rate": 2.3847205911547166e-05, "loss": 0.05642986297607422, "step": 2573 }, { "epoch": 0.3479847908745247, "grad_norm": 0.29649993777275085, "learning_rate": 2.3841677845576108e-05, "loss": 0.037259817123413086, "step": 2574 }, { "epoch": 0.3481199831009717, "grad_norm": 0.4815114140510559, "learning_rate": 2.383614793872057e-05, "loss": 0.10496234893798828, "step": 2575 }, { "epoch": 0.3482551753274187, "grad_norm": 0.2752836346626282, "learning_rate": 2.3830616192131913e-05, "loss": 0.07256031036376953, "step": 2576 }, { "epoch": 0.34839036755386565, "grad_norm": 0.21755489706993103, "learning_rate": 2.3825082606961876e-05, "loss": 0.06976318359375, "step": 2577 }, { "epoch": 0.34852555978031263, "grad_norm": 0.21390248835086823, "learning_rate": 2.3819547184362575e-05, "loss": 0.040138840675354004, "step": 2578 }, { "epoch": 0.3486607520067596, "grad_norm": 0.40627220273017883, "learning_rate": 2.3814009925486522e-05, "loss": 0.0750131607055664, "step": 2579 }, { "epoch": 0.3487959442332066, "grad_norm": 0.22322294116020203, "learning_rate": 2.38084708314866e-05, "loss": 0.067047119140625, "step": 2580 }, { "epoch": 0.34893113645965357, "grad_norm": 0.31564804911613464, "learning_rate": 2.380292990351608e-05, "loss": 0.08023738861083984, "step": 2581 }, { "epoch": 0.34906632868610055, "grad_norm": 0.29696330428123474, "learning_rate": 2.3797387142728607e-05, "loss": 0.06979990005493164, "step": 2582 }, { "epoch": 0.3492015209125475, "grad_norm": 0.2819051146507263, "learning_rate": 2.379184255027822e-05, "loss": 0.0637967586517334, "step": 2583 }, { "epoch": 0.3493367131389945, "grad_norm": 0.40146055817604065, "learning_rate": 2.378629612731933e-05, "loss": 0.08315134048461914, "step": 2584 }, { "epoch": 0.3494719053654415, "grad_norm": 0.31889259815216064, "learning_rate": 2.3780747875006735e-05, "loss": 0.05870974063873291, "step": 2585 }, { "epoch": 0.34960709759188846, "grad_norm": 0.3140102028846741, "learning_rate": 2.37751977944956e-05, "loss": 0.0748147964477539, "step": 2586 }, { "epoch": 0.34974228981833544, "grad_norm": 0.37359362840652466, "learning_rate": 2.3769645886941497e-05, "loss": 0.07153606414794922, "step": 2587 }, { "epoch": 0.3498774820447824, "grad_norm": 0.3564010262489319, "learning_rate": 2.376409215350035e-05, "loss": 0.0818319320678711, "step": 2588 }, { "epoch": 0.3500126742712294, "grad_norm": 0.3945556581020355, "learning_rate": 2.3758536595328486e-05, "loss": 0.07982826232910156, "step": 2589 }, { "epoch": 0.3501478664976764, "grad_norm": 0.4441947638988495, "learning_rate": 2.375297921358259e-05, "loss": 0.06662988662719727, "step": 2590 }, { "epoch": 0.35028305872412335, "grad_norm": 0.2367473840713501, "learning_rate": 2.3747420009419745e-05, "loss": 0.043430328369140625, "step": 2591 }, { "epoch": 0.35041825095057033, "grad_norm": 0.4292266368865967, "learning_rate": 2.3741858983997415e-05, "loss": 0.07572412490844727, "step": 2592 }, { "epoch": 0.3505534431770173, "grad_norm": 0.31039565801620483, "learning_rate": 2.373629613847342e-05, "loss": 0.053858041763305664, "step": 2593 }, { "epoch": 0.3506886354034643, "grad_norm": 0.5356007218360901, "learning_rate": 2.3730731474005988e-05, "loss": 0.0842585563659668, "step": 2594 }, { "epoch": 0.35082382762991127, "grad_norm": 0.10539377480745316, "learning_rate": 2.37251649917537e-05, "loss": 0.027021288871765137, "step": 2595 }, { "epoch": 0.35095901985635825, "grad_norm": 0.18666324019432068, "learning_rate": 2.3719596692875534e-05, "loss": 0.05283641815185547, "step": 2596 }, { "epoch": 0.3510942120828052, "grad_norm": 0.6938058137893677, "learning_rate": 2.3714026578530836e-05, "loss": 0.08938217163085938, "step": 2597 }, { "epoch": 0.3512294043092522, "grad_norm": 0.29282745718955994, "learning_rate": 2.370845464987934e-05, "loss": 0.054965972900390625, "step": 2598 }, { "epoch": 0.3513645965356992, "grad_norm": 0.17689616978168488, "learning_rate": 2.370288090808114e-05, "loss": 0.03647947311401367, "step": 2599 }, { "epoch": 0.35149978876214616, "grad_norm": 0.35268816351890564, "learning_rate": 2.369730535429673e-05, "loss": 0.053194522857666016, "step": 2600 }, { "epoch": 0.35163498098859314, "grad_norm": 0.5388017892837524, "learning_rate": 2.369172798968697e-05, "loss": 0.06907200813293457, "step": 2601 }, { "epoch": 0.3517701732150401, "grad_norm": 0.25129905343055725, "learning_rate": 2.3686148815413083e-05, "loss": 0.07374000549316406, "step": 2602 }, { "epoch": 0.3519053654414871, "grad_norm": 0.5436261892318726, "learning_rate": 2.3680567832636695e-05, "loss": 0.10884809494018555, "step": 2603 }, { "epoch": 0.3520405576679341, "grad_norm": 0.5323015451431274, "learning_rate": 2.3674985042519795e-05, "loss": 0.10357809066772461, "step": 2604 }, { "epoch": 0.35217574989438105, "grad_norm": 0.3448658883571625, "learning_rate": 2.366940044622475e-05, "loss": 0.07070350646972656, "step": 2605 }, { "epoch": 0.35231094212082803, "grad_norm": 2.245497941970825, "learning_rate": 2.3663814044914302e-05, "loss": 0.0849158763885498, "step": 2606 }, { "epoch": 0.352446134347275, "grad_norm": 0.4565022587776184, "learning_rate": 2.3658225839751566e-05, "loss": 0.05966758728027344, "step": 2607 }, { "epoch": 0.352581326573722, "grad_norm": 0.25623318552970886, "learning_rate": 2.3652635831900043e-05, "loss": 0.05603742599487305, "step": 2608 }, { "epoch": 0.35271651880016897, "grad_norm": 0.7350065112113953, "learning_rate": 2.3647044022523595e-05, "loss": 0.09103775024414062, "step": 2609 }, { "epoch": 0.35285171102661594, "grad_norm": 0.3453652262687683, "learning_rate": 2.364145041278647e-05, "loss": 0.09230566024780273, "step": 2610 }, { "epoch": 0.3529869032530629, "grad_norm": 0.28571489453315735, "learning_rate": 2.3635855003853287e-05, "loss": 0.06862258911132812, "step": 2611 }, { "epoch": 0.3531220954795099, "grad_norm": 0.38044893741607666, "learning_rate": 2.363025779688904e-05, "loss": 0.0712437629699707, "step": 2612 }, { "epoch": 0.35325728770595693, "grad_norm": 0.3779444396495819, "learning_rate": 2.3624658793059103e-05, "loss": 0.058063507080078125, "step": 2613 }, { "epoch": 0.3533924799324039, "grad_norm": 0.42958560585975647, "learning_rate": 2.3619057993529204e-05, "loss": 0.05938148498535156, "step": 2614 }, { "epoch": 0.3535276721588509, "grad_norm": 0.3385521471500397, "learning_rate": 2.3613455399465475e-05, "loss": 0.07525253295898438, "step": 2615 }, { "epoch": 0.35366286438529787, "grad_norm": 0.35580816864967346, "learning_rate": 2.3607851012034394e-05, "loss": 0.09200477600097656, "step": 2616 }, { "epoch": 0.35379805661174485, "grad_norm": 0.4908611476421356, "learning_rate": 2.3602244832402838e-05, "loss": 0.08040714263916016, "step": 2617 }, { "epoch": 0.3539332488381918, "grad_norm": 0.4449275732040405, "learning_rate": 2.3596636861738024e-05, "loss": 0.08589982986450195, "step": 2618 }, { "epoch": 0.3540684410646388, "grad_norm": 0.22156795859336853, "learning_rate": 2.3591027101207578e-05, "loss": 0.062267184257507324, "step": 2619 }, { "epoch": 0.3542036332910858, "grad_norm": 0.4815278947353363, "learning_rate": 2.3585415551979476e-05, "loss": 0.06960344314575195, "step": 2620 }, { "epoch": 0.35433882551753276, "grad_norm": 0.24333488941192627, "learning_rate": 2.3579802215222076e-05, "loss": 0.047625064849853516, "step": 2621 }, { "epoch": 0.35447401774397974, "grad_norm": 0.23683685064315796, "learning_rate": 2.35741870921041e-05, "loss": 0.05717003345489502, "step": 2622 }, { "epoch": 0.3546092099704267, "grad_norm": 0.9289461970329285, "learning_rate": 2.3568570183794645e-05, "loss": 0.06751084327697754, "step": 2623 }, { "epoch": 0.3547444021968737, "grad_norm": 0.24210478365421295, "learning_rate": 2.356295149146319e-05, "loss": 0.06833076477050781, "step": 2624 }, { "epoch": 0.3548795944233207, "grad_norm": 0.31110408902168274, "learning_rate": 2.3557331016279567e-05, "loss": 0.048918724060058594, "step": 2625 }, { "epoch": 0.35501478664976766, "grad_norm": 0.4204086661338806, "learning_rate": 2.3551708759413998e-05, "loss": 0.10658454895019531, "step": 2626 }, { "epoch": 0.35514997887621463, "grad_norm": 0.627342700958252, "learning_rate": 2.354608472203706e-05, "loss": 0.057012081146240234, "step": 2627 }, { "epoch": 0.3552851711026616, "grad_norm": 0.6301606893539429, "learning_rate": 2.3540458905319705e-05, "loss": 0.07567739486694336, "step": 2628 }, { "epoch": 0.3554203633291086, "grad_norm": 0.8095700740814209, "learning_rate": 2.3534831310433264e-05, "loss": 0.06254839897155762, "step": 2629 }, { "epoch": 0.35555555555555557, "grad_norm": 0.41683945059776306, "learning_rate": 2.3529201938549434e-05, "loss": 0.07245302200317383, "step": 2630 }, { "epoch": 0.35569074778200255, "grad_norm": 0.2649560570716858, "learning_rate": 2.3523570790840274e-05, "loss": 0.07520246505737305, "step": 2631 }, { "epoch": 0.3558259400084495, "grad_norm": 0.3291664719581604, "learning_rate": 2.3517937868478228e-05, "loss": 0.06602144241333008, "step": 2632 }, { "epoch": 0.3559611322348965, "grad_norm": 0.45417171716690063, "learning_rate": 2.3512303172636092e-05, "loss": 0.10899162292480469, "step": 2633 }, { "epoch": 0.3560963244613435, "grad_norm": 0.47209101915359497, "learning_rate": 2.3506666704487033e-05, "loss": 0.08172464370727539, "step": 2634 }, { "epoch": 0.35623151668779046, "grad_norm": 0.2024308145046234, "learning_rate": 2.3501028465204614e-05, "loss": 0.05172157287597656, "step": 2635 }, { "epoch": 0.35636670891423744, "grad_norm": 0.2909577786922455, "learning_rate": 2.3495388455962734e-05, "loss": 0.06408214569091797, "step": 2636 }, { "epoch": 0.3565019011406844, "grad_norm": 0.30045104026794434, "learning_rate": 2.3489746677935673e-05, "loss": 0.06735086441040039, "step": 2637 }, { "epoch": 0.3566370933671314, "grad_norm": 0.29983365535736084, "learning_rate": 2.3484103132298082e-05, "loss": 0.09738349914550781, "step": 2638 }, { "epoch": 0.3567722855935784, "grad_norm": 0.5098674297332764, "learning_rate": 2.347845782022497e-05, "loss": 0.07211780548095703, "step": 2639 }, { "epoch": 0.35690747782002535, "grad_norm": 0.45709922909736633, "learning_rate": 2.3472810742891734e-05, "loss": 0.08301448822021484, "step": 2640 }, { "epoch": 0.35704267004647233, "grad_norm": 0.3854307234287262, "learning_rate": 2.3467161901474118e-05, "loss": 0.06582474708557129, "step": 2641 }, { "epoch": 0.3571778622729193, "grad_norm": 1.0477949380874634, "learning_rate": 2.346151129714824e-05, "loss": 0.11779594421386719, "step": 2642 }, { "epoch": 0.3573130544993663, "grad_norm": 0.5089383125305176, "learning_rate": 2.3455858931090588e-05, "loss": 0.07087850570678711, "step": 2643 }, { "epoch": 0.35744824672581327, "grad_norm": 0.28413328528404236, "learning_rate": 2.3450204804478014e-05, "loss": 0.06539106369018555, "step": 2644 }, { "epoch": 0.35758343895226025, "grad_norm": 0.3827768862247467, "learning_rate": 2.344454891848774e-05, "loss": 0.06665468215942383, "step": 2645 }, { "epoch": 0.3577186311787072, "grad_norm": 0.22406728565692902, "learning_rate": 2.3438891274297348e-05, "loss": 0.05580329895019531, "step": 2646 }, { "epoch": 0.3578538234051542, "grad_norm": 0.2666054964065552, "learning_rate": 2.343323187308479e-05, "loss": 0.036884307861328125, "step": 2647 }, { "epoch": 0.3579890156316012, "grad_norm": 0.32149770855903625, "learning_rate": 2.342757071602839e-05, "loss": 0.05465364456176758, "step": 2648 }, { "epoch": 0.35812420785804816, "grad_norm": 0.4001545011997223, "learning_rate": 2.3421907804306816e-05, "loss": 0.07889938354492188, "step": 2649 }, { "epoch": 0.35825940008449514, "grad_norm": 0.4604174494743347, "learning_rate": 2.341624313909913e-05, "loss": 0.07190680503845215, "step": 2650 }, { "epoch": 0.3583945923109421, "grad_norm": 0.4112929105758667, "learning_rate": 2.3410576721584742e-05, "loss": 0.060394287109375, "step": 2651 }, { "epoch": 0.3585297845373891, "grad_norm": 0.6505587100982666, "learning_rate": 2.3404908552943435e-05, "loss": 0.0673673152923584, "step": 2652 }, { "epoch": 0.3586649767638361, "grad_norm": 0.20990322530269623, "learning_rate": 2.339923863435534e-05, "loss": 0.04921913146972656, "step": 2653 }, { "epoch": 0.35880016899028305, "grad_norm": 0.29582133889198303, "learning_rate": 2.3393566967000974e-05, "loss": 0.08043354749679565, "step": 2654 }, { "epoch": 0.35893536121673003, "grad_norm": 0.540556788444519, "learning_rate": 2.3387893552061202e-05, "loss": 0.0742948055267334, "step": 2655 }, { "epoch": 0.359070553443177, "grad_norm": 0.5607966780662537, "learning_rate": 2.3382218390717268e-05, "loss": 0.05429863929748535, "step": 2656 }, { "epoch": 0.359205745669624, "grad_norm": 0.3039640486240387, "learning_rate": 2.3376541484150762e-05, "loss": 0.054396629333496094, "step": 2657 }, { "epoch": 0.35934093789607097, "grad_norm": 0.28045663237571716, "learning_rate": 2.3370862833543652e-05, "loss": 0.05309939384460449, "step": 2658 }, { "epoch": 0.35947613012251795, "grad_norm": 0.20683476328849792, "learning_rate": 2.336518244007826e-05, "loss": 0.05039215087890625, "step": 2659 }, { "epoch": 0.3596113223489649, "grad_norm": 0.5803465843200684, "learning_rate": 2.3359500304937274e-05, "loss": 0.1053009033203125, "step": 2660 }, { "epoch": 0.3597465145754119, "grad_norm": 0.3155378997325897, "learning_rate": 2.335381642930375e-05, "loss": 0.05952921509742737, "step": 2661 }, { "epoch": 0.3598817068018589, "grad_norm": 0.3066279888153076, "learning_rate": 2.3348130814361094e-05, "loss": 0.08416175842285156, "step": 2662 }, { "epoch": 0.36001689902830586, "grad_norm": 0.3454969525337219, "learning_rate": 2.334244346129309e-05, "loss": 0.08137035369873047, "step": 2663 }, { "epoch": 0.36015209125475284, "grad_norm": 0.3854672312736511, "learning_rate": 2.3336754371283862e-05, "loss": 0.06069779396057129, "step": 2664 }, { "epoch": 0.3602872834811998, "grad_norm": 0.336407333612442, "learning_rate": 2.333106354551792e-05, "loss": 0.07065081596374512, "step": 2665 }, { "epoch": 0.3604224757076468, "grad_norm": 0.22606343030929565, "learning_rate": 2.332537098518012e-05, "loss": 0.056944847106933594, "step": 2666 }, { "epoch": 0.3605576679340938, "grad_norm": 0.23767049610614777, "learning_rate": 2.3319676691455686e-05, "loss": 0.05093240737915039, "step": 2667 }, { "epoch": 0.36069286016054075, "grad_norm": 0.34505635499954224, "learning_rate": 2.3313980665530205e-05, "loss": 0.07906246185302734, "step": 2668 }, { "epoch": 0.36082805238698773, "grad_norm": 0.2485184669494629, "learning_rate": 2.3308282908589606e-05, "loss": 0.0647132396697998, "step": 2669 }, { "epoch": 0.3609632446134347, "grad_norm": 0.8621676564216614, "learning_rate": 2.330258342182021e-05, "loss": 0.08985209465026855, "step": 2670 }, { "epoch": 0.3610984368398817, "grad_norm": 0.21045689284801483, "learning_rate": 2.329688220640866e-05, "loss": 0.05572819709777832, "step": 2671 }, { "epoch": 0.36123362906632867, "grad_norm": 0.2865007519721985, "learning_rate": 2.329117926354199e-05, "loss": 0.07909107208251953, "step": 2672 }, { "epoch": 0.36136882129277564, "grad_norm": 0.35110726952552795, "learning_rate": 2.3285474594407588e-05, "loss": 0.08241653442382812, "step": 2673 }, { "epoch": 0.3615040135192226, "grad_norm": 0.3314117193222046, "learning_rate": 2.327976820019319e-05, "loss": 0.07957983016967773, "step": 2674 }, { "epoch": 0.3616392057456696, "grad_norm": 0.4399498403072357, "learning_rate": 2.32740600820869e-05, "loss": 0.1187286376953125, "step": 2675 }, { "epoch": 0.3617743979721166, "grad_norm": 0.5442261695861816, "learning_rate": 2.326835024127718e-05, "loss": 0.07625532150268555, "step": 2676 }, { "epoch": 0.36190959019856356, "grad_norm": 0.29945287108421326, "learning_rate": 2.326263867895285e-05, "loss": 0.052216529846191406, "step": 2677 }, { "epoch": 0.36204478242501054, "grad_norm": 0.31563931703567505, "learning_rate": 2.3256925396303076e-05, "loss": 0.05013012886047363, "step": 2678 }, { "epoch": 0.3621799746514575, "grad_norm": 0.3799682557582855, "learning_rate": 2.3251210394517412e-05, "loss": 0.09147167205810547, "step": 2679 }, { "epoch": 0.3623151668779045, "grad_norm": 0.13053618371486664, "learning_rate": 2.3245493674785742e-05, "loss": 0.03233528137207031, "step": 2680 }, { "epoch": 0.3624503591043515, "grad_norm": 0.31542643904685974, "learning_rate": 2.3239775238298316e-05, "loss": 0.06459903717041016, "step": 2681 }, { "epoch": 0.3625855513307985, "grad_norm": 0.34938162565231323, "learning_rate": 2.3234055086245744e-05, "loss": 0.06383705139160156, "step": 2682 }, { "epoch": 0.3627207435572455, "grad_norm": 0.49902424216270447, "learning_rate": 2.3228333219818998e-05, "loss": 0.055005550384521484, "step": 2683 }, { "epoch": 0.36285593578369246, "grad_norm": 0.6347744464874268, "learning_rate": 2.3222609640209397e-05, "loss": 0.0708012580871582, "step": 2684 }, { "epoch": 0.36299112801013944, "grad_norm": 0.2834555208683014, "learning_rate": 2.3216884348608614e-05, "loss": 0.0907144546508789, "step": 2685 }, { "epoch": 0.3631263202365864, "grad_norm": 0.4300340414047241, "learning_rate": 2.32111573462087e-05, "loss": 0.06136035919189453, "step": 2686 }, { "epoch": 0.3632615124630334, "grad_norm": 0.46756449341773987, "learning_rate": 2.3205428634202028e-05, "loss": 0.06485748291015625, "step": 2687 }, { "epoch": 0.3633967046894804, "grad_norm": 0.42676329612731934, "learning_rate": 2.3199698213781367e-05, "loss": 0.08089709281921387, "step": 2688 }, { "epoch": 0.36353189691592735, "grad_norm": 0.617524266242981, "learning_rate": 2.319396608613981e-05, "loss": 0.09272432327270508, "step": 2689 }, { "epoch": 0.36366708914237433, "grad_norm": 0.39837923645973206, "learning_rate": 2.318823225247082e-05, "loss": 0.0822758674621582, "step": 2690 }, { "epoch": 0.3638022813688213, "grad_norm": 0.43281444907188416, "learning_rate": 2.3182496713968208e-05, "loss": 0.08322620391845703, "step": 2691 }, { "epoch": 0.3639374735952683, "grad_norm": 0.16380058228969574, "learning_rate": 2.3176759471826143e-05, "loss": 0.04537546634674072, "step": 2692 }, { "epoch": 0.36407266582171527, "grad_norm": 0.39458754658699036, "learning_rate": 2.3171020527239155e-05, "loss": 0.07996082305908203, "step": 2693 }, { "epoch": 0.36420785804816225, "grad_norm": 0.4488235414028168, "learning_rate": 2.316527988140212e-05, "loss": 0.08432817459106445, "step": 2694 }, { "epoch": 0.3643430502746092, "grad_norm": 0.1847764551639557, "learning_rate": 2.315953753551027e-05, "loss": 0.043015480041503906, "step": 2695 }, { "epoch": 0.3644782425010562, "grad_norm": 0.23556862771511078, "learning_rate": 2.3153793490759197e-05, "loss": 0.0568547248840332, "step": 2696 }, { "epoch": 0.3646134347275032, "grad_norm": 0.2863653302192688, "learning_rate": 2.3148047748344835e-05, "loss": 0.05669593811035156, "step": 2697 }, { "epoch": 0.36474862695395016, "grad_norm": 0.23151041567325592, "learning_rate": 2.314230030946348e-05, "loss": 0.04804563522338867, "step": 2698 }, { "epoch": 0.36488381918039714, "grad_norm": 0.2547624409198761, "learning_rate": 2.3136551175311782e-05, "loss": 0.06856918334960938, "step": 2699 }, { "epoch": 0.3650190114068441, "grad_norm": 0.19527021050453186, "learning_rate": 2.313080034708674e-05, "loss": 0.04534578323364258, "step": 2700 }, { "epoch": 0.3651542036332911, "grad_norm": 0.3311425745487213, "learning_rate": 2.312504782598571e-05, "loss": 0.06702995300292969, "step": 2701 }, { "epoch": 0.3652893958597381, "grad_norm": 0.31967419385910034, "learning_rate": 2.311929361320639e-05, "loss": 0.06995820999145508, "step": 2702 }, { "epoch": 0.36542458808618505, "grad_norm": 0.30657532811164856, "learning_rate": 2.311353770994684e-05, "loss": 0.0912940502166748, "step": 2703 }, { "epoch": 0.36555978031263203, "grad_norm": 1.057766079902649, "learning_rate": 2.310778011740548e-05, "loss": 0.11150264739990234, "step": 2704 }, { "epoch": 0.365694972539079, "grad_norm": 0.4057547152042389, "learning_rate": 2.310202083678106e-05, "loss": 0.08566856384277344, "step": 2705 }, { "epoch": 0.365830164765526, "grad_norm": 0.32117873430252075, "learning_rate": 2.3096259869272694e-05, "loss": 0.0414891242980957, "step": 2706 }, { "epoch": 0.36596535699197297, "grad_norm": 0.304269403219223, "learning_rate": 2.309049721607985e-05, "loss": 0.05008554458618164, "step": 2707 }, { "epoch": 0.36610054921841995, "grad_norm": 0.30867356061935425, "learning_rate": 2.3084732878402342e-05, "loss": 0.06404685974121094, "step": 2708 }, { "epoch": 0.3662357414448669, "grad_norm": 0.4098702371120453, "learning_rate": 2.307896685744034e-05, "loss": 0.0984644889831543, "step": 2709 }, { "epoch": 0.3663709336713139, "grad_norm": 0.3380717933177948, "learning_rate": 2.3073199154394352e-05, "loss": 0.059273719787597656, "step": 2710 }, { "epoch": 0.3665061258977609, "grad_norm": 0.28665271401405334, "learning_rate": 2.3067429770465246e-05, "loss": 0.06628751754760742, "step": 2711 }, { "epoch": 0.36664131812420786, "grad_norm": 0.27406877279281616, "learning_rate": 2.3061658706854244e-05, "loss": 0.06778573989868164, "step": 2712 }, { "epoch": 0.36677651035065484, "grad_norm": 0.21890553832054138, "learning_rate": 2.3055885964762907e-05, "loss": 0.0660862922668457, "step": 2713 }, { "epoch": 0.3669117025771018, "grad_norm": 0.22885750234127045, "learning_rate": 2.3050111545393156e-05, "loss": 0.06143760681152344, "step": 2714 }, { "epoch": 0.3670468948035488, "grad_norm": 0.43521565198898315, "learning_rate": 2.304433544994725e-05, "loss": 0.09455585479736328, "step": 2715 }, { "epoch": 0.3671820870299958, "grad_norm": 0.4448837637901306, "learning_rate": 2.303855767962781e-05, "loss": 0.08457422256469727, "step": 2716 }, { "epoch": 0.36731727925644275, "grad_norm": 0.30835726857185364, "learning_rate": 2.303277823563779e-05, "loss": 0.0513911247253418, "step": 2717 }, { "epoch": 0.36745247148288973, "grad_norm": 0.42902329564094543, "learning_rate": 2.3026997119180507e-05, "loss": 0.06301069259643555, "step": 2718 }, { "epoch": 0.3675876637093367, "grad_norm": 0.15876011550426483, "learning_rate": 2.3021214331459616e-05, "loss": 0.039977073669433594, "step": 2719 }, { "epoch": 0.3677228559357837, "grad_norm": 0.3574123978614807, "learning_rate": 2.301542987367913e-05, "loss": 0.07372152805328369, "step": 2720 }, { "epoch": 0.36785804816223067, "grad_norm": 0.27513548731803894, "learning_rate": 2.3009643747043403e-05, "loss": 0.05409669876098633, "step": 2721 }, { "epoch": 0.36799324038867764, "grad_norm": 0.4079545736312866, "learning_rate": 2.3003855952757132e-05, "loss": 0.06157541275024414, "step": 2722 }, { "epoch": 0.3681284326151246, "grad_norm": 0.4085012972354889, "learning_rate": 2.2998066492025372e-05, "loss": 0.08626556396484375, "step": 2723 }, { "epoch": 0.3682636248415716, "grad_norm": 0.28132185339927673, "learning_rate": 2.2992275366053513e-05, "loss": 0.0478367805480957, "step": 2724 }, { "epoch": 0.3683988170680186, "grad_norm": 0.40833789110183716, "learning_rate": 2.2986482576047305e-05, "loss": 0.08284187316894531, "step": 2725 }, { "epoch": 0.36853400929446556, "grad_norm": 0.34776440262794495, "learning_rate": 2.298068812321284e-05, "loss": 0.06432294845581055, "step": 2726 }, { "epoch": 0.36866920152091254, "grad_norm": 0.2508021295070648, "learning_rate": 2.297489200875654e-05, "loss": 0.05548906326293945, "step": 2727 }, { "epoch": 0.3688043937473595, "grad_norm": 0.24563243985176086, "learning_rate": 2.2969094233885204e-05, "loss": 0.04309797286987305, "step": 2728 }, { "epoch": 0.3689395859738065, "grad_norm": 0.23610103130340576, "learning_rate": 2.296329479980595e-05, "loss": 0.05093646049499512, "step": 2729 }, { "epoch": 0.3690747782002535, "grad_norm": 0.22633366286754608, "learning_rate": 2.2957493707726252e-05, "loss": 0.04903268814086914, "step": 2730 }, { "epoch": 0.36920997042670045, "grad_norm": 0.4889355003833771, "learning_rate": 2.2951690958853932e-05, "loss": 0.08364629745483398, "step": 2731 }, { "epoch": 0.36934516265314743, "grad_norm": 0.4934959411621094, "learning_rate": 2.2945886554397154e-05, "loss": 0.07945013046264648, "step": 2732 }, { "epoch": 0.3694803548795944, "grad_norm": 0.5263916850090027, "learning_rate": 2.294008049556441e-05, "loss": 0.08485126495361328, "step": 2733 }, { "epoch": 0.3696155471060414, "grad_norm": 0.3724130094051361, "learning_rate": 2.2934272783564577e-05, "loss": 0.09107446670532227, "step": 2734 }, { "epoch": 0.36975073933248837, "grad_norm": 0.2591290771961212, "learning_rate": 2.2928463419606835e-05, "loss": 0.05467033386230469, "step": 2735 }, { "epoch": 0.36988593155893534, "grad_norm": 0.36286965012550354, "learning_rate": 2.292265240490073e-05, "loss": 0.06613016128540039, "step": 2736 }, { "epoch": 0.3700211237853823, "grad_norm": 0.3119962811470032, "learning_rate": 2.2916839740656154e-05, "loss": 0.06099367141723633, "step": 2737 }, { "epoch": 0.3701563160118293, "grad_norm": 0.23577842116355896, "learning_rate": 2.2911025428083316e-05, "loss": 0.04093170166015625, "step": 2738 }, { "epoch": 0.3702915082382763, "grad_norm": 0.2807522118091583, "learning_rate": 2.2905209468392798e-05, "loss": 0.0690145492553711, "step": 2739 }, { "epoch": 0.37042670046472326, "grad_norm": 0.22506266832351685, "learning_rate": 2.2899391862795514e-05, "loss": 0.058818817138671875, "step": 2740 }, { "epoch": 0.37056189269117024, "grad_norm": 0.15967266261577606, "learning_rate": 2.2893572612502718e-05, "loss": 0.03066563606262207, "step": 2741 }, { "epoch": 0.3706970849176172, "grad_norm": 0.24224527180194855, "learning_rate": 2.2887751718726013e-05, "loss": 0.060857564210891724, "step": 2742 }, { "epoch": 0.3708322771440642, "grad_norm": 0.3642336428165436, "learning_rate": 2.288192918267734e-05, "loss": 0.04384136199951172, "step": 2743 }, { "epoch": 0.37096746937051117, "grad_norm": 0.33621320128440857, "learning_rate": 2.2876105005568974e-05, "loss": 0.07636129856109619, "step": 2744 }, { "epoch": 0.37110266159695815, "grad_norm": 0.37995463609695435, "learning_rate": 2.287027918861355e-05, "loss": 0.09354114532470703, "step": 2745 }, { "epoch": 0.37123785382340513, "grad_norm": 0.2660060226917267, "learning_rate": 2.2864451733024024e-05, "loss": 0.049910545349121094, "step": 2746 }, { "epoch": 0.3713730460498521, "grad_norm": 0.23012079298496246, "learning_rate": 2.2858622640013716e-05, "loss": 0.06747961044311523, "step": 2747 }, { "epoch": 0.3715082382762991, "grad_norm": 0.35055288672447205, "learning_rate": 2.285279191079626e-05, "loss": 0.06960487365722656, "step": 2748 }, { "epoch": 0.3716434305027461, "grad_norm": 0.2419525682926178, "learning_rate": 2.2846959546585656e-05, "loss": 0.06652379035949707, "step": 2749 }, { "epoch": 0.3717786227291931, "grad_norm": 0.2012145072221756, "learning_rate": 2.2841125548596225e-05, "loss": 0.0448918342590332, "step": 2750 }, { "epoch": 0.3719138149556401, "grad_norm": 0.35305511951446533, "learning_rate": 2.2835289918042648e-05, "loss": 0.08181023597717285, "step": 2751 }, { "epoch": 0.37204900718208705, "grad_norm": 0.24050933122634888, "learning_rate": 2.282945265613992e-05, "loss": 0.06129741668701172, "step": 2752 }, { "epoch": 0.37218419940853403, "grad_norm": 0.34321218729019165, "learning_rate": 2.2823613764103406e-05, "loss": 0.09194564819335938, "step": 2753 }, { "epoch": 0.372319391634981, "grad_norm": 0.4667815864086151, "learning_rate": 2.2817773243148776e-05, "loss": 0.08031225204467773, "step": 2754 }, { "epoch": 0.372454583861428, "grad_norm": 0.2626888155937195, "learning_rate": 2.2811931094492074e-05, "loss": 0.048987627029418945, "step": 2755 }, { "epoch": 0.37258977608787497, "grad_norm": 0.5321769118309021, "learning_rate": 2.280608731934966e-05, "loss": 0.09866523742675781, "step": 2756 }, { "epoch": 0.37272496831432195, "grad_norm": 0.4468326270580292, "learning_rate": 2.280024191893823e-05, "loss": 0.08239603042602539, "step": 2757 }, { "epoch": 0.3728601605407689, "grad_norm": 0.36021310091018677, "learning_rate": 2.279439489447485e-05, "loss": 0.07276153564453125, "step": 2758 }, { "epoch": 0.3729953527672159, "grad_norm": 0.17463645339012146, "learning_rate": 2.278854624717688e-05, "loss": 0.043268680572509766, "step": 2759 }, { "epoch": 0.3731305449936629, "grad_norm": 0.28230324387550354, "learning_rate": 2.2782695978262045e-05, "loss": 0.07775115966796875, "step": 2760 }, { "epoch": 0.37326573722010986, "grad_norm": 0.28289878368377686, "learning_rate": 2.2776844088948406e-05, "loss": 0.07141399383544922, "step": 2761 }, { "epoch": 0.37340092944655684, "grad_norm": 0.2537488341331482, "learning_rate": 2.2770990580454364e-05, "loss": 0.06646251678466797, "step": 2762 }, { "epoch": 0.3735361216730038, "grad_norm": 0.3188614845275879, "learning_rate": 2.276513545399864e-05, "loss": 0.06700706481933594, "step": 2763 }, { "epoch": 0.3736713138994508, "grad_norm": 0.3226417601108551, "learning_rate": 2.2759278710800306e-05, "loss": 0.0826559066772461, "step": 2764 }, { "epoch": 0.3738065061258978, "grad_norm": 0.7408802509307861, "learning_rate": 2.275342035207876e-05, "loss": 0.09273117780685425, "step": 2765 }, { "epoch": 0.37394169835234475, "grad_norm": 0.35370224714279175, "learning_rate": 2.2747560379053752e-05, "loss": 0.07136058807373047, "step": 2766 }, { "epoch": 0.37407689057879173, "grad_norm": 0.7217914462089539, "learning_rate": 2.2741698792945364e-05, "loss": 0.07427978515625, "step": 2767 }, { "epoch": 0.3742120828052387, "grad_norm": 0.25097402930259705, "learning_rate": 2.2735835594974003e-05, "loss": 0.05281949043273926, "step": 2768 }, { "epoch": 0.3743472750316857, "grad_norm": 0.27837270498275757, "learning_rate": 2.272997078636042e-05, "loss": 0.06299638748168945, "step": 2769 }, { "epoch": 0.37448246725813267, "grad_norm": 0.43518730998039246, "learning_rate": 2.272410436832569e-05, "loss": 0.0702052116394043, "step": 2770 }, { "epoch": 0.37461765948457965, "grad_norm": 0.2612428069114685, "learning_rate": 2.2718236342091248e-05, "loss": 0.04570198059082031, "step": 2771 }, { "epoch": 0.3747528517110266, "grad_norm": 0.4023292064666748, "learning_rate": 2.2712366708878838e-05, "loss": 0.07886171340942383, "step": 2772 }, { "epoch": 0.3748880439374736, "grad_norm": 0.3393609821796417, "learning_rate": 2.2706495469910552e-05, "loss": 0.048750877380371094, "step": 2773 }, { "epoch": 0.3750232361639206, "grad_norm": 0.3709828555583954, "learning_rate": 2.2700622626408814e-05, "loss": 0.05859220027923584, "step": 2774 }, { "epoch": 0.37515842839036756, "grad_norm": 0.5168056488037109, "learning_rate": 2.2694748179596375e-05, "loss": 0.07851588726043701, "step": 2775 }, { "epoch": 0.37529362061681454, "grad_norm": 0.43829160928726196, "learning_rate": 2.2688872130696342e-05, "loss": 0.0841212272644043, "step": 2776 }, { "epoch": 0.3754288128432615, "grad_norm": 0.640561044216156, "learning_rate": 2.268299448093212e-05, "loss": 0.07652139663696289, "step": 2777 }, { "epoch": 0.3755640050697085, "grad_norm": 0.20402050018310547, "learning_rate": 2.2677115231527482e-05, "loss": 0.05471146106719971, "step": 2778 }, { "epoch": 0.3756991972961555, "grad_norm": 0.22976543009281158, "learning_rate": 2.267123438370651e-05, "loss": 0.05427813529968262, "step": 2779 }, { "epoch": 0.37583438952260245, "grad_norm": 0.6341850757598877, "learning_rate": 2.266535193869363e-05, "loss": 0.08101654052734375, "step": 2780 }, { "epoch": 0.37596958174904943, "grad_norm": 0.5343198776245117, "learning_rate": 2.2659467897713604e-05, "loss": 0.058493614196777344, "step": 2781 }, { "epoch": 0.3761047739754964, "grad_norm": 0.23260414600372314, "learning_rate": 2.2653582261991516e-05, "loss": 0.05374574661254883, "step": 2782 }, { "epoch": 0.3762399662019434, "grad_norm": 0.13126878440380096, "learning_rate": 2.2647695032752785e-05, "loss": 0.03887307643890381, "step": 2783 }, { "epoch": 0.37637515842839037, "grad_norm": 0.37174203991889954, "learning_rate": 2.264180621122317e-05, "loss": 0.08713150024414062, "step": 2784 }, { "epoch": 0.37651035065483734, "grad_norm": 0.3188643753528595, "learning_rate": 2.2635915798628747e-05, "loss": 0.07477092742919922, "step": 2785 }, { "epoch": 0.3766455428812843, "grad_norm": 0.22900445759296417, "learning_rate": 2.2630023796195932e-05, "loss": 0.060585975646972656, "step": 2786 }, { "epoch": 0.3767807351077313, "grad_norm": 0.2584836483001709, "learning_rate": 2.262413020515148e-05, "loss": 0.049193382263183594, "step": 2787 }, { "epoch": 0.3769159273341783, "grad_norm": 0.3672921061515808, "learning_rate": 2.261823502672246e-05, "loss": 0.0650472640991211, "step": 2788 }, { "epoch": 0.37705111956062526, "grad_norm": 0.2643785774707794, "learning_rate": 2.261233826213628e-05, "loss": 0.05163264274597168, "step": 2789 }, { "epoch": 0.37718631178707224, "grad_norm": 0.9889642596244812, "learning_rate": 2.2606439912620688e-05, "loss": 0.11510229110717773, "step": 2790 }, { "epoch": 0.3773215040135192, "grad_norm": 0.3425293266773224, "learning_rate": 2.2600539979403734e-05, "loss": 0.09545040130615234, "step": 2791 }, { "epoch": 0.3774566962399662, "grad_norm": 0.21810343861579895, "learning_rate": 2.259463846371383e-05, "loss": 0.06734371185302734, "step": 2792 }, { "epoch": 0.3775918884664132, "grad_norm": 0.2664501368999481, "learning_rate": 2.2588735366779698e-05, "loss": 0.057622671127319336, "step": 2793 }, { "epoch": 0.37772708069286015, "grad_norm": 0.27079471945762634, "learning_rate": 2.2582830689830394e-05, "loss": 0.04458427429199219, "step": 2794 }, { "epoch": 0.37786227291930713, "grad_norm": 0.23766346275806427, "learning_rate": 2.2576924434095305e-05, "loss": 0.05955839157104492, "step": 2795 }, { "epoch": 0.3779974651457541, "grad_norm": 0.38281500339508057, "learning_rate": 2.257101660080414e-05, "loss": 0.06927323341369629, "step": 2796 }, { "epoch": 0.3781326573722011, "grad_norm": 0.29529163241386414, "learning_rate": 2.256510719118695e-05, "loss": 0.04836249351501465, "step": 2797 }, { "epoch": 0.37826784959864806, "grad_norm": 0.20660586655139923, "learning_rate": 2.2559196206474094e-05, "loss": 0.056540489196777344, "step": 2798 }, { "epoch": 0.37840304182509504, "grad_norm": 0.4235142767429352, "learning_rate": 2.2553283647896287e-05, "loss": 0.043947458267211914, "step": 2799 }, { "epoch": 0.378538234051542, "grad_norm": 0.6575372815132141, "learning_rate": 2.254736951668454e-05, "loss": 0.07664811611175537, "step": 2800 }, { "epoch": 0.378673426277989, "grad_norm": 0.39030948281288147, "learning_rate": 2.2541453814070212e-05, "loss": 0.06126832962036133, "step": 2801 }, { "epoch": 0.378808618504436, "grad_norm": 0.32001739740371704, "learning_rate": 2.2535536541284983e-05, "loss": 0.07765579223632812, "step": 2802 }, { "epoch": 0.37894381073088296, "grad_norm": 0.3228089511394501, "learning_rate": 2.2529617699560857e-05, "loss": 0.04865312576293945, "step": 2803 }, { "epoch": 0.37907900295732994, "grad_norm": 0.2946130931377411, "learning_rate": 2.2523697290130185e-05, "loss": 0.06833744049072266, "step": 2804 }, { "epoch": 0.3792141951837769, "grad_norm": 0.3752012848854065, "learning_rate": 2.251777531422561e-05, "loss": 0.07851028442382812, "step": 2805 }, { "epoch": 0.3793493874102239, "grad_norm": 0.586348831653595, "learning_rate": 2.2511851773080127e-05, "loss": 0.10017108917236328, "step": 2806 }, { "epoch": 0.37948457963667087, "grad_norm": 0.5851037502288818, "learning_rate": 2.2505926667927043e-05, "loss": 0.07167577743530273, "step": 2807 }, { "epoch": 0.37961977186311785, "grad_norm": 0.31651997566223145, "learning_rate": 2.25e-05, "loss": 0.08237814903259277, "step": 2808 }, { "epoch": 0.37975496408956483, "grad_norm": 0.2083374410867691, "learning_rate": 2.2494071770532966e-05, "loss": 0.05229926109313965, "step": 2809 }, { "epoch": 0.3798901563160118, "grad_norm": 0.20976102352142334, "learning_rate": 2.2488141980760223e-05, "loss": 0.050457000732421875, "step": 2810 }, { "epoch": 0.3800253485424588, "grad_norm": 0.28908151388168335, "learning_rate": 2.248221063191639e-05, "loss": 0.0801401138305664, "step": 2811 }, { "epoch": 0.38016054076890576, "grad_norm": 0.29635336995124817, "learning_rate": 2.24762777252364e-05, "loss": 0.05572652816772461, "step": 2812 }, { "epoch": 0.38029573299535274, "grad_norm": 0.43832477927207947, "learning_rate": 2.2470343261955525e-05, "loss": 0.04839181900024414, "step": 2813 }, { "epoch": 0.3804309252217997, "grad_norm": 0.2038612812757492, "learning_rate": 2.246440724330934e-05, "loss": 0.052689552307128906, "step": 2814 }, { "epoch": 0.3805661174482467, "grad_norm": 0.2589648962020874, "learning_rate": 2.2458469670533765e-05, "loss": 0.06357526779174805, "step": 2815 }, { "epoch": 0.3807013096746937, "grad_norm": 0.33596086502075195, "learning_rate": 2.2452530544865034e-05, "loss": 0.08633661270141602, "step": 2816 }, { "epoch": 0.3808365019011407, "grad_norm": 0.2913959324359894, "learning_rate": 2.24465898675397e-05, "loss": 0.08229923248291016, "step": 2817 }, { "epoch": 0.3809716941275877, "grad_norm": 0.53569096326828, "learning_rate": 2.244064763979464e-05, "loss": 0.06948637962341309, "step": 2818 }, { "epoch": 0.38110688635403467, "grad_norm": 0.23542706668376923, "learning_rate": 2.2434703862867068e-05, "loss": 0.06728863716125488, "step": 2819 }, { "epoch": 0.38124207858048165, "grad_norm": 0.35748404264450073, "learning_rate": 2.2428758537994504e-05, "loss": 0.059915125370025635, "step": 2820 }, { "epoch": 0.3813772708069286, "grad_norm": 0.5053253173828125, "learning_rate": 2.24228116664148e-05, "loss": 0.0852808952331543, "step": 2821 }, { "epoch": 0.3815124630333756, "grad_norm": 0.45190510153770447, "learning_rate": 2.2416863249366125e-05, "loss": 0.08397531509399414, "step": 2822 }, { "epoch": 0.3816476552598226, "grad_norm": 0.40200597047805786, "learning_rate": 2.241091328808696e-05, "loss": 0.05139565467834473, "step": 2823 }, { "epoch": 0.38178284748626956, "grad_norm": 0.3294566571712494, "learning_rate": 2.240496178381614e-05, "loss": 0.051227569580078125, "step": 2824 }, { "epoch": 0.38191803971271654, "grad_norm": 0.5295678377151489, "learning_rate": 2.239900873779278e-05, "loss": 0.05830860137939453, "step": 2825 }, { "epoch": 0.3820532319391635, "grad_norm": 0.33402496576309204, "learning_rate": 2.2393054151256352e-05, "loss": 0.05664205551147461, "step": 2826 }, { "epoch": 0.3821884241656105, "grad_norm": 0.37120258808135986, "learning_rate": 2.238709802544662e-05, "loss": 0.09269380569458008, "step": 2827 }, { "epoch": 0.3823236163920575, "grad_norm": 0.5080986618995667, "learning_rate": 2.2381140361603686e-05, "loss": 0.07447850704193115, "step": 2828 }, { "epoch": 0.38245880861850445, "grad_norm": 0.3413444459438324, "learning_rate": 2.237518116096797e-05, "loss": 0.063995361328125, "step": 2829 }, { "epoch": 0.38259400084495143, "grad_norm": 0.25153154134750366, "learning_rate": 2.2369220424780203e-05, "loss": 0.06133317947387695, "step": 2830 }, { "epoch": 0.3827291930713984, "grad_norm": 0.316782683134079, "learning_rate": 2.2363258154281452e-05, "loss": 0.09371709823608398, "step": 2831 }, { "epoch": 0.3828643852978454, "grad_norm": 0.35821273922920227, "learning_rate": 2.2357294350713088e-05, "loss": 0.056162118911743164, "step": 2832 }, { "epoch": 0.38299957752429237, "grad_norm": 0.33244818449020386, "learning_rate": 2.2351329015316802e-05, "loss": 0.06569063663482666, "step": 2833 }, { "epoch": 0.38313476975073935, "grad_norm": 0.7121259570121765, "learning_rate": 2.2345362149334613e-05, "loss": 0.07290458679199219, "step": 2834 }, { "epoch": 0.3832699619771863, "grad_norm": 0.3778751790523529, "learning_rate": 2.2339393754008854e-05, "loss": 0.07805752754211426, "step": 2835 }, { "epoch": 0.3834051542036333, "grad_norm": 0.46667975187301636, "learning_rate": 2.233342383058218e-05, "loss": 0.09800231456756592, "step": 2836 }, { "epoch": 0.3835403464300803, "grad_norm": 0.1805945634841919, "learning_rate": 2.2327452380297554e-05, "loss": 0.042798519134521484, "step": 2837 }, { "epoch": 0.38367553865652726, "grad_norm": 0.26929038763046265, "learning_rate": 2.232147940439827e-05, "loss": 0.06944727897644043, "step": 2838 }, { "epoch": 0.38381073088297424, "grad_norm": 0.3745846450328827, "learning_rate": 2.2315504904127936e-05, "loss": 0.08572673797607422, "step": 2839 }, { "epoch": 0.3839459231094212, "grad_norm": 0.25058165192604065, "learning_rate": 2.2309528880730463e-05, "loss": 0.06348562240600586, "step": 2840 }, { "epoch": 0.3840811153358682, "grad_norm": 0.20035450160503387, "learning_rate": 2.2303551335450096e-05, "loss": 0.03886604309082031, "step": 2841 }, { "epoch": 0.3842163075623152, "grad_norm": 0.43715858459472656, "learning_rate": 2.2297572269531398e-05, "loss": 0.05697441101074219, "step": 2842 }, { "epoch": 0.38435149978876215, "grad_norm": 0.23630057275295258, "learning_rate": 2.2291591684219243e-05, "loss": 0.06506586074829102, "step": 2843 }, { "epoch": 0.38448669201520913, "grad_norm": 0.3400982618331909, "learning_rate": 2.2285609580758806e-05, "loss": 0.04954862594604492, "step": 2844 }, { "epoch": 0.3846218842416561, "grad_norm": 0.2969959080219269, "learning_rate": 2.227962596039561e-05, "loss": 0.06925249099731445, "step": 2845 }, { "epoch": 0.3847570764681031, "grad_norm": 0.44228595495224, "learning_rate": 2.2273640824375462e-05, "loss": 0.06440496444702148, "step": 2846 }, { "epoch": 0.38489226869455007, "grad_norm": 0.43075305223464966, "learning_rate": 2.2267654173944515e-05, "loss": 0.0472872257232666, "step": 2847 }, { "epoch": 0.38502746092099704, "grad_norm": 0.24376831948757172, "learning_rate": 2.2261666010349212e-05, "loss": 0.04905128479003906, "step": 2848 }, { "epoch": 0.385162653147444, "grad_norm": 0.31551626324653625, "learning_rate": 2.2255676334836317e-05, "loss": 0.07819223403930664, "step": 2849 }, { "epoch": 0.385297845373891, "grad_norm": 0.21241162717342377, "learning_rate": 2.2249685148652917e-05, "loss": 0.05205106735229492, "step": 2850 }, { "epoch": 0.385433037600338, "grad_norm": 0.5643239617347717, "learning_rate": 2.224369245304641e-05, "loss": 0.0682516098022461, "step": 2851 }, { "epoch": 0.38556822982678496, "grad_norm": 0.7587423920631409, "learning_rate": 2.2237698249264507e-05, "loss": 0.07445049285888672, "step": 2852 }, { "epoch": 0.38570342205323194, "grad_norm": 0.22339507937431335, "learning_rate": 2.2231702538555235e-05, "loss": 0.06548738479614258, "step": 2853 }, { "epoch": 0.3858386142796789, "grad_norm": 0.4586602747440338, "learning_rate": 2.2225705322166928e-05, "loss": 0.0638885498046875, "step": 2854 }, { "epoch": 0.3859738065061259, "grad_norm": 0.7055124640464783, "learning_rate": 2.2219706601348242e-05, "loss": 0.06122446060180664, "step": 2855 }, { "epoch": 0.3861089987325729, "grad_norm": 0.5644597411155701, "learning_rate": 2.221370637734814e-05, "loss": 0.071929931640625, "step": 2856 }, { "epoch": 0.38624419095901985, "grad_norm": 0.4933735132217407, "learning_rate": 2.22077046514159e-05, "loss": 0.08907699584960938, "step": 2857 }, { "epoch": 0.38637938318546683, "grad_norm": 0.31833040714263916, "learning_rate": 2.220170142480112e-05, "loss": 0.06456661224365234, "step": 2858 }, { "epoch": 0.3865145754119138, "grad_norm": 0.21775972843170166, "learning_rate": 2.2195696698753695e-05, "loss": 0.0671839714050293, "step": 2859 }, { "epoch": 0.3866497676383608, "grad_norm": 0.21280226111412048, "learning_rate": 2.2189690474523844e-05, "loss": 0.03300642967224121, "step": 2860 }, { "epoch": 0.38678495986480776, "grad_norm": 0.39132893085479736, "learning_rate": 2.21836827533621e-05, "loss": 0.07460212707519531, "step": 2861 }, { "epoch": 0.38692015209125474, "grad_norm": 0.24174456298351288, "learning_rate": 2.2177673536519297e-05, "loss": 0.04238176345825195, "step": 2862 }, { "epoch": 0.3870553443177017, "grad_norm": 0.7087098360061646, "learning_rate": 2.217166282524659e-05, "loss": 0.08649265766143799, "step": 2863 }, { "epoch": 0.3871905365441487, "grad_norm": 0.44046109914779663, "learning_rate": 2.216565062079544e-05, "loss": 0.07009744644165039, "step": 2864 }, { "epoch": 0.3873257287705957, "grad_norm": 0.6048197150230408, "learning_rate": 2.2159636924417612e-05, "loss": 0.05726814270019531, "step": 2865 }, { "epoch": 0.38746092099704266, "grad_norm": 0.48509982228279114, "learning_rate": 2.2153621737365205e-05, "loss": 0.08022212982177734, "step": 2866 }, { "epoch": 0.38759611322348964, "grad_norm": 0.3172224462032318, "learning_rate": 2.2147605060890598e-05, "loss": 0.05950450897216797, "step": 2867 }, { "epoch": 0.3877313054499366, "grad_norm": 0.3830418586730957, "learning_rate": 2.2141586896246503e-05, "loss": 0.04602622985839844, "step": 2868 }, { "epoch": 0.3878664976763836, "grad_norm": 0.3074946999549866, "learning_rate": 2.2135567244685933e-05, "loss": 0.057450294494628906, "step": 2869 }, { "epoch": 0.38800168990283057, "grad_norm": 0.4040433168411255, "learning_rate": 2.2129546107462214e-05, "loss": 0.09191322326660156, "step": 2870 }, { "epoch": 0.38813688212927755, "grad_norm": 0.26097050309181213, "learning_rate": 2.212352348582897e-05, "loss": 0.07602787017822266, "step": 2871 }, { "epoch": 0.38827207435572453, "grad_norm": 0.21725687384605408, "learning_rate": 2.2117499381040157e-05, "loss": 0.06807947158813477, "step": 2872 }, { "epoch": 0.3884072665821715, "grad_norm": 0.3893558382987976, "learning_rate": 2.211147379435001e-05, "loss": 0.07411360740661621, "step": 2873 }, { "epoch": 0.3885424588086185, "grad_norm": 0.41507139801979065, "learning_rate": 2.2105446727013098e-05, "loss": 0.08133411407470703, "step": 2874 }, { "epoch": 0.38867765103506546, "grad_norm": 0.5074864625930786, "learning_rate": 2.209941818028429e-05, "loss": 0.08805179595947266, "step": 2875 }, { "epoch": 0.38881284326151244, "grad_norm": 0.3369874358177185, "learning_rate": 2.2093388155418757e-05, "loss": 0.05635690689086914, "step": 2876 }, { "epoch": 0.3889480354879594, "grad_norm": 0.42361295223236084, "learning_rate": 2.2087356653671982e-05, "loss": 0.07360386848449707, "step": 2877 }, { "epoch": 0.3890832277144064, "grad_norm": 0.13845252990722656, "learning_rate": 2.2081323676299756e-05, "loss": 0.04274892807006836, "step": 2878 }, { "epoch": 0.3892184199408534, "grad_norm": 0.317159503698349, "learning_rate": 2.207528922455818e-05, "loss": 0.053198814392089844, "step": 2879 }, { "epoch": 0.38935361216730036, "grad_norm": 0.19099321961402893, "learning_rate": 2.206925329970366e-05, "loss": 0.04053235054016113, "step": 2880 }, { "epoch": 0.38948880439374733, "grad_norm": 0.28317663073539734, "learning_rate": 2.20632159029929e-05, "loss": 0.08286499977111816, "step": 2881 }, { "epoch": 0.3896239966201943, "grad_norm": 0.401815265417099, "learning_rate": 2.2057177035682926e-05, "loss": 0.07370281219482422, "step": 2882 }, { "epoch": 0.3897591888466413, "grad_norm": 0.42706337571144104, "learning_rate": 2.2051136699031058e-05, "loss": 0.07823848724365234, "step": 2883 }, { "epoch": 0.38989438107308827, "grad_norm": 0.4053654074668884, "learning_rate": 2.2045094894294933e-05, "loss": 0.07383918762207031, "step": 2884 }, { "epoch": 0.3900295732995353, "grad_norm": 0.3096252977848053, "learning_rate": 2.203905162273248e-05, "loss": 0.06415963172912598, "step": 2885 }, { "epoch": 0.3901647655259823, "grad_norm": 0.5765342116355896, "learning_rate": 2.203300688560194e-05, "loss": 0.08324623107910156, "step": 2886 }, { "epoch": 0.39029995775242926, "grad_norm": 0.3492313623428345, "learning_rate": 2.2026960684161862e-05, "loss": 0.052733659744262695, "step": 2887 }, { "epoch": 0.39043514997887624, "grad_norm": 0.2543022930622101, "learning_rate": 2.2020913019671097e-05, "loss": 0.056778907775878906, "step": 2888 }, { "epoch": 0.3905703422053232, "grad_norm": 0.22441573441028595, "learning_rate": 2.20148638933888e-05, "loss": 0.05511680245399475, "step": 2889 }, { "epoch": 0.3907055344317702, "grad_norm": 0.2703585922718048, "learning_rate": 2.2008813306574438e-05, "loss": 0.05136585235595703, "step": 2890 }, { "epoch": 0.3908407266582172, "grad_norm": 0.5141708254814148, "learning_rate": 2.200276126048777e-05, "loss": 0.08012866973876953, "step": 2891 }, { "epoch": 0.39097591888466415, "grad_norm": 0.33381491899490356, "learning_rate": 2.199670775638886e-05, "loss": 0.0890340805053711, "step": 2892 }, { "epoch": 0.39111111111111113, "grad_norm": 0.34990614652633667, "learning_rate": 2.1990652795538085e-05, "loss": 0.05481719970703125, "step": 2893 }, { "epoch": 0.3912463033375581, "grad_norm": 0.28374919295310974, "learning_rate": 2.1984596379196117e-05, "loss": 0.054431915283203125, "step": 2894 }, { "epoch": 0.3913814955640051, "grad_norm": 0.2645837068557739, "learning_rate": 2.1978538508623942e-05, "loss": 0.08042633533477783, "step": 2895 }, { "epoch": 0.39151668779045207, "grad_norm": 0.42274656891822815, "learning_rate": 2.197247918508283e-05, "loss": 0.0625143051147461, "step": 2896 }, { "epoch": 0.39165188001689905, "grad_norm": 0.49335598945617676, "learning_rate": 2.1966418409834374e-05, "loss": 0.06671142578125, "step": 2897 }, { "epoch": 0.391787072243346, "grad_norm": 0.3548906445503235, "learning_rate": 2.1960356184140453e-05, "loss": 0.0783998966217041, "step": 2898 }, { "epoch": 0.391922264469793, "grad_norm": 0.2631221413612366, "learning_rate": 2.1954292509263258e-05, "loss": 0.055816650390625, "step": 2899 }, { "epoch": 0.39205745669624, "grad_norm": 0.18816456198692322, "learning_rate": 2.194822738646528e-05, "loss": 0.046355485916137695, "step": 2900 }, { "epoch": 0.39219264892268696, "grad_norm": 0.41053277254104614, "learning_rate": 2.1942160817009304e-05, "loss": 0.07032132148742676, "step": 2901 }, { "epoch": 0.39232784114913394, "grad_norm": 0.7297241687774658, "learning_rate": 2.193609280215843e-05, "loss": 0.08548355102539062, "step": 2902 }, { "epoch": 0.3924630333755809, "grad_norm": 0.5028813481330872, "learning_rate": 2.1930023343176044e-05, "loss": 0.07327556610107422, "step": 2903 }, { "epoch": 0.3925982256020279, "grad_norm": 0.2076469361782074, "learning_rate": 2.1923952441325837e-05, "loss": 0.07343053817749023, "step": 2904 }, { "epoch": 0.3927334178284749, "grad_norm": 0.26775214076042175, "learning_rate": 2.191788009787182e-05, "loss": 0.05754566192626953, "step": 2905 }, { "epoch": 0.39286861005492185, "grad_norm": 0.3015051782131195, "learning_rate": 2.1911806314078267e-05, "loss": 0.0705561637878418, "step": 2906 }, { "epoch": 0.39300380228136883, "grad_norm": 0.34772127866744995, "learning_rate": 2.1905731091209786e-05, "loss": 0.081695556640625, "step": 2907 }, { "epoch": 0.3931389945078158, "grad_norm": 0.3780077397823334, "learning_rate": 2.1899654430531262e-05, "loss": 0.10386800765991211, "step": 2908 }, { "epoch": 0.3932741867342628, "grad_norm": 0.37237271666526794, "learning_rate": 2.18935763333079e-05, "loss": 0.06761503219604492, "step": 2909 }, { "epoch": 0.39340937896070977, "grad_norm": 0.22529982030391693, "learning_rate": 2.1887496800805175e-05, "loss": 0.06626415252685547, "step": 2910 }, { "epoch": 0.39354457118715674, "grad_norm": 0.3191796541213989, "learning_rate": 2.188141583428889e-05, "loss": 0.0892951488494873, "step": 2911 }, { "epoch": 0.3936797634136037, "grad_norm": 0.14784513413906097, "learning_rate": 2.1875333435025138e-05, "loss": 0.026790857315063477, "step": 2912 }, { "epoch": 0.3938149556400507, "grad_norm": 0.12898798286914825, "learning_rate": 2.1869249604280296e-05, "loss": 0.034829139709472656, "step": 2913 }, { "epoch": 0.3939501478664977, "grad_norm": 0.39526745676994324, "learning_rate": 2.1863164343321057e-05, "loss": 0.06908988952636719, "step": 2914 }, { "epoch": 0.39408534009294466, "grad_norm": 0.38009893894195557, "learning_rate": 2.1857077653414397e-05, "loss": 0.08252692222595215, "step": 2915 }, { "epoch": 0.39422053231939164, "grad_norm": 0.2364497035741806, "learning_rate": 2.185098953582761e-05, "loss": 0.0792229175567627, "step": 2916 }, { "epoch": 0.3943557245458386, "grad_norm": 0.35587725043296814, "learning_rate": 2.1844899991828265e-05, "loss": 0.0800466537475586, "step": 2917 }, { "epoch": 0.3944909167722856, "grad_norm": 0.5487125515937805, "learning_rate": 2.1838809022684247e-05, "loss": 0.0685577392578125, "step": 2918 }, { "epoch": 0.39462610899873257, "grad_norm": 0.3930009603500366, "learning_rate": 2.1832716629663712e-05, "loss": 0.05109715461730957, "step": 2919 }, { "epoch": 0.39476130122517955, "grad_norm": 0.3796512484550476, "learning_rate": 2.1826622814035138e-05, "loss": 0.0814366340637207, "step": 2920 }, { "epoch": 0.39489649345162653, "grad_norm": 0.29777342081069946, "learning_rate": 2.1820527577067293e-05, "loss": 0.07606244087219238, "step": 2921 }, { "epoch": 0.3950316856780735, "grad_norm": 0.24324575066566467, "learning_rate": 2.1814430920029238e-05, "loss": 0.0659322738647461, "step": 2922 }, { "epoch": 0.3951668779045205, "grad_norm": 0.28217342495918274, "learning_rate": 2.1808332844190325e-05, "loss": 0.07102012634277344, "step": 2923 }, { "epoch": 0.39530207013096746, "grad_norm": 0.3461306095123291, "learning_rate": 2.1802233350820203e-05, "loss": 0.06911420822143555, "step": 2924 }, { "epoch": 0.39543726235741444, "grad_norm": 0.40410807728767395, "learning_rate": 2.179613244118883e-05, "loss": 0.0623631477355957, "step": 2925 }, { "epoch": 0.3955724545838614, "grad_norm": 0.3982985019683838, "learning_rate": 2.1790030116566436e-05, "loss": 0.06379342079162598, "step": 2926 }, { "epoch": 0.3957076468103084, "grad_norm": 0.31446436047554016, "learning_rate": 2.1783926378223563e-05, "loss": 0.07854652404785156, "step": 2927 }, { "epoch": 0.3958428390367554, "grad_norm": 0.26369568705558777, "learning_rate": 2.1777821227431048e-05, "loss": 0.06229257583618164, "step": 2928 }, { "epoch": 0.39597803126320236, "grad_norm": 0.372574120759964, "learning_rate": 2.1771714665460005e-05, "loss": 0.07358336448669434, "step": 2929 }, { "epoch": 0.39611322348964934, "grad_norm": 0.27585744857788086, "learning_rate": 2.1765606693581857e-05, "loss": 0.0627450942993164, "step": 2930 }, { "epoch": 0.3962484157160963, "grad_norm": 0.3051319420337677, "learning_rate": 2.1759497313068316e-05, "loss": 0.09266376495361328, "step": 2931 }, { "epoch": 0.3963836079425433, "grad_norm": 0.421512633562088, "learning_rate": 2.175338652519139e-05, "loss": 0.07506465911865234, "step": 2932 }, { "epoch": 0.39651880016899027, "grad_norm": 0.4446544051170349, "learning_rate": 2.1747274331223377e-05, "loss": 0.07260560989379883, "step": 2933 }, { "epoch": 0.39665399239543725, "grad_norm": 0.2291414588689804, "learning_rate": 2.1741160732436865e-05, "loss": 0.06614303588867188, "step": 2934 }, { "epoch": 0.39678918462188423, "grad_norm": 0.34969449043273926, "learning_rate": 2.1735045730104746e-05, "loss": 0.10336685180664062, "step": 2935 }, { "epoch": 0.3969243768483312, "grad_norm": 0.22155889868736267, "learning_rate": 2.1728929325500183e-05, "loss": 0.046473026275634766, "step": 2936 }, { "epoch": 0.3970595690747782, "grad_norm": 0.3308123052120209, "learning_rate": 2.1722811519896654e-05, "loss": 0.061977386474609375, "step": 2937 }, { "epoch": 0.39719476130122516, "grad_norm": 0.3256804943084717, "learning_rate": 2.171669231456792e-05, "loss": 0.06390666961669922, "step": 2938 }, { "epoch": 0.39732995352767214, "grad_norm": 0.35173648595809937, "learning_rate": 2.1710571710788025e-05, "loss": 0.07680559158325195, "step": 2939 }, { "epoch": 0.3974651457541191, "grad_norm": 0.36809730529785156, "learning_rate": 2.1704449709831312e-05, "loss": 0.040982723236083984, "step": 2940 }, { "epoch": 0.3976003379805661, "grad_norm": 0.29116567969322205, "learning_rate": 2.1698326312972423e-05, "loss": 0.05270671844482422, "step": 2941 }, { "epoch": 0.3977355302070131, "grad_norm": 0.16820156574249268, "learning_rate": 2.1692201521486268e-05, "loss": 0.0484539270401001, "step": 2942 }, { "epoch": 0.39787072243346006, "grad_norm": 0.29799380898475647, "learning_rate": 2.1686075336648075e-05, "loss": 0.07322835922241211, "step": 2943 }, { "epoch": 0.39800591465990703, "grad_norm": 0.43065980076789856, "learning_rate": 2.167994775973334e-05, "loss": 0.06460905075073242, "step": 2944 }, { "epoch": 0.398141106886354, "grad_norm": 0.37359461188316345, "learning_rate": 2.167381879201786e-05, "loss": 0.06734895706176758, "step": 2945 }, { "epoch": 0.398276299112801, "grad_norm": 0.45527586340904236, "learning_rate": 2.166768843477772e-05, "loss": 0.06209278106689453, "step": 2946 }, { "epoch": 0.39841149133924797, "grad_norm": 0.3737458288669586, "learning_rate": 2.166155668928929e-05, "loss": 0.06509113311767578, "step": 2947 }, { "epoch": 0.39854668356569495, "grad_norm": 0.2930571734905243, "learning_rate": 2.1655423556829233e-05, "loss": 0.04776430130004883, "step": 2948 }, { "epoch": 0.3986818757921419, "grad_norm": 0.2965002954006195, "learning_rate": 2.1649289038674504e-05, "loss": 0.07193899154663086, "step": 2949 }, { "epoch": 0.3988170680185889, "grad_norm": 0.20768171548843384, "learning_rate": 2.1643153136102333e-05, "loss": 0.05431318283081055, "step": 2950 }, { "epoch": 0.3989522602450359, "grad_norm": 0.2334291785955429, "learning_rate": 2.1637015850390255e-05, "loss": 0.05737948417663574, "step": 2951 }, { "epoch": 0.3990874524714829, "grad_norm": 0.27689775824546814, "learning_rate": 2.1630877182816087e-05, "loss": 0.06384515762329102, "step": 2952 }, { "epoch": 0.3992226446979299, "grad_norm": 0.3566146790981293, "learning_rate": 2.162473713465793e-05, "loss": 0.07159090042114258, "step": 2953 }, { "epoch": 0.3993578369243769, "grad_norm": 0.34134575724601746, "learning_rate": 2.161859570719417e-05, "loss": 0.07120990753173828, "step": 2954 }, { "epoch": 0.39949302915082385, "grad_norm": 0.49748843908309937, "learning_rate": 2.161245290170349e-05, "loss": 0.10651493072509766, "step": 2955 }, { "epoch": 0.39962822137727083, "grad_norm": 0.3313961327075958, "learning_rate": 2.1606308719464858e-05, "loss": 0.062203407287597656, "step": 2956 }, { "epoch": 0.3997634136037178, "grad_norm": 0.304697722196579, "learning_rate": 2.160016316175752e-05, "loss": 0.06562328338623047, "step": 2957 }, { "epoch": 0.3998986058301648, "grad_norm": 0.20526127517223358, "learning_rate": 2.159401622986101e-05, "loss": 0.05479693412780762, "step": 2958 }, { "epoch": 0.40003379805661177, "grad_norm": 0.1999506652355194, "learning_rate": 2.1587867925055165e-05, "loss": 0.05121326446533203, "step": 2959 }, { "epoch": 0.40016899028305875, "grad_norm": 0.3261958360671997, "learning_rate": 2.158171824862008e-05, "loss": 0.06938362121582031, "step": 2960 }, { "epoch": 0.4003041825095057, "grad_norm": 0.538030743598938, "learning_rate": 2.157556720183616e-05, "loss": 0.0921485424041748, "step": 2961 }, { "epoch": 0.4004393747359527, "grad_norm": 0.28004932403564453, "learning_rate": 2.156941478598409e-05, "loss": 0.05296158790588379, "step": 2962 }, { "epoch": 0.4005745669623997, "grad_norm": 0.2439052164554596, "learning_rate": 2.156326100234482e-05, "loss": 0.05390024185180664, "step": 2963 }, { "epoch": 0.40070975918884666, "grad_norm": 0.30761170387268066, "learning_rate": 2.1557105852199612e-05, "loss": 0.0789484977722168, "step": 2964 }, { "epoch": 0.40084495141529364, "grad_norm": 0.45547664165496826, "learning_rate": 2.155094933683e-05, "loss": 0.07648730278015137, "step": 2965 }, { "epoch": 0.4009801436417406, "grad_norm": 0.22745536267757416, "learning_rate": 2.1544791457517802e-05, "loss": 0.05563545227050781, "step": 2966 }, { "epoch": 0.4011153358681876, "grad_norm": 0.2178754061460495, "learning_rate": 2.1538632215545126e-05, "loss": 0.05500507354736328, "step": 2967 }, { "epoch": 0.4012505280946346, "grad_norm": 0.34506499767303467, "learning_rate": 2.153247161219435e-05, "loss": 0.061418771743774414, "step": 2968 }, { "epoch": 0.40138572032108155, "grad_norm": 0.6026394963264465, "learning_rate": 2.1526309648748147e-05, "loss": 0.08974194526672363, "step": 2969 }, { "epoch": 0.40152091254752853, "grad_norm": 0.2664487361907959, "learning_rate": 2.1520146326489476e-05, "loss": 0.07912063598632812, "step": 2970 }, { "epoch": 0.4016561047739755, "grad_norm": 0.39691248536109924, "learning_rate": 2.151398164670157e-05, "loss": 0.08194828033447266, "step": 2971 }, { "epoch": 0.4017912970004225, "grad_norm": 0.39125579595565796, "learning_rate": 2.1507815610667948e-05, "loss": 0.056514739990234375, "step": 2972 }, { "epoch": 0.40192648922686947, "grad_norm": 0.34392523765563965, "learning_rate": 2.1501648219672407e-05, "loss": 0.06239795684814453, "step": 2973 }, { "epoch": 0.40206168145331644, "grad_norm": 0.6169334650039673, "learning_rate": 2.149547947499904e-05, "loss": 0.09360408782958984, "step": 2974 }, { "epoch": 0.4021968736797634, "grad_norm": 0.5909185409545898, "learning_rate": 2.1489309377932212e-05, "loss": 0.07928919792175293, "step": 2975 }, { "epoch": 0.4023320659062104, "grad_norm": 0.5892732739448547, "learning_rate": 2.1483137929756562e-05, "loss": 0.08875560760498047, "step": 2976 }, { "epoch": 0.4024672581326574, "grad_norm": 0.3181554973125458, "learning_rate": 2.147696513175702e-05, "loss": 0.06846928596496582, "step": 2977 }, { "epoch": 0.40260245035910436, "grad_norm": 0.25439995527267456, "learning_rate": 2.1470790985218804e-05, "loss": 0.05667448043823242, "step": 2978 }, { "epoch": 0.40273764258555134, "grad_norm": 0.40789681673049927, "learning_rate": 2.1464615491427393e-05, "loss": 0.05379605293273926, "step": 2979 }, { "epoch": 0.4028728348119983, "grad_norm": 0.16324302554130554, "learning_rate": 2.1458438651668567e-05, "loss": 0.04113578796386719, "step": 2980 }, { "epoch": 0.4030080270384453, "grad_norm": 0.44761011004447937, "learning_rate": 2.1452260467228376e-05, "loss": 0.04816389083862305, "step": 2981 }, { "epoch": 0.40314321926489227, "grad_norm": 1.3353536128997803, "learning_rate": 2.144608093939314e-05, "loss": 0.14235496520996094, "step": 2982 }, { "epoch": 0.40327841149133925, "grad_norm": 0.37307024002075195, "learning_rate": 2.1439900069449483e-05, "loss": 0.05567169189453125, "step": 2983 }, { "epoch": 0.40341360371778623, "grad_norm": 0.25737079977989197, "learning_rate": 2.1433717858684286e-05, "loss": 0.06741142272949219, "step": 2984 }, { "epoch": 0.4035487959442332, "grad_norm": 0.4321408271789551, "learning_rate": 2.1427534308384724e-05, "loss": 0.07166290283203125, "step": 2985 }, { "epoch": 0.4036839881706802, "grad_norm": 0.4518645703792572, "learning_rate": 2.1421349419838245e-05, "loss": 0.09037590026855469, "step": 2986 }, { "epoch": 0.40381918039712716, "grad_norm": 0.391391396522522, "learning_rate": 2.1415163194332574e-05, "loss": 0.08176469802856445, "step": 2987 }, { "epoch": 0.40395437262357414, "grad_norm": 0.27435001730918884, "learning_rate": 2.1408975633155715e-05, "loss": 0.07512962818145752, "step": 2988 }, { "epoch": 0.4040895648500211, "grad_norm": 0.4362889230251312, "learning_rate": 2.140278673759595e-05, "loss": 0.0748453140258789, "step": 2989 }, { "epoch": 0.4042247570764681, "grad_norm": 0.656727135181427, "learning_rate": 2.1396596508941847e-05, "loss": 0.0941476821899414, "step": 2990 }, { "epoch": 0.4043599493029151, "grad_norm": 0.34294772148132324, "learning_rate": 2.1390404948482238e-05, "loss": 0.0550762414932251, "step": 2991 }, { "epoch": 0.40449514152936206, "grad_norm": 0.2868243157863617, "learning_rate": 2.1384212057506243e-05, "loss": 0.06558704376220703, "step": 2992 }, { "epoch": 0.40463033375580904, "grad_norm": 0.18572582304477692, "learning_rate": 2.137801783730325e-05, "loss": 0.059659481048583984, "step": 2993 }, { "epoch": 0.404765525982256, "grad_norm": 0.3745408058166504, "learning_rate": 2.137182228916293e-05, "loss": 0.0683131217956543, "step": 2994 }, { "epoch": 0.404900718208703, "grad_norm": 0.2456149309873581, "learning_rate": 2.136562541437523e-05, "loss": 0.05441570281982422, "step": 2995 }, { "epoch": 0.40503591043514997, "grad_norm": 0.18396519124507904, "learning_rate": 2.135942721423038e-05, "loss": 0.0449833869934082, "step": 2996 }, { "epoch": 0.40517110266159695, "grad_norm": 0.3149019777774811, "learning_rate": 2.1353227690018865e-05, "loss": 0.059523582458496094, "step": 2997 }, { "epoch": 0.4053062948880439, "grad_norm": 0.3205578625202179, "learning_rate": 2.1347026843031467e-05, "loss": 0.07893157005310059, "step": 2998 }, { "epoch": 0.4054414871144909, "grad_norm": 0.6945152282714844, "learning_rate": 2.1340824674559238e-05, "loss": 0.08907890319824219, "step": 2999 }, { "epoch": 0.4055766793409379, "grad_norm": 0.2924824357032776, "learning_rate": 2.133462118589349e-05, "loss": 0.06456422805786133, "step": 3000 }, { "epoch": 0.40571187156738486, "grad_norm": 0.13423191010951996, "learning_rate": 2.1328416378325837e-05, "loss": 0.03351247310638428, "step": 3001 }, { "epoch": 0.40584706379383184, "grad_norm": 0.2060016691684723, "learning_rate": 2.1322210253148144e-05, "loss": 0.04142570495605469, "step": 3002 }, { "epoch": 0.4059822560202788, "grad_norm": 0.20809145271778107, "learning_rate": 2.131600281165257e-05, "loss": 0.04394388198852539, "step": 3003 }, { "epoch": 0.4061174482467258, "grad_norm": 0.3027481734752655, "learning_rate": 2.130979405513152e-05, "loss": 0.051453351974487305, "step": 3004 }, { "epoch": 0.4062526404731728, "grad_norm": 0.40413138270378113, "learning_rate": 2.1303583984877697e-05, "loss": 0.07988333702087402, "step": 3005 }, { "epoch": 0.40638783269961976, "grad_norm": 0.24105243384838104, "learning_rate": 2.1297372602184085e-05, "loss": 0.05415964126586914, "step": 3006 }, { "epoch": 0.40652302492606673, "grad_norm": 0.597404956817627, "learning_rate": 2.1291159908343907e-05, "loss": 0.06812691688537598, "step": 3007 }, { "epoch": 0.4066582171525137, "grad_norm": 0.4122142493724823, "learning_rate": 2.1284945904650693e-05, "loss": 0.0854644775390625, "step": 3008 }, { "epoch": 0.4067934093789607, "grad_norm": 0.3772042691707611, "learning_rate": 2.127873059239822e-05, "loss": 0.08576107025146484, "step": 3009 }, { "epoch": 0.40692860160540767, "grad_norm": 0.17306067049503326, "learning_rate": 2.127251397288056e-05, "loss": 0.046257853507995605, "step": 3010 }, { "epoch": 0.40706379383185465, "grad_norm": 0.3024364709854126, "learning_rate": 2.126629604739204e-05, "loss": 0.06950187683105469, "step": 3011 }, { "epoch": 0.4071989860583016, "grad_norm": 0.2842211425304413, "learning_rate": 2.1260076817227268e-05, "loss": 0.07561922073364258, "step": 3012 }, { "epoch": 0.4073341782847486, "grad_norm": 0.29641711711883545, "learning_rate": 2.1253856283681122e-05, "loss": 0.05509376525878906, "step": 3013 }, { "epoch": 0.4074693705111956, "grad_norm": 0.3248545527458191, "learning_rate": 2.1247634448048743e-05, "loss": 0.0809168815612793, "step": 3014 }, { "epoch": 0.40760456273764256, "grad_norm": 0.18395265936851501, "learning_rate": 2.1241411311625562e-05, "loss": 0.03696107864379883, "step": 3015 }, { "epoch": 0.40773975496408954, "grad_norm": 0.4544692933559418, "learning_rate": 2.1235186875707257e-05, "loss": 0.06669068336486816, "step": 3016 }, { "epoch": 0.4078749471905365, "grad_norm": 0.28700846433639526, "learning_rate": 2.1228961141589797e-05, "loss": 0.06910467147827148, "step": 3017 }, { "epoch": 0.4080101394169835, "grad_norm": 0.305080771446228, "learning_rate": 2.122273411056941e-05, "loss": 0.0659780502319336, "step": 3018 }, { "epoch": 0.4081453316434305, "grad_norm": 0.49413424730300903, "learning_rate": 2.1216505783942592e-05, "loss": 0.08311891555786133, "step": 3019 }, { "epoch": 0.4082805238698775, "grad_norm": 0.23321709036827087, "learning_rate": 2.121027616300613e-05, "loss": 0.06351089477539062, "step": 3020 }, { "epoch": 0.4084157160963245, "grad_norm": 0.45711737871170044, "learning_rate": 2.1204045249057043e-05, "loss": 0.09911465644836426, "step": 3021 }, { "epoch": 0.40855090832277147, "grad_norm": 0.24758939445018768, "learning_rate": 2.119781304339266e-05, "loss": 0.0674586296081543, "step": 3022 }, { "epoch": 0.40868610054921845, "grad_norm": 0.223337322473526, "learning_rate": 2.1191579547310547e-05, "loss": 0.06248021125793457, "step": 3023 }, { "epoch": 0.4088212927756654, "grad_norm": 0.41172075271606445, "learning_rate": 2.1185344762108556e-05, "loss": 0.06467294692993164, "step": 3024 }, { "epoch": 0.4089564850021124, "grad_norm": 0.22348926961421967, "learning_rate": 2.11791086890848e-05, "loss": 0.0626610517501831, "step": 3025 }, { "epoch": 0.4090916772285594, "grad_norm": 0.21511821448802948, "learning_rate": 2.1172871329537662e-05, "loss": 0.05102062225341797, "step": 3026 }, { "epoch": 0.40922686945500636, "grad_norm": 0.2693392336368561, "learning_rate": 2.1166632684765794e-05, "loss": 0.05055880546569824, "step": 3027 }, { "epoch": 0.40936206168145334, "grad_norm": 0.37640175223350525, "learning_rate": 2.1160392756068124e-05, "loss": 0.07921767234802246, "step": 3028 }, { "epoch": 0.4094972539079003, "grad_norm": 0.2543674409389496, "learning_rate": 2.1154151544743826e-05, "loss": 0.06111800670623779, "step": 3029 }, { "epoch": 0.4096324461343473, "grad_norm": 0.14939340949058533, "learning_rate": 2.114790905209236e-05, "loss": 0.04032135009765625, "step": 3030 }, { "epoch": 0.4097676383607943, "grad_norm": 0.500823438167572, "learning_rate": 2.1141665279413444e-05, "loss": 0.06637167930603027, "step": 3031 }, { "epoch": 0.40990283058724125, "grad_norm": 0.36694619059562683, "learning_rate": 2.1135420228007062e-05, "loss": 0.06368553638458252, "step": 3032 }, { "epoch": 0.41003802281368823, "grad_norm": 0.31600624322891235, "learning_rate": 2.1129173899173474e-05, "loss": 0.039308786392211914, "step": 3033 }, { "epoch": 0.4101732150401352, "grad_norm": 0.4579591155052185, "learning_rate": 2.11229262942132e-05, "loss": 0.09085893630981445, "step": 3034 }, { "epoch": 0.4103084072665822, "grad_norm": 0.47570300102233887, "learning_rate": 2.1116677414427008e-05, "loss": 0.1028285026550293, "step": 3035 }, { "epoch": 0.41044359949302917, "grad_norm": 0.3296443223953247, "learning_rate": 2.1110427261115972e-05, "loss": 0.09491825103759766, "step": 3036 }, { "epoch": 0.41057879171947614, "grad_norm": 0.2338595688343048, "learning_rate": 2.1104175835581386e-05, "loss": 0.07488346099853516, "step": 3037 }, { "epoch": 0.4107139839459231, "grad_norm": 0.2658068537712097, "learning_rate": 2.1097923139124846e-05, "loss": 0.06888961791992188, "step": 3038 }, { "epoch": 0.4108491761723701, "grad_norm": 0.24706095457077026, "learning_rate": 2.109166917304819e-05, "loss": 0.07400083541870117, "step": 3039 }, { "epoch": 0.4109843683988171, "grad_norm": 0.2957863509654999, "learning_rate": 2.1085413938653532e-05, "loss": 0.05295455455780029, "step": 3040 }, { "epoch": 0.41111956062526406, "grad_norm": 0.3177264332771301, "learning_rate": 2.107915743724323e-05, "loss": 0.06435346603393555, "step": 3041 }, { "epoch": 0.41125475285171104, "grad_norm": 0.2976820766925812, "learning_rate": 2.1072899670119935e-05, "loss": 0.0619511604309082, "step": 3042 }, { "epoch": 0.411389945078158, "grad_norm": 0.4411100149154663, "learning_rate": 2.1066640638586543e-05, "loss": 0.0731658935546875, "step": 3043 }, { "epoch": 0.411525137304605, "grad_norm": 0.1755773276090622, "learning_rate": 2.1060380343946223e-05, "loss": 0.031377315521240234, "step": 3044 }, { "epoch": 0.41166032953105197, "grad_norm": 0.3265551030635834, "learning_rate": 2.10541187875024e-05, "loss": 0.06280660629272461, "step": 3045 }, { "epoch": 0.41179552175749895, "grad_norm": 0.16660244762897491, "learning_rate": 2.1047855970558753e-05, "loss": 0.03179144859313965, "step": 3046 }, { "epoch": 0.41193071398394593, "grad_norm": 0.25354695320129395, "learning_rate": 2.1041591894419244e-05, "loss": 0.06399154663085938, "step": 3047 }, { "epoch": 0.4120659062103929, "grad_norm": 0.34272727370262146, "learning_rate": 2.1035326560388087e-05, "loss": 0.09914398193359375, "step": 3048 }, { "epoch": 0.4122010984368399, "grad_norm": 0.4404015839099884, "learning_rate": 2.1029059969769756e-05, "loss": 0.08698320388793945, "step": 3049 }, { "epoch": 0.41233629066328686, "grad_norm": 0.6310895085334778, "learning_rate": 2.1022792123868986e-05, "loss": 0.11017704010009766, "step": 3050 }, { "epoch": 0.41247148288973384, "grad_norm": 0.4720802307128906, "learning_rate": 2.1016523023990783e-05, "loss": 0.0863032341003418, "step": 3051 }, { "epoch": 0.4126066751161808, "grad_norm": 0.15391072630882263, "learning_rate": 2.1010252671440398e-05, "loss": 0.03853416442871094, "step": 3052 }, { "epoch": 0.4127418673426278, "grad_norm": 0.49569839239120483, "learning_rate": 2.1003981067523358e-05, "loss": 0.07751250267028809, "step": 3053 }, { "epoch": 0.4128770595690748, "grad_norm": 0.2507389187812805, "learning_rate": 2.099770821354544e-05, "loss": 0.05987739562988281, "step": 3054 }, { "epoch": 0.41301225179552176, "grad_norm": 0.286739319562912, "learning_rate": 2.0991434110812692e-05, "loss": 0.050983428955078125, "step": 3055 }, { "epoch": 0.41314744402196873, "grad_norm": 0.6455079913139343, "learning_rate": 2.0985158760631415e-05, "loss": 0.09912776947021484, "step": 3056 }, { "epoch": 0.4132826362484157, "grad_norm": 0.6160709857940674, "learning_rate": 2.0978882164308157e-05, "loss": 0.0709223747253418, "step": 3057 }, { "epoch": 0.4134178284748627, "grad_norm": 0.4100304841995239, "learning_rate": 2.0972604323149755e-05, "loss": 0.08180618286132812, "step": 3058 }, { "epoch": 0.41355302070130967, "grad_norm": 0.34018391370773315, "learning_rate": 2.0966325238463283e-05, "loss": 0.08721542358398438, "step": 3059 }, { "epoch": 0.41368821292775665, "grad_norm": 0.19317318499088287, "learning_rate": 2.096004491155608e-05, "loss": 0.06745672225952148, "step": 3060 }, { "epoch": 0.4138234051542036, "grad_norm": 0.3779497444629669, "learning_rate": 2.0953763343735746e-05, "loss": 0.08286094665527344, "step": 3061 }, { "epoch": 0.4139585973806506, "grad_norm": 0.31215447187423706, "learning_rate": 2.0947480536310133e-05, "loss": 0.05769228935241699, "step": 3062 }, { "epoch": 0.4140937896070976, "grad_norm": 0.47123974561691284, "learning_rate": 2.0941196490587352e-05, "loss": 0.08304882049560547, "step": 3063 }, { "epoch": 0.41422898183354456, "grad_norm": 0.28059065341949463, "learning_rate": 2.0934911207875782e-05, "loss": 0.0818033218383789, "step": 3064 }, { "epoch": 0.41436417405999154, "grad_norm": 0.48526760935783386, "learning_rate": 2.092862468948405e-05, "loss": 0.09539604187011719, "step": 3065 }, { "epoch": 0.4144993662864385, "grad_norm": 0.21677564084529877, "learning_rate": 2.0922336936721044e-05, "loss": 0.0581052303314209, "step": 3066 }, { "epoch": 0.4146345585128855, "grad_norm": 0.21911999583244324, "learning_rate": 2.0916047950895907e-05, "loss": 0.05284690856933594, "step": 3067 }, { "epoch": 0.4147697507393325, "grad_norm": 0.523078441619873, "learning_rate": 2.0909757733318035e-05, "loss": 0.07018661499023438, "step": 3068 }, { "epoch": 0.41490494296577946, "grad_norm": 0.2608419358730316, "learning_rate": 2.090346628529709e-05, "loss": 0.06709098815917969, "step": 3069 }, { "epoch": 0.41504013519222643, "grad_norm": 0.4290355443954468, "learning_rate": 2.089717360814298e-05, "loss": 0.09755420684814453, "step": 3070 }, { "epoch": 0.4151753274186734, "grad_norm": 0.209347203373909, "learning_rate": 2.0890879703165885e-05, "loss": 0.055274009704589844, "step": 3071 }, { "epoch": 0.4153105196451204, "grad_norm": 0.4696345925331116, "learning_rate": 2.0884584571676217e-05, "loss": 0.0701138973236084, "step": 3072 }, { "epoch": 0.41544571187156737, "grad_norm": 0.29664644598960876, "learning_rate": 2.0878288214984657e-05, "loss": 0.06930828094482422, "step": 3073 }, { "epoch": 0.41558090409801435, "grad_norm": 0.22433871030807495, "learning_rate": 2.0871990634402147e-05, "loss": 0.0613095760345459, "step": 3074 }, { "epoch": 0.4157160963244613, "grad_norm": 0.31830698251724243, "learning_rate": 2.0865691831239877e-05, "loss": 0.066925048828125, "step": 3075 }, { "epoch": 0.4158512885509083, "grad_norm": 0.4611347019672394, "learning_rate": 2.0859391806809285e-05, "loss": 0.08638906478881836, "step": 3076 }, { "epoch": 0.4159864807773553, "grad_norm": 0.3555915951728821, "learning_rate": 2.0853090562422072e-05, "loss": 0.06160902976989746, "step": 3077 }, { "epoch": 0.41612167300380226, "grad_norm": 0.20421913266181946, "learning_rate": 2.084678809939019e-05, "loss": 0.05112624168395996, "step": 3078 }, { "epoch": 0.41625686523024924, "grad_norm": 0.5702481269836426, "learning_rate": 2.084048441902585e-05, "loss": 0.08750391006469727, "step": 3079 }, { "epoch": 0.4163920574566962, "grad_norm": 0.6666382551193237, "learning_rate": 2.0834179522641508e-05, "loss": 0.08485496044158936, "step": 3080 }, { "epoch": 0.4165272496831432, "grad_norm": 0.6690225005149841, "learning_rate": 2.0827873411549877e-05, "loss": 0.10233592987060547, "step": 3081 }, { "epoch": 0.4166624419095902, "grad_norm": 0.49933552742004395, "learning_rate": 2.0821566087063926e-05, "loss": 0.09810972213745117, "step": 3082 }, { "epoch": 0.41679763413603715, "grad_norm": 0.6558201909065247, "learning_rate": 2.081525755049687e-05, "loss": 0.08875274658203125, "step": 3083 }, { "epoch": 0.41693282636248413, "grad_norm": 0.506667971611023, "learning_rate": 2.0808947803162182e-05, "loss": 0.05261152982711792, "step": 3084 }, { "epoch": 0.4170680185889311, "grad_norm": 0.7671221494674683, "learning_rate": 2.0802636846373578e-05, "loss": 0.06038618087768555, "step": 3085 }, { "epoch": 0.4172032108153781, "grad_norm": 0.3433699309825897, "learning_rate": 2.0796324681445045e-05, "loss": 0.10524749755859375, "step": 3086 }, { "epoch": 0.41733840304182507, "grad_norm": 0.2865079343318939, "learning_rate": 2.0790011309690806e-05, "loss": 0.053057193756103516, "step": 3087 }, { "epoch": 0.4174735952682721, "grad_norm": 0.28365615010261536, "learning_rate": 2.0783696732425332e-05, "loss": 0.09075212478637695, "step": 3088 }, { "epoch": 0.4176087874947191, "grad_norm": 0.1373298317193985, "learning_rate": 2.0777380950963355e-05, "loss": 0.03651416301727295, "step": 3089 }, { "epoch": 0.41774397972116606, "grad_norm": 0.5920847058296204, "learning_rate": 2.0771063966619854e-05, "loss": 0.08390545845031738, "step": 3090 }, { "epoch": 0.41787917194761304, "grad_norm": 1.0154945850372314, "learning_rate": 2.0764745780710065e-05, "loss": 0.11055469512939453, "step": 3091 }, { "epoch": 0.41801436417406, "grad_norm": 0.23824970424175262, "learning_rate": 2.075842639454946e-05, "loss": 0.04407072067260742, "step": 3092 }, { "epoch": 0.418149556400507, "grad_norm": 0.49458593130111694, "learning_rate": 2.075210580945378e-05, "loss": 0.06061124801635742, "step": 3093 }, { "epoch": 0.418284748626954, "grad_norm": 0.32972678542137146, "learning_rate": 2.0745784026738984e-05, "loss": 0.06857776641845703, "step": 3094 }, { "epoch": 0.41841994085340095, "grad_norm": 0.3052026629447937, "learning_rate": 2.073946104772132e-05, "loss": 0.0900566577911377, "step": 3095 }, { "epoch": 0.41855513307984793, "grad_norm": 0.36622339487075806, "learning_rate": 2.0733136873717258e-05, "loss": 0.05018901824951172, "step": 3096 }, { "epoch": 0.4186903253062949, "grad_norm": 0.2580163776874542, "learning_rate": 2.0726811506043527e-05, "loss": 0.07453489303588867, "step": 3097 }, { "epoch": 0.4188255175327419, "grad_norm": 0.3222690224647522, "learning_rate": 2.0720484946017104e-05, "loss": 0.06111764907836914, "step": 3098 }, { "epoch": 0.41896070975918887, "grad_norm": 0.3600582480430603, "learning_rate": 2.0714157194955202e-05, "loss": 0.06430578231811523, "step": 3099 }, { "epoch": 0.41909590198563584, "grad_norm": 0.34537774324417114, "learning_rate": 2.070782825417531e-05, "loss": 0.05692088603973389, "step": 3100 }, { "epoch": 0.4192310942120828, "grad_norm": 0.4973081946372986, "learning_rate": 2.0701498124995127e-05, "loss": 0.0888824462890625, "step": 3101 }, { "epoch": 0.4193662864385298, "grad_norm": 0.31936827301979065, "learning_rate": 2.069516680873264e-05, "loss": 0.07187843322753906, "step": 3102 }, { "epoch": 0.4195014786649768, "grad_norm": 0.2811417579650879, "learning_rate": 2.0688834306706047e-05, "loss": 0.06684207916259766, "step": 3103 }, { "epoch": 0.41963667089142376, "grad_norm": 0.3405231833457947, "learning_rate": 2.0682500620233815e-05, "loss": 0.0851449966430664, "step": 3104 }, { "epoch": 0.41977186311787074, "grad_norm": 0.26490673422813416, "learning_rate": 2.0676165750634656e-05, "loss": 0.03852987289428711, "step": 3105 }, { "epoch": 0.4199070553443177, "grad_norm": 0.18394184112548828, "learning_rate": 2.0669829699227513e-05, "loss": 0.04652690887451172, "step": 3106 }, { "epoch": 0.4200422475707647, "grad_norm": 0.40319758653640747, "learning_rate": 2.06634924673316e-05, "loss": 0.06999492645263672, "step": 3107 }, { "epoch": 0.42017743979721167, "grad_norm": 0.4493744671344757, "learning_rate": 2.0657154056266346e-05, "loss": 0.09916114807128906, "step": 3108 }, { "epoch": 0.42031263202365865, "grad_norm": 0.7170290946960449, "learning_rate": 2.0650814467351452e-05, "loss": 0.063210129737854, "step": 3109 }, { "epoch": 0.42044782425010563, "grad_norm": 0.2866007685661316, "learning_rate": 2.064447370190685e-05, "loss": 0.04627275466918945, "step": 3110 }, { "epoch": 0.4205830164765526, "grad_norm": 0.39962875843048096, "learning_rate": 2.0638131761252724e-05, "loss": 0.07088196277618408, "step": 3111 }, { "epoch": 0.4207182087029996, "grad_norm": 0.5062935948371887, "learning_rate": 2.06317886467095e-05, "loss": 0.09743881225585938, "step": 3112 }, { "epoch": 0.42085340092944656, "grad_norm": 0.18093198537826538, "learning_rate": 2.0625444359597847e-05, "loss": 0.04445075988769531, "step": 3113 }, { "epoch": 0.42098859315589354, "grad_norm": 0.7550981640815735, "learning_rate": 2.0619098901238684e-05, "loss": 0.07025456428527832, "step": 3114 }, { "epoch": 0.4211237853823405, "grad_norm": 0.3525427281856537, "learning_rate": 2.0612752272953158e-05, "loss": 0.05990409851074219, "step": 3115 }, { "epoch": 0.4212589776087875, "grad_norm": 0.48233169317245483, "learning_rate": 2.060640447606268e-05, "loss": 0.0691070556640625, "step": 3116 }, { "epoch": 0.4213941698352345, "grad_norm": 0.37629425525665283, "learning_rate": 2.0600055511888895e-05, "loss": 0.06235241889953613, "step": 3117 }, { "epoch": 0.42152936206168146, "grad_norm": 0.38955265283584595, "learning_rate": 2.059370538175369e-05, "loss": 0.07288360595703125, "step": 3118 }, { "epoch": 0.42166455428812843, "grad_norm": 0.3728174567222595, "learning_rate": 2.0587354086979194e-05, "loss": 0.07150459289550781, "step": 3119 }, { "epoch": 0.4217997465145754, "grad_norm": 0.22158977389335632, "learning_rate": 2.0581001628887785e-05, "loss": 0.05391716957092285, "step": 3120 }, { "epoch": 0.4219349387410224, "grad_norm": 0.5192732214927673, "learning_rate": 2.057464800880207e-05, "loss": 0.08232378959655762, "step": 3121 }, { "epoch": 0.42207013096746937, "grad_norm": 0.3635179102420807, "learning_rate": 2.0568293228044914e-05, "loss": 0.07523679733276367, "step": 3122 }, { "epoch": 0.42220532319391635, "grad_norm": 0.16220387816429138, "learning_rate": 2.0561937287939413e-05, "loss": 0.04125869274139404, "step": 3123 }, { "epoch": 0.4223405154203633, "grad_norm": 0.23348811268806458, "learning_rate": 2.055558018980891e-05, "loss": 0.0638723373413086, "step": 3124 }, { "epoch": 0.4224757076468103, "grad_norm": 0.334791362285614, "learning_rate": 2.0549221934976987e-05, "loss": 0.08350467681884766, "step": 3125 }, { "epoch": 0.4226108998732573, "grad_norm": 0.3081171214580536, "learning_rate": 2.054286252476746e-05, "loss": 0.07777190208435059, "step": 3126 }, { "epoch": 0.42274609209970426, "grad_norm": 0.3021962344646454, "learning_rate": 2.05365019605044e-05, "loss": 0.07677555084228516, "step": 3127 }, { "epoch": 0.42288128432615124, "grad_norm": 0.2252826690673828, "learning_rate": 2.053014024351211e-05, "loss": 0.06123685836791992, "step": 3128 }, { "epoch": 0.4230164765525982, "grad_norm": 0.17544758319854736, "learning_rate": 2.0523777375115133e-05, "loss": 0.047986507415771484, "step": 3129 }, { "epoch": 0.4231516687790452, "grad_norm": 0.25181517004966736, "learning_rate": 2.0517413356638245e-05, "loss": 0.04684591293334961, "step": 3130 }, { "epoch": 0.4232868610054922, "grad_norm": 0.6005727648735046, "learning_rate": 2.0511048189406472e-05, "loss": 0.10750675201416016, "step": 3131 }, { "epoch": 0.42342205323193916, "grad_norm": 0.4774467349052429, "learning_rate": 2.0504681874745082e-05, "loss": 0.0865640640258789, "step": 3132 }, { "epoch": 0.42355724545838613, "grad_norm": 0.33371490240097046, "learning_rate": 2.049831441397957e-05, "loss": 0.07904577255249023, "step": 3133 }, { "epoch": 0.4236924376848331, "grad_norm": 0.6800884008407593, "learning_rate": 2.0491945808435674e-05, "loss": 0.08980846405029297, "step": 3134 }, { "epoch": 0.4238276299112801, "grad_norm": 0.2587440609931946, "learning_rate": 2.048557605943938e-05, "loss": 0.03163552284240723, "step": 3135 }, { "epoch": 0.42396282213772707, "grad_norm": 0.28416770696640015, "learning_rate": 2.047920516831689e-05, "loss": 0.060605525970458984, "step": 3136 }, { "epoch": 0.42409801436417405, "grad_norm": 0.37363213300704956, "learning_rate": 2.047283313639467e-05, "loss": 0.10064458847045898, "step": 3137 }, { "epoch": 0.424233206590621, "grad_norm": 0.32091838121414185, "learning_rate": 2.0466459964999408e-05, "loss": 0.04574716091156006, "step": 3138 }, { "epoch": 0.424368398817068, "grad_norm": 0.3039965033531189, "learning_rate": 2.0460085655458025e-05, "loss": 0.05404996871948242, "step": 3139 }, { "epoch": 0.424503591043515, "grad_norm": 0.3287688195705414, "learning_rate": 2.0453710209097697e-05, "loss": 0.08441638946533203, "step": 3140 }, { "epoch": 0.42463878326996196, "grad_norm": 0.2386762499809265, "learning_rate": 2.044733362724582e-05, "loss": 0.06625723838806152, "step": 3141 }, { "epoch": 0.42477397549640894, "grad_norm": 0.21166974306106567, "learning_rate": 2.0440955911230028e-05, "loss": 0.06540441513061523, "step": 3142 }, { "epoch": 0.4249091677228559, "grad_norm": 0.2828345000743866, "learning_rate": 2.0434577062378203e-05, "loss": 0.09438323974609375, "step": 3143 }, { "epoch": 0.4250443599493029, "grad_norm": 0.2404760867357254, "learning_rate": 2.0428197082018458e-05, "loss": 0.05620098114013672, "step": 3144 }, { "epoch": 0.4251795521757499, "grad_norm": 0.33218252658843994, "learning_rate": 2.042181597147913e-05, "loss": 0.07691001892089844, "step": 3145 }, { "epoch": 0.42531474440219685, "grad_norm": 0.31662729382514954, "learning_rate": 2.0415433732088806e-05, "loss": 0.0770573616027832, "step": 3146 }, { "epoch": 0.42544993662864383, "grad_norm": 0.2540580630302429, "learning_rate": 2.0409050365176294e-05, "loss": 0.06334209442138672, "step": 3147 }, { "epoch": 0.4255851288550908, "grad_norm": 0.6130107045173645, "learning_rate": 2.0402665872070656e-05, "loss": 0.08261871337890625, "step": 3148 }, { "epoch": 0.4257203210815378, "grad_norm": 0.2526322603225708, "learning_rate": 2.0396280254101172e-05, "loss": 0.03117823600769043, "step": 3149 }, { "epoch": 0.42585551330798477, "grad_norm": 0.2598215341567993, "learning_rate": 2.0389893512597364e-05, "loss": 0.06298279762268066, "step": 3150 }, { "epoch": 0.42599070553443175, "grad_norm": 0.2517451345920563, "learning_rate": 2.0383505648888986e-05, "loss": 0.05115222930908203, "step": 3151 }, { "epoch": 0.4261258977608787, "grad_norm": 0.4152345359325409, "learning_rate": 2.037711666430602e-05, "loss": 0.06592178344726562, "step": 3152 }, { "epoch": 0.4262610899873257, "grad_norm": 0.21208588778972626, "learning_rate": 2.0370726560178693e-05, "loss": 0.06539249420166016, "step": 3153 }, { "epoch": 0.4263962822137727, "grad_norm": 0.3017374575138092, "learning_rate": 2.036433533783745e-05, "loss": 0.0654914379119873, "step": 3154 }, { "epoch": 0.42653147444021966, "grad_norm": 0.4026988744735718, "learning_rate": 2.0357942998612988e-05, "loss": 0.06147575378417969, "step": 3155 }, { "epoch": 0.4266666666666667, "grad_norm": 0.2728249728679657, "learning_rate": 2.0351549543836224e-05, "loss": 0.07100391387939453, "step": 3156 }, { "epoch": 0.4268018588931137, "grad_norm": 0.2211737185716629, "learning_rate": 2.0345154974838307e-05, "loss": 0.05893146991729736, "step": 3157 }, { "epoch": 0.42693705111956065, "grad_norm": 0.3204041123390198, "learning_rate": 2.0338759292950618e-05, "loss": 0.06424188613891602, "step": 3158 }, { "epoch": 0.42707224334600763, "grad_norm": 0.23772023618221283, "learning_rate": 2.033236249950477e-05, "loss": 0.04523515701293945, "step": 3159 }, { "epoch": 0.4272074355724546, "grad_norm": 0.24550877511501312, "learning_rate": 2.0325964595832618e-05, "loss": 0.055190324783325195, "step": 3160 }, { "epoch": 0.4273426277989016, "grad_norm": 0.3470247685909271, "learning_rate": 2.031956558326624e-05, "loss": 0.07321572303771973, "step": 3161 }, { "epoch": 0.42747782002534856, "grad_norm": 0.3611384928226471, "learning_rate": 2.0313165463137935e-05, "loss": 0.07228946685791016, "step": 3162 }, { "epoch": 0.42761301225179554, "grad_norm": 0.34616079926490784, "learning_rate": 2.030676423678025e-05, "loss": 0.07276630401611328, "step": 3163 }, { "epoch": 0.4277482044782425, "grad_norm": 0.18685780465602875, "learning_rate": 2.030036190552595e-05, "loss": 0.07238388061523438, "step": 3164 }, { "epoch": 0.4278833967046895, "grad_norm": 0.4286274313926697, "learning_rate": 2.029395847070803e-05, "loss": 0.0866384506225586, "step": 3165 }, { "epoch": 0.4280185889311365, "grad_norm": 0.34199005365371704, "learning_rate": 2.0287553933659735e-05, "loss": 0.06592819094657898, "step": 3166 }, { "epoch": 0.42815378115758346, "grad_norm": 0.4649122357368469, "learning_rate": 2.0281148295714512e-05, "loss": 0.06458044052124023, "step": 3167 }, { "epoch": 0.42828897338403044, "grad_norm": 0.2594263553619385, "learning_rate": 2.027474155820605e-05, "loss": 0.060778021812438965, "step": 3168 }, { "epoch": 0.4284241656104774, "grad_norm": 0.8002752661705017, "learning_rate": 2.026833372246827e-05, "loss": 0.10184288024902344, "step": 3169 }, { "epoch": 0.4285593578369244, "grad_norm": 0.37460413575172424, "learning_rate": 2.026192478983531e-05, "loss": 0.07912003993988037, "step": 3170 }, { "epoch": 0.42869455006337137, "grad_norm": 0.49152660369873047, "learning_rate": 2.0255514761641555e-05, "loss": 0.08054447174072266, "step": 3171 }, { "epoch": 0.42882974228981835, "grad_norm": 0.4222905933856964, "learning_rate": 2.0249103639221597e-05, "loss": 0.0560762882232666, "step": 3172 }, { "epoch": 0.42896493451626533, "grad_norm": 0.4432280361652374, "learning_rate": 2.024269142391027e-05, "loss": 0.057440757751464844, "step": 3173 }, { "epoch": 0.4291001267427123, "grad_norm": 0.3203895688056946, "learning_rate": 2.023627811704263e-05, "loss": 0.07187581062316895, "step": 3174 }, { "epoch": 0.4292353189691593, "grad_norm": 0.8808659315109253, "learning_rate": 2.0229863719953963e-05, "loss": 0.09990119934082031, "step": 3175 }, { "epoch": 0.42937051119560626, "grad_norm": 0.4019676148891449, "learning_rate": 2.0223448233979785e-05, "loss": 0.0510178804397583, "step": 3176 }, { "epoch": 0.42950570342205324, "grad_norm": 0.5386053919792175, "learning_rate": 2.0217031660455825e-05, "loss": 0.06357765197753906, "step": 3177 }, { "epoch": 0.4296408956485002, "grad_norm": 0.24994978308677673, "learning_rate": 2.0210614000718054e-05, "loss": 0.06611442565917969, "step": 3178 }, { "epoch": 0.4297760878749472, "grad_norm": 0.41393131017684937, "learning_rate": 2.020419525610266e-05, "loss": 0.06168675422668457, "step": 3179 }, { "epoch": 0.4299112801013942, "grad_norm": 0.2493085414171219, "learning_rate": 2.0197775427946066e-05, "loss": 0.06556558609008789, "step": 3180 }, { "epoch": 0.43004647232784116, "grad_norm": 0.3049503564834595, "learning_rate": 2.0191354517584902e-05, "loss": 0.09117889404296875, "step": 3181 }, { "epoch": 0.43018166455428813, "grad_norm": 0.5270341038703918, "learning_rate": 2.018493252635605e-05, "loss": 0.052628517150878906, "step": 3182 }, { "epoch": 0.4303168567807351, "grad_norm": 0.33322596549987793, "learning_rate": 2.0178509455596598e-05, "loss": 0.06375730037689209, "step": 3183 }, { "epoch": 0.4304520490071821, "grad_norm": 0.21174496412277222, "learning_rate": 2.017208530664386e-05, "loss": 0.05694985389709473, "step": 3184 }, { "epoch": 0.43058724123362907, "grad_norm": 0.23592528700828552, "learning_rate": 2.016566008083538e-05, "loss": 0.06638085842132568, "step": 3185 }, { "epoch": 0.43072243346007605, "grad_norm": 0.15597030520439148, "learning_rate": 2.0159233779508923e-05, "loss": 0.03405046463012695, "step": 3186 }, { "epoch": 0.430857625686523, "grad_norm": 0.31485578417778015, "learning_rate": 2.0152806404002482e-05, "loss": 0.06981182098388672, "step": 3187 }, { "epoch": 0.43099281791297, "grad_norm": 0.1980515718460083, "learning_rate": 2.014637795565427e-05, "loss": 0.056336402893066406, "step": 3188 }, { "epoch": 0.431128010139417, "grad_norm": 0.29888081550598145, "learning_rate": 2.0139948435802722e-05, "loss": 0.0712289810180664, "step": 3189 }, { "epoch": 0.43126320236586396, "grad_norm": 0.4977204203605652, "learning_rate": 2.0133517845786504e-05, "loss": 0.08624553680419922, "step": 3190 }, { "epoch": 0.43139839459231094, "grad_norm": 0.34856224060058594, "learning_rate": 2.012708618694449e-05, "loss": 0.10265088081359863, "step": 3191 }, { "epoch": 0.4315335868187579, "grad_norm": 0.2021336406469345, "learning_rate": 2.0120653460615795e-05, "loss": 0.048661231994628906, "step": 3192 }, { "epoch": 0.4316687790452049, "grad_norm": 0.26064735651016235, "learning_rate": 2.011421966813974e-05, "loss": 0.06618070602416992, "step": 3193 }, { "epoch": 0.4318039712716519, "grad_norm": 0.19883018732070923, "learning_rate": 2.0107784810855882e-05, "loss": 0.06342935562133789, "step": 3194 }, { "epoch": 0.43193916349809885, "grad_norm": 0.4939442574977875, "learning_rate": 2.0101348890103985e-05, "loss": 0.09639501571655273, "step": 3195 }, { "epoch": 0.43207435572454583, "grad_norm": 0.32421886920928955, "learning_rate": 2.0094911907224043e-05, "loss": 0.05952805280685425, "step": 3196 }, { "epoch": 0.4322095479509928, "grad_norm": 0.3773537278175354, "learning_rate": 2.008847386355628e-05, "loss": 0.05793642997741699, "step": 3197 }, { "epoch": 0.4323447401774398, "grad_norm": 0.45993101596832275, "learning_rate": 2.008203476044112e-05, "loss": 0.08371472358703613, "step": 3198 }, { "epoch": 0.43247993240388677, "grad_norm": 0.6151641607284546, "learning_rate": 2.007559459921922e-05, "loss": 0.1029520034790039, "step": 3199 }, { "epoch": 0.43261512463033375, "grad_norm": 0.30804434418678284, "learning_rate": 2.0069153381231456e-05, "loss": 0.07819843292236328, "step": 3200 }, { "epoch": 0.4327503168567807, "grad_norm": 0.30804434418678284, "learning_rate": 2.0062711107818933e-05, "loss": 0.06537199020385742, "step": 3201 }, { "epoch": 0.4328855090832277, "grad_norm": 0.37973085045814514, "learning_rate": 2.0056267780322953e-05, "loss": 0.07285261154174805, "step": 3202 }, { "epoch": 0.4330207013096747, "grad_norm": 0.3804246187210083, "learning_rate": 2.004982340008506e-05, "loss": 0.07038390636444092, "step": 3203 }, { "epoch": 0.43315589353612166, "grad_norm": 0.1897815465927124, "learning_rate": 2.004337796844701e-05, "loss": 0.05299806594848633, "step": 3204 }, { "epoch": 0.43329108576256864, "grad_norm": 0.5176883339881897, "learning_rate": 2.003693148675077e-05, "loss": 0.06395530700683594, "step": 3205 }, { "epoch": 0.4334262779890156, "grad_norm": 0.33159148693084717, "learning_rate": 2.003048395633853e-05, "loss": 0.06426000595092773, "step": 3206 }, { "epoch": 0.4335614702154626, "grad_norm": 0.4107198119163513, "learning_rate": 2.0024035378552708e-05, "loss": 0.08121180534362793, "step": 3207 }, { "epoch": 0.4336966624419096, "grad_norm": 0.215802401304245, "learning_rate": 2.001758575473593e-05, "loss": 0.053858280181884766, "step": 3208 }, { "epoch": 0.43383185466835655, "grad_norm": 0.1386597603559494, "learning_rate": 2.0011135086231042e-05, "loss": 0.029804706573486328, "step": 3209 }, { "epoch": 0.43396704689480353, "grad_norm": 0.28065362572669983, "learning_rate": 2.0004683374381104e-05, "loss": 0.07149219512939453, "step": 3210 }, { "epoch": 0.4341022391212505, "grad_norm": 0.1404481679201126, "learning_rate": 1.9998230620529395e-05, "loss": 0.03793168067932129, "step": 3211 }, { "epoch": 0.4342374313476975, "grad_norm": 0.2699950933456421, "learning_rate": 1.999177682601942e-05, "loss": 0.072113037109375, "step": 3212 }, { "epoch": 0.43437262357414447, "grad_norm": 0.3491198718547821, "learning_rate": 1.9985321992194896e-05, "loss": 0.06703424453735352, "step": 3213 }, { "epoch": 0.43450781580059145, "grad_norm": 0.22347047924995422, "learning_rate": 1.9978866120399746e-05, "loss": 0.044330596923828125, "step": 3214 }, { "epoch": 0.4346430080270384, "grad_norm": 0.44929203391075134, "learning_rate": 1.9972409211978116e-05, "loss": 0.08514881134033203, "step": 3215 }, { "epoch": 0.4347782002534854, "grad_norm": 0.3418920338153839, "learning_rate": 1.9965951268274373e-05, "loss": 0.08155107498168945, "step": 3216 }, { "epoch": 0.4349133924799324, "grad_norm": 0.34604474902153015, "learning_rate": 1.9959492290633093e-05, "loss": 0.06706786155700684, "step": 3217 }, { "epoch": 0.43504858470637936, "grad_norm": 0.4195420742034912, "learning_rate": 1.995303228039907e-05, "loss": 0.05675840377807617, "step": 3218 }, { "epoch": 0.43518377693282634, "grad_norm": 0.4107212424278259, "learning_rate": 1.994657123891732e-05, "loss": 0.061762332916259766, "step": 3219 }, { "epoch": 0.4353189691592733, "grad_norm": 0.2144065499305725, "learning_rate": 1.9940109167533055e-05, "loss": 0.05501842498779297, "step": 3220 }, { "epoch": 0.4354541613857203, "grad_norm": 0.23718760907649994, "learning_rate": 1.9933646067591716e-05, "loss": 0.0576167106628418, "step": 3221 }, { "epoch": 0.4355893536121673, "grad_norm": 0.543972373008728, "learning_rate": 1.992718194043896e-05, "loss": 0.0962066650390625, "step": 3222 }, { "epoch": 0.43572454583861425, "grad_norm": 0.2456401288509369, "learning_rate": 1.9920716787420643e-05, "loss": 0.06356048583984375, "step": 3223 }, { "epoch": 0.4358597380650613, "grad_norm": 0.4617266356945038, "learning_rate": 1.9914250609882858e-05, "loss": 0.09366726875305176, "step": 3224 }, { "epoch": 0.43599493029150826, "grad_norm": 0.5732520818710327, "learning_rate": 1.9907783409171885e-05, "loss": 0.12010478973388672, "step": 3225 }, { "epoch": 0.43613012251795524, "grad_norm": 0.3567442297935486, "learning_rate": 1.990131518663424e-05, "loss": 0.09979486465454102, "step": 3226 }, { "epoch": 0.4362653147444022, "grad_norm": 0.23667088150978088, "learning_rate": 1.9894845943616632e-05, "loss": 0.05428934097290039, "step": 3227 }, { "epoch": 0.4364005069708492, "grad_norm": 0.34958022832870483, "learning_rate": 1.988837568146599e-05, "loss": 0.08516740798950195, "step": 3228 }, { "epoch": 0.4365356991972962, "grad_norm": 0.34907564520835876, "learning_rate": 1.988190440152947e-05, "loss": 0.08010435104370117, "step": 3229 }, { "epoch": 0.43667089142374316, "grad_norm": 0.44578802585601807, "learning_rate": 1.9875432105154424e-05, "loss": 0.08909749984741211, "step": 3230 }, { "epoch": 0.43680608365019014, "grad_norm": 0.34709492325782776, "learning_rate": 1.9868958793688412e-05, "loss": 0.08998554944992065, "step": 3231 }, { "epoch": 0.4369412758766371, "grad_norm": 0.24281227588653564, "learning_rate": 1.9862484468479213e-05, "loss": 0.05726742744445801, "step": 3232 }, { "epoch": 0.4370764681030841, "grad_norm": 0.24110205471515656, "learning_rate": 1.985600913087482e-05, "loss": 0.038764238357543945, "step": 3233 }, { "epoch": 0.43721166032953107, "grad_norm": 0.210011288523674, "learning_rate": 1.9849532782223425e-05, "loss": 0.05803108215332031, "step": 3234 }, { "epoch": 0.43734685255597805, "grad_norm": 0.2844734191894531, "learning_rate": 1.9843055423873447e-05, "loss": 0.054401397705078125, "step": 3235 }, { "epoch": 0.43748204478242503, "grad_norm": 0.2790979743003845, "learning_rate": 1.9836577057173507e-05, "loss": 0.050627708435058594, "step": 3236 }, { "epoch": 0.437617237008872, "grad_norm": 0.20133322477340698, "learning_rate": 1.9830097683472427e-05, "loss": 0.06956779956817627, "step": 3237 }, { "epoch": 0.437752429235319, "grad_norm": 0.26941344141960144, "learning_rate": 1.9823617304119252e-05, "loss": 0.06499528884887695, "step": 3238 }, { "epoch": 0.43788762146176596, "grad_norm": 0.383767694234848, "learning_rate": 1.9817135920463232e-05, "loss": 0.07185077667236328, "step": 3239 }, { "epoch": 0.43802281368821294, "grad_norm": 0.2848849892616272, "learning_rate": 1.9810653533853826e-05, "loss": 0.07364368438720703, "step": 3240 }, { "epoch": 0.4381580059146599, "grad_norm": 0.2620222270488739, "learning_rate": 1.9804170145640706e-05, "loss": 0.06521177291870117, "step": 3241 }, { "epoch": 0.4382931981411069, "grad_norm": 0.7277887463569641, "learning_rate": 1.9797685757173737e-05, "loss": 0.0758504867553711, "step": 3242 }, { "epoch": 0.4384283903675539, "grad_norm": 0.21800507605075836, "learning_rate": 1.979120036980301e-05, "loss": 0.056427717208862305, "step": 3243 }, { "epoch": 0.43856358259400086, "grad_norm": 0.2958162724971771, "learning_rate": 1.9784713984878814e-05, "loss": 0.07983338832855225, "step": 3244 }, { "epoch": 0.43869877482044783, "grad_norm": 0.37943050265312195, "learning_rate": 1.9778226603751652e-05, "loss": 0.08130407333374023, "step": 3245 }, { "epoch": 0.4388339670468948, "grad_norm": 0.2472180873155594, "learning_rate": 1.9771738227772235e-05, "loss": 0.04934966564178467, "step": 3246 }, { "epoch": 0.4389691592733418, "grad_norm": 0.29032063484191895, "learning_rate": 1.976524885829147e-05, "loss": 0.05225777626037598, "step": 3247 }, { "epoch": 0.43910435149978877, "grad_norm": 0.21247722208499908, "learning_rate": 1.975875849666048e-05, "loss": 0.042376160621643066, "step": 3248 }, { "epoch": 0.43923954372623575, "grad_norm": 0.3725825250148773, "learning_rate": 1.9752267144230595e-05, "loss": 0.08426475524902344, "step": 3249 }, { "epoch": 0.4393747359526827, "grad_norm": 0.2907467782497406, "learning_rate": 1.9745774802353347e-05, "loss": 0.06331539154052734, "step": 3250 }, { "epoch": 0.4395099281791297, "grad_norm": 0.3804432451725006, "learning_rate": 1.973928147238048e-05, "loss": 0.07320070266723633, "step": 3251 }, { "epoch": 0.4396451204055767, "grad_norm": 0.21498610079288483, "learning_rate": 1.973278715566394e-05, "loss": 0.06844902038574219, "step": 3252 }, { "epoch": 0.43978031263202366, "grad_norm": 0.31550881266593933, "learning_rate": 1.972629185355587e-05, "loss": 0.055769920349121094, "step": 3253 }, { "epoch": 0.43991550485847064, "grad_norm": 1.6527247428894043, "learning_rate": 1.971979556740864e-05, "loss": 0.07448101043701172, "step": 3254 }, { "epoch": 0.4400506970849176, "grad_norm": 0.18900066614151, "learning_rate": 1.971329829857479e-05, "loss": 0.057601213455200195, "step": 3255 }, { "epoch": 0.4401858893113646, "grad_norm": 0.4061093032360077, "learning_rate": 1.9706800048407112e-05, "loss": 0.07447242736816406, "step": 3256 }, { "epoch": 0.4403210815378116, "grad_norm": 0.3093827962875366, "learning_rate": 1.9700300818258566e-05, "loss": 0.06330156326293945, "step": 3257 }, { "epoch": 0.44045627376425855, "grad_norm": 0.4635726511478424, "learning_rate": 1.9693800609482318e-05, "loss": 0.06341552734375, "step": 3258 }, { "epoch": 0.44059146599070553, "grad_norm": 0.36550334095954895, "learning_rate": 1.9687299423431754e-05, "loss": 0.07641315460205078, "step": 3259 }, { "epoch": 0.4407266582171525, "grad_norm": 0.400806725025177, "learning_rate": 1.968079726146045e-05, "loss": 0.05738019943237305, "step": 3260 }, { "epoch": 0.4408618504435995, "grad_norm": 0.30217820405960083, "learning_rate": 1.9674294124922204e-05, "loss": 0.06429815292358398, "step": 3261 }, { "epoch": 0.44099704267004647, "grad_norm": 0.2563503384590149, "learning_rate": 1.966779001517099e-05, "loss": 0.04334282875061035, "step": 3262 }, { "epoch": 0.44113223489649345, "grad_norm": 0.34525415301322937, "learning_rate": 1.9661284933561006e-05, "loss": 0.09828376770019531, "step": 3263 }, { "epoch": 0.4412674271229404, "grad_norm": 0.3323579728603363, "learning_rate": 1.9654778881446636e-05, "loss": 0.07301616668701172, "step": 3264 }, { "epoch": 0.4414026193493874, "grad_norm": 0.2360515296459198, "learning_rate": 1.9648271860182487e-05, "loss": 0.06273889541625977, "step": 3265 }, { "epoch": 0.4415378115758344, "grad_norm": 0.314397931098938, "learning_rate": 1.9641763871123345e-05, "loss": 0.053080081939697266, "step": 3266 }, { "epoch": 0.44167300380228136, "grad_norm": 0.2568463087081909, "learning_rate": 1.963525491562421e-05, "loss": 0.07741570472717285, "step": 3267 }, { "epoch": 0.44180819602872834, "grad_norm": 0.5656931400299072, "learning_rate": 1.9628744995040287e-05, "loss": 0.1472454071044922, "step": 3268 }, { "epoch": 0.4419433882551753, "grad_norm": 0.24615713953971863, "learning_rate": 1.9622234110726976e-05, "loss": 0.049279212951660156, "step": 3269 }, { "epoch": 0.4420785804816223, "grad_norm": 0.3402886390686035, "learning_rate": 1.9615722264039868e-05, "loss": 0.07629966735839844, "step": 3270 }, { "epoch": 0.4422137727080693, "grad_norm": 0.25823819637298584, "learning_rate": 1.9609209456334772e-05, "loss": 0.05963563919067383, "step": 3271 }, { "epoch": 0.44234896493451625, "grad_norm": 0.420064777135849, "learning_rate": 1.960269568896769e-05, "loss": 0.06271743774414062, "step": 3272 }, { "epoch": 0.44248415716096323, "grad_norm": 0.33517250418663025, "learning_rate": 1.9596180963294822e-05, "loss": 0.07872200012207031, "step": 3273 }, { "epoch": 0.4426193493874102, "grad_norm": 0.2894724905490875, "learning_rate": 1.9589665280672564e-05, "loss": 0.0593571662902832, "step": 3274 }, { "epoch": 0.4427545416138572, "grad_norm": 0.317278653383255, "learning_rate": 1.958314864245752e-05, "loss": 0.07319831848144531, "step": 3275 }, { "epoch": 0.44288973384030417, "grad_norm": 0.30626583099365234, "learning_rate": 1.957663105000649e-05, "loss": 0.06558942794799805, "step": 3276 }, { "epoch": 0.44302492606675115, "grad_norm": 0.37172073125839233, "learning_rate": 1.957011250467647e-05, "loss": 0.04667782783508301, "step": 3277 }, { "epoch": 0.4431601182931981, "grad_norm": 0.44745346903800964, "learning_rate": 1.9563593007824658e-05, "loss": 0.07155179977416992, "step": 3278 }, { "epoch": 0.4432953105196451, "grad_norm": 0.30978548526763916, "learning_rate": 1.9557072560808442e-05, "loss": 0.08234691619873047, "step": 3279 }, { "epoch": 0.4434305027460921, "grad_norm": 0.26966768503189087, "learning_rate": 1.9550551164985418e-05, "loss": 0.06200361251831055, "step": 3280 }, { "epoch": 0.44356569497253906, "grad_norm": 0.4869910776615143, "learning_rate": 1.9544028821713372e-05, "loss": 0.08661699295043945, "step": 3281 }, { "epoch": 0.44370088719898604, "grad_norm": 0.25459226965904236, "learning_rate": 1.9537505532350298e-05, "loss": 0.07809257507324219, "step": 3282 }, { "epoch": 0.443836079425433, "grad_norm": 0.3783869445323944, "learning_rate": 1.9530981298254376e-05, "loss": 0.06965303421020508, "step": 3283 }, { "epoch": 0.44397127165188, "grad_norm": 0.17915265262126923, "learning_rate": 1.9524456120783983e-05, "loss": 0.06292033195495605, "step": 3284 }, { "epoch": 0.444106463878327, "grad_norm": 0.2549220621585846, "learning_rate": 1.95179300012977e-05, "loss": 0.06046426296234131, "step": 3285 }, { "epoch": 0.44424165610477395, "grad_norm": 0.3083202540874481, "learning_rate": 1.9511402941154296e-05, "loss": 0.07697391510009766, "step": 3286 }, { "epoch": 0.44437684833122093, "grad_norm": 0.6192878484725952, "learning_rate": 1.950487494171274e-05, "loss": 0.09878063201904297, "step": 3287 }, { "epoch": 0.4445120405576679, "grad_norm": 0.4199250042438507, "learning_rate": 1.9498346004332203e-05, "loss": 0.07847166061401367, "step": 3288 }, { "epoch": 0.4446472327841149, "grad_norm": 0.2877029776573181, "learning_rate": 1.949181613037204e-05, "loss": 0.08483314514160156, "step": 3289 }, { "epoch": 0.44478242501056187, "grad_norm": 0.17623817920684814, "learning_rate": 1.9485285321191804e-05, "loss": 0.04591083526611328, "step": 3290 }, { "epoch": 0.44491761723700884, "grad_norm": 0.42056602239608765, "learning_rate": 1.9478753578151244e-05, "loss": 0.06670105457305908, "step": 3291 }, { "epoch": 0.4450528094634559, "grad_norm": 0.4309781491756439, "learning_rate": 1.9472220902610304e-05, "loss": 0.070465087890625, "step": 3292 }, { "epoch": 0.44518800168990286, "grad_norm": 0.14935748279094696, "learning_rate": 1.9465687295929127e-05, "loss": 0.03504657745361328, "step": 3293 }, { "epoch": 0.44532319391634984, "grad_norm": 0.21144822239875793, "learning_rate": 1.945915275946804e-05, "loss": 0.042441487312316895, "step": 3294 }, { "epoch": 0.4454583861427968, "grad_norm": 0.30384671688079834, "learning_rate": 1.9452617294587573e-05, "loss": 0.07517242431640625, "step": 3295 }, { "epoch": 0.4455935783692438, "grad_norm": 0.18729279935359955, "learning_rate": 1.9446080902648435e-05, "loss": 0.04670882225036621, "step": 3296 }, { "epoch": 0.44572877059569077, "grad_norm": 0.5563015937805176, "learning_rate": 1.943954358501154e-05, "loss": 0.0895843505859375, "step": 3297 }, { "epoch": 0.44586396282213775, "grad_norm": 0.342330664396286, "learning_rate": 1.943300534303801e-05, "loss": 0.05006873607635498, "step": 3298 }, { "epoch": 0.44599915504858473, "grad_norm": 0.30710843205451965, "learning_rate": 1.9426466178089116e-05, "loss": 0.06839275360107422, "step": 3299 }, { "epoch": 0.4461343472750317, "grad_norm": 0.3760884404182434, "learning_rate": 1.9419926091526367e-05, "loss": 0.09142160415649414, "step": 3300 }, { "epoch": 0.4462695395014787, "grad_norm": 0.5989261269569397, "learning_rate": 1.9413385084711425e-05, "loss": 0.09687232971191406, "step": 3301 }, { "epoch": 0.44640473172792566, "grad_norm": 0.2804635763168335, "learning_rate": 1.9406843159006183e-05, "loss": 0.064483642578125, "step": 3302 }, { "epoch": 0.44653992395437264, "grad_norm": 0.34838199615478516, "learning_rate": 1.940030031577269e-05, "loss": 0.057769775390625, "step": 3303 }, { "epoch": 0.4466751161808196, "grad_norm": 0.6450948715209961, "learning_rate": 1.9393756556373212e-05, "loss": 0.09352350234985352, "step": 3304 }, { "epoch": 0.4468103084072666, "grad_norm": 0.4629974365234375, "learning_rate": 1.9387211882170184e-05, "loss": 0.06857538223266602, "step": 3305 }, { "epoch": 0.4469455006337136, "grad_norm": 0.38311535120010376, "learning_rate": 1.9380666294526243e-05, "loss": 0.06781387329101562, "step": 3306 }, { "epoch": 0.44708069286016056, "grad_norm": 0.3830869495868683, "learning_rate": 1.9374119794804228e-05, "loss": 0.10775327682495117, "step": 3307 }, { "epoch": 0.44721588508660753, "grad_norm": 0.2984815835952759, "learning_rate": 1.936757238436714e-05, "loss": 0.0723876953125, "step": 3308 }, { "epoch": 0.4473510773130545, "grad_norm": 0.407243937253952, "learning_rate": 1.936102406457819e-05, "loss": 0.0800161361694336, "step": 3309 }, { "epoch": 0.4474862695395015, "grad_norm": 0.5078486204147339, "learning_rate": 1.935447483680078e-05, "loss": 0.09047174453735352, "step": 3310 }, { "epoch": 0.44762146176594847, "grad_norm": 0.2887296676635742, "learning_rate": 1.9347924702398484e-05, "loss": 0.04402494430541992, "step": 3311 }, { "epoch": 0.44775665399239545, "grad_norm": 0.3636722266674042, "learning_rate": 1.9341373662735075e-05, "loss": 0.07357645034790039, "step": 3312 }, { "epoch": 0.4478918462188424, "grad_norm": 0.22289150953292847, "learning_rate": 1.9334821719174524e-05, "loss": 0.054033517837524414, "step": 3313 }, { "epoch": 0.4480270384452894, "grad_norm": 0.35751694440841675, "learning_rate": 1.9328268873080974e-05, "loss": 0.10008716583251953, "step": 3314 }, { "epoch": 0.4481622306717364, "grad_norm": 0.2627313733100891, "learning_rate": 1.9321715125818765e-05, "loss": 0.07291555404663086, "step": 3315 }, { "epoch": 0.44829742289818336, "grad_norm": 0.24357503652572632, "learning_rate": 1.931516047875242e-05, "loss": 0.0700075626373291, "step": 3316 }, { "epoch": 0.44843261512463034, "grad_norm": 0.39955195784568787, "learning_rate": 1.930860493324665e-05, "loss": 0.07883906364440918, "step": 3317 }, { "epoch": 0.4485678073510773, "grad_norm": 0.22202704846858978, "learning_rate": 1.9302048490666356e-05, "loss": 0.059880733489990234, "step": 3318 }, { "epoch": 0.4487029995775243, "grad_norm": 0.42034509778022766, "learning_rate": 1.9295491152376623e-05, "loss": 0.06959772109985352, "step": 3319 }, { "epoch": 0.4488381918039713, "grad_norm": 0.4925479590892792, "learning_rate": 1.928893291974273e-05, "loss": 0.06459498405456543, "step": 3320 }, { "epoch": 0.44897338403041825, "grad_norm": 0.38164782524108887, "learning_rate": 1.9282373794130132e-05, "loss": 0.06588971614837646, "step": 3321 }, { "epoch": 0.44910857625686523, "grad_norm": 0.23238146305084229, "learning_rate": 1.9275813776904472e-05, "loss": 0.04213905334472656, "step": 3322 }, { "epoch": 0.4492437684833122, "grad_norm": 0.4110434949398041, "learning_rate": 1.9269252869431582e-05, "loss": 0.08344841003417969, "step": 3323 }, { "epoch": 0.4493789607097592, "grad_norm": 0.3393557667732239, "learning_rate": 1.9262691073077476e-05, "loss": 0.045320987701416016, "step": 3324 }, { "epoch": 0.44951415293620617, "grad_norm": 0.19677022099494934, "learning_rate": 1.9256128389208363e-05, "loss": 0.04906785488128662, "step": 3325 }, { "epoch": 0.44964934516265315, "grad_norm": 0.21272487938404083, "learning_rate": 1.924956481919062e-05, "loss": 0.056756019592285156, "step": 3326 }, { "epoch": 0.4497845373891001, "grad_norm": 0.28534969687461853, "learning_rate": 1.9243000364390825e-05, "loss": 0.07583838701248169, "step": 3327 }, { "epoch": 0.4499197296155471, "grad_norm": 0.22729046642780304, "learning_rate": 1.9236435026175717e-05, "loss": 0.044859886169433594, "step": 3328 }, { "epoch": 0.4500549218419941, "grad_norm": 0.32950401306152344, "learning_rate": 1.9229868805912248e-05, "loss": 0.07035517692565918, "step": 3329 }, { "epoch": 0.45019011406844106, "grad_norm": 0.285521924495697, "learning_rate": 1.9223301704967543e-05, "loss": 0.06944990158081055, "step": 3330 }, { "epoch": 0.45032530629488804, "grad_norm": 0.5533238053321838, "learning_rate": 1.92167337247089e-05, "loss": 0.09995520114898682, "step": 3331 }, { "epoch": 0.450460498521335, "grad_norm": 0.15855996310710907, "learning_rate": 1.9210164866503808e-05, "loss": 0.047154903411865234, "step": 3332 }, { "epoch": 0.450595690747782, "grad_norm": 0.19494955241680145, "learning_rate": 1.9203595131719935e-05, "loss": 0.049901723861694336, "step": 3333 }, { "epoch": 0.450730882974229, "grad_norm": 0.31304165720939636, "learning_rate": 1.9197024521725148e-05, "loss": 0.08922672271728516, "step": 3334 }, { "epoch": 0.45086607520067595, "grad_norm": 0.4357836842536926, "learning_rate": 1.9190453037887465e-05, "loss": 0.1002645492553711, "step": 3335 }, { "epoch": 0.45100126742712293, "grad_norm": 0.33210405707359314, "learning_rate": 1.918388068157512e-05, "loss": 0.05168336629867554, "step": 3336 }, { "epoch": 0.4511364596535699, "grad_norm": 0.2215290665626526, "learning_rate": 1.9177307454156507e-05, "loss": 0.06288552284240723, "step": 3337 }, { "epoch": 0.4512716518800169, "grad_norm": 0.4807816743850708, "learning_rate": 1.9170733357000202e-05, "loss": 0.0641336441040039, "step": 3338 }, { "epoch": 0.45140684410646387, "grad_norm": 0.2763362228870392, "learning_rate": 1.916415839147497e-05, "loss": 0.06133127212524414, "step": 3339 }, { "epoch": 0.45154203633291085, "grad_norm": 0.3626943826675415, "learning_rate": 1.9157582558949756e-05, "loss": 0.04251909255981445, "step": 3340 }, { "epoch": 0.4516772285593578, "grad_norm": 0.409523069858551, "learning_rate": 1.9151005860793682e-05, "loss": 0.07260751724243164, "step": 3341 }, { "epoch": 0.4518124207858048, "grad_norm": 0.3742903769016266, "learning_rate": 1.9144428298376056e-05, "loss": 0.09168076515197754, "step": 3342 }, { "epoch": 0.4519476130122518, "grad_norm": 0.3158626854419708, "learning_rate": 1.9137849873066355e-05, "loss": 0.07207798957824707, "step": 3343 }, { "epoch": 0.45208280523869876, "grad_norm": 0.5290866494178772, "learning_rate": 1.9131270586234243e-05, "loss": 0.08540534973144531, "step": 3344 }, { "epoch": 0.45221799746514574, "grad_norm": 0.33754172921180725, "learning_rate": 1.9124690439249564e-05, "loss": 0.06018531322479248, "step": 3345 }, { "epoch": 0.4523531896915927, "grad_norm": 0.21841798722743988, "learning_rate": 1.9118109433482342e-05, "loss": 0.04831266403198242, "step": 3346 }, { "epoch": 0.4524883819180397, "grad_norm": 0.3793192505836487, "learning_rate": 1.911152757030278e-05, "loss": 0.06748485565185547, "step": 3347 }, { "epoch": 0.4526235741444867, "grad_norm": 0.17827394604682922, "learning_rate": 1.9104944851081247e-05, "loss": 0.044198036193847656, "step": 3348 }, { "epoch": 0.45275876637093365, "grad_norm": 0.27998778223991394, "learning_rate": 1.9098361277188303e-05, "loss": 0.06831824779510498, "step": 3349 }, { "epoch": 0.45289395859738063, "grad_norm": 0.34288692474365234, "learning_rate": 1.909177684999469e-05, "loss": 0.05758047103881836, "step": 3350 }, { "epoch": 0.4530291508238276, "grad_norm": 0.2111118584871292, "learning_rate": 1.9085191570871316e-05, "loss": 0.056126147508621216, "step": 3351 }, { "epoch": 0.4531643430502746, "grad_norm": 0.3934229910373688, "learning_rate": 1.9078605441189275e-05, "loss": 0.07675504684448242, "step": 3352 }, { "epoch": 0.45329953527672157, "grad_norm": 0.23766179382801056, "learning_rate": 1.9072018462319828e-05, "loss": 0.054659128189086914, "step": 3353 }, { "epoch": 0.45343472750316854, "grad_norm": 0.530798077583313, "learning_rate": 1.9065430635634422e-05, "loss": 0.08081603050231934, "step": 3354 }, { "epoch": 0.4535699197296155, "grad_norm": 0.244573712348938, "learning_rate": 1.9058841962504677e-05, "loss": 0.057622432708740234, "step": 3355 }, { "epoch": 0.4537051119560625, "grad_norm": 0.16016773879528046, "learning_rate": 1.9052252444302394e-05, "loss": 0.04063546657562256, "step": 3356 }, { "epoch": 0.4538403041825095, "grad_norm": 0.15886633098125458, "learning_rate": 1.904566208239954e-05, "loss": 0.05351591110229492, "step": 3357 }, { "epoch": 0.45397549640895646, "grad_norm": 0.556342363357544, "learning_rate": 1.903907087816827e-05, "loss": 0.07323169708251953, "step": 3358 }, { "epoch": 0.45411068863540344, "grad_norm": 0.33654287457466125, "learning_rate": 1.9032478832980902e-05, "loss": 0.08651256561279297, "step": 3359 }, { "epoch": 0.45424588086185047, "grad_norm": 0.23699747025966644, "learning_rate": 1.9025885948209938e-05, "loss": 0.06017589569091797, "step": 3360 }, { "epoch": 0.45438107308829745, "grad_norm": 0.5729971528053284, "learning_rate": 1.901929222522805e-05, "loss": 0.1022958755493164, "step": 3361 }, { "epoch": 0.4545162653147444, "grad_norm": 0.28968188166618347, "learning_rate": 1.901269766540809e-05, "loss": 0.0852656364440918, "step": 3362 }, { "epoch": 0.4546514575411914, "grad_norm": 0.15169383585453033, "learning_rate": 1.9006102270123076e-05, "loss": 0.03434610366821289, "step": 3363 }, { "epoch": 0.4547866497676384, "grad_norm": 0.33873650431632996, "learning_rate": 1.8999506040746208e-05, "loss": 0.054129600524902344, "step": 3364 }, { "epoch": 0.45492184199408536, "grad_norm": 0.30081894993782043, "learning_rate": 1.899290897865085e-05, "loss": 0.064056396484375, "step": 3365 }, { "epoch": 0.45505703422053234, "grad_norm": 0.21700730919837952, "learning_rate": 1.898631108521055e-05, "loss": 0.06077003479003906, "step": 3366 }, { "epoch": 0.4551922264469793, "grad_norm": 0.16849905252456665, "learning_rate": 1.8979712361799027e-05, "loss": 0.050501346588134766, "step": 3367 }, { "epoch": 0.4553274186734263, "grad_norm": 0.46449747681617737, "learning_rate": 1.8973112809790168e-05, "loss": 0.0909113883972168, "step": 3368 }, { "epoch": 0.4554626108998733, "grad_norm": 0.3328554630279541, "learning_rate": 1.8966512430558036e-05, "loss": 0.08860397338867188, "step": 3369 }, { "epoch": 0.45559780312632026, "grad_norm": 0.3225538432598114, "learning_rate": 1.8959911225476858e-05, "loss": 0.05960369110107422, "step": 3370 }, { "epoch": 0.45573299535276723, "grad_norm": 0.29006409645080566, "learning_rate": 1.895330919592105e-05, "loss": 0.05233478546142578, "step": 3371 }, { "epoch": 0.4558681875792142, "grad_norm": 0.30673539638519287, "learning_rate": 1.8946706343265187e-05, "loss": 0.07146930694580078, "step": 3372 }, { "epoch": 0.4560033798056612, "grad_norm": 0.3561408519744873, "learning_rate": 1.8940102668884016e-05, "loss": 0.09210491180419922, "step": 3373 }, { "epoch": 0.45613857203210817, "grad_norm": 0.30946722626686096, "learning_rate": 1.893349817415246e-05, "loss": 0.06884050369262695, "step": 3374 }, { "epoch": 0.45627376425855515, "grad_norm": 0.20836809277534485, "learning_rate": 1.8926892860445607e-05, "loss": 0.05988717079162598, "step": 3375 }, { "epoch": 0.4564089564850021, "grad_norm": 0.42764633893966675, "learning_rate": 1.8920286729138718e-05, "loss": 0.06943464279174805, "step": 3376 }, { "epoch": 0.4565441487114491, "grad_norm": 0.2909482717514038, "learning_rate": 1.8913679781607225e-05, "loss": 0.0875391960144043, "step": 3377 }, { "epoch": 0.4566793409378961, "grad_norm": 0.4012003540992737, "learning_rate": 1.8907072019226734e-05, "loss": 0.07361888885498047, "step": 3378 }, { "epoch": 0.45681453316434306, "grad_norm": 0.33011263608932495, "learning_rate": 1.8900463443373015e-05, "loss": 0.06682038307189941, "step": 3379 }, { "epoch": 0.45694972539079004, "grad_norm": 0.273349791765213, "learning_rate": 1.889385405542201e-05, "loss": 0.06336069107055664, "step": 3380 }, { "epoch": 0.457084917617237, "grad_norm": 0.4372940957546234, "learning_rate": 1.8887243856749816e-05, "loss": 0.09548282623291016, "step": 3381 }, { "epoch": 0.457220109843684, "grad_norm": 0.269704669713974, "learning_rate": 1.8880632848732723e-05, "loss": 0.06457805633544922, "step": 3382 }, { "epoch": 0.457355302070131, "grad_norm": 0.447592556476593, "learning_rate": 1.8874021032747185e-05, "loss": 0.0651707649230957, "step": 3383 }, { "epoch": 0.45749049429657795, "grad_norm": 0.36614465713500977, "learning_rate": 1.8867408410169803e-05, "loss": 0.04779016971588135, "step": 3384 }, { "epoch": 0.45762568652302493, "grad_norm": 0.5801144242286682, "learning_rate": 1.886079498237737e-05, "loss": 0.10569000244140625, "step": 3385 }, { "epoch": 0.4577608787494719, "grad_norm": 0.3168382942676544, "learning_rate": 1.885418075074683e-05, "loss": 0.07435190677642822, "step": 3386 }, { "epoch": 0.4578960709759189, "grad_norm": 0.5302079916000366, "learning_rate": 1.884756571665531e-05, "loss": 0.08707332611083984, "step": 3387 }, { "epoch": 0.45803126320236587, "grad_norm": 0.3065987527370453, "learning_rate": 1.8840949881480085e-05, "loss": 0.08809423446655273, "step": 3388 }, { "epoch": 0.45816645542881285, "grad_norm": 0.25234851241111755, "learning_rate": 1.8834333246598613e-05, "loss": 0.05010044574737549, "step": 3389 }, { "epoch": 0.4583016476552598, "grad_norm": 0.2220805585384369, "learning_rate": 1.8827715813388514e-05, "loss": 0.0434725284576416, "step": 3390 }, { "epoch": 0.4584368398817068, "grad_norm": 0.5903912782669067, "learning_rate": 1.8821097583227572e-05, "loss": 0.07428836822509766, "step": 3391 }, { "epoch": 0.4585720321081538, "grad_norm": 0.38121965527534485, "learning_rate": 1.8814478557493732e-05, "loss": 0.07053303718566895, "step": 3392 }, { "epoch": 0.45870722433460076, "grad_norm": 0.45263683795928955, "learning_rate": 1.8807858737565118e-05, "loss": 0.07894706726074219, "step": 3393 }, { "epoch": 0.45884241656104774, "grad_norm": 0.4440828561782837, "learning_rate": 1.880123812482001e-05, "loss": 0.09063720703125, "step": 3394 }, { "epoch": 0.4589776087874947, "grad_norm": 0.3302997350692749, "learning_rate": 1.8794616720636853e-05, "loss": 0.05445122718811035, "step": 3395 }, { "epoch": 0.4591128010139417, "grad_norm": 0.2598004639148712, "learning_rate": 1.8787994526394257e-05, "loss": 0.06152057647705078, "step": 3396 }, { "epoch": 0.4592479932403887, "grad_norm": 0.3922182619571686, "learning_rate": 1.8781371543471e-05, "loss": 0.057865142822265625, "step": 3397 }, { "epoch": 0.45938318546683565, "grad_norm": 0.21379107236862183, "learning_rate": 1.8774747773246024e-05, "loss": 0.03534579277038574, "step": 3398 }, { "epoch": 0.45951837769328263, "grad_norm": 0.3878244459629059, "learning_rate": 1.8768123217098438e-05, "loss": 0.057530879974365234, "step": 3399 }, { "epoch": 0.4596535699197296, "grad_norm": 0.5795989036560059, "learning_rate": 1.8761497876407496e-05, "loss": 0.07978522777557373, "step": 3400 }, { "epoch": 0.4597887621461766, "grad_norm": 0.23303039371967316, "learning_rate": 1.8754871752552646e-05, "loss": 0.06266593933105469, "step": 3401 }, { "epoch": 0.45992395437262357, "grad_norm": 0.17354288697242737, "learning_rate": 1.8748244846913463e-05, "loss": 0.036647796630859375, "step": 3402 }, { "epoch": 0.46005914659907055, "grad_norm": 0.2165619432926178, "learning_rate": 1.874161716086972e-05, "loss": 0.0589756965637207, "step": 3403 }, { "epoch": 0.4601943388255175, "grad_norm": 0.4398226737976074, "learning_rate": 1.8734988695801333e-05, "loss": 0.08697986602783203, "step": 3404 }, { "epoch": 0.4603295310519645, "grad_norm": 0.3662380874156952, "learning_rate": 1.8728359453088382e-05, "loss": 0.05473041534423828, "step": 3405 }, { "epoch": 0.4604647232784115, "grad_norm": 0.16858461499214172, "learning_rate": 1.8721729434111108e-05, "loss": 0.037944793701171875, "step": 3406 }, { "epoch": 0.46059991550485846, "grad_norm": 0.31228041648864746, "learning_rate": 1.871509864024992e-05, "loss": 0.08854866027832031, "step": 3407 }, { "epoch": 0.46073510773130544, "grad_norm": 0.25661909580230713, "learning_rate": 1.8708467072885385e-05, "loss": 0.07062864303588867, "step": 3408 }, { "epoch": 0.4608702999577524, "grad_norm": 0.2618213891983032, "learning_rate": 1.8701834733398227e-05, "loss": 0.07455968856811523, "step": 3409 }, { "epoch": 0.4610054921841994, "grad_norm": 0.3570909798145294, "learning_rate": 1.8695201623169335e-05, "loss": 0.049992918968200684, "step": 3410 }, { "epoch": 0.4611406844106464, "grad_norm": 0.34414079785346985, "learning_rate": 1.868856774357977e-05, "loss": 0.07537531852722168, "step": 3411 }, { "epoch": 0.46127587663709335, "grad_norm": 0.5453190803527832, "learning_rate": 1.868193309601072e-05, "loss": 0.06511664390563965, "step": 3412 }, { "epoch": 0.46141106886354033, "grad_norm": 0.16751492023468018, "learning_rate": 1.867529768184357e-05, "loss": 0.05666553974151611, "step": 3413 }, { "epoch": 0.4615462610899873, "grad_norm": 0.17946866154670715, "learning_rate": 1.8668661502459842e-05, "loss": 0.04185080528259277, "step": 3414 }, { "epoch": 0.4616814533164343, "grad_norm": 0.17078571021556854, "learning_rate": 1.866202455924123e-05, "loss": 0.04868030548095703, "step": 3415 }, { "epoch": 0.46181664554288127, "grad_norm": 0.22714941203594208, "learning_rate": 1.865538685356957e-05, "loss": 0.06744003295898438, "step": 3416 }, { "epoch": 0.46195183776932824, "grad_norm": 0.3168025314807892, "learning_rate": 1.864874838682688e-05, "loss": 0.08167481422424316, "step": 3417 }, { "epoch": 0.4620870299957752, "grad_norm": 0.3245197832584381, "learning_rate": 1.864210916039531e-05, "loss": 0.08807134628295898, "step": 3418 }, { "epoch": 0.4622222222222222, "grad_norm": 0.6082145571708679, "learning_rate": 1.86354691756572e-05, "loss": 0.07672929763793945, "step": 3419 }, { "epoch": 0.4623574144486692, "grad_norm": 0.4219649136066437, "learning_rate": 1.8628828433995013e-05, "loss": 0.061176180839538574, "step": 3420 }, { "epoch": 0.46249260667511616, "grad_norm": 0.5252535939216614, "learning_rate": 1.86221869367914e-05, "loss": 0.10416841506958008, "step": 3421 }, { "epoch": 0.46262779890156314, "grad_norm": 0.16591042280197144, "learning_rate": 1.8615544685429153e-05, "loss": 0.0382232666015625, "step": 3422 }, { "epoch": 0.4627629911280101, "grad_norm": 0.3893396854400635, "learning_rate": 1.860890168129122e-05, "loss": 0.0856313705444336, "step": 3423 }, { "epoch": 0.4628981833544571, "grad_norm": 0.2738642990589142, "learning_rate": 1.8602257925760708e-05, "loss": 0.05086398124694824, "step": 3424 }, { "epoch": 0.46303337558090407, "grad_norm": 0.22418640553951263, "learning_rate": 1.859561342022089e-05, "loss": 0.05542564392089844, "step": 3425 }, { "epoch": 0.46316856780735105, "grad_norm": 0.2757858633995056, "learning_rate": 1.8588968166055185e-05, "loss": 0.04795694351196289, "step": 3426 }, { "epoch": 0.4633037600337981, "grad_norm": 0.37701842188835144, "learning_rate": 1.8582322164647166e-05, "loss": 0.06206989288330078, "step": 3427 }, { "epoch": 0.46343895226024506, "grad_norm": 0.29527583718299866, "learning_rate": 1.8575675417380568e-05, "loss": 0.05865907669067383, "step": 3428 }, { "epoch": 0.46357414448669204, "grad_norm": 0.554803192615509, "learning_rate": 1.856902792563928e-05, "loss": 0.08922004699707031, "step": 3429 }, { "epoch": 0.463709336713139, "grad_norm": 0.4446096420288086, "learning_rate": 1.856237969080735e-05, "loss": 0.09359264373779297, "step": 3430 }, { "epoch": 0.463844528939586, "grad_norm": 0.2654801607131958, "learning_rate": 1.8555730714268967e-05, "loss": 0.04187750816345215, "step": 3431 }, { "epoch": 0.463979721166033, "grad_norm": 0.29031580686569214, "learning_rate": 1.8549080997408492e-05, "loss": 0.0662527084350586, "step": 3432 }, { "epoch": 0.46411491339247996, "grad_norm": 0.3369176387786865, "learning_rate": 1.8542430541610426e-05, "loss": 0.06745195388793945, "step": 3433 }, { "epoch": 0.46425010561892693, "grad_norm": 0.32542991638183594, "learning_rate": 1.8535779348259428e-05, "loss": 0.06024777889251709, "step": 3434 }, { "epoch": 0.4643852978453739, "grad_norm": 0.19965492188930511, "learning_rate": 1.852912741874032e-05, "loss": 0.040612220764160156, "step": 3435 }, { "epoch": 0.4645204900718209, "grad_norm": 0.23951001465320587, "learning_rate": 1.8522474754438056e-05, "loss": 0.06544429063796997, "step": 3436 }, { "epoch": 0.46465568229826787, "grad_norm": 0.351089209318161, "learning_rate": 1.851582135673777e-05, "loss": 0.05725383758544922, "step": 3437 }, { "epoch": 0.46479087452471485, "grad_norm": 0.2836418151855469, "learning_rate": 1.850916722702473e-05, "loss": 0.06226921081542969, "step": 3438 }, { "epoch": 0.4649260667511618, "grad_norm": 0.3764338791370392, "learning_rate": 1.8502512366684355e-05, "loss": 0.05451464653015137, "step": 3439 }, { "epoch": 0.4650612589776088, "grad_norm": 0.25200191140174866, "learning_rate": 1.8495856777102232e-05, "loss": 0.0630868673324585, "step": 3440 }, { "epoch": 0.4651964512040558, "grad_norm": 0.35389888286590576, "learning_rate": 1.848920045966408e-05, "loss": 0.05103874206542969, "step": 3441 }, { "epoch": 0.46533164343050276, "grad_norm": 0.23646841943264008, "learning_rate": 1.8482543415755797e-05, "loss": 0.05875825881958008, "step": 3442 }, { "epoch": 0.46546683565694974, "grad_norm": 0.30320239067077637, "learning_rate": 1.8475885646763394e-05, "loss": 0.05823349952697754, "step": 3443 }, { "epoch": 0.4656020278833967, "grad_norm": 0.18037791550159454, "learning_rate": 1.8469227154073064e-05, "loss": 0.04271292686462402, "step": 3444 }, { "epoch": 0.4657372201098437, "grad_norm": 0.41756686568260193, "learning_rate": 1.8462567939071142e-05, "loss": 0.05832624435424805, "step": 3445 }, { "epoch": 0.4658724123362907, "grad_norm": 0.5267950296401978, "learning_rate": 1.845590800314411e-05, "loss": 0.07138395309448242, "step": 3446 }, { "epoch": 0.46600760456273765, "grad_norm": 0.30034756660461426, "learning_rate": 1.8449247347678607e-05, "loss": 0.055419921875, "step": 3447 }, { "epoch": 0.46614279678918463, "grad_norm": 0.27279147505760193, "learning_rate": 1.8442585974061405e-05, "loss": 0.0683819055557251, "step": 3448 }, { "epoch": 0.4662779890156316, "grad_norm": 0.28086161613464355, "learning_rate": 1.8435923883679452e-05, "loss": 0.051483154296875, "step": 3449 }, { "epoch": 0.4664131812420786, "grad_norm": 0.2908973693847656, "learning_rate": 1.8429261077919818e-05, "loss": 0.06496095657348633, "step": 3450 }, { "epoch": 0.46654837346852557, "grad_norm": 0.5070411562919617, "learning_rate": 1.8422597558169742e-05, "loss": 0.07939910888671875, "step": 3451 }, { "epoch": 0.46668356569497255, "grad_norm": 0.4878532588481903, "learning_rate": 1.84159333258166e-05, "loss": 0.08448600769042969, "step": 3452 }, { "epoch": 0.4668187579214195, "grad_norm": 0.49835166335105896, "learning_rate": 1.8409268382247933e-05, "loss": 0.06260395050048828, "step": 3453 }, { "epoch": 0.4669539501478665, "grad_norm": 0.24069352447986603, "learning_rate": 1.8402602728851405e-05, "loss": 0.042691707611083984, "step": 3454 }, { "epoch": 0.4670891423743135, "grad_norm": 0.13667982816696167, "learning_rate": 1.839593636701484e-05, "loss": 0.02815401554107666, "step": 3455 }, { "epoch": 0.46722433460076046, "grad_norm": 0.25796830654144287, "learning_rate": 1.8389269298126214e-05, "loss": 0.047643184661865234, "step": 3456 }, { "epoch": 0.46735952682720744, "grad_norm": 0.4226883053779602, "learning_rate": 1.838260152357365e-05, "loss": 0.0767965316772461, "step": 3457 }, { "epoch": 0.4674947190536544, "grad_norm": 0.4205719828605652, "learning_rate": 1.837593304474541e-05, "loss": 0.0598905086517334, "step": 3458 }, { "epoch": 0.4676299112801014, "grad_norm": 0.27018600702285767, "learning_rate": 1.836926386302991e-05, "loss": 0.0561213493347168, "step": 3459 }, { "epoch": 0.4677651035065484, "grad_norm": 0.520823061466217, "learning_rate": 1.8362593979815696e-05, "loss": 0.06141042709350586, "step": 3460 }, { "epoch": 0.46790029573299535, "grad_norm": 0.17237596213817596, "learning_rate": 1.8355923396491496e-05, "loss": 0.04550826549530029, "step": 3461 }, { "epoch": 0.46803548795944233, "grad_norm": 0.21555861830711365, "learning_rate": 1.8349252114446138e-05, "loss": 0.03649139404296875, "step": 3462 }, { "epoch": 0.4681706801858893, "grad_norm": 0.3433397710323334, "learning_rate": 1.834258013506864e-05, "loss": 0.07322883605957031, "step": 3463 }, { "epoch": 0.4683058724123363, "grad_norm": 0.27201583981513977, "learning_rate": 1.833590745974813e-05, "loss": 0.042952537536621094, "step": 3464 }, { "epoch": 0.46844106463878327, "grad_norm": 0.23703953623771667, "learning_rate": 1.8329234089873892e-05, "loss": 0.0563054084777832, "step": 3465 }, { "epoch": 0.46857625686523025, "grad_norm": 0.3415151834487915, "learning_rate": 1.8322560026835366e-05, "loss": 0.08687686920166016, "step": 3466 }, { "epoch": 0.4687114490916772, "grad_norm": 0.22360317409038544, "learning_rate": 1.8315885272022125e-05, "loss": 0.04466962814331055, "step": 3467 }, { "epoch": 0.4688466413181242, "grad_norm": 0.24439865350723267, "learning_rate": 1.830920982682389e-05, "loss": 0.058303654193878174, "step": 3468 }, { "epoch": 0.4689818335445712, "grad_norm": 0.1943369358778, "learning_rate": 1.830253369263052e-05, "loss": 0.04276081919670105, "step": 3469 }, { "epoch": 0.46911702577101816, "grad_norm": 0.39875328540802, "learning_rate": 1.8295856870832024e-05, "loss": 0.07946610450744629, "step": 3470 }, { "epoch": 0.46925221799746514, "grad_norm": 0.16692864894866943, "learning_rate": 1.828917936281855e-05, "loss": 0.04334312677383423, "step": 3471 }, { "epoch": 0.4693874102239121, "grad_norm": 0.2114153653383255, "learning_rate": 1.8282501169980396e-05, "loss": 0.03536951541900635, "step": 3472 }, { "epoch": 0.4695226024503591, "grad_norm": 0.36615613102912903, "learning_rate": 1.8275822293707992e-05, "loss": 0.08063077926635742, "step": 3473 }, { "epoch": 0.4696577946768061, "grad_norm": 0.24879881739616394, "learning_rate": 1.8269142735391917e-05, "loss": 0.05558371543884277, "step": 3474 }, { "epoch": 0.46979298690325305, "grad_norm": 0.316879004240036, "learning_rate": 1.8262462496422893e-05, "loss": 0.050569355487823486, "step": 3475 }, { "epoch": 0.46992817912970003, "grad_norm": 0.5125378370285034, "learning_rate": 1.8255781578191778e-05, "loss": 0.07561302185058594, "step": 3476 }, { "epoch": 0.470063371356147, "grad_norm": 0.4138695299625397, "learning_rate": 1.824909998208958e-05, "loss": 0.08216714859008789, "step": 3477 }, { "epoch": 0.470198563582594, "grad_norm": 0.25304940342903137, "learning_rate": 1.8242417709507434e-05, "loss": 0.046431541442871094, "step": 3478 }, { "epoch": 0.47033375580904097, "grad_norm": 0.3585839569568634, "learning_rate": 1.8235734761836637e-05, "loss": 0.06269121170043945, "step": 3479 }, { "epoch": 0.47046894803548794, "grad_norm": 0.19518537819385529, "learning_rate": 1.82290511404686e-05, "loss": 0.04918479919433594, "step": 3480 }, { "epoch": 0.4706041402619349, "grad_norm": 0.4402546286582947, "learning_rate": 1.8222366846794904e-05, "loss": 0.08723926544189453, "step": 3481 }, { "epoch": 0.4707393324883819, "grad_norm": 0.27418094873428345, "learning_rate": 1.8215681882207238e-05, "loss": 0.06601524353027344, "step": 3482 }, { "epoch": 0.4708745247148289, "grad_norm": 0.24436897039413452, "learning_rate": 1.8208996248097462e-05, "loss": 0.050644636154174805, "step": 3483 }, { "epoch": 0.47100971694127586, "grad_norm": 0.3325497806072235, "learning_rate": 1.8202309945857557e-05, "loss": 0.04599761962890625, "step": 3484 }, { "epoch": 0.47114490916772284, "grad_norm": 0.33765271306037903, "learning_rate": 1.8195622976879638e-05, "loss": 0.04256880283355713, "step": 3485 }, { "epoch": 0.4712801013941698, "grad_norm": 0.20836441218852997, "learning_rate": 1.8188935342555977e-05, "loss": 0.04271090030670166, "step": 3486 }, { "epoch": 0.4714152936206168, "grad_norm": 0.1930689960718155, "learning_rate": 1.818224704427897e-05, "loss": 0.05206632614135742, "step": 3487 }, { "epoch": 0.47155048584706377, "grad_norm": 0.5138967037200928, "learning_rate": 1.8175558083441162e-05, "loss": 0.08325481414794922, "step": 3488 }, { "epoch": 0.47168567807351075, "grad_norm": 0.23282721638679504, "learning_rate": 1.8168868461435225e-05, "loss": 0.06511831283569336, "step": 3489 }, { "epoch": 0.47182087029995773, "grad_norm": 0.3169877231121063, "learning_rate": 1.8162178179653977e-05, "loss": 0.06747961044311523, "step": 3490 }, { "epoch": 0.4719560625264047, "grad_norm": 0.1433219462633133, "learning_rate": 1.815548723949037e-05, "loss": 0.03924417495727539, "step": 3491 }, { "epoch": 0.4720912547528517, "grad_norm": 0.5342644453048706, "learning_rate": 1.814879564233749e-05, "loss": 0.06855583190917969, "step": 3492 }, { "epoch": 0.47222644697929866, "grad_norm": 0.2186155617237091, "learning_rate": 1.8142103389588567e-05, "loss": 0.06804943084716797, "step": 3493 }, { "epoch": 0.47236163920574564, "grad_norm": 0.3618812561035156, "learning_rate": 1.813541048263696e-05, "loss": 0.05611443519592285, "step": 3494 }, { "epoch": 0.4724968314321927, "grad_norm": 0.4073510468006134, "learning_rate": 1.8128716922876178e-05, "loss": 0.071868896484375, "step": 3495 }, { "epoch": 0.47263202365863966, "grad_norm": 0.24067722260951996, "learning_rate": 1.812202271169984e-05, "loss": 0.052373528480529785, "step": 3496 }, { "epoch": 0.47276721588508663, "grad_norm": 0.42137137055397034, "learning_rate": 1.8115327850501726e-05, "loss": 0.07637220621109009, "step": 3497 }, { "epoch": 0.4729024081115336, "grad_norm": 0.2202090471982956, "learning_rate": 1.8108632340675746e-05, "loss": 0.06374931335449219, "step": 3498 }, { "epoch": 0.4730376003379806, "grad_norm": 0.49998244643211365, "learning_rate": 1.810193618361593e-05, "loss": 0.06397366523742676, "step": 3499 }, { "epoch": 0.47317279256442757, "grad_norm": 0.44209524989128113, "learning_rate": 1.8095239380716464e-05, "loss": 0.07425355911254883, "step": 3500 }, { "epoch": 0.47330798479087455, "grad_norm": 0.26591023802757263, "learning_rate": 1.808854193337165e-05, "loss": 0.07446479797363281, "step": 3501 }, { "epoch": 0.4734431770173215, "grad_norm": 0.33701515197753906, "learning_rate": 1.8081843842975935e-05, "loss": 0.07298707962036133, "step": 3502 }, { "epoch": 0.4735783692437685, "grad_norm": 0.31315985321998596, "learning_rate": 1.80751451109239e-05, "loss": 0.05843305587768555, "step": 3503 }, { "epoch": 0.4737135614702155, "grad_norm": 0.33350905776023865, "learning_rate": 1.806844573861025e-05, "loss": 0.06082010269165039, "step": 3504 }, { "epoch": 0.47384875369666246, "grad_norm": 0.3647076189517975, "learning_rate": 1.806174572742984e-05, "loss": 0.05537247657775879, "step": 3505 }, { "epoch": 0.47398394592310944, "grad_norm": 0.25711965560913086, "learning_rate": 1.8055045078777634e-05, "loss": 0.057493627071380615, "step": 3506 }, { "epoch": 0.4741191381495564, "grad_norm": 0.26133498549461365, "learning_rate": 1.8048343794048762e-05, "loss": 0.07339620590209961, "step": 3507 }, { "epoch": 0.4742543303760034, "grad_norm": 0.2561602294445038, "learning_rate": 1.8041641874638445e-05, "loss": 0.0631871223449707, "step": 3508 }, { "epoch": 0.4743895226024504, "grad_norm": 0.31776508688926697, "learning_rate": 1.8034939321942077e-05, "loss": 0.060988783836364746, "step": 3509 }, { "epoch": 0.47452471482889735, "grad_norm": 0.258253812789917, "learning_rate": 1.8028236137355154e-05, "loss": 0.0681147575378418, "step": 3510 }, { "epoch": 0.47465990705534433, "grad_norm": 0.2523079216480255, "learning_rate": 1.8021532322273327e-05, "loss": 0.04806375503540039, "step": 3511 }, { "epoch": 0.4747950992817913, "grad_norm": 0.2886520028114319, "learning_rate": 1.8014827878092347e-05, "loss": 0.056687116622924805, "step": 3512 }, { "epoch": 0.4749302915082383, "grad_norm": 0.3062235116958618, "learning_rate": 1.800812280620813e-05, "loss": 0.059404850006103516, "step": 3513 }, { "epoch": 0.47506548373468527, "grad_norm": 0.4281168282032013, "learning_rate": 1.80014171080167e-05, "loss": 0.08860492706298828, "step": 3514 }, { "epoch": 0.47520067596113225, "grad_norm": 0.3539980351924896, "learning_rate": 1.7994710784914227e-05, "loss": 0.07063722610473633, "step": 3515 }, { "epoch": 0.4753358681875792, "grad_norm": 0.3549995422363281, "learning_rate": 1.7988003838297e-05, "loss": 0.053465962409973145, "step": 3516 }, { "epoch": 0.4754710604140262, "grad_norm": 0.3343566060066223, "learning_rate": 1.7981296269561438e-05, "loss": 0.06540298461914062, "step": 3517 }, { "epoch": 0.4756062526404732, "grad_norm": 0.3658030331134796, "learning_rate": 1.7974588080104095e-05, "loss": 0.06323480606079102, "step": 3518 }, { "epoch": 0.47574144486692016, "grad_norm": 0.23306547105312347, "learning_rate": 1.7967879271321648e-05, "loss": 0.05868053436279297, "step": 3519 }, { "epoch": 0.47587663709336714, "grad_norm": 0.22199088335037231, "learning_rate": 1.7961169844610913e-05, "loss": 0.06950092315673828, "step": 3520 }, { "epoch": 0.4760118293198141, "grad_norm": 0.25085461139678955, "learning_rate": 1.795445980136883e-05, "loss": 0.06714701652526855, "step": 3521 }, { "epoch": 0.4761470215462611, "grad_norm": 0.2103465348482132, "learning_rate": 1.794774914299245e-05, "loss": 0.04891157150268555, "step": 3522 }, { "epoch": 0.4762822137727081, "grad_norm": 0.42396554350852966, "learning_rate": 1.794103787087899e-05, "loss": 0.06476306915283203, "step": 3523 }, { "epoch": 0.47641740599915505, "grad_norm": 0.2529670000076294, "learning_rate": 1.7934325986425755e-05, "loss": 0.05025029182434082, "step": 3524 }, { "epoch": 0.47655259822560203, "grad_norm": 0.25131848454475403, "learning_rate": 1.7927613491030204e-05, "loss": 0.05663418769836426, "step": 3525 }, { "epoch": 0.476687790452049, "grad_norm": 0.3443480432033539, "learning_rate": 1.7920900386089913e-05, "loss": 0.06392145156860352, "step": 3526 }, { "epoch": 0.476822982678496, "grad_norm": 0.37560203671455383, "learning_rate": 1.7914186673002588e-05, "loss": 0.09542560577392578, "step": 3527 }, { "epoch": 0.47695817490494297, "grad_norm": 0.2930920720100403, "learning_rate": 1.790747235316605e-05, "loss": 0.06230950355529785, "step": 3528 }, { "epoch": 0.47709336713138994, "grad_norm": 0.4717599153518677, "learning_rate": 1.790075742797827e-05, "loss": 0.08771228790283203, "step": 3529 }, { "epoch": 0.4772285593578369, "grad_norm": 0.2221323400735855, "learning_rate": 1.789404189883732e-05, "loss": 0.07851028442382812, "step": 3530 }, { "epoch": 0.4773637515842839, "grad_norm": 0.6136850118637085, "learning_rate": 1.7887325767141415e-05, "loss": 0.10535526275634766, "step": 3531 }, { "epoch": 0.4774989438107309, "grad_norm": 0.35282716155052185, "learning_rate": 1.7880609034288894e-05, "loss": 0.0712127685546875, "step": 3532 }, { "epoch": 0.47763413603717786, "grad_norm": 0.3388783037662506, "learning_rate": 1.7873891701678208e-05, "loss": 0.05991506576538086, "step": 3533 }, { "epoch": 0.47776932826362484, "grad_norm": 0.3645012080669403, "learning_rate": 1.786717377070794e-05, "loss": 0.0606842041015625, "step": 3534 }, { "epoch": 0.4779045204900718, "grad_norm": 0.3051288425922394, "learning_rate": 1.7860455242776803e-05, "loss": 0.06638526916503906, "step": 3535 }, { "epoch": 0.4780397127165188, "grad_norm": 0.2696172893047333, "learning_rate": 1.7853736119283635e-05, "loss": 0.06256294250488281, "step": 3536 }, { "epoch": 0.4781749049429658, "grad_norm": 0.21427100896835327, "learning_rate": 1.7847016401627388e-05, "loss": 0.06046009063720703, "step": 3537 }, { "epoch": 0.47831009716941275, "grad_norm": 0.3176337480545044, "learning_rate": 1.7840296091207144e-05, "loss": 0.061319828033447266, "step": 3538 }, { "epoch": 0.47844528939585973, "grad_norm": 0.47059541940689087, "learning_rate": 1.7833575189422107e-05, "loss": 0.06873583793640137, "step": 3539 }, { "epoch": 0.4785804816223067, "grad_norm": 0.2860632538795471, "learning_rate": 1.7826853697671604e-05, "loss": 0.04511648416519165, "step": 3540 }, { "epoch": 0.4787156738487537, "grad_norm": 0.4167746603488922, "learning_rate": 1.782013161735509e-05, "loss": 0.07744717597961426, "step": 3541 }, { "epoch": 0.47885086607520067, "grad_norm": 0.33337217569351196, "learning_rate": 1.781340894987213e-05, "loss": 0.06517982482910156, "step": 3542 }, { "epoch": 0.47898605830164764, "grad_norm": 0.24096325039863586, "learning_rate": 1.7806685696622426e-05, "loss": 0.06062459945678711, "step": 3543 }, { "epoch": 0.4791212505280946, "grad_norm": 0.36147910356521606, "learning_rate": 1.779996185900579e-05, "loss": 0.0786590576171875, "step": 3544 }, { "epoch": 0.4792564427545416, "grad_norm": 0.3622245490550995, "learning_rate": 1.7793237438422165e-05, "loss": 0.06263208389282227, "step": 3545 }, { "epoch": 0.4793916349809886, "grad_norm": 0.7660185098648071, "learning_rate": 1.7786512436271617e-05, "loss": 0.09419870376586914, "step": 3546 }, { "epoch": 0.47952682720743556, "grad_norm": 0.4656512141227722, "learning_rate": 1.777978685395431e-05, "loss": 0.0979766845703125, "step": 3547 }, { "epoch": 0.47966201943388254, "grad_norm": 0.39631086587905884, "learning_rate": 1.7773060692870564e-05, "loss": 0.06427288055419922, "step": 3548 }, { "epoch": 0.4797972116603295, "grad_norm": 0.3351927399635315, "learning_rate": 1.7766333954420794e-05, "loss": 0.0591578483581543, "step": 3549 }, { "epoch": 0.4799324038867765, "grad_norm": 0.3812809884548187, "learning_rate": 1.775960664000554e-05, "loss": 0.07298469543457031, "step": 3550 }, { "epoch": 0.48006759611322347, "grad_norm": 0.2618882358074188, "learning_rate": 1.7752878751025463e-05, "loss": 0.06702661514282227, "step": 3551 }, { "epoch": 0.48020278833967045, "grad_norm": 0.41424980759620667, "learning_rate": 1.7746150288881352e-05, "loss": 0.07321524620056152, "step": 3552 }, { "epoch": 0.48033798056611743, "grad_norm": 0.32623255252838135, "learning_rate": 1.7739421254974114e-05, "loss": 0.07372140884399414, "step": 3553 }, { "epoch": 0.4804731727925644, "grad_norm": 0.5553512573242188, "learning_rate": 1.7732691650704756e-05, "loss": 0.06461763381958008, "step": 3554 }, { "epoch": 0.4806083650190114, "grad_norm": 0.36241671442985535, "learning_rate": 1.7725961477474423e-05, "loss": 0.05710554122924805, "step": 3555 }, { "epoch": 0.48074355724545836, "grad_norm": 0.36378782987594604, "learning_rate": 1.7719230736684375e-05, "loss": 0.061231374740600586, "step": 3556 }, { "epoch": 0.48087874947190534, "grad_norm": 0.16820940375328064, "learning_rate": 1.771249942973599e-05, "loss": 0.04599440097808838, "step": 3557 }, { "epoch": 0.4810139416983523, "grad_norm": 0.27629798650741577, "learning_rate": 1.7705767558030756e-05, "loss": 0.05648040771484375, "step": 3558 }, { "epoch": 0.4811491339247993, "grad_norm": 0.33989623188972473, "learning_rate": 1.769903512297029e-05, "loss": 0.06895923614501953, "step": 3559 }, { "epoch": 0.4812843261512463, "grad_norm": 0.48969021439552307, "learning_rate": 1.7692302125956315e-05, "loss": 0.0887598991394043, "step": 3560 }, { "epoch": 0.48141951837769326, "grad_norm": 0.4426460862159729, "learning_rate": 1.768556856839068e-05, "loss": 0.07474339008331299, "step": 3561 }, { "epoch": 0.48155471060414023, "grad_norm": 0.6918957233428955, "learning_rate": 1.767883445167535e-05, "loss": 0.0795588493347168, "step": 3562 }, { "epoch": 0.48168990283058727, "grad_norm": 0.47357305884361267, "learning_rate": 1.7672099777212398e-05, "loss": 0.0653543472290039, "step": 3563 }, { "epoch": 0.48182509505703425, "grad_norm": 0.3981247544288635, "learning_rate": 1.7665364546404034e-05, "loss": 0.08488988876342773, "step": 3564 }, { "epoch": 0.4819602872834812, "grad_norm": 0.23753295838832855, "learning_rate": 1.7658628760652548e-05, "loss": 0.04964733123779297, "step": 3565 }, { "epoch": 0.4820954795099282, "grad_norm": 0.3822995126247406, "learning_rate": 1.765189242136038e-05, "loss": 0.08352327346801758, "step": 3566 }, { "epoch": 0.4822306717363752, "grad_norm": 0.20389318466186523, "learning_rate": 1.7645155529930065e-05, "loss": 0.05323982238769531, "step": 3567 }, { "epoch": 0.48236586396282216, "grad_norm": 0.6372861266136169, "learning_rate": 1.763841808776426e-05, "loss": 0.08947277069091797, "step": 3568 }, { "epoch": 0.48250105618926914, "grad_norm": 0.3404562175273895, "learning_rate": 1.763168009626575e-05, "loss": 0.07779765129089355, "step": 3569 }, { "epoch": 0.4826362484157161, "grad_norm": 0.38316822052001953, "learning_rate": 1.7624941556837406e-05, "loss": 0.07512998580932617, "step": 3570 }, { "epoch": 0.4827714406421631, "grad_norm": 0.2137921005487442, "learning_rate": 1.7618202470882233e-05, "loss": 0.04875946044921875, "step": 3571 }, { "epoch": 0.4829066328686101, "grad_norm": 0.21961045265197754, "learning_rate": 1.7611462839803336e-05, "loss": 0.05422782897949219, "step": 3572 }, { "epoch": 0.48304182509505705, "grad_norm": 0.5259507894515991, "learning_rate": 1.760472266500396e-05, "loss": 0.0807027816772461, "step": 3573 }, { "epoch": 0.48317701732150403, "grad_norm": 0.24510490894317627, "learning_rate": 1.759798194788743e-05, "loss": 0.047423362731933594, "step": 3574 }, { "epoch": 0.483312209547951, "grad_norm": 0.4417347013950348, "learning_rate": 1.75912406898572e-05, "loss": 0.06473278999328613, "step": 3575 }, { "epoch": 0.483447401774398, "grad_norm": 0.317496657371521, "learning_rate": 1.758449889231685e-05, "loss": 0.08271288871765137, "step": 3576 }, { "epoch": 0.48358259400084497, "grad_norm": 0.2920406758785248, "learning_rate": 1.757775655667004e-05, "loss": 0.08787870407104492, "step": 3577 }, { "epoch": 0.48371778622729195, "grad_norm": 0.25630223751068115, "learning_rate": 1.757101368432057e-05, "loss": 0.0801386833190918, "step": 3578 }, { "epoch": 0.4838529784537389, "grad_norm": 0.33489561080932617, "learning_rate": 1.7564270276672343e-05, "loss": 0.08162117004394531, "step": 3579 }, { "epoch": 0.4839881706801859, "grad_norm": 0.31049832701683044, "learning_rate": 1.7557526335129372e-05, "loss": 0.07106292247772217, "step": 3580 }, { "epoch": 0.4841233629066329, "grad_norm": 0.3761488199234009, "learning_rate": 1.7550781861095774e-05, "loss": 0.08405113220214844, "step": 3581 }, { "epoch": 0.48425855513307986, "grad_norm": 0.28159165382385254, "learning_rate": 1.754403685597579e-05, "loss": 0.051313042640686035, "step": 3582 }, { "epoch": 0.48439374735952684, "grad_norm": 0.2896674871444702, "learning_rate": 1.7537291321173773e-05, "loss": 0.0895681381225586, "step": 3583 }, { "epoch": 0.4845289395859738, "grad_norm": 0.27373892068862915, "learning_rate": 1.7530545258094165e-05, "loss": 0.06352818012237549, "step": 3584 }, { "epoch": 0.4846641318124208, "grad_norm": 0.2834480404853821, "learning_rate": 1.7523798668141548e-05, "loss": 0.07228279113769531, "step": 3585 }, { "epoch": 0.4847993240388678, "grad_norm": 0.38150838017463684, "learning_rate": 1.7517051552720584e-05, "loss": 0.08531951904296875, "step": 3586 }, { "epoch": 0.48493451626531475, "grad_norm": 0.2771611213684082, "learning_rate": 1.7510303913236066e-05, "loss": 0.0728762149810791, "step": 3587 }, { "epoch": 0.48506970849176173, "grad_norm": 0.22732122242450714, "learning_rate": 1.7503555751092883e-05, "loss": 0.04701519012451172, "step": 3588 }, { "epoch": 0.4852049007182087, "grad_norm": 0.4139147102832794, "learning_rate": 1.7496807067696046e-05, "loss": 0.08082056045532227, "step": 3589 }, { "epoch": 0.4853400929446557, "grad_norm": 0.3019137978553772, "learning_rate": 1.7490057864450665e-05, "loss": 0.06302165985107422, "step": 3590 }, { "epoch": 0.48547528517110267, "grad_norm": 0.34268927574157715, "learning_rate": 1.748330814276195e-05, "loss": 0.07494640350341797, "step": 3591 }, { "epoch": 0.48561047739754964, "grad_norm": 0.24578368663787842, "learning_rate": 1.7476557904035243e-05, "loss": 0.055861711502075195, "step": 3592 }, { "epoch": 0.4857456696239966, "grad_norm": 0.3556198179721832, "learning_rate": 1.7469807149675973e-05, "loss": 0.07169103622436523, "step": 3593 }, { "epoch": 0.4858808618504436, "grad_norm": 0.33735206723213196, "learning_rate": 1.7463055881089685e-05, "loss": 0.08289265632629395, "step": 3594 }, { "epoch": 0.4860160540768906, "grad_norm": 0.23104017972946167, "learning_rate": 1.7456304099682024e-05, "loss": 0.06075930595397949, "step": 3595 }, { "epoch": 0.48615124630333756, "grad_norm": 0.23543255031108856, "learning_rate": 1.7449551806858756e-05, "loss": 0.040868282318115234, "step": 3596 }, { "epoch": 0.48628643852978454, "grad_norm": 0.23597250878810883, "learning_rate": 1.7442799004025733e-05, "loss": 0.055483102798461914, "step": 3597 }, { "epoch": 0.4864216307562315, "grad_norm": 0.13425995409488678, "learning_rate": 1.7436045692588934e-05, "loss": 0.02841949462890625, "step": 3598 }, { "epoch": 0.4865568229826785, "grad_norm": 0.3073100745677948, "learning_rate": 1.742929187395443e-05, "loss": 0.07666206359863281, "step": 3599 }, { "epoch": 0.4866920152091255, "grad_norm": 0.21240779757499695, "learning_rate": 1.7422537549528402e-05, "loss": 0.07803821563720703, "step": 3600 }, { "epoch": 0.48682720743557245, "grad_norm": 0.2068926990032196, "learning_rate": 1.7415782720717147e-05, "loss": 0.0429685115814209, "step": 3601 }, { "epoch": 0.48696239966201943, "grad_norm": 0.2717747390270233, "learning_rate": 1.740902738892704e-05, "loss": 0.07506906986236572, "step": 3602 }, { "epoch": 0.4870975918884664, "grad_norm": 0.21194171905517578, "learning_rate": 1.7402271555564585e-05, "loss": 0.054779052734375, "step": 3603 }, { "epoch": 0.4872327841149134, "grad_norm": 0.23456545174121857, "learning_rate": 1.739551522203638e-05, "loss": 0.06736063957214355, "step": 3604 }, { "epoch": 0.48736797634136036, "grad_norm": 0.16652098298072815, "learning_rate": 1.738875838974913e-05, "loss": 0.045694947242736816, "step": 3605 }, { "epoch": 0.48750316856780734, "grad_norm": 0.3408462405204773, "learning_rate": 1.7382001060109652e-05, "loss": 0.060717105865478516, "step": 3606 }, { "epoch": 0.4876383607942543, "grad_norm": 0.33881875872612, "learning_rate": 1.7375243234524843e-05, "loss": 0.07779502868652344, "step": 3607 }, { "epoch": 0.4877735530207013, "grad_norm": 0.4297168254852295, "learning_rate": 1.736848491440173e-05, "loss": 0.06330251693725586, "step": 3608 }, { "epoch": 0.4879087452471483, "grad_norm": 0.3366488814353943, "learning_rate": 1.7361726101147424e-05, "loss": 0.049425363540649414, "step": 3609 }, { "epoch": 0.48804393747359526, "grad_norm": 0.36104920506477356, "learning_rate": 1.7354966796169157e-05, "loss": 0.07415008544921875, "step": 3610 }, { "epoch": 0.48817912970004224, "grad_norm": 0.2824004292488098, "learning_rate": 1.734820700087424e-05, "loss": 0.0686182975769043, "step": 3611 }, { "epoch": 0.4883143219264892, "grad_norm": 0.2889813184738159, "learning_rate": 1.7341446716670103e-05, "loss": 0.06698274612426758, "step": 3612 }, { "epoch": 0.4884495141529362, "grad_norm": 0.3684225082397461, "learning_rate": 1.7334685944964272e-05, "loss": 0.0602569580078125, "step": 3613 }, { "epoch": 0.48858470637938317, "grad_norm": 0.3210425078868866, "learning_rate": 1.732792468716438e-05, "loss": 0.05359315872192383, "step": 3614 }, { "epoch": 0.48871989860583015, "grad_norm": 0.40614524483680725, "learning_rate": 1.7321162944678155e-05, "loss": 0.06607246398925781, "step": 3615 }, { "epoch": 0.48885509083227713, "grad_norm": 0.4153936207294464, "learning_rate": 1.7314400718913425e-05, "loss": 0.052260637283325195, "step": 3616 }, { "epoch": 0.4889902830587241, "grad_norm": 0.25126591324806213, "learning_rate": 1.7307638011278126e-05, "loss": 0.06325578689575195, "step": 3617 }, { "epoch": 0.4891254752851711, "grad_norm": 0.16747547686100006, "learning_rate": 1.7300874823180284e-05, "loss": 0.03872513771057129, "step": 3618 }, { "epoch": 0.48926066751161806, "grad_norm": 0.7757872343063354, "learning_rate": 1.7294111156028034e-05, "loss": 0.09938335418701172, "step": 3619 }, { "epoch": 0.48939585973806504, "grad_norm": 0.2757006585597992, "learning_rate": 1.7287347011229605e-05, "loss": 0.07147598266601562, "step": 3620 }, { "epoch": 0.489531051964512, "grad_norm": 0.20310567319393158, "learning_rate": 1.7280582390193333e-05, "loss": 0.041744232177734375, "step": 3621 }, { "epoch": 0.489666244190959, "grad_norm": 0.2866188585758209, "learning_rate": 1.7273817294327653e-05, "loss": 0.052403926849365234, "step": 3622 }, { "epoch": 0.489801436417406, "grad_norm": 0.25933730602264404, "learning_rate": 1.726705172504108e-05, "loss": 0.08164167404174805, "step": 3623 }, { "epoch": 0.48993662864385296, "grad_norm": 0.2824884355068207, "learning_rate": 1.7260285683742248e-05, "loss": 0.046981215476989746, "step": 3624 }, { "epoch": 0.49007182087029993, "grad_norm": 0.12598471343517303, "learning_rate": 1.7253519171839883e-05, "loss": 0.03424215316772461, "step": 3625 }, { "epoch": 0.4902070130967469, "grad_norm": 0.38121935725212097, "learning_rate": 1.724675219074281e-05, "loss": 0.06019115447998047, "step": 3626 }, { "epoch": 0.4903422053231939, "grad_norm": 0.14721521735191345, "learning_rate": 1.7239984741859957e-05, "loss": 0.027959585189819336, "step": 3627 }, { "epoch": 0.49047739754964087, "grad_norm": 0.3771783113479614, "learning_rate": 1.7233216826600324e-05, "loss": 0.07523393630981445, "step": 3628 }, { "epoch": 0.49061258977608785, "grad_norm": 0.1825200766324997, "learning_rate": 1.7226448446373047e-05, "loss": 0.047032833099365234, "step": 3629 }, { "epoch": 0.4907477820025348, "grad_norm": 0.637860119342804, "learning_rate": 1.7219679602587326e-05, "loss": 0.1150738000869751, "step": 3630 }, { "epoch": 0.49088297422898186, "grad_norm": 0.311662882566452, "learning_rate": 1.7212910296652476e-05, "loss": 0.06574392318725586, "step": 3631 }, { "epoch": 0.49101816645542884, "grad_norm": 0.16438733041286469, "learning_rate": 1.7206140529977904e-05, "loss": 0.04515564441680908, "step": 3632 }, { "epoch": 0.4911533586818758, "grad_norm": 0.22129812836647034, "learning_rate": 1.719937030397311e-05, "loss": 0.05523681640625, "step": 3633 }, { "epoch": 0.4912885509083228, "grad_norm": 0.15538308024406433, "learning_rate": 1.7192599620047683e-05, "loss": 0.0331113338470459, "step": 3634 }, { "epoch": 0.4914237431347698, "grad_norm": 0.3123125731945038, "learning_rate": 1.718582847961133e-05, "loss": 0.06272220611572266, "step": 3635 }, { "epoch": 0.49155893536121675, "grad_norm": 0.19490057229995728, "learning_rate": 1.7179056884073826e-05, "loss": 0.06317687034606934, "step": 3636 }, { "epoch": 0.49169412758766373, "grad_norm": 0.45507940649986267, "learning_rate": 1.717228483484506e-05, "loss": 0.055414438247680664, "step": 3637 }, { "epoch": 0.4918293198141107, "grad_norm": 0.5364943742752075, "learning_rate": 1.7165512333335013e-05, "loss": 0.06654071807861328, "step": 3638 }, { "epoch": 0.4919645120405577, "grad_norm": 0.370889276266098, "learning_rate": 1.715873938095374e-05, "loss": 0.07144880294799805, "step": 3639 }, { "epoch": 0.49209970426700467, "grad_norm": 0.21666491031646729, "learning_rate": 1.7151965979111427e-05, "loss": 0.06777763366699219, "step": 3640 }, { "epoch": 0.49223489649345165, "grad_norm": 0.24387285113334656, "learning_rate": 1.7145192129218313e-05, "loss": 0.06093788146972656, "step": 3641 }, { "epoch": 0.4923700887198986, "grad_norm": 0.344928503036499, "learning_rate": 1.7138417832684763e-05, "loss": 0.07947254180908203, "step": 3642 }, { "epoch": 0.4925052809463456, "grad_norm": 0.22023750841617584, "learning_rate": 1.7131643090921216e-05, "loss": 0.053901731967926025, "step": 3643 }, { "epoch": 0.4926404731727926, "grad_norm": 0.30977097153663635, "learning_rate": 1.712486790533821e-05, "loss": 0.08480358123779297, "step": 3644 }, { "epoch": 0.49277566539923956, "grad_norm": 0.2903079688549042, "learning_rate": 1.7118092277346372e-05, "loss": 0.06642723083496094, "step": 3645 }, { "epoch": 0.49291085762568654, "grad_norm": 0.3772442638874054, "learning_rate": 1.7111316208356428e-05, "loss": 0.08548742532730103, "step": 3646 }, { "epoch": 0.4930460498521335, "grad_norm": 0.12240912765264511, "learning_rate": 1.7104539699779192e-05, "loss": 0.0373530387878418, "step": 3647 }, { "epoch": 0.4931812420785805, "grad_norm": 0.2652791142463684, "learning_rate": 1.709776275302557e-05, "loss": 0.0529026985168457, "step": 3648 }, { "epoch": 0.4933164343050275, "grad_norm": 0.33920103311538696, "learning_rate": 1.7090985369506555e-05, "loss": 0.09222221374511719, "step": 3649 }, { "epoch": 0.49345162653147445, "grad_norm": 0.2678898870944977, "learning_rate": 1.708420755063323e-05, "loss": 0.05522608757019043, "step": 3650 }, { "epoch": 0.49358681875792143, "grad_norm": 0.18864497542381287, "learning_rate": 1.707742929781678e-05, "loss": 0.04535555839538574, "step": 3651 }, { "epoch": 0.4937220109843684, "grad_norm": 0.16273167729377747, "learning_rate": 1.707065061246848e-05, "loss": 0.047704219818115234, "step": 3652 }, { "epoch": 0.4938572032108154, "grad_norm": 0.3690492510795593, "learning_rate": 1.7063871495999677e-05, "loss": 0.07541942596435547, "step": 3653 }, { "epoch": 0.49399239543726237, "grad_norm": 0.26042553782463074, "learning_rate": 1.705709194982182e-05, "loss": 0.06457853317260742, "step": 3654 }, { "epoch": 0.49412758766370934, "grad_norm": 0.32687294483184814, "learning_rate": 1.7050311975346447e-05, "loss": 0.08898448944091797, "step": 3655 }, { "epoch": 0.4942627798901563, "grad_norm": 0.3517727851867676, "learning_rate": 1.704353157398519e-05, "loss": 0.06552386283874512, "step": 3656 }, { "epoch": 0.4943979721166033, "grad_norm": 0.26627597212791443, "learning_rate": 1.7036750747149764e-05, "loss": 0.05442667007446289, "step": 3657 }, { "epoch": 0.4945331643430503, "grad_norm": 0.2553832232952118, "learning_rate": 1.702996949625197e-05, "loss": 0.05390191078186035, "step": 3658 }, { "epoch": 0.49466835656949726, "grad_norm": 0.26399871706962585, "learning_rate": 1.7023187822703702e-05, "loss": 0.06447291374206543, "step": 3659 }, { "epoch": 0.49480354879594424, "grad_norm": 0.3089481294155121, "learning_rate": 1.7016405727916936e-05, "loss": 0.06996846199035645, "step": 3660 }, { "epoch": 0.4949387410223912, "grad_norm": 0.4111063778400421, "learning_rate": 1.700962321330375e-05, "loss": 0.08368968963623047, "step": 3661 }, { "epoch": 0.4950739332488382, "grad_norm": 0.33295831084251404, "learning_rate": 1.700284028027629e-05, "loss": 0.07135665416717529, "step": 3662 }, { "epoch": 0.4952091254752852, "grad_norm": 0.24843953549861908, "learning_rate": 1.6996056930246807e-05, "loss": 0.056128501892089844, "step": 3663 }, { "epoch": 0.49534431770173215, "grad_norm": 0.2934357523918152, "learning_rate": 1.6989273164627626e-05, "loss": 0.0641700029373169, "step": 3664 }, { "epoch": 0.49547950992817913, "grad_norm": 0.5144562125205994, "learning_rate": 1.6982488984831163e-05, "loss": 0.06073331832885742, "step": 3665 }, { "epoch": 0.4956147021546261, "grad_norm": 0.42985787987709045, "learning_rate": 1.697570439226992e-05, "loss": 0.04372763633728027, "step": 3666 }, { "epoch": 0.4957498943810731, "grad_norm": 0.7460087537765503, "learning_rate": 1.6968919388356486e-05, "loss": 0.10422563552856445, "step": 3667 }, { "epoch": 0.49588508660752006, "grad_norm": 0.2547072768211365, "learning_rate": 1.696213397450354e-05, "loss": 0.05795717239379883, "step": 3668 }, { "epoch": 0.49602027883396704, "grad_norm": 0.5143445730209351, "learning_rate": 1.695534815212384e-05, "loss": 0.07362556457519531, "step": 3669 }, { "epoch": 0.496155471060414, "grad_norm": 0.5136200785636902, "learning_rate": 1.6948561922630223e-05, "loss": 0.08347606658935547, "step": 3670 }, { "epoch": 0.496290663286861, "grad_norm": 0.2744808793067932, "learning_rate": 1.694177528743562e-05, "loss": 0.06880831718444824, "step": 3671 }, { "epoch": 0.496425855513308, "grad_norm": 0.43338385224342346, "learning_rate": 1.6934988247953053e-05, "loss": 0.09651374816894531, "step": 3672 }, { "epoch": 0.49656104773975496, "grad_norm": 0.47571730613708496, "learning_rate": 1.6928200805595606e-05, "loss": 0.07631063461303711, "step": 3673 }, { "epoch": 0.49669623996620194, "grad_norm": 0.2577090859413147, "learning_rate": 1.6921412961776475e-05, "loss": 0.06802845001220703, "step": 3674 }, { "epoch": 0.4968314321926489, "grad_norm": 0.3719443380832672, "learning_rate": 1.6914624717908922e-05, "loss": 0.07479619979858398, "step": 3675 }, { "epoch": 0.4969666244190959, "grad_norm": 0.4411269724369049, "learning_rate": 1.6907836075406288e-05, "loss": 0.06498908996582031, "step": 3676 }, { "epoch": 0.49710181664554287, "grad_norm": 0.16571342945098877, "learning_rate": 1.690104703568201e-05, "loss": 0.04548358917236328, "step": 3677 }, { "epoch": 0.49723700887198985, "grad_norm": 0.3210400640964508, "learning_rate": 1.68942576001496e-05, "loss": 0.060634732246398926, "step": 3678 }, { "epoch": 0.49737220109843683, "grad_norm": 0.2716442346572876, "learning_rate": 1.6887467770222658e-05, "loss": 0.05343437194824219, "step": 3679 }, { "epoch": 0.4975073933248838, "grad_norm": 0.3123975396156311, "learning_rate": 1.6880677547314865e-05, "loss": 0.06958913803100586, "step": 3680 }, { "epoch": 0.4976425855513308, "grad_norm": 0.5601609945297241, "learning_rate": 1.6873886932839973e-05, "loss": 0.06643915176391602, "step": 3681 }, { "epoch": 0.49777777777777776, "grad_norm": 0.32763174176216125, "learning_rate": 1.686709592821183e-05, "loss": 0.045107483863830566, "step": 3682 }, { "epoch": 0.49791297000422474, "grad_norm": 0.2631048560142517, "learning_rate": 1.6860304534844355e-05, "loss": 0.06543707847595215, "step": 3683 }, { "epoch": 0.4980481622306717, "grad_norm": 0.45689377188682556, "learning_rate": 1.6853512754151556e-05, "loss": 0.09626007080078125, "step": 3684 }, { "epoch": 0.4981833544571187, "grad_norm": 0.18219901621341705, "learning_rate": 1.684672058754752e-05, "loss": 0.06010627746582031, "step": 3685 }, { "epoch": 0.4983185466835657, "grad_norm": 0.28514334559440613, "learning_rate": 1.6839928036446416e-05, "loss": 0.08095073699951172, "step": 3686 }, { "epoch": 0.49845373891001266, "grad_norm": 0.40925583243370056, "learning_rate": 1.6833135102262473e-05, "loss": 0.08321571350097656, "step": 3687 }, { "epoch": 0.49858893113645963, "grad_norm": 0.5032579302787781, "learning_rate": 1.682634178641003e-05, "loss": 0.06683099269866943, "step": 3688 }, { "epoch": 0.4987241233629066, "grad_norm": 0.2649838626384735, "learning_rate": 1.6819548090303485e-05, "loss": 0.08434677124023438, "step": 3689 }, { "epoch": 0.4988593155893536, "grad_norm": 0.292099267244339, "learning_rate": 1.6812754015357328e-05, "loss": 0.08489036560058594, "step": 3690 }, { "epoch": 0.49899450781580057, "grad_norm": 0.2250075489282608, "learning_rate": 1.680595956298612e-05, "loss": 0.054587364196777344, "step": 3691 }, { "epoch": 0.49912970004224755, "grad_norm": 0.23876000940799713, "learning_rate": 1.6799164734604497e-05, "loss": 0.05211949348449707, "step": 3692 }, { "epoch": 0.4992648922686945, "grad_norm": 0.5526087880134583, "learning_rate": 1.6792369531627186e-05, "loss": 0.07991886138916016, "step": 3693 }, { "epoch": 0.4994000844951415, "grad_norm": 0.21991519629955292, "learning_rate": 1.6785573955468974e-05, "loss": 0.06328868865966797, "step": 3694 }, { "epoch": 0.4995352767215885, "grad_norm": 0.25740939378738403, "learning_rate": 1.6778778007544745e-05, "loss": 0.05763810873031616, "step": 3695 }, { "epoch": 0.49967046894803546, "grad_norm": 0.22257405519485474, "learning_rate": 1.6771981689269452e-05, "loss": 0.05683422088623047, "step": 3696 }, { "epoch": 0.49980566117448244, "grad_norm": 0.23424720764160156, "learning_rate": 1.6765185002058123e-05, "loss": 0.0545077919960022, "step": 3697 }, { "epoch": 0.4999408534009294, "grad_norm": 0.24864928424358368, "learning_rate": 1.6758387947325856e-05, "loss": 0.07478094100952148, "step": 3698 }, { "epoch": 0.5000760456273764, "grad_norm": 0.26618900895118713, "learning_rate": 1.6751590526487843e-05, "loss": 0.044876694679260254, "step": 3699 }, { "epoch": 0.5002112378538234, "grad_norm": 0.3026556670665741, "learning_rate": 1.6744792740959347e-05, "loss": 0.06730842590332031, "step": 3700 }, { "epoch": 0.5003464300802704, "grad_norm": 0.573049783706665, "learning_rate": 1.6737994592155697e-05, "loss": 0.0917515754699707, "step": 3701 }, { "epoch": 0.5004816223067173, "grad_norm": 0.15614494681358337, "learning_rate": 1.6731196081492307e-05, "loss": 0.04000115394592285, "step": 3702 }, { "epoch": 0.5006168145331643, "grad_norm": 0.24104918539524078, "learning_rate": 1.6724397210384655e-05, "loss": 0.04947304725646973, "step": 3703 }, { "epoch": 0.5007520067596113, "grad_norm": 0.2229125201702118, "learning_rate": 1.6717597980248308e-05, "loss": 0.0579066276550293, "step": 3704 }, { "epoch": 0.5008871989860583, "grad_norm": 0.3010246455669403, "learning_rate": 1.6710798392498904e-05, "loss": 0.06881427764892578, "step": 3705 }, { "epoch": 0.5010223912125052, "grad_norm": 0.6752832531929016, "learning_rate": 1.6703998448552154e-05, "loss": 0.10321807861328125, "step": 3706 }, { "epoch": 0.5011575834389522, "grad_norm": 0.3871508240699768, "learning_rate": 1.669719814982384e-05, "loss": 0.06765806674957275, "step": 3707 }, { "epoch": 0.5012927756653992, "grad_norm": 0.47764483094215393, "learning_rate": 1.6690397497729818e-05, "loss": 0.08770179748535156, "step": 3708 }, { "epoch": 0.5014279678918462, "grad_norm": 0.2824943959712982, "learning_rate": 1.6683596493686028e-05, "loss": 0.060486793518066406, "step": 3709 }, { "epoch": 0.5015631601182932, "grad_norm": 0.26470473408699036, "learning_rate": 1.667679513910846e-05, "loss": 0.06539344787597656, "step": 3710 }, { "epoch": 0.5016983523447401, "grad_norm": 0.34304139018058777, "learning_rate": 1.666999343541321e-05, "loss": 0.04815506935119629, "step": 3711 }, { "epoch": 0.5018335445711871, "grad_norm": 0.28561848402023315, "learning_rate": 1.6663191384016422e-05, "loss": 0.06753778457641602, "step": 3712 }, { "epoch": 0.5019687367976341, "grad_norm": 0.285433828830719, "learning_rate": 1.6656388986334315e-05, "loss": 0.0729752779006958, "step": 3713 }, { "epoch": 0.5021039290240811, "grad_norm": 0.32891544699668884, "learning_rate": 1.6649586243783186e-05, "loss": 0.0743703842163086, "step": 3714 }, { "epoch": 0.502239121250528, "grad_norm": 0.3816950023174286, "learning_rate": 1.6642783157779405e-05, "loss": 0.07209134101867676, "step": 3715 }, { "epoch": 0.502374313476975, "grad_norm": 0.263702392578125, "learning_rate": 1.6635979729739417e-05, "loss": 0.05934882164001465, "step": 3716 }, { "epoch": 0.502509505703422, "grad_norm": 0.6623087525367737, "learning_rate": 1.662917596107972e-05, "loss": 0.10495758056640625, "step": 3717 }, { "epoch": 0.502644697929869, "grad_norm": 0.40381714701652527, "learning_rate": 1.6622371853216904e-05, "loss": 0.06665229797363281, "step": 3718 }, { "epoch": 0.502779890156316, "grad_norm": 0.28729915618896484, "learning_rate": 1.661556740756761e-05, "loss": 0.04879331588745117, "step": 3719 }, { "epoch": 0.502915082382763, "grad_norm": 0.18054580688476562, "learning_rate": 1.6608762625548572e-05, "loss": 0.049221038818359375, "step": 3720 }, { "epoch": 0.5030502746092099, "grad_norm": 0.24564173817634583, "learning_rate": 1.6601957508576573e-05, "loss": 0.06606054306030273, "step": 3721 }, { "epoch": 0.5031854668356569, "grad_norm": 0.33070167899131775, "learning_rate": 1.659515205806848e-05, "loss": 0.07685279846191406, "step": 3722 }, { "epoch": 0.5033206590621039, "grad_norm": 0.2144303321838379, "learning_rate": 1.6588346275441224e-05, "loss": 0.0387420654296875, "step": 3723 }, { "epoch": 0.5034558512885509, "grad_norm": 0.3891025483608246, "learning_rate": 1.65815401621118e-05, "loss": 0.07223653793334961, "step": 3724 }, { "epoch": 0.5035910435149978, "grad_norm": 0.2672344148159027, "learning_rate": 1.657473371949728e-05, "loss": 0.06636923551559448, "step": 3725 }, { "epoch": 0.5037262357414448, "grad_norm": 0.23694884777069092, "learning_rate": 1.6567926949014805e-05, "loss": 0.06660652160644531, "step": 3726 }, { "epoch": 0.5038614279678918, "grad_norm": 0.21389798820018768, "learning_rate": 1.6561119852081574e-05, "loss": 0.06486654281616211, "step": 3727 }, { "epoch": 0.5039966201943388, "grad_norm": 0.19706472754478455, "learning_rate": 1.6554312430114868e-05, "loss": 0.05663442611694336, "step": 3728 }, { "epoch": 0.5041318124207858, "grad_norm": 0.12402030825614929, "learning_rate": 1.6547504684532026e-05, "loss": 0.027398109436035156, "step": 3729 }, { "epoch": 0.5042670046472327, "grad_norm": 0.17284323275089264, "learning_rate": 1.6540696616750454e-05, "loss": 0.0577009916305542, "step": 3730 }, { "epoch": 0.5044021968736797, "grad_norm": 0.22955511510372162, "learning_rate": 1.6533888228187628e-05, "loss": 0.05922102928161621, "step": 3731 }, { "epoch": 0.5045373891001267, "grad_norm": 0.23436099290847778, "learning_rate": 1.6527079520261103e-05, "loss": 0.06249058246612549, "step": 3732 }, { "epoch": 0.5046725813265738, "grad_norm": 0.4458872377872467, "learning_rate": 1.6520270494388472e-05, "loss": 0.09632301330566406, "step": 3733 }, { "epoch": 0.5048077735530208, "grad_norm": 0.2628108859062195, "learning_rate": 1.6513461151987418e-05, "loss": 0.06567001342773438, "step": 3734 }, { "epoch": 0.5049429657794677, "grad_norm": 0.2242846041917801, "learning_rate": 1.6506651494475678e-05, "loss": 0.05688738822937012, "step": 3735 }, { "epoch": 0.5050781580059147, "grad_norm": 0.18838833272457123, "learning_rate": 1.6499841523271062e-05, "loss": 0.05216526985168457, "step": 3736 }, { "epoch": 0.5052133502323617, "grad_norm": 0.5324968099594116, "learning_rate": 1.649303123979145e-05, "loss": 0.09590935707092285, "step": 3737 }, { "epoch": 0.5053485424588087, "grad_norm": 0.2790025472640991, "learning_rate": 1.648622064545477e-05, "loss": 0.06006813049316406, "step": 3738 }, { "epoch": 0.5054837346852556, "grad_norm": 0.23045334219932556, "learning_rate": 1.6479409741679025e-05, "loss": 0.044718146324157715, "step": 3739 }, { "epoch": 0.5056189269117026, "grad_norm": 0.44952282309532166, "learning_rate": 1.6472598529882277e-05, "loss": 0.06872034072875977, "step": 3740 }, { "epoch": 0.5057541191381496, "grad_norm": 0.15813127160072327, "learning_rate": 1.646578701148267e-05, "loss": 0.04623889923095703, "step": 3741 }, { "epoch": 0.5058893113645966, "grad_norm": 0.4009650647640228, "learning_rate": 1.6458975187898384e-05, "loss": 0.07753944396972656, "step": 3742 }, { "epoch": 0.5060245035910436, "grad_norm": 0.3411290943622589, "learning_rate": 1.6452163060547687e-05, "loss": 0.08057403564453125, "step": 3743 }, { "epoch": 0.5061596958174905, "grad_norm": 0.20344440639019012, "learning_rate": 1.64453506308489e-05, "loss": 0.052147626876831055, "step": 3744 }, { "epoch": 0.5062948880439375, "grad_norm": 0.3297085464000702, "learning_rate": 1.64385379002204e-05, "loss": 0.0858154296875, "step": 3745 }, { "epoch": 0.5064300802703845, "grad_norm": 0.26425638794898987, "learning_rate": 1.643172487008064e-05, "loss": 0.058515310287475586, "step": 3746 }, { "epoch": 0.5065652724968315, "grad_norm": 0.25333982706069946, "learning_rate": 1.6424911541848124e-05, "loss": 0.04899787902832031, "step": 3747 }, { "epoch": 0.5067004647232785, "grad_norm": 0.403068870306015, "learning_rate": 1.641809791694143e-05, "loss": 0.05936574935913086, "step": 3748 }, { "epoch": 0.5068356569497254, "grad_norm": 0.3366691768169403, "learning_rate": 1.6411283996779184e-05, "loss": 0.07269346714019775, "step": 3749 }, { "epoch": 0.5069708491761724, "grad_norm": 0.4272845685482025, "learning_rate": 1.6404469782780088e-05, "loss": 0.0903940200805664, "step": 3750 }, { "epoch": 0.5071060414026194, "grad_norm": 0.26125475764274597, "learning_rate": 1.639765527636289e-05, "loss": 0.06279802322387695, "step": 3751 }, { "epoch": 0.5072412336290664, "grad_norm": 0.2773575484752655, "learning_rate": 1.639084047894641e-05, "loss": 0.06950044631958008, "step": 3752 }, { "epoch": 0.5073764258555133, "grad_norm": 0.335889607667923, "learning_rate": 1.638402539194953e-05, "loss": 0.09420204162597656, "step": 3753 }, { "epoch": 0.5075116180819603, "grad_norm": 0.5095440745353699, "learning_rate": 1.6377210016791182e-05, "loss": 0.07118093967437744, "step": 3754 }, { "epoch": 0.5076468103084073, "grad_norm": 0.31784096360206604, "learning_rate": 1.6370394354890364e-05, "loss": 0.0667428970336914, "step": 3755 }, { "epoch": 0.5077820025348543, "grad_norm": 0.4596634805202484, "learning_rate": 1.636357840766613e-05, "loss": 0.0998077392578125, "step": 3756 }, { "epoch": 0.5079171947613013, "grad_norm": 0.16048742830753326, "learning_rate": 1.6356762176537606e-05, "loss": 0.03633636236190796, "step": 3757 }, { "epoch": 0.5080523869877482, "grad_norm": 0.2662222683429718, "learning_rate": 1.6349945662923953e-05, "loss": 0.0802011489868164, "step": 3758 }, { "epoch": 0.5081875792141952, "grad_norm": 0.23478074371814728, "learning_rate": 1.634312886824442e-05, "loss": 0.0711979866027832, "step": 3759 }, { "epoch": 0.5083227714406422, "grad_norm": 0.21493028104305267, "learning_rate": 1.6336311793918298e-05, "loss": 0.05287933349609375, "step": 3760 }, { "epoch": 0.5084579636670892, "grad_norm": 0.42671141028404236, "learning_rate": 1.6329494441364925e-05, "loss": 0.10504579544067383, "step": 3761 }, { "epoch": 0.5085931558935362, "grad_norm": 0.18574018776416779, "learning_rate": 1.6322676812003727e-05, "loss": 0.043202877044677734, "step": 3762 }, { "epoch": 0.5087283481199831, "grad_norm": 0.2294776737689972, "learning_rate": 1.631585890725416e-05, "loss": 0.048957228660583496, "step": 3763 }, { "epoch": 0.5088635403464301, "grad_norm": 0.18813537061214447, "learning_rate": 1.630904072853575e-05, "loss": 0.04468345642089844, "step": 3764 }, { "epoch": 0.5089987325728771, "grad_norm": 0.4334390163421631, "learning_rate": 1.6302222277268085e-05, "loss": 0.10156059265136719, "step": 3765 }, { "epoch": 0.5091339247993241, "grad_norm": 0.2519180476665497, "learning_rate": 1.6295403554870794e-05, "loss": 0.05563712120056152, "step": 3766 }, { "epoch": 0.509269117025771, "grad_norm": 0.33654114603996277, "learning_rate": 1.6288584562763572e-05, "loss": 0.0795598030090332, "step": 3767 }, { "epoch": 0.509404309252218, "grad_norm": 0.30152785778045654, "learning_rate": 1.6281765302366176e-05, "loss": 0.08346986770629883, "step": 3768 }, { "epoch": 0.509539501478665, "grad_norm": 0.348947674036026, "learning_rate": 1.6274945775098412e-05, "loss": 0.07816386222839355, "step": 3769 }, { "epoch": 0.509674693705112, "grad_norm": 0.5325326919555664, "learning_rate": 1.6268125982380135e-05, "loss": 0.06723415851593018, "step": 3770 }, { "epoch": 0.509809885931559, "grad_norm": 0.2995305061340332, "learning_rate": 1.626130592563127e-05, "loss": 0.07673883438110352, "step": 3771 }, { "epoch": 0.5099450781580059, "grad_norm": 0.2491988092660904, "learning_rate": 1.6254485606271778e-05, "loss": 0.04816484451293945, "step": 3772 }, { "epoch": 0.5100802703844529, "grad_norm": 0.27081945538520813, "learning_rate": 1.6247665025721698e-05, "loss": 0.08287811279296875, "step": 3773 }, { "epoch": 0.5102154626108999, "grad_norm": 0.3316383361816406, "learning_rate": 1.62408441854011e-05, "loss": 0.0939030647277832, "step": 3774 }, { "epoch": 0.5103506548373469, "grad_norm": 0.2686176598072052, "learning_rate": 1.6234023086730136e-05, "loss": 0.05086064338684082, "step": 3775 }, { "epoch": 0.5104858470637939, "grad_norm": 0.1850663274526596, "learning_rate": 1.622720173112898e-05, "loss": 0.04773569107055664, "step": 3776 }, { "epoch": 0.5106210392902408, "grad_norm": 0.46319863200187683, "learning_rate": 1.6220380120017874e-05, "loss": 0.06171417236328125, "step": 3777 }, { "epoch": 0.5107562315166878, "grad_norm": 0.2455659657716751, "learning_rate": 1.6213558254817128e-05, "loss": 0.06778955459594727, "step": 3778 }, { "epoch": 0.5108914237431348, "grad_norm": 0.19397512078285217, "learning_rate": 1.6206736136947074e-05, "loss": 0.04960966110229492, "step": 3779 }, { "epoch": 0.5110266159695818, "grad_norm": 0.2613491714000702, "learning_rate": 1.6199913767828126e-05, "loss": 0.07055473327636719, "step": 3780 }, { "epoch": 0.5111618081960287, "grad_norm": 0.2777951955795288, "learning_rate": 1.6193091148880733e-05, "loss": 0.08042335510253906, "step": 3781 }, { "epoch": 0.5112970004224757, "grad_norm": 0.3295799791812897, "learning_rate": 1.61862682815254e-05, "loss": 0.07029294967651367, "step": 3782 }, { "epoch": 0.5114321926489227, "grad_norm": 0.2006811946630478, "learning_rate": 1.617944516718268e-05, "loss": 0.055863380432128906, "step": 3783 }, { "epoch": 0.5115673848753697, "grad_norm": 0.2747129499912262, "learning_rate": 1.617262180727319e-05, "loss": 0.07995939254760742, "step": 3784 }, { "epoch": 0.5117025771018167, "grad_norm": 0.3211976885795593, "learning_rate": 1.6165798203217588e-05, "loss": 0.10124373435974121, "step": 3785 }, { "epoch": 0.5118377693282636, "grad_norm": 0.501947283744812, "learning_rate": 1.6158974356436585e-05, "loss": 0.08261764049530029, "step": 3786 }, { "epoch": 0.5119729615547106, "grad_norm": 0.1718253791332245, "learning_rate": 1.6152150268350938e-05, "loss": 0.04965782165527344, "step": 3787 }, { "epoch": 0.5121081537811576, "grad_norm": 0.2696118652820587, "learning_rate": 1.6145325940381458e-05, "loss": 0.04480338096618652, "step": 3788 }, { "epoch": 0.5122433460076046, "grad_norm": 0.23200277984142303, "learning_rate": 1.6138501373949018e-05, "loss": 0.06024932861328125, "step": 3789 }, { "epoch": 0.5123785382340516, "grad_norm": 0.28233274817466736, "learning_rate": 1.613167657047451e-05, "loss": 0.05148577690124512, "step": 3790 }, { "epoch": 0.5125137304604985, "grad_norm": 0.16529662907123566, "learning_rate": 1.612485153137891e-05, "loss": 0.04063296318054199, "step": 3791 }, { "epoch": 0.5126489226869455, "grad_norm": 0.3903208076953888, "learning_rate": 1.611802625808323e-05, "loss": 0.09176206588745117, "step": 3792 }, { "epoch": 0.5127841149133925, "grad_norm": 0.36968398094177246, "learning_rate": 1.611120075200851e-05, "loss": 0.08179950714111328, "step": 3793 }, { "epoch": 0.5129193071398395, "grad_norm": 0.3314633369445801, "learning_rate": 1.610437501457587e-05, "loss": 0.07626128196716309, "step": 3794 }, { "epoch": 0.5130544993662864, "grad_norm": 0.2416071742773056, "learning_rate": 1.6097549047206464e-05, "loss": 0.057384490966796875, "step": 3795 }, { "epoch": 0.5131896915927334, "grad_norm": 0.3267790973186493, "learning_rate": 1.6090722851321497e-05, "loss": 0.05952334403991699, "step": 3796 }, { "epoch": 0.5133248838191804, "grad_norm": 0.3248332142829895, "learning_rate": 1.6083896428342213e-05, "loss": 0.07203388214111328, "step": 3797 }, { "epoch": 0.5134600760456274, "grad_norm": 0.7720645666122437, "learning_rate": 1.6077069779689915e-05, "loss": 0.0859231948852539, "step": 3798 }, { "epoch": 0.5135952682720744, "grad_norm": 0.21609126031398773, "learning_rate": 1.607024290678594e-05, "loss": 0.04471468925476074, "step": 3799 }, { "epoch": 0.5137304604985213, "grad_norm": 0.20270587503910065, "learning_rate": 1.6063415811051686e-05, "loss": 0.06509971618652344, "step": 3800 }, { "epoch": 0.5138656527249683, "grad_norm": 0.28665196895599365, "learning_rate": 1.6056588493908596e-05, "loss": 0.0832204818725586, "step": 3801 }, { "epoch": 0.5140008449514153, "grad_norm": 0.3154524266719818, "learning_rate": 1.604976095677814e-05, "loss": 0.07967185974121094, "step": 3802 }, { "epoch": 0.5141360371778623, "grad_norm": 0.21033187210559845, "learning_rate": 1.604293320108186e-05, "loss": 0.044440269470214844, "step": 3803 }, { "epoch": 0.5142712294043092, "grad_norm": 0.4083731174468994, "learning_rate": 1.603610522824132e-05, "loss": 0.06813383102416992, "step": 3804 }, { "epoch": 0.5144064216307562, "grad_norm": 0.3776145577430725, "learning_rate": 1.6029277039678153e-05, "loss": 0.08827733993530273, "step": 3805 }, { "epoch": 0.5145416138572032, "grad_norm": 0.2450273483991623, "learning_rate": 1.602244863681401e-05, "loss": 0.05036449432373047, "step": 3806 }, { "epoch": 0.5146768060836502, "grad_norm": 0.2197941392660141, "learning_rate": 1.6015620021070613e-05, "loss": 0.054108619689941406, "step": 3807 }, { "epoch": 0.5148119983100972, "grad_norm": 0.19557338953018188, "learning_rate": 1.6008791193869714e-05, "loss": 0.04666924476623535, "step": 3808 }, { "epoch": 0.5149471905365441, "grad_norm": 0.31723499298095703, "learning_rate": 1.6001962156633102e-05, "loss": 0.06638240814208984, "step": 3809 }, { "epoch": 0.5150823827629911, "grad_norm": 0.33990269899368286, "learning_rate": 1.5995132910782632e-05, "loss": 0.08015680313110352, "step": 3810 }, { "epoch": 0.5152175749894381, "grad_norm": 0.2806377410888672, "learning_rate": 1.5988303457740178e-05, "loss": 0.06101036071777344, "step": 3811 }, { "epoch": 0.5153527672158851, "grad_norm": 0.2810553014278412, "learning_rate": 1.598147379892768e-05, "loss": 0.05346107482910156, "step": 3812 }, { "epoch": 0.515487959442332, "grad_norm": 0.2763311266899109, "learning_rate": 1.5974643935767098e-05, "loss": 0.05693769454956055, "step": 3813 }, { "epoch": 0.515623151668779, "grad_norm": 0.31364405155181885, "learning_rate": 1.5967813869680452e-05, "loss": 0.061385154724121094, "step": 3814 }, { "epoch": 0.515758343895226, "grad_norm": 0.3968309760093689, "learning_rate": 1.59609836020898e-05, "loss": 0.06057929992675781, "step": 3815 }, { "epoch": 0.515893536121673, "grad_norm": 0.2200632244348526, "learning_rate": 1.5954153134417236e-05, "loss": 0.060900211334228516, "step": 3816 }, { "epoch": 0.51602872834812, "grad_norm": 0.33449482917785645, "learning_rate": 1.59473224680849e-05, "loss": 0.05581164360046387, "step": 3817 }, { "epoch": 0.516163920574567, "grad_norm": 0.2559302747249603, "learning_rate": 1.5940491604514976e-05, "loss": 0.06303215026855469, "step": 3818 }, { "epoch": 0.5162991128010139, "grad_norm": 0.2785590887069702, "learning_rate": 1.5933660545129683e-05, "loss": 0.0716848373413086, "step": 3819 }, { "epoch": 0.5164343050274609, "grad_norm": 0.6427760124206543, "learning_rate": 1.5926829291351288e-05, "loss": 0.0704050064086914, "step": 3820 }, { "epoch": 0.5165694972539079, "grad_norm": 0.3818990886211395, "learning_rate": 1.591999784460209e-05, "loss": 0.05056571960449219, "step": 3821 }, { "epoch": 0.5167046894803549, "grad_norm": 0.3075590431690216, "learning_rate": 1.5913166206304435e-05, "loss": 0.06289792060852051, "step": 3822 }, { "epoch": 0.5168398817068018, "grad_norm": 0.24053780734539032, "learning_rate": 1.5906334377880707e-05, "loss": 0.05351996421813965, "step": 3823 }, { "epoch": 0.5169750739332488, "grad_norm": 0.39546748995780945, "learning_rate": 1.589950236075333e-05, "loss": 0.07835102081298828, "step": 3824 }, { "epoch": 0.5171102661596958, "grad_norm": 0.2950100898742676, "learning_rate": 1.5892670156344764e-05, "loss": 0.06669855117797852, "step": 3825 }, { "epoch": 0.5172454583861428, "grad_norm": 0.30787771940231323, "learning_rate": 1.588583776607751e-05, "loss": 0.051708221435546875, "step": 3826 }, { "epoch": 0.5173806506125898, "grad_norm": 0.2573969066143036, "learning_rate": 1.5879005191374106e-05, "loss": 0.054888248443603516, "step": 3827 }, { "epoch": 0.5175158428390367, "grad_norm": 0.31818974018096924, "learning_rate": 1.587217243365714e-05, "loss": 0.08167243003845215, "step": 3828 }, { "epoch": 0.5176510350654837, "grad_norm": 0.20823635160923004, "learning_rate": 1.586533949434922e-05, "loss": 0.04093742370605469, "step": 3829 }, { "epoch": 0.5177862272919307, "grad_norm": 0.29374828934669495, "learning_rate": 1.5858506374872998e-05, "loss": 0.06679534912109375, "step": 3830 }, { "epoch": 0.5179214195183777, "grad_norm": 0.3115184009075165, "learning_rate": 1.5851673076651178e-05, "loss": 0.04941272735595703, "step": 3831 }, { "epoch": 0.5180566117448246, "grad_norm": 0.15480205416679382, "learning_rate": 1.5844839601106477e-05, "loss": 0.041136980056762695, "step": 3832 }, { "epoch": 0.5181918039712716, "grad_norm": 0.3272755742073059, "learning_rate": 1.583800594966167e-05, "loss": 0.06614065170288086, "step": 3833 }, { "epoch": 0.5183269961977186, "grad_norm": 0.2249755561351776, "learning_rate": 1.583117212373955e-05, "loss": 0.05387401580810547, "step": 3834 }, { "epoch": 0.5184621884241656, "grad_norm": 0.2950299084186554, "learning_rate": 1.5824338124762967e-05, "loss": 0.05718231201171875, "step": 3835 }, { "epoch": 0.5185973806506126, "grad_norm": 0.28259211778640747, "learning_rate": 1.581750395415479e-05, "loss": 0.04875826835632324, "step": 3836 }, { "epoch": 0.5187325728770595, "grad_norm": 0.6511593461036682, "learning_rate": 1.5810669613337922e-05, "loss": 0.10650253295898438, "step": 3837 }, { "epoch": 0.5188677651035065, "grad_norm": 0.18748058378696442, "learning_rate": 1.5803835103735327e-05, "loss": 0.05097150802612305, "step": 3838 }, { "epoch": 0.5190029573299535, "grad_norm": 0.2657163441181183, "learning_rate": 1.5797000426769973e-05, "loss": 0.06188774108886719, "step": 3839 }, { "epoch": 0.5191381495564005, "grad_norm": 0.3725571632385254, "learning_rate": 1.579016558386488e-05, "loss": 0.08385348320007324, "step": 3840 }, { "epoch": 0.5192733417828475, "grad_norm": 0.5250045657157898, "learning_rate": 1.5783330576443096e-05, "loss": 0.10803556442260742, "step": 3841 }, { "epoch": 0.5194085340092944, "grad_norm": 0.4862484335899353, "learning_rate": 1.5776495405927716e-05, "loss": 0.06979751586914062, "step": 3842 }, { "epoch": 0.5195437262357414, "grad_norm": 0.52033931016922, "learning_rate": 1.5769660073741844e-05, "loss": 0.10051965713500977, "step": 3843 }, { "epoch": 0.5196789184621884, "grad_norm": 0.265226274728775, "learning_rate": 1.5762824581308645e-05, "loss": 0.0498957633972168, "step": 3844 }, { "epoch": 0.5198141106886354, "grad_norm": 0.21396085619926453, "learning_rate": 1.5755988930051304e-05, "loss": 0.05698704719543457, "step": 3845 }, { "epoch": 0.5199493029150823, "grad_norm": 0.10826153308153152, "learning_rate": 1.5749153121393025e-05, "loss": 0.03523445129394531, "step": 3846 }, { "epoch": 0.5200844951415293, "grad_norm": 0.3097763955593109, "learning_rate": 1.574231715675708e-05, "loss": 0.06415605545043945, "step": 3847 }, { "epoch": 0.5202196873679763, "grad_norm": 0.23440732061862946, "learning_rate": 1.573548103756674e-05, "loss": 0.05231189727783203, "step": 3848 }, { "epoch": 0.5203548795944233, "grad_norm": 0.35284194350242615, "learning_rate": 1.572864476524533e-05, "loss": 0.0575556755065918, "step": 3849 }, { "epoch": 0.5204900718208703, "grad_norm": 0.5734379291534424, "learning_rate": 1.5721808341216195e-05, "loss": 0.043912649154663086, "step": 3850 }, { "epoch": 0.5206252640473172, "grad_norm": 0.2837241590023041, "learning_rate": 1.571497176690271e-05, "loss": 0.060446739196777344, "step": 3851 }, { "epoch": 0.5207604562737642, "grad_norm": 0.2905561029911041, "learning_rate": 1.570813504372829e-05, "loss": 0.06624746322631836, "step": 3852 }, { "epoch": 0.5208956485002112, "grad_norm": 0.2592106759548187, "learning_rate": 1.570129817311638e-05, "loss": 0.05748152732849121, "step": 3853 }, { "epoch": 0.5210308407266582, "grad_norm": 0.4550934135913849, "learning_rate": 1.5694461156490452e-05, "loss": 0.07835555076599121, "step": 3854 }, { "epoch": 0.5211660329531052, "grad_norm": 0.28268054127693176, "learning_rate": 1.5687623995274008e-05, "loss": 0.04772043228149414, "step": 3855 }, { "epoch": 0.5213012251795521, "grad_norm": 0.3123287558555603, "learning_rate": 1.568078669089058e-05, "loss": 0.07772183418273926, "step": 3856 }, { "epoch": 0.5214364174059991, "grad_norm": 0.32369691133499146, "learning_rate": 1.567394924476373e-05, "loss": 0.08242559432983398, "step": 3857 }, { "epoch": 0.5215716096324461, "grad_norm": 0.5650047659873962, "learning_rate": 1.5667111658317057e-05, "loss": 0.08599567413330078, "step": 3858 }, { "epoch": 0.5217068018588931, "grad_norm": 0.232393279671669, "learning_rate": 1.5660273932974177e-05, "loss": 0.04138898849487305, "step": 3859 }, { "epoch": 0.52184199408534, "grad_norm": 0.5171011090278625, "learning_rate": 1.5653436070158743e-05, "loss": 0.07064485549926758, "step": 3860 }, { "epoch": 0.521977186311787, "grad_norm": 0.2689336836338043, "learning_rate": 1.564659807129444e-05, "loss": 0.08013391494750977, "step": 3861 }, { "epoch": 0.522112378538234, "grad_norm": 0.4905410706996918, "learning_rate": 1.5639759937804962e-05, "loss": 0.07244324684143066, "step": 3862 }, { "epoch": 0.522247570764681, "grad_norm": 0.40969574451446533, "learning_rate": 1.5632921671114055e-05, "loss": 0.09016799926757812, "step": 3863 }, { "epoch": 0.522382762991128, "grad_norm": 0.40808987617492676, "learning_rate": 1.5626083272645485e-05, "loss": 0.06834697723388672, "step": 3864 }, { "epoch": 0.5225179552175749, "grad_norm": 0.55250483751297, "learning_rate": 1.5619244743823038e-05, "loss": 0.06690692901611328, "step": 3865 }, { "epoch": 0.5226531474440219, "grad_norm": 0.3186188042163849, "learning_rate": 1.5612406086070534e-05, "loss": 0.08028411865234375, "step": 3866 }, { "epoch": 0.5227883396704689, "grad_norm": 0.21495214104652405, "learning_rate": 1.560556730081181e-05, "loss": 0.05619668960571289, "step": 3867 }, { "epoch": 0.5229235318969159, "grad_norm": 0.4362373650074005, "learning_rate": 1.5598728389470754e-05, "loss": 0.10913562774658203, "step": 3868 }, { "epoch": 0.523058724123363, "grad_norm": 0.2426283210515976, "learning_rate": 1.5591889353471245e-05, "loss": 0.06658697128295898, "step": 3869 }, { "epoch": 0.5231939163498099, "grad_norm": 0.23084065318107605, "learning_rate": 1.5585050194237226e-05, "loss": 0.054060935974121094, "step": 3870 }, { "epoch": 0.5233291085762569, "grad_norm": 0.4734134376049042, "learning_rate": 1.557821091319263e-05, "loss": 0.10500526428222656, "step": 3871 }, { "epoch": 0.5234643008027039, "grad_norm": 0.33723270893096924, "learning_rate": 1.5571371511761446e-05, "loss": 0.0969386100769043, "step": 3872 }, { "epoch": 0.5235994930291509, "grad_norm": 0.24272193014621735, "learning_rate": 1.5564531991367658e-05, "loss": 0.055225372314453125, "step": 3873 }, { "epoch": 0.5237346852555979, "grad_norm": 0.27921679615974426, "learning_rate": 1.5557692353435302e-05, "loss": 0.060835838317871094, "step": 3874 }, { "epoch": 0.5238698774820448, "grad_norm": 0.2498018741607666, "learning_rate": 1.5550852599388424e-05, "loss": 0.04842883348464966, "step": 3875 }, { "epoch": 0.5240050697084918, "grad_norm": 0.36984783411026, "learning_rate": 1.5544012730651096e-05, "loss": 0.08437395095825195, "step": 3876 }, { "epoch": 0.5241402619349388, "grad_norm": 0.23008708655834198, "learning_rate": 1.5537172748647422e-05, "loss": 0.050740718841552734, "step": 3877 }, { "epoch": 0.5242754541613858, "grad_norm": 0.3785080909729004, "learning_rate": 1.553033265480151e-05, "loss": 0.0830850601196289, "step": 3878 }, { "epoch": 0.5244106463878327, "grad_norm": 0.2892637550830841, "learning_rate": 1.552349245053752e-05, "loss": 0.0842580795288086, "step": 3879 }, { "epoch": 0.5245458386142797, "grad_norm": 0.21594905853271484, "learning_rate": 1.5516652137279597e-05, "loss": 0.05982255935668945, "step": 3880 }, { "epoch": 0.5246810308407267, "grad_norm": 0.31781211495399475, "learning_rate": 1.5509811716451955e-05, "loss": 0.053493499755859375, "step": 3881 }, { "epoch": 0.5248162230671737, "grad_norm": 0.23139196634292603, "learning_rate": 1.550297118947879e-05, "loss": 0.059580087661743164, "step": 3882 }, { "epoch": 0.5249514152936207, "grad_norm": 0.2394331991672516, "learning_rate": 1.5496130557784343e-05, "loss": 0.04545712471008301, "step": 3883 }, { "epoch": 0.5250866075200676, "grad_norm": 0.2936137616634369, "learning_rate": 1.5489289822792868e-05, "loss": 0.06028890609741211, "step": 3884 }, { "epoch": 0.5252217997465146, "grad_norm": 0.6374660134315491, "learning_rate": 1.5482448985928645e-05, "loss": 0.09602594375610352, "step": 3885 }, { "epoch": 0.5253569919729616, "grad_norm": 0.20281396806240082, "learning_rate": 1.5475608048615964e-05, "loss": 0.05556201934814453, "step": 3886 }, { "epoch": 0.5254921841994086, "grad_norm": 0.16149945557117462, "learning_rate": 1.546876701227916e-05, "loss": 0.04952716827392578, "step": 3887 }, { "epoch": 0.5256273764258556, "grad_norm": 0.3811986446380615, "learning_rate": 1.5461925878342558e-05, "loss": 0.07009601593017578, "step": 3888 }, { "epoch": 0.5257625686523025, "grad_norm": 0.26125630736351013, "learning_rate": 1.5455084648230527e-05, "loss": 0.06839418411254883, "step": 3889 }, { "epoch": 0.5258977608787495, "grad_norm": 0.24328996241092682, "learning_rate": 1.5448243323367438e-05, "loss": 0.06770992279052734, "step": 3890 }, { "epoch": 0.5260329531051965, "grad_norm": 0.37833523750305176, "learning_rate": 1.544140190517771e-05, "loss": 0.07322216033935547, "step": 3891 }, { "epoch": 0.5261681453316435, "grad_norm": 0.2731761634349823, "learning_rate": 1.5434560395085745e-05, "loss": 0.06831550598144531, "step": 3892 }, { "epoch": 0.5263033375580904, "grad_norm": 0.29786181449890137, "learning_rate": 1.542771879451599e-05, "loss": 0.05793619155883789, "step": 3893 }, { "epoch": 0.5264385297845374, "grad_norm": 0.3301241993904114, "learning_rate": 1.54208771048929e-05, "loss": 0.055074214935302734, "step": 3894 }, { "epoch": 0.5265737220109844, "grad_norm": 0.31042030453681946, "learning_rate": 1.5414035327640958e-05, "loss": 0.07587242126464844, "step": 3895 }, { "epoch": 0.5267089142374314, "grad_norm": 0.24964457750320435, "learning_rate": 1.5407193464184644e-05, "loss": 0.047933101654052734, "step": 3896 }, { "epoch": 0.5268441064638784, "grad_norm": 0.5645866990089417, "learning_rate": 1.5400351515948485e-05, "loss": 0.07502710819244385, "step": 3897 }, { "epoch": 0.5269792986903253, "grad_norm": 0.5430082082748413, "learning_rate": 1.5393509484357006e-05, "loss": 0.09406805038452148, "step": 3898 }, { "epoch": 0.5271144909167723, "grad_norm": 0.3937346935272217, "learning_rate": 1.538666737083475e-05, "loss": 0.07242584228515625, "step": 3899 }, { "epoch": 0.5272496831432193, "grad_norm": 0.1799725741147995, "learning_rate": 1.537982517680629e-05, "loss": 0.048305511474609375, "step": 3900 }, { "epoch": 0.5273848753696663, "grad_norm": 0.45096421241760254, "learning_rate": 1.5372982903696196e-05, "loss": 0.08168745040893555, "step": 3901 }, { "epoch": 0.5275200675961133, "grad_norm": 0.2164062261581421, "learning_rate": 1.536614055292908e-05, "loss": 0.05948066711425781, "step": 3902 }, { "epoch": 0.5276552598225602, "grad_norm": 0.3785545229911804, "learning_rate": 1.535929812592955e-05, "loss": 0.0536121129989624, "step": 3903 }, { "epoch": 0.5277904520490072, "grad_norm": 0.45809656381607056, "learning_rate": 1.5352455624122227e-05, "loss": 0.06551361083984375, "step": 3904 }, { "epoch": 0.5279256442754542, "grad_norm": 0.35153132677078247, "learning_rate": 1.5345613048931765e-05, "loss": 0.09002065658569336, "step": 3905 }, { "epoch": 0.5280608365019012, "grad_norm": 0.6052442193031311, "learning_rate": 1.5338770401782822e-05, "loss": 0.09721851348876953, "step": 3906 }, { "epoch": 0.5281960287283481, "grad_norm": 0.24167391657829285, "learning_rate": 1.5331927684100077e-05, "loss": 0.05780363082885742, "step": 3907 }, { "epoch": 0.5283312209547951, "grad_norm": 0.15925996005535126, "learning_rate": 1.5325084897308218e-05, "loss": 0.04418158531188965, "step": 3908 }, { "epoch": 0.5284664131812421, "grad_norm": 0.32850953936576843, "learning_rate": 1.5318242042831952e-05, "loss": 0.06540441513061523, "step": 3909 }, { "epoch": 0.5286016054076891, "grad_norm": 0.28783658146858215, "learning_rate": 1.5311399122095992e-05, "loss": 0.060230255126953125, "step": 3910 }, { "epoch": 0.5287367976341361, "grad_norm": 0.3889015018939972, "learning_rate": 1.5304556136525074e-05, "loss": 0.10230636596679688, "step": 3911 }, { "epoch": 0.528871989860583, "grad_norm": 0.3231453597545624, "learning_rate": 1.5297713087543948e-05, "loss": 0.06882619857788086, "step": 3912 }, { "epoch": 0.52900718208703, "grad_norm": 0.2304292768239975, "learning_rate": 1.5290869976577365e-05, "loss": 0.052089691162109375, "step": 3913 }, { "epoch": 0.529142374313477, "grad_norm": 0.24516160786151886, "learning_rate": 1.5284026805050107e-05, "loss": 0.06309914588928223, "step": 3914 }, { "epoch": 0.529277566539924, "grad_norm": 0.37919315695762634, "learning_rate": 1.5277183574386947e-05, "loss": 0.07090353965759277, "step": 3915 }, { "epoch": 0.529412758766371, "grad_norm": 0.5468193292617798, "learning_rate": 1.5270340286012694e-05, "loss": 0.0863804817199707, "step": 3916 }, { "epoch": 0.5295479509928179, "grad_norm": 0.3777635097503662, "learning_rate": 1.526349694135215e-05, "loss": 0.09398460388183594, "step": 3917 }, { "epoch": 0.5296831432192649, "grad_norm": 0.29108086228370667, "learning_rate": 1.525665354183014e-05, "loss": 0.06123161315917969, "step": 3918 }, { "epoch": 0.5298183354457119, "grad_norm": 0.3264821469783783, "learning_rate": 1.5249810088871493e-05, "loss": 0.0648965835571289, "step": 3919 }, { "epoch": 0.5299535276721589, "grad_norm": 0.2696027159690857, "learning_rate": 1.5242966583901052e-05, "loss": 0.07930850982666016, "step": 3920 }, { "epoch": 0.5300887198986058, "grad_norm": 0.27279606461524963, "learning_rate": 1.523612302834367e-05, "loss": 0.07864618301391602, "step": 3921 }, { "epoch": 0.5302239121250528, "grad_norm": 0.2681496739387512, "learning_rate": 1.5229279423624217e-05, "loss": 0.04422426223754883, "step": 3922 }, { "epoch": 0.5303591043514998, "grad_norm": 0.19210343062877655, "learning_rate": 1.5222435771167566e-05, "loss": 0.0578460693359375, "step": 3923 }, { "epoch": 0.5304942965779468, "grad_norm": 0.23293140530586243, "learning_rate": 1.5215592072398602e-05, "loss": 0.06531012058258057, "step": 3924 }, { "epoch": 0.5306294888043938, "grad_norm": 0.3658544719219208, "learning_rate": 1.520874832874222e-05, "loss": 0.07075285911560059, "step": 3925 }, { "epoch": 0.5307646810308407, "grad_norm": 0.48965251445770264, "learning_rate": 1.5201904541623318e-05, "loss": 0.06623554229736328, "step": 3926 }, { "epoch": 0.5308998732572877, "grad_norm": 0.19527631998062134, "learning_rate": 1.5195060712466817e-05, "loss": 0.041087985038757324, "step": 3927 }, { "epoch": 0.5310350654837347, "grad_norm": 0.36395376920700073, "learning_rate": 1.5188216842697635e-05, "loss": 0.07523655891418457, "step": 3928 }, { "epoch": 0.5311702577101817, "grad_norm": 0.372333824634552, "learning_rate": 1.5181372933740703e-05, "loss": 0.06962180137634277, "step": 3929 }, { "epoch": 0.5313054499366286, "grad_norm": 0.17896510660648346, "learning_rate": 1.5174528987020958e-05, "loss": 0.04320979118347168, "step": 3930 }, { "epoch": 0.5314406421630756, "grad_norm": 0.2079179286956787, "learning_rate": 1.5167685003963345e-05, "loss": 0.05081343650817871, "step": 3931 }, { "epoch": 0.5315758343895226, "grad_norm": 0.2520752251148224, "learning_rate": 1.5160840985992824e-05, "loss": 0.0585322380065918, "step": 3932 }, { "epoch": 0.5317110266159696, "grad_norm": 0.2267618179321289, "learning_rate": 1.515399693453435e-05, "loss": 0.055820465087890625, "step": 3933 }, { "epoch": 0.5318462188424166, "grad_norm": 0.44096824526786804, "learning_rate": 1.5147152851012894e-05, "loss": 0.07697486877441406, "step": 3934 }, { "epoch": 0.5319814110688635, "grad_norm": 0.3692184388637543, "learning_rate": 1.514030873685343e-05, "loss": 0.08869409561157227, "step": 3935 }, { "epoch": 0.5321166032953105, "grad_norm": 0.37288644909858704, "learning_rate": 1.513346459348094e-05, "loss": 0.06119251251220703, "step": 3936 }, { "epoch": 0.5322517955217575, "grad_norm": 0.2925654351711273, "learning_rate": 1.5126620422320405e-05, "loss": 0.0448918342590332, "step": 3937 }, { "epoch": 0.5323869877482045, "grad_norm": 0.22279894351959229, "learning_rate": 1.5119776224796823e-05, "loss": 0.04670119285583496, "step": 3938 }, { "epoch": 0.5325221799746515, "grad_norm": 0.49064162373542786, "learning_rate": 1.5112932002335195e-05, "loss": 0.09360694885253906, "step": 3939 }, { "epoch": 0.5326573722010984, "grad_norm": 0.4237077832221985, "learning_rate": 1.5106087756360524e-05, "loss": 0.07799720764160156, "step": 3940 }, { "epoch": 0.5327925644275454, "grad_norm": 0.2859136760234833, "learning_rate": 1.5099243488297816e-05, "loss": 0.0719451904296875, "step": 3941 }, { "epoch": 0.5329277566539924, "grad_norm": 0.3216422200202942, "learning_rate": 1.5092399199572083e-05, "loss": 0.0841364860534668, "step": 3942 }, { "epoch": 0.5330629488804394, "grad_norm": 0.19512711465358734, "learning_rate": 1.5085554891608343e-05, "loss": 0.04526257514953613, "step": 3943 }, { "epoch": 0.5331981411068863, "grad_norm": 0.25813478231430054, "learning_rate": 1.5078710565831616e-05, "loss": 0.05426192283630371, "step": 3944 }, { "epoch": 0.5333333333333333, "grad_norm": 0.3508923351764679, "learning_rate": 1.5071866223666935e-05, "loss": 0.058115243911743164, "step": 3945 }, { "epoch": 0.5334685255597803, "grad_norm": 0.37193968892097473, "learning_rate": 1.5065021866539323e-05, "loss": 0.07733488082885742, "step": 3946 }, { "epoch": 0.5336037177862273, "grad_norm": 0.3799670934677124, "learning_rate": 1.5058177495873805e-05, "loss": 0.0815572738647461, "step": 3947 }, { "epoch": 0.5337389100126743, "grad_norm": 0.22421643137931824, "learning_rate": 1.5051333113095429e-05, "loss": 0.06677722930908203, "step": 3948 }, { "epoch": 0.5338741022391212, "grad_norm": 0.18863984942436218, "learning_rate": 1.5044488719629218e-05, "loss": 0.04372763633728027, "step": 3949 }, { "epoch": 0.5340092944655682, "grad_norm": 0.1745264083147049, "learning_rate": 1.5037644316900227e-05, "loss": 0.04700195789337158, "step": 3950 }, { "epoch": 0.5341444866920152, "grad_norm": 0.42912304401397705, "learning_rate": 1.5030799906333484e-05, "loss": 0.09153890609741211, "step": 3951 }, { "epoch": 0.5342796789184622, "grad_norm": 0.2364966869354248, "learning_rate": 1.5023955489354031e-05, "loss": 0.0482325553894043, "step": 3952 }, { "epoch": 0.5344148711449092, "grad_norm": 0.29457613825798035, "learning_rate": 1.5017111067386927e-05, "loss": 0.08064079284667969, "step": 3953 }, { "epoch": 0.5345500633713561, "grad_norm": 0.2295532077550888, "learning_rate": 1.50102666418572e-05, "loss": 0.0716547966003418, "step": 3954 }, { "epoch": 0.5346852555978031, "grad_norm": 0.2165478616952896, "learning_rate": 1.500342221418991e-05, "loss": 0.06681489944458008, "step": 3955 }, { "epoch": 0.5348204478242501, "grad_norm": 0.307179719209671, "learning_rate": 1.4996577785810094e-05, "loss": 0.07864665985107422, "step": 3956 }, { "epoch": 0.5349556400506971, "grad_norm": 0.25438976287841797, "learning_rate": 1.4989733358142798e-05, "loss": 0.06771159172058105, "step": 3957 }, { "epoch": 0.535090832277144, "grad_norm": 0.18071763217449188, "learning_rate": 1.498288893261308e-05, "loss": 0.05147981643676758, "step": 3958 }, { "epoch": 0.535226024503591, "grad_norm": 0.21865326166152954, "learning_rate": 1.497604451064597e-05, "loss": 0.06647729873657227, "step": 3959 }, { "epoch": 0.535361216730038, "grad_norm": 0.2598375976085663, "learning_rate": 1.496920009366652e-05, "loss": 0.05479311943054199, "step": 3960 }, { "epoch": 0.535496408956485, "grad_norm": 0.31111493706703186, "learning_rate": 1.4962355683099777e-05, "loss": 0.08464837074279785, "step": 3961 }, { "epoch": 0.535631601182932, "grad_norm": 0.19168411195278168, "learning_rate": 1.4955511280370782e-05, "loss": 0.050550222396850586, "step": 3962 }, { "epoch": 0.5357667934093789, "grad_norm": 0.3476887345314026, "learning_rate": 1.4948666886904579e-05, "loss": 0.07009601593017578, "step": 3963 }, { "epoch": 0.5359019856358259, "grad_norm": 0.32077741622924805, "learning_rate": 1.4941822504126199e-05, "loss": 0.08756351470947266, "step": 3964 }, { "epoch": 0.5360371778622729, "grad_norm": 0.22124578058719635, "learning_rate": 1.4934978133460681e-05, "loss": 0.05996525287628174, "step": 3965 }, { "epoch": 0.5361723700887199, "grad_norm": 0.2110230028629303, "learning_rate": 1.4928133776333068e-05, "loss": 0.06747722625732422, "step": 3966 }, { "epoch": 0.5363075623151669, "grad_norm": 0.29011982679367065, "learning_rate": 1.4921289434168386e-05, "loss": 0.05568695068359375, "step": 3967 }, { "epoch": 0.5364427545416138, "grad_norm": 0.36574438214302063, "learning_rate": 1.4914445108391663e-05, "loss": 0.06701183319091797, "step": 3968 }, { "epoch": 0.5365779467680608, "grad_norm": 0.2853558659553528, "learning_rate": 1.4907600800427922e-05, "loss": 0.06931304931640625, "step": 3969 }, { "epoch": 0.5367131389945078, "grad_norm": 0.746479332447052, "learning_rate": 1.4900756511702188e-05, "loss": 0.12049102783203125, "step": 3970 }, { "epoch": 0.5368483312209548, "grad_norm": 0.20287103950977325, "learning_rate": 1.4893912243639479e-05, "loss": 0.06114983558654785, "step": 3971 }, { "epoch": 0.5369835234474017, "grad_norm": 0.38798338174819946, "learning_rate": 1.4887067997664807e-05, "loss": 0.0707712173461914, "step": 3972 }, { "epoch": 0.5371187156738487, "grad_norm": 0.340192973613739, "learning_rate": 1.488022377520318e-05, "loss": 0.09295082092285156, "step": 3973 }, { "epoch": 0.5372539079002957, "grad_norm": 0.3362419903278351, "learning_rate": 1.4873379577679599e-05, "loss": 0.06299209594726562, "step": 3974 }, { "epoch": 0.5373891001267427, "grad_norm": 0.44043293595314026, "learning_rate": 1.4866535406519063e-05, "loss": 0.06498146057128906, "step": 3975 }, { "epoch": 0.5375242923531897, "grad_norm": 0.12804383039474487, "learning_rate": 1.4859691263146574e-05, "loss": 0.03209352493286133, "step": 3976 }, { "epoch": 0.5376594845796366, "grad_norm": 0.3819577395915985, "learning_rate": 1.485284714898711e-05, "loss": 0.06276702880859375, "step": 3977 }, { "epoch": 0.5377946768060836, "grad_norm": 0.24549607932567596, "learning_rate": 1.4846003065465653e-05, "loss": 0.055055320262908936, "step": 3978 }, { "epoch": 0.5379298690325306, "grad_norm": 0.6252531409263611, "learning_rate": 1.4839159014007179e-05, "loss": 0.07657742500305176, "step": 3979 }, { "epoch": 0.5380650612589776, "grad_norm": 0.2426566630601883, "learning_rate": 1.4832314996036653e-05, "loss": 0.05623602867126465, "step": 3980 }, { "epoch": 0.5382002534854246, "grad_norm": 0.3851320743560791, "learning_rate": 1.4825471012979048e-05, "loss": 0.07387828826904297, "step": 3981 }, { "epoch": 0.5383354457118715, "grad_norm": 0.34674301743507385, "learning_rate": 1.4818627066259301e-05, "loss": 0.07103347778320312, "step": 3982 }, { "epoch": 0.5384706379383185, "grad_norm": 0.22041839361190796, "learning_rate": 1.481178315730237e-05, "loss": 0.06679677963256836, "step": 3983 }, { "epoch": 0.5386058301647655, "grad_norm": 0.19270789623260498, "learning_rate": 1.4804939287533184e-05, "loss": 0.07610785961151123, "step": 3984 }, { "epoch": 0.5387410223912125, "grad_norm": 0.3698720932006836, "learning_rate": 1.4798095458376682e-05, "loss": 0.08079910278320312, "step": 3985 }, { "epoch": 0.5388762146176594, "grad_norm": 0.40295422077178955, "learning_rate": 1.4791251671257788e-05, "loss": 0.07804924249649048, "step": 3986 }, { "epoch": 0.5390114068441064, "grad_norm": 0.273698091506958, "learning_rate": 1.4784407927601401e-05, "loss": 0.07843509316444397, "step": 3987 }, { "epoch": 0.5391465990705534, "grad_norm": 0.5676789879798889, "learning_rate": 1.4777564228832436e-05, "loss": 0.0808095932006836, "step": 3988 }, { "epoch": 0.5392817912970004, "grad_norm": 0.2204323559999466, "learning_rate": 1.4770720576375782e-05, "loss": 0.045033931732177734, "step": 3989 }, { "epoch": 0.5394169835234474, "grad_norm": 0.2715097665786743, "learning_rate": 1.4763876971656334e-05, "loss": 0.06560802459716797, "step": 3990 }, { "epoch": 0.5395521757498943, "grad_norm": 0.6461181640625, "learning_rate": 1.4757033416098953e-05, "loss": 0.09101676940917969, "step": 3991 }, { "epoch": 0.5396873679763413, "grad_norm": 0.48632514476776123, "learning_rate": 1.4750189911128511e-05, "loss": 0.08342933654785156, "step": 3992 }, { "epoch": 0.5398225602027883, "grad_norm": 0.19051408767700195, "learning_rate": 1.4743346458169863e-05, "loss": 0.033000946044921875, "step": 3993 }, { "epoch": 0.5399577524292353, "grad_norm": 0.5683019757270813, "learning_rate": 1.473650305864785e-05, "loss": 0.09323787689208984, "step": 3994 }, { "epoch": 0.5400929446556822, "grad_norm": 0.4190486967563629, "learning_rate": 1.472965971398731e-05, "loss": 0.06127762794494629, "step": 3995 }, { "epoch": 0.5402281368821292, "grad_norm": 0.555079996585846, "learning_rate": 1.4722816425613054e-05, "loss": 0.07727217674255371, "step": 3996 }, { "epoch": 0.5403633291085762, "grad_norm": 0.9168782234191895, "learning_rate": 1.4715973194949895e-05, "loss": 0.07502365112304688, "step": 3997 }, { "epoch": 0.5404985213350232, "grad_norm": 0.27501973509788513, "learning_rate": 1.4709130023422636e-05, "loss": 0.07636690139770508, "step": 3998 }, { "epoch": 0.5406337135614702, "grad_norm": 0.3995406925678253, "learning_rate": 1.4702286912456052e-05, "loss": 0.07165002822875977, "step": 3999 }, { "epoch": 0.5407689057879171, "grad_norm": 1.017317771911621, "learning_rate": 1.4695443863474928e-05, "loss": 0.12094640731811523, "step": 4000 }, { "epoch": 0.5409040980143641, "grad_norm": 0.23867133259773254, "learning_rate": 1.4688600877904012e-05, "loss": 0.057695865631103516, "step": 4001 }, { "epoch": 0.5410392902408111, "grad_norm": 1.027727723121643, "learning_rate": 1.468175795716805e-05, "loss": 0.07674694061279297, "step": 4002 }, { "epoch": 0.5411744824672581, "grad_norm": 0.28664782643318176, "learning_rate": 1.4674915102691783e-05, "loss": 0.06580352783203125, "step": 4003 }, { "epoch": 0.5413096746937052, "grad_norm": 0.20941662788391113, "learning_rate": 1.4668072315899926e-05, "loss": 0.04004549980163574, "step": 4004 }, { "epoch": 0.5414448669201521, "grad_norm": 0.266179621219635, "learning_rate": 1.466122959821718e-05, "loss": 0.05678606033325195, "step": 4005 }, { "epoch": 0.5415800591465991, "grad_norm": 0.3486866056919098, "learning_rate": 1.4654386951068239e-05, "loss": 0.08021783828735352, "step": 4006 }, { "epoch": 0.5417152513730461, "grad_norm": 0.18772508203983307, "learning_rate": 1.4647544375877776e-05, "loss": 0.04320788383483887, "step": 4007 }, { "epoch": 0.5418504435994931, "grad_norm": 0.2924988865852356, "learning_rate": 1.4640701874070457e-05, "loss": 0.07803106307983398, "step": 4008 }, { "epoch": 0.5419856358259401, "grad_norm": 0.2681550979614258, "learning_rate": 1.4633859447070922e-05, "loss": 0.08400964736938477, "step": 4009 }, { "epoch": 0.542120828052387, "grad_norm": 0.4184032678604126, "learning_rate": 1.4627017096303805e-05, "loss": 0.07605552673339844, "step": 4010 }, { "epoch": 0.542256020278834, "grad_norm": 0.6821161508560181, "learning_rate": 1.4620174823193711e-05, "loss": 0.0885004997253418, "step": 4011 }, { "epoch": 0.542391212505281, "grad_norm": 0.8827657699584961, "learning_rate": 1.4613332629165249e-05, "loss": 0.06777095794677734, "step": 4012 }, { "epoch": 0.542526404731728, "grad_norm": 0.32371920347213745, "learning_rate": 1.4606490515642998e-05, "loss": 0.05782461166381836, "step": 4013 }, { "epoch": 0.542661596958175, "grad_norm": 0.5198997855186462, "learning_rate": 1.4599648484051516e-05, "loss": 0.10658121109008789, "step": 4014 }, { "epoch": 0.5427967891846219, "grad_norm": 0.4386010468006134, "learning_rate": 1.4592806535815358e-05, "loss": 0.07213687896728516, "step": 4015 }, { "epoch": 0.5429319814110689, "grad_norm": 0.3345072865486145, "learning_rate": 1.4585964672359045e-05, "loss": 0.0564727783203125, "step": 4016 }, { "epoch": 0.5430671736375159, "grad_norm": 0.36038216948509216, "learning_rate": 1.4579122895107098e-05, "loss": 0.07569026947021484, "step": 4017 }, { "epoch": 0.5432023658639629, "grad_norm": 0.3713933527469635, "learning_rate": 1.4572281205484012e-05, "loss": 0.07886934280395508, "step": 4018 }, { "epoch": 0.5433375580904098, "grad_norm": 0.7414966225624084, "learning_rate": 1.4565439604914256e-05, "loss": 0.08749055862426758, "step": 4019 }, { "epoch": 0.5434727503168568, "grad_norm": 0.6894304752349854, "learning_rate": 1.4558598094822294e-05, "loss": 0.07340073585510254, "step": 4020 }, { "epoch": 0.5436079425433038, "grad_norm": 0.3799217939376831, "learning_rate": 1.455175667663256e-05, "loss": 0.04095315933227539, "step": 4021 }, { "epoch": 0.5437431347697508, "grad_norm": 0.2783501446247101, "learning_rate": 1.4544915351769476e-05, "loss": 0.04219430685043335, "step": 4022 }, { "epoch": 0.5438783269961978, "grad_norm": 0.3897891342639923, "learning_rate": 1.4538074121657448e-05, "loss": 0.04985332489013672, "step": 4023 }, { "epoch": 0.5440135192226447, "grad_norm": 0.3952426612377167, "learning_rate": 1.4531232987720846e-05, "loss": 0.0752558708190918, "step": 4024 }, { "epoch": 0.5441487114490917, "grad_norm": 0.4929008185863495, "learning_rate": 1.4524391951384037e-05, "loss": 0.07437896728515625, "step": 4025 }, { "epoch": 0.5442839036755387, "grad_norm": 2.2381513118743896, "learning_rate": 1.4517551014071358e-05, "loss": 0.10858821868896484, "step": 4026 }, { "epoch": 0.5444190959019857, "grad_norm": 0.3457643985748291, "learning_rate": 1.4510710177207137e-05, "loss": 0.06877613067626953, "step": 4027 }, { "epoch": 0.5445542881284327, "grad_norm": 0.3232475817203522, "learning_rate": 1.450386944221566e-05, "loss": 0.05397367477416992, "step": 4028 }, { "epoch": 0.5446894803548796, "grad_norm": 0.4774819612503052, "learning_rate": 1.449702881052121e-05, "loss": 0.07984447479248047, "step": 4029 }, { "epoch": 0.5448246725813266, "grad_norm": 0.31248894333839417, "learning_rate": 1.4490188283548048e-05, "loss": 0.06699800491333008, "step": 4030 }, { "epoch": 0.5449598648077736, "grad_norm": 0.6607853770256042, "learning_rate": 1.44833478627204e-05, "loss": 0.07941150665283203, "step": 4031 }, { "epoch": 0.5450950570342206, "grad_norm": 0.5920162200927734, "learning_rate": 1.447650754946249e-05, "loss": 0.07666754722595215, "step": 4032 }, { "epoch": 0.5452302492606675, "grad_norm": 0.3527291417121887, "learning_rate": 1.4469667345198492e-05, "loss": 0.08711528778076172, "step": 4033 }, { "epoch": 0.5453654414871145, "grad_norm": 0.47891005873680115, "learning_rate": 1.446282725135258e-05, "loss": 0.08446550369262695, "step": 4034 }, { "epoch": 0.5455006337135615, "grad_norm": 0.6416643261909485, "learning_rate": 1.4455987269348904e-05, "loss": 0.06750345230102539, "step": 4035 }, { "epoch": 0.5456358259400085, "grad_norm": 0.37330344319343567, "learning_rate": 1.4449147400611578e-05, "loss": 0.07050609588623047, "step": 4036 }, { "epoch": 0.5457710181664555, "grad_norm": 0.4069422781467438, "learning_rate": 1.4442307646564702e-05, "loss": 0.07738596200942993, "step": 4037 }, { "epoch": 0.5459062103929024, "grad_norm": 0.29950210452079773, "learning_rate": 1.4435468008632345e-05, "loss": 0.06700122356414795, "step": 4038 }, { "epoch": 0.5460414026193494, "grad_norm": 0.35566675662994385, "learning_rate": 1.4428628488238557e-05, "loss": 0.08124065399169922, "step": 4039 }, { "epoch": 0.5461765948457964, "grad_norm": 0.3127792775630951, "learning_rate": 1.442178908680737e-05, "loss": 0.08421540260314941, "step": 4040 }, { "epoch": 0.5463117870722434, "grad_norm": 0.38258448243141174, "learning_rate": 1.4414949805762779e-05, "loss": 0.06313610076904297, "step": 4041 }, { "epoch": 0.5464469792986903, "grad_norm": 0.181279718875885, "learning_rate": 1.4408110646528757e-05, "loss": 0.04441499710083008, "step": 4042 }, { "epoch": 0.5465821715251373, "grad_norm": 0.3570832312107086, "learning_rate": 1.440127161052925e-05, "loss": 0.06917190551757812, "step": 4043 }, { "epoch": 0.5467173637515843, "grad_norm": 0.3481096625328064, "learning_rate": 1.4394432699188188e-05, "loss": 0.07164525985717773, "step": 4044 }, { "epoch": 0.5468525559780313, "grad_norm": 0.13638432323932648, "learning_rate": 1.4387593913929472e-05, "loss": 0.033329010009765625, "step": 4045 }, { "epoch": 0.5469877482044783, "grad_norm": 0.4036140441894531, "learning_rate": 1.4380755256176968e-05, "loss": 0.06957507133483887, "step": 4046 }, { "epoch": 0.5471229404309252, "grad_norm": 0.18465276062488556, "learning_rate": 1.437391672735452e-05, "loss": 0.04264044761657715, "step": 4047 }, { "epoch": 0.5472581326573722, "grad_norm": 0.32538607716560364, "learning_rate": 1.4367078328885946e-05, "loss": 0.06711673736572266, "step": 4048 }, { "epoch": 0.5473933248838192, "grad_norm": 0.4988720118999481, "learning_rate": 1.4360240062195039e-05, "loss": 0.05175590515136719, "step": 4049 }, { "epoch": 0.5475285171102662, "grad_norm": 0.1915602684020996, "learning_rate": 1.435340192870557e-05, "loss": 0.061510562896728516, "step": 4050 }, { "epoch": 0.5476637093367132, "grad_norm": 0.22108495235443115, "learning_rate": 1.434656392984126e-05, "loss": 0.053974151611328125, "step": 4051 }, { "epoch": 0.5477989015631601, "grad_norm": 0.4570392370223999, "learning_rate": 1.4339726067025828e-05, "loss": 0.0910940170288086, "step": 4052 }, { "epoch": 0.5479340937896071, "grad_norm": 0.43323811888694763, "learning_rate": 1.4332888341682947e-05, "loss": 0.06746768951416016, "step": 4053 }, { "epoch": 0.5480692860160541, "grad_norm": 0.5113841891288757, "learning_rate": 1.432605075523627e-05, "loss": 0.09701395034790039, "step": 4054 }, { "epoch": 0.5482044782425011, "grad_norm": 0.3196141719818115, "learning_rate": 1.4319213309109426e-05, "loss": 0.057730674743652344, "step": 4055 }, { "epoch": 0.548339670468948, "grad_norm": 0.27028515934944153, "learning_rate": 1.4312376004725996e-05, "loss": 0.07694220542907715, "step": 4056 }, { "epoch": 0.548474862695395, "grad_norm": 0.33444029092788696, "learning_rate": 1.430553884350955e-05, "loss": 0.058648109436035156, "step": 4057 }, { "epoch": 0.548610054921842, "grad_norm": 0.410898894071579, "learning_rate": 1.429870182688362e-05, "loss": 0.0593876838684082, "step": 4058 }, { "epoch": 0.548745247148289, "grad_norm": 0.19892758131027222, "learning_rate": 1.4291864956271713e-05, "loss": 0.04279899597167969, "step": 4059 }, { "epoch": 0.548880439374736, "grad_norm": 0.2936065196990967, "learning_rate": 1.4285028233097293e-05, "loss": 0.07022547721862793, "step": 4060 }, { "epoch": 0.5490156316011829, "grad_norm": 1.709394097328186, "learning_rate": 1.4278191658783809e-05, "loss": 0.07623481750488281, "step": 4061 }, { "epoch": 0.5491508238276299, "grad_norm": 0.38031327724456787, "learning_rate": 1.427135523475467e-05, "loss": 0.08426761627197266, "step": 4062 }, { "epoch": 0.5492860160540769, "grad_norm": 0.3300657272338867, "learning_rate": 1.4264518962433258e-05, "loss": 0.08411979675292969, "step": 4063 }, { "epoch": 0.5494212082805239, "grad_norm": 0.27288737893104553, "learning_rate": 1.4257682843242925e-05, "loss": 0.047863006591796875, "step": 4064 }, { "epoch": 0.5495564005069709, "grad_norm": 0.3293284773826599, "learning_rate": 1.4250846878606974e-05, "loss": 0.06148529052734375, "step": 4065 }, { "epoch": 0.5496915927334178, "grad_norm": 0.28542405366897583, "learning_rate": 1.4244011069948702e-05, "loss": 0.056427955627441406, "step": 4066 }, { "epoch": 0.5498267849598648, "grad_norm": 0.34654128551483154, "learning_rate": 1.4237175418691357e-05, "loss": 0.05337119102478027, "step": 4067 }, { "epoch": 0.5499619771863118, "grad_norm": 0.309227854013443, "learning_rate": 1.4230339926258153e-05, "loss": 0.05243372917175293, "step": 4068 }, { "epoch": 0.5500971694127588, "grad_norm": 0.36314624547958374, "learning_rate": 1.422350459407229e-05, "loss": 0.07169866561889648, "step": 4069 }, { "epoch": 0.5502323616392057, "grad_norm": 0.44304680824279785, "learning_rate": 1.4216669423556903e-05, "loss": 0.08015775680541992, "step": 4070 }, { "epoch": 0.5503675538656527, "grad_norm": 0.20376355946063995, "learning_rate": 1.420983441613512e-05, "loss": 0.033985912799835205, "step": 4071 }, { "epoch": 0.5505027460920997, "grad_norm": 0.2869951128959656, "learning_rate": 1.420299957323003e-05, "loss": 0.043447256088256836, "step": 4072 }, { "epoch": 0.5506379383185467, "grad_norm": 0.7137879729270935, "learning_rate": 1.4196164896264679e-05, "loss": 0.10390281677246094, "step": 4073 }, { "epoch": 0.5507731305449937, "grad_norm": 0.25919097661972046, "learning_rate": 1.418933038666208e-05, "loss": 0.057084083557128906, "step": 4074 }, { "epoch": 0.5509083227714406, "grad_norm": 0.24649231135845184, "learning_rate": 1.4182496045845217e-05, "loss": 0.06522965431213379, "step": 4075 }, { "epoch": 0.5510435149978876, "grad_norm": 0.28304094076156616, "learning_rate": 1.4175661875237036e-05, "loss": 0.05105435848236084, "step": 4076 }, { "epoch": 0.5511787072243346, "grad_norm": 0.18977323174476624, "learning_rate": 1.416882787626045e-05, "loss": 0.036368727684020996, "step": 4077 }, { "epoch": 0.5513138994507816, "grad_norm": 0.20677489042282104, "learning_rate": 1.4161994050338334e-05, "loss": 0.04225122928619385, "step": 4078 }, { "epoch": 0.5514490916772286, "grad_norm": 0.24968455731868744, "learning_rate": 1.4155160398893528e-05, "loss": 0.05938720703125, "step": 4079 }, { "epoch": 0.5515842839036755, "grad_norm": 0.620607852935791, "learning_rate": 1.4148326923348824e-05, "loss": 0.10203838348388672, "step": 4080 }, { "epoch": 0.5517194761301225, "grad_norm": 0.1507611721754074, "learning_rate": 1.4141493625127e-05, "loss": 0.03885054588317871, "step": 4081 }, { "epoch": 0.5518546683565695, "grad_norm": 0.46643996238708496, "learning_rate": 1.4134660505650786e-05, "loss": 0.05947399139404297, "step": 4082 }, { "epoch": 0.5519898605830165, "grad_norm": 0.3163384795188904, "learning_rate": 1.4127827566342864e-05, "loss": 0.04616665840148926, "step": 4083 }, { "epoch": 0.5521250528094634, "grad_norm": 0.2818586826324463, "learning_rate": 1.4120994808625896e-05, "loss": 0.05243968963623047, "step": 4084 }, { "epoch": 0.5522602450359104, "grad_norm": 0.3116128742694855, "learning_rate": 1.4114162233922494e-05, "loss": 0.062319278717041016, "step": 4085 }, { "epoch": 0.5523954372623574, "grad_norm": 0.2428111433982849, "learning_rate": 1.4107329843655238e-05, "loss": 0.054234862327575684, "step": 4086 }, { "epoch": 0.5525306294888044, "grad_norm": 0.48988455533981323, "learning_rate": 1.4100497639246675e-05, "loss": 0.08711624145507812, "step": 4087 }, { "epoch": 0.5526658217152514, "grad_norm": 0.30226606130599976, "learning_rate": 1.4093665622119294e-05, "loss": 0.07283449172973633, "step": 4088 }, { "epoch": 0.5528010139416983, "grad_norm": 0.5557790994644165, "learning_rate": 1.4086833793695566e-05, "loss": 0.06707191467285156, "step": 4089 }, { "epoch": 0.5529362061681453, "grad_norm": 0.2503361701965332, "learning_rate": 1.408000215539791e-05, "loss": 0.04856252670288086, "step": 4090 }, { "epoch": 0.5530713983945923, "grad_norm": 0.45031601190567017, "learning_rate": 1.4073170708648711e-05, "loss": 0.07099366188049316, "step": 4091 }, { "epoch": 0.5532065906210393, "grad_norm": 0.6956272721290588, "learning_rate": 1.406633945487032e-05, "loss": 0.0933542251586914, "step": 4092 }, { "epoch": 0.5533417828474863, "grad_norm": 0.2610994875431061, "learning_rate": 1.4059508395485026e-05, "loss": 0.04047584533691406, "step": 4093 }, { "epoch": 0.5534769750739332, "grad_norm": 0.19905126094818115, "learning_rate": 1.4052677531915102e-05, "loss": 0.04932081699371338, "step": 4094 }, { "epoch": 0.5536121673003802, "grad_norm": 0.4185316264629364, "learning_rate": 1.4045846865582765e-05, "loss": 0.09696292877197266, "step": 4095 }, { "epoch": 0.5537473595268272, "grad_norm": 0.2882910668849945, "learning_rate": 1.4039016397910206e-05, "loss": 0.06048727035522461, "step": 4096 }, { "epoch": 0.5538825517532742, "grad_norm": 0.28646066784858704, "learning_rate": 1.403218613031955e-05, "loss": 0.051027774810791016, "step": 4097 }, { "epoch": 0.5540177439797211, "grad_norm": 0.30673086643218994, "learning_rate": 1.4025356064232903e-05, "loss": 0.0428166389465332, "step": 4098 }, { "epoch": 0.5541529362061681, "grad_norm": 0.2924477159976959, "learning_rate": 1.4018526201072324e-05, "loss": 0.06857061386108398, "step": 4099 }, { "epoch": 0.5542881284326151, "grad_norm": 0.3255417048931122, "learning_rate": 1.4011696542259821e-05, "loss": 0.0710136890411377, "step": 4100 }, { "epoch": 0.5544233206590621, "grad_norm": 0.5660695433616638, "learning_rate": 1.4004867089217376e-05, "loss": 0.06834745407104492, "step": 4101 }, { "epoch": 0.5545585128855091, "grad_norm": 0.3874152600765228, "learning_rate": 1.39980378433669e-05, "loss": 0.06563949584960938, "step": 4102 }, { "epoch": 0.554693705111956, "grad_norm": 0.397693008184433, "learning_rate": 1.399120880613029e-05, "loss": 0.0656123161315918, "step": 4103 }, { "epoch": 0.554828897338403, "grad_norm": 0.2028006762266159, "learning_rate": 1.3984379978929388e-05, "loss": 0.036795616149902344, "step": 4104 }, { "epoch": 0.55496408956485, "grad_norm": 0.45990800857543945, "learning_rate": 1.3977551363185995e-05, "loss": 0.0576939582824707, "step": 4105 }, { "epoch": 0.555099281791297, "grad_norm": 0.38087111711502075, "learning_rate": 1.3970722960321854e-05, "loss": 0.07435894012451172, "step": 4106 }, { "epoch": 0.555234474017744, "grad_norm": 0.17435480654239655, "learning_rate": 1.3963894771758682e-05, "loss": 0.031667470932006836, "step": 4107 }, { "epoch": 0.5553696662441909, "grad_norm": 0.1557110846042633, "learning_rate": 1.3957066798918143e-05, "loss": 0.040529847145080566, "step": 4108 }, { "epoch": 0.5555048584706379, "grad_norm": 0.4714154005050659, "learning_rate": 1.3950239043221861e-05, "loss": 0.05920982360839844, "step": 4109 }, { "epoch": 0.5556400506970849, "grad_norm": 0.49100977182388306, "learning_rate": 1.3943411506091408e-05, "loss": 0.06539535522460938, "step": 4110 }, { "epoch": 0.5557752429235319, "grad_norm": 0.23461778461933136, "learning_rate": 1.3936584188948313e-05, "loss": 0.039727210998535156, "step": 4111 }, { "epoch": 0.5559104351499788, "grad_norm": 0.5778148174285889, "learning_rate": 1.3929757093214059e-05, "loss": 0.08493161201477051, "step": 4112 }, { "epoch": 0.5560456273764258, "grad_norm": 0.5185837745666504, "learning_rate": 1.3922930220310085e-05, "loss": 0.07986998558044434, "step": 4113 }, { "epoch": 0.5561808196028728, "grad_norm": 0.3703401982784271, "learning_rate": 1.3916103571657786e-05, "loss": 0.0818014144897461, "step": 4114 }, { "epoch": 0.5563160118293198, "grad_norm": 0.41620615124702454, "learning_rate": 1.3909277148678504e-05, "loss": 0.08162307739257812, "step": 4115 }, { "epoch": 0.5564512040557668, "grad_norm": 0.31471601128578186, "learning_rate": 1.3902450952793536e-05, "loss": 0.06625914573669434, "step": 4116 }, { "epoch": 0.5565863962822137, "grad_norm": 0.5340596437454224, "learning_rate": 1.389562498542413e-05, "loss": 0.045081496238708496, "step": 4117 }, { "epoch": 0.5567215885086607, "grad_norm": 0.45191457867622375, "learning_rate": 1.388879924799149e-05, "loss": 0.09464693069458008, "step": 4118 }, { "epoch": 0.5568567807351077, "grad_norm": 0.2838112711906433, "learning_rate": 1.388197374191678e-05, "loss": 0.061566829681396484, "step": 4119 }, { "epoch": 0.5569919729615547, "grad_norm": 0.4281176030635834, "learning_rate": 1.387514846862109e-05, "loss": 0.07387542724609375, "step": 4120 }, { "epoch": 0.5571271651880016, "grad_norm": 0.1971270740032196, "learning_rate": 1.3868323429525492e-05, "loss": 0.051799774169921875, "step": 4121 }, { "epoch": 0.5572623574144486, "grad_norm": 0.3478848934173584, "learning_rate": 1.3861498626050986e-05, "loss": 0.05951738357543945, "step": 4122 }, { "epoch": 0.5573975496408956, "grad_norm": 0.1675407886505127, "learning_rate": 1.385467405961854e-05, "loss": 0.042083740234375, "step": 4123 }, { "epoch": 0.5575327418673426, "grad_norm": 0.3506906032562256, "learning_rate": 1.3847849731649066e-05, "loss": 0.05516231060028076, "step": 4124 }, { "epoch": 0.5576679340937896, "grad_norm": 0.31182748079299927, "learning_rate": 1.3841025643563418e-05, "loss": 0.06922054290771484, "step": 4125 }, { "epoch": 0.5578031263202365, "grad_norm": 0.30659523606300354, "learning_rate": 1.3834201796782413e-05, "loss": 0.07000207901000977, "step": 4126 }, { "epoch": 0.5579383185466835, "grad_norm": 0.39148402214050293, "learning_rate": 1.3827378192726808e-05, "loss": 0.09355020523071289, "step": 4127 }, { "epoch": 0.5580735107731305, "grad_norm": 0.22750145196914673, "learning_rate": 1.3820554832817324e-05, "loss": 0.06097602844238281, "step": 4128 }, { "epoch": 0.5582087029995775, "grad_norm": 0.26270392537117004, "learning_rate": 1.3813731718474606e-05, "loss": 0.045249104499816895, "step": 4129 }, { "epoch": 0.5583438952260245, "grad_norm": 0.280582994222641, "learning_rate": 1.380690885111927e-05, "loss": 0.07164716720581055, "step": 4130 }, { "epoch": 0.5584790874524714, "grad_norm": 0.2737008035182953, "learning_rate": 1.3800086232171877e-05, "loss": 0.06156015396118164, "step": 4131 }, { "epoch": 0.5586142796789184, "grad_norm": 0.31067678332328796, "learning_rate": 1.3793263863052926e-05, "loss": 0.053952932357788086, "step": 4132 }, { "epoch": 0.5587494719053654, "grad_norm": 0.20502115786075592, "learning_rate": 1.3786441745182881e-05, "loss": 0.05380678176879883, "step": 4133 }, { "epoch": 0.5588846641318124, "grad_norm": 0.16852745413780212, "learning_rate": 1.3779619879982127e-05, "loss": 0.046740055084228516, "step": 4134 }, { "epoch": 0.5590198563582593, "grad_norm": 0.2714245021343231, "learning_rate": 1.3772798268871025e-05, "loss": 0.05841350555419922, "step": 4135 }, { "epoch": 0.5591550485847063, "grad_norm": 0.178030863404274, "learning_rate": 1.376597691326987e-05, "loss": 0.04270339012145996, "step": 4136 }, { "epoch": 0.5592902408111533, "grad_norm": 0.49629151821136475, "learning_rate": 1.3759155814598898e-05, "loss": 0.08703851699829102, "step": 4137 }, { "epoch": 0.5594254330376003, "grad_norm": 0.399566113948822, "learning_rate": 1.3752334974278308e-05, "loss": 0.08135366439819336, "step": 4138 }, { "epoch": 0.5595606252640473, "grad_norm": 0.22032016515731812, "learning_rate": 1.3745514393728225e-05, "loss": 0.04446768760681152, "step": 4139 }, { "epoch": 0.5596958174904944, "grad_norm": 0.1380263715982437, "learning_rate": 1.3738694074368735e-05, "loss": 0.025764435529708862, "step": 4140 }, { "epoch": 0.5598310097169413, "grad_norm": 1.0428732633590698, "learning_rate": 1.3731874017619868e-05, "loss": 0.08602285385131836, "step": 4141 }, { "epoch": 0.5599662019433883, "grad_norm": 0.23632653057575226, "learning_rate": 1.3725054224901597e-05, "loss": 0.050508737564086914, "step": 4142 }, { "epoch": 0.5601013941698353, "grad_norm": 0.3460511863231659, "learning_rate": 1.3718234697633826e-05, "loss": 0.0711984634399414, "step": 4143 }, { "epoch": 0.5602365863962823, "grad_norm": 0.3127269744873047, "learning_rate": 1.3711415437236427e-05, "loss": 0.06948280334472656, "step": 4144 }, { "epoch": 0.5603717786227292, "grad_norm": 0.3011138141155243, "learning_rate": 1.3704596445129207e-05, "loss": 0.06205272674560547, "step": 4145 }, { "epoch": 0.5605069708491762, "grad_norm": 0.25340166687965393, "learning_rate": 1.369777772273192e-05, "loss": 0.04357600212097168, "step": 4146 }, { "epoch": 0.5606421630756232, "grad_norm": 0.3297371566295624, "learning_rate": 1.369095927146425e-05, "loss": 0.054175376892089844, "step": 4147 }, { "epoch": 0.5607773553020702, "grad_norm": 0.21068519353866577, "learning_rate": 1.3684141092745846e-05, "loss": 0.051644086837768555, "step": 4148 }, { "epoch": 0.5609125475285172, "grad_norm": 0.38605958223342896, "learning_rate": 1.3677323187996276e-05, "loss": 0.05482792854309082, "step": 4149 }, { "epoch": 0.5610477397549641, "grad_norm": 0.5324007868766785, "learning_rate": 1.3670505558635074e-05, "loss": 0.07591867446899414, "step": 4150 }, { "epoch": 0.5611829319814111, "grad_norm": 0.39704835414886475, "learning_rate": 1.366368820608171e-05, "loss": 0.07014083862304688, "step": 4151 }, { "epoch": 0.5613181242078581, "grad_norm": 0.2284613996744156, "learning_rate": 1.365687113175558e-05, "loss": 0.04727625846862793, "step": 4152 }, { "epoch": 0.5614533164343051, "grad_norm": 0.17523261904716492, "learning_rate": 1.3650054337076049e-05, "loss": 0.049777984619140625, "step": 4153 }, { "epoch": 0.561588508660752, "grad_norm": 0.2719191610813141, "learning_rate": 1.3643237823462398e-05, "loss": 0.06025516986846924, "step": 4154 }, { "epoch": 0.561723700887199, "grad_norm": 0.21693992614746094, "learning_rate": 1.363642159233387e-05, "loss": 0.05410635471343994, "step": 4155 }, { "epoch": 0.561858893113646, "grad_norm": 0.2796480357646942, "learning_rate": 1.3629605645109642e-05, "loss": 0.045880913734436035, "step": 4156 }, { "epoch": 0.561994085340093, "grad_norm": 0.2789880931377411, "learning_rate": 1.362278998320882e-05, "loss": 0.08246898651123047, "step": 4157 }, { "epoch": 0.56212927756654, "grad_norm": 0.4399503767490387, "learning_rate": 1.3615974608050472e-05, "loss": 0.06561732292175293, "step": 4158 }, { "epoch": 0.5622644697929869, "grad_norm": 0.6177327036857605, "learning_rate": 1.3609159521053588e-05, "loss": 0.10786080360412598, "step": 4159 }, { "epoch": 0.5623996620194339, "grad_norm": 0.49539682269096375, "learning_rate": 1.3602344723637107e-05, "loss": 0.08597803115844727, "step": 4160 }, { "epoch": 0.5625348542458809, "grad_norm": 0.17610320448875427, "learning_rate": 1.3595530217219916e-05, "loss": 0.04601788520812988, "step": 4161 }, { "epoch": 0.5626700464723279, "grad_norm": 0.19023549556732178, "learning_rate": 1.3588716003220815e-05, "loss": 0.06010150909423828, "step": 4162 }, { "epoch": 0.5628052386987749, "grad_norm": 0.45934152603149414, "learning_rate": 1.3581902083058574e-05, "loss": 0.06469964981079102, "step": 4163 }, { "epoch": 0.5629404309252218, "grad_norm": 0.15902107954025269, "learning_rate": 1.3575088458151877e-05, "loss": 0.041898250579833984, "step": 4164 }, { "epoch": 0.5630756231516688, "grad_norm": 1.0006210803985596, "learning_rate": 1.3568275129919367e-05, "loss": 0.07063019275665283, "step": 4165 }, { "epoch": 0.5632108153781158, "grad_norm": 0.4762057363986969, "learning_rate": 1.3561462099779604e-05, "loss": 0.07624053955078125, "step": 4166 }, { "epoch": 0.5633460076045628, "grad_norm": 0.35259366035461426, "learning_rate": 1.3554649369151104e-05, "loss": 0.06088662147521973, "step": 4167 }, { "epoch": 0.5634811998310097, "grad_norm": 0.33427998423576355, "learning_rate": 1.3547836939452315e-05, "loss": 0.06496810913085938, "step": 4168 }, { "epoch": 0.5636163920574567, "grad_norm": 0.22689493000507355, "learning_rate": 1.3541024812101615e-05, "loss": 0.0627431869506836, "step": 4169 }, { "epoch": 0.5637515842839037, "grad_norm": 0.33148717880249023, "learning_rate": 1.3534212988517339e-05, "loss": 0.057376861572265625, "step": 4170 }, { "epoch": 0.5638867765103507, "grad_norm": 0.44841817021369934, "learning_rate": 1.3527401470117726e-05, "loss": 0.06867027282714844, "step": 4171 }, { "epoch": 0.5640219687367977, "grad_norm": 0.14598503708839417, "learning_rate": 1.3520590258320981e-05, "loss": 0.03270459175109863, "step": 4172 }, { "epoch": 0.5641571609632446, "grad_norm": 0.1817558854818344, "learning_rate": 1.3513779354545235e-05, "loss": 0.051550865173339844, "step": 4173 }, { "epoch": 0.5642923531896916, "grad_norm": 0.2841106951236725, "learning_rate": 1.3506968760208557e-05, "loss": 0.055995941162109375, "step": 4174 }, { "epoch": 0.5644275454161386, "grad_norm": 0.40993207693099976, "learning_rate": 1.3500158476728938e-05, "loss": 0.06054532527923584, "step": 4175 }, { "epoch": 0.5645627376425856, "grad_norm": 0.17506608366966248, "learning_rate": 1.3493348505524325e-05, "loss": 0.05012226104736328, "step": 4176 }, { "epoch": 0.5646979298690326, "grad_norm": 0.437804639339447, "learning_rate": 1.3486538848012586e-05, "loss": 0.06559109687805176, "step": 4177 }, { "epoch": 0.5648331220954795, "grad_norm": 0.3530019223690033, "learning_rate": 1.3479729505611532e-05, "loss": 0.08833551406860352, "step": 4178 }, { "epoch": 0.5649683143219265, "grad_norm": 0.4161841571331024, "learning_rate": 1.3472920479738906e-05, "loss": 0.06860017776489258, "step": 4179 }, { "epoch": 0.5651035065483735, "grad_norm": 0.2763068377971649, "learning_rate": 1.346611177181237e-05, "loss": 0.056932687759399414, "step": 4180 }, { "epoch": 0.5652386987748205, "grad_norm": 0.307731956243515, "learning_rate": 1.3459303383249547e-05, "loss": 0.053565025329589844, "step": 4181 }, { "epoch": 0.5653738910012674, "grad_norm": 0.5274808406829834, "learning_rate": 1.3452495315467975e-05, "loss": 0.07222676277160645, "step": 4182 }, { "epoch": 0.5655090832277144, "grad_norm": 0.25629836320877075, "learning_rate": 1.3445687569885132e-05, "loss": 0.056728363037109375, "step": 4183 }, { "epoch": 0.5656442754541614, "grad_norm": 0.19897255301475525, "learning_rate": 1.3438880147918429e-05, "loss": 0.03602886199951172, "step": 4184 }, { "epoch": 0.5657794676806084, "grad_norm": 0.2085980474948883, "learning_rate": 1.3432073050985201e-05, "loss": 0.03757810592651367, "step": 4185 }, { "epoch": 0.5659146599070554, "grad_norm": 0.6800008416175842, "learning_rate": 1.3425266280502721e-05, "loss": 0.058939218521118164, "step": 4186 }, { "epoch": 0.5660498521335023, "grad_norm": 0.3324357867240906, "learning_rate": 1.3418459837888202e-05, "loss": 0.07649612426757812, "step": 4187 }, { "epoch": 0.5661850443599493, "grad_norm": 0.4090869724750519, "learning_rate": 1.3411653724558784e-05, "loss": 0.07547235488891602, "step": 4188 }, { "epoch": 0.5663202365863963, "grad_norm": 0.18743127584457397, "learning_rate": 1.3404847941931523e-05, "loss": 0.05271005630493164, "step": 4189 }, { "epoch": 0.5664554288128433, "grad_norm": 0.29850631952285767, "learning_rate": 1.339804249142343e-05, "loss": 0.08056211471557617, "step": 4190 }, { "epoch": 0.5665906210392903, "grad_norm": 0.348247230052948, "learning_rate": 1.3391237374451429e-05, "loss": 0.042586326599121094, "step": 4191 }, { "epoch": 0.5667258132657372, "grad_norm": 0.4494287073612213, "learning_rate": 1.3384432592432388e-05, "loss": 0.07525444030761719, "step": 4192 }, { "epoch": 0.5668610054921842, "grad_norm": 0.7001757621765137, "learning_rate": 1.3377628146783102e-05, "loss": 0.11281776428222656, "step": 4193 }, { "epoch": 0.5669961977186312, "grad_norm": 0.3711039423942566, "learning_rate": 1.3370824038920281e-05, "loss": 0.04892301559448242, "step": 4194 }, { "epoch": 0.5671313899450782, "grad_norm": 0.25460025668144226, "learning_rate": 1.3364020270260586e-05, "loss": 0.07265377044677734, "step": 4195 }, { "epoch": 0.5672665821715251, "grad_norm": 0.3742760121822357, "learning_rate": 1.335721684222059e-05, "loss": 0.0734403133392334, "step": 4196 }, { "epoch": 0.5674017743979721, "grad_norm": 0.26575955748558044, "learning_rate": 1.3350413756216816e-05, "loss": 0.049715638160705566, "step": 4197 }, { "epoch": 0.5675369666244191, "grad_norm": 0.2327270656824112, "learning_rate": 1.334361101366569e-05, "loss": 0.06168937683105469, "step": 4198 }, { "epoch": 0.5676721588508661, "grad_norm": 0.341900110244751, "learning_rate": 1.3336808615983582e-05, "loss": 0.04771614074707031, "step": 4199 }, { "epoch": 0.5678073510773131, "grad_norm": 0.31476742029190063, "learning_rate": 1.3330006564586791e-05, "loss": 0.06220579147338867, "step": 4200 }, { "epoch": 0.56794254330376, "grad_norm": 0.48586228489875793, "learning_rate": 1.3323204860891539e-05, "loss": 0.09546470642089844, "step": 4201 }, { "epoch": 0.568077735530207, "grad_norm": 0.24473726749420166, "learning_rate": 1.3316403506313981e-05, "loss": 0.04458355903625488, "step": 4202 }, { "epoch": 0.568212927756654, "grad_norm": 0.39096829295158386, "learning_rate": 1.3309602502270184e-05, "loss": 0.04271745681762695, "step": 4203 }, { "epoch": 0.568348119983101, "grad_norm": 0.1461983174085617, "learning_rate": 1.3302801850176161e-05, "loss": 0.03778576850891113, "step": 4204 }, { "epoch": 0.568483312209548, "grad_norm": 0.3260999321937561, "learning_rate": 1.3296001551447848e-05, "loss": 0.050777435302734375, "step": 4205 }, { "epoch": 0.5686185044359949, "grad_norm": 0.24449004232883453, "learning_rate": 1.32892016075011e-05, "loss": 0.07197999954223633, "step": 4206 }, { "epoch": 0.5687536966624419, "grad_norm": 0.21209828555583954, "learning_rate": 1.3282402019751694e-05, "loss": 0.05071306228637695, "step": 4207 }, { "epoch": 0.5688888888888889, "grad_norm": 0.30274760723114014, "learning_rate": 1.327560278961535e-05, "loss": 0.04763948917388916, "step": 4208 }, { "epoch": 0.5690240811153359, "grad_norm": 0.36460569500923157, "learning_rate": 1.3268803918507699e-05, "loss": 0.07638049125671387, "step": 4209 }, { "epoch": 0.5691592733417828, "grad_norm": 0.40261271595954895, "learning_rate": 1.3262005407844306e-05, "loss": 0.06149876117706299, "step": 4210 }, { "epoch": 0.5692944655682298, "grad_norm": 0.24275512993335724, "learning_rate": 1.325520725904066e-05, "loss": 0.05030655860900879, "step": 4211 }, { "epoch": 0.5694296577946768, "grad_norm": 0.2505589425563812, "learning_rate": 1.3248409473512158e-05, "loss": 0.05789375305175781, "step": 4212 }, { "epoch": 0.5695648500211238, "grad_norm": 0.49377721548080444, "learning_rate": 1.3241612052674146e-05, "loss": 0.10123944282531738, "step": 4213 }, { "epoch": 0.5697000422475708, "grad_norm": 0.34104618430137634, "learning_rate": 1.3234814997941883e-05, "loss": 0.08298385143280029, "step": 4214 }, { "epoch": 0.5698352344740177, "grad_norm": 0.3067338764667511, "learning_rate": 1.322801831073055e-05, "loss": 0.06383585929870605, "step": 4215 }, { "epoch": 0.5699704267004647, "grad_norm": 0.32947397232055664, "learning_rate": 1.322122199245526e-05, "loss": 0.06831598281860352, "step": 4216 }, { "epoch": 0.5701056189269117, "grad_norm": 0.2474450170993805, "learning_rate": 1.321442604453103e-05, "loss": 0.06218385696411133, "step": 4217 }, { "epoch": 0.5702408111533587, "grad_norm": 0.3361920118331909, "learning_rate": 1.320763046837282e-05, "loss": 0.06545305252075195, "step": 4218 }, { "epoch": 0.5703760033798057, "grad_norm": 0.44064798951148987, "learning_rate": 1.3200835265395504e-05, "loss": 0.0945591926574707, "step": 4219 }, { "epoch": 0.5705111956062526, "grad_norm": 0.4850946068763733, "learning_rate": 1.3194040437013885e-05, "loss": 0.08764171600341797, "step": 4220 }, { "epoch": 0.5706463878326996, "grad_norm": 0.3688046634197235, "learning_rate": 1.3187245984642673e-05, "loss": 0.05492281913757324, "step": 4221 }, { "epoch": 0.5707815800591466, "grad_norm": 0.8212231397628784, "learning_rate": 1.3180451909696517e-05, "loss": 0.04170036315917969, "step": 4222 }, { "epoch": 0.5709167722855936, "grad_norm": 0.2588346600532532, "learning_rate": 1.3173658213589972e-05, "loss": 0.057157278060913086, "step": 4223 }, { "epoch": 0.5710519645120405, "grad_norm": 0.7691304683685303, "learning_rate": 1.3166864897737526e-05, "loss": 0.0939931869506836, "step": 4224 }, { "epoch": 0.5711871567384875, "grad_norm": 0.22323811054229736, "learning_rate": 1.3160071963553593e-05, "loss": 0.039671897888183594, "step": 4225 }, { "epoch": 0.5713223489649345, "grad_norm": 0.4611817002296448, "learning_rate": 1.315327941245248e-05, "loss": 0.09499216079711914, "step": 4226 }, { "epoch": 0.5714575411913815, "grad_norm": 0.3247144818305969, "learning_rate": 1.3146487245848445e-05, "loss": 0.07895421981811523, "step": 4227 }, { "epoch": 0.5715927334178285, "grad_norm": 0.2039884477853775, "learning_rate": 1.3139695465155645e-05, "loss": 0.0419316291809082, "step": 4228 }, { "epoch": 0.5717279256442754, "grad_norm": 0.3020569980144501, "learning_rate": 1.3132904071788177e-05, "loss": 0.07380104064941406, "step": 4229 }, { "epoch": 0.5718631178707224, "grad_norm": 0.421318382024765, "learning_rate": 1.3126113067160031e-05, "loss": 0.07337641716003418, "step": 4230 }, { "epoch": 0.5719983100971694, "grad_norm": 0.26076003909111023, "learning_rate": 1.3119322452685139e-05, "loss": 0.07502365112304688, "step": 4231 }, { "epoch": 0.5721335023236164, "grad_norm": 0.456095814704895, "learning_rate": 1.3112532229777344e-05, "loss": 0.09958291053771973, "step": 4232 }, { "epoch": 0.5722686945500634, "grad_norm": 0.3706338405609131, "learning_rate": 1.3105742399850399e-05, "loss": 0.0721426010131836, "step": 4233 }, { "epoch": 0.5724038867765103, "grad_norm": 0.1377008855342865, "learning_rate": 1.3098952964317996e-05, "loss": 0.04186058044433594, "step": 4234 }, { "epoch": 0.5725390790029573, "grad_norm": 0.22118398547172546, "learning_rate": 1.3092163924593717e-05, "loss": 0.04795026779174805, "step": 4235 }, { "epoch": 0.5726742712294043, "grad_norm": 0.2733802795410156, "learning_rate": 1.308537528209108e-05, "loss": 0.07524681091308594, "step": 4236 }, { "epoch": 0.5728094634558513, "grad_norm": 0.3269210755825043, "learning_rate": 1.3078587038223525e-05, "loss": 0.07893228530883789, "step": 4237 }, { "epoch": 0.5729446556822982, "grad_norm": 0.37420061230659485, "learning_rate": 1.3071799194404392e-05, "loss": 0.1178579330444336, "step": 4238 }, { "epoch": 0.5730798479087452, "grad_norm": 0.4048102796077728, "learning_rate": 1.3065011752046955e-05, "loss": 0.10272502899169922, "step": 4239 }, { "epoch": 0.5732150401351922, "grad_norm": 0.20261088013648987, "learning_rate": 1.3058224712564382e-05, "loss": 0.05172419548034668, "step": 4240 }, { "epoch": 0.5733502323616392, "grad_norm": 0.2715659439563751, "learning_rate": 1.305143807736978e-05, "loss": 0.07482242584228516, "step": 4241 }, { "epoch": 0.5734854245880862, "grad_norm": 0.3544654846191406, "learning_rate": 1.3044651847876163e-05, "loss": 0.08915042877197266, "step": 4242 }, { "epoch": 0.5736206168145331, "grad_norm": 0.6068330407142639, "learning_rate": 1.3037866025496466e-05, "loss": 0.09934806823730469, "step": 4243 }, { "epoch": 0.5737558090409801, "grad_norm": 0.3568841516971588, "learning_rate": 1.3031080611643514e-05, "loss": 0.06970405578613281, "step": 4244 }, { "epoch": 0.5738910012674271, "grad_norm": 0.27211013436317444, "learning_rate": 1.3024295607730083e-05, "loss": 0.07442402839660645, "step": 4245 }, { "epoch": 0.5740261934938741, "grad_norm": 0.3491658568382263, "learning_rate": 1.301751101516884e-05, "loss": 0.0696868896484375, "step": 4246 }, { "epoch": 0.574161385720321, "grad_norm": 0.4164893925189972, "learning_rate": 1.3010726835372377e-05, "loss": 0.06960058212280273, "step": 4247 }, { "epoch": 0.574296577946768, "grad_norm": 0.667211651802063, "learning_rate": 1.30039430697532e-05, "loss": 0.12839508056640625, "step": 4248 }, { "epoch": 0.574431770173215, "grad_norm": 0.35223665833473206, "learning_rate": 1.2997159719723713e-05, "loss": 0.05519413948059082, "step": 4249 }, { "epoch": 0.574566962399662, "grad_norm": 0.196187362074852, "learning_rate": 1.2990376786696254e-05, "loss": 0.049340248107910156, "step": 4250 }, { "epoch": 0.574702154626109, "grad_norm": 0.2209187150001526, "learning_rate": 1.2983594272083063e-05, "loss": 0.06000876426696777, "step": 4251 }, { "epoch": 0.5748373468525559, "grad_norm": 0.6193345785140991, "learning_rate": 1.2976812177296307e-05, "loss": 0.08017492294311523, "step": 4252 }, { "epoch": 0.5749725390790029, "grad_norm": 0.21420758962631226, "learning_rate": 1.2970030503748039e-05, "loss": 0.05553770065307617, "step": 4253 }, { "epoch": 0.5751077313054499, "grad_norm": 0.24513158202171326, "learning_rate": 1.2963249252850242e-05, "loss": 0.04099082946777344, "step": 4254 }, { "epoch": 0.5752429235318969, "grad_norm": 0.29867297410964966, "learning_rate": 1.295646842601481e-05, "loss": 0.05138349533081055, "step": 4255 }, { "epoch": 0.5753781157583439, "grad_norm": 0.5102136731147766, "learning_rate": 1.294968802465355e-05, "loss": 0.10262441635131836, "step": 4256 }, { "epoch": 0.5755133079847908, "grad_norm": 0.24540355801582336, "learning_rate": 1.2942908050178187e-05, "loss": 0.05449390411376953, "step": 4257 }, { "epoch": 0.5756485002112378, "grad_norm": 0.24239884316921234, "learning_rate": 1.293612850400033e-05, "loss": 0.05982780456542969, "step": 4258 }, { "epoch": 0.5757836924376848, "grad_norm": 0.3142111599445343, "learning_rate": 1.2929349387531525e-05, "loss": 0.08676910400390625, "step": 4259 }, { "epoch": 0.5759188846641318, "grad_norm": 0.28510600328445435, "learning_rate": 1.2922570702183217e-05, "loss": 0.054271697998046875, "step": 4260 }, { "epoch": 0.5760540768905787, "grad_norm": 0.2281937450170517, "learning_rate": 1.2915792449366768e-05, "loss": 0.05469083786010742, "step": 4261 }, { "epoch": 0.5761892691170257, "grad_norm": 0.3259390592575073, "learning_rate": 1.2909014630493451e-05, "loss": 0.06585693359375, "step": 4262 }, { "epoch": 0.5763244613434727, "grad_norm": 0.2396048605442047, "learning_rate": 1.2902237246974432e-05, "loss": 0.06091499328613281, "step": 4263 }, { "epoch": 0.5764596535699197, "grad_norm": 0.34529733657836914, "learning_rate": 1.289546030022081e-05, "loss": 0.06329727172851562, "step": 4264 }, { "epoch": 0.5765948457963667, "grad_norm": 0.2951788306236267, "learning_rate": 1.2888683791643572e-05, "loss": 0.05420076847076416, "step": 4265 }, { "epoch": 0.5767300380228136, "grad_norm": 0.27161645889282227, "learning_rate": 1.2881907722653633e-05, "loss": 0.044722557067871094, "step": 4266 }, { "epoch": 0.5768652302492606, "grad_norm": 0.19044330716133118, "learning_rate": 1.2875132094661796e-05, "loss": 0.04693293571472168, "step": 4267 }, { "epoch": 0.5770004224757076, "grad_norm": 0.42991766333580017, "learning_rate": 1.2868356909078787e-05, "loss": 0.08460688591003418, "step": 4268 }, { "epoch": 0.5771356147021546, "grad_norm": 0.23017805814743042, "learning_rate": 1.286158216731524e-05, "loss": 0.0533832311630249, "step": 4269 }, { "epoch": 0.5772708069286016, "grad_norm": 0.21773308515548706, "learning_rate": 1.2854807870781686e-05, "loss": 0.06833410263061523, "step": 4270 }, { "epoch": 0.5774059991550485, "grad_norm": 0.2180422693490982, "learning_rate": 1.284803402088858e-05, "loss": 0.058348894119262695, "step": 4271 }, { "epoch": 0.5775411913814955, "grad_norm": 0.39264976978302, "learning_rate": 1.284126061904626e-05, "loss": 0.06962013244628906, "step": 4272 }, { "epoch": 0.5776763836079425, "grad_norm": 0.40345919132232666, "learning_rate": 1.283448766666499e-05, "loss": 0.10006141662597656, "step": 4273 }, { "epoch": 0.5778115758343895, "grad_norm": 0.25123125314712524, "learning_rate": 1.282771516515494e-05, "loss": 0.0528331995010376, "step": 4274 }, { "epoch": 0.5779467680608364, "grad_norm": 0.3381307125091553, "learning_rate": 1.282094311592618e-05, "loss": 0.05719423294067383, "step": 4275 }, { "epoch": 0.5780819602872835, "grad_norm": 0.26049450039863586, "learning_rate": 1.2814171520388676e-05, "loss": 0.05408525466918945, "step": 4276 }, { "epoch": 0.5782171525137305, "grad_norm": 0.39702731370925903, "learning_rate": 1.2807400379952318e-05, "loss": 0.07463216781616211, "step": 4277 }, { "epoch": 0.5783523447401775, "grad_norm": 0.5777104496955872, "learning_rate": 1.2800629696026895e-05, "loss": 0.09904742240905762, "step": 4278 }, { "epoch": 0.5784875369666245, "grad_norm": 0.4103236198425293, "learning_rate": 1.2793859470022098e-05, "loss": 0.07288527488708496, "step": 4279 }, { "epoch": 0.5786227291930715, "grad_norm": 0.32955893874168396, "learning_rate": 1.278708970334753e-05, "loss": 0.07037973403930664, "step": 4280 }, { "epoch": 0.5787579214195184, "grad_norm": 0.3472101092338562, "learning_rate": 1.2780320397412678e-05, "loss": 0.07021617889404297, "step": 4281 }, { "epoch": 0.5788931136459654, "grad_norm": 0.6170212030410767, "learning_rate": 1.2773551553626957e-05, "loss": 0.06719493865966797, "step": 4282 }, { "epoch": 0.5790283058724124, "grad_norm": 0.4556148648262024, "learning_rate": 1.2766783173399675e-05, "loss": 0.07308840751647949, "step": 4283 }, { "epoch": 0.5791634980988594, "grad_norm": 0.5287730097770691, "learning_rate": 1.276001525814005e-05, "loss": 0.07259821891784668, "step": 4284 }, { "epoch": 0.5792986903253063, "grad_norm": 0.3877529799938202, "learning_rate": 1.2753247809257192e-05, "loss": 0.06958603858947754, "step": 4285 }, { "epoch": 0.5794338825517533, "grad_norm": 0.31652677059173584, "learning_rate": 1.2746480828160119e-05, "loss": 0.05983781814575195, "step": 4286 }, { "epoch": 0.5795690747782003, "grad_norm": 0.2596035897731781, "learning_rate": 1.2739714316257753e-05, "loss": 0.06442499160766602, "step": 4287 }, { "epoch": 0.5797042670046473, "grad_norm": 0.4206702411174774, "learning_rate": 1.273294827495892e-05, "loss": 0.08462643623352051, "step": 4288 }, { "epoch": 0.5798394592310943, "grad_norm": 0.3511323630809784, "learning_rate": 1.2726182705672352e-05, "loss": 0.06094837188720703, "step": 4289 }, { "epoch": 0.5799746514575412, "grad_norm": 0.2860526144504547, "learning_rate": 1.271941760980667e-05, "loss": 0.0730443000793457, "step": 4290 }, { "epoch": 0.5801098436839882, "grad_norm": 0.578201174736023, "learning_rate": 1.2712652988770396e-05, "loss": 0.07014274597167969, "step": 4291 }, { "epoch": 0.5802450359104352, "grad_norm": 0.2719123065471649, "learning_rate": 1.2705888843971967e-05, "loss": 0.05372893810272217, "step": 4292 }, { "epoch": 0.5803802281368822, "grad_norm": 0.37068089842796326, "learning_rate": 1.2699125176819717e-05, "loss": 0.07972216606140137, "step": 4293 }, { "epoch": 0.5805154203633291, "grad_norm": 0.14126096665859222, "learning_rate": 1.269236198872188e-05, "loss": 0.04064035415649414, "step": 4294 }, { "epoch": 0.5806506125897761, "grad_norm": 0.2503538727760315, "learning_rate": 1.2685599281086577e-05, "loss": 0.06243181228637695, "step": 4295 }, { "epoch": 0.5807858048162231, "grad_norm": 0.15925723314285278, "learning_rate": 1.2678837055321849e-05, "loss": 0.04054218530654907, "step": 4296 }, { "epoch": 0.5809209970426701, "grad_norm": 0.3280768394470215, "learning_rate": 1.267207531283562e-05, "loss": 0.07500123977661133, "step": 4297 }, { "epoch": 0.5810561892691171, "grad_norm": 0.17902933061122894, "learning_rate": 1.266531405503573e-05, "loss": 0.035021305084228516, "step": 4298 }, { "epoch": 0.581191381495564, "grad_norm": 0.30209484696388245, "learning_rate": 1.26585532833299e-05, "loss": 0.078033447265625, "step": 4299 }, { "epoch": 0.581326573722011, "grad_norm": 0.4677204489707947, "learning_rate": 1.2651792999125763e-05, "loss": 0.08407258987426758, "step": 4300 }, { "epoch": 0.581461765948458, "grad_norm": 0.18018262088298798, "learning_rate": 1.2645033203830846e-05, "loss": 0.04263925552368164, "step": 4301 }, { "epoch": 0.581596958174905, "grad_norm": 0.23889626562595367, "learning_rate": 1.2638273898852573e-05, "loss": 0.06528663635253906, "step": 4302 }, { "epoch": 0.581732150401352, "grad_norm": 0.22320957481861115, "learning_rate": 1.2631515085598275e-05, "loss": 0.054903507232666016, "step": 4303 }, { "epoch": 0.5818673426277989, "grad_norm": 0.30559372901916504, "learning_rate": 1.262475676547516e-05, "loss": 0.08128738403320312, "step": 4304 }, { "epoch": 0.5820025348542459, "grad_norm": 0.32230517268180847, "learning_rate": 1.2617998939890352e-05, "loss": 0.05758810043334961, "step": 4305 }, { "epoch": 0.5821377270806929, "grad_norm": 0.3014519214630127, "learning_rate": 1.261124161025087e-05, "loss": 0.07735538482666016, "step": 4306 }, { "epoch": 0.5822729193071399, "grad_norm": 0.2579478919506073, "learning_rate": 1.260448477796362e-05, "loss": 0.06965446472167969, "step": 4307 }, { "epoch": 0.5824081115335868, "grad_norm": 0.16503727436065674, "learning_rate": 1.259772844443542e-05, "loss": 0.03880476951599121, "step": 4308 }, { "epoch": 0.5825433037600338, "grad_norm": 0.47773122787475586, "learning_rate": 1.2590972611072964e-05, "loss": 0.0793771743774414, "step": 4309 }, { "epoch": 0.5826784959864808, "grad_norm": 0.5266780257225037, "learning_rate": 1.2584217279282855e-05, "loss": 0.08047771453857422, "step": 4310 }, { "epoch": 0.5828136882129278, "grad_norm": 0.19990146160125732, "learning_rate": 1.2577462450471593e-05, "loss": 0.04231917858123779, "step": 4311 }, { "epoch": 0.5829488804393748, "grad_norm": 0.40081676840782166, "learning_rate": 1.2570708126045574e-05, "loss": 0.09717178344726562, "step": 4312 }, { "epoch": 0.5830840726658217, "grad_norm": 0.23123854398727417, "learning_rate": 1.256395430741107e-05, "loss": 0.0459747314453125, "step": 4313 }, { "epoch": 0.5832192648922687, "grad_norm": 0.2861766815185547, "learning_rate": 1.2557200995974268e-05, "loss": 0.06968975067138672, "step": 4314 }, { "epoch": 0.5833544571187157, "grad_norm": 0.3242214620113373, "learning_rate": 1.2550448193141248e-05, "loss": 0.05453753471374512, "step": 4315 }, { "epoch": 0.5834896493451627, "grad_norm": 0.33520573377609253, "learning_rate": 1.2543695900317977e-05, "loss": 0.07026195526123047, "step": 4316 }, { "epoch": 0.5836248415716097, "grad_norm": 0.353973388671875, "learning_rate": 1.2536944118910323e-05, "loss": 0.05172041058540344, "step": 4317 }, { "epoch": 0.5837600337980566, "grad_norm": 0.20054422318935394, "learning_rate": 1.2530192850324032e-05, "loss": 0.048375606536865234, "step": 4318 }, { "epoch": 0.5838952260245036, "grad_norm": 0.18036530911922455, "learning_rate": 1.252344209596476e-05, "loss": 0.041887491941452026, "step": 4319 }, { "epoch": 0.5840304182509506, "grad_norm": 0.41878896951675415, "learning_rate": 1.251669185723805e-05, "loss": 0.08332812786102295, "step": 4320 }, { "epoch": 0.5841656104773976, "grad_norm": 0.3873213827610016, "learning_rate": 1.2509942135549344e-05, "loss": 0.06663715839385986, "step": 4321 }, { "epoch": 0.5843008027038445, "grad_norm": 0.6192523837089539, "learning_rate": 1.250319293230396e-05, "loss": 0.07886505126953125, "step": 4322 }, { "epoch": 0.5844359949302915, "grad_norm": 0.4124995172023773, "learning_rate": 1.2496444248907121e-05, "loss": 0.07800483703613281, "step": 4323 }, { "epoch": 0.5845711871567385, "grad_norm": 0.14207519590854645, "learning_rate": 1.2489696086763939e-05, "loss": 0.03564596176147461, "step": 4324 }, { "epoch": 0.5847063793831855, "grad_norm": 0.31678009033203125, "learning_rate": 1.2482948447279417e-05, "loss": 0.059558868408203125, "step": 4325 }, { "epoch": 0.5848415716096325, "grad_norm": 0.3399200439453125, "learning_rate": 1.2476201331858458e-05, "loss": 0.07506275177001953, "step": 4326 }, { "epoch": 0.5849767638360794, "grad_norm": 0.3999166488647461, "learning_rate": 1.2469454741905839e-05, "loss": 0.06264925003051758, "step": 4327 }, { "epoch": 0.5851119560625264, "grad_norm": 0.4690093398094177, "learning_rate": 1.2462708678826233e-05, "loss": 0.08655166625976562, "step": 4328 }, { "epoch": 0.5852471482889734, "grad_norm": 0.31304731965065, "learning_rate": 1.245596314402421e-05, "loss": 0.08196377754211426, "step": 4329 }, { "epoch": 0.5853823405154204, "grad_norm": 0.23181632161140442, "learning_rate": 1.2449218138904225e-05, "loss": 0.05222761631011963, "step": 4330 }, { "epoch": 0.5855175327418674, "grad_norm": 0.22853532433509827, "learning_rate": 1.2442473664870636e-05, "loss": 0.06299638748168945, "step": 4331 }, { "epoch": 0.5856527249683143, "grad_norm": 0.3060552775859833, "learning_rate": 1.2435729723327661e-05, "loss": 0.06338882446289062, "step": 4332 }, { "epoch": 0.5857879171947613, "grad_norm": 0.38231733441352844, "learning_rate": 1.2428986315679433e-05, "loss": 0.07598447799682617, "step": 4333 }, { "epoch": 0.5859231094212083, "grad_norm": 0.4449533522129059, "learning_rate": 1.2422243443329962e-05, "loss": 0.07172644138336182, "step": 4334 }, { "epoch": 0.5860583016476553, "grad_norm": 0.3609060049057007, "learning_rate": 1.241550110768316e-05, "loss": 0.07144355773925781, "step": 4335 }, { "epoch": 0.5861934938741022, "grad_norm": 0.4742407202720642, "learning_rate": 1.2408759310142803e-05, "loss": 0.08863067626953125, "step": 4336 }, { "epoch": 0.5863286861005492, "grad_norm": 0.40765735507011414, "learning_rate": 1.2402018052112576e-05, "loss": 0.0685725212097168, "step": 4337 }, { "epoch": 0.5864638783269962, "grad_norm": 0.2166009247303009, "learning_rate": 1.2395277334996045e-05, "loss": 0.05163097381591797, "step": 4338 }, { "epoch": 0.5865990705534432, "grad_norm": 0.2810015380382538, "learning_rate": 1.2388537160196663e-05, "loss": 0.07212471961975098, "step": 4339 }, { "epoch": 0.5867342627798902, "grad_norm": 0.8222311735153198, "learning_rate": 1.2381797529117776e-05, "loss": 0.09038233757019043, "step": 4340 }, { "epoch": 0.5868694550063371, "grad_norm": 0.4227246344089508, "learning_rate": 1.23750584431626e-05, "loss": 0.08516407012939453, "step": 4341 }, { "epoch": 0.5870046472327841, "grad_norm": 0.278315007686615, "learning_rate": 1.236831990373425e-05, "loss": 0.07106304168701172, "step": 4342 }, { "epoch": 0.5871398394592311, "grad_norm": 0.46818864345550537, "learning_rate": 1.2361581912235736e-05, "loss": 0.09428846836090088, "step": 4343 }, { "epoch": 0.5872750316856781, "grad_norm": 0.42657315731048584, "learning_rate": 1.235484447006994e-05, "loss": 0.06872272491455078, "step": 4344 }, { "epoch": 0.587410223912125, "grad_norm": 0.341647744178772, "learning_rate": 1.2348107578639627e-05, "loss": 0.05318880081176758, "step": 4345 }, { "epoch": 0.587545416138572, "grad_norm": 0.28094810247421265, "learning_rate": 1.2341371239347454e-05, "loss": 0.07716250419616699, "step": 4346 }, { "epoch": 0.587680608365019, "grad_norm": 0.29405146837234497, "learning_rate": 1.233463545359597e-05, "loss": 0.07787227630615234, "step": 4347 }, { "epoch": 0.587815800591466, "grad_norm": 0.26101914048194885, "learning_rate": 1.23279002227876e-05, "loss": 0.06612205505371094, "step": 4348 }, { "epoch": 0.587950992817913, "grad_norm": 0.40927189588546753, "learning_rate": 1.2321165548324655e-05, "loss": 0.08002543449401855, "step": 4349 }, { "epoch": 0.5880861850443599, "grad_norm": 0.3589462339878082, "learning_rate": 1.2314431431609323e-05, "loss": 0.05216789245605469, "step": 4350 }, { "epoch": 0.5882213772708069, "grad_norm": 0.268644779920578, "learning_rate": 1.2307697874043687e-05, "loss": 0.0704643726348877, "step": 4351 }, { "epoch": 0.5883565694972539, "grad_norm": 0.2965880334377289, "learning_rate": 1.2300964877029712e-05, "loss": 0.05938005447387695, "step": 4352 }, { "epoch": 0.5884917617237009, "grad_norm": 0.3145478069782257, "learning_rate": 1.2294232441969246e-05, "loss": 0.06465482711791992, "step": 4353 }, { "epoch": 0.5886269539501479, "grad_norm": 0.28410008549690247, "learning_rate": 1.2287500570264017e-05, "loss": 0.05022144317626953, "step": 4354 }, { "epoch": 0.5887621461765948, "grad_norm": 0.23661217093467712, "learning_rate": 1.2280769263315628e-05, "loss": 0.042916297912597656, "step": 4355 }, { "epoch": 0.5888973384030418, "grad_norm": 0.39567339420318604, "learning_rate": 1.2274038522525577e-05, "loss": 0.08017563819885254, "step": 4356 }, { "epoch": 0.5890325306294888, "grad_norm": 0.31014779210090637, "learning_rate": 1.2267308349295246e-05, "loss": 0.07112038135528564, "step": 4357 }, { "epoch": 0.5891677228559358, "grad_norm": 0.1551205962896347, "learning_rate": 1.2260578745025892e-05, "loss": 0.02809929847717285, "step": 4358 }, { "epoch": 0.5893029150823828, "grad_norm": 0.40543797612190247, "learning_rate": 1.225384971111865e-05, "loss": 0.09670495986938477, "step": 4359 }, { "epoch": 0.5894381073088297, "grad_norm": 0.3169974088668823, "learning_rate": 1.224712124897454e-05, "loss": 0.06278276443481445, "step": 4360 }, { "epoch": 0.5895732995352767, "grad_norm": 0.3309388756752014, "learning_rate": 1.2240393359994466e-05, "loss": 0.07917428016662598, "step": 4361 }, { "epoch": 0.5897084917617237, "grad_norm": 0.48446932435035706, "learning_rate": 1.2233666045579209e-05, "loss": 0.0726470947265625, "step": 4362 }, { "epoch": 0.5898436839881707, "grad_norm": 0.32095468044281006, "learning_rate": 1.222693930712944e-05, "loss": 0.08367252349853516, "step": 4363 }, { "epoch": 0.5899788762146176, "grad_norm": 1.0433701276779175, "learning_rate": 1.2220213146045691e-05, "loss": 0.0549013614654541, "step": 4364 }, { "epoch": 0.5901140684410646, "grad_norm": 0.48906344175338745, "learning_rate": 1.2213487563728389e-05, "loss": 0.04992187023162842, "step": 4365 }, { "epoch": 0.5902492606675116, "grad_norm": 0.6377118825912476, "learning_rate": 1.220676256157783e-05, "loss": 0.07468461990356445, "step": 4366 }, { "epoch": 0.5903844528939586, "grad_norm": 0.2501105070114136, "learning_rate": 1.2200038140994212e-05, "loss": 0.04983377456665039, "step": 4367 }, { "epoch": 0.5905196451204056, "grad_norm": 0.15972237288951874, "learning_rate": 1.2193314303377578e-05, "loss": 0.03852057456970215, "step": 4368 }, { "epoch": 0.5906548373468525, "grad_norm": 0.2540685832500458, "learning_rate": 1.2186591050127874e-05, "loss": 0.052690982818603516, "step": 4369 }, { "epoch": 0.5907900295732995, "grad_norm": 0.30108579993247986, "learning_rate": 1.2179868382644916e-05, "loss": 0.07224893569946289, "step": 4370 }, { "epoch": 0.5909252217997465, "grad_norm": 0.38752955198287964, "learning_rate": 1.2173146302328396e-05, "loss": 0.06566667556762695, "step": 4371 }, { "epoch": 0.5910604140261935, "grad_norm": 0.37940678000450134, "learning_rate": 1.21664248105779e-05, "loss": 0.06433296203613281, "step": 4372 }, { "epoch": 0.5911956062526404, "grad_norm": 0.9298123717308044, "learning_rate": 1.2159703908792858e-05, "loss": 0.11943340301513672, "step": 4373 }, { "epoch": 0.5913307984790874, "grad_norm": 0.4051517844200134, "learning_rate": 1.2152983598372613e-05, "loss": 0.08199405670166016, "step": 4374 }, { "epoch": 0.5914659907055344, "grad_norm": 0.4235312342643738, "learning_rate": 1.2146263880716366e-05, "loss": 0.07595133781433105, "step": 4375 }, { "epoch": 0.5916011829319814, "grad_norm": 0.43915656208992004, "learning_rate": 1.2139544757223194e-05, "loss": 0.11987781524658203, "step": 4376 }, { "epoch": 0.5917363751584284, "grad_norm": 0.40207841992378235, "learning_rate": 1.2132826229292066e-05, "loss": 0.08121633529663086, "step": 4377 }, { "epoch": 0.5918715673848753, "grad_norm": 0.47785353660583496, "learning_rate": 1.2126108298321798e-05, "loss": 0.0492706298828125, "step": 4378 }, { "epoch": 0.5920067596113223, "grad_norm": 0.3230375647544861, "learning_rate": 1.2119390965711107e-05, "loss": 0.07749605178833008, "step": 4379 }, { "epoch": 0.5921419518377693, "grad_norm": 0.29489728808403015, "learning_rate": 1.2112674232858582e-05, "loss": 0.05100727081298828, "step": 4380 }, { "epoch": 0.5922771440642163, "grad_norm": 0.6030992865562439, "learning_rate": 1.2105958101162684e-05, "loss": 0.09671342372894287, "step": 4381 }, { "epoch": 0.5924123362906633, "grad_norm": 0.2506835162639618, "learning_rate": 1.2099242572021735e-05, "loss": 0.055449843406677246, "step": 4382 }, { "epoch": 0.5925475285171102, "grad_norm": 0.17606131732463837, "learning_rate": 1.209252764683395e-05, "loss": 0.04475855827331543, "step": 4383 }, { "epoch": 0.5926827207435572, "grad_norm": 0.3181266188621521, "learning_rate": 1.2085813326997414e-05, "loss": 0.06715583801269531, "step": 4384 }, { "epoch": 0.5928179129700042, "grad_norm": 0.31322917342185974, "learning_rate": 1.2079099613910088e-05, "loss": 0.07672882080078125, "step": 4385 }, { "epoch": 0.5929531051964512, "grad_norm": 0.41575130820274353, "learning_rate": 1.20723865089698e-05, "loss": 0.062352657318115234, "step": 4386 }, { "epoch": 0.5930882974228981, "grad_norm": 0.22439508140087128, "learning_rate": 1.2065674013574248e-05, "loss": 0.05141806602478027, "step": 4387 }, { "epoch": 0.5932234896493451, "grad_norm": 0.3074764609336853, "learning_rate": 1.2058962129121013e-05, "loss": 0.06819868087768555, "step": 4388 }, { "epoch": 0.5933586818757921, "grad_norm": 0.4605916142463684, "learning_rate": 1.2052250857007548e-05, "loss": 0.0662388801574707, "step": 4389 }, { "epoch": 0.5934938741022391, "grad_norm": 0.22463290393352509, "learning_rate": 1.2045540198631177e-05, "loss": 0.048723697662353516, "step": 4390 }, { "epoch": 0.5936290663286861, "grad_norm": 0.2734999656677246, "learning_rate": 1.2038830155389091e-05, "loss": 0.06160116195678711, "step": 4391 }, { "epoch": 0.593764258555133, "grad_norm": 0.5060994029045105, "learning_rate": 1.2032120728678354e-05, "loss": 0.06933116912841797, "step": 4392 }, { "epoch": 0.59389945078158, "grad_norm": 0.3359962999820709, "learning_rate": 1.2025411919895907e-05, "loss": 0.06267333030700684, "step": 4393 }, { "epoch": 0.594034643008027, "grad_norm": 0.3087286949157715, "learning_rate": 1.2018703730438561e-05, "loss": 0.07016444206237793, "step": 4394 }, { "epoch": 0.594169835234474, "grad_norm": 0.37510600686073303, "learning_rate": 1.2011996161703003e-05, "loss": 0.07643699645996094, "step": 4395 }, { "epoch": 0.594305027460921, "grad_norm": 0.5119466185569763, "learning_rate": 1.2005289215085775e-05, "loss": 0.10711193084716797, "step": 4396 }, { "epoch": 0.5944402196873679, "grad_norm": 0.33476558327674866, "learning_rate": 1.19985828919833e-05, "loss": 0.05661749839782715, "step": 4397 }, { "epoch": 0.5945754119138149, "grad_norm": 0.19884833693504333, "learning_rate": 1.1991877193791872e-05, "loss": 0.048090994358062744, "step": 4398 }, { "epoch": 0.5947106041402619, "grad_norm": 0.26273098587989807, "learning_rate": 1.1985172121907653e-05, "loss": 0.06650567054748535, "step": 4399 }, { "epoch": 0.5948457963667089, "grad_norm": 0.6869086623191833, "learning_rate": 1.1978467677726682e-05, "loss": 0.0905604362487793, "step": 4400 }, { "epoch": 0.5949809885931558, "grad_norm": 0.5525241494178772, "learning_rate": 1.197176386264485e-05, "loss": 0.08647680282592773, "step": 4401 }, { "epoch": 0.5951161808196028, "grad_norm": 0.17063087224960327, "learning_rate": 1.1965060678057927e-05, "loss": 0.04663515090942383, "step": 4402 }, { "epoch": 0.5952513730460498, "grad_norm": 0.5738041996955872, "learning_rate": 1.1958358125361554e-05, "loss": 0.10048294067382812, "step": 4403 }, { "epoch": 0.5953865652724968, "grad_norm": 0.18927814066410065, "learning_rate": 1.1951656205951247e-05, "loss": 0.04247361421585083, "step": 4404 }, { "epoch": 0.5955217574989438, "grad_norm": 0.4425094425678253, "learning_rate": 1.1944954921222367e-05, "loss": 0.06999385356903076, "step": 4405 }, { "epoch": 0.5956569497253907, "grad_norm": 0.2552849352359772, "learning_rate": 1.1938254272570167e-05, "loss": 0.03948771953582764, "step": 4406 }, { "epoch": 0.5957921419518377, "grad_norm": 0.337760329246521, "learning_rate": 1.1931554261389751e-05, "loss": 0.06551945209503174, "step": 4407 }, { "epoch": 0.5959273341782847, "grad_norm": 0.35250124335289, "learning_rate": 1.1924854889076103e-05, "loss": 0.07527542114257812, "step": 4408 }, { "epoch": 0.5960625264047317, "grad_norm": 0.29173189401626587, "learning_rate": 1.191815615702407e-05, "loss": 0.057657837867736816, "step": 4409 }, { "epoch": 0.5961977186311787, "grad_norm": 0.33585357666015625, "learning_rate": 1.1911458066628353e-05, "loss": 0.06293225288391113, "step": 4410 }, { "epoch": 0.5963329108576257, "grad_norm": 0.41833505034446716, "learning_rate": 1.1904760619283537e-05, "loss": 0.0571436882019043, "step": 4411 }, { "epoch": 0.5964681030840727, "grad_norm": 0.48114222288131714, "learning_rate": 1.1898063816384069e-05, "loss": 0.09582042694091797, "step": 4412 }, { "epoch": 0.5966032953105197, "grad_norm": 0.2538919150829315, "learning_rate": 1.189136765932426e-05, "loss": 0.0637664794921875, "step": 4413 }, { "epoch": 0.5967384875369667, "grad_norm": 0.1911185383796692, "learning_rate": 1.1884672149498276e-05, "loss": 0.05926513671875, "step": 4414 }, { "epoch": 0.5968736797634137, "grad_norm": 0.37829163670539856, "learning_rate": 1.187797728830016e-05, "loss": 0.04589354991912842, "step": 4415 }, { "epoch": 0.5970088719898606, "grad_norm": 0.560752272605896, "learning_rate": 1.1871283077123823e-05, "loss": 0.09465360641479492, "step": 4416 }, { "epoch": 0.5971440642163076, "grad_norm": 0.23581284284591675, "learning_rate": 1.1864589517363038e-05, "loss": 0.04889726638793945, "step": 4417 }, { "epoch": 0.5972792564427546, "grad_norm": 0.2395762801170349, "learning_rate": 1.185789661041144e-05, "loss": 0.0637204647064209, "step": 4418 }, { "epoch": 0.5974144486692016, "grad_norm": 0.24712955951690674, "learning_rate": 1.1851204357662513e-05, "loss": 0.06745243072509766, "step": 4419 }, { "epoch": 0.5975496408956485, "grad_norm": 0.2869206666946411, "learning_rate": 1.1844512760509634e-05, "loss": 0.050952911376953125, "step": 4420 }, { "epoch": 0.5976848331220955, "grad_norm": 0.36108118295669556, "learning_rate": 1.1837821820346022e-05, "loss": 0.07438981533050537, "step": 4421 }, { "epoch": 0.5978200253485425, "grad_norm": 0.4791282117366791, "learning_rate": 1.1831131538564775e-05, "loss": 0.09142017364501953, "step": 4422 }, { "epoch": 0.5979552175749895, "grad_norm": 0.1886630803346634, "learning_rate": 1.1824441916558843e-05, "loss": 0.04613995552062988, "step": 4423 }, { "epoch": 0.5980904098014365, "grad_norm": 0.27580684423446655, "learning_rate": 1.1817752955721031e-05, "loss": 0.0632939338684082, "step": 4424 }, { "epoch": 0.5982256020278834, "grad_norm": 0.35286441445350647, "learning_rate": 1.1811064657444023e-05, "loss": 0.07007050514221191, "step": 4425 }, { "epoch": 0.5983607942543304, "grad_norm": 0.5837258696556091, "learning_rate": 1.1804377023120361e-05, "loss": 0.09163427352905273, "step": 4426 }, { "epoch": 0.5984959864807774, "grad_norm": 0.3789232075214386, "learning_rate": 1.1797690054142451e-05, "loss": 0.05483245849609375, "step": 4427 }, { "epoch": 0.5986311787072244, "grad_norm": 0.17387795448303223, "learning_rate": 1.1791003751902542e-05, "loss": 0.04521942138671875, "step": 4428 }, { "epoch": 0.5987663709336714, "grad_norm": 0.36982420086860657, "learning_rate": 1.1784318117792763e-05, "loss": 0.05423092842102051, "step": 4429 }, { "epoch": 0.5989015631601183, "grad_norm": 0.4229053556919098, "learning_rate": 1.17776331532051e-05, "loss": 0.06500959396362305, "step": 4430 }, { "epoch": 0.5990367553865653, "grad_norm": 0.416337788105011, "learning_rate": 1.1770948859531397e-05, "loss": 0.07033443450927734, "step": 4431 }, { "epoch": 0.5991719476130123, "grad_norm": 0.40159526467323303, "learning_rate": 1.1764265238163369e-05, "loss": 0.07051944732666016, "step": 4432 }, { "epoch": 0.5993071398394593, "grad_norm": 0.23113057017326355, "learning_rate": 1.1757582290492568e-05, "loss": 0.0779874324798584, "step": 4433 }, { "epoch": 0.5994423320659062, "grad_norm": 0.41072410345077515, "learning_rate": 1.1750900017910425e-05, "loss": 0.0892038345336914, "step": 4434 }, { "epoch": 0.5995775242923532, "grad_norm": 0.2522313892841339, "learning_rate": 1.1744218421808221e-05, "loss": 0.05801105499267578, "step": 4435 }, { "epoch": 0.5997127165188002, "grad_norm": 0.15272016823291779, "learning_rate": 1.1737537503577112e-05, "loss": 0.04536318778991699, "step": 4436 }, { "epoch": 0.5998479087452472, "grad_norm": 0.3802728056907654, "learning_rate": 1.1730857264608086e-05, "loss": 0.06792402267456055, "step": 4437 }, { "epoch": 0.5999831009716942, "grad_norm": 0.33153486251831055, "learning_rate": 1.1724177706292013e-05, "loss": 0.067108154296875, "step": 4438 }, { "epoch": 0.6001182931981411, "grad_norm": 0.2935335338115692, "learning_rate": 1.1717498830019607e-05, "loss": 0.050164103507995605, "step": 4439 }, { "epoch": 0.6002534854245881, "grad_norm": 0.2432565540075302, "learning_rate": 1.1710820637181449e-05, "loss": 0.06165719032287598, "step": 4440 }, { "epoch": 0.6003886776510351, "grad_norm": 0.22496061027050018, "learning_rate": 1.170414312916798e-05, "loss": 0.07255363464355469, "step": 4441 }, { "epoch": 0.6005238698774821, "grad_norm": 0.295534610748291, "learning_rate": 1.1697466307369484e-05, "loss": 0.0618743896484375, "step": 4442 }, { "epoch": 0.600659062103929, "grad_norm": 0.20630301535129547, "learning_rate": 1.1690790173176116e-05, "loss": 0.05825996398925781, "step": 4443 }, { "epoch": 0.600794254330376, "grad_norm": 0.13719916343688965, "learning_rate": 1.1684114727977876e-05, "loss": 0.035945743322372437, "step": 4444 }, { "epoch": 0.600929446556823, "grad_norm": 0.34321242570877075, "learning_rate": 1.167743997316464e-05, "loss": 0.07386445999145508, "step": 4445 }, { "epoch": 0.60106463878327, "grad_norm": 0.1980847269296646, "learning_rate": 1.1670765910126112e-05, "loss": 0.05725669860839844, "step": 4446 }, { "epoch": 0.601199831009717, "grad_norm": 0.19179165363311768, "learning_rate": 1.1664092540251877e-05, "loss": 0.03864544630050659, "step": 4447 }, { "epoch": 0.601335023236164, "grad_norm": 0.36496564745903015, "learning_rate": 1.1657419864931361e-05, "loss": 0.06734848022460938, "step": 4448 }, { "epoch": 0.6014702154626109, "grad_norm": 0.42936888337135315, "learning_rate": 1.165074788555386e-05, "loss": 0.0985708236694336, "step": 4449 }, { "epoch": 0.6016054076890579, "grad_norm": 0.36141806840896606, "learning_rate": 1.1644076603508514e-05, "loss": 0.07548117637634277, "step": 4450 }, { "epoch": 0.6017405999155049, "grad_norm": 0.16163617372512817, "learning_rate": 1.1637406020184305e-05, "loss": 0.02950739860534668, "step": 4451 }, { "epoch": 0.6018757921419519, "grad_norm": 0.32977062463760376, "learning_rate": 1.1630736136970097e-05, "loss": 0.06973505020141602, "step": 4452 }, { "epoch": 0.6020109843683988, "grad_norm": 0.23191730678081512, "learning_rate": 1.162406695525459e-05, "loss": 0.03450584411621094, "step": 4453 }, { "epoch": 0.6021461765948458, "grad_norm": 0.26966822147369385, "learning_rate": 1.161739847642635e-05, "loss": 0.044283151626586914, "step": 4454 }, { "epoch": 0.6022813688212928, "grad_norm": 0.2920010983943939, "learning_rate": 1.1610730701873788e-05, "loss": 0.04284381866455078, "step": 4455 }, { "epoch": 0.6024165610477398, "grad_norm": 0.22126412391662598, "learning_rate": 1.1604063632985163e-05, "loss": 0.06336522102355957, "step": 4456 }, { "epoch": 0.6025517532741868, "grad_norm": 0.2864057421684265, "learning_rate": 1.1597397271148598e-05, "loss": 0.06883811950683594, "step": 4457 }, { "epoch": 0.6026869455006337, "grad_norm": 0.5990896224975586, "learning_rate": 1.1590731617752067e-05, "loss": 0.1023101806640625, "step": 4458 }, { "epoch": 0.6028221377270807, "grad_norm": 0.2607079744338989, "learning_rate": 1.1584066674183398e-05, "loss": 0.07424402236938477, "step": 4459 }, { "epoch": 0.6029573299535277, "grad_norm": 0.13554170727729797, "learning_rate": 1.1577402441830262e-05, "loss": 0.03943014144897461, "step": 4460 }, { "epoch": 0.6030925221799747, "grad_norm": 0.4893263280391693, "learning_rate": 1.1570738922080185e-05, "loss": 0.08084595203399658, "step": 4461 }, { "epoch": 0.6032277144064216, "grad_norm": 0.3303582966327667, "learning_rate": 1.1564076116320552e-05, "loss": 0.05921870470046997, "step": 4462 }, { "epoch": 0.6033629066328686, "grad_norm": 0.26532837748527527, "learning_rate": 1.1557414025938592e-05, "loss": 0.05894160270690918, "step": 4463 }, { "epoch": 0.6034980988593156, "grad_norm": 0.2555946409702301, "learning_rate": 1.15507526523214e-05, "loss": 0.06483268737792969, "step": 4464 }, { "epoch": 0.6036332910857626, "grad_norm": 0.1355355978012085, "learning_rate": 1.1544091996855895e-05, "loss": 0.042125701904296875, "step": 4465 }, { "epoch": 0.6037684833122096, "grad_norm": 1.1640866994857788, "learning_rate": 1.153743206092886e-05, "loss": 0.07763147354125977, "step": 4466 }, { "epoch": 0.6039036755386565, "grad_norm": 0.3471051752567291, "learning_rate": 1.1530772845926936e-05, "loss": 0.06594324111938477, "step": 4467 }, { "epoch": 0.6040388677651035, "grad_norm": 0.14810936152935028, "learning_rate": 1.1524114353236614e-05, "loss": 0.038870811462402344, "step": 4468 }, { "epoch": 0.6041740599915505, "grad_norm": 0.315115749835968, "learning_rate": 1.151745658424421e-05, "loss": 0.06246590614318848, "step": 4469 }, { "epoch": 0.6043092522179975, "grad_norm": 0.2308308631181717, "learning_rate": 1.151079954033592e-05, "loss": 0.06431865692138672, "step": 4470 }, { "epoch": 0.6044444444444445, "grad_norm": 0.3533167541027069, "learning_rate": 1.150414322289777e-05, "loss": 0.06346344947814941, "step": 4471 }, { "epoch": 0.6045796366708914, "grad_norm": 0.26321864128112793, "learning_rate": 1.1497487633315643e-05, "loss": 0.06212043762207031, "step": 4472 }, { "epoch": 0.6047148288973384, "grad_norm": 0.23988419771194458, "learning_rate": 1.1490832772975275e-05, "loss": 0.05938410758972168, "step": 4473 }, { "epoch": 0.6048500211237854, "grad_norm": 1.596050500869751, "learning_rate": 1.148417864326223e-05, "loss": 0.06836318969726562, "step": 4474 }, { "epoch": 0.6049852133502324, "grad_norm": 0.18255159258842468, "learning_rate": 1.1477525245561944e-05, "loss": 0.041760921478271484, "step": 4475 }, { "epoch": 0.6051204055766793, "grad_norm": 0.35769742727279663, "learning_rate": 1.1470872581259684e-05, "loss": 0.06638026237487793, "step": 4476 }, { "epoch": 0.6052555978031263, "grad_norm": 0.2494780272245407, "learning_rate": 1.146422065174057e-05, "loss": 0.05401611328125, "step": 4477 }, { "epoch": 0.6053907900295733, "grad_norm": 0.25412124395370483, "learning_rate": 1.1457569458389578e-05, "loss": 0.055255889892578125, "step": 4478 }, { "epoch": 0.6055259822560203, "grad_norm": 0.3694517910480499, "learning_rate": 1.145091900259151e-05, "loss": 0.07106637954711914, "step": 4479 }, { "epoch": 0.6056611744824673, "grad_norm": 0.3325493633747101, "learning_rate": 1.1444269285731032e-05, "loss": 0.0693817138671875, "step": 4480 }, { "epoch": 0.6057963667089142, "grad_norm": 0.34629350900650024, "learning_rate": 1.1437620309192652e-05, "loss": 0.07529926300048828, "step": 4481 }, { "epoch": 0.6059315589353612, "grad_norm": 0.35613033175468445, "learning_rate": 1.1430972074360722e-05, "loss": 0.09632658958435059, "step": 4482 }, { "epoch": 0.6060667511618082, "grad_norm": 0.49247199296951294, "learning_rate": 1.1424324582619435e-05, "loss": 0.08773231506347656, "step": 4483 }, { "epoch": 0.6062019433882552, "grad_norm": 0.17535701394081116, "learning_rate": 1.1417677835352837e-05, "loss": 0.047327399253845215, "step": 4484 }, { "epoch": 0.6063371356147021, "grad_norm": 0.3107914328575134, "learning_rate": 1.1411031833944816e-05, "loss": 0.08040523529052734, "step": 4485 }, { "epoch": 0.6064723278411491, "grad_norm": 0.3590696156024933, "learning_rate": 1.1404386579779111e-05, "loss": 0.07081890106201172, "step": 4486 }, { "epoch": 0.6066075200675961, "grad_norm": 0.37001606822013855, "learning_rate": 1.1397742074239296e-05, "loss": 0.11190128326416016, "step": 4487 }, { "epoch": 0.6067427122940431, "grad_norm": 0.37231770157814026, "learning_rate": 1.1391098318708785e-05, "loss": 0.06675958633422852, "step": 4488 }, { "epoch": 0.6068779045204901, "grad_norm": 0.7272331714630127, "learning_rate": 1.1384455314570848e-05, "loss": 0.10275650024414062, "step": 4489 }, { "epoch": 0.607013096746937, "grad_norm": 0.3578093349933624, "learning_rate": 1.1377813063208596e-05, "loss": 0.091766357421875, "step": 4490 }, { "epoch": 0.607148288973384, "grad_norm": 0.40044039487838745, "learning_rate": 1.1371171566004986e-05, "loss": 0.08358001708984375, "step": 4491 }, { "epoch": 0.607283481199831, "grad_norm": 0.2737836241722107, "learning_rate": 1.1364530824342806e-05, "loss": 0.06424522399902344, "step": 4492 }, { "epoch": 0.607418673426278, "grad_norm": 0.17177489399909973, "learning_rate": 1.1357890839604688e-05, "loss": 0.03950929641723633, "step": 4493 }, { "epoch": 0.607553865652725, "grad_norm": 0.2997469902038574, "learning_rate": 1.1351251613173122e-05, "loss": 0.06270122528076172, "step": 4494 }, { "epoch": 0.6076890578791719, "grad_norm": 0.2717417776584625, "learning_rate": 1.1344613146430428e-05, "loss": 0.0774831771850586, "step": 4495 }, { "epoch": 0.6078242501056189, "grad_norm": 0.3255259692668915, "learning_rate": 1.1337975440758775e-05, "loss": 0.07410800457000732, "step": 4496 }, { "epoch": 0.6079594423320659, "grad_norm": 0.20851555466651917, "learning_rate": 1.133133849754016e-05, "loss": 0.047528743743896484, "step": 4497 }, { "epoch": 0.6080946345585129, "grad_norm": 0.4493762254714966, "learning_rate": 1.1324702318156431e-05, "loss": 0.05563664436340332, "step": 4498 }, { "epoch": 0.6082298267849598, "grad_norm": 0.4611785113811493, "learning_rate": 1.1318066903989279e-05, "loss": 0.05272743105888367, "step": 4499 }, { "epoch": 0.6083650190114068, "grad_norm": 0.30822551250457764, "learning_rate": 1.1311432256420232e-05, "loss": 0.05843162536621094, "step": 4500 }, { "epoch": 0.6085002112378538, "grad_norm": 0.6020203828811646, "learning_rate": 1.1304798376830664e-05, "loss": 0.13279247283935547, "step": 4501 }, { "epoch": 0.6086354034643008, "grad_norm": 0.41823306679725647, "learning_rate": 1.1298165266601778e-05, "loss": 0.0845341682434082, "step": 4502 }, { "epoch": 0.6087705956907478, "grad_norm": 0.33516818284988403, "learning_rate": 1.129153292711462e-05, "loss": 0.06222248077392578, "step": 4503 }, { "epoch": 0.6089057879171947, "grad_norm": 0.32954806089401245, "learning_rate": 1.1284901359750082e-05, "loss": 0.07540035247802734, "step": 4504 }, { "epoch": 0.6090409801436417, "grad_norm": 0.2039014995098114, "learning_rate": 1.1278270565888897e-05, "loss": 0.05766725540161133, "step": 4505 }, { "epoch": 0.6091761723700887, "grad_norm": 0.24326585233211517, "learning_rate": 1.1271640546911624e-05, "loss": 0.06625604629516602, "step": 4506 }, { "epoch": 0.6093113645965357, "grad_norm": 0.27929040789604187, "learning_rate": 1.1265011304198672e-05, "loss": 0.046045541763305664, "step": 4507 }, { "epoch": 0.6094465568229827, "grad_norm": 0.3388141691684723, "learning_rate": 1.1258382839130282e-05, "loss": 0.0822591781616211, "step": 4508 }, { "epoch": 0.6095817490494296, "grad_norm": 0.2273472398519516, "learning_rate": 1.1251755153086536e-05, "loss": 0.05302000045776367, "step": 4509 }, { "epoch": 0.6097169412758766, "grad_norm": 0.5747863054275513, "learning_rate": 1.1245128247447362e-05, "loss": 0.05986785888671875, "step": 4510 }, { "epoch": 0.6098521335023236, "grad_norm": 0.17942984402179718, "learning_rate": 1.1238502123592507e-05, "loss": 0.056525230407714844, "step": 4511 }, { "epoch": 0.6099873257287706, "grad_norm": 0.5319517254829407, "learning_rate": 1.1231876782901568e-05, "loss": 0.06388139724731445, "step": 4512 }, { "epoch": 0.6101225179552175, "grad_norm": 0.17188724875450134, "learning_rate": 1.1225252226753975e-05, "loss": 0.045076847076416016, "step": 4513 }, { "epoch": 0.6102577101816645, "grad_norm": 0.8894361853599548, "learning_rate": 1.1218628456529005e-05, "loss": 0.10037994384765625, "step": 4514 }, { "epoch": 0.6103929024081115, "grad_norm": 0.26242193579673767, "learning_rate": 1.1212005473605746e-05, "loss": 0.0639791488647461, "step": 4515 }, { "epoch": 0.6105280946345585, "grad_norm": 0.24523401260375977, "learning_rate": 1.120538327936315e-05, "loss": 0.08405637741088867, "step": 4516 }, { "epoch": 0.6106632868610055, "grad_norm": 0.3713717460632324, "learning_rate": 1.1198761875179993e-05, "loss": 0.0932474136352539, "step": 4517 }, { "epoch": 0.6107984790874524, "grad_norm": 0.42905890941619873, "learning_rate": 1.1192141262434883e-05, "loss": 0.06979227066040039, "step": 4518 }, { "epoch": 0.6109336713138994, "grad_norm": 0.29989415407180786, "learning_rate": 1.1185521442506272e-05, "loss": 0.07082176208496094, "step": 4519 }, { "epoch": 0.6110688635403464, "grad_norm": 0.5955882668495178, "learning_rate": 1.1178902416772432e-05, "loss": 0.09388101100921631, "step": 4520 }, { "epoch": 0.6112040557667934, "grad_norm": 0.3061008155345917, "learning_rate": 1.1172284186611485e-05, "loss": 0.07376432418823242, "step": 4521 }, { "epoch": 0.6113392479932404, "grad_norm": 0.2999645173549652, "learning_rate": 1.1165666753401384e-05, "loss": 0.06786715984344482, "step": 4522 }, { "epoch": 0.6114744402196873, "grad_norm": 0.36514514684677124, "learning_rate": 1.1159050118519914e-05, "loss": 0.06745219230651855, "step": 4523 }, { "epoch": 0.6116096324461343, "grad_norm": 0.19906677305698395, "learning_rate": 1.1152434283344696e-05, "loss": 0.0386357307434082, "step": 4524 }, { "epoch": 0.6117448246725813, "grad_norm": 0.4493810832500458, "learning_rate": 1.114581924925317e-05, "loss": 0.06978821754455566, "step": 4525 }, { "epoch": 0.6118800168990283, "grad_norm": 0.2112336903810501, "learning_rate": 1.113920501762263e-05, "loss": 0.051206111907958984, "step": 4526 }, { "epoch": 0.6120152091254752, "grad_norm": 0.48546716570854187, "learning_rate": 1.1132591589830193e-05, "loss": 0.09076404571533203, "step": 4527 }, { "epoch": 0.6121504013519222, "grad_norm": 0.42367085814476013, "learning_rate": 1.1125978967252818e-05, "loss": 0.07500863075256348, "step": 4528 }, { "epoch": 0.6122855935783692, "grad_norm": 0.3923182189464569, "learning_rate": 1.1119367151267278e-05, "loss": 0.059465885162353516, "step": 4529 }, { "epoch": 0.6124207858048162, "grad_norm": 0.25272053480148315, "learning_rate": 1.1112756143250186e-05, "loss": 0.05335044860839844, "step": 4530 }, { "epoch": 0.6125559780312632, "grad_norm": 0.2617141306400299, "learning_rate": 1.1106145944577995e-05, "loss": 0.06698143482208252, "step": 4531 }, { "epoch": 0.6126911702577101, "grad_norm": 0.24244795739650726, "learning_rate": 1.1099536556626984e-05, "loss": 0.062398672103881836, "step": 4532 }, { "epoch": 0.6128263624841571, "grad_norm": 0.3359515964984894, "learning_rate": 1.1092927980773269e-05, "loss": 0.06544971466064453, "step": 4533 }, { "epoch": 0.6129615547106041, "grad_norm": 0.34295231103897095, "learning_rate": 1.1086320218392777e-05, "loss": 0.0650484561920166, "step": 4534 }, { "epoch": 0.6130967469370511, "grad_norm": 0.5967460870742798, "learning_rate": 1.1079713270861286e-05, "loss": 0.07013285160064697, "step": 4535 }, { "epoch": 0.613231939163498, "grad_norm": 0.27164527773857117, "learning_rate": 1.1073107139554395e-05, "loss": 0.056929588317871094, "step": 4536 }, { "epoch": 0.613367131389945, "grad_norm": 0.2666477859020233, "learning_rate": 1.1066501825847545e-05, "loss": 0.07135677337646484, "step": 4537 }, { "epoch": 0.613502323616392, "grad_norm": 0.8466290235519409, "learning_rate": 1.1059897331115985e-05, "loss": 0.07445192337036133, "step": 4538 }, { "epoch": 0.613637515842839, "grad_norm": 0.5046213269233704, "learning_rate": 1.1053293656734816e-05, "loss": 0.06831479072570801, "step": 4539 }, { "epoch": 0.613772708069286, "grad_norm": 0.2886885702610016, "learning_rate": 1.1046690804078949e-05, "loss": 0.06088685989379883, "step": 4540 }, { "epoch": 0.613907900295733, "grad_norm": 0.2978242337703705, "learning_rate": 1.1040088774523139e-05, "loss": 0.05490458011627197, "step": 4541 }, { "epoch": 0.6140430925221799, "grad_norm": 0.14407029747962952, "learning_rate": 1.1033487569441971e-05, "loss": 0.02333986759185791, "step": 4542 }, { "epoch": 0.6141782847486269, "grad_norm": 0.3460027575492859, "learning_rate": 1.1026887190209834e-05, "loss": 0.09065914154052734, "step": 4543 }, { "epoch": 0.6143134769750739, "grad_norm": 0.17839789390563965, "learning_rate": 1.1020287638200977e-05, "loss": 0.03936338424682617, "step": 4544 }, { "epoch": 0.6144486692015209, "grad_norm": 0.3785291612148285, "learning_rate": 1.1013688914789452e-05, "loss": 0.07198762893676758, "step": 4545 }, { "epoch": 0.6145838614279678, "grad_norm": 0.4057964086532593, "learning_rate": 1.100709102134915e-05, "loss": 0.08894228935241699, "step": 4546 }, { "epoch": 0.6147190536544149, "grad_norm": 0.3648238778114319, "learning_rate": 1.10004939592538e-05, "loss": 0.07256269454956055, "step": 4547 }, { "epoch": 0.6148542458808619, "grad_norm": 0.19871188700199127, "learning_rate": 1.0993897729876927e-05, "loss": 0.0429837703704834, "step": 4548 }, { "epoch": 0.6149894381073089, "grad_norm": 0.36041560769081116, "learning_rate": 1.0987302334591915e-05, "loss": 0.08607816696166992, "step": 4549 }, { "epoch": 0.6151246303337559, "grad_norm": 0.4003594219684601, "learning_rate": 1.098070777477195e-05, "loss": 0.05415606498718262, "step": 4550 }, { "epoch": 0.6152598225602028, "grad_norm": 0.2990688979625702, "learning_rate": 1.0974114051790067e-05, "loss": 0.0651865005493164, "step": 4551 }, { "epoch": 0.6153950147866498, "grad_norm": 0.17521131038665771, "learning_rate": 1.09675211670191e-05, "loss": 0.03891599178314209, "step": 4552 }, { "epoch": 0.6155302070130968, "grad_norm": 0.3220004439353943, "learning_rate": 1.0960929121831732e-05, "loss": 0.05576133728027344, "step": 4553 }, { "epoch": 0.6156653992395438, "grad_norm": 0.28507575392723083, "learning_rate": 1.095433791760046e-05, "loss": 0.05747842788696289, "step": 4554 }, { "epoch": 0.6158005914659908, "grad_norm": 0.3530092239379883, "learning_rate": 1.0947747555697609e-05, "loss": 0.0827333927154541, "step": 4555 }, { "epoch": 0.6159357836924377, "grad_norm": 0.17005878686904907, "learning_rate": 1.0941158037495328e-05, "loss": 0.04477977752685547, "step": 4556 }, { "epoch": 0.6160709759188847, "grad_norm": 0.1905655860900879, "learning_rate": 1.0934569364365583e-05, "loss": 0.056057095527648926, "step": 4557 }, { "epoch": 0.6162061681453317, "grad_norm": 0.44855692982673645, "learning_rate": 1.0927981537680176e-05, "loss": 0.08228158950805664, "step": 4558 }, { "epoch": 0.6163413603717787, "grad_norm": 0.43794751167297363, "learning_rate": 1.0921394558810726e-05, "loss": 0.07080614566802979, "step": 4559 }, { "epoch": 0.6164765525982256, "grad_norm": 0.484383761882782, "learning_rate": 1.0914808429128688e-05, "loss": 0.06489837169647217, "step": 4560 }, { "epoch": 0.6166117448246726, "grad_norm": 0.3000618517398834, "learning_rate": 1.0908223150005315e-05, "loss": 0.07498455047607422, "step": 4561 }, { "epoch": 0.6167469370511196, "grad_norm": 0.24319665133953094, "learning_rate": 1.09016387228117e-05, "loss": 0.04724609851837158, "step": 4562 }, { "epoch": 0.6168821292775666, "grad_norm": 0.20916372537612915, "learning_rate": 1.0895055148918758e-05, "loss": 0.03911542892456055, "step": 4563 }, { "epoch": 0.6170173215040136, "grad_norm": 0.36333584785461426, "learning_rate": 1.0888472429697223e-05, "loss": 0.051422834396362305, "step": 4564 }, { "epoch": 0.6171525137304605, "grad_norm": 0.14120206236839294, "learning_rate": 1.088189056651766e-05, "loss": 0.05162477493286133, "step": 4565 }, { "epoch": 0.6172877059569075, "grad_norm": 0.2763865292072296, "learning_rate": 1.0875309560750438e-05, "loss": 0.06842708587646484, "step": 4566 }, { "epoch": 0.6174228981833545, "grad_norm": 0.34497779607772827, "learning_rate": 1.086872941376576e-05, "loss": 0.07580947875976562, "step": 4567 }, { "epoch": 0.6175580904098015, "grad_norm": 0.3612481951713562, "learning_rate": 1.0862150126933648e-05, "loss": 0.06434965133666992, "step": 4568 }, { "epoch": 0.6176932826362485, "grad_norm": 0.4365478456020355, "learning_rate": 1.0855571701623942e-05, "loss": 0.06622171401977539, "step": 4569 }, { "epoch": 0.6178284748626954, "grad_norm": 0.28336986899375916, "learning_rate": 1.0848994139206317e-05, "loss": 0.054013967514038086, "step": 4570 }, { "epoch": 0.6179636670891424, "grad_norm": 0.2518436908721924, "learning_rate": 1.0842417441050247e-05, "loss": 0.06433439254760742, "step": 4571 }, { "epoch": 0.6180988593155894, "grad_norm": 0.33681032061576843, "learning_rate": 1.0835841608525031e-05, "loss": 0.06585288047790527, "step": 4572 }, { "epoch": 0.6182340515420364, "grad_norm": 0.31533190608024597, "learning_rate": 1.08292666429998e-05, "loss": 0.06459236145019531, "step": 4573 }, { "epoch": 0.6183692437684833, "grad_norm": 0.2309371531009674, "learning_rate": 1.08226925458435e-05, "loss": 0.05120372772216797, "step": 4574 }, { "epoch": 0.6185044359949303, "grad_norm": 0.21744850277900696, "learning_rate": 1.0816119318424882e-05, "loss": 0.06373906135559082, "step": 4575 }, { "epoch": 0.6186396282213773, "grad_norm": 0.2714594006538391, "learning_rate": 1.0809546962112535e-05, "loss": 0.07249265909194946, "step": 4576 }, { "epoch": 0.6187748204478243, "grad_norm": 0.2508978545665741, "learning_rate": 1.0802975478274856e-05, "loss": 0.058229684829711914, "step": 4577 }, { "epoch": 0.6189100126742713, "grad_norm": 0.352032333612442, "learning_rate": 1.0796404868280062e-05, "loss": 0.05313920974731445, "step": 4578 }, { "epoch": 0.6190452049007182, "grad_norm": 0.24912215769290924, "learning_rate": 1.07898351334962e-05, "loss": 0.05867719650268555, "step": 4579 }, { "epoch": 0.6191803971271652, "grad_norm": 0.2457178831100464, "learning_rate": 1.0783266275291103e-05, "loss": 0.06145477294921875, "step": 4580 }, { "epoch": 0.6193155893536122, "grad_norm": 0.22022317349910736, "learning_rate": 1.077669829503246e-05, "loss": 0.054735422134399414, "step": 4581 }, { "epoch": 0.6194507815800592, "grad_norm": 0.30525389313697815, "learning_rate": 1.077013119408775e-05, "loss": 0.07169342041015625, "step": 4582 }, { "epoch": 0.6195859738065062, "grad_norm": 0.212591752409935, "learning_rate": 1.0763564973824289e-05, "loss": 0.05258166790008545, "step": 4583 }, { "epoch": 0.6197211660329531, "grad_norm": 0.3430812358856201, "learning_rate": 1.0756999635609185e-05, "loss": 0.05687212944030762, "step": 4584 }, { "epoch": 0.6198563582594001, "grad_norm": 0.27148669958114624, "learning_rate": 1.0750435180809381e-05, "loss": 0.0537867546081543, "step": 4585 }, { "epoch": 0.6199915504858471, "grad_norm": 0.2941371202468872, "learning_rate": 1.074387161079164e-05, "loss": 0.05854785442352295, "step": 4586 }, { "epoch": 0.6201267427122941, "grad_norm": 0.16351230442523956, "learning_rate": 1.0737308926922521e-05, "loss": 0.03908205032348633, "step": 4587 }, { "epoch": 0.620261934938741, "grad_norm": 0.13048654794692993, "learning_rate": 1.0730747130568424e-05, "loss": 0.034320831298828125, "step": 4588 }, { "epoch": 0.620397127165188, "grad_norm": 0.29218488931655884, "learning_rate": 1.0724186223095532e-05, "loss": 0.06438088417053223, "step": 4589 }, { "epoch": 0.620532319391635, "grad_norm": 0.17921486496925354, "learning_rate": 1.071762620586987e-05, "loss": 0.04278111457824707, "step": 4590 }, { "epoch": 0.620667511618082, "grad_norm": 0.3059333860874176, "learning_rate": 1.0711067080257273e-05, "loss": 0.05582815408706665, "step": 4591 }, { "epoch": 0.620802703844529, "grad_norm": 0.3186451494693756, "learning_rate": 1.0704508847623374e-05, "loss": 0.07403016090393066, "step": 4592 }, { "epoch": 0.6209378960709759, "grad_norm": 0.30739936232566833, "learning_rate": 1.069795150933365e-05, "loss": 0.05772209167480469, "step": 4593 }, { "epoch": 0.6210730882974229, "grad_norm": 0.21873977780342102, "learning_rate": 1.0691395066753357e-05, "loss": 0.05909430980682373, "step": 4594 }, { "epoch": 0.6212082805238699, "grad_norm": 0.3679015636444092, "learning_rate": 1.0684839521247584e-05, "loss": 0.05233001708984375, "step": 4595 }, { "epoch": 0.6213434727503169, "grad_norm": 0.3394359350204468, "learning_rate": 1.0678284874181234e-05, "loss": 0.08089494705200195, "step": 4596 }, { "epoch": 0.6214786649767639, "grad_norm": 0.22854389250278473, "learning_rate": 1.0671731126919028e-05, "loss": 0.05193519592285156, "step": 4597 }, { "epoch": 0.6216138572032108, "grad_norm": 0.20391437411308289, "learning_rate": 1.066517828082548e-05, "loss": 0.05152487754821777, "step": 4598 }, { "epoch": 0.6217490494296578, "grad_norm": 0.2641715705394745, "learning_rate": 1.0658626337264926e-05, "loss": 0.060117244720458984, "step": 4599 }, { "epoch": 0.6218842416561048, "grad_norm": 0.31620243191719055, "learning_rate": 1.0652075297601518e-05, "loss": 0.04932093620300293, "step": 4600 }, { "epoch": 0.6220194338825518, "grad_norm": 0.3243793547153473, "learning_rate": 1.0645525163199222e-05, "loss": 0.0713043212890625, "step": 4601 }, { "epoch": 0.6221546261089987, "grad_norm": 0.27476295828819275, "learning_rate": 1.063897593542181e-05, "loss": 0.06162452697753906, "step": 4602 }, { "epoch": 0.6222898183354457, "grad_norm": 0.39935553073883057, "learning_rate": 1.0632427615632864e-05, "loss": 0.06130671501159668, "step": 4603 }, { "epoch": 0.6224250105618927, "grad_norm": 0.22969891130924225, "learning_rate": 1.0625880205195776e-05, "loss": 0.04492330551147461, "step": 4604 }, { "epoch": 0.6225602027883397, "grad_norm": 0.29548484086990356, "learning_rate": 1.0619333705473754e-05, "loss": 0.06001782417297363, "step": 4605 }, { "epoch": 0.6226953950147867, "grad_norm": 0.631999135017395, "learning_rate": 1.0612788117829821e-05, "loss": 0.08805418014526367, "step": 4606 }, { "epoch": 0.6228305872412336, "grad_norm": 0.35514265298843384, "learning_rate": 1.0606243443626792e-05, "loss": 0.07119536399841309, "step": 4607 }, { "epoch": 0.6229657794676806, "grad_norm": 0.1862819939851761, "learning_rate": 1.0599699684227313e-05, "loss": 0.03358948230743408, "step": 4608 }, { "epoch": 0.6231009716941276, "grad_norm": 0.30135810375213623, "learning_rate": 1.0593156840993818e-05, "loss": 0.04882550239562988, "step": 4609 }, { "epoch": 0.6232361639205746, "grad_norm": 0.17869390547275543, "learning_rate": 1.0586614915288571e-05, "loss": 0.03380930423736572, "step": 4610 }, { "epoch": 0.6233713561470215, "grad_norm": 0.2399933636188507, "learning_rate": 1.0580073908473641e-05, "loss": 0.06104230880737305, "step": 4611 }, { "epoch": 0.6235065483734685, "grad_norm": 0.20708365738391876, "learning_rate": 1.0573533821910885e-05, "loss": 0.04581332206726074, "step": 4612 }, { "epoch": 0.6236417405999155, "grad_norm": 0.49541765451431274, "learning_rate": 1.0566994656961997e-05, "loss": 0.07208776473999023, "step": 4613 }, { "epoch": 0.6237769328263625, "grad_norm": 0.3167388141155243, "learning_rate": 1.0560456414988456e-05, "loss": 0.0721883773803711, "step": 4614 }, { "epoch": 0.6239121250528095, "grad_norm": 0.2761615812778473, "learning_rate": 1.0553919097351564e-05, "loss": 0.06961297988891602, "step": 4615 }, { "epoch": 0.6240473172792564, "grad_norm": 0.4039650559425354, "learning_rate": 1.0547382705412434e-05, "loss": 0.07571643590927124, "step": 4616 }, { "epoch": 0.6241825095057034, "grad_norm": 0.7439659833908081, "learning_rate": 1.054084724053196e-05, "loss": 0.13332080841064453, "step": 4617 }, { "epoch": 0.6243177017321504, "grad_norm": 0.26782482862472534, "learning_rate": 1.0534312704070875e-05, "loss": 0.06744956970214844, "step": 4618 }, { "epoch": 0.6244528939585974, "grad_norm": 0.32765668630599976, "learning_rate": 1.0527779097389695e-05, "loss": 0.07637345790863037, "step": 4619 }, { "epoch": 0.6245880861850444, "grad_norm": 0.29157114028930664, "learning_rate": 1.0521246421848762e-05, "loss": 0.08211612701416016, "step": 4620 }, { "epoch": 0.6247232784114913, "grad_norm": 0.7886411547660828, "learning_rate": 1.0514714678808202e-05, "loss": 0.10589981079101562, "step": 4621 }, { "epoch": 0.6248584706379383, "grad_norm": 0.22971338033676147, "learning_rate": 1.0508183869627962e-05, "loss": 0.0625, "step": 4622 }, { "epoch": 0.6249936628643853, "grad_norm": 0.2727302610874176, "learning_rate": 1.0501653995667798e-05, "loss": 0.07536149024963379, "step": 4623 }, { "epoch": 0.6251288550908323, "grad_norm": 0.42857256531715393, "learning_rate": 1.0495125058287258e-05, "loss": 0.09780502319335938, "step": 4624 }, { "epoch": 0.6252640473172792, "grad_norm": 0.7165300846099854, "learning_rate": 1.0488597058845708e-05, "loss": 0.08171486854553223, "step": 4625 }, { "epoch": 0.6253992395437262, "grad_norm": 0.41147565841674805, "learning_rate": 1.0482069998702304e-05, "loss": 0.08458304405212402, "step": 4626 }, { "epoch": 0.6255344317701732, "grad_norm": 0.2602044939994812, "learning_rate": 1.0475543879216017e-05, "loss": 0.05593538284301758, "step": 4627 }, { "epoch": 0.6256696239966202, "grad_norm": 0.3791307508945465, "learning_rate": 1.0469018701745626e-05, "loss": 0.08008337020874023, "step": 4628 }, { "epoch": 0.6258048162230672, "grad_norm": 0.2977467179298401, "learning_rate": 1.0462494467649704e-05, "loss": 0.07937431335449219, "step": 4629 }, { "epoch": 0.6259400084495141, "grad_norm": 0.39518147706985474, "learning_rate": 1.045597117828663e-05, "loss": 0.05633068084716797, "step": 4630 }, { "epoch": 0.6260752006759611, "grad_norm": 0.3023243844509125, "learning_rate": 1.0449448835014586e-05, "loss": 0.06179237365722656, "step": 4631 }, { "epoch": 0.6262103929024081, "grad_norm": 0.3019101023674011, "learning_rate": 1.044292743919156e-05, "loss": 0.049596309661865234, "step": 4632 }, { "epoch": 0.6263455851288551, "grad_norm": 0.43353164196014404, "learning_rate": 1.0436406992175343e-05, "loss": 0.06688785552978516, "step": 4633 }, { "epoch": 0.626480777355302, "grad_norm": 0.2690771222114563, "learning_rate": 1.0429887495323532e-05, "loss": 0.04748481512069702, "step": 4634 }, { "epoch": 0.626615969581749, "grad_norm": 0.7041105031967163, "learning_rate": 1.0423368949993512e-05, "loss": 0.07501733303070068, "step": 4635 }, { "epoch": 0.626751161808196, "grad_norm": 0.35685858130455017, "learning_rate": 1.041685135754248e-05, "loss": 0.08731919527053833, "step": 4636 }, { "epoch": 0.626886354034643, "grad_norm": 0.20517200231552124, "learning_rate": 1.0410334719327435e-05, "loss": 0.05757880210876465, "step": 4637 }, { "epoch": 0.62702154626109, "grad_norm": 0.2358854115009308, "learning_rate": 1.0403819036705177e-05, "loss": 0.06907224655151367, "step": 4638 }, { "epoch": 0.627156738487537, "grad_norm": 0.40448957681655884, "learning_rate": 1.0397304311032311e-05, "loss": 0.06854289770126343, "step": 4639 }, { "epoch": 0.6272919307139839, "grad_norm": 0.40503883361816406, "learning_rate": 1.039079054366523e-05, "loss": 0.07426023483276367, "step": 4640 }, { "epoch": 0.6274271229404309, "grad_norm": 0.49728554487228394, "learning_rate": 1.0384277735960133e-05, "loss": 0.07779121398925781, "step": 4641 }, { "epoch": 0.6275623151668779, "grad_norm": 0.5770511627197266, "learning_rate": 1.0377765889273025e-05, "loss": 0.06285107135772705, "step": 4642 }, { "epoch": 0.6276975073933249, "grad_norm": 0.2105005979537964, "learning_rate": 1.0371255004959715e-05, "loss": 0.062136173248291016, "step": 4643 }, { "epoch": 0.6278326996197718, "grad_norm": 0.29505887627601624, "learning_rate": 1.036474508437579e-05, "loss": 0.05975842475891113, "step": 4644 }, { "epoch": 0.6279678918462188, "grad_norm": 0.2482919991016388, "learning_rate": 1.035823612887666e-05, "loss": 0.05152767896652222, "step": 4645 }, { "epoch": 0.6281030840726658, "grad_norm": 0.24198034405708313, "learning_rate": 1.0351728139817517e-05, "loss": 0.037592172622680664, "step": 4646 }, { "epoch": 0.6282382762991128, "grad_norm": 0.26556435227394104, "learning_rate": 1.0345221118553362e-05, "loss": 0.0791773796081543, "step": 4647 }, { "epoch": 0.6283734685255598, "grad_norm": 0.22227388620376587, "learning_rate": 1.0338715066439002e-05, "loss": 0.06063961982727051, "step": 4648 }, { "epoch": 0.6285086607520067, "grad_norm": 0.4060885012149811, "learning_rate": 1.0332209984829013e-05, "loss": 0.08052146434783936, "step": 4649 }, { "epoch": 0.6286438529784537, "grad_norm": 0.2979709804058075, "learning_rate": 1.03257058750778e-05, "loss": 0.062424659729003906, "step": 4650 }, { "epoch": 0.6287790452049007, "grad_norm": 0.1807127147912979, "learning_rate": 1.0319202738539548e-05, "loss": 0.050330400466918945, "step": 4651 }, { "epoch": 0.6289142374313477, "grad_norm": 0.28656843304634094, "learning_rate": 1.0312700576568253e-05, "loss": 0.06861495971679688, "step": 4652 }, { "epoch": 0.6290494296577946, "grad_norm": 0.29121947288513184, "learning_rate": 1.0306199390517688e-05, "loss": 0.05763101577758789, "step": 4653 }, { "epoch": 0.6291846218842416, "grad_norm": 0.23022115230560303, "learning_rate": 1.0299699181741439e-05, "loss": 0.059467315673828125, "step": 4654 }, { "epoch": 0.6293198141106886, "grad_norm": 0.3273932635784149, "learning_rate": 1.0293199951592889e-05, "loss": 0.07540702819824219, "step": 4655 }, { "epoch": 0.6294550063371356, "grad_norm": 0.4194307029247284, "learning_rate": 1.0286701701425206e-05, "loss": 0.09010553359985352, "step": 4656 }, { "epoch": 0.6295901985635826, "grad_norm": 0.43677449226379395, "learning_rate": 1.0280204432591369e-05, "loss": 0.10966968536376953, "step": 4657 }, { "epoch": 0.6297253907900295, "grad_norm": 0.24091710150241852, "learning_rate": 1.0273708146444133e-05, "loss": 0.07066917419433594, "step": 4658 }, { "epoch": 0.6298605830164765, "grad_norm": 0.20182935893535614, "learning_rate": 1.0267212844336062e-05, "loss": 0.054204583168029785, "step": 4659 }, { "epoch": 0.6299957752429235, "grad_norm": 0.40133270621299744, "learning_rate": 1.026071852761952e-05, "loss": 0.06823158264160156, "step": 4660 }, { "epoch": 0.6301309674693705, "grad_norm": 0.35685959458351135, "learning_rate": 1.025422519764665e-05, "loss": 0.08153486251831055, "step": 4661 }, { "epoch": 0.6302661596958175, "grad_norm": 0.16551874577999115, "learning_rate": 1.024773285576941e-05, "loss": 0.05325794219970703, "step": 4662 }, { "epoch": 0.6304013519222644, "grad_norm": 0.18010757863521576, "learning_rate": 1.0241241503339524e-05, "loss": 0.044170379638671875, "step": 4663 }, { "epoch": 0.6305365441487114, "grad_norm": 0.29997074604034424, "learning_rate": 1.023475114170853e-05, "loss": 0.07453131675720215, "step": 4664 }, { "epoch": 0.6306717363751584, "grad_norm": 0.2977597415447235, "learning_rate": 1.0228261772227768e-05, "loss": 0.06371140480041504, "step": 4665 }, { "epoch": 0.6308069286016054, "grad_norm": 0.32815954089164734, "learning_rate": 1.0221773396248349e-05, "loss": 0.07728123664855957, "step": 4666 }, { "epoch": 0.6309421208280523, "grad_norm": 0.3637869358062744, "learning_rate": 1.021528601512119e-05, "loss": 0.06687641143798828, "step": 4667 }, { "epoch": 0.6310773130544993, "grad_norm": 0.2982170879840851, "learning_rate": 1.0208799630196994e-05, "loss": 0.07450008392333984, "step": 4668 }, { "epoch": 0.6312125052809463, "grad_norm": 0.2322484850883484, "learning_rate": 1.0202314242826264e-05, "loss": 0.07700347900390625, "step": 4669 }, { "epoch": 0.6313476975073933, "grad_norm": 0.2507709562778473, "learning_rate": 1.0195829854359299e-05, "loss": 0.06022167205810547, "step": 4670 }, { "epoch": 0.6314828897338403, "grad_norm": 0.25489166378974915, "learning_rate": 1.0189346466146175e-05, "loss": 0.06063413619995117, "step": 4671 }, { "epoch": 0.6316180819602872, "grad_norm": 0.2549836337566376, "learning_rate": 1.018286407953677e-05, "loss": 0.06438636779785156, "step": 4672 }, { "epoch": 0.6317532741867342, "grad_norm": 0.22776448726654053, "learning_rate": 1.017638269588075e-05, "loss": 0.05417680740356445, "step": 4673 }, { "epoch": 0.6318884664131812, "grad_norm": 0.2738092541694641, "learning_rate": 1.0169902316527575e-05, "loss": 0.04232466220855713, "step": 4674 }, { "epoch": 0.6320236586396282, "grad_norm": 0.3591974377632141, "learning_rate": 1.0163422942826502e-05, "loss": 0.07692217826843262, "step": 4675 }, { "epoch": 0.6321588508660752, "grad_norm": 0.2739556133747101, "learning_rate": 1.0156944576126555e-05, "loss": 0.06323790550231934, "step": 4676 }, { "epoch": 0.6322940430925221, "grad_norm": 0.32111856341362, "learning_rate": 1.0150467217776579e-05, "loss": 0.06371116638183594, "step": 4677 }, { "epoch": 0.6324292353189691, "grad_norm": 0.16997277736663818, "learning_rate": 1.0143990869125185e-05, "loss": 0.03658187389373779, "step": 4678 }, { "epoch": 0.6325644275454161, "grad_norm": 0.40965399146080017, "learning_rate": 1.013751553152079e-05, "loss": 0.07799696922302246, "step": 4679 }, { "epoch": 0.6326996197718631, "grad_norm": 0.488856703042984, "learning_rate": 1.0131041206311594e-05, "loss": 0.07051825523376465, "step": 4680 }, { "epoch": 0.63283481199831, "grad_norm": 0.3490300476551056, "learning_rate": 1.0124567894845578e-05, "loss": 0.06315314769744873, "step": 4681 }, { "epoch": 0.632970004224757, "grad_norm": 0.25645068287849426, "learning_rate": 1.0118095598470528e-05, "loss": 0.045945167541503906, "step": 4682 }, { "epoch": 0.6331051964512041, "grad_norm": 0.35110464692115784, "learning_rate": 1.0111624318534006e-05, "loss": 0.07792937755584717, "step": 4683 }, { "epoch": 0.6332403886776511, "grad_norm": 0.2600908875465393, "learning_rate": 1.0105154056383377e-05, "loss": 0.05178546905517578, "step": 4684 }, { "epoch": 0.6333755809040981, "grad_norm": 0.5623437166213989, "learning_rate": 1.0098684813365764e-05, "loss": 0.09689855575561523, "step": 4685 }, { "epoch": 0.633510773130545, "grad_norm": 0.2899349331855774, "learning_rate": 1.0092216590828115e-05, "loss": 0.06019878387451172, "step": 4686 }, { "epoch": 0.633645965356992, "grad_norm": 0.3198620676994324, "learning_rate": 1.0085749390117146e-05, "loss": 0.07785367965698242, "step": 4687 }, { "epoch": 0.633781157583439, "grad_norm": 0.20298810303211212, "learning_rate": 1.0079283212579354e-05, "loss": 0.05445504188537598, "step": 4688 }, { "epoch": 0.633916349809886, "grad_norm": 0.2476663440465927, "learning_rate": 1.0072818059561045e-05, "loss": 0.07100027799606323, "step": 4689 }, { "epoch": 0.634051542036333, "grad_norm": 0.3452564477920532, "learning_rate": 1.0066353932408285e-05, "loss": 0.07155418395996094, "step": 4690 }, { "epoch": 0.6341867342627799, "grad_norm": 0.38678160309791565, "learning_rate": 1.0059890832466948e-05, "loss": 0.06813859939575195, "step": 4691 }, { "epoch": 0.6343219264892269, "grad_norm": 0.2522895336151123, "learning_rate": 1.0053428761082684e-05, "loss": 0.04723095893859863, "step": 4692 }, { "epoch": 0.6344571187156739, "grad_norm": 0.1959107518196106, "learning_rate": 1.0046967719600927e-05, "loss": 0.04604077339172363, "step": 4693 }, { "epoch": 0.6345923109421209, "grad_norm": 0.15962094068527222, "learning_rate": 1.0040507709366912e-05, "loss": 0.040190696716308594, "step": 4694 }, { "epoch": 0.6347275031685679, "grad_norm": 0.30821219086647034, "learning_rate": 1.0034048731725631e-05, "loss": 0.08436393737792969, "step": 4695 }, { "epoch": 0.6348626953950148, "grad_norm": 0.22390079498291016, "learning_rate": 1.0027590788021886e-05, "loss": 0.05712532997131348, "step": 4696 }, { "epoch": 0.6349978876214618, "grad_norm": 0.28145474195480347, "learning_rate": 1.0021133879600258e-05, "loss": 0.07906675338745117, "step": 4697 }, { "epoch": 0.6351330798479088, "grad_norm": 0.23368291556835175, "learning_rate": 1.0014678007805108e-05, "loss": 0.042389869689941406, "step": 4698 }, { "epoch": 0.6352682720743558, "grad_norm": 0.2637837529182434, "learning_rate": 1.0008223173980579e-05, "loss": 0.050539493560791016, "step": 4699 }, { "epoch": 0.6354034643008027, "grad_norm": 0.3639850616455078, "learning_rate": 1.0001769379470604e-05, "loss": 0.058909863233566284, "step": 4700 }, { "epoch": 0.6355386565272497, "grad_norm": 0.4069882035255432, "learning_rate": 9.995316625618898e-06, "loss": 0.06889724731445312, "step": 4701 }, { "epoch": 0.6356738487536967, "grad_norm": 0.45420369505882263, "learning_rate": 9.988864913768962e-06, "loss": 0.06771183013916016, "step": 4702 }, { "epoch": 0.6358090409801437, "grad_norm": 0.1920868158340454, "learning_rate": 9.982414245264071e-06, "loss": 0.03780162334442139, "step": 4703 }, { "epoch": 0.6359442332065907, "grad_norm": 0.24480357766151428, "learning_rate": 9.975964621447293e-06, "loss": 0.04808378219604492, "step": 4704 }, { "epoch": 0.6360794254330376, "grad_norm": 0.3678262531757355, "learning_rate": 9.96951604366147e-06, "loss": 0.0884695053100586, "step": 4705 }, { "epoch": 0.6362146176594846, "grad_norm": 0.2896016836166382, "learning_rate": 9.963068513249233e-06, "loss": 0.06454920768737793, "step": 4706 }, { "epoch": 0.6363498098859316, "grad_norm": 0.23731695115566254, "learning_rate": 9.956622031552996e-06, "loss": 0.04004871845245361, "step": 4707 }, { "epoch": 0.6364850021123786, "grad_norm": 0.25214889645576477, "learning_rate": 9.950176599914942e-06, "loss": 0.05359506607055664, "step": 4708 }, { "epoch": 0.6366201943388256, "grad_norm": 0.2975670397281647, "learning_rate": 9.943732219677048e-06, "loss": 0.06443309783935547, "step": 4709 }, { "epoch": 0.6367553865652725, "grad_norm": 0.19786369800567627, "learning_rate": 9.93728889218107e-06, "loss": 0.06727170944213867, "step": 4710 }, { "epoch": 0.6368905787917195, "grad_norm": 0.2943669557571411, "learning_rate": 9.930846618768543e-06, "loss": 0.06118488311767578, "step": 4711 }, { "epoch": 0.6370257710181665, "grad_norm": 0.2945265471935272, "learning_rate": 9.924405400780784e-06, "loss": 0.0697164535522461, "step": 4712 }, { "epoch": 0.6371609632446135, "grad_norm": 0.2885686457157135, "learning_rate": 9.917965239558885e-06, "loss": 0.06333780288696289, "step": 4713 }, { "epoch": 0.6372961554710604, "grad_norm": 0.4006820619106293, "learning_rate": 9.911526136443726e-06, "loss": 0.059378623962402344, "step": 4714 }, { "epoch": 0.6374313476975074, "grad_norm": 0.186470627784729, "learning_rate": 9.905088092775956e-06, "loss": 0.04387474060058594, "step": 4715 }, { "epoch": 0.6375665399239544, "grad_norm": 0.17098212242126465, "learning_rate": 9.898651109896015e-06, "loss": 0.04893779754638672, "step": 4716 }, { "epoch": 0.6377017321504014, "grad_norm": 0.3996959626674652, "learning_rate": 9.892215189144123e-06, "loss": 0.05804312229156494, "step": 4717 }, { "epoch": 0.6378369243768484, "grad_norm": 0.26427116990089417, "learning_rate": 9.88578033186026e-06, "loss": 0.04778647422790527, "step": 4718 }, { "epoch": 0.6379721166032953, "grad_norm": 0.31532660126686096, "learning_rate": 9.879346539384207e-06, "loss": 0.06215834617614746, "step": 4719 }, { "epoch": 0.6381073088297423, "grad_norm": 0.3271363377571106, "learning_rate": 9.87291381305551e-06, "loss": 0.07738649845123291, "step": 4720 }, { "epoch": 0.6382425010561893, "grad_norm": 0.4354500472545624, "learning_rate": 9.866482154213502e-06, "loss": 0.06219172477722168, "step": 4721 }, { "epoch": 0.6383776932826363, "grad_norm": 0.5227985978126526, "learning_rate": 9.86005156419728e-06, "loss": 0.07993030548095703, "step": 4722 }, { "epoch": 0.6385128855090833, "grad_norm": 0.45466184616088867, "learning_rate": 9.853622044345732e-06, "loss": 0.09350919723510742, "step": 4723 }, { "epoch": 0.6386480777355302, "grad_norm": 0.2696090042591095, "learning_rate": 9.847193595997522e-06, "loss": 0.05406975746154785, "step": 4724 }, { "epoch": 0.6387832699619772, "grad_norm": 0.469687819480896, "learning_rate": 9.840766220491078e-06, "loss": 0.08649110794067383, "step": 4725 }, { "epoch": 0.6389184621884242, "grad_norm": 0.32442426681518555, "learning_rate": 9.834339919164625e-06, "loss": 0.04985618591308594, "step": 4726 }, { "epoch": 0.6390536544148712, "grad_norm": 0.4006844162940979, "learning_rate": 9.827914693356145e-06, "loss": 0.07015800476074219, "step": 4727 }, { "epoch": 0.6391888466413181, "grad_norm": 0.49044305086135864, "learning_rate": 9.821490544403403e-06, "loss": 0.07341480255126953, "step": 4728 }, { "epoch": 0.6393240388677651, "grad_norm": 0.17260241508483887, "learning_rate": 9.815067473643951e-06, "loss": 0.041432857513427734, "step": 4729 }, { "epoch": 0.6394592310942121, "grad_norm": 0.33676180243492126, "learning_rate": 9.808645482415097e-06, "loss": 0.06645870208740234, "step": 4730 }, { "epoch": 0.6395944233206591, "grad_norm": 0.4269150495529175, "learning_rate": 9.80222457205394e-06, "loss": 0.06735152006149292, "step": 4731 }, { "epoch": 0.6397296155471061, "grad_norm": 0.38384002447128296, "learning_rate": 9.795804743897341e-06, "loss": 0.08746576309204102, "step": 4732 }, { "epoch": 0.639864807773553, "grad_norm": 0.3227181136608124, "learning_rate": 9.789385999281948e-06, "loss": 0.06382691860198975, "step": 4733 }, { "epoch": 0.64, "grad_norm": 0.14987395703792572, "learning_rate": 9.782968339544179e-06, "loss": 0.03530263900756836, "step": 4734 }, { "epoch": 0.640135192226447, "grad_norm": 0.6400836110115051, "learning_rate": 9.776551766020219e-06, "loss": 0.07858467102050781, "step": 4735 }, { "epoch": 0.640270384452894, "grad_norm": 0.16940130293369293, "learning_rate": 9.77013628004604e-06, "loss": 0.031525999307632446, "step": 4736 }, { "epoch": 0.640405576679341, "grad_norm": 0.23942354321479797, "learning_rate": 9.763721882957371e-06, "loss": 0.051934123039245605, "step": 4737 }, { "epoch": 0.6405407689057879, "grad_norm": 0.27816030383110046, "learning_rate": 9.757308576089732e-06, "loss": 0.05612468719482422, "step": 4738 }, { "epoch": 0.6406759611322349, "grad_norm": 0.2609564960002899, "learning_rate": 9.750896360778404e-06, "loss": 0.05990457534790039, "step": 4739 }, { "epoch": 0.6408111533586819, "grad_norm": 0.4077971577644348, "learning_rate": 9.744485238358448e-06, "loss": 0.08354783058166504, "step": 4740 }, { "epoch": 0.6409463455851289, "grad_norm": 0.17238418757915497, "learning_rate": 9.73807521016469e-06, "loss": 0.04747796058654785, "step": 4741 }, { "epoch": 0.6410815378115758, "grad_norm": 0.18301935493946075, "learning_rate": 9.731666277531732e-06, "loss": 0.048253536224365234, "step": 4742 }, { "epoch": 0.6412167300380228, "grad_norm": 0.4359232187271118, "learning_rate": 9.725258441793947e-06, "loss": 0.07181811332702637, "step": 4743 }, { "epoch": 0.6413519222644698, "grad_norm": 0.23843605816364288, "learning_rate": 9.71885170428549e-06, "loss": 0.052263736724853516, "step": 4744 }, { "epoch": 0.6414871144909168, "grad_norm": 0.20009545981884003, "learning_rate": 9.712446066340265e-06, "loss": 0.03910231590270996, "step": 4745 }, { "epoch": 0.6416223067173638, "grad_norm": 0.25603142380714417, "learning_rate": 9.70604152929197e-06, "loss": 0.060760498046875, "step": 4746 }, { "epoch": 0.6417574989438107, "grad_norm": 0.25704440474510193, "learning_rate": 9.699638094474054e-06, "loss": 0.0583195686340332, "step": 4747 }, { "epoch": 0.6418926911702577, "grad_norm": 0.31259119510650635, "learning_rate": 9.693235763219752e-06, "loss": 0.06410646438598633, "step": 4748 }, { "epoch": 0.6420278833967047, "grad_norm": 0.26537519693374634, "learning_rate": 9.68683453686207e-06, "loss": 0.059996604919433594, "step": 4749 }, { "epoch": 0.6421630756231517, "grad_norm": 0.4195689558982849, "learning_rate": 9.680434416733763e-06, "loss": 0.06110048294067383, "step": 4750 }, { "epoch": 0.6422982678495986, "grad_norm": 0.2629587948322296, "learning_rate": 9.674035404167381e-06, "loss": 0.04708099365234375, "step": 4751 }, { "epoch": 0.6424334600760456, "grad_norm": 0.43677568435668945, "learning_rate": 9.66763750049523e-06, "loss": 0.07480764389038086, "step": 4752 }, { "epoch": 0.6425686523024926, "grad_norm": 0.3900323212146759, "learning_rate": 9.66124070704939e-06, "loss": 0.04446077346801758, "step": 4753 }, { "epoch": 0.6427038445289396, "grad_norm": 0.39845535159111023, "learning_rate": 9.654845025161699e-06, "loss": 0.050692081451416016, "step": 4754 }, { "epoch": 0.6428390367553866, "grad_norm": 0.44947630167007446, "learning_rate": 9.648450456163777e-06, "loss": 0.0677117109298706, "step": 4755 }, { "epoch": 0.6429742289818335, "grad_norm": 0.3863936960697174, "learning_rate": 9.64205700138701e-06, "loss": 0.06791448593139648, "step": 4756 }, { "epoch": 0.6431094212082805, "grad_norm": 0.21248199045658112, "learning_rate": 9.635664662162548e-06, "loss": 0.06186962127685547, "step": 4757 }, { "epoch": 0.6432446134347275, "grad_norm": 0.6182746887207031, "learning_rate": 9.629273439821315e-06, "loss": 0.09067821502685547, "step": 4758 }, { "epoch": 0.6433798056611745, "grad_norm": 0.568256676197052, "learning_rate": 9.622883335693984e-06, "loss": 0.09613311290740967, "step": 4759 }, { "epoch": 0.6435149978876215, "grad_norm": 0.20267054438591003, "learning_rate": 9.616494351111017e-06, "loss": 0.05497455596923828, "step": 4760 }, { "epoch": 0.6436501901140684, "grad_norm": 0.5923112630844116, "learning_rate": 9.610106487402637e-06, "loss": 0.10086488723754883, "step": 4761 }, { "epoch": 0.6437853823405154, "grad_norm": 0.31522035598754883, "learning_rate": 9.603719745898826e-06, "loss": 0.08345794677734375, "step": 4762 }, { "epoch": 0.6439205745669624, "grad_norm": 0.3246569037437439, "learning_rate": 9.597334127929346e-06, "loss": 0.050672054290771484, "step": 4763 }, { "epoch": 0.6440557667934094, "grad_norm": 0.22075532376766205, "learning_rate": 9.590949634823707e-06, "loss": 0.05805182456970215, "step": 4764 }, { "epoch": 0.6441909590198563, "grad_norm": 0.4642619490623474, "learning_rate": 9.584566267911198e-06, "loss": 0.050574302673339844, "step": 4765 }, { "epoch": 0.6443261512463033, "grad_norm": 0.3408973515033722, "learning_rate": 9.578184028520874e-06, "loss": 0.0832815170288086, "step": 4766 }, { "epoch": 0.6444613434727503, "grad_norm": 0.22710277140140533, "learning_rate": 9.571802917981548e-06, "loss": 0.039969682693481445, "step": 4767 }, { "epoch": 0.6445965356991973, "grad_norm": 0.3381836414337158, "learning_rate": 9.565422937621798e-06, "loss": 0.0602116584777832, "step": 4768 }, { "epoch": 0.6447317279256443, "grad_norm": 0.2356148213148117, "learning_rate": 9.559044088769971e-06, "loss": 0.05148506164550781, "step": 4769 }, { "epoch": 0.6448669201520912, "grad_norm": 0.20472785830497742, "learning_rate": 9.552666372754182e-06, "loss": 0.04103350639343262, "step": 4770 }, { "epoch": 0.6450021123785382, "grad_norm": 0.20671877264976501, "learning_rate": 9.546289790902307e-06, "loss": 0.04780244827270508, "step": 4771 }, { "epoch": 0.6451373046049852, "grad_norm": 0.28850510716438293, "learning_rate": 9.539914344541976e-06, "loss": 0.040769219398498535, "step": 4772 }, { "epoch": 0.6452724968314322, "grad_norm": 0.36963313817977905, "learning_rate": 9.533540035000598e-06, "loss": 0.05578446388244629, "step": 4773 }, { "epoch": 0.6454076890578792, "grad_norm": 0.8242632150650024, "learning_rate": 9.52716686360533e-06, "loss": 0.09585762023925781, "step": 4774 }, { "epoch": 0.6455428812843261, "grad_norm": 0.26632484793663025, "learning_rate": 9.520794831683108e-06, "loss": 0.05351734161376953, "step": 4775 }, { "epoch": 0.6456780735107731, "grad_norm": 0.3278503715991974, "learning_rate": 9.514423940560627e-06, "loss": 0.041861534118652344, "step": 4776 }, { "epoch": 0.6458132657372201, "grad_norm": 0.2619352638721466, "learning_rate": 9.508054191564326e-06, "loss": 0.06525230407714844, "step": 4777 }, { "epoch": 0.6459484579636671, "grad_norm": 0.3093946874141693, "learning_rate": 9.501685586020434e-06, "loss": 0.06375670433044434, "step": 4778 }, { "epoch": 0.646083650190114, "grad_norm": 0.32293379306793213, "learning_rate": 9.495318125254919e-06, "loss": 0.07803821563720703, "step": 4779 }, { "epoch": 0.646218842416561, "grad_norm": 0.32007893919944763, "learning_rate": 9.488951810593527e-06, "loss": 0.05273151397705078, "step": 4780 }, { "epoch": 0.646354034643008, "grad_norm": 0.21784581243991852, "learning_rate": 9.48258664336176e-06, "loss": 0.04521918296813965, "step": 4781 }, { "epoch": 0.646489226869455, "grad_norm": 0.3079139292240143, "learning_rate": 9.476222624884873e-06, "loss": 0.05839395523071289, "step": 4782 }, { "epoch": 0.646624419095902, "grad_norm": 0.335799902677536, "learning_rate": 9.469859756487893e-06, "loss": 0.06855034828186035, "step": 4783 }, { "epoch": 0.6467596113223489, "grad_norm": 0.19430774450302124, "learning_rate": 9.463498039495598e-06, "loss": 0.04897332191467285, "step": 4784 }, { "epoch": 0.6468948035487959, "grad_norm": 0.33547085523605347, "learning_rate": 9.457137475232537e-06, "loss": 0.06531786918640137, "step": 4785 }, { "epoch": 0.6470299957752429, "grad_norm": 0.5125856995582581, "learning_rate": 9.450778065023019e-06, "loss": 0.07713031768798828, "step": 4786 }, { "epoch": 0.6471651880016899, "grad_norm": 0.32903537154197693, "learning_rate": 9.444419810191091e-06, "loss": 0.0629720687866211, "step": 4787 }, { "epoch": 0.6473003802281369, "grad_norm": 0.34005698561668396, "learning_rate": 9.43806271206059e-06, "loss": 0.062456607818603516, "step": 4788 }, { "epoch": 0.6474355724545838, "grad_norm": 0.35717150568962097, "learning_rate": 9.431706771955089e-06, "loss": 0.0530930757522583, "step": 4789 }, { "epoch": 0.6475707646810308, "grad_norm": 0.17114569246768951, "learning_rate": 9.425351991197937e-06, "loss": 0.045818328857421875, "step": 4790 }, { "epoch": 0.6477059569074778, "grad_norm": 0.2643604278564453, "learning_rate": 9.418998371112221e-06, "loss": 0.057444095611572266, "step": 4791 }, { "epoch": 0.6478411491339248, "grad_norm": 0.17470473051071167, "learning_rate": 9.412645913020807e-06, "loss": 0.04263979196548462, "step": 4792 }, { "epoch": 0.6479763413603717, "grad_norm": 0.25354933738708496, "learning_rate": 9.406294618246313e-06, "loss": 0.07153701782226562, "step": 4793 }, { "epoch": 0.6481115335868187, "grad_norm": 0.3025719225406647, "learning_rate": 9.399944488111103e-06, "loss": 0.05858159065246582, "step": 4794 }, { "epoch": 0.6482467258132657, "grad_norm": 0.2793440520763397, "learning_rate": 9.39359552393732e-06, "loss": 0.0573887825012207, "step": 4795 }, { "epoch": 0.6483819180397127, "grad_norm": 0.2921557128429413, "learning_rate": 9.387247727046845e-06, "loss": 0.04311037063598633, "step": 4796 }, { "epoch": 0.6485171102661597, "grad_norm": 0.2962039113044739, "learning_rate": 9.380901098761319e-06, "loss": 0.07325172424316406, "step": 4797 }, { "epoch": 0.6486523024926066, "grad_norm": 0.2610384225845337, "learning_rate": 9.374555640402153e-06, "loss": 0.03996849060058594, "step": 4798 }, { "epoch": 0.6487874947190536, "grad_norm": 0.34995609521865845, "learning_rate": 9.368211353290503e-06, "loss": 0.0531916618347168, "step": 4799 }, { "epoch": 0.6489226869455006, "grad_norm": 0.5493077635765076, "learning_rate": 9.36186823874728e-06, "loss": 0.0944375991821289, "step": 4800 }, { "epoch": 0.6490578791719476, "grad_norm": 0.147910937666893, "learning_rate": 9.355526298093152e-06, "loss": 0.04943704605102539, "step": 4801 }, { "epoch": 0.6491930713983946, "grad_norm": 0.5425330400466919, "learning_rate": 9.34918553264855e-06, "loss": 0.08262139558792114, "step": 4802 }, { "epoch": 0.6493282636248415, "grad_norm": 0.3683369755744934, "learning_rate": 9.342845943733658e-06, "loss": 0.0776968002319336, "step": 4803 }, { "epoch": 0.6494634558512885, "grad_norm": 0.3404667377471924, "learning_rate": 9.336507532668407e-06, "loss": 0.07655715942382812, "step": 4804 }, { "epoch": 0.6495986480777355, "grad_norm": 0.20939965546131134, "learning_rate": 9.33017030077249e-06, "loss": 0.04820966720581055, "step": 4805 }, { "epoch": 0.6497338403041825, "grad_norm": 0.570598304271698, "learning_rate": 9.323834249365346e-06, "loss": 0.07071280479431152, "step": 4806 }, { "epoch": 0.6498690325306294, "grad_norm": 0.1624952256679535, "learning_rate": 9.317499379766183e-06, "loss": 0.03916573524475098, "step": 4807 }, { "epoch": 0.6500042247570764, "grad_norm": 0.22451962530612946, "learning_rate": 9.311165693293954e-06, "loss": 0.05844736099243164, "step": 4808 }, { "epoch": 0.6501394169835234, "grad_norm": 0.24295949935913086, "learning_rate": 9.304833191267364e-06, "loss": 0.05803394317626953, "step": 4809 }, { "epoch": 0.6502746092099704, "grad_norm": 0.3330073356628418, "learning_rate": 9.298501875004874e-06, "loss": 0.07588577270507812, "step": 4810 }, { "epoch": 0.6504098014364174, "grad_norm": 0.2678002715110779, "learning_rate": 9.292171745824695e-06, "loss": 0.06055879592895508, "step": 4811 }, { "epoch": 0.6505449936628643, "grad_norm": 0.2708369195461273, "learning_rate": 9.285842805044797e-06, "loss": 0.06868457794189453, "step": 4812 }, { "epoch": 0.6506801858893113, "grad_norm": 0.19736354053020477, "learning_rate": 9.279515053982905e-06, "loss": 0.04729270935058594, "step": 4813 }, { "epoch": 0.6508153781157583, "grad_norm": 0.3077872693538666, "learning_rate": 9.273188493956476e-06, "loss": 0.056958556175231934, "step": 4814 }, { "epoch": 0.6509505703422053, "grad_norm": 0.362509548664093, "learning_rate": 9.266863126282746e-06, "loss": 0.05745649337768555, "step": 4815 }, { "epoch": 0.6510857625686522, "grad_norm": 0.343902587890625, "learning_rate": 9.260538952278683e-06, "loss": 0.07398080825805664, "step": 4816 }, { "epoch": 0.6512209547950992, "grad_norm": 0.19427059590816498, "learning_rate": 9.254215973261014e-06, "loss": 0.045670509338378906, "step": 4817 }, { "epoch": 0.6513561470215462, "grad_norm": 0.26505959033966064, "learning_rate": 9.247894190546228e-06, "loss": 0.05563807487487793, "step": 4818 }, { "epoch": 0.6514913392479933, "grad_norm": 0.21521027386188507, "learning_rate": 9.241573605450539e-06, "loss": 0.04488945007324219, "step": 4819 }, { "epoch": 0.6516265314744403, "grad_norm": 0.23863205313682556, "learning_rate": 9.235254219289937e-06, "loss": 0.05624032020568848, "step": 4820 }, { "epoch": 0.6517617237008873, "grad_norm": 0.31005093455314636, "learning_rate": 9.228936033380143e-06, "loss": 0.05953836441040039, "step": 4821 }, { "epoch": 0.6518969159273342, "grad_norm": 0.6588156223297119, "learning_rate": 9.222619049036649e-06, "loss": 0.09979248046875, "step": 4822 }, { "epoch": 0.6520321081537812, "grad_norm": 0.33989521861076355, "learning_rate": 9.216303267574674e-06, "loss": 0.09782075881958008, "step": 4823 }, { "epoch": 0.6521673003802282, "grad_norm": 0.2392580211162567, "learning_rate": 9.209988690309198e-06, "loss": 0.061174869537353516, "step": 4824 }, { "epoch": 0.6523024926066752, "grad_norm": 0.28097352385520935, "learning_rate": 9.203675318554956e-06, "loss": 0.05446434020996094, "step": 4825 }, { "epoch": 0.6524376848331221, "grad_norm": 0.30071333050727844, "learning_rate": 9.19736315362642e-06, "loss": 0.07001447677612305, "step": 4826 }, { "epoch": 0.6525728770595691, "grad_norm": 0.27183136343955994, "learning_rate": 9.191052196837825e-06, "loss": 0.07165145874023438, "step": 4827 }, { "epoch": 0.6527080692860161, "grad_norm": 0.23894578218460083, "learning_rate": 9.184742449503135e-06, "loss": 0.04506540298461914, "step": 4828 }, { "epoch": 0.6528432615124631, "grad_norm": 0.4670444428920746, "learning_rate": 9.178433912936077e-06, "loss": 0.07266426086425781, "step": 4829 }, { "epoch": 0.6529784537389101, "grad_norm": 0.2888253629207611, "learning_rate": 9.172126588450125e-06, "loss": 0.08974266052246094, "step": 4830 }, { "epoch": 0.653113645965357, "grad_norm": 0.23133684694766998, "learning_rate": 9.165820477358491e-06, "loss": 0.05648946762084961, "step": 4831 }, { "epoch": 0.653248838191804, "grad_norm": 0.43320906162261963, "learning_rate": 9.159515580974154e-06, "loss": 0.08557617664337158, "step": 4832 }, { "epoch": 0.653384030418251, "grad_norm": 0.26697322726249695, "learning_rate": 9.15321190060981e-06, "loss": 0.06428468227386475, "step": 4833 }, { "epoch": 0.653519222644698, "grad_norm": 0.4658189117908478, "learning_rate": 9.14690943757793e-06, "loss": 0.08957171440124512, "step": 4834 }, { "epoch": 0.653654414871145, "grad_norm": 0.27478429675102234, "learning_rate": 9.14060819319072e-06, "loss": 0.07382440567016602, "step": 4835 }, { "epoch": 0.6537896070975919, "grad_norm": 0.24716217815876007, "learning_rate": 9.134308168760127e-06, "loss": 0.05063152313232422, "step": 4836 }, { "epoch": 0.6539247993240389, "grad_norm": 0.4276491403579712, "learning_rate": 9.128009365597854e-06, "loss": 0.062435150146484375, "step": 4837 }, { "epoch": 0.6540599915504859, "grad_norm": 0.5603787302970886, "learning_rate": 9.121711785015342e-06, "loss": 0.06441593170166016, "step": 4838 }, { "epoch": 0.6541951837769329, "grad_norm": 0.31718993186950684, "learning_rate": 9.115415428323787e-06, "loss": 0.07912254333496094, "step": 4839 }, { "epoch": 0.6543303760033798, "grad_norm": 0.48668864369392395, "learning_rate": 9.109120296834118e-06, "loss": 0.07065677642822266, "step": 4840 }, { "epoch": 0.6544655682298268, "grad_norm": 0.26175540685653687, "learning_rate": 9.10282639185702e-06, "loss": 0.06042909622192383, "step": 4841 }, { "epoch": 0.6546007604562738, "grad_norm": 0.24747289717197418, "learning_rate": 9.096533714702913e-06, "loss": 0.05627012252807617, "step": 4842 }, { "epoch": 0.6547359526827208, "grad_norm": 0.21935872733592987, "learning_rate": 9.090242266681967e-06, "loss": 0.0528559684753418, "step": 4843 }, { "epoch": 0.6548711449091678, "grad_norm": 0.5683515667915344, "learning_rate": 9.083952049104094e-06, "loss": 0.06490063667297363, "step": 4844 }, { "epoch": 0.6550063371356147, "grad_norm": 0.23414266109466553, "learning_rate": 9.07766306327896e-06, "loss": 0.0586695671081543, "step": 4845 }, { "epoch": 0.6551415293620617, "grad_norm": 0.3659723401069641, "learning_rate": 9.071375310515949e-06, "loss": 0.06589281558990479, "step": 4846 }, { "epoch": 0.6552767215885087, "grad_norm": 0.48043200373649597, "learning_rate": 9.065088792124219e-06, "loss": 0.06490880250930786, "step": 4847 }, { "epoch": 0.6554119138149557, "grad_norm": 0.3887607455253601, "learning_rate": 9.058803509412647e-06, "loss": 0.07252264022827148, "step": 4848 }, { "epoch": 0.6555471060414026, "grad_norm": 0.328997939825058, "learning_rate": 9.05251946368987e-06, "loss": 0.057118892669677734, "step": 4849 }, { "epoch": 0.6556822982678496, "grad_norm": 0.5450140237808228, "learning_rate": 9.046236656264258e-06, "loss": 0.05856800079345703, "step": 4850 }, { "epoch": 0.6558174904942966, "grad_norm": 0.3553744852542877, "learning_rate": 9.03995508844392e-06, "loss": 0.08736777305603027, "step": 4851 }, { "epoch": 0.6559526827207436, "grad_norm": 0.1743435114622116, "learning_rate": 9.033674761536718e-06, "loss": 0.040869712829589844, "step": 4852 }, { "epoch": 0.6560878749471906, "grad_norm": 0.2205936163663864, "learning_rate": 9.027395676850244e-06, "loss": 0.04245615005493164, "step": 4853 }, { "epoch": 0.6562230671736375, "grad_norm": 0.38957759737968445, "learning_rate": 9.02111783569184e-06, "loss": 0.07975459098815918, "step": 4854 }, { "epoch": 0.6563582594000845, "grad_norm": 0.14929905533790588, "learning_rate": 9.014841239368591e-06, "loss": 0.03610038757324219, "step": 4855 }, { "epoch": 0.6564934516265315, "grad_norm": 0.33773738145828247, "learning_rate": 9.008565889187308e-06, "loss": 0.0732421875, "step": 4856 }, { "epoch": 0.6566286438529785, "grad_norm": 0.35000213980674744, "learning_rate": 9.00229178645456e-06, "loss": 0.08603858947753906, "step": 4857 }, { "epoch": 0.6567638360794255, "grad_norm": 0.3118942081928253, "learning_rate": 8.996018932476641e-06, "loss": 0.06601238250732422, "step": 4858 }, { "epoch": 0.6568990283058724, "grad_norm": 0.3083891272544861, "learning_rate": 8.989747328559606e-06, "loss": 0.060178279876708984, "step": 4859 }, { "epoch": 0.6570342205323194, "grad_norm": 0.3511293828487396, "learning_rate": 8.98347697600922e-06, "loss": 0.07991886138916016, "step": 4860 }, { "epoch": 0.6571694127587664, "grad_norm": 0.34385982155799866, "learning_rate": 8.977207876131013e-06, "loss": 0.050768136978149414, "step": 4861 }, { "epoch": 0.6573046049852134, "grad_norm": 0.3257488012313843, "learning_rate": 8.970940030230245e-06, "loss": 0.06351375579833984, "step": 4862 }, { "epoch": 0.6574397972116603, "grad_norm": 0.23448587954044342, "learning_rate": 8.96467343961191e-06, "loss": 0.053433895111083984, "step": 4863 }, { "epoch": 0.6575749894381073, "grad_norm": 0.29061970114707947, "learning_rate": 8.958408105580759e-06, "loss": 0.06228160858154297, "step": 4864 }, { "epoch": 0.6577101816645543, "grad_norm": 0.33464333415031433, "learning_rate": 8.952144029441248e-06, "loss": 0.07623767852783203, "step": 4865 }, { "epoch": 0.6578453738910013, "grad_norm": 0.26314496994018555, "learning_rate": 8.945881212497603e-06, "loss": 0.05088233947753906, "step": 4866 }, { "epoch": 0.6579805661174483, "grad_norm": 0.4107857346534729, "learning_rate": 8.939619656053777e-06, "loss": 0.06191253662109375, "step": 4867 }, { "epoch": 0.6581157583438952, "grad_norm": 0.1694977730512619, "learning_rate": 8.933359361413456e-06, "loss": 0.0455777645111084, "step": 4868 }, { "epoch": 0.6582509505703422, "grad_norm": 0.15742100775241852, "learning_rate": 8.92710032988007e-06, "loss": 0.040313780307769775, "step": 4869 }, { "epoch": 0.6583861427967892, "grad_norm": 0.27373185753822327, "learning_rate": 8.920842562756773e-06, "loss": 0.06558692455291748, "step": 4870 }, { "epoch": 0.6585213350232362, "grad_norm": 0.27360764145851135, "learning_rate": 8.914586061346474e-06, "loss": 0.058560848236083984, "step": 4871 }, { "epoch": 0.6586565272496832, "grad_norm": 0.41100335121154785, "learning_rate": 8.908330826951811e-06, "loss": 0.06381797790527344, "step": 4872 }, { "epoch": 0.6587917194761301, "grad_norm": 0.403158038854599, "learning_rate": 8.902076860875155e-06, "loss": 0.08599472045898438, "step": 4873 }, { "epoch": 0.6589269117025771, "grad_norm": 0.2476569265127182, "learning_rate": 8.895824164418615e-06, "loss": 0.06425809860229492, "step": 4874 }, { "epoch": 0.6590621039290241, "grad_norm": 0.2766430675983429, "learning_rate": 8.889572738884033e-06, "loss": 0.04670858383178711, "step": 4875 }, { "epoch": 0.6591972961554711, "grad_norm": 0.20553795993328094, "learning_rate": 8.88332258557299e-06, "loss": 0.027480363845825195, "step": 4876 }, { "epoch": 0.659332488381918, "grad_norm": 0.2489897906780243, "learning_rate": 8.877073705786806e-06, "loss": 0.05231022834777832, "step": 4877 }, { "epoch": 0.659467680608365, "grad_norm": 0.589600682258606, "learning_rate": 8.870826100826527e-06, "loss": 0.07990264892578125, "step": 4878 }, { "epoch": 0.659602872834812, "grad_norm": 0.3277297019958496, "learning_rate": 8.86457977199294e-06, "loss": 0.07029342651367188, "step": 4879 }, { "epoch": 0.659738065061259, "grad_norm": 0.2849866449832916, "learning_rate": 8.85833472058656e-06, "loss": 0.05579376220703125, "step": 4880 }, { "epoch": 0.659873257287706, "grad_norm": 0.2331986129283905, "learning_rate": 8.852090947907643e-06, "loss": 0.04408740997314453, "step": 4881 }, { "epoch": 0.6600084495141529, "grad_norm": 0.46188732981681824, "learning_rate": 8.84584845525618e-06, "loss": 0.08914899826049805, "step": 4882 }, { "epoch": 0.6601436417405999, "grad_norm": 0.18672581017017365, "learning_rate": 8.83960724393188e-06, "loss": 0.044676899909973145, "step": 4883 }, { "epoch": 0.6602788339670469, "grad_norm": 0.368912011384964, "learning_rate": 8.833367315234206e-06, "loss": 0.07218313217163086, "step": 4884 }, { "epoch": 0.6604140261934939, "grad_norm": 0.16541294753551483, "learning_rate": 8.82712867046234e-06, "loss": 0.0427403450012207, "step": 4885 }, { "epoch": 0.6605492184199409, "grad_norm": 0.34060078859329224, "learning_rate": 8.820891310915203e-06, "loss": 0.04632425308227539, "step": 4886 }, { "epoch": 0.6606844106463878, "grad_norm": 0.34567373991012573, "learning_rate": 8.81465523789145e-06, "loss": 0.06366896629333496, "step": 4887 }, { "epoch": 0.6608196028728348, "grad_norm": 0.2853766679763794, "learning_rate": 8.808420452689455e-06, "loss": 0.043752312660217285, "step": 4888 }, { "epoch": 0.6609547950992818, "grad_norm": 0.46743839979171753, "learning_rate": 8.802186956607344e-06, "loss": 0.08290624618530273, "step": 4889 }, { "epoch": 0.6610899873257288, "grad_norm": 0.38309580087661743, "learning_rate": 8.795954750942954e-06, "loss": 0.08163714408874512, "step": 4890 }, { "epoch": 0.6612251795521757, "grad_norm": 0.3507712185382843, "learning_rate": 8.789723836993878e-06, "loss": 0.08322834968566895, "step": 4891 }, { "epoch": 0.6613603717786227, "grad_norm": 0.3610805571079254, "learning_rate": 8.783494216057407e-06, "loss": 0.06266021728515625, "step": 4892 }, { "epoch": 0.6614955640050697, "grad_norm": 0.5142252445220947, "learning_rate": 8.777265889430593e-06, "loss": 0.08034348487854004, "step": 4893 }, { "epoch": 0.6616307562315167, "grad_norm": 0.25728100538253784, "learning_rate": 8.771038858410206e-06, "loss": 0.05245685577392578, "step": 4894 }, { "epoch": 0.6617659484579637, "grad_norm": 0.3067833483219147, "learning_rate": 8.764813124292744e-06, "loss": 0.07212018966674805, "step": 4895 }, { "epoch": 0.6619011406844106, "grad_norm": 0.30051174759864807, "learning_rate": 8.758588688374445e-06, "loss": 0.055178165435791016, "step": 4896 }, { "epoch": 0.6620363329108576, "grad_norm": 0.43290379643440247, "learning_rate": 8.752365551951262e-06, "loss": 0.09206390380859375, "step": 4897 }, { "epoch": 0.6621715251373046, "grad_norm": 0.4254007339477539, "learning_rate": 8.74614371631888e-06, "loss": 0.059029579162597656, "step": 4898 }, { "epoch": 0.6623067173637516, "grad_norm": 0.4177686870098114, "learning_rate": 8.739923182772732e-06, "loss": 0.06552982330322266, "step": 4899 }, { "epoch": 0.6624419095901986, "grad_norm": 0.43566298484802246, "learning_rate": 8.733703952607956e-06, "loss": 0.09009742736816406, "step": 4900 }, { "epoch": 0.6625771018166455, "grad_norm": 0.28038373589515686, "learning_rate": 8.727486027119443e-06, "loss": 0.08281230926513672, "step": 4901 }, { "epoch": 0.6627122940430925, "grad_norm": 0.37428146600723267, "learning_rate": 8.721269407601783e-06, "loss": 0.06670522689819336, "step": 4902 }, { "epoch": 0.6628474862695395, "grad_norm": 0.22457443177700043, "learning_rate": 8.71505409534931e-06, "loss": 0.05708813667297363, "step": 4903 }, { "epoch": 0.6629826784959865, "grad_norm": 0.26721227169036865, "learning_rate": 8.708840091656093e-06, "loss": 0.07717132568359375, "step": 4904 }, { "epoch": 0.6631178707224334, "grad_norm": 0.2777976393699646, "learning_rate": 8.70262739781592e-06, "loss": 0.054625749588012695, "step": 4905 }, { "epoch": 0.6632530629488804, "grad_norm": 0.18250279128551483, "learning_rate": 8.696416015122302e-06, "loss": 0.0447998046875, "step": 4906 }, { "epoch": 0.6633882551753274, "grad_norm": 0.32662713527679443, "learning_rate": 8.690205944868487e-06, "loss": 0.07663249969482422, "step": 4907 }, { "epoch": 0.6635234474017744, "grad_norm": 0.302788108587265, "learning_rate": 8.683997188347436e-06, "loss": 0.05095010995864868, "step": 4908 }, { "epoch": 0.6636586396282214, "grad_norm": 0.5169642567634583, "learning_rate": 8.677789746851855e-06, "loss": 0.07211065292358398, "step": 4909 }, { "epoch": 0.6637938318546683, "grad_norm": 0.38796767592430115, "learning_rate": 8.671583621674167e-06, "loss": 0.058159828186035156, "step": 4910 }, { "epoch": 0.6639290240811153, "grad_norm": 0.20360036194324493, "learning_rate": 8.665378814106512e-06, "loss": 0.05255138874053955, "step": 4911 }, { "epoch": 0.6640642163075623, "grad_norm": 0.16078992187976837, "learning_rate": 8.65917532544077e-06, "loss": 0.05150127410888672, "step": 4912 }, { "epoch": 0.6641994085340093, "grad_norm": 0.2832340896129608, "learning_rate": 8.652973156968532e-06, "loss": 0.05080914497375488, "step": 4913 }, { "epoch": 0.6643346007604563, "grad_norm": 0.31240326166152954, "learning_rate": 8.646772309981141e-06, "loss": 0.06571626663208008, "step": 4914 }, { "epoch": 0.6644697929869032, "grad_norm": 0.2230207473039627, "learning_rate": 8.640572785769624e-06, "loss": 0.059484243392944336, "step": 4915 }, { "epoch": 0.6646049852133502, "grad_norm": 0.21061892807483673, "learning_rate": 8.63437458562477e-06, "loss": 0.06384515762329102, "step": 4916 }, { "epoch": 0.6647401774397972, "grad_norm": 0.49494194984436035, "learning_rate": 8.628177710837068e-06, "loss": 0.054257869720458984, "step": 4917 }, { "epoch": 0.6648753696662442, "grad_norm": 0.3318653106689453, "learning_rate": 8.621982162696752e-06, "loss": 0.05859947204589844, "step": 4918 }, { "epoch": 0.6650105618926911, "grad_norm": 0.2751937806606293, "learning_rate": 8.615787942493766e-06, "loss": 0.062012553215026855, "step": 4919 }, { "epoch": 0.6651457541191381, "grad_norm": 0.2703063488006592, "learning_rate": 8.609595051517765e-06, "loss": 0.05864429473876953, "step": 4920 }, { "epoch": 0.6652809463455851, "grad_norm": 0.3056776523590088, "learning_rate": 8.603403491058157e-06, "loss": 0.08215045928955078, "step": 4921 }, { "epoch": 0.6654161385720321, "grad_norm": 0.21935805678367615, "learning_rate": 8.597213262404046e-06, "loss": 0.04139423370361328, "step": 4922 }, { "epoch": 0.6655513307984791, "grad_norm": 0.4530940651893616, "learning_rate": 8.591024366844291e-06, "loss": 0.11154651641845703, "step": 4923 }, { "epoch": 0.665686523024926, "grad_norm": 0.3644537329673767, "learning_rate": 8.584836805667434e-06, "loss": 0.07296013832092285, "step": 4924 }, { "epoch": 0.665821715251373, "grad_norm": 1.0423619747161865, "learning_rate": 8.578650580161754e-06, "loss": 0.09050202369689941, "step": 4925 }, { "epoch": 0.66595690747782, "grad_norm": 0.4302298426628113, "learning_rate": 8.572465691615275e-06, "loss": 0.086334228515625, "step": 4926 }, { "epoch": 0.666092099704267, "grad_norm": 0.16452211141586304, "learning_rate": 8.56628214131571e-06, "loss": 0.03101372718811035, "step": 4927 }, { "epoch": 0.666227291930714, "grad_norm": 0.2247740477323532, "learning_rate": 8.560099930550523e-06, "loss": 0.06033158302307129, "step": 4928 }, { "epoch": 0.6663624841571609, "grad_norm": 0.3494688868522644, "learning_rate": 8.553919060606866e-06, "loss": 0.06897926330566406, "step": 4929 }, { "epoch": 0.6664976763836079, "grad_norm": 0.22867992520332336, "learning_rate": 8.54773953277163e-06, "loss": 0.0646820068359375, "step": 4930 }, { "epoch": 0.6666328686100549, "grad_norm": 0.2649802565574646, "learning_rate": 8.541561348331433e-06, "loss": 0.04089951515197754, "step": 4931 }, { "epoch": 0.6667680608365019, "grad_norm": 0.23530559241771698, "learning_rate": 8.535384508572603e-06, "loss": 0.05472928285598755, "step": 4932 }, { "epoch": 0.6669032530629488, "grad_norm": 0.30980250239372253, "learning_rate": 8.529209014781202e-06, "loss": 0.08345746994018555, "step": 4933 }, { "epoch": 0.6670384452893958, "grad_norm": 0.3073015809059143, "learning_rate": 8.523034868242984e-06, "loss": 0.07625198364257812, "step": 4934 }, { "epoch": 0.6671736375158428, "grad_norm": 0.3319665193557739, "learning_rate": 8.51686207024344e-06, "loss": 0.05214571952819824, "step": 4935 }, { "epoch": 0.6673088297422898, "grad_norm": 0.4453583061695099, "learning_rate": 8.510690622067792e-06, "loss": 0.06069660186767578, "step": 4936 }, { "epoch": 0.6674440219687368, "grad_norm": 0.25854286551475525, "learning_rate": 8.50452052500096e-06, "loss": 0.0675954818725586, "step": 4937 }, { "epoch": 0.6675792141951837, "grad_norm": 0.35607650876045227, "learning_rate": 8.498351780327594e-06, "loss": 0.059325218200683594, "step": 4938 }, { "epoch": 0.6677144064216307, "grad_norm": 0.6138937473297119, "learning_rate": 8.492184389332061e-06, "loss": 0.08625459671020508, "step": 4939 }, { "epoch": 0.6678495986480777, "grad_norm": 0.33962374925613403, "learning_rate": 8.486018353298432e-06, "loss": 0.05710101127624512, "step": 4940 }, { "epoch": 0.6679847908745247, "grad_norm": 0.3117373585700989, "learning_rate": 8.479853673510528e-06, "loss": 0.0627889633178711, "step": 4941 }, { "epoch": 0.6681199831009716, "grad_norm": 0.3765493333339691, "learning_rate": 8.473690351251855e-06, "loss": 0.08568382263183594, "step": 4942 }, { "epoch": 0.6682551753274186, "grad_norm": 0.2630918025970459, "learning_rate": 8.467528387805656e-06, "loss": 0.04690957069396973, "step": 4943 }, { "epoch": 0.6683903675538656, "grad_norm": 0.29838240146636963, "learning_rate": 8.461367784454881e-06, "loss": 0.06570672988891602, "step": 4944 }, { "epoch": 0.6685255597803126, "grad_norm": 0.34062016010284424, "learning_rate": 8.455208542482195e-06, "loss": 0.06801176071166992, "step": 4945 }, { "epoch": 0.6686607520067596, "grad_norm": 0.1843264400959015, "learning_rate": 8.449050663170004e-06, "loss": 0.03787040710449219, "step": 4946 }, { "epoch": 0.6687959442332065, "grad_norm": 0.1837097555398941, "learning_rate": 8.442894147800387e-06, "loss": 0.0482785701751709, "step": 4947 }, { "epoch": 0.6689311364596535, "grad_norm": 0.2825604975223541, "learning_rate": 8.436738997655184e-06, "loss": 0.06084108352661133, "step": 4948 }, { "epoch": 0.6690663286861005, "grad_norm": 0.35577651858329773, "learning_rate": 8.430585214015918e-06, "loss": 0.06685876846313477, "step": 4949 }, { "epoch": 0.6692015209125475, "grad_norm": 0.24625088274478912, "learning_rate": 8.424432798163838e-06, "loss": 0.05525350570678711, "step": 4950 }, { "epoch": 0.6693367131389945, "grad_norm": 0.1947268694639206, "learning_rate": 8.418281751379926e-06, "loss": 0.0492253303527832, "step": 4951 }, { "epoch": 0.6694719053654414, "grad_norm": 0.32005617022514343, "learning_rate": 8.41213207494484e-06, "loss": 0.060690879821777344, "step": 4952 }, { "epoch": 0.6696070975918884, "grad_norm": 0.1298406720161438, "learning_rate": 8.405983770138992e-06, "loss": 0.03653430938720703, "step": 4953 }, { "epoch": 0.6697422898183355, "grad_norm": 0.17876555025577545, "learning_rate": 8.399836838242479e-06, "loss": 0.0449223518371582, "step": 4954 }, { "epoch": 0.6698774820447825, "grad_norm": 0.34702861309051514, "learning_rate": 8.393691280535143e-06, "loss": 0.07400703430175781, "step": 4955 }, { "epoch": 0.6700126742712295, "grad_norm": 0.21426530182361603, "learning_rate": 8.387547098296516e-06, "loss": 0.05273914337158203, "step": 4956 }, { "epoch": 0.6701478664976764, "grad_norm": 0.24043278396129608, "learning_rate": 8.38140429280583e-06, "loss": 0.05688631534576416, "step": 4957 }, { "epoch": 0.6702830587241234, "grad_norm": 0.5707756280899048, "learning_rate": 8.375262865342073e-06, "loss": 0.0760960578918457, "step": 4958 }, { "epoch": 0.6704182509505704, "grad_norm": 0.2311011552810669, "learning_rate": 8.36912281718391e-06, "loss": 0.061896324157714844, "step": 4959 }, { "epoch": 0.6705534431770174, "grad_norm": 0.23181913793087006, "learning_rate": 8.362984149609748e-06, "loss": 0.0648641586303711, "step": 4960 }, { "epoch": 0.6706886354034644, "grad_norm": 0.47086256742477417, "learning_rate": 8.356846863897672e-06, "loss": 0.06805562973022461, "step": 4961 }, { "epoch": 0.6708238276299113, "grad_norm": 0.3221588730812073, "learning_rate": 8.350710961325498e-06, "loss": 0.06921911239624023, "step": 4962 }, { "epoch": 0.6709590198563583, "grad_norm": 0.19285617768764496, "learning_rate": 8.344576443170768e-06, "loss": 0.040163516998291016, "step": 4963 }, { "epoch": 0.6710942120828053, "grad_norm": 0.46340784430503845, "learning_rate": 8.338443310710708e-06, "loss": 0.07517766952514648, "step": 4964 }, { "epoch": 0.6712294043092523, "grad_norm": 0.3034537732601166, "learning_rate": 8.332311565222284e-06, "loss": 0.0892934799194336, "step": 4965 }, { "epoch": 0.6713645965356992, "grad_norm": 0.5148158669471741, "learning_rate": 8.326181207982145e-06, "loss": 0.0970768928527832, "step": 4966 }, { "epoch": 0.6714997887621462, "grad_norm": 0.2379639893770218, "learning_rate": 8.32005224026666e-06, "loss": 0.05786752700805664, "step": 4967 }, { "epoch": 0.6716349809885932, "grad_norm": 0.5114599466323853, "learning_rate": 8.313924663351927e-06, "loss": 0.0935206413269043, "step": 4968 }, { "epoch": 0.6717701732150402, "grad_norm": 0.31471630930900574, "learning_rate": 8.307798478513733e-06, "loss": 0.0633854866027832, "step": 4969 }, { "epoch": 0.6719053654414872, "grad_norm": 0.3404069244861603, "learning_rate": 8.301673687027583e-06, "loss": 0.050209999084472656, "step": 4970 }, { "epoch": 0.6720405576679341, "grad_norm": 0.12643036246299744, "learning_rate": 8.295550290168692e-06, "loss": 0.027638673782348633, "step": 4971 }, { "epoch": 0.6721757498943811, "grad_norm": 0.2427472323179245, "learning_rate": 8.289428289211977e-06, "loss": 0.05264449119567871, "step": 4972 }, { "epoch": 0.6723109421208281, "grad_norm": 0.2014956772327423, "learning_rate": 8.283307685432083e-06, "loss": 0.03870415687561035, "step": 4973 }, { "epoch": 0.6724461343472751, "grad_norm": 0.24342061579227448, "learning_rate": 8.277188480103348e-06, "loss": 0.06587362289428711, "step": 4974 }, { "epoch": 0.672581326573722, "grad_norm": 0.5443671941757202, "learning_rate": 8.271070674499821e-06, "loss": 0.07341784238815308, "step": 4975 }, { "epoch": 0.672716518800169, "grad_norm": 0.3734854757785797, "learning_rate": 8.264954269895262e-06, "loss": 0.058185577392578125, "step": 4976 }, { "epoch": 0.672851711026616, "grad_norm": 0.29039379954338074, "learning_rate": 8.258839267563134e-06, "loss": 0.07250142097473145, "step": 4977 }, { "epoch": 0.672986903253063, "grad_norm": 0.1804579645395279, "learning_rate": 8.252725668776623e-06, "loss": 0.04205751419067383, "step": 4978 }, { "epoch": 0.67312209547951, "grad_norm": 0.35668709874153137, "learning_rate": 8.24661347480861e-06, "loss": 0.059288978576660156, "step": 4979 }, { "epoch": 0.6732572877059569, "grad_norm": 0.6256442666053772, "learning_rate": 8.240502686931686e-06, "loss": 0.0910944938659668, "step": 4980 }, { "epoch": 0.6733924799324039, "grad_norm": 0.36551544070243835, "learning_rate": 8.234393306418148e-06, "loss": 0.07981348037719727, "step": 4981 }, { "epoch": 0.6735276721588509, "grad_norm": 0.2094249129295349, "learning_rate": 8.228285334539995e-06, "loss": 0.05939030647277832, "step": 4982 }, { "epoch": 0.6736628643852979, "grad_norm": 0.33449500799179077, "learning_rate": 8.22217877256896e-06, "loss": 0.0742940902709961, "step": 4983 }, { "epoch": 0.6737980566117449, "grad_norm": 0.2808586657047272, "learning_rate": 8.216073621776436e-06, "loss": 0.055348336696624756, "step": 4984 }, { "epoch": 0.6739332488381918, "grad_norm": 0.3043719530105591, "learning_rate": 8.209969883433566e-06, "loss": 0.056771039962768555, "step": 4985 }, { "epoch": 0.6740684410646388, "grad_norm": 0.3775617480278015, "learning_rate": 8.203867558811177e-06, "loss": 0.05510234832763672, "step": 4986 }, { "epoch": 0.6742036332910858, "grad_norm": 0.5224157571792603, "learning_rate": 8.197766649179795e-06, "loss": 0.10012221336364746, "step": 4987 }, { "epoch": 0.6743388255175328, "grad_norm": 0.34463098645210266, "learning_rate": 8.191667155809684e-06, "loss": 0.06544208526611328, "step": 4988 }, { "epoch": 0.6744740177439797, "grad_norm": 0.21582862734794617, "learning_rate": 8.185569079970764e-06, "loss": 0.04866981506347656, "step": 4989 }, { "epoch": 0.6746092099704267, "grad_norm": 0.8155859708786011, "learning_rate": 8.179472422932709e-06, "loss": 0.062165021896362305, "step": 4990 }, { "epoch": 0.6747444021968737, "grad_norm": 0.3104049861431122, "learning_rate": 8.17337718596486e-06, "loss": 0.052139997482299805, "step": 4991 }, { "epoch": 0.6748795944233207, "grad_norm": 0.3490195572376251, "learning_rate": 8.167283370336295e-06, "loss": 0.06679701805114746, "step": 4992 }, { "epoch": 0.6750147866497677, "grad_norm": 0.3360089957714081, "learning_rate": 8.161190977315766e-06, "loss": 0.06595468521118164, "step": 4993 }, { "epoch": 0.6751499788762146, "grad_norm": 0.18689323961734772, "learning_rate": 8.155100008171736e-06, "loss": 0.051087379455566406, "step": 4994 }, { "epoch": 0.6752851711026616, "grad_norm": 0.23811936378479004, "learning_rate": 8.149010464172392e-06, "loss": 0.048638343811035156, "step": 4995 }, { "epoch": 0.6754203633291086, "grad_norm": 0.24320784211158752, "learning_rate": 8.142922346585597e-06, "loss": 0.05586671829223633, "step": 4996 }, { "epoch": 0.6755555555555556, "grad_norm": 0.43750205636024475, "learning_rate": 8.13683565667895e-06, "loss": 0.09529876708984375, "step": 4997 }, { "epoch": 0.6756907477820026, "grad_norm": 0.23445385694503784, "learning_rate": 8.13075039571971e-06, "loss": 0.05383414030075073, "step": 4998 }, { "epoch": 0.6758259400084495, "grad_norm": 0.5510262846946716, "learning_rate": 8.124666564974864e-06, "loss": 0.06421995162963867, "step": 4999 }, { "epoch": 0.6759611322348965, "grad_norm": 0.40528297424316406, "learning_rate": 8.11858416571111e-06, "loss": 0.08857870101928711, "step": 5000 }, { "epoch": 0.6760963244613435, "grad_norm": 0.1624949872493744, "learning_rate": 8.112503199194821e-06, "loss": 0.05199551582336426, "step": 5001 }, { "epoch": 0.6762315166877905, "grad_norm": 0.26256316900253296, "learning_rate": 8.106423666692108e-06, "loss": 0.04989594221115112, "step": 5002 }, { "epoch": 0.6763667089142374, "grad_norm": 0.18374936282634735, "learning_rate": 8.100345569468742e-06, "loss": 0.05907416343688965, "step": 5003 }, { "epoch": 0.6765019011406844, "grad_norm": 0.4773852527141571, "learning_rate": 8.094268908790215e-06, "loss": 0.08037519454956055, "step": 5004 }, { "epoch": 0.6766370933671314, "grad_norm": 0.2970427870750427, "learning_rate": 8.088193685921733e-06, "loss": 0.0757899284362793, "step": 5005 }, { "epoch": 0.6767722855935784, "grad_norm": 0.2060684859752655, "learning_rate": 8.082119902128185e-06, "loss": 0.04117774963378906, "step": 5006 }, { "epoch": 0.6769074778200254, "grad_norm": 0.21486613154411316, "learning_rate": 8.076047558674164e-06, "loss": 0.05521678924560547, "step": 5007 }, { "epoch": 0.6770426700464723, "grad_norm": 0.4274241626262665, "learning_rate": 8.069976656823964e-06, "loss": 0.08288079500198364, "step": 5008 }, { "epoch": 0.6771778622729193, "grad_norm": 0.2574945092201233, "learning_rate": 8.063907197841574e-06, "loss": 0.056850433349609375, "step": 5009 }, { "epoch": 0.6773130544993663, "grad_norm": 0.21665945649147034, "learning_rate": 8.057839182990698e-06, "loss": 0.041300177574157715, "step": 5010 }, { "epoch": 0.6774482467258133, "grad_norm": 0.18303847312927246, "learning_rate": 8.051772613534725e-06, "loss": 0.0461878776550293, "step": 5011 }, { "epoch": 0.6775834389522603, "grad_norm": 0.21712854504585266, "learning_rate": 8.045707490736745e-06, "loss": 0.05544382333755493, "step": 5012 }, { "epoch": 0.6777186311787072, "grad_norm": 0.6855584979057312, "learning_rate": 8.039643815859552e-06, "loss": 0.10220527648925781, "step": 5013 }, { "epoch": 0.6778538234051542, "grad_norm": 0.5897433161735535, "learning_rate": 8.033581590165627e-06, "loss": 0.08015584945678711, "step": 5014 }, { "epoch": 0.6779890156316012, "grad_norm": 0.5282954573631287, "learning_rate": 8.027520814917175e-06, "loss": 0.09360551834106445, "step": 5015 }, { "epoch": 0.6781242078580482, "grad_norm": 0.32223278284072876, "learning_rate": 8.021461491376064e-06, "loss": 0.06849193572998047, "step": 5016 }, { "epoch": 0.6782594000844951, "grad_norm": 0.2557280659675598, "learning_rate": 8.015403620803885e-06, "loss": 0.05202054977416992, "step": 5017 }, { "epoch": 0.6783945923109421, "grad_norm": 0.5934728980064392, "learning_rate": 8.009347204461922e-06, "loss": 0.1055903434753418, "step": 5018 }, { "epoch": 0.6785297845373891, "grad_norm": 0.3711104393005371, "learning_rate": 8.003292243611143e-06, "loss": 0.07085001468658447, "step": 5019 }, { "epoch": 0.6786649767638361, "grad_norm": 0.2455320805311203, "learning_rate": 7.99723873951224e-06, "loss": 0.055066585540771484, "step": 5020 }, { "epoch": 0.6788001689902831, "grad_norm": 0.3658488690853119, "learning_rate": 7.991186693425563e-06, "loss": 0.06515157222747803, "step": 5021 }, { "epoch": 0.67893536121673, "grad_norm": 0.27166593074798584, "learning_rate": 7.9851361066112e-06, "loss": 0.060401201248168945, "step": 5022 }, { "epoch": 0.679070553443177, "grad_norm": 0.17168448865413666, "learning_rate": 7.979086980328907e-06, "loss": 0.03423810005187988, "step": 5023 }, { "epoch": 0.679205745669624, "grad_norm": 0.15034841001033783, "learning_rate": 7.973039315838137e-06, "loss": 0.03233814239501953, "step": 5024 }, { "epoch": 0.679340937896071, "grad_norm": 0.39001956582069397, "learning_rate": 7.966993114398067e-06, "loss": 0.07499301433563232, "step": 5025 }, { "epoch": 0.679476130122518, "grad_norm": 0.2622569799423218, "learning_rate": 7.960948377267524e-06, "loss": 0.0768585205078125, "step": 5026 }, { "epoch": 0.6796113223489649, "grad_norm": 0.31650036573410034, "learning_rate": 7.954905105705071e-06, "loss": 0.05341529846191406, "step": 5027 }, { "epoch": 0.6797465145754119, "grad_norm": 0.2731419801712036, "learning_rate": 7.948863300968938e-06, "loss": 0.060317277908325195, "step": 5028 }, { "epoch": 0.6798817068018589, "grad_norm": 0.3227729797363281, "learning_rate": 7.942822964317078e-06, "loss": 0.06663703918457031, "step": 5029 }, { "epoch": 0.6800168990283059, "grad_norm": 0.4316539168357849, "learning_rate": 7.936784097007105e-06, "loss": 0.07207369804382324, "step": 5030 }, { "epoch": 0.6801520912547528, "grad_norm": 0.35629063844680786, "learning_rate": 7.930746700296344e-06, "loss": 0.06932616233825684, "step": 5031 }, { "epoch": 0.6802872834811998, "grad_norm": 0.24202732741832733, "learning_rate": 7.924710775441822e-06, "loss": 0.057445526123046875, "step": 5032 }, { "epoch": 0.6804224757076468, "grad_norm": 0.25883346796035767, "learning_rate": 7.918676323700241e-06, "loss": 0.053176820278167725, "step": 5033 }, { "epoch": 0.6805576679340938, "grad_norm": 0.215104877948761, "learning_rate": 7.912643346328023e-06, "loss": 0.036556124687194824, "step": 5034 }, { "epoch": 0.6806928601605408, "grad_norm": 0.3205788731575012, "learning_rate": 7.906611844581251e-06, "loss": 0.04453611373901367, "step": 5035 }, { "epoch": 0.6808280523869877, "grad_norm": 0.33966442942619324, "learning_rate": 7.900581819715713e-06, "loss": 0.06298494338989258, "step": 5036 }, { "epoch": 0.6809632446134347, "grad_norm": 0.20713776350021362, "learning_rate": 7.894553272986901e-06, "loss": 0.06315851211547852, "step": 5037 }, { "epoch": 0.6810984368398817, "grad_norm": 0.31027498841285706, "learning_rate": 7.888526205649993e-06, "loss": 0.06762075424194336, "step": 5038 }, { "epoch": 0.6812336290663287, "grad_norm": 0.3930688202381134, "learning_rate": 7.882500618959849e-06, "loss": 0.07503199577331543, "step": 5039 }, { "epoch": 0.6813688212927757, "grad_norm": 0.2407899796962738, "learning_rate": 7.876476514171033e-06, "loss": 0.05850982666015625, "step": 5040 }, { "epoch": 0.6815040135192226, "grad_norm": 0.3437891900539398, "learning_rate": 7.870453892537788e-06, "loss": 0.05692625045776367, "step": 5041 }, { "epoch": 0.6816392057456696, "grad_norm": 0.2352464497089386, "learning_rate": 7.864432755314068e-06, "loss": 0.054657936096191406, "step": 5042 }, { "epoch": 0.6817743979721166, "grad_norm": 0.29077890515327454, "learning_rate": 7.858413103753499e-06, "loss": 0.05873918533325195, "step": 5043 }, { "epoch": 0.6819095901985636, "grad_norm": 0.6263427734375, "learning_rate": 7.852394939109408e-06, "loss": 0.06770062446594238, "step": 5044 }, { "epoch": 0.6820447824250105, "grad_norm": 0.29261234402656555, "learning_rate": 7.846378262634803e-06, "loss": 0.0538640022277832, "step": 5045 }, { "epoch": 0.6821799746514575, "grad_norm": 0.18424251675605774, "learning_rate": 7.840363075582385e-06, "loss": 0.04834175109863281, "step": 5046 }, { "epoch": 0.6823151668779045, "grad_norm": 0.3204640746116638, "learning_rate": 7.834349379204565e-06, "loss": 0.06139826774597168, "step": 5047 }, { "epoch": 0.6824503591043515, "grad_norm": 0.2606144845485687, "learning_rate": 7.828337174753411e-06, "loss": 0.0481867790222168, "step": 5048 }, { "epoch": 0.6825855513307985, "grad_norm": 0.2833598256111145, "learning_rate": 7.822326463480703e-06, "loss": 0.07135415077209473, "step": 5049 }, { "epoch": 0.6827207435572454, "grad_norm": 0.37110117077827454, "learning_rate": 7.816317246637901e-06, "loss": 0.06330126523971558, "step": 5050 }, { "epoch": 0.6828559357836924, "grad_norm": 0.28293147683143616, "learning_rate": 7.810309525476152e-06, "loss": 0.07352352142333984, "step": 5051 }, { "epoch": 0.6829911280101394, "grad_norm": 0.15649206936359406, "learning_rate": 7.804303301246311e-06, "loss": 0.036525845527648926, "step": 5052 }, { "epoch": 0.6831263202365864, "grad_norm": 0.23132957518100739, "learning_rate": 7.798298575198884e-06, "loss": 0.05054187774658203, "step": 5053 }, { "epoch": 0.6832615124630333, "grad_norm": 0.3829394280910492, "learning_rate": 7.792295348584103e-06, "loss": 0.05855894088745117, "step": 5054 }, { "epoch": 0.6833967046894803, "grad_norm": 0.39166468381881714, "learning_rate": 7.786293622651866e-06, "loss": 0.07469868659973145, "step": 5055 }, { "epoch": 0.6835318969159273, "grad_norm": 0.21898894011974335, "learning_rate": 7.78029339865176e-06, "loss": 0.06132245063781738, "step": 5056 }, { "epoch": 0.6836670891423743, "grad_norm": 0.29900994896888733, "learning_rate": 7.774294677833078e-06, "loss": 0.05816006660461426, "step": 5057 }, { "epoch": 0.6838022813688213, "grad_norm": 0.48562493920326233, "learning_rate": 7.768297461444766e-06, "loss": 0.09121572971343994, "step": 5058 }, { "epoch": 0.6839374735952682, "grad_norm": 0.2602877914905548, "learning_rate": 7.762301750735494e-06, "loss": 0.06448173522949219, "step": 5059 }, { "epoch": 0.6840726658217152, "grad_norm": 0.5472441911697388, "learning_rate": 7.756307546953592e-06, "loss": 0.061374664306640625, "step": 5060 }, { "epoch": 0.6842078580481622, "grad_norm": 0.2805687487125397, "learning_rate": 7.750314851347087e-06, "loss": 0.06966114044189453, "step": 5061 }, { "epoch": 0.6843430502746092, "grad_norm": 0.31316593289375305, "learning_rate": 7.74432366516369e-06, "loss": 0.08157825469970703, "step": 5062 }, { "epoch": 0.6844782425010562, "grad_norm": 0.27892011404037476, "learning_rate": 7.738333989650794e-06, "loss": 0.0483742356300354, "step": 5063 }, { "epoch": 0.6846134347275031, "grad_norm": 0.21512150764465332, "learning_rate": 7.732345826055487e-06, "loss": 0.04469776153564453, "step": 5064 }, { "epoch": 0.6847486269539501, "grad_norm": 0.24923019111156464, "learning_rate": 7.726359175624537e-06, "loss": 0.05147719383239746, "step": 5065 }, { "epoch": 0.6848838191803971, "grad_norm": 0.37343448400497437, "learning_rate": 7.720374039604395e-06, "loss": 0.07387542724609375, "step": 5066 }, { "epoch": 0.6850190114068441, "grad_norm": 0.21732354164123535, "learning_rate": 7.714390419241198e-06, "loss": 0.05731576681137085, "step": 5067 }, { "epoch": 0.685154203633291, "grad_norm": 0.42002561688423157, "learning_rate": 7.70840831578076e-06, "loss": 0.08342552185058594, "step": 5068 }, { "epoch": 0.685289395859738, "grad_norm": 0.18823859095573425, "learning_rate": 7.702427730468601e-06, "loss": 0.0579681396484375, "step": 5069 }, { "epoch": 0.685424588086185, "grad_norm": 0.2545880079269409, "learning_rate": 7.696448664549898e-06, "loss": 0.05961036682128906, "step": 5070 }, { "epoch": 0.685559780312632, "grad_norm": 0.2552105486392975, "learning_rate": 7.690471119269541e-06, "loss": 0.05908393859863281, "step": 5071 }, { "epoch": 0.685694972539079, "grad_norm": 0.19240762293338776, "learning_rate": 7.684495095872073e-06, "loss": 0.04995250701904297, "step": 5072 }, { "epoch": 0.6858301647655259, "grad_norm": 0.5534252524375916, "learning_rate": 7.678520595601728e-06, "loss": 0.08865594863891602, "step": 5073 }, { "epoch": 0.6859653569919729, "grad_norm": 0.2804965078830719, "learning_rate": 7.672547619702445e-06, "loss": 0.05677509307861328, "step": 5074 }, { "epoch": 0.6861005492184199, "grad_norm": 0.26258593797683716, "learning_rate": 7.666576169417823e-06, "loss": 0.0659494400024414, "step": 5075 }, { "epoch": 0.6862357414448669, "grad_norm": 0.21473030745983124, "learning_rate": 7.660606245991147e-06, "loss": 0.04909515380859375, "step": 5076 }, { "epoch": 0.6863709336713139, "grad_norm": 0.8554005026817322, "learning_rate": 7.654637850665393e-06, "loss": 0.10103797912597656, "step": 5077 }, { "epoch": 0.6865061258977608, "grad_norm": 0.32204896211624146, "learning_rate": 7.648670984683199e-06, "loss": 0.07646989822387695, "step": 5078 }, { "epoch": 0.6866413181242078, "grad_norm": 0.3911975920200348, "learning_rate": 7.642705649286916e-06, "loss": 0.057561635971069336, "step": 5079 }, { "epoch": 0.6867765103506548, "grad_norm": 0.15990619361400604, "learning_rate": 7.63674184571855e-06, "loss": 0.0347064733505249, "step": 5080 }, { "epoch": 0.6869117025771018, "grad_norm": 0.3655719757080078, "learning_rate": 7.630779575219797e-06, "loss": 0.06616973876953125, "step": 5081 }, { "epoch": 0.6870468948035487, "grad_norm": 0.13986630737781525, "learning_rate": 7.6248188390320344e-06, "loss": 0.03964686393737793, "step": 5082 }, { "epoch": 0.6871820870299957, "grad_norm": 0.32323238253593445, "learning_rate": 7.6188596383963135e-06, "loss": 0.06238126754760742, "step": 5083 }, { "epoch": 0.6873172792564427, "grad_norm": 0.2665664553642273, "learning_rate": 7.612901974553388e-06, "loss": 0.06083226203918457, "step": 5084 }, { "epoch": 0.6874524714828897, "grad_norm": 0.23771488666534424, "learning_rate": 7.606945848743653e-06, "loss": 0.04768180847167969, "step": 5085 }, { "epoch": 0.6875876637093367, "grad_norm": 0.18521255254745483, "learning_rate": 7.600991262207221e-06, "loss": 0.05417442321777344, "step": 5086 }, { "epoch": 0.6877228559357836, "grad_norm": 0.2142123281955719, "learning_rate": 7.595038216183867e-06, "loss": 0.059854984283447266, "step": 5087 }, { "epoch": 0.6878580481622306, "grad_norm": 0.15788207948207855, "learning_rate": 7.589086711913037e-06, "loss": 0.04033863544464111, "step": 5088 }, { "epoch": 0.6879932403886776, "grad_norm": 0.1700204759836197, "learning_rate": 7.583136750633885e-06, "loss": 0.04038369655609131, "step": 5089 }, { "epoch": 0.6881284326151247, "grad_norm": 0.26140329241752625, "learning_rate": 7.577188333585202e-06, "loss": 0.07221794128417969, "step": 5090 }, { "epoch": 0.6882636248415717, "grad_norm": 0.32084646821022034, "learning_rate": 7.5712414620054975e-06, "loss": 0.0688483715057373, "step": 5091 }, { "epoch": 0.6883988170680186, "grad_norm": 0.45135194063186646, "learning_rate": 7.565296137132935e-06, "loss": 0.0956869125366211, "step": 5092 }, { "epoch": 0.6885340092944656, "grad_norm": 0.37323784828186035, "learning_rate": 7.559352360205357e-06, "loss": 0.08292341232299805, "step": 5093 }, { "epoch": 0.6886692015209126, "grad_norm": 0.34570854902267456, "learning_rate": 7.553410132460308e-06, "loss": 0.06216931343078613, "step": 5094 }, { "epoch": 0.6888043937473596, "grad_norm": 0.5107104778289795, "learning_rate": 7.547469455134968e-06, "loss": 0.07967972755432129, "step": 5095 }, { "epoch": 0.6889395859738066, "grad_norm": 0.4013323485851288, "learning_rate": 7.541530329466236e-06, "loss": 0.09386730194091797, "step": 5096 }, { "epoch": 0.6890747782002535, "grad_norm": 0.46993210911750793, "learning_rate": 7.535592756690661e-06, "loss": 0.06214261054992676, "step": 5097 }, { "epoch": 0.6892099704267005, "grad_norm": 0.1623968631029129, "learning_rate": 7.52965673804448e-06, "loss": 0.03421461582183838, "step": 5098 }, { "epoch": 0.6893451626531475, "grad_norm": 0.23661763966083527, "learning_rate": 7.5237222747636025e-06, "loss": 0.05874800682067871, "step": 5099 }, { "epoch": 0.6894803548795945, "grad_norm": 0.33158764243125916, "learning_rate": 7.517789368083611e-06, "loss": 0.07294607162475586, "step": 5100 }, { "epoch": 0.6896155471060414, "grad_norm": 0.272876501083374, "learning_rate": 7.511858019239778e-06, "loss": 0.05669736862182617, "step": 5101 }, { "epoch": 0.6897507393324884, "grad_norm": 0.4327941834926605, "learning_rate": 7.505928229467038e-06, "loss": 0.07514095306396484, "step": 5102 }, { "epoch": 0.6898859315589354, "grad_norm": 0.333072692155838, "learning_rate": 7.500000000000004e-06, "loss": 0.049433231353759766, "step": 5103 }, { "epoch": 0.6900211237853824, "grad_norm": 0.3539844751358032, "learning_rate": 7.494073332072963e-06, "loss": 0.0708761215209961, "step": 5104 }, { "epoch": 0.6901563160118294, "grad_norm": 0.38296326994895935, "learning_rate": 7.488148226919877e-06, "loss": 0.06740236282348633, "step": 5105 }, { "epoch": 0.6902915082382763, "grad_norm": 0.2936798930168152, "learning_rate": 7.482224685774393e-06, "loss": 0.045949459075927734, "step": 5106 }, { "epoch": 0.6904267004647233, "grad_norm": 0.4355970323085785, "learning_rate": 7.4763027098698184e-06, "loss": 0.08402824401855469, "step": 5107 }, { "epoch": 0.6905618926911703, "grad_norm": 0.35099583864212036, "learning_rate": 7.470382300439143e-06, "loss": 0.07074165344238281, "step": 5108 }, { "epoch": 0.6906970849176173, "grad_norm": 0.2727399468421936, "learning_rate": 7.4644634587150225e-06, "loss": 0.06038612127304077, "step": 5109 }, { "epoch": 0.6908322771440643, "grad_norm": 0.1744815707206726, "learning_rate": 7.4585461859297906e-06, "loss": 0.04171609878540039, "step": 5110 }, { "epoch": 0.6909674693705112, "grad_norm": 0.3430871069431305, "learning_rate": 7.452630483315463e-06, "loss": 0.06779289245605469, "step": 5111 }, { "epoch": 0.6911026615969582, "grad_norm": 0.27155163884162903, "learning_rate": 7.4467163521037186e-06, "loss": 0.08106803894042969, "step": 5112 }, { "epoch": 0.6912378538234052, "grad_norm": 0.3969530761241913, "learning_rate": 7.440803793525907e-06, "loss": 0.09438371658325195, "step": 5113 }, { "epoch": 0.6913730460498522, "grad_norm": 0.2457677572965622, "learning_rate": 7.434892808813056e-06, "loss": 0.05474114418029785, "step": 5114 }, { "epoch": 0.6915082382762991, "grad_norm": 0.511020302772522, "learning_rate": 7.42898339919586e-06, "loss": 0.06138277053833008, "step": 5115 }, { "epoch": 0.6916434305027461, "grad_norm": 0.2703227698802948, "learning_rate": 7.423075565904698e-06, "loss": 0.0662994384765625, "step": 5116 }, { "epoch": 0.6917786227291931, "grad_norm": 0.34211331605911255, "learning_rate": 7.417169310169609e-06, "loss": 0.0668785572052002, "step": 5117 }, { "epoch": 0.6919138149556401, "grad_norm": 0.22787807881832123, "learning_rate": 7.411264633220305e-06, "loss": 0.04981422424316406, "step": 5118 }, { "epoch": 0.6920490071820871, "grad_norm": 0.3589753210544586, "learning_rate": 7.405361536286174e-06, "loss": 0.0652472972869873, "step": 5119 }, { "epoch": 0.692184199408534, "grad_norm": 0.29489415884017944, "learning_rate": 7.399460020596266e-06, "loss": 0.056148529052734375, "step": 5120 }, { "epoch": 0.692319391634981, "grad_norm": 0.4208012819290161, "learning_rate": 7.393560087379322e-06, "loss": 0.09299284219741821, "step": 5121 }, { "epoch": 0.692454583861428, "grad_norm": 0.2477555274963379, "learning_rate": 7.3876617378637195e-06, "loss": 0.05971670150756836, "step": 5122 }, { "epoch": 0.692589776087875, "grad_norm": 0.3015389144420624, "learning_rate": 7.381764973277543e-06, "loss": 0.05284738540649414, "step": 5123 }, { "epoch": 0.692724968314322, "grad_norm": 0.4995321035385132, "learning_rate": 7.375869794848525e-06, "loss": 0.07973992824554443, "step": 5124 }, { "epoch": 0.6928601605407689, "grad_norm": 0.924176812171936, "learning_rate": 7.3699762038040654e-06, "loss": 0.06792068481445312, "step": 5125 }, { "epoch": 0.6929953527672159, "grad_norm": 0.29413217306137085, "learning_rate": 7.364084201371261e-06, "loss": 0.07237696647644043, "step": 5126 }, { "epoch": 0.6931305449936629, "grad_norm": 0.4069044589996338, "learning_rate": 7.3581937887768334e-06, "loss": 0.10204553604125977, "step": 5127 }, { "epoch": 0.6932657372201099, "grad_norm": 0.4495379328727722, "learning_rate": 7.352304967247217e-06, "loss": 0.05169111490249634, "step": 5128 }, { "epoch": 0.6934009294465568, "grad_norm": 0.22065100073814392, "learning_rate": 7.346417738008487e-06, "loss": 0.046114444732666016, "step": 5129 }, { "epoch": 0.6935361216730038, "grad_norm": 0.31105419993400574, "learning_rate": 7.340532102286399e-06, "loss": 0.07995033264160156, "step": 5130 }, { "epoch": 0.6936713138994508, "grad_norm": 0.24137845635414124, "learning_rate": 7.3346480613063725e-06, "loss": 0.04705667495727539, "step": 5131 }, { "epoch": 0.6938065061258978, "grad_norm": 0.2521708607673645, "learning_rate": 7.328765616293491e-06, "loss": 0.06670522689819336, "step": 5132 }, { "epoch": 0.6939416983523448, "grad_norm": 0.3369907736778259, "learning_rate": 7.322884768472521e-06, "loss": 0.06294393539428711, "step": 5133 }, { "epoch": 0.6940768905787917, "grad_norm": 0.5765347480773926, "learning_rate": 7.317005519067881e-06, "loss": 0.08425617218017578, "step": 5134 }, { "epoch": 0.6942120828052387, "grad_norm": 0.3203013241291046, "learning_rate": 7.311127869303665e-06, "loss": 0.0680396556854248, "step": 5135 }, { "epoch": 0.6943472750316857, "grad_norm": 0.302909791469574, "learning_rate": 7.305251820403628e-06, "loss": 0.06524085998535156, "step": 5136 }, { "epoch": 0.6944824672581327, "grad_norm": 0.4749617874622345, "learning_rate": 7.299377373591188e-06, "loss": 0.05928301811218262, "step": 5137 }, { "epoch": 0.6946176594845797, "grad_norm": 0.5682387351989746, "learning_rate": 7.29350453008945e-06, "loss": 0.09905683994293213, "step": 5138 }, { "epoch": 0.6947528517110266, "grad_norm": 0.35577449202537537, "learning_rate": 7.287633291121166e-06, "loss": 0.06443309783935547, "step": 5139 }, { "epoch": 0.6948880439374736, "grad_norm": 0.2810805141925812, "learning_rate": 7.281763657908756e-06, "loss": 0.05539989471435547, "step": 5140 }, { "epoch": 0.6950232361639206, "grad_norm": 0.26290300488471985, "learning_rate": 7.275895631674313e-06, "loss": 0.05199289321899414, "step": 5141 }, { "epoch": 0.6951584283903676, "grad_norm": 0.17403098940849304, "learning_rate": 7.2700292136395826e-06, "loss": 0.03617501258850098, "step": 5142 }, { "epoch": 0.6952936206168145, "grad_norm": 0.2667497992515564, "learning_rate": 7.264164405025997e-06, "loss": 0.06101787090301514, "step": 5143 }, { "epoch": 0.6954288128432615, "grad_norm": 0.36639004945755005, "learning_rate": 7.2583012070546364e-06, "loss": 0.09653186798095703, "step": 5144 }, { "epoch": 0.6955640050697085, "grad_norm": 0.23468759655952454, "learning_rate": 7.252439620946247e-06, "loss": 0.05823396146297455, "step": 5145 }, { "epoch": 0.6956991972961555, "grad_norm": 0.26787322759628296, "learning_rate": 7.246579647921243e-06, "loss": 0.06267523765563965, "step": 5146 }, { "epoch": 0.6958343895226025, "grad_norm": 0.4143868684768677, "learning_rate": 7.240721289199699e-06, "loss": 0.061948537826538086, "step": 5147 }, { "epoch": 0.6959695817490494, "grad_norm": 0.5479666590690613, "learning_rate": 7.234864546001364e-06, "loss": 0.07314252853393555, "step": 5148 }, { "epoch": 0.6961047739754964, "grad_norm": 0.509655237197876, "learning_rate": 7.229009419545638e-06, "loss": 0.05440187454223633, "step": 5149 }, { "epoch": 0.6962399662019434, "grad_norm": 0.2680950164794922, "learning_rate": 7.223155911051593e-06, "loss": 0.04847145080566406, "step": 5150 }, { "epoch": 0.6963751584283904, "grad_norm": 0.3453781008720398, "learning_rate": 7.2173040217379575e-06, "loss": 0.0657196044921875, "step": 5151 }, { "epoch": 0.6965103506548374, "grad_norm": 0.1617542803287506, "learning_rate": 7.211453752823122e-06, "loss": 0.04308795928955078, "step": 5152 }, { "epoch": 0.6966455428812843, "grad_norm": 0.13926690816879272, "learning_rate": 7.205605105525161e-06, "loss": 0.03722953796386719, "step": 5153 }, { "epoch": 0.6967807351077313, "grad_norm": 0.35108423233032227, "learning_rate": 7.19975808106177e-06, "loss": 0.07530021667480469, "step": 5154 }, { "epoch": 0.6969159273341783, "grad_norm": 0.25188055634498596, "learning_rate": 7.193912680650346e-06, "loss": 0.06133103370666504, "step": 5155 }, { "epoch": 0.6970511195606253, "grad_norm": 0.26292234659194946, "learning_rate": 7.188068905507931e-06, "loss": 0.051703691482543945, "step": 5156 }, { "epoch": 0.6971863117870722, "grad_norm": 0.48832032084465027, "learning_rate": 7.182226756851223e-06, "loss": 0.0809781551361084, "step": 5157 }, { "epoch": 0.6973215040135192, "grad_norm": 2.010105848312378, "learning_rate": 7.176386235896603e-06, "loss": 0.06756019592285156, "step": 5158 }, { "epoch": 0.6974566962399662, "grad_norm": 0.16677618026733398, "learning_rate": 7.170547343860079e-06, "loss": 0.046690940856933594, "step": 5159 }, { "epoch": 0.6975918884664132, "grad_norm": 0.3117125928401947, "learning_rate": 7.164710081957355e-06, "loss": 0.07377815246582031, "step": 5160 }, { "epoch": 0.6977270806928602, "grad_norm": 0.2945198714733124, "learning_rate": 7.158874451403777e-06, "loss": 0.04975748062133789, "step": 5161 }, { "epoch": 0.6978622729193071, "grad_norm": 0.4614226520061493, "learning_rate": 7.15304045341435e-06, "loss": 0.08585166931152344, "step": 5162 }, { "epoch": 0.6979974651457541, "grad_norm": 0.30252423882484436, "learning_rate": 7.147208089203745e-06, "loss": 0.05503225326538086, "step": 5163 }, { "epoch": 0.6981326573722011, "grad_norm": 0.402742475271225, "learning_rate": 7.141377359986288e-06, "loss": 0.06532526016235352, "step": 5164 }, { "epoch": 0.6982678495986481, "grad_norm": 0.28376224637031555, "learning_rate": 7.135548266975978e-06, "loss": 0.0535738468170166, "step": 5165 }, { "epoch": 0.698403041825095, "grad_norm": 0.38947048783302307, "learning_rate": 7.129720811386456e-06, "loss": 0.06785106658935547, "step": 5166 }, { "epoch": 0.698538234051542, "grad_norm": 0.22312481701374054, "learning_rate": 7.12389499443103e-06, "loss": 0.06464958190917969, "step": 5167 }, { "epoch": 0.698673426277989, "grad_norm": 0.46874937415122986, "learning_rate": 7.118070817322668e-06, "loss": 0.08054065704345703, "step": 5168 }, { "epoch": 0.698808618504436, "grad_norm": 0.2867968678474426, "learning_rate": 7.1122482812739885e-06, "loss": 0.06430935859680176, "step": 5169 }, { "epoch": 0.698943810730883, "grad_norm": 0.44140592217445374, "learning_rate": 7.106427387497283e-06, "loss": 0.07148480415344238, "step": 5170 }, { "epoch": 0.6990790029573299, "grad_norm": 0.3615421652793884, "learning_rate": 7.10060813720449e-06, "loss": 0.07390451431274414, "step": 5171 }, { "epoch": 0.6992141951837769, "grad_norm": 0.3820439875125885, "learning_rate": 7.094790531607207e-06, "loss": 0.08645200729370117, "step": 5172 }, { "epoch": 0.6993493874102239, "grad_norm": 0.3789469301700592, "learning_rate": 7.088974571916692e-06, "loss": 0.07278966903686523, "step": 5173 }, { "epoch": 0.6994845796366709, "grad_norm": 0.2615392208099365, "learning_rate": 7.0831602593438515e-06, "loss": 0.04927694797515869, "step": 5174 }, { "epoch": 0.6996197718631179, "grad_norm": 0.20419496297836304, "learning_rate": 7.077347595099269e-06, "loss": 0.047860145568847656, "step": 5175 }, { "epoch": 0.6997549640895648, "grad_norm": 0.26666179299354553, "learning_rate": 7.071536580393166e-06, "loss": 0.05436229705810547, "step": 5176 }, { "epoch": 0.6998901563160118, "grad_norm": 0.2581121623516083, "learning_rate": 7.065727216435426e-06, "loss": 0.04289531707763672, "step": 5177 }, { "epoch": 0.7000253485424588, "grad_norm": 0.39488500356674194, "learning_rate": 7.05991950443559e-06, "loss": 0.08115530014038086, "step": 5178 }, { "epoch": 0.7001605407689058, "grad_norm": 0.3415122330188751, "learning_rate": 7.05411344560285e-06, "loss": 0.05090904235839844, "step": 5179 }, { "epoch": 0.7002957329953527, "grad_norm": 0.4603601098060608, "learning_rate": 7.048309041146069e-06, "loss": 0.07886600494384766, "step": 5180 }, { "epoch": 0.7004309252217997, "grad_norm": 0.2843720614910126, "learning_rate": 7.0425062922737495e-06, "loss": 0.06193208694458008, "step": 5181 }, { "epoch": 0.7005661174482467, "grad_norm": 0.4288102090358734, "learning_rate": 7.036705200194053e-06, "loss": 0.06223297119140625, "step": 5182 }, { "epoch": 0.7007013096746937, "grad_norm": 0.6508801579475403, "learning_rate": 7.0309057661148e-06, "loss": 0.09417963027954102, "step": 5183 }, { "epoch": 0.7008365019011407, "grad_norm": 0.28302961587905884, "learning_rate": 7.0251079912434565e-06, "loss": 0.0530397891998291, "step": 5184 }, { "epoch": 0.7009716941275876, "grad_norm": 0.3496130406856537, "learning_rate": 7.019311876787169e-06, "loss": 0.0873725414276123, "step": 5185 }, { "epoch": 0.7011068863540346, "grad_norm": 0.17822697758674622, "learning_rate": 7.013517423952696e-06, "loss": 0.0391998291015625, "step": 5186 }, { "epoch": 0.7012420785804816, "grad_norm": 0.4951348602771759, "learning_rate": 7.0077246339464904e-06, "loss": 0.1033172607421875, "step": 5187 }, { "epoch": 0.7013772708069286, "grad_norm": 0.31052571535110474, "learning_rate": 7.001933507974635e-06, "loss": 0.0532526969909668, "step": 5188 }, { "epoch": 0.7015124630333756, "grad_norm": 0.2784779667854309, "learning_rate": 6.996144047242868e-06, "loss": 0.057559967041015625, "step": 5189 }, { "epoch": 0.7016476552598225, "grad_norm": 0.2698589265346527, "learning_rate": 6.9903562529566044e-06, "loss": 0.05294942855834961, "step": 5190 }, { "epoch": 0.7017828474862695, "grad_norm": 0.5403467416763306, "learning_rate": 6.984570126320869e-06, "loss": 0.07762980461120605, "step": 5191 }, { "epoch": 0.7019180397127165, "grad_norm": 0.3751070201396942, "learning_rate": 6.978785668540384e-06, "loss": 0.07057070732116699, "step": 5192 }, { "epoch": 0.7020532319391635, "grad_norm": 0.5298959612846375, "learning_rate": 6.973002880819496e-06, "loss": 0.08171343803405762, "step": 5193 }, { "epoch": 0.7021884241656104, "grad_norm": 0.4307243525981903, "learning_rate": 6.96722176436221e-06, "loss": 0.08375358581542969, "step": 5194 }, { "epoch": 0.7023236163920574, "grad_norm": 0.2923785448074341, "learning_rate": 6.9614423203721975e-06, "loss": 0.0440557599067688, "step": 5195 }, { "epoch": 0.7024588086185044, "grad_norm": 0.3036232590675354, "learning_rate": 6.955664550052749e-06, "loss": 0.07849550247192383, "step": 5196 }, { "epoch": 0.7025940008449514, "grad_norm": 0.5459083914756775, "learning_rate": 6.949888454606847e-06, "loss": 0.08972203731536865, "step": 5197 }, { "epoch": 0.7027291930713984, "grad_norm": 0.3295241594314575, "learning_rate": 6.944114035237095e-06, "loss": 0.07895088195800781, "step": 5198 }, { "epoch": 0.7028643852978453, "grad_norm": 0.2893649935722351, "learning_rate": 6.93834129314576e-06, "loss": 0.0626974105834961, "step": 5199 }, { "epoch": 0.7029995775242923, "grad_norm": 0.22155827283859253, "learning_rate": 6.932570229534759e-06, "loss": 0.061110734939575195, "step": 5200 }, { "epoch": 0.7031347697507393, "grad_norm": 0.4836445152759552, "learning_rate": 6.9268008456056505e-06, "loss": 0.07233667373657227, "step": 5201 }, { "epoch": 0.7032699619771863, "grad_norm": 0.2334105670452118, "learning_rate": 6.921033142559664e-06, "loss": 0.05377340316772461, "step": 5202 }, { "epoch": 0.7034051542036333, "grad_norm": 0.39910081028938293, "learning_rate": 6.915267121597659e-06, "loss": 0.06854391098022461, "step": 5203 }, { "epoch": 0.7035403464300802, "grad_norm": 0.21269190311431885, "learning_rate": 6.909502783920153e-06, "loss": 0.04930448532104492, "step": 5204 }, { "epoch": 0.7036755386565272, "grad_norm": 0.23991501331329346, "learning_rate": 6.903740130727312e-06, "loss": 0.06479740142822266, "step": 5205 }, { "epoch": 0.7038107308829742, "grad_norm": 0.18447642028331757, "learning_rate": 6.8979791632189425e-06, "loss": 0.042063236236572266, "step": 5206 }, { "epoch": 0.7039459231094212, "grad_norm": 0.3013533055782318, "learning_rate": 6.892219882594523e-06, "loss": 0.05243110656738281, "step": 5207 }, { "epoch": 0.7040811153358681, "grad_norm": 0.28287580609321594, "learning_rate": 6.886462290053159e-06, "loss": 0.05308115482330322, "step": 5208 }, { "epoch": 0.7042163075623151, "grad_norm": 0.12992000579833984, "learning_rate": 6.880706386793614e-06, "loss": 0.03340864181518555, "step": 5209 }, { "epoch": 0.7043514997887621, "grad_norm": 0.2752667963504791, "learning_rate": 6.874952174014298e-06, "loss": 0.0632176399230957, "step": 5210 }, { "epoch": 0.7044866920152091, "grad_norm": 0.2495708018541336, "learning_rate": 6.8691996529132585e-06, "loss": 0.04654431343078613, "step": 5211 }, { "epoch": 0.7046218842416561, "grad_norm": 0.4489001929759979, "learning_rate": 6.863448824688217e-06, "loss": 0.07932806015014648, "step": 5212 }, { "epoch": 0.704757076468103, "grad_norm": 0.45992836356163025, "learning_rate": 6.857699690536521e-06, "loss": 0.08535480499267578, "step": 5213 }, { "epoch": 0.70489226869455, "grad_norm": 0.41505518555641174, "learning_rate": 6.8519522516551685e-06, "loss": 0.07698339223861694, "step": 5214 }, { "epoch": 0.705027460920997, "grad_norm": 0.25965675711631775, "learning_rate": 6.846206509240807e-06, "loss": 0.06387710571289062, "step": 5215 }, { "epoch": 0.705162653147444, "grad_norm": 0.28890877962112427, "learning_rate": 6.840462464489726e-06, "loss": 0.09001302719116211, "step": 5216 }, { "epoch": 0.705297845373891, "grad_norm": 0.19721145927906036, "learning_rate": 6.834720118597879e-06, "loss": 0.05830240249633789, "step": 5217 }, { "epoch": 0.7054330376003379, "grad_norm": 0.49042749404907227, "learning_rate": 6.828979472760846e-06, "loss": 0.07706403732299805, "step": 5218 }, { "epoch": 0.7055682298267849, "grad_norm": 0.24993939697742462, "learning_rate": 6.823240528173858e-06, "loss": 0.047280073165893555, "step": 5219 }, { "epoch": 0.7057034220532319, "grad_norm": 0.32656624913215637, "learning_rate": 6.817503286031797e-06, "loss": 0.06964492797851562, "step": 5220 }, { "epoch": 0.7058386142796789, "grad_norm": 0.3378008306026459, "learning_rate": 6.811767747529181e-06, "loss": 0.07908868789672852, "step": 5221 }, { "epoch": 0.7059738065061258, "grad_norm": 0.5229734778404236, "learning_rate": 6.806033913860195e-06, "loss": 0.08921527862548828, "step": 5222 }, { "epoch": 0.7061089987325728, "grad_norm": 0.3965039849281311, "learning_rate": 6.800301786218634e-06, "loss": 0.07672739028930664, "step": 5223 }, { "epoch": 0.7062441909590198, "grad_norm": 0.46000194549560547, "learning_rate": 6.794571365797971e-06, "loss": 0.07695484161376953, "step": 5224 }, { "epoch": 0.7063793831854668, "grad_norm": 0.2436547726392746, "learning_rate": 6.788842653791308e-06, "loss": 0.0603257417678833, "step": 5225 }, { "epoch": 0.7065145754119139, "grad_norm": 0.3595168888568878, "learning_rate": 6.7831156513913864e-06, "loss": 0.05218696594238281, "step": 5226 }, { "epoch": 0.7066497676383608, "grad_norm": 0.32203981280326843, "learning_rate": 6.777390359790614e-06, "loss": 0.06536483764648438, "step": 5227 }, { "epoch": 0.7067849598648078, "grad_norm": 0.30787691473960876, "learning_rate": 6.771666780181004e-06, "loss": 0.07740449905395508, "step": 5228 }, { "epoch": 0.7069201520912548, "grad_norm": 0.1736706644296646, "learning_rate": 6.765944913754258e-06, "loss": 0.03533220291137695, "step": 5229 }, { "epoch": 0.7070553443177018, "grad_norm": 0.3201066851615906, "learning_rate": 6.7602247617016885e-06, "loss": 0.08419656753540039, "step": 5230 }, { "epoch": 0.7071905365441488, "grad_norm": 0.3639947175979614, "learning_rate": 6.754506325214265e-06, "loss": 0.07221412658691406, "step": 5231 }, { "epoch": 0.7073257287705957, "grad_norm": 0.41238322854042053, "learning_rate": 6.748789605482593e-06, "loss": 0.09039163589477539, "step": 5232 }, { "epoch": 0.7074609209970427, "grad_norm": 0.21321308612823486, "learning_rate": 6.743074603696922e-06, "loss": 0.037832021713256836, "step": 5233 }, { "epoch": 0.7075961132234897, "grad_norm": 0.2813952565193176, "learning_rate": 6.737361321047155e-06, "loss": 0.05194282531738281, "step": 5234 }, { "epoch": 0.7077313054499367, "grad_norm": 0.4473142623901367, "learning_rate": 6.731649758722823e-06, "loss": 0.09396696090698242, "step": 5235 }, { "epoch": 0.7078664976763837, "grad_norm": 0.1799984872341156, "learning_rate": 6.725939917913102e-06, "loss": 0.043128252029418945, "step": 5236 }, { "epoch": 0.7080016899028306, "grad_norm": 0.2964251637458801, "learning_rate": 6.720231799806814e-06, "loss": 0.06183105707168579, "step": 5237 }, { "epoch": 0.7081368821292776, "grad_norm": 0.16865837574005127, "learning_rate": 6.7145254055924136e-06, "loss": 0.04227583110332489, "step": 5238 }, { "epoch": 0.7082720743557246, "grad_norm": 0.19701269268989563, "learning_rate": 6.70882073645801e-06, "loss": 0.04064035415649414, "step": 5239 }, { "epoch": 0.7084072665821716, "grad_norm": 0.13464172184467316, "learning_rate": 6.703117793591346e-06, "loss": 0.03891634941101074, "step": 5240 }, { "epoch": 0.7085424588086185, "grad_norm": 0.29263293743133545, "learning_rate": 6.6974165781798e-06, "loss": 0.0652322769165039, "step": 5241 }, { "epoch": 0.7086776510350655, "grad_norm": 0.18063101172447205, "learning_rate": 6.691717091410398e-06, "loss": 0.04186379909515381, "step": 5242 }, { "epoch": 0.7088128432615125, "grad_norm": 0.16540196537971497, "learning_rate": 6.686019334469797e-06, "loss": 0.038457632064819336, "step": 5243 }, { "epoch": 0.7089480354879595, "grad_norm": 0.3550252318382263, "learning_rate": 6.680323308544312e-06, "loss": 0.07416820526123047, "step": 5244 }, { "epoch": 0.7090832277144065, "grad_norm": 0.41331881284713745, "learning_rate": 6.674629014819879e-06, "loss": 0.06978464126586914, "step": 5245 }, { "epoch": 0.7092184199408534, "grad_norm": 0.28112414479255676, "learning_rate": 6.668936454482082e-06, "loss": 0.060790300369262695, "step": 5246 }, { "epoch": 0.7093536121673004, "grad_norm": 0.2451619654893875, "learning_rate": 6.6632456287161426e-06, "loss": 0.0528407096862793, "step": 5247 }, { "epoch": 0.7094888043937474, "grad_norm": 0.28554636240005493, "learning_rate": 6.657556538706914e-06, "loss": 0.06327342987060547, "step": 5248 }, { "epoch": 0.7096239966201944, "grad_norm": 0.20135733485221863, "learning_rate": 6.651869185638907e-06, "loss": 0.05432701110839844, "step": 5249 }, { "epoch": 0.7097591888466414, "grad_norm": 0.330646276473999, "learning_rate": 6.646183570696253e-06, "loss": 0.06869697570800781, "step": 5250 }, { "epoch": 0.7098943810730883, "grad_norm": 0.19352610409259796, "learning_rate": 6.6404996950627275e-06, "loss": 0.04934835433959961, "step": 5251 }, { "epoch": 0.7100295732995353, "grad_norm": 0.32562071084976196, "learning_rate": 6.634817559921744e-06, "loss": 0.08568143844604492, "step": 5252 }, { "epoch": 0.7101647655259823, "grad_norm": 0.14758622646331787, "learning_rate": 6.629137166456348e-06, "loss": 0.038468360900878906, "step": 5253 }, { "epoch": 0.7102999577524293, "grad_norm": 0.43937766551971436, "learning_rate": 6.623458515849244e-06, "loss": 0.07604408264160156, "step": 5254 }, { "epoch": 0.7104351499788762, "grad_norm": 0.5796530246734619, "learning_rate": 6.6177816092827354e-06, "loss": 0.08571100234985352, "step": 5255 }, { "epoch": 0.7105703422053232, "grad_norm": 0.5068479180335999, "learning_rate": 6.6121064479388e-06, "loss": 0.07922065258026123, "step": 5256 }, { "epoch": 0.7107055344317702, "grad_norm": 0.25678953528404236, "learning_rate": 6.606433032999031e-06, "loss": 0.06104302406311035, "step": 5257 }, { "epoch": 0.7108407266582172, "grad_norm": 0.4827907383441925, "learning_rate": 6.60076136564466e-06, "loss": 0.05416154861450195, "step": 5258 }, { "epoch": 0.7109759188846642, "grad_norm": 0.45612362027168274, "learning_rate": 6.595091447056574e-06, "loss": 0.07600259780883789, "step": 5259 }, { "epoch": 0.7111111111111111, "grad_norm": 0.3709104061126709, "learning_rate": 6.589423278415259e-06, "loss": 0.0860128402709961, "step": 5260 }, { "epoch": 0.7112463033375581, "grad_norm": 0.2493404597043991, "learning_rate": 6.583756860900872e-06, "loss": 0.05909276008605957, "step": 5261 }, { "epoch": 0.7113814955640051, "grad_norm": 0.2866962254047394, "learning_rate": 6.578092195693187e-06, "loss": 0.05163431167602539, "step": 5262 }, { "epoch": 0.7115166877904521, "grad_norm": 0.6155808568000793, "learning_rate": 6.572429283971614e-06, "loss": 0.0694810152053833, "step": 5263 }, { "epoch": 0.711651880016899, "grad_norm": 0.2943473160266876, "learning_rate": 6.566768126915215e-06, "loss": 0.05535316467285156, "step": 5264 }, { "epoch": 0.711787072243346, "grad_norm": 0.15070411562919617, "learning_rate": 6.561108725702653e-06, "loss": 0.038384437561035156, "step": 5265 }, { "epoch": 0.711922264469793, "grad_norm": 0.4607013463973999, "learning_rate": 6.555451081512262e-06, "loss": 0.07212257385253906, "step": 5266 }, { "epoch": 0.71205745669624, "grad_norm": 0.4438392221927643, "learning_rate": 6.549795195521988e-06, "loss": 0.09726285934448242, "step": 5267 }, { "epoch": 0.712192648922687, "grad_norm": 0.20698747038841248, "learning_rate": 6.544141068909416e-06, "loss": 0.05082368850708008, "step": 5268 }, { "epoch": 0.712327841149134, "grad_norm": 0.3001084625720978, "learning_rate": 6.5384887028517645e-06, "loss": 0.0547177791595459, "step": 5269 }, { "epoch": 0.7124630333755809, "grad_norm": 0.40025028586387634, "learning_rate": 6.532838098525883e-06, "loss": 0.08511734008789062, "step": 5270 }, { "epoch": 0.7125982256020279, "grad_norm": 0.35512030124664307, "learning_rate": 6.5271892571082655e-06, "loss": 0.06788158416748047, "step": 5271 }, { "epoch": 0.7127334178284749, "grad_norm": 0.46489977836608887, "learning_rate": 6.521542179775029e-06, "loss": 0.06070709228515625, "step": 5272 }, { "epoch": 0.7128686100549219, "grad_norm": 0.2922933101654053, "learning_rate": 6.515896867701924e-06, "loss": 0.05234217643737793, "step": 5273 }, { "epoch": 0.7130038022813688, "grad_norm": 0.2974027991294861, "learning_rate": 6.510253322064333e-06, "loss": 0.05164146423339844, "step": 5274 }, { "epoch": 0.7131389945078158, "grad_norm": 0.7776655554771423, "learning_rate": 6.504611544037267e-06, "loss": 0.11277198791503906, "step": 5275 }, { "epoch": 0.7132741867342628, "grad_norm": 0.33653420209884644, "learning_rate": 6.498971534795387e-06, "loss": 0.0775156021118164, "step": 5276 }, { "epoch": 0.7134093789607098, "grad_norm": 0.19908814132213593, "learning_rate": 6.493333295512965e-06, "loss": 0.04477423429489136, "step": 5277 }, { "epoch": 0.7135445711871568, "grad_norm": 0.25313863158226013, "learning_rate": 6.487696827363916e-06, "loss": 0.052074432373046875, "step": 5278 }, { "epoch": 0.7136797634136037, "grad_norm": 0.3654668629169464, "learning_rate": 6.48206213152178e-06, "loss": 0.061371803283691406, "step": 5279 }, { "epoch": 0.7138149556400507, "grad_norm": 0.2771454453468323, "learning_rate": 6.476429209159725e-06, "loss": 0.07045698165893555, "step": 5280 }, { "epoch": 0.7139501478664977, "grad_norm": 0.24215292930603027, "learning_rate": 6.470798061450568e-06, "loss": 0.052469611167907715, "step": 5281 }, { "epoch": 0.7140853400929447, "grad_norm": 0.2678535282611847, "learning_rate": 6.465168689566738e-06, "loss": 0.056267738342285156, "step": 5282 }, { "epoch": 0.7142205323193916, "grad_norm": 0.4750436246395111, "learning_rate": 6.4595410946803e-06, "loss": 0.08125638961791992, "step": 5283 }, { "epoch": 0.7143557245458386, "grad_norm": 0.6535443663597107, "learning_rate": 6.453915277962948e-06, "loss": 0.0846710205078125, "step": 5284 }, { "epoch": 0.7144909167722856, "grad_norm": 0.31681588292121887, "learning_rate": 6.4482912405860055e-06, "loss": 0.07419633865356445, "step": 5285 }, { "epoch": 0.7146261089987326, "grad_norm": 0.5306790471076965, "learning_rate": 6.442668983720434e-06, "loss": 0.07944530248641968, "step": 5286 }, { "epoch": 0.7147613012251796, "grad_norm": 0.42185264825820923, "learning_rate": 6.437048508536813e-06, "loss": 0.07938385009765625, "step": 5287 }, { "epoch": 0.7148964934516265, "grad_norm": 0.16323237121105194, "learning_rate": 6.431429816205357e-06, "loss": 0.03561758995056152, "step": 5288 }, { "epoch": 0.7150316856780735, "grad_norm": 0.29352566599845886, "learning_rate": 6.425812907895904e-06, "loss": 0.05605483055114746, "step": 5289 }, { "epoch": 0.7151668779045205, "grad_norm": 0.25459399819374084, "learning_rate": 6.420197784777925e-06, "loss": 0.054135799407958984, "step": 5290 }, { "epoch": 0.7153020701309675, "grad_norm": 0.4943426549434662, "learning_rate": 6.414584448020528e-06, "loss": 0.08211421966552734, "step": 5291 }, { "epoch": 0.7154372623574144, "grad_norm": 0.21288572251796722, "learning_rate": 6.408972898792423e-06, "loss": 0.06739640235900879, "step": 5292 }, { "epoch": 0.7155724545838614, "grad_norm": 0.18440675735473633, "learning_rate": 6.4033631382619766e-06, "loss": 0.04372048377990723, "step": 5293 }, { "epoch": 0.7157076468103084, "grad_norm": 0.3132110834121704, "learning_rate": 6.397755167597171e-06, "loss": 0.07593369483947754, "step": 5294 }, { "epoch": 0.7158428390367554, "grad_norm": 0.33786335587501526, "learning_rate": 6.392148987965603e-06, "loss": 0.05293989181518555, "step": 5295 }, { "epoch": 0.7159780312632024, "grad_norm": 0.46040597558021545, "learning_rate": 6.386544600534532e-06, "loss": 0.08098006248474121, "step": 5296 }, { "epoch": 0.7161132234896493, "grad_norm": 0.1991870105266571, "learning_rate": 6.3809420064707965e-06, "loss": 0.05335187911987305, "step": 5297 }, { "epoch": 0.7162484157160963, "grad_norm": 0.3220804035663605, "learning_rate": 6.375341206940902e-06, "loss": 0.04731142520904541, "step": 5298 }, { "epoch": 0.7163836079425433, "grad_norm": 0.9213958978652954, "learning_rate": 6.369742203110962e-06, "loss": 0.07104718685150146, "step": 5299 }, { "epoch": 0.7165188001689903, "grad_norm": 0.44197481870651245, "learning_rate": 6.364144996146716e-06, "loss": 0.05142831802368164, "step": 5300 }, { "epoch": 0.7166539923954373, "grad_norm": 0.32369789481163025, "learning_rate": 6.358549587213534e-06, "loss": 0.06956243515014648, "step": 5301 }, { "epoch": 0.7167891846218842, "grad_norm": 0.30617251992225647, "learning_rate": 6.352955977476405e-06, "loss": 0.05008745193481445, "step": 5302 }, { "epoch": 0.7169243768483312, "grad_norm": 0.31715476512908936, "learning_rate": 6.347364168099959e-06, "loss": 0.060420989990234375, "step": 5303 }, { "epoch": 0.7170595690747782, "grad_norm": 0.19058825075626373, "learning_rate": 6.341774160248435e-06, "loss": 0.03251791000366211, "step": 5304 }, { "epoch": 0.7171947613012252, "grad_norm": 0.38277342915534973, "learning_rate": 6.3361859550857e-06, "loss": 0.07580947875976562, "step": 5305 }, { "epoch": 0.7173299535276721, "grad_norm": 0.40562063455581665, "learning_rate": 6.330599553775252e-06, "loss": 0.07514238357543945, "step": 5306 }, { "epoch": 0.7174651457541191, "grad_norm": 0.4485914409160614, "learning_rate": 6.325014957480203e-06, "loss": 0.09454870223999023, "step": 5307 }, { "epoch": 0.7176003379805661, "grad_norm": 0.4483773410320282, "learning_rate": 6.319432167363305e-06, "loss": 0.09784317016601562, "step": 5308 }, { "epoch": 0.7177355302070131, "grad_norm": 0.32551896572113037, "learning_rate": 6.313851184586918e-06, "loss": 0.04758763313293457, "step": 5309 }, { "epoch": 0.7178707224334601, "grad_norm": 0.7177479267120361, "learning_rate": 6.308272010313037e-06, "loss": 0.09788131713867188, "step": 5310 }, { "epoch": 0.718005914659907, "grad_norm": 0.29879510402679443, "learning_rate": 6.302694645703273e-06, "loss": 0.0753021240234375, "step": 5311 }, { "epoch": 0.718141106886354, "grad_norm": 0.44537603855133057, "learning_rate": 6.297119091918857e-06, "loss": 0.08078575134277344, "step": 5312 }, { "epoch": 0.718276299112801, "grad_norm": 0.3085884749889374, "learning_rate": 6.2915453501206634e-06, "loss": 0.05964779853820801, "step": 5313 }, { "epoch": 0.718411491339248, "grad_norm": 0.33680471777915955, "learning_rate": 6.285973421469166e-06, "loss": 0.08896994590759277, "step": 5314 }, { "epoch": 0.718546683565695, "grad_norm": 0.2171669453382492, "learning_rate": 6.28040330712447e-06, "loss": 0.05547380447387695, "step": 5315 }, { "epoch": 0.7186818757921419, "grad_norm": 0.17646951973438263, "learning_rate": 6.274835008246304e-06, "loss": 0.05302882194519043, "step": 5316 }, { "epoch": 0.7188170680185889, "grad_norm": 0.32431310415267944, "learning_rate": 6.269268525994013e-06, "loss": 0.07574915885925293, "step": 5317 }, { "epoch": 0.7189522602450359, "grad_norm": 0.2873489260673523, "learning_rate": 6.263703861526578e-06, "loss": 0.04230654239654541, "step": 5318 }, { "epoch": 0.7190874524714829, "grad_norm": 0.33394187688827515, "learning_rate": 6.258141016002587e-06, "loss": 0.07281017303466797, "step": 5319 }, { "epoch": 0.7192226446979298, "grad_norm": 0.460060179233551, "learning_rate": 6.252579990580254e-06, "loss": 0.08744525909423828, "step": 5320 }, { "epoch": 0.7193578369243768, "grad_norm": 0.3462483882904053, "learning_rate": 6.247020786417412e-06, "loss": 0.04702150821685791, "step": 5321 }, { "epoch": 0.7194930291508238, "grad_norm": 0.1926223337650299, "learning_rate": 6.241463404671516e-06, "loss": 0.0530247688293457, "step": 5322 }, { "epoch": 0.7196282213772708, "grad_norm": 0.3362235724925995, "learning_rate": 6.235907846499655e-06, "loss": 0.06870841979980469, "step": 5323 }, { "epoch": 0.7197634136037178, "grad_norm": 0.25347286462783813, "learning_rate": 6.230354113058505e-06, "loss": 0.063079833984375, "step": 5324 }, { "epoch": 0.7198986058301647, "grad_norm": 0.2189210206270218, "learning_rate": 6.2248022055044e-06, "loss": 0.04768264293670654, "step": 5325 }, { "epoch": 0.7200337980566117, "grad_norm": 0.30733242630958557, "learning_rate": 6.219252124993271e-06, "loss": 0.05814337730407715, "step": 5326 }, { "epoch": 0.7201689902830587, "grad_norm": 0.29368776082992554, "learning_rate": 6.213703872680668e-06, "loss": 0.07378196716308594, "step": 5327 }, { "epoch": 0.7203041825095057, "grad_norm": 0.3042683005332947, "learning_rate": 6.208157449721785e-06, "loss": 0.06443142890930176, "step": 5328 }, { "epoch": 0.7204393747359527, "grad_norm": 0.42213401198387146, "learning_rate": 6.202612857271393e-06, "loss": 0.07448077201843262, "step": 5329 }, { "epoch": 0.7205745669623996, "grad_norm": 0.41049185395240784, "learning_rate": 6.197070096483923e-06, "loss": 0.06332707405090332, "step": 5330 }, { "epoch": 0.7207097591888466, "grad_norm": 0.31218448281288147, "learning_rate": 6.191529168513403e-06, "loss": 0.06288552284240723, "step": 5331 }, { "epoch": 0.7208449514152936, "grad_norm": 0.2701214849948883, "learning_rate": 6.1859900745134755e-06, "loss": 0.057013511657714844, "step": 5332 }, { "epoch": 0.7209801436417406, "grad_norm": 0.2745994031429291, "learning_rate": 6.180452815637429e-06, "loss": 0.05720233917236328, "step": 5333 }, { "epoch": 0.7211153358681875, "grad_norm": 0.39080315828323364, "learning_rate": 6.174917393038126e-06, "loss": 0.06616663932800293, "step": 5334 }, { "epoch": 0.7212505280946345, "grad_norm": 0.3375305235385895, "learning_rate": 6.169383807868088e-06, "loss": 0.083038330078125, "step": 5335 }, { "epoch": 0.7213857203210815, "grad_norm": 0.2510044574737549, "learning_rate": 6.163852061279432e-06, "loss": 0.053519248962402344, "step": 5336 }, { "epoch": 0.7215209125475285, "grad_norm": 0.2927513122558594, "learning_rate": 6.158322154423897e-06, "loss": 0.04729747772216797, "step": 5337 }, { "epoch": 0.7216561047739755, "grad_norm": 0.5078144073486328, "learning_rate": 6.15279408845284e-06, "loss": 0.06560850143432617, "step": 5338 }, { "epoch": 0.7217912970004224, "grad_norm": 0.3834516704082489, "learning_rate": 6.147267864517226e-06, "loss": 0.05035281181335449, "step": 5339 }, { "epoch": 0.7219264892268694, "grad_norm": 0.3078523576259613, "learning_rate": 6.141743483767658e-06, "loss": 0.0785684585571289, "step": 5340 }, { "epoch": 0.7220616814533164, "grad_norm": 0.5647321343421936, "learning_rate": 6.136220947354333e-06, "loss": 0.11573410034179688, "step": 5341 }, { "epoch": 0.7221968736797634, "grad_norm": 0.3144357204437256, "learning_rate": 6.130700256427075e-06, "loss": 0.07591438293457031, "step": 5342 }, { "epoch": 0.7223320659062104, "grad_norm": 0.34056612849235535, "learning_rate": 6.1251814121353204e-06, "loss": 0.06491851806640625, "step": 5343 }, { "epoch": 0.7224672581326573, "grad_norm": 0.18165573477745056, "learning_rate": 6.1196644156281175e-06, "loss": 0.043727874755859375, "step": 5344 }, { "epoch": 0.7226024503591043, "grad_norm": 0.34186694025993347, "learning_rate": 6.114149268054143e-06, "loss": 0.07252335548400879, "step": 5345 }, { "epoch": 0.7227376425855513, "grad_norm": 0.2078217715024948, "learning_rate": 6.108635970561679e-06, "loss": 0.0397036075592041, "step": 5346 }, { "epoch": 0.7228728348119983, "grad_norm": 0.5245160460472107, "learning_rate": 6.103124524298617e-06, "loss": 0.09609150886535645, "step": 5347 }, { "epoch": 0.7230080270384452, "grad_norm": 0.4445883333683014, "learning_rate": 6.097614930412475e-06, "loss": 0.07157659530639648, "step": 5348 }, { "epoch": 0.7231432192648922, "grad_norm": 0.2465265393257141, "learning_rate": 6.092107190050371e-06, "loss": 0.04505133628845215, "step": 5349 }, { "epoch": 0.7232784114913392, "grad_norm": 0.60282963514328, "learning_rate": 6.086601304359059e-06, "loss": 0.07806289196014404, "step": 5350 }, { "epoch": 0.7234136037177862, "grad_norm": 0.1989317238330841, "learning_rate": 6.081097274484887e-06, "loss": 0.058745503425598145, "step": 5351 }, { "epoch": 0.7235487959442332, "grad_norm": 0.28250420093536377, "learning_rate": 6.075595101573825e-06, "loss": 0.043283939361572266, "step": 5352 }, { "epoch": 0.7236839881706801, "grad_norm": 0.452950119972229, "learning_rate": 6.070094786771451e-06, "loss": 0.07983648777008057, "step": 5353 }, { "epoch": 0.7238191803971271, "grad_norm": 0.23428699374198914, "learning_rate": 6.06459633122296e-06, "loss": 0.046811580657958984, "step": 5354 }, { "epoch": 0.7239543726235741, "grad_norm": 0.2537497282028198, "learning_rate": 6.059099736073166e-06, "loss": 0.07736682891845703, "step": 5355 }, { "epoch": 0.7240895648500211, "grad_norm": 0.38944104313850403, "learning_rate": 6.0536050024664865e-06, "loss": 0.05591392517089844, "step": 5356 }, { "epoch": 0.724224757076468, "grad_norm": 0.35275816917419434, "learning_rate": 6.048112131546953e-06, "loss": 0.0651397705078125, "step": 5357 }, { "epoch": 0.724359949302915, "grad_norm": 0.3773675858974457, "learning_rate": 6.0426211244582105e-06, "loss": 0.07550448179244995, "step": 5358 }, { "epoch": 0.724495141529362, "grad_norm": 0.17066816985607147, "learning_rate": 6.03713198234351e-06, "loss": 0.04315590858459473, "step": 5359 }, { "epoch": 0.724630333755809, "grad_norm": 0.27874502539634705, "learning_rate": 6.0316447063457395e-06, "loss": 0.0776357650756836, "step": 5360 }, { "epoch": 0.724765525982256, "grad_norm": 0.285234659910202, "learning_rate": 6.026159297607356e-06, "loss": 0.06747627258300781, "step": 5361 }, { "epoch": 0.724900718208703, "grad_norm": 0.25053516030311584, "learning_rate": 6.020675757270466e-06, "loss": 0.05825948715209961, "step": 5362 }, { "epoch": 0.72503591043515, "grad_norm": 0.3004065155982971, "learning_rate": 6.015194086476766e-06, "loss": 0.043872833251953125, "step": 5363 }, { "epoch": 0.725171102661597, "grad_norm": 0.33169230818748474, "learning_rate": 6.009714286367565e-06, "loss": 0.06735634803771973, "step": 5364 }, { "epoch": 0.725306294888044, "grad_norm": 0.40811291337013245, "learning_rate": 6.004236358083802e-06, "loss": 0.07512474060058594, "step": 5365 }, { "epoch": 0.725441487114491, "grad_norm": 0.35243967175483704, "learning_rate": 5.998760302765989e-06, "loss": 0.06746244430541992, "step": 5366 }, { "epoch": 0.725576679340938, "grad_norm": 0.3048992156982422, "learning_rate": 5.993286121554289e-06, "loss": 0.05666673183441162, "step": 5367 }, { "epoch": 0.7257118715673849, "grad_norm": 0.3580392003059387, "learning_rate": 5.987813815588447e-06, "loss": 0.0666208267211914, "step": 5368 }, { "epoch": 0.7258470637938319, "grad_norm": 0.37930622696876526, "learning_rate": 5.982343386007827e-06, "loss": 0.08964824676513672, "step": 5369 }, { "epoch": 0.7259822560202789, "grad_norm": 0.2211666852235794, "learning_rate": 5.976874833951404e-06, "loss": 0.050545692443847656, "step": 5370 }, { "epoch": 0.7261174482467259, "grad_norm": 0.24112331867218018, "learning_rate": 5.971408160557751e-06, "loss": 0.05179929733276367, "step": 5371 }, { "epoch": 0.7262526404731728, "grad_norm": 0.3979681432247162, "learning_rate": 5.965943366965069e-06, "loss": 0.08137750625610352, "step": 5372 }, { "epoch": 0.7263878326996198, "grad_norm": 0.21101003885269165, "learning_rate": 5.960480454311155e-06, "loss": 0.05811119079589844, "step": 5373 }, { "epoch": 0.7265230249260668, "grad_norm": 0.32828155159950256, "learning_rate": 5.955019423733416e-06, "loss": 0.08378124237060547, "step": 5374 }, { "epoch": 0.7266582171525138, "grad_norm": 0.4171237349510193, "learning_rate": 5.949560276368866e-06, "loss": 0.09240245819091797, "step": 5375 }, { "epoch": 0.7267934093789608, "grad_norm": 0.24032852053642273, "learning_rate": 5.9441030133541235e-06, "loss": 0.07701969146728516, "step": 5376 }, { "epoch": 0.7269286016054077, "grad_norm": 0.24201998114585876, "learning_rate": 5.938647635825432e-06, "loss": 0.05319392681121826, "step": 5377 }, { "epoch": 0.7270637938318547, "grad_norm": 0.332367867231369, "learning_rate": 5.933194144918623e-06, "loss": 0.060341596603393555, "step": 5378 }, { "epoch": 0.7271989860583017, "grad_norm": 0.3521791100502014, "learning_rate": 5.927742541769142e-06, "loss": 0.04864215850830078, "step": 5379 }, { "epoch": 0.7273341782847487, "grad_norm": 0.35975217819213867, "learning_rate": 5.9222928275120445e-06, "loss": 0.05352163314819336, "step": 5380 }, { "epoch": 0.7274693705111956, "grad_norm": 0.2611422836780548, "learning_rate": 5.916845003281983e-06, "loss": 0.061258792877197266, "step": 5381 }, { "epoch": 0.7276045627376426, "grad_norm": 0.3710670471191406, "learning_rate": 5.911399070213234e-06, "loss": 0.07865190505981445, "step": 5382 }, { "epoch": 0.7277397549640896, "grad_norm": 0.25374647974967957, "learning_rate": 5.905955029439665e-06, "loss": 0.06693243980407715, "step": 5383 }, { "epoch": 0.7278749471905366, "grad_norm": 0.688390851020813, "learning_rate": 5.900512882094754e-06, "loss": 0.10462188720703125, "step": 5384 }, { "epoch": 0.7280101394169836, "grad_norm": 0.3552984595298767, "learning_rate": 5.8950726293115855e-06, "loss": 0.059075117111206055, "step": 5385 }, { "epoch": 0.7281453316434305, "grad_norm": 0.28323695063591003, "learning_rate": 5.889634272222844e-06, "loss": 0.07794666290283203, "step": 5386 }, { "epoch": 0.7282805238698775, "grad_norm": 0.2585649788379669, "learning_rate": 5.8841978119608345e-06, "loss": 0.04220008850097656, "step": 5387 }, { "epoch": 0.7284157160963245, "grad_norm": 0.27961984276771545, "learning_rate": 5.878763249657452e-06, "loss": 0.050080060958862305, "step": 5388 }, { "epoch": 0.7285509083227715, "grad_norm": 0.30263832211494446, "learning_rate": 5.873330586444202e-06, "loss": 0.05263853073120117, "step": 5389 }, { "epoch": 0.7286861005492185, "grad_norm": 0.2795065939426422, "learning_rate": 5.867899823452193e-06, "loss": 0.0766400694847107, "step": 5390 }, { "epoch": 0.7288212927756654, "grad_norm": 0.3725234270095825, "learning_rate": 5.862470961812133e-06, "loss": 0.057196617126464844, "step": 5391 }, { "epoch": 0.7289564850021124, "grad_norm": 0.30894455313682556, "learning_rate": 5.857044002654357e-06, "loss": 0.06757664680480957, "step": 5392 }, { "epoch": 0.7290916772285594, "grad_norm": 0.22640785574913025, "learning_rate": 5.851618947108764e-06, "loss": 0.04272150993347168, "step": 5393 }, { "epoch": 0.7292268694550064, "grad_norm": 0.23192906379699707, "learning_rate": 5.8461957963048984e-06, "loss": 0.06345391273498535, "step": 5394 }, { "epoch": 0.7293620616814533, "grad_norm": 0.24933519959449768, "learning_rate": 5.840774551371882e-06, "loss": 0.07121109962463379, "step": 5395 }, { "epoch": 0.7294972539079003, "grad_norm": 0.3676450848579407, "learning_rate": 5.8353552134384405e-06, "loss": 0.07812178134918213, "step": 5396 }, { "epoch": 0.7296324461343473, "grad_norm": 0.32973673939704895, "learning_rate": 5.829937783632926e-06, "loss": 0.076987624168396, "step": 5397 }, { "epoch": 0.7297676383607943, "grad_norm": 0.5101771354675293, "learning_rate": 5.824522263083256e-06, "loss": 0.06301406025886536, "step": 5398 }, { "epoch": 0.7299028305872413, "grad_norm": 0.4950540065765381, "learning_rate": 5.8191086529169855e-06, "loss": 0.051572561264038086, "step": 5399 }, { "epoch": 0.7300380228136882, "grad_norm": 0.3083554208278656, "learning_rate": 5.813696954261253e-06, "loss": 0.06078815460205078, "step": 5400 }, { "epoch": 0.7301732150401352, "grad_norm": 0.323513001203537, "learning_rate": 5.8082871682428e-06, "loss": 0.07897329330444336, "step": 5401 }, { "epoch": 0.7303084072665822, "grad_norm": 0.2907189726829529, "learning_rate": 5.802879295987975e-06, "loss": 0.057209014892578125, "step": 5402 }, { "epoch": 0.7304435994930292, "grad_norm": 0.35615667700767517, "learning_rate": 5.797473338622722e-06, "loss": 0.06593179702758789, "step": 5403 }, { "epoch": 0.7305787917194762, "grad_norm": 0.3863239586353302, "learning_rate": 5.792069297272599e-06, "loss": 0.08330130577087402, "step": 5404 }, { "epoch": 0.7307139839459231, "grad_norm": 0.13111256062984467, "learning_rate": 5.7866671730627485e-06, "loss": 0.044106483459472656, "step": 5405 }, { "epoch": 0.7308491761723701, "grad_norm": 0.47994399070739746, "learning_rate": 5.781266967117925e-06, "loss": 0.07491302490234375, "step": 5406 }, { "epoch": 0.7309843683988171, "grad_norm": 0.3283345103263855, "learning_rate": 5.7758686805624815e-06, "loss": 0.0642390251159668, "step": 5407 }, { "epoch": 0.7311195606252641, "grad_norm": 0.26693981885910034, "learning_rate": 5.7704723145203605e-06, "loss": 0.07425355911254883, "step": 5408 }, { "epoch": 0.731254752851711, "grad_norm": 0.28047069907188416, "learning_rate": 5.765077870115126e-06, "loss": 0.05912092328071594, "step": 5409 }, { "epoch": 0.731389945078158, "grad_norm": 0.3144618272781372, "learning_rate": 5.759685348469928e-06, "loss": 0.07192420959472656, "step": 5410 }, { "epoch": 0.731525137304605, "grad_norm": 0.2807704508304596, "learning_rate": 5.754294750707514e-06, "loss": 0.05621659755706787, "step": 5411 }, { "epoch": 0.731660329531052, "grad_norm": 0.19868092238903046, "learning_rate": 5.748906077950237e-06, "loss": 0.04578137397766113, "step": 5412 }, { "epoch": 0.731795521757499, "grad_norm": 0.3376769423484802, "learning_rate": 5.743519331320042e-06, "loss": 0.0765380859375, "step": 5413 }, { "epoch": 0.7319307139839459, "grad_norm": 0.2248193621635437, "learning_rate": 5.73813451193849e-06, "loss": 0.058924734592437744, "step": 5414 }, { "epoch": 0.7320659062103929, "grad_norm": 0.3640033006668091, "learning_rate": 5.7327516209267225e-06, "loss": 0.08541679382324219, "step": 5415 }, { "epoch": 0.7322010984368399, "grad_norm": 0.18708643317222595, "learning_rate": 5.727370659405486e-06, "loss": 0.046230435371398926, "step": 5416 }, { "epoch": 0.7323362906632869, "grad_norm": 0.22921724617481232, "learning_rate": 5.7219916284951265e-06, "loss": 0.07297617197036743, "step": 5417 }, { "epoch": 0.7324714828897338, "grad_norm": 0.40217429399490356, "learning_rate": 5.716614529315582e-06, "loss": 0.0736227035522461, "step": 5418 }, { "epoch": 0.7326066751161808, "grad_norm": 0.40664488077163696, "learning_rate": 5.711239362986401e-06, "loss": 0.06919479370117188, "step": 5419 }, { "epoch": 0.7327418673426278, "grad_norm": 0.19880792498588562, "learning_rate": 5.705866130626719e-06, "loss": 0.03999662399291992, "step": 5420 }, { "epoch": 0.7328770595690748, "grad_norm": 0.3941669464111328, "learning_rate": 5.700494833355271e-06, "loss": 0.08850431442260742, "step": 5421 }, { "epoch": 0.7330122517955218, "grad_norm": 0.3398006558418274, "learning_rate": 5.69512547229039e-06, "loss": 0.0892648696899414, "step": 5422 }, { "epoch": 0.7331474440219687, "grad_norm": 0.4490032494068146, "learning_rate": 5.689758048550001e-06, "loss": 0.08185863494873047, "step": 5423 }, { "epoch": 0.7332826362484157, "grad_norm": 0.23250049352645874, "learning_rate": 5.684392563251644e-06, "loss": 0.04620325565338135, "step": 5424 }, { "epoch": 0.7334178284748627, "grad_norm": 0.3230533003807068, "learning_rate": 5.679029017512422e-06, "loss": 0.07272601127624512, "step": 5425 }, { "epoch": 0.7335530207013097, "grad_norm": 0.465389609336853, "learning_rate": 5.6736674124490684e-06, "loss": 0.10321903228759766, "step": 5426 }, { "epoch": 0.7336882129277567, "grad_norm": 0.2790832221508026, "learning_rate": 5.6683077491778935e-06, "loss": 0.08725085854530334, "step": 5427 }, { "epoch": 0.7338234051542036, "grad_norm": 0.14180271327495575, "learning_rate": 5.6629500288148044e-06, "loss": 0.034332990646362305, "step": 5428 }, { "epoch": 0.7339585973806506, "grad_norm": 0.197455495595932, "learning_rate": 5.657594252475319e-06, "loss": 0.03261923789978027, "step": 5429 }, { "epoch": 0.7340937896070976, "grad_norm": 0.33095598220825195, "learning_rate": 5.652240421274521e-06, "loss": 0.06939244270324707, "step": 5430 }, { "epoch": 0.7342289818335446, "grad_norm": 0.27738887071609497, "learning_rate": 5.646888536327121e-06, "loss": 0.08073282241821289, "step": 5431 }, { "epoch": 0.7343641740599915, "grad_norm": 0.26231980323791504, "learning_rate": 5.641538598747403e-06, "loss": 0.059847116470336914, "step": 5432 }, { "epoch": 0.7344993662864385, "grad_norm": 0.2408897876739502, "learning_rate": 5.6361906096492495e-06, "loss": 0.05955934524536133, "step": 5433 }, { "epoch": 0.7346345585128855, "grad_norm": 0.1614508181810379, "learning_rate": 5.630844570146157e-06, "loss": 0.03942972421646118, "step": 5434 }, { "epoch": 0.7347697507393325, "grad_norm": 0.22522589564323425, "learning_rate": 5.625500481351176e-06, "loss": 0.052280426025390625, "step": 5435 }, { "epoch": 0.7349049429657795, "grad_norm": 0.313833624124527, "learning_rate": 5.6201583443769895e-06, "loss": 0.06171607971191406, "step": 5436 }, { "epoch": 0.7350401351922264, "grad_norm": 0.36819571256637573, "learning_rate": 5.614818160335857e-06, "loss": 0.06548786163330078, "step": 5437 }, { "epoch": 0.7351753274186734, "grad_norm": 0.5532251596450806, "learning_rate": 5.6094799303396315e-06, "loss": 0.08300328254699707, "step": 5438 }, { "epoch": 0.7353105196451204, "grad_norm": 0.30740243196487427, "learning_rate": 5.6041436554997595e-06, "loss": 0.06314802169799805, "step": 5439 }, { "epoch": 0.7354457118715674, "grad_norm": 0.11969193816184998, "learning_rate": 5.598809336927278e-06, "loss": 0.03349733352661133, "step": 5440 }, { "epoch": 0.7355809040980144, "grad_norm": 0.4020923376083374, "learning_rate": 5.5934769757328325e-06, "loss": 0.0655984878540039, "step": 5441 }, { "epoch": 0.7357160963244613, "grad_norm": 0.4389861822128296, "learning_rate": 5.588146573026642e-06, "loss": 0.07568025588989258, "step": 5442 }, { "epoch": 0.7358512885509083, "grad_norm": 0.2634657323360443, "learning_rate": 5.582818129918525e-06, "loss": 0.0838460922241211, "step": 5443 }, { "epoch": 0.7359864807773553, "grad_norm": 0.5413923263549805, "learning_rate": 5.5774916475178915e-06, "loss": 0.09250211715698242, "step": 5444 }, { "epoch": 0.7361216730038023, "grad_norm": 0.46186718344688416, "learning_rate": 5.572167126933738e-06, "loss": 0.06212717294692993, "step": 5445 }, { "epoch": 0.7362568652302492, "grad_norm": 0.41852033138275146, "learning_rate": 5.566844569274669e-06, "loss": 0.09307289123535156, "step": 5446 }, { "epoch": 0.7363920574566962, "grad_norm": 0.2844407260417938, "learning_rate": 5.5615239756488665e-06, "loss": 0.07479667663574219, "step": 5447 }, { "epoch": 0.7365272496831432, "grad_norm": 0.42158037424087524, "learning_rate": 5.556205347164104e-06, "loss": 0.0788877010345459, "step": 5448 }, { "epoch": 0.7366624419095902, "grad_norm": 0.2536088526248932, "learning_rate": 5.550888684927746e-06, "loss": 0.052857398986816406, "step": 5449 }, { "epoch": 0.7367976341360372, "grad_norm": 0.6148597002029419, "learning_rate": 5.545573990046752e-06, "loss": 0.08426094055175781, "step": 5450 }, { "epoch": 0.7369328263624841, "grad_norm": 0.44636231660842896, "learning_rate": 5.540261263627672e-06, "loss": 0.05566573143005371, "step": 5451 }, { "epoch": 0.7370680185889311, "grad_norm": 0.3422273099422455, "learning_rate": 5.534950506776644e-06, "loss": 0.07265329360961914, "step": 5452 }, { "epoch": 0.7372032108153781, "grad_norm": 0.34439584612846375, "learning_rate": 5.529641720599393e-06, "loss": 0.06811285018920898, "step": 5453 }, { "epoch": 0.7373384030418251, "grad_norm": 0.5353866815567017, "learning_rate": 5.52433490620124e-06, "loss": 0.08996915817260742, "step": 5454 }, { "epoch": 0.737473595268272, "grad_norm": 0.24170364439487457, "learning_rate": 5.519030064687082e-06, "loss": 0.057979583740234375, "step": 5455 }, { "epoch": 0.737608787494719, "grad_norm": 0.35214370489120483, "learning_rate": 5.51372719716143e-06, "loss": 0.06314373016357422, "step": 5456 }, { "epoch": 0.737743979721166, "grad_norm": 0.38871315121650696, "learning_rate": 5.508426304728363e-06, "loss": 0.0693821907043457, "step": 5457 }, { "epoch": 0.737879171947613, "grad_norm": 0.31814950704574585, "learning_rate": 5.503127388491552e-06, "loss": 0.06226396560668945, "step": 5458 }, { "epoch": 0.73801436417406, "grad_norm": 0.4149168133735657, "learning_rate": 5.497830449554266e-06, "loss": 0.08545207977294922, "step": 5459 }, { "epoch": 0.738149556400507, "grad_norm": 0.4107033908367157, "learning_rate": 5.492535489019344e-06, "loss": 0.057828426361083984, "step": 5460 }, { "epoch": 0.7382847486269539, "grad_norm": 0.545863151550293, "learning_rate": 5.4872425079892454e-06, "loss": 0.08362340927124023, "step": 5461 }, { "epoch": 0.7384199408534009, "grad_norm": 0.5547133088111877, "learning_rate": 5.481951507565973e-06, "loss": 0.08692812919616699, "step": 5462 }, { "epoch": 0.7385551330798479, "grad_norm": 0.40846455097198486, "learning_rate": 5.476662488851159e-06, "loss": 0.06932711601257324, "step": 5463 }, { "epoch": 0.7386903253062949, "grad_norm": 0.18166811764240265, "learning_rate": 5.471375452946e-06, "loss": 0.05360269546508789, "step": 5464 }, { "epoch": 0.7388255175327418, "grad_norm": 0.3002277612686157, "learning_rate": 5.466090400951279e-06, "loss": 0.0629281997680664, "step": 5465 }, { "epoch": 0.7389607097591888, "grad_norm": 0.2697925269603729, "learning_rate": 5.460807333967387e-06, "loss": 0.0595545768737793, "step": 5466 }, { "epoch": 0.7390959019856358, "grad_norm": 0.3708248734474182, "learning_rate": 5.455526253094267e-06, "loss": 0.06967902183532715, "step": 5467 }, { "epoch": 0.7392310942120828, "grad_norm": 0.29452773928642273, "learning_rate": 5.450247159431486e-06, "loss": 0.03252696990966797, "step": 5468 }, { "epoch": 0.7393662864385298, "grad_norm": 0.4548581540584564, "learning_rate": 5.44497005407817e-06, "loss": 0.08553218841552734, "step": 5469 }, { "epoch": 0.7395014786649767, "grad_norm": 0.3221522867679596, "learning_rate": 5.439694938133042e-06, "loss": 0.09247910976409912, "step": 5470 }, { "epoch": 0.7396366708914237, "grad_norm": 0.7349978685379028, "learning_rate": 5.434421812694409e-06, "loss": 0.08335566520690918, "step": 5471 }, { "epoch": 0.7397718631178707, "grad_norm": 0.32768312096595764, "learning_rate": 5.4291506788601624e-06, "loss": 0.04716801643371582, "step": 5472 }, { "epoch": 0.7399070553443177, "grad_norm": 0.4047926664352417, "learning_rate": 5.423881537727785e-06, "loss": 0.08875751495361328, "step": 5473 }, { "epoch": 0.7400422475707646, "grad_norm": 0.302165687084198, "learning_rate": 5.418614390394338e-06, "loss": 0.05476647615432739, "step": 5474 }, { "epoch": 0.7401774397972116, "grad_norm": 0.6546356678009033, "learning_rate": 5.413349237956469e-06, "loss": 0.10552167892456055, "step": 5475 }, { "epoch": 0.7403126320236586, "grad_norm": 0.586347222328186, "learning_rate": 5.4080860815104125e-06, "loss": 0.10285234451293945, "step": 5476 }, { "epoch": 0.7404478242501056, "grad_norm": 0.3252721428871155, "learning_rate": 5.402824922151977e-06, "loss": 0.05477714538574219, "step": 5477 }, { "epoch": 0.7405830164765526, "grad_norm": 0.30552777647972107, "learning_rate": 5.397565760976577e-06, "loss": 0.0667119026184082, "step": 5478 }, { "epoch": 0.7407182087029995, "grad_norm": 0.40809082984924316, "learning_rate": 5.392308599079193e-06, "loss": 0.07940340042114258, "step": 5479 }, { "epoch": 0.7408534009294465, "grad_norm": 0.26088157296180725, "learning_rate": 5.3870534375543916e-06, "loss": 0.0644269585609436, "step": 5480 }, { "epoch": 0.7409885931558935, "grad_norm": 0.3476581275463104, "learning_rate": 5.381800277496328e-06, "loss": 0.0823659896850586, "step": 5481 }, { "epoch": 0.7411237853823405, "grad_norm": 0.35466960072517395, "learning_rate": 5.376549119998731e-06, "loss": 0.06915223598480225, "step": 5482 }, { "epoch": 0.7412589776087875, "grad_norm": 0.3833767771720886, "learning_rate": 5.3712999661549314e-06, "loss": 0.07703423500061035, "step": 5483 }, { "epoch": 0.7413941698352344, "grad_norm": 0.36239394545555115, "learning_rate": 5.366052817057826e-06, "loss": 0.06222820281982422, "step": 5484 }, { "epoch": 0.7415293620616814, "grad_norm": 0.40474584698677063, "learning_rate": 5.360807673799899e-06, "loss": 0.06822347640991211, "step": 5485 }, { "epoch": 0.7416645542881284, "grad_norm": 0.3074905276298523, "learning_rate": 5.355564537473214e-06, "loss": 0.07065606117248535, "step": 5486 }, { "epoch": 0.7417997465145754, "grad_norm": 0.3259347081184387, "learning_rate": 5.35032340916942e-06, "loss": 0.07717752456665039, "step": 5487 }, { "epoch": 0.7419349387410223, "grad_norm": 0.3179175555706024, "learning_rate": 5.345084289979755e-06, "loss": 0.05710721015930176, "step": 5488 }, { "epoch": 0.7420701309674693, "grad_norm": 0.30644139647483826, "learning_rate": 5.339847180995026e-06, "loss": 0.04585671424865723, "step": 5489 }, { "epoch": 0.7422053231939163, "grad_norm": 0.25662174820899963, "learning_rate": 5.33461208330563e-06, "loss": 0.04766833782196045, "step": 5490 }, { "epoch": 0.7423405154203633, "grad_norm": 0.3106755018234253, "learning_rate": 5.32937899800154e-06, "loss": 0.0757284164428711, "step": 5491 }, { "epoch": 0.7424757076468103, "grad_norm": 0.322922945022583, "learning_rate": 5.324147926172307e-06, "loss": 0.07869482040405273, "step": 5492 }, { "epoch": 0.7426108998732572, "grad_norm": 0.26171329617500305, "learning_rate": 5.318918868907084e-06, "loss": 0.06150388717651367, "step": 5493 }, { "epoch": 0.7427460920997042, "grad_norm": 0.21698588132858276, "learning_rate": 5.313691827294568e-06, "loss": 0.05281102657318115, "step": 5494 }, { "epoch": 0.7428812843261512, "grad_norm": 0.25994858145713806, "learning_rate": 5.308466802423072e-06, "loss": 0.05610466003417969, "step": 5495 }, { "epoch": 0.7430164765525982, "grad_norm": 0.3296898305416107, "learning_rate": 5.303243795380471e-06, "loss": 0.06893157958984375, "step": 5496 }, { "epoch": 0.7431516687790453, "grad_norm": 0.46212247014045715, "learning_rate": 5.298022807254215e-06, "loss": 0.07411670684814453, "step": 5497 }, { "epoch": 0.7432868610054922, "grad_norm": 0.24345175921916962, "learning_rate": 5.292803839131358e-06, "loss": 0.05167675018310547, "step": 5498 }, { "epoch": 0.7434220532319392, "grad_norm": 0.29237526655197144, "learning_rate": 5.287586892098496e-06, "loss": 0.06631326675415039, "step": 5499 }, { "epoch": 0.7435572454583862, "grad_norm": 0.3614571690559387, "learning_rate": 5.282371967241842e-06, "loss": 0.09359931945800781, "step": 5500 }, { "epoch": 0.7436924376848332, "grad_norm": 0.2692449986934662, "learning_rate": 5.277159065647164e-06, "loss": 0.06752258539199829, "step": 5501 }, { "epoch": 0.7438276299112802, "grad_norm": 1.1080949306488037, "learning_rate": 5.271948188399814e-06, "loss": 0.09859180450439453, "step": 5502 }, { "epoch": 0.7439628221377271, "grad_norm": 0.351872980594635, "learning_rate": 5.266739336584735e-06, "loss": 0.07297849655151367, "step": 5503 }, { "epoch": 0.7440980143641741, "grad_norm": 0.46755194664001465, "learning_rate": 5.261532511286422e-06, "loss": 0.07714438438415527, "step": 5504 }, { "epoch": 0.7442332065906211, "grad_norm": 0.4388406276702881, "learning_rate": 5.256327713588977e-06, "loss": 0.0654829740524292, "step": 5505 }, { "epoch": 0.7443683988170681, "grad_norm": 0.6473273634910583, "learning_rate": 5.25112494457606e-06, "loss": 0.07860946655273438, "step": 5506 }, { "epoch": 0.744503591043515, "grad_norm": 0.5071631073951721, "learning_rate": 5.245924205330919e-06, "loss": 0.07981467247009277, "step": 5507 }, { "epoch": 0.744638783269962, "grad_norm": 0.2399771809577942, "learning_rate": 5.240725496936373e-06, "loss": 0.0605168342590332, "step": 5508 }, { "epoch": 0.744773975496409, "grad_norm": 0.2920646369457245, "learning_rate": 5.2355288204748145e-06, "loss": 0.062044739723205566, "step": 5509 }, { "epoch": 0.744909167722856, "grad_norm": 0.4028262197971344, "learning_rate": 5.230334177028233e-06, "loss": 0.06833958625793457, "step": 5510 }, { "epoch": 0.745044359949303, "grad_norm": 0.4029131233692169, "learning_rate": 5.2251415676781726e-06, "loss": 0.07670450210571289, "step": 5511 }, { "epoch": 0.7451795521757499, "grad_norm": 0.3253644108772278, "learning_rate": 5.2199509935057655e-06, "loss": 0.058380842208862305, "step": 5512 }, { "epoch": 0.7453147444021969, "grad_norm": 0.1138840913772583, "learning_rate": 5.214762455591713e-06, "loss": 0.028551578521728516, "step": 5513 }, { "epoch": 0.7454499366286439, "grad_norm": 0.2654602825641632, "learning_rate": 5.209575955016295e-06, "loss": 0.07141876220703125, "step": 5514 }, { "epoch": 0.7455851288550909, "grad_norm": 0.2957044839859009, "learning_rate": 5.204391492859377e-06, "loss": 0.06686973571777344, "step": 5515 }, { "epoch": 0.7457203210815379, "grad_norm": 0.3932427167892456, "learning_rate": 5.199209070200388e-06, "loss": 0.07320308685302734, "step": 5516 }, { "epoch": 0.7458555133079848, "grad_norm": 0.19216471910476685, "learning_rate": 5.194028688118332e-06, "loss": 0.04555559158325195, "step": 5517 }, { "epoch": 0.7459907055344318, "grad_norm": 0.23753167688846588, "learning_rate": 5.188850347691797e-06, "loss": 0.05741691589355469, "step": 5518 }, { "epoch": 0.7461258977608788, "grad_norm": 0.4030016362667084, "learning_rate": 5.183674049998934e-06, "loss": 0.07119739055633545, "step": 5519 }, { "epoch": 0.7462610899873258, "grad_norm": 0.29846253991127014, "learning_rate": 5.178499796117485e-06, "loss": 0.06371355056762695, "step": 5520 }, { "epoch": 0.7463962822137727, "grad_norm": 0.25891730189323425, "learning_rate": 5.173327587124753e-06, "loss": 0.06730508804321289, "step": 5521 }, { "epoch": 0.7465314744402197, "grad_norm": 0.35525593161582947, "learning_rate": 5.16815742409762e-06, "loss": 0.08658409118652344, "step": 5522 }, { "epoch": 0.7466666666666667, "grad_norm": 0.19685225188732147, "learning_rate": 5.16298930811254e-06, "loss": 0.05870342254638672, "step": 5523 }, { "epoch": 0.7468018588931137, "grad_norm": 0.3284025192260742, "learning_rate": 5.15782324024554e-06, "loss": 0.06280958652496338, "step": 5524 }, { "epoch": 0.7469370511195607, "grad_norm": 0.3075641393661499, "learning_rate": 5.152659221572231e-06, "loss": 0.05977821350097656, "step": 5525 }, { "epoch": 0.7470722433460076, "grad_norm": 0.25582441687583923, "learning_rate": 5.147497253167784e-06, "loss": 0.057166099548339844, "step": 5526 }, { "epoch": 0.7472074355724546, "grad_norm": 0.5908964276313782, "learning_rate": 5.142337336106948e-06, "loss": 0.09470438957214355, "step": 5527 }, { "epoch": 0.7473426277989016, "grad_norm": 0.3191787004470825, "learning_rate": 5.137179471464047e-06, "loss": 0.06888628005981445, "step": 5528 }, { "epoch": 0.7474778200253486, "grad_norm": 0.32175543904304504, "learning_rate": 5.13202366031297e-06, "loss": 0.06878399848937988, "step": 5529 }, { "epoch": 0.7476130122517956, "grad_norm": 0.3113773763179779, "learning_rate": 5.1268699037272e-06, "loss": 0.06818485260009766, "step": 5530 }, { "epoch": 0.7477482044782425, "grad_norm": 0.2162005603313446, "learning_rate": 5.121718202779756e-06, "loss": 0.05885887145996094, "step": 5531 }, { "epoch": 0.7478833967046895, "grad_norm": 0.4877583086490631, "learning_rate": 5.116568558543264e-06, "loss": 0.09424686431884766, "step": 5532 }, { "epoch": 0.7480185889311365, "grad_norm": 0.31521716713905334, "learning_rate": 5.1114209720899025e-06, "loss": 0.06360030174255371, "step": 5533 }, { "epoch": 0.7481537811575835, "grad_norm": 0.12664315104484558, "learning_rate": 5.106275444491423e-06, "loss": 0.025168538093566895, "step": 5534 }, { "epoch": 0.7482889733840304, "grad_norm": 0.36357641220092773, "learning_rate": 5.101131976819165e-06, "loss": 0.07224273681640625, "step": 5535 }, { "epoch": 0.7484241656104774, "grad_norm": 0.2043297439813614, "learning_rate": 5.095990570144008e-06, "loss": 0.054151058197021484, "step": 5536 }, { "epoch": 0.7485593578369244, "grad_norm": 0.2542010247707367, "learning_rate": 5.090851225536432e-06, "loss": 0.037700653076171875, "step": 5537 }, { "epoch": 0.7486945500633714, "grad_norm": 0.13683252036571503, "learning_rate": 5.085713944066474e-06, "loss": 0.0431215763092041, "step": 5538 }, { "epoch": 0.7488297422898184, "grad_norm": 0.3221246004104614, "learning_rate": 5.080578726803741e-06, "loss": 0.059185028076171875, "step": 5539 }, { "epoch": 0.7489649345162653, "grad_norm": 0.2807500660419464, "learning_rate": 5.075445574817415e-06, "loss": 0.07166194915771484, "step": 5540 }, { "epoch": 0.7491001267427123, "grad_norm": 0.4837017357349396, "learning_rate": 5.07031448917624e-06, "loss": 0.07048225402832031, "step": 5541 }, { "epoch": 0.7492353189691593, "grad_norm": 0.30621686577796936, "learning_rate": 5.065185470948544e-06, "loss": 0.07455205917358398, "step": 5542 }, { "epoch": 0.7493705111956063, "grad_norm": 0.5735632181167603, "learning_rate": 5.060058521202211e-06, "loss": 0.07740139961242676, "step": 5543 }, { "epoch": 0.7495057034220532, "grad_norm": 0.38835254311561584, "learning_rate": 5.054933641004703e-06, "loss": 0.08188867568969727, "step": 5544 }, { "epoch": 0.7496408956485002, "grad_norm": 0.3186468183994293, "learning_rate": 5.0498108314230425e-06, "loss": 0.07472836971282959, "step": 5545 }, { "epoch": 0.7497760878749472, "grad_norm": 0.23334920406341553, "learning_rate": 5.044690093523823e-06, "loss": 0.049179792404174805, "step": 5546 }, { "epoch": 0.7499112801013942, "grad_norm": 0.2741117775440216, "learning_rate": 5.039571428373219e-06, "loss": 0.05063951015472412, "step": 5547 }, { "epoch": 0.7500464723278412, "grad_norm": 0.3651045560836792, "learning_rate": 5.034454837036959e-06, "loss": 0.06481456756591797, "step": 5548 }, { "epoch": 0.7501816645542881, "grad_norm": 0.29171857237815857, "learning_rate": 5.0293403205803455e-06, "loss": 0.06099820137023926, "step": 5549 }, { "epoch": 0.7503168567807351, "grad_norm": 0.33192285895347595, "learning_rate": 5.024227880068247e-06, "loss": 0.058637142181396484, "step": 5550 }, { "epoch": 0.7504520490071821, "grad_norm": 0.2700657248497009, "learning_rate": 5.019117516565096e-06, "loss": 0.04500782489776611, "step": 5551 }, { "epoch": 0.7505872412336291, "grad_norm": 0.23167628049850464, "learning_rate": 5.014009231134908e-06, "loss": 0.0492253303527832, "step": 5552 }, { "epoch": 0.750722433460076, "grad_norm": 0.3039982318878174, "learning_rate": 5.008903024841248e-06, "loss": 0.06557083129882812, "step": 5553 }, { "epoch": 0.750857625686523, "grad_norm": 0.5059700608253479, "learning_rate": 5.0037988987472595e-06, "loss": 0.07209920883178711, "step": 5554 }, { "epoch": 0.75099281791297, "grad_norm": 0.3052729070186615, "learning_rate": 4.998696853915646e-06, "loss": 0.059540748596191406, "step": 5555 }, { "epoch": 0.751128010139417, "grad_norm": 0.3321710526943207, "learning_rate": 4.993596891408676e-06, "loss": 0.07263684272766113, "step": 5556 }, { "epoch": 0.751263202365864, "grad_norm": 0.47605693340301514, "learning_rate": 4.988499012288198e-06, "loss": 0.0726243257522583, "step": 5557 }, { "epoch": 0.751398394592311, "grad_norm": 0.11801926791667938, "learning_rate": 4.983403217615614e-06, "loss": 0.033270835876464844, "step": 5558 }, { "epoch": 0.7515335868187579, "grad_norm": 0.5052714943885803, "learning_rate": 4.978309508451896e-06, "loss": 0.09722709655761719, "step": 5559 }, { "epoch": 0.7516687790452049, "grad_norm": 0.25686025619506836, "learning_rate": 4.973217885857578e-06, "loss": 0.053447723388671875, "step": 5560 }, { "epoch": 0.7518039712716519, "grad_norm": 0.26720160245895386, "learning_rate": 4.968128350892763e-06, "loss": 0.058026671409606934, "step": 5561 }, { "epoch": 0.7519391634980989, "grad_norm": 0.34509679675102234, "learning_rate": 4.963040904617131e-06, "loss": 0.0823512077331543, "step": 5562 }, { "epoch": 0.7520743557245458, "grad_norm": 0.2777736485004425, "learning_rate": 4.9579555480898955e-06, "loss": 0.0454862117767334, "step": 5563 }, { "epoch": 0.7522095479509928, "grad_norm": 0.22768519818782806, "learning_rate": 4.952872282369873e-06, "loss": 0.04216265678405762, "step": 5564 }, { "epoch": 0.7523447401774398, "grad_norm": 0.8205633759498596, "learning_rate": 4.947791108515417e-06, "loss": 0.04407358169555664, "step": 5565 }, { "epoch": 0.7524799324038868, "grad_norm": 0.4990426301956177, "learning_rate": 4.942712027584453e-06, "loss": 0.06133723258972168, "step": 5566 }, { "epoch": 0.7526151246303338, "grad_norm": 0.3296782970428467, "learning_rate": 4.937635040634485e-06, "loss": 0.07139110565185547, "step": 5567 }, { "epoch": 0.7527503168567807, "grad_norm": 0.25605350732803345, "learning_rate": 4.9325601487225545e-06, "loss": 0.05712485313415527, "step": 5568 }, { "epoch": 0.7528855090832277, "grad_norm": 0.4864124357700348, "learning_rate": 4.927487352905289e-06, "loss": 0.08876609802246094, "step": 5569 }, { "epoch": 0.7530207013096747, "grad_norm": 0.26311105489730835, "learning_rate": 4.92241665423887e-06, "loss": 0.04821968078613281, "step": 5570 }, { "epoch": 0.7531558935361217, "grad_norm": 0.38874098658561707, "learning_rate": 4.917348053779039e-06, "loss": 0.06451725959777832, "step": 5571 }, { "epoch": 0.7532910857625686, "grad_norm": 0.262574702501297, "learning_rate": 4.912281552581122e-06, "loss": 0.05056285858154297, "step": 5572 }, { "epoch": 0.7534262779890156, "grad_norm": 0.36956506967544556, "learning_rate": 4.907217151699969e-06, "loss": 0.05255019664764404, "step": 5573 }, { "epoch": 0.7535614702154626, "grad_norm": 0.1910557746887207, "learning_rate": 4.9021548521900305e-06, "loss": 0.03656738996505737, "step": 5574 }, { "epoch": 0.7536966624419096, "grad_norm": 0.3456321954727173, "learning_rate": 4.8970946551053005e-06, "loss": 0.0818791389465332, "step": 5575 }, { "epoch": 0.7538318546683566, "grad_norm": 0.3904494047164917, "learning_rate": 4.892036561499339e-06, "loss": 0.06348228454589844, "step": 5576 }, { "epoch": 0.7539670468948035, "grad_norm": 0.21301542222499847, "learning_rate": 4.8869805724252675e-06, "loss": 0.06483745574951172, "step": 5577 }, { "epoch": 0.7541022391212505, "grad_norm": 0.403944730758667, "learning_rate": 4.8819266889357665e-06, "loss": 0.06790876388549805, "step": 5578 }, { "epoch": 0.7542374313476975, "grad_norm": 0.5573452711105347, "learning_rate": 4.876874912083088e-06, "loss": 0.06965208053588867, "step": 5579 }, { "epoch": 0.7543726235741445, "grad_norm": 0.6763182282447815, "learning_rate": 4.871825242919037e-06, "loss": 0.09247112274169922, "step": 5580 }, { "epoch": 0.7545078158005915, "grad_norm": 0.19230929017066956, "learning_rate": 4.866777682494978e-06, "loss": 0.05852961540222168, "step": 5581 }, { "epoch": 0.7546430080270384, "grad_norm": 0.33645913004875183, "learning_rate": 4.861732231861845e-06, "loss": 0.07019758224487305, "step": 5582 }, { "epoch": 0.7547782002534854, "grad_norm": 0.1876869797706604, "learning_rate": 4.85668889207012e-06, "loss": 0.0445864200592041, "step": 5583 }, { "epoch": 0.7549133924799324, "grad_norm": 0.3423718810081482, "learning_rate": 4.851647664169862e-06, "loss": 0.06366348266601562, "step": 5584 }, { "epoch": 0.7550485847063794, "grad_norm": 0.40339142084121704, "learning_rate": 4.846608549210679e-06, "loss": 0.08297139406204224, "step": 5585 }, { "epoch": 0.7551837769328263, "grad_norm": 0.38900959491729736, "learning_rate": 4.841571548241741e-06, "loss": 0.04147911071777344, "step": 5586 }, { "epoch": 0.7553189691592733, "grad_norm": 0.27130749821662903, "learning_rate": 4.836536662311777e-06, "loss": 0.06513595581054688, "step": 5587 }, { "epoch": 0.7554541613857203, "grad_norm": 0.3792252540588379, "learning_rate": 4.8315038924690745e-06, "loss": 0.06888812780380249, "step": 5588 }, { "epoch": 0.7555893536121673, "grad_norm": 0.5799320936203003, "learning_rate": 4.82647323976149e-06, "loss": 0.10873842239379883, "step": 5589 }, { "epoch": 0.7557245458386143, "grad_norm": 0.19998204708099365, "learning_rate": 4.821444705236429e-06, "loss": 0.04957246780395508, "step": 5590 }, { "epoch": 0.7558597380650612, "grad_norm": 0.2670244574546814, "learning_rate": 4.81641828994086e-06, "loss": 0.05125004053115845, "step": 5591 }, { "epoch": 0.7559949302915082, "grad_norm": 0.28263622522354126, "learning_rate": 4.811393994921308e-06, "loss": 0.0540766716003418, "step": 5592 }, { "epoch": 0.7561301225179552, "grad_norm": 0.6768190860748291, "learning_rate": 4.806371821223854e-06, "loss": 0.08544349670410156, "step": 5593 }, { "epoch": 0.7562653147444022, "grad_norm": 0.210227370262146, "learning_rate": 4.801351769894151e-06, "loss": 0.039162009954452515, "step": 5594 }, { "epoch": 0.7564005069708492, "grad_norm": 0.25639989972114563, "learning_rate": 4.796333841977394e-06, "loss": 0.05685591697692871, "step": 5595 }, { "epoch": 0.7565356991972961, "grad_norm": 0.3441276550292969, "learning_rate": 4.791318038518345e-06, "loss": 0.0647817850112915, "step": 5596 }, { "epoch": 0.7566708914237431, "grad_norm": 0.27997344732284546, "learning_rate": 4.7863043605613185e-06, "loss": 0.054930925369262695, "step": 5597 }, { "epoch": 0.7568060836501901, "grad_norm": 0.1451415866613388, "learning_rate": 4.7812928091501865e-06, "loss": 0.05081653594970703, "step": 5598 }, { "epoch": 0.7569412758766371, "grad_norm": 0.2508980333805084, "learning_rate": 4.7762833853283935e-06, "loss": 0.04594683647155762, "step": 5599 }, { "epoch": 0.757076468103084, "grad_norm": 0.38984835147857666, "learning_rate": 4.77127609013891e-06, "loss": 0.07874011993408203, "step": 5600 }, { "epoch": 0.757211660329531, "grad_norm": 0.3819902241230011, "learning_rate": 4.766270924624295e-06, "loss": 0.08634424209594727, "step": 5601 }, { "epoch": 0.757346852555978, "grad_norm": 0.44710078835487366, "learning_rate": 4.761267889826647e-06, "loss": 0.07189011573791504, "step": 5602 }, { "epoch": 0.757482044782425, "grad_norm": 0.6716780662536621, "learning_rate": 4.756266986787619e-06, "loss": 0.07186508178710938, "step": 5603 }, { "epoch": 0.757617237008872, "grad_norm": 0.20318977534770966, "learning_rate": 4.751268216548439e-06, "loss": 0.04666495323181152, "step": 5604 }, { "epoch": 0.7577524292353189, "grad_norm": 0.29602131247520447, "learning_rate": 4.746271580149861e-06, "loss": 0.0380130410194397, "step": 5605 }, { "epoch": 0.7578876214617659, "grad_norm": 0.4363838732242584, "learning_rate": 4.7412770786322244e-06, "loss": 0.06917369365692139, "step": 5606 }, { "epoch": 0.7580228136882129, "grad_norm": 0.23910731077194214, "learning_rate": 4.736284713035406e-06, "loss": 0.04737520217895508, "step": 5607 }, { "epoch": 0.7581580059146599, "grad_norm": 0.19320856034755707, "learning_rate": 4.731294484398843e-06, "loss": 0.049562811851501465, "step": 5608 }, { "epoch": 0.7582931981411069, "grad_norm": 0.30218300223350525, "learning_rate": 4.726306393761526e-06, "loss": 0.056662678718566895, "step": 5609 }, { "epoch": 0.7584283903675538, "grad_norm": 0.22269980609416962, "learning_rate": 4.721320442162001e-06, "loss": 0.06302642822265625, "step": 5610 }, { "epoch": 0.7585635825940008, "grad_norm": 0.373076468706131, "learning_rate": 4.716336630638378e-06, "loss": 0.05855274200439453, "step": 5611 }, { "epoch": 0.7586987748204478, "grad_norm": 0.3565768897533417, "learning_rate": 4.711354960228306e-06, "loss": 0.08982467651367188, "step": 5612 }, { "epoch": 0.7588339670468948, "grad_norm": 0.1863771378993988, "learning_rate": 4.706375431968998e-06, "loss": 0.04405832290649414, "step": 5613 }, { "epoch": 0.7589691592733417, "grad_norm": 0.4505642354488373, "learning_rate": 4.701398046897218e-06, "loss": 0.06322681903839111, "step": 5614 }, { "epoch": 0.7591043514997887, "grad_norm": 0.17876534163951874, "learning_rate": 4.696422806049277e-06, "loss": 0.05791473388671875, "step": 5615 }, { "epoch": 0.7592395437262357, "grad_norm": 0.1927298903465271, "learning_rate": 4.69144971046106e-06, "loss": 0.03877538442611694, "step": 5616 }, { "epoch": 0.7593747359526827, "grad_norm": 0.23871183395385742, "learning_rate": 4.686478761167984e-06, "loss": 0.06244182586669922, "step": 5617 }, { "epoch": 0.7595099281791297, "grad_norm": 0.29319432377815247, "learning_rate": 4.681509959205028e-06, "loss": 0.07445716857910156, "step": 5618 }, { "epoch": 0.7596451204055766, "grad_norm": 0.32316696643829346, "learning_rate": 4.676543305606724e-06, "loss": 0.047013282775878906, "step": 5619 }, { "epoch": 0.7597803126320236, "grad_norm": 0.24482862651348114, "learning_rate": 4.67157880140715e-06, "loss": 0.056181907653808594, "step": 5620 }, { "epoch": 0.7599155048584706, "grad_norm": 0.3955845236778259, "learning_rate": 4.666616447639952e-06, "loss": 0.06490731239318848, "step": 5621 }, { "epoch": 0.7600506970849176, "grad_norm": 0.3440779149532318, "learning_rate": 4.661656245338314e-06, "loss": 0.06698846817016602, "step": 5622 }, { "epoch": 0.7601858893113645, "grad_norm": 0.6869900226593018, "learning_rate": 4.656698195534978e-06, "loss": 0.07489728927612305, "step": 5623 }, { "epoch": 0.7603210815378115, "grad_norm": 0.32057246565818787, "learning_rate": 4.651742299262233e-06, "loss": 0.058403968811035156, "step": 5624 }, { "epoch": 0.7604562737642585, "grad_norm": 0.2266540676355362, "learning_rate": 4.646788557551921e-06, "loss": 0.04056048393249512, "step": 5625 }, { "epoch": 0.7605914659907055, "grad_norm": 0.3709670305252075, "learning_rate": 4.641836971435445e-06, "loss": 0.08048868179321289, "step": 5626 }, { "epoch": 0.7607266582171525, "grad_norm": 0.2754816710948944, "learning_rate": 4.63688754194375e-06, "loss": 0.0615391731262207, "step": 5627 }, { "epoch": 0.7608618504435994, "grad_norm": 0.22325558960437775, "learning_rate": 4.6319402701073295e-06, "loss": 0.06150674819946289, "step": 5628 }, { "epoch": 0.7609970426700464, "grad_norm": 0.35540273785591125, "learning_rate": 4.6269951569562355e-06, "loss": 0.057094573974609375, "step": 5629 }, { "epoch": 0.7611322348964934, "grad_norm": 0.33841997385025024, "learning_rate": 4.622052203520061e-06, "loss": 0.06940567493438721, "step": 5630 }, { "epoch": 0.7612674271229404, "grad_norm": 0.27616506814956665, "learning_rate": 4.617111410827968e-06, "loss": 0.060190439224243164, "step": 5631 }, { "epoch": 0.7614026193493874, "grad_norm": 0.44101110100746155, "learning_rate": 4.612172779908639e-06, "loss": 0.053999900817871094, "step": 5632 }, { "epoch": 0.7615378115758344, "grad_norm": 0.43242114782333374, "learning_rate": 4.607236311790335e-06, "loss": 0.08671188354492188, "step": 5633 }, { "epoch": 0.7616730038022814, "grad_norm": 0.518928050994873, "learning_rate": 4.602302007500854e-06, "loss": 0.07594013214111328, "step": 5634 }, { "epoch": 0.7618081960287284, "grad_norm": 0.2190275490283966, "learning_rate": 4.597369868067537e-06, "loss": 0.047338008880615234, "step": 5635 }, { "epoch": 0.7619433882551754, "grad_norm": 0.24892202019691467, "learning_rate": 4.592439894517296e-06, "loss": 0.055782198905944824, "step": 5636 }, { "epoch": 0.7620785804816224, "grad_norm": 0.21689964830875397, "learning_rate": 4.587512087876559e-06, "loss": 0.05381143093109131, "step": 5637 }, { "epoch": 0.7622137727080693, "grad_norm": 0.36855658888816833, "learning_rate": 4.582586449171336e-06, "loss": 0.05515718460083008, "step": 5638 }, { "epoch": 0.7623489649345163, "grad_norm": 0.22664356231689453, "learning_rate": 4.577662979427168e-06, "loss": 0.04027509689331055, "step": 5639 }, { "epoch": 0.7624841571609633, "grad_norm": 0.5098745226860046, "learning_rate": 4.572741679669147e-06, "loss": 0.0943603515625, "step": 5640 }, { "epoch": 0.7626193493874103, "grad_norm": 0.2838180959224701, "learning_rate": 4.567822550921912e-06, "loss": 0.0681074857711792, "step": 5641 }, { "epoch": 0.7627545416138573, "grad_norm": 0.28811103105545044, "learning_rate": 4.562905594209647e-06, "loss": 0.05295705795288086, "step": 5642 }, { "epoch": 0.7628897338403042, "grad_norm": 0.4805182218551636, "learning_rate": 4.557990810556102e-06, "loss": 0.08113241195678711, "step": 5643 }, { "epoch": 0.7630249260667512, "grad_norm": 0.26393046975135803, "learning_rate": 4.553078200984553e-06, "loss": 0.0574946403503418, "step": 5644 }, { "epoch": 0.7631601182931982, "grad_norm": 0.41263237595558167, "learning_rate": 4.548167766517832e-06, "loss": 0.08613967895507812, "step": 5645 }, { "epoch": 0.7632953105196452, "grad_norm": 0.2544111907482147, "learning_rate": 4.543259508178318e-06, "loss": 0.05940127372741699, "step": 5646 }, { "epoch": 0.7634305027460921, "grad_norm": 0.21949402987957, "learning_rate": 4.538353426987931e-06, "loss": 0.05451488494873047, "step": 5647 }, { "epoch": 0.7635656949725391, "grad_norm": 0.30404141545295715, "learning_rate": 4.533449523968154e-06, "loss": 0.07459366321563721, "step": 5648 }, { "epoch": 0.7637008871989861, "grad_norm": 0.26245078444480896, "learning_rate": 4.528547800140001e-06, "loss": 0.05554288625717163, "step": 5649 }, { "epoch": 0.7638360794254331, "grad_norm": 0.3203220069408417, "learning_rate": 4.523648256524037e-06, "loss": 0.05026090145111084, "step": 5650 }, { "epoch": 0.7639712716518801, "grad_norm": 0.5115206837654114, "learning_rate": 4.518750894140372e-06, "loss": 0.09605836868286133, "step": 5651 }, { "epoch": 0.764106463878327, "grad_norm": 0.30319446325302124, "learning_rate": 4.513855714008659e-06, "loss": 0.07964706420898438, "step": 5652 }, { "epoch": 0.764241656104774, "grad_norm": 0.3309054970741272, "learning_rate": 4.508962717148111e-06, "loss": 0.060248732566833496, "step": 5653 }, { "epoch": 0.764376848331221, "grad_norm": 0.3443457782268524, "learning_rate": 4.504071904577469e-06, "loss": 0.07103848457336426, "step": 5654 }, { "epoch": 0.764512040557668, "grad_norm": 0.20302394032478333, "learning_rate": 4.499183277315027e-06, "loss": 0.040272653102874756, "step": 5655 }, { "epoch": 0.764647232784115, "grad_norm": 0.1711495816707611, "learning_rate": 4.494296836378625e-06, "loss": 0.04271411895751953, "step": 5656 }, { "epoch": 0.7647824250105619, "grad_norm": 0.3251838982105255, "learning_rate": 4.4894125827856415e-06, "loss": 0.0497438907623291, "step": 5657 }, { "epoch": 0.7649176172370089, "grad_norm": 0.26741740107536316, "learning_rate": 4.4845305175530105e-06, "loss": 0.053354740142822266, "step": 5658 }, { "epoch": 0.7650528094634559, "grad_norm": 0.3099420964717865, "learning_rate": 4.479650641697201e-06, "loss": 0.0755605697631836, "step": 5659 }, { "epoch": 0.7651880016899029, "grad_norm": 0.36197108030319214, "learning_rate": 4.4747729562342305e-06, "loss": 0.07172966003417969, "step": 5660 }, { "epoch": 0.7653231939163498, "grad_norm": 0.39862021803855896, "learning_rate": 4.469897462179656e-06, "loss": 0.06305938959121704, "step": 5661 }, { "epoch": 0.7654583861427968, "grad_norm": 0.4498077630996704, "learning_rate": 4.46502416054858e-06, "loss": 0.0718998908996582, "step": 5662 }, { "epoch": 0.7655935783692438, "grad_norm": 0.38541167974472046, "learning_rate": 4.460153052355663e-06, "loss": 0.06909990310668945, "step": 5663 }, { "epoch": 0.7657287705956908, "grad_norm": 0.2797292470932007, "learning_rate": 4.455284138615074e-06, "loss": 0.05756568908691406, "step": 5664 }, { "epoch": 0.7658639628221378, "grad_norm": 0.19385021924972534, "learning_rate": 4.4504174203405656e-06, "loss": 0.05148935317993164, "step": 5665 }, { "epoch": 0.7659991550485847, "grad_norm": 0.30873343348503113, "learning_rate": 4.445552898545407e-06, "loss": 0.0481414794921875, "step": 5666 }, { "epoch": 0.7661343472750317, "grad_norm": 0.21540817618370056, "learning_rate": 4.440690574242413e-06, "loss": 0.052638351917266846, "step": 5667 }, { "epoch": 0.7662695395014787, "grad_norm": 0.30132579803466797, "learning_rate": 4.435830448443961e-06, "loss": 0.05691814422607422, "step": 5668 }, { "epoch": 0.7664047317279257, "grad_norm": 0.37630772590637207, "learning_rate": 4.430972522161934e-06, "loss": 0.07995986938476562, "step": 5669 }, { "epoch": 0.7665399239543726, "grad_norm": 0.29022759199142456, "learning_rate": 4.426116796407794e-06, "loss": 0.06147289276123047, "step": 5670 }, { "epoch": 0.7666751161808196, "grad_norm": 0.28281182050704956, "learning_rate": 4.421263272192523e-06, "loss": 0.06667089462280273, "step": 5671 }, { "epoch": 0.7668103084072666, "grad_norm": 0.36075347661972046, "learning_rate": 4.416411950526648e-06, "loss": 0.06223917007446289, "step": 5672 }, { "epoch": 0.7669455006337136, "grad_norm": 0.24650578200817108, "learning_rate": 4.411562832420252e-06, "loss": 0.055058956146240234, "step": 5673 }, { "epoch": 0.7670806928601606, "grad_norm": 0.3108918070793152, "learning_rate": 4.406715918882929e-06, "loss": 0.059282779693603516, "step": 5674 }, { "epoch": 0.7672158850866075, "grad_norm": 0.2328052818775177, "learning_rate": 4.4018712109238475e-06, "loss": 0.05899167060852051, "step": 5675 }, { "epoch": 0.7673510773130545, "grad_norm": 0.4828590452671051, "learning_rate": 4.3970287095516965e-06, "loss": 0.052896976470947266, "step": 5676 }, { "epoch": 0.7674862695395015, "grad_norm": 0.2692180573940277, "learning_rate": 4.39218841577471e-06, "loss": 0.0655522346496582, "step": 5677 }, { "epoch": 0.7676214617659485, "grad_norm": 0.44871872663497925, "learning_rate": 4.387350330600662e-06, "loss": 0.07520866394042969, "step": 5678 }, { "epoch": 0.7677566539923955, "grad_norm": 0.3476618826389313, "learning_rate": 4.382514455036864e-06, "loss": 0.06399822235107422, "step": 5679 }, { "epoch": 0.7678918462188424, "grad_norm": 0.19479790329933167, "learning_rate": 4.377680790090182e-06, "loss": 0.05185079574584961, "step": 5680 }, { "epoch": 0.7680270384452894, "grad_norm": 0.2459198385477066, "learning_rate": 4.372849336767004e-06, "loss": 0.06309765577316284, "step": 5681 }, { "epoch": 0.7681622306717364, "grad_norm": 0.34146684408187866, "learning_rate": 4.3680200960732645e-06, "loss": 0.07272100448608398, "step": 5682 }, { "epoch": 0.7682974228981834, "grad_norm": 0.3710181713104248, "learning_rate": 4.363193069014439e-06, "loss": 0.0505526065826416, "step": 5683 }, { "epoch": 0.7684326151246303, "grad_norm": 0.4271155893802643, "learning_rate": 4.3583682565955325e-06, "loss": 0.09519720077514648, "step": 5684 }, { "epoch": 0.7685678073510773, "grad_norm": 0.3301430940628052, "learning_rate": 4.3535456598211074e-06, "loss": 0.061621665954589844, "step": 5685 }, { "epoch": 0.7687029995775243, "grad_norm": 0.3569306433200836, "learning_rate": 4.348725279695251e-06, "loss": 0.0776815414428711, "step": 5686 }, { "epoch": 0.7688381918039713, "grad_norm": 0.19864505529403687, "learning_rate": 4.343907117221591e-06, "loss": 0.03427314758300781, "step": 5687 }, { "epoch": 0.7689733840304183, "grad_norm": 0.1509825736284256, "learning_rate": 4.339091173403294e-06, "loss": 0.041939735412597656, "step": 5688 }, { "epoch": 0.7691085762568652, "grad_norm": 0.20300842821598053, "learning_rate": 4.334277449243061e-06, "loss": 0.041044920682907104, "step": 5689 }, { "epoch": 0.7692437684833122, "grad_norm": 0.20278188586235046, "learning_rate": 4.329465945743144e-06, "loss": 0.048657894134521484, "step": 5690 }, { "epoch": 0.7693789607097592, "grad_norm": 0.41579997539520264, "learning_rate": 4.32465666390532e-06, "loss": 0.0794367790222168, "step": 5691 }, { "epoch": 0.7695141529362062, "grad_norm": 0.4104713797569275, "learning_rate": 4.319849604730905e-06, "loss": 0.06893670558929443, "step": 5692 }, { "epoch": 0.7696493451626532, "grad_norm": 0.2362470179796219, "learning_rate": 4.315044769220758e-06, "loss": 0.06392478942871094, "step": 5693 }, { "epoch": 0.7697845373891001, "grad_norm": 0.3901192545890808, "learning_rate": 4.310242158375264e-06, "loss": 0.06928801536560059, "step": 5694 }, { "epoch": 0.7699197296155471, "grad_norm": 0.19646479189395905, "learning_rate": 4.30544177319436e-06, "loss": 0.04744386672973633, "step": 5695 }, { "epoch": 0.7700549218419941, "grad_norm": 0.3500657081604004, "learning_rate": 4.300643614677511e-06, "loss": 0.05603682994842529, "step": 5696 }, { "epoch": 0.7701901140684411, "grad_norm": 0.3439147472381592, "learning_rate": 4.2958476838237165e-06, "loss": 0.06445038318634033, "step": 5697 }, { "epoch": 0.770325306294888, "grad_norm": 0.2937512695789337, "learning_rate": 4.2910539816315166e-06, "loss": 0.06619882583618164, "step": 5698 }, { "epoch": 0.770460498521335, "grad_norm": 0.1523667722940445, "learning_rate": 4.286262509098979e-06, "loss": 0.043492794036865234, "step": 5699 }, { "epoch": 0.770595690747782, "grad_norm": 0.44285938143730164, "learning_rate": 4.28147326722373e-06, "loss": 0.09971809387207031, "step": 5700 }, { "epoch": 0.770730882974229, "grad_norm": 0.17285388708114624, "learning_rate": 4.2766862570028965e-06, "loss": 0.04359164834022522, "step": 5701 }, { "epoch": 0.770866075200676, "grad_norm": 0.21552710235118866, "learning_rate": 4.2719014794331715e-06, "loss": 0.05307817459106445, "step": 5702 }, { "epoch": 0.7710012674271229, "grad_norm": 0.2892248034477234, "learning_rate": 4.267118935510767e-06, "loss": 0.0508270263671875, "step": 5703 }, { "epoch": 0.7711364596535699, "grad_norm": 0.2051730453968048, "learning_rate": 4.2623386262314306e-06, "loss": 0.0408627986907959, "step": 5704 }, { "epoch": 0.7712716518800169, "grad_norm": 0.24431021511554718, "learning_rate": 4.257560552590461e-06, "loss": 0.05776238441467285, "step": 5705 }, { "epoch": 0.7714068441064639, "grad_norm": 0.49438828229904175, "learning_rate": 4.252784715582661e-06, "loss": 0.053160011768341064, "step": 5706 }, { "epoch": 0.7715420363329109, "grad_norm": 0.46312135457992554, "learning_rate": 4.2480111162024e-06, "loss": 0.060565948486328125, "step": 5707 }, { "epoch": 0.7716772285593578, "grad_norm": 0.3173055350780487, "learning_rate": 4.243239755443561e-06, "loss": 0.0741962194442749, "step": 5708 }, { "epoch": 0.7718124207858048, "grad_norm": 0.3241714537143707, "learning_rate": 4.238470634299567e-06, "loss": 0.06093263626098633, "step": 5709 }, { "epoch": 0.7719476130122518, "grad_norm": 0.41119104623794556, "learning_rate": 4.233703753763375e-06, "loss": 0.08883190155029297, "step": 5710 }, { "epoch": 0.7720828052386988, "grad_norm": 0.3727583587169647, "learning_rate": 4.228939114827469e-06, "loss": 0.08051329851150513, "step": 5711 }, { "epoch": 0.7722179974651457, "grad_norm": 0.22532910108566284, "learning_rate": 4.224176718483881e-06, "loss": 0.04881012439727783, "step": 5712 }, { "epoch": 0.7723531896915927, "grad_norm": 0.199547678232193, "learning_rate": 4.219416565724165e-06, "loss": 0.03948569297790527, "step": 5713 }, { "epoch": 0.7724883819180397, "grad_norm": 0.3262271285057068, "learning_rate": 4.21465865753941e-06, "loss": 0.058938026428222656, "step": 5714 }, { "epoch": 0.7726235741444867, "grad_norm": 0.3589162528514862, "learning_rate": 4.209902994920236e-06, "loss": 0.07290387153625488, "step": 5715 }, { "epoch": 0.7727587663709337, "grad_norm": 0.45577287673950195, "learning_rate": 4.205149578856794e-06, "loss": 0.07767486572265625, "step": 5716 }, { "epoch": 0.7728939585973806, "grad_norm": 0.3757721483707428, "learning_rate": 4.200398410338779e-06, "loss": 0.0785365104675293, "step": 5717 }, { "epoch": 0.7730291508238276, "grad_norm": 0.38008055090904236, "learning_rate": 4.1956494903554056e-06, "loss": 0.08467793464660645, "step": 5718 }, { "epoch": 0.7731643430502746, "grad_norm": 0.2644762396812439, "learning_rate": 4.190902819895425e-06, "loss": 0.05213308334350586, "step": 5719 }, { "epoch": 0.7732995352767216, "grad_norm": 0.29041609168052673, "learning_rate": 4.186158399947118e-06, "loss": 0.07232236862182617, "step": 5720 }, { "epoch": 0.7734347275031686, "grad_norm": 0.2394200712442398, "learning_rate": 4.181416231498292e-06, "loss": 0.05535078048706055, "step": 5721 }, { "epoch": 0.7735699197296155, "grad_norm": 0.33892616629600525, "learning_rate": 4.176676315536306e-06, "loss": 0.06801414489746094, "step": 5722 }, { "epoch": 0.7737051119560625, "grad_norm": 0.3413313329219818, "learning_rate": 4.171938653048027e-06, "loss": 0.06963539123535156, "step": 5723 }, { "epoch": 0.7738403041825095, "grad_norm": 0.39870211482048035, "learning_rate": 4.1672032450198616e-06, "loss": 0.0765681266784668, "step": 5724 }, { "epoch": 0.7739754964089565, "grad_norm": 0.7043061852455139, "learning_rate": 4.16247009243775e-06, "loss": 0.10092782974243164, "step": 5725 }, { "epoch": 0.7741106886354034, "grad_norm": 0.36140236258506775, "learning_rate": 4.1577391962871504e-06, "loss": 0.07085418701171875, "step": 5726 }, { "epoch": 0.7742458808618504, "grad_norm": 0.38208651542663574, "learning_rate": 4.153010557553076e-06, "loss": 0.07592105865478516, "step": 5727 }, { "epoch": 0.7743810730882974, "grad_norm": 0.1892228126525879, "learning_rate": 4.148284177220045e-06, "loss": 0.04671287536621094, "step": 5728 }, { "epoch": 0.7745162653147444, "grad_norm": 0.28675931692123413, "learning_rate": 4.143560056272117e-06, "loss": 0.07384371757507324, "step": 5729 }, { "epoch": 0.7746514575411914, "grad_norm": 0.30072644352912903, "learning_rate": 4.1388381956928796e-06, "loss": 0.05128073692321777, "step": 5730 }, { "epoch": 0.7747866497676383, "grad_norm": 0.284824401140213, "learning_rate": 4.134118596465443e-06, "loss": 0.0562664270401001, "step": 5731 }, { "epoch": 0.7749218419940853, "grad_norm": 0.416551411151886, "learning_rate": 4.1294012595724675e-06, "loss": 0.06118416786193848, "step": 5732 }, { "epoch": 0.7750570342205323, "grad_norm": 0.26712268590927124, "learning_rate": 4.1246861859961114e-06, "loss": 0.07384276390075684, "step": 5733 }, { "epoch": 0.7751922264469793, "grad_norm": 0.3755200207233429, "learning_rate": 4.119973376718089e-06, "loss": 0.07685661315917969, "step": 5734 }, { "epoch": 0.7753274186734262, "grad_norm": 0.3086282014846802, "learning_rate": 4.115262832719628e-06, "loss": 0.04568958282470703, "step": 5735 }, { "epoch": 0.7754626108998732, "grad_norm": 0.36932235956192017, "learning_rate": 4.110554554981486e-06, "loss": 0.06133699417114258, "step": 5736 }, { "epoch": 0.7755978031263202, "grad_norm": 0.1857132464647293, "learning_rate": 4.1058485444839655e-06, "loss": 0.04334378242492676, "step": 5737 }, { "epoch": 0.7757329953527672, "grad_norm": 0.3986019790172577, "learning_rate": 4.101144802206862e-06, "loss": 0.07707595825195312, "step": 5738 }, { "epoch": 0.7758681875792142, "grad_norm": 0.19556692242622375, "learning_rate": 4.096443329129535e-06, "loss": 0.0461277961730957, "step": 5739 }, { "epoch": 0.7760033798056611, "grad_norm": 0.3307970464229584, "learning_rate": 4.091744126230853e-06, "loss": 0.0712122917175293, "step": 5740 }, { "epoch": 0.7761385720321081, "grad_norm": 0.4119226634502411, "learning_rate": 4.08704719448921e-06, "loss": 0.07254886627197266, "step": 5741 }, { "epoch": 0.7762737642585551, "grad_norm": 0.1848524808883667, "learning_rate": 4.082352534882543e-06, "loss": 0.03823113441467285, "step": 5742 }, { "epoch": 0.7764089564850021, "grad_norm": 0.2473663091659546, "learning_rate": 4.07766014838829e-06, "loss": 0.061011314392089844, "step": 5743 }, { "epoch": 0.7765441487114491, "grad_norm": 0.2692262530326843, "learning_rate": 4.072970035983443e-06, "loss": 0.05391550064086914, "step": 5744 }, { "epoch": 0.776679340937896, "grad_norm": 0.3563028872013092, "learning_rate": 4.068282198644505e-06, "loss": 0.08204364776611328, "step": 5745 }, { "epoch": 0.776814533164343, "grad_norm": 0.34030359983444214, "learning_rate": 4.06359663734751e-06, "loss": 0.04147148132324219, "step": 5746 }, { "epoch": 0.77694972539079, "grad_norm": 0.6173545122146606, "learning_rate": 4.058913353068013e-06, "loss": 0.08217430114746094, "step": 5747 }, { "epoch": 0.777084917617237, "grad_norm": 0.3259998559951782, "learning_rate": 4.0542323467810985e-06, "loss": 0.06691491603851318, "step": 5748 }, { "epoch": 0.777220109843684, "grad_norm": 0.28305602073669434, "learning_rate": 4.049553619461381e-06, "loss": 0.03863120079040527, "step": 5749 }, { "epoch": 0.7773553020701309, "grad_norm": 0.232561394572258, "learning_rate": 4.044877172082997e-06, "loss": 0.05492901802062988, "step": 5750 }, { "epoch": 0.7774904942965779, "grad_norm": 0.37049171328544617, "learning_rate": 4.040203005619604e-06, "loss": 0.06410980224609375, "step": 5751 }, { "epoch": 0.7776256865230249, "grad_norm": 0.24890120327472687, "learning_rate": 4.035531121044392e-06, "loss": 0.07123613357543945, "step": 5752 }, { "epoch": 0.7777608787494719, "grad_norm": 0.2749079763889313, "learning_rate": 4.030861519330065e-06, "loss": 0.06884384155273438, "step": 5753 }, { "epoch": 0.7778960709759188, "grad_norm": 0.5079923868179321, "learning_rate": 4.026194201448868e-06, "loss": 0.08815860748291016, "step": 5754 }, { "epoch": 0.7780312632023658, "grad_norm": 0.46686694025993347, "learning_rate": 4.021529168372558e-06, "loss": 0.0786428451538086, "step": 5755 }, { "epoch": 0.7781664554288128, "grad_norm": 0.2763957977294922, "learning_rate": 4.01686642107242e-06, "loss": 0.05726909637451172, "step": 5756 }, { "epoch": 0.7783016476552598, "grad_norm": 0.29116520285606384, "learning_rate": 4.0122059605192624e-06, "loss": 0.06893444061279297, "step": 5757 }, { "epoch": 0.7784368398817068, "grad_norm": 0.19644325971603394, "learning_rate": 4.007547787683412e-06, "loss": 0.04460024833679199, "step": 5758 }, { "epoch": 0.7785720321081537, "grad_norm": 0.19754086434841156, "learning_rate": 4.002891903534736e-06, "loss": 0.05706286430358887, "step": 5759 }, { "epoch": 0.7787072243346007, "grad_norm": 0.5186466574668884, "learning_rate": 3.998238309042611e-06, "loss": 0.096160888671875, "step": 5760 }, { "epoch": 0.7788424165610477, "grad_norm": 0.30563274025917053, "learning_rate": 3.993587005175937e-06, "loss": 0.06788778305053711, "step": 5761 }, { "epoch": 0.7789776087874947, "grad_norm": 0.2985718846321106, "learning_rate": 3.988937992903144e-06, "loss": 0.07439279556274414, "step": 5762 }, { "epoch": 0.7791128010139416, "grad_norm": 0.3044978678226471, "learning_rate": 3.9842912731921716e-06, "loss": 0.06938791275024414, "step": 5763 }, { "epoch": 0.7792479932403886, "grad_norm": 0.1739664226770401, "learning_rate": 3.979646847010506e-06, "loss": 0.05845212936401367, "step": 5764 }, { "epoch": 0.7793831854668356, "grad_norm": 0.4711625576019287, "learning_rate": 3.975004715325134e-06, "loss": 0.08249151706695557, "step": 5765 }, { "epoch": 0.7795183776932826, "grad_norm": 0.28729113936424255, "learning_rate": 3.970364879102572e-06, "loss": 0.06833052635192871, "step": 5766 }, { "epoch": 0.7796535699197296, "grad_norm": 0.3286392390727997, "learning_rate": 3.96572733930886e-06, "loss": 0.058071136474609375, "step": 5767 }, { "epoch": 0.7797887621461765, "grad_norm": 0.43296244740486145, "learning_rate": 3.961092096909552e-06, "loss": 0.06349897384643555, "step": 5768 }, { "epoch": 0.7799239543726236, "grad_norm": 0.28133082389831543, "learning_rate": 3.9564591528697455e-06, "loss": 0.046489715576171875, "step": 5769 }, { "epoch": 0.7800591465990706, "grad_norm": 0.44722869992256165, "learning_rate": 3.9518285081540275e-06, "loss": 0.0754251480102539, "step": 5770 }, { "epoch": 0.7801943388255176, "grad_norm": 0.40870973467826843, "learning_rate": 3.947200163726534e-06, "loss": 0.06529468297958374, "step": 5771 }, { "epoch": 0.7803295310519646, "grad_norm": 0.14248031377792358, "learning_rate": 3.9425741205509055e-06, "loss": 0.033555030822753906, "step": 5772 }, { "epoch": 0.7804647232784115, "grad_norm": 0.3367745876312256, "learning_rate": 3.9379503795903065e-06, "loss": 0.06927061080932617, "step": 5773 }, { "epoch": 0.7805999155048585, "grad_norm": 0.29812562465667725, "learning_rate": 3.933328941807439e-06, "loss": 0.06696891784667969, "step": 5774 }, { "epoch": 0.7807351077313055, "grad_norm": 0.349993497133255, "learning_rate": 3.928709808164491e-06, "loss": 0.0759577751159668, "step": 5775 }, { "epoch": 0.7808702999577525, "grad_norm": 0.36581605672836304, "learning_rate": 3.924092979623203e-06, "loss": 0.08065176010131836, "step": 5776 }, { "epoch": 0.7810054921841995, "grad_norm": 0.21663056313991547, "learning_rate": 3.919478457144824e-06, "loss": 0.047911882400512695, "step": 5777 }, { "epoch": 0.7811406844106464, "grad_norm": 0.3258569836616516, "learning_rate": 3.914866241690115e-06, "loss": 0.06820964813232422, "step": 5778 }, { "epoch": 0.7812758766370934, "grad_norm": 0.2599864900112152, "learning_rate": 3.9102563342193695e-06, "loss": 0.044528961181640625, "step": 5779 }, { "epoch": 0.7814110688635404, "grad_norm": 0.2585674226284027, "learning_rate": 3.905648735692389e-06, "loss": 0.04642355442047119, "step": 5780 }, { "epoch": 0.7815462610899874, "grad_norm": 0.35366174578666687, "learning_rate": 3.901043447068508e-06, "loss": 0.06553888320922852, "step": 5781 }, { "epoch": 0.7816814533164343, "grad_norm": 0.37189918756484985, "learning_rate": 3.896440469306567e-06, "loss": 0.08901119232177734, "step": 5782 }, { "epoch": 0.7818166455428813, "grad_norm": 0.20578940212726593, "learning_rate": 3.891839803364934e-06, "loss": 0.04856395721435547, "step": 5783 }, { "epoch": 0.7819518377693283, "grad_norm": 0.17473185062408447, "learning_rate": 3.887241450201487e-06, "loss": 0.03539228439331055, "step": 5784 }, { "epoch": 0.7820870299957753, "grad_norm": 0.30684182047843933, "learning_rate": 3.882645410773629e-06, "loss": 0.06535720825195312, "step": 5785 }, { "epoch": 0.7822222222222223, "grad_norm": 0.22718492150306702, "learning_rate": 3.878051686038284e-06, "loss": 0.04875516891479492, "step": 5786 }, { "epoch": 0.7823574144486692, "grad_norm": 0.27810609340667725, "learning_rate": 3.873460276951889e-06, "loss": 0.047110557556152344, "step": 5787 }, { "epoch": 0.7824926066751162, "grad_norm": 0.3875526189804077, "learning_rate": 3.868871184470397e-06, "loss": 0.06734180450439453, "step": 5788 }, { "epoch": 0.7826277989015632, "grad_norm": 0.36902740597724915, "learning_rate": 3.864284409549282e-06, "loss": 0.08010327816009521, "step": 5789 }, { "epoch": 0.7827629911280102, "grad_norm": 0.2538996636867523, "learning_rate": 3.859699953143532e-06, "loss": 0.05434715747833252, "step": 5790 }, { "epoch": 0.7828981833544572, "grad_norm": 0.21697865426540375, "learning_rate": 3.855117816207665e-06, "loss": 0.04898500442504883, "step": 5791 }, { "epoch": 0.7830333755809041, "grad_norm": 0.17005237936973572, "learning_rate": 3.850537999695699e-06, "loss": 0.04483509063720703, "step": 5792 }, { "epoch": 0.7831685678073511, "grad_norm": 0.2478007972240448, "learning_rate": 3.845960504561179e-06, "loss": 0.05867290496826172, "step": 5793 }, { "epoch": 0.7833037600337981, "grad_norm": 0.219625785946846, "learning_rate": 3.841385331757161e-06, "loss": 0.04551362991333008, "step": 5794 }, { "epoch": 0.7834389522602451, "grad_norm": 0.26169317960739136, "learning_rate": 3.8368124822362184e-06, "loss": 0.06520521640777588, "step": 5795 }, { "epoch": 0.783574144486692, "grad_norm": 0.35230061411857605, "learning_rate": 3.832241956950449e-06, "loss": 0.057332515716552734, "step": 5796 }, { "epoch": 0.783709336713139, "grad_norm": 0.32186058163642883, "learning_rate": 3.82767375685146e-06, "loss": 0.07688665390014648, "step": 5797 }, { "epoch": 0.783844528939586, "grad_norm": 0.5881981253623962, "learning_rate": 3.823107882890373e-06, "loss": 0.09408235549926758, "step": 5798 }, { "epoch": 0.783979721166033, "grad_norm": 0.29914772510528564, "learning_rate": 3.8185443360178265e-06, "loss": 0.05277276039123535, "step": 5799 }, { "epoch": 0.78411491339248, "grad_norm": 0.2215801328420639, "learning_rate": 3.813983117183973e-06, "loss": 0.049486637115478516, "step": 5800 }, { "epoch": 0.7842501056189269, "grad_norm": 0.35752561688423157, "learning_rate": 3.8094242273384932e-06, "loss": 0.0811152458190918, "step": 5801 }, { "epoch": 0.7843852978453739, "grad_norm": 0.42034009099006653, "learning_rate": 3.804867667430555e-06, "loss": 0.0776982307434082, "step": 5802 }, { "epoch": 0.7845204900718209, "grad_norm": 0.18439221382141113, "learning_rate": 3.800313438408874e-06, "loss": 0.04037666320800781, "step": 5803 }, { "epoch": 0.7846556822982679, "grad_norm": 0.3341442942619324, "learning_rate": 3.7957615412216582e-06, "loss": 0.07306098937988281, "step": 5804 }, { "epoch": 0.7847908745247149, "grad_norm": 0.4744199216365814, "learning_rate": 3.791211976816634e-06, "loss": 0.0776371955871582, "step": 5805 }, { "epoch": 0.7849260667511618, "grad_norm": 0.4500978887081146, "learning_rate": 3.786664746141057e-06, "loss": 0.06372880935668945, "step": 5806 }, { "epoch": 0.7850612589776088, "grad_norm": 0.3885696232318878, "learning_rate": 3.782119850141669e-06, "loss": 0.060495853424072266, "step": 5807 }, { "epoch": 0.7851964512040558, "grad_norm": 0.7141410112380981, "learning_rate": 3.777577289764752e-06, "loss": 0.08289527893066406, "step": 5808 }, { "epoch": 0.7853316434305028, "grad_norm": 0.3788013756275177, "learning_rate": 3.7730370659560904e-06, "loss": 0.05214107036590576, "step": 5809 }, { "epoch": 0.7854668356569497, "grad_norm": 0.28666380047798157, "learning_rate": 3.7684991796609746e-06, "loss": 0.05033731460571289, "step": 5810 }, { "epoch": 0.7856020278833967, "grad_norm": 0.21786850690841675, "learning_rate": 3.7639636318242344e-06, "loss": 0.05078268051147461, "step": 5811 }, { "epoch": 0.7857372201098437, "grad_norm": 0.34412282705307007, "learning_rate": 3.7594304233901738e-06, "loss": 0.08062267303466797, "step": 5812 }, { "epoch": 0.7858724123362907, "grad_norm": 0.22413675487041473, "learning_rate": 3.754899555302645e-06, "loss": 0.03797823190689087, "step": 5813 }, { "epoch": 0.7860076045627377, "grad_norm": 0.19559744000434875, "learning_rate": 3.7503710285049964e-06, "loss": 0.04233598709106445, "step": 5814 }, { "epoch": 0.7861427967891846, "grad_norm": 0.32189080119132996, "learning_rate": 3.7458448439400888e-06, "loss": 0.061079978942871094, "step": 5815 }, { "epoch": 0.7862779890156316, "grad_norm": 0.1955747902393341, "learning_rate": 3.7413210025502985e-06, "loss": 0.03945612907409668, "step": 5816 }, { "epoch": 0.7864131812420786, "grad_norm": 0.3130621016025543, "learning_rate": 3.7367995052775123e-06, "loss": 0.05901479721069336, "step": 5817 }, { "epoch": 0.7865483734685256, "grad_norm": 0.19198979437351227, "learning_rate": 3.732280353063133e-06, "loss": 0.05481517314910889, "step": 5818 }, { "epoch": 0.7866835656949726, "grad_norm": 0.46729937195777893, "learning_rate": 3.727763546848074e-06, "loss": 0.06125986576080322, "step": 5819 }, { "epoch": 0.7868187579214195, "grad_norm": 0.24033065140247345, "learning_rate": 3.7232490875727544e-06, "loss": 0.04735279083251953, "step": 5820 }, { "epoch": 0.7869539501478665, "grad_norm": 0.3184490501880646, "learning_rate": 3.718736976177108e-06, "loss": 0.07044410705566406, "step": 5821 }, { "epoch": 0.7870891423743135, "grad_norm": 0.24775823950767517, "learning_rate": 3.71422721360058e-06, "loss": 0.055655479431152344, "step": 5822 }, { "epoch": 0.7872243346007605, "grad_norm": 0.18539845943450928, "learning_rate": 3.709719800782133e-06, "loss": 0.04623746871948242, "step": 5823 }, { "epoch": 0.7873595268272074, "grad_norm": 0.47386887669563293, "learning_rate": 3.7052147386602304e-06, "loss": 0.0531386137008667, "step": 5824 }, { "epoch": 0.7874947190536544, "grad_norm": 0.2164570838212967, "learning_rate": 3.700712028172851e-06, "loss": 0.04325532913208008, "step": 5825 }, { "epoch": 0.7876299112801014, "grad_norm": 0.29097384214401245, "learning_rate": 3.696211670257481e-06, "loss": 0.06789112091064453, "step": 5826 }, { "epoch": 0.7877651035065484, "grad_norm": 0.5457591414451599, "learning_rate": 3.691713665851117e-06, "loss": 0.08944034576416016, "step": 5827 }, { "epoch": 0.7879002957329954, "grad_norm": 0.3387489914894104, "learning_rate": 3.6872180158902764e-06, "loss": 0.0661458969116211, "step": 5828 }, { "epoch": 0.7880354879594423, "grad_norm": 0.5111722946166992, "learning_rate": 3.6827247213109705e-06, "loss": 0.060814619064331055, "step": 5829 }, { "epoch": 0.7881706801858893, "grad_norm": 0.28169646859169006, "learning_rate": 3.6782337830487294e-06, "loss": 0.05659973621368408, "step": 5830 }, { "epoch": 0.7883058724123363, "grad_norm": 0.3352935314178467, "learning_rate": 3.6737452020385886e-06, "loss": 0.05640769004821777, "step": 5831 }, { "epoch": 0.7884410646387833, "grad_norm": 0.20845596492290497, "learning_rate": 3.6692589792150923e-06, "loss": 0.04909205436706543, "step": 5832 }, { "epoch": 0.7885762568652303, "grad_norm": 0.20986799895763397, "learning_rate": 3.6647751155123026e-06, "loss": 0.05309581756591797, "step": 5833 }, { "epoch": 0.7887114490916772, "grad_norm": 0.1778966784477234, "learning_rate": 3.660293611863782e-06, "loss": 0.050807952880859375, "step": 5834 }, { "epoch": 0.7888466413181242, "grad_norm": 0.473964124917984, "learning_rate": 3.655814469202602e-06, "loss": 0.0950632095336914, "step": 5835 }, { "epoch": 0.7889818335445712, "grad_norm": 0.29631367325782776, "learning_rate": 3.6513376884613446e-06, "loss": 0.07864713668823242, "step": 5836 }, { "epoch": 0.7891170257710182, "grad_norm": 0.25132322311401367, "learning_rate": 3.6468632705720934e-06, "loss": 0.05677342414855957, "step": 5837 }, { "epoch": 0.7892522179974651, "grad_norm": 0.39402106404304504, "learning_rate": 3.6423912164664606e-06, "loss": 0.06469893455505371, "step": 5838 }, { "epoch": 0.7893874102239121, "grad_norm": 0.18110719323158264, "learning_rate": 3.637921527075534e-06, "loss": 0.04526472091674805, "step": 5839 }, { "epoch": 0.7895226024503591, "grad_norm": 0.31015798449516296, "learning_rate": 3.63345420332994e-06, "loss": 0.06031608581542969, "step": 5840 }, { "epoch": 0.7896577946768061, "grad_norm": 0.2274787276983261, "learning_rate": 3.628989246159795e-06, "loss": 0.06720209121704102, "step": 5841 }, { "epoch": 0.7897929869032531, "grad_norm": 0.42186152935028076, "learning_rate": 3.6245266564947205e-06, "loss": 0.06429600715637207, "step": 5842 }, { "epoch": 0.7899281791297, "grad_norm": 0.2879282534122467, "learning_rate": 3.620066435263868e-06, "loss": 0.06023454666137695, "step": 5843 }, { "epoch": 0.790063371356147, "grad_norm": 0.5149502158164978, "learning_rate": 3.6156085833958596e-06, "loss": 0.06293487548828125, "step": 5844 }, { "epoch": 0.790198563582594, "grad_norm": 0.3775785565376282, "learning_rate": 3.6111531018188584e-06, "loss": 0.07205677032470703, "step": 5845 }, { "epoch": 0.790333755809041, "grad_norm": 0.24140354990959167, "learning_rate": 3.606699991460513e-06, "loss": 0.046868324279785156, "step": 5846 }, { "epoch": 0.790468948035488, "grad_norm": 0.4178244173526764, "learning_rate": 3.602249253247986e-06, "loss": 0.08034706115722656, "step": 5847 }, { "epoch": 0.7906041402619349, "grad_norm": 0.2813854217529297, "learning_rate": 3.5978008881079445e-06, "loss": 0.05697774887084961, "step": 5848 }, { "epoch": 0.7907393324883819, "grad_norm": 0.21201984584331512, "learning_rate": 3.5933548969665587e-06, "loss": 0.038930654525756836, "step": 5849 }, { "epoch": 0.7908745247148289, "grad_norm": 0.18337924778461456, "learning_rate": 3.5889112807495152e-06, "loss": 0.04210853576660156, "step": 5850 }, { "epoch": 0.7910097169412759, "grad_norm": 0.3261737525463104, "learning_rate": 3.5844700403819935e-06, "loss": 0.06548404693603516, "step": 5851 }, { "epoch": 0.7911449091677228, "grad_norm": 0.2853728234767914, "learning_rate": 3.5800311767886847e-06, "loss": 0.05063962936401367, "step": 5852 }, { "epoch": 0.7912801013941698, "grad_norm": 0.31865543127059937, "learning_rate": 3.575594690893784e-06, "loss": 0.07824563980102539, "step": 5853 }, { "epoch": 0.7914152936206168, "grad_norm": 0.30337998270988464, "learning_rate": 3.5711605836209853e-06, "loss": 0.05076479911804199, "step": 5854 }, { "epoch": 0.7915504858470638, "grad_norm": 0.3420272469520569, "learning_rate": 3.566728855893505e-06, "loss": 0.07276201248168945, "step": 5855 }, { "epoch": 0.7916856780735108, "grad_norm": 0.28453660011291504, "learning_rate": 3.5622995086340466e-06, "loss": 0.07154130935668945, "step": 5856 }, { "epoch": 0.7918208702999577, "grad_norm": 0.2686252295970917, "learning_rate": 3.5578725427648233e-06, "loss": 0.05756998062133789, "step": 5857 }, { "epoch": 0.7919560625264047, "grad_norm": 0.20000793039798737, "learning_rate": 3.553447959207553e-06, "loss": 0.04842329025268555, "step": 5858 }, { "epoch": 0.7920912547528517, "grad_norm": 0.4091678857803345, "learning_rate": 3.5490257588834552e-06, "loss": 0.07534807920455933, "step": 5859 }, { "epoch": 0.7922264469792987, "grad_norm": 0.48165163397789, "learning_rate": 3.5446059427132615e-06, "loss": 0.06319570541381836, "step": 5860 }, { "epoch": 0.7923616392057456, "grad_norm": 0.32107535004615784, "learning_rate": 3.5401885116171977e-06, "loss": 0.06496143341064453, "step": 5861 }, { "epoch": 0.7924968314321926, "grad_norm": 0.34414857625961304, "learning_rate": 3.5357734665149983e-06, "loss": 0.060967087745666504, "step": 5862 }, { "epoch": 0.7926320236586396, "grad_norm": 0.23059622943401337, "learning_rate": 3.5313608083258975e-06, "loss": 0.04185652732849121, "step": 5863 }, { "epoch": 0.7927672158850866, "grad_norm": 0.38646480441093445, "learning_rate": 3.526950537968629e-06, "loss": 0.07922554016113281, "step": 5864 }, { "epoch": 0.7929024081115336, "grad_norm": 0.28850066661834717, "learning_rate": 3.5225426563614466e-06, "loss": 0.05431365966796875, "step": 5865 }, { "epoch": 0.7930376003379805, "grad_norm": 0.12900696694850922, "learning_rate": 3.518137164422088e-06, "loss": 0.03278541564941406, "step": 5866 }, { "epoch": 0.7931727925644275, "grad_norm": 0.35914337635040283, "learning_rate": 3.513734063067799e-06, "loss": 0.0921776294708252, "step": 5867 }, { "epoch": 0.7933079847908745, "grad_norm": 0.14453966915607452, "learning_rate": 3.5093333532153316e-06, "loss": 0.03630661964416504, "step": 5868 }, { "epoch": 0.7934431770173215, "grad_norm": 0.2894953489303589, "learning_rate": 3.504935035780931e-06, "loss": 0.06284189224243164, "step": 5869 }, { "epoch": 0.7935783692437685, "grad_norm": 0.30163708329200745, "learning_rate": 3.500539111680364e-06, "loss": 0.07058584690093994, "step": 5870 }, { "epoch": 0.7937135614702154, "grad_norm": 0.524236798286438, "learning_rate": 3.4961455818288683e-06, "loss": 0.09031009674072266, "step": 5871 }, { "epoch": 0.7938487536966624, "grad_norm": 0.23061595857143402, "learning_rate": 3.491754447141212e-06, "loss": 0.04869508743286133, "step": 5872 }, { "epoch": 0.7939839459231094, "grad_norm": 0.3281051814556122, "learning_rate": 3.4873657085316504e-06, "loss": 0.06250953674316406, "step": 5873 }, { "epoch": 0.7941191381495564, "grad_norm": 0.33008918166160583, "learning_rate": 3.482979366913935e-06, "loss": 0.06384420394897461, "step": 5874 }, { "epoch": 0.7942543303760033, "grad_norm": 0.4086730182170868, "learning_rate": 3.4785954232013423e-06, "loss": 0.0685889720916748, "step": 5875 }, { "epoch": 0.7943895226024503, "grad_norm": 0.3937242925167084, "learning_rate": 3.4742138783066122e-06, "loss": 0.08018016815185547, "step": 5876 }, { "epoch": 0.7945247148288973, "grad_norm": 0.5728943943977356, "learning_rate": 3.4698347331420206e-06, "loss": 0.08187556266784668, "step": 5877 }, { "epoch": 0.7946599070553443, "grad_norm": 0.2489577978849411, "learning_rate": 3.4654579886193223e-06, "loss": 0.061530113220214844, "step": 5878 }, { "epoch": 0.7947950992817913, "grad_norm": 0.29962947964668274, "learning_rate": 3.461083645649782e-06, "loss": 0.061600685119628906, "step": 5879 }, { "epoch": 0.7949302915082382, "grad_norm": 0.4102258086204529, "learning_rate": 3.4567117051441594e-06, "loss": 0.05533194541931152, "step": 5880 }, { "epoch": 0.7950654837346852, "grad_norm": 0.16216807067394257, "learning_rate": 3.4523421680127115e-06, "loss": 0.03956878185272217, "step": 5881 }, { "epoch": 0.7952006759611322, "grad_norm": 0.4279293119907379, "learning_rate": 3.447975035165209e-06, "loss": 0.07097578048706055, "step": 5882 }, { "epoch": 0.7953358681875792, "grad_norm": 0.3215303122997284, "learning_rate": 3.4436103075109076e-06, "loss": 0.05742359161376953, "step": 5883 }, { "epoch": 0.7954710604140262, "grad_norm": 0.34310388565063477, "learning_rate": 3.4392479859585642e-06, "loss": 0.07660746574401855, "step": 5884 }, { "epoch": 0.7956062526404731, "grad_norm": 0.36039555072784424, "learning_rate": 3.4348880714164416e-06, "loss": 0.06647872924804688, "step": 5885 }, { "epoch": 0.7957414448669201, "grad_norm": 0.2750612795352936, "learning_rate": 3.430530564792289e-06, "loss": 0.056557655334472656, "step": 5886 }, { "epoch": 0.7958766370933671, "grad_norm": 0.5428061485290527, "learning_rate": 3.426175466993374e-06, "loss": 0.07390868663787842, "step": 5887 }, { "epoch": 0.7960118293198141, "grad_norm": 0.2898423969745636, "learning_rate": 3.4218227789264468e-06, "loss": 0.05636954307556152, "step": 5888 }, { "epoch": 0.796147021546261, "grad_norm": 0.21055658161640167, "learning_rate": 3.417472501497758e-06, "loss": 0.04013681411743164, "step": 5889 }, { "epoch": 0.796282213772708, "grad_norm": 0.6051457524299622, "learning_rate": 3.413124635613061e-06, "loss": 0.07826828956604004, "step": 5890 }, { "epoch": 0.796417405999155, "grad_norm": 0.2119661122560501, "learning_rate": 3.4087791821775986e-06, "loss": 0.05274820327758789, "step": 5891 }, { "epoch": 0.796552598225602, "grad_norm": 0.3171161711215973, "learning_rate": 3.4044361420961285e-06, "loss": 0.06368637084960938, "step": 5892 }, { "epoch": 0.796687790452049, "grad_norm": 0.1474030613899231, "learning_rate": 3.4000955162728866e-06, "loss": 0.041254520416259766, "step": 5893 }, { "epoch": 0.7968229826784959, "grad_norm": 0.30869102478027344, "learning_rate": 3.3957573056116164e-06, "loss": 0.06182575225830078, "step": 5894 }, { "epoch": 0.7969581749049429, "grad_norm": 0.26358070969581604, "learning_rate": 3.391421511015558e-06, "loss": 0.04130411148071289, "step": 5895 }, { "epoch": 0.7970933671313899, "grad_norm": 0.22798016667366028, "learning_rate": 3.38708813338744e-06, "loss": 0.041803598403930664, "step": 5896 }, { "epoch": 0.7972285593578369, "grad_norm": 0.49194031953811646, "learning_rate": 3.382757173629506e-06, "loss": 0.06548714637756348, "step": 5897 }, { "epoch": 0.7973637515842839, "grad_norm": 0.2816850543022156, "learning_rate": 3.378428632643478e-06, "loss": 0.07463455200195312, "step": 5898 }, { "epoch": 0.7974989438107308, "grad_norm": 0.17052793502807617, "learning_rate": 3.3741025113305825e-06, "loss": 0.04622983932495117, "step": 5899 }, { "epoch": 0.7976341360371778, "grad_norm": 0.23189640045166016, "learning_rate": 3.369778810591541e-06, "loss": 0.061908721923828125, "step": 5900 }, { "epoch": 0.7977693282636248, "grad_norm": 0.2454507201910019, "learning_rate": 3.3654575313265664e-06, "loss": 0.05745649337768555, "step": 5901 }, { "epoch": 0.7979045204900718, "grad_norm": 0.4106588363647461, "learning_rate": 3.361138674435386e-06, "loss": 0.05560147762298584, "step": 5902 }, { "epoch": 0.7980397127165187, "grad_norm": 0.29690682888031006, "learning_rate": 3.35682224081719e-06, "loss": 0.06985664367675781, "step": 5903 }, { "epoch": 0.7981749049429658, "grad_norm": 0.2685743272304535, "learning_rate": 3.352508231370699e-06, "loss": 0.059015750885009766, "step": 5904 }, { "epoch": 0.7983100971694128, "grad_norm": 0.20073962211608887, "learning_rate": 3.3481966469941044e-06, "loss": 0.03816652297973633, "step": 5905 }, { "epoch": 0.7984452893958598, "grad_norm": 0.3108757436275482, "learning_rate": 3.3438874885850984e-06, "loss": 0.07769346237182617, "step": 5906 }, { "epoch": 0.7985804816223068, "grad_norm": 0.2990017533302307, "learning_rate": 3.3395807570408847e-06, "loss": 0.06526756286621094, "step": 5907 }, { "epoch": 0.7987156738487537, "grad_norm": 0.609994649887085, "learning_rate": 3.33527645325813e-06, "loss": 0.07463288307189941, "step": 5908 }, { "epoch": 0.7988508660752007, "grad_norm": 0.456243097782135, "learning_rate": 3.3309745781330247e-06, "loss": 0.06634783744812012, "step": 5909 }, { "epoch": 0.7989860583016477, "grad_norm": 0.24894843995571136, "learning_rate": 3.32667513256124e-06, "loss": 0.056507110595703125, "step": 5910 }, { "epoch": 0.7991212505280947, "grad_norm": 0.20762842893600464, "learning_rate": 3.3223781174379375e-06, "loss": 0.04933905601501465, "step": 5911 }, { "epoch": 0.7992564427545417, "grad_norm": 0.5355972647666931, "learning_rate": 3.3180835336577917e-06, "loss": 0.06595611572265625, "step": 5912 }, { "epoch": 0.7993916349809886, "grad_norm": 0.23957689106464386, "learning_rate": 3.313791382114943e-06, "loss": 0.0684194564819336, "step": 5913 }, { "epoch": 0.7995268272074356, "grad_norm": 0.3925321400165558, "learning_rate": 3.3095016637030505e-06, "loss": 0.09416913986206055, "step": 5914 }, { "epoch": 0.7996620194338826, "grad_norm": 0.23857423663139343, "learning_rate": 3.3052143793152524e-06, "loss": 0.053450584411621094, "step": 5915 }, { "epoch": 0.7997972116603296, "grad_norm": 0.2391202598810196, "learning_rate": 3.3009295298441855e-06, "loss": 0.049214839935302734, "step": 5916 }, { "epoch": 0.7999324038867766, "grad_norm": 0.32320865988731384, "learning_rate": 3.2966471161819767e-06, "loss": 0.05562543869018555, "step": 5917 }, { "epoch": 0.8000675961132235, "grad_norm": 0.29675161838531494, "learning_rate": 3.292367139220246e-06, "loss": 0.057188987731933594, "step": 5918 }, { "epoch": 0.8002027883396705, "grad_norm": 0.18438327312469482, "learning_rate": 3.288089599850112e-06, "loss": 0.03248238563537598, "step": 5919 }, { "epoch": 0.8003379805661175, "grad_norm": 0.36449599266052246, "learning_rate": 3.2838144989621795e-06, "loss": 0.06055879592895508, "step": 5920 }, { "epoch": 0.8004731727925645, "grad_norm": 0.42434123158454895, "learning_rate": 3.2795418374465458e-06, "loss": 0.06690025329589844, "step": 5921 }, { "epoch": 0.8006083650190114, "grad_norm": 0.308083176612854, "learning_rate": 3.275271616192803e-06, "loss": 0.05982637405395508, "step": 5922 }, { "epoch": 0.8007435572454584, "grad_norm": 0.30605998635292053, "learning_rate": 3.2710038360900303e-06, "loss": 0.06424713134765625, "step": 5923 }, { "epoch": 0.8008787494719054, "grad_norm": 0.23421098291873932, "learning_rate": 3.266738498026808e-06, "loss": 0.059783935546875, "step": 5924 }, { "epoch": 0.8010139416983524, "grad_norm": 0.23105604946613312, "learning_rate": 3.2624756028912005e-06, "loss": 0.05173146724700928, "step": 5925 }, { "epoch": 0.8011491339247994, "grad_norm": 0.4146019518375397, "learning_rate": 3.2582151515707655e-06, "loss": 0.076080322265625, "step": 5926 }, { "epoch": 0.8012843261512463, "grad_norm": 0.3450862467288971, "learning_rate": 3.253957144952551e-06, "loss": 0.07610392570495605, "step": 5927 }, { "epoch": 0.8014195183776933, "grad_norm": 0.3583201766014099, "learning_rate": 3.249701583923091e-06, "loss": 0.07472729682922363, "step": 5928 }, { "epoch": 0.8015547106041403, "grad_norm": 0.395272821187973, "learning_rate": 3.2454484693684257e-06, "loss": 0.07549095153808594, "step": 5929 }, { "epoch": 0.8016899028305873, "grad_norm": 0.2994285821914673, "learning_rate": 3.2411978021740727e-06, "loss": 0.04777020215988159, "step": 5930 }, { "epoch": 0.8018250950570343, "grad_norm": 0.25583797693252563, "learning_rate": 3.2369495832250434e-06, "loss": 0.0537114143371582, "step": 5931 }, { "epoch": 0.8019602872834812, "grad_norm": 0.28064626455307007, "learning_rate": 3.2327038134058378e-06, "loss": 0.05535268783569336, "step": 5932 }, { "epoch": 0.8020954795099282, "grad_norm": 0.28295469284057617, "learning_rate": 3.228460493600446e-06, "loss": 0.07952594757080078, "step": 5933 }, { "epoch": 0.8022306717363752, "grad_norm": 0.3977964520454407, "learning_rate": 3.2242196246923554e-06, "loss": 0.0649835467338562, "step": 5934 }, { "epoch": 0.8023658639628222, "grad_norm": 0.37444525957107544, "learning_rate": 3.2199812075645375e-06, "loss": 0.08574485778808594, "step": 5935 }, { "epoch": 0.8025010561892691, "grad_norm": 0.19850070774555206, "learning_rate": 3.2157452430994487e-06, "loss": 0.04955482482910156, "step": 5936 }, { "epoch": 0.8026362484157161, "grad_norm": 0.62302565574646, "learning_rate": 3.2115117321790427e-06, "loss": 0.10436153411865234, "step": 5937 }, { "epoch": 0.8027714406421631, "grad_norm": 0.4690047800540924, "learning_rate": 3.207280675684754e-06, "loss": 0.06291484832763672, "step": 5938 }, { "epoch": 0.8029066328686101, "grad_norm": 0.3041515648365021, "learning_rate": 3.203052074497523e-06, "loss": 0.05617856979370117, "step": 5939 }, { "epoch": 0.8030418250950571, "grad_norm": 0.25223445892333984, "learning_rate": 3.198825929497752e-06, "loss": 0.049494028091430664, "step": 5940 }, { "epoch": 0.803177017321504, "grad_norm": 0.2859956920146942, "learning_rate": 3.194602241565357e-06, "loss": 0.053922057151794434, "step": 5941 }, { "epoch": 0.803312209547951, "grad_norm": 0.5730636715888977, "learning_rate": 3.1903810115797282e-06, "loss": 0.053530335426330566, "step": 5942 }, { "epoch": 0.803447401774398, "grad_norm": 0.24570654332637787, "learning_rate": 3.1861622404197475e-06, "loss": 0.04247868061065674, "step": 5943 }, { "epoch": 0.803582594000845, "grad_norm": 0.22582951188087463, "learning_rate": 3.181945928963794e-06, "loss": 0.05179309844970703, "step": 5944 }, { "epoch": 0.803717786227292, "grad_norm": 0.24250128865242004, "learning_rate": 3.1777320780897124e-06, "loss": 0.05829143524169922, "step": 5945 }, { "epoch": 0.8038529784537389, "grad_norm": 0.2799808979034424, "learning_rate": 3.1735206886748602e-06, "loss": 0.05966770648956299, "step": 5946 }, { "epoch": 0.8039881706801859, "grad_norm": 0.3474852442741394, "learning_rate": 3.1693117615960665e-06, "loss": 0.06831169128417969, "step": 5947 }, { "epoch": 0.8041233629066329, "grad_norm": 0.4207124710083008, "learning_rate": 3.1651052977296537e-06, "loss": 0.09029388427734375, "step": 5948 }, { "epoch": 0.8042585551330799, "grad_norm": 0.2723088264465332, "learning_rate": 3.1609012979514273e-06, "loss": 0.0634164810180664, "step": 5949 }, { "epoch": 0.8043937473595268, "grad_norm": 0.41889747977256775, "learning_rate": 3.156699763136683e-06, "loss": 0.08170890808105469, "step": 5950 }, { "epoch": 0.8045289395859738, "grad_norm": 0.29862862825393677, "learning_rate": 3.152500694160207e-06, "loss": 0.06790554523468018, "step": 5951 }, { "epoch": 0.8046641318124208, "grad_norm": 0.5785182118415833, "learning_rate": 3.148304091896265e-06, "loss": 0.07030558586120605, "step": 5952 }, { "epoch": 0.8047993240388678, "grad_norm": 0.3827217221260071, "learning_rate": 3.144109957218612e-06, "loss": 0.06324422359466553, "step": 5953 }, { "epoch": 0.8049345162653148, "grad_norm": 0.4175521433353424, "learning_rate": 3.1399182910004893e-06, "loss": 0.08026123046875, "step": 5954 }, { "epoch": 0.8050697084917617, "grad_norm": 0.28439274430274963, "learning_rate": 3.1357290941146215e-06, "loss": 0.04622602462768555, "step": 5955 }, { "epoch": 0.8052049007182087, "grad_norm": 0.2630952000617981, "learning_rate": 3.1315423674332265e-06, "loss": 0.06127119064331055, "step": 5956 }, { "epoch": 0.8053400929446557, "grad_norm": 0.2002151608467102, "learning_rate": 3.127358111828002e-06, "loss": 0.05383944511413574, "step": 5957 }, { "epoch": 0.8054752851711027, "grad_norm": 0.1441686749458313, "learning_rate": 3.123176328170131e-06, "loss": 0.04285383224487305, "step": 5958 }, { "epoch": 0.8056104773975497, "grad_norm": 0.31765300035476685, "learning_rate": 3.1189970173302816e-06, "loss": 0.07411348819732666, "step": 5959 }, { "epoch": 0.8057456696239966, "grad_norm": 0.2846148908138275, "learning_rate": 3.1148201801786085e-06, "loss": 0.07054901123046875, "step": 5960 }, { "epoch": 0.8058808618504436, "grad_norm": 0.24253971874713898, "learning_rate": 3.1106458175847572e-06, "loss": 0.045412540435791016, "step": 5961 }, { "epoch": 0.8060160540768906, "grad_norm": 0.11731071770191193, "learning_rate": 3.106473930417848e-06, "loss": 0.03174871206283569, "step": 5962 }, { "epoch": 0.8061512463033376, "grad_norm": 0.29594147205352783, "learning_rate": 3.1023045195464903e-06, "loss": 0.051989197731018066, "step": 5963 }, { "epoch": 0.8062864385297845, "grad_norm": 0.22837471961975098, "learning_rate": 3.098137585838779e-06, "loss": 0.05999565124511719, "step": 5964 }, { "epoch": 0.8064216307562315, "grad_norm": 0.13394711911678314, "learning_rate": 3.093973130162286e-06, "loss": 0.031126737594604492, "step": 5965 }, { "epoch": 0.8065568229826785, "grad_norm": 0.28221338987350464, "learning_rate": 3.089811153384083e-06, "loss": 0.04995131492614746, "step": 5966 }, { "epoch": 0.8066920152091255, "grad_norm": 0.2014472335577011, "learning_rate": 3.08565165637071e-06, "loss": 0.05839824676513672, "step": 5967 }, { "epoch": 0.8068272074355725, "grad_norm": 0.18108515441417694, "learning_rate": 3.081494639988196e-06, "loss": 0.045146942138671875, "step": 5968 }, { "epoch": 0.8069623996620194, "grad_norm": 0.22258733212947845, "learning_rate": 3.077340105102057e-06, "loss": 0.0531005859375, "step": 5969 }, { "epoch": 0.8070975918884664, "grad_norm": 0.2183443009853363, "learning_rate": 3.0731880525772817e-06, "loss": 0.05909299850463867, "step": 5970 }, { "epoch": 0.8072327841149134, "grad_norm": 0.23940695822238922, "learning_rate": 3.069038483278364e-06, "loss": 0.056195735931396484, "step": 5971 }, { "epoch": 0.8073679763413604, "grad_norm": 0.3401727080345154, "learning_rate": 3.0648913980692505e-06, "loss": 0.0833749771118164, "step": 5972 }, { "epoch": 0.8075031685678074, "grad_norm": 0.30367034673690796, "learning_rate": 3.0607467978133985e-06, "loss": 0.05263376235961914, "step": 5973 }, { "epoch": 0.8076383607942543, "grad_norm": 0.33931174874305725, "learning_rate": 3.0566046833737294e-06, "loss": 0.07328853011131287, "step": 5974 }, { "epoch": 0.8077735530207013, "grad_norm": 0.3551097810268402, "learning_rate": 3.0524650556126517e-06, "loss": 0.07681083679199219, "step": 5975 }, { "epoch": 0.8079087452471483, "grad_norm": 0.13805121183395386, "learning_rate": 3.048327915392069e-06, "loss": 0.03244829177856445, "step": 5976 }, { "epoch": 0.8080439374735953, "grad_norm": 0.33022773265838623, "learning_rate": 3.044193263573341e-06, "loss": 0.0790395736694336, "step": 5977 }, { "epoch": 0.8081791297000422, "grad_norm": 0.15861350297927856, "learning_rate": 3.0400611010173355e-06, "loss": 0.04596138000488281, "step": 5978 }, { "epoch": 0.8083143219264892, "grad_norm": 0.32104912400245667, "learning_rate": 3.0359314285843863e-06, "loss": 0.07611799240112305, "step": 5979 }, { "epoch": 0.8084495141529362, "grad_norm": 0.3161945641040802, "learning_rate": 3.0318042471343104e-06, "loss": 0.07168233394622803, "step": 5980 }, { "epoch": 0.8085847063793832, "grad_norm": 0.3164866268634796, "learning_rate": 3.027679557526422e-06, "loss": 0.07014846801757812, "step": 5981 }, { "epoch": 0.8087198986058302, "grad_norm": 0.4257417917251587, "learning_rate": 3.0235573606194844e-06, "loss": 0.0629739761352539, "step": 5982 }, { "epoch": 0.8088550908322771, "grad_norm": 0.5392917990684509, "learning_rate": 3.0194376572717743e-06, "loss": 0.09183883666992188, "step": 5983 }, { "epoch": 0.8089902830587241, "grad_norm": 0.20555761456489563, "learning_rate": 3.0153204483410318e-06, "loss": 0.05644631385803223, "step": 5984 }, { "epoch": 0.8091254752851711, "grad_norm": 0.4321295917034149, "learning_rate": 3.0112057346844834e-06, "loss": 0.08102297782897949, "step": 5985 }, { "epoch": 0.8092606675116181, "grad_norm": 0.2864816188812256, "learning_rate": 3.007093517158832e-06, "loss": 0.07088708877563477, "step": 5986 }, { "epoch": 0.809395859738065, "grad_norm": 0.27273401618003845, "learning_rate": 3.002983796620261e-06, "loss": 0.053546905517578125, "step": 5987 }, { "epoch": 0.809531051964512, "grad_norm": 0.5449060201644897, "learning_rate": 2.9988765739244427e-06, "loss": 0.0780942440032959, "step": 5988 }, { "epoch": 0.809666244190959, "grad_norm": 0.3616132438182831, "learning_rate": 2.9947718499265197e-06, "loss": 0.08637809753417969, "step": 5989 }, { "epoch": 0.809801436417406, "grad_norm": 0.17864102125167847, "learning_rate": 2.9906696254811184e-06, "loss": 0.046005964279174805, "step": 5990 }, { "epoch": 0.809936628643853, "grad_norm": 0.384751558303833, "learning_rate": 2.9865699014423404e-06, "loss": 0.049784183502197266, "step": 5991 }, { "epoch": 0.8100718208702999, "grad_norm": 0.22934868931770325, "learning_rate": 2.9824726786637698e-06, "loss": 0.050343096256256104, "step": 5992 }, { "epoch": 0.8102070130967469, "grad_norm": 0.48752734065055847, "learning_rate": 2.978377957998477e-06, "loss": 0.09496164321899414, "step": 5993 }, { "epoch": 0.8103422053231939, "grad_norm": 0.27358347177505493, "learning_rate": 2.974285740299001e-06, "loss": 0.0584406852722168, "step": 5994 }, { "epoch": 0.8104773975496409, "grad_norm": 0.3991025388240814, "learning_rate": 2.9701960264173612e-06, "loss": 0.0663614273071289, "step": 5995 }, { "epoch": 0.8106125897760879, "grad_norm": 0.33150970935821533, "learning_rate": 2.96610881720506e-06, "loss": 0.05559682846069336, "step": 5996 }, { "epoch": 0.8107477820025348, "grad_norm": 0.31655383110046387, "learning_rate": 2.9620241135130715e-06, "loss": 0.0678856372833252, "step": 5997 }, { "epoch": 0.8108829742289818, "grad_norm": 0.6779840588569641, "learning_rate": 2.9579419161918607e-06, "loss": 0.08355236053466797, "step": 5998 }, { "epoch": 0.8110181664554288, "grad_norm": 0.19206367433071136, "learning_rate": 2.9538622260913595e-06, "loss": 0.04270792007446289, "step": 5999 }, { "epoch": 0.8111533586818758, "grad_norm": 0.3747929036617279, "learning_rate": 2.9497850440609814e-06, "loss": 0.06484103202819824, "step": 6000 }, { "epoch": 0.8112885509083227, "grad_norm": 0.2159186452627182, "learning_rate": 2.945710370949616e-06, "loss": 0.05137217044830322, "step": 6001 }, { "epoch": 0.8114237431347697, "grad_norm": 0.19466176629066467, "learning_rate": 2.941638207605629e-06, "loss": 0.03656339645385742, "step": 6002 }, { "epoch": 0.8115589353612167, "grad_norm": 0.36241474747657776, "learning_rate": 2.937568554876873e-06, "loss": 0.08582782745361328, "step": 6003 }, { "epoch": 0.8116941275876637, "grad_norm": 0.2982935607433319, "learning_rate": 2.9335014136106704e-06, "loss": 0.06539297103881836, "step": 6004 }, { "epoch": 0.8118293198141107, "grad_norm": 0.33213871717453003, "learning_rate": 2.929436784653818e-06, "loss": 0.059813737869262695, "step": 6005 }, { "epoch": 0.8119645120405576, "grad_norm": 0.3062089681625366, "learning_rate": 2.925374668852597e-06, "loss": 0.06751632690429688, "step": 6006 }, { "epoch": 0.8120997042670046, "grad_norm": 0.19817759096622467, "learning_rate": 2.921315067052754e-06, "loss": 0.05138111114501953, "step": 6007 }, { "epoch": 0.8122348964934516, "grad_norm": 0.2660796046257019, "learning_rate": 2.917257980099535e-06, "loss": 0.057718753814697266, "step": 6008 }, { "epoch": 0.8123700887198986, "grad_norm": 0.17224065959453583, "learning_rate": 2.913203408837629e-06, "loss": 0.05161905288696289, "step": 6009 }, { "epoch": 0.8125052809463456, "grad_norm": 0.1696305274963379, "learning_rate": 2.909151354111232e-06, "loss": 0.04230022430419922, "step": 6010 }, { "epoch": 0.8126404731727925, "grad_norm": 0.3935920298099518, "learning_rate": 2.905101816763998e-06, "loss": 0.07367944717407227, "step": 6011 }, { "epoch": 0.8127756653992395, "grad_norm": 0.1789814680814743, "learning_rate": 2.9010547976390617e-06, "loss": 0.04308140277862549, "step": 6012 }, { "epoch": 0.8129108576256865, "grad_norm": 0.2439608871936798, "learning_rate": 2.897010297579042e-06, "loss": 0.06258678436279297, "step": 6013 }, { "epoch": 0.8130460498521335, "grad_norm": 0.25089651346206665, "learning_rate": 2.8929683174260133e-06, "loss": 0.05801820755004883, "step": 6014 }, { "epoch": 0.8131812420785804, "grad_norm": 0.2685256600379944, "learning_rate": 2.8889288580215467e-06, "loss": 0.035607337951660156, "step": 6015 }, { "epoch": 0.8133164343050274, "grad_norm": 0.28391289710998535, "learning_rate": 2.8848919202066752e-06, "loss": 0.07107210159301758, "step": 6016 }, { "epoch": 0.8134516265314744, "grad_norm": 0.7301496863365173, "learning_rate": 2.8808575048219123e-06, "loss": 0.12547564506530762, "step": 6017 }, { "epoch": 0.8135868187579214, "grad_norm": 0.2270822525024414, "learning_rate": 2.8768256127072436e-06, "loss": 0.04033780097961426, "step": 6018 }, { "epoch": 0.8137220109843684, "grad_norm": 0.2530202269554138, "learning_rate": 2.872796244702128e-06, "loss": 0.052034854888916016, "step": 6019 }, { "epoch": 0.8138572032108153, "grad_norm": 0.45868924260139465, "learning_rate": 2.8687694016455075e-06, "loss": 0.09524834156036377, "step": 6020 }, { "epoch": 0.8139923954372623, "grad_norm": 0.2891275882720947, "learning_rate": 2.86474508437579e-06, "loss": 0.05807161331176758, "step": 6021 }, { "epoch": 0.8141275876637093, "grad_norm": 0.19792482256889343, "learning_rate": 2.8607232937308587e-06, "loss": 0.04277324676513672, "step": 6022 }, { "epoch": 0.8142627798901563, "grad_norm": 0.5736647844314575, "learning_rate": 2.856704030548072e-06, "loss": 0.0836835503578186, "step": 6023 }, { "epoch": 0.8143979721166033, "grad_norm": 0.1461063027381897, "learning_rate": 2.8526872956642568e-06, "loss": 0.03426766395568848, "step": 6024 }, { "epoch": 0.8145331643430502, "grad_norm": 0.2663962244987488, "learning_rate": 2.84867308991573e-06, "loss": 0.04257082939147949, "step": 6025 }, { "epoch": 0.8146683565694972, "grad_norm": 0.34636518359184265, "learning_rate": 2.8446614141382638e-06, "loss": 0.08236312866210938, "step": 6026 }, { "epoch": 0.8148035487959442, "grad_norm": 0.3127804100513458, "learning_rate": 2.8406522691671104e-06, "loss": 0.068572998046875, "step": 6027 }, { "epoch": 0.8149387410223912, "grad_norm": 0.2658383548259735, "learning_rate": 2.8366456558369975e-06, "loss": 0.061776161193847656, "step": 6028 }, { "epoch": 0.8150739332488381, "grad_norm": 0.1475418657064438, "learning_rate": 2.8326415749821186e-06, "loss": 0.044802188873291016, "step": 6029 }, { "epoch": 0.8152091254752851, "grad_norm": 0.32792845368385315, "learning_rate": 2.828640027436151e-06, "loss": 0.06208324432373047, "step": 6030 }, { "epoch": 0.8153443177017321, "grad_norm": 0.30077195167541504, "learning_rate": 2.824641014032235e-06, "loss": 0.056476593017578125, "step": 6031 }, { "epoch": 0.8154795099281791, "grad_norm": 0.21988511085510254, "learning_rate": 2.820644535602987e-06, "loss": 0.05349874496459961, "step": 6032 }, { "epoch": 0.8156147021546261, "grad_norm": 0.28167712688446045, "learning_rate": 2.8166505929804953e-06, "loss": 0.07236099243164062, "step": 6033 }, { "epoch": 0.815749894381073, "grad_norm": 0.2284173220396042, "learning_rate": 2.8126591869963163e-06, "loss": 0.03714609146118164, "step": 6034 }, { "epoch": 0.81588508660752, "grad_norm": 0.3360145688056946, "learning_rate": 2.8086703184814887e-06, "loss": 0.07312464714050293, "step": 6035 }, { "epoch": 0.816020278833967, "grad_norm": 0.2188270539045334, "learning_rate": 2.8046839882665134e-06, "loss": 0.050281524658203125, "step": 6036 }, { "epoch": 0.816155471060414, "grad_norm": 0.4058283567428589, "learning_rate": 2.800700197181364e-06, "loss": 0.08154439926147461, "step": 6037 }, { "epoch": 0.816290663286861, "grad_norm": 0.40668433904647827, "learning_rate": 2.7967189460554876e-06, "loss": 0.06850337982177734, "step": 6038 }, { "epoch": 0.8164258555133079, "grad_norm": 0.46174219250679016, "learning_rate": 2.792740235717801e-06, "loss": 0.08393049240112305, "step": 6039 }, { "epoch": 0.816561047739755, "grad_norm": 0.2694415748119354, "learning_rate": 2.7887640669967e-06, "loss": 0.06118583679199219, "step": 6040 }, { "epoch": 0.816696239966202, "grad_norm": 0.28720611333847046, "learning_rate": 2.7847904407200327e-06, "loss": 0.07139301300048828, "step": 6041 }, { "epoch": 0.816831432192649, "grad_norm": 0.3160383105278015, "learning_rate": 2.7808193577151363e-06, "loss": 0.061397552490234375, "step": 6042 }, { "epoch": 0.816966624419096, "grad_norm": 0.2736820876598358, "learning_rate": 2.776850818808812e-06, "loss": 0.06472396850585938, "step": 6043 }, { "epoch": 0.8171018166455429, "grad_norm": 0.2239132821559906, "learning_rate": 2.772884824827325e-06, "loss": 0.0593411922454834, "step": 6044 }, { "epoch": 0.8172370088719899, "grad_norm": 0.21632635593414307, "learning_rate": 2.768921376596429e-06, "loss": 0.04656839370727539, "step": 6045 }, { "epoch": 0.8173722010984369, "grad_norm": 0.3758517801761627, "learning_rate": 2.7649604749413176e-06, "loss": 0.07829809188842773, "step": 6046 }, { "epoch": 0.8175073933248839, "grad_norm": 0.3210703432559967, "learning_rate": 2.7610021206866837e-06, "loss": 0.06727910041809082, "step": 6047 }, { "epoch": 0.8176425855513308, "grad_norm": 0.3311633765697479, "learning_rate": 2.757046314656676e-06, "loss": 0.08884382247924805, "step": 6048 }, { "epoch": 0.8177777777777778, "grad_norm": 0.5232706665992737, "learning_rate": 2.753093057674909e-06, "loss": 0.08345842361450195, "step": 6049 }, { "epoch": 0.8179129700042248, "grad_norm": 0.30730652809143066, "learning_rate": 2.749142350564483e-06, "loss": 0.061348915100097656, "step": 6050 }, { "epoch": 0.8180481622306718, "grad_norm": 0.3373664617538452, "learning_rate": 2.7451941941479414e-06, "loss": 0.05693459510803223, "step": 6051 }, { "epoch": 0.8181833544571188, "grad_norm": 0.5161705017089844, "learning_rate": 2.741248589247323e-06, "loss": 0.07698631286621094, "step": 6052 }, { "epoch": 0.8183185466835657, "grad_norm": 0.2258588671684265, "learning_rate": 2.73730553668412e-06, "loss": 0.04509568214416504, "step": 6053 }, { "epoch": 0.8184537389100127, "grad_norm": 0.3730873763561249, "learning_rate": 2.7333650372792978e-06, "loss": 0.0525355339050293, "step": 6054 }, { "epoch": 0.8185889311364597, "grad_norm": 0.17569658160209656, "learning_rate": 2.7294270918532876e-06, "loss": 0.042198896408081055, "step": 6055 }, { "epoch": 0.8187241233629067, "grad_norm": 0.20935864746570587, "learning_rate": 2.7254917012259882e-06, "loss": 0.03873276710510254, "step": 6056 }, { "epoch": 0.8188593155893537, "grad_norm": 0.28307756781578064, "learning_rate": 2.721558866216776e-06, "loss": 0.050560712814331055, "step": 6057 }, { "epoch": 0.8189945078158006, "grad_norm": 0.5794156789779663, "learning_rate": 2.7176285876444846e-06, "loss": 0.11113327741622925, "step": 6058 }, { "epoch": 0.8191297000422476, "grad_norm": 0.313987672328949, "learning_rate": 2.713700866327417e-06, "loss": 0.0690147876739502, "step": 6059 }, { "epoch": 0.8192648922686946, "grad_norm": 0.21553422510623932, "learning_rate": 2.7097757030833497e-06, "loss": 0.043886661529541016, "step": 6060 }, { "epoch": 0.8194000844951416, "grad_norm": 0.3039560317993164, "learning_rate": 2.705853098729517e-06, "loss": 0.06674432754516602, "step": 6061 }, { "epoch": 0.8195352767215885, "grad_norm": 0.21206343173980713, "learning_rate": 2.7019330540826325e-06, "loss": 0.038247108459472656, "step": 6062 }, { "epoch": 0.8196704689480355, "grad_norm": 0.3314515948295593, "learning_rate": 2.6980155699588666e-06, "loss": 0.07146835327148438, "step": 6063 }, { "epoch": 0.8198056611744825, "grad_norm": 0.3042283058166504, "learning_rate": 2.6941006471738633e-06, "loss": 0.07489490509033203, "step": 6064 }, { "epoch": 0.8199408534009295, "grad_norm": 0.22345556318759918, "learning_rate": 2.690188286542726e-06, "loss": 0.04886627197265625, "step": 6065 }, { "epoch": 0.8200760456273765, "grad_norm": 0.22376970946788788, "learning_rate": 2.686278488880029e-06, "loss": 0.04528689384460449, "step": 6066 }, { "epoch": 0.8202112378538234, "grad_norm": 0.33989548683166504, "learning_rate": 2.6823712549998187e-06, "loss": 0.0739513635635376, "step": 6067 }, { "epoch": 0.8203464300802704, "grad_norm": 0.5353378057479858, "learning_rate": 2.678466585715599e-06, "loss": 0.06407666206359863, "step": 6068 }, { "epoch": 0.8204816223067174, "grad_norm": 0.4994027316570282, "learning_rate": 2.6745644818403426e-06, "loss": 0.09122371673583984, "step": 6069 }, { "epoch": 0.8206168145331644, "grad_norm": 0.5354039072990417, "learning_rate": 2.6706649441864883e-06, "loss": 0.08888816833496094, "step": 6070 }, { "epoch": 0.8207520067596114, "grad_norm": 0.23649027943611145, "learning_rate": 2.666767973565937e-06, "loss": 0.05955362319946289, "step": 6071 }, { "epoch": 0.8208871989860583, "grad_norm": 0.4597260355949402, "learning_rate": 2.6628735707900653e-06, "loss": 0.07512426376342773, "step": 6072 }, { "epoch": 0.8210223912125053, "grad_norm": 0.39809486269950867, "learning_rate": 2.658981736669707e-06, "loss": 0.10382843017578125, "step": 6073 }, { "epoch": 0.8211575834389523, "grad_norm": 0.3359104096889496, "learning_rate": 2.655092472015161e-06, "loss": 0.0579218864440918, "step": 6074 }, { "epoch": 0.8212927756653993, "grad_norm": 0.4085558354854584, "learning_rate": 2.6512057776361935e-06, "loss": 0.0943596363067627, "step": 6075 }, { "epoch": 0.8214279678918462, "grad_norm": 0.4857843220233917, "learning_rate": 2.64732165434203e-06, "loss": 0.0741429328918457, "step": 6076 }, { "epoch": 0.8215631601182932, "grad_norm": 0.2855377495288849, "learning_rate": 2.6434401029413792e-06, "loss": 0.06378507614135742, "step": 6077 }, { "epoch": 0.8216983523447402, "grad_norm": 0.5213587880134583, "learning_rate": 2.639561124242385e-06, "loss": 0.08265399932861328, "step": 6078 }, { "epoch": 0.8218335445711872, "grad_norm": 0.28476154804229736, "learning_rate": 2.635684719052682e-06, "loss": 0.04998493194580078, "step": 6079 }, { "epoch": 0.8219687367976342, "grad_norm": 0.2014065831899643, "learning_rate": 2.631810888179355e-06, "loss": 0.04931068420410156, "step": 6080 }, { "epoch": 0.8221039290240811, "grad_norm": 0.2453290820121765, "learning_rate": 2.627939632428952e-06, "loss": 0.04505336284637451, "step": 6081 }, { "epoch": 0.8222391212505281, "grad_norm": 0.360598623752594, "learning_rate": 2.624070952607502e-06, "loss": 0.0605320930480957, "step": 6082 }, { "epoch": 0.8223743134769751, "grad_norm": 0.26906445622444153, "learning_rate": 2.620204849520468e-06, "loss": 0.07411766052246094, "step": 6083 }, { "epoch": 0.8225095057034221, "grad_norm": 0.29481276869773865, "learning_rate": 2.616341323972806e-06, "loss": 0.0637279748916626, "step": 6084 }, { "epoch": 0.822644697929869, "grad_norm": 0.25700411200523376, "learning_rate": 2.612480376768917e-06, "loss": 0.06525135040283203, "step": 6085 }, { "epoch": 0.822779890156316, "grad_norm": 0.14568747580051422, "learning_rate": 2.608622008712672e-06, "loss": 0.02857351303100586, "step": 6086 }, { "epoch": 0.822915082382763, "grad_norm": 0.4644869565963745, "learning_rate": 2.6047662206074034e-06, "loss": 0.08104497194290161, "step": 6087 }, { "epoch": 0.82305027460921, "grad_norm": 0.28576111793518066, "learning_rate": 2.600913013255904e-06, "loss": 0.05170249938964844, "step": 6088 }, { "epoch": 0.823185466835657, "grad_norm": 0.23014803230762482, "learning_rate": 2.59706238746044e-06, "loss": 0.051761627197265625, "step": 6089 }, { "epoch": 0.8233206590621039, "grad_norm": 0.4032961130142212, "learning_rate": 2.593214344022725e-06, "loss": 0.09865593910217285, "step": 6090 }, { "epoch": 0.8234558512885509, "grad_norm": 0.5228615999221802, "learning_rate": 2.5893688837439474e-06, "loss": 0.09393692016601562, "step": 6091 }, { "epoch": 0.8235910435149979, "grad_norm": 0.3720521628856659, "learning_rate": 2.5855260074247473e-06, "loss": 0.045456886291503906, "step": 6092 }, { "epoch": 0.8237262357414449, "grad_norm": 0.27444711327552795, "learning_rate": 2.581685715865232e-06, "loss": 0.07110810279846191, "step": 6093 }, { "epoch": 0.8238614279678919, "grad_norm": 0.38014987111091614, "learning_rate": 2.5778480098649766e-06, "loss": 0.10168582201004028, "step": 6094 }, { "epoch": 0.8239966201943388, "grad_norm": 0.2758583128452301, "learning_rate": 2.5740128902230087e-06, "loss": 0.07007026672363281, "step": 6095 }, { "epoch": 0.8241318124207858, "grad_norm": 0.22813992202281952, "learning_rate": 2.5701803577378214e-06, "loss": 0.05235719680786133, "step": 6096 }, { "epoch": 0.8242670046472328, "grad_norm": 0.42385274171829224, "learning_rate": 2.566350413207366e-06, "loss": 0.08513689041137695, "step": 6097 }, { "epoch": 0.8244021968736798, "grad_norm": 0.3720324635505676, "learning_rate": 2.5625230574290554e-06, "loss": 0.07318711280822754, "step": 6098 }, { "epoch": 0.8245373891001267, "grad_norm": 0.33294564485549927, "learning_rate": 2.558698291199773e-06, "loss": 0.056952476501464844, "step": 6099 }, { "epoch": 0.8246725813265737, "grad_norm": 0.3401033580303192, "learning_rate": 2.5548761153158524e-06, "loss": 0.06594085693359375, "step": 6100 }, { "epoch": 0.8248077735530207, "grad_norm": 0.255386620759964, "learning_rate": 2.55105653057309e-06, "loss": 0.05640685558319092, "step": 6101 }, { "epoch": 0.8249429657794677, "grad_norm": 0.41073131561279297, "learning_rate": 2.547239537766743e-06, "loss": 0.07777738571166992, "step": 6102 }, { "epoch": 0.8250781580059147, "grad_norm": 0.2563095986843109, "learning_rate": 2.543425137691526e-06, "loss": 0.053259849548339844, "step": 6103 }, { "epoch": 0.8252133502323616, "grad_norm": 0.40238744020462036, "learning_rate": 2.5396133311416264e-06, "loss": 0.07820796966552734, "step": 6104 }, { "epoch": 0.8253485424588086, "grad_norm": 0.4039735794067383, "learning_rate": 2.5358041189106784e-06, "loss": 0.08084726333618164, "step": 6105 }, { "epoch": 0.8254837346852556, "grad_norm": 0.28965792059898376, "learning_rate": 2.531997501791779e-06, "loss": 0.04353666305541992, "step": 6106 }, { "epoch": 0.8256189269117026, "grad_norm": 0.4189249575138092, "learning_rate": 2.528193480577489e-06, "loss": 0.04222249984741211, "step": 6107 }, { "epoch": 0.8257541191381496, "grad_norm": 0.22650189697742462, "learning_rate": 2.5243920560598186e-06, "loss": 0.04802274703979492, "step": 6108 }, { "epoch": 0.8258893113645965, "grad_norm": 0.3336423933506012, "learning_rate": 2.5205932290302598e-06, "loss": 0.07125353813171387, "step": 6109 }, { "epoch": 0.8260245035910435, "grad_norm": 0.18595555424690247, "learning_rate": 2.516797000279729e-06, "loss": 0.03815329074859619, "step": 6110 }, { "epoch": 0.8261596958174905, "grad_norm": 0.3717397451400757, "learning_rate": 2.513003370598637e-06, "loss": 0.08181643486022949, "step": 6111 }, { "epoch": 0.8262948880439375, "grad_norm": 0.3404862880706787, "learning_rate": 2.509212340776832e-06, "loss": 0.0374302864074707, "step": 6112 }, { "epoch": 0.8264300802703844, "grad_norm": 0.28415605425834656, "learning_rate": 2.505423911603622e-06, "loss": 0.05466151237487793, "step": 6113 }, { "epoch": 0.8265652724968314, "grad_norm": 0.19801393151283264, "learning_rate": 2.501638083867789e-06, "loss": 0.047365665435791016, "step": 6114 }, { "epoch": 0.8267004647232784, "grad_norm": 0.24348372220993042, "learning_rate": 2.497854858357552e-06, "loss": 0.06348276138305664, "step": 6115 }, { "epoch": 0.8268356569497254, "grad_norm": 0.2263336181640625, "learning_rate": 2.494074235860604e-06, "loss": 0.055605411529541016, "step": 6116 }, { "epoch": 0.8269708491761724, "grad_norm": 0.192909374833107, "learning_rate": 2.4902962171640913e-06, "loss": 0.04351067543029785, "step": 6117 }, { "epoch": 0.8271060414026193, "grad_norm": 0.3105994462966919, "learning_rate": 2.4865208030546167e-06, "loss": 0.06407403945922852, "step": 6118 }, { "epoch": 0.8272412336290663, "grad_norm": 0.28907307982444763, "learning_rate": 2.482747994318239e-06, "loss": 0.04296064376831055, "step": 6119 }, { "epoch": 0.8273764258555133, "grad_norm": 0.17278222739696503, "learning_rate": 2.478977791740477e-06, "loss": 0.03933072090148926, "step": 6120 }, { "epoch": 0.8275116180819603, "grad_norm": 0.4144207239151001, "learning_rate": 2.475210196106313e-06, "loss": 0.08549785614013672, "step": 6121 }, { "epoch": 0.8276468103084073, "grad_norm": 0.26255449652671814, "learning_rate": 2.4714452082001753e-06, "loss": 0.05323374271392822, "step": 6122 }, { "epoch": 0.8277820025348542, "grad_norm": 0.36885082721710205, "learning_rate": 2.467682828805956e-06, "loss": 0.07731294631958008, "step": 6123 }, { "epoch": 0.8279171947613012, "grad_norm": 0.31515181064605713, "learning_rate": 2.4639230587070017e-06, "loss": 0.07038754224777222, "step": 6124 }, { "epoch": 0.8280523869877482, "grad_norm": 0.3263688385486603, "learning_rate": 2.460165898686114e-06, "loss": 0.08073592185974121, "step": 6125 }, { "epoch": 0.8281875792141952, "grad_norm": 0.3848995566368103, "learning_rate": 2.4564113495255597e-06, "loss": 0.0795893669128418, "step": 6126 }, { "epoch": 0.8283227714406421, "grad_norm": 0.28529009222984314, "learning_rate": 2.4526594120070545e-06, "loss": 0.06337499618530273, "step": 6127 }, { "epoch": 0.8284579636670891, "grad_norm": 0.322240948677063, "learning_rate": 2.4489100869117686e-06, "loss": 0.06416690349578857, "step": 6128 }, { "epoch": 0.8285931558935361, "grad_norm": 0.4755936563014984, "learning_rate": 2.4451633750203344e-06, "loss": 0.09140324592590332, "step": 6129 }, { "epoch": 0.8287283481199831, "grad_norm": 0.29732969403266907, "learning_rate": 2.441419277112831e-06, "loss": 0.0648496150970459, "step": 6130 }, { "epoch": 0.8288635403464301, "grad_norm": 0.23001553118228912, "learning_rate": 2.4376777939688107e-06, "loss": 0.06379508972167969, "step": 6131 }, { "epoch": 0.828998732572877, "grad_norm": 0.31154295802116394, "learning_rate": 2.4339389263672625e-06, "loss": 0.054573774337768555, "step": 6132 }, { "epoch": 0.829133924799324, "grad_norm": 0.19406574964523315, "learning_rate": 2.4302026750866406e-06, "loss": 0.03343939781188965, "step": 6133 }, { "epoch": 0.829269117025771, "grad_norm": 0.2403125911951065, "learning_rate": 2.4264690409048517e-06, "loss": 0.04829525947570801, "step": 6134 }, { "epoch": 0.829404309252218, "grad_norm": 0.34234941005706787, "learning_rate": 2.4227380245992555e-06, "loss": 0.06604862213134766, "step": 6135 }, { "epoch": 0.829539501478665, "grad_norm": 0.22851765155792236, "learning_rate": 2.4190096269466767e-06, "loss": 0.0566706657409668, "step": 6136 }, { "epoch": 0.8296746937051119, "grad_norm": 0.3730808198451996, "learning_rate": 2.415283848723383e-06, "loss": 0.06548690795898438, "step": 6137 }, { "epoch": 0.8298098859315589, "grad_norm": 0.28897351026535034, "learning_rate": 2.411560690705101e-06, "loss": 0.07234847545623779, "step": 6138 }, { "epoch": 0.8299450781580059, "grad_norm": 0.3524039685726166, "learning_rate": 2.4078401536670146e-06, "loss": 0.07749509811401367, "step": 6139 }, { "epoch": 0.8300802703844529, "grad_norm": 0.2997666597366333, "learning_rate": 2.4041222383837538e-06, "loss": 0.06832456588745117, "step": 6140 }, { "epoch": 0.8302154626108998, "grad_norm": 0.3855161666870117, "learning_rate": 2.400406945629418e-06, "loss": 0.06428265571594238, "step": 6141 }, { "epoch": 0.8303506548373468, "grad_norm": 0.159975066781044, "learning_rate": 2.3966942761775396e-06, "loss": 0.03423929214477539, "step": 6142 }, { "epoch": 0.8304858470637938, "grad_norm": 0.1802024245262146, "learning_rate": 2.3929842308011263e-06, "loss": 0.03900432586669922, "step": 6143 }, { "epoch": 0.8306210392902408, "grad_norm": 0.3452477753162384, "learning_rate": 2.3892768102726236e-06, "loss": 0.07439923286437988, "step": 6144 }, { "epoch": 0.8307562315166878, "grad_norm": 0.4625161588191986, "learning_rate": 2.3855720153639344e-06, "loss": 0.07840752601623535, "step": 6145 }, { "epoch": 0.8308914237431347, "grad_norm": 0.35174861550331116, "learning_rate": 2.381869846846428e-06, "loss": 0.06035876274108887, "step": 6146 }, { "epoch": 0.8310266159695817, "grad_norm": 0.36810293793678284, "learning_rate": 2.3781703054908993e-06, "loss": 0.05990767478942871, "step": 6147 }, { "epoch": 0.8311618081960287, "grad_norm": 0.46127569675445557, "learning_rate": 2.374473392067624e-06, "loss": 0.0831599235534668, "step": 6148 }, { "epoch": 0.8312970004224757, "grad_norm": 0.2868509590625763, "learning_rate": 2.370779107346317e-06, "loss": 0.047698140144348145, "step": 6149 }, { "epoch": 0.8314321926489227, "grad_norm": 0.20161646604537964, "learning_rate": 2.3670874520961437e-06, "loss": 0.04446291923522949, "step": 6150 }, { "epoch": 0.8315673848753696, "grad_norm": 0.19508378207683563, "learning_rate": 2.3633984270857367e-06, "loss": 0.043001532554626465, "step": 6151 }, { "epoch": 0.8317025771018166, "grad_norm": 0.4475431740283966, "learning_rate": 2.359712033083156e-06, "loss": 0.061325669288635254, "step": 6152 }, { "epoch": 0.8318377693282636, "grad_norm": 0.21206475794315338, "learning_rate": 2.35602827085594e-06, "loss": 0.050330162048339844, "step": 6153 }, { "epoch": 0.8319729615547106, "grad_norm": 0.5338788628578186, "learning_rate": 2.3523471411710644e-06, "loss": 0.08822345733642578, "step": 6154 }, { "epoch": 0.8321081537811575, "grad_norm": 0.3041779696941376, "learning_rate": 2.3486686447949585e-06, "loss": 0.062389373779296875, "step": 6155 }, { "epoch": 0.8322433460076045, "grad_norm": 0.5393385291099548, "learning_rate": 2.3449927824935075e-06, "loss": 0.0924372673034668, "step": 6156 }, { "epoch": 0.8323785382340515, "grad_norm": 0.32297420501708984, "learning_rate": 2.3413195550320393e-06, "loss": 0.0675058364868164, "step": 6157 }, { "epoch": 0.8325137304604985, "grad_norm": 0.25539419054985046, "learning_rate": 2.3376489631753474e-06, "loss": 0.06180715560913086, "step": 6158 }, { "epoch": 0.8326489226869455, "grad_norm": 0.2430296391248703, "learning_rate": 2.3339810076876665e-06, "loss": 0.0410078763961792, "step": 6159 }, { "epoch": 0.8327841149133924, "grad_norm": 0.30485233664512634, "learning_rate": 2.3303156893326815e-06, "loss": 0.060146331787109375, "step": 6160 }, { "epoch": 0.8329193071398394, "grad_norm": 0.3537285625934601, "learning_rate": 2.326653008873535e-06, "loss": 0.09344768524169922, "step": 6161 }, { "epoch": 0.8330544993662864, "grad_norm": 0.19936206936836243, "learning_rate": 2.3229929670728085e-06, "loss": 0.05114603042602539, "step": 6162 }, { "epoch": 0.8331896915927334, "grad_norm": 0.5031552314758301, "learning_rate": 2.319335564692554e-06, "loss": 0.07624435424804688, "step": 6163 }, { "epoch": 0.8333248838191804, "grad_norm": 0.202814981341362, "learning_rate": 2.315680802494256e-06, "loss": 0.045780181884765625, "step": 6164 }, { "epoch": 0.8334600760456273, "grad_norm": 0.25232431292533875, "learning_rate": 2.312028681238856e-06, "loss": 0.07368946075439453, "step": 6165 }, { "epoch": 0.8335952682720743, "grad_norm": 0.4809962213039398, "learning_rate": 2.3083792016867434e-06, "loss": 0.06068849563598633, "step": 6166 }, { "epoch": 0.8337304604985213, "grad_norm": 0.32066455483436584, "learning_rate": 2.304732364597759e-06, "loss": 0.036364734172821045, "step": 6167 }, { "epoch": 0.8338656527249683, "grad_norm": 0.328524649143219, "learning_rate": 2.3010881707311994e-06, "loss": 0.07408523559570312, "step": 6168 }, { "epoch": 0.8340008449514152, "grad_norm": 0.40732115507125854, "learning_rate": 2.2974466208458017e-06, "loss": 0.048784077167510986, "step": 6169 }, { "epoch": 0.8341360371778622, "grad_norm": 0.34277865290641785, "learning_rate": 2.293807715699755e-06, "loss": 0.08569717407226562, "step": 6170 }, { "epoch": 0.8342712294043092, "grad_norm": 0.2897603511810303, "learning_rate": 2.2901714560507e-06, "loss": 0.08003807067871094, "step": 6171 }, { "epoch": 0.8344064216307562, "grad_norm": 0.3742716312408447, "learning_rate": 2.286537842655722e-06, "loss": 0.06242990493774414, "step": 6172 }, { "epoch": 0.8345416138572032, "grad_norm": 0.44942155480384827, "learning_rate": 2.2829068762713633e-06, "loss": 0.05798053741455078, "step": 6173 }, { "epoch": 0.8346768060836501, "grad_norm": 0.45306596159935, "learning_rate": 2.279278557653611e-06, "loss": 0.0857686996459961, "step": 6174 }, { "epoch": 0.8348119983100971, "grad_norm": 0.4919930696487427, "learning_rate": 2.2756528875578965e-06, "loss": 0.08963966369628906, "step": 6175 }, { "epoch": 0.8349471905365442, "grad_norm": 0.18259121477603912, "learning_rate": 2.2720298667391067e-06, "loss": 0.03711986541748047, "step": 6176 }, { "epoch": 0.8350823827629912, "grad_norm": 0.45871835947036743, "learning_rate": 2.268409495951568e-06, "loss": 0.08042430877685547, "step": 6177 }, { "epoch": 0.8352175749894382, "grad_norm": 0.26225897669792175, "learning_rate": 2.2647917759490723e-06, "loss": 0.05420804023742676, "step": 6178 }, { "epoch": 0.8353527672158851, "grad_norm": 0.13552093505859375, "learning_rate": 2.261176707484834e-06, "loss": 0.03930556774139404, "step": 6179 }, { "epoch": 0.8354879594423321, "grad_norm": 0.232827827334404, "learning_rate": 2.2575642913115408e-06, "loss": 0.06036567687988281, "step": 6180 }, { "epoch": 0.8356231516687791, "grad_norm": 0.19267582893371582, "learning_rate": 2.253954528181313e-06, "loss": 0.05502462387084961, "step": 6181 }, { "epoch": 0.8357583438952261, "grad_norm": 0.46230658888816833, "learning_rate": 2.2503474188457206e-06, "loss": 0.09131145477294922, "step": 6182 }, { "epoch": 0.835893536121673, "grad_norm": 0.29804858565330505, "learning_rate": 2.2467429640557903e-06, "loss": 0.04694533348083496, "step": 6183 }, { "epoch": 0.83602872834812, "grad_norm": 0.42065635323524475, "learning_rate": 2.2431411645619776e-06, "loss": 0.056189775466918945, "step": 6184 }, { "epoch": 0.836163920574567, "grad_norm": 0.3421494662761688, "learning_rate": 2.239542021114205e-06, "loss": 0.06933164596557617, "step": 6185 }, { "epoch": 0.836299112801014, "grad_norm": 0.38711124658584595, "learning_rate": 2.2359455344618306e-06, "loss": 0.08873653411865234, "step": 6186 }, { "epoch": 0.836434305027461, "grad_norm": 0.3926772475242615, "learning_rate": 2.232351705353663e-06, "loss": 0.08452796936035156, "step": 6187 }, { "epoch": 0.836569497253908, "grad_norm": 0.35300230979919434, "learning_rate": 2.228760534537955e-06, "loss": 0.08152008056640625, "step": 6188 }, { "epoch": 0.8367046894803549, "grad_norm": 0.27519428730010986, "learning_rate": 2.2251720227624044e-06, "loss": 0.06858253479003906, "step": 6189 }, { "epoch": 0.8368398817068019, "grad_norm": 0.4944668114185333, "learning_rate": 2.2215861707741666e-06, "loss": 0.09633874893188477, "step": 6190 }, { "epoch": 0.8369750739332489, "grad_norm": 0.348843514919281, "learning_rate": 2.2180029793198313e-06, "loss": 0.059391021728515625, "step": 6191 }, { "epoch": 0.8371102661596959, "grad_norm": 0.3249976933002472, "learning_rate": 2.2144224491454363e-06, "loss": 0.06228768825531006, "step": 6192 }, { "epoch": 0.8372454583861428, "grad_norm": 0.4631642997264862, "learning_rate": 2.2108445809964695e-06, "loss": 0.06086874008178711, "step": 6193 }, { "epoch": 0.8373806506125898, "grad_norm": 0.32808300852775574, "learning_rate": 2.2072693756178567e-06, "loss": 0.07857704162597656, "step": 6194 }, { "epoch": 0.8375158428390368, "grad_norm": 0.28030431270599365, "learning_rate": 2.203696833753983e-06, "loss": 0.06250762939453125, "step": 6195 }, { "epoch": 0.8376510350654838, "grad_norm": 0.31675535440444946, "learning_rate": 2.200126956148668e-06, "loss": 0.05798828601837158, "step": 6196 }, { "epoch": 0.8377862272919308, "grad_norm": 0.33596113324165344, "learning_rate": 2.196559743545177e-06, "loss": 0.0630028247833252, "step": 6197 }, { "epoch": 0.8379214195183777, "grad_norm": 0.23111020028591156, "learning_rate": 2.1929951966862233e-06, "loss": 0.05372738838195801, "step": 6198 }, { "epoch": 0.8380566117448247, "grad_norm": 0.45794913172721863, "learning_rate": 2.1894333163139607e-06, "loss": 0.0893559455871582, "step": 6199 }, { "epoch": 0.8381918039712717, "grad_norm": 0.26923510432243347, "learning_rate": 2.1858741031700015e-06, "loss": 0.05803251266479492, "step": 6200 }, { "epoch": 0.8383269961977187, "grad_norm": 0.16260357201099396, "learning_rate": 2.1823175579953856e-06, "loss": 0.03156423568725586, "step": 6201 }, { "epoch": 0.8384621884241656, "grad_norm": 0.4199337959289551, "learning_rate": 2.1787636815306065e-06, "loss": 0.08827757835388184, "step": 6202 }, { "epoch": 0.8385973806506126, "grad_norm": 0.47525399923324585, "learning_rate": 2.1752124745156005e-06, "loss": 0.07206487655639648, "step": 6203 }, { "epoch": 0.8387325728770596, "grad_norm": 0.32013580203056335, "learning_rate": 2.171663937689744e-06, "loss": 0.06802785396575928, "step": 6204 }, { "epoch": 0.8388677651035066, "grad_norm": 0.23136280477046967, "learning_rate": 2.168118071791868e-06, "loss": 0.041516780853271484, "step": 6205 }, { "epoch": 0.8390029573299536, "grad_norm": 0.37992942333221436, "learning_rate": 2.164574877560237e-06, "loss": 0.08926022052764893, "step": 6206 }, { "epoch": 0.8391381495564005, "grad_norm": 0.40065091848373413, "learning_rate": 2.161034355732564e-06, "loss": 0.09440803527832031, "step": 6207 }, { "epoch": 0.8392733417828475, "grad_norm": 0.3198845088481903, "learning_rate": 2.1574965070460047e-06, "loss": 0.06119346618652344, "step": 6208 }, { "epoch": 0.8394085340092945, "grad_norm": 0.391506165266037, "learning_rate": 2.1539613322371527e-06, "loss": 0.05940985679626465, "step": 6209 }, { "epoch": 0.8395437262357415, "grad_norm": 0.368939071893692, "learning_rate": 2.1504288320420613e-06, "loss": 0.08240604400634766, "step": 6210 }, { "epoch": 0.8396789184621885, "grad_norm": 0.2728017270565033, "learning_rate": 2.1468990071962038e-06, "loss": 0.05499708652496338, "step": 6211 }, { "epoch": 0.8398141106886354, "grad_norm": 0.2578313648700714, "learning_rate": 2.143371858434515e-06, "loss": 0.05638861656188965, "step": 6212 }, { "epoch": 0.8399493029150824, "grad_norm": 0.379812091588974, "learning_rate": 2.139847386491367e-06, "loss": 0.0633387565612793, "step": 6213 }, { "epoch": 0.8400844951415294, "grad_norm": 0.3931023180484772, "learning_rate": 2.1363255921005685e-06, "loss": 0.08740377426147461, "step": 6214 }, { "epoch": 0.8402196873679764, "grad_norm": 0.2514142692089081, "learning_rate": 2.1328064759953853e-06, "loss": 0.0461575984954834, "step": 6215 }, { "epoch": 0.8403548795944233, "grad_norm": 0.16072365641593933, "learning_rate": 2.129290038908504e-06, "loss": 0.03383278846740723, "step": 6216 }, { "epoch": 0.8404900718208703, "grad_norm": 0.2062246948480606, "learning_rate": 2.1257762815720745e-06, "loss": 0.046831727027893066, "step": 6217 }, { "epoch": 0.8406252640473173, "grad_norm": 0.4210064709186554, "learning_rate": 2.122265204717678e-06, "loss": 0.07840967178344727, "step": 6218 }, { "epoch": 0.8407604562737643, "grad_norm": 0.36550840735435486, "learning_rate": 2.1187568090763328e-06, "loss": 0.06644487380981445, "step": 6219 }, { "epoch": 0.8408956485002113, "grad_norm": 0.20956704020500183, "learning_rate": 2.1152510953785196e-06, "loss": 0.0396723747253418, "step": 6220 }, { "epoch": 0.8410308407266582, "grad_norm": 0.22913040220737457, "learning_rate": 2.1117480643541304e-06, "loss": 0.05521726608276367, "step": 6221 }, { "epoch": 0.8411660329531052, "grad_norm": 0.34781816601753235, "learning_rate": 2.1082477167325275e-06, "loss": 0.05765199661254883, "step": 6222 }, { "epoch": 0.8413012251795522, "grad_norm": 0.32238680124282837, "learning_rate": 2.1047500532424968e-06, "loss": 0.06810712814331055, "step": 6223 }, { "epoch": 0.8414364174059992, "grad_norm": 0.2920459806919098, "learning_rate": 2.1012550746122705e-06, "loss": 0.07100939750671387, "step": 6224 }, { "epoch": 0.8415716096324461, "grad_norm": 0.35092565417289734, "learning_rate": 2.0977627815695217e-06, "loss": 0.07351398468017578, "step": 6225 }, { "epoch": 0.8417068018588931, "grad_norm": 0.2670595645904541, "learning_rate": 2.094273174841362e-06, "loss": 0.04958820343017578, "step": 6226 }, { "epoch": 0.8418419940853401, "grad_norm": 0.38839006423950195, "learning_rate": 2.0907862551543516e-06, "loss": 0.058518409729003906, "step": 6227 }, { "epoch": 0.8419771863117871, "grad_norm": 0.3022480010986328, "learning_rate": 2.087302023234485e-06, "loss": 0.06590914726257324, "step": 6228 }, { "epoch": 0.8421123785382341, "grad_norm": 0.1451275497674942, "learning_rate": 2.083820479807194e-06, "loss": 0.031537652015686035, "step": 6229 }, { "epoch": 0.842247570764681, "grad_norm": 0.3312985301017761, "learning_rate": 2.0803416255973585e-06, "loss": 0.08653116226196289, "step": 6230 }, { "epoch": 0.842382762991128, "grad_norm": 0.3806116282939911, "learning_rate": 2.0768654613292887e-06, "loss": 0.06750082969665527, "step": 6231 }, { "epoch": 0.842517955217575, "grad_norm": 0.6721171140670776, "learning_rate": 2.0733919877267477e-06, "loss": 0.08948570489883423, "step": 6232 }, { "epoch": 0.842653147444022, "grad_norm": 0.2881007194519043, "learning_rate": 2.0699212055129268e-06, "loss": 0.05344271659851074, "step": 6233 }, { "epoch": 0.842788339670469, "grad_norm": 0.3937076926231384, "learning_rate": 2.066453115410463e-06, "loss": 0.05979776382446289, "step": 6234 }, { "epoch": 0.8429235318969159, "grad_norm": 0.6550187468528748, "learning_rate": 2.062987718141431e-06, "loss": 0.09293651580810547, "step": 6235 }, { "epoch": 0.8430587241233629, "grad_norm": 0.3156546652317047, "learning_rate": 2.0595250144273423e-06, "loss": 0.054172515869140625, "step": 6236 }, { "epoch": 0.8431939163498099, "grad_norm": 0.2758477032184601, "learning_rate": 2.056065004989155e-06, "loss": 0.06659078598022461, "step": 6237 }, { "epoch": 0.8433291085762569, "grad_norm": 0.25198909640312195, "learning_rate": 2.0526076905472585e-06, "loss": 0.05923342704772949, "step": 6238 }, { "epoch": 0.8434643008027038, "grad_norm": 0.49366337060928345, "learning_rate": 2.0491530718214855e-06, "loss": 0.06829285621643066, "step": 6239 }, { "epoch": 0.8435994930291508, "grad_norm": 0.2753165364265442, "learning_rate": 2.0457011495311045e-06, "loss": 0.05621814727783203, "step": 6240 }, { "epoch": 0.8437346852555978, "grad_norm": 0.2757529616355896, "learning_rate": 2.0422519243948232e-06, "loss": 0.04640340805053711, "step": 6241 }, { "epoch": 0.8438698774820448, "grad_norm": 0.4832799434661865, "learning_rate": 2.0388053971307927e-06, "loss": 0.09984779357910156, "step": 6242 }, { "epoch": 0.8440050697084918, "grad_norm": 0.22580106556415558, "learning_rate": 2.0353615684565956e-06, "loss": 0.041429996490478516, "step": 6243 }, { "epoch": 0.8441402619349387, "grad_norm": 0.29000264406204224, "learning_rate": 2.0319204390892566e-06, "loss": 0.05859375, "step": 6244 }, { "epoch": 0.8442754541613857, "grad_norm": 0.15063822269439697, "learning_rate": 2.0284820097452374e-06, "loss": 0.03659510612487793, "step": 6245 }, { "epoch": 0.8444106463878327, "grad_norm": 0.40340688824653625, "learning_rate": 2.02504628114043e-06, "loss": 0.05350208282470703, "step": 6246 }, { "epoch": 0.8445458386142797, "grad_norm": 0.1933862268924713, "learning_rate": 2.0216132539901865e-06, "loss": 0.04899430274963379, "step": 6247 }, { "epoch": 0.8446810308407267, "grad_norm": 0.2647876739501953, "learning_rate": 2.0181829290092663e-06, "loss": 0.05927896499633789, "step": 6248 }, { "epoch": 0.8448162230671736, "grad_norm": 0.21264617145061493, "learning_rate": 2.014755306911891e-06, "loss": 0.046753883361816406, "step": 6249 }, { "epoch": 0.8449514152936206, "grad_norm": 0.3327547609806061, "learning_rate": 2.0113303884117057e-06, "loss": 0.04807734489440918, "step": 6250 }, { "epoch": 0.8450866075200676, "grad_norm": 0.2826550602912903, "learning_rate": 2.0079081742217957e-06, "loss": 0.044869422912597656, "step": 6251 }, { "epoch": 0.8452217997465146, "grad_norm": 0.2259238064289093, "learning_rate": 2.0044886650546915e-06, "loss": 0.048828125, "step": 6252 }, { "epoch": 0.8453569919729615, "grad_norm": 0.2367219179868698, "learning_rate": 2.0010718616223406e-06, "loss": 0.057765960693359375, "step": 6253 }, { "epoch": 0.8454921841994085, "grad_norm": 0.18984411656856537, "learning_rate": 1.9976577646361514e-06, "loss": 0.0418243408203125, "step": 6254 }, { "epoch": 0.8456273764258555, "grad_norm": 0.4283030927181244, "learning_rate": 1.994246374806953e-06, "loss": 0.0626521110534668, "step": 6255 }, { "epoch": 0.8457625686523025, "grad_norm": 0.4187628924846649, "learning_rate": 1.9908376928450128e-06, "loss": 0.07620739936828613, "step": 6256 }, { "epoch": 0.8458977608787495, "grad_norm": 0.1862669140100479, "learning_rate": 1.987431719460039e-06, "loss": 0.04925107955932617, "step": 6257 }, { "epoch": 0.8460329531051964, "grad_norm": 0.17124955356121063, "learning_rate": 1.9840284553611706e-06, "loss": 0.047887325286865234, "step": 6258 }, { "epoch": 0.8461681453316434, "grad_norm": 0.3132363259792328, "learning_rate": 1.980627901256989e-06, "loss": 0.06468343734741211, "step": 6259 }, { "epoch": 0.8463033375580904, "grad_norm": 0.31490230560302734, "learning_rate": 1.9772300578555062e-06, "loss": 0.05545806884765625, "step": 6260 }, { "epoch": 0.8464385297845374, "grad_norm": 0.3261524438858032, "learning_rate": 1.973834925864172e-06, "loss": 0.0727154016494751, "step": 6261 }, { "epoch": 0.8465737220109844, "grad_norm": 0.31746163964271545, "learning_rate": 1.97044250598987e-06, "loss": 0.0643925666809082, "step": 6262 }, { "epoch": 0.8467089142374313, "grad_norm": 0.5831186175346375, "learning_rate": 1.9670527989389177e-06, "loss": 0.09212028980255127, "step": 6263 }, { "epoch": 0.8468441064638783, "grad_norm": 0.6222190260887146, "learning_rate": 1.9636658054170747e-06, "loss": 0.07861185073852539, "step": 6264 }, { "epoch": 0.8469792986903253, "grad_norm": 0.26287853717803955, "learning_rate": 1.960281526129531e-06, "loss": 0.06790542602539062, "step": 6265 }, { "epoch": 0.8471144909167723, "grad_norm": 0.36416393518447876, "learning_rate": 1.9568999617809077e-06, "loss": 0.059990644454956055, "step": 6266 }, { "epoch": 0.8472496831432192, "grad_norm": 0.26137810945510864, "learning_rate": 1.9535211130752676e-06, "loss": 0.055304527282714844, "step": 6267 }, { "epoch": 0.8473848753696662, "grad_norm": 0.30726176500320435, "learning_rate": 1.950144980716101e-06, "loss": 0.06860160827636719, "step": 6268 }, { "epoch": 0.8475200675961132, "grad_norm": 0.34413042664527893, "learning_rate": 1.9467715654063444e-06, "loss": 0.04716300964355469, "step": 6269 }, { "epoch": 0.8476552598225602, "grad_norm": 0.3800031244754791, "learning_rate": 1.9434008678483532e-06, "loss": 0.06565046310424805, "step": 6270 }, { "epoch": 0.8477904520490072, "grad_norm": 0.30992594361305237, "learning_rate": 1.9400328887439295e-06, "loss": 0.0719594955444336, "step": 6271 }, { "epoch": 0.8479256442754541, "grad_norm": 0.2748137414455414, "learning_rate": 1.9366676287943038e-06, "loss": 0.05409574508666992, "step": 6272 }, { "epoch": 0.8480608365019011, "grad_norm": 0.2654644846916199, "learning_rate": 1.9333050887001337e-06, "loss": 0.05579793453216553, "step": 6273 }, { "epoch": 0.8481960287283481, "grad_norm": 0.2429201453924179, "learning_rate": 1.9299452691615293e-06, "loss": 0.0350804328918457, "step": 6274 }, { "epoch": 0.8483312209547951, "grad_norm": 0.22114381194114685, "learning_rate": 1.9265881708780182e-06, "loss": 0.057952880859375, "step": 6275 }, { "epoch": 0.848466413181242, "grad_norm": 0.3228522837162018, "learning_rate": 1.9232337945485657e-06, "loss": 0.06509852409362793, "step": 6276 }, { "epoch": 0.848601605407689, "grad_norm": 0.3310171663761139, "learning_rate": 1.91988214087157e-06, "loss": 0.07742607593536377, "step": 6277 }, { "epoch": 0.848736797634136, "grad_norm": 0.28843954205513, "learning_rate": 1.9165332105448613e-06, "loss": 0.08643460273742676, "step": 6278 }, { "epoch": 0.848871989860583, "grad_norm": 0.3420291244983673, "learning_rate": 1.913187004265715e-06, "loss": 0.07580137252807617, "step": 6279 }, { "epoch": 0.84900718208703, "grad_norm": 0.19976767897605896, "learning_rate": 1.909843522730814e-06, "loss": 0.05601692199707031, "step": 6280 }, { "epoch": 0.8491423743134769, "grad_norm": 0.2512938380241394, "learning_rate": 1.9065027666363017e-06, "loss": 0.048183441162109375, "step": 6281 }, { "epoch": 0.8492775665399239, "grad_norm": 0.5086610913276672, "learning_rate": 1.903164736677736e-06, "loss": 0.07693183422088623, "step": 6282 }, { "epoch": 0.8494127587663709, "grad_norm": 0.21472832560539246, "learning_rate": 1.8998294335501082e-06, "loss": 0.049195051193237305, "step": 6283 }, { "epoch": 0.8495479509928179, "grad_norm": 0.4109760522842407, "learning_rate": 1.8964968579478592e-06, "loss": 0.0492548942565918, "step": 6284 }, { "epoch": 0.8496831432192649, "grad_norm": 0.4223787784576416, "learning_rate": 1.893167010564834e-06, "loss": 0.04801177978515625, "step": 6285 }, { "epoch": 0.8498183354457118, "grad_norm": 0.40585052967071533, "learning_rate": 1.8898398920943349e-06, "loss": 0.08501172065734863, "step": 6286 }, { "epoch": 0.8499535276721588, "grad_norm": 0.3826312720775604, "learning_rate": 1.886515503229081e-06, "loss": 0.06133604049682617, "step": 6287 }, { "epoch": 0.8500887198986058, "grad_norm": 0.3597368597984314, "learning_rate": 1.8831938446612269e-06, "loss": 0.047585248947143555, "step": 6288 }, { "epoch": 0.8502239121250528, "grad_norm": 0.3577701151371002, "learning_rate": 1.8798749170823676e-06, "loss": 0.046039581298828125, "step": 6289 }, { "epoch": 0.8503591043514998, "grad_norm": 0.28235092759132385, "learning_rate": 1.8765587211835089e-06, "loss": 0.05596637725830078, "step": 6290 }, { "epoch": 0.8504942965779467, "grad_norm": 0.362808495759964, "learning_rate": 1.8732452576551102e-06, "loss": 0.0826263427734375, "step": 6291 }, { "epoch": 0.8506294888043937, "grad_norm": 0.40157967805862427, "learning_rate": 1.8699345271870493e-06, "loss": 0.06565523147583008, "step": 6292 }, { "epoch": 0.8507646810308407, "grad_norm": 0.24076251685619354, "learning_rate": 1.8666265304686387e-06, "loss": 0.0528717041015625, "step": 6293 }, { "epoch": 0.8508998732572877, "grad_norm": 0.3019689619541168, "learning_rate": 1.8633212681886203e-06, "loss": 0.049294471740722656, "step": 6294 }, { "epoch": 0.8510350654837346, "grad_norm": 0.49325188994407654, "learning_rate": 1.8600187410351621e-06, "loss": 0.07761192321777344, "step": 6295 }, { "epoch": 0.8511702577101816, "grad_norm": 0.1678040772676468, "learning_rate": 1.8567189496958776e-06, "loss": 0.03497183322906494, "step": 6296 }, { "epoch": 0.8513054499366286, "grad_norm": 0.22160252928733826, "learning_rate": 1.853421894857797e-06, "loss": 0.042299747467041016, "step": 6297 }, { "epoch": 0.8514406421630756, "grad_norm": 0.2647058665752411, "learning_rate": 1.8501275772073827e-06, "loss": 0.06014204025268555, "step": 6298 }, { "epoch": 0.8515758343895226, "grad_norm": 0.20429734885692596, "learning_rate": 1.8468359974305315e-06, "loss": 0.048209190368652344, "step": 6299 }, { "epoch": 0.8517110266159695, "grad_norm": 0.38039442896842957, "learning_rate": 1.8435471562125633e-06, "loss": 0.04333961009979248, "step": 6300 }, { "epoch": 0.8518462188424165, "grad_norm": 0.22751489281654358, "learning_rate": 1.8402610542382386e-06, "loss": 0.043682098388671875, "step": 6301 }, { "epoch": 0.8519814110688635, "grad_norm": 0.41948744654655457, "learning_rate": 1.836977692191742e-06, "loss": 0.0705876350402832, "step": 6302 }, { "epoch": 0.8521166032953105, "grad_norm": 0.3832312524318695, "learning_rate": 1.8336970707566781e-06, "loss": 0.07243871688842773, "step": 6303 }, { "epoch": 0.8522517955217574, "grad_norm": 0.3912931978702545, "learning_rate": 1.8304191906160973e-06, "loss": 0.06734943389892578, "step": 6304 }, { "epoch": 0.8523869877482044, "grad_norm": 0.3388195037841797, "learning_rate": 1.8271440524524668e-06, "loss": 0.06375408172607422, "step": 6305 }, { "epoch": 0.8525221799746514, "grad_norm": 0.9208351373672485, "learning_rate": 1.8238716569476949e-06, "loss": 0.08703303337097168, "step": 6306 }, { "epoch": 0.8526573722010984, "grad_norm": 0.5578576922416687, "learning_rate": 1.8206020047831078e-06, "loss": 0.06214499473571777, "step": 6307 }, { "epoch": 0.8527925644275454, "grad_norm": 0.20516066253185272, "learning_rate": 1.8173350966394648e-06, "loss": 0.04634857177734375, "step": 6308 }, { "epoch": 0.8529277566539923, "grad_norm": 0.3526214361190796, "learning_rate": 1.8140709331969513e-06, "loss": 0.06889486312866211, "step": 6309 }, { "epoch": 0.8530629488804393, "grad_norm": 0.5277717709541321, "learning_rate": 1.810809515135184e-06, "loss": 0.07633543014526367, "step": 6310 }, { "epoch": 0.8531981411068863, "grad_norm": 0.36538299918174744, "learning_rate": 1.8075508431332111e-06, "loss": 0.0766000747680664, "step": 6311 }, { "epoch": 0.8533333333333334, "grad_norm": 0.16320344805717468, "learning_rate": 1.8042949178695034e-06, "loss": 0.03675508499145508, "step": 6312 }, { "epoch": 0.8534685255597804, "grad_norm": 0.29728835821151733, "learning_rate": 1.8010417400219636e-06, "loss": 0.0659797191619873, "step": 6313 }, { "epoch": 0.8536037177862273, "grad_norm": 0.35085615515708923, "learning_rate": 1.7977913102679167e-06, "loss": 0.0602877140045166, "step": 6314 }, { "epoch": 0.8537389100126743, "grad_norm": 0.33504942059516907, "learning_rate": 1.7945436292841193e-06, "loss": 0.07447564601898193, "step": 6315 }, { "epoch": 0.8538741022391213, "grad_norm": 0.24959515035152435, "learning_rate": 1.791298697746766e-06, "loss": 0.052007198333740234, "step": 6316 }, { "epoch": 0.8540092944655683, "grad_norm": 0.36414414644241333, "learning_rate": 1.7880565163314545e-06, "loss": 0.06575441360473633, "step": 6317 }, { "epoch": 0.8541444866920153, "grad_norm": 0.2594071626663208, "learning_rate": 1.784817085713233e-06, "loss": 0.05369305610656738, "step": 6318 }, { "epoch": 0.8542796789184622, "grad_norm": 0.18506945669651031, "learning_rate": 1.7815804065665669e-06, "loss": 0.04692578315734863, "step": 6319 }, { "epoch": 0.8544148711449092, "grad_norm": 0.5307829976081848, "learning_rate": 1.778346479565346e-06, "loss": 0.07484650611877441, "step": 6320 }, { "epoch": 0.8545500633713562, "grad_norm": 0.4423481523990631, "learning_rate": 1.7751153053829011e-06, "loss": 0.0657958984375, "step": 6321 }, { "epoch": 0.8546852555978032, "grad_norm": 0.9577394127845764, "learning_rate": 1.7718868846919662e-06, "loss": 0.07073616981506348, "step": 6322 }, { "epoch": 0.8548204478242502, "grad_norm": 0.32180920243263245, "learning_rate": 1.7686612181647266e-06, "loss": 0.0552525520324707, "step": 6323 }, { "epoch": 0.8549556400506971, "grad_norm": 0.29027029871940613, "learning_rate": 1.7654383064727802e-06, "loss": 0.05681467056274414, "step": 6324 }, { "epoch": 0.8550908322771441, "grad_norm": 0.20261068642139435, "learning_rate": 1.762218150287152e-06, "loss": 0.04305607080459595, "step": 6325 }, { "epoch": 0.8552260245035911, "grad_norm": 0.4485626518726349, "learning_rate": 1.759000750278299e-06, "loss": 0.07387542724609375, "step": 6326 }, { "epoch": 0.8553612167300381, "grad_norm": 0.3260519504547119, "learning_rate": 1.7557861071160953e-06, "loss": 0.07190752029418945, "step": 6327 }, { "epoch": 0.855496408956485, "grad_norm": 0.11344873160123825, "learning_rate": 1.7525742214698538e-06, "loss": 0.028392374515533447, "step": 6328 }, { "epoch": 0.855631601182932, "grad_norm": 0.21213459968566895, "learning_rate": 1.7493650940083045e-06, "loss": 0.043506622314453125, "step": 6329 }, { "epoch": 0.855766793409379, "grad_norm": 0.17933131754398346, "learning_rate": 1.746158725399603e-06, "loss": 0.04773974418640137, "step": 6330 }, { "epoch": 0.855901985635826, "grad_norm": 0.24295440316200256, "learning_rate": 1.7429551163113322e-06, "loss": 0.06258130073547363, "step": 6331 }, { "epoch": 0.856037177862273, "grad_norm": 0.3535444438457489, "learning_rate": 1.7397542674105e-06, "loss": 0.07405781745910645, "step": 6332 }, { "epoch": 0.8561723700887199, "grad_norm": 0.22392292320728302, "learning_rate": 1.7365561793635431e-06, "loss": 0.056971073150634766, "step": 6333 }, { "epoch": 0.8563075623151669, "grad_norm": 0.4279118478298187, "learning_rate": 1.7333608528363227e-06, "loss": 0.07200860977172852, "step": 6334 }, { "epoch": 0.8564427545416139, "grad_norm": 0.2679310739040375, "learning_rate": 1.7301682884941128e-06, "loss": 0.06176877021789551, "step": 6335 }, { "epoch": 0.8565779467680609, "grad_norm": 0.2931089699268341, "learning_rate": 1.726978487001632e-06, "loss": 0.055226802825927734, "step": 6336 }, { "epoch": 0.8567131389945079, "grad_norm": 0.36156946420669556, "learning_rate": 1.7237914490230072e-06, "loss": 0.09280610084533691, "step": 6337 }, { "epoch": 0.8568483312209548, "grad_norm": 0.33498939871788025, "learning_rate": 1.7206071752218027e-06, "loss": 0.05925607681274414, "step": 6338 }, { "epoch": 0.8569835234474018, "grad_norm": 0.2283213585615158, "learning_rate": 1.7174256662610032e-06, "loss": 0.042500972747802734, "step": 6339 }, { "epoch": 0.8571187156738488, "grad_norm": 0.39317595958709717, "learning_rate": 1.714246922803004e-06, "loss": 0.07482242584228516, "step": 6340 }, { "epoch": 0.8572539079002958, "grad_norm": 0.4603132903575897, "learning_rate": 1.7110709455096468e-06, "loss": 0.04822203516960144, "step": 6341 }, { "epoch": 0.8573891001267427, "grad_norm": 0.5118915438652039, "learning_rate": 1.7078977350421815e-06, "loss": 0.06947517395019531, "step": 6342 }, { "epoch": 0.8575242923531897, "grad_norm": 0.22298239171504974, "learning_rate": 1.7047272920612926e-06, "loss": 0.04576289653778076, "step": 6343 }, { "epoch": 0.8576594845796367, "grad_norm": 0.2964182198047638, "learning_rate": 1.7015596172270841e-06, "loss": 0.06253862380981445, "step": 6344 }, { "epoch": 0.8577946768060837, "grad_norm": 0.3406200408935547, "learning_rate": 1.6983947111990717e-06, "loss": 0.0810537338256836, "step": 6345 }, { "epoch": 0.8579298690325307, "grad_norm": 0.689912736415863, "learning_rate": 1.695232574636218e-06, "loss": 0.08181846141815186, "step": 6346 }, { "epoch": 0.8580650612589776, "grad_norm": 0.28458845615386963, "learning_rate": 1.6920732081968882e-06, "loss": 0.0625309944152832, "step": 6347 }, { "epoch": 0.8582002534854246, "grad_norm": 0.29292938113212585, "learning_rate": 1.6889166125388878e-06, "loss": 0.056079864501953125, "step": 6348 }, { "epoch": 0.8583354457118716, "grad_norm": 0.38736993074417114, "learning_rate": 1.6857627883194277e-06, "loss": 0.06703901290893555, "step": 6349 }, { "epoch": 0.8584706379383186, "grad_norm": 0.5575077533721924, "learning_rate": 1.6826117361951577e-06, "loss": 0.08139348030090332, "step": 6350 }, { "epoch": 0.8586058301647655, "grad_norm": 0.21039235591888428, "learning_rate": 1.6794634568221412e-06, "loss": 0.05111062526702881, "step": 6351 }, { "epoch": 0.8587410223912125, "grad_norm": 0.2495880275964737, "learning_rate": 1.676317950855864e-06, "loss": 0.07018160820007324, "step": 6352 }, { "epoch": 0.8588762146176595, "grad_norm": 0.28126993775367737, "learning_rate": 1.6731752189512456e-06, "loss": 0.04293978214263916, "step": 6353 }, { "epoch": 0.8590114068441065, "grad_norm": 0.29608556628227234, "learning_rate": 1.6700352617626092e-06, "loss": 0.06672406196594238, "step": 6354 }, { "epoch": 0.8591465990705535, "grad_norm": 0.19749513268470764, "learning_rate": 1.6668980799437167e-06, "loss": 0.044228553771972656, "step": 6355 }, { "epoch": 0.8592817912970004, "grad_norm": 0.227354034781456, "learning_rate": 1.6637636741477458e-06, "loss": 0.041936516761779785, "step": 6356 }, { "epoch": 0.8594169835234474, "grad_norm": 0.2797456681728363, "learning_rate": 1.6606320450272943e-06, "loss": 0.06017112731933594, "step": 6357 }, { "epoch": 0.8595521757498944, "grad_norm": 0.2043454349040985, "learning_rate": 1.657503193234386e-06, "loss": 0.04774284362792969, "step": 6358 }, { "epoch": 0.8596873679763414, "grad_norm": 0.25736096501350403, "learning_rate": 1.654377119420461e-06, "loss": 0.05237627029418945, "step": 6359 }, { "epoch": 0.8598225602027884, "grad_norm": 0.2511076033115387, "learning_rate": 1.6512538242363889e-06, "loss": 0.04599905014038086, "step": 6360 }, { "epoch": 0.8599577524292353, "grad_norm": 0.35518598556518555, "learning_rate": 1.6481333083324563e-06, "loss": 0.07319915294647217, "step": 6361 }, { "epoch": 0.8600929446556823, "grad_norm": 0.2266836315393448, "learning_rate": 1.6450155723583698e-06, "loss": 0.04404246807098389, "step": 6362 }, { "epoch": 0.8602281368821293, "grad_norm": 0.25336751341819763, "learning_rate": 1.6419006169632573e-06, "loss": 0.04238080978393555, "step": 6363 }, { "epoch": 0.8603633291085763, "grad_norm": 0.47562745213508606, "learning_rate": 1.638788442795668e-06, "loss": 0.07038497924804688, "step": 6364 }, { "epoch": 0.8604985213350232, "grad_norm": 0.44237881898880005, "learning_rate": 1.6356790505035785e-06, "loss": 0.0656578540802002, "step": 6365 }, { "epoch": 0.8606337135614702, "grad_norm": 0.34531697630882263, "learning_rate": 1.6325724407343795e-06, "loss": 0.07509088516235352, "step": 6366 }, { "epoch": 0.8607689057879172, "grad_norm": 0.29616063833236694, "learning_rate": 1.6294686141348801e-06, "loss": 0.06722640991210938, "step": 6367 }, { "epoch": 0.8609040980143642, "grad_norm": 0.47764697670936584, "learning_rate": 1.626367571351317e-06, "loss": 0.07256317138671875, "step": 6368 }, { "epoch": 0.8610392902408112, "grad_norm": 0.3566100001335144, "learning_rate": 1.6232693130293386e-06, "loss": 0.0624079704284668, "step": 6369 }, { "epoch": 0.8611744824672581, "grad_norm": 0.3481585681438446, "learning_rate": 1.6201738398140254e-06, "loss": 0.06539058685302734, "step": 6370 }, { "epoch": 0.8613096746937051, "grad_norm": 0.31021958589553833, "learning_rate": 1.6170811523498718e-06, "loss": 0.05242288112640381, "step": 6371 }, { "epoch": 0.8614448669201521, "grad_norm": 0.26138874888420105, "learning_rate": 1.613991251280783e-06, "loss": 0.06061601638793945, "step": 6372 }, { "epoch": 0.8615800591465991, "grad_norm": 0.36094316840171814, "learning_rate": 1.6109041372501028e-06, "loss": 0.06380867958068848, "step": 6373 }, { "epoch": 0.861715251373046, "grad_norm": 0.33734700083732605, "learning_rate": 1.6078198109005766e-06, "loss": 0.061342716217041016, "step": 6374 }, { "epoch": 0.861850443599493, "grad_norm": 0.35576608777046204, "learning_rate": 1.6047382728743843e-06, "loss": 0.08353328704833984, "step": 6375 }, { "epoch": 0.86198563582594, "grad_norm": 0.3268064856529236, "learning_rate": 1.6016595238131176e-06, "loss": 0.06797528266906738, "step": 6376 }, { "epoch": 0.862120828052387, "grad_norm": 0.34670352935791016, "learning_rate": 1.5985835643577824e-06, "loss": 0.05814170837402344, "step": 6377 }, { "epoch": 0.862256020278834, "grad_norm": 0.3909570872783661, "learning_rate": 1.5955103951488177e-06, "loss": 0.0650014877319336, "step": 6378 }, { "epoch": 0.862391212505281, "grad_norm": 0.5527427196502686, "learning_rate": 1.5924400168260666e-06, "loss": 0.07494068145751953, "step": 6379 }, { "epoch": 0.8625264047317279, "grad_norm": 0.24005034565925598, "learning_rate": 1.5893724300288064e-06, "loss": 0.05875349044799805, "step": 6380 }, { "epoch": 0.8626615969581749, "grad_norm": 0.2700231671333313, "learning_rate": 1.5863076353957196e-06, "loss": 0.06253039836883545, "step": 6381 }, { "epoch": 0.8627967891846219, "grad_norm": 0.3574848473072052, "learning_rate": 1.5832456335649104e-06, "loss": 0.07944250106811523, "step": 6382 }, { "epoch": 0.8629319814110689, "grad_norm": 0.2679492235183716, "learning_rate": 1.580186425173909e-06, "loss": 0.06584036350250244, "step": 6383 }, { "epoch": 0.8630671736375158, "grad_norm": 0.25383493304252625, "learning_rate": 1.5771300108596543e-06, "loss": 0.04120588302612305, "step": 6384 }, { "epoch": 0.8632023658639628, "grad_norm": 0.2823106050491333, "learning_rate": 1.5740763912585171e-06, "loss": 0.0635061264038086, "step": 6385 }, { "epoch": 0.8633375580904098, "grad_norm": 0.23393428325653076, "learning_rate": 1.5710255670062657e-06, "loss": 0.06041669845581055, "step": 6386 }, { "epoch": 0.8634727503168568, "grad_norm": 0.6842442154884338, "learning_rate": 1.567977538738105e-06, "loss": 0.08539104461669922, "step": 6387 }, { "epoch": 0.8636079425433038, "grad_norm": 0.22804664075374603, "learning_rate": 1.5649323070886494e-06, "loss": 0.0572429895401001, "step": 6388 }, { "epoch": 0.8637431347697507, "grad_norm": 0.2404095083475113, "learning_rate": 1.5618898726919284e-06, "loss": 0.05065178871154785, "step": 6389 }, { "epoch": 0.8638783269961977, "grad_norm": 0.19856587052345276, "learning_rate": 1.5588502361814032e-06, "loss": 0.04634976387023926, "step": 6390 }, { "epoch": 0.8640135192226447, "grad_norm": 0.3801005780696869, "learning_rate": 1.5558133981899314e-06, "loss": 0.07269001007080078, "step": 6391 }, { "epoch": 0.8641487114490917, "grad_norm": 0.5373231172561646, "learning_rate": 1.5527793593498053e-06, "loss": 0.08079099655151367, "step": 6392 }, { "epoch": 0.8642839036755386, "grad_norm": 0.22107817232608795, "learning_rate": 1.5497481202927244e-06, "loss": 0.045655250549316406, "step": 6393 }, { "epoch": 0.8644190959019856, "grad_norm": 0.3428370952606201, "learning_rate": 1.5467196816498107e-06, "loss": 0.07546043395996094, "step": 6394 }, { "epoch": 0.8645542881284326, "grad_norm": 0.19990627467632294, "learning_rate": 1.5436940440516018e-06, "loss": 0.039630770683288574, "step": 6395 }, { "epoch": 0.8646894803548796, "grad_norm": 0.3012692630290985, "learning_rate": 1.5406712081280484e-06, "loss": 0.07137680053710938, "step": 6396 }, { "epoch": 0.8648246725813266, "grad_norm": 0.29851406812667847, "learning_rate": 1.5376511745085254e-06, "loss": 0.06910943984985352, "step": 6397 }, { "epoch": 0.8649598648077735, "grad_norm": 0.3407972455024719, "learning_rate": 1.5346339438218181e-06, "loss": 0.07711267471313477, "step": 6398 }, { "epoch": 0.8650950570342205, "grad_norm": 0.24595056474208832, "learning_rate": 1.5316195166961295e-06, "loss": 0.04909813404083252, "step": 6399 }, { "epoch": 0.8652302492606675, "grad_norm": 0.2624797523021698, "learning_rate": 1.5286078937590802e-06, "loss": 0.05071711540222168, "step": 6400 }, { "epoch": 0.8653654414871145, "grad_norm": 0.2919888496398926, "learning_rate": 1.5255990756377025e-06, "loss": 0.06617045402526855, "step": 6401 }, { "epoch": 0.8655006337135615, "grad_norm": 0.2081049531698227, "learning_rate": 1.5225930629584534e-06, "loss": 0.04576921463012695, "step": 6402 }, { "epoch": 0.8656358259400084, "grad_norm": 0.6203389167785645, "learning_rate": 1.5195898563472038e-06, "loss": 0.08098185062408447, "step": 6403 }, { "epoch": 0.8657710181664554, "grad_norm": 0.30733442306518555, "learning_rate": 1.5165894564292254e-06, "loss": 0.05830669403076172, "step": 6404 }, { "epoch": 0.8659062103929024, "grad_norm": 0.25245144963264465, "learning_rate": 1.5135918638292269e-06, "loss": 0.05776369571685791, "step": 6405 }, { "epoch": 0.8660414026193494, "grad_norm": 0.3321148157119751, "learning_rate": 1.5105970791713186e-06, "loss": 0.06632804870605469, "step": 6406 }, { "epoch": 0.8661765948457963, "grad_norm": 0.4239051043987274, "learning_rate": 1.5076051030790355e-06, "loss": 0.09187984466552734, "step": 6407 }, { "epoch": 0.8663117870722433, "grad_norm": 0.26415812969207764, "learning_rate": 1.5046159361753226e-06, "loss": 0.06444501876831055, "step": 6408 }, { "epoch": 0.8664469792986903, "grad_norm": 0.2414625734090805, "learning_rate": 1.5016295790825336e-06, "loss": 0.061753273010253906, "step": 6409 }, { "epoch": 0.8665821715251373, "grad_norm": 0.2623870074748993, "learning_rate": 1.4986460324224493e-06, "loss": 0.06393909454345703, "step": 6410 }, { "epoch": 0.8667173637515843, "grad_norm": 0.30190572142601013, "learning_rate": 1.4956652968162582e-06, "loss": 0.05830419063568115, "step": 6411 }, { "epoch": 0.8668525559780312, "grad_norm": 0.28045520186424255, "learning_rate": 1.492687372884567e-06, "loss": 0.06609934568405151, "step": 6412 }, { "epoch": 0.8669877482044782, "grad_norm": 0.3405071794986725, "learning_rate": 1.4897122612473978e-06, "loss": 0.0882425308227539, "step": 6413 }, { "epoch": 0.8671229404309252, "grad_norm": 0.2653646469116211, "learning_rate": 1.4867399625241772e-06, "loss": 0.06584668159484863, "step": 6414 }, { "epoch": 0.8672581326573722, "grad_norm": 0.2610979676246643, "learning_rate": 1.4837704773337602e-06, "loss": 0.05601084232330322, "step": 6415 }, { "epoch": 0.8673933248838192, "grad_norm": 0.20413154363632202, "learning_rate": 1.4808038062944036e-06, "loss": 0.05117321014404297, "step": 6416 }, { "epoch": 0.8675285171102661, "grad_norm": 0.35246792435646057, "learning_rate": 1.4778399500237933e-06, "loss": 0.062796950340271, "step": 6417 }, { "epoch": 0.8676637093367131, "grad_norm": 0.26034027338027954, "learning_rate": 1.4748789091390124e-06, "loss": 0.04463851451873779, "step": 6418 }, { "epoch": 0.8677989015631601, "grad_norm": 0.28708788752555847, "learning_rate": 1.471920684256563e-06, "loss": 0.06418430805206299, "step": 6419 }, { "epoch": 0.8679340937896071, "grad_norm": 0.2791190445423126, "learning_rate": 1.4689652759923721e-06, "loss": 0.04939424991607666, "step": 6420 }, { "epoch": 0.868069286016054, "grad_norm": 0.45456793904304504, "learning_rate": 1.4660126849617645e-06, "loss": 0.06717270612716675, "step": 6421 }, { "epoch": 0.868204478242501, "grad_norm": 0.25810715556144714, "learning_rate": 1.4630629117794914e-06, "loss": 0.06435108184814453, "step": 6422 }, { "epoch": 0.868339670468948, "grad_norm": 0.2916312515735626, "learning_rate": 1.4601159570597033e-06, "loss": 0.06496095657348633, "step": 6423 }, { "epoch": 0.868474862695395, "grad_norm": 0.30769601464271545, "learning_rate": 1.4571718214159795e-06, "loss": 0.06269550323486328, "step": 6424 }, { "epoch": 0.868610054921842, "grad_norm": 0.3282182812690735, "learning_rate": 1.454230505461303e-06, "loss": 0.058388471603393555, "step": 6425 }, { "epoch": 0.8687452471482889, "grad_norm": 0.41010263562202454, "learning_rate": 1.4512920098080672e-06, "loss": 0.08212709426879883, "step": 6426 }, { "epoch": 0.8688804393747359, "grad_norm": 0.31919345259666443, "learning_rate": 1.4483563350680878e-06, "loss": 0.059348106384277344, "step": 6427 }, { "epoch": 0.8690156316011829, "grad_norm": 0.41883012652397156, "learning_rate": 1.4454234818525824e-06, "loss": 0.09795856475830078, "step": 6428 }, { "epoch": 0.8691508238276299, "grad_norm": 0.2800425887107849, "learning_rate": 1.4424934507721926e-06, "loss": 0.0751805305480957, "step": 6429 }, { "epoch": 0.8692860160540768, "grad_norm": 0.3102518618106842, "learning_rate": 1.4395662424369622e-06, "loss": 0.056693315505981445, "step": 6430 }, { "epoch": 0.8694212082805238, "grad_norm": 0.33799704909324646, "learning_rate": 1.436641857456355e-06, "loss": 0.054157257080078125, "step": 6431 }, { "epoch": 0.8695564005069708, "grad_norm": 0.2771012485027313, "learning_rate": 1.4337202964392409e-06, "loss": 0.05644416809082031, "step": 6432 }, { "epoch": 0.8696915927334178, "grad_norm": 0.26662665605545044, "learning_rate": 1.4308015599939033e-06, "loss": 0.04773449897766113, "step": 6433 }, { "epoch": 0.8698267849598648, "grad_norm": 0.36784619092941284, "learning_rate": 1.4278856487280428e-06, "loss": 0.09210872650146484, "step": 6434 }, { "epoch": 0.8699619771863117, "grad_norm": 0.18432922661304474, "learning_rate": 1.4249725632487653e-06, "loss": 0.04014027118682861, "step": 6435 }, { "epoch": 0.8700971694127587, "grad_norm": 0.18665963411331177, "learning_rate": 1.4220623041625924e-06, "loss": 0.030408620834350586, "step": 6436 }, { "epoch": 0.8702323616392057, "grad_norm": 0.47944337129592896, "learning_rate": 1.4191548720754527e-06, "loss": 0.07666683197021484, "step": 6437 }, { "epoch": 0.8703675538656527, "grad_norm": 0.18948031961917877, "learning_rate": 1.4162502675926887e-06, "loss": 0.04884648323059082, "step": 6438 }, { "epoch": 0.8705027460920997, "grad_norm": 0.3038956820964813, "learning_rate": 1.4133484913190596e-06, "loss": 0.06389188766479492, "step": 6439 }, { "epoch": 0.8706379383185466, "grad_norm": 0.1291380524635315, "learning_rate": 1.4104495438587295e-06, "loss": 0.03372645378112793, "step": 6440 }, { "epoch": 0.8707731305449936, "grad_norm": 0.1952275037765503, "learning_rate": 1.4075534258152667e-06, "loss": 0.04088759422302246, "step": 6441 }, { "epoch": 0.8709083227714406, "grad_norm": 0.2805889844894409, "learning_rate": 1.4046601377916673e-06, "loss": 0.07518148422241211, "step": 6442 }, { "epoch": 0.8710435149978876, "grad_norm": 0.23121748864650726, "learning_rate": 1.4017696803903246e-06, "loss": 0.054850101470947266, "step": 6443 }, { "epoch": 0.8711787072243345, "grad_norm": 0.2672847509384155, "learning_rate": 1.3988820542130504e-06, "loss": 0.058417320251464844, "step": 6444 }, { "epoch": 0.8713138994507815, "grad_norm": 0.4756321310997009, "learning_rate": 1.395997259861067e-06, "loss": 0.05745410919189453, "step": 6445 }, { "epoch": 0.8714490916772285, "grad_norm": 0.113944873213768, "learning_rate": 1.3931152979349926e-06, "loss": 0.030443429946899414, "step": 6446 }, { "epoch": 0.8715842839036756, "grad_norm": 0.37563037872314453, "learning_rate": 1.3902361690348769e-06, "loss": 0.07578229904174805, "step": 6447 }, { "epoch": 0.8717194761301226, "grad_norm": 0.4331814646720886, "learning_rate": 1.3873598737601639e-06, "loss": 0.1031179428100586, "step": 6448 }, { "epoch": 0.8718546683565696, "grad_norm": 0.16388234496116638, "learning_rate": 1.3844864127097229e-06, "loss": 0.04517841339111328, "step": 6449 }, { "epoch": 0.8719898605830165, "grad_norm": 0.44739118218421936, "learning_rate": 1.3816157864818151e-06, "loss": 0.06436538696289062, "step": 6450 }, { "epoch": 0.8721250528094635, "grad_norm": 0.2669844627380371, "learning_rate": 1.3787479956741194e-06, "loss": 0.08227872848510742, "step": 6451 }, { "epoch": 0.8722602450359105, "grad_norm": 0.24546951055526733, "learning_rate": 1.3758830408837314e-06, "loss": 0.04765129089355469, "step": 6452 }, { "epoch": 0.8723954372623575, "grad_norm": 0.2474411427974701, "learning_rate": 1.3730209227071439e-06, "loss": 0.04482245445251465, "step": 6453 }, { "epoch": 0.8725306294888044, "grad_norm": 1.1443266868591309, "learning_rate": 1.3701616417402734e-06, "loss": 0.07504284381866455, "step": 6454 }, { "epoch": 0.8726658217152514, "grad_norm": 0.37688857316970825, "learning_rate": 1.367305198578429e-06, "loss": 0.06220531463623047, "step": 6455 }, { "epoch": 0.8728010139416984, "grad_norm": 0.3509252965450287, "learning_rate": 1.36445159381634e-06, "loss": 0.07417893409729004, "step": 6456 }, { "epoch": 0.8729362061681454, "grad_norm": 0.3079795837402344, "learning_rate": 1.361600828048144e-06, "loss": 0.0628666877746582, "step": 6457 }, { "epoch": 0.8730713983945924, "grad_norm": 0.2026120275259018, "learning_rate": 1.3587529018673816e-06, "loss": 0.05359983444213867, "step": 6458 }, { "epoch": 0.8732065906210393, "grad_norm": 0.1779441386461258, "learning_rate": 1.3559078158670152e-06, "loss": 0.03676795959472656, "step": 6459 }, { "epoch": 0.8733417828474863, "grad_norm": 0.28361883759498596, "learning_rate": 1.353065570639394e-06, "loss": 0.06538915634155273, "step": 6460 }, { "epoch": 0.8734769750739333, "grad_norm": 0.4011971354484558, "learning_rate": 1.3502261667763e-06, "loss": 0.06628942489624023, "step": 6461 }, { "epoch": 0.8736121673003803, "grad_norm": 0.2483941912651062, "learning_rate": 1.3473896048689067e-06, "loss": 0.06180381774902344, "step": 6462 }, { "epoch": 0.8737473595268272, "grad_norm": 0.2961508631706238, "learning_rate": 1.3445558855078017e-06, "loss": 0.0854034423828125, "step": 6463 }, { "epoch": 0.8738825517532742, "grad_norm": 0.3669814467430115, "learning_rate": 1.3417250092829814e-06, "loss": 0.05763888359069824, "step": 6464 }, { "epoch": 0.8740177439797212, "grad_norm": 0.18326736986637115, "learning_rate": 1.338896976783846e-06, "loss": 0.04211020469665527, "step": 6465 }, { "epoch": 0.8741529362061682, "grad_norm": 0.31272953748703003, "learning_rate": 1.336071788599213e-06, "loss": 0.04508852958679199, "step": 6466 }, { "epoch": 0.8742881284326152, "grad_norm": 0.18777328729629517, "learning_rate": 1.3332494453172982e-06, "loss": 0.04358077049255371, "step": 6467 }, { "epoch": 0.8744233206590621, "grad_norm": 0.6595178246498108, "learning_rate": 1.3304299475257287e-06, "loss": 0.1093130111694336, "step": 6468 }, { "epoch": 0.8745585128855091, "grad_norm": 0.2750481069087982, "learning_rate": 1.3276132958115394e-06, "loss": 0.06076812744140625, "step": 6469 }, { "epoch": 0.8746937051119561, "grad_norm": 0.1557319164276123, "learning_rate": 1.32479949076117e-06, "loss": 0.047522544860839844, "step": 6470 }, { "epoch": 0.8748288973384031, "grad_norm": 0.2280968874692917, "learning_rate": 1.3219885329604747e-06, "loss": 0.057415008544921875, "step": 6471 }, { "epoch": 0.8749640895648501, "grad_norm": 0.2835663855075836, "learning_rate": 1.319180422994709e-06, "loss": 0.06455183029174805, "step": 6472 }, { "epoch": 0.875099281791297, "grad_norm": 0.3616994321346283, "learning_rate": 1.3163751614485287e-06, "loss": 0.053040504455566406, "step": 6473 }, { "epoch": 0.875234474017744, "grad_norm": 0.4216803014278412, "learning_rate": 1.3135727489060113e-06, "loss": 0.07476139068603516, "step": 6474 }, { "epoch": 0.875369666244191, "grad_norm": 0.49317875504493713, "learning_rate": 1.3107731859506317e-06, "loss": 0.06447434425354004, "step": 6475 }, { "epoch": 0.875504858470638, "grad_norm": 0.2787105441093445, "learning_rate": 1.3079764731652772e-06, "loss": 0.05824148654937744, "step": 6476 }, { "epoch": 0.875640050697085, "grad_norm": 0.3161103427410126, "learning_rate": 1.3051826111322368e-06, "loss": 0.07431697845458984, "step": 6477 }, { "epoch": 0.8757752429235319, "grad_norm": 0.2119627445936203, "learning_rate": 1.3023916004332021e-06, "loss": 0.054253339767456055, "step": 6478 }, { "epoch": 0.8759104351499789, "grad_norm": 0.35281893610954285, "learning_rate": 1.2996034416492847e-06, "loss": 0.0543445348739624, "step": 6479 }, { "epoch": 0.8760456273764259, "grad_norm": 0.34522977471351624, "learning_rate": 1.2968181353609854e-06, "loss": 0.06467533111572266, "step": 6480 }, { "epoch": 0.8761808196028729, "grad_norm": 0.26410940289497375, "learning_rate": 1.2940356821482285e-06, "loss": 0.0499957799911499, "step": 6481 }, { "epoch": 0.8763160118293198, "grad_norm": 0.29039788246154785, "learning_rate": 1.291256082590334e-06, "loss": 0.06514358520507812, "step": 6482 }, { "epoch": 0.8764512040557668, "grad_norm": 0.25613754987716675, "learning_rate": 1.2884793372660208e-06, "loss": 0.07200336456298828, "step": 6483 }, { "epoch": 0.8765863962822138, "grad_norm": 0.2594621777534485, "learning_rate": 1.285705446753433e-06, "loss": 0.04393339157104492, "step": 6484 }, { "epoch": 0.8767215885086608, "grad_norm": 0.25352996587753296, "learning_rate": 1.2829344116301e-06, "loss": 0.05715656280517578, "step": 6485 }, { "epoch": 0.8768567807351078, "grad_norm": 0.48465704917907715, "learning_rate": 1.2801662324729774e-06, "loss": 0.09092998504638672, "step": 6486 }, { "epoch": 0.8769919729615547, "grad_norm": 0.4309283494949341, "learning_rate": 1.2774009098584055e-06, "loss": 0.07410907745361328, "step": 6487 }, { "epoch": 0.8771271651880017, "grad_norm": 0.2961377799510956, "learning_rate": 1.274638444362139e-06, "loss": 0.06084012985229492, "step": 6488 }, { "epoch": 0.8772623574144487, "grad_norm": 0.25042974948883057, "learning_rate": 1.2718788365593443e-06, "loss": 0.0670318603515625, "step": 6489 }, { "epoch": 0.8773975496408957, "grad_norm": 0.28116375207901, "learning_rate": 1.26912208702458e-06, "loss": 0.04669928550720215, "step": 6490 }, { "epoch": 0.8775327418673426, "grad_norm": 0.2501729726791382, "learning_rate": 1.2663681963318242e-06, "loss": 0.04172492027282715, "step": 6491 }, { "epoch": 0.8776679340937896, "grad_norm": 0.28565719723701477, "learning_rate": 1.2636171650544443e-06, "loss": 0.04670226573944092, "step": 6492 }, { "epoch": 0.8778031263202366, "grad_norm": 0.261688232421875, "learning_rate": 1.260868993765219e-06, "loss": 0.05399513244628906, "step": 6493 }, { "epoch": 0.8779383185466836, "grad_norm": 0.27596351504325867, "learning_rate": 1.258123683036339e-06, "loss": 0.05591607093811035, "step": 6494 }, { "epoch": 0.8780735107731306, "grad_norm": 0.290924072265625, "learning_rate": 1.2553812334393872e-06, "loss": 0.06591248512268066, "step": 6495 }, { "epoch": 0.8782087029995775, "grad_norm": 0.3651522994041443, "learning_rate": 1.2526416455453582e-06, "loss": 0.0653609037399292, "step": 6496 }, { "epoch": 0.8783438952260245, "grad_norm": 0.38874951004981995, "learning_rate": 1.249904919924646e-06, "loss": 0.06723189353942871, "step": 6497 }, { "epoch": 0.8784790874524715, "grad_norm": 0.309001624584198, "learning_rate": 1.2471710571470579e-06, "loss": 0.058152854442596436, "step": 6498 }, { "epoch": 0.8786142796789185, "grad_norm": 0.20150302350521088, "learning_rate": 1.2444400577817922e-06, "loss": 0.043239593505859375, "step": 6499 }, { "epoch": 0.8787494719053655, "grad_norm": 0.32289421558380127, "learning_rate": 1.2417119223974621e-06, "loss": 0.06432437896728516, "step": 6500 }, { "epoch": 0.8788846641318124, "grad_norm": 0.3440784811973572, "learning_rate": 1.2389866515620768e-06, "loss": 0.04827141761779785, "step": 6501 }, { "epoch": 0.8790198563582594, "grad_norm": 0.337507039308548, "learning_rate": 1.2362642458430505e-06, "loss": 0.056401729583740234, "step": 6502 }, { "epoch": 0.8791550485847064, "grad_norm": 0.30003872513771057, "learning_rate": 1.2335447058072103e-06, "loss": 0.0617830753326416, "step": 6503 }, { "epoch": 0.8792902408111534, "grad_norm": 0.2757251560688019, "learning_rate": 1.230828032020771e-06, "loss": 0.05974173545837402, "step": 6504 }, { "epoch": 0.8794254330376003, "grad_norm": 0.3699774146080017, "learning_rate": 1.2281142250493638e-06, "loss": 0.06078982353210449, "step": 6505 }, { "epoch": 0.8795606252640473, "grad_norm": 0.33825087547302246, "learning_rate": 1.225403285458015e-06, "loss": 0.06746101379394531, "step": 6506 }, { "epoch": 0.8796958174904943, "grad_norm": 0.15928079187870026, "learning_rate": 1.2226952138111546e-06, "loss": 0.0419154167175293, "step": 6507 }, { "epoch": 0.8798310097169413, "grad_norm": 0.20003150403499603, "learning_rate": 1.219990010672622e-06, "loss": 0.04114890098571777, "step": 6508 }, { "epoch": 0.8799662019433883, "grad_norm": 0.20392554998397827, "learning_rate": 1.2172876766056562e-06, "loss": 0.053170204162597656, "step": 6509 }, { "epoch": 0.8801013941698352, "grad_norm": 0.17940299212932587, "learning_rate": 1.2145882121728906e-06, "loss": 0.04723215103149414, "step": 6510 }, { "epoch": 0.8802365863962822, "grad_norm": 0.2120024710893631, "learning_rate": 1.2118916179363727e-06, "loss": 0.03814142942428589, "step": 6511 }, { "epoch": 0.8803717786227292, "grad_norm": 0.2852402329444885, "learning_rate": 1.209197894457546e-06, "loss": 0.059386253356933594, "step": 6512 }, { "epoch": 0.8805069708491762, "grad_norm": 0.24861997365951538, "learning_rate": 1.2065070422972606e-06, "loss": 0.05455970764160156, "step": 6513 }, { "epoch": 0.8806421630756232, "grad_norm": 0.398517906665802, "learning_rate": 1.2038190620157685e-06, "loss": 0.10115814208984375, "step": 6514 }, { "epoch": 0.8807773553020701, "grad_norm": 0.37528836727142334, "learning_rate": 1.2011339541727117e-06, "loss": 0.07947158813476562, "step": 6515 }, { "epoch": 0.8809125475285171, "grad_norm": 0.1211782917380333, "learning_rate": 1.198451719327155e-06, "loss": 0.033257246017456055, "step": 6516 }, { "epoch": 0.8810477397549641, "grad_norm": 0.4164644181728363, "learning_rate": 1.1957723580375447e-06, "loss": 0.0876150131225586, "step": 6517 }, { "epoch": 0.8811829319814111, "grad_norm": 0.5009216666221619, "learning_rate": 1.193095870861748e-06, "loss": 0.08709907531738281, "step": 6518 }, { "epoch": 0.881318124207858, "grad_norm": 0.21098342537879944, "learning_rate": 1.1904222583570156e-06, "loss": 0.04839920997619629, "step": 6519 }, { "epoch": 0.881453316434305, "grad_norm": 0.40740567445755005, "learning_rate": 1.1877515210800077e-06, "loss": 0.07437682151794434, "step": 6520 }, { "epoch": 0.881588508660752, "grad_norm": 0.296480655670166, "learning_rate": 1.1850836595867925e-06, "loss": 0.059606075286865234, "step": 6521 }, { "epoch": 0.881723700887199, "grad_norm": 0.4370913505554199, "learning_rate": 1.1824186744328259e-06, "loss": 0.08585071563720703, "step": 6522 }, { "epoch": 0.881858893113646, "grad_norm": 0.28805118799209595, "learning_rate": 1.179756566172982e-06, "loss": 0.05884349346160889, "step": 6523 }, { "epoch": 0.8819940853400929, "grad_norm": 0.3825747072696686, "learning_rate": 1.177097335361516e-06, "loss": 0.08362960815429688, "step": 6524 }, { "epoch": 0.8821292775665399, "grad_norm": 0.2790987193584442, "learning_rate": 1.1744409825520969e-06, "loss": 0.04493391513824463, "step": 6525 }, { "epoch": 0.8822644697929869, "grad_norm": 0.26410335302352905, "learning_rate": 1.171787508297792e-06, "loss": 0.056577205657958984, "step": 6526 }, { "epoch": 0.8823996620194339, "grad_norm": 0.36177676916122437, "learning_rate": 1.1691369131510676e-06, "loss": 0.06201958656311035, "step": 6527 }, { "epoch": 0.8825348542458809, "grad_norm": 0.21636635065078735, "learning_rate": 1.1664891976637992e-06, "loss": 0.04668569564819336, "step": 6528 }, { "epoch": 0.8826700464723278, "grad_norm": 0.49301567673683167, "learning_rate": 1.1638443623872442e-06, "loss": 0.09611129760742188, "step": 6529 }, { "epoch": 0.8828052386987748, "grad_norm": 0.1500963568687439, "learning_rate": 1.1612024078720752e-06, "loss": 0.035099148750305176, "step": 6530 }, { "epoch": 0.8829404309252218, "grad_norm": 0.45520225167274475, "learning_rate": 1.1585633346683655e-06, "loss": 0.06321001052856445, "step": 6531 }, { "epoch": 0.8830756231516688, "grad_norm": 0.22220589220523834, "learning_rate": 1.155927143325579e-06, "loss": 0.05169510841369629, "step": 6532 }, { "epoch": 0.8832108153781157, "grad_norm": 0.32102254033088684, "learning_rate": 1.1532938343925887e-06, "loss": 0.059134721755981445, "step": 6533 }, { "epoch": 0.8833460076045627, "grad_norm": 0.37606751918792725, "learning_rate": 1.1506634084176587e-06, "loss": 0.07471990585327148, "step": 6534 }, { "epoch": 0.8834811998310097, "grad_norm": 0.30589964985847473, "learning_rate": 1.148035865948463e-06, "loss": 0.03421425819396973, "step": 6535 }, { "epoch": 0.8836163920574567, "grad_norm": 0.2541763186454773, "learning_rate": 1.1454112075320688e-06, "loss": 0.06158638000488281, "step": 6536 }, { "epoch": 0.8837515842839037, "grad_norm": 0.31818345189094543, "learning_rate": 1.1427894337149426e-06, "loss": 0.03979086875915527, "step": 6537 }, { "epoch": 0.8838867765103506, "grad_norm": 0.1632911115884781, "learning_rate": 1.1401705450429506e-06, "loss": 0.04642629623413086, "step": 6538 }, { "epoch": 0.8840219687367976, "grad_norm": 0.6135344505310059, "learning_rate": 1.1375545420613586e-06, "loss": 0.07690876722335815, "step": 6539 }, { "epoch": 0.8841571609632446, "grad_norm": 0.30065006017684937, "learning_rate": 1.1349414253148377e-06, "loss": 0.06247854232788086, "step": 6540 }, { "epoch": 0.8842923531896916, "grad_norm": 0.24106524884700775, "learning_rate": 1.1323311953474524e-06, "loss": 0.061649322509765625, "step": 6541 }, { "epoch": 0.8844275454161385, "grad_norm": 0.26848623156547546, "learning_rate": 1.1297238527026582e-06, "loss": 0.06455421447753906, "step": 6542 }, { "epoch": 0.8845627376425855, "grad_norm": 0.20980583131313324, "learning_rate": 1.1271193979233258e-06, "loss": 0.039904117584228516, "step": 6543 }, { "epoch": 0.8846979298690325, "grad_norm": 0.35986241698265076, "learning_rate": 1.1245178315517113e-06, "loss": 0.0874481201171875, "step": 6544 }, { "epoch": 0.8848331220954795, "grad_norm": 0.251010924577713, "learning_rate": 1.1219191541294798e-06, "loss": 0.044727861881256104, "step": 6545 }, { "epoch": 0.8849683143219265, "grad_norm": 0.156352698802948, "learning_rate": 1.1193233661976887e-06, "loss": 0.04927706718444824, "step": 6546 }, { "epoch": 0.8851035065483734, "grad_norm": 0.26124900579452515, "learning_rate": 1.1167304682967904e-06, "loss": 0.0682673454284668, "step": 6547 }, { "epoch": 0.8852386987748204, "grad_norm": 0.32066163420677185, "learning_rate": 1.114140460966645e-06, "loss": 0.06539630889892578, "step": 6548 }, { "epoch": 0.8853738910012674, "grad_norm": 0.3091585040092468, "learning_rate": 1.111553344746501e-06, "loss": 0.05526590347290039, "step": 6549 }, { "epoch": 0.8855090832277144, "grad_norm": 0.22207492589950562, "learning_rate": 1.1089691201750174e-06, "loss": 0.04162168502807617, "step": 6550 }, { "epoch": 0.8856442754541614, "grad_norm": 0.2518909275531769, "learning_rate": 1.106387787790239e-06, "loss": 0.053722500801086426, "step": 6551 }, { "epoch": 0.8857794676806083, "grad_norm": 0.43330979347229004, "learning_rate": 1.1038093481296091e-06, "loss": 0.09711885452270508, "step": 6552 }, { "epoch": 0.8859146599070553, "grad_norm": 0.32390764355659485, "learning_rate": 1.10123380172998e-06, "loss": 0.06100893020629883, "step": 6553 }, { "epoch": 0.8860498521335023, "grad_norm": 2.1229729652404785, "learning_rate": 1.098661149127586e-06, "loss": 0.04858589172363281, "step": 6554 }, { "epoch": 0.8861850443599493, "grad_norm": 0.20098699629306793, "learning_rate": 1.0960913908580788e-06, "loss": 0.03773832321166992, "step": 6555 }, { "epoch": 0.8863202365863962, "grad_norm": 0.2583629786968231, "learning_rate": 1.0935245274564852e-06, "loss": 0.06451964378356934, "step": 6556 }, { "epoch": 0.8864554288128432, "grad_norm": 0.3037578761577606, "learning_rate": 1.0909605594572413e-06, "loss": 0.06290674209594727, "step": 6557 }, { "epoch": 0.8865906210392902, "grad_norm": 0.31126680970191956, "learning_rate": 1.0883994873941816e-06, "loss": 0.052382588386535645, "step": 6558 }, { "epoch": 0.8867258132657372, "grad_norm": 0.2369667887687683, "learning_rate": 1.0858413118005345e-06, "loss": 0.06488275527954102, "step": 6559 }, { "epoch": 0.8868610054921842, "grad_norm": 0.1917002946138382, "learning_rate": 1.0832860332089288e-06, "loss": 0.03648841381072998, "step": 6560 }, { "epoch": 0.8869961977186311, "grad_norm": 0.3454590141773224, "learning_rate": 1.0807336521513828e-06, "loss": 0.0711512565612793, "step": 6561 }, { "epoch": 0.8871313899450781, "grad_norm": 0.24861523509025574, "learning_rate": 1.0781841691593142e-06, "loss": 0.05977523326873779, "step": 6562 }, { "epoch": 0.8872665821715251, "grad_norm": 0.4173514246940613, "learning_rate": 1.0756375847635435e-06, "loss": 0.06826353073120117, "step": 6563 }, { "epoch": 0.8874017743979721, "grad_norm": 0.20597350597381592, "learning_rate": 1.0730938994942818e-06, "loss": 0.03624802827835083, "step": 6564 }, { "epoch": 0.887536966624419, "grad_norm": 0.21884912252426147, "learning_rate": 1.070553113881137e-06, "loss": 0.04353678226470947, "step": 6565 }, { "epoch": 0.887672158850866, "grad_norm": 0.6119565963745117, "learning_rate": 1.0680152284531158e-06, "loss": 0.06782770156860352, "step": 6566 }, { "epoch": 0.887807351077313, "grad_norm": 0.5683345794677734, "learning_rate": 1.0654802437386157e-06, "loss": 0.08216142654418945, "step": 6567 }, { "epoch": 0.88794254330376, "grad_norm": 0.290649950504303, "learning_rate": 1.062948160265438e-06, "loss": 0.062177419662475586, "step": 6568 }, { "epoch": 0.888077735530207, "grad_norm": 0.3959812521934509, "learning_rate": 1.0604189785607772e-06, "loss": 0.0941762924194336, "step": 6569 }, { "epoch": 0.888212927756654, "grad_norm": 0.3331816792488098, "learning_rate": 1.0578926991512171e-06, "loss": 0.0622258186340332, "step": 6570 }, { "epoch": 0.8883481199831009, "grad_norm": 0.25859034061431885, "learning_rate": 1.0553693225627458e-06, "loss": 0.05439615249633789, "step": 6571 }, { "epoch": 0.8884833122095479, "grad_norm": 0.33288052678108215, "learning_rate": 1.0528488493207444e-06, "loss": 0.07359981536865234, "step": 6572 }, { "epoch": 0.8886185044359949, "grad_norm": 0.31173184514045715, "learning_rate": 1.0503312799499898e-06, "loss": 0.057944297790527344, "step": 6573 }, { "epoch": 0.8887536966624419, "grad_norm": 0.20959161221981049, "learning_rate": 1.0478166149746476e-06, "loss": 0.050884902477264404, "step": 6574 }, { "epoch": 0.8888888888888888, "grad_norm": 0.4633617401123047, "learning_rate": 1.0453048549182892e-06, "loss": 0.07828986644744873, "step": 6575 }, { "epoch": 0.8890240811153358, "grad_norm": 0.347409725189209, "learning_rate": 1.0427960003038744e-06, "loss": 0.048555612564086914, "step": 6576 }, { "epoch": 0.8891592733417828, "grad_norm": 0.22929781675338745, "learning_rate": 1.040290051653764e-06, "loss": 0.03709220886230469, "step": 6577 }, { "epoch": 0.8892944655682298, "grad_norm": 0.3411514163017273, "learning_rate": 1.0377870094897085e-06, "loss": 0.07570827007293701, "step": 6578 }, { "epoch": 0.8894296577946768, "grad_norm": 0.32179558277130127, "learning_rate": 1.0352868743328497e-06, "loss": 0.052057743072509766, "step": 6579 }, { "epoch": 0.8895648500211237, "grad_norm": 0.36982619762420654, "learning_rate": 1.032789646703733e-06, "loss": 0.05653858184814453, "step": 6580 }, { "epoch": 0.8897000422475707, "grad_norm": 0.23097890615463257, "learning_rate": 1.0302953271222938e-06, "loss": 0.051781654357910156, "step": 6581 }, { "epoch": 0.8898352344740177, "grad_norm": 0.5853762626647949, "learning_rate": 1.0278039161078634e-06, "loss": 0.09038478136062622, "step": 6582 }, { "epoch": 0.8899704267004648, "grad_norm": 0.2296319156885147, "learning_rate": 1.0253154141791705e-06, "loss": 0.05182623863220215, "step": 6583 }, { "epoch": 0.8901056189269118, "grad_norm": 0.1901443749666214, "learning_rate": 1.0228298218543253e-06, "loss": 0.03893446922302246, "step": 6584 }, { "epoch": 0.8902408111533587, "grad_norm": 0.1707037389278412, "learning_rate": 1.020347139650849e-06, "loss": 0.04085683822631836, "step": 6585 }, { "epoch": 0.8903760033798057, "grad_norm": 0.29270249605178833, "learning_rate": 1.0178673680856448e-06, "loss": 0.07581907510757446, "step": 6586 }, { "epoch": 0.8905111956062527, "grad_norm": 0.3180233836174011, "learning_rate": 1.0153905076750196e-06, "loss": 0.05968618392944336, "step": 6587 }, { "epoch": 0.8906463878326997, "grad_norm": 0.2173004299402237, "learning_rate": 1.0129165589346644e-06, "loss": 0.0568547248840332, "step": 6588 }, { "epoch": 0.8907815800591466, "grad_norm": 0.3191970884799957, "learning_rate": 1.0104455223796688e-06, "loss": 0.050228118896484375, "step": 6589 }, { "epoch": 0.8909167722855936, "grad_norm": 0.31993502378463745, "learning_rate": 1.0079773985245178e-06, "loss": 0.06128692626953125, "step": 6590 }, { "epoch": 0.8910519645120406, "grad_norm": 0.35776931047439575, "learning_rate": 1.0055121878830837e-06, "loss": 0.08691418170928955, "step": 6591 }, { "epoch": 0.8911871567384876, "grad_norm": 0.35664451122283936, "learning_rate": 1.0030498909686458e-06, "loss": 0.07600116729736328, "step": 6592 }, { "epoch": 0.8913223489649346, "grad_norm": 0.21843941509723663, "learning_rate": 1.0005905082938593e-06, "loss": 0.06284713745117188, "step": 6593 }, { "epoch": 0.8914575411913815, "grad_norm": 0.2739088237285614, "learning_rate": 9.981340403707794e-07, "loss": 0.058686017990112305, "step": 6594 }, { "epoch": 0.8915927334178285, "grad_norm": 0.1872408241033554, "learning_rate": 9.956804877108638e-07, "loss": 0.029108881950378418, "step": 6595 }, { "epoch": 0.8917279256442755, "grad_norm": 0.2940690815448761, "learning_rate": 9.932298508249488e-07, "loss": 0.06449627876281738, "step": 6596 }, { "epoch": 0.8918631178707225, "grad_norm": 0.35881543159484863, "learning_rate": 9.907821302232729e-07, "loss": 0.06825017929077148, "step": 6597 }, { "epoch": 0.8919983100971695, "grad_norm": 0.19285565614700317, "learning_rate": 9.883373264154633e-07, "loss": 0.048618316650390625, "step": 6598 }, { "epoch": 0.8921335023236164, "grad_norm": 0.23325026035308838, "learning_rate": 9.858954399105397e-07, "loss": 0.06113481521606445, "step": 6599 }, { "epoch": 0.8922686945500634, "grad_norm": 0.5605015158653259, "learning_rate": 9.834564712169202e-07, "loss": 0.08972692489624023, "step": 6600 }, { "epoch": 0.8924038867765104, "grad_norm": 0.34347522258758545, "learning_rate": 9.81020420842409e-07, "loss": 0.07176446914672852, "step": 6601 }, { "epoch": 0.8925390790029574, "grad_norm": 0.27367064356803894, "learning_rate": 9.785872892942033e-07, "loss": 0.07134056091308594, "step": 6602 }, { "epoch": 0.8926742712294043, "grad_norm": 0.25994497537612915, "learning_rate": 9.761570770788964e-07, "loss": 0.06648492813110352, "step": 6603 }, { "epoch": 0.8928094634558513, "grad_norm": 0.5097453594207764, "learning_rate": 9.737297847024685e-07, "loss": 0.07680559158325195, "step": 6604 }, { "epoch": 0.8929446556822983, "grad_norm": 0.288347989320755, "learning_rate": 9.713054126702968e-07, "loss": 0.06078195571899414, "step": 6605 }, { "epoch": 0.8930798479087453, "grad_norm": 0.2569486200809479, "learning_rate": 9.688839614871497e-07, "loss": 0.05570650100708008, "step": 6606 }, { "epoch": 0.8932150401351923, "grad_norm": 0.29454556107521057, "learning_rate": 9.664654316571852e-07, "loss": 0.07751941680908203, "step": 6607 }, { "epoch": 0.8933502323616392, "grad_norm": 0.39530429244041443, "learning_rate": 9.640498236839507e-07, "loss": 0.08114385604858398, "step": 6608 }, { "epoch": 0.8934854245880862, "grad_norm": 0.2667555510997772, "learning_rate": 9.616371380703953e-07, "loss": 0.0582427978515625, "step": 6609 }, { "epoch": 0.8936206168145332, "grad_norm": 0.17970451712608337, "learning_rate": 9.592273753188507e-07, "loss": 0.03761601448059082, "step": 6610 }, { "epoch": 0.8937558090409802, "grad_norm": 0.31466150283813477, "learning_rate": 9.568205359310372e-07, "loss": 0.06345224380493164, "step": 6611 }, { "epoch": 0.8938910012674272, "grad_norm": 0.5389094352722168, "learning_rate": 9.544166204080772e-07, "loss": 0.07818794250488281, "step": 6612 }, { "epoch": 0.8940261934938741, "grad_norm": 0.23919075727462769, "learning_rate": 9.520156292504739e-07, "loss": 0.04798460006713867, "step": 6613 }, { "epoch": 0.8941613857203211, "grad_norm": 0.46265608072280884, "learning_rate": 9.496175629581322e-07, "loss": 0.07718026638031006, "step": 6614 }, { "epoch": 0.8942965779467681, "grad_norm": 0.39650270342826843, "learning_rate": 9.472224220303427e-07, "loss": 0.0720372200012207, "step": 6615 }, { "epoch": 0.8944317701732151, "grad_norm": 0.1982034146785736, "learning_rate": 9.448302069657799e-07, "loss": 0.04893851280212402, "step": 6616 }, { "epoch": 0.894566962399662, "grad_norm": 0.35795077681541443, "learning_rate": 9.424409182625205e-07, "loss": 0.056674957275390625, "step": 6617 }, { "epoch": 0.894702154626109, "grad_norm": 0.3393133878707886, "learning_rate": 9.40054556418023e-07, "loss": 0.08192062377929688, "step": 6618 }, { "epoch": 0.894837346852556, "grad_norm": 0.4634474217891693, "learning_rate": 9.376711219291483e-07, "loss": 0.08518218994140625, "step": 6619 }, { "epoch": 0.894972539079003, "grad_norm": 0.1800144761800766, "learning_rate": 9.352906152921348e-07, "loss": 0.049013614654541016, "step": 6620 }, { "epoch": 0.89510773130545, "grad_norm": 0.2422923594713211, "learning_rate": 9.32913037002614e-07, "loss": 0.07240152359008789, "step": 6621 }, { "epoch": 0.8952429235318969, "grad_norm": 0.2702312171459198, "learning_rate": 9.30538387555615e-07, "loss": 0.03886592388153076, "step": 6622 }, { "epoch": 0.8953781157583439, "grad_norm": 0.27723395824432373, "learning_rate": 9.281666674455508e-07, "loss": 0.07527542114257812, "step": 6623 }, { "epoch": 0.8955133079847909, "grad_norm": 0.23852206766605377, "learning_rate": 9.257978771662295e-07, "loss": 0.0500791072845459, "step": 6624 }, { "epoch": 0.8956485002112379, "grad_norm": 0.296090692281723, "learning_rate": 9.234320172108418e-07, "loss": 0.07099008560180664, "step": 6625 }, { "epoch": 0.8957836924376849, "grad_norm": 0.2960122227668762, "learning_rate": 9.210690880719719e-07, "loss": 0.07332897186279297, "step": 6626 }, { "epoch": 0.8959188846641318, "grad_norm": 0.33863869309425354, "learning_rate": 9.187090902415962e-07, "loss": 0.07859325408935547, "step": 6627 }, { "epoch": 0.8960540768905788, "grad_norm": 0.3466472029685974, "learning_rate": 9.163520242110784e-07, "loss": 0.06715989112854004, "step": 6628 }, { "epoch": 0.8961892691170258, "grad_norm": 0.32666298747062683, "learning_rate": 9.13997890471176e-07, "loss": 0.060756683349609375, "step": 6629 }, { "epoch": 0.8963244613434728, "grad_norm": 0.26205384731292725, "learning_rate": 9.116466895120251e-07, "loss": 0.05854201316833496, "step": 6630 }, { "epoch": 0.8964596535699197, "grad_norm": 0.2584305703639984, "learning_rate": 9.092984218231609e-07, "loss": 0.05685114860534668, "step": 6631 }, { "epoch": 0.8965948457963667, "grad_norm": 0.607467770576477, "learning_rate": 9.069530878935072e-07, "loss": 0.08487892150878906, "step": 6632 }, { "epoch": 0.8967300380228137, "grad_norm": 0.3411499857902527, "learning_rate": 9.046106882113753e-07, "loss": 0.059922218322753906, "step": 6633 }, { "epoch": 0.8968652302492607, "grad_norm": 0.21270091831684113, "learning_rate": 9.022712232644631e-07, "loss": 0.05106949806213379, "step": 6634 }, { "epoch": 0.8970004224757077, "grad_norm": 0.32793837785720825, "learning_rate": 8.999346935398611e-07, "loss": 0.048998117446899414, "step": 6635 }, { "epoch": 0.8971356147021546, "grad_norm": 0.43046560883522034, "learning_rate": 8.976010995240436e-07, "loss": 0.0726022720336914, "step": 6636 }, { "epoch": 0.8972708069286016, "grad_norm": 0.4204343259334564, "learning_rate": 8.952704417028818e-07, "loss": 0.05670487880706787, "step": 6637 }, { "epoch": 0.8974059991550486, "grad_norm": 0.17617012560367584, "learning_rate": 8.929427205616308e-07, "loss": 0.0431976318359375, "step": 6638 }, { "epoch": 0.8975411913814956, "grad_norm": 0.5180122256278992, "learning_rate": 8.906179365849332e-07, "loss": 0.09676170349121094, "step": 6639 }, { "epoch": 0.8976763836079426, "grad_norm": 0.34688785672187805, "learning_rate": 8.882960902568216e-07, "loss": 0.0789722204208374, "step": 6640 }, { "epoch": 0.8978115758343895, "grad_norm": 0.5823962092399597, "learning_rate": 8.85977182060716e-07, "loss": 0.08455848693847656, "step": 6641 }, { "epoch": 0.8979467680608365, "grad_norm": 0.31170040369033813, "learning_rate": 8.836612124794285e-07, "loss": 0.06884980201721191, "step": 6642 }, { "epoch": 0.8980819602872835, "grad_norm": 0.1792592853307724, "learning_rate": 8.813481819951502e-07, "loss": 0.0627431869506836, "step": 6643 }, { "epoch": 0.8982171525137305, "grad_norm": 0.29300495982170105, "learning_rate": 8.790380910894724e-07, "loss": 0.05310249328613281, "step": 6644 }, { "epoch": 0.8983523447401774, "grad_norm": 0.38170912861824036, "learning_rate": 8.767309402433671e-07, "loss": 0.06711244583129883, "step": 6645 }, { "epoch": 0.8984875369666244, "grad_norm": 0.19290301203727722, "learning_rate": 8.744267299371917e-07, "loss": 0.0653371810913086, "step": 6646 }, { "epoch": 0.8986227291930714, "grad_norm": 0.5893896818161011, "learning_rate": 8.721254606507023e-07, "loss": 0.10550880432128906, "step": 6647 }, { "epoch": 0.8987579214195184, "grad_norm": 0.3628849685192108, "learning_rate": 8.698271328630275e-07, "loss": 0.05511760711669922, "step": 6648 }, { "epoch": 0.8988931136459654, "grad_norm": 0.38034719228744507, "learning_rate": 8.675317470526961e-07, "loss": 0.05885505676269531, "step": 6649 }, { "epoch": 0.8990283058724123, "grad_norm": 0.5596439838409424, "learning_rate": 8.652393036976159e-07, "loss": 0.09777212142944336, "step": 6650 }, { "epoch": 0.8991634980988593, "grad_norm": 0.2142232209444046, "learning_rate": 8.629498032750916e-07, "loss": 0.04836559295654297, "step": 6651 }, { "epoch": 0.8992986903253063, "grad_norm": 0.17117846012115479, "learning_rate": 8.606632462618069e-07, "loss": 0.04703378677368164, "step": 6652 }, { "epoch": 0.8994338825517533, "grad_norm": 0.29976606369018555, "learning_rate": 8.583796331338311e-07, "loss": 0.062484025955200195, "step": 6653 }, { "epoch": 0.8995690747782003, "grad_norm": 0.6134013533592224, "learning_rate": 8.560989643666306e-07, "loss": 0.09510993957519531, "step": 6654 }, { "epoch": 0.8997042670046472, "grad_norm": 0.39132779836654663, "learning_rate": 8.538212404350471e-07, "loss": 0.06367635726928711, "step": 6655 }, { "epoch": 0.8998394592310942, "grad_norm": 0.25858405232429504, "learning_rate": 8.515464618133228e-07, "loss": 0.0593571662902832, "step": 6656 }, { "epoch": 0.8999746514575412, "grad_norm": 0.2673133909702301, "learning_rate": 8.492746289750725e-07, "loss": 0.06683647632598877, "step": 6657 }, { "epoch": 0.9001098436839882, "grad_norm": 0.1790904849767685, "learning_rate": 8.470057423933026e-07, "loss": 0.044456422328948975, "step": 6658 }, { "epoch": 0.9002450359104351, "grad_norm": 0.39251089096069336, "learning_rate": 8.447398025404118e-07, "loss": 0.06880021095275879, "step": 6659 }, { "epoch": 0.9003802281368821, "grad_norm": 0.2714976668357849, "learning_rate": 8.42476809888178e-07, "loss": 0.053466737270355225, "step": 6660 }, { "epoch": 0.9005154203633291, "grad_norm": 0.3734161853790283, "learning_rate": 8.402167649077725e-07, "loss": 0.07591962814331055, "step": 6661 }, { "epoch": 0.9006506125897761, "grad_norm": 0.27058759331703186, "learning_rate": 8.379596680697454e-07, "loss": 0.07022619247436523, "step": 6662 }, { "epoch": 0.9007858048162231, "grad_norm": 0.474793016910553, "learning_rate": 8.357055198440328e-07, "loss": 0.0609898567199707, "step": 6663 }, { "epoch": 0.90092099704267, "grad_norm": 0.5272277593612671, "learning_rate": 8.334543206999673e-07, "loss": 0.10007524490356445, "step": 6664 }, { "epoch": 0.901056189269117, "grad_norm": 0.5144278407096863, "learning_rate": 8.312060711062558e-07, "loss": 0.1001276969909668, "step": 6665 }, { "epoch": 0.901191381495564, "grad_norm": 0.4902416169643402, "learning_rate": 8.289607715309988e-07, "loss": 0.09573745727539062, "step": 6666 }, { "epoch": 0.901326573722011, "grad_norm": 0.22319334745407104, "learning_rate": 8.267184224416791e-07, "loss": 0.04765605926513672, "step": 6667 }, { "epoch": 0.901461765948458, "grad_norm": 0.3537706434726715, "learning_rate": 8.244790243051614e-07, "loss": 0.06658744812011719, "step": 6668 }, { "epoch": 0.9015969581749049, "grad_norm": 0.35011163353919983, "learning_rate": 8.222425775877079e-07, "loss": 0.08805465698242188, "step": 6669 }, { "epoch": 0.9017321504013519, "grad_norm": 0.44920170307159424, "learning_rate": 8.200090827549527e-07, "loss": 0.06653451919555664, "step": 6670 }, { "epoch": 0.9018673426277989, "grad_norm": 0.3928013741970062, "learning_rate": 8.17778540271924e-07, "loss": 0.05397987365722656, "step": 6671 }, { "epoch": 0.9020025348542459, "grad_norm": 0.24984316527843475, "learning_rate": 8.155509506030334e-07, "loss": 0.054976463317871094, "step": 6672 }, { "epoch": 0.9021377270806928, "grad_norm": 0.18410629034042358, "learning_rate": 8.133263142120717e-07, "loss": 0.030864715576171875, "step": 6673 }, { "epoch": 0.9022729193071398, "grad_norm": 0.37082603573799133, "learning_rate": 8.111046315622284e-07, "loss": 0.07468032836914062, "step": 6674 }, { "epoch": 0.9024081115335868, "grad_norm": 0.26319846510887146, "learning_rate": 8.088859031160633e-07, "loss": 0.04930877685546875, "step": 6675 }, { "epoch": 0.9025433037600338, "grad_norm": 0.3214379847049713, "learning_rate": 8.066701293355288e-07, "loss": 0.056827664375305176, "step": 6676 }, { "epoch": 0.9026784959864808, "grad_norm": 0.2739381194114685, "learning_rate": 8.044573106819625e-07, "loss": 0.07780933380126953, "step": 6677 }, { "epoch": 0.9028136882129277, "grad_norm": 0.27685046195983887, "learning_rate": 8.022474476160824e-07, "loss": 0.04792189598083496, "step": 6678 }, { "epoch": 0.9029488804393747, "grad_norm": 0.2327430546283722, "learning_rate": 8.000405405979988e-07, "loss": 0.043967247009277344, "step": 6679 }, { "epoch": 0.9030840726658217, "grad_norm": 0.4293173849582672, "learning_rate": 7.978365900871943e-07, "loss": 0.07202434539794922, "step": 6680 }, { "epoch": 0.9032192648922687, "grad_norm": 0.3255581855773926, "learning_rate": 7.956355965425482e-07, "loss": 0.04250144958496094, "step": 6681 }, { "epoch": 0.9033544571187156, "grad_norm": 0.41620928049087524, "learning_rate": 7.934375604223193e-07, "loss": 0.09491443634033203, "step": 6682 }, { "epoch": 0.9034896493451626, "grad_norm": 0.2380094975233078, "learning_rate": 7.912424821841463e-07, "loss": 0.04782736301422119, "step": 6683 }, { "epoch": 0.9036248415716096, "grad_norm": 0.1913856416940689, "learning_rate": 7.89050362285062e-07, "loss": 0.04988408088684082, "step": 6684 }, { "epoch": 0.9037600337980566, "grad_norm": 0.22582587599754333, "learning_rate": 7.868612011814713e-07, "loss": 0.047875404357910156, "step": 6685 }, { "epoch": 0.9038952260245036, "grad_norm": 0.28033584356307983, "learning_rate": 7.846749993291746e-07, "loss": 0.05279040336608887, "step": 6686 }, { "epoch": 0.9040304182509505, "grad_norm": 0.31647419929504395, "learning_rate": 7.824917571833445e-07, "loss": 0.05295419692993164, "step": 6687 }, { "epoch": 0.9041656104773975, "grad_norm": 0.3373316526412964, "learning_rate": 7.80311475198554e-07, "loss": 0.04968404769897461, "step": 6688 }, { "epoch": 0.9043008027038445, "grad_norm": 0.22313278913497925, "learning_rate": 7.781341538287384e-07, "loss": 0.05129897594451904, "step": 6689 }, { "epoch": 0.9044359949302915, "grad_norm": 0.4062902331352234, "learning_rate": 7.759597935272316e-07, "loss": 0.06038779020309448, "step": 6690 }, { "epoch": 0.9045711871567385, "grad_norm": 0.4131912887096405, "learning_rate": 7.7378839474675e-07, "loss": 0.05027008056640625, "step": 6691 }, { "epoch": 0.9047063793831854, "grad_norm": 0.3348398804664612, "learning_rate": 7.716199579393851e-07, "loss": 0.06008744239807129, "step": 6692 }, { "epoch": 0.9048415716096324, "grad_norm": 0.2865051329135895, "learning_rate": 7.694544835566259e-07, "loss": 0.05042457580566406, "step": 6693 }, { "epoch": 0.9049767638360794, "grad_norm": 0.2408238798379898, "learning_rate": 7.672919720493249e-07, "loss": 0.06216144561767578, "step": 6694 }, { "epoch": 0.9051119560625264, "grad_norm": 0.2508491575717926, "learning_rate": 7.651324238677338e-07, "loss": 0.054028868675231934, "step": 6695 }, { "epoch": 0.9052471482889733, "grad_norm": 0.2306889146566391, "learning_rate": 7.629758394614828e-07, "loss": 0.05232954025268555, "step": 6696 }, { "epoch": 0.9053823405154203, "grad_norm": 0.2982030510902405, "learning_rate": 7.608222192795794e-07, "loss": 0.05411696434020996, "step": 6697 }, { "epoch": 0.9055175327418673, "grad_norm": 0.27433010935783386, "learning_rate": 7.586715637704284e-07, "loss": 0.05402827262878418, "step": 6698 }, { "epoch": 0.9056527249683143, "grad_norm": 0.36263376474380493, "learning_rate": 7.565238733817998e-07, "loss": 0.061466217041015625, "step": 6699 }, { "epoch": 0.9057879171947613, "grad_norm": 0.24579842388629913, "learning_rate": 7.543791485608542e-07, "loss": 0.07651901245117188, "step": 6700 }, { "epoch": 0.9059231094212082, "grad_norm": 0.2787382900714874, "learning_rate": 7.52237389754138e-07, "loss": 0.07133591175079346, "step": 6701 }, { "epoch": 0.9060583016476552, "grad_norm": 0.21613316237926483, "learning_rate": 7.500985974075758e-07, "loss": 0.04439842700958252, "step": 6702 }, { "epoch": 0.9061934938741022, "grad_norm": 0.2324300855398178, "learning_rate": 7.479627719664767e-07, "loss": 0.0670309066772461, "step": 6703 }, { "epoch": 0.9063286861005492, "grad_norm": 0.21317701041698456, "learning_rate": 7.458299138755281e-07, "loss": 0.05440521240234375, "step": 6704 }, { "epoch": 0.9064638783269962, "grad_norm": 0.2382166087627411, "learning_rate": 7.437000235788033e-07, "loss": 0.049439430236816406, "step": 6705 }, { "epoch": 0.9065990705534431, "grad_norm": 0.35315731167793274, "learning_rate": 7.415731015197575e-07, "loss": 0.06222355365753174, "step": 6706 }, { "epoch": 0.9067342627798901, "grad_norm": 0.4295409619808197, "learning_rate": 7.39449148141228e-07, "loss": 0.046143293380737305, "step": 6707 }, { "epoch": 0.9068694550063371, "grad_norm": 0.24091030657291412, "learning_rate": 7.373281638854329e-07, "loss": 0.0615234375, "step": 6708 }, { "epoch": 0.9070046472327841, "grad_norm": 0.29950758814811707, "learning_rate": 7.352101491939722e-07, "loss": 0.06075394153594971, "step": 6709 }, { "epoch": 0.907139839459231, "grad_norm": 0.2839188575744629, "learning_rate": 7.330951045078249e-07, "loss": 0.05700480937957764, "step": 6710 }, { "epoch": 0.907275031685678, "grad_norm": 0.4149356782436371, "learning_rate": 7.309830302673621e-07, "loss": 0.06901121139526367, "step": 6711 }, { "epoch": 0.907410223912125, "grad_norm": 0.17082051932811737, "learning_rate": 7.288739269123184e-07, "loss": 0.046177804470062256, "step": 6712 }, { "epoch": 0.907545416138572, "grad_norm": 0.4979169964790344, "learning_rate": 7.267677948818296e-07, "loss": 0.06458663940429688, "step": 6713 }, { "epoch": 0.907680608365019, "grad_norm": 0.17504222691059113, "learning_rate": 7.246646346143997e-07, "loss": 0.033037662506103516, "step": 6714 }, { "epoch": 0.9078158005914659, "grad_norm": 0.23155361413955688, "learning_rate": 7.225644465479153e-07, "loss": 0.03666114807128906, "step": 6715 }, { "epoch": 0.9079509928179129, "grad_norm": 0.4524311125278473, "learning_rate": 7.204672311196547e-07, "loss": 0.06278514862060547, "step": 6716 }, { "epoch": 0.9080861850443599, "grad_norm": 0.1646474003791809, "learning_rate": 7.183729887662604e-07, "loss": 0.033927321434020996, "step": 6717 }, { "epoch": 0.9082213772708069, "grad_norm": 0.2800999879837036, "learning_rate": 7.162817199237703e-07, "loss": 0.06309652328491211, "step": 6718 }, { "epoch": 0.908356569497254, "grad_norm": 0.2834169268608093, "learning_rate": 7.141934250275978e-07, "loss": 0.06281876564025879, "step": 6719 }, { "epoch": 0.9084917617237009, "grad_norm": 0.3598640263080597, "learning_rate": 7.121081045125316e-07, "loss": 0.08004379272460938, "step": 6720 }, { "epoch": 0.9086269539501479, "grad_norm": 0.38457831740379333, "learning_rate": 7.100257588127545e-07, "loss": 0.0518949031829834, "step": 6721 }, { "epoch": 0.9087621461765949, "grad_norm": 0.6134139895439148, "learning_rate": 7.079463883618148e-07, "loss": 0.0683935284614563, "step": 6722 }, { "epoch": 0.9088973384030419, "grad_norm": 0.2467769980430603, "learning_rate": 7.058699935926527e-07, "loss": 0.05888652801513672, "step": 6723 }, { "epoch": 0.9090325306294889, "grad_norm": 0.2695505917072296, "learning_rate": 7.037965749375808e-07, "loss": 0.05156731605529785, "step": 6724 }, { "epoch": 0.9091677228559358, "grad_norm": 0.21168826520442963, "learning_rate": 7.017261328283037e-07, "loss": 0.0583263635635376, "step": 6725 }, { "epoch": 0.9093029150823828, "grad_norm": 0.2085266411304474, "learning_rate": 6.996586676958916e-07, "loss": 0.056255340576171875, "step": 6726 }, { "epoch": 0.9094381073088298, "grad_norm": 0.3592725396156311, "learning_rate": 6.975941799708019e-07, "loss": 0.06098222732543945, "step": 6727 }, { "epoch": 0.9095732995352768, "grad_norm": 0.1824830025434494, "learning_rate": 6.955326700828757e-07, "loss": 0.05001598596572876, "step": 6728 }, { "epoch": 0.9097084917617237, "grad_norm": 0.4089336097240448, "learning_rate": 6.934741384613246e-07, "loss": 0.053939104080200195, "step": 6729 }, { "epoch": 0.9098436839881707, "grad_norm": 0.2246934175491333, "learning_rate": 6.91418585534756e-07, "loss": 0.06101179122924805, "step": 6730 }, { "epoch": 0.9099788762146177, "grad_norm": 0.41654840111732483, "learning_rate": 6.893660117311373e-07, "loss": 0.08330154418945312, "step": 6731 }, { "epoch": 0.9101140684410647, "grad_norm": 0.41293761134147644, "learning_rate": 6.873164174778252e-07, "loss": 0.06404829025268555, "step": 6732 }, { "epoch": 0.9102492606675117, "grad_norm": 0.30583998560905457, "learning_rate": 6.852698032015631e-07, "loss": 0.048659563064575195, "step": 6733 }, { "epoch": 0.9103844528939586, "grad_norm": 0.24714858829975128, "learning_rate": 6.832261693284636e-07, "loss": 0.039984941482543945, "step": 6734 }, { "epoch": 0.9105196451204056, "grad_norm": 0.31316837668418884, "learning_rate": 6.811855162840214e-07, "loss": 0.061365604400634766, "step": 6735 }, { "epoch": 0.9106548373468526, "grad_norm": 0.399715781211853, "learning_rate": 6.791478444931132e-07, "loss": 0.07841825485229492, "step": 6736 }, { "epoch": 0.9107900295732996, "grad_norm": 0.302395761013031, "learning_rate": 6.77113154379988e-07, "loss": 0.0564122200012207, "step": 6737 }, { "epoch": 0.9109252217997466, "grad_norm": 0.38264378905296326, "learning_rate": 6.75081446368287e-07, "loss": 0.07637691497802734, "step": 6738 }, { "epoch": 0.9110604140261935, "grad_norm": 0.35266685485839844, "learning_rate": 6.730527208810166e-07, "loss": 0.06119513511657715, "step": 6739 }, { "epoch": 0.9111956062526405, "grad_norm": 0.25630560517311096, "learning_rate": 6.710269783405709e-07, "loss": 0.06500899791717529, "step": 6740 }, { "epoch": 0.9113307984790875, "grad_norm": 0.318916380405426, "learning_rate": 6.690042191687206e-07, "loss": 0.061501502990722656, "step": 6741 }, { "epoch": 0.9114659907055345, "grad_norm": 0.29961079359054565, "learning_rate": 6.669844437866124e-07, "loss": 0.060685157775878906, "step": 6742 }, { "epoch": 0.9116011829319814, "grad_norm": 0.2267502248287201, "learning_rate": 6.649676526147764e-07, "loss": 0.05499839782714844, "step": 6743 }, { "epoch": 0.9117363751584284, "grad_norm": 0.2665746212005615, "learning_rate": 6.629538460731199e-07, "loss": 0.07109498977661133, "step": 6744 }, { "epoch": 0.9118715673848754, "grad_norm": 0.18866756558418274, "learning_rate": 6.609430245809261e-07, "loss": 0.05088448524475098, "step": 6745 }, { "epoch": 0.9120067596113224, "grad_norm": 0.30700844526290894, "learning_rate": 6.589351885568617e-07, "loss": 0.055596351623535156, "step": 6746 }, { "epoch": 0.9121419518377694, "grad_norm": 0.314453661441803, "learning_rate": 6.569303384189624e-07, "loss": 0.08609580993652344, "step": 6747 }, { "epoch": 0.9122771440642163, "grad_norm": 0.23913970589637756, "learning_rate": 6.54928474584659e-07, "loss": 0.07298660278320312, "step": 6748 }, { "epoch": 0.9124123362906633, "grad_norm": 0.36009082198143005, "learning_rate": 6.5292959747074e-07, "loss": 0.09228992462158203, "step": 6749 }, { "epoch": 0.9125475285171103, "grad_norm": 0.28268963098526, "learning_rate": 6.509337074933891e-07, "loss": 0.07158422470092773, "step": 6750 }, { "epoch": 0.9126827207435573, "grad_norm": 0.22034013271331787, "learning_rate": 6.489408050681589e-07, "loss": 0.0398256778717041, "step": 6751 }, { "epoch": 0.9128179129700043, "grad_norm": 0.2606247067451477, "learning_rate": 6.469508906099792e-07, "loss": 0.05727100372314453, "step": 6752 }, { "epoch": 0.9129531051964512, "grad_norm": 0.25070828199386597, "learning_rate": 6.449639645331684e-07, "loss": 0.05663037300109863, "step": 6753 }, { "epoch": 0.9130882974228982, "grad_norm": 0.5489130020141602, "learning_rate": 6.429800272514058e-07, "loss": 0.08507394790649414, "step": 6754 }, { "epoch": 0.9132234896493452, "grad_norm": 0.19063083827495575, "learning_rate": 6.409990791777659e-07, "loss": 0.03476130962371826, "step": 6755 }, { "epoch": 0.9133586818757922, "grad_norm": 0.27534112334251404, "learning_rate": 6.390211207246888e-07, "loss": 0.06719374656677246, "step": 6756 }, { "epoch": 0.9134938741022391, "grad_norm": 0.2823287844657898, "learning_rate": 6.370461523039967e-07, "loss": 0.07176494598388672, "step": 6757 }, { "epoch": 0.9136290663286861, "grad_norm": 0.20179909467697144, "learning_rate": 6.350741743268873e-07, "loss": 0.05219745635986328, "step": 6758 }, { "epoch": 0.9137642585551331, "grad_norm": 0.33456456661224365, "learning_rate": 6.331051872039373e-07, "loss": 0.06334924697875977, "step": 6759 }, { "epoch": 0.9138994507815801, "grad_norm": 0.32684987783432007, "learning_rate": 6.31139191345102e-07, "loss": 0.03999185562133789, "step": 6760 }, { "epoch": 0.9140346430080271, "grad_norm": 0.3114374577999115, "learning_rate": 6.291761871597091e-07, "loss": 0.07558441162109375, "step": 6761 }, { "epoch": 0.914169835234474, "grad_norm": 0.33083027601242065, "learning_rate": 6.272161750564731e-07, "loss": 0.05785322189331055, "step": 6762 }, { "epoch": 0.914305027460921, "grad_norm": 0.48567214608192444, "learning_rate": 6.252591554434728e-07, "loss": 0.06440496444702148, "step": 6763 }, { "epoch": 0.914440219687368, "grad_norm": 0.3678141236305237, "learning_rate": 6.233051287281688e-07, "loss": 0.08142662048339844, "step": 6764 }, { "epoch": 0.914575411913815, "grad_norm": 0.7030377388000488, "learning_rate": 6.213540953174057e-07, "loss": 0.07967948913574219, "step": 6765 }, { "epoch": 0.914710604140262, "grad_norm": 0.4859228730201721, "learning_rate": 6.194060556173953e-07, "loss": 0.06463241577148438, "step": 6766 }, { "epoch": 0.9148457963667089, "grad_norm": 0.2780775725841522, "learning_rate": 6.17461010033733e-07, "loss": 0.048812687397003174, "step": 6767 }, { "epoch": 0.9149809885931559, "grad_norm": 0.22061297297477722, "learning_rate": 6.155189589713833e-07, "loss": 0.047280073165893555, "step": 6768 }, { "epoch": 0.9151161808196029, "grad_norm": 0.2835690379142761, "learning_rate": 6.135799028346928e-07, "loss": 0.0580897331237793, "step": 6769 }, { "epoch": 0.9152513730460499, "grad_norm": 0.28901752829551697, "learning_rate": 6.116438420273868e-07, "loss": 0.06691884994506836, "step": 6770 }, { "epoch": 0.9153865652724968, "grad_norm": 0.3729507625102997, "learning_rate": 6.097107769525595e-07, "loss": 0.049791932106018066, "step": 6771 }, { "epoch": 0.9155217574989438, "grad_norm": 0.3505827486515045, "learning_rate": 6.077807080126873e-07, "loss": 0.0691225528717041, "step": 6772 }, { "epoch": 0.9156569497253908, "grad_norm": 0.2565617561340332, "learning_rate": 6.058536356096206e-07, "loss": 0.0696115493774414, "step": 6773 }, { "epoch": 0.9157921419518378, "grad_norm": 0.4454917013645172, "learning_rate": 6.039295601445833e-07, "loss": 0.07009410858154297, "step": 6774 }, { "epoch": 0.9159273341782848, "grad_norm": 0.34154823422431946, "learning_rate": 6.020084820181831e-07, "loss": 0.06250905990600586, "step": 6775 }, { "epoch": 0.9160625264047317, "grad_norm": 0.20340220630168915, "learning_rate": 6.000904016303971e-07, "loss": 0.04442262649536133, "step": 6776 }, { "epoch": 0.9161977186311787, "grad_norm": 0.2908305823802948, "learning_rate": 5.981753193805789e-07, "loss": 0.05586540699005127, "step": 6777 }, { "epoch": 0.9163329108576257, "grad_norm": 0.22283963859081268, "learning_rate": 5.962632356674597e-07, "loss": 0.04918527603149414, "step": 6778 }, { "epoch": 0.9164681030840727, "grad_norm": 0.2516134977340698, "learning_rate": 5.94354150889141e-07, "loss": 0.05009293556213379, "step": 6779 }, { "epoch": 0.9166032953105197, "grad_norm": 0.23964817821979523, "learning_rate": 5.924480654431147e-07, "loss": 0.04578065872192383, "step": 6780 }, { "epoch": 0.9167384875369666, "grad_norm": 0.42407140135765076, "learning_rate": 5.905449797262252e-07, "loss": 0.06870126724243164, "step": 6781 }, { "epoch": 0.9168736797634136, "grad_norm": 0.27598291635513306, "learning_rate": 5.886448941347156e-07, "loss": 0.04606819152832031, "step": 6782 }, { "epoch": 0.9170088719898606, "grad_norm": 0.3802034556865692, "learning_rate": 5.867478090641892e-07, "loss": 0.0780797004699707, "step": 6783 }, { "epoch": 0.9171440642163076, "grad_norm": 0.23938871920108795, "learning_rate": 5.848537249096269e-07, "loss": 0.05454683303833008, "step": 6784 }, { "epoch": 0.9172792564427545, "grad_norm": 0.33651578426361084, "learning_rate": 5.829626420653949e-07, "loss": 0.07274627685546875, "step": 6785 }, { "epoch": 0.9174144486692015, "grad_norm": 0.3467169404029846, "learning_rate": 5.810745609252166e-07, "loss": 0.0642080307006836, "step": 6786 }, { "epoch": 0.9175496408956485, "grad_norm": 0.25199273228645325, "learning_rate": 5.791894818822091e-07, "loss": 0.05392313003540039, "step": 6787 }, { "epoch": 0.9176848331220955, "grad_norm": 0.3076941967010498, "learning_rate": 5.773074053288519e-07, "loss": 0.06535136699676514, "step": 6788 }, { "epoch": 0.9178200253485425, "grad_norm": 0.40538471937179565, "learning_rate": 5.75428331657003e-07, "loss": 0.05321621894836426, "step": 6789 }, { "epoch": 0.9179552175749894, "grad_norm": 0.16008977591991425, "learning_rate": 5.735522612578998e-07, "loss": 0.031260907649993896, "step": 6790 }, { "epoch": 0.9180904098014364, "grad_norm": 0.3699582815170288, "learning_rate": 5.716791945221444e-07, "loss": 0.06197977066040039, "step": 6791 }, { "epoch": 0.9182256020278834, "grad_norm": 0.20649270713329315, "learning_rate": 5.698091318397219e-07, "loss": 0.047204017639160156, "step": 6792 }, { "epoch": 0.9183607942543304, "grad_norm": 0.3541818857192993, "learning_rate": 5.679420735999908e-07, "loss": 0.0786905288696289, "step": 6793 }, { "epoch": 0.9184959864807773, "grad_norm": 0.18114563822746277, "learning_rate": 5.660780201916799e-07, "loss": 0.038895606994628906, "step": 6794 }, { "epoch": 0.9186311787072243, "grad_norm": 0.3154070973396301, "learning_rate": 5.642169720028973e-07, "loss": 0.08837461471557617, "step": 6795 }, { "epoch": 0.9187663709336713, "grad_norm": 0.20202623307704926, "learning_rate": 5.623589294211196e-07, "loss": 0.04513823986053467, "step": 6796 }, { "epoch": 0.9189015631601183, "grad_norm": 0.5871425271034241, "learning_rate": 5.605038928332057e-07, "loss": 0.09560298919677734, "step": 6797 }, { "epoch": 0.9190367553865653, "grad_norm": 0.47953614592552185, "learning_rate": 5.586518626253817e-07, "loss": 0.06708073616027832, "step": 6798 }, { "epoch": 0.9191719476130122, "grad_norm": 0.25075042247772217, "learning_rate": 5.568028391832524e-07, "loss": 0.053061485290527344, "step": 6799 }, { "epoch": 0.9193071398394592, "grad_norm": 0.33691227436065674, "learning_rate": 5.549568228917917e-07, "loss": 0.06907331943511963, "step": 6800 }, { "epoch": 0.9194423320659062, "grad_norm": 0.5234549045562744, "learning_rate": 5.531138141353486e-07, "loss": 0.06320500373840332, "step": 6801 }, { "epoch": 0.9195775242923532, "grad_norm": 0.35388708114624023, "learning_rate": 5.512738132976514e-07, "loss": 0.06179189682006836, "step": 6802 }, { "epoch": 0.9197127165188002, "grad_norm": 0.2902572453022003, "learning_rate": 5.49436820761795e-07, "loss": 0.05215871334075928, "step": 6803 }, { "epoch": 0.9198479087452471, "grad_norm": 0.3174911141395569, "learning_rate": 5.476028369102537e-07, "loss": 0.05806374549865723, "step": 6804 }, { "epoch": 0.9199831009716941, "grad_norm": 0.18217256665229797, "learning_rate": 5.45771862124872e-07, "loss": 0.03731536865234375, "step": 6805 }, { "epoch": 0.9201182931981411, "grad_norm": 0.43400830030441284, "learning_rate": 5.439438967868649e-07, "loss": 0.08292078971862793, "step": 6806 }, { "epoch": 0.9202534854245881, "grad_norm": 0.3415291905403137, "learning_rate": 5.421189412768296e-07, "loss": 0.05956315994262695, "step": 6807 }, { "epoch": 0.920388677651035, "grad_norm": 0.2812170386314392, "learning_rate": 5.402969959747306e-07, "loss": 0.06839466094970703, "step": 6808 }, { "epoch": 0.920523869877482, "grad_norm": 0.2639061212539673, "learning_rate": 5.384780612599044e-07, "loss": 0.06360101699829102, "step": 6809 }, { "epoch": 0.920659062103929, "grad_norm": 0.2789705693721771, "learning_rate": 5.366621375110647e-07, "loss": 0.04472571611404419, "step": 6810 }, { "epoch": 0.920794254330376, "grad_norm": 0.21159721910953522, "learning_rate": 5.348492251062942e-07, "loss": 0.05944252014160156, "step": 6811 }, { "epoch": 0.920929446556823, "grad_norm": 0.306159108877182, "learning_rate": 5.330393244230558e-07, "loss": 0.06237149238586426, "step": 6812 }, { "epoch": 0.9210646387832699, "grad_norm": 0.4733816981315613, "learning_rate": 5.312324358381731e-07, "loss": 0.09130477905273438, "step": 6813 }, { "epoch": 0.9211998310097169, "grad_norm": 0.2981599271297455, "learning_rate": 5.29428559727857e-07, "loss": 0.06065654754638672, "step": 6814 }, { "epoch": 0.9213350232361639, "grad_norm": 0.3937837779521942, "learning_rate": 5.276276964676802e-07, "loss": 0.07400846481323242, "step": 6815 }, { "epoch": 0.9214702154626109, "grad_norm": 0.389931857585907, "learning_rate": 5.258298464325928e-07, "loss": 0.0682833194732666, "step": 6816 }, { "epoch": 0.9216054076890579, "grad_norm": 0.2545563578605652, "learning_rate": 5.240350099969204e-07, "loss": 0.06184971332550049, "step": 6817 }, { "epoch": 0.9217405999155048, "grad_norm": 0.2999067008495331, "learning_rate": 5.222431875343492e-07, "loss": 0.061218976974487305, "step": 6818 }, { "epoch": 0.9218757921419518, "grad_norm": 0.18707145750522614, "learning_rate": 5.204543794179539e-07, "loss": 0.04723691940307617, "step": 6819 }, { "epoch": 0.9220109843683988, "grad_norm": 0.2815069854259491, "learning_rate": 5.186685860201717e-07, "loss": 0.05034446716308594, "step": 6820 }, { "epoch": 0.9221461765948458, "grad_norm": 0.23036843538284302, "learning_rate": 5.16885807712812e-07, "loss": 0.04822182655334473, "step": 6821 }, { "epoch": 0.9222813688212927, "grad_norm": 0.11852409690618515, "learning_rate": 5.151060448670625e-07, "loss": 0.03097057342529297, "step": 6822 }, { "epoch": 0.9224165610477397, "grad_norm": 0.257041335105896, "learning_rate": 5.133292978534754e-07, "loss": 0.05121743679046631, "step": 6823 }, { "epoch": 0.9225517532741867, "grad_norm": 0.478397935628891, "learning_rate": 5.115555670419814e-07, "loss": 0.06816601753234863, "step": 6824 }, { "epoch": 0.9226869455006337, "grad_norm": 0.43804439902305603, "learning_rate": 5.097848528018817e-07, "loss": 0.06466591358184814, "step": 6825 }, { "epoch": 0.9228221377270807, "grad_norm": 0.2722177803516388, "learning_rate": 5.080171555018448e-07, "loss": 0.06040811538696289, "step": 6826 }, { "epoch": 0.9229573299535276, "grad_norm": 0.39246806502342224, "learning_rate": 5.06252475509918e-07, "loss": 0.0710756778717041, "step": 6827 }, { "epoch": 0.9230925221799746, "grad_norm": 0.34272870421409607, "learning_rate": 5.044908131935139e-07, "loss": 0.09210443496704102, "step": 6828 }, { "epoch": 0.9232277144064216, "grad_norm": 0.4193495213985443, "learning_rate": 5.027321689194242e-07, "loss": 0.06343269348144531, "step": 6829 }, { "epoch": 0.9233629066328686, "grad_norm": 0.3049297034740448, "learning_rate": 5.009765430538061e-07, "loss": 0.06045010685920715, "step": 6830 }, { "epoch": 0.9234980988593156, "grad_norm": 0.29091471433639526, "learning_rate": 4.992239359621886e-07, "loss": 0.05863833427429199, "step": 6831 }, { "epoch": 0.9236332910857625, "grad_norm": 0.1579255610704422, "learning_rate": 4.974743480094767e-07, "loss": 0.04657268524169922, "step": 6832 }, { "epoch": 0.9237684833122095, "grad_norm": 0.3201614320278168, "learning_rate": 4.957277795599407e-07, "loss": 0.04777860641479492, "step": 6833 }, { "epoch": 0.9239036755386565, "grad_norm": 0.17296139895915985, "learning_rate": 4.93984230977228e-07, "loss": 0.03850138187408447, "step": 6834 }, { "epoch": 0.9240388677651035, "grad_norm": 0.24672360718250275, "learning_rate": 4.922437026243531e-07, "loss": 0.06394791603088379, "step": 6835 }, { "epoch": 0.9241740599915504, "grad_norm": 0.3869103193283081, "learning_rate": 4.905061948637063e-07, "loss": 0.09241247177124023, "step": 6836 }, { "epoch": 0.9243092522179974, "grad_norm": 0.196123406291008, "learning_rate": 4.887717080570431e-07, "loss": 0.04959821701049805, "step": 6837 }, { "epoch": 0.9244444444444444, "grad_norm": 0.29419219493865967, "learning_rate": 4.870402425654913e-07, "loss": 0.05415534973144531, "step": 6838 }, { "epoch": 0.9245796366708914, "grad_norm": 0.251487135887146, "learning_rate": 4.853117987495542e-07, "loss": 0.06594061851501465, "step": 6839 }, { "epoch": 0.9247148288973384, "grad_norm": 0.32523199915885925, "learning_rate": 4.83586376969104e-07, "loss": 0.07776308059692383, "step": 6840 }, { "epoch": 0.9248500211237853, "grad_norm": 0.30944687128067017, "learning_rate": 4.818639775833816e-07, "loss": 0.07084798812866211, "step": 6841 }, { "epoch": 0.9249852133502323, "grad_norm": 0.25586116313934326, "learning_rate": 4.801446009509969e-07, "loss": 0.05023503303527832, "step": 6842 }, { "epoch": 0.9251204055766793, "grad_norm": 0.3534484803676605, "learning_rate": 4.784282474299367e-07, "loss": 0.06169772148132324, "step": 6843 }, { "epoch": 0.9252555978031263, "grad_norm": 0.25079667568206787, "learning_rate": 4.767149173775537e-07, "loss": 0.05755043029785156, "step": 6844 }, { "epoch": 0.9253907900295733, "grad_norm": 0.45221132040023804, "learning_rate": 4.750046111505724e-07, "loss": 0.07118082046508789, "step": 6845 }, { "epoch": 0.9255259822560202, "grad_norm": 0.3971695601940155, "learning_rate": 4.732973291050896e-07, "loss": 0.0812540054321289, "step": 6846 }, { "epoch": 0.9256611744824672, "grad_norm": 0.2767832279205322, "learning_rate": 4.7159307159656607e-07, "loss": 0.05943197011947632, "step": 6847 }, { "epoch": 0.9257963667089142, "grad_norm": 0.1832389086484909, "learning_rate": 4.6989183897983954e-07, "loss": 0.05013716220855713, "step": 6848 }, { "epoch": 0.9259315589353612, "grad_norm": 0.2647983431816101, "learning_rate": 4.681936316091201e-07, "loss": 0.054838478565216064, "step": 6849 }, { "epoch": 0.9260667511618081, "grad_norm": 0.26054203510284424, "learning_rate": 4.664984498379765e-07, "loss": 0.05982637405395508, "step": 6850 }, { "epoch": 0.9262019433882551, "grad_norm": 0.3227074146270752, "learning_rate": 4.6480629401935814e-07, "loss": 0.05984306335449219, "step": 6851 }, { "epoch": 0.9263371356147021, "grad_norm": 0.30183130502700806, "learning_rate": 4.631171645055815e-07, "loss": 0.07209181785583496, "step": 6852 }, { "epoch": 0.9264723278411491, "grad_norm": 0.2954559624195099, "learning_rate": 4.614310616483286e-07, "loss": 0.04852783679962158, "step": 6853 }, { "epoch": 0.9266075200675962, "grad_norm": 0.23906128108501434, "learning_rate": 4.5974798579866193e-07, "loss": 0.06581377983093262, "step": 6854 }, { "epoch": 0.9267427122940431, "grad_norm": 0.4055463969707489, "learning_rate": 4.580679373069996e-07, "loss": 0.08595585823059082, "step": 6855 }, { "epoch": 0.9268779045204901, "grad_norm": 0.5272423624992371, "learning_rate": 4.5639091652314e-07, "loss": 0.09612560272216797, "step": 6856 }, { "epoch": 0.9270130967469371, "grad_norm": 0.28300583362579346, "learning_rate": 4.54716923796249e-07, "loss": 0.06563735008239746, "step": 6857 }, { "epoch": 0.9271482889733841, "grad_norm": 0.2970777750015259, "learning_rate": 4.5304595947485927e-07, "loss": 0.04386281967163086, "step": 6858 }, { "epoch": 0.9272834811998311, "grad_norm": 0.3516584038734436, "learning_rate": 4.5137802390687433e-07, "loss": 0.05901622772216797, "step": 6859 }, { "epoch": 0.927418673426278, "grad_norm": 0.23541265726089478, "learning_rate": 4.497131174395663e-07, "loss": 0.056168556213378906, "step": 6860 }, { "epoch": 0.927553865652725, "grad_norm": 0.29414865374565125, "learning_rate": 4.4805124041957967e-07, "loss": 0.05954241752624512, "step": 6861 }, { "epoch": 0.927689057879172, "grad_norm": 0.36588066816329956, "learning_rate": 4.463923931929259e-07, "loss": 0.0796670913696289, "step": 6862 }, { "epoch": 0.927824250105619, "grad_norm": 0.2690092623233795, "learning_rate": 4.4473657610498377e-07, "loss": 0.05303668975830078, "step": 6863 }, { "epoch": 0.927959442332066, "grad_norm": 0.36585530638694763, "learning_rate": 4.430837895005058e-07, "loss": 0.05746293067932129, "step": 6864 }, { "epoch": 0.9280946345585129, "grad_norm": 0.20604927837848663, "learning_rate": 4.4143403372360836e-07, "loss": 0.039200663566589355, "step": 6865 }, { "epoch": 0.9282298267849599, "grad_norm": 0.3721497356891632, "learning_rate": 4.3978730911778176e-07, "loss": 0.07439613342285156, "step": 6866 }, { "epoch": 0.9283650190114069, "grad_norm": 0.2101733386516571, "learning_rate": 4.381436160258834e-07, "loss": 0.058396339416503906, "step": 6867 }, { "epoch": 0.9285002112378539, "grad_norm": 0.43266987800598145, "learning_rate": 4.3650295479013615e-07, "loss": 0.09219121932983398, "step": 6868 }, { "epoch": 0.9286354034643008, "grad_norm": 0.4713047444820404, "learning_rate": 4.348653257521351e-07, "loss": 0.07306671142578125, "step": 6869 }, { "epoch": 0.9287705956907478, "grad_norm": 0.24050207436084747, "learning_rate": 4.332307292528442e-07, "loss": 0.058725953102111816, "step": 6870 }, { "epoch": 0.9289057879171948, "grad_norm": 0.4709692597389221, "learning_rate": 4.315991656325946e-07, "loss": 0.0814213752746582, "step": 6871 }, { "epoch": 0.9290409801436418, "grad_norm": 0.22877316176891327, "learning_rate": 4.299706352310895e-07, "loss": 0.058759212493896484, "step": 6872 }, { "epoch": 0.9291761723700888, "grad_norm": 0.4701555371284485, "learning_rate": 4.283451383873926e-07, "loss": 0.08330810070037842, "step": 6873 }, { "epoch": 0.9293113645965357, "grad_norm": 0.35321491956710815, "learning_rate": 4.26722675439945e-07, "loss": 0.05710732936859131, "step": 6874 }, { "epoch": 0.9294465568229827, "grad_norm": 0.2343061864376068, "learning_rate": 4.251032467265481e-07, "loss": 0.06146240234375, "step": 6875 }, { "epoch": 0.9295817490494297, "grad_norm": 0.3130585849285126, "learning_rate": 4.234868525843805e-07, "loss": 0.06228923797607422, "step": 6876 }, { "epoch": 0.9297169412758767, "grad_norm": 0.44318705797195435, "learning_rate": 4.218734933499796e-07, "loss": 0.08170604705810547, "step": 6877 }, { "epoch": 0.9298521335023237, "grad_norm": 0.16497454047203064, "learning_rate": 4.202631693592601e-07, "loss": 0.036772727966308594, "step": 6878 }, { "epoch": 0.9299873257287706, "grad_norm": 0.2680743932723999, "learning_rate": 4.186558809474955e-07, "loss": 0.07556962966918945, "step": 6879 }, { "epoch": 0.9301225179552176, "grad_norm": 0.26797208189964294, "learning_rate": 4.170516284493331e-07, "loss": 0.05643129348754883, "step": 6880 }, { "epoch": 0.9302577101816646, "grad_norm": 0.2170102447271347, "learning_rate": 4.1545041219879063e-07, "loss": 0.049879372119903564, "step": 6881 }, { "epoch": 0.9303929024081116, "grad_norm": 0.3908720314502716, "learning_rate": 4.138522325292432e-07, "loss": 0.05023479461669922, "step": 6882 }, { "epoch": 0.9305280946345585, "grad_norm": 0.3174516558647156, "learning_rate": 4.1225708977344457e-07, "loss": 0.061882972717285156, "step": 6883 }, { "epoch": 0.9306632868610055, "grad_norm": 0.26105132699012756, "learning_rate": 4.106649842635124e-07, "loss": 0.06191539764404297, "step": 6884 }, { "epoch": 0.9307984790874525, "grad_norm": 0.24962367117404938, "learning_rate": 4.090759163309282e-07, "loss": 0.07141685485839844, "step": 6885 }, { "epoch": 0.9309336713138995, "grad_norm": 0.31625574827194214, "learning_rate": 4.07489886306549e-07, "loss": 0.05345571041107178, "step": 6886 }, { "epoch": 0.9310688635403465, "grad_norm": 0.1707090139389038, "learning_rate": 4.059068945205907e-07, "loss": 0.038506269454956055, "step": 6887 }, { "epoch": 0.9312040557667934, "grad_norm": 0.17831982672214508, "learning_rate": 4.043269413026429e-07, "loss": 0.04871511459350586, "step": 6888 }, { "epoch": 0.9313392479932404, "grad_norm": 0.22578223049640656, "learning_rate": 4.027500269816592e-07, "loss": 0.0528789758682251, "step": 6889 }, { "epoch": 0.9314744402196874, "grad_norm": 0.20749430358409882, "learning_rate": 4.011761518859619e-07, "loss": 0.051172733306884766, "step": 6890 }, { "epoch": 0.9316096324461344, "grad_norm": 0.25292253494262695, "learning_rate": 3.996053163432406e-07, "loss": 0.05469846725463867, "step": 6891 }, { "epoch": 0.9317448246725814, "grad_norm": 0.22334545850753784, "learning_rate": 3.980375206805503e-07, "loss": 0.05817916989326477, "step": 6892 }, { "epoch": 0.9318800168990283, "grad_norm": 0.27013280987739563, "learning_rate": 3.9647276522431664e-07, "loss": 0.054511189460754395, "step": 6893 }, { "epoch": 0.9320152091254753, "grad_norm": 0.6775422692298889, "learning_rate": 3.949110503003289e-07, "loss": 0.08910083770751953, "step": 6894 }, { "epoch": 0.9321504013519223, "grad_norm": 0.21085061132907867, "learning_rate": 3.9335237623374377e-07, "loss": 0.05225324630737305, "step": 6895 }, { "epoch": 0.9322855935783693, "grad_norm": 0.4456910192966461, "learning_rate": 3.917967433490849e-07, "loss": 0.0707392692565918, "step": 6896 }, { "epoch": 0.9324207858048162, "grad_norm": 0.2432425171136856, "learning_rate": 3.902441519702449e-07, "loss": 0.06799256801605225, "step": 6897 }, { "epoch": 0.9325559780312632, "grad_norm": 0.3617327809333801, "learning_rate": 3.886946024204818e-07, "loss": 0.051616370677948, "step": 6898 }, { "epoch": 0.9326911702577102, "grad_norm": 0.9337500333786011, "learning_rate": 3.871480950224193e-07, "loss": 0.08326995372772217, "step": 6899 }, { "epoch": 0.9328263624841572, "grad_norm": 0.46419233083724976, "learning_rate": 3.856046300980498e-07, "loss": 0.06556767225265503, "step": 6900 }, { "epoch": 0.9329615547106042, "grad_norm": 0.17622384428977966, "learning_rate": 3.8406420796872953e-07, "loss": 0.0465550422668457, "step": 6901 }, { "epoch": 0.9330967469370511, "grad_norm": 0.36447086930274963, "learning_rate": 3.825268289551803e-07, "loss": 0.060393333435058594, "step": 6902 }, { "epoch": 0.9332319391634981, "grad_norm": 0.4901309609413147, "learning_rate": 3.8099249337749777e-07, "loss": 0.09293699264526367, "step": 6903 }, { "epoch": 0.9333671313899451, "grad_norm": 0.2701214849948883, "learning_rate": 3.7946120155513465e-07, "loss": 0.04080629348754883, "step": 6904 }, { "epoch": 0.9335023236163921, "grad_norm": 0.2428189516067505, "learning_rate": 3.7793295380691595e-07, "loss": 0.04715776443481445, "step": 6905 }, { "epoch": 0.933637515842839, "grad_norm": 0.4616331458091736, "learning_rate": 3.7640775045103214e-07, "loss": 0.07706832885742188, "step": 6906 }, { "epoch": 0.933772708069286, "grad_norm": 0.23217806220054626, "learning_rate": 3.7488559180503423e-07, "loss": 0.05143463611602783, "step": 6907 }, { "epoch": 0.933907900295733, "grad_norm": 0.33055245876312256, "learning_rate": 3.7336647818584866e-07, "loss": 0.049836158752441406, "step": 6908 }, { "epoch": 0.93404309252218, "grad_norm": 0.24888643622398376, "learning_rate": 3.718504099097625e-07, "loss": 0.03773140907287598, "step": 6909 }, { "epoch": 0.934178284748627, "grad_norm": 0.28889983892440796, "learning_rate": 3.703373872924265e-07, "loss": 0.05840158462524414, "step": 6910 }, { "epoch": 0.9343134769750739, "grad_norm": 0.29102545976638794, "learning_rate": 3.688274106488604e-07, "loss": 0.0663900375366211, "step": 6911 }, { "epoch": 0.9344486692015209, "grad_norm": 0.39317840337753296, "learning_rate": 3.67320480293451e-07, "loss": 0.0639181137084961, "step": 6912 }, { "epoch": 0.9345838614279679, "grad_norm": 0.35694122314453125, "learning_rate": 3.6581659653994736e-07, "loss": 0.06257116794586182, "step": 6913 }, { "epoch": 0.9347190536544149, "grad_norm": 0.42753785848617554, "learning_rate": 3.64315759701469e-07, "loss": 0.06165933609008789, "step": 6914 }, { "epoch": 0.9348542458808619, "grad_norm": 0.2149232029914856, "learning_rate": 3.6281797009049765e-07, "loss": 0.04876965284347534, "step": 6915 }, { "epoch": 0.9349894381073088, "grad_norm": 0.33355018496513367, "learning_rate": 3.613232280188772e-07, "loss": 0.07210326194763184, "step": 6916 }, { "epoch": 0.9351246303337558, "grad_norm": 0.24563607573509216, "learning_rate": 3.5983153379782363e-07, "loss": 0.04275798797607422, "step": 6917 }, { "epoch": 0.9352598225602028, "grad_norm": 0.3190612494945526, "learning_rate": 3.5834288773791854e-07, "loss": 0.055036067962646484, "step": 6918 }, { "epoch": 0.9353950147866498, "grad_norm": 0.6189736723899841, "learning_rate": 3.568572901491007e-07, "loss": 0.07856535911560059, "step": 6919 }, { "epoch": 0.9355302070130967, "grad_norm": 0.5020914673805237, "learning_rate": 3.553747413406827e-07, "loss": 0.07719254493713379, "step": 6920 }, { "epoch": 0.9356653992395437, "grad_norm": 0.3558565378189087, "learning_rate": 3.538952416213376e-07, "loss": 0.06122446060180664, "step": 6921 }, { "epoch": 0.9358005914659907, "grad_norm": 0.3230915665626526, "learning_rate": 3.524187912991056e-07, "loss": 0.07481193542480469, "step": 6922 }, { "epoch": 0.9359357836924377, "grad_norm": 0.22972339391708374, "learning_rate": 3.5094539068139254e-07, "loss": 0.04806637763977051, "step": 6923 }, { "epoch": 0.9360709759188847, "grad_norm": 0.24116207659244537, "learning_rate": 3.494750400749663e-07, "loss": 0.036308228969573975, "step": 6924 }, { "epoch": 0.9362061681453316, "grad_norm": 0.30507636070251465, "learning_rate": 3.480077397859638e-07, "loss": 0.05286884307861328, "step": 6925 }, { "epoch": 0.9363413603717786, "grad_norm": 0.2481544464826584, "learning_rate": 3.4654349011988384e-07, "loss": 0.06706428527832031, "step": 6926 }, { "epoch": 0.9364765525982256, "grad_norm": 0.5513602495193481, "learning_rate": 3.4508229138159095e-07, "loss": 0.07376748323440552, "step": 6927 }, { "epoch": 0.9366117448246726, "grad_norm": 0.27851566672325134, "learning_rate": 3.4362414387531516e-07, "loss": 0.04263782501220703, "step": 6928 }, { "epoch": 0.9367469370511196, "grad_norm": 0.3507442772388458, "learning_rate": 3.4216904790464854e-07, "loss": 0.0760190486907959, "step": 6929 }, { "epoch": 0.9368821292775665, "grad_norm": 0.38494110107421875, "learning_rate": 3.407170037725521e-07, "loss": 0.08167338371276855, "step": 6930 }, { "epoch": 0.9370173215040135, "grad_norm": 0.3074328303337097, "learning_rate": 3.3926801178134737e-07, "loss": 0.07122206687927246, "step": 6931 }, { "epoch": 0.9371525137304605, "grad_norm": 0.2962808310985565, "learning_rate": 3.3782207223272467e-07, "loss": 0.04630577564239502, "step": 6932 }, { "epoch": 0.9372877059569075, "grad_norm": 0.3505702018737793, "learning_rate": 3.363791854277348e-07, "loss": 0.06173086166381836, "step": 6933 }, { "epoch": 0.9374228981833544, "grad_norm": 0.4292900562286377, "learning_rate": 3.349393516667926e-07, "loss": 0.07289457321166992, "step": 6934 }, { "epoch": 0.9375580904098014, "grad_norm": 0.22617672383785248, "learning_rate": 3.335025712496814e-07, "loss": 0.051015377044677734, "step": 6935 }, { "epoch": 0.9376932826362484, "grad_norm": 0.3592321276664734, "learning_rate": 3.320688444755471e-07, "loss": 0.07427334785461426, "step": 6936 }, { "epoch": 0.9378284748626954, "grad_norm": 0.33925876021385193, "learning_rate": 3.306381716428991e-07, "loss": 0.08819043636322021, "step": 6937 }, { "epoch": 0.9379636670891424, "grad_norm": 0.3518395721912384, "learning_rate": 3.2921055304960925e-07, "loss": 0.07595157623291016, "step": 6938 }, { "epoch": 0.9380988593155893, "grad_norm": 0.32647407054901123, "learning_rate": 3.277859889929147e-07, "loss": 0.0714254379272461, "step": 6939 }, { "epoch": 0.9382340515420363, "grad_norm": 0.18644031882286072, "learning_rate": 3.263644797694215e-07, "loss": 0.04388880729675293, "step": 6940 }, { "epoch": 0.9383692437684833, "grad_norm": 0.14926472306251526, "learning_rate": 3.2494602567509303e-07, "loss": 0.035822153091430664, "step": 6941 }, { "epoch": 0.9385044359949303, "grad_norm": 0.3328128457069397, "learning_rate": 3.2353062700525794e-07, "loss": 0.08460092544555664, "step": 6942 }, { "epoch": 0.9386396282213773, "grad_norm": 0.22114552557468414, "learning_rate": 3.221182840546122e-07, "loss": 0.055501580238342285, "step": 6943 }, { "epoch": 0.9387748204478242, "grad_norm": 0.15361034870147705, "learning_rate": 3.207089971172089e-07, "loss": 0.040936946868896484, "step": 6944 }, { "epoch": 0.9389100126742712, "grad_norm": 0.2066817581653595, "learning_rate": 3.1930276648647504e-07, "loss": 0.05524873733520508, "step": 6945 }, { "epoch": 0.9390452049007182, "grad_norm": 0.26075878739356995, "learning_rate": 3.178995924551914e-07, "loss": 0.050370216369628906, "step": 6946 }, { "epoch": 0.9391803971271652, "grad_norm": 0.2528645098209381, "learning_rate": 3.164994753155059e-07, "loss": 0.05456829071044922, "step": 6947 }, { "epoch": 0.9393155893536121, "grad_norm": 0.29967832565307617, "learning_rate": 3.1510241535893215e-07, "loss": 0.07141304016113281, "step": 6948 }, { "epoch": 0.9394507815800591, "grad_norm": 0.28389590978622437, "learning_rate": 3.1370841287634567e-07, "loss": 0.05772805213928223, "step": 6949 }, { "epoch": 0.9395859738065061, "grad_norm": 0.32096779346466064, "learning_rate": 3.1231746815798436e-07, "loss": 0.06969261169433594, "step": 6950 }, { "epoch": 0.9397211660329531, "grad_norm": 0.20565739274024963, "learning_rate": 3.1092958149344985e-07, "loss": 0.04460740089416504, "step": 6951 }, { "epoch": 0.9398563582594001, "grad_norm": 0.18816794455051422, "learning_rate": 3.095447531717077e-07, "loss": 0.040885329246520996, "step": 6952 }, { "epoch": 0.939991550485847, "grad_norm": 0.524341881275177, "learning_rate": 3.08162983481089e-07, "loss": 0.08219385147094727, "step": 6953 }, { "epoch": 0.940126742712294, "grad_norm": 0.14859233796596527, "learning_rate": 3.067842727092801e-07, "loss": 0.03764939308166504, "step": 6954 }, { "epoch": 0.940261934938741, "grad_norm": 0.22179381549358368, "learning_rate": 3.0540862114334323e-07, "loss": 0.050228118896484375, "step": 6955 }, { "epoch": 0.940397127165188, "grad_norm": 0.2436661720275879, "learning_rate": 3.0403602906969086e-07, "loss": 0.04270893335342407, "step": 6956 }, { "epoch": 0.940532319391635, "grad_norm": 0.2125399112701416, "learning_rate": 3.0266649677410605e-07, "loss": 0.06122398376464844, "step": 6957 }, { "epoch": 0.9406675116180819, "grad_norm": 0.20838016271591187, "learning_rate": 3.0130002454173243e-07, "loss": 0.0460360050201416, "step": 6958 }, { "epoch": 0.9408027038445289, "grad_norm": 0.34980130195617676, "learning_rate": 2.9993661265707407e-07, "loss": 0.07094597816467285, "step": 6959 }, { "epoch": 0.9409378960709759, "grad_norm": 0.364028662443161, "learning_rate": 2.985762614040072e-07, "loss": 0.06134223937988281, "step": 6960 }, { "epoch": 0.9410730882974229, "grad_norm": 0.21083399653434753, "learning_rate": 2.972189710657586e-07, "loss": 0.04069042205810547, "step": 6961 }, { "epoch": 0.9412082805238698, "grad_norm": 0.16762679815292358, "learning_rate": 2.958647419249255e-07, "loss": 0.03345155715942383, "step": 6962 }, { "epoch": 0.9413434727503168, "grad_norm": 0.26082301139831543, "learning_rate": 2.9451357426346415e-07, "loss": 0.05316615104675293, "step": 6963 }, { "epoch": 0.9414786649767638, "grad_norm": 0.2976631224155426, "learning_rate": 2.9316546836269776e-07, "loss": 0.06584155559539795, "step": 6964 }, { "epoch": 0.9416138572032108, "grad_norm": 0.5661160349845886, "learning_rate": 2.9182042450330516e-07, "loss": 0.08023929595947266, "step": 6965 }, { "epoch": 0.9417490494296578, "grad_norm": 0.3103455603122711, "learning_rate": 2.9047844296533397e-07, "loss": 0.07018375396728516, "step": 6966 }, { "epoch": 0.9418842416561047, "grad_norm": 0.3112802803516388, "learning_rate": 2.8913952402819246e-07, "loss": 0.06319558620452881, "step": 6967 }, { "epoch": 0.9420194338825517, "grad_norm": 0.4048909842967987, "learning_rate": 2.878036679706492e-07, "loss": 0.08551788330078125, "step": 6968 }, { "epoch": 0.9421546261089987, "grad_norm": 0.37367865443229675, "learning_rate": 2.8647087507083837e-07, "loss": 0.0600590705871582, "step": 6969 }, { "epoch": 0.9422898183354457, "grad_norm": 0.567959725856781, "learning_rate": 2.8514114560625303e-07, "loss": 0.08487677574157715, "step": 6970 }, { "epoch": 0.9424250105618927, "grad_norm": 0.38909992575645447, "learning_rate": 2.8381447985375007e-07, "loss": 0.07983970642089844, "step": 6971 }, { "epoch": 0.9425602027883396, "grad_norm": 0.27797582745552063, "learning_rate": 2.8249087808954853e-07, "loss": 0.07405376434326172, "step": 6972 }, { "epoch": 0.9426953950147866, "grad_norm": 0.3441482186317444, "learning_rate": 2.811703405892296e-07, "loss": 0.08803129196166992, "step": 6973 }, { "epoch": 0.9428305872412336, "grad_norm": 0.21144941449165344, "learning_rate": 2.798528676277368e-07, "loss": 0.04326224327087402, "step": 6974 }, { "epoch": 0.9429657794676806, "grad_norm": 0.5733159184455872, "learning_rate": 2.785384594793738e-07, "loss": 0.061722517013549805, "step": 6975 }, { "epoch": 0.9431009716941275, "grad_norm": 0.4013594388961792, "learning_rate": 2.772271164178086e-07, "loss": 0.07347822189331055, "step": 6976 }, { "epoch": 0.9432361639205745, "grad_norm": 0.31069520115852356, "learning_rate": 2.759188387160677e-07, "loss": 0.07013297080993652, "step": 6977 }, { "epoch": 0.9433713561470215, "grad_norm": 0.37476569414138794, "learning_rate": 2.746136266465449e-07, "loss": 0.06091737747192383, "step": 6978 }, { "epoch": 0.9435065483734685, "grad_norm": 0.40252289175987244, "learning_rate": 2.7331148048098943e-07, "loss": 0.05612373352050781, "step": 6979 }, { "epoch": 0.9436417405999155, "grad_norm": 0.7355493307113647, "learning_rate": 2.7201240049051613e-07, "loss": 0.062238216400146484, "step": 6980 }, { "epoch": 0.9437769328263624, "grad_norm": 0.5777159333229065, "learning_rate": 2.707163869455986e-07, "loss": 0.0664830207824707, "step": 6981 }, { "epoch": 0.9439121250528094, "grad_norm": 0.28528687357902527, "learning_rate": 2.694234401160778e-07, "loss": 0.07567930221557617, "step": 6982 }, { "epoch": 0.9440473172792564, "grad_norm": 0.2785634398460388, "learning_rate": 2.6813356027114986e-07, "loss": 0.06937122344970703, "step": 6983 }, { "epoch": 0.9441825095057034, "grad_norm": 0.21990138292312622, "learning_rate": 2.6684674767937346e-07, "loss": 0.05024909973144531, "step": 6984 }, { "epoch": 0.9443177017321503, "grad_norm": 0.3015028238296509, "learning_rate": 2.655630026086708e-07, "loss": 0.08124089241027832, "step": 6985 }, { "epoch": 0.9444528939585973, "grad_norm": 0.2186887264251709, "learning_rate": 2.642823253263249e-07, "loss": 0.0534210205078125, "step": 6986 }, { "epoch": 0.9445880861850443, "grad_norm": 0.1894114762544632, "learning_rate": 2.630047160989807e-07, "loss": 0.04532814025878906, "step": 6987 }, { "epoch": 0.9447232784114913, "grad_norm": 0.41836366057395935, "learning_rate": 2.6173017519263875e-07, "loss": 0.07275712490081787, "step": 6988 }, { "epoch": 0.9448584706379383, "grad_norm": 0.3209652304649353, "learning_rate": 2.6045870287267014e-07, "loss": 0.035703182220458984, "step": 6989 }, { "epoch": 0.9449936628643854, "grad_norm": 0.451030969619751, "learning_rate": 2.5919029940380147e-07, "loss": 0.11251354217529297, "step": 6990 }, { "epoch": 0.9451288550908323, "grad_norm": 0.3431937098503113, "learning_rate": 2.5792496505011807e-07, "loss": 0.056937217712402344, "step": 6991 }, { "epoch": 0.9452640473172793, "grad_norm": 0.2512618601322174, "learning_rate": 2.5666270007507266e-07, "loss": 0.06287622451782227, "step": 6992 }, { "epoch": 0.9453992395437263, "grad_norm": 0.21283449232578278, "learning_rate": 2.5540350474147324e-07, "loss": 0.048261940479278564, "step": 6993 }, { "epoch": 0.9455344317701733, "grad_norm": 0.14424695074558258, "learning_rate": 2.5414737931149346e-07, "loss": 0.03175187110900879, "step": 6994 }, { "epoch": 0.9456696239966202, "grad_norm": 0.20185905694961548, "learning_rate": 2.5289432404666246e-07, "loss": 0.0544891357421875, "step": 6995 }, { "epoch": 0.9458048162230672, "grad_norm": 0.40715834498405457, "learning_rate": 2.5164433920787487e-07, "loss": 0.08652234077453613, "step": 6996 }, { "epoch": 0.9459400084495142, "grad_norm": 0.34804922342300415, "learning_rate": 2.503974250553842e-07, "loss": 0.07156586647033691, "step": 6997 }, { "epoch": 0.9460752006759612, "grad_norm": 0.3125251829624176, "learning_rate": 2.491535818488011e-07, "loss": 0.054769039154052734, "step": 6998 }, { "epoch": 0.9462103929024082, "grad_norm": 0.16978426277637482, "learning_rate": 2.479128098471067e-07, "loss": 0.05221843719482422, "step": 6999 }, { "epoch": 0.9463455851288551, "grad_norm": 0.2932808995246887, "learning_rate": 2.466751093086328e-07, "loss": 0.05717134475708008, "step": 7000 }, { "epoch": 0.9464807773553021, "grad_norm": 0.24437984824180603, "learning_rate": 2.454404804910748e-07, "loss": 0.06388866901397705, "step": 7001 }, { "epoch": 0.9466159695817491, "grad_norm": 0.34393393993377686, "learning_rate": 2.442089236514888e-07, "loss": 0.06814050674438477, "step": 7002 }, { "epoch": 0.9467511618081961, "grad_norm": 0.21317288279533386, "learning_rate": 2.429804390462931e-07, "loss": 0.046701908111572266, "step": 7003 }, { "epoch": 0.946886354034643, "grad_norm": 0.39425522089004517, "learning_rate": 2.4175502693126293e-07, "loss": 0.08556795120239258, "step": 7004 }, { "epoch": 0.94702154626109, "grad_norm": 0.34288930892944336, "learning_rate": 2.4053268756153933e-07, "loss": 0.05003559589385986, "step": 7005 }, { "epoch": 0.947156738487537, "grad_norm": 0.3130124807357788, "learning_rate": 2.393134211916154e-07, "loss": 0.08249187469482422, "step": 7006 }, { "epoch": 0.947291930713984, "grad_norm": 0.43975111842155457, "learning_rate": 2.3809722807535128e-07, "loss": 0.07442378997802734, "step": 7007 }, { "epoch": 0.947427122940431, "grad_norm": 0.2952204644680023, "learning_rate": 2.3688410846596287e-07, "loss": 0.07238435745239258, "step": 7008 }, { "epoch": 0.9475623151668779, "grad_norm": 0.1816512793302536, "learning_rate": 2.3567406261603143e-07, "loss": 0.043540239334106445, "step": 7009 }, { "epoch": 0.9476975073933249, "grad_norm": 0.3818410336971283, "learning_rate": 2.3446709077749206e-07, "loss": 0.06792497634887695, "step": 7010 }, { "epoch": 0.9478326996197719, "grad_norm": 0.3031005561351776, "learning_rate": 2.3326319320164546e-07, "loss": 0.06437397003173828, "step": 7011 }, { "epoch": 0.9479678918462189, "grad_norm": 0.28739050030708313, "learning_rate": 2.320623701391461e-07, "loss": 0.054616451263427734, "step": 7012 }, { "epoch": 0.9481030840726659, "grad_norm": 0.2807123363018036, "learning_rate": 2.30864621840014e-07, "loss": 0.05580496788024902, "step": 7013 }, { "epoch": 0.9482382762991128, "grad_norm": 0.35229331254959106, "learning_rate": 2.2966994855362633e-07, "loss": 0.060595035552978516, "step": 7014 }, { "epoch": 0.9483734685255598, "grad_norm": 0.38257506489753723, "learning_rate": 2.2847835052872079e-07, "loss": 0.08170199394226074, "step": 7015 }, { "epoch": 0.9485086607520068, "grad_norm": 0.23198705911636353, "learning_rate": 2.2728982801339392e-07, "loss": 0.049857258796691895, "step": 7016 }, { "epoch": 0.9486438529784538, "grad_norm": 0.5656779408454895, "learning_rate": 2.261043812551028e-07, "loss": 0.07619524002075195, "step": 7017 }, { "epoch": 0.9487790452049008, "grad_norm": 0.34326377511024475, "learning_rate": 2.249220105006633e-07, "loss": 0.0723569393157959, "step": 7018 }, { "epoch": 0.9489142374313477, "grad_norm": 0.872576892375946, "learning_rate": 2.2374271599625185e-07, "loss": 0.06780195236206055, "step": 7019 }, { "epoch": 0.9490494296577947, "grad_norm": 0.3512718975543976, "learning_rate": 2.2256649798740204e-07, "loss": 0.07598590850830078, "step": 7020 }, { "epoch": 0.9491846218842417, "grad_norm": 0.20659223198890686, "learning_rate": 2.2139335671901294e-07, "loss": 0.048117876052856445, "step": 7021 }, { "epoch": 0.9493198141106887, "grad_norm": 0.4059467613697052, "learning_rate": 2.2022329243533422e-07, "loss": 0.07088565826416016, "step": 7022 }, { "epoch": 0.9494550063371356, "grad_norm": 0.25918686389923096, "learning_rate": 2.19056305379981e-07, "loss": 0.0582733154296875, "step": 7023 }, { "epoch": 0.9495901985635826, "grad_norm": 0.4293784201145172, "learning_rate": 2.178923957959289e-07, "loss": 0.08427047729492188, "step": 7024 }, { "epoch": 0.9497253907900296, "grad_norm": 0.3285316526889801, "learning_rate": 2.1673156392550408e-07, "loss": 0.07593441009521484, "step": 7025 }, { "epoch": 0.9498605830164766, "grad_norm": 0.21758310496807098, "learning_rate": 2.155738100104049e-07, "loss": 0.052509307861328125, "step": 7026 }, { "epoch": 0.9499957752429236, "grad_norm": 0.45218056440353394, "learning_rate": 2.1441913429167682e-07, "loss": 0.08870363235473633, "step": 7027 }, { "epoch": 0.9501309674693705, "grad_norm": 0.24682217836380005, "learning_rate": 2.1326753700973256e-07, "loss": 0.05330920219421387, "step": 7028 }, { "epoch": 0.9502661596958175, "grad_norm": 0.22413359582424164, "learning_rate": 2.1211901840434034e-07, "loss": 0.050739288330078125, "step": 7029 }, { "epoch": 0.9504013519222645, "grad_norm": 0.3305968940258026, "learning_rate": 2.1097357871462386e-07, "loss": 0.07822704315185547, "step": 7030 }, { "epoch": 0.9505365441487115, "grad_norm": 0.6246079206466675, "learning_rate": 2.098312181790757e-07, "loss": 0.10966968536376953, "step": 7031 }, { "epoch": 0.9506717363751584, "grad_norm": 0.20123112201690674, "learning_rate": 2.086919370355389e-07, "loss": 0.0447077751159668, "step": 7032 }, { "epoch": 0.9508069286016054, "grad_norm": 0.255611777305603, "learning_rate": 2.075557355212171e-07, "loss": 0.052898406982421875, "step": 7033 }, { "epoch": 0.9509421208280524, "grad_norm": 0.3235122263431549, "learning_rate": 2.0642261387267268e-07, "loss": 0.0729207992553711, "step": 7034 }, { "epoch": 0.9510773130544994, "grad_norm": 0.28758180141448975, "learning_rate": 2.0529257232583033e-07, "loss": 0.08346271514892578, "step": 7035 }, { "epoch": 0.9512125052809464, "grad_norm": 0.4493849277496338, "learning_rate": 2.0416561111596844e-07, "loss": 0.0783393383026123, "step": 7036 }, { "epoch": 0.9513476975073933, "grad_norm": 0.3834860920906067, "learning_rate": 2.0304173047772933e-07, "loss": 0.08037376403808594, "step": 7037 }, { "epoch": 0.9514828897338403, "grad_norm": 0.4030027389526367, "learning_rate": 2.0192093064510753e-07, "loss": 0.07949918508529663, "step": 7038 }, { "epoch": 0.9516180819602873, "grad_norm": 0.2951275408267975, "learning_rate": 2.0080321185146134e-07, "loss": 0.06663179397583008, "step": 7039 }, { "epoch": 0.9517532741867343, "grad_norm": 0.3480106592178345, "learning_rate": 1.996885743295046e-07, "loss": 0.0656588077545166, "step": 7040 }, { "epoch": 0.9518884664131813, "grad_norm": 0.2948501706123352, "learning_rate": 1.985770183113117e-07, "loss": 0.07940053939819336, "step": 7041 }, { "epoch": 0.9520236586396282, "grad_norm": 0.40841129422187805, "learning_rate": 1.9746854402831583e-07, "loss": 0.06979656219482422, "step": 7042 }, { "epoch": 0.9521588508660752, "grad_norm": 0.2220703512430191, "learning_rate": 1.963631517113057e-07, "loss": 0.036959171295166016, "step": 7043 }, { "epoch": 0.9522940430925222, "grad_norm": 0.4347338378429413, "learning_rate": 1.952608415904289e-07, "loss": 0.07242012023925781, "step": 7044 }, { "epoch": 0.9524292353189692, "grad_norm": 0.23542477190494537, "learning_rate": 1.9416161389519348e-07, "loss": 0.05971503257751465, "step": 7045 }, { "epoch": 0.9525644275454161, "grad_norm": 0.3125111758708954, "learning_rate": 1.9306546885446475e-07, "loss": 0.06570959091186523, "step": 7046 }, { "epoch": 0.9526996197718631, "grad_norm": 0.3278358280658722, "learning_rate": 1.919724066964651e-07, "loss": 0.05412447452545166, "step": 7047 }, { "epoch": 0.9528348119983101, "grad_norm": 0.3037605285644531, "learning_rate": 1.908824276487775e-07, "loss": 0.05943584442138672, "step": 7048 }, { "epoch": 0.9529700042247571, "grad_norm": 0.3124946057796478, "learning_rate": 1.8979553193833876e-07, "loss": 0.06500482559204102, "step": 7049 }, { "epoch": 0.9531051964512041, "grad_norm": 0.2885887026786804, "learning_rate": 1.8871171979144786e-07, "loss": 0.05474728345870972, "step": 7050 }, { "epoch": 0.953240388677651, "grad_norm": 0.2927285432815552, "learning_rate": 1.8763099143376262e-07, "loss": 0.06365370750427246, "step": 7051 }, { "epoch": 0.953375580904098, "grad_norm": 0.40386590361595154, "learning_rate": 1.8655334709029303e-07, "loss": 0.08675789833068848, "step": 7052 }, { "epoch": 0.953510773130545, "grad_norm": 0.19471007585525513, "learning_rate": 1.8547878698541132e-07, "loss": 0.04690289497375488, "step": 7053 }, { "epoch": 0.953645965356992, "grad_norm": 0.29762133955955505, "learning_rate": 1.8440731134284684e-07, "loss": 0.04564952850341797, "step": 7054 }, { "epoch": 0.953781157583439, "grad_norm": 0.341513991355896, "learning_rate": 1.833389203856861e-07, "loss": 0.07873868942260742, "step": 7055 }, { "epoch": 0.9539163498098859, "grad_norm": 0.24388428032398224, "learning_rate": 1.8227361433637625e-07, "loss": 0.06833457946777344, "step": 7056 }, { "epoch": 0.9540515420363329, "grad_norm": 0.2513248026371002, "learning_rate": 1.812113934167148e-07, "loss": 0.07186603546142578, "step": 7057 }, { "epoch": 0.9541867342627799, "grad_norm": 0.263601154088974, "learning_rate": 1.8015225784786483e-07, "loss": 0.06045389175415039, "step": 7058 }, { "epoch": 0.9543219264892269, "grad_norm": 0.34621843695640564, "learning_rate": 1.7909620785034663e-07, "loss": 0.07863807678222656, "step": 7059 }, { "epoch": 0.9544571187156738, "grad_norm": 0.24743366241455078, "learning_rate": 1.7804324364402936e-07, "loss": 0.056606292724609375, "step": 7060 }, { "epoch": 0.9545923109421208, "grad_norm": 0.3017016053199768, "learning_rate": 1.769933654481526e-07, "loss": 0.07212352752685547, "step": 7061 }, { "epoch": 0.9547275031685678, "grad_norm": 0.2539996802806854, "learning_rate": 1.7594657348129984e-07, "loss": 0.06033825874328613, "step": 7062 }, { "epoch": 0.9548626953950148, "grad_norm": 0.24367156624794006, "learning_rate": 1.749028679614234e-07, "loss": 0.0654909610748291, "step": 7063 }, { "epoch": 0.9549978876214618, "grad_norm": 0.501304566860199, "learning_rate": 1.7386224910582615e-07, "loss": 0.0479697585105896, "step": 7064 }, { "epoch": 0.9551330798479087, "grad_norm": 0.21164806187152863, "learning_rate": 1.728247171311731e-07, "loss": 0.05570030212402344, "step": 7065 }, { "epoch": 0.9552682720743557, "grad_norm": 0.20658764243125916, "learning_rate": 1.7179027225348142e-07, "loss": 0.05929446220397949, "step": 7066 }, { "epoch": 0.9554034643008027, "grad_norm": 0.2435864955186844, "learning_rate": 1.7075891468812722e-07, "loss": 0.05292034149169922, "step": 7067 }, { "epoch": 0.9555386565272497, "grad_norm": 0.2956370711326599, "learning_rate": 1.69730644649847e-07, "loss": 0.0629395842552185, "step": 7068 }, { "epoch": 0.9556738487536967, "grad_norm": 0.30187246203422546, "learning_rate": 1.687054623527312e-07, "loss": 0.0683140754699707, "step": 7069 }, { "epoch": 0.9558090409801436, "grad_norm": 0.1824604570865631, "learning_rate": 1.676833680102291e-07, "loss": 0.03387176990509033, "step": 7070 }, { "epoch": 0.9559442332065906, "grad_norm": 0.15600618720054626, "learning_rate": 1.6666436183514378e-07, "loss": 0.03223252296447754, "step": 7071 }, { "epoch": 0.9560794254330376, "grad_norm": 0.21348808705806732, "learning_rate": 1.6564844403964053e-07, "loss": 0.05842113494873047, "step": 7072 }, { "epoch": 0.9562146176594846, "grad_norm": 0.22913876175880432, "learning_rate": 1.6463561483523682e-07, "loss": 0.04892158508300781, "step": 7073 }, { "epoch": 0.9563498098859315, "grad_norm": 0.279837429523468, "learning_rate": 1.6362587443281063e-07, "loss": 0.05359315872192383, "step": 7074 }, { "epoch": 0.9564850021123785, "grad_norm": 0.40154993534088135, "learning_rate": 1.626192230425938e-07, "loss": 0.07887077331542969, "step": 7075 }, { "epoch": 0.9566201943388255, "grad_norm": 0.43644243478775024, "learning_rate": 1.6161566087417868e-07, "loss": 0.08739185333251953, "step": 7076 }, { "epoch": 0.9567553865652725, "grad_norm": 0.2820413410663605, "learning_rate": 1.6061518813650977e-07, "loss": 0.06787729263305664, "step": 7077 }, { "epoch": 0.9568905787917195, "grad_norm": 0.27579060196876526, "learning_rate": 1.5961780503789215e-07, "loss": 0.050455570220947266, "step": 7078 }, { "epoch": 0.9570257710181664, "grad_norm": 0.2353002429008484, "learning_rate": 1.5862351178598633e-07, "loss": 0.055228233337402344, "step": 7079 }, { "epoch": 0.9571609632446134, "grad_norm": 0.22778448462486267, "learning_rate": 1.5763230858781008e-07, "loss": 0.04465299844741821, "step": 7080 }, { "epoch": 0.9572961554710604, "grad_norm": 0.3392725884914398, "learning_rate": 1.5664419564973497e-07, "loss": 0.06159639358520508, "step": 7081 }, { "epoch": 0.9574313476975074, "grad_norm": 0.31525975465774536, "learning_rate": 1.5565917317749146e-07, "loss": 0.07815194129943848, "step": 7082 }, { "epoch": 0.9575665399239544, "grad_norm": 0.28295913338661194, "learning_rate": 1.5467724137617046e-07, "loss": 0.06441307067871094, "step": 7083 }, { "epoch": 0.9577017321504013, "grad_norm": 0.30150237679481506, "learning_rate": 1.5369840045021178e-07, "loss": 0.06901931762695312, "step": 7084 }, { "epoch": 0.9578369243768483, "grad_norm": 0.2961951792240143, "learning_rate": 1.5272265060341572e-07, "loss": 0.06909561157226562, "step": 7085 }, { "epoch": 0.9579721166032953, "grad_norm": 0.18949323892593384, "learning_rate": 1.517499920389398e-07, "loss": 0.04059183597564697, "step": 7086 }, { "epoch": 0.9581073088297423, "grad_norm": 0.373098760843277, "learning_rate": 1.5078042495929534e-07, "loss": 0.07132530212402344, "step": 7087 }, { "epoch": 0.9582425010561892, "grad_norm": 0.3374219536781311, "learning_rate": 1.498139495663542e-07, "loss": 0.06792446970939636, "step": 7088 }, { "epoch": 0.9583776932826362, "grad_norm": 0.46853941679000854, "learning_rate": 1.4885056606133707e-07, "loss": 0.08032035827636719, "step": 7089 }, { "epoch": 0.9585128855090832, "grad_norm": 0.1993354856967926, "learning_rate": 1.478902746448302e-07, "loss": 0.04134368896484375, "step": 7090 }, { "epoch": 0.9586480777355302, "grad_norm": 0.34106630086898804, "learning_rate": 1.469330755167686e-07, "loss": 0.06944561004638672, "step": 7091 }, { "epoch": 0.9587832699619772, "grad_norm": 0.3594677150249481, "learning_rate": 1.4597896887644458e-07, "loss": 0.06741833686828613, "step": 7092 }, { "epoch": 0.9589184621884241, "grad_norm": 0.3508850038051605, "learning_rate": 1.4502795492251418e-07, "loss": 0.0678098201751709, "step": 7093 }, { "epoch": 0.9590536544148711, "grad_norm": 0.16216158866882324, "learning_rate": 1.4408003385297742e-07, "loss": 0.03735470771789551, "step": 7094 }, { "epoch": 0.9591888466413181, "grad_norm": 0.2320837825536728, "learning_rate": 1.4313520586519968e-07, "loss": 0.04517108201980591, "step": 7095 }, { "epoch": 0.9593240388677651, "grad_norm": 0.2104930579662323, "learning_rate": 1.4219347115589863e-07, "loss": 0.04501771926879883, "step": 7096 }, { "epoch": 0.959459231094212, "grad_norm": 0.21901686489582062, "learning_rate": 1.4125482992114914e-07, "loss": 0.051828861236572266, "step": 7097 }, { "epoch": 0.959594423320659, "grad_norm": 0.2667662501335144, "learning_rate": 1.403192823563798e-07, "loss": 0.05644547939300537, "step": 7098 }, { "epoch": 0.959729615547106, "grad_norm": 0.33934083580970764, "learning_rate": 1.3938682865637654e-07, "loss": 0.07100439071655273, "step": 7099 }, { "epoch": 0.959864807773553, "grad_norm": 0.5984017848968506, "learning_rate": 1.38457469015284e-07, "loss": 0.07280826568603516, "step": 7100 }, { "epoch": 0.96, "grad_norm": 1.2662726640701294, "learning_rate": 1.3753120362659576e-07, "loss": 0.05173593759536743, "step": 7101 }, { "epoch": 0.9601351922264469, "grad_norm": 0.3209396004676819, "learning_rate": 1.3660803268316925e-07, "loss": 0.06452083587646484, "step": 7102 }, { "epoch": 0.9602703844528939, "grad_norm": 0.23695382475852966, "learning_rate": 1.3568795637721065e-07, "loss": 0.058144211769104004, "step": 7103 }, { "epoch": 0.9604055766793409, "grad_norm": 0.3340555429458618, "learning_rate": 1.347709749002851e-07, "loss": 0.06425333023071289, "step": 7104 }, { "epoch": 0.9605407689057879, "grad_norm": 0.37709692120552063, "learning_rate": 1.338570884433149e-07, "loss": 0.07504653930664062, "step": 7105 }, { "epoch": 0.9606759611322349, "grad_norm": 0.44086870551109314, "learning_rate": 1.3294629719657448e-07, "loss": 0.09051513671875, "step": 7106 }, { "epoch": 0.9608111533586818, "grad_norm": 0.27949607372283936, "learning_rate": 1.3203860134969548e-07, "loss": 0.07896995544433594, "step": 7107 }, { "epoch": 0.9609463455851288, "grad_norm": 0.18890832364559174, "learning_rate": 1.3113400109166508e-07, "loss": 0.04881572723388672, "step": 7108 }, { "epoch": 0.9610815378115758, "grad_norm": 0.7075672149658203, "learning_rate": 1.3023249661082592e-07, "loss": 0.10370111465454102, "step": 7109 }, { "epoch": 0.9612167300380228, "grad_norm": 0.38277676701545715, "learning_rate": 1.2933408809487623e-07, "loss": 0.08023452758789062, "step": 7110 }, { "epoch": 0.9613519222644697, "grad_norm": 0.35066574811935425, "learning_rate": 1.2843877573086972e-07, "loss": 0.0773056149482727, "step": 7111 }, { "epoch": 0.9614871144909167, "grad_norm": 0.17431510984897614, "learning_rate": 1.2754655970521556e-07, "loss": 0.03415632247924805, "step": 7112 }, { "epoch": 0.9616223067173637, "grad_norm": 0.32355645298957825, "learning_rate": 1.2665744020367686e-07, "loss": 0.07684391736984253, "step": 7113 }, { "epoch": 0.9617574989438107, "grad_norm": 0.2118271142244339, "learning_rate": 1.2577141741137388e-07, "loss": 0.053942203521728516, "step": 7114 }, { "epoch": 0.9618926911702577, "grad_norm": 0.4489447772502899, "learning_rate": 1.248884915127807e-07, "loss": 0.07523274421691895, "step": 7115 }, { "epoch": 0.9620278833967046, "grad_norm": 0.13798269629478455, "learning_rate": 1.2400866269172694e-07, "loss": 0.038768768310546875, "step": 7116 }, { "epoch": 0.9621630756231516, "grad_norm": 0.56400465965271, "learning_rate": 1.2313193113139777e-07, "loss": 0.07834219932556152, "step": 7117 }, { "epoch": 0.9622982678495986, "grad_norm": 0.22872433066368103, "learning_rate": 1.2225829701433545e-07, "loss": 0.04670572280883789, "step": 7118 }, { "epoch": 0.9624334600760456, "grad_norm": 0.18286460638046265, "learning_rate": 1.2138776052243116e-07, "loss": 0.041526854038238525, "step": 7119 }, { "epoch": 0.9625686523024926, "grad_norm": 0.40548887848854065, "learning_rate": 1.2052032183693996e-07, "loss": 0.07451128959655762, "step": 7120 }, { "epoch": 0.9627038445289395, "grad_norm": 0.43057578802108765, "learning_rate": 1.196559811384623e-07, "loss": 0.09782171249389648, "step": 7121 }, { "epoch": 0.9628390367553865, "grad_norm": 0.3550565540790558, "learning_rate": 1.1879473860696266e-07, "loss": 0.06103086471557617, "step": 7122 }, { "epoch": 0.9629742289818335, "grad_norm": 0.5130022168159485, "learning_rate": 1.179365944217542e-07, "loss": 0.09292316436767578, "step": 7123 }, { "epoch": 0.9631094212082805, "grad_norm": 0.20128019154071808, "learning_rate": 1.1708154876150735e-07, "loss": 0.05059695243835449, "step": 7124 }, { "epoch": 0.9632446134347274, "grad_norm": 0.16424371302127838, "learning_rate": 1.1622960180424801e-07, "loss": 0.0390017032623291, "step": 7125 }, { "epoch": 0.9633798056611745, "grad_norm": 0.3007664978504181, "learning_rate": 1.1538075372735435e-07, "loss": 0.06849813461303711, "step": 7126 }, { "epoch": 0.9635149978876215, "grad_norm": 0.33393779397010803, "learning_rate": 1.1453500470756328e-07, "loss": 0.0654149055480957, "step": 7127 }, { "epoch": 0.9636501901140685, "grad_norm": 0.36746785044670105, "learning_rate": 1.1369235492096397e-07, "loss": 0.07008492946624756, "step": 7128 }, { "epoch": 0.9637853823405155, "grad_norm": 0.24565951526165009, "learning_rate": 1.1285280454299774e-07, "loss": 0.05627632141113281, "step": 7129 }, { "epoch": 0.9639205745669625, "grad_norm": 0.2792106568813324, "learning_rate": 1.1201635374846808e-07, "loss": 0.04860877990722656, "step": 7130 }, { "epoch": 0.9640557667934094, "grad_norm": 0.2769196033477783, "learning_rate": 1.1118300271152404e-07, "loss": 0.05370759963989258, "step": 7131 }, { "epoch": 0.9641909590198564, "grad_norm": 0.32462385296821594, "learning_rate": 1.1035275160567682e-07, "loss": 0.050774216651916504, "step": 7132 }, { "epoch": 0.9643261512463034, "grad_norm": 0.29091402888298035, "learning_rate": 1.0952560060378813e-07, "loss": 0.06504011154174805, "step": 7133 }, { "epoch": 0.9644613434727504, "grad_norm": 0.24630087614059448, "learning_rate": 1.0870154987807523e-07, "loss": 0.059615135192871094, "step": 7134 }, { "epoch": 0.9645965356991973, "grad_norm": 0.36789265275001526, "learning_rate": 1.0788059960010921e-07, "loss": 0.06681013107299805, "step": 7135 }, { "epoch": 0.9647317279256443, "grad_norm": 0.5074768662452698, "learning_rate": 1.0706274994081499e-07, "loss": 0.0800933837890625, "step": 7136 }, { "epoch": 0.9648669201520913, "grad_norm": 0.3125888407230377, "learning_rate": 1.0624800107047805e-07, "loss": 0.05780029296875, "step": 7137 }, { "epoch": 0.9650021123785383, "grad_norm": 0.24129152297973633, "learning_rate": 1.0543635315872934e-07, "loss": 0.05544567108154297, "step": 7138 }, { "epoch": 0.9651373046049853, "grad_norm": 0.22976483404636383, "learning_rate": 1.0462780637455871e-07, "loss": 0.05627632141113281, "step": 7139 }, { "epoch": 0.9652724968314322, "grad_norm": 0.36747923493385315, "learning_rate": 1.0382236088631148e-07, "loss": 0.07140541076660156, "step": 7140 }, { "epoch": 0.9654076890578792, "grad_norm": 0.37640753388404846, "learning_rate": 1.0302001686168349e-07, "loss": 0.07625985145568848, "step": 7141 }, { "epoch": 0.9655428812843262, "grad_norm": 0.3509242832660675, "learning_rate": 1.0222077446772949e-07, "loss": 0.0732278823852539, "step": 7142 }, { "epoch": 0.9656780735107732, "grad_norm": 0.3466155230998993, "learning_rate": 1.0142463387085465e-07, "loss": 0.05558037757873535, "step": 7143 }, { "epoch": 0.9658132657372202, "grad_norm": 0.36956116557121277, "learning_rate": 1.0063159523682142e-07, "loss": 0.06895875930786133, "step": 7144 }, { "epoch": 0.9659484579636671, "grad_norm": 0.15760314464569092, "learning_rate": 9.984165873074102e-08, "loss": 0.03542661666870117, "step": 7145 }, { "epoch": 0.9660836501901141, "grad_norm": 0.2602253258228302, "learning_rate": 9.905482451708526e-08, "loss": 0.06183439493179321, "step": 7146 }, { "epoch": 0.9662188424165611, "grad_norm": 0.32091230154037476, "learning_rate": 9.827109275967638e-08, "loss": 0.06027671694755554, "step": 7147 }, { "epoch": 0.9663540346430081, "grad_norm": 0.8618775010108948, "learning_rate": 9.749046362169223e-08, "loss": 0.08211898803710938, "step": 7148 }, { "epoch": 0.966489226869455, "grad_norm": 0.18240094184875488, "learning_rate": 9.671293726566443e-08, "loss": 0.042287349700927734, "step": 7149 }, { "epoch": 0.966624419095902, "grad_norm": 0.22764787077903748, "learning_rate": 9.593851385347518e-08, "loss": 0.05000448226928711, "step": 7150 }, { "epoch": 0.966759611322349, "grad_norm": 0.22367732226848602, "learning_rate": 9.516719354636716e-08, "loss": 0.04016005992889404, "step": 7151 }, { "epoch": 0.966894803548796, "grad_norm": 0.16878516972064972, "learning_rate": 9.439897650493024e-08, "loss": 0.04114675521850586, "step": 7152 }, { "epoch": 0.967029995775243, "grad_norm": 0.41311609745025635, "learning_rate": 9.363386288911313e-08, "loss": 0.07525777816772461, "step": 7153 }, { "epoch": 0.9671651880016899, "grad_norm": 0.48723217844963074, "learning_rate": 9.287185285821675e-08, "loss": 0.06959009170532227, "step": 7154 }, { "epoch": 0.9673003802281369, "grad_norm": 0.3045305907726288, "learning_rate": 9.211294657089587e-08, "loss": 0.07018661499023438, "step": 7155 }, { "epoch": 0.9674355724545839, "grad_norm": 0.39163950085639954, "learning_rate": 9.135714418515573e-08, "loss": 0.07484912872314453, "step": 7156 }, { "epoch": 0.9675707646810309, "grad_norm": 0.2506008744239807, "learning_rate": 9.060444585836381e-08, "loss": 0.0506129264831543, "step": 7157 }, { "epoch": 0.9677059569074778, "grad_norm": 0.3191521167755127, "learning_rate": 8.985485174722974e-08, "loss": 0.07621145248413086, "step": 7158 }, { "epoch": 0.9678411491339248, "grad_norm": 0.25807011127471924, "learning_rate": 8.910836200782868e-08, "loss": 0.053945839405059814, "step": 7159 }, { "epoch": 0.9679763413603718, "grad_norm": 0.15680165588855743, "learning_rate": 8.836497679557964e-08, "loss": 0.04192084074020386, "step": 7160 }, { "epoch": 0.9681115335868188, "grad_norm": 0.3204350769519806, "learning_rate": 8.762469626526048e-08, "loss": 0.06955146789550781, "step": 7161 }, { "epoch": 0.9682467258132658, "grad_norm": 0.16617003083229065, "learning_rate": 8.688752057100457e-08, "loss": 0.041983604431152344, "step": 7162 }, { "epoch": 0.9683819180397127, "grad_norm": 0.29832717776298523, "learning_rate": 8.615344986629082e-08, "loss": 0.06909561157226562, "step": 7163 }, { "epoch": 0.9685171102661597, "grad_norm": 0.24804559350013733, "learning_rate": 8.542248430396027e-08, "loss": 0.05909538269042969, "step": 7164 }, { "epoch": 0.9686523024926067, "grad_norm": 0.26386067271232605, "learning_rate": 8.469462403620287e-08, "loss": 0.06704902648925781, "step": 7165 }, { "epoch": 0.9687874947190537, "grad_norm": 0.2206057906150818, "learning_rate": 8.39698692145624e-08, "loss": 0.04303264617919922, "step": 7166 }, { "epoch": 0.9689226869455007, "grad_norm": 0.4482867121696472, "learning_rate": 8.324821998993648e-08, "loss": 0.08037328720092773, "step": 7167 }, { "epoch": 0.9690578791719476, "grad_norm": 0.28336599469184875, "learning_rate": 8.252967651257826e-08, "loss": 0.06237316131591797, "step": 7168 }, { "epoch": 0.9691930713983946, "grad_norm": 0.31158414483070374, "learning_rate": 8.181423893208973e-08, "loss": 0.05685222148895264, "step": 7169 }, { "epoch": 0.9693282636248416, "grad_norm": 0.4446757733821869, "learning_rate": 8.110190739743172e-08, "loss": 0.0708761215209961, "step": 7170 }, { "epoch": 0.9694634558512886, "grad_norm": 0.2637402415275574, "learning_rate": 8.03926820569123e-08, "loss": 0.06328153610229492, "step": 7171 }, { "epoch": 0.9695986480777355, "grad_norm": 0.34525594115257263, "learning_rate": 7.968656305819833e-08, "loss": 0.0838625431060791, "step": 7172 }, { "epoch": 0.9697338403041825, "grad_norm": 0.34984028339385986, "learning_rate": 7.898355054830719e-08, "loss": 0.06069469451904297, "step": 7173 }, { "epoch": 0.9698690325306295, "grad_norm": 0.21088449656963348, "learning_rate": 7.828364467360849e-08, "loss": 0.04669666290283203, "step": 7174 }, { "epoch": 0.9700042247570765, "grad_norm": 0.35954582691192627, "learning_rate": 7.758684557982731e-08, "loss": 0.06922340393066406, "step": 7175 }, { "epoch": 0.9701394169835235, "grad_norm": 0.4271712899208069, "learning_rate": 7.689315341204262e-08, "loss": 0.08757305145263672, "step": 7176 }, { "epoch": 0.9702746092099704, "grad_norm": 0.3467499613761902, "learning_rate": 7.62025683146822e-08, "loss": 0.05325937271118164, "step": 7177 }, { "epoch": 0.9704098014364174, "grad_norm": 0.23939074575901031, "learning_rate": 7.551509043152937e-08, "loss": 0.061994075775146484, "step": 7178 }, { "epoch": 0.9705449936628644, "grad_norm": 0.2400912642478943, "learning_rate": 7.483071990572132e-08, "loss": 0.05511891841888428, "step": 7179 }, { "epoch": 0.9706801858893114, "grad_norm": 0.23460254073143005, "learning_rate": 7.414945687975072e-08, "loss": 0.03956270217895508, "step": 7180 }, { "epoch": 0.9708153781157584, "grad_norm": 0.25749945640563965, "learning_rate": 7.347130149545578e-08, "loss": 0.05767488479614258, "step": 7181 }, { "epoch": 0.9709505703422053, "grad_norm": 0.32734954357147217, "learning_rate": 7.279625389403355e-08, "loss": 0.06890296936035156, "step": 7182 }, { "epoch": 0.9710857625686523, "grad_norm": 0.2572149932384491, "learning_rate": 7.212431421603327e-08, "loss": 0.0688391923904419, "step": 7183 }, { "epoch": 0.9712209547950993, "grad_norm": 0.3380451202392578, "learning_rate": 7.145548260135638e-08, "loss": 0.07073593139648438, "step": 7184 }, { "epoch": 0.9713561470215463, "grad_norm": 0.463604211807251, "learning_rate": 7.078975918925645e-08, "loss": 0.08678054809570312, "step": 7185 }, { "epoch": 0.9714913392479932, "grad_norm": 0.320751816034317, "learning_rate": 7.012714411834098e-08, "loss": 0.08072137832641602, "step": 7186 }, { "epoch": 0.9716265314744402, "grad_norm": 0.2804392874240875, "learning_rate": 6.946763752656959e-08, "loss": 0.06387472152709961, "step": 7187 }, { "epoch": 0.9717617237008872, "grad_norm": 0.3662104606628418, "learning_rate": 6.881123955125579e-08, "loss": 0.07719039916992188, "step": 7188 }, { "epoch": 0.9718969159273342, "grad_norm": 0.19144761562347412, "learning_rate": 6.815795032906524e-08, "loss": 0.050726890563964844, "step": 7189 }, { "epoch": 0.9720321081537812, "grad_norm": 0.19675759971141815, "learning_rate": 6.750776999601415e-08, "loss": 0.04305756092071533, "step": 7190 }, { "epoch": 0.9721673003802281, "grad_norm": 0.45745009183883667, "learning_rate": 6.68606986874759e-08, "loss": 0.07642531394958496, "step": 7191 }, { "epoch": 0.9723024926066751, "grad_norm": 0.23230880498886108, "learning_rate": 6.62167365381744e-08, "loss": 0.05736708641052246, "step": 7192 }, { "epoch": 0.9724376848331221, "grad_norm": 0.29215314984321594, "learning_rate": 6.557588368218237e-08, "loss": 0.06390190124511719, "step": 7193 }, { "epoch": 0.9725728770595691, "grad_norm": 0.2768566906452179, "learning_rate": 6.493814025293476e-08, "loss": 0.06568622589111328, "step": 7194 }, { "epoch": 0.972708069286016, "grad_norm": 0.3239062428474426, "learning_rate": 6.430350638320704e-08, "loss": 0.07325363159179688, "step": 7195 }, { "epoch": 0.972843261512463, "grad_norm": 0.5337064862251282, "learning_rate": 6.367198220513848e-08, "loss": 0.11066055297851562, "step": 7196 }, { "epoch": 0.97297845373891, "grad_norm": 0.23224112391471863, "learning_rate": 6.304356785021226e-08, "loss": 0.05916738510131836, "step": 7197 }, { "epoch": 0.973113645965357, "grad_norm": 0.5669562816619873, "learning_rate": 6.241826344926704e-08, "loss": 0.08518695831298828, "step": 7198 }, { "epoch": 0.973248838191804, "grad_norm": 0.28966808319091797, "learning_rate": 6.17960691324987e-08, "loss": 0.0686178207397461, "step": 7199 }, { "epoch": 0.973384030418251, "grad_norm": 0.45771700143814087, "learning_rate": 6.117698502944857e-08, "loss": 0.07668817043304443, "step": 7200 }, { "epoch": 0.9735192226446979, "grad_norm": 0.20307712256908417, "learning_rate": 6.056101126901358e-08, "loss": 0.05368947982788086, "step": 7201 }, { "epoch": 0.9736544148711449, "grad_norm": 0.5064142346382141, "learning_rate": 5.994814797944281e-08, "loss": 0.07696700096130371, "step": 7202 }, { "epoch": 0.9737896070975919, "grad_norm": 0.15008561313152313, "learning_rate": 5.933839528833751e-08, "loss": 0.023941993713378906, "step": 7203 }, { "epoch": 0.9739247993240389, "grad_norm": 0.17597651481628418, "learning_rate": 5.873175332265279e-08, "loss": 0.03881549835205078, "step": 7204 }, { "epoch": 0.9740599915504858, "grad_norm": 0.46539902687072754, "learning_rate": 5.812822220869096e-08, "loss": 0.08562612533569336, "step": 7205 }, { "epoch": 0.9741951837769328, "grad_norm": 0.36038830876350403, "learning_rate": 5.752780207211483e-08, "loss": 0.07819366455078125, "step": 7206 }, { "epoch": 0.9743303760033798, "grad_norm": 0.21653492748737335, "learning_rate": 5.693049303793274e-08, "loss": 0.04432499408721924, "step": 7207 }, { "epoch": 0.9744655682298268, "grad_norm": 0.4385357201099396, "learning_rate": 5.6336295230508536e-08, "loss": 0.05430912971496582, "step": 7208 }, { "epoch": 0.9746007604562738, "grad_norm": 0.3062569797039032, "learning_rate": 5.5745208773558266e-08, "loss": 0.06261181831359863, "step": 7209 }, { "epoch": 0.9747359526827207, "grad_norm": 0.288725882768631, "learning_rate": 5.515723379014681e-08, "loss": 0.056552886962890625, "step": 7210 }, { "epoch": 0.9748711449091677, "grad_norm": 0.2708200216293335, "learning_rate": 5.4572370402694583e-08, "loss": 0.057570457458496094, "step": 7211 }, { "epoch": 0.9750063371356147, "grad_norm": 0.48602837324142456, "learning_rate": 5.399061873297417e-08, "loss": 0.0813295841217041, "step": 7212 }, { "epoch": 0.9751415293620617, "grad_norm": 0.2376367151737213, "learning_rate": 5.341197890210869e-08, "loss": 0.060228586196899414, "step": 7213 }, { "epoch": 0.9752767215885086, "grad_norm": 0.26292112469673157, "learning_rate": 5.283645103057344e-08, "loss": 0.04542899131774902, "step": 7214 }, { "epoch": 0.9754119138149556, "grad_norm": 0.2552328407764435, "learning_rate": 5.226403523819756e-08, "loss": 0.062427520751953125, "step": 7215 }, { "epoch": 0.9755471060414026, "grad_norm": 0.3440481424331665, "learning_rate": 5.169473164416072e-08, "loss": 0.08122038841247559, "step": 7216 }, { "epoch": 0.9756822982678496, "grad_norm": 0.1281677782535553, "learning_rate": 5.112854036699477e-08, "loss": 0.039642333984375, "step": 7217 }, { "epoch": 0.9758174904942966, "grad_norm": 0.30934494733810425, "learning_rate": 5.0565461524583745e-08, "loss": 0.0696796178817749, "step": 7218 }, { "epoch": 0.9759526827207435, "grad_norm": 0.3011772334575653, "learning_rate": 5.0005495234163865e-08, "loss": 0.05882906913757324, "step": 7219 }, { "epoch": 0.9760878749471905, "grad_norm": 0.2965177297592163, "learning_rate": 4.9448641612321874e-08, "loss": 0.0741887092590332, "step": 7220 }, { "epoch": 0.9762230671736375, "grad_norm": 0.1637433022260666, "learning_rate": 4.889490077500003e-08, "loss": 0.042545318603515625, "step": 7221 }, { "epoch": 0.9763582594000845, "grad_norm": 0.49014654755592346, "learning_rate": 4.8344272837489434e-08, "loss": 0.07047206163406372, "step": 7222 }, { "epoch": 0.9764934516265315, "grad_norm": 0.42014312744140625, "learning_rate": 4.779675791443172e-08, "loss": 0.06003129482269287, "step": 7223 }, { "epoch": 0.9766286438529784, "grad_norm": 0.4761996865272522, "learning_rate": 4.72523561198257e-08, "loss": 0.07059383392333984, "step": 7224 }, { "epoch": 0.9767638360794254, "grad_norm": 0.24438482522964478, "learning_rate": 4.6711067567014044e-08, "loss": 0.06229877471923828, "step": 7225 }, { "epoch": 0.9768990283058724, "grad_norm": 0.26329684257507324, "learning_rate": 4.6172892368701595e-08, "loss": 0.06503629684448242, "step": 7226 }, { "epoch": 0.9770342205323194, "grad_norm": 0.2561330795288086, "learning_rate": 4.5637830636935385e-08, "loss": 0.048719167709350586, "step": 7227 }, { "epoch": 0.9771694127587663, "grad_norm": 0.3064153492450714, "learning_rate": 4.5105882483119643e-08, "loss": 0.055995821952819824, "step": 7228 }, { "epoch": 0.9773046049852133, "grad_norm": 0.3868359625339508, "learning_rate": 4.4577048018007436e-08, "loss": 0.06254005432128906, "step": 7229 }, { "epoch": 0.9774397972116603, "grad_norm": 0.24148406088352203, "learning_rate": 4.405132735170569e-08, "loss": 0.050814270973205566, "step": 7230 }, { "epoch": 0.9775749894381073, "grad_norm": 0.2694374918937683, "learning_rate": 4.3528720593675184e-08, "loss": 0.052547454833984375, "step": 7231 }, { "epoch": 0.9777101816645543, "grad_norm": 0.2057885378599167, "learning_rate": 4.300922785271888e-08, "loss": 0.048792481422424316, "step": 7232 }, { "epoch": 0.9778453738910012, "grad_norm": 0.18473146855831146, "learning_rate": 4.249284923700358e-08, "loss": 0.0374605655670166, "step": 7233 }, { "epoch": 0.9779805661174482, "grad_norm": 0.2718767523765564, "learning_rate": 4.197958485404163e-08, "loss": 0.059324026107788086, "step": 7234 }, { "epoch": 0.9781157583438952, "grad_norm": 0.2652534544467926, "learning_rate": 4.1469434810694206e-08, "loss": 0.0555729866027832, "step": 7235 }, { "epoch": 0.9782509505703422, "grad_norm": 0.6530328989028931, "learning_rate": 4.096239921317968e-08, "loss": 0.08185791969299316, "step": 7236 }, { "epoch": 0.9783861427967891, "grad_norm": 0.23014208674430847, "learning_rate": 4.045847816706361e-08, "loss": 0.04120290279388428, "step": 7237 }, { "epoch": 0.9785213350232361, "grad_norm": 0.2994944453239441, "learning_rate": 3.9957671777268724e-08, "loss": 0.07490706443786621, "step": 7238 }, { "epoch": 0.9786565272496831, "grad_norm": 0.463821679353714, "learning_rate": 3.945998014806163e-08, "loss": 0.08352804183959961, "step": 7239 }, { "epoch": 0.9787917194761301, "grad_norm": 0.3556688725948334, "learning_rate": 3.896540338306609e-08, "loss": 0.07006645202636719, "step": 7240 }, { "epoch": 0.9789269117025771, "grad_norm": 0.26082634925842285, "learning_rate": 3.847394158525641e-08, "loss": 0.06986570358276367, "step": 7241 }, { "epoch": 0.979062103929024, "grad_norm": 0.2542496621608734, "learning_rate": 3.798559485695574e-08, "loss": 0.07001972198486328, "step": 7242 }, { "epoch": 0.979197296155471, "grad_norm": 0.26027971506118774, "learning_rate": 3.7500363299842746e-08, "loss": 0.05592823028564453, "step": 7243 }, { "epoch": 0.979332488381918, "grad_norm": 0.16457578539848328, "learning_rate": 3.701824701494327e-08, "loss": 0.046762943267822266, "step": 7244 }, { "epoch": 0.979467680608365, "grad_norm": 0.2241346538066864, "learning_rate": 3.653924610263703e-08, "loss": 0.055150508880615234, "step": 7245 }, { "epoch": 0.979602872834812, "grad_norm": 0.2701142132282257, "learning_rate": 3.6063360662654255e-08, "loss": 0.05116415023803711, "step": 7246 }, { "epoch": 0.9797380650612589, "grad_norm": 0.3890156149864197, "learning_rate": 3.559059079407734e-08, "loss": 0.05099201202392578, "step": 7247 }, { "epoch": 0.9798732572877059, "grad_norm": 0.39969396591186523, "learning_rate": 3.512093659533922e-08, "loss": 0.08316755294799805, "step": 7248 }, { "epoch": 0.9800084495141529, "grad_norm": 0.33844202756881714, "learning_rate": 3.4654398164225e-08, "loss": 0.058484554290771484, "step": 7249 }, { "epoch": 0.9801436417405999, "grad_norm": 0.29372870922088623, "learning_rate": 3.4190975597870325e-08, "loss": 0.07604849338531494, "step": 7250 }, { "epoch": 0.9802788339670468, "grad_norm": 0.24319076538085938, "learning_rate": 3.373066899276134e-08, "loss": 0.05558300018310547, "step": 7251 }, { "epoch": 0.9804140261934938, "grad_norm": 0.3392714858055115, "learning_rate": 3.3273478444736386e-08, "loss": 0.05737876892089844, "step": 7252 }, { "epoch": 0.9805492184199408, "grad_norm": 0.2648780643939972, "learning_rate": 3.281940404898764e-08, "loss": 0.05582237243652344, "step": 7253 }, { "epoch": 0.9806844106463878, "grad_norm": 0.31967294216156006, "learning_rate": 3.236844590005117e-08, "loss": 0.06507253646850586, "step": 7254 }, { "epoch": 0.9808196028728348, "grad_norm": 0.7116628289222717, "learning_rate": 3.192060409182351e-08, "loss": 0.09038543701171875, "step": 7255 }, { "epoch": 0.9809547950992817, "grad_norm": 0.28085264563560486, "learning_rate": 3.147587871754509e-08, "loss": 0.06172943115234375, "step": 7256 }, { "epoch": 0.9810899873257287, "grad_norm": 0.4066506326198578, "learning_rate": 3.1034269869810174e-08, "loss": 0.06013298034667969, "step": 7257 }, { "epoch": 0.9812251795521757, "grad_norm": 0.47677507996559143, "learning_rate": 3.05957776405652e-08, "loss": 0.06224295496940613, "step": 7258 }, { "epoch": 0.9813603717786227, "grad_norm": 0.2198784202337265, "learning_rate": 3.016040212110549e-08, "loss": 0.04728579521179199, "step": 7259 }, { "epoch": 0.9814955640050697, "grad_norm": 0.7776672840118408, "learning_rate": 2.9728143402078522e-08, "loss": 0.09712910652160645, "step": 7260 }, { "epoch": 0.9816307562315166, "grad_norm": 0.27797162532806396, "learning_rate": 2.9299001573483975e-08, "loss": 0.05040121078491211, "step": 7261 }, { "epoch": 0.9817659484579637, "grad_norm": 0.29053762555122375, "learning_rate": 2.8872976724670375e-08, "loss": 0.07041716575622559, "step": 7262 }, { "epoch": 0.9819011406844107, "grad_norm": 0.24438482522964478, "learning_rate": 2.8450068944338436e-08, "loss": 0.04060029983520508, "step": 7263 }, { "epoch": 0.9820363329108577, "grad_norm": 0.2871078550815582, "learning_rate": 2.803027832054106e-08, "loss": 0.06870365142822266, "step": 7264 }, { "epoch": 0.9821715251373047, "grad_norm": 0.2679024934768677, "learning_rate": 2.7613604940679995e-08, "loss": 0.05549955368041992, "step": 7265 }, { "epoch": 0.9823067173637516, "grad_norm": 0.32359033823013306, "learning_rate": 2.7200048891509176e-08, "loss": 0.06971162557601929, "step": 7266 }, { "epoch": 0.9824419095901986, "grad_norm": 0.35307279229164124, "learning_rate": 2.67896102591314e-08, "loss": 0.06990706920623779, "step": 7267 }, { "epoch": 0.9825771018166456, "grad_norm": 0.2598518431186676, "learning_rate": 2.6382289129004978e-08, "loss": 0.060251712799072266, "step": 7268 }, { "epoch": 0.9827122940430926, "grad_norm": 0.4024988114833832, "learning_rate": 2.5978085585935395e-08, "loss": 0.06672167778015137, "step": 7269 }, { "epoch": 0.9828474862695395, "grad_norm": 0.19556914269924164, "learning_rate": 2.5576999714078676e-08, "loss": 0.05283021926879883, "step": 7270 }, { "epoch": 0.9829826784959865, "grad_norm": 0.12614038586616516, "learning_rate": 2.517903159694468e-08, "loss": 0.03987550735473633, "step": 7271 }, { "epoch": 0.9831178707224335, "grad_norm": 0.49008631706237793, "learning_rate": 2.4784181317390465e-08, "loss": 0.07659709453582764, "step": 7272 }, { "epoch": 0.9832530629488805, "grad_norm": 0.3741593360900879, "learning_rate": 2.4392448957628598e-08, "loss": 0.08503150939941406, "step": 7273 }, { "epoch": 0.9833882551753275, "grad_norm": 0.2883245050907135, "learning_rate": 2.4003834599217177e-08, "loss": 0.06562232971191406, "step": 7274 }, { "epoch": 0.9835234474017744, "grad_norm": 0.20501579344272614, "learning_rate": 2.3618338323071474e-08, "loss": 0.03697967529296875, "step": 7275 }, { "epoch": 0.9836586396282214, "grad_norm": 0.10614482313394547, "learning_rate": 2.3235960209448958e-08, "loss": 0.022292613983154297, "step": 7276 }, { "epoch": 0.9837938318546684, "grad_norm": 0.2681671977043152, "learning_rate": 2.2856700337967606e-08, "loss": 0.06901407241821289, "step": 7277 }, { "epoch": 0.9839290240811154, "grad_norm": 0.35393616557121277, "learning_rate": 2.2480558787587592e-08, "loss": 0.05017447471618652, "step": 7278 }, { "epoch": 0.9840642163075624, "grad_norm": 0.15665088593959808, "learning_rate": 2.2107535636626263e-08, "loss": 0.04813027381896973, "step": 7279 }, { "epoch": 0.9841994085340093, "grad_norm": 0.39157634973526, "learning_rate": 2.1737630962746502e-08, "loss": 0.05547666549682617, "step": 7280 }, { "epoch": 0.9843346007604563, "grad_norm": 0.2564446032047272, "learning_rate": 2.1370844842966696e-08, "loss": 0.051375389099121094, "step": 7281 }, { "epoch": 0.9844697929869033, "grad_norm": 0.1833972930908203, "learning_rate": 2.100717735365243e-08, "loss": 0.03956317901611328, "step": 7282 }, { "epoch": 0.9846049852133503, "grad_norm": 0.19278490543365479, "learning_rate": 2.0646628570521464e-08, "loss": 0.05518698692321777, "step": 7283 }, { "epoch": 0.9847401774397972, "grad_norm": 0.6498562693595886, "learning_rate": 2.028919856864375e-08, "loss": 0.08387279510498047, "step": 7284 }, { "epoch": 0.9848753696662442, "grad_norm": 0.18656525015830994, "learning_rate": 1.9934887422434766e-08, "loss": 0.04199361801147461, "step": 7285 }, { "epoch": 0.9850105618926912, "grad_norm": 0.3669340908527374, "learning_rate": 1.9583695205665496e-08, "loss": 0.07400131225585938, "step": 7286 }, { "epoch": 0.9851457541191382, "grad_norm": 0.16416636109352112, "learning_rate": 1.9235621991457454e-08, "loss": 0.03388023376464844, "step": 7287 }, { "epoch": 0.9852809463455852, "grad_norm": 0.238312765955925, "learning_rate": 1.889066785227933e-08, "loss": 0.05614876747131348, "step": 7288 }, { "epoch": 0.9854161385720321, "grad_norm": 0.333103746175766, "learning_rate": 1.854883285995368e-08, "loss": 0.06418037414550781, "step": 7289 }, { "epoch": 0.9855513307984791, "grad_norm": 0.4479035437107086, "learning_rate": 1.8210117085651902e-08, "loss": 0.07337713241577148, "step": 7290 }, { "epoch": 0.9856865230249261, "grad_norm": 0.21257217228412628, "learning_rate": 1.7874520599894252e-08, "loss": 0.04496932029724121, "step": 7291 }, { "epoch": 0.9858217152513731, "grad_norm": 0.42596161365509033, "learning_rate": 1.7542043472558166e-08, "loss": 0.07498455047607422, "step": 7292 }, { "epoch": 0.98595690747782, "grad_norm": 0.1502128541469574, "learning_rate": 1.7212685772864945e-08, "loss": 0.04128670692443848, "step": 7293 }, { "epoch": 0.986092099704267, "grad_norm": 0.35827237367630005, "learning_rate": 1.68864475693864e-08, "loss": 0.056105613708496094, "step": 7294 }, { "epoch": 0.986227291930714, "grad_norm": 0.4056648910045624, "learning_rate": 1.6563328930051526e-08, "loss": 0.08824682235717773, "step": 7295 }, { "epoch": 0.986362484157161, "grad_norm": 0.26973649859428406, "learning_rate": 1.624332992213151e-08, "loss": 0.06273508071899414, "step": 7296 }, { "epoch": 0.986497676383608, "grad_norm": 0.38383224606513977, "learning_rate": 1.5926450612254728e-08, "loss": 0.0930633544921875, "step": 7297 }, { "epoch": 0.986632868610055, "grad_norm": 0.27624213695526123, "learning_rate": 1.5612691066395068e-08, "loss": 0.06635808944702148, "step": 7298 }, { "epoch": 0.9867680608365019, "grad_norm": 0.1757369339466095, "learning_rate": 1.530205134987861e-08, "loss": 0.03658497333526611, "step": 7299 }, { "epoch": 0.9869032530629489, "grad_norm": 0.27076634764671326, "learning_rate": 1.499453152738528e-08, "loss": 0.0631113052368164, "step": 7300 }, { "epoch": 0.9870384452893959, "grad_norm": 0.3757174015045166, "learning_rate": 1.4690131662938866e-08, "loss": 0.0719306468963623, "step": 7301 }, { "epoch": 0.9871736375158429, "grad_norm": 0.25306931138038635, "learning_rate": 1.438885181991867e-08, "loss": 0.06742358207702637, "step": 7302 }, { "epoch": 0.9873088297422898, "grad_norm": 0.21392711997032166, "learning_rate": 1.4090692061052846e-08, "loss": 0.04740428924560547, "step": 7303 }, { "epoch": 0.9874440219687368, "grad_norm": 0.4029611051082611, "learning_rate": 1.3795652448420071e-08, "loss": 0.06776756048202515, "step": 7304 }, { "epoch": 0.9875792141951838, "grad_norm": 0.3412213623523712, "learning_rate": 1.3503733043447874e-08, "loss": 0.04248058795928955, "step": 7305 }, { "epoch": 0.9877144064216308, "grad_norm": 0.5584572553634644, "learning_rate": 1.3214933906915971e-08, "loss": 0.08418989181518555, "step": 7306 }, { "epoch": 0.9878495986480778, "grad_norm": 0.20628269016742706, "learning_rate": 1.2929255098954596e-08, "loss": 0.05633211135864258, "step": 7307 }, { "epoch": 0.9879847908745247, "grad_norm": 0.1590343862771988, "learning_rate": 1.2646696679042835e-08, "loss": 0.040009498596191406, "step": 7308 }, { "epoch": 0.9881199831009717, "grad_norm": 0.28296276926994324, "learning_rate": 1.2367258706010298e-08, "loss": 0.07425785064697266, "step": 7309 }, { "epoch": 0.9882551753274187, "grad_norm": 0.27983108162879944, "learning_rate": 1.2090941238040443e-08, "loss": 0.05311179161071777, "step": 7310 }, { "epoch": 0.9883903675538657, "grad_norm": 0.4429514706134796, "learning_rate": 1.1817744332660584e-08, "loss": 0.08093523979187012, "step": 7311 }, { "epoch": 0.9885255597803126, "grad_norm": 0.41722482442855835, "learning_rate": 1.1547668046751891e-08, "loss": 0.06028485298156738, "step": 7312 }, { "epoch": 0.9886607520067596, "grad_norm": 0.5115593671798706, "learning_rate": 1.1280712436549379e-08, "loss": 0.0912933349609375, "step": 7313 }, { "epoch": 0.9887959442332066, "grad_norm": 0.4645446538925171, "learning_rate": 1.1016877557630257e-08, "loss": 0.056313514709472656, "step": 7314 }, { "epoch": 0.9889311364596536, "grad_norm": 0.31187713146209717, "learning_rate": 1.0756163464928915e-08, "loss": 0.05369257926940918, "step": 7315 }, { "epoch": 0.9890663286861006, "grad_norm": 0.1749580204486847, "learning_rate": 1.0498570212726932e-08, "loss": 0.03262472152709961, "step": 7316 }, { "epoch": 0.9892015209125475, "grad_norm": 0.27190452814102173, "learning_rate": 1.024409785465641e-08, "loss": 0.07194137573242188, "step": 7317 }, { "epoch": 0.9893367131389945, "grad_norm": 0.27393752336502075, "learning_rate": 9.992746443699962e-09, "loss": 0.057901859283447266, "step": 7318 }, { "epoch": 0.9894719053654415, "grad_norm": 0.29374632239341736, "learning_rate": 9.744516032190731e-09, "loss": 0.05480337142944336, "step": 7319 }, { "epoch": 0.9896070975918885, "grad_norm": 0.204203262925148, "learning_rate": 9.499406671809041e-09, "loss": 0.045647382736206055, "step": 7320 }, { "epoch": 0.9897422898183355, "grad_norm": 0.3786998987197876, "learning_rate": 9.2574184135924e-09, "loss": 0.06525158882141113, "step": 7321 }, { "epoch": 0.9898774820447824, "grad_norm": 0.28615784645080566, "learning_rate": 9.018551307920508e-09, "loss": 0.06687963008880615, "step": 7322 }, { "epoch": 0.9900126742712294, "grad_norm": 0.33808377385139465, "learning_rate": 8.782805404526917e-09, "loss": 0.059148550033569336, "step": 7323 }, { "epoch": 0.9901478664976764, "grad_norm": 0.2744404077529907, "learning_rate": 8.55018075249736e-09, "loss": 0.05714988708496094, "step": 7324 }, { "epoch": 0.9902830587241234, "grad_norm": 0.18194349110126495, "learning_rate": 8.320677400264764e-09, "loss": 0.041713714599609375, "step": 7325 }, { "epoch": 0.9904182509505703, "grad_norm": 0.29345688223838806, "learning_rate": 8.094295395610906e-09, "loss": 0.05753040313720703, "step": 7326 }, { "epoch": 0.9905534431770173, "grad_norm": 0.1978025585412979, "learning_rate": 7.87103478567308e-09, "loss": 0.05142700672149658, "step": 7327 }, { "epoch": 0.9906886354034643, "grad_norm": 0.33782291412353516, "learning_rate": 7.65089561693244e-09, "loss": 0.06603479385375977, "step": 7328 }, { "epoch": 0.9908238276299113, "grad_norm": 0.31460145115852356, "learning_rate": 7.433877935225652e-09, "loss": 0.08867168426513672, "step": 7329 }, { "epoch": 0.9909590198563583, "grad_norm": 0.5301055908203125, "learning_rate": 7.219981785733243e-09, "loss": 0.10008573532104492, "step": 7330 }, { "epoch": 0.9910942120828052, "grad_norm": 0.27034464478492737, "learning_rate": 7.009207212992919e-09, "loss": 0.05199456214904785, "step": 7331 }, { "epoch": 0.9912294043092522, "grad_norm": 0.3467728793621063, "learning_rate": 6.801554260889575e-09, "loss": 0.06944608688354492, "step": 7332 }, { "epoch": 0.9913645965356992, "grad_norm": 0.23734015226364136, "learning_rate": 6.5970229726552976e-09, "loss": 0.061892032623291016, "step": 7333 }, { "epoch": 0.9914997887621462, "grad_norm": 0.2939838767051697, "learning_rate": 6.3956133908743556e-09, "loss": 0.06702494621276855, "step": 7334 }, { "epoch": 0.9916349809885932, "grad_norm": 0.46972307562828064, "learning_rate": 6.197325557483202e-09, "loss": 0.08815503120422363, "step": 7335 }, { "epoch": 0.9917701732150401, "grad_norm": 0.3913094401359558, "learning_rate": 6.002159513765482e-09, "loss": 0.07429647445678711, "step": 7336 }, { "epoch": 0.9919053654414871, "grad_norm": 0.25788867473602295, "learning_rate": 5.810115300355357e-09, "loss": 0.05698126554489136, "step": 7337 }, { "epoch": 0.9920405576679341, "grad_norm": 0.29297149181365967, "learning_rate": 5.621192957239174e-09, "loss": 0.06451702117919922, "step": 7338 }, { "epoch": 0.9921757498943811, "grad_norm": 0.20773686468601227, "learning_rate": 5.435392523748806e-09, "loss": 0.06315559148788452, "step": 7339 }, { "epoch": 0.992310942120828, "grad_norm": 0.2527959644794464, "learning_rate": 5.252714038571638e-09, "loss": 0.0677328109741211, "step": 7340 }, { "epoch": 0.992446134347275, "grad_norm": 0.24812406301498413, "learning_rate": 5.073157539742246e-09, "loss": 0.06432628631591797, "step": 7341 }, { "epoch": 0.992581326573722, "grad_norm": 0.22527673840522766, "learning_rate": 4.896723064642394e-09, "loss": 0.05236053466796875, "step": 7342 }, { "epoch": 0.992716518800169, "grad_norm": 0.7520393133163452, "learning_rate": 4.723410650009363e-09, "loss": 0.07049024105072021, "step": 7343 }, { "epoch": 0.992851711026616, "grad_norm": 0.2114769071340561, "learning_rate": 4.553220331925956e-09, "loss": 0.047716617584228516, "step": 7344 }, { "epoch": 0.9929869032530629, "grad_norm": 0.21498046815395355, "learning_rate": 4.38615214582716e-09, "loss": 0.05808830261230469, "step": 7345 }, { "epoch": 0.9931220954795099, "grad_norm": 0.42126530408859253, "learning_rate": 4.2222061265001496e-09, "loss": 0.04510009288787842, "step": 7346 }, { "epoch": 0.9932572877059569, "grad_norm": 0.23278068006038666, "learning_rate": 4.0613823080742905e-09, "loss": 0.04508709907531738, "step": 7347 }, { "epoch": 0.9933924799324039, "grad_norm": 0.4372054934501648, "learning_rate": 3.903680724037795e-09, "loss": 0.06105327606201172, "step": 7348 }, { "epoch": 0.9935276721588508, "grad_norm": 0.15601420402526855, "learning_rate": 3.749101407224398e-09, "loss": 0.03406357765197754, "step": 7349 }, { "epoch": 0.9936628643852978, "grad_norm": 0.2650086283683777, "learning_rate": 3.597644389818355e-09, "loss": 0.0564914345741272, "step": 7350 }, { "epoch": 0.9937980566117448, "grad_norm": 0.24166135489940643, "learning_rate": 3.4493097033527767e-09, "loss": 0.055477142333984375, "step": 7351 }, { "epoch": 0.9939332488381918, "grad_norm": 0.3690648078918457, "learning_rate": 3.3040973787112904e-09, "loss": 0.0753936767578125, "step": 7352 }, { "epoch": 0.9940684410646388, "grad_norm": 0.2443806380033493, "learning_rate": 3.162007446129711e-09, "loss": 0.05772757530212402, "step": 7353 }, { "epoch": 0.9942036332910857, "grad_norm": 0.29930561780929565, "learning_rate": 3.023039935191041e-09, "loss": 0.05570673942565918, "step": 7354 }, { "epoch": 0.9943388255175327, "grad_norm": 0.3058774173259735, "learning_rate": 2.887194874830468e-09, "loss": 0.07515096664428711, "step": 7355 }, { "epoch": 0.9944740177439797, "grad_norm": 0.21760514378547668, "learning_rate": 2.7544722933287026e-09, "loss": 0.051554203033447266, "step": 7356 }, { "epoch": 0.9946092099704267, "grad_norm": 0.3237847685813904, "learning_rate": 2.6248722183203066e-09, "loss": 0.06708765029907227, "step": 7357 }, { "epoch": 0.9947444021968737, "grad_norm": 0.24604564905166626, "learning_rate": 2.498394676790361e-09, "loss": 0.04879283905029297, "step": 7358 }, { "epoch": 0.9948795944233206, "grad_norm": 0.43226322531700134, "learning_rate": 2.375039695071135e-09, "loss": 0.08555889129638672, "step": 7359 }, { "epoch": 0.9950147866497676, "grad_norm": 0.3227103352546692, "learning_rate": 2.2548072988454184e-09, "loss": 0.07320523262023926, "step": 7360 }, { "epoch": 0.9951499788762146, "grad_norm": 0.5129017233848572, "learning_rate": 2.1376975131465194e-09, "loss": 0.09375405311584473, "step": 7361 }, { "epoch": 0.9952851711026616, "grad_norm": 0.35747861862182617, "learning_rate": 2.023710362356601e-09, "loss": 0.06770801544189453, "step": 7362 }, { "epoch": 0.9954203633291085, "grad_norm": 0.2408422976732254, "learning_rate": 1.9128458702100117e-09, "loss": 0.037334144115448, "step": 7363 }, { "epoch": 0.9955555555555555, "grad_norm": 0.3053073287010193, "learning_rate": 1.8051040597882873e-09, "loss": 0.05077546834945679, "step": 7364 }, { "epoch": 0.9956907477820025, "grad_norm": 0.23652294278144836, "learning_rate": 1.70048495352515e-09, "loss": 0.04577064514160156, "step": 7365 }, { "epoch": 0.9958259400084495, "grad_norm": 0.3810080885887146, "learning_rate": 1.5989885731998443e-09, "loss": 0.07359600067138672, "step": 7366 }, { "epoch": 0.9959611322348965, "grad_norm": 0.2785953879356384, "learning_rate": 1.5006149399487966e-09, "loss": 0.05605125427246094, "step": 7367 }, { "epoch": 0.9960963244613434, "grad_norm": 0.30559104681015015, "learning_rate": 1.4053640742489604e-09, "loss": 0.049391746520996094, "step": 7368 }, { "epoch": 0.9962315166877904, "grad_norm": 0.21644267439842224, "learning_rate": 1.3132359959361351e-09, "loss": 0.04492950439453125, "step": 7369 }, { "epoch": 0.9963667089142374, "grad_norm": 0.15193523466587067, "learning_rate": 1.2242307241899787e-09, "loss": 0.0377657413482666, "step": 7370 }, { "epoch": 0.9965019011406844, "grad_norm": 0.6023728847503662, "learning_rate": 1.1383482775406685e-09, "loss": 0.07380247116088867, "step": 7371 }, { "epoch": 0.9966370933671314, "grad_norm": 0.2014322578907013, "learning_rate": 1.0555886738738973e-09, "loss": 0.04391944408416748, "step": 7372 }, { "epoch": 0.9967722855935783, "grad_norm": 0.2580380439758301, "learning_rate": 9.75951930415886e-10, "loss": 0.06332147121429443, "step": 7373 }, { "epoch": 0.9969074778200253, "grad_norm": 0.2405771017074585, "learning_rate": 8.994380637483701e-10, "loss": 0.05062103271484375, "step": 7374 }, { "epoch": 0.9970426700464723, "grad_norm": 0.25464409589767456, "learning_rate": 8.260470898036054e-10, "loss": 0.0672445297241211, "step": 7375 }, { "epoch": 0.9971778622729193, "grad_norm": 0.24049629271030426, "learning_rate": 7.557790238627016e-10, "loss": 0.03889155387878418, "step": 7376 }, { "epoch": 0.9973130544993662, "grad_norm": 0.378265380859375, "learning_rate": 6.886338805522918e-10, "loss": 0.06474447250366211, "step": 7377 }, { "epoch": 0.9974482467258132, "grad_norm": 0.35563474893569946, "learning_rate": 6.246116738561903e-10, "loss": 0.06737220287322998, "step": 7378 }, { "epoch": 0.9975834389522602, "grad_norm": 0.40668416023254395, "learning_rate": 5.637124171004038e-10, "loss": 0.08114171028137207, "step": 7379 }, { "epoch": 0.9977186311787072, "grad_norm": 0.2842503786087036, "learning_rate": 5.059361229681203e-10, "loss": 0.06357669830322266, "step": 7380 }, { "epoch": 0.9978538234051542, "grad_norm": 0.30374807119369507, "learning_rate": 4.5128280348638583e-10, "loss": 0.07045936584472656, "step": 7381 }, { "epoch": 0.9979890156316011, "grad_norm": 0.4822167158126831, "learning_rate": 3.9975247003443127e-10, "loss": 0.08537006378173828, "step": 7382 }, { "epoch": 0.9981242078580481, "grad_norm": 0.3208787739276886, "learning_rate": 3.51345133342007e-10, "loss": 0.05112278461456299, "step": 7383 }, { "epoch": 0.9982594000844951, "grad_norm": 0.43507885932922363, "learning_rate": 3.060608034877177e-10, "loss": 0.05134916305541992, "step": 7384 }, { "epoch": 0.9983945923109421, "grad_norm": 0.32505616545677185, "learning_rate": 2.638994898990221e-10, "loss": 0.07700061798095703, "step": 7385 }, { "epoch": 0.998529784537389, "grad_norm": 0.22194541990756989, "learning_rate": 2.2486120135556398e-10, "loss": 0.05081915855407715, "step": 7386 }, { "epoch": 0.998664976763836, "grad_norm": 0.35013654828071594, "learning_rate": 1.889459459841758e-10, "loss": 0.060456275939941406, "step": 7387 }, { "epoch": 0.998800168990283, "grad_norm": 0.26419302821159363, "learning_rate": 1.56153731263875e-10, "loss": 0.04935884475708008, "step": 7388 }, { "epoch": 0.99893536121673, "grad_norm": 0.28081369400024414, "learning_rate": 1.2648456402086784e-10, "loss": 0.055562734603881836, "step": 7389 }, { "epoch": 0.999070553443177, "grad_norm": 0.252122163772583, "learning_rate": 9.99384504318801e-11, "loss": 0.05839800834655762, "step": 7390 }, { "epoch": 0.999205745669624, "grad_norm": 0.4285535514354706, "learning_rate": 7.651539602582247e-11, "loss": 0.055081844329833984, "step": 7391 }, { "epoch": 0.9993409378960709, "grad_norm": 0.3640401363372803, "learning_rate": 5.6215405678794464e-11, "loss": 0.07813167572021484, "step": 7392 }, { "epoch": 0.9994761301225179, "grad_norm": 0.3815842270851135, "learning_rate": 3.9038483615749795e-11, "loss": 0.0639352798461914, "step": 7393 }, { "epoch": 0.9996113223489649, "grad_norm": 0.22268462181091309, "learning_rate": 2.4984633415492398e-11, "loss": 0.05288577079772949, "step": 7394 }, { "epoch": 0.9997465145754119, "grad_norm": 0.2326449155807495, "learning_rate": 1.4053858004015041e-11, "loss": 0.058682918548583984, "step": 7395 }, { "epoch": 0.9998817068018588, "grad_norm": 0.460287481546402, "learning_rate": 6.246159654499373e-12, "loss": 0.0901026725769043, "step": 7396 }, { "epoch": 1.0, "grad_norm": 0.29362422227859497, "learning_rate": 1.561539995642569e-12, "loss": 0.06835460662841797, "step": 7397 }, { "epoch": 1.0, "step": 7397, "total_flos": 1.847331328879619e+20, "train_loss": 0.0730548164313333, "train_runtime": 153923.9428, "train_samples_per_second": 12.302, "train_steps_per_second": 0.048 } ], "logging_steps": 1.0, "max_steps": 7397, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.847331328879619e+20, "train_batch_size": 4, "trial_name": null, "trial_params": null }