{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 7397, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00013519679583593868, "grad_norm": 10.255340576171875, "learning_rate": 0.0, "loss": 0.69140625, "step": 1 }, { "epoch": 0.00027039359167187736, "grad_norm": 10.892648696899414, "learning_rate": 5.859375e-08, "loss": 0.69140625, "step": 2 }, { "epoch": 0.0004055903875078161, "grad_norm": 10.60339641571045, "learning_rate": 1.171875e-07, "loss": 0.69140625, "step": 3 }, { "epoch": 0.0005407871833437547, "grad_norm": 10.201833724975586, "learning_rate": 1.7578125e-07, "loss": 0.69140625, "step": 4 }, { "epoch": 0.0006759839791796934, "grad_norm": 10.66333293914795, "learning_rate": 2.34375e-07, "loss": 0.69140625, "step": 5 }, { "epoch": 0.0008111807750156322, "grad_norm": 10.464648246765137, "learning_rate": 2.9296875000000003e-07, "loss": 0.69140625, "step": 6 }, { "epoch": 0.0009463775708515709, "grad_norm": 10.621415138244629, "learning_rate": 3.515625e-07, "loss": 0.69140625, "step": 7 }, { "epoch": 0.0010815743666875094, "grad_norm": 10.700018882751465, "learning_rate": 4.1015625e-07, "loss": 0.69140625, "step": 8 }, { "epoch": 0.0012167711625234482, "grad_norm": 10.476875305175781, "learning_rate": 4.6875e-07, "loss": 0.69140625, "step": 9 }, { "epoch": 0.0013519679583593868, "grad_norm": 10.614548683166504, "learning_rate": 5.2734375e-07, "loss": 0.69140625, "step": 10 }, { "epoch": 0.0014871647541953256, "grad_norm": 10.9356050491333, "learning_rate": 5.859375000000001e-07, "loss": 0.69140625, "step": 11 }, { "epoch": 0.0016223615500312644, "grad_norm": 11.010387420654297, "learning_rate": 6.4453125e-07, "loss": 0.69140625, "step": 12 }, { "epoch": 0.001757558345867203, "grad_norm": 10.474565505981445, "learning_rate": 7.03125e-07, "loss": 0.6888427734375, "step": 13 }, { "epoch": 0.0018927551417031417, "grad_norm": 11.14912223815918, "learning_rate": 7.6171875e-07, "loss": 0.68798828125, "step": 14 }, { "epoch": 0.0020279519375390803, "grad_norm": 10.89780330657959, "learning_rate": 8.203125e-07, "loss": 0.68792724609375, "step": 15 }, { "epoch": 0.002163148733375019, "grad_norm": 10.518672943115234, "learning_rate": 8.7890625e-07, "loss": 0.68798828125, "step": 16 }, { "epoch": 0.002298345529210958, "grad_norm": 10.299038887023926, "learning_rate": 9.375e-07, "loss": 0.6878662109375, "step": 17 }, { "epoch": 0.0024335423250468964, "grad_norm": 10.66930866241455, "learning_rate": 9.9609375e-07, "loss": 0.683837890625, "step": 18 }, { "epoch": 0.002568739120882835, "grad_norm": 10.663459777832031, "learning_rate": 1.0546875e-06, "loss": 0.68377685546875, "step": 19 }, { "epoch": 0.0027039359167187736, "grad_norm": 10.752281188964844, "learning_rate": 1.11328125e-06, "loss": 0.68316650390625, "step": 20 }, { "epoch": 0.0028391327125547126, "grad_norm": 10.501089096069336, "learning_rate": 1.1718750000000001e-06, "loss": 0.68310546875, "step": 21 }, { "epoch": 0.002974329508390651, "grad_norm": 10.665032386779785, "learning_rate": 1.23046875e-06, "loss": 0.68096923828125, "step": 22 }, { "epoch": 0.0031095263042265897, "grad_norm": 9.829651832580566, "learning_rate": 1.2890625e-06, "loss": 0.68170166015625, "step": 23 }, { "epoch": 0.0032447231000625287, "grad_norm": 10.657003402709961, "learning_rate": 1.34765625e-06, "loss": 0.68011474609375, "step": 24 }, { "epoch": 0.0033799198958984673, "grad_norm": 10.138802528381348, "learning_rate": 1.40625e-06, "loss": 0.66058349609375, "step": 25 }, { "epoch": 0.003515116691734406, "grad_norm": 9.759446144104004, "learning_rate": 1.46484375e-06, "loss": 0.66119384765625, "step": 26 }, { "epoch": 0.0036503134875703444, "grad_norm": 10.706598281860352, "learning_rate": 1.5234375e-06, "loss": 0.65679931640625, "step": 27 }, { "epoch": 0.0037855102834062834, "grad_norm": 9.85043716430664, "learning_rate": 1.5820312500000001e-06, "loss": 0.65850830078125, "step": 28 }, { "epoch": 0.003920707079242222, "grad_norm": 9.268957138061523, "learning_rate": 1.640625e-06, "loss": 0.6593017578125, "step": 29 }, { "epoch": 0.004055903875078161, "grad_norm": 9.965956687927246, "learning_rate": 1.69921875e-06, "loss": 0.6513671875, "step": 30 }, { "epoch": 0.004191100670914099, "grad_norm": 9.81229305267334, "learning_rate": 1.7578125e-06, "loss": 0.65045166015625, "step": 31 }, { "epoch": 0.004326297466750038, "grad_norm": 10.022581100463867, "learning_rate": 1.81640625e-06, "loss": 0.6463623046875, "step": 32 }, { "epoch": 0.004461494262585977, "grad_norm": 9.77592658996582, "learning_rate": 1.875e-06, "loss": 0.63909912109375, "step": 33 }, { "epoch": 0.004596691058421916, "grad_norm": 9.745993614196777, "learning_rate": 1.93359375e-06, "loss": 0.61114501953125, "step": 34 }, { "epoch": 0.004731887854257854, "grad_norm": 9.962318420410156, "learning_rate": 1.9921875e-06, "loss": 0.596435546875, "step": 35 }, { "epoch": 0.004867084650093793, "grad_norm": 9.316697120666504, "learning_rate": 2.05078125e-06, "loss": 0.59722900390625, "step": 36 }, { "epoch": 0.005002281445929731, "grad_norm": 10.021162986755371, "learning_rate": 2.109375e-06, "loss": 0.583251953125, "step": 37 }, { "epoch": 0.00513747824176567, "grad_norm": 8.996946334838867, "learning_rate": 2.16796875e-06, "loss": 0.58868408203125, "step": 38 }, { "epoch": 0.0052726750376016086, "grad_norm": 9.7518310546875, "learning_rate": 2.2265625e-06, "loss": 0.57568359375, "step": 39 }, { "epoch": 0.005407871833437547, "grad_norm": 8.269822120666504, "learning_rate": 2.28515625e-06, "loss": 0.5845947265625, "step": 40 }, { "epoch": 0.005543068629273487, "grad_norm": 8.733393669128418, "learning_rate": 2.3437500000000002e-06, "loss": 0.5721435546875, "step": 41 }, { "epoch": 0.005678265425109425, "grad_norm": 8.683788299560547, "learning_rate": 2.40234375e-06, "loss": 0.56158447265625, "step": 42 }, { "epoch": 0.005813462220945364, "grad_norm": 8.420989036560059, "learning_rate": 2.4609375e-06, "loss": 0.55609130859375, "step": 43 }, { "epoch": 0.005948659016781302, "grad_norm": 9.148038864135742, "learning_rate": 2.5195312500000003e-06, "loss": 0.53692626953125, "step": 44 }, { "epoch": 0.006083855812617241, "grad_norm": 8.447755813598633, "learning_rate": 2.578125e-06, "loss": 0.5377197265625, "step": 45 }, { "epoch": 0.006219052608453179, "grad_norm": 8.374534606933594, "learning_rate": 2.63671875e-06, "loss": 0.531005859375, "step": 46 }, { "epoch": 0.006354249404289118, "grad_norm": 8.3541841506958, "learning_rate": 2.6953125e-06, "loss": 0.51959228515625, "step": 47 }, { "epoch": 0.006489446200125057, "grad_norm": 8.071650505065918, "learning_rate": 2.75390625e-06, "loss": 0.51409912109375, "step": 48 }, { "epoch": 0.006624642995960996, "grad_norm": 8.372121810913086, "learning_rate": 2.8125e-06, "loss": 0.49664306640625, "step": 49 }, { "epoch": 0.006759839791796935, "grad_norm": 7.649506568908691, "learning_rate": 2.87109375e-06, "loss": 0.501007080078125, "step": 50 }, { "epoch": 0.006895036587632873, "grad_norm": 7.1636643409729, "learning_rate": 2.9296875e-06, "loss": 0.4979248046875, "step": 51 }, { "epoch": 0.007030233383468812, "grad_norm": 8.19225788116455, "learning_rate": 2.9882812500000002e-06, "loss": 0.463165283203125, "step": 52 }, { "epoch": 0.00716543017930475, "grad_norm": 7.3633198738098145, "learning_rate": 3.046875e-06, "loss": 0.46734619140625, "step": 53 }, { "epoch": 0.007300626975140689, "grad_norm": 6.5898332595825195, "learning_rate": 3.10546875e-06, "loss": 0.47491455078125, "step": 54 }, { "epoch": 0.007435823770976628, "grad_norm": 7.3989129066467285, "learning_rate": 3.1640625000000003e-06, "loss": 0.4415283203125, "step": 55 }, { "epoch": 0.007571020566812567, "grad_norm": 7.007622241973877, "learning_rate": 3.22265625e-06, "loss": 0.43731689453125, "step": 56 }, { "epoch": 0.007706217362648505, "grad_norm": 6.6982245445251465, "learning_rate": 3.28125e-06, "loss": 0.43072509765625, "step": 57 }, { "epoch": 0.007841414158484444, "grad_norm": 6.32157039642334, "learning_rate": 3.3398437500000003e-06, "loss": 0.426788330078125, "step": 58 }, { "epoch": 0.007976610954320383, "grad_norm": 6.236541748046875, "learning_rate": 3.3984375e-06, "loss": 0.415924072265625, "step": 59 }, { "epoch": 0.008111807750156321, "grad_norm": 5.945745468139648, "learning_rate": 3.45703125e-06, "loss": 0.411834716796875, "step": 60 }, { "epoch": 0.00824700454599226, "grad_norm": 6.02635383605957, "learning_rate": 3.515625e-06, "loss": 0.395233154296875, "step": 61 }, { "epoch": 0.008382201341828198, "grad_norm": 5.314901351928711, "learning_rate": 3.57421875e-06, "loss": 0.409942626953125, "step": 62 }, { "epoch": 0.008517398137664138, "grad_norm": 6.0076003074646, "learning_rate": 3.6328125e-06, "loss": 0.370849609375, "step": 63 }, { "epoch": 0.008652594933500075, "grad_norm": 5.681765556335449, "learning_rate": 3.69140625e-06, "loss": 0.368499755859375, "step": 64 }, { "epoch": 0.008787791729336015, "grad_norm": 5.089432239532471, "learning_rate": 3.75e-06, "loss": 0.378570556640625, "step": 65 }, { "epoch": 0.008922988525171954, "grad_norm": 5.389379978179932, "learning_rate": 3.8085937500000002e-06, "loss": 0.35345458984375, "step": 66 }, { "epoch": 0.009058185321007892, "grad_norm": 4.493364334106445, "learning_rate": 3.8671875e-06, "loss": 0.376190185546875, "step": 67 }, { "epoch": 0.009193382116843831, "grad_norm": 4.8557562828063965, "learning_rate": 3.92578125e-06, "loss": 0.345947265625, "step": 68 }, { "epoch": 0.009328578912679769, "grad_norm": 3.7139086723327637, "learning_rate": 3.984375e-06, "loss": 0.381378173828125, "step": 69 }, { "epoch": 0.009463775708515709, "grad_norm": 4.213521480560303, "learning_rate": 4.0429687500000004e-06, "loss": 0.346466064453125, "step": 70 }, { "epoch": 0.009598972504351646, "grad_norm": 3.739476442337036, "learning_rate": 4.1015625e-06, "loss": 0.356231689453125, "step": 71 }, { "epoch": 0.009734169300187586, "grad_norm": 4.2015557289123535, "learning_rate": 4.16015625e-06, "loss": 0.319122314453125, "step": 72 }, { "epoch": 0.009869366096023523, "grad_norm": 3.780320882797241, "learning_rate": 4.21875e-06, "loss": 0.3252105712890625, "step": 73 }, { "epoch": 0.010004562891859463, "grad_norm": 3.6442320346832275, "learning_rate": 4.27734375e-06, "loss": 0.3150482177734375, "step": 74 }, { "epoch": 0.010139759687695402, "grad_norm": 3.2569968700408936, "learning_rate": 4.3359375e-06, "loss": 0.3176116943359375, "step": 75 }, { "epoch": 0.01027495648353134, "grad_norm": 3.287576675415039, "learning_rate": 4.3945312500000005e-06, "loss": 0.3000030517578125, "step": 76 }, { "epoch": 0.01041015327936728, "grad_norm": 3.3834593296051025, "learning_rate": 4.453125e-06, "loss": 0.2772979736328125, "step": 77 }, { "epoch": 0.010545350075203217, "grad_norm": 2.0733957290649414, "learning_rate": 4.51171875e-06, "loss": 0.3407135009765625, "step": 78 }, { "epoch": 0.010680546871039157, "grad_norm": 3.333418369293213, "learning_rate": 4.5703125e-06, "loss": 0.246673583984375, "step": 79 }, { "epoch": 0.010815743666875094, "grad_norm": 3.1509034633636475, "learning_rate": 4.62890625e-06, "loss": 0.2439117431640625, "step": 80 }, { "epoch": 0.010950940462711034, "grad_norm": 2.4727230072021484, "learning_rate": 4.6875000000000004e-06, "loss": 0.2735137939453125, "step": 81 }, { "epoch": 0.011086137258546973, "grad_norm": 2.4484498500823975, "learning_rate": 4.74609375e-06, "loss": 0.259796142578125, "step": 82 }, { "epoch": 0.01122133405438291, "grad_norm": 2.03958797454834, "learning_rate": 4.8046875e-06, "loss": 0.2721710205078125, "step": 83 }, { "epoch": 0.01135653085021885, "grad_norm": 1.9284204244613647, "learning_rate": 4.86328125e-06, "loss": 0.266845703125, "step": 84 }, { "epoch": 0.011491727646054788, "grad_norm": 2.418253183364868, "learning_rate": 4.921875e-06, "loss": 0.216827392578125, "step": 85 }, { "epoch": 0.011626924441890727, "grad_norm": 1.4608980417251587, "learning_rate": 4.98046875e-06, "loss": 0.2739410400390625, "step": 86 }, { "epoch": 0.011762121237726665, "grad_norm": 1.9801719188690186, "learning_rate": 5.0390625000000005e-06, "loss": 0.2206573486328125, "step": 87 }, { "epoch": 0.011897318033562605, "grad_norm": 1.5869640111923218, "learning_rate": 5.09765625e-06, "loss": 0.2376556396484375, "step": 88 }, { "epoch": 0.012032514829398544, "grad_norm": 1.680525302886963, "learning_rate": 5.15625e-06, "loss": 0.215789794921875, "step": 89 }, { "epoch": 0.012167711625234482, "grad_norm": 0.4875149726867676, "learning_rate": 5.21484375e-06, "loss": 0.301513671875, "step": 90 }, { "epoch": 0.012302908421070421, "grad_norm": 0.4136301875114441, "learning_rate": 5.2734375e-06, "loss": 0.29526519775390625, "step": 91 }, { "epoch": 0.012438105216906359, "grad_norm": 1.1960359811782837, "learning_rate": 5.3320312500000004e-06, "loss": 0.219207763671875, "step": 92 }, { "epoch": 0.012573302012742298, "grad_norm": 0.6939961314201355, "learning_rate": 5.390625e-06, "loss": 0.25130462646484375, "step": 93 }, { "epoch": 0.012708498808578236, "grad_norm": 0.48938074707984924, "learning_rate": 5.44921875e-06, "loss": 0.25917816162109375, "step": 94 }, { "epoch": 0.012843695604414175, "grad_norm": 1.3375189304351807, "learning_rate": 5.5078125e-06, "loss": 0.17076873779296875, "step": 95 }, { "epoch": 0.012978892400250115, "grad_norm": 0.21868669986724854, "learning_rate": 5.56640625e-06, "loss": 0.26607513427734375, "step": 96 }, { "epoch": 0.013114089196086053, "grad_norm": 0.37352919578552246, "learning_rate": 5.625e-06, "loss": 0.243377685546875, "step": 97 }, { "epoch": 0.013249285991921992, "grad_norm": 0.1982581615447998, "learning_rate": 5.6835937500000005e-06, "loss": 0.25643157958984375, "step": 98 }, { "epoch": 0.01338448278775793, "grad_norm": 0.8889334201812744, "learning_rate": 5.7421875e-06, "loss": 0.18682861328125, "step": 99 }, { "epoch": 0.01351967958359387, "grad_norm": 0.14042750000953674, "learning_rate": 5.80078125e-06, "loss": 0.2624664306640625, "step": 100 }, { "epoch": 0.013654876379429807, "grad_norm": 0.462993860244751, "learning_rate": 5.859375e-06, "loss": 0.21479034423828125, "step": 101 }, { "epoch": 0.013790073175265746, "grad_norm": 0.6529134511947632, "learning_rate": 5.91796875e-06, "loss": 0.191650390625, "step": 102 }, { "epoch": 0.013925269971101686, "grad_norm": 1.1363776922225952, "learning_rate": 5.9765625000000004e-06, "loss": 0.1736297607421875, "step": 103 }, { "epoch": 0.014060466766937623, "grad_norm": 0.6305656433105469, "learning_rate": 6.03515625e-06, "loss": 0.245880126953125, "step": 104 }, { "epoch": 0.014195663562773563, "grad_norm": 1.309865117073059, "learning_rate": 6.09375e-06, "loss": 0.16274261474609375, "step": 105 }, { "epoch": 0.0143308603586095, "grad_norm": 7.6630096435546875, "learning_rate": 6.15234375e-06, "loss": 0.1968994140625, "step": 106 }, { "epoch": 0.01446605715444544, "grad_norm": 6.000809192657471, "learning_rate": 6.2109375e-06, "loss": 0.23690032958984375, "step": 107 }, { "epoch": 0.014601253950281378, "grad_norm": 1.0207101106643677, "learning_rate": 6.26953125e-06, "loss": 0.21165084838867188, "step": 108 }, { "epoch": 0.014736450746117317, "grad_norm": 1.7974658012390137, "learning_rate": 6.3281250000000005e-06, "loss": 0.20142364501953125, "step": 109 }, { "epoch": 0.014871647541953257, "grad_norm": 1.8257349729537964, "learning_rate": 6.38671875e-06, "loss": 0.13892745971679688, "step": 110 }, { "epoch": 0.015006844337789194, "grad_norm": 1.1191425323486328, "learning_rate": 6.4453125e-06, "loss": 0.18349456787109375, "step": 111 }, { "epoch": 0.015142041133625134, "grad_norm": 1.1117126941680908, "learning_rate": 6.50390625e-06, "loss": 0.17523574829101562, "step": 112 }, { "epoch": 0.015277237929461071, "grad_norm": 1.6576142311096191, "learning_rate": 6.5625e-06, "loss": 0.21729660034179688, "step": 113 }, { "epoch": 0.01541243472529701, "grad_norm": 1.931712031364441, "learning_rate": 6.6210937500000004e-06, "loss": 0.16382980346679688, "step": 114 }, { "epoch": 0.015547631521132949, "grad_norm": 2.039163827896118, "learning_rate": 6.679687500000001e-06, "loss": 0.20510101318359375, "step": 115 }, { "epoch": 0.015682828316968888, "grad_norm": 0.7490442991256714, "learning_rate": 6.73828125e-06, "loss": 0.14907455444335938, "step": 116 }, { "epoch": 0.015818025112804827, "grad_norm": 1.7908422946929932, "learning_rate": 6.796875e-06, "loss": 0.14217758178710938, "step": 117 }, { "epoch": 0.015953221908640767, "grad_norm": 2.6014528274536133, "learning_rate": 6.85546875e-06, "loss": 0.17675018310546875, "step": 118 }, { "epoch": 0.016088418704476703, "grad_norm": 1.5167361497879028, "learning_rate": 6.9140625e-06, "loss": 0.1981201171875, "step": 119 }, { "epoch": 0.016223615500312642, "grad_norm": 3.2530808448791504, "learning_rate": 6.9726562500000005e-06, "loss": 0.15254592895507812, "step": 120 }, { "epoch": 0.01635881229614858, "grad_norm": 1.5909086465835571, "learning_rate": 7.03125e-06, "loss": 0.12428665161132812, "step": 121 }, { "epoch": 0.01649400909198452, "grad_norm": 0.829672634601593, "learning_rate": 7.08984375e-06, "loss": 0.18768692016601562, "step": 122 }, { "epoch": 0.016629205887820457, "grad_norm": 5.443108081817627, "learning_rate": 7.1484375e-06, "loss": 0.19033050537109375, "step": 123 }, { "epoch": 0.016764402683656397, "grad_norm": 5.772006034851074, "learning_rate": 7.20703125e-06, "loss": 0.25315093994140625, "step": 124 }, { "epoch": 0.016899599479492336, "grad_norm": 2.46826171875, "learning_rate": 7.265625e-06, "loss": 0.17133712768554688, "step": 125 }, { "epoch": 0.017034796275328275, "grad_norm": 4.126870155334473, "learning_rate": 7.3242187500000006e-06, "loss": 0.22769737243652344, "step": 126 }, { "epoch": 0.017169993071164215, "grad_norm": 4.511805057525635, "learning_rate": 7.3828125e-06, "loss": 0.20490646362304688, "step": 127 }, { "epoch": 0.01730518986700015, "grad_norm": 1.9656882286071777, "learning_rate": 7.44140625e-06, "loss": 0.16280364990234375, "step": 128 }, { "epoch": 0.01744038666283609, "grad_norm": 2.4700119495391846, "learning_rate": 7.5e-06, "loss": 0.12662124633789062, "step": 129 }, { "epoch": 0.01757558345867203, "grad_norm": 2.5934202671051025, "learning_rate": 7.55859375e-06, "loss": 0.22777557373046875, "step": 130 }, { "epoch": 0.01771078025450797, "grad_norm": 1.3021607398986816, "learning_rate": 7.6171875000000005e-06, "loss": 0.15658187866210938, "step": 131 }, { "epoch": 0.01784597705034391, "grad_norm": 1.5999529361724854, "learning_rate": 7.67578125e-06, "loss": 0.16962051391601562, "step": 132 }, { "epoch": 0.017981173846179845, "grad_norm": 2.5243165493011475, "learning_rate": 7.734375e-06, "loss": 0.20207977294921875, "step": 133 }, { "epoch": 0.018116370642015784, "grad_norm": 0.7855330109596252, "learning_rate": 7.792968750000001e-06, "loss": 0.10043716430664062, "step": 134 }, { "epoch": 0.018251567437851723, "grad_norm": 0.9127803444862366, "learning_rate": 7.8515625e-06, "loss": 0.15300750732421875, "step": 135 }, { "epoch": 0.018386764233687663, "grad_norm": 1.8121592998504639, "learning_rate": 7.91015625e-06, "loss": 0.1618938446044922, "step": 136 }, { "epoch": 0.0185219610295236, "grad_norm": 1.502724051475525, "learning_rate": 7.96875e-06, "loss": 0.1136932373046875, "step": 137 }, { "epoch": 0.018657157825359538, "grad_norm": 4.455524444580078, "learning_rate": 8.02734375e-06, "loss": 0.1859893798828125, "step": 138 }, { "epoch": 0.018792354621195478, "grad_norm": 3.528498888015747, "learning_rate": 8.085937500000001e-06, "loss": 0.13478469848632812, "step": 139 }, { "epoch": 0.018927551417031417, "grad_norm": 0.6664129495620728, "learning_rate": 8.14453125e-06, "loss": 0.1552581787109375, "step": 140 }, { "epoch": 0.019062748212867357, "grad_norm": 1.9160336256027222, "learning_rate": 8.203125e-06, "loss": 0.11435508728027344, "step": 141 }, { "epoch": 0.019197945008703293, "grad_norm": 2.0927252769470215, "learning_rate": 8.26171875e-06, "loss": 0.14061355590820312, "step": 142 }, { "epoch": 0.019333141804539232, "grad_norm": 0.8855465650558472, "learning_rate": 8.3203125e-06, "loss": 0.17259597778320312, "step": 143 }, { "epoch": 0.01946833860037517, "grad_norm": 2.003345012664795, "learning_rate": 8.37890625e-06, "loss": 0.16485595703125, "step": 144 }, { "epoch": 0.01960353539621111, "grad_norm": 1.8325819969177246, "learning_rate": 8.4375e-06, "loss": 0.17378997802734375, "step": 145 }, { "epoch": 0.019738732192047047, "grad_norm": 0.9648417830467224, "learning_rate": 8.49609375e-06, "loss": 0.1458587646484375, "step": 146 }, { "epoch": 0.019873928987882986, "grad_norm": 1.092206597328186, "learning_rate": 8.5546875e-06, "loss": 0.139373779296875, "step": 147 }, { "epoch": 0.020009125783718926, "grad_norm": 2.4173755645751953, "learning_rate": 8.61328125e-06, "loss": 0.17518234252929688, "step": 148 }, { "epoch": 0.020144322579554865, "grad_norm": 1.0320264101028442, "learning_rate": 8.671875e-06, "loss": 0.19220733642578125, "step": 149 }, { "epoch": 0.020279519375390805, "grad_norm": 1.2767515182495117, "learning_rate": 8.73046875e-06, "loss": 0.183380126953125, "step": 150 }, { "epoch": 0.02041471617122674, "grad_norm": 1.3617839813232422, "learning_rate": 8.789062500000001e-06, "loss": 0.14226531982421875, "step": 151 }, { "epoch": 0.02054991296706268, "grad_norm": 1.1166024208068848, "learning_rate": 8.84765625e-06, "loss": 0.07677078247070312, "step": 152 }, { "epoch": 0.02068510976289862, "grad_norm": 1.617315411567688, "learning_rate": 8.90625e-06, "loss": 0.13544845581054688, "step": 153 }, { "epoch": 0.02082030655873456, "grad_norm": 1.0335773229599, "learning_rate": 8.96484375e-06, "loss": 0.12308311462402344, "step": 154 }, { "epoch": 0.0209555033545705, "grad_norm": 0.4307231903076172, "learning_rate": 9.0234375e-06, "loss": 0.06521034240722656, "step": 155 }, { "epoch": 0.021090700150406434, "grad_norm": 2.0309338569641113, "learning_rate": 9.082031250000001e-06, "loss": 0.12531280517578125, "step": 156 }, { "epoch": 0.021225896946242374, "grad_norm": 0.3681405186653137, "learning_rate": 9.140625e-06, "loss": 0.11372756958007812, "step": 157 }, { "epoch": 0.021361093742078313, "grad_norm": 0.9422040581703186, "learning_rate": 9.19921875e-06, "loss": 0.1118621826171875, "step": 158 }, { "epoch": 0.021496290537914253, "grad_norm": 1.0494142770767212, "learning_rate": 9.2578125e-06, "loss": 0.14452552795410156, "step": 159 }, { "epoch": 0.02163148733375019, "grad_norm": 0.5977887511253357, "learning_rate": 9.31640625e-06, "loss": 0.15737152099609375, "step": 160 }, { "epoch": 0.021766684129586128, "grad_norm": 1.815451979637146, "learning_rate": 9.375000000000001e-06, "loss": 0.15938377380371094, "step": 161 }, { "epoch": 0.021901880925422067, "grad_norm": 1.7669764757156372, "learning_rate": 9.43359375e-06, "loss": 0.13509750366210938, "step": 162 }, { "epoch": 0.022037077721258007, "grad_norm": 0.8248637914657593, "learning_rate": 9.4921875e-06, "loss": 0.11051368713378906, "step": 163 }, { "epoch": 0.022172274517093946, "grad_norm": 0.9761935472488403, "learning_rate": 9.55078125e-06, "loss": 0.0999603271484375, "step": 164 }, { "epoch": 0.022307471312929882, "grad_norm": 1.2425165176391602, "learning_rate": 9.609375e-06, "loss": 0.10451698303222656, "step": 165 }, { "epoch": 0.02244266810876582, "grad_norm": 1.2928447723388672, "learning_rate": 9.66796875e-06, "loss": 0.14514541625976562, "step": 166 }, { "epoch": 0.02257786490460176, "grad_norm": 0.67860347032547, "learning_rate": 9.7265625e-06, "loss": 0.1896514892578125, "step": 167 }, { "epoch": 0.0227130617004377, "grad_norm": 0.5783755779266357, "learning_rate": 9.78515625e-06, "loss": 0.1671886444091797, "step": 168 }, { "epoch": 0.02284825849627364, "grad_norm": 2.328864574432373, "learning_rate": 9.84375e-06, "loss": 0.19832992553710938, "step": 169 }, { "epoch": 0.022983455292109576, "grad_norm": 0.7465437650680542, "learning_rate": 9.90234375e-06, "loss": 0.097137451171875, "step": 170 }, { "epoch": 0.023118652087945515, "grad_norm": 0.47173887491226196, "learning_rate": 9.9609375e-06, "loss": 0.09764289855957031, "step": 171 }, { "epoch": 0.023253848883781455, "grad_norm": 0.4709882438182831, "learning_rate": 1.001953125e-05, "loss": 0.0984792709350586, "step": 172 }, { "epoch": 0.023389045679617394, "grad_norm": 1.0793781280517578, "learning_rate": 1.0078125000000001e-05, "loss": 0.16312408447265625, "step": 173 }, { "epoch": 0.02352424247545333, "grad_norm": 0.39833131432533264, "learning_rate": 1.013671875e-05, "loss": 0.08628463745117188, "step": 174 }, { "epoch": 0.02365943927128927, "grad_norm": 1.002116084098816, "learning_rate": 1.01953125e-05, "loss": 0.125457763671875, "step": 175 }, { "epoch": 0.02379463606712521, "grad_norm": 1.1118419170379639, "learning_rate": 1.025390625e-05, "loss": 0.09708786010742188, "step": 176 }, { "epoch": 0.02392983286296115, "grad_norm": 0.7531958818435669, "learning_rate": 1.03125e-05, "loss": 0.1278400421142578, "step": 177 }, { "epoch": 0.024065029658797088, "grad_norm": 0.866471529006958, "learning_rate": 1.0371093750000001e-05, "loss": 0.14974689483642578, "step": 178 }, { "epoch": 0.024200226454633024, "grad_norm": 1.445363163948059, "learning_rate": 1.04296875e-05, "loss": 0.15588951110839844, "step": 179 }, { "epoch": 0.024335423250468963, "grad_norm": 0.8952563405036926, "learning_rate": 1.048828125e-05, "loss": 0.11579322814941406, "step": 180 }, { "epoch": 0.024470620046304903, "grad_norm": 1.4956169128417969, "learning_rate": 1.0546875e-05, "loss": 0.16865921020507812, "step": 181 }, { "epoch": 0.024605816842140842, "grad_norm": 0.9510613679885864, "learning_rate": 1.060546875e-05, "loss": 0.11063385009765625, "step": 182 }, { "epoch": 0.024741013637976778, "grad_norm": 0.6214854121208191, "learning_rate": 1.0664062500000001e-05, "loss": 0.1398639678955078, "step": 183 }, { "epoch": 0.024876210433812718, "grad_norm": 0.7311033010482788, "learning_rate": 1.072265625e-05, "loss": 0.11818122863769531, "step": 184 }, { "epoch": 0.025011407229648657, "grad_norm": 1.4142619371414185, "learning_rate": 1.078125e-05, "loss": 0.1476421356201172, "step": 185 }, { "epoch": 0.025146604025484597, "grad_norm": 1.758416771888733, "learning_rate": 1.083984375e-05, "loss": 0.13826560974121094, "step": 186 }, { "epoch": 0.025281800821320536, "grad_norm": 2.2945053577423096, "learning_rate": 1.08984375e-05, "loss": 0.1462249755859375, "step": 187 }, { "epoch": 0.025416997617156472, "grad_norm": 1.3065208196640015, "learning_rate": 1.095703125e-05, "loss": 0.12948226928710938, "step": 188 }, { "epoch": 0.02555219441299241, "grad_norm": 2.0528178215026855, "learning_rate": 1.1015625e-05, "loss": 0.1477680206298828, "step": 189 }, { "epoch": 0.02568739120882835, "grad_norm": 1.039638876914978, "learning_rate": 1.1074218750000001e-05, "loss": 0.08852577209472656, "step": 190 }, { "epoch": 0.02582258800466429, "grad_norm": 0.7299044728279114, "learning_rate": 1.11328125e-05, "loss": 0.10076332092285156, "step": 191 }, { "epoch": 0.02595778480050023, "grad_norm": 1.6996442079544067, "learning_rate": 1.119140625e-05, "loss": 0.14342498779296875, "step": 192 }, { "epoch": 0.026092981596336166, "grad_norm": 0.8143086433410645, "learning_rate": 1.125e-05, "loss": 0.09293174743652344, "step": 193 }, { "epoch": 0.026228178392172105, "grad_norm": 1.3359870910644531, "learning_rate": 1.130859375e-05, "loss": 0.12241172790527344, "step": 194 }, { "epoch": 0.026363375188008045, "grad_norm": 1.9629666805267334, "learning_rate": 1.1367187500000001e-05, "loss": 0.15043258666992188, "step": 195 }, { "epoch": 0.026498571983843984, "grad_norm": 0.39924514293670654, "learning_rate": 1.142578125e-05, "loss": 0.08169937133789062, "step": 196 }, { "epoch": 0.02663376877967992, "grad_norm": 1.4615709781646729, "learning_rate": 1.1484375e-05, "loss": 0.08629798889160156, "step": 197 }, { "epoch": 0.02676896557551586, "grad_norm": 0.9599554538726807, "learning_rate": 1.154296875e-05, "loss": 0.11198616027832031, "step": 198 }, { "epoch": 0.0269041623713518, "grad_norm": 2.5499606132507324, "learning_rate": 1.16015625e-05, "loss": 0.13975954055786133, "step": 199 }, { "epoch": 0.02703935916718774, "grad_norm": 3.666128158569336, "learning_rate": 1.1660156250000001e-05, "loss": 0.22464752197265625, "step": 200 }, { "epoch": 0.027174555963023678, "grad_norm": 0.8818196654319763, "learning_rate": 1.171875e-05, "loss": 0.11071205139160156, "step": 201 }, { "epoch": 0.027309752758859614, "grad_norm": 0.9403067827224731, "learning_rate": 1.177734375e-05, "loss": 0.11906719207763672, "step": 202 }, { "epoch": 0.027444949554695553, "grad_norm": 0.9900426864624023, "learning_rate": 1.18359375e-05, "loss": 0.07979202270507812, "step": 203 }, { "epoch": 0.027580146350531493, "grad_norm": 0.8115234971046448, "learning_rate": 1.189453125e-05, "loss": 0.11782455444335938, "step": 204 }, { "epoch": 0.027715343146367432, "grad_norm": 0.45454850792884827, "learning_rate": 1.1953125000000001e-05, "loss": 0.09505510330200195, "step": 205 }, { "epoch": 0.02785053994220337, "grad_norm": 1.3852804899215698, "learning_rate": 1.201171875e-05, "loss": 0.07553958892822266, "step": 206 }, { "epoch": 0.027985736738039307, "grad_norm": 2.505516767501831, "learning_rate": 1.20703125e-05, "loss": 0.2158184051513672, "step": 207 }, { "epoch": 0.028120933533875247, "grad_norm": 1.5190612077713013, "learning_rate": 1.212890625e-05, "loss": 0.14534378051757812, "step": 208 }, { "epoch": 0.028256130329711186, "grad_norm": 0.30262491106987, "learning_rate": 1.21875e-05, "loss": 0.11007308959960938, "step": 209 }, { "epoch": 0.028391327125547126, "grad_norm": 2.6057796478271484, "learning_rate": 1.224609375e-05, "loss": 0.20543670654296875, "step": 210 }, { "epoch": 0.02852652392138306, "grad_norm": 0.42562001943588257, "learning_rate": 1.23046875e-05, "loss": 0.10055351257324219, "step": 211 }, { "epoch": 0.028661720717219, "grad_norm": 0.7556141018867493, "learning_rate": 1.2363281250000001e-05, "loss": 0.08016586303710938, "step": 212 }, { "epoch": 0.02879691751305494, "grad_norm": 0.3173273205757141, "learning_rate": 1.2421875e-05, "loss": 0.11756706237792969, "step": 213 }, { "epoch": 0.02893211430889088, "grad_norm": 0.25473323464393616, "learning_rate": 1.248046875e-05, "loss": 0.07790184020996094, "step": 214 }, { "epoch": 0.02906731110472682, "grad_norm": 0.4120863378047943, "learning_rate": 1.25390625e-05, "loss": 0.10120487213134766, "step": 215 }, { "epoch": 0.029202507900562755, "grad_norm": 0.38646212220191956, "learning_rate": 1.259765625e-05, "loss": 0.1260519027709961, "step": 216 }, { "epoch": 0.029337704696398695, "grad_norm": 0.46892306208610535, "learning_rate": 1.2656250000000001e-05, "loss": 0.10096931457519531, "step": 217 }, { "epoch": 0.029472901492234634, "grad_norm": 1.1855638027191162, "learning_rate": 1.271484375e-05, "loss": 0.08810997009277344, "step": 218 }, { "epoch": 0.029608098288070574, "grad_norm": 0.3958824574947357, "learning_rate": 1.27734375e-05, "loss": 0.09965038299560547, "step": 219 }, { "epoch": 0.029743295083906513, "grad_norm": 0.7300817370414734, "learning_rate": 1.283203125e-05, "loss": 0.07414054870605469, "step": 220 }, { "epoch": 0.02987849187974245, "grad_norm": 1.615120768547058, "learning_rate": 1.2890625e-05, "loss": 0.10107898712158203, "step": 221 }, { "epoch": 0.03001368867557839, "grad_norm": 0.8337105512619019, "learning_rate": 1.2949218750000001e-05, "loss": 0.1640777587890625, "step": 222 }, { "epoch": 0.030148885471414328, "grad_norm": 0.8477329015731812, "learning_rate": 1.30078125e-05, "loss": 0.07065868377685547, "step": 223 }, { "epoch": 0.030284082267250267, "grad_norm": 0.8389713764190674, "learning_rate": 1.306640625e-05, "loss": 0.08960819244384766, "step": 224 }, { "epoch": 0.030419279063086203, "grad_norm": 0.8623062968254089, "learning_rate": 1.3125e-05, "loss": 0.09574508666992188, "step": 225 }, { "epoch": 0.030554475858922143, "grad_norm": 1.7276952266693115, "learning_rate": 1.318359375e-05, "loss": 0.1452322006225586, "step": 226 }, { "epoch": 0.030689672654758082, "grad_norm": 2.4967522621154785, "learning_rate": 1.3242187500000001e-05, "loss": 0.15061664581298828, "step": 227 }, { "epoch": 0.03082486945059402, "grad_norm": 0.8809515237808228, "learning_rate": 1.330078125e-05, "loss": 0.11009407043457031, "step": 228 }, { "epoch": 0.03096006624642996, "grad_norm": 1.7495862245559692, "learning_rate": 1.3359375000000001e-05, "loss": 0.10504150390625, "step": 229 }, { "epoch": 0.031095263042265897, "grad_norm": 1.010521411895752, "learning_rate": 1.341796875e-05, "loss": 0.06667327880859375, "step": 230 }, { "epoch": 0.031230459838101837, "grad_norm": 0.7836189270019531, "learning_rate": 1.34765625e-05, "loss": 0.09214496612548828, "step": 231 }, { "epoch": 0.031365656633937776, "grad_norm": 0.7970775961875916, "learning_rate": 1.353515625e-05, "loss": 0.13988971710205078, "step": 232 }, { "epoch": 0.03150085342977371, "grad_norm": 3.22719407081604, "learning_rate": 1.359375e-05, "loss": 0.1107487678527832, "step": 233 }, { "epoch": 0.031636050225609655, "grad_norm": 4.931375503540039, "learning_rate": 1.3652343750000001e-05, "loss": 0.14489173889160156, "step": 234 }, { "epoch": 0.03177124702144559, "grad_norm": 1.9380743503570557, "learning_rate": 1.37109375e-05, "loss": 0.1752605438232422, "step": 235 }, { "epoch": 0.031906443817281534, "grad_norm": 0.9338816404342651, "learning_rate": 1.376953125e-05, "loss": 0.11135673522949219, "step": 236 }, { "epoch": 0.03204164061311747, "grad_norm": 0.9291431307792664, "learning_rate": 1.3828125e-05, "loss": 0.11193370819091797, "step": 237 }, { "epoch": 0.032176837408953406, "grad_norm": 0.6870521306991577, "learning_rate": 1.388671875e-05, "loss": 0.10838890075683594, "step": 238 }, { "epoch": 0.03231203420478935, "grad_norm": 0.7145580053329468, "learning_rate": 1.3945312500000001e-05, "loss": 0.09445381164550781, "step": 239 }, { "epoch": 0.032447231000625285, "grad_norm": 0.21662800014019012, "learning_rate": 1.400390625e-05, "loss": 0.0929708480834961, "step": 240 }, { "epoch": 0.03258242779646122, "grad_norm": 1.7928955554962158, "learning_rate": 1.40625e-05, "loss": 0.1270885467529297, "step": 241 }, { "epoch": 0.03271762459229716, "grad_norm": 1.30325186252594, "learning_rate": 1.412109375e-05, "loss": 0.08766555786132812, "step": 242 }, { "epoch": 0.0328528213881331, "grad_norm": 5.986721515655518, "learning_rate": 1.41796875e-05, "loss": 0.09464168548583984, "step": 243 }, { "epoch": 0.03298801818396904, "grad_norm": 8.504470825195312, "learning_rate": 1.4238281250000001e-05, "loss": 0.1564788818359375, "step": 244 }, { "epoch": 0.03312321497980498, "grad_norm": 1.1269668340682983, "learning_rate": 1.4296875e-05, "loss": 0.06866836547851562, "step": 245 }, { "epoch": 0.033258411775640914, "grad_norm": 2.1340441703796387, "learning_rate": 1.435546875e-05, "loss": 0.1367034912109375, "step": 246 }, { "epoch": 0.03339360857147686, "grad_norm": 1.495689034461975, "learning_rate": 1.44140625e-05, "loss": 0.11866950988769531, "step": 247 }, { "epoch": 0.03352880536731279, "grad_norm": 0.45152899622917175, "learning_rate": 1.447265625e-05, "loss": 0.0792856216430664, "step": 248 }, { "epoch": 0.033664002163148736, "grad_norm": 0.5372416973114014, "learning_rate": 1.453125e-05, "loss": 0.08746719360351562, "step": 249 }, { "epoch": 0.03379919895898467, "grad_norm": 0.32935744524002075, "learning_rate": 1.458984375e-05, "loss": 0.10184097290039062, "step": 250 }, { "epoch": 0.03393439575482061, "grad_norm": 0.4165351688861847, "learning_rate": 1.4648437500000001e-05, "loss": 0.10245704650878906, "step": 251 }, { "epoch": 0.03406959255065655, "grad_norm": 0.3766869604587555, "learning_rate": 1.470703125e-05, "loss": 0.0938405990600586, "step": 252 }, { "epoch": 0.03420478934649249, "grad_norm": 0.4120754301548004, "learning_rate": 1.4765625e-05, "loss": 0.13604450225830078, "step": 253 }, { "epoch": 0.03433998614232843, "grad_norm": 0.5975791811943054, "learning_rate": 1.482421875e-05, "loss": 0.08067655563354492, "step": 254 }, { "epoch": 0.034475182938164366, "grad_norm": 0.8303422331809998, "learning_rate": 1.48828125e-05, "loss": 0.11191844940185547, "step": 255 }, { "epoch": 0.0346103797340003, "grad_norm": 0.38064515590667725, "learning_rate": 1.4941406250000001e-05, "loss": 0.1566314697265625, "step": 256 }, { "epoch": 0.034745576529836245, "grad_norm": 2.072690010070801, "learning_rate": 1.5e-05, "loss": 0.10210418701171875, "step": 257 }, { "epoch": 0.03488077332567218, "grad_norm": 0.5118513703346252, "learning_rate": 1.505859375e-05, "loss": 0.11151313781738281, "step": 258 }, { "epoch": 0.03501597012150812, "grad_norm": 0.8528406620025635, "learning_rate": 1.51171875e-05, "loss": 0.10950851440429688, "step": 259 }, { "epoch": 0.03515116691734406, "grad_norm": 0.3790138363838196, "learning_rate": 1.517578125e-05, "loss": 0.07526636123657227, "step": 260 }, { "epoch": 0.035286363713179995, "grad_norm": 1.4999076128005981, "learning_rate": 1.5234375000000001e-05, "loss": 0.1217947006225586, "step": 261 }, { "epoch": 0.03542156050901594, "grad_norm": 1.1162285804748535, "learning_rate": 1.529296875e-05, "loss": 0.15423822402954102, "step": 262 }, { "epoch": 0.035556757304851874, "grad_norm": 0.17001789808273315, "learning_rate": 1.53515625e-05, "loss": 0.08667278289794922, "step": 263 }, { "epoch": 0.03569195410068782, "grad_norm": 1.3194137811660767, "learning_rate": 1.541015625e-05, "loss": 0.11114215850830078, "step": 264 }, { "epoch": 0.03582715089652375, "grad_norm": 0.6932324171066284, "learning_rate": 1.546875e-05, "loss": 0.10363578796386719, "step": 265 }, { "epoch": 0.03596234769235969, "grad_norm": 0.8319248557090759, "learning_rate": 1.552734375e-05, "loss": 0.1459178924560547, "step": 266 }, { "epoch": 0.03609754448819563, "grad_norm": 1.287670373916626, "learning_rate": 1.5585937500000002e-05, "loss": 0.08773994445800781, "step": 267 }, { "epoch": 0.03623274128403157, "grad_norm": 0.494658887386322, "learning_rate": 1.564453125e-05, "loss": 0.08005809783935547, "step": 268 }, { "epoch": 0.036367938079867504, "grad_norm": 0.7535601258277893, "learning_rate": 1.5703125e-05, "loss": 0.1199045181274414, "step": 269 }, { "epoch": 0.03650313487570345, "grad_norm": 0.422181636095047, "learning_rate": 1.576171875e-05, "loss": 0.08992600440979004, "step": 270 }, { "epoch": 0.03663833167153938, "grad_norm": 0.5136080980300903, "learning_rate": 1.58203125e-05, "loss": 0.08193111419677734, "step": 271 }, { "epoch": 0.036773528467375326, "grad_norm": 0.3550981283187866, "learning_rate": 1.587890625e-05, "loss": 0.0819244384765625, "step": 272 }, { "epoch": 0.03690872526321126, "grad_norm": 0.8455483317375183, "learning_rate": 1.59375e-05, "loss": 0.10983848571777344, "step": 273 }, { "epoch": 0.0370439220590472, "grad_norm": 0.4411819279193878, "learning_rate": 1.599609375e-05, "loss": 0.06752872467041016, "step": 274 }, { "epoch": 0.03717911885488314, "grad_norm": 1.496036171913147, "learning_rate": 1.60546875e-05, "loss": 0.13500022888183594, "step": 275 }, { "epoch": 0.037314315650719077, "grad_norm": 0.26362380385398865, "learning_rate": 1.611328125e-05, "loss": 0.07960224151611328, "step": 276 }, { "epoch": 0.03744951244655502, "grad_norm": 0.9426568150520325, "learning_rate": 1.6171875000000002e-05, "loss": 0.13000869750976562, "step": 277 }, { "epoch": 0.037584709242390955, "grad_norm": 0.37183016538619995, "learning_rate": 1.623046875e-05, "loss": 0.13940811157226562, "step": 278 }, { "epoch": 0.03771990603822689, "grad_norm": 1.4855290651321411, "learning_rate": 1.62890625e-05, "loss": 0.13234710693359375, "step": 279 }, { "epoch": 0.037855102834062834, "grad_norm": 0.38084182143211365, "learning_rate": 1.634765625e-05, "loss": 0.07734251022338867, "step": 280 }, { "epoch": 0.03799029962989877, "grad_norm": 0.6273846626281738, "learning_rate": 1.640625e-05, "loss": 0.10966873168945312, "step": 281 }, { "epoch": 0.03812549642573471, "grad_norm": 0.2489965260028839, "learning_rate": 1.646484375e-05, "loss": 0.1035623550415039, "step": 282 }, { "epoch": 0.03826069322157065, "grad_norm": 0.47680971026420593, "learning_rate": 1.65234375e-05, "loss": 0.12607383728027344, "step": 283 }, { "epoch": 0.038395890017406585, "grad_norm": 0.4228799343109131, "learning_rate": 1.6582031250000002e-05, "loss": 0.10958576202392578, "step": 284 }, { "epoch": 0.03853108681324253, "grad_norm": 0.27856719493865967, "learning_rate": 1.6640625e-05, "loss": 0.12144136428833008, "step": 285 }, { "epoch": 0.038666283609078464, "grad_norm": 0.6729941964149475, "learning_rate": 1.669921875e-05, "loss": 0.10310649871826172, "step": 286 }, { "epoch": 0.03880148040491441, "grad_norm": 0.21593409776687622, "learning_rate": 1.67578125e-05, "loss": 0.12016677856445312, "step": 287 }, { "epoch": 0.03893667720075034, "grad_norm": 0.40790697932243347, "learning_rate": 1.681640625e-05, "loss": 0.09090614318847656, "step": 288 }, { "epoch": 0.03907187399658628, "grad_norm": 0.2896917760372162, "learning_rate": 1.6875e-05, "loss": 0.040282249450683594, "step": 289 }, { "epoch": 0.03920707079242222, "grad_norm": 0.3372872471809387, "learning_rate": 1.693359375e-05, "loss": 0.09852027893066406, "step": 290 }, { "epoch": 0.03934226758825816, "grad_norm": 1.4609146118164062, "learning_rate": 1.69921875e-05, "loss": 0.13547325134277344, "step": 291 }, { "epoch": 0.039477464384094094, "grad_norm": 0.720673143863678, "learning_rate": 1.705078125e-05, "loss": 0.0884857177734375, "step": 292 }, { "epoch": 0.03961266117993004, "grad_norm": 0.6555048227310181, "learning_rate": 1.7109375e-05, "loss": 0.1395282745361328, "step": 293 }, { "epoch": 0.03974785797576597, "grad_norm": 0.16606144607067108, "learning_rate": 1.7167968750000002e-05, "loss": 0.10356521606445312, "step": 294 }, { "epoch": 0.039883054771601915, "grad_norm": 0.3708697259426117, "learning_rate": 1.72265625e-05, "loss": 0.07172584533691406, "step": 295 }, { "epoch": 0.04001825156743785, "grad_norm": 1.0216091871261597, "learning_rate": 1.728515625e-05, "loss": 0.13167762756347656, "step": 296 }, { "epoch": 0.04015344836327379, "grad_norm": 0.7634631991386414, "learning_rate": 1.734375e-05, "loss": 0.08865737915039062, "step": 297 }, { "epoch": 0.04028864515910973, "grad_norm": 1.1382628679275513, "learning_rate": 1.740234375e-05, "loss": 0.11255073547363281, "step": 298 }, { "epoch": 0.040423841954945666, "grad_norm": 0.41765883564949036, "learning_rate": 1.74609375e-05, "loss": 0.07486391067504883, "step": 299 }, { "epoch": 0.04055903875078161, "grad_norm": 0.7868423461914062, "learning_rate": 1.751953125e-05, "loss": 0.10659217834472656, "step": 300 }, { "epoch": 0.040694235546617545, "grad_norm": 0.7546616792678833, "learning_rate": 1.7578125000000002e-05, "loss": 0.08617591857910156, "step": 301 }, { "epoch": 0.04082943234245348, "grad_norm": 0.6418368220329285, "learning_rate": 1.763671875e-05, "loss": 0.1351461410522461, "step": 302 }, { "epoch": 0.040964629138289424, "grad_norm": 0.463878870010376, "learning_rate": 1.76953125e-05, "loss": 0.12404346466064453, "step": 303 }, { "epoch": 0.04109982593412536, "grad_norm": 0.5326356291770935, "learning_rate": 1.775390625e-05, "loss": 0.08386039733886719, "step": 304 }, { "epoch": 0.0412350227299613, "grad_norm": 0.3665894865989685, "learning_rate": 1.78125e-05, "loss": 0.08456230163574219, "step": 305 }, { "epoch": 0.04137021952579724, "grad_norm": 0.6875031590461731, "learning_rate": 1.787109375e-05, "loss": 0.09281349182128906, "step": 306 }, { "epoch": 0.041505416321633175, "grad_norm": 0.9184910655021667, "learning_rate": 1.79296875e-05, "loss": 0.0856485366821289, "step": 307 }, { "epoch": 0.04164061311746912, "grad_norm": 0.6181513071060181, "learning_rate": 1.798828125e-05, "loss": 0.0825490951538086, "step": 308 }, { "epoch": 0.041775809913305054, "grad_norm": 0.8047035336494446, "learning_rate": 1.8046875e-05, "loss": 0.0860300064086914, "step": 309 }, { "epoch": 0.041911006709141, "grad_norm": 1.3716039657592773, "learning_rate": 1.810546875e-05, "loss": 0.09758186340332031, "step": 310 }, { "epoch": 0.04204620350497693, "grad_norm": 0.2594820559024811, "learning_rate": 1.8164062500000002e-05, "loss": 0.042504310607910156, "step": 311 }, { "epoch": 0.04218140030081287, "grad_norm": 0.5145635008811951, "learning_rate": 1.822265625e-05, "loss": 0.13232803344726562, "step": 312 }, { "epoch": 0.04231659709664881, "grad_norm": 0.4917304217815399, "learning_rate": 1.828125e-05, "loss": 0.08692359924316406, "step": 313 }, { "epoch": 0.04245179389248475, "grad_norm": 0.3924074172973633, "learning_rate": 1.833984375e-05, "loss": 0.12353849411010742, "step": 314 }, { "epoch": 0.04258699068832068, "grad_norm": 0.33571773767471313, "learning_rate": 1.83984375e-05, "loss": 0.0854043960571289, "step": 315 }, { "epoch": 0.042722187484156626, "grad_norm": 0.21847468614578247, "learning_rate": 1.845703125e-05, "loss": 0.08770465850830078, "step": 316 }, { "epoch": 0.04285738427999256, "grad_norm": 0.6506348252296448, "learning_rate": 1.8515625e-05, "loss": 0.11607933044433594, "step": 317 }, { "epoch": 0.042992581075828505, "grad_norm": 1.0390257835388184, "learning_rate": 1.8574218750000002e-05, "loss": 0.10783767700195312, "step": 318 }, { "epoch": 0.04312777787166444, "grad_norm": 0.21384234726428986, "learning_rate": 1.86328125e-05, "loss": 0.09239387512207031, "step": 319 }, { "epoch": 0.04326297466750038, "grad_norm": 0.2513214349746704, "learning_rate": 1.869140625e-05, "loss": 0.0860748291015625, "step": 320 }, { "epoch": 0.04339817146333632, "grad_norm": 0.13531465828418732, "learning_rate": 1.8750000000000002e-05, "loss": 0.06541728973388672, "step": 321 }, { "epoch": 0.043533368259172256, "grad_norm": 0.5940647721290588, "learning_rate": 1.880859375e-05, "loss": 0.09384822845458984, "step": 322 }, { "epoch": 0.0436685650550082, "grad_norm": 0.24893070757389069, "learning_rate": 1.88671875e-05, "loss": 0.0641794204711914, "step": 323 }, { "epoch": 0.043803761850844135, "grad_norm": 0.24023190140724182, "learning_rate": 1.892578125e-05, "loss": 0.1150503158569336, "step": 324 }, { "epoch": 0.04393895864668007, "grad_norm": 0.36042916774749756, "learning_rate": 1.8984375e-05, "loss": 0.09014892578125, "step": 325 }, { "epoch": 0.044074155442516014, "grad_norm": 2.0375735759735107, "learning_rate": 1.904296875e-05, "loss": 0.10724353790283203, "step": 326 }, { "epoch": 0.04420935223835195, "grad_norm": 0.5884652733802795, "learning_rate": 1.91015625e-05, "loss": 0.08040904998779297, "step": 327 }, { "epoch": 0.04434454903418789, "grad_norm": 0.2396320104598999, "learning_rate": 1.9160156250000002e-05, "loss": 0.0972442626953125, "step": 328 }, { "epoch": 0.04447974583002383, "grad_norm": 0.6353582143783569, "learning_rate": 1.921875e-05, "loss": 0.09917926788330078, "step": 329 }, { "epoch": 0.044614942625859765, "grad_norm": 0.4164353907108307, "learning_rate": 1.927734375e-05, "loss": 0.1053004264831543, "step": 330 }, { "epoch": 0.04475013942169571, "grad_norm": 0.8356876969337463, "learning_rate": 1.93359375e-05, "loss": 0.10482120513916016, "step": 331 }, { "epoch": 0.04488533621753164, "grad_norm": 0.21361364424228668, "learning_rate": 1.939453125e-05, "loss": 0.11343526840209961, "step": 332 }, { "epoch": 0.045020533013367586, "grad_norm": 0.14705726504325867, "learning_rate": 1.9453125e-05, "loss": 0.052163124084472656, "step": 333 }, { "epoch": 0.04515572980920352, "grad_norm": 0.46907126903533936, "learning_rate": 1.951171875e-05, "loss": 0.08867025375366211, "step": 334 }, { "epoch": 0.04529092660503946, "grad_norm": 0.7444183230400085, "learning_rate": 1.95703125e-05, "loss": 0.1376323699951172, "step": 335 }, { "epoch": 0.0454261234008754, "grad_norm": 0.8058706521987915, "learning_rate": 1.962890625e-05, "loss": 0.0719766616821289, "step": 336 }, { "epoch": 0.04556132019671134, "grad_norm": 0.5213733315467834, "learning_rate": 1.96875e-05, "loss": 0.11151313781738281, "step": 337 }, { "epoch": 0.04569651699254728, "grad_norm": 0.5198162794113159, "learning_rate": 1.9746093750000002e-05, "loss": 0.15637779235839844, "step": 338 }, { "epoch": 0.045831713788383216, "grad_norm": 0.8097032308578491, "learning_rate": 1.98046875e-05, "loss": 0.1443328857421875, "step": 339 }, { "epoch": 0.04596691058421915, "grad_norm": 0.24106748402118683, "learning_rate": 1.986328125e-05, "loss": 0.08120203018188477, "step": 340 }, { "epoch": 0.046102107380055095, "grad_norm": 0.39901643991470337, "learning_rate": 1.9921875e-05, "loss": 0.07796096801757812, "step": 341 }, { "epoch": 0.04623730417589103, "grad_norm": 0.5847902297973633, "learning_rate": 1.998046875e-05, "loss": 0.10563230514526367, "step": 342 }, { "epoch": 0.04637250097172697, "grad_norm": 0.5063965916633606, "learning_rate": 2.00390625e-05, "loss": 0.07470321655273438, "step": 343 }, { "epoch": 0.04650769776756291, "grad_norm": 0.1668115109205246, "learning_rate": 2.009765625e-05, "loss": 0.0937337875366211, "step": 344 }, { "epoch": 0.046642894563398846, "grad_norm": 0.5499696731567383, "learning_rate": 2.0156250000000002e-05, "loss": 0.11548519134521484, "step": 345 }, { "epoch": 0.04677809135923479, "grad_norm": 0.7989301085472107, "learning_rate": 2.021484375e-05, "loss": 0.09680986404418945, "step": 346 }, { "epoch": 0.046913288155070725, "grad_norm": 0.5955407619476318, "learning_rate": 2.02734375e-05, "loss": 0.09231758117675781, "step": 347 }, { "epoch": 0.04704848495090666, "grad_norm": 0.4440248906612396, "learning_rate": 2.033203125e-05, "loss": 0.11703872680664062, "step": 348 }, { "epoch": 0.0471836817467426, "grad_norm": 0.34554049372673035, "learning_rate": 2.0390625e-05, "loss": 0.07292795181274414, "step": 349 }, { "epoch": 0.04731887854257854, "grad_norm": 0.6034355163574219, "learning_rate": 2.044921875e-05, "loss": 0.07951879501342773, "step": 350 }, { "epoch": 0.04745407533841448, "grad_norm": 1.147836685180664, "learning_rate": 2.05078125e-05, "loss": 0.1187601089477539, "step": 351 }, { "epoch": 0.04758927213425042, "grad_norm": 0.35816168785095215, "learning_rate": 2.056640625e-05, "loss": 0.1359539031982422, "step": 352 }, { "epoch": 0.047724468930086354, "grad_norm": 0.25260066986083984, "learning_rate": 2.0625e-05, "loss": 0.08533668518066406, "step": 353 }, { "epoch": 0.0478596657259223, "grad_norm": 0.15749254822731018, "learning_rate": 2.068359375e-05, "loss": 0.1195363998413086, "step": 354 }, { "epoch": 0.04799486252175823, "grad_norm": 0.18505169451236725, "learning_rate": 2.0742187500000002e-05, "loss": 0.09904670715332031, "step": 355 }, { "epoch": 0.048130059317594176, "grad_norm": 0.16219796240329742, "learning_rate": 2.080078125e-05, "loss": 0.0858449935913086, "step": 356 }, { "epoch": 0.04826525611343011, "grad_norm": 0.19502174854278564, "learning_rate": 2.0859375e-05, "loss": 0.09736824035644531, "step": 357 }, { "epoch": 0.04840045290926605, "grad_norm": 0.43384259939193726, "learning_rate": 2.091796875e-05, "loss": 0.126800537109375, "step": 358 }, { "epoch": 0.04853564970510199, "grad_norm": 0.2482476532459259, "learning_rate": 2.09765625e-05, "loss": 0.09348392486572266, "step": 359 }, { "epoch": 0.04867084650093793, "grad_norm": 0.20153117179870605, "learning_rate": 2.103515625e-05, "loss": 0.0736231803894043, "step": 360 }, { "epoch": 0.04880604329677387, "grad_norm": 0.41558927297592163, "learning_rate": 2.109375e-05, "loss": 0.10034418106079102, "step": 361 }, { "epoch": 0.048941240092609806, "grad_norm": 0.3068966567516327, "learning_rate": 2.1152343750000002e-05, "loss": 0.1041402816772461, "step": 362 }, { "epoch": 0.04907643688844574, "grad_norm": 0.27594423294067383, "learning_rate": 2.12109375e-05, "loss": 0.09015178680419922, "step": 363 }, { "epoch": 0.049211633684281685, "grad_norm": 0.7957682609558105, "learning_rate": 2.126953125e-05, "loss": 0.09404563903808594, "step": 364 }, { "epoch": 0.04934683048011762, "grad_norm": 1.0707272291183472, "learning_rate": 2.1328125000000002e-05, "loss": 0.06875801086425781, "step": 365 }, { "epoch": 0.049482027275953556, "grad_norm": 0.48567718267440796, "learning_rate": 2.138671875e-05, "loss": 0.08456039428710938, "step": 366 }, { "epoch": 0.0496172240717895, "grad_norm": 0.870097815990448, "learning_rate": 2.14453125e-05, "loss": 0.10132884979248047, "step": 367 }, { "epoch": 0.049752420867625435, "grad_norm": 0.6377391219139099, "learning_rate": 2.150390625e-05, "loss": 0.07217788696289062, "step": 368 }, { "epoch": 0.04988761766346138, "grad_norm": 0.788158655166626, "learning_rate": 2.15625e-05, "loss": 0.11425590515136719, "step": 369 }, { "epoch": 0.050022814459297314, "grad_norm": 0.21216727793216705, "learning_rate": 2.162109375e-05, "loss": 0.07319450378417969, "step": 370 }, { "epoch": 0.05015801125513325, "grad_norm": 0.25803834199905396, "learning_rate": 2.16796875e-05, "loss": 0.12201118469238281, "step": 371 }, { "epoch": 0.05029320805096919, "grad_norm": 0.41703903675079346, "learning_rate": 2.1738281250000002e-05, "loss": 0.08775997161865234, "step": 372 }, { "epoch": 0.05042840484680513, "grad_norm": 0.20716947317123413, "learning_rate": 2.1796875e-05, "loss": 0.11542510986328125, "step": 373 }, { "epoch": 0.05056360164264107, "grad_norm": 0.18524552881717682, "learning_rate": 2.185546875e-05, "loss": 0.06212902069091797, "step": 374 }, { "epoch": 0.05069879843847701, "grad_norm": 0.12163855135440826, "learning_rate": 2.19140625e-05, "loss": 0.05814361572265625, "step": 375 }, { "epoch": 0.050833995234312944, "grad_norm": 0.15177761018276215, "learning_rate": 2.197265625e-05, "loss": 0.0757136344909668, "step": 376 }, { "epoch": 0.05096919203014889, "grad_norm": 0.14696864783763885, "learning_rate": 2.203125e-05, "loss": 0.06543827056884766, "step": 377 }, { "epoch": 0.05110438882598482, "grad_norm": 0.6678949594497681, "learning_rate": 2.208984375e-05, "loss": 0.14141273498535156, "step": 378 }, { "epoch": 0.051239585621820766, "grad_norm": 0.386575311422348, "learning_rate": 2.2148437500000002e-05, "loss": 0.08747196197509766, "step": 379 }, { "epoch": 0.0513747824176567, "grad_norm": 0.9145892858505249, "learning_rate": 2.220703125e-05, "loss": 0.09427738189697266, "step": 380 }, { "epoch": 0.05150997921349264, "grad_norm": 0.6047827005386353, "learning_rate": 2.2265625e-05, "loss": 0.09445476531982422, "step": 381 }, { "epoch": 0.05164517600932858, "grad_norm": 0.830271303653717, "learning_rate": 2.2324218750000002e-05, "loss": 0.10351085662841797, "step": 382 }, { "epoch": 0.051780372805164517, "grad_norm": 0.48638877272605896, "learning_rate": 2.23828125e-05, "loss": 0.07607555389404297, "step": 383 }, { "epoch": 0.05191556960100046, "grad_norm": 3.4073305130004883, "learning_rate": 2.244140625e-05, "loss": 0.1930704116821289, "step": 384 }, { "epoch": 0.052050766396836395, "grad_norm": 1.4241673946380615, "learning_rate": 2.25e-05, "loss": 0.14117431640625, "step": 385 }, { "epoch": 0.05218596319267233, "grad_norm": 0.2762816250324249, "learning_rate": 2.255859375e-05, "loss": 0.0757436752319336, "step": 386 }, { "epoch": 0.052321159988508274, "grad_norm": 0.9581100940704346, "learning_rate": 2.26171875e-05, "loss": 0.09415817260742188, "step": 387 }, { "epoch": 0.05245635678434421, "grad_norm": 0.7780214548110962, "learning_rate": 2.267578125e-05, "loss": 0.10392951965332031, "step": 388 }, { "epoch": 0.05259155358018015, "grad_norm": 0.34639620780944824, "learning_rate": 2.2734375000000002e-05, "loss": 0.06629467010498047, "step": 389 }, { "epoch": 0.05272675037601609, "grad_norm": 0.5023885369300842, "learning_rate": 2.279296875e-05, "loss": 0.11465740203857422, "step": 390 }, { "epoch": 0.052861947171852025, "grad_norm": 0.725202202796936, "learning_rate": 2.28515625e-05, "loss": 0.10161781311035156, "step": 391 }, { "epoch": 0.05299714396768797, "grad_norm": 0.33798712491989136, "learning_rate": 2.291015625e-05, "loss": 0.10046005249023438, "step": 392 }, { "epoch": 0.053132340763523904, "grad_norm": 0.5291942358016968, "learning_rate": 2.296875e-05, "loss": 0.08743858337402344, "step": 393 }, { "epoch": 0.05326753755935984, "grad_norm": 0.9972317814826965, "learning_rate": 2.302734375e-05, "loss": 0.14546585083007812, "step": 394 }, { "epoch": 0.05340273435519578, "grad_norm": 0.8500605225563049, "learning_rate": 2.30859375e-05, "loss": 0.09073972702026367, "step": 395 }, { "epoch": 0.05353793115103172, "grad_norm": 1.0873994827270508, "learning_rate": 2.3144531250000002e-05, "loss": 0.0867767333984375, "step": 396 }, { "epoch": 0.05367312794686766, "grad_norm": 0.30353739857673645, "learning_rate": 2.3203125e-05, "loss": 0.12094783782958984, "step": 397 }, { "epoch": 0.0538083247427036, "grad_norm": 0.642541229724884, "learning_rate": 2.326171875e-05, "loss": 0.13100242614746094, "step": 398 }, { "epoch": 0.053943521538539534, "grad_norm": 0.37056803703308105, "learning_rate": 2.3320312500000002e-05, "loss": 0.12386131286621094, "step": 399 }, { "epoch": 0.05407871833437548, "grad_norm": 0.26606371998786926, "learning_rate": 2.337890625e-05, "loss": 0.10099029541015625, "step": 400 }, { "epoch": 0.05421391513021141, "grad_norm": 0.598811686038971, "learning_rate": 2.34375e-05, "loss": 0.10195493698120117, "step": 401 }, { "epoch": 0.054349111926047355, "grad_norm": 0.23437465727329254, "learning_rate": 2.349609375e-05, "loss": 0.10207271575927734, "step": 402 }, { "epoch": 0.05448430872188329, "grad_norm": 0.7463119029998779, "learning_rate": 2.35546875e-05, "loss": 0.11443519592285156, "step": 403 }, { "epoch": 0.05461950551771923, "grad_norm": 0.23719719052314758, "learning_rate": 2.361328125e-05, "loss": 0.06537270545959473, "step": 404 }, { "epoch": 0.05475470231355517, "grad_norm": 0.2601531445980072, "learning_rate": 2.3671875e-05, "loss": 0.08099842071533203, "step": 405 }, { "epoch": 0.054889899109391106, "grad_norm": 0.8800279498100281, "learning_rate": 2.3730468750000002e-05, "loss": 0.08411026000976562, "step": 406 }, { "epoch": 0.05502509590522705, "grad_norm": 0.14519470930099487, "learning_rate": 2.37890625e-05, "loss": 0.059525489807128906, "step": 407 }, { "epoch": 0.055160292701062985, "grad_norm": 0.17120572924613953, "learning_rate": 2.384765625e-05, "loss": 0.06172466278076172, "step": 408 }, { "epoch": 0.05529548949689892, "grad_norm": 0.5239396095275879, "learning_rate": 2.3906250000000002e-05, "loss": 0.09552001953125, "step": 409 }, { "epoch": 0.055430686292734864, "grad_norm": 0.1637723594903946, "learning_rate": 2.396484375e-05, "loss": 0.059935569763183594, "step": 410 }, { "epoch": 0.0555658830885708, "grad_norm": 0.5600265860557556, "learning_rate": 2.40234375e-05, "loss": 0.09963130950927734, "step": 411 }, { "epoch": 0.05570107988440674, "grad_norm": 0.3125258982181549, "learning_rate": 2.408203125e-05, "loss": 0.08180046081542969, "step": 412 }, { "epoch": 0.05583627668024268, "grad_norm": 0.14712771773338318, "learning_rate": 2.4140625e-05, "loss": 0.1001739501953125, "step": 413 }, { "epoch": 0.055971473476078615, "grad_norm": 0.2574838399887085, "learning_rate": 2.419921875e-05, "loss": 0.07106208801269531, "step": 414 }, { "epoch": 0.05610667027191456, "grad_norm": 0.41405096650123596, "learning_rate": 2.42578125e-05, "loss": 0.0865478515625, "step": 415 }, { "epoch": 0.056241867067750494, "grad_norm": 0.08951804786920547, "learning_rate": 2.4316406250000002e-05, "loss": 0.06560754776000977, "step": 416 }, { "epoch": 0.05637706386358643, "grad_norm": 0.24774175882339478, "learning_rate": 2.4375e-05, "loss": 0.09889507293701172, "step": 417 }, { "epoch": 0.05651226065942237, "grad_norm": 0.42182087898254395, "learning_rate": 2.443359375e-05, "loss": 0.12457895278930664, "step": 418 }, { "epoch": 0.05664745745525831, "grad_norm": 0.21307718753814697, "learning_rate": 2.44921875e-05, "loss": 0.08582401275634766, "step": 419 }, { "epoch": 0.05678265425109425, "grad_norm": 0.32010015845298767, "learning_rate": 2.455078125e-05, "loss": 0.12688255310058594, "step": 420 }, { "epoch": 0.05691785104693019, "grad_norm": 0.2119021713733673, "learning_rate": 2.4609375e-05, "loss": 0.09719276428222656, "step": 421 }, { "epoch": 0.05705304784276612, "grad_norm": 0.20008584856987, "learning_rate": 2.466796875e-05, "loss": 0.08666133880615234, "step": 422 }, { "epoch": 0.057188244638602066, "grad_norm": 0.1773962676525116, "learning_rate": 2.4726562500000002e-05, "loss": 0.06006050109863281, "step": 423 }, { "epoch": 0.057323441434438, "grad_norm": 0.1694437563419342, "learning_rate": 2.478515625e-05, "loss": 0.07963371276855469, "step": 424 }, { "epoch": 0.057458638230273945, "grad_norm": 0.3612242341041565, "learning_rate": 2.484375e-05, "loss": 0.12116003036499023, "step": 425 }, { "epoch": 0.05759383502610988, "grad_norm": 0.18567639589309692, "learning_rate": 2.4902343750000002e-05, "loss": 0.09574651718139648, "step": 426 }, { "epoch": 0.05772903182194582, "grad_norm": 0.21031694114208221, "learning_rate": 2.49609375e-05, "loss": 0.10147476196289062, "step": 427 }, { "epoch": 0.05786422861778176, "grad_norm": 0.34142056107521057, "learning_rate": 2.501953125e-05, "loss": 0.09202718734741211, "step": 428 }, { "epoch": 0.057999425413617696, "grad_norm": 0.15303567051887512, "learning_rate": 2.5078125e-05, "loss": 0.06485652923583984, "step": 429 }, { "epoch": 0.05813462220945364, "grad_norm": 0.5797934532165527, "learning_rate": 2.513671875e-05, "loss": 0.13192272186279297, "step": 430 }, { "epoch": 0.058269819005289575, "grad_norm": 0.5082610845565796, "learning_rate": 2.51953125e-05, "loss": 0.053334712982177734, "step": 431 }, { "epoch": 0.05840501580112551, "grad_norm": 0.3315902352333069, "learning_rate": 2.525390625e-05, "loss": 0.09614801406860352, "step": 432 }, { "epoch": 0.058540212596961454, "grad_norm": 0.2403307408094406, "learning_rate": 2.5312500000000002e-05, "loss": 0.07897329330444336, "step": 433 }, { "epoch": 0.05867540939279739, "grad_norm": 0.6379460692405701, "learning_rate": 2.537109375e-05, "loss": 0.1441669464111328, "step": 434 }, { "epoch": 0.05881060618863333, "grad_norm": 0.46444088220596313, "learning_rate": 2.54296875e-05, "loss": 0.12706565856933594, "step": 435 }, { "epoch": 0.05894580298446927, "grad_norm": 0.6619452238082886, "learning_rate": 2.548828125e-05, "loss": 0.10677719116210938, "step": 436 }, { "epoch": 0.059080999780305204, "grad_norm": 0.3971615731716156, "learning_rate": 2.5546875e-05, "loss": 0.07642936706542969, "step": 437 }, { "epoch": 0.05921619657614115, "grad_norm": 0.3409765362739563, "learning_rate": 2.560546875e-05, "loss": 0.09028291702270508, "step": 438 }, { "epoch": 0.05935139337197708, "grad_norm": 0.4880635440349579, "learning_rate": 2.56640625e-05, "loss": 0.0882730484008789, "step": 439 }, { "epoch": 0.059486590167813026, "grad_norm": 0.2580116391181946, "learning_rate": 2.5722656250000002e-05, "loss": 0.11551690101623535, "step": 440 }, { "epoch": 0.05962178696364896, "grad_norm": 0.28127238154411316, "learning_rate": 2.578125e-05, "loss": 0.08358478546142578, "step": 441 }, { "epoch": 0.0597569837594849, "grad_norm": 0.36428654193878174, "learning_rate": 2.583984375e-05, "loss": 0.10225105285644531, "step": 442 }, { "epoch": 0.05989218055532084, "grad_norm": 0.15446828305721283, "learning_rate": 2.5898437500000002e-05, "loss": 0.07058906555175781, "step": 443 }, { "epoch": 0.06002737735115678, "grad_norm": 0.4826681911945343, "learning_rate": 2.595703125e-05, "loss": 0.1385936737060547, "step": 444 }, { "epoch": 0.06016257414699271, "grad_norm": 0.17592550814151764, "learning_rate": 2.6015625e-05, "loss": 0.0698246955871582, "step": 445 }, { "epoch": 0.060297770942828656, "grad_norm": 0.31240314245224, "learning_rate": 2.607421875e-05, "loss": 0.08679771423339844, "step": 446 }, { "epoch": 0.06043296773866459, "grad_norm": 0.6398367285728455, "learning_rate": 2.61328125e-05, "loss": 0.1202545166015625, "step": 447 }, { "epoch": 0.060568164534500535, "grad_norm": 0.3890048861503601, "learning_rate": 2.619140625e-05, "loss": 0.05002260208129883, "step": 448 }, { "epoch": 0.06070336133033647, "grad_norm": 0.46764808893203735, "learning_rate": 2.625e-05, "loss": 0.07270145416259766, "step": 449 }, { "epoch": 0.06083855812617241, "grad_norm": 0.3413294553756714, "learning_rate": 2.6308593750000002e-05, "loss": 0.08463382720947266, "step": 450 }, { "epoch": 0.06097375492200835, "grad_norm": 0.2559318244457245, "learning_rate": 2.63671875e-05, "loss": 0.12294769287109375, "step": 451 }, { "epoch": 0.061108951717844286, "grad_norm": 0.7449362874031067, "learning_rate": 2.642578125e-05, "loss": 0.07621002197265625, "step": 452 }, { "epoch": 0.06124414851368023, "grad_norm": 0.3746051788330078, "learning_rate": 2.6484375000000002e-05, "loss": 0.09185314178466797, "step": 453 }, { "epoch": 0.061379345309516165, "grad_norm": 0.5289967656135559, "learning_rate": 2.654296875e-05, "loss": 0.11911392211914062, "step": 454 }, { "epoch": 0.0615145421053521, "grad_norm": 0.22747808694839478, "learning_rate": 2.66015625e-05, "loss": 0.07048225402832031, "step": 455 }, { "epoch": 0.06164973890118804, "grad_norm": 0.42176374793052673, "learning_rate": 2.666015625e-05, "loss": 0.09965133666992188, "step": 456 }, { "epoch": 0.06178493569702398, "grad_norm": 0.21731840074062347, "learning_rate": 2.6718750000000002e-05, "loss": 0.08211088180541992, "step": 457 }, { "epoch": 0.06192013249285992, "grad_norm": 0.4568416476249695, "learning_rate": 2.677734375e-05, "loss": 0.1116476058959961, "step": 458 }, { "epoch": 0.06205532928869586, "grad_norm": 0.22233718633651733, "learning_rate": 2.68359375e-05, "loss": 0.05214977264404297, "step": 459 }, { "epoch": 0.062190526084531794, "grad_norm": 0.43788546323776245, "learning_rate": 2.6894531250000002e-05, "loss": 0.09244871139526367, "step": 460 }, { "epoch": 0.06232572288036774, "grad_norm": 0.5135902166366577, "learning_rate": 2.6953125e-05, "loss": 0.10364055633544922, "step": 461 }, { "epoch": 0.06246091967620367, "grad_norm": 0.2793516218662262, "learning_rate": 2.701171875e-05, "loss": 0.05248737335205078, "step": 462 }, { "epoch": 0.06259611647203961, "grad_norm": 0.406421422958374, "learning_rate": 2.70703125e-05, "loss": 0.08672618865966797, "step": 463 }, { "epoch": 0.06273131326787555, "grad_norm": 0.12528029084205627, "learning_rate": 2.712890625e-05, "loss": 0.029212474822998047, "step": 464 }, { "epoch": 0.0628665100637115, "grad_norm": 0.3743863105773926, "learning_rate": 2.71875e-05, "loss": 0.0905771255493164, "step": 465 }, { "epoch": 0.06300170685954742, "grad_norm": 0.4710961580276489, "learning_rate": 2.724609375e-05, "loss": 0.08863449096679688, "step": 466 }, { "epoch": 0.06313690365538337, "grad_norm": 0.2887037396430969, "learning_rate": 2.7304687500000002e-05, "loss": 0.07930278778076172, "step": 467 }, { "epoch": 0.06327210045121931, "grad_norm": 0.3721965551376343, "learning_rate": 2.736328125e-05, "loss": 0.07076358795166016, "step": 468 }, { "epoch": 0.06340729724705524, "grad_norm": 0.1717955321073532, "learning_rate": 2.7421875e-05, "loss": 0.07027316093444824, "step": 469 }, { "epoch": 0.06354249404289118, "grad_norm": 0.24996301531791687, "learning_rate": 2.7480468750000002e-05, "loss": 0.12811613082885742, "step": 470 }, { "epoch": 0.06367769083872712, "grad_norm": 0.30291178822517395, "learning_rate": 2.75390625e-05, "loss": 0.16044998168945312, "step": 471 }, { "epoch": 0.06381288763456307, "grad_norm": 0.2250363528728485, "learning_rate": 2.759765625e-05, "loss": 0.07297325134277344, "step": 472 }, { "epoch": 0.063948084430399, "grad_norm": 0.2488800436258316, "learning_rate": 2.765625e-05, "loss": 0.07302236557006836, "step": 473 }, { "epoch": 0.06408328122623494, "grad_norm": 0.14107242226600647, "learning_rate": 2.7714843750000002e-05, "loss": 0.06640434265136719, "step": 474 }, { "epoch": 0.06421847802207088, "grad_norm": 0.16244164109230042, "learning_rate": 2.77734375e-05, "loss": 0.07704639434814453, "step": 475 }, { "epoch": 0.06435367481790681, "grad_norm": 0.2833482623100281, "learning_rate": 2.783203125e-05, "loss": 0.10689687728881836, "step": 476 }, { "epoch": 0.06448887161374275, "grad_norm": 0.3703344166278839, "learning_rate": 2.7890625000000002e-05, "loss": 0.1478271484375, "step": 477 }, { "epoch": 0.0646240684095787, "grad_norm": 0.38901495933532715, "learning_rate": 2.794921875e-05, "loss": 0.09612178802490234, "step": 478 }, { "epoch": 0.06475926520541463, "grad_norm": 0.2891184389591217, "learning_rate": 2.80078125e-05, "loss": 0.06588220596313477, "step": 479 }, { "epoch": 0.06489446200125057, "grad_norm": 0.1334463655948639, "learning_rate": 2.806640625e-05, "loss": 0.07320594787597656, "step": 480 }, { "epoch": 0.06502965879708651, "grad_norm": 0.23207786679267883, "learning_rate": 2.8125e-05, "loss": 0.05840778350830078, "step": 481 }, { "epoch": 0.06516485559292244, "grad_norm": 0.4443608820438385, "learning_rate": 2.818359375e-05, "loss": 0.0740361213684082, "step": 482 }, { "epoch": 0.06530005238875838, "grad_norm": 0.3327508568763733, "learning_rate": 2.82421875e-05, "loss": 0.10940694808959961, "step": 483 }, { "epoch": 0.06543524918459433, "grad_norm": 0.32395318150520325, "learning_rate": 2.8300781250000002e-05, "loss": 0.08982467651367188, "step": 484 }, { "epoch": 0.06557044598043027, "grad_norm": 0.7124205231666565, "learning_rate": 2.8359375e-05, "loss": 0.121795654296875, "step": 485 }, { "epoch": 0.0657056427762662, "grad_norm": 0.521129310131073, "learning_rate": 2.841796875e-05, "loss": 0.09668445587158203, "step": 486 }, { "epoch": 0.06584083957210214, "grad_norm": 0.31626927852630615, "learning_rate": 2.8476562500000002e-05, "loss": 0.05739116668701172, "step": 487 }, { "epoch": 0.06597603636793808, "grad_norm": 0.17424233257770538, "learning_rate": 2.853515625e-05, "loss": 0.08448123931884766, "step": 488 }, { "epoch": 0.06611123316377401, "grad_norm": 0.22088785469532013, "learning_rate": 2.859375e-05, "loss": 0.08600044250488281, "step": 489 }, { "epoch": 0.06624642995960996, "grad_norm": 0.14359068870544434, "learning_rate": 2.865234375e-05, "loss": 0.06694495677947998, "step": 490 }, { "epoch": 0.0663816267554459, "grad_norm": 0.5119166970252991, "learning_rate": 2.87109375e-05, "loss": 0.13225746154785156, "step": 491 }, { "epoch": 0.06651682355128183, "grad_norm": 0.11170045286417007, "learning_rate": 2.876953125e-05, "loss": 0.0768272876739502, "step": 492 }, { "epoch": 0.06665202034711777, "grad_norm": 0.24550208449363708, "learning_rate": 2.8828125e-05, "loss": 0.09083175659179688, "step": 493 }, { "epoch": 0.06678721714295371, "grad_norm": 0.49827250838279724, "learning_rate": 2.8886718750000002e-05, "loss": 0.12553882598876953, "step": 494 }, { "epoch": 0.06692241393878966, "grad_norm": 0.1018979400396347, "learning_rate": 2.89453125e-05, "loss": 0.0836629867553711, "step": 495 }, { "epoch": 0.06705761073462559, "grad_norm": 0.7441481351852417, "learning_rate": 2.900390625e-05, "loss": 0.09560012817382812, "step": 496 }, { "epoch": 0.06719280753046153, "grad_norm": 0.16811202466487885, "learning_rate": 2.90625e-05, "loss": 0.0786895751953125, "step": 497 }, { "epoch": 0.06732800432629747, "grad_norm": 0.3336220979690552, "learning_rate": 2.912109375e-05, "loss": 0.08087158203125, "step": 498 }, { "epoch": 0.0674632011221334, "grad_norm": 0.32104796171188354, "learning_rate": 2.91796875e-05, "loss": 0.09322166442871094, "step": 499 }, { "epoch": 0.06759839791796934, "grad_norm": 0.2070852816104889, "learning_rate": 2.923828125e-05, "loss": 0.09871625900268555, "step": 500 }, { "epoch": 0.06773359471380529, "grad_norm": 0.215023472905159, "learning_rate": 2.9296875000000002e-05, "loss": 0.10586929321289062, "step": 501 }, { "epoch": 0.06786879150964122, "grad_norm": 0.4073386490345001, "learning_rate": 2.935546875e-05, "loss": 0.12042808532714844, "step": 502 }, { "epoch": 0.06800398830547716, "grad_norm": 0.16417402029037476, "learning_rate": 2.94140625e-05, "loss": 0.08030986785888672, "step": 503 }, { "epoch": 0.0681391851013131, "grad_norm": 0.18574321269989014, "learning_rate": 2.9472656250000002e-05, "loss": 0.10937690734863281, "step": 504 }, { "epoch": 0.06827438189714903, "grad_norm": 0.2531127333641052, "learning_rate": 2.953125e-05, "loss": 0.09542155265808105, "step": 505 }, { "epoch": 0.06840957869298497, "grad_norm": 0.2664662003517151, "learning_rate": 2.958984375e-05, "loss": 0.08902454376220703, "step": 506 }, { "epoch": 0.06854477548882092, "grad_norm": 0.25008097290992737, "learning_rate": 2.96484375e-05, "loss": 0.10931873321533203, "step": 507 }, { "epoch": 0.06867997228465686, "grad_norm": 0.38765203952789307, "learning_rate": 2.970703125e-05, "loss": 0.09329700469970703, "step": 508 }, { "epoch": 0.06881516908049279, "grad_norm": 0.26607415080070496, "learning_rate": 2.9765625e-05, "loss": 0.07357215881347656, "step": 509 }, { "epoch": 0.06895036587632873, "grad_norm": 0.17761532962322235, "learning_rate": 2.982421875e-05, "loss": 0.07666778564453125, "step": 510 }, { "epoch": 0.06908556267216467, "grad_norm": 0.26230624318122864, "learning_rate": 2.9882812500000002e-05, "loss": 0.10649490356445312, "step": 511 }, { "epoch": 0.0692207594680006, "grad_norm": 0.1139485239982605, "learning_rate": 2.994140625e-05, "loss": 0.07679367065429688, "step": 512 }, { "epoch": 0.06935595626383655, "grad_norm": 0.2930458188056946, "learning_rate": 3e-05, "loss": 0.0900421142578125, "step": 513 }, { "epoch": 0.06949115305967249, "grad_norm": 0.3094085454940796, "learning_rate": 2.9999998438460004e-05, "loss": 0.06854057312011719, "step": 514 }, { "epoch": 0.06962634985550842, "grad_norm": 0.1503412425518036, "learning_rate": 2.9999993753840344e-05, "loss": 0.08107662200927734, "step": 515 }, { "epoch": 0.06976154665134436, "grad_norm": 0.8429868817329407, "learning_rate": 2.9999985946141995e-05, "loss": 0.11447715759277344, "step": 516 }, { "epoch": 0.0698967434471803, "grad_norm": 0.34427618980407715, "learning_rate": 2.9999975015366586e-05, "loss": 0.12001323699951172, "step": 517 }, { "epoch": 0.07003194024301625, "grad_norm": 0.5228518843650818, "learning_rate": 2.9999960961516384e-05, "loss": 0.11302566528320312, "step": 518 }, { "epoch": 0.07016713703885218, "grad_norm": 0.2178574502468109, "learning_rate": 2.9999943784594325e-05, "loss": 0.08069896697998047, "step": 519 }, { "epoch": 0.07030233383468812, "grad_norm": 0.2845768332481384, "learning_rate": 2.9999923484603975e-05, "loss": 0.09895515441894531, "step": 520 }, { "epoch": 0.07043753063052406, "grad_norm": 0.13979288935661316, "learning_rate": 2.999990006154957e-05, "loss": 0.09073543548583984, "step": 521 }, { "epoch": 0.07057272742635999, "grad_norm": 0.14410565793514252, "learning_rate": 2.9999873515435977e-05, "loss": 0.0753030776977539, "step": 522 }, { "epoch": 0.07070792422219593, "grad_norm": 0.39464297890663147, "learning_rate": 2.9999843846268735e-05, "loss": 0.10400676727294922, "step": 523 }, { "epoch": 0.07084312101803188, "grad_norm": 0.2655804753303528, "learning_rate": 2.9999811054054018e-05, "loss": 0.06646251678466797, "step": 524 }, { "epoch": 0.0709783178138678, "grad_norm": 0.19320404529571533, "learning_rate": 2.9999775138798646e-05, "loss": 0.09278392791748047, "step": 525 }, { "epoch": 0.07111351460970375, "grad_norm": 0.11904240399599075, "learning_rate": 2.99997361005101e-05, "loss": 0.08670663833618164, "step": 526 }, { "epoch": 0.07124871140553969, "grad_norm": 0.3094334602355957, "learning_rate": 2.9999693939196513e-05, "loss": 0.0844879150390625, "step": 527 }, { "epoch": 0.07138390820137563, "grad_norm": 0.36753204464912415, "learning_rate": 2.999964865486666e-05, "loss": 0.07069790363311768, "step": 528 }, { "epoch": 0.07151910499721156, "grad_norm": 0.31463855504989624, "learning_rate": 2.999960024752997e-05, "loss": 0.12663936614990234, "step": 529 }, { "epoch": 0.0716543017930475, "grad_norm": 0.16702313721179962, "learning_rate": 2.9999548717196516e-05, "loss": 0.08238887786865234, "step": 530 }, { "epoch": 0.07178949858888345, "grad_norm": 0.11972741037607193, "learning_rate": 2.999949406387703e-05, "loss": 0.08971595764160156, "step": 531 }, { "epoch": 0.07192469538471938, "grad_norm": 0.2606874406337738, "learning_rate": 2.9999436287582903e-05, "loss": 0.10538005828857422, "step": 532 }, { "epoch": 0.07205989218055532, "grad_norm": 0.19562457501888275, "learning_rate": 2.9999375388326145e-05, "loss": 0.11307525634765625, "step": 533 }, { "epoch": 0.07219508897639126, "grad_norm": 0.1900082677602768, "learning_rate": 2.9999311366119447e-05, "loss": 0.06357097625732422, "step": 534 }, { "epoch": 0.07233028577222719, "grad_norm": 0.35355332493782043, "learning_rate": 2.9999244220976137e-05, "loss": 0.12403202056884766, "step": 535 }, { "epoch": 0.07246548256806314, "grad_norm": 0.1257934272289276, "learning_rate": 2.9999173952910197e-05, "loss": 0.0693352222442627, "step": 536 }, { "epoch": 0.07260067936389908, "grad_norm": 0.3972124755382538, "learning_rate": 2.9999100561936252e-05, "loss": 0.10654115676879883, "step": 537 }, { "epoch": 0.07273587615973501, "grad_norm": 0.21748265624046326, "learning_rate": 2.9999024048069585e-05, "loss": 0.08458232879638672, "step": 538 }, { "epoch": 0.07287107295557095, "grad_norm": 0.22351795434951782, "learning_rate": 2.9998944411326127e-05, "loss": 0.060929298400878906, "step": 539 }, { "epoch": 0.0730062697514069, "grad_norm": 0.3372923731803894, "learning_rate": 2.999886165172246e-05, "loss": 0.08164787292480469, "step": 540 }, { "epoch": 0.07314146654724284, "grad_norm": 0.1859789341688156, "learning_rate": 2.9998775769275814e-05, "loss": 0.10498714447021484, "step": 541 }, { "epoch": 0.07327666334307877, "grad_norm": 0.18278458714485168, "learning_rate": 2.9998686764004067e-05, "loss": 0.09410762786865234, "step": 542 }, { "epoch": 0.07341186013891471, "grad_norm": 0.2038678228855133, "learning_rate": 2.9998594635925755e-05, "loss": 0.08800554275512695, "step": 543 }, { "epoch": 0.07354705693475065, "grad_norm": 0.32873788475990295, "learning_rate": 2.999849938506005e-05, "loss": 0.08473777770996094, "step": 544 }, { "epoch": 0.07368225373058658, "grad_norm": 0.21102842688560486, "learning_rate": 2.99984010114268e-05, "loss": 0.07731151580810547, "step": 545 }, { "epoch": 0.07381745052642252, "grad_norm": 0.37139272689819336, "learning_rate": 2.9998299515046475e-05, "loss": 0.10659599304199219, "step": 546 }, { "epoch": 0.07395264732225847, "grad_norm": 0.4046103358268738, "learning_rate": 2.9998194895940213e-05, "loss": 0.08731555938720703, "step": 547 }, { "epoch": 0.0740878441180944, "grad_norm": 0.09351778030395508, "learning_rate": 2.9998087154129792e-05, "loss": 0.07852745056152344, "step": 548 }, { "epoch": 0.07422304091393034, "grad_norm": 0.3234085440635681, "learning_rate": 2.9997976289637645e-05, "loss": 0.0915689468383789, "step": 549 }, { "epoch": 0.07435823770976628, "grad_norm": 0.20921041071414948, "learning_rate": 2.9997862302486855e-05, "loss": 0.07576560974121094, "step": 550 }, { "epoch": 0.07449343450560222, "grad_norm": 0.3197173774242401, "learning_rate": 2.9997745192701153e-05, "loss": 0.08384895324707031, "step": 551 }, { "epoch": 0.07462863130143815, "grad_norm": 0.11288613080978394, "learning_rate": 2.9997624960304926e-05, "loss": 0.061016082763671875, "step": 552 }, { "epoch": 0.0747638280972741, "grad_norm": 0.08520299196243286, "learning_rate": 2.9997501605323214e-05, "loss": 0.06275606155395508, "step": 553 }, { "epoch": 0.07489902489311004, "grad_norm": 0.30760088562965393, "learning_rate": 2.999737512778168e-05, "loss": 0.07462167739868164, "step": 554 }, { "epoch": 0.07503422168894597, "grad_norm": 0.3377489447593689, "learning_rate": 2.9997245527706674e-05, "loss": 0.08373498916625977, "step": 555 }, { "epoch": 0.07516941848478191, "grad_norm": 0.37358325719833374, "learning_rate": 2.999711280512517e-05, "loss": 0.0783548355102539, "step": 556 }, { "epoch": 0.07530461528061785, "grad_norm": 0.2781330347061157, "learning_rate": 2.9996976960064807e-05, "loss": 0.07808208465576172, "step": 557 }, { "epoch": 0.07543981207645378, "grad_norm": 0.3915441036224365, "learning_rate": 2.999683799255387e-05, "loss": 0.08591938018798828, "step": 558 }, { "epoch": 0.07557500887228973, "grad_norm": 0.35622817277908325, "learning_rate": 2.999669590262129e-05, "loss": 0.047222137451171875, "step": 559 }, { "epoch": 0.07571020566812567, "grad_norm": 0.27966177463531494, "learning_rate": 2.999655069029665e-05, "loss": 0.10179281234741211, "step": 560 }, { "epoch": 0.0758454024639616, "grad_norm": 0.13936161994934082, "learning_rate": 2.9996402355610183e-05, "loss": 0.057047367095947266, "step": 561 }, { "epoch": 0.07598059925979754, "grad_norm": 0.11600785702466965, "learning_rate": 2.9996250898592777e-05, "loss": 0.07817554473876953, "step": 562 }, { "epoch": 0.07611579605563348, "grad_norm": 0.3167460560798645, "learning_rate": 2.9996096319275962e-05, "loss": 0.06467565894126892, "step": 563 }, { "epoch": 0.07625099285146943, "grad_norm": 0.40994006395339966, "learning_rate": 2.9995938617691925e-05, "loss": 0.12283134460449219, "step": 564 }, { "epoch": 0.07638618964730536, "grad_norm": 0.25212064385414124, "learning_rate": 2.9995777793873504e-05, "loss": 0.08774852752685547, "step": 565 }, { "epoch": 0.0765213864431413, "grad_norm": 0.21666939556598663, "learning_rate": 2.9995613847854176e-05, "loss": 0.06520652770996094, "step": 566 }, { "epoch": 0.07665658323897724, "grad_norm": 0.27924904227256775, "learning_rate": 2.9995446779668078e-05, "loss": 0.10445094108581543, "step": 567 }, { "epoch": 0.07679178003481317, "grad_norm": 0.141977921128273, "learning_rate": 2.9995276589349992e-05, "loss": 0.10594844818115234, "step": 568 }, { "epoch": 0.07692697683064911, "grad_norm": 0.06773504614830017, "learning_rate": 2.9995103276935357e-05, "loss": 0.0640711784362793, "step": 569 }, { "epoch": 0.07706217362648506, "grad_norm": 0.5213485956192017, "learning_rate": 2.9994926842460258e-05, "loss": 0.08524489402770996, "step": 570 }, { "epoch": 0.07719737042232099, "grad_norm": 0.10157053172588348, "learning_rate": 2.9994747285961428e-05, "loss": 0.06107282638549805, "step": 571 }, { "epoch": 0.07733256721815693, "grad_norm": 0.09224223345518112, "learning_rate": 2.9994564607476255e-05, "loss": 0.08637428283691406, "step": 572 }, { "epoch": 0.07746776401399287, "grad_norm": 0.3050849735736847, "learning_rate": 2.9994378807042762e-05, "loss": 0.1074533462524414, "step": 573 }, { "epoch": 0.07760296080982881, "grad_norm": 0.22146910429000854, "learning_rate": 2.9994189884699647e-05, "loss": 0.1300792694091797, "step": 574 }, { "epoch": 0.07773815760566474, "grad_norm": 0.07144106179475784, "learning_rate": 2.9993997840486233e-05, "loss": 0.04799842834472656, "step": 575 }, { "epoch": 0.07787335440150069, "grad_norm": 0.32296156883239746, "learning_rate": 2.9993802674442516e-05, "loss": 0.07802867889404297, "step": 576 }, { "epoch": 0.07800855119733663, "grad_norm": 0.11064080893993378, "learning_rate": 2.999360438660913e-05, "loss": 0.06294059753417969, "step": 577 }, { "epoch": 0.07814374799317256, "grad_norm": 0.2089330106973648, "learning_rate": 2.9993402977027346e-05, "loss": 0.054758548736572266, "step": 578 }, { "epoch": 0.0782789447890085, "grad_norm": 0.13595914840698242, "learning_rate": 2.999319844573911e-05, "loss": 0.042527198791503906, "step": 579 }, { "epoch": 0.07841414158484444, "grad_norm": 0.3079105019569397, "learning_rate": 2.9992990792787007e-05, "loss": 0.07813072204589844, "step": 580 }, { "epoch": 0.07854933838068037, "grad_norm": 0.25272977352142334, "learning_rate": 2.999278001821427e-05, "loss": 0.11185455322265625, "step": 581 }, { "epoch": 0.07868453517651632, "grad_norm": 0.26442980766296387, "learning_rate": 2.9992566122064775e-05, "loss": 0.09828448295593262, "step": 582 }, { "epoch": 0.07881973197235226, "grad_norm": 0.2732599377632141, "learning_rate": 2.999234910438307e-05, "loss": 0.07195329666137695, "step": 583 }, { "epoch": 0.07895492876818819, "grad_norm": 0.05204978585243225, "learning_rate": 2.999212896521433e-05, "loss": 0.039618730545043945, "step": 584 }, { "epoch": 0.07909012556402413, "grad_norm": 0.12640728056430817, "learning_rate": 2.999190570460439e-05, "loss": 0.05958151817321777, "step": 585 }, { "epoch": 0.07922532235986007, "grad_norm": 0.32790225744247437, "learning_rate": 2.9991679322599734e-05, "loss": 0.06865406036376953, "step": 586 }, { "epoch": 0.07936051915569602, "grad_norm": 0.15171629190444946, "learning_rate": 2.9991449819247505e-05, "loss": 0.04877662658691406, "step": 587 }, { "epoch": 0.07949571595153195, "grad_norm": 0.10906587541103363, "learning_rate": 2.9991217194595474e-05, "loss": 0.06815218925476074, "step": 588 }, { "epoch": 0.07963091274736789, "grad_norm": 0.2120455801486969, "learning_rate": 2.9990981448692078e-05, "loss": 0.0928192138671875, "step": 589 }, { "epoch": 0.07976610954320383, "grad_norm": 0.1074172630906105, "learning_rate": 2.999074258158641e-05, "loss": 0.09325790405273438, "step": 590 }, { "epoch": 0.07990130633903976, "grad_norm": 0.19498956203460693, "learning_rate": 2.9990500593328192e-05, "loss": 0.05649304389953613, "step": 591 }, { "epoch": 0.0800365031348757, "grad_norm": 0.14760342240333557, "learning_rate": 2.999025548396781e-05, "loss": 0.10672998428344727, "step": 592 }, { "epoch": 0.08017169993071165, "grad_norm": 0.21737149357795715, "learning_rate": 2.9990007253556302e-05, "loss": 0.07566475868225098, "step": 593 }, { "epoch": 0.08030689672654757, "grad_norm": 0.2974933385848999, "learning_rate": 2.9989755902145345e-05, "loss": 0.11906719207763672, "step": 594 }, { "epoch": 0.08044209352238352, "grad_norm": 0.11337985843420029, "learning_rate": 2.9989501429787273e-05, "loss": 0.06615829467773438, "step": 595 }, { "epoch": 0.08057729031821946, "grad_norm": 0.21340131759643555, "learning_rate": 2.9989243836535073e-05, "loss": 0.06207704544067383, "step": 596 }, { "epoch": 0.0807124871140554, "grad_norm": 0.2859881818294525, "learning_rate": 2.998898312244237e-05, "loss": 0.07950067520141602, "step": 597 }, { "epoch": 0.08084768390989133, "grad_norm": 0.16309234499931335, "learning_rate": 2.9988719287563452e-05, "loss": 0.0576624870300293, "step": 598 }, { "epoch": 0.08098288070572728, "grad_norm": 0.10070134699344635, "learning_rate": 2.998845233195325e-05, "loss": 0.08256387710571289, "step": 599 }, { "epoch": 0.08111807750156322, "grad_norm": 0.15067686140537262, "learning_rate": 2.998818225566734e-05, "loss": 0.09409618377685547, "step": 600 }, { "epoch": 0.08125327429739915, "grad_norm": 0.10668405145406723, "learning_rate": 2.998790905876196e-05, "loss": 0.11132514476776123, "step": 601 }, { "epoch": 0.08138847109323509, "grad_norm": 0.16517981886863708, "learning_rate": 2.9987632741293987e-05, "loss": 0.07035708427429199, "step": 602 }, { "epoch": 0.08152366788907103, "grad_norm": 0.09393040090799332, "learning_rate": 2.998735330332096e-05, "loss": 0.07950484752655029, "step": 603 }, { "epoch": 0.08165886468490696, "grad_norm": 0.12008967995643616, "learning_rate": 2.9987070744901046e-05, "loss": 0.06570839881896973, "step": 604 }, { "epoch": 0.0817940614807429, "grad_norm": 0.2272833287715912, "learning_rate": 2.9986785066093084e-05, "loss": 0.09181094169616699, "step": 605 }, { "epoch": 0.08192925827657885, "grad_norm": 0.25803667306900024, "learning_rate": 2.9986496266956556e-05, "loss": 0.09886741638183594, "step": 606 }, { "epoch": 0.08206445507241478, "grad_norm": 0.4905322790145874, "learning_rate": 2.9986204347551583e-05, "loss": 0.12009239196777344, "step": 607 }, { "epoch": 0.08219965186825072, "grad_norm": 0.0775560736656189, "learning_rate": 2.9985909307938948e-05, "loss": 0.06000089645385742, "step": 608 }, { "epoch": 0.08233484866408666, "grad_norm": 0.2988452613353729, "learning_rate": 2.9985611148180082e-05, "loss": 0.08237457275390625, "step": 609 }, { "epoch": 0.0824700454599226, "grad_norm": 0.31842100620269775, "learning_rate": 2.9985309868337063e-05, "loss": 0.0988306999206543, "step": 610 }, { "epoch": 0.08260524225575853, "grad_norm": 0.1052919328212738, "learning_rate": 2.9985005468472617e-05, "loss": 0.030002593994140625, "step": 611 }, { "epoch": 0.08274043905159448, "grad_norm": 0.28633493185043335, "learning_rate": 2.9984697948650124e-05, "loss": 0.10738992691040039, "step": 612 }, { "epoch": 0.08287563584743042, "grad_norm": 0.28117629885673523, "learning_rate": 2.998438730893361e-05, "loss": 0.08930015563964844, "step": 613 }, { "epoch": 0.08301083264326635, "grad_norm": 0.1861264705657959, "learning_rate": 2.9984073549387747e-05, "loss": 0.05366230010986328, "step": 614 }, { "epoch": 0.08314602943910229, "grad_norm": 0.41998764872550964, "learning_rate": 2.998375667007787e-05, "loss": 0.07553625106811523, "step": 615 }, { "epoch": 0.08328122623493824, "grad_norm": 0.19615380465984344, "learning_rate": 2.998343667106995e-05, "loss": 0.06767797470092773, "step": 616 }, { "epoch": 0.08341642303077416, "grad_norm": 0.18898122012615204, "learning_rate": 2.9983113552430616e-05, "loss": 0.0434575080871582, "step": 617 }, { "epoch": 0.08355161982661011, "grad_norm": 0.45470932126045227, "learning_rate": 2.9982787314227134e-05, "loss": 0.07708358764648438, "step": 618 }, { "epoch": 0.08368681662244605, "grad_norm": 0.22009018063545227, "learning_rate": 2.998245795652744e-05, "loss": 0.09606266021728516, "step": 619 }, { "epoch": 0.083822013418282, "grad_norm": 0.18823456764221191, "learning_rate": 2.9982125479400106e-05, "loss": 0.06135225296020508, "step": 620 }, { "epoch": 0.08395721021411792, "grad_norm": 0.5377417802810669, "learning_rate": 2.9981789882914352e-05, "loss": 0.08295488357543945, "step": 621 }, { "epoch": 0.08409240700995387, "grad_norm": 0.4763088822364807, "learning_rate": 2.9981451167140048e-05, "loss": 0.0911707878112793, "step": 622 }, { "epoch": 0.08422760380578981, "grad_norm": 0.22370657324790955, "learning_rate": 2.9981109332147722e-05, "loss": 0.08115291595458984, "step": 623 }, { "epoch": 0.08436280060162574, "grad_norm": 0.2826088070869446, "learning_rate": 2.9980764378008545e-05, "loss": 0.06379127502441406, "step": 624 }, { "epoch": 0.08449799739746168, "grad_norm": 0.9952225089073181, "learning_rate": 2.9980416304794332e-05, "loss": 0.11029529571533203, "step": 625 }, { "epoch": 0.08463319419329762, "grad_norm": 0.18860769271850586, "learning_rate": 2.9980065112577565e-05, "loss": 0.099334716796875, "step": 626 }, { "epoch": 0.08476839098913355, "grad_norm": 0.28185638785362244, "learning_rate": 2.9979710801431357e-05, "loss": 0.08804464340209961, "step": 627 }, { "epoch": 0.0849035877849695, "grad_norm": 0.14749231934547424, "learning_rate": 2.997935337142948e-05, "loss": 0.05631279945373535, "step": 628 }, { "epoch": 0.08503878458080544, "grad_norm": 0.4178594946861267, "learning_rate": 2.9978992822646347e-05, "loss": 0.06901311874389648, "step": 629 }, { "epoch": 0.08517398137664137, "grad_norm": 0.1803293079137802, "learning_rate": 2.9978629155157036e-05, "loss": 0.08359909057617188, "step": 630 }, { "epoch": 0.08530917817247731, "grad_norm": 0.19294004142284393, "learning_rate": 2.9978262369037252e-05, "loss": 0.07879948616027832, "step": 631 }, { "epoch": 0.08544437496831325, "grad_norm": 0.1940276175737381, "learning_rate": 2.9977892464363375e-05, "loss": 0.05239057540893555, "step": 632 }, { "epoch": 0.0855795717641492, "grad_norm": 0.2245698869228363, "learning_rate": 2.9977519441212412e-05, "loss": 0.09803438186645508, "step": 633 }, { "epoch": 0.08571476855998512, "grad_norm": 0.23050887882709503, "learning_rate": 2.9977143299662034e-05, "loss": 0.08642864227294922, "step": 634 }, { "epoch": 0.08584996535582107, "grad_norm": 0.09768661856651306, "learning_rate": 2.997676403979055e-05, "loss": 0.03159141540527344, "step": 635 }, { "epoch": 0.08598516215165701, "grad_norm": 0.09331204742193222, "learning_rate": 2.997638166167693e-05, "loss": 0.07317638397216797, "step": 636 }, { "epoch": 0.08612035894749294, "grad_norm": 0.06804801523685455, "learning_rate": 2.9975996165400786e-05, "loss": 0.044899582862854004, "step": 637 }, { "epoch": 0.08625555574332888, "grad_norm": 0.3908451199531555, "learning_rate": 2.9975607551042373e-05, "loss": 0.0752716064453125, "step": 638 }, { "epoch": 0.08639075253916483, "grad_norm": 0.12349463999271393, "learning_rate": 2.9975215818682607e-05, "loss": 0.07203435897827148, "step": 639 }, { "epoch": 0.08652594933500075, "grad_norm": 0.21345306932926178, "learning_rate": 2.9974820968403056e-05, "loss": 0.08788847923278809, "step": 640 }, { "epoch": 0.0866611461308367, "grad_norm": 0.1577584445476532, "learning_rate": 2.9974423000285923e-05, "loss": 0.07614803314208984, "step": 641 }, { "epoch": 0.08679634292667264, "grad_norm": 0.09714977443218231, "learning_rate": 2.9974021914414068e-05, "loss": 0.04539823532104492, "step": 642 }, { "epoch": 0.08693153972250858, "grad_norm": 0.11560861021280289, "learning_rate": 2.9973617710871e-05, "loss": 0.0796351432800293, "step": 643 }, { "epoch": 0.08706673651834451, "grad_norm": 0.21095207333564758, "learning_rate": 2.997321038974087e-05, "loss": 0.07535362243652344, "step": 644 }, { "epoch": 0.08720193331418045, "grad_norm": 0.13098455965518951, "learning_rate": 2.997279995110849e-05, "loss": 0.08330440521240234, "step": 645 }, { "epoch": 0.0873371301100164, "grad_norm": 0.2241521030664444, "learning_rate": 2.997238639505932e-05, "loss": 0.05736207962036133, "step": 646 }, { "epoch": 0.08747232690585233, "grad_norm": 0.08733268082141876, "learning_rate": 2.997196972167946e-05, "loss": 0.07244205474853516, "step": 647 }, { "epoch": 0.08760752370168827, "grad_norm": 0.19124197959899902, "learning_rate": 2.9971549931055665e-05, "loss": 0.09659004211425781, "step": 648 }, { "epoch": 0.08774272049752421, "grad_norm": 0.3027733266353607, "learning_rate": 2.997112702327533e-05, "loss": 0.11511707305908203, "step": 649 }, { "epoch": 0.08787791729336014, "grad_norm": 0.07912140339612961, "learning_rate": 2.9970700998426518e-05, "loss": 0.05508708953857422, "step": 650 }, { "epoch": 0.08801311408919608, "grad_norm": 0.27462825179100037, "learning_rate": 2.9970271856597925e-05, "loss": 0.08562397956848145, "step": 651 }, { "epoch": 0.08814831088503203, "grad_norm": 0.1728072464466095, "learning_rate": 2.9969839597878896e-05, "loss": 0.09009361267089844, "step": 652 }, { "epoch": 0.08828350768086797, "grad_norm": 0.08717627823352814, "learning_rate": 2.9969404222359436e-05, "loss": 0.05742001533508301, "step": 653 }, { "epoch": 0.0884187044767039, "grad_norm": 0.228158637881279, "learning_rate": 2.9968965730130188e-05, "loss": 0.07795238494873047, "step": 654 }, { "epoch": 0.08855390127253984, "grad_norm": 0.11197508126497269, "learning_rate": 2.9968524121282455e-05, "loss": 0.06445789337158203, "step": 655 }, { "epoch": 0.08868909806837579, "grad_norm": 0.2086934596300125, "learning_rate": 2.9968079395908178e-05, "loss": 0.10188674926757812, "step": 656 }, { "epoch": 0.08882429486421171, "grad_norm": 0.22176457941532135, "learning_rate": 2.9967631554099947e-05, "loss": 0.1122288703918457, "step": 657 }, { "epoch": 0.08895949166004766, "grad_norm": 0.1395748257637024, "learning_rate": 2.996718059595101e-05, "loss": 0.0919804573059082, "step": 658 }, { "epoch": 0.0890946884558836, "grad_norm": 0.20750685036182404, "learning_rate": 2.9966726521555265e-05, "loss": 0.07076454162597656, "step": 659 }, { "epoch": 0.08922988525171953, "grad_norm": 0.1286686807870865, "learning_rate": 2.996626933100724e-05, "loss": 0.052811622619628906, "step": 660 }, { "epoch": 0.08936508204755547, "grad_norm": 0.44623395800590515, "learning_rate": 2.996580902440213e-05, "loss": 0.09715652465820312, "step": 661 }, { "epoch": 0.08950027884339141, "grad_norm": 0.16575433313846588, "learning_rate": 2.9965345601835773e-05, "loss": 0.08658599853515625, "step": 662 }, { "epoch": 0.08963547563922734, "grad_norm": 0.29066911339759827, "learning_rate": 2.996487906340466e-05, "loss": 0.07785797119140625, "step": 663 }, { "epoch": 0.08977067243506329, "grad_norm": 0.2852063477039337, "learning_rate": 2.996440940920592e-05, "loss": 0.08462285995483398, "step": 664 }, { "epoch": 0.08990586923089923, "grad_norm": 0.1689836084842682, "learning_rate": 2.996393663933735e-05, "loss": 0.06775045394897461, "step": 665 }, { "epoch": 0.09004106602673517, "grad_norm": 0.12417086213827133, "learning_rate": 2.9963460753897364e-05, "loss": 0.05731785297393799, "step": 666 }, { "epoch": 0.0901762628225711, "grad_norm": 0.244721457362175, "learning_rate": 2.996298175298506e-05, "loss": 0.09905529022216797, "step": 667 }, { "epoch": 0.09031145961840704, "grad_norm": 0.18479038774967194, "learning_rate": 2.996249963670016e-05, "loss": 0.0888524055480957, "step": 668 }, { "epoch": 0.09044665641424299, "grad_norm": 0.10865657031536102, "learning_rate": 2.9962014405143042e-05, "loss": 0.06245851516723633, "step": 669 }, { "epoch": 0.09058185321007892, "grad_norm": 0.27855703234672546, "learning_rate": 2.9961526058414745e-05, "loss": 0.11337447166442871, "step": 670 }, { "epoch": 0.09071705000591486, "grad_norm": 0.10137587040662766, "learning_rate": 2.9961034596616936e-05, "loss": 0.06521320343017578, "step": 671 }, { "epoch": 0.0908522468017508, "grad_norm": 0.24208283424377441, "learning_rate": 2.996054001985194e-05, "loss": 0.095703125, "step": 672 }, { "epoch": 0.09098744359758673, "grad_norm": 0.08551014214754105, "learning_rate": 2.9960042328222732e-05, "loss": 0.06468367576599121, "step": 673 }, { "epoch": 0.09112264039342267, "grad_norm": 0.21869981288909912, "learning_rate": 2.995954152183294e-05, "loss": 0.05954790115356445, "step": 674 }, { "epoch": 0.09125783718925862, "grad_norm": 0.31491169333457947, "learning_rate": 2.9959037600786822e-05, "loss": 0.06922221183776855, "step": 675 }, { "epoch": 0.09139303398509456, "grad_norm": 0.28219854831695557, "learning_rate": 2.9958530565189307e-05, "loss": 0.08938407897949219, "step": 676 }, { "epoch": 0.09152823078093049, "grad_norm": 0.09948960691690445, "learning_rate": 2.995802041514596e-05, "loss": 0.056900978088378906, "step": 677 }, { "epoch": 0.09166342757676643, "grad_norm": 0.2522660195827484, "learning_rate": 2.9957507150762996e-05, "loss": 0.047379493713378906, "step": 678 }, { "epoch": 0.09179862437260237, "grad_norm": 0.2659372389316559, "learning_rate": 2.9956990772147283e-05, "loss": 0.06755733489990234, "step": 679 }, { "epoch": 0.0919338211684383, "grad_norm": 0.10989851504564285, "learning_rate": 2.9956471279406324e-05, "loss": 0.060033321380615234, "step": 680 }, { "epoch": 0.09206901796427425, "grad_norm": 0.09935111552476883, "learning_rate": 2.9955948672648298e-05, "loss": 0.07506513595581055, "step": 681 }, { "epoch": 0.09220421476011019, "grad_norm": 0.1358737349510193, "learning_rate": 2.9955422951981994e-05, "loss": 0.08691072463989258, "step": 682 }, { "epoch": 0.09233941155594612, "grad_norm": 0.3513760566711426, "learning_rate": 2.995489411751688e-05, "loss": 0.07443714141845703, "step": 683 }, { "epoch": 0.09247460835178206, "grad_norm": 0.09077506512403488, "learning_rate": 2.9954362169363064e-05, "loss": 0.05679333209991455, "step": 684 }, { "epoch": 0.092609805147618, "grad_norm": 0.21559298038482666, "learning_rate": 2.99538271076313e-05, "loss": 0.10883903503417969, "step": 685 }, { "epoch": 0.09274500194345393, "grad_norm": 0.15053609013557434, "learning_rate": 2.9953288932432985e-05, "loss": 0.06873083114624023, "step": 686 }, { "epoch": 0.09288019873928988, "grad_norm": 0.10975361615419388, "learning_rate": 2.995274764388018e-05, "loss": 0.05933570861816406, "step": 687 }, { "epoch": 0.09301539553512582, "grad_norm": 0.1789533495903015, "learning_rate": 2.9952203242085566e-05, "loss": 0.08390617370605469, "step": 688 }, { "epoch": 0.09315059233096176, "grad_norm": 0.07746067643165588, "learning_rate": 2.995165572716251e-05, "loss": 0.049941062927246094, "step": 689 }, { "epoch": 0.09328578912679769, "grad_norm": 0.24971401691436768, "learning_rate": 2.9951105099225003e-05, "loss": 0.07831621170043945, "step": 690 }, { "epoch": 0.09342098592263363, "grad_norm": 0.36833053827285767, "learning_rate": 2.995055135838768e-05, "loss": 0.0910797119140625, "step": 691 }, { "epoch": 0.09355618271846958, "grad_norm": 0.10221574455499649, "learning_rate": 2.994999450476584e-05, "loss": 0.05471158027648926, "step": 692 }, { "epoch": 0.0936913795143055, "grad_norm": 0.1148780956864357, "learning_rate": 2.9949434538475414e-05, "loss": 0.06614893674850464, "step": 693 }, { "epoch": 0.09382657631014145, "grad_norm": 0.2983362078666687, "learning_rate": 2.9948871459633008e-05, "loss": 0.07979011535644531, "step": 694 }, { "epoch": 0.09396177310597739, "grad_norm": 0.2829235792160034, "learning_rate": 2.994830526835584e-05, "loss": 0.08431506156921387, "step": 695 }, { "epoch": 0.09409696990181332, "grad_norm": 0.13727952539920807, "learning_rate": 2.9947735964761803e-05, "loss": 0.05151629447937012, "step": 696 }, { "epoch": 0.09423216669764926, "grad_norm": 0.6994669437408447, "learning_rate": 2.9947163548969428e-05, "loss": 0.1470623016357422, "step": 697 }, { "epoch": 0.0943673634934852, "grad_norm": 0.1264650821685791, "learning_rate": 2.9946588021097893e-05, "loss": 0.06728172302246094, "step": 698 }, { "epoch": 0.09450256028932115, "grad_norm": 0.15813982486724854, "learning_rate": 2.9946009381267028e-05, "loss": 0.06923913955688477, "step": 699 }, { "epoch": 0.09463775708515708, "grad_norm": 0.3901388943195343, "learning_rate": 2.9945427629597306e-05, "loss": 0.08095240592956543, "step": 700 }, { "epoch": 0.09477295388099302, "grad_norm": 0.7803894877433777, "learning_rate": 2.9944842766209853e-05, "loss": 0.05688786506652832, "step": 701 }, { "epoch": 0.09490815067682896, "grad_norm": 0.5690038800239563, "learning_rate": 2.9944254791226444e-05, "loss": 0.10587501525878906, "step": 702 }, { "epoch": 0.0950433474726649, "grad_norm": 0.4294602870941162, "learning_rate": 2.994366370476949e-05, "loss": 0.10778427124023438, "step": 703 }, { "epoch": 0.09517854426850084, "grad_norm": 0.4204708933830261, "learning_rate": 2.9943069506962067e-05, "loss": 0.09755897521972656, "step": 704 }, { "epoch": 0.09531374106433678, "grad_norm": 0.46001702547073364, "learning_rate": 2.9942472197927886e-05, "loss": 0.06331729888916016, "step": 705 }, { "epoch": 0.09544893786017271, "grad_norm": 0.5181862711906433, "learning_rate": 2.994187177779131e-05, "loss": 0.11969566345214844, "step": 706 }, { "epoch": 0.09558413465600865, "grad_norm": 0.12053836137056351, "learning_rate": 2.9941268246677353e-05, "loss": 0.0626363754272461, "step": 707 }, { "epoch": 0.0957193314518446, "grad_norm": 0.15417523682117462, "learning_rate": 2.9940661604711664e-05, "loss": 0.07751274108886719, "step": 708 }, { "epoch": 0.09585452824768052, "grad_norm": 0.47470811009407043, "learning_rate": 2.994005185202056e-05, "loss": 0.12705039978027344, "step": 709 }, { "epoch": 0.09598972504351647, "grad_norm": 0.27987006306648254, "learning_rate": 2.9939438988730986e-05, "loss": 0.07154560089111328, "step": 710 }, { "epoch": 0.09612492183935241, "grad_norm": 0.15930283069610596, "learning_rate": 2.9938823014970553e-05, "loss": 0.10855388641357422, "step": 711 }, { "epoch": 0.09626011863518835, "grad_norm": 0.257501482963562, "learning_rate": 2.99382039308675e-05, "loss": 0.05958271026611328, "step": 712 }, { "epoch": 0.09639531543102428, "grad_norm": 0.7364037036895752, "learning_rate": 2.993758173655073e-05, "loss": 0.12431597709655762, "step": 713 }, { "epoch": 0.09653051222686022, "grad_norm": 0.17351634800434113, "learning_rate": 2.993695643214979e-05, "loss": 0.0854029655456543, "step": 714 }, { "epoch": 0.09666570902269617, "grad_norm": 0.298469603061676, "learning_rate": 2.9936328017794864e-05, "loss": 0.07146835327148438, "step": 715 }, { "epoch": 0.0968009058185321, "grad_norm": 0.3269720673561096, "learning_rate": 2.9935696493616796e-05, "loss": 0.08095455169677734, "step": 716 }, { "epoch": 0.09693610261436804, "grad_norm": 1.599771499633789, "learning_rate": 2.9935061859747065e-05, "loss": 0.0824437141418457, "step": 717 }, { "epoch": 0.09707129941020398, "grad_norm": 0.1393277794122696, "learning_rate": 2.993442411631782e-05, "loss": 0.0891885757446289, "step": 718 }, { "epoch": 0.09720649620603991, "grad_norm": 0.0796637013554573, "learning_rate": 2.9933783263461827e-05, "loss": 0.09085512161254883, "step": 719 }, { "epoch": 0.09734169300187585, "grad_norm": 0.3463320732116699, "learning_rate": 2.9933139301312526e-05, "loss": 0.09558963775634766, "step": 720 }, { "epoch": 0.0974768897977118, "grad_norm": 0.29950204491615295, "learning_rate": 2.9932492230003984e-05, "loss": 0.061611175537109375, "step": 721 }, { "epoch": 0.09761208659354774, "grad_norm": 0.29633015394210815, "learning_rate": 2.993184204967094e-05, "loss": 0.08249521255493164, "step": 722 }, { "epoch": 0.09774728338938367, "grad_norm": 0.4554542005062103, "learning_rate": 2.9931188760448748e-05, "loss": 0.10138177871704102, "step": 723 }, { "epoch": 0.09788248018521961, "grad_norm": 0.06369206309318542, "learning_rate": 2.9930532362473433e-05, "loss": 0.056447505950927734, "step": 724 }, { "epoch": 0.09801767698105555, "grad_norm": 0.26517704129219055, "learning_rate": 2.9929872855881663e-05, "loss": 0.08902454376220703, "step": 725 }, { "epoch": 0.09815287377689148, "grad_norm": 0.1894245445728302, "learning_rate": 2.9929210240810744e-05, "loss": 0.051939964294433594, "step": 726 }, { "epoch": 0.09828807057272743, "grad_norm": 0.10839513689279556, "learning_rate": 2.9928544517398644e-05, "loss": 0.06834030151367188, "step": 727 }, { "epoch": 0.09842326736856337, "grad_norm": 0.13607367873191833, "learning_rate": 2.9927875685783966e-05, "loss": 0.0683584213256836, "step": 728 }, { "epoch": 0.0985584641643993, "grad_norm": 0.1251877248287201, "learning_rate": 2.9927203746105968e-05, "loss": 0.08129453659057617, "step": 729 }, { "epoch": 0.09869366096023524, "grad_norm": 0.24035900831222534, "learning_rate": 2.9926528698504546e-05, "loss": 0.09148168563842773, "step": 730 }, { "epoch": 0.09882885775607118, "grad_norm": 0.3103516399860382, "learning_rate": 2.992585054312025e-05, "loss": 0.0979318618774414, "step": 731 }, { "epoch": 0.09896405455190711, "grad_norm": 0.23814600706100464, "learning_rate": 2.9925169280094278e-05, "loss": 0.11593031883239746, "step": 732 }, { "epoch": 0.09909925134774306, "grad_norm": 0.20139487087726593, "learning_rate": 2.9924484909568472e-05, "loss": 0.07658267021179199, "step": 733 }, { "epoch": 0.099234448143579, "grad_norm": 0.13799434900283813, "learning_rate": 2.9923797431685322e-05, "loss": 0.08754158020019531, "step": 734 }, { "epoch": 0.09936964493941494, "grad_norm": 0.16388531029224396, "learning_rate": 2.992310684658796e-05, "loss": 0.08730697631835938, "step": 735 }, { "epoch": 0.09950484173525087, "grad_norm": 0.21970367431640625, "learning_rate": 2.9922413154420173e-05, "loss": 0.07097339630126953, "step": 736 }, { "epoch": 0.09964003853108681, "grad_norm": 0.3876591622829437, "learning_rate": 2.9921716355326393e-05, "loss": 0.10553598403930664, "step": 737 }, { "epoch": 0.09977523532692276, "grad_norm": 0.2511739730834961, "learning_rate": 2.9921016449451695e-05, "loss": 0.0774528980255127, "step": 738 }, { "epoch": 0.09991043212275869, "grad_norm": 0.3090797960758209, "learning_rate": 2.9920313436941805e-05, "loss": 0.06734907627105713, "step": 739 }, { "epoch": 0.10004562891859463, "grad_norm": 0.16804896295070648, "learning_rate": 2.991960731794309e-05, "loss": 0.07374244928359985, "step": 740 }, { "epoch": 0.10018082571443057, "grad_norm": 0.12314272671937943, "learning_rate": 2.991889809260257e-05, "loss": 0.03489363193511963, "step": 741 }, { "epoch": 0.1003160225102665, "grad_norm": 0.15799863636493683, "learning_rate": 2.9918185761067912e-05, "loss": 0.0667719841003418, "step": 742 }, { "epoch": 0.10045121930610244, "grad_norm": 0.12534253299236298, "learning_rate": 2.9917470323487423e-05, "loss": 0.05903005599975586, "step": 743 }, { "epoch": 0.10058641610193839, "grad_norm": 0.10759688913822174, "learning_rate": 2.9916751780010063e-05, "loss": 0.0698390007019043, "step": 744 }, { "epoch": 0.10072161289777433, "grad_norm": 0.12864547967910767, "learning_rate": 2.9916030130785436e-05, "loss": 0.09099912643432617, "step": 745 }, { "epoch": 0.10085680969361026, "grad_norm": 0.20595256984233856, "learning_rate": 2.99153053759638e-05, "loss": 0.08784770965576172, "step": 746 }, { "epoch": 0.1009920064894462, "grad_norm": 0.17962120473384857, "learning_rate": 2.991457751569604e-05, "loss": 0.09574031829833984, "step": 747 }, { "epoch": 0.10112720328528214, "grad_norm": 0.17928659915924072, "learning_rate": 2.991384655013371e-05, "loss": 0.09390830993652344, "step": 748 }, { "epoch": 0.10126240008111807, "grad_norm": 0.3044493794441223, "learning_rate": 2.9913112479429e-05, "loss": 0.09512901306152344, "step": 749 }, { "epoch": 0.10139759687695402, "grad_norm": 0.19055142998695374, "learning_rate": 2.991237530373474e-05, "loss": 0.09835147857666016, "step": 750 }, { "epoch": 0.10153279367278996, "grad_norm": 0.08671168237924576, "learning_rate": 2.9911635023204423e-05, "loss": 0.05415821075439453, "step": 751 }, { "epoch": 0.10166799046862589, "grad_norm": 0.21877916157245636, "learning_rate": 2.9910891637992172e-05, "loss": 0.08973956108093262, "step": 752 }, { "epoch": 0.10180318726446183, "grad_norm": 0.1237107366323471, "learning_rate": 2.991014514825277e-05, "loss": 0.0618128776550293, "step": 753 }, { "epoch": 0.10193838406029777, "grad_norm": 0.20009958744049072, "learning_rate": 2.9909395554141638e-05, "loss": 0.06196999549865723, "step": 754 }, { "epoch": 0.10207358085613372, "grad_norm": 0.2767101228237152, "learning_rate": 2.9908642855814844e-05, "loss": 0.0626363754272461, "step": 755 }, { "epoch": 0.10220877765196965, "grad_norm": 0.12315303832292557, "learning_rate": 2.9907887053429107e-05, "loss": 0.10156726837158203, "step": 756 }, { "epoch": 0.10234397444780559, "grad_norm": 0.3240935802459717, "learning_rate": 2.9907128147141783e-05, "loss": 0.06920528411865234, "step": 757 }, { "epoch": 0.10247917124364153, "grad_norm": 0.3710269629955292, "learning_rate": 2.990636613711089e-05, "loss": 0.10161972045898438, "step": 758 }, { "epoch": 0.10261436803947746, "grad_norm": 0.29096660017967224, "learning_rate": 2.990560102349507e-05, "loss": 0.10406351089477539, "step": 759 }, { "epoch": 0.1027495648353134, "grad_norm": 0.23447288572788239, "learning_rate": 2.9904832806453635e-05, "loss": 0.07109212875366211, "step": 760 }, { "epoch": 0.10288476163114935, "grad_norm": 0.35149598121643066, "learning_rate": 2.9904061486146524e-05, "loss": 0.09916567802429199, "step": 761 }, { "epoch": 0.10301995842698528, "grad_norm": 0.18237677216529846, "learning_rate": 2.9903287062734333e-05, "loss": 0.06481671333312988, "step": 762 }, { "epoch": 0.10315515522282122, "grad_norm": 0.17816005647182465, "learning_rate": 2.990250953637831e-05, "loss": 0.08998489379882812, "step": 763 }, { "epoch": 0.10329035201865716, "grad_norm": 0.19520476460456848, "learning_rate": 2.9901728907240326e-05, "loss": 0.07070064544677734, "step": 764 }, { "epoch": 0.10342554881449309, "grad_norm": 0.4232277572154999, "learning_rate": 2.9900945175482916e-05, "loss": 0.11032295227050781, "step": 765 }, { "epoch": 0.10356074561032903, "grad_norm": 0.16400767862796783, "learning_rate": 2.990015834126926e-05, "loss": 0.09066009521484375, "step": 766 }, { "epoch": 0.10369594240616498, "grad_norm": 0.08228255063295364, "learning_rate": 2.989936840476318e-05, "loss": 0.06066131591796875, "step": 767 }, { "epoch": 0.10383113920200092, "grad_norm": 0.2542920410633087, "learning_rate": 2.9898575366129145e-05, "loss": 0.07512092590332031, "step": 768 }, { "epoch": 0.10396633599783685, "grad_norm": 0.2543547749519348, "learning_rate": 2.9897779225532273e-05, "loss": 0.08338165283203125, "step": 769 }, { "epoch": 0.10410153279367279, "grad_norm": 0.11839622259140015, "learning_rate": 2.989697998313832e-05, "loss": 0.0728607177734375, "step": 770 }, { "epoch": 0.10423672958950873, "grad_norm": 0.08058282732963562, "learning_rate": 2.989617763911369e-05, "loss": 0.07061290740966797, "step": 771 }, { "epoch": 0.10437192638534466, "grad_norm": 0.23767045140266418, "learning_rate": 2.9895372193625442e-05, "loss": 0.09198951721191406, "step": 772 }, { "epoch": 0.1045071231811806, "grad_norm": 0.12013694643974304, "learning_rate": 2.9894563646841273e-05, "loss": 0.06464433670043945, "step": 773 }, { "epoch": 0.10464231997701655, "grad_norm": 0.12468311935663223, "learning_rate": 2.9893751998929523e-05, "loss": 0.05020427703857422, "step": 774 }, { "epoch": 0.10477751677285248, "grad_norm": 0.19225336611270905, "learning_rate": 2.9892937250059187e-05, "loss": 0.07311725616455078, "step": 775 }, { "epoch": 0.10491271356868842, "grad_norm": 0.11506348848342896, "learning_rate": 2.9892119400399894e-05, "loss": 0.07399535179138184, "step": 776 }, { "epoch": 0.10504791036452436, "grad_norm": 0.052996713668107986, "learning_rate": 2.989129845012193e-05, "loss": 0.04266500473022461, "step": 777 }, { "epoch": 0.1051831071603603, "grad_norm": 0.41333791613578796, "learning_rate": 2.989047439939621e-05, "loss": 0.11240839958190918, "step": 778 }, { "epoch": 0.10531830395619624, "grad_norm": 0.16591086983680725, "learning_rate": 2.9889647248394324e-05, "loss": 0.08731245994567871, "step": 779 }, { "epoch": 0.10545350075203218, "grad_norm": 0.39323684573173523, "learning_rate": 2.9888816997288475e-05, "loss": 0.09854698181152344, "step": 780 }, { "epoch": 0.10558869754786812, "grad_norm": 0.18708565831184387, "learning_rate": 2.988798364625153e-05, "loss": 0.0652015209197998, "step": 781 }, { "epoch": 0.10572389434370405, "grad_norm": 0.1845155507326126, "learning_rate": 2.9887147195457002e-05, "loss": 0.10908269882202148, "step": 782 }, { "epoch": 0.10585909113953999, "grad_norm": 0.2472580224275589, "learning_rate": 2.9886307645079037e-05, "loss": 0.08659076690673828, "step": 783 }, { "epoch": 0.10599428793537594, "grad_norm": 0.12436066567897797, "learning_rate": 2.9885464995292436e-05, "loss": 0.09352922439575195, "step": 784 }, { "epoch": 0.10612948473121186, "grad_norm": 0.08763474971055984, "learning_rate": 2.9884619246272648e-05, "loss": 0.059453725814819336, "step": 785 }, { "epoch": 0.10626468152704781, "grad_norm": 0.22112756967544556, "learning_rate": 2.988377039819575e-05, "loss": 0.07128620147705078, "step": 786 }, { "epoch": 0.10639987832288375, "grad_norm": 0.2052437663078308, "learning_rate": 2.9882918451238494e-05, "loss": 0.059258460998535156, "step": 787 }, { "epoch": 0.10653507511871968, "grad_norm": 0.24719718098640442, "learning_rate": 2.988206340557825e-05, "loss": 0.08119010925292969, "step": 788 }, { "epoch": 0.10667027191455562, "grad_norm": 0.1153087466955185, "learning_rate": 2.9881205261393037e-05, "loss": 0.08741474151611328, "step": 789 }, { "epoch": 0.10680546871039157, "grad_norm": 0.07043509185314178, "learning_rate": 2.988034401886154e-05, "loss": 0.0507049560546875, "step": 790 }, { "epoch": 0.10694066550622751, "grad_norm": 0.15033923089504242, "learning_rate": 2.9879479678163065e-05, "loss": 0.09711217880249023, "step": 791 }, { "epoch": 0.10707586230206344, "grad_norm": 0.11357401311397552, "learning_rate": 2.9878612239477568e-05, "loss": 0.07277107238769531, "step": 792 }, { "epoch": 0.10721105909789938, "grad_norm": 0.3398059010505676, "learning_rate": 2.9877741702985666e-05, "loss": 0.08458948135375977, "step": 793 }, { "epoch": 0.10734625589373532, "grad_norm": 0.3241231143474579, "learning_rate": 2.98768680688686e-05, "loss": 0.08054161071777344, "step": 794 }, { "epoch": 0.10748145268957125, "grad_norm": 0.09487687796354294, "learning_rate": 2.9875991337308274e-05, "loss": 0.06821727752685547, "step": 795 }, { "epoch": 0.1076166494854072, "grad_norm": 0.2654559314250946, "learning_rate": 2.987511150848722e-05, "loss": 0.09479951858520508, "step": 796 }, { "epoch": 0.10775184628124314, "grad_norm": 0.1296280026435852, "learning_rate": 2.9874228582588627e-05, "loss": 0.06629657745361328, "step": 797 }, { "epoch": 0.10788704307707907, "grad_norm": 0.328866571187973, "learning_rate": 2.9873342559796325e-05, "loss": 0.0834050178527832, "step": 798 }, { "epoch": 0.10802223987291501, "grad_norm": 0.07810252904891968, "learning_rate": 2.9872453440294787e-05, "loss": 0.04237055778503418, "step": 799 }, { "epoch": 0.10815743666875095, "grad_norm": 0.18273776769638062, "learning_rate": 2.9871561224269134e-05, "loss": 0.08586692810058594, "step": 800 }, { "epoch": 0.1082926334645869, "grad_norm": 0.1715489625930786, "learning_rate": 2.9870665911905127e-05, "loss": 0.04636788368225098, "step": 801 }, { "epoch": 0.10842783026042283, "grad_norm": 0.301963746547699, "learning_rate": 2.9869767503389176e-05, "loss": 0.08504486083984375, "step": 802 }, { "epoch": 0.10856302705625877, "grad_norm": 0.13930180668830872, "learning_rate": 2.986886599890834e-05, "loss": 0.060051679611206055, "step": 803 }, { "epoch": 0.10869822385209471, "grad_norm": 0.18955019116401672, "learning_rate": 2.9867961398650306e-05, "loss": 0.0805504322052002, "step": 804 }, { "epoch": 0.10883342064793064, "grad_norm": 0.18851809203624725, "learning_rate": 2.9867053702803425e-05, "loss": 0.08764410018920898, "step": 805 }, { "epoch": 0.10896861744376658, "grad_norm": 0.0662231594324112, "learning_rate": 2.9866142911556685e-05, "loss": 0.056791067123413086, "step": 806 }, { "epoch": 0.10910381423960253, "grad_norm": 0.08169761300086975, "learning_rate": 2.9865229025099713e-05, "loss": 0.0470128059387207, "step": 807 }, { "epoch": 0.10923901103543845, "grad_norm": 0.15803854167461395, "learning_rate": 2.986431204362279e-05, "loss": 0.061837196350097656, "step": 808 }, { "epoch": 0.1093742078312744, "grad_norm": 0.3536088466644287, "learning_rate": 2.9863391967316835e-05, "loss": 0.11529064178466797, "step": 809 }, { "epoch": 0.10950940462711034, "grad_norm": 0.40793943405151367, "learning_rate": 2.9862468796373404e-05, "loss": 0.11717033386230469, "step": 810 }, { "epoch": 0.10964460142294627, "grad_norm": 0.2415672242641449, "learning_rate": 2.9861542530984718e-05, "loss": 0.05767989158630371, "step": 811 }, { "epoch": 0.10977979821878221, "grad_norm": 0.7584882378578186, "learning_rate": 2.9860613171343624e-05, "loss": 0.10846614837646484, "step": 812 }, { "epoch": 0.10991499501461816, "grad_norm": 0.516636073589325, "learning_rate": 2.9859680717643623e-05, "loss": 0.10421156883239746, "step": 813 }, { "epoch": 0.1100501918104541, "grad_norm": 0.13690637052059174, "learning_rate": 2.985874517007885e-05, "loss": 0.09915876388549805, "step": 814 }, { "epoch": 0.11018538860629003, "grad_norm": 0.32932132482528687, "learning_rate": 2.98578065288441e-05, "loss": 0.08774566650390625, "step": 815 }, { "epoch": 0.11032058540212597, "grad_norm": 0.3442085385322571, "learning_rate": 2.9856864794134798e-05, "loss": 0.0969090461730957, "step": 816 }, { "epoch": 0.11045578219796191, "grad_norm": 0.13868650794029236, "learning_rate": 2.9855919966147025e-05, "loss": 0.054688215255737305, "step": 817 }, { "epoch": 0.11059097899379784, "grad_norm": 0.19820687174797058, "learning_rate": 2.9854972045077485e-05, "loss": 0.07607817649841309, "step": 818 }, { "epoch": 0.11072617578963379, "grad_norm": 0.2938211262226105, "learning_rate": 2.9854021031123555e-05, "loss": 0.08491277694702148, "step": 819 }, { "epoch": 0.11086137258546973, "grad_norm": 0.1576143354177475, "learning_rate": 2.9853066924483232e-05, "loss": 0.09095048904418945, "step": 820 }, { "epoch": 0.11099656938130566, "grad_norm": 0.1545390784740448, "learning_rate": 2.9852109725355173e-05, "loss": 0.09976053237915039, "step": 821 }, { "epoch": 0.1111317661771416, "grad_norm": 0.2253318727016449, "learning_rate": 2.9851149433938662e-05, "loss": 0.08770465850830078, "step": 822 }, { "epoch": 0.11126696297297754, "grad_norm": 0.20439563691616058, "learning_rate": 2.9850186050433645e-05, "loss": 0.08408737182617188, "step": 823 }, { "epoch": 0.11140215976881349, "grad_norm": 0.21274667978286743, "learning_rate": 2.9849219575040708e-05, "loss": 0.06603193283081055, "step": 824 }, { "epoch": 0.11153735656464941, "grad_norm": 0.20621870458126068, "learning_rate": 2.984825000796106e-05, "loss": 0.057804107666015625, "step": 825 }, { "epoch": 0.11167255336048536, "grad_norm": 0.0705530196428299, "learning_rate": 2.9847277349396586e-05, "loss": 0.043094635009765625, "step": 826 }, { "epoch": 0.1118077501563213, "grad_norm": 0.4527442753314972, "learning_rate": 2.984630159954979e-05, "loss": 0.10841941833496094, "step": 827 }, { "epoch": 0.11194294695215723, "grad_norm": 0.08340879529714584, "learning_rate": 2.9845322758623833e-05, "loss": 0.0617976188659668, "step": 828 }, { "epoch": 0.11207814374799317, "grad_norm": 0.19706197082996368, "learning_rate": 2.984434082682251e-05, "loss": 0.07596111297607422, "step": 829 }, { "epoch": 0.11221334054382912, "grad_norm": 0.1969340741634369, "learning_rate": 2.984335580435027e-05, "loss": 0.06574010848999023, "step": 830 }, { "epoch": 0.11234853733966504, "grad_norm": 0.08985655009746552, "learning_rate": 2.9842367691412192e-05, "loss": 0.06956326961517334, "step": 831 }, { "epoch": 0.11248373413550099, "grad_norm": 0.21356859803199768, "learning_rate": 2.9841376488214015e-05, "loss": 0.1064915657043457, "step": 832 }, { "epoch": 0.11261893093133693, "grad_norm": 0.12347124516963959, "learning_rate": 2.984038219496211e-05, "loss": 0.07301950454711914, "step": 833 }, { "epoch": 0.11275412772717286, "grad_norm": 0.1629743129014969, "learning_rate": 2.9839384811863493e-05, "loss": 0.044960975646972656, "step": 834 }, { "epoch": 0.1128893245230088, "grad_norm": 0.0998326987028122, "learning_rate": 2.9838384339125824e-05, "loss": 0.08784270286560059, "step": 835 }, { "epoch": 0.11302452131884475, "grad_norm": 0.3424812853336334, "learning_rate": 2.9837380776957405e-05, "loss": 0.09475421905517578, "step": 836 }, { "epoch": 0.11315971811468069, "grad_norm": 0.11877866834402084, "learning_rate": 2.9836374125567193e-05, "loss": 0.07374238967895508, "step": 837 }, { "epoch": 0.11329491491051662, "grad_norm": 0.11771272122859955, "learning_rate": 2.9835364385164764e-05, "loss": 0.0850377082824707, "step": 838 }, { "epoch": 0.11343011170635256, "grad_norm": 0.18212012946605682, "learning_rate": 2.983435155596036e-05, "loss": 0.07221508026123047, "step": 839 }, { "epoch": 0.1135653085021885, "grad_norm": 0.2089482694864273, "learning_rate": 2.9833335638164858e-05, "loss": 0.06322050094604492, "step": 840 }, { "epoch": 0.11370050529802443, "grad_norm": 0.11180892586708069, "learning_rate": 2.9832316631989774e-05, "loss": 0.08978652954101562, "step": 841 }, { "epoch": 0.11383570209386037, "grad_norm": 0.10422135889530182, "learning_rate": 2.9831294537647272e-05, "loss": 0.0949702262878418, "step": 842 }, { "epoch": 0.11397089888969632, "grad_norm": 0.0864141434431076, "learning_rate": 2.9830269355350155e-05, "loss": 0.040509819984436035, "step": 843 }, { "epoch": 0.11410609568553225, "grad_norm": 0.10074340552091599, "learning_rate": 2.9829241085311872e-05, "loss": 0.08821964263916016, "step": 844 }, { "epoch": 0.11424129248136819, "grad_norm": 0.10178901255130768, "learning_rate": 2.9828209727746522e-05, "loss": 0.08177757263183594, "step": 845 }, { "epoch": 0.11437648927720413, "grad_norm": 0.40882372856140137, "learning_rate": 2.982717528286883e-05, "loss": 0.09736502170562744, "step": 846 }, { "epoch": 0.11451168607304008, "grad_norm": 0.20394711196422577, "learning_rate": 2.9826137750894176e-05, "loss": 0.06695842742919922, "step": 847 }, { "epoch": 0.114646882868876, "grad_norm": 0.2837764620780945, "learning_rate": 2.9825097132038578e-05, "loss": 0.11022567749023438, "step": 848 }, { "epoch": 0.11478207966471195, "grad_norm": 0.17595382034778595, "learning_rate": 2.9824053426518703e-05, "loss": 0.06178617477416992, "step": 849 }, { "epoch": 0.11491727646054789, "grad_norm": 0.08936288952827454, "learning_rate": 2.9823006634551848e-05, "loss": 0.042883872985839844, "step": 850 }, { "epoch": 0.11505247325638382, "grad_norm": 0.0923553854227066, "learning_rate": 2.9821956756355973e-05, "loss": 0.052956581115722656, "step": 851 }, { "epoch": 0.11518767005221976, "grad_norm": 0.15883170068264008, "learning_rate": 2.9820903792149653e-05, "loss": 0.0631551742553711, "step": 852 }, { "epoch": 0.1153228668480557, "grad_norm": 0.12394951283931732, "learning_rate": 2.981984774215214e-05, "loss": 0.056340694427490234, "step": 853 }, { "epoch": 0.11545806364389163, "grad_norm": 0.16585946083068848, "learning_rate": 2.9818788606583286e-05, "loss": 0.0750889778137207, "step": 854 }, { "epoch": 0.11559326043972758, "grad_norm": 0.11545179039239883, "learning_rate": 2.9817726385663627e-05, "loss": 0.06773948669433594, "step": 855 }, { "epoch": 0.11572845723556352, "grad_norm": 0.06577248126268387, "learning_rate": 2.9816661079614316e-05, "loss": 0.03995513916015625, "step": 856 }, { "epoch": 0.11586365403139945, "grad_norm": 0.09831870347261429, "learning_rate": 2.9815592688657154e-05, "loss": 0.05192971229553223, "step": 857 }, { "epoch": 0.11599885082723539, "grad_norm": 0.2290714532136917, "learning_rate": 2.9814521213014588e-05, "loss": 0.0670008659362793, "step": 858 }, { "epoch": 0.11613404762307133, "grad_norm": 0.07488477975130081, "learning_rate": 2.9813446652909707e-05, "loss": 0.05266571044921875, "step": 859 }, { "epoch": 0.11626924441890728, "grad_norm": 0.07828672975301743, "learning_rate": 2.981236900856624e-05, "loss": 0.08294129371643066, "step": 860 }, { "epoch": 0.1164044412147432, "grad_norm": 0.15161457657814026, "learning_rate": 2.9811288280208552e-05, "loss": 0.07205438613891602, "step": 861 }, { "epoch": 0.11653963801057915, "grad_norm": 0.24158193171024323, "learning_rate": 2.9810204468061664e-05, "loss": 0.06822800636291504, "step": 862 }, { "epoch": 0.11667483480641509, "grad_norm": 0.09941220283508301, "learning_rate": 2.9809117572351223e-05, "loss": 0.09187650680541992, "step": 863 }, { "epoch": 0.11681003160225102, "grad_norm": 0.2299419641494751, "learning_rate": 2.9808027593303537e-05, "loss": 0.0739750862121582, "step": 864 }, { "epoch": 0.11694522839808696, "grad_norm": 0.24607360363006592, "learning_rate": 2.980693453114554e-05, "loss": 0.09745121002197266, "step": 865 }, { "epoch": 0.11708042519392291, "grad_norm": 0.09835226088762283, "learning_rate": 2.980583838610481e-05, "loss": 0.06097698211669922, "step": 866 }, { "epoch": 0.11721562198975884, "grad_norm": 0.1356116682291031, "learning_rate": 2.980473915840957e-05, "loss": 0.048302650451660156, "step": 867 }, { "epoch": 0.11735081878559478, "grad_norm": 0.09727887064218521, "learning_rate": 2.9803636848288696e-05, "loss": 0.062195539474487305, "step": 868 }, { "epoch": 0.11748601558143072, "grad_norm": 0.09921512007713318, "learning_rate": 2.9802531455971686e-05, "loss": 0.11292636394500732, "step": 869 }, { "epoch": 0.11762121237726667, "grad_norm": 0.13198482990264893, "learning_rate": 2.980142298168869e-05, "loss": 0.0755453109741211, "step": 870 }, { "epoch": 0.1177564091731026, "grad_norm": 0.1393927037715912, "learning_rate": 2.9800311425670495e-05, "loss": 0.08868122100830078, "step": 871 }, { "epoch": 0.11789160596893854, "grad_norm": 0.1366884410381317, "learning_rate": 2.9799196788148538e-05, "loss": 0.07724571228027344, "step": 872 }, { "epoch": 0.11802680276477448, "grad_norm": 0.19587735831737518, "learning_rate": 2.9798079069354893e-05, "loss": 0.06855130195617676, "step": 873 }, { "epoch": 0.11816199956061041, "grad_norm": 0.10926695168018341, "learning_rate": 2.9796958269522273e-05, "loss": 0.0879659652709961, "step": 874 }, { "epoch": 0.11829719635644635, "grad_norm": 0.09635842591524124, "learning_rate": 2.9795834388884034e-05, "loss": 0.08609294891357422, "step": 875 }, { "epoch": 0.1184323931522823, "grad_norm": 0.2814229726791382, "learning_rate": 2.979470742767417e-05, "loss": 0.10121011734008789, "step": 876 }, { "epoch": 0.11856758994811822, "grad_norm": 0.06718067824840546, "learning_rate": 2.9793577386127327e-05, "loss": 0.048627495765686035, "step": 877 }, { "epoch": 0.11870278674395417, "grad_norm": 0.2509063482284546, "learning_rate": 2.9792444264478784e-05, "loss": 0.09848976135253906, "step": 878 }, { "epoch": 0.11883798353979011, "grad_norm": 0.1740858256816864, "learning_rate": 2.979130806296446e-05, "loss": 0.0972909927368164, "step": 879 }, { "epoch": 0.11897318033562605, "grad_norm": 0.1094849482178688, "learning_rate": 2.9790168781820925e-05, "loss": 0.10734844207763672, "step": 880 }, { "epoch": 0.11910837713146198, "grad_norm": 0.11823206394910812, "learning_rate": 2.9789026421285375e-05, "loss": 0.046347856521606445, "step": 881 }, { "epoch": 0.11924357392729792, "grad_norm": 0.17739428579807281, "learning_rate": 2.9787880981595663e-05, "loss": 0.056365013122558594, "step": 882 }, { "epoch": 0.11937877072313387, "grad_norm": 0.08121839165687561, "learning_rate": 2.9786732462990267e-05, "loss": 0.03412294387817383, "step": 883 }, { "epoch": 0.1195139675189698, "grad_norm": 0.10083824396133423, "learning_rate": 2.9785580865708323e-05, "loss": 0.08484792709350586, "step": 884 }, { "epoch": 0.11964916431480574, "grad_norm": 0.0700761154294014, "learning_rate": 2.97844261899896e-05, "loss": 0.07140254974365234, "step": 885 }, { "epoch": 0.11978436111064168, "grad_norm": 0.19332964718341827, "learning_rate": 2.9783268436074495e-05, "loss": 0.0855860710144043, "step": 886 }, { "epoch": 0.11991955790647761, "grad_norm": 0.07933145016431808, "learning_rate": 2.978210760420407e-05, "loss": 0.06059074401855469, "step": 887 }, { "epoch": 0.12005475470231355, "grad_norm": 0.26722756028175354, "learning_rate": 2.978094369462002e-05, "loss": 0.05805253982543945, "step": 888 }, { "epoch": 0.1201899514981495, "grad_norm": 0.1603512465953827, "learning_rate": 2.977977670756467e-05, "loss": 0.09630918502807617, "step": 889 }, { "epoch": 0.12032514829398543, "grad_norm": 0.10066097229719162, "learning_rate": 2.9778606643280987e-05, "loss": 0.05611252784729004, "step": 890 }, { "epoch": 0.12046034508982137, "grad_norm": 0.5782535076141357, "learning_rate": 2.97774335020126e-05, "loss": 0.1215205192565918, "step": 891 }, { "epoch": 0.12059554188565731, "grad_norm": 0.10206296294927597, "learning_rate": 2.9776257284003748e-05, "loss": 0.06757831573486328, "step": 892 }, { "epoch": 0.12073073868149325, "grad_norm": 0.14233049750328064, "learning_rate": 2.9775077989499338e-05, "loss": 0.057381629943847656, "step": 893 }, { "epoch": 0.12086593547732918, "grad_norm": 0.11968830227851868, "learning_rate": 2.97738956187449e-05, "loss": 0.06141304969787598, "step": 894 }, { "epoch": 0.12100113227316513, "grad_norm": 0.35995978116989136, "learning_rate": 2.9772710171986605e-05, "loss": 0.0654524564743042, "step": 895 }, { "epoch": 0.12113632906900107, "grad_norm": 0.44974932074546814, "learning_rate": 2.977152164947128e-05, "loss": 0.10231733322143555, "step": 896 }, { "epoch": 0.121271525864837, "grad_norm": 0.17205770313739777, "learning_rate": 2.9770330051446373e-05, "loss": 0.08052492141723633, "step": 897 }, { "epoch": 0.12140672266067294, "grad_norm": 0.1110287681221962, "learning_rate": 2.976913537815999e-05, "loss": 0.0387725830078125, "step": 898 }, { "epoch": 0.12154191945650888, "grad_norm": 0.1932668685913086, "learning_rate": 2.9767937629860853e-05, "loss": 0.06683921813964844, "step": 899 }, { "epoch": 0.12167711625234481, "grad_norm": 0.3327488601207733, "learning_rate": 2.9766736806798353e-05, "loss": 0.11077547073364258, "step": 900 }, { "epoch": 0.12181231304818076, "grad_norm": 0.3324854373931885, "learning_rate": 2.9765532909222512e-05, "loss": 0.11431884765625, "step": 901 }, { "epoch": 0.1219475098440167, "grad_norm": 0.16411514580249786, "learning_rate": 2.976432593738397e-05, "loss": 0.07039260864257812, "step": 902 }, { "epoch": 0.12208270663985264, "grad_norm": 0.19517670571804047, "learning_rate": 2.9763115891534036e-05, "loss": 0.07362890243530273, "step": 903 }, { "epoch": 0.12221790343568857, "grad_norm": 0.09529386460781097, "learning_rate": 2.9761902771924648e-05, "loss": 0.07908391952514648, "step": 904 }, { "epoch": 0.12235310023152451, "grad_norm": 0.47957566380500793, "learning_rate": 2.9760686578808387e-05, "loss": 0.11242246627807617, "step": 905 }, { "epoch": 0.12248829702736046, "grad_norm": 0.08849037438631058, "learning_rate": 2.9759467312438462e-05, "loss": 0.05784595012664795, "step": 906 }, { "epoch": 0.12262349382319639, "grad_norm": 0.2830328643321991, "learning_rate": 2.975824497306874e-05, "loss": 0.07963132858276367, "step": 907 }, { "epoch": 0.12275869061903233, "grad_norm": 0.08956493437290192, "learning_rate": 2.9757019560953707e-05, "loss": 0.055341243743896484, "step": 908 }, { "epoch": 0.12289388741486827, "grad_norm": 0.20775741338729858, "learning_rate": 2.9755791076348517e-05, "loss": 0.08177757263183594, "step": 909 }, { "epoch": 0.1230290842107042, "grad_norm": 0.1353548914194107, "learning_rate": 2.9754559519508924e-05, "loss": 0.05702543258666992, "step": 910 }, { "epoch": 0.12316428100654014, "grad_norm": 0.20207025110721588, "learning_rate": 2.975332489069137e-05, "loss": 0.11015200614929199, "step": 911 }, { "epoch": 0.12329947780237609, "grad_norm": 0.13142721354961395, "learning_rate": 2.9752087190152893e-05, "loss": 0.06446266174316406, "step": 912 }, { "epoch": 0.12343467459821202, "grad_norm": 0.061199259012937546, "learning_rate": 2.97508464181512e-05, "loss": 0.04959380626678467, "step": 913 }, { "epoch": 0.12356987139404796, "grad_norm": 0.05604036897420883, "learning_rate": 2.9749602574944615e-05, "loss": 0.06218576431274414, "step": 914 }, { "epoch": 0.1237050681898839, "grad_norm": 0.050905805081129074, "learning_rate": 2.9748355660792125e-05, "loss": 0.058411598205566406, "step": 915 }, { "epoch": 0.12384026498571984, "grad_norm": 0.07387355715036392, "learning_rate": 2.9747105675953338e-05, "loss": 0.06812095642089844, "step": 916 }, { "epoch": 0.12397546178155577, "grad_norm": 0.09652038663625717, "learning_rate": 2.9745852620688506e-05, "loss": 0.06022357940673828, "step": 917 }, { "epoch": 0.12411065857739172, "grad_norm": 0.09810265898704529, "learning_rate": 2.974459649525853e-05, "loss": 0.09165668487548828, "step": 918 }, { "epoch": 0.12424585537322766, "grad_norm": 0.1119033694267273, "learning_rate": 2.9743337299924925e-05, "loss": 0.06702995300292969, "step": 919 }, { "epoch": 0.12438105216906359, "grad_norm": 0.1399790346622467, "learning_rate": 2.9742075034949883e-05, "loss": 0.0867147445678711, "step": 920 }, { "epoch": 0.12451624896489953, "grad_norm": 0.07810115814208984, "learning_rate": 2.97408097005962e-05, "loss": 0.046642303466796875, "step": 921 }, { "epoch": 0.12465144576073547, "grad_norm": 0.06395487487316132, "learning_rate": 2.973954129712733e-05, "loss": 0.06725311279296875, "step": 922 }, { "epoch": 0.1247866425565714, "grad_norm": 0.11322113126516342, "learning_rate": 2.973826982480736e-05, "loss": 0.08335018157958984, "step": 923 }, { "epoch": 0.12492183935240735, "grad_norm": 0.11602670699357986, "learning_rate": 2.9736995283901022e-05, "loss": 0.06998157501220703, "step": 924 }, { "epoch": 0.1250570361482433, "grad_norm": 0.08187684416770935, "learning_rate": 2.9735717674673676e-05, "loss": 0.05464744567871094, "step": 925 }, { "epoch": 0.12519223294407922, "grad_norm": 0.06290099024772644, "learning_rate": 2.973443699739133e-05, "loss": 0.06893730163574219, "step": 926 }, { "epoch": 0.12532742973991517, "grad_norm": 0.08969343453645706, "learning_rate": 2.973315325232063e-05, "loss": 0.07574081420898438, "step": 927 }, { "epoch": 0.1254626265357511, "grad_norm": 0.060889434069395065, "learning_rate": 2.9731866439728853e-05, "loss": 0.05339348316192627, "step": 928 }, { "epoch": 0.12559782333158703, "grad_norm": 0.1132717877626419, "learning_rate": 2.9730576559883924e-05, "loss": 0.07853174209594727, "step": 929 }, { "epoch": 0.125733020127423, "grad_norm": 0.18988798558712006, "learning_rate": 2.97292836130544e-05, "loss": 0.10422897338867188, "step": 930 }, { "epoch": 0.12586821692325892, "grad_norm": 0.24295906722545624, "learning_rate": 2.9727987599509485e-05, "loss": 0.10706615447998047, "step": 931 }, { "epoch": 0.12600341371909485, "grad_norm": 0.08881915360689163, "learning_rate": 2.972668851951901e-05, "loss": 0.08465337753295898, "step": 932 }, { "epoch": 0.1261386105149308, "grad_norm": 0.14923670887947083, "learning_rate": 2.9725386373353455e-05, "loss": 0.08785533905029297, "step": 933 }, { "epoch": 0.12627380731076673, "grad_norm": 0.15341024100780487, "learning_rate": 2.972408116128393e-05, "loss": 0.0878748893737793, "step": 934 }, { "epoch": 0.12640900410660266, "grad_norm": 0.11445598304271698, "learning_rate": 2.972277288358219e-05, "loss": 0.04725363850593567, "step": 935 }, { "epoch": 0.12654420090243862, "grad_norm": 0.1122552752494812, "learning_rate": 2.9721461540520628e-05, "loss": 0.05743694305419922, "step": 936 }, { "epoch": 0.12667939769827455, "grad_norm": 0.09951417148113251, "learning_rate": 2.9720147132372265e-05, "loss": 0.06026816368103027, "step": 937 }, { "epoch": 0.12681459449411048, "grad_norm": 0.22975791990756989, "learning_rate": 2.9718829659410772e-05, "loss": 0.07405209541320801, "step": 938 }, { "epoch": 0.12694979128994643, "grad_norm": 0.33335110545158386, "learning_rate": 2.9717509121910453e-05, "loss": 0.09210205078125, "step": 939 }, { "epoch": 0.12708498808578236, "grad_norm": 0.1875367909669876, "learning_rate": 2.971618552014625e-05, "loss": 0.09831809997558594, "step": 940 }, { "epoch": 0.1272201848816183, "grad_norm": 0.16147367656230927, "learning_rate": 2.971485885439375e-05, "loss": 0.08818912506103516, "step": 941 }, { "epoch": 0.12735538167745425, "grad_norm": 0.2637520134449005, "learning_rate": 2.9713529124929163e-05, "loss": 0.07030582427978516, "step": 942 }, { "epoch": 0.12749057847329018, "grad_norm": 0.17659729719161987, "learning_rate": 2.9712196332029352e-05, "loss": 0.08257389068603516, "step": 943 }, { "epoch": 0.12762577526912613, "grad_norm": 0.1788230687379837, "learning_rate": 2.971086047597181e-05, "loss": 0.05866813659667969, "step": 944 }, { "epoch": 0.12776097206496206, "grad_norm": 0.09143399447202682, "learning_rate": 2.9709521557034668e-05, "loss": 0.07986783981323242, "step": 945 }, { "epoch": 0.127896168860798, "grad_norm": 0.05168140307068825, "learning_rate": 2.9708179575496696e-05, "loss": 0.04710984230041504, "step": 946 }, { "epoch": 0.12803136565663395, "grad_norm": 0.17141243815422058, "learning_rate": 2.9706834531637303e-05, "loss": 0.07384252548217773, "step": 947 }, { "epoch": 0.12816656245246988, "grad_norm": 0.09293242543935776, "learning_rate": 2.9705486425736537e-05, "loss": 0.05687761306762695, "step": 948 }, { "epoch": 0.1283017592483058, "grad_norm": 0.13287778198719025, "learning_rate": 2.9704135258075077e-05, "loss": 0.09968948364257812, "step": 949 }, { "epoch": 0.12843695604414176, "grad_norm": 0.17102405428886414, "learning_rate": 2.970278102893424e-05, "loss": 0.08391761779785156, "step": 950 }, { "epoch": 0.1285721528399777, "grad_norm": 0.07498197257518768, "learning_rate": 2.9701423738595992e-05, "loss": 0.05570793151855469, "step": 951 }, { "epoch": 0.12870734963581362, "grad_norm": 0.2092883437871933, "learning_rate": 2.9700063387342925e-05, "loss": 0.08389425277709961, "step": 952 }, { "epoch": 0.12884254643164958, "grad_norm": 0.11228356510400772, "learning_rate": 2.969869997545827e-05, "loss": 0.06782150268554688, "step": 953 }, { "epoch": 0.1289777432274855, "grad_norm": 0.31297019124031067, "learning_rate": 2.9697333503225897e-05, "loss": 0.09388470649719238, "step": 954 }, { "epoch": 0.12911294002332144, "grad_norm": 0.1070636734366417, "learning_rate": 2.969596397093031e-05, "loss": 0.04721426963806152, "step": 955 }, { "epoch": 0.1292481368191574, "grad_norm": 0.4006432294845581, "learning_rate": 2.969459137885666e-05, "loss": 0.09582674503326416, "step": 956 }, { "epoch": 0.12938333361499332, "grad_norm": 0.05496162921190262, "learning_rate": 2.969321572729072e-05, "loss": 0.057157039642333984, "step": 957 }, { "epoch": 0.12951853041082925, "grad_norm": 0.1691799759864807, "learning_rate": 2.9691837016518915e-05, "loss": 0.10239124298095703, "step": 958 }, { "epoch": 0.1296537272066652, "grad_norm": 0.11953835934400558, "learning_rate": 2.9690455246828294e-05, "loss": 0.0597691535949707, "step": 959 }, { "epoch": 0.12978892400250114, "grad_norm": 0.10983915627002716, "learning_rate": 2.968907041850655e-05, "loss": 0.08515787124633789, "step": 960 }, { "epoch": 0.12992412079833707, "grad_norm": 0.4553511440753937, "learning_rate": 2.968768253184202e-05, "loss": 0.10332679748535156, "step": 961 }, { "epoch": 0.13005931759417302, "grad_norm": 0.20180925726890564, "learning_rate": 2.9686291587123655e-05, "loss": 0.06601858139038086, "step": 962 }, { "epoch": 0.13019451439000895, "grad_norm": 0.2608233690261841, "learning_rate": 2.968489758464107e-05, "loss": 0.08244514465332031, "step": 963 }, { "epoch": 0.13032971118584488, "grad_norm": 0.16600099205970764, "learning_rate": 2.9683500524684494e-05, "loss": 0.09819841384887695, "step": 964 }, { "epoch": 0.13046490798168084, "grad_norm": 0.10266660153865814, "learning_rate": 2.9682100407544812e-05, "loss": 0.09746980667114258, "step": 965 }, { "epoch": 0.13060010477751677, "grad_norm": 0.21525733172893524, "learning_rate": 2.9680697233513526e-05, "loss": 0.07324165105819702, "step": 966 }, { "epoch": 0.13073530157335272, "grad_norm": 0.0983826220035553, "learning_rate": 2.9679291002882793e-05, "loss": 0.04398226737976074, "step": 967 }, { "epoch": 0.13087049836918865, "grad_norm": 0.08117310702800751, "learning_rate": 2.967788171594539e-05, "loss": 0.06145191192626953, "step": 968 }, { "epoch": 0.13100569516502458, "grad_norm": 0.06245778501033783, "learning_rate": 2.967646937299474e-05, "loss": 0.057662010192871094, "step": 969 }, { "epoch": 0.13114089196086054, "grad_norm": 0.1285935938358307, "learning_rate": 2.9675053974324907e-05, "loss": 0.05845296382904053, "step": 970 }, { "epoch": 0.13127608875669647, "grad_norm": 0.25850266218185425, "learning_rate": 2.9673635520230576e-05, "loss": 0.09070491790771484, "step": 971 }, { "epoch": 0.1314112855525324, "grad_norm": 0.10657283663749695, "learning_rate": 2.9672214011007087e-05, "loss": 0.06935310363769531, "step": 972 }, { "epoch": 0.13154648234836835, "grad_norm": 0.14891095459461212, "learning_rate": 2.9670789446950396e-05, "loss": 0.07125735282897949, "step": 973 }, { "epoch": 0.13168167914420428, "grad_norm": 0.06608948856592178, "learning_rate": 2.9669361828357105e-05, "loss": 0.07768774032592773, "step": 974 }, { "epoch": 0.1318168759400402, "grad_norm": 0.4589276611804962, "learning_rate": 2.9667931155524454e-05, "loss": 0.12001943588256836, "step": 975 }, { "epoch": 0.13195207273587617, "grad_norm": 0.11258334666490555, "learning_rate": 2.966649742875032e-05, "loss": 0.06550455093383789, "step": 976 }, { "epoch": 0.1320872695317121, "grad_norm": 0.11503694951534271, "learning_rate": 2.9665060648333206e-05, "loss": 0.06824827194213867, "step": 977 }, { "epoch": 0.13222246632754803, "grad_norm": 0.10666774213314056, "learning_rate": 2.9663620814572266e-05, "loss": 0.06955242156982422, "step": 978 }, { "epoch": 0.13235766312338398, "grad_norm": 0.2170712649822235, "learning_rate": 2.966217792776728e-05, "loss": 0.10490036010742188, "step": 979 }, { "epoch": 0.1324928599192199, "grad_norm": 0.17030830681324005, "learning_rate": 2.9660731988218652e-05, "loss": 0.1233377456665039, "step": 980 }, { "epoch": 0.13262805671505584, "grad_norm": 0.22178776562213898, "learning_rate": 2.965928299622745e-05, "loss": 0.11655044555664062, "step": 981 }, { "epoch": 0.1327632535108918, "grad_norm": 0.17455221712589264, "learning_rate": 2.965783095209535e-05, "loss": 0.07438206672668457, "step": 982 }, { "epoch": 0.13289845030672773, "grad_norm": 0.10594431310892105, "learning_rate": 2.965637585612469e-05, "loss": 0.06237363815307617, "step": 983 }, { "epoch": 0.13303364710256366, "grad_norm": 0.22808212041854858, "learning_rate": 2.965491770861841e-05, "loss": 0.06763434410095215, "step": 984 }, { "epoch": 0.1331688438983996, "grad_norm": 0.09387168288230896, "learning_rate": 2.965345650988012e-05, "loss": 0.06642758846282959, "step": 985 }, { "epoch": 0.13330404069423554, "grad_norm": 0.20906609296798706, "learning_rate": 2.9651992260214035e-05, "loss": 0.055144548416137695, "step": 986 }, { "epoch": 0.13343923749007147, "grad_norm": 0.12606149911880493, "learning_rate": 2.9650524959925037e-05, "loss": 0.06200456619262695, "step": 987 }, { "epoch": 0.13357443428590743, "grad_norm": 0.19453994929790497, "learning_rate": 2.9649054609318607e-05, "loss": 0.05778694152832031, "step": 988 }, { "epoch": 0.13370963108174336, "grad_norm": 0.0841677114367485, "learning_rate": 2.9647581208700894e-05, "loss": 0.04187750816345215, "step": 989 }, { "epoch": 0.13384482787757931, "grad_norm": 0.06964724510908127, "learning_rate": 2.9646104758378666e-05, "loss": 0.040574073791503906, "step": 990 }, { "epoch": 0.13398002467341524, "grad_norm": 0.14623387157917023, "learning_rate": 2.964462525865932e-05, "loss": 0.08375215530395508, "step": 991 }, { "epoch": 0.13411522146925117, "grad_norm": 0.12396002560853958, "learning_rate": 2.96431427098509e-05, "loss": 0.042246341705322266, "step": 992 }, { "epoch": 0.13425041826508713, "grad_norm": 0.07503268122673035, "learning_rate": 2.9641657112262084e-05, "loss": 0.07427215576171875, "step": 993 }, { "epoch": 0.13438561506092306, "grad_norm": 0.18358756601810455, "learning_rate": 2.9640168466202174e-05, "loss": 0.0798649787902832, "step": 994 }, { "epoch": 0.134520811856759, "grad_norm": 0.06345956027507782, "learning_rate": 2.9638676771981124e-05, "loss": 0.07236003875732422, "step": 995 }, { "epoch": 0.13465600865259494, "grad_norm": 0.13814058899879456, "learning_rate": 2.9637182029909508e-05, "loss": 0.074737548828125, "step": 996 }, { "epoch": 0.13479120544843087, "grad_norm": 0.19358345866203308, "learning_rate": 2.9635684240298532e-05, "loss": 0.08238029479980469, "step": 997 }, { "epoch": 0.1349264022442668, "grad_norm": 0.1957382708787918, "learning_rate": 2.9634183403460053e-05, "loss": 0.09319698810577393, "step": 998 }, { "epoch": 0.13506159904010276, "grad_norm": 0.25816795229911804, "learning_rate": 2.9632679519706553e-05, "loss": 0.05713081359863281, "step": 999 }, { "epoch": 0.1351967958359387, "grad_norm": 0.08890276402235031, "learning_rate": 2.9631172589351137e-05, "loss": 0.06299352645874023, "step": 1000 }, { "epoch": 0.13533199263177462, "grad_norm": 0.08693654090166092, "learning_rate": 2.962966261270758e-05, "loss": 0.07686614990234375, "step": 1001 }, { "epoch": 0.13546718942761057, "grad_norm": 0.22788509726524353, "learning_rate": 2.962814959009024e-05, "loss": 0.07744884490966797, "step": 1002 }, { "epoch": 0.1356023862234465, "grad_norm": 0.07897160202264786, "learning_rate": 2.962663352181415e-05, "loss": 0.07285690307617188, "step": 1003 }, { "epoch": 0.13573758301928243, "grad_norm": 0.08798522502183914, "learning_rate": 2.9625114408194966e-05, "loss": 0.08118867874145508, "step": 1004 }, { "epoch": 0.1358727798151184, "grad_norm": 0.058871470391750336, "learning_rate": 2.962359224954897e-05, "loss": 0.07728958129882812, "step": 1005 }, { "epoch": 0.13600797661095432, "grad_norm": 0.10668406635522842, "learning_rate": 2.9622067046193086e-05, "loss": 0.07386445999145508, "step": 1006 }, { "epoch": 0.13614317340679025, "grad_norm": 0.06813656538724899, "learning_rate": 2.9620538798444867e-05, "loss": 0.05845308303833008, "step": 1007 }, { "epoch": 0.1362783702026262, "grad_norm": 0.06310522556304932, "learning_rate": 2.9619007506622506e-05, "loss": 0.05810260772705078, "step": 1008 }, { "epoch": 0.13641356699846213, "grad_norm": 0.10126934200525284, "learning_rate": 2.961747317104482e-05, "loss": 0.05495619773864746, "step": 1009 }, { "epoch": 0.13654876379429806, "grad_norm": 0.09578578919172287, "learning_rate": 2.9615935792031274e-05, "loss": 0.06580543518066406, "step": 1010 }, { "epoch": 0.13668396059013402, "grad_norm": 0.05667395517230034, "learning_rate": 2.9614395369901953e-05, "loss": 0.05591392517089844, "step": 1011 }, { "epoch": 0.13681915738596995, "grad_norm": 0.15220583975315094, "learning_rate": 2.9612851904977582e-05, "loss": 0.07385396957397461, "step": 1012 }, { "epoch": 0.1369543541818059, "grad_norm": 0.08858183771371841, "learning_rate": 2.9611305397579518e-05, "loss": 0.058804988861083984, "step": 1013 }, { "epoch": 0.13708955097764183, "grad_norm": 0.192491352558136, "learning_rate": 2.9609755848029755e-05, "loss": 0.08931732177734375, "step": 1014 }, { "epoch": 0.13722474777347776, "grad_norm": 0.08235607296228409, "learning_rate": 2.9608203256650916e-05, "loss": 0.06106531620025635, "step": 1015 }, { "epoch": 0.13735994456931372, "grad_norm": 0.05607544630765915, "learning_rate": 2.9606647623766257e-05, "loss": 0.06089353561401367, "step": 1016 }, { "epoch": 0.13749514136514965, "grad_norm": 0.09464474767446518, "learning_rate": 2.9605088949699672e-05, "loss": 0.05215764045715332, "step": 1017 }, { "epoch": 0.13763033816098558, "grad_norm": 0.23505446314811707, "learning_rate": 2.9603527234775682e-05, "loss": 0.0972222089767456, "step": 1018 }, { "epoch": 0.13776553495682153, "grad_norm": 0.17249640822410583, "learning_rate": 2.960196247931945e-05, "loss": 0.07334613800048828, "step": 1019 }, { "epoch": 0.13790073175265746, "grad_norm": 0.13054914772510529, "learning_rate": 2.960039468365676e-05, "loss": 0.06658458709716797, "step": 1020 }, { "epoch": 0.1380359285484934, "grad_norm": 0.08485963940620422, "learning_rate": 2.959882384811404e-05, "loss": 0.053723812103271484, "step": 1021 }, { "epoch": 0.13817112534432935, "grad_norm": 0.1172117218375206, "learning_rate": 2.9597249973018343e-05, "loss": 0.07741475105285645, "step": 1022 }, { "epoch": 0.13830632214016528, "grad_norm": 0.10905864089727402, "learning_rate": 2.959567305869736e-05, "loss": 0.075042724609375, "step": 1023 }, { "epoch": 0.1384415189360012, "grad_norm": 0.23235218226909637, "learning_rate": 2.9594093105479413e-05, "loss": 0.08209609985351562, "step": 1024 }, { "epoch": 0.13857671573183716, "grad_norm": 0.1621515452861786, "learning_rate": 2.959251011369345e-05, "loss": 0.07379913330078125, "step": 1025 }, { "epoch": 0.1387119125276731, "grad_norm": 0.13144813477993011, "learning_rate": 2.959092408366907e-05, "loss": 0.06781506538391113, "step": 1026 }, { "epoch": 0.13884710932350902, "grad_norm": 0.17865842580795288, "learning_rate": 2.958933501573649e-05, "loss": 0.10501575469970703, "step": 1027 }, { "epoch": 0.13898230611934498, "grad_norm": 0.18086294829845428, "learning_rate": 2.9587742910226555e-05, "loss": 0.06138896942138672, "step": 1028 }, { "epoch": 0.1391175029151809, "grad_norm": 0.24017727375030518, "learning_rate": 2.958614776747076e-05, "loss": 0.09089183807373047, "step": 1029 }, { "epoch": 0.13925269971101684, "grad_norm": 0.163410484790802, "learning_rate": 2.9584549587801213e-05, "loss": 0.10097527503967285, "step": 1030 }, { "epoch": 0.1393878965068528, "grad_norm": 0.12211813032627106, "learning_rate": 2.958294837155067e-05, "loss": 0.08792757987976074, "step": 1031 }, { "epoch": 0.13952309330268872, "grad_norm": 0.08845590800046921, "learning_rate": 2.9581344119052508e-05, "loss": 0.05278968811035156, "step": 1032 }, { "epoch": 0.13965829009852465, "grad_norm": 0.2417311668395996, "learning_rate": 2.957973683064074e-05, "loss": 0.08494710922241211, "step": 1033 }, { "epoch": 0.1397934868943606, "grad_norm": 0.09509695321321487, "learning_rate": 2.957812650665002e-05, "loss": 0.08342361450195312, "step": 1034 }, { "epoch": 0.13992868369019654, "grad_norm": 0.08689370006322861, "learning_rate": 2.957651314741562e-05, "loss": 0.06669139862060547, "step": 1035 }, { "epoch": 0.1400638804860325, "grad_norm": 0.14130592346191406, "learning_rate": 2.9574896753273454e-05, "loss": 0.07112407684326172, "step": 1036 }, { "epoch": 0.14019907728186842, "grad_norm": 0.10611394792795181, "learning_rate": 2.9573277324560058e-05, "loss": 0.09284114837646484, "step": 1037 }, { "epoch": 0.14033427407770435, "grad_norm": 0.09717437624931335, "learning_rate": 2.9571654861612608e-05, "loss": 0.06455516815185547, "step": 1038 }, { "epoch": 0.1404694708735403, "grad_norm": 0.09705877304077148, "learning_rate": 2.957002936476891e-05, "loss": 0.10404205322265625, "step": 1039 }, { "epoch": 0.14060466766937624, "grad_norm": 0.07513929158449173, "learning_rate": 2.9568400834367406e-05, "loss": 0.043250322341918945, "step": 1040 }, { "epoch": 0.14073986446521217, "grad_norm": 0.2907352149486542, "learning_rate": 2.9566769270747158e-05, "loss": 0.07195138931274414, "step": 1041 }, { "epoch": 0.14087506126104812, "grad_norm": 0.2050797939300537, "learning_rate": 2.9565134674247864e-05, "loss": 0.0687749981880188, "step": 1042 }, { "epoch": 0.14101025805688405, "grad_norm": 0.39528849720954895, "learning_rate": 2.9563497045209866e-05, "loss": 0.09494972229003906, "step": 1043 }, { "epoch": 0.14114545485271998, "grad_norm": 0.07782010734081268, "learning_rate": 2.9561856383974118e-05, "loss": 0.06363105773925781, "step": 1044 }, { "epoch": 0.14128065164855594, "grad_norm": 0.1474486142396927, "learning_rate": 2.9560212690882218e-05, "loss": 0.05806732177734375, "step": 1045 }, { "epoch": 0.14141584844439187, "grad_norm": 0.10805214196443558, "learning_rate": 2.9558565966276395e-05, "loss": 0.07311058044433594, "step": 1046 }, { "epoch": 0.1415510452402278, "grad_norm": 0.4037792384624481, "learning_rate": 2.9556916210499497e-05, "loss": 0.09949612617492676, "step": 1047 }, { "epoch": 0.14168624203606375, "grad_norm": 0.1432960480451584, "learning_rate": 2.9555263423895016e-05, "loss": 0.0778653621673584, "step": 1048 }, { "epoch": 0.14182143883189968, "grad_norm": 0.056540507823228836, "learning_rate": 2.955360760680708e-05, "loss": 0.0507662296295166, "step": 1049 }, { "epoch": 0.1419566356277356, "grad_norm": 0.09269817918539047, "learning_rate": 2.9551948759580423e-05, "loss": 0.06925535202026367, "step": 1050 }, { "epoch": 0.14209183242357157, "grad_norm": 0.23423974215984344, "learning_rate": 2.9550286882560435e-05, "loss": 0.14203643798828125, "step": 1051 }, { "epoch": 0.1422270292194075, "grad_norm": 0.04959813877940178, "learning_rate": 2.9548621976093126e-05, "loss": 0.05577993392944336, "step": 1052 }, { "epoch": 0.14236222601524343, "grad_norm": 0.2520340383052826, "learning_rate": 2.9546954040525144e-05, "loss": 0.08503293991088867, "step": 1053 }, { "epoch": 0.14249742281107938, "grad_norm": 0.12083239108324051, "learning_rate": 2.9545283076203753e-05, "loss": 0.05936098098754883, "step": 1054 }, { "epoch": 0.1426326196069153, "grad_norm": 0.2398243099451065, "learning_rate": 2.954360908347686e-05, "loss": 0.0962214469909668, "step": 1055 }, { "epoch": 0.14276781640275127, "grad_norm": 0.09604228287935257, "learning_rate": 2.9541932062693e-05, "loss": 0.06707572937011719, "step": 1056 }, { "epoch": 0.1429030131985872, "grad_norm": 0.08386726677417755, "learning_rate": 2.954025201420134e-05, "loss": 0.07052421569824219, "step": 1057 }, { "epoch": 0.14303820999442313, "grad_norm": 0.5823044180870056, "learning_rate": 2.9538568938351672e-05, "loss": 0.10803794860839844, "step": 1058 }, { "epoch": 0.14317340679025908, "grad_norm": 0.0700690820813179, "learning_rate": 2.953688283549442e-05, "loss": 0.08406209945678711, "step": 1059 }, { "epoch": 0.143308603586095, "grad_norm": 0.09271769225597382, "learning_rate": 2.9535193705980642e-05, "loss": 0.09754276275634766, "step": 1060 }, { "epoch": 0.14344380038193094, "grad_norm": 0.18816334009170532, "learning_rate": 2.9533501550162028e-05, "loss": 0.12133526802062988, "step": 1061 }, { "epoch": 0.1435789971777669, "grad_norm": 0.06169494241476059, "learning_rate": 2.9531806368390882e-05, "loss": 0.04880106449127197, "step": 1062 }, { "epoch": 0.14371419397360283, "grad_norm": 0.11677113175392151, "learning_rate": 2.953010816102016e-05, "loss": 0.0968770980834961, "step": 1063 }, { "epoch": 0.14384939076943876, "grad_norm": 0.34681445360183716, "learning_rate": 2.952840692840343e-05, "loss": 0.08517265319824219, "step": 1064 }, { "epoch": 0.1439845875652747, "grad_norm": 0.19578534364700317, "learning_rate": 2.9526702670894914e-05, "loss": 0.09182238578796387, "step": 1065 }, { "epoch": 0.14411978436111064, "grad_norm": 0.06605084240436554, "learning_rate": 2.952499538884943e-05, "loss": 0.04653024673461914, "step": 1066 }, { "epoch": 0.14425498115694657, "grad_norm": 0.08458984643220901, "learning_rate": 2.9523285082622448e-05, "loss": 0.07516765594482422, "step": 1067 }, { "epoch": 0.14439017795278253, "grad_norm": 0.23083150386810303, "learning_rate": 2.9521571752570064e-05, "loss": 0.0717320442199707, "step": 1068 }, { "epoch": 0.14452537474861846, "grad_norm": 0.1745014637708664, "learning_rate": 2.9519855399049004e-05, "loss": 0.08561468124389648, "step": 1069 }, { "epoch": 0.14466057154445439, "grad_norm": 0.09659496694803238, "learning_rate": 2.951813602241662e-05, "loss": 0.07809066772460938, "step": 1070 }, { "epoch": 0.14479576834029034, "grad_norm": 0.2586137056350708, "learning_rate": 2.9516413623030896e-05, "loss": 0.09674263000488281, "step": 1071 }, { "epoch": 0.14493096513612627, "grad_norm": 0.10419290512800217, "learning_rate": 2.951468820125045e-05, "loss": 0.0826728343963623, "step": 1072 }, { "epoch": 0.1450661619319622, "grad_norm": 0.12646038830280304, "learning_rate": 2.9512959757434508e-05, "loss": 0.07571268081665039, "step": 1073 }, { "epoch": 0.14520135872779816, "grad_norm": 0.1377960443496704, "learning_rate": 2.951122829194296e-05, "loss": 0.057172298431396484, "step": 1074 }, { "epoch": 0.1453365555236341, "grad_norm": 0.24179475009441376, "learning_rate": 2.9509493805136296e-05, "loss": 0.05516242980957031, "step": 1075 }, { "epoch": 0.14547175231947002, "grad_norm": 0.251004695892334, "learning_rate": 2.9507756297375648e-05, "loss": 0.09329891204833984, "step": 1076 }, { "epoch": 0.14560694911530597, "grad_norm": 0.12214822322130203, "learning_rate": 2.9506015769022778e-05, "loss": 0.07416343688964844, "step": 1077 }, { "epoch": 0.1457421459111419, "grad_norm": 0.11316990107297897, "learning_rate": 2.950427222044006e-05, "loss": 0.06862330436706543, "step": 1078 }, { "epoch": 0.14587734270697786, "grad_norm": 0.10652310401201248, "learning_rate": 2.9502525651990525e-05, "loss": 0.07209396362304688, "step": 1079 }, { "epoch": 0.1460125395028138, "grad_norm": 0.14423726499080658, "learning_rate": 2.9500776064037813e-05, "loss": 0.058995723724365234, "step": 1080 }, { "epoch": 0.14614773629864972, "grad_norm": 0.16166767477989197, "learning_rate": 2.9499023456946194e-05, "loss": 0.0825796127319336, "step": 1081 }, { "epoch": 0.14628293309448567, "grad_norm": 0.2889252305030823, "learning_rate": 2.9497267831080575e-05, "loss": 0.08362865447998047, "step": 1082 }, { "epoch": 0.1464181298903216, "grad_norm": 0.11203325539827347, "learning_rate": 2.949550918680649e-05, "loss": 0.0511401891708374, "step": 1083 }, { "epoch": 0.14655332668615753, "grad_norm": 0.09853683412075043, "learning_rate": 2.9493747524490086e-05, "loss": 0.049675941467285156, "step": 1084 }, { "epoch": 0.1466885234819935, "grad_norm": 0.0966378003358841, "learning_rate": 2.9491982844498156e-05, "loss": 0.04838848114013672, "step": 1085 }, { "epoch": 0.14682372027782942, "grad_norm": 0.21662436425685883, "learning_rate": 2.949021514719812e-05, "loss": 0.07179498672485352, "step": 1086 }, { "epoch": 0.14695891707366535, "grad_norm": 0.08794362843036652, "learning_rate": 2.948844443295802e-05, "loss": 0.0547785758972168, "step": 1087 }, { "epoch": 0.1470941138695013, "grad_norm": 0.1866329461336136, "learning_rate": 2.9486670702146526e-05, "loss": 0.07671117782592773, "step": 1088 }, { "epoch": 0.14722931066533723, "grad_norm": 0.1171988844871521, "learning_rate": 2.948489395513294e-05, "loss": 0.051657676696777344, "step": 1089 }, { "epoch": 0.14736450746117316, "grad_norm": 0.17662739753723145, "learning_rate": 2.948311419228719e-05, "loss": 0.047513484954833984, "step": 1090 }, { "epoch": 0.14749970425700912, "grad_norm": 0.1720883548259735, "learning_rate": 2.948133141397983e-05, "loss": 0.07854557037353516, "step": 1091 }, { "epoch": 0.14763490105284505, "grad_norm": 0.09083975851535797, "learning_rate": 2.9479545620582047e-05, "loss": 0.067718505859375, "step": 1092 }, { "epoch": 0.14777009784868098, "grad_norm": 0.19623054563999176, "learning_rate": 2.9477756812465652e-05, "loss": 0.09048688411712646, "step": 1093 }, { "epoch": 0.14790529464451693, "grad_norm": 0.11081378161907196, "learning_rate": 2.9475964990003085e-05, "loss": 0.07282352447509766, "step": 1094 }, { "epoch": 0.14804049144035286, "grad_norm": 0.21468673646450043, "learning_rate": 2.9474170153567406e-05, "loss": 0.09254264831542969, "step": 1095 }, { "epoch": 0.1481756882361888, "grad_norm": 0.14591306447982788, "learning_rate": 2.947237230353232e-05, "loss": 0.05003070831298828, "step": 1096 }, { "epoch": 0.14831088503202475, "grad_norm": 0.10723714530467987, "learning_rate": 2.9470571440272147e-05, "loss": 0.07667946815490723, "step": 1097 }, { "epoch": 0.14844608182786068, "grad_norm": 0.11086497455835342, "learning_rate": 2.946876756416183e-05, "loss": 0.08315134048461914, "step": 1098 }, { "epoch": 0.1485812786236966, "grad_norm": 0.17267684638500214, "learning_rate": 2.946696067557695e-05, "loss": 0.0909261703491211, "step": 1099 }, { "epoch": 0.14871647541953256, "grad_norm": 0.2377883642911911, "learning_rate": 2.9465150774893706e-05, "loss": 0.08050012588500977, "step": 1100 }, { "epoch": 0.1488516722153685, "grad_norm": 0.16752363741397858, "learning_rate": 2.9463337862488938e-05, "loss": 0.05095905065536499, "step": 1101 }, { "epoch": 0.14898686901120445, "grad_norm": 0.2799507975578308, "learning_rate": 2.9461521938740096e-05, "loss": 0.0885920524597168, "step": 1102 }, { "epoch": 0.14912206580704038, "grad_norm": 0.06536594033241272, "learning_rate": 2.9459703004025273e-05, "loss": 0.050568580627441406, "step": 1103 }, { "epoch": 0.1492572626028763, "grad_norm": 0.06007133796811104, "learning_rate": 2.9457881058723174e-05, "loss": 0.06733274459838867, "step": 1104 }, { "epoch": 0.14939245939871226, "grad_norm": 0.2571377158164978, "learning_rate": 2.9456056103213137e-05, "loss": 0.07812309265136719, "step": 1105 }, { "epoch": 0.1495276561945482, "grad_norm": 0.4716612994670868, "learning_rate": 2.945422813787513e-05, "loss": 0.11354351043701172, "step": 1106 }, { "epoch": 0.14966285299038412, "grad_norm": 0.3563525676727295, "learning_rate": 2.9452397163089748e-05, "loss": 0.06363677978515625, "step": 1107 }, { "epoch": 0.14979804978622008, "grad_norm": 0.07930450141429901, "learning_rate": 2.9450563179238207e-05, "loss": 0.07386207580566406, "step": 1108 }, { "epoch": 0.149933246582056, "grad_norm": 0.15830406546592712, "learning_rate": 2.9448726186702354e-05, "loss": 0.06419610977172852, "step": 1109 }, { "epoch": 0.15006844337789194, "grad_norm": 0.41483670473098755, "learning_rate": 2.9446886185864652e-05, "loss": 0.07947468757629395, "step": 1110 }, { "epoch": 0.1502036401737279, "grad_norm": 0.23662076890468597, "learning_rate": 2.944504317710821e-05, "loss": 0.06407642364501953, "step": 1111 }, { "epoch": 0.15033883696956382, "grad_norm": 0.26178011298179626, "learning_rate": 2.944319716081675e-05, "loss": 0.048445701599121094, "step": 1112 }, { "epoch": 0.15047403376539975, "grad_norm": 0.20574241876602173, "learning_rate": 2.944134813737462e-05, "loss": 0.0809546709060669, "step": 1113 }, { "epoch": 0.1506092305612357, "grad_norm": 0.14405415952205658, "learning_rate": 2.9439496107166796e-05, "loss": 0.09885573387145996, "step": 1114 }, { "epoch": 0.15074442735707164, "grad_norm": 0.07743999361991882, "learning_rate": 2.943764107057888e-05, "loss": 0.033701419830322266, "step": 1115 }, { "epoch": 0.15087962415290757, "grad_norm": 0.36796656250953674, "learning_rate": 2.9435783027997106e-05, "loss": 0.06601905822753906, "step": 1116 }, { "epoch": 0.15101482094874352, "grad_norm": 0.17643453180789948, "learning_rate": 2.9433921979808323e-05, "loss": 0.06286144256591797, "step": 1117 }, { "epoch": 0.15115001774457945, "grad_norm": 0.3632143437862396, "learning_rate": 2.9432057926400014e-05, "loss": 0.07956409454345703, "step": 1118 }, { "epoch": 0.15128521454041538, "grad_norm": 0.23792868852615356, "learning_rate": 2.943019086816028e-05, "loss": 0.05762052536010742, "step": 1119 }, { "epoch": 0.15142041133625134, "grad_norm": 0.12486688047647476, "learning_rate": 2.9428320805477855e-05, "loss": 0.06322145462036133, "step": 1120 }, { "epoch": 0.15155560813208727, "grad_norm": 0.22102205455303192, "learning_rate": 2.9426447738742104e-05, "loss": 0.07183337211608887, "step": 1121 }, { "epoch": 0.1516908049279232, "grad_norm": 0.17263171076774597, "learning_rate": 2.9424571668343e-05, "loss": 0.06249523162841797, "step": 1122 }, { "epoch": 0.15182600172375915, "grad_norm": 0.17273354530334473, "learning_rate": 2.942269259467115e-05, "loss": 0.060373783111572266, "step": 1123 }, { "epoch": 0.15196119851959508, "grad_norm": 0.1259409636259079, "learning_rate": 2.9420810518117794e-05, "loss": 0.08330488204956055, "step": 1124 }, { "epoch": 0.15209639531543104, "grad_norm": 0.2565913498401642, "learning_rate": 2.9418925439074784e-05, "loss": 0.053517818450927734, "step": 1125 }, { "epoch": 0.15223159211126697, "grad_norm": 0.09067431092262268, "learning_rate": 2.9417037357934606e-05, "loss": 0.06913185119628906, "step": 1126 }, { "epoch": 0.1523667889071029, "grad_norm": 0.06976161897182465, "learning_rate": 2.9415146275090373e-05, "loss": 0.04553675651550293, "step": 1127 }, { "epoch": 0.15250198570293885, "grad_norm": 0.0911419540643692, "learning_rate": 2.9413252190935813e-05, "loss": 0.07584905624389648, "step": 1128 }, { "epoch": 0.15263718249877478, "grad_norm": 0.07555969804525375, "learning_rate": 2.9411355105865286e-05, "loss": 0.08169174194335938, "step": 1129 }, { "epoch": 0.1527723792946107, "grad_norm": 0.13209667801856995, "learning_rate": 2.9409455020273775e-05, "loss": 0.059699058532714844, "step": 1130 }, { "epoch": 0.15290757609044667, "grad_norm": 0.07090654969215393, "learning_rate": 2.940755193455689e-05, "loss": 0.06534671783447266, "step": 1131 }, { "epoch": 0.1530427728862826, "grad_norm": 0.12138225883245468, "learning_rate": 2.940564584911086e-05, "loss": 0.10901832580566406, "step": 1132 }, { "epoch": 0.15317796968211853, "grad_norm": 0.13750989735126495, "learning_rate": 2.9403736764332543e-05, "loss": 0.0717926025390625, "step": 1133 }, { "epoch": 0.15331316647795448, "grad_norm": 0.19666792452335358, "learning_rate": 2.9401824680619423e-05, "loss": 0.03712654113769531, "step": 1134 }, { "epoch": 0.1534483632737904, "grad_norm": 0.09003041684627533, "learning_rate": 2.9399909598369604e-05, "loss": 0.0582585334777832, "step": 1135 }, { "epoch": 0.15358356006962634, "grad_norm": 0.09080123901367188, "learning_rate": 2.939799151798182e-05, "loss": 0.0539020299911499, "step": 1136 }, { "epoch": 0.1537187568654623, "grad_norm": 0.21130475401878357, "learning_rate": 2.9396070439855417e-05, "loss": 0.0970611572265625, "step": 1137 }, { "epoch": 0.15385395366129823, "grad_norm": 0.2650197446346283, "learning_rate": 2.9394146364390382e-05, "loss": 0.0815286636352539, "step": 1138 }, { "epoch": 0.15398915045713416, "grad_norm": 0.2304464727640152, "learning_rate": 2.9392219291987315e-05, "loss": 0.06618404388427734, "step": 1139 }, { "epoch": 0.1541243472529701, "grad_norm": 0.10913332551717758, "learning_rate": 2.939028922304744e-05, "loss": 0.06919169425964355, "step": 1140 }, { "epoch": 0.15425954404880604, "grad_norm": 0.17635436356067657, "learning_rate": 2.9388356157972615e-05, "loss": 0.0587315559387207, "step": 1141 }, { "epoch": 0.15439474084464197, "grad_norm": 0.1624658703804016, "learning_rate": 2.938642009716531e-05, "loss": 0.06929469108581543, "step": 1142 }, { "epoch": 0.15452993764047793, "grad_norm": 0.09626515954732895, "learning_rate": 2.938448104102862e-05, "loss": 0.05512237548828125, "step": 1143 }, { "epoch": 0.15466513443631386, "grad_norm": 0.35350921750068665, "learning_rate": 2.9382538989966267e-05, "loss": 0.08420944213867188, "step": 1144 }, { "epoch": 0.15480033123214978, "grad_norm": 0.055737074464559555, "learning_rate": 2.9380593944382605e-05, "loss": 0.06031656265258789, "step": 1145 }, { "epoch": 0.15493552802798574, "grad_norm": 0.09223190695047379, "learning_rate": 2.9378645904682596e-05, "loss": 0.06427526473999023, "step": 1146 }, { "epoch": 0.15507072482382167, "grad_norm": 0.22569283843040466, "learning_rate": 2.937669487127183e-05, "loss": 0.061815738677978516, "step": 1147 }, { "epoch": 0.15520592161965763, "grad_norm": 0.11823631823062897, "learning_rate": 2.9374740844556532e-05, "loss": 0.09103202819824219, "step": 1148 }, { "epoch": 0.15534111841549356, "grad_norm": 0.274277925491333, "learning_rate": 2.937278382494353e-05, "loss": 0.09007656574249268, "step": 1149 }, { "epoch": 0.15547631521132949, "grad_norm": 0.15700848400592804, "learning_rate": 2.9370823812840287e-05, "loss": 0.08530592918395996, "step": 1150 }, { "epoch": 0.15561151200716544, "grad_norm": 0.09494221955537796, "learning_rate": 2.93688608086549e-05, "loss": 0.07379150390625, "step": 1151 }, { "epoch": 0.15574670880300137, "grad_norm": 0.08665430545806885, "learning_rate": 2.9366894812796064e-05, "loss": 0.07693290710449219, "step": 1152 }, { "epoch": 0.1558819055988373, "grad_norm": 0.17276400327682495, "learning_rate": 2.9364925825673117e-05, "loss": 0.06704044342041016, "step": 1153 }, { "epoch": 0.15601710239467326, "grad_norm": 0.1022331714630127, "learning_rate": 2.9362953847696006e-05, "loss": 0.06880712509155273, "step": 1154 }, { "epoch": 0.1561522991905092, "grad_norm": 0.10129193961620331, "learning_rate": 2.9360978879275313e-05, "loss": 0.06425905227661133, "step": 1155 }, { "epoch": 0.15628749598634512, "grad_norm": 0.1395132690668106, "learning_rate": 2.9359000920822237e-05, "loss": 0.09534454345703125, "step": 1156 }, { "epoch": 0.15642269278218107, "grad_norm": 0.05284448713064194, "learning_rate": 2.9357019972748594e-05, "loss": 0.04510068893432617, "step": 1157 }, { "epoch": 0.156557889578017, "grad_norm": 0.10269604623317719, "learning_rate": 2.9355036035466836e-05, "loss": 0.07177257537841797, "step": 1158 }, { "epoch": 0.15669308637385293, "grad_norm": 0.19277019798755646, "learning_rate": 2.935304910939002e-05, "loss": 0.0703129768371582, "step": 1159 }, { "epoch": 0.1568282831696889, "grad_norm": 0.09898892045021057, "learning_rate": 2.935105919493184e-05, "loss": 0.0472712516784668, "step": 1160 }, { "epoch": 0.15696347996552482, "grad_norm": 0.09241020679473877, "learning_rate": 2.9349066292506613e-05, "loss": 0.049954891204833984, "step": 1161 }, { "epoch": 0.15709867676136074, "grad_norm": 0.0797291249036789, "learning_rate": 2.934707040252926e-05, "loss": 0.04801750183105469, "step": 1162 }, { "epoch": 0.1572338735571967, "grad_norm": 0.184931218624115, "learning_rate": 2.9345071525415342e-05, "loss": 0.10203933715820312, "step": 1163 }, { "epoch": 0.15736907035303263, "grad_norm": 0.2322974056005478, "learning_rate": 2.9343069661581035e-05, "loss": 0.09958314895629883, "step": 1164 }, { "epoch": 0.15750426714886856, "grad_norm": 0.2699923813343048, "learning_rate": 2.9341064811443138e-05, "loss": 0.13122177124023438, "step": 1165 }, { "epoch": 0.15763946394470452, "grad_norm": 0.06598154455423355, "learning_rate": 2.9339056975419078e-05, "loss": 0.07757329940795898, "step": 1166 }, { "epoch": 0.15777466074054045, "grad_norm": 0.12984755635261536, "learning_rate": 2.9337046153926882e-05, "loss": 0.06386709213256836, "step": 1167 }, { "epoch": 0.15790985753637637, "grad_norm": 0.4202303886413574, "learning_rate": 2.9335032347385224e-05, "loss": 0.12419319152832031, "step": 1168 }, { "epoch": 0.15804505433221233, "grad_norm": 0.24104250967502594, "learning_rate": 2.933301555621339e-05, "loss": 0.08004891872406006, "step": 1169 }, { "epoch": 0.15818025112804826, "grad_norm": 0.08389753848314285, "learning_rate": 2.933099578083128e-05, "loss": 0.07021570205688477, "step": 1170 }, { "epoch": 0.15831544792388422, "grad_norm": 0.27718332409858704, "learning_rate": 2.932897302165943e-05, "loss": 0.06672859191894531, "step": 1171 }, { "epoch": 0.15845064471972015, "grad_norm": 0.17038077116012573, "learning_rate": 2.9326947279118983e-05, "loss": 0.06165933609008789, "step": 1172 }, { "epoch": 0.15858584151555608, "grad_norm": 0.1964806616306305, "learning_rate": 2.9324918553631716e-05, "loss": 0.05494403839111328, "step": 1173 }, { "epoch": 0.15872103831139203, "grad_norm": 0.1367480605840683, "learning_rate": 2.9322886845620013e-05, "loss": 0.04713630676269531, "step": 1174 }, { "epoch": 0.15885623510722796, "grad_norm": 0.3066956698894501, "learning_rate": 2.932085215550689e-05, "loss": 0.08920001983642578, "step": 1175 }, { "epoch": 0.1589914319030639, "grad_norm": 0.21093346178531647, "learning_rate": 2.9318814483715982e-05, "loss": 0.08451461791992188, "step": 1176 }, { "epoch": 0.15912662869889985, "grad_norm": 0.11341755092144012, "learning_rate": 2.9316773830671537e-05, "loss": 0.07023680210113525, "step": 1177 }, { "epoch": 0.15926182549473578, "grad_norm": 0.29472970962524414, "learning_rate": 2.9314730196798437e-05, "loss": 0.07318401336669922, "step": 1178 }, { "epoch": 0.1593970222905717, "grad_norm": 0.1765746772289276, "learning_rate": 2.9312683582522178e-05, "loss": 0.07141256332397461, "step": 1179 }, { "epoch": 0.15953221908640766, "grad_norm": 0.1467924416065216, "learning_rate": 2.9310633988268868e-05, "loss": 0.0755157470703125, "step": 1180 }, { "epoch": 0.1596674158822436, "grad_norm": 0.08082759380340576, "learning_rate": 2.9308581414465246e-05, "loss": 0.05016326904296875, "step": 1181 }, { "epoch": 0.15980261267807952, "grad_norm": 0.17234677076339722, "learning_rate": 2.9306525861538674e-05, "loss": 0.07890582084655762, "step": 1182 }, { "epoch": 0.15993780947391548, "grad_norm": 0.21386606991291046, "learning_rate": 2.9304467329917127e-05, "loss": 0.09691619873046875, "step": 1183 }, { "epoch": 0.1600730062697514, "grad_norm": 0.3143528997898102, "learning_rate": 2.9302405820029198e-05, "loss": 0.09788703918457031, "step": 1184 }, { "epoch": 0.16020820306558733, "grad_norm": 0.07750120759010315, "learning_rate": 2.9300341332304114e-05, "loss": 0.04978680610656738, "step": 1185 }, { "epoch": 0.1603433998614233, "grad_norm": 0.10350902378559113, "learning_rate": 2.9298273867171697e-05, "loss": 0.08543968200683594, "step": 1186 }, { "epoch": 0.16047859665725922, "grad_norm": 0.06081010401248932, "learning_rate": 2.929620342506242e-05, "loss": 0.05190324783325195, "step": 1187 }, { "epoch": 0.16061379345309515, "grad_norm": 0.14381834864616394, "learning_rate": 2.929413000640735e-05, "loss": 0.05739545822143555, "step": 1188 }, { "epoch": 0.1607489902489311, "grad_norm": 0.11380499601364136, "learning_rate": 2.9292053611638187e-05, "loss": 0.06984281539916992, "step": 1189 }, { "epoch": 0.16088418704476704, "grad_norm": 0.16667023301124573, "learning_rate": 2.928997424118725e-05, "loss": 0.07844972610473633, "step": 1190 }, { "epoch": 0.16101938384060296, "grad_norm": 0.15189296007156372, "learning_rate": 2.928789189548747e-05, "loss": 0.05687522888183594, "step": 1191 }, { "epoch": 0.16115458063643892, "grad_norm": 0.24288663268089294, "learning_rate": 2.9285806574972405e-05, "loss": 0.0876169204711914, "step": 1192 }, { "epoch": 0.16128977743227485, "grad_norm": 0.13709843158721924, "learning_rate": 2.928371828007623e-05, "loss": 0.0818643569946289, "step": 1193 }, { "epoch": 0.1614249742281108, "grad_norm": 0.2224666327238083, "learning_rate": 2.928162701123374e-05, "loss": 0.04541301727294922, "step": 1194 }, { "epoch": 0.16156017102394674, "grad_norm": 0.37950173020362854, "learning_rate": 2.9279532768880345e-05, "loss": 0.09860467910766602, "step": 1195 }, { "epoch": 0.16169536781978266, "grad_norm": 0.25968003273010254, "learning_rate": 2.9277435553452084e-05, "loss": 0.0796588659286499, "step": 1196 }, { "epoch": 0.16183056461561862, "grad_norm": 0.15009881556034088, "learning_rate": 2.9275335365385602e-05, "loss": 0.09694814682006836, "step": 1197 }, { "epoch": 0.16196576141145455, "grad_norm": 0.07834871113300323, "learning_rate": 2.927323220511817e-05, "loss": 0.04359889030456543, "step": 1198 }, { "epoch": 0.16210095820729048, "grad_norm": 0.07751213759183884, "learning_rate": 2.9271126073087684e-05, "loss": 0.06636476516723633, "step": 1199 }, { "epoch": 0.16223615500312644, "grad_norm": 0.2132425308227539, "learning_rate": 2.926901696973264e-05, "loss": 0.06555485725402832, "step": 1200 }, { "epoch": 0.16237135179896237, "grad_norm": 0.24026857316493988, "learning_rate": 2.9266904895492177e-05, "loss": 0.08239245414733887, "step": 1201 }, { "epoch": 0.1625065485947983, "grad_norm": 0.10026822984218597, "learning_rate": 2.926478985080603e-05, "loss": 0.08301210403442383, "step": 1202 }, { "epoch": 0.16264174539063425, "grad_norm": 0.0927710011601448, "learning_rate": 2.9262671836114568e-05, "loss": 0.05316925048828125, "step": 1203 }, { "epoch": 0.16277694218647018, "grad_norm": 0.053466539829969406, "learning_rate": 2.9260550851858774e-05, "loss": 0.07049942016601562, "step": 1204 }, { "epoch": 0.1629121389823061, "grad_norm": 0.23009340465068817, "learning_rate": 2.9258426898480243e-05, "loss": 0.0935511589050293, "step": 1205 }, { "epoch": 0.16304733577814207, "grad_norm": 0.15983623266220093, "learning_rate": 2.9256299976421198e-05, "loss": 0.08137989044189453, "step": 1206 }, { "epoch": 0.163182532573978, "grad_norm": 0.16412276029586792, "learning_rate": 2.9254170086124474e-05, "loss": 0.06665325164794922, "step": 1207 }, { "epoch": 0.16331772936981392, "grad_norm": 0.23292100429534912, "learning_rate": 2.9252037228033526e-05, "loss": 0.06276392936706543, "step": 1208 }, { "epoch": 0.16345292616564988, "grad_norm": 0.10810549557209015, "learning_rate": 2.9249901402592424e-05, "loss": 0.07190942764282227, "step": 1209 }, { "epoch": 0.1635881229614858, "grad_norm": 0.1365727186203003, "learning_rate": 2.9247762610245863e-05, "loss": 0.05400681495666504, "step": 1210 }, { "epoch": 0.16372331975732174, "grad_norm": 0.05971809849143028, "learning_rate": 2.9245620851439146e-05, "loss": 0.037708282470703125, "step": 1211 }, { "epoch": 0.1638585165531577, "grad_norm": 0.12509702146053314, "learning_rate": 2.92434761266182e-05, "loss": 0.08895158767700195, "step": 1212 }, { "epoch": 0.16399371334899362, "grad_norm": 0.08663970232009888, "learning_rate": 2.924132843622957e-05, "loss": 0.07162857055664062, "step": 1213 }, { "epoch": 0.16412891014482955, "grad_norm": 0.08429182320833206, "learning_rate": 2.9239177780720418e-05, "loss": 0.06822454929351807, "step": 1214 }, { "epoch": 0.1642641069406655, "grad_norm": 0.12882648408412933, "learning_rate": 2.923702416053852e-05, "loss": 0.08619308471679688, "step": 1215 }, { "epoch": 0.16439930373650144, "grad_norm": 0.13457289338111877, "learning_rate": 2.9234867576132268e-05, "loss": 0.08995437622070312, "step": 1216 }, { "epoch": 0.1645345005323374, "grad_norm": 0.10224489867687225, "learning_rate": 2.923270802795068e-05, "loss": 0.07589328289031982, "step": 1217 }, { "epoch": 0.16466969732817333, "grad_norm": 0.19573554396629333, "learning_rate": 2.9230545516443378e-05, "loss": 0.11031937599182129, "step": 1218 }, { "epoch": 0.16480489412400925, "grad_norm": 0.08408115804195404, "learning_rate": 2.9228380042060615e-05, "loss": 0.04436063766479492, "step": 1219 }, { "epoch": 0.1649400909198452, "grad_norm": 0.059222813695669174, "learning_rate": 2.9226211605253252e-05, "loss": 0.06900215148925781, "step": 1220 }, { "epoch": 0.16507528771568114, "grad_norm": 0.11509162187576294, "learning_rate": 2.922404020647277e-05, "loss": 0.07071709632873535, "step": 1221 }, { "epoch": 0.16521048451151707, "grad_norm": 0.15091605484485626, "learning_rate": 2.9221865846171264e-05, "loss": 0.07544474303722382, "step": 1222 }, { "epoch": 0.16534568130735303, "grad_norm": 0.09125815331935883, "learning_rate": 2.9219688524801446e-05, "loss": 0.049719929695129395, "step": 1223 }, { "epoch": 0.16548087810318896, "grad_norm": 0.14215806126594543, "learning_rate": 2.9217508242816653e-05, "loss": 0.07222175598144531, "step": 1224 }, { "epoch": 0.16561607489902488, "grad_norm": 0.0726814940571785, "learning_rate": 2.921532500067083e-05, "loss": 0.046596527099609375, "step": 1225 }, { "epoch": 0.16575127169486084, "grad_norm": 0.0951012521982193, "learning_rate": 2.9213138798818528e-05, "loss": 0.06709766387939453, "step": 1226 }, { "epoch": 0.16588646849069677, "grad_norm": 0.14965760707855225, "learning_rate": 2.921094963771494e-05, "loss": 0.09727096557617188, "step": 1227 }, { "epoch": 0.1660216652865327, "grad_norm": 0.08207011967897415, "learning_rate": 2.9208757517815855e-05, "loss": 0.08413529396057129, "step": 1228 }, { "epoch": 0.16615686208236866, "grad_norm": 0.0920029953122139, "learning_rate": 2.9206562439577684e-05, "loss": 0.06987190246582031, "step": 1229 }, { "epoch": 0.16629205887820458, "grad_norm": 0.04928401857614517, "learning_rate": 2.9204364403457452e-05, "loss": 0.054015159606933594, "step": 1230 }, { "epoch": 0.16642725567404051, "grad_norm": 0.0512600913643837, "learning_rate": 2.9202163409912808e-05, "loss": 0.04779958724975586, "step": 1231 }, { "epoch": 0.16656245246987647, "grad_norm": 0.07469185441732407, "learning_rate": 2.9199959459402003e-05, "loss": 0.0555112361907959, "step": 1232 }, { "epoch": 0.1666976492657124, "grad_norm": 0.07954122126102448, "learning_rate": 2.919775255238392e-05, "loss": 0.05206298828125, "step": 1233 }, { "epoch": 0.16683284606154833, "grad_norm": 0.042790018022060394, "learning_rate": 2.919554268931804e-05, "loss": 0.0326237678527832, "step": 1234 }, { "epoch": 0.16696804285738429, "grad_norm": 0.09722354263067245, "learning_rate": 2.9193329870664475e-05, "loss": 0.07882452011108398, "step": 1235 }, { "epoch": 0.16710323965322021, "grad_norm": 0.13021519780158997, "learning_rate": 2.9191114096883938e-05, "loss": 0.08313465118408203, "step": 1236 }, { "epoch": 0.16723843644905614, "grad_norm": 0.07478612661361694, "learning_rate": 2.9188895368437774e-05, "loss": 0.05598902702331543, "step": 1237 }, { "epoch": 0.1673736332448921, "grad_norm": 0.18403148651123047, "learning_rate": 2.9186673685787926e-05, "loss": 0.0759739875793457, "step": 1238 }, { "epoch": 0.16750883004072803, "grad_norm": 0.08487872779369354, "learning_rate": 2.918444904939697e-05, "loss": 0.06282567977905273, "step": 1239 }, { "epoch": 0.167644026836564, "grad_norm": 0.08897240459918976, "learning_rate": 2.9182221459728078e-05, "loss": 0.06490278244018555, "step": 1240 }, { "epoch": 0.16777922363239992, "grad_norm": 0.1542828381061554, "learning_rate": 2.917999091724505e-05, "loss": 0.0795431137084961, "step": 1241 }, { "epoch": 0.16791442042823584, "grad_norm": 0.11625979095697403, "learning_rate": 2.9177757422412294e-05, "loss": 0.08052492141723633, "step": 1242 }, { "epoch": 0.1680496172240718, "grad_norm": 0.09489346295595169, "learning_rate": 2.917552097569484e-05, "loss": 0.05592775344848633, "step": 1243 }, { "epoch": 0.16818481401990773, "grad_norm": 0.158501997590065, "learning_rate": 2.917328157755832e-05, "loss": 0.06336051225662231, "step": 1244 }, { "epoch": 0.16832001081574366, "grad_norm": 0.07722340524196625, "learning_rate": 2.9171039228469003e-05, "loss": 0.06006908416748047, "step": 1245 }, { "epoch": 0.16845520761157962, "grad_norm": 0.07774420827627182, "learning_rate": 2.9168793928893747e-05, "loss": 0.06841564178466797, "step": 1246 }, { "epoch": 0.16859040440741555, "grad_norm": 0.15228503942489624, "learning_rate": 2.9166545679300036e-05, "loss": 0.0656576156616211, "step": 1247 }, { "epoch": 0.16872560120325147, "grad_norm": 0.16187550127506256, "learning_rate": 2.9164294480155966e-05, "loss": 0.054795265197753906, "step": 1248 }, { "epoch": 0.16886079799908743, "grad_norm": 0.1031121239066124, "learning_rate": 2.9162040331930256e-05, "loss": 0.0848090648651123, "step": 1249 }, { "epoch": 0.16899599479492336, "grad_norm": 0.21679864823818207, "learning_rate": 2.915978323509223e-05, "loss": 0.06322550773620605, "step": 1250 }, { "epoch": 0.1691311915907593, "grad_norm": 0.24488388001918793, "learning_rate": 2.915752319011182e-05, "loss": 0.10214900970458984, "step": 1251 }, { "epoch": 0.16926638838659525, "grad_norm": 0.16754993796348572, "learning_rate": 2.9155260197459588e-05, "loss": 0.10164070129394531, "step": 1252 }, { "epoch": 0.16940158518243117, "grad_norm": 0.15058457851409912, "learning_rate": 2.91529942576067e-05, "loss": 0.09105300903320312, "step": 1253 }, { "epoch": 0.1695367819782671, "grad_norm": 0.06617345660924911, "learning_rate": 2.915072537102493e-05, "loss": 0.05120724439620972, "step": 1254 }, { "epoch": 0.16967197877410306, "grad_norm": 0.062166474759578705, "learning_rate": 2.914845353818668e-05, "loss": 0.04924631118774414, "step": 1255 }, { "epoch": 0.169807175569939, "grad_norm": 0.34717512130737305, "learning_rate": 2.9146178759564953e-05, "loss": 0.0993661880493164, "step": 1256 }, { "epoch": 0.16994237236577492, "grad_norm": 0.04060515761375427, "learning_rate": 2.914390103563337e-05, "loss": 0.0339890718460083, "step": 1257 }, { "epoch": 0.17007756916161088, "grad_norm": 0.19279055297374725, "learning_rate": 2.914162036686617e-05, "loss": 0.07509946823120117, "step": 1258 }, { "epoch": 0.1702127659574468, "grad_norm": 0.10772058367729187, "learning_rate": 2.9139336753738196e-05, "loss": 0.10341405868530273, "step": 1259 }, { "epoch": 0.17034796275328273, "grad_norm": 0.11588366329669952, "learning_rate": 2.913705019672491e-05, "loss": 0.06786322593688965, "step": 1260 }, { "epoch": 0.1704831595491187, "grad_norm": 0.34414902329444885, "learning_rate": 2.9134760696302386e-05, "loss": 0.09893321990966797, "step": 1261 }, { "epoch": 0.17061835634495462, "grad_norm": 0.180582657456398, "learning_rate": 2.9132468252947306e-05, "loss": 0.08387088775634766, "step": 1262 }, { "epoch": 0.17075355314079058, "grad_norm": 0.4320274889469147, "learning_rate": 2.9130172867136974e-05, "loss": 0.09543085098266602, "step": 1263 }, { "epoch": 0.1708887499366265, "grad_norm": 0.17885689437389374, "learning_rate": 2.91278745393493e-05, "loss": 0.07803922891616821, "step": 1264 }, { "epoch": 0.17102394673246243, "grad_norm": 0.13092473149299622, "learning_rate": 2.9125573270062812e-05, "loss": 0.06229686737060547, "step": 1265 }, { "epoch": 0.1711591435282984, "grad_norm": 0.18804676830768585, "learning_rate": 2.9123269059756634e-05, "loss": 0.060498714447021484, "step": 1266 }, { "epoch": 0.17129434032413432, "grad_norm": 0.08068740367889404, "learning_rate": 2.9120961908910528e-05, "loss": 0.07163596153259277, "step": 1267 }, { "epoch": 0.17142953711997025, "grad_norm": 0.22152014076709747, "learning_rate": 2.911865181800485e-05, "loss": 0.10860919952392578, "step": 1268 }, { "epoch": 0.1715647339158062, "grad_norm": 0.11795254796743393, "learning_rate": 2.9116338787520577e-05, "loss": 0.10941553115844727, "step": 1269 }, { "epoch": 0.17169993071164213, "grad_norm": 0.10287696123123169, "learning_rate": 2.9114022817939283e-05, "loss": 0.08036231994628906, "step": 1270 }, { "epoch": 0.17183512750747806, "grad_norm": 0.07889505475759506, "learning_rate": 2.911170390974318e-05, "loss": 0.09151744842529297, "step": 1271 }, { "epoch": 0.17197032430331402, "grad_norm": 0.13890427350997925, "learning_rate": 2.9109382063415067e-05, "loss": 0.04506111145019531, "step": 1272 }, { "epoch": 0.17210552109914995, "grad_norm": 0.08674082905054092, "learning_rate": 2.9107057279438372e-05, "loss": 0.04581022262573242, "step": 1273 }, { "epoch": 0.17224071789498588, "grad_norm": 0.12049552798271179, "learning_rate": 2.910472955829712e-05, "loss": 0.11568403244018555, "step": 1274 }, { "epoch": 0.17237591469082184, "grad_norm": 0.08607936650514603, "learning_rate": 2.9102398900475958e-05, "loss": 0.08767271041870117, "step": 1275 }, { "epoch": 0.17251111148665776, "grad_norm": 0.13958725333213806, "learning_rate": 2.910006530646014e-05, "loss": 0.05032539367675781, "step": 1276 }, { "epoch": 0.1726463082824937, "grad_norm": 0.09221465140581131, "learning_rate": 2.909772877673554e-05, "loss": 0.04073596000671387, "step": 1277 }, { "epoch": 0.17278150507832965, "grad_norm": 0.07107503712177277, "learning_rate": 2.9095389311788626e-05, "loss": 0.05596303939819336, "step": 1278 }, { "epoch": 0.17291670187416558, "grad_norm": 0.07603446394205093, "learning_rate": 2.9093046912106494e-05, "loss": 0.07087564468383789, "step": 1279 }, { "epoch": 0.1730518986700015, "grad_norm": 0.141872376203537, "learning_rate": 2.909070157817684e-05, "loss": 0.06925201416015625, "step": 1280 }, { "epoch": 0.17318709546583747, "grad_norm": 0.16083335876464844, "learning_rate": 2.9088353310487976e-05, "loss": 0.09833264350891113, "step": 1281 }, { "epoch": 0.1733222922616734, "grad_norm": 0.35496050119400024, "learning_rate": 2.9086002109528825e-05, "loss": 0.09937858581542969, "step": 1282 }, { "epoch": 0.17345748905750935, "grad_norm": 0.16016283631324768, "learning_rate": 2.908364797578892e-05, "loss": 0.0793687105178833, "step": 1283 }, { "epoch": 0.17359268585334528, "grad_norm": 0.05883108451962471, "learning_rate": 2.9081290909758405e-05, "loss": 0.060964226722717285, "step": 1284 }, { "epoch": 0.1737278826491812, "grad_norm": 0.1897485852241516, "learning_rate": 2.9078930911928033e-05, "loss": 0.07701396942138672, "step": 1285 }, { "epoch": 0.17386307944501717, "grad_norm": 0.11912494897842407, "learning_rate": 2.907656798278916e-05, "loss": 0.06920289993286133, "step": 1286 }, { "epoch": 0.1739982762408531, "grad_norm": 0.07339036464691162, "learning_rate": 2.9074202122833773e-05, "loss": 0.0572047233581543, "step": 1287 }, { "epoch": 0.17413347303668902, "grad_norm": 0.06731192022562027, "learning_rate": 2.907183333255445e-05, "loss": 0.057007551193237305, "step": 1288 }, { "epoch": 0.17426866983252498, "grad_norm": 0.1837085336446762, "learning_rate": 2.9069461612444384e-05, "loss": 0.10115432739257812, "step": 1289 }, { "epoch": 0.1744038666283609, "grad_norm": 0.06417826563119888, "learning_rate": 2.9067086962997385e-05, "loss": 0.03990578651428223, "step": 1290 }, { "epoch": 0.17453906342419684, "grad_norm": 0.11471592634916306, "learning_rate": 2.9064709384707868e-05, "loss": 0.0711812973022461, "step": 1291 }, { "epoch": 0.1746742602200328, "grad_norm": 0.24925322830677032, "learning_rate": 2.9062328878070855e-05, "loss": 0.0804452896118164, "step": 1292 }, { "epoch": 0.17480945701586872, "grad_norm": 0.17708346247673035, "learning_rate": 2.905994544358198e-05, "loss": 0.07260704040527344, "step": 1293 }, { "epoch": 0.17494465381170465, "grad_norm": 0.07037805765867233, "learning_rate": 2.9057559081737482e-05, "loss": 0.061341166496276855, "step": 1294 }, { "epoch": 0.1750798506075406, "grad_norm": 0.1026465892791748, "learning_rate": 2.9055169793034225e-05, "loss": 0.06517291069030762, "step": 1295 }, { "epoch": 0.17521504740337654, "grad_norm": 0.13157184422016144, "learning_rate": 2.9052777577969656e-05, "loss": 0.04810833930969238, "step": 1296 }, { "epoch": 0.17535024419921247, "grad_norm": 0.09227099269628525, "learning_rate": 2.9050382437041868e-05, "loss": 0.05074000358581543, "step": 1297 }, { "epoch": 0.17548544099504843, "grad_norm": 0.17749840021133423, "learning_rate": 2.9047984370749526e-05, "loss": 0.0745927095413208, "step": 1298 }, { "epoch": 0.17562063779088435, "grad_norm": 0.10106880217790604, "learning_rate": 2.9045583379591925e-05, "loss": 0.055890798568725586, "step": 1299 }, { "epoch": 0.17575583458672028, "grad_norm": 0.18339994549751282, "learning_rate": 2.9043179464068965e-05, "loss": 0.08118295669555664, "step": 1300 }, { "epoch": 0.17589103138255624, "grad_norm": 0.10667243599891663, "learning_rate": 2.9040772624681152e-05, "loss": 0.06749486923217773, "step": 1301 }, { "epoch": 0.17602622817839217, "grad_norm": 0.10810138285160065, "learning_rate": 2.9038362861929603e-05, "loss": 0.0656580924987793, "step": 1302 }, { "epoch": 0.1761614249742281, "grad_norm": 0.08462870866060257, "learning_rate": 2.903595017631605e-05, "loss": 0.05225372314453125, "step": 1303 }, { "epoch": 0.17629662177006405, "grad_norm": 0.069978728890419, "learning_rate": 2.903353456834282e-05, "loss": 0.06925821304321289, "step": 1304 }, { "epoch": 0.17643181856589998, "grad_norm": 0.11681298911571503, "learning_rate": 2.903111603851285e-05, "loss": 0.05601310729980469, "step": 1305 }, { "epoch": 0.17656701536173594, "grad_norm": 0.06930729001760483, "learning_rate": 2.9028694587329704e-05, "loss": 0.06746578216552734, "step": 1306 }, { "epoch": 0.17670221215757187, "grad_norm": 0.21610063314437866, "learning_rate": 2.902627021529753e-05, "loss": 0.0843818187713623, "step": 1307 }, { "epoch": 0.1768374089534078, "grad_norm": 0.06469037383794785, "learning_rate": 2.9023842922921105e-05, "loss": 0.05560588836669922, "step": 1308 }, { "epoch": 0.17697260574924376, "grad_norm": 0.1363135576248169, "learning_rate": 2.90214127107058e-05, "loss": 0.10736942291259766, "step": 1309 }, { "epoch": 0.17710780254507968, "grad_norm": 0.1837257593870163, "learning_rate": 2.9018979579157592e-05, "loss": 0.06760454177856445, "step": 1310 }, { "epoch": 0.1772429993409156, "grad_norm": 0.11503897607326508, "learning_rate": 2.901654352878308e-05, "loss": 0.059558868408203125, "step": 1311 }, { "epoch": 0.17737819613675157, "grad_norm": 0.13690738379955292, "learning_rate": 2.9014104560089462e-05, "loss": 0.06150054931640625, "step": 1312 }, { "epoch": 0.1775133929325875, "grad_norm": 0.07230895012617111, "learning_rate": 2.9011662673584538e-05, "loss": 0.05124711990356445, "step": 1313 }, { "epoch": 0.17764858972842343, "grad_norm": 0.12711846828460693, "learning_rate": 2.900921786977673e-05, "loss": 0.08394670486450195, "step": 1314 }, { "epoch": 0.17778378652425939, "grad_norm": 0.22098760306835175, "learning_rate": 2.900677014917505e-05, "loss": 0.06563472747802734, "step": 1315 }, { "epoch": 0.17791898332009531, "grad_norm": 0.1604573130607605, "learning_rate": 2.9004319512289136e-05, "loss": 0.07813453674316406, "step": 1316 }, { "epoch": 0.17805418011593124, "grad_norm": 0.06745977699756622, "learning_rate": 2.9001865959629222e-05, "loss": 0.046000003814697266, "step": 1317 }, { "epoch": 0.1781893769117672, "grad_norm": 0.12523406744003296, "learning_rate": 2.8999409491706143e-05, "loss": 0.07711029052734375, "step": 1318 }, { "epoch": 0.17832457370760313, "grad_norm": 0.2040608525276184, "learning_rate": 2.8996950109031355e-05, "loss": 0.07084465026855469, "step": 1319 }, { "epoch": 0.17845977050343906, "grad_norm": 0.09971045702695847, "learning_rate": 2.8994487812116917e-05, "loss": 0.06867599487304688, "step": 1320 }, { "epoch": 0.17859496729927501, "grad_norm": 0.09231485426425934, "learning_rate": 2.8992022601475483e-05, "loss": 0.03790569305419922, "step": 1321 }, { "epoch": 0.17873016409511094, "grad_norm": 0.0515068955719471, "learning_rate": 2.8989554477620332e-05, "loss": 0.058396339416503906, "step": 1322 }, { "epoch": 0.17886536089094687, "grad_norm": 0.1407320201396942, "learning_rate": 2.8987083441065335e-05, "loss": 0.09112024307250977, "step": 1323 }, { "epoch": 0.17900055768678283, "grad_norm": 0.08635195344686508, "learning_rate": 2.8984609492324983e-05, "loss": 0.06437969207763672, "step": 1324 }, { "epoch": 0.17913575448261876, "grad_norm": 0.08393722027540207, "learning_rate": 2.8982132631914357e-05, "loss": 0.07030361890792847, "step": 1325 }, { "epoch": 0.1792709512784547, "grad_norm": 0.22234483063220978, "learning_rate": 2.8979652860349154e-05, "loss": 0.044274330139160156, "step": 1326 }, { "epoch": 0.17940614807429064, "grad_norm": 0.07304778695106506, "learning_rate": 2.8977170178145675e-05, "loss": 0.05028223991394043, "step": 1327 }, { "epoch": 0.17954134487012657, "grad_norm": 0.08771605044603348, "learning_rate": 2.8974684585820833e-05, "loss": 0.060231685638427734, "step": 1328 }, { "epoch": 0.17967654166596253, "grad_norm": 0.05200042948126793, "learning_rate": 2.8972196083892138e-05, "loss": 0.057005882263183594, "step": 1329 }, { "epoch": 0.17981173846179846, "grad_norm": 0.12458130717277527, "learning_rate": 2.8969704672877707e-05, "loss": 0.051749229431152344, "step": 1330 }, { "epoch": 0.1799469352576344, "grad_norm": 0.2294543981552124, "learning_rate": 2.896721035329627e-05, "loss": 0.07013463973999023, "step": 1331 }, { "epoch": 0.18008213205347035, "grad_norm": 0.09830637276172638, "learning_rate": 2.8964713125667153e-05, "loss": 0.06429815292358398, "step": 1332 }, { "epoch": 0.18021732884930627, "grad_norm": 0.113348089158535, "learning_rate": 2.8962212990510294e-05, "loss": 0.06415843963623047, "step": 1333 }, { "epoch": 0.1803525256451422, "grad_norm": 0.2105209082365036, "learning_rate": 2.8959709948346237e-05, "loss": 0.08428645133972168, "step": 1334 }, { "epoch": 0.18048772244097816, "grad_norm": 0.09588108956813812, "learning_rate": 2.8957203999696124e-05, "loss": 0.04796886444091797, "step": 1335 }, { "epoch": 0.1806229192368141, "grad_norm": 0.09961932897567749, "learning_rate": 2.8954695145081713e-05, "loss": 0.08334970474243164, "step": 1336 }, { "epoch": 0.18075811603265002, "grad_norm": 0.06924933195114136, "learning_rate": 2.8952183385025356e-05, "loss": 0.07975625991821289, "step": 1337 }, { "epoch": 0.18089331282848597, "grad_norm": 0.22378431260585785, "learning_rate": 2.8949668720050014e-05, "loss": 0.10448598861694336, "step": 1338 }, { "epoch": 0.1810285096243219, "grad_norm": 0.1825861632823944, "learning_rate": 2.8947151150679256e-05, "loss": 0.06799077987670898, "step": 1339 }, { "epoch": 0.18116370642015783, "grad_norm": 0.08274604380130768, "learning_rate": 2.8944630677437255e-05, "loss": 0.0810018926858902, "step": 1340 }, { "epoch": 0.1812989032159938, "grad_norm": 0.10907977819442749, "learning_rate": 2.8942107300848784e-05, "loss": 0.06739604473114014, "step": 1341 }, { "epoch": 0.18143410001182972, "grad_norm": 0.10027384012937546, "learning_rate": 2.8939581021439225e-05, "loss": 0.09553384780883789, "step": 1342 }, { "epoch": 0.18156929680766565, "grad_norm": 0.1585473120212555, "learning_rate": 2.8937051839734563e-05, "loss": 0.05302238464355469, "step": 1343 }, { "epoch": 0.1817044936035016, "grad_norm": 0.19858047366142273, "learning_rate": 2.8934519756261384e-05, "loss": 0.056659698486328125, "step": 1344 }, { "epoch": 0.18183969039933753, "grad_norm": 0.19681096076965332, "learning_rate": 2.8931984771546885e-05, "loss": 0.05824720859527588, "step": 1345 }, { "epoch": 0.18197488719517346, "grad_norm": 0.1715867668390274, "learning_rate": 2.8929446886118866e-05, "loss": 0.08165168762207031, "step": 1346 }, { "epoch": 0.18211008399100942, "grad_norm": 0.14102637767791748, "learning_rate": 2.892690610050572e-05, "loss": 0.06902551651000977, "step": 1347 }, { "epoch": 0.18224528078684535, "grad_norm": 0.15581168234348297, "learning_rate": 2.892436241523646e-05, "loss": 0.05586886405944824, "step": 1348 }, { "epoch": 0.18238047758268128, "grad_norm": 0.09487874060869217, "learning_rate": 2.8921815830840685e-05, "loss": 0.09083080291748047, "step": 1349 }, { "epoch": 0.18251567437851723, "grad_norm": 0.1292032152414322, "learning_rate": 2.891926634784862e-05, "loss": 0.07888293266296387, "step": 1350 }, { "epoch": 0.18265087117435316, "grad_norm": 0.3102639615535736, "learning_rate": 2.8916713966791076e-05, "loss": 0.07295083999633789, "step": 1351 }, { "epoch": 0.18278606797018912, "grad_norm": 0.3073422610759735, "learning_rate": 2.8914158688199464e-05, "loss": 0.09141802787780762, "step": 1352 }, { "epoch": 0.18292126476602505, "grad_norm": 0.0924450159072876, "learning_rate": 2.891160051260582e-05, "loss": 0.07750105857849121, "step": 1353 }, { "epoch": 0.18305646156186098, "grad_norm": 0.07807210087776184, "learning_rate": 2.8909039440542758e-05, "loss": 0.0388340950012207, "step": 1354 }, { "epoch": 0.18319165835769693, "grad_norm": 0.2586558163166046, "learning_rate": 2.890647547254352e-05, "loss": 0.0616002082824707, "step": 1355 }, { "epoch": 0.18332685515353286, "grad_norm": 0.0976000651717186, "learning_rate": 2.8903908609141923e-05, "loss": 0.04634904861450195, "step": 1356 }, { "epoch": 0.1834620519493688, "grad_norm": 0.3379673957824707, "learning_rate": 2.8901338850872413e-05, "loss": 0.08740568161010742, "step": 1357 }, { "epoch": 0.18359724874520475, "grad_norm": 0.11325690150260925, "learning_rate": 2.8898766198270022e-05, "loss": 0.059988975524902344, "step": 1358 }, { "epoch": 0.18373244554104068, "grad_norm": 0.09347891062498093, "learning_rate": 2.8896190651870392e-05, "loss": 0.0576169490814209, "step": 1359 }, { "epoch": 0.1838676423368766, "grad_norm": 0.22027359902858734, "learning_rate": 2.8893612212209763e-05, "loss": 0.07803654670715332, "step": 1360 }, { "epoch": 0.18400283913271256, "grad_norm": 0.27932098507881165, "learning_rate": 2.8891030879824985e-05, "loss": 0.08052492141723633, "step": 1361 }, { "epoch": 0.1841380359285485, "grad_norm": 0.1708294004201889, "learning_rate": 2.88884466552535e-05, "loss": 0.062349557876586914, "step": 1362 }, { "epoch": 0.18427323272438442, "grad_norm": 0.06322871893644333, "learning_rate": 2.888585953903336e-05, "loss": 0.0391697883605957, "step": 1363 }, { "epoch": 0.18440842952022038, "grad_norm": 0.09236955642700195, "learning_rate": 2.888326953170321e-05, "loss": 0.09978437423706055, "step": 1364 }, { "epoch": 0.1845436263160563, "grad_norm": 0.08766787499189377, "learning_rate": 2.8880676633802314e-05, "loss": 0.05164957046508789, "step": 1365 }, { "epoch": 0.18467882311189224, "grad_norm": 0.11937934905290604, "learning_rate": 2.8878080845870522e-05, "loss": 0.06482076644897461, "step": 1366 }, { "epoch": 0.1848140199077282, "grad_norm": 0.03756975382566452, "learning_rate": 2.887548216844829e-05, "loss": 0.04184436798095703, "step": 1367 }, { "epoch": 0.18494921670356412, "grad_norm": 0.3251025974750519, "learning_rate": 2.8872880602076675e-05, "loss": 0.10559749603271484, "step": 1368 }, { "epoch": 0.18508441349940005, "grad_norm": 0.1538354903459549, "learning_rate": 2.8870276147297344e-05, "loss": 0.09036731719970703, "step": 1369 }, { "epoch": 0.185219610295236, "grad_norm": 0.10339432209730148, "learning_rate": 2.8867668804652552e-05, "loss": 0.0590212345123291, "step": 1370 }, { "epoch": 0.18535480709107194, "grad_norm": 0.08854861557483673, "learning_rate": 2.886505857468516e-05, "loss": 0.05467057228088379, "step": 1371 }, { "epoch": 0.18549000388690787, "grad_norm": 0.10698610544204712, "learning_rate": 2.8862445457938642e-05, "loss": 0.07664823532104492, "step": 1372 }, { "epoch": 0.18562520068274382, "grad_norm": 0.24214357137680054, "learning_rate": 2.8859829454957053e-05, "loss": 0.0495753288269043, "step": 1373 }, { "epoch": 0.18576039747857975, "grad_norm": 0.14994406700134277, "learning_rate": 2.8857210566285062e-05, "loss": 0.04423844814300537, "step": 1374 }, { "epoch": 0.1858955942744157, "grad_norm": 0.05552583560347557, "learning_rate": 2.8854588792467932e-05, "loss": 0.051290273666381836, "step": 1375 }, { "epoch": 0.18603079107025164, "grad_norm": 0.17275118827819824, "learning_rate": 2.8851964134051535e-05, "loss": 0.06133460998535156, "step": 1376 }, { "epoch": 0.18616598786608757, "grad_norm": 0.10736674070358276, "learning_rate": 2.884933659158234e-05, "loss": 0.0592951774597168, "step": 1377 }, { "epoch": 0.18630118466192352, "grad_norm": 0.14024773240089417, "learning_rate": 2.8846706165607415e-05, "loss": 0.09714174270629883, "step": 1378 }, { "epoch": 0.18643638145775945, "grad_norm": 0.08408856391906738, "learning_rate": 2.8844072856674422e-05, "loss": 0.07675886154174805, "step": 1379 }, { "epoch": 0.18657157825359538, "grad_norm": 0.0785374715924263, "learning_rate": 2.8841436665331634e-05, "loss": 0.057961463928222656, "step": 1380 }, { "epoch": 0.18670677504943134, "grad_norm": 0.15104219317436218, "learning_rate": 2.8838797592127927e-05, "loss": 0.07774686813354492, "step": 1381 }, { "epoch": 0.18684197184526727, "grad_norm": 0.17305879294872284, "learning_rate": 2.883615563761276e-05, "loss": 0.07398366928100586, "step": 1382 }, { "epoch": 0.1869771686411032, "grad_norm": 0.12430399656295776, "learning_rate": 2.8833510802336206e-05, "loss": 0.06166648864746094, "step": 1383 }, { "epoch": 0.18711236543693915, "grad_norm": 0.0911678597331047, "learning_rate": 2.883086308684893e-05, "loss": 0.06403088569641113, "step": 1384 }, { "epoch": 0.18724756223277508, "grad_norm": 0.11092045903205872, "learning_rate": 2.882821249170221e-05, "loss": 0.058730363845825195, "step": 1385 }, { "epoch": 0.187382759028611, "grad_norm": 0.07783066481351852, "learning_rate": 2.8825559017447905e-05, "loss": 0.05014991760253906, "step": 1386 }, { "epoch": 0.18751795582444697, "grad_norm": 0.11592923104763031, "learning_rate": 2.8822902664638487e-05, "loss": 0.07435894012451172, "step": 1387 }, { "epoch": 0.1876531526202829, "grad_norm": 0.08970188349485397, "learning_rate": 2.882024343382702e-05, "loss": 0.07713985443115234, "step": 1388 }, { "epoch": 0.18778834941611883, "grad_norm": 0.22262392938137054, "learning_rate": 2.8817581325567174e-05, "loss": 0.124847412109375, "step": 1389 }, { "epoch": 0.18792354621195478, "grad_norm": 0.056012123823165894, "learning_rate": 2.8814916340413205e-05, "loss": 0.0842580795288086, "step": 1390 }, { "epoch": 0.1880587430077907, "grad_norm": 0.09239956736564636, "learning_rate": 2.881224847891999e-05, "loss": 0.06169247627258301, "step": 1391 }, { "epoch": 0.18819393980362664, "grad_norm": 0.09727323800325394, "learning_rate": 2.8809577741642987e-05, "loss": 0.08878540992736816, "step": 1392 }, { "epoch": 0.1883291365994626, "grad_norm": 0.138835608959198, "learning_rate": 2.8806904129138255e-05, "loss": 0.0812673568725586, "step": 1393 }, { "epoch": 0.18846433339529853, "grad_norm": 0.15790875256061554, "learning_rate": 2.8804227641962457e-05, "loss": 0.0831451416015625, "step": 1394 }, { "epoch": 0.18859953019113446, "grad_norm": 0.15054289996623993, "learning_rate": 2.8801548280672847e-05, "loss": 0.11885452270507812, "step": 1395 }, { "epoch": 0.1887347269869704, "grad_norm": 0.11854302138090134, "learning_rate": 2.8798866045827288e-05, "loss": 0.037550926208496094, "step": 1396 }, { "epoch": 0.18886992378280634, "grad_norm": 0.07879561185836792, "learning_rate": 2.8796180937984234e-05, "loss": 0.06037092208862305, "step": 1397 }, { "epoch": 0.1890051205786423, "grad_norm": 0.06782495975494385, "learning_rate": 2.8793492957702738e-05, "loss": 0.06937313079833984, "step": 1398 }, { "epoch": 0.18914031737447823, "grad_norm": 0.22681939601898193, "learning_rate": 2.8790802105542454e-05, "loss": 0.08480256795883179, "step": 1399 }, { "epoch": 0.18927551417031416, "grad_norm": 0.10194907337427139, "learning_rate": 2.8788108382063628e-05, "loss": 0.05944967269897461, "step": 1400 }, { "epoch": 0.18941071096615011, "grad_norm": 0.2543809115886688, "learning_rate": 2.878541178782711e-05, "loss": 0.12061214447021484, "step": 1401 }, { "epoch": 0.18954590776198604, "grad_norm": 0.15377728641033173, "learning_rate": 2.8782712323394344e-05, "loss": 0.11813163757324219, "step": 1402 }, { "epoch": 0.18968110455782197, "grad_norm": 0.15461359918117523, "learning_rate": 2.878000998932738e-05, "loss": 0.05657768249511719, "step": 1403 }, { "epoch": 0.18981630135365793, "grad_norm": 0.16069062054157257, "learning_rate": 2.8777304786188847e-05, "loss": 0.10333633422851562, "step": 1404 }, { "epoch": 0.18995149814949386, "grad_norm": 0.06819060444831848, "learning_rate": 2.8774596714541988e-05, "loss": 0.0556105375289917, "step": 1405 }, { "epoch": 0.1900866949453298, "grad_norm": 0.0878991186618805, "learning_rate": 2.8771885774950637e-05, "loss": 0.04519295692443848, "step": 1406 }, { "epoch": 0.19022189174116574, "grad_norm": 0.17676198482513428, "learning_rate": 2.876917196797923e-05, "loss": 0.07409811019897461, "step": 1407 }, { "epoch": 0.19035708853700167, "grad_norm": 0.1823957860469818, "learning_rate": 2.876645529419279e-05, "loss": 0.08322715759277344, "step": 1408 }, { "epoch": 0.1904922853328376, "grad_norm": 0.09615840762853622, "learning_rate": 2.876373575415695e-05, "loss": 0.0773468017578125, "step": 1409 }, { "epoch": 0.19062748212867356, "grad_norm": 0.13884080946445465, "learning_rate": 2.8761013348437926e-05, "loss": 0.05861163139343262, "step": 1410 }, { "epoch": 0.1907626789245095, "grad_norm": 0.12990429997444153, "learning_rate": 2.875828807760254e-05, "loss": 0.07942724227905273, "step": 1411 }, { "epoch": 0.19089787572034542, "grad_norm": 0.21610304713249207, "learning_rate": 2.875555994221821e-05, "loss": 0.06644105911254883, "step": 1412 }, { "epoch": 0.19103307251618137, "grad_norm": 0.11897508800029755, "learning_rate": 2.8752828942852943e-05, "loss": 0.06753742694854736, "step": 1413 }, { "epoch": 0.1911682693120173, "grad_norm": 0.17719510197639465, "learning_rate": 2.875009508007535e-05, "loss": 0.08179759979248047, "step": 1414 }, { "epoch": 0.19130346610785323, "grad_norm": 0.062041688710451126, "learning_rate": 2.8747358354454642e-05, "loss": 0.06464147567749023, "step": 1415 }, { "epoch": 0.1914386629036892, "grad_norm": 0.09665444493293762, "learning_rate": 2.8744618766560614e-05, "loss": 0.06567001342773438, "step": 1416 }, { "epoch": 0.19157385969952512, "grad_norm": 0.13068965077400208, "learning_rate": 2.8741876316963664e-05, "loss": 0.09388256072998047, "step": 1417 }, { "epoch": 0.19170905649536105, "grad_norm": 0.10729022324085236, "learning_rate": 2.873913100623478e-05, "loss": 0.06386137008666992, "step": 1418 }, { "epoch": 0.191844253291197, "grad_norm": 0.3651251792907715, "learning_rate": 2.873638283494556e-05, "loss": 0.0839681625366211, "step": 1419 }, { "epoch": 0.19197945008703293, "grad_norm": 0.18678618967533112, "learning_rate": 2.8733631803668178e-05, "loss": 0.05196845531463623, "step": 1420 }, { "epoch": 0.1921146468828689, "grad_norm": 0.16412784159183502, "learning_rate": 2.8730877912975418e-05, "loss": 0.050917625427246094, "step": 1421 }, { "epoch": 0.19224984367870482, "grad_norm": 0.09918685257434845, "learning_rate": 2.8728121163440656e-05, "loss": 0.06438601016998291, "step": 1422 }, { "epoch": 0.19238504047454075, "grad_norm": 0.17637163400650024, "learning_rate": 2.8725361555637863e-05, "loss": 0.07499885559082031, "step": 1423 }, { "epoch": 0.1925202372703767, "grad_norm": 0.30195796489715576, "learning_rate": 2.8722599090141598e-05, "loss": 0.08899223804473877, "step": 1424 }, { "epoch": 0.19265543406621263, "grad_norm": 0.23839521408081055, "learning_rate": 2.8719833767527026e-05, "loss": 0.09548377990722656, "step": 1425 }, { "epoch": 0.19279063086204856, "grad_norm": 0.07339231669902802, "learning_rate": 2.8717065588369896e-05, "loss": 0.05073881149291992, "step": 1426 }, { "epoch": 0.19292582765788452, "grad_norm": 0.11924371868371964, "learning_rate": 2.871429455324657e-05, "loss": 0.06584513187408447, "step": 1427 }, { "epoch": 0.19306102445372045, "grad_norm": 0.14208443462848663, "learning_rate": 2.871152066273398e-05, "loss": 0.08660650253295898, "step": 1428 }, { "epoch": 0.19319622124955638, "grad_norm": 0.3181878924369812, "learning_rate": 2.870874391740967e-05, "loss": 0.08827018737792969, "step": 1429 }, { "epoch": 0.19333141804539233, "grad_norm": 0.07801058143377304, "learning_rate": 2.8705964317851774e-05, "loss": 0.07984018325805664, "step": 1430 }, { "epoch": 0.19346661484122826, "grad_norm": 0.09154821932315826, "learning_rate": 2.8703181864639013e-05, "loss": 0.0673375129699707, "step": 1431 }, { "epoch": 0.1936018116370642, "grad_norm": 0.11852426826953888, "learning_rate": 2.870039655835072e-05, "loss": 0.06897473335266113, "step": 1432 }, { "epoch": 0.19373700843290015, "grad_norm": 0.07511452585458755, "learning_rate": 2.8697608399566796e-05, "loss": 0.06895732879638672, "step": 1433 }, { "epoch": 0.19387220522873608, "grad_norm": 0.11842640489339828, "learning_rate": 2.869481738886777e-05, "loss": 0.058078765869140625, "step": 1434 }, { "epoch": 0.194007402024572, "grad_norm": 0.09408573806285858, "learning_rate": 2.8692023526834725e-05, "loss": 0.07669878005981445, "step": 1435 }, { "epoch": 0.19414259882040796, "grad_norm": 0.198813796043396, "learning_rate": 2.8689226814049367e-05, "loss": 0.07886314392089844, "step": 1436 }, { "epoch": 0.1942777956162439, "grad_norm": 0.0880727544426918, "learning_rate": 2.868642725109399e-05, "loss": 0.06202530860900879, "step": 1437 }, { "epoch": 0.19441299241207982, "grad_norm": 0.06944162398576736, "learning_rate": 2.868362483855147e-05, "loss": 0.05888009071350098, "step": 1438 }, { "epoch": 0.19454818920791578, "grad_norm": 0.1189267709851265, "learning_rate": 2.8680819577005295e-05, "loss": 0.0660104751586914, "step": 1439 }, { "epoch": 0.1946833860037517, "grad_norm": 0.07902384549379349, "learning_rate": 2.8678011467039526e-05, "loss": 0.05221271514892578, "step": 1440 }, { "epoch": 0.19481858279958764, "grad_norm": 0.09434419870376587, "learning_rate": 2.867520050923883e-05, "loss": 0.05070090293884277, "step": 1441 }, { "epoch": 0.1949537795954236, "grad_norm": 0.061128754168748856, "learning_rate": 2.8672386704188466e-05, "loss": 0.0540614128112793, "step": 1442 }, { "epoch": 0.19508897639125952, "grad_norm": 0.07600434869527817, "learning_rate": 2.8669570052474273e-05, "loss": 0.06619977951049805, "step": 1443 }, { "epoch": 0.19522417318709548, "grad_norm": 0.11401189118623734, "learning_rate": 2.86667505546827e-05, "loss": 0.06352663040161133, "step": 1444 }, { "epoch": 0.1953593699829314, "grad_norm": 0.11938532441854477, "learning_rate": 2.866392821140079e-05, "loss": 0.04743194580078125, "step": 1445 }, { "epoch": 0.19549456677876734, "grad_norm": 0.18245841562747955, "learning_rate": 2.8661103023216154e-05, "loss": 0.07476139068603516, "step": 1446 }, { "epoch": 0.1956297635746033, "grad_norm": 0.16927899420261383, "learning_rate": 2.8658274990717018e-05, "loss": 0.08357441425323486, "step": 1447 }, { "epoch": 0.19576496037043922, "grad_norm": 0.08636972308158875, "learning_rate": 2.86554441144922e-05, "loss": 0.06247091293334961, "step": 1448 }, { "epoch": 0.19590015716627515, "grad_norm": 0.08359581232070923, "learning_rate": 2.8652610395131097e-05, "loss": 0.05119967460632324, "step": 1449 }, { "epoch": 0.1960353539621111, "grad_norm": 0.21092578768730164, "learning_rate": 2.8649773833223702e-05, "loss": 0.06998217105865479, "step": 1450 }, { "epoch": 0.19617055075794704, "grad_norm": 0.11341535300016403, "learning_rate": 2.8646934429360606e-05, "loss": 0.08960509300231934, "step": 1451 }, { "epoch": 0.19630574755378297, "grad_norm": 0.09743883460760117, "learning_rate": 2.8644092184132986e-05, "loss": 0.05595874786376953, "step": 1452 }, { "epoch": 0.19644094434961892, "grad_norm": 0.17838837206363678, "learning_rate": 2.864124709813262e-05, "loss": 0.08580327033996582, "step": 1453 }, { "epoch": 0.19657614114545485, "grad_norm": 0.13772979378700256, "learning_rate": 2.8638399171951856e-05, "loss": 0.09108620882034302, "step": 1454 }, { "epoch": 0.19671133794129078, "grad_norm": 0.2179628610610962, "learning_rate": 2.8635548406183664e-05, "loss": 0.12022209167480469, "step": 1455 }, { "epoch": 0.19684653473712674, "grad_norm": 0.3471464216709137, "learning_rate": 2.8632694801421576e-05, "loss": 0.0930701494216919, "step": 1456 }, { "epoch": 0.19698173153296267, "grad_norm": 0.056388743221759796, "learning_rate": 2.862983835825973e-05, "loss": 0.05864667892456055, "step": 1457 }, { "epoch": 0.1971169283287986, "grad_norm": 0.16650868952274323, "learning_rate": 2.8626979077292856e-05, "loss": 0.11506080627441406, "step": 1458 }, { "epoch": 0.19725212512463455, "grad_norm": 0.05247064307332039, "learning_rate": 2.862411695911627e-05, "loss": 0.0590968132019043, "step": 1459 }, { "epoch": 0.19738732192047048, "grad_norm": 0.16531048715114594, "learning_rate": 2.862125200432588e-05, "loss": 0.06190013885498047, "step": 1460 }, { "epoch": 0.1975225187163064, "grad_norm": 0.06729508191347122, "learning_rate": 2.8618384213518188e-05, "loss": 0.03719806671142578, "step": 1461 }, { "epoch": 0.19765771551214237, "grad_norm": 0.15682785212993622, "learning_rate": 2.861551358729028e-05, "loss": 0.06908249855041504, "step": 1462 }, { "epoch": 0.1977929123079783, "grad_norm": 0.08799882233142853, "learning_rate": 2.8612640126239836e-05, "loss": 0.08551311492919922, "step": 1463 }, { "epoch": 0.19792810910381423, "grad_norm": 0.06565811485052109, "learning_rate": 2.8609763830965126e-05, "loss": 0.06384038925170898, "step": 1464 }, { "epoch": 0.19806330589965018, "grad_norm": 0.05718689411878586, "learning_rate": 2.860688470206501e-05, "loss": 0.050319671630859375, "step": 1465 }, { "epoch": 0.1981985026954861, "grad_norm": 0.1384674608707428, "learning_rate": 2.8604002740138936e-05, "loss": 0.09149646759033203, "step": 1466 }, { "epoch": 0.19833369949132207, "grad_norm": 0.09817821532487869, "learning_rate": 2.860111794578695e-05, "loss": 0.07784509658813477, "step": 1467 }, { "epoch": 0.198468896287158, "grad_norm": 0.09491139650344849, "learning_rate": 2.8598230319609677e-05, "loss": 0.06491518020629883, "step": 1468 }, { "epoch": 0.19860409308299393, "grad_norm": 0.09622706472873688, "learning_rate": 2.8595339862208336e-05, "loss": 0.07344484329223633, "step": 1469 }, { "epoch": 0.19873928987882988, "grad_norm": 0.22665034234523773, "learning_rate": 2.8592446574184733e-05, "loss": 0.08306217193603516, "step": 1470 }, { "epoch": 0.1988744866746658, "grad_norm": 0.08944015204906464, "learning_rate": 2.8589550456141274e-05, "loss": 0.07159876823425293, "step": 1471 }, { "epoch": 0.19900968347050174, "grad_norm": 0.07798139750957489, "learning_rate": 2.8586651508680942e-05, "loss": 0.05242156982421875, "step": 1472 }, { "epoch": 0.1991448802663377, "grad_norm": 0.13667482137680054, "learning_rate": 2.8583749732407312e-05, "loss": 0.07505989074707031, "step": 1473 }, { "epoch": 0.19928007706217363, "grad_norm": 0.22467473149299622, "learning_rate": 2.8580845127924546e-05, "loss": 0.09481024742126465, "step": 1474 }, { "epoch": 0.19941527385800956, "grad_norm": 0.09221498668193817, "learning_rate": 2.8577937695837408e-05, "loss": 0.04923224449157715, "step": 1475 }, { "epoch": 0.1995504706538455, "grad_norm": 0.1812954843044281, "learning_rate": 2.8575027436751235e-05, "loss": 0.08632588386535645, "step": 1476 }, { "epoch": 0.19968566744968144, "grad_norm": 0.08311624079942703, "learning_rate": 2.8572114351271955e-05, "loss": 0.06156265735626221, "step": 1477 }, { "epoch": 0.19982086424551737, "grad_norm": 0.14101848006248474, "learning_rate": 2.85691984400061e-05, "loss": 0.053627967834472656, "step": 1478 }, { "epoch": 0.19995606104135333, "grad_norm": 0.14146101474761963, "learning_rate": 2.8566279703560762e-05, "loss": 0.08534765243530273, "step": 1479 }, { "epoch": 0.20009125783718926, "grad_norm": 0.33410540223121643, "learning_rate": 2.8563358142543648e-05, "loss": 0.09288358688354492, "step": 1480 }, { "epoch": 0.20022645463302519, "grad_norm": 0.0630439817905426, "learning_rate": 2.856043375756304e-05, "loss": 0.04931831359863281, "step": 1481 }, { "epoch": 0.20036165142886114, "grad_norm": 0.10115737468004227, "learning_rate": 2.855750654922781e-05, "loss": 0.07011294364929199, "step": 1482 }, { "epoch": 0.20049684822469707, "grad_norm": 0.0963035374879837, "learning_rate": 2.855457651814742e-05, "loss": 0.06272411346435547, "step": 1483 }, { "epoch": 0.200632045020533, "grad_norm": 0.0937761664390564, "learning_rate": 2.8551643664931916e-05, "loss": 0.061197757720947266, "step": 1484 }, { "epoch": 0.20076724181636896, "grad_norm": 0.1477745622396469, "learning_rate": 2.8548707990191933e-05, "loss": 0.09209823608398438, "step": 1485 }, { "epoch": 0.2009024386122049, "grad_norm": 0.08705328404903412, "learning_rate": 2.8545769494538698e-05, "loss": 0.06943559646606445, "step": 1486 }, { "epoch": 0.20103763540804082, "grad_norm": 0.07988999783992767, "learning_rate": 2.854282817858402e-05, "loss": 0.053441524505615234, "step": 1487 }, { "epoch": 0.20117283220387677, "grad_norm": 0.06395494192838669, "learning_rate": 2.85398840429403e-05, "loss": 0.0539705753326416, "step": 1488 }, { "epoch": 0.2013080289997127, "grad_norm": 0.06291565299034119, "learning_rate": 2.853693708822051e-05, "loss": 0.05599689483642578, "step": 1489 }, { "epoch": 0.20144322579554866, "grad_norm": 0.15285834670066833, "learning_rate": 2.8533987315038234e-05, "loss": 0.0469818115234375, "step": 1490 }, { "epoch": 0.2015784225913846, "grad_norm": 0.21539804339408875, "learning_rate": 2.8531034724007627e-05, "loss": 0.07901370525360107, "step": 1491 }, { "epoch": 0.20171361938722052, "grad_norm": 0.11085191369056702, "learning_rate": 2.8528079315743435e-05, "loss": 0.08513593673706055, "step": 1492 }, { "epoch": 0.20184881618305647, "grad_norm": 0.1706719994544983, "learning_rate": 2.852512109086099e-05, "loss": 0.05638456344604492, "step": 1493 }, { "epoch": 0.2019840129788924, "grad_norm": 0.07389377802610397, "learning_rate": 2.8522160049976208e-05, "loss": 0.047602057456970215, "step": 1494 }, { "epoch": 0.20211920977472833, "grad_norm": 0.05763942003250122, "learning_rate": 2.8519196193705595e-05, "loss": 0.039682626724243164, "step": 1495 }, { "epoch": 0.2022544065705643, "grad_norm": 0.08368533104658127, "learning_rate": 2.8516229522666243e-05, "loss": 0.0820918083190918, "step": 1496 }, { "epoch": 0.20238960336640022, "grad_norm": 0.09992201626300812, "learning_rate": 2.8513260037475825e-05, "loss": 0.07529139518737793, "step": 1497 }, { "epoch": 0.20252480016223615, "grad_norm": 0.0575963519513607, "learning_rate": 2.8510287738752604e-05, "loss": 0.05639004707336426, "step": 1498 }, { "epoch": 0.2026599969580721, "grad_norm": 0.16548512876033783, "learning_rate": 2.8507312627115435e-05, "loss": 0.11516666412353516, "step": 1499 }, { "epoch": 0.20279519375390803, "grad_norm": 0.12798194587230682, "learning_rate": 2.850433470318374e-05, "loss": 0.05388355255126953, "step": 1500 }, { "epoch": 0.20293039054974396, "grad_norm": 0.09383414685726166, "learning_rate": 2.8501353967577556e-05, "loss": 0.06741315126419067, "step": 1501 }, { "epoch": 0.20306558734557992, "grad_norm": 0.09627026319503784, "learning_rate": 2.8498370420917468e-05, "loss": 0.07947826385498047, "step": 1502 }, { "epoch": 0.20320078414141585, "grad_norm": 0.09149527549743652, "learning_rate": 2.8495384063824683e-05, "loss": 0.047959089279174805, "step": 1503 }, { "epoch": 0.20333598093725178, "grad_norm": 0.17404185235500336, "learning_rate": 2.8492394896920964e-05, "loss": 0.06344318389892578, "step": 1504 }, { "epoch": 0.20347117773308773, "grad_norm": 0.12634052336215973, "learning_rate": 2.848940292082868e-05, "loss": 0.0624239444732666, "step": 1505 }, { "epoch": 0.20360637452892366, "grad_norm": 0.2699243426322937, "learning_rate": 2.8486408136170772e-05, "loss": 0.09841346740722656, "step": 1506 }, { "epoch": 0.2037415713247596, "grad_norm": 0.1997794210910797, "learning_rate": 2.8483410543570776e-05, "loss": 0.09163236618041992, "step": 1507 }, { "epoch": 0.20387676812059555, "grad_norm": 0.06674224138259888, "learning_rate": 2.8480410143652803e-05, "loss": 0.047437191009521484, "step": 1508 }, { "epoch": 0.20401196491643148, "grad_norm": 0.09612417221069336, "learning_rate": 2.8477406937041547e-05, "loss": 0.08894634246826172, "step": 1509 }, { "epoch": 0.20414716171226743, "grad_norm": 0.06677885353565216, "learning_rate": 2.8474400924362298e-05, "loss": 0.04779243469238281, "step": 1510 }, { "epoch": 0.20428235850810336, "grad_norm": 0.17527706921100616, "learning_rate": 2.847139210624092e-05, "loss": 0.0888051986694336, "step": 1511 }, { "epoch": 0.2044175553039393, "grad_norm": 0.1711282730102539, "learning_rate": 2.8468380483303873e-05, "loss": 0.08749103546142578, "step": 1512 }, { "epoch": 0.20455275209977525, "grad_norm": 0.1732027381658554, "learning_rate": 2.8465366056178183e-05, "loss": 0.0960836410522461, "step": 1513 }, { "epoch": 0.20468794889561118, "grad_norm": 0.21484826505184174, "learning_rate": 2.8462348825491475e-05, "loss": 0.0897359848022461, "step": 1514 }, { "epoch": 0.2048231456914471, "grad_norm": 0.08397260308265686, "learning_rate": 2.8459328791871953e-05, "loss": 0.07047891616821289, "step": 1515 }, { "epoch": 0.20495834248728306, "grad_norm": 0.09662534296512604, "learning_rate": 2.8456305955948402e-05, "loss": 0.06568765640258789, "step": 1516 }, { "epoch": 0.205093539283119, "grad_norm": 0.19887784123420715, "learning_rate": 2.845328031835019e-05, "loss": 0.11615180969238281, "step": 1517 }, { "epoch": 0.20522873607895492, "grad_norm": 0.1498584747314453, "learning_rate": 2.8450251879707277e-05, "loss": 0.05876421928405762, "step": 1518 }, { "epoch": 0.20536393287479088, "grad_norm": 0.1901555061340332, "learning_rate": 2.8447220640650194e-05, "loss": 0.07042741775512695, "step": 1519 }, { "epoch": 0.2054991296706268, "grad_norm": 0.06683945655822754, "learning_rate": 2.8444186601810068e-05, "loss": 0.0695185661315918, "step": 1520 }, { "epoch": 0.20563432646646274, "grad_norm": 0.1322280764579773, "learning_rate": 2.84411497638186e-05, "loss": 0.06537187099456787, "step": 1521 }, { "epoch": 0.2057695232622987, "grad_norm": 0.12220027297735214, "learning_rate": 2.843811012730807e-05, "loss": 0.07801580429077148, "step": 1522 }, { "epoch": 0.20590472005813462, "grad_norm": 0.20445573329925537, "learning_rate": 2.8435067692911353e-05, "loss": 0.10744094848632812, "step": 1523 }, { "epoch": 0.20603991685397055, "grad_norm": 0.22539056837558746, "learning_rate": 2.8432022461261897e-05, "loss": 0.0722203254699707, "step": 1524 }, { "epoch": 0.2061751136498065, "grad_norm": 0.23824705183506012, "learning_rate": 2.8428974432993736e-05, "loss": 0.10843801498413086, "step": 1525 }, { "epoch": 0.20631031044564244, "grad_norm": 0.1543579399585724, "learning_rate": 2.8425923608741486e-05, "loss": 0.05908036231994629, "step": 1526 }, { "epoch": 0.20644550724147837, "grad_norm": 0.07272074371576309, "learning_rate": 2.8422869989140343e-05, "loss": 0.06726193428039551, "step": 1527 }, { "epoch": 0.20658070403731432, "grad_norm": 0.15433360636234283, "learning_rate": 2.8419813574826093e-05, "loss": 0.08235502243041992, "step": 1528 }, { "epoch": 0.20671590083315025, "grad_norm": 0.10479071736335754, "learning_rate": 2.8416754366435092e-05, "loss": 0.07231855392456055, "step": 1529 }, { "epoch": 0.20685109762898618, "grad_norm": 0.17646753787994385, "learning_rate": 2.8413692364604285e-05, "loss": 0.09379863739013672, "step": 1530 }, { "epoch": 0.20698629442482214, "grad_norm": 0.06437936425209045, "learning_rate": 2.8410627569971197e-05, "loss": 0.05408430099487305, "step": 1531 }, { "epoch": 0.20712149122065807, "grad_norm": 0.11157441139221191, "learning_rate": 2.8407559983173934e-05, "loss": 0.08189725875854492, "step": 1532 }, { "epoch": 0.20725668801649402, "grad_norm": 0.07680966705083847, "learning_rate": 2.8404489604851186e-05, "loss": 0.061344146728515625, "step": 1533 }, { "epoch": 0.20739188481232995, "grad_norm": 0.132106751203537, "learning_rate": 2.840141643564222e-05, "loss": 0.07763051986694336, "step": 1534 }, { "epoch": 0.20752708160816588, "grad_norm": 0.06416943669319153, "learning_rate": 2.8398340476186885e-05, "loss": 0.07701873779296875, "step": 1535 }, { "epoch": 0.20766227840400184, "grad_norm": 0.09628237038850784, "learning_rate": 2.8395261727125617e-05, "loss": 0.06579113006591797, "step": 1536 }, { "epoch": 0.20779747519983777, "grad_norm": 0.10490274429321289, "learning_rate": 2.8392180189099425e-05, "loss": 0.08664131164550781, "step": 1537 }, { "epoch": 0.2079326719956737, "grad_norm": 0.11617899686098099, "learning_rate": 2.83890958627499e-05, "loss": 0.06887149810791016, "step": 1538 }, { "epoch": 0.20806786879150965, "grad_norm": 0.09263032674789429, "learning_rate": 2.8386008748719216e-05, "loss": 0.0717477798461914, "step": 1539 }, { "epoch": 0.20820306558734558, "grad_norm": 0.16455240547657013, "learning_rate": 2.838291884765013e-05, "loss": 0.05478811264038086, "step": 1540 }, { "epoch": 0.2083382623831815, "grad_norm": 0.08099474757909775, "learning_rate": 2.8379826160185975e-05, "loss": 0.054837703704833984, "step": 1541 }, { "epoch": 0.20847345917901747, "grad_norm": 0.2022770792245865, "learning_rate": 2.8376730686970664e-05, "loss": 0.07261061668395996, "step": 1542 }, { "epoch": 0.2086086559748534, "grad_norm": 0.12341496348381042, "learning_rate": 2.8373632428648683e-05, "loss": 0.06386590003967285, "step": 1543 }, { "epoch": 0.20874385277068933, "grad_norm": 0.27008938789367676, "learning_rate": 2.8370531385865124e-05, "loss": 0.08893513679504395, "step": 1544 }, { "epoch": 0.20887904956652528, "grad_norm": 0.04684915021061897, "learning_rate": 2.8367427559265622e-05, "loss": 0.04062134027481079, "step": 1545 }, { "epoch": 0.2090142463623612, "grad_norm": 0.08418384194374084, "learning_rate": 2.836432094949642e-05, "loss": 0.06748557090759277, "step": 1546 }, { "epoch": 0.20914944315819714, "grad_norm": 0.13064037263393402, "learning_rate": 2.836121155720433e-05, "loss": 0.07818770408630371, "step": 1547 }, { "epoch": 0.2092846399540331, "grad_norm": 0.13980351388454437, "learning_rate": 2.8358099383036745e-05, "loss": 0.08222794532775879, "step": 1548 }, { "epoch": 0.20941983674986903, "grad_norm": 0.07518435269594193, "learning_rate": 2.8354984427641634e-05, "loss": 0.06541681289672852, "step": 1549 }, { "epoch": 0.20955503354570496, "grad_norm": 0.26029059290885925, "learning_rate": 2.8351866691667544e-05, "loss": 0.06605732440948486, "step": 1550 }, { "epoch": 0.2096902303415409, "grad_norm": 0.08158840984106064, "learning_rate": 2.8348746175763613e-05, "loss": 0.051041364669799805, "step": 1551 }, { "epoch": 0.20982542713737684, "grad_norm": 0.09598598629236221, "learning_rate": 2.8345622880579537e-05, "loss": 0.10406064987182617, "step": 1552 }, { "epoch": 0.20996062393321277, "grad_norm": 0.13267220556735992, "learning_rate": 2.8342496806765615e-05, "loss": 0.070159912109375, "step": 1553 }, { "epoch": 0.21009582072904873, "grad_norm": 0.10742221772670746, "learning_rate": 2.833936795497271e-05, "loss": 0.08212882280349731, "step": 1554 }, { "epoch": 0.21023101752488466, "grad_norm": 0.13372552394866943, "learning_rate": 2.8336236325852257e-05, "loss": 0.07744884490966797, "step": 1555 }, { "epoch": 0.2103662143207206, "grad_norm": 0.19794897735118866, "learning_rate": 2.8333101920056285e-05, "loss": 0.10175323486328125, "step": 1556 }, { "epoch": 0.21050141111655654, "grad_norm": 0.12407051026821136, "learning_rate": 2.8329964738237392e-05, "loss": 0.0694122314453125, "step": 1557 }, { "epoch": 0.21063660791239247, "grad_norm": 0.17577140033245087, "learning_rate": 2.8326824781048756e-05, "loss": 0.06978273391723633, "step": 1558 }, { "epoch": 0.21077180470822843, "grad_norm": 0.22986391186714172, "learning_rate": 2.8323682049144135e-05, "loss": 0.07682514190673828, "step": 1559 }, { "epoch": 0.21090700150406436, "grad_norm": 0.07227930426597595, "learning_rate": 2.832053654317786e-05, "loss": 0.07820320129394531, "step": 1560 }, { "epoch": 0.21104219829990029, "grad_norm": 0.17465704679489136, "learning_rate": 2.8317388263804842e-05, "loss": 0.07083892822265625, "step": 1561 }, { "epoch": 0.21117739509573624, "grad_norm": 0.08385077863931656, "learning_rate": 2.8314237211680573e-05, "loss": 0.06954383850097656, "step": 1562 }, { "epoch": 0.21131259189157217, "grad_norm": 0.11462143063545227, "learning_rate": 2.8311083387461118e-05, "loss": 0.08948302268981934, "step": 1563 }, { "epoch": 0.2114477886874081, "grad_norm": 0.22417183220386505, "learning_rate": 2.8307926791803114e-05, "loss": 0.060292720794677734, "step": 1564 }, { "epoch": 0.21158298548324406, "grad_norm": 0.09488919377326965, "learning_rate": 2.8304767425363785e-05, "loss": 0.06700277328491211, "step": 1565 }, { "epoch": 0.21171818227907999, "grad_norm": 0.09614776819944382, "learning_rate": 2.830160528880093e-05, "loss": 0.0829324722290039, "step": 1566 }, { "epoch": 0.21185337907491592, "grad_norm": 0.07649706304073334, "learning_rate": 2.829844038277292e-05, "loss": 0.07271313667297363, "step": 1567 }, { "epoch": 0.21198857587075187, "grad_norm": 0.10743039846420288, "learning_rate": 2.8295272707938706e-05, "loss": 0.07613086700439453, "step": 1568 }, { "epoch": 0.2121237726665878, "grad_norm": 0.03896505758166313, "learning_rate": 2.8292102264957817e-05, "loss": 0.036650657653808594, "step": 1569 }, { "epoch": 0.21225896946242373, "grad_norm": 0.0936877578496933, "learning_rate": 2.8288929054490357e-05, "loss": 0.05343818664550781, "step": 1570 }, { "epoch": 0.2123941662582597, "grad_norm": 0.07594258338212967, "learning_rate": 2.8285753077196998e-05, "loss": 0.08158373832702637, "step": 1571 }, { "epoch": 0.21252936305409562, "grad_norm": 0.21740159392356873, "learning_rate": 2.8282574333739006e-05, "loss": 0.0598607063293457, "step": 1572 }, { "epoch": 0.21266455984993154, "grad_norm": 0.1019560918211937, "learning_rate": 2.8279392824778197e-05, "loss": 0.08047676086425781, "step": 1573 }, { "epoch": 0.2127997566457675, "grad_norm": 0.08105242997407913, "learning_rate": 2.8276208550976993e-05, "loss": 0.06468057632446289, "step": 1574 }, { "epoch": 0.21293495344160343, "grad_norm": 0.1682303249835968, "learning_rate": 2.8273021512998372e-05, "loss": 0.05381631851196289, "step": 1575 }, { "epoch": 0.21307015023743936, "grad_norm": 0.09697219729423523, "learning_rate": 2.826983171150589e-05, "loss": 0.07071447372436523, "step": 1576 }, { "epoch": 0.21320534703327532, "grad_norm": 0.060206107795238495, "learning_rate": 2.826663914716368e-05, "loss": 0.06472301483154297, "step": 1577 }, { "epoch": 0.21334054382911125, "grad_norm": 0.1292990893125534, "learning_rate": 2.826344382063646e-05, "loss": 0.0739588737487793, "step": 1578 }, { "epoch": 0.2134757406249472, "grad_norm": 0.1430521160364151, "learning_rate": 2.8260245732589503e-05, "loss": 0.08129310607910156, "step": 1579 }, { "epoch": 0.21361093742078313, "grad_norm": 0.06377974152565002, "learning_rate": 2.8257044883688672e-05, "loss": 0.07360696792602539, "step": 1580 }, { "epoch": 0.21374613421661906, "grad_norm": 0.13742199540138245, "learning_rate": 2.82538412746004e-05, "loss": 0.09415674209594727, "step": 1581 }, { "epoch": 0.21388133101245502, "grad_norm": 0.09372340887784958, "learning_rate": 2.8250634905991695e-05, "loss": 0.08354854583740234, "step": 1582 }, { "epoch": 0.21401652780829095, "grad_norm": 0.1757609099149704, "learning_rate": 2.824742577853015e-05, "loss": 0.07148551940917969, "step": 1583 }, { "epoch": 0.21415172460412688, "grad_norm": 0.13692936301231384, "learning_rate": 2.8244213892883907e-05, "loss": 0.06240952014923096, "step": 1584 }, { "epoch": 0.21428692139996283, "grad_norm": 0.15134233236312866, "learning_rate": 2.82409992497217e-05, "loss": 0.09677934646606445, "step": 1585 }, { "epoch": 0.21442211819579876, "grad_norm": 0.13773484528064728, "learning_rate": 2.8237781849712852e-05, "loss": 0.10232770442962646, "step": 1586 }, { "epoch": 0.2145573149916347, "grad_norm": 0.28322187066078186, "learning_rate": 2.8234561693527222e-05, "loss": 0.08209753036499023, "step": 1587 }, { "epoch": 0.21469251178747065, "grad_norm": 0.21161141991615295, "learning_rate": 2.8231338781835275e-05, "loss": 0.06932401657104492, "step": 1588 }, { "epoch": 0.21482770858330658, "grad_norm": 0.05349447578191757, "learning_rate": 2.8228113115308032e-05, "loss": 0.0672307014465332, "step": 1589 }, { "epoch": 0.2149629053791425, "grad_norm": 0.12604087591171265, "learning_rate": 2.82248846946171e-05, "loss": 0.07590246200561523, "step": 1590 }, { "epoch": 0.21509810217497846, "grad_norm": 0.09108059108257294, "learning_rate": 2.822165352043465e-05, "loss": 0.03805732727050781, "step": 1591 }, { "epoch": 0.2152332989708144, "grad_norm": 0.07555746287107468, "learning_rate": 2.8218419593433437e-05, "loss": 0.06487464904785156, "step": 1592 }, { "epoch": 0.21536849576665032, "grad_norm": 0.14201481640338898, "learning_rate": 2.8215182914286768e-05, "loss": 0.0627593994140625, "step": 1593 }, { "epoch": 0.21550369256248628, "grad_norm": 0.16735677421092987, "learning_rate": 2.8211943483668546e-05, "loss": 0.08746910095214844, "step": 1594 }, { "epoch": 0.2156388893583222, "grad_norm": 0.12074435502290726, "learning_rate": 2.8208701302253237e-05, "loss": 0.06325006484985352, "step": 1595 }, { "epoch": 0.21577408615415813, "grad_norm": 0.09936917573213577, "learning_rate": 2.820545637071588e-05, "loss": 0.09252023696899414, "step": 1596 }, { "epoch": 0.2159092829499941, "grad_norm": 0.13411439955234528, "learning_rate": 2.8202208689732083e-05, "loss": 0.07288265228271484, "step": 1597 }, { "epoch": 0.21604447974583002, "grad_norm": 0.16384102404117584, "learning_rate": 2.819895825997804e-05, "loss": 0.09059333801269531, "step": 1598 }, { "epoch": 0.21617967654166595, "grad_norm": 0.15895752608776093, "learning_rate": 2.81957050821305e-05, "loss": 0.07207655906677246, "step": 1599 }, { "epoch": 0.2163148733375019, "grad_norm": 0.11737911403179169, "learning_rate": 2.8192449156866787e-05, "loss": 0.07565879821777344, "step": 1600 }, { "epoch": 0.21645007013333784, "grad_norm": 0.16184231638908386, "learning_rate": 2.8189190484864814e-05, "loss": 0.08757781982421875, "step": 1601 }, { "epoch": 0.2165852669291738, "grad_norm": 0.09004601836204529, "learning_rate": 2.8185929066803052e-05, "loss": 0.05258011817932129, "step": 1602 }, { "epoch": 0.21672046372500972, "grad_norm": 0.1932934671640396, "learning_rate": 2.818266490336054e-05, "loss": 0.06732559204101562, "step": 1603 }, { "epoch": 0.21685566052084565, "grad_norm": 0.10448861867189407, "learning_rate": 2.817939799521689e-05, "loss": 0.056088924407958984, "step": 1604 }, { "epoch": 0.2169908573166816, "grad_norm": 0.049659907817840576, "learning_rate": 2.8176128343052304e-05, "loss": 0.05062723159790039, "step": 1605 }, { "epoch": 0.21712605411251754, "grad_norm": 0.08144520968198776, "learning_rate": 2.817285594754753e-05, "loss": 0.07903766632080078, "step": 1606 }, { "epoch": 0.21726125090835346, "grad_norm": 0.056168362498283386, "learning_rate": 2.8169580809383902e-05, "loss": 0.05120849609375, "step": 1607 }, { "epoch": 0.21739644770418942, "grad_norm": 0.09560943394899368, "learning_rate": 2.8166302929243326e-05, "loss": 0.07111024856567383, "step": 1608 }, { "epoch": 0.21753164450002535, "grad_norm": 0.07586546242237091, "learning_rate": 2.8163022307808264e-05, "loss": 0.07851409912109375, "step": 1609 }, { "epoch": 0.21766684129586128, "grad_norm": 0.05134633183479309, "learning_rate": 2.8159738945761764e-05, "loss": 0.05237221717834473, "step": 1610 }, { "epoch": 0.21780203809169724, "grad_norm": 0.10992392152547836, "learning_rate": 2.8156452843787438e-05, "loss": 0.10461616516113281, "step": 1611 }, { "epoch": 0.21793723488753317, "grad_norm": 0.12636183202266693, "learning_rate": 2.815316400256947e-05, "loss": 0.09352779388427734, "step": 1612 }, { "epoch": 0.2180724316833691, "grad_norm": 0.23528893291950226, "learning_rate": 2.814987242279262e-05, "loss": 0.09353137016296387, "step": 1613 }, { "epoch": 0.21820762847920505, "grad_norm": 0.13521729409694672, "learning_rate": 2.8146578105142202e-05, "loss": 0.08751773834228516, "step": 1614 }, { "epoch": 0.21834282527504098, "grad_norm": 0.12088946253061295, "learning_rate": 2.814328105030412e-05, "loss": 0.0687868595123291, "step": 1615 }, { "epoch": 0.2184780220708769, "grad_norm": 0.2687852680683136, "learning_rate": 2.8139981258964836e-05, "loss": 0.09131598472595215, "step": 1616 }, { "epoch": 0.21861321886671287, "grad_norm": 0.08263131976127625, "learning_rate": 2.8136678731811385e-05, "loss": 0.06813740730285645, "step": 1617 }, { "epoch": 0.2187484156625488, "grad_norm": 0.29229700565338135, "learning_rate": 2.8133373469531362e-05, "loss": 0.07965850830078125, "step": 1618 }, { "epoch": 0.21888361245838472, "grad_norm": 0.051193296909332275, "learning_rate": 2.8130065472812952e-05, "loss": 0.0567936897277832, "step": 1619 }, { "epoch": 0.21901880925422068, "grad_norm": 0.10434012115001678, "learning_rate": 2.812675474234489e-05, "loss": 0.07630383968353271, "step": 1620 }, { "epoch": 0.2191540060500566, "grad_norm": 0.08696451783180237, "learning_rate": 2.812344127881649e-05, "loss": 0.0768289566040039, "step": 1621 }, { "epoch": 0.21928920284589254, "grad_norm": 0.1323765516281128, "learning_rate": 2.8120125082917638e-05, "loss": 0.07840633392333984, "step": 1622 }, { "epoch": 0.2194243996417285, "grad_norm": 0.2015657126903534, "learning_rate": 2.8116806155338773e-05, "loss": 0.07006978988647461, "step": 1623 }, { "epoch": 0.21955959643756442, "grad_norm": 0.08281800895929337, "learning_rate": 2.8113484496770923e-05, "loss": 0.0648653507232666, "step": 1624 }, { "epoch": 0.21969479323340038, "grad_norm": 0.34033703804016113, "learning_rate": 2.811016010790567e-05, "loss": 0.10746574401855469, "step": 1625 }, { "epoch": 0.2198299900292363, "grad_norm": 0.08292002230882645, "learning_rate": 2.8106832989435165e-05, "loss": 0.05260574817657471, "step": 1626 }, { "epoch": 0.21996518682507224, "grad_norm": 0.14075486361980438, "learning_rate": 2.8103503142052146e-05, "loss": 0.04801654815673828, "step": 1627 }, { "epoch": 0.2201003836209082, "grad_norm": 0.365215539932251, "learning_rate": 2.8100170566449892e-05, "loss": 0.07610559463500977, "step": 1628 }, { "epoch": 0.22023558041674413, "grad_norm": 0.14373613893985748, "learning_rate": 2.8096835263322266e-05, "loss": 0.04710793495178223, "step": 1629 }, { "epoch": 0.22037077721258005, "grad_norm": 0.10747268050909042, "learning_rate": 2.8093497233363702e-05, "loss": 0.0658864974975586, "step": 1630 }, { "epoch": 0.220505974008416, "grad_norm": 0.1373828947544098, "learning_rate": 2.8090156477269185e-05, "loss": 0.06066489219665527, "step": 1631 }, { "epoch": 0.22064117080425194, "grad_norm": 0.0827164426445961, "learning_rate": 2.808681299573429e-05, "loss": 0.054712772369384766, "step": 1632 }, { "epoch": 0.22077636760008787, "grad_norm": 0.10212383419275284, "learning_rate": 2.8083466789455137e-05, "loss": 0.026240110397338867, "step": 1633 }, { "epoch": 0.22091156439592383, "grad_norm": 0.14644062519073486, "learning_rate": 2.808011785912843e-05, "loss": 0.06903481483459473, "step": 1634 }, { "epoch": 0.22104676119175976, "grad_norm": 0.1701095849275589, "learning_rate": 2.8076766205451435e-05, "loss": 0.03863692283630371, "step": 1635 }, { "epoch": 0.22118195798759568, "grad_norm": 0.2067553699016571, "learning_rate": 2.8073411829121983e-05, "loss": 0.09719610214233398, "step": 1636 }, { "epoch": 0.22131715478343164, "grad_norm": 0.1486196517944336, "learning_rate": 2.8070054730838467e-05, "loss": 0.0491485595703125, "step": 1637 }, { "epoch": 0.22145235157926757, "grad_norm": 0.07885777950286865, "learning_rate": 2.8066694911299865e-05, "loss": 0.0641927719116211, "step": 1638 }, { "epoch": 0.2215875483751035, "grad_norm": 0.09383964538574219, "learning_rate": 2.8063332371205698e-05, "loss": 0.10130882263183594, "step": 1639 }, { "epoch": 0.22172274517093946, "grad_norm": 0.10606039315462112, "learning_rate": 2.8059967111256072e-05, "loss": 0.07357358932495117, "step": 1640 }, { "epoch": 0.22185794196677538, "grad_norm": 0.09118998795747757, "learning_rate": 2.8056599132151647e-05, "loss": 0.06270885467529297, "step": 1641 }, { "epoch": 0.22199313876261131, "grad_norm": 0.11135542392730713, "learning_rate": 2.8053228434593656e-05, "loss": 0.06854462623596191, "step": 1642 }, { "epoch": 0.22212833555844727, "grad_norm": 0.25021982192993164, "learning_rate": 2.8049855019283895e-05, "loss": 0.09043657779693604, "step": 1643 }, { "epoch": 0.2222635323542832, "grad_norm": 0.16723372042179108, "learning_rate": 2.8046478886924736e-05, "loss": 0.11274099349975586, "step": 1644 }, { "epoch": 0.22239872915011913, "grad_norm": 0.07573637366294861, "learning_rate": 2.804310003821909e-05, "loss": 0.044938087463378906, "step": 1645 }, { "epoch": 0.22253392594595509, "grad_norm": 0.07393091917037964, "learning_rate": 2.8039718473870473e-05, "loss": 0.06756353378295898, "step": 1646 }, { "epoch": 0.22266912274179101, "grad_norm": 0.07173236459493637, "learning_rate": 2.8036334194582924e-05, "loss": 0.05489706993103027, "step": 1647 }, { "epoch": 0.22280431953762697, "grad_norm": 0.11214042454957962, "learning_rate": 2.8032947201061084e-05, "loss": 0.07085013389587402, "step": 1648 }, { "epoch": 0.2229395163334629, "grad_norm": 0.18437223136425018, "learning_rate": 2.8029557494010132e-05, "loss": 0.09244060516357422, "step": 1649 }, { "epoch": 0.22307471312929883, "grad_norm": 0.24310776591300964, "learning_rate": 2.802616507413583e-05, "loss": 0.0722799301147461, "step": 1650 }, { "epoch": 0.2232099099251348, "grad_norm": 0.08807925134897232, "learning_rate": 2.8022769942144492e-05, "loss": 0.07807731628417969, "step": 1651 }, { "epoch": 0.22334510672097072, "grad_norm": 0.2650485336780548, "learning_rate": 2.801937209874301e-05, "loss": 0.0627899169921875, "step": 1652 }, { "epoch": 0.22348030351680664, "grad_norm": 0.08361349254846573, "learning_rate": 2.8015971544638832e-05, "loss": 0.0717768669128418, "step": 1653 }, { "epoch": 0.2236155003126426, "grad_norm": 0.19367912411689758, "learning_rate": 2.8012568280539964e-05, "loss": 0.07652783393859863, "step": 1654 }, { "epoch": 0.22375069710847853, "grad_norm": 0.2210891842842102, "learning_rate": 2.800916230715499e-05, "loss": 0.07922744750976562, "step": 1655 }, { "epoch": 0.22388589390431446, "grad_norm": 0.15765532851219177, "learning_rate": 2.800575362519305e-05, "loss": 0.07997322082519531, "step": 1656 }, { "epoch": 0.22402109070015042, "grad_norm": 0.05100607872009277, "learning_rate": 2.800234223536385e-05, "loss": 0.04888582229614258, "step": 1657 }, { "epoch": 0.22415628749598634, "grad_norm": 0.12813875079154968, "learning_rate": 2.799892813837766e-05, "loss": 0.0952460765838623, "step": 1658 }, { "epoch": 0.22429148429182227, "grad_norm": 0.22915862500667572, "learning_rate": 2.7995511334945315e-05, "loss": 0.06994915008544922, "step": 1659 }, { "epoch": 0.22442668108765823, "grad_norm": 0.11997871100902557, "learning_rate": 2.7992091825778202e-05, "loss": 0.08458495140075684, "step": 1660 }, { "epoch": 0.22456187788349416, "grad_norm": 0.07525476068258286, "learning_rate": 2.7988669611588295e-05, "loss": 0.05947160720825195, "step": 1661 }, { "epoch": 0.2246970746793301, "grad_norm": 0.0761144831776619, "learning_rate": 2.7985244693088112e-05, "loss": 0.0955662727355957, "step": 1662 }, { "epoch": 0.22483227147516605, "grad_norm": 0.10482840240001678, "learning_rate": 2.7981817070990736e-05, "loss": 0.06867051124572754, "step": 1663 }, { "epoch": 0.22496746827100197, "grad_norm": 0.06475368142127991, "learning_rate": 2.7978386746009813e-05, "loss": 0.06558990478515625, "step": 1664 }, { "epoch": 0.2251026650668379, "grad_norm": 0.08630302548408508, "learning_rate": 2.797495371885957e-05, "loss": 0.09059739112854004, "step": 1665 }, { "epoch": 0.22523786186267386, "grad_norm": 0.14821258187294006, "learning_rate": 2.7971517990254768e-05, "loss": 0.06691479682922363, "step": 1666 }, { "epoch": 0.2253730586585098, "grad_norm": 0.21616770327091217, "learning_rate": 2.7968079560910744e-05, "loss": 0.06915140151977539, "step": 1667 }, { "epoch": 0.22550825545434572, "grad_norm": 0.11089816689491272, "learning_rate": 2.7964638431543402e-05, "loss": 0.04687166213989258, "step": 1668 }, { "epoch": 0.22564345225018168, "grad_norm": 0.2505188286304474, "learning_rate": 2.7961194602869208e-05, "loss": 0.09011673927307129, "step": 1669 }, { "epoch": 0.2257786490460176, "grad_norm": 0.11089015007019043, "learning_rate": 2.7957748075605178e-05, "loss": 0.06541275978088379, "step": 1670 }, { "epoch": 0.22591384584185356, "grad_norm": 0.17884966731071472, "learning_rate": 2.7954298850468898e-05, "loss": 0.09997940063476562, "step": 1671 }, { "epoch": 0.2260490426376895, "grad_norm": 0.15689659118652344, "learning_rate": 2.7950846928178517e-05, "loss": 0.09926700592041016, "step": 1672 }, { "epoch": 0.22618423943352542, "grad_norm": 0.12514255940914154, "learning_rate": 2.7947392309452744e-05, "loss": 0.06580209732055664, "step": 1673 }, { "epoch": 0.22631943622936138, "grad_norm": 0.10486900061368942, "learning_rate": 2.7943934995010845e-05, "loss": 0.06351089477539062, "step": 1674 }, { "epoch": 0.2264546330251973, "grad_norm": 0.08569465577602386, "learning_rate": 2.7940474985572657e-05, "loss": 0.05024516582489014, "step": 1675 }, { "epoch": 0.22658982982103323, "grad_norm": 0.06809850037097931, "learning_rate": 2.793701228185857e-05, "loss": 0.039927005767822266, "step": 1676 }, { "epoch": 0.2267250266168692, "grad_norm": 0.08198157697916031, "learning_rate": 2.7933546884589536e-05, "loss": 0.0691232681274414, "step": 1677 }, { "epoch": 0.22686022341270512, "grad_norm": 0.15173587203025818, "learning_rate": 2.7930078794487077e-05, "loss": 0.05842554569244385, "step": 1678 }, { "epoch": 0.22699542020854105, "grad_norm": 0.11979150772094727, "learning_rate": 2.7926608012273253e-05, "loss": 0.046627044677734375, "step": 1679 }, { "epoch": 0.227130617004377, "grad_norm": 0.1184156984090805, "learning_rate": 2.7923134538670715e-05, "loss": 0.11591625213623047, "step": 1680 }, { "epoch": 0.22726581380021293, "grad_norm": 0.07041200995445251, "learning_rate": 2.7919658374402645e-05, "loss": 0.05948638916015625, "step": 1681 }, { "epoch": 0.22740101059604886, "grad_norm": 0.16953514516353607, "learning_rate": 2.7916179520192807e-05, "loss": 0.057387351989746094, "step": 1682 }, { "epoch": 0.22753620739188482, "grad_norm": 0.09883497655391693, "learning_rate": 2.7912697976765516e-05, "loss": 0.06579351425170898, "step": 1683 }, { "epoch": 0.22767140418772075, "grad_norm": 0.15086331963539124, "learning_rate": 2.790921374484565e-05, "loss": 0.06711924076080322, "step": 1684 }, { "epoch": 0.22780660098355668, "grad_norm": 0.1059052050113678, "learning_rate": 2.7905726825158637e-05, "loss": 0.07807683944702148, "step": 1685 }, { "epoch": 0.22794179777939264, "grad_norm": 0.12642516195774078, "learning_rate": 2.7902237218430485e-05, "loss": 0.05850076675415039, "step": 1686 }, { "epoch": 0.22807699457522856, "grad_norm": 0.17913424968719482, "learning_rate": 2.7898744925387735e-05, "loss": 0.10469627380371094, "step": 1687 }, { "epoch": 0.2282121913710645, "grad_norm": 0.12199673801660538, "learning_rate": 2.7895249946757505e-05, "loss": 0.06017148494720459, "step": 1688 }, { "epoch": 0.22834738816690045, "grad_norm": 0.0852661207318306, "learning_rate": 2.7891752283267474e-05, "loss": 0.0633392333984375, "step": 1689 }, { "epoch": 0.22848258496273638, "grad_norm": 0.07933986186981201, "learning_rate": 2.788825193564587e-05, "loss": 0.056157827377319336, "step": 1690 }, { "epoch": 0.2286177817585723, "grad_norm": 0.08577398955821991, "learning_rate": 2.7884748904621483e-05, "loss": 0.09413909912109375, "step": 1691 }, { "epoch": 0.22875297855440826, "grad_norm": 0.07432276755571365, "learning_rate": 2.7881243190923667e-05, "loss": 0.06516695022583008, "step": 1692 }, { "epoch": 0.2288881753502442, "grad_norm": 0.12504330277442932, "learning_rate": 2.7877734795282326e-05, "loss": 0.07417774200439453, "step": 1693 }, { "epoch": 0.22902337214608015, "grad_norm": 0.09034009277820587, "learning_rate": 2.7874223718427926e-05, "loss": 0.05020713806152344, "step": 1694 }, { "epoch": 0.22915856894191608, "grad_norm": 0.1191539391875267, "learning_rate": 2.78707099610915e-05, "loss": 0.08720779418945312, "step": 1695 }, { "epoch": 0.229293765737752, "grad_norm": 0.18190699815750122, "learning_rate": 2.7867193524004618e-05, "loss": 0.0926237404346466, "step": 1696 }, { "epoch": 0.22942896253358797, "grad_norm": 0.1577400416135788, "learning_rate": 2.786367440789943e-05, "loss": 0.07935047149658203, "step": 1697 }, { "epoch": 0.2295641593294239, "grad_norm": 0.07673060148954391, "learning_rate": 2.7860152613508634e-05, "loss": 0.06094551086425781, "step": 1698 }, { "epoch": 0.22969935612525982, "grad_norm": 0.617973804473877, "learning_rate": 2.7856628141565484e-05, "loss": 0.1449728012084961, "step": 1699 }, { "epoch": 0.22983455292109578, "grad_norm": 0.07696376740932465, "learning_rate": 2.7853100992803797e-05, "loss": 0.06810665130615234, "step": 1700 }, { "epoch": 0.2299697497169317, "grad_norm": 0.2503134608268738, "learning_rate": 2.7849571167957942e-05, "loss": 0.08841514587402344, "step": 1701 }, { "epoch": 0.23010494651276764, "grad_norm": 0.16584907472133636, "learning_rate": 2.784603866776285e-05, "loss": 0.06377124786376953, "step": 1702 }, { "epoch": 0.2302401433086036, "grad_norm": 0.3064413070678711, "learning_rate": 2.7842503492953996e-05, "loss": 0.06874585151672363, "step": 1703 }, { "epoch": 0.23037534010443952, "grad_norm": 0.33040449023246765, "learning_rate": 2.7838965644267435e-05, "loss": 0.09757280349731445, "step": 1704 }, { "epoch": 0.23051053690027545, "grad_norm": 0.1609157770872116, "learning_rate": 2.7835425122439764e-05, "loss": 0.10168588161468506, "step": 1705 }, { "epoch": 0.2306457336961114, "grad_norm": 0.06962718069553375, "learning_rate": 2.7831881928208128e-05, "loss": 0.08070611953735352, "step": 1706 }, { "epoch": 0.23078093049194734, "grad_norm": 0.26125577092170715, "learning_rate": 2.7828336062310252e-05, "loss": 0.08952713012695312, "step": 1707 }, { "epoch": 0.23091612728778327, "grad_norm": 0.14154115319252014, "learning_rate": 2.7824787525484403e-05, "loss": 0.03807520866394043, "step": 1708 }, { "epoch": 0.23105132408361923, "grad_norm": 0.08921989798545837, "learning_rate": 2.7821236318469395e-05, "loss": 0.041365981101989746, "step": 1709 }, { "epoch": 0.23118652087945515, "grad_norm": 0.13391892611980438, "learning_rate": 2.7817682442004615e-05, "loss": 0.07260739803314209, "step": 1710 }, { "epoch": 0.23132171767529108, "grad_norm": 0.2577361464500427, "learning_rate": 2.781412589683e-05, "loss": 0.09927582740783691, "step": 1711 }, { "epoch": 0.23145691447112704, "grad_norm": 0.07660982012748718, "learning_rate": 2.781056668368604e-05, "loss": 0.0736246109008789, "step": 1712 }, { "epoch": 0.23159211126696297, "grad_norm": 0.09655215591192245, "learning_rate": 2.780700480331378e-05, "loss": 0.0680195689201355, "step": 1713 }, { "epoch": 0.2317273080627989, "grad_norm": 0.10156284272670746, "learning_rate": 2.7803440256454825e-05, "loss": 0.0675501823425293, "step": 1714 }, { "epoch": 0.23186250485863485, "grad_norm": 0.19986282289028168, "learning_rate": 2.7799873043851337e-05, "loss": 0.10214805603027344, "step": 1715 }, { "epoch": 0.23199770165447078, "grad_norm": 0.13760405778884888, "learning_rate": 2.7796303166246016e-05, "loss": 0.08921337127685547, "step": 1716 }, { "epoch": 0.23213289845030674, "grad_norm": 0.07674062997102737, "learning_rate": 2.7792730624382142e-05, "loss": 0.07658171653747559, "step": 1717 }, { "epoch": 0.23226809524614267, "grad_norm": 0.0891207680106163, "learning_rate": 2.778915541900353e-05, "loss": 0.08002948760986328, "step": 1718 }, { "epoch": 0.2324032920419786, "grad_norm": 0.11375489830970764, "learning_rate": 2.7785577550854566e-05, "loss": 0.05949282646179199, "step": 1719 }, { "epoch": 0.23253848883781456, "grad_norm": 0.09317008405923843, "learning_rate": 2.778199702068017e-05, "loss": 0.05891084671020508, "step": 1720 }, { "epoch": 0.23267368563365048, "grad_norm": 0.1997774839401245, "learning_rate": 2.777841382922583e-05, "loss": 0.11283183097839355, "step": 1721 }, { "epoch": 0.2328088824294864, "grad_norm": 0.23426946997642517, "learning_rate": 2.7774827977237596e-05, "loss": 0.08265495300292969, "step": 1722 }, { "epoch": 0.23294407922532237, "grad_norm": 0.2385103702545166, "learning_rate": 2.777123946546205e-05, "loss": 0.07417219877243042, "step": 1723 }, { "epoch": 0.2330792760211583, "grad_norm": 0.1510564684867859, "learning_rate": 2.776764829464634e-05, "loss": 0.09803891181945801, "step": 1724 }, { "epoch": 0.23321447281699423, "grad_norm": 0.1913069486618042, "learning_rate": 2.7764054465538173e-05, "loss": 0.07884073257446289, "step": 1725 }, { "epoch": 0.23334966961283019, "grad_norm": 0.1414082646369934, "learning_rate": 2.7760457978885794e-05, "loss": 0.06635475158691406, "step": 1726 }, { "epoch": 0.23348486640866611, "grad_norm": 0.2202560156583786, "learning_rate": 2.7756858835438022e-05, "loss": 0.06752020120620728, "step": 1727 }, { "epoch": 0.23362006320450204, "grad_norm": 0.056420207023620605, "learning_rate": 2.7753257035944216e-05, "loss": 0.052390098571777344, "step": 1728 }, { "epoch": 0.233755260000338, "grad_norm": 0.15214776992797852, "learning_rate": 2.7749652581154277e-05, "loss": 0.056015610694885254, "step": 1729 }, { "epoch": 0.23389045679617393, "grad_norm": 0.18461285531520844, "learning_rate": 2.7746045471818685e-05, "loss": 0.08249831199645996, "step": 1730 }, { "epoch": 0.23402565359200986, "grad_norm": 0.16687346994876862, "learning_rate": 2.7742435708688458e-05, "loss": 0.06353402137756348, "step": 1731 }, { "epoch": 0.23416085038784581, "grad_norm": 0.11642317473888397, "learning_rate": 2.7738823292515167e-05, "loss": 0.10229349136352539, "step": 1732 }, { "epoch": 0.23429604718368174, "grad_norm": 0.10276017338037491, "learning_rate": 2.773520822405093e-05, "loss": 0.10089659690856934, "step": 1733 }, { "epoch": 0.23443124397951767, "grad_norm": 0.12194877117872238, "learning_rate": 2.7731590504048433e-05, "loss": 0.043982505798339844, "step": 1734 }, { "epoch": 0.23456644077535363, "grad_norm": 0.07328354567289352, "learning_rate": 2.7727970133260896e-05, "loss": 0.05307340621948242, "step": 1735 }, { "epoch": 0.23470163757118956, "grad_norm": 0.11488433927297592, "learning_rate": 2.7724347112442106e-05, "loss": 0.05113840103149414, "step": 1736 }, { "epoch": 0.2348368343670255, "grad_norm": 0.2745373249053955, "learning_rate": 2.772072144234639e-05, "loss": 0.07746362686157227, "step": 1737 }, { "epoch": 0.23497203116286144, "grad_norm": 0.16850346326828003, "learning_rate": 2.7717093123728634e-05, "loss": 0.08483743667602539, "step": 1738 }, { "epoch": 0.23510722795869737, "grad_norm": 0.06206810846924782, "learning_rate": 2.771346215734428e-05, "loss": 0.04123663902282715, "step": 1739 }, { "epoch": 0.23524242475453333, "grad_norm": 0.10241566598415375, "learning_rate": 2.7709828543949302e-05, "loss": 0.11449801921844482, "step": 1740 }, { "epoch": 0.23537762155036926, "grad_norm": 0.11740782111883163, "learning_rate": 2.770619228430025e-05, "loss": 0.0947728157043457, "step": 1741 }, { "epoch": 0.2355128183462052, "grad_norm": 0.10658842325210571, "learning_rate": 2.77025533791542e-05, "loss": 0.07320404052734375, "step": 1742 }, { "epoch": 0.23564801514204115, "grad_norm": 0.2130248099565506, "learning_rate": 2.76989118292688e-05, "loss": 0.0629202127456665, "step": 1743 }, { "epoch": 0.23578321193787707, "grad_norm": 0.08386167883872986, "learning_rate": 2.7695267635402242e-05, "loss": 0.06380367279052734, "step": 1744 }, { "epoch": 0.235918408733713, "grad_norm": 0.053069718182086945, "learning_rate": 2.7691620798313258e-05, "loss": 0.0423736572265625, "step": 1745 }, { "epoch": 0.23605360552954896, "grad_norm": 0.07481296360492706, "learning_rate": 2.7687971318761145e-05, "loss": 0.05146193504333496, "step": 1746 }, { "epoch": 0.2361888023253849, "grad_norm": 0.10211343318223953, "learning_rate": 2.7684319197505746e-05, "loss": 0.054984331130981445, "step": 1747 }, { "epoch": 0.23632399912122082, "grad_norm": 0.0420459620654583, "learning_rate": 2.7680664435307446e-05, "loss": 0.042733073234558105, "step": 1748 }, { "epoch": 0.23645919591705677, "grad_norm": 0.09920414537191391, "learning_rate": 2.767700703292719e-05, "loss": 0.08068704605102539, "step": 1749 }, { "epoch": 0.2365943927128927, "grad_norm": 0.056447435170412064, "learning_rate": 2.767334699112647e-05, "loss": 0.061174869537353516, "step": 1750 }, { "epoch": 0.23672958950872863, "grad_norm": 0.1442887783050537, "learning_rate": 2.7669684310667318e-05, "loss": 0.10539007186889648, "step": 1751 }, { "epoch": 0.2368647863045646, "grad_norm": 0.0931338518857956, "learning_rate": 2.7666018992312333e-05, "loss": 0.04785966873168945, "step": 1752 }, { "epoch": 0.23699998310040052, "grad_norm": 0.045554425567388535, "learning_rate": 2.7662351036824653e-05, "loss": 0.04404497146606445, "step": 1753 }, { "epoch": 0.23713517989623645, "grad_norm": 0.04117034003138542, "learning_rate": 2.7658680444967964e-05, "loss": 0.036290884017944336, "step": 1754 }, { "epoch": 0.2372703766920724, "grad_norm": 0.11715850234031677, "learning_rate": 2.76550072175065e-05, "loss": 0.09999275207519531, "step": 1755 }, { "epoch": 0.23740557348790833, "grad_norm": 0.1328911930322647, "learning_rate": 2.7651331355205044e-05, "loss": 0.0979456901550293, "step": 1756 }, { "epoch": 0.23754077028374426, "grad_norm": 0.055276934057474136, "learning_rate": 2.7647652858828936e-05, "loss": 0.05973386764526367, "step": 1757 }, { "epoch": 0.23767596707958022, "grad_norm": 0.05377735570073128, "learning_rate": 2.764397172914406e-05, "loss": 0.06414568424224854, "step": 1758 }, { "epoch": 0.23781116387541615, "grad_norm": 0.1497291475534439, "learning_rate": 2.7640287966916845e-05, "loss": 0.07093620300292969, "step": 1759 }, { "epoch": 0.2379463606712521, "grad_norm": 0.10229865461587906, "learning_rate": 2.7636601572914266e-05, "loss": 0.07892036437988281, "step": 1760 }, { "epoch": 0.23808155746708803, "grad_norm": 0.11082065105438232, "learning_rate": 2.7632912547903855e-05, "loss": 0.06773090362548828, "step": 1761 }, { "epoch": 0.23821675426292396, "grad_norm": 0.0587751530110836, "learning_rate": 2.7629220892653685e-05, "loss": 0.06294918060302734, "step": 1762 }, { "epoch": 0.23835195105875992, "grad_norm": 0.051163315773010254, "learning_rate": 2.7625526607932378e-05, "loss": 0.06062507629394531, "step": 1763 }, { "epoch": 0.23848714785459585, "grad_norm": 0.07822441309690475, "learning_rate": 2.76218296945091e-05, "loss": 0.09968280792236328, "step": 1764 }, { "epoch": 0.23862234465043178, "grad_norm": 0.09619243443012238, "learning_rate": 2.7618130153153577e-05, "loss": 0.06036710739135742, "step": 1765 }, { "epoch": 0.23875754144626773, "grad_norm": 0.057991448789834976, "learning_rate": 2.7614427984636063e-05, "loss": 0.047690629959106445, "step": 1766 }, { "epoch": 0.23889273824210366, "grad_norm": 0.144164577126503, "learning_rate": 2.7610723189727377e-05, "loss": 0.0775446891784668, "step": 1767 }, { "epoch": 0.2390279350379396, "grad_norm": 0.35411012172698975, "learning_rate": 2.760701576919888e-05, "loss": 0.10361731052398682, "step": 1768 }, { "epoch": 0.23916313183377555, "grad_norm": 0.20320117473602295, "learning_rate": 2.760330572382246e-05, "loss": 0.10682868957519531, "step": 1769 }, { "epoch": 0.23929832862961148, "grad_norm": 0.09519585967063904, "learning_rate": 2.7599593054370584e-05, "loss": 0.03622698783874512, "step": 1770 }, { "epoch": 0.2394335254254474, "grad_norm": 0.22248098254203796, "learning_rate": 2.7595877761616246e-05, "loss": 0.06809139251708984, "step": 1771 }, { "epoch": 0.23956872222128336, "grad_norm": 0.20105934143066406, "learning_rate": 2.759215984633299e-05, "loss": 0.061814308166503906, "step": 1772 }, { "epoch": 0.2397039190171193, "grad_norm": 0.20335231721401215, "learning_rate": 2.7588439309294902e-05, "loss": 0.08295965194702148, "step": 1773 }, { "epoch": 0.23983911581295522, "grad_norm": 0.0795859843492508, "learning_rate": 2.7584716151276623e-05, "loss": 0.03898775577545166, "step": 1774 }, { "epoch": 0.23997431260879118, "grad_norm": 0.13627904653549194, "learning_rate": 2.7580990373053325e-05, "loss": 0.0668020248413086, "step": 1775 }, { "epoch": 0.2401095094046271, "grad_norm": 0.47427457571029663, "learning_rate": 2.7577261975400747e-05, "loss": 0.10455322265625, "step": 1776 }, { "epoch": 0.24024470620046304, "grad_norm": 0.1331705003976822, "learning_rate": 2.7573530959095154e-05, "loss": 0.07956361770629883, "step": 1777 }, { "epoch": 0.240379902996299, "grad_norm": 0.2390034794807434, "learning_rate": 2.756979732491336e-05, "loss": 0.08353757858276367, "step": 1778 }, { "epoch": 0.24051509979213492, "grad_norm": 0.25375720858573914, "learning_rate": 2.756606107363274e-05, "loss": 0.07422256469726562, "step": 1779 }, { "epoch": 0.24065029658797085, "grad_norm": 0.20521622896194458, "learning_rate": 2.7562322206031192e-05, "loss": 0.07655715942382812, "step": 1780 }, { "epoch": 0.2407854933838068, "grad_norm": 0.08148027211427689, "learning_rate": 2.7558580722887166e-05, "loss": 0.05353736877441406, "step": 1781 }, { "epoch": 0.24092069017964274, "grad_norm": 0.07796908169984818, "learning_rate": 2.7554836624979666e-05, "loss": 0.060524940490722656, "step": 1782 }, { "epoch": 0.2410558869754787, "grad_norm": 0.21814604103565216, "learning_rate": 2.7551089913088233e-05, "loss": 0.062492966651916504, "step": 1783 }, { "epoch": 0.24119108377131462, "grad_norm": 0.10739132016897202, "learning_rate": 2.7547340587992948e-05, "loss": 0.06865286827087402, "step": 1784 }, { "epoch": 0.24132628056715055, "grad_norm": 0.14468367397785187, "learning_rate": 2.754358865047444e-05, "loss": 0.05918312072753906, "step": 1785 }, { "epoch": 0.2414614773629865, "grad_norm": 0.2825700342655182, "learning_rate": 2.7539834101313885e-05, "loss": 0.09009218215942383, "step": 1786 }, { "epoch": 0.24159667415882244, "grad_norm": 0.07572701573371887, "learning_rate": 2.7536076941293003e-05, "loss": 0.047743797302246094, "step": 1787 }, { "epoch": 0.24173187095465837, "grad_norm": 0.07999354600906372, "learning_rate": 2.753231717119405e-05, "loss": 0.100921630859375, "step": 1788 }, { "epoch": 0.24186706775049432, "grad_norm": 0.12332987040281296, "learning_rate": 2.7528554791799826e-05, "loss": 0.06456756591796875, "step": 1789 }, { "epoch": 0.24200226454633025, "grad_norm": 0.1570417284965515, "learning_rate": 2.7524789803893686e-05, "loss": 0.06658697128295898, "step": 1790 }, { "epoch": 0.24213746134216618, "grad_norm": 0.16550730168819427, "learning_rate": 2.7521022208259526e-05, "loss": 0.05148625373840332, "step": 1791 }, { "epoch": 0.24227265813800214, "grad_norm": 0.08395404368638992, "learning_rate": 2.7517252005681762e-05, "loss": 0.07175612449645996, "step": 1792 }, { "epoch": 0.24240785493383807, "grad_norm": 0.07427235692739487, "learning_rate": 2.7513479196945385e-05, "loss": 0.042111873626708984, "step": 1793 }, { "epoch": 0.242543051729674, "grad_norm": 0.10649193823337555, "learning_rate": 2.750970378283591e-05, "loss": 0.06889820098876953, "step": 1794 }, { "epoch": 0.24267824852550995, "grad_norm": 0.07281965017318726, "learning_rate": 2.7505925764139398e-05, "loss": 0.06787073612213135, "step": 1795 }, { "epoch": 0.24281344532134588, "grad_norm": 0.1479542851448059, "learning_rate": 2.7502145141642447e-05, "loss": 0.10392379760742188, "step": 1796 }, { "epoch": 0.2429486421171818, "grad_norm": 0.17237550020217896, "learning_rate": 2.7498361916132212e-05, "loss": 0.08661484718322754, "step": 1797 }, { "epoch": 0.24308383891301777, "grad_norm": 0.10848338901996613, "learning_rate": 2.7494576088396376e-05, "loss": 0.058625221252441406, "step": 1798 }, { "epoch": 0.2432190357088537, "grad_norm": 0.19653986394405365, "learning_rate": 2.749078765922317e-05, "loss": 0.10975790023803711, "step": 1799 }, { "epoch": 0.24335423250468963, "grad_norm": 0.1427716165781021, "learning_rate": 2.7486996629401366e-05, "loss": 0.0874488353729248, "step": 1800 }, { "epoch": 0.24348942930052558, "grad_norm": 0.1089082807302475, "learning_rate": 2.7483202999720272e-05, "loss": 0.0858912467956543, "step": 1801 }, { "epoch": 0.2436246260963615, "grad_norm": 0.07230830192565918, "learning_rate": 2.7479406770969747e-05, "loss": 0.0698235034942627, "step": 1802 }, { "epoch": 0.24375982289219744, "grad_norm": 0.07871078699827194, "learning_rate": 2.7475607943940182e-05, "loss": 0.07205390930175781, "step": 1803 }, { "epoch": 0.2438950196880334, "grad_norm": 0.19898729026317596, "learning_rate": 2.7471806519422514e-05, "loss": 0.07661724090576172, "step": 1804 }, { "epoch": 0.24403021648386933, "grad_norm": 0.07905599474906921, "learning_rate": 2.746800249820822e-05, "loss": 0.06638145446777344, "step": 1805 }, { "epoch": 0.24416541327970528, "grad_norm": 0.043868523091077805, "learning_rate": 2.7464195881089323e-05, "loss": 0.0452427864074707, "step": 1806 }, { "epoch": 0.2443006100755412, "grad_norm": 0.12468072772026062, "learning_rate": 2.746038666885837e-05, "loss": 0.0823965072631836, "step": 1807 }, { "epoch": 0.24443580687137714, "grad_norm": 0.17508931457996368, "learning_rate": 2.7456574862308474e-05, "loss": 0.0991206169128418, "step": 1808 }, { "epoch": 0.2445710036672131, "grad_norm": 0.15082964301109314, "learning_rate": 2.745276046223326e-05, "loss": 0.08705997467041016, "step": 1809 }, { "epoch": 0.24470620046304903, "grad_norm": 0.18758554756641388, "learning_rate": 2.744894346942691e-05, "loss": 0.09409761428833008, "step": 1810 }, { "epoch": 0.24484139725888496, "grad_norm": 0.1278875768184662, "learning_rate": 2.744512388468415e-05, "loss": 0.0665130615234375, "step": 1811 }, { "epoch": 0.24497659405472091, "grad_norm": 0.1592629849910736, "learning_rate": 2.7441301708800227e-05, "loss": 0.08577775955200195, "step": 1812 }, { "epoch": 0.24511179085055684, "grad_norm": 0.17943112552165985, "learning_rate": 2.7437476942570942e-05, "loss": 0.07355308532714844, "step": 1813 }, { "epoch": 0.24524698764639277, "grad_norm": 0.09428275376558304, "learning_rate": 2.7433649586792637e-05, "loss": 0.06973576545715332, "step": 1814 }, { "epoch": 0.24538218444222873, "grad_norm": 0.10559109598398209, "learning_rate": 2.7429819642262178e-05, "loss": 0.07807350158691406, "step": 1815 }, { "epoch": 0.24551738123806466, "grad_norm": 0.05933111533522606, "learning_rate": 2.7425987109776994e-05, "loss": 0.046124935150146484, "step": 1816 }, { "epoch": 0.2456525780339006, "grad_norm": 0.08133486658334732, "learning_rate": 2.7422151990135022e-05, "loss": 0.08216977119445801, "step": 1817 }, { "epoch": 0.24578777482973654, "grad_norm": 0.08142583072185516, "learning_rate": 2.741831428413477e-05, "loss": 0.050116539001464844, "step": 1818 }, { "epoch": 0.24592297162557247, "grad_norm": 0.11490466445684433, "learning_rate": 2.7414473992575257e-05, "loss": 0.0796046257019043, "step": 1819 }, { "epoch": 0.2460581684214084, "grad_norm": 0.08164677023887634, "learning_rate": 2.7410631116256054e-05, "loss": 0.05349743366241455, "step": 1820 }, { "epoch": 0.24619336521724436, "grad_norm": 0.09565792977809906, "learning_rate": 2.7406785655977275e-05, "loss": 0.055932044982910156, "step": 1821 }, { "epoch": 0.2463285620130803, "grad_norm": 0.05333436280488968, "learning_rate": 2.7402937612539563e-05, "loss": 0.061859846115112305, "step": 1822 }, { "epoch": 0.24646375880891622, "grad_norm": 0.044519152492284775, "learning_rate": 2.7399086986744095e-05, "loss": 0.061359405517578125, "step": 1823 }, { "epoch": 0.24659895560475217, "grad_norm": 0.13185469806194305, "learning_rate": 2.7395233779392598e-05, "loss": 0.06690573692321777, "step": 1824 }, { "epoch": 0.2467341524005881, "grad_norm": 0.09143054485321045, "learning_rate": 2.739137799128733e-05, "loss": 0.09738707542419434, "step": 1825 }, { "epoch": 0.24686934919642403, "grad_norm": 0.06961528211832047, "learning_rate": 2.7387519623231085e-05, "loss": 0.0673065185546875, "step": 1826 }, { "epoch": 0.24700454599226, "grad_norm": 0.06789089739322662, "learning_rate": 2.7383658676027195e-05, "loss": 0.07415652275085449, "step": 1827 }, { "epoch": 0.24713974278809592, "grad_norm": 0.1685425192117691, "learning_rate": 2.7379795150479535e-05, "loss": 0.08176422119140625, "step": 1828 }, { "epoch": 0.24727493958393187, "grad_norm": 0.08949195593595505, "learning_rate": 2.73759290473925e-05, "loss": 0.05508732795715332, "step": 1829 }, { "epoch": 0.2474101363797678, "grad_norm": 0.0691029280424118, "learning_rate": 2.7372060367571044e-05, "loss": 0.06569278240203857, "step": 1830 }, { "epoch": 0.24754533317560373, "grad_norm": 0.15121932327747345, "learning_rate": 2.7368189111820648e-05, "loss": 0.08872222900390625, "step": 1831 }, { "epoch": 0.2476805299714397, "grad_norm": 0.08026376366615295, "learning_rate": 2.736431528094732e-05, "loss": 0.051239728927612305, "step": 1832 }, { "epoch": 0.24781572676727562, "grad_norm": 0.04872721806168556, "learning_rate": 2.7360438875757614e-05, "loss": 0.04367685317993164, "step": 1833 }, { "epoch": 0.24795092356311155, "grad_norm": 0.09968399256467819, "learning_rate": 2.7356559897058624e-05, "loss": 0.07347869873046875, "step": 1834 }, { "epoch": 0.2480861203589475, "grad_norm": 0.17475005984306335, "learning_rate": 2.735267834565797e-05, "loss": 0.10741519927978516, "step": 1835 }, { "epoch": 0.24822131715478343, "grad_norm": 0.15285226702690125, "learning_rate": 2.734879422236381e-05, "loss": 0.07441902160644531, "step": 1836 }, { "epoch": 0.24835651395061936, "grad_norm": 0.119447261095047, "learning_rate": 2.734490752798484e-05, "loss": 0.051962852478027344, "step": 1837 }, { "epoch": 0.24849171074645532, "grad_norm": 0.22703321278095245, "learning_rate": 2.7341018263330296e-05, "loss": 0.09919500350952148, "step": 1838 }, { "epoch": 0.24862690754229125, "grad_norm": 0.14595769345760345, "learning_rate": 2.7337126429209935e-05, "loss": 0.0747079849243164, "step": 1839 }, { "epoch": 0.24876210433812718, "grad_norm": 0.0487702377140522, "learning_rate": 2.7333232026434064e-05, "loss": 0.06485152244567871, "step": 1840 }, { "epoch": 0.24889730113396313, "grad_norm": 0.14004072546958923, "learning_rate": 2.7329335055813517e-05, "loss": 0.07010078430175781, "step": 1841 }, { "epoch": 0.24903249792979906, "grad_norm": 0.05502966418862343, "learning_rate": 2.732543551815966e-05, "loss": 0.05611228942871094, "step": 1842 }, { "epoch": 0.249167694725635, "grad_norm": 0.15478190779685974, "learning_rate": 2.7321533414284404e-05, "loss": 0.07949399948120117, "step": 1843 }, { "epoch": 0.24930289152147095, "grad_norm": 0.043452560901641846, "learning_rate": 2.731762874500018e-05, "loss": 0.045624732971191406, "step": 1844 }, { "epoch": 0.24943808831730688, "grad_norm": 0.08242925256490707, "learning_rate": 2.7313721511119972e-05, "loss": 0.07660722732543945, "step": 1845 }, { "epoch": 0.2495732851131428, "grad_norm": 0.1302908957004547, "learning_rate": 2.7309811713457275e-05, "loss": 0.07224464416503906, "step": 1846 }, { "epoch": 0.24970848190897876, "grad_norm": 0.16624121367931366, "learning_rate": 2.730589935282614e-05, "loss": 0.09778642654418945, "step": 1847 }, { "epoch": 0.2498436787048147, "grad_norm": 0.09746264666318893, "learning_rate": 2.7301984430041135e-05, "loss": 0.058812618255615234, "step": 1848 }, { "epoch": 0.24997887550065062, "grad_norm": 0.08061551302671432, "learning_rate": 2.7298066945917368e-05, "loss": 0.07266318798065186, "step": 1849 }, { "epoch": 0.2501140722964866, "grad_norm": 0.18297156691551208, "learning_rate": 2.7294146901270482e-05, "loss": 0.09313583374023438, "step": 1850 }, { "epoch": 0.25024926909232253, "grad_norm": 0.07570967078208923, "learning_rate": 2.7290224296916653e-05, "loss": 0.0724782943725586, "step": 1851 }, { "epoch": 0.25038446588815844, "grad_norm": 0.10806790739297867, "learning_rate": 2.7286299133672584e-05, "loss": 0.05780839920043945, "step": 1852 }, { "epoch": 0.2505196626839944, "grad_norm": 0.05563180893659592, "learning_rate": 2.728237141235552e-05, "loss": 0.06669878959655762, "step": 1853 }, { "epoch": 0.25065485947983035, "grad_norm": 0.12812623381614685, "learning_rate": 2.727844113378322e-05, "loss": 0.07734537124633789, "step": 1854 }, { "epoch": 0.25079005627566625, "grad_norm": 0.11334457993507385, "learning_rate": 2.7274508298774013e-05, "loss": 0.06672239303588867, "step": 1855 }, { "epoch": 0.2509252530715022, "grad_norm": 0.10986971855163574, "learning_rate": 2.727057290814672e-05, "loss": 0.0865480899810791, "step": 1856 }, { "epoch": 0.25106044986733816, "grad_norm": 0.2216511070728302, "learning_rate": 2.7266634962720704e-05, "loss": 0.07958316802978516, "step": 1857 }, { "epoch": 0.25119564666317407, "grad_norm": 0.33249416947364807, "learning_rate": 2.726269446331588e-05, "loss": 0.09619903564453125, "step": 1858 }, { "epoch": 0.25133084345901, "grad_norm": 0.05294548347592354, "learning_rate": 2.7258751410752676e-05, "loss": 0.046959877014160156, "step": 1859 }, { "epoch": 0.251466040254846, "grad_norm": 0.12228503823280334, "learning_rate": 2.725480580585206e-05, "loss": 0.05336642265319824, "step": 1860 }, { "epoch": 0.2516012370506819, "grad_norm": 0.11407537758350372, "learning_rate": 2.7250857649435522e-05, "loss": 0.05225372314453125, "step": 1861 }, { "epoch": 0.25173643384651784, "grad_norm": 0.2380487620830536, "learning_rate": 2.724690694232509e-05, "loss": 0.07039976119995117, "step": 1862 }, { "epoch": 0.2518716306423538, "grad_norm": 0.03274841979146004, "learning_rate": 2.7242953685343327e-05, "loss": 0.036340951919555664, "step": 1863 }, { "epoch": 0.2520068274381897, "grad_norm": 0.29131993651390076, "learning_rate": 2.723899787931332e-05, "loss": 0.09333992004394531, "step": 1864 }, { "epoch": 0.25214202423402565, "grad_norm": 0.10917803645133972, "learning_rate": 2.7235039525058684e-05, "loss": 0.09343409538269043, "step": 1865 }, { "epoch": 0.2522772210298616, "grad_norm": 0.07211418449878693, "learning_rate": 2.7231078623403575e-05, "loss": 0.06926798820495605, "step": 1866 }, { "epoch": 0.2524124178256975, "grad_norm": 0.1154571995139122, "learning_rate": 2.722711517517267e-05, "loss": 0.06325149536132812, "step": 1867 }, { "epoch": 0.25254761462153347, "grad_norm": 0.22004513442516327, "learning_rate": 2.7223149181191187e-05, "loss": 0.08453893661499023, "step": 1868 }, { "epoch": 0.2526828114173694, "grad_norm": 0.08724702894687653, "learning_rate": 2.7219180642284864e-05, "loss": 0.07194232940673828, "step": 1869 }, { "epoch": 0.2528180082132053, "grad_norm": 0.05822712555527687, "learning_rate": 2.721520955927997e-05, "loss": 0.06417250633239746, "step": 1870 }, { "epoch": 0.2529532050090413, "grad_norm": 0.18000958859920502, "learning_rate": 2.7211235933003302e-05, "loss": 0.06889104843139648, "step": 1871 }, { "epoch": 0.25308840180487724, "grad_norm": 0.10869405418634415, "learning_rate": 2.72072597642822e-05, "loss": 0.08119058609008789, "step": 1872 }, { "epoch": 0.25322359860071314, "grad_norm": 0.13462482392787933, "learning_rate": 2.7203281053944512e-05, "loss": 0.08239626884460449, "step": 1873 }, { "epoch": 0.2533587953965491, "grad_norm": 0.15795378386974335, "learning_rate": 2.719929980281864e-05, "loss": 0.06319999694824219, "step": 1874 }, { "epoch": 0.25349399219238505, "grad_norm": 0.16120566427707672, "learning_rate": 2.719531601173349e-05, "loss": 0.06544899940490723, "step": 1875 }, { "epoch": 0.25362918898822095, "grad_norm": 0.12590977549552917, "learning_rate": 2.7191329681518512e-05, "loss": 0.0908041000366211, "step": 1876 }, { "epoch": 0.2537643857840569, "grad_norm": 0.07940160483121872, "learning_rate": 2.7187340813003682e-05, "loss": 0.05701804161071777, "step": 1877 }, { "epoch": 0.25389958257989287, "grad_norm": 0.1196286678314209, "learning_rate": 2.718334940701951e-05, "loss": 0.05937361717224121, "step": 1878 }, { "epoch": 0.25403477937572877, "grad_norm": 0.1276506930589676, "learning_rate": 2.7179355464397014e-05, "loss": 0.10046029090881348, "step": 1879 }, { "epoch": 0.2541699761715647, "grad_norm": 0.0757499560713768, "learning_rate": 2.7175358985967763e-05, "loss": 0.032299041748046875, "step": 1880 }, { "epoch": 0.2543051729674007, "grad_norm": 0.1111755296587944, "learning_rate": 2.717135997256385e-05, "loss": 0.061296701431274414, "step": 1881 }, { "epoch": 0.2544403697632366, "grad_norm": 0.1633571982383728, "learning_rate": 2.7167358425017882e-05, "loss": 0.08982467651367188, "step": 1882 }, { "epoch": 0.25457556655907254, "grad_norm": 0.23074117302894592, "learning_rate": 2.7163354344163004e-05, "loss": 0.08429047465324402, "step": 1883 }, { "epoch": 0.2547107633549085, "grad_norm": 0.09363125264644623, "learning_rate": 2.715934773083289e-05, "loss": 0.07416868209838867, "step": 1884 }, { "epoch": 0.25484596015074445, "grad_norm": 0.09598194062709808, "learning_rate": 2.715533858586174e-05, "loss": 0.07480204105377197, "step": 1885 }, { "epoch": 0.25498115694658036, "grad_norm": 0.1794494241476059, "learning_rate": 2.715132691008427e-05, "loss": 0.06553339958190918, "step": 1886 }, { "epoch": 0.2551163537424163, "grad_norm": 0.25364989042282104, "learning_rate": 2.714731270433574e-05, "loss": 0.07598268985748291, "step": 1887 }, { "epoch": 0.25525155053825227, "grad_norm": 0.08078136295080185, "learning_rate": 2.7143295969451933e-05, "loss": 0.05204680562019348, "step": 1888 }, { "epoch": 0.25538674733408817, "grad_norm": 0.1530763804912567, "learning_rate": 2.7139276706269147e-05, "loss": 0.08506584167480469, "step": 1889 }, { "epoch": 0.25552194412992413, "grad_norm": 0.06039128080010414, "learning_rate": 2.7135254915624213e-05, "loss": 0.06613683700561523, "step": 1890 }, { "epoch": 0.2556571409257601, "grad_norm": 0.11038939654827118, "learning_rate": 2.7131230598354497e-05, "loss": 0.11463785171508789, "step": 1891 }, { "epoch": 0.255792337721596, "grad_norm": 0.047232795506715775, "learning_rate": 2.712720375529787e-05, "loss": 0.04717755317687988, "step": 1892 }, { "epoch": 0.25592753451743194, "grad_norm": 0.058000437915325165, "learning_rate": 2.7123174387292758e-05, "loss": 0.05485057830810547, "step": 1893 }, { "epoch": 0.2560627313132679, "grad_norm": 0.1676839143037796, "learning_rate": 2.7119142495178088e-05, "loss": 0.06298685073852539, "step": 1894 }, { "epoch": 0.2561979281091038, "grad_norm": 0.13864421844482422, "learning_rate": 2.711510807979333e-05, "loss": 0.06916332244873047, "step": 1895 }, { "epoch": 0.25633312490493976, "grad_norm": 0.10523370653390884, "learning_rate": 2.7111071141978452e-05, "loss": 0.06897759437561035, "step": 1896 }, { "epoch": 0.2564683217007757, "grad_norm": 0.07564353197813034, "learning_rate": 2.7107031682573987e-05, "loss": 0.08072948455810547, "step": 1897 }, { "epoch": 0.2566035184966116, "grad_norm": 0.07633915543556213, "learning_rate": 2.710298970242096e-05, "loss": 0.08800125122070312, "step": 1898 }, { "epoch": 0.2567387152924476, "grad_norm": 0.13839472830295563, "learning_rate": 2.7098945202360937e-05, "loss": 0.11099767684936523, "step": 1899 }, { "epoch": 0.25687391208828353, "grad_norm": 0.17224784195423126, "learning_rate": 2.7094898183236e-05, "loss": 0.08221054077148438, "step": 1900 }, { "epoch": 0.25700910888411943, "grad_norm": 0.07429969310760498, "learning_rate": 2.709084864588877e-05, "loss": 0.05411934852600098, "step": 1901 }, { "epoch": 0.2571443056799554, "grad_norm": 0.12401656061410904, "learning_rate": 2.708679659116237e-05, "loss": 0.07936358451843262, "step": 1902 }, { "epoch": 0.25727950247579134, "grad_norm": 0.04505674168467522, "learning_rate": 2.708274201990047e-05, "loss": 0.035658836364746094, "step": 1903 }, { "epoch": 0.25741469927162725, "grad_norm": 0.06026989594101906, "learning_rate": 2.7078684932947247e-05, "loss": 0.06824731826782227, "step": 1904 }, { "epoch": 0.2575498960674632, "grad_norm": 0.0819808766245842, "learning_rate": 2.7074625331147407e-05, "loss": 0.0772542953491211, "step": 1905 }, { "epoch": 0.25768509286329916, "grad_norm": 0.07260015606880188, "learning_rate": 2.7070563215346184e-05, "loss": 0.04888296127319336, "step": 1906 }, { "epoch": 0.25782028965913506, "grad_norm": 0.13175158202648163, "learning_rate": 2.7066498586389332e-05, "loss": 0.0486409068107605, "step": 1907 }, { "epoch": 0.257955486454971, "grad_norm": 0.04404719918966293, "learning_rate": 2.7062431445123127e-05, "loss": 0.04677295684814453, "step": 1908 }, { "epoch": 0.258090683250807, "grad_norm": 0.07209489494562149, "learning_rate": 2.705836179239437e-05, "loss": 0.06144356727600098, "step": 1909 }, { "epoch": 0.2582258800466429, "grad_norm": 0.06582480669021606, "learning_rate": 2.705428962905039e-05, "loss": 0.06948471069335938, "step": 1910 }, { "epoch": 0.25836107684247883, "grad_norm": 0.05781620740890503, "learning_rate": 2.705021495593902e-05, "loss": 0.04116058349609375, "step": 1911 }, { "epoch": 0.2584962736383148, "grad_norm": 0.08257302641868591, "learning_rate": 2.704613777390864e-05, "loss": 0.06554484367370605, "step": 1912 }, { "epoch": 0.2586314704341507, "grad_norm": 0.07617247104644775, "learning_rate": 2.7042058083808135e-05, "loss": 0.08389806747436523, "step": 1913 }, { "epoch": 0.25876666722998665, "grad_norm": 0.11628446727991104, "learning_rate": 2.7037975886486928e-05, "loss": 0.08635663986206055, "step": 1914 }, { "epoch": 0.2589018640258226, "grad_norm": 0.08471303433179855, "learning_rate": 2.7033891182794942e-05, "loss": 0.07380294799804688, "step": 1915 }, { "epoch": 0.2590370608216585, "grad_norm": 0.08547315746545792, "learning_rate": 2.7029803973582642e-05, "loss": 0.050653934478759766, "step": 1916 }, { "epoch": 0.25917225761749446, "grad_norm": 0.055937085300683975, "learning_rate": 2.7025714259701e-05, "loss": 0.03486669063568115, "step": 1917 }, { "epoch": 0.2593074544133304, "grad_norm": 0.13840137422084808, "learning_rate": 2.7021622042001524e-05, "loss": 0.0895574688911438, "step": 1918 }, { "epoch": 0.2594426512091663, "grad_norm": 0.04367328807711601, "learning_rate": 2.701752732133623e-05, "loss": 0.04321324825286865, "step": 1919 }, { "epoch": 0.2595778480050023, "grad_norm": 0.15681852400302887, "learning_rate": 2.7013430098557664e-05, "loss": 0.08592557907104492, "step": 1920 }, { "epoch": 0.25971304480083823, "grad_norm": 0.1759447455406189, "learning_rate": 2.7009330374518885e-05, "loss": 0.07019585371017456, "step": 1921 }, { "epoch": 0.25984824159667413, "grad_norm": 0.1534302830696106, "learning_rate": 2.7005228150073483e-05, "loss": 0.06428885459899902, "step": 1922 }, { "epoch": 0.2599834383925101, "grad_norm": 0.21793781220912933, "learning_rate": 2.7001123426075558e-05, "loss": 0.11581873893737793, "step": 1923 }, { "epoch": 0.26011863518834605, "grad_norm": 0.09259770065546036, "learning_rate": 2.699701620337974e-05, "loss": 0.061139583587646484, "step": 1924 }, { "epoch": 0.26025383198418195, "grad_norm": 0.2145368456840515, "learning_rate": 2.699290648284117e-05, "loss": 0.05533432960510254, "step": 1925 }, { "epoch": 0.2603890287800179, "grad_norm": 0.13483993709087372, "learning_rate": 2.6988794265315522e-05, "loss": 0.06428718566894531, "step": 1926 }, { "epoch": 0.26052422557585386, "grad_norm": 0.15750481188297272, "learning_rate": 2.698467955165897e-05, "loss": 0.06235980987548828, "step": 1927 }, { "epoch": 0.26065942237168976, "grad_norm": 0.17170867323875427, "learning_rate": 2.6980562342728226e-05, "loss": 0.0690385103225708, "step": 1928 }, { "epoch": 0.2607946191675257, "grad_norm": 0.055610764771699905, "learning_rate": 2.6976442639380516e-05, "loss": 0.04764509201049805, "step": 1929 }, { "epoch": 0.2609298159633617, "grad_norm": 0.16008204221725464, "learning_rate": 2.6972320442473583e-05, "loss": 0.058400630950927734, "step": 1930 }, { "epoch": 0.26106501275919763, "grad_norm": 0.09385678172111511, "learning_rate": 2.6968195752865686e-05, "loss": 0.0539708137512207, "step": 1931 }, { "epoch": 0.26120020955503354, "grad_norm": 0.09797768294811249, "learning_rate": 2.6964068571415613e-05, "loss": 0.06435680389404297, "step": 1932 }, { "epoch": 0.2613354063508695, "grad_norm": 0.10132727026939392, "learning_rate": 2.6959938898982667e-05, "loss": 0.033181071281433105, "step": 1933 }, { "epoch": 0.26147060314670545, "grad_norm": 0.17216293513774872, "learning_rate": 2.6955806736426657e-05, "loss": 0.07015323638916016, "step": 1934 }, { "epoch": 0.26160579994254135, "grad_norm": 0.08743895590305328, "learning_rate": 2.6951672084607937e-05, "loss": 0.06944751739501953, "step": 1935 }, { "epoch": 0.2617409967383773, "grad_norm": 0.050038017332553864, "learning_rate": 2.694753494438735e-05, "loss": 0.05537307262420654, "step": 1936 }, { "epoch": 0.26187619353421326, "grad_norm": 0.14453940093517303, "learning_rate": 2.6943395316626272e-05, "loss": 0.07186472415924072, "step": 1937 }, { "epoch": 0.26201139033004917, "grad_norm": 0.12734849750995636, "learning_rate": 2.69392532021866e-05, "loss": 0.08086848258972168, "step": 1938 }, { "epoch": 0.2621465871258851, "grad_norm": 0.05402849242091179, "learning_rate": 2.693510860193075e-05, "loss": 0.03654217720031738, "step": 1939 }, { "epoch": 0.2622817839217211, "grad_norm": 0.07365751266479492, "learning_rate": 2.6930961516721638e-05, "loss": 0.05237889289855957, "step": 1940 }, { "epoch": 0.262416980717557, "grad_norm": 0.12777666747570038, "learning_rate": 2.6926811947422717e-05, "loss": 0.05820131301879883, "step": 1941 }, { "epoch": 0.26255217751339294, "grad_norm": 0.13499997556209564, "learning_rate": 2.6922659894897946e-05, "loss": 0.06490612030029297, "step": 1942 }, { "epoch": 0.2626873743092289, "grad_norm": 0.1302352249622345, "learning_rate": 2.6918505360011805e-05, "loss": 0.09477472305297852, "step": 1943 }, { "epoch": 0.2628225711050648, "grad_norm": 0.13706472516059875, "learning_rate": 2.6914348343629292e-05, "loss": 0.07878828048706055, "step": 1944 }, { "epoch": 0.26295776790090075, "grad_norm": 0.0754261240363121, "learning_rate": 2.6910188846615918e-05, "loss": 0.05060696601867676, "step": 1945 }, { "epoch": 0.2630929646967367, "grad_norm": 0.1018480509519577, "learning_rate": 2.6906026869837714e-05, "loss": 0.07848453521728516, "step": 1946 }, { "epoch": 0.2632281614925726, "grad_norm": 0.12182067334651947, "learning_rate": 2.6901862414161222e-05, "loss": 0.06874632835388184, "step": 1947 }, { "epoch": 0.26336335828840857, "grad_norm": 0.1469702124595642, "learning_rate": 2.689769548045351e-05, "loss": 0.04393577575683594, "step": 1948 }, { "epoch": 0.2634985550842445, "grad_norm": 0.29771187901496887, "learning_rate": 2.6893526069582154e-05, "loss": 0.08208227157592773, "step": 1949 }, { "epoch": 0.2636337518800804, "grad_norm": 0.15200366079807281, "learning_rate": 2.6889354182415245e-05, "loss": 0.07265377044677734, "step": 1950 }, { "epoch": 0.2637689486759164, "grad_norm": 0.19280299544334412, "learning_rate": 2.688517981982139e-05, "loss": 0.066864013671875, "step": 1951 }, { "epoch": 0.26390414547175234, "grad_norm": 0.08240275830030441, "learning_rate": 2.6881002982669723e-05, "loss": 0.10303783416748047, "step": 1952 }, { "epoch": 0.26403934226758824, "grad_norm": 0.08139554411172867, "learning_rate": 2.6876823671829874e-05, "loss": 0.06015300750732422, "step": 1953 }, { "epoch": 0.2641745390634242, "grad_norm": 0.2022184580564499, "learning_rate": 2.6872641888172e-05, "loss": 0.07303333282470703, "step": 1954 }, { "epoch": 0.26430973585926015, "grad_norm": 0.0518563911318779, "learning_rate": 2.6868457632566774e-05, "loss": 0.04127919673919678, "step": 1955 }, { "epoch": 0.26444493265509605, "grad_norm": 0.14683027565479279, "learning_rate": 2.6864270905885377e-05, "loss": 0.06519961357116699, "step": 1956 }, { "epoch": 0.264580129450932, "grad_norm": 0.1502714604139328, "learning_rate": 2.6860081708999515e-05, "loss": 0.06728219985961914, "step": 1957 }, { "epoch": 0.26471532624676797, "grad_norm": 0.09536710381507874, "learning_rate": 2.685589004278139e-05, "loss": 0.06737709045410156, "step": 1958 }, { "epoch": 0.26485052304260387, "grad_norm": 0.19764938950538635, "learning_rate": 2.6851695908103737e-05, "loss": 0.0974421501159668, "step": 1959 }, { "epoch": 0.2649857198384398, "grad_norm": 0.16358472406864166, "learning_rate": 2.6847499305839796e-05, "loss": 0.07037496566772461, "step": 1960 }, { "epoch": 0.2651209166342758, "grad_norm": 0.04844284802675247, "learning_rate": 2.684330023686332e-05, "loss": 0.048337459564208984, "step": 1961 }, { "epoch": 0.2652561134301117, "grad_norm": 0.029073940590023994, "learning_rate": 2.6839098702048577e-05, "loss": 0.020238637924194336, "step": 1962 }, { "epoch": 0.26539131022594764, "grad_norm": 0.10952551662921906, "learning_rate": 2.683489470227035e-05, "loss": 0.07552814483642578, "step": 1963 }, { "epoch": 0.2655265070217836, "grad_norm": 0.1424301117658615, "learning_rate": 2.6830688238403936e-05, "loss": 0.07334160804748535, "step": 1964 }, { "epoch": 0.2656617038176195, "grad_norm": 0.10241356492042542, "learning_rate": 2.682647931132514e-05, "loss": 0.05843830108642578, "step": 1965 }, { "epoch": 0.26579690061345546, "grad_norm": 0.037290506064891815, "learning_rate": 2.682226792191029e-05, "loss": 0.03623199462890625, "step": 1966 }, { "epoch": 0.2659320974092914, "grad_norm": 0.127027690410614, "learning_rate": 2.681805407103621e-05, "loss": 0.046491026878356934, "step": 1967 }, { "epoch": 0.2660672942051273, "grad_norm": 0.07609870284795761, "learning_rate": 2.6813837759580253e-05, "loss": 0.08343672752380371, "step": 1968 }, { "epoch": 0.26620249100096327, "grad_norm": 0.03872406855225563, "learning_rate": 2.6809618988420274e-05, "loss": 0.04909372329711914, "step": 1969 }, { "epoch": 0.2663376877967992, "grad_norm": 0.10477709770202637, "learning_rate": 2.6805397758434647e-05, "loss": 0.07259082794189453, "step": 1970 }, { "epoch": 0.26647288459263513, "grad_norm": 0.06550008058547974, "learning_rate": 2.6801174070502248e-05, "loss": 0.05088019371032715, "step": 1971 }, { "epoch": 0.2666080813884711, "grad_norm": 0.1912948042154312, "learning_rate": 2.679694792550248e-05, "loss": 0.0713052749633789, "step": 1972 }, { "epoch": 0.26674327818430704, "grad_norm": 0.06793061643838882, "learning_rate": 2.6792719324315248e-05, "loss": 0.06447601318359375, "step": 1973 }, { "epoch": 0.26687847498014294, "grad_norm": 0.11101162433624268, "learning_rate": 2.678848826782096e-05, "loss": 0.07129859924316406, "step": 1974 }, { "epoch": 0.2670136717759789, "grad_norm": 0.08733350038528442, "learning_rate": 2.678425475690055e-05, "loss": 0.05308079719543457, "step": 1975 }, { "epoch": 0.26714886857181486, "grad_norm": 0.1475624442100525, "learning_rate": 2.6780018792435464e-05, "loss": 0.06163835525512695, "step": 1976 }, { "epoch": 0.2672840653676508, "grad_norm": 0.061899274587631226, "learning_rate": 2.6775780375307645e-05, "loss": 0.0669260025024414, "step": 1977 }, { "epoch": 0.2674192621634867, "grad_norm": 0.08895330876111984, "learning_rate": 2.6771539506399555e-05, "loss": 0.062021493911743164, "step": 1978 }, { "epoch": 0.26755445895932267, "grad_norm": 0.07559031248092651, "learning_rate": 2.6767296186594165e-05, "loss": 0.06453144550323486, "step": 1979 }, { "epoch": 0.26768965575515863, "grad_norm": 0.0834704115986824, "learning_rate": 2.676305041677496e-05, "loss": 0.0607830286026001, "step": 1980 }, { "epoch": 0.26782485255099453, "grad_norm": 0.050367336720228195, "learning_rate": 2.675880219782593e-05, "loss": 0.031048297882080078, "step": 1981 }, { "epoch": 0.2679600493468305, "grad_norm": 0.045134637504816055, "learning_rate": 2.6754551530631575e-05, "loss": 0.05095529556274414, "step": 1982 }, { "epoch": 0.26809524614266644, "grad_norm": 0.11680918186903, "learning_rate": 2.6750298416076907e-05, "loss": 0.06729412078857422, "step": 1983 }, { "epoch": 0.26823044293850234, "grad_norm": 0.14327314496040344, "learning_rate": 2.674604285504745e-05, "loss": 0.10972738265991211, "step": 1984 }, { "epoch": 0.2683656397343383, "grad_norm": 0.05404512584209442, "learning_rate": 2.6741784848429235e-05, "loss": 0.043704986572265625, "step": 1985 }, { "epoch": 0.26850083653017426, "grad_norm": 0.1642342209815979, "learning_rate": 2.67375243971088e-05, "loss": 0.0786275863647461, "step": 1986 }, { "epoch": 0.26863603332601016, "grad_norm": 0.1660015881061554, "learning_rate": 2.6733261501973192e-05, "loss": 0.06278562545776367, "step": 1987 }, { "epoch": 0.2687712301218461, "grad_norm": 0.164725661277771, "learning_rate": 2.672899616390997e-05, "loss": 0.09509825706481934, "step": 1988 }, { "epoch": 0.2689064269176821, "grad_norm": 0.07612603902816772, "learning_rate": 2.67247283838072e-05, "loss": 0.07907819747924805, "step": 1989 }, { "epoch": 0.269041623713518, "grad_norm": 0.13096463680267334, "learning_rate": 2.6720458162553457e-05, "loss": 0.08989977836608887, "step": 1990 }, { "epoch": 0.26917682050935393, "grad_norm": 0.09332142025232315, "learning_rate": 2.6716185501037822e-05, "loss": 0.04581499099731445, "step": 1991 }, { "epoch": 0.2693120173051899, "grad_norm": 0.12690138816833496, "learning_rate": 2.671191040014989e-05, "loss": 0.1052560806274414, "step": 1992 }, { "epoch": 0.2694472141010258, "grad_norm": 0.0809553861618042, "learning_rate": 2.6707632860779756e-05, "loss": 0.06932902336120605, "step": 1993 }, { "epoch": 0.26958241089686175, "grad_norm": 0.07745625078678131, "learning_rate": 2.6703352883818024e-05, "loss": 0.050919532775878906, "step": 1994 }, { "epoch": 0.2697176076926977, "grad_norm": 0.18288680911064148, "learning_rate": 2.6699070470155816e-05, "loss": 0.07959508895874023, "step": 1995 }, { "epoch": 0.2698528044885336, "grad_norm": 0.10290535539388657, "learning_rate": 2.669478562068475e-05, "loss": 0.06593942642211914, "step": 1996 }, { "epoch": 0.26998800128436956, "grad_norm": 0.10326408594846725, "learning_rate": 2.6690498336296955e-05, "loss": 0.06656599044799805, "step": 1997 }, { "epoch": 0.2701231980802055, "grad_norm": 0.2663053572177887, "learning_rate": 2.6686208617885057e-05, "loss": 0.06684589385986328, "step": 1998 }, { "epoch": 0.2702583948760414, "grad_norm": 0.100665383040905, "learning_rate": 2.668191646634221e-05, "loss": 0.08602237701416016, "step": 1999 }, { "epoch": 0.2703935916718774, "grad_norm": 0.10618358105421066, "learning_rate": 2.667762188256206e-05, "loss": 0.052654385566711426, "step": 2000 }, { "epoch": 0.27052878846771333, "grad_norm": 0.2265174388885498, "learning_rate": 2.6673324867438764e-05, "loss": 0.08537483215332031, "step": 2001 }, { "epoch": 0.27066398526354923, "grad_norm": 0.12552383542060852, "learning_rate": 2.666902542186698e-05, "loss": 0.05460166931152344, "step": 2002 }, { "epoch": 0.2707991820593852, "grad_norm": 0.2202092409133911, "learning_rate": 2.666472354674187e-05, "loss": 0.08910870552062988, "step": 2003 }, { "epoch": 0.27093437885522115, "grad_norm": 0.03565860167145729, "learning_rate": 2.666041924295912e-05, "loss": 0.03602790832519531, "step": 2004 }, { "epoch": 0.27106957565105705, "grad_norm": 0.03264477103948593, "learning_rate": 2.6656112511414902e-05, "loss": 0.040204644203186035, "step": 2005 }, { "epoch": 0.271204772446893, "grad_norm": 0.0596921443939209, "learning_rate": 2.6651803353005896e-05, "loss": 0.05192756652832031, "step": 2006 }, { "epoch": 0.27133996924272896, "grad_norm": 0.08647216856479645, "learning_rate": 2.66474917686293e-05, "loss": 0.050931692123413086, "step": 2007 }, { "epoch": 0.27147516603856486, "grad_norm": 0.15114521980285645, "learning_rate": 2.664317775918281e-05, "loss": 0.06802058219909668, "step": 2008 }, { "epoch": 0.2716103628344008, "grad_norm": 0.3004302382469177, "learning_rate": 2.6638861325564615e-05, "loss": 0.0746757984161377, "step": 2009 }, { "epoch": 0.2717455596302368, "grad_norm": 0.11647263914346695, "learning_rate": 2.6634542468673432e-05, "loss": 0.04262232780456543, "step": 2010 }, { "epoch": 0.2718807564260727, "grad_norm": 0.10886501520872116, "learning_rate": 2.663022118940846e-05, "loss": 0.04444265365600586, "step": 2011 }, { "epoch": 0.27201595322190864, "grad_norm": 0.17771105468273163, "learning_rate": 2.662589748866942e-05, "loss": 0.05081295967102051, "step": 2012 }, { "epoch": 0.2721511500177446, "grad_norm": 0.09182816743850708, "learning_rate": 2.6621571367356522e-05, "loss": 0.03714632987976074, "step": 2013 }, { "epoch": 0.2722863468135805, "grad_norm": 0.1519690454006195, "learning_rate": 2.6617242826370495e-05, "loss": 0.08922791481018066, "step": 2014 }, { "epoch": 0.27242154360941645, "grad_norm": 0.08940640836954117, "learning_rate": 2.661291186661256e-05, "loss": 0.08684539794921875, "step": 2015 }, { "epoch": 0.2725567404052524, "grad_norm": 0.0886605754494667, "learning_rate": 2.6608578488984444e-05, "loss": 0.07495689392089844, "step": 2016 }, { "epoch": 0.2726919372010883, "grad_norm": 0.08003921806812286, "learning_rate": 2.6604242694388388e-05, "loss": 0.04020333290100098, "step": 2017 }, { "epoch": 0.27282713399692426, "grad_norm": 0.08868551254272461, "learning_rate": 2.6599904483727116e-05, "loss": 0.09149885177612305, "step": 2018 }, { "epoch": 0.2729623307927602, "grad_norm": 0.07572377473115921, "learning_rate": 2.6595563857903872e-05, "loss": 0.05034780502319336, "step": 2019 }, { "epoch": 0.2730975275885961, "grad_norm": 0.07474635541439056, "learning_rate": 2.6591220817822405e-05, "loss": 0.06969404220581055, "step": 2020 }, { "epoch": 0.2732327243844321, "grad_norm": 0.03147522360086441, "learning_rate": 2.658687536438694e-05, "loss": 0.03863334655761719, "step": 2021 }, { "epoch": 0.27336792118026804, "grad_norm": 0.13000202178955078, "learning_rate": 2.6582527498502243e-05, "loss": 0.06570768356323242, "step": 2022 }, { "epoch": 0.273503117976104, "grad_norm": 0.07879548519849777, "learning_rate": 2.6578177221073556e-05, "loss": 0.0561366081237793, "step": 2023 }, { "epoch": 0.2736383147719399, "grad_norm": 0.04455554485321045, "learning_rate": 2.6573824533006628e-05, "loss": 0.028705358505249023, "step": 2024 }, { "epoch": 0.27377351156777585, "grad_norm": 0.1452604979276657, "learning_rate": 2.6569469435207712e-05, "loss": 0.07401388883590698, "step": 2025 }, { "epoch": 0.2739087083636118, "grad_norm": 0.14333850145339966, "learning_rate": 2.656511192858356e-05, "loss": 0.05373525619506836, "step": 2026 }, { "epoch": 0.2740439051594477, "grad_norm": 0.07900907844305038, "learning_rate": 2.6560752014041438e-05, "loss": 0.051888227462768555, "step": 2027 }, { "epoch": 0.27417910195528367, "grad_norm": 0.07668299973011017, "learning_rate": 2.6556389692489098e-05, "loss": 0.07629776000976562, "step": 2028 }, { "epoch": 0.2743142987511196, "grad_norm": 0.09356392920017242, "learning_rate": 2.6552024964834795e-05, "loss": 0.05437636375427246, "step": 2029 }, { "epoch": 0.2744494955469555, "grad_norm": 0.10345875471830368, "learning_rate": 2.6547657831987286e-05, "loss": 0.07483243942260742, "step": 2030 }, { "epoch": 0.2745846923427915, "grad_norm": 0.09706155210733414, "learning_rate": 2.6543288294855843e-05, "loss": 0.02883601188659668, "step": 2031 }, { "epoch": 0.27471988913862744, "grad_norm": 0.09148816019296646, "learning_rate": 2.653891635435022e-05, "loss": 0.06745243072509766, "step": 2032 }, { "epoch": 0.27485508593446334, "grad_norm": 0.1024070531129837, "learning_rate": 2.653454201138068e-05, "loss": 0.10385990142822266, "step": 2033 }, { "epoch": 0.2749902827302993, "grad_norm": 0.17602460086345673, "learning_rate": 2.653016526685798e-05, "loss": 0.07130289077758789, "step": 2034 }, { "epoch": 0.27512547952613525, "grad_norm": 0.16487474739551544, "learning_rate": 2.6525786121693387e-05, "loss": 0.08155179023742676, "step": 2035 }, { "epoch": 0.27526067632197115, "grad_norm": 0.09475169330835342, "learning_rate": 2.652140457679866e-05, "loss": 0.05621039867401123, "step": 2036 }, { "epoch": 0.2753958731178071, "grad_norm": 0.08501619100570679, "learning_rate": 2.6517020633086064e-05, "loss": 0.06403207778930664, "step": 2037 }, { "epoch": 0.27553106991364307, "grad_norm": 0.2924579381942749, "learning_rate": 2.6512634291468354e-05, "loss": 0.09018754959106445, "step": 2038 }, { "epoch": 0.27566626670947897, "grad_norm": 0.07076648622751236, "learning_rate": 2.6508245552858792e-05, "loss": 0.06429815292358398, "step": 2039 }, { "epoch": 0.2758014635053149, "grad_norm": 0.06613663583993912, "learning_rate": 2.6503854418171133e-05, "loss": 0.057242393493652344, "step": 2040 }, { "epoch": 0.2759366603011509, "grad_norm": 0.23619668185710907, "learning_rate": 2.6499460888319644e-05, "loss": 0.09474372863769531, "step": 2041 }, { "epoch": 0.2760718570969868, "grad_norm": 0.1373104751110077, "learning_rate": 2.6495064964219073e-05, "loss": 0.06896734237670898, "step": 2042 }, { "epoch": 0.27620705389282274, "grad_norm": 0.10663674026727676, "learning_rate": 2.649066664678467e-05, "loss": 0.08289837837219238, "step": 2043 }, { "epoch": 0.2763422506886587, "grad_norm": 0.13968206942081451, "learning_rate": 2.6486265936932205e-05, "loss": 0.08006739616394043, "step": 2044 }, { "epoch": 0.2764774474844946, "grad_norm": 0.1566438525915146, "learning_rate": 2.6481862835577915e-05, "loss": 0.06683707237243652, "step": 2045 }, { "epoch": 0.27661264428033056, "grad_norm": 0.09505102038383484, "learning_rate": 2.6477457343638557e-05, "loss": 0.059329986572265625, "step": 2046 }, { "epoch": 0.2767478410761665, "grad_norm": 0.06063215434551239, "learning_rate": 2.647304946203137e-05, "loss": 0.06640112400054932, "step": 2047 }, { "epoch": 0.2768830378720024, "grad_norm": 0.06275895982980728, "learning_rate": 2.6468639191674106e-05, "loss": 0.03875541687011719, "step": 2048 }, { "epoch": 0.27701823466783837, "grad_norm": 0.1204524114727974, "learning_rate": 2.6464226533485007e-05, "loss": 0.0769195556640625, "step": 2049 }, { "epoch": 0.2771534314636743, "grad_norm": 0.1370803713798523, "learning_rate": 2.6459811488382806e-05, "loss": 0.05951547622680664, "step": 2050 }, { "epoch": 0.27728862825951023, "grad_norm": 0.21443584561347961, "learning_rate": 2.645539405728674e-05, "loss": 0.06662940979003906, "step": 2051 }, { "epoch": 0.2774238250553462, "grad_norm": 0.08277695626020432, "learning_rate": 2.6450974241116545e-05, "loss": 0.04816639423370361, "step": 2052 }, { "epoch": 0.27755902185118214, "grad_norm": 0.09988467395305634, "learning_rate": 2.644655204079245e-05, "loss": 0.09002542495727539, "step": 2053 }, { "epoch": 0.27769421864701804, "grad_norm": 0.05024226754903793, "learning_rate": 2.6442127457235177e-05, "loss": 0.05195474624633789, "step": 2054 }, { "epoch": 0.277829415442854, "grad_norm": 0.12617287039756775, "learning_rate": 2.6437700491365957e-05, "loss": 0.05921363830566406, "step": 2055 }, { "epoch": 0.27796461223868996, "grad_norm": 0.08306661993265152, "learning_rate": 2.6433271144106495e-05, "loss": 0.04903411865234375, "step": 2056 }, { "epoch": 0.27809980903452586, "grad_norm": 0.1661880761384964, "learning_rate": 2.6428839416379015e-05, "loss": 0.09780478477478027, "step": 2057 }, { "epoch": 0.2782350058303618, "grad_norm": 0.057911623269319534, "learning_rate": 2.642440530910622e-05, "loss": 0.06536006927490234, "step": 2058 }, { "epoch": 0.27837020262619777, "grad_norm": 0.1030687466263771, "learning_rate": 2.6419968823211318e-05, "loss": 0.06716156005859375, "step": 2059 }, { "epoch": 0.2785053994220337, "grad_norm": 0.11096462607383728, "learning_rate": 2.641552995961801e-05, "loss": 0.05932283401489258, "step": 2060 }, { "epoch": 0.27864059621786963, "grad_norm": 0.06314587593078613, "learning_rate": 2.6411088719250484e-05, "loss": 0.049739837646484375, "step": 2061 }, { "epoch": 0.2787757930137056, "grad_norm": 0.12002673745155334, "learning_rate": 2.6406645103033442e-05, "loss": 0.0627899169921875, "step": 2062 }, { "epoch": 0.2789109898095415, "grad_norm": 0.1444939821958542, "learning_rate": 2.640219911189206e-05, "loss": 0.0700833797454834, "step": 2063 }, { "epoch": 0.27904618660537744, "grad_norm": 0.07030951231718063, "learning_rate": 2.6397750746752015e-05, "loss": 0.07575082778930664, "step": 2064 }, { "epoch": 0.2791813834012134, "grad_norm": 0.049706388264894485, "learning_rate": 2.6393300008539488e-05, "loss": 0.050824880599975586, "step": 2065 }, { "epoch": 0.2793165801970493, "grad_norm": 0.08290249854326248, "learning_rate": 2.6388846898181143e-05, "loss": 0.0519561767578125, "step": 2066 }, { "epoch": 0.27945177699288526, "grad_norm": 0.11108475923538208, "learning_rate": 2.6384391416604142e-05, "loss": 0.07414007186889648, "step": 2067 }, { "epoch": 0.2795869737887212, "grad_norm": 0.07047361135482788, "learning_rate": 2.6379933564736136e-05, "loss": 0.07421302795410156, "step": 2068 }, { "epoch": 0.2797221705845572, "grad_norm": 0.10987763106822968, "learning_rate": 2.637547334350528e-05, "loss": 0.08148670196533203, "step": 2069 }, { "epoch": 0.2798573673803931, "grad_norm": 0.06859488040208817, "learning_rate": 2.637101075384021e-05, "loss": 0.05065727233886719, "step": 2070 }, { "epoch": 0.27999256417622903, "grad_norm": 0.12862250208854675, "learning_rate": 2.636654579667006e-05, "loss": 0.06699180603027344, "step": 2071 }, { "epoch": 0.280127760972065, "grad_norm": 0.1253284066915512, "learning_rate": 2.6362078472924467e-05, "loss": 0.0641794204711914, "step": 2072 }, { "epoch": 0.2802629577679009, "grad_norm": 0.19095271825790405, "learning_rate": 2.6357608783533545e-05, "loss": 0.06796979904174805, "step": 2073 }, { "epoch": 0.28039815456373685, "grad_norm": 0.07041297107934952, "learning_rate": 2.6353136729427907e-05, "loss": 0.05645179748535156, "step": 2074 }, { "epoch": 0.2805333513595728, "grad_norm": 0.07707662135362625, "learning_rate": 2.6348662311538657e-05, "loss": 0.05318903923034668, "step": 2075 }, { "epoch": 0.2806685481554087, "grad_norm": 0.0824110209941864, "learning_rate": 2.6344185530797398e-05, "loss": 0.09498566389083862, "step": 2076 }, { "epoch": 0.28080374495124466, "grad_norm": 0.06920803338289261, "learning_rate": 2.633970638813622e-05, "loss": 0.06720471382141113, "step": 2077 }, { "epoch": 0.2809389417470806, "grad_norm": 0.1455577164888382, "learning_rate": 2.6335224884487698e-05, "loss": 0.07756519317626953, "step": 2078 }, { "epoch": 0.2810741385429165, "grad_norm": 0.08363092690706253, "learning_rate": 2.6330741020784905e-05, "loss": 0.07347440719604492, "step": 2079 }, { "epoch": 0.2812093353387525, "grad_norm": 0.06588733941316605, "learning_rate": 2.6326254797961415e-05, "loss": 0.03544259071350098, "step": 2080 }, { "epoch": 0.28134453213458843, "grad_norm": 0.0798463225364685, "learning_rate": 2.6321766216951273e-05, "loss": 0.03764486312866211, "step": 2081 }, { "epoch": 0.28147972893042433, "grad_norm": 0.09690330177545547, "learning_rate": 2.631727527868903e-05, "loss": 0.062256574630737305, "step": 2082 }, { "epoch": 0.2816149257262603, "grad_norm": 0.0920526310801506, "learning_rate": 2.6312781984109727e-05, "loss": 0.07936477661132812, "step": 2083 }, { "epoch": 0.28175012252209625, "grad_norm": 0.05120435729622841, "learning_rate": 2.6308286334148882e-05, "loss": 0.06283760070800781, "step": 2084 }, { "epoch": 0.28188531931793215, "grad_norm": 0.05456714332103729, "learning_rate": 2.630378832974252e-05, "loss": 0.054611384868621826, "step": 2085 }, { "epoch": 0.2820205161137681, "grad_norm": 0.037975460290908813, "learning_rate": 2.6299287971827154e-05, "loss": 0.04564058780670166, "step": 2086 }, { "epoch": 0.28215571290960406, "grad_norm": 0.22946839034557343, "learning_rate": 2.629478526133977e-05, "loss": 0.08869409561157227, "step": 2087 }, { "epoch": 0.28229090970543996, "grad_norm": 0.1571892946958542, "learning_rate": 2.6290280199217867e-05, "loss": 0.07628417015075684, "step": 2088 }, { "epoch": 0.2824261065012759, "grad_norm": 0.07472819089889526, "learning_rate": 2.6285772786399424e-05, "loss": 0.04982256889343262, "step": 2089 }, { "epoch": 0.2825613032971119, "grad_norm": 0.2132357954978943, "learning_rate": 2.6281263023822894e-05, "loss": 0.07956933975219727, "step": 2090 }, { "epoch": 0.2826965000929478, "grad_norm": 0.18320582807064056, "learning_rate": 2.627675091242725e-05, "loss": 0.06894779205322266, "step": 2091 }, { "epoch": 0.28283169688878373, "grad_norm": 0.08101629465818405, "learning_rate": 2.627223645315193e-05, "loss": 0.03517794609069824, "step": 2092 }, { "epoch": 0.2829668936846197, "grad_norm": 0.13307827711105347, "learning_rate": 2.6267719646936868e-05, "loss": 0.057976484298706055, "step": 2093 }, { "epoch": 0.2831020904804556, "grad_norm": 0.2228143811225891, "learning_rate": 2.626320049472249e-05, "loss": 0.06149768829345703, "step": 2094 }, { "epoch": 0.28323728727629155, "grad_norm": 0.21607151627540588, "learning_rate": 2.6258678997449705e-05, "loss": 0.05923795700073242, "step": 2095 }, { "epoch": 0.2833724840721275, "grad_norm": 0.28659555315971375, "learning_rate": 2.6254155156059912e-05, "loss": 0.04944181442260742, "step": 2096 }, { "epoch": 0.2835076808679634, "grad_norm": 0.2129606157541275, "learning_rate": 2.6249628971495006e-05, "loss": 0.05794954299926758, "step": 2097 }, { "epoch": 0.28364287766379936, "grad_norm": 0.12535381317138672, "learning_rate": 2.6245100444697353e-05, "loss": 0.06516098976135254, "step": 2098 }, { "epoch": 0.2837780744596353, "grad_norm": 0.06816662847995758, "learning_rate": 2.6240569576609824e-05, "loss": 0.036916375160217285, "step": 2099 }, { "epoch": 0.2839132712554712, "grad_norm": 0.07636474072933197, "learning_rate": 2.623603636817577e-05, "loss": 0.05437588691711426, "step": 2100 }, { "epoch": 0.2840484680513072, "grad_norm": 0.22799153625965118, "learning_rate": 2.6231500820339024e-05, "loss": 0.05071878433227539, "step": 2101 }, { "epoch": 0.28418366484714314, "grad_norm": 0.17223896086215973, "learning_rate": 2.6226962934043913e-05, "loss": 0.046317100524902344, "step": 2102 }, { "epoch": 0.28431886164297904, "grad_norm": 0.21222834289073944, "learning_rate": 2.622242271023525e-05, "loss": 0.041506290435791016, "step": 2103 }, { "epoch": 0.284454058438815, "grad_norm": 0.10336212813854218, "learning_rate": 2.6217880149858333e-05, "loss": 0.07502317428588867, "step": 2104 }, { "epoch": 0.28458925523465095, "grad_norm": 0.17416946589946747, "learning_rate": 2.621333525385895e-05, "loss": 0.08292198181152344, "step": 2105 }, { "epoch": 0.28472445203048685, "grad_norm": 0.04412782937288284, "learning_rate": 2.6208788023183366e-05, "loss": 0.035071492195129395, "step": 2106 }, { "epoch": 0.2848596488263228, "grad_norm": 0.26852941513061523, "learning_rate": 2.6204238458778346e-05, "loss": 0.09268230199813843, "step": 2107 }, { "epoch": 0.28499484562215877, "grad_norm": 0.08535538613796234, "learning_rate": 2.619968656159113e-05, "loss": 0.05893373489379883, "step": 2108 }, { "epoch": 0.28513004241799467, "grad_norm": 0.12854574620723724, "learning_rate": 2.6195132332569445e-05, "loss": 0.04909944534301758, "step": 2109 }, { "epoch": 0.2852652392138306, "grad_norm": 0.12075182795524597, "learning_rate": 2.619057577266151e-05, "loss": 0.0672757625579834, "step": 2110 }, { "epoch": 0.2854004360096666, "grad_norm": 0.09140174090862274, "learning_rate": 2.6186016882816027e-05, "loss": 0.09554576873779297, "step": 2111 }, { "epoch": 0.28553563280550254, "grad_norm": 0.1659955084323883, "learning_rate": 2.6181455663982175e-05, "loss": 0.09116029739379883, "step": 2112 }, { "epoch": 0.28567082960133844, "grad_norm": 0.1251361072063446, "learning_rate": 2.6176892117109628e-05, "loss": 0.09169983863830566, "step": 2113 }, { "epoch": 0.2858060263971744, "grad_norm": 0.06685018539428711, "learning_rate": 2.617232624314854e-05, "loss": 0.05655074119567871, "step": 2114 }, { "epoch": 0.28594122319301035, "grad_norm": 0.14993202686309814, "learning_rate": 2.616775804304955e-05, "loss": 0.06606578826904297, "step": 2115 }, { "epoch": 0.28607641998884625, "grad_norm": 0.08795338869094849, "learning_rate": 2.616318751776378e-05, "loss": 0.04561328887939453, "step": 2116 }, { "epoch": 0.2862116167846822, "grad_norm": 0.15838433802127838, "learning_rate": 2.615861466824284e-05, "loss": 0.07107336819171906, "step": 2117 }, { "epoch": 0.28634681358051817, "grad_norm": 0.2660566568374634, "learning_rate": 2.6154039495438825e-05, "loss": 0.12222003936767578, "step": 2118 }, { "epoch": 0.28648201037635407, "grad_norm": 0.09853076189756393, "learning_rate": 2.6149462000304302e-05, "loss": 0.08576822280883789, "step": 2119 }, { "epoch": 0.28661720717219, "grad_norm": 0.07233782857656479, "learning_rate": 2.6144882183792335e-05, "loss": 0.059578895568847656, "step": 2120 }, { "epoch": 0.286752403968026, "grad_norm": 0.06667614728212357, "learning_rate": 2.6140300046856468e-05, "loss": 0.04423332214355469, "step": 2121 }, { "epoch": 0.2868876007638619, "grad_norm": 0.2102232575416565, "learning_rate": 2.6135715590450722e-05, "loss": 0.10194778442382812, "step": 2122 }, { "epoch": 0.28702279755969784, "grad_norm": 0.19450493156909943, "learning_rate": 2.6131128815529608e-05, "loss": 0.06612181663513184, "step": 2123 }, { "epoch": 0.2871579943555338, "grad_norm": 0.0883248820900917, "learning_rate": 2.6126539723048115e-05, "loss": 0.07953023910522461, "step": 2124 }, { "epoch": 0.2872931911513697, "grad_norm": 0.10256035625934601, "learning_rate": 2.612194831396172e-05, "loss": 0.04761248826980591, "step": 2125 }, { "epoch": 0.28742838794720565, "grad_norm": 0.27658361196517944, "learning_rate": 2.611735458922637e-05, "loss": 0.07846355438232422, "step": 2126 }, { "epoch": 0.2875635847430416, "grad_norm": 0.10063400119543076, "learning_rate": 2.6112758549798515e-05, "loss": 0.06652474403381348, "step": 2127 }, { "epoch": 0.2876987815388775, "grad_norm": 0.1689261645078659, "learning_rate": 2.610816019663507e-05, "loss": 0.12515521049499512, "step": 2128 }, { "epoch": 0.28783397833471347, "grad_norm": 0.15385380387306213, "learning_rate": 2.6103559530693436e-05, "loss": 0.07243585586547852, "step": 2129 }, { "epoch": 0.2879691751305494, "grad_norm": 0.11074637621641159, "learning_rate": 2.6098956552931495e-05, "loss": 0.10162067413330078, "step": 2130 }, { "epoch": 0.2881043719263853, "grad_norm": 0.3546871542930603, "learning_rate": 2.6094351264307613e-05, "loss": 0.09630537033081055, "step": 2131 }, { "epoch": 0.2882395687222213, "grad_norm": 0.07562275975942612, "learning_rate": 2.6089743665780635e-05, "loss": 0.057579994201660156, "step": 2132 }, { "epoch": 0.28837476551805724, "grad_norm": 0.054605837911367416, "learning_rate": 2.6085133758309887e-05, "loss": 0.056827545166015625, "step": 2133 }, { "epoch": 0.28850996231389314, "grad_norm": 0.0396919809281826, "learning_rate": 2.6080521542855182e-05, "loss": 0.0452117919921875, "step": 2134 }, { "epoch": 0.2886451591097291, "grad_norm": 0.10526531934738159, "learning_rate": 2.60759070203768e-05, "loss": 0.07203531265258789, "step": 2135 }, { "epoch": 0.28878035590556506, "grad_norm": 0.15222521126270294, "learning_rate": 2.607129019183551e-05, "loss": 0.07175898551940918, "step": 2136 }, { "epoch": 0.28891555270140096, "grad_norm": 0.06815645843744278, "learning_rate": 2.6066671058192566e-05, "loss": 0.08519530296325684, "step": 2137 }, { "epoch": 0.2890507494972369, "grad_norm": 0.04716069996356964, "learning_rate": 2.606204962040969e-05, "loss": 0.05622053146362305, "step": 2138 }, { "epoch": 0.28918594629307287, "grad_norm": 0.08457861840724945, "learning_rate": 2.6057425879449095e-05, "loss": 0.08150696754455566, "step": 2139 }, { "epoch": 0.28932114308890877, "grad_norm": 0.19944559037685394, "learning_rate": 2.605279983627347e-05, "loss": 0.07221436500549316, "step": 2140 }, { "epoch": 0.28945633988474473, "grad_norm": 0.1339990496635437, "learning_rate": 2.6048171491845974e-05, "loss": 0.06955194473266602, "step": 2141 }, { "epoch": 0.2895915366805807, "grad_norm": 0.06655459105968475, "learning_rate": 2.604354084713026e-05, "loss": 0.07123279571533203, "step": 2142 }, { "epoch": 0.2897267334764166, "grad_norm": 0.0839993804693222, "learning_rate": 2.6038907903090446e-05, "loss": 0.05715513229370117, "step": 2143 }, { "epoch": 0.28986193027225254, "grad_norm": 0.06809302419424057, "learning_rate": 2.6034272660691143e-05, "loss": 0.046620845794677734, "step": 2144 }, { "epoch": 0.2899971270680885, "grad_norm": 0.10806578397750854, "learning_rate": 2.6029635120897434e-05, "loss": 0.08280181884765625, "step": 2145 }, { "epoch": 0.2901323238639244, "grad_norm": 0.18974973261356354, "learning_rate": 2.6024995284674867e-05, "loss": 0.08384335041046143, "step": 2146 }, { "epoch": 0.29026752065976036, "grad_norm": 0.09509654343128204, "learning_rate": 2.6020353152989496e-05, "loss": 0.05357968807220459, "step": 2147 }, { "epoch": 0.2904027174555963, "grad_norm": 0.2146265208721161, "learning_rate": 2.601570872680783e-05, "loss": 0.07716226577758789, "step": 2148 }, { "epoch": 0.2905379142514322, "grad_norm": 0.16212603449821472, "learning_rate": 2.6011062007096857e-05, "loss": 0.05240631103515625, "step": 2149 }, { "epoch": 0.2906731110472682, "grad_norm": 0.05886950343847275, "learning_rate": 2.6006412994824067e-05, "loss": 0.058757781982421875, "step": 2150 }, { "epoch": 0.29080830784310413, "grad_norm": 0.08821522444486618, "learning_rate": 2.6001761690957388e-05, "loss": 0.06379985809326172, "step": 2151 }, { "epoch": 0.29094350463894003, "grad_norm": 0.05830740928649902, "learning_rate": 2.5997108096465263e-05, "loss": 0.051024019718170166, "step": 2152 }, { "epoch": 0.291078701434776, "grad_norm": 0.13905581831932068, "learning_rate": 2.599245221231659e-05, "loss": 0.059331655502319336, "step": 2153 }, { "epoch": 0.29121389823061195, "grad_norm": 0.2339545488357544, "learning_rate": 2.5987794039480743e-05, "loss": 0.0910719633102417, "step": 2154 }, { "epoch": 0.29134909502644785, "grad_norm": 0.3189767003059387, "learning_rate": 2.5983133578927584e-05, "loss": 0.08815765380859375, "step": 2155 }, { "epoch": 0.2914842918222838, "grad_norm": 0.06492186337709427, "learning_rate": 2.5978470831627444e-05, "loss": 0.05967903137207031, "step": 2156 }, { "epoch": 0.29161948861811976, "grad_norm": 0.14014960825443268, "learning_rate": 2.597380579855113e-05, "loss": 0.0846705436706543, "step": 2157 }, { "epoch": 0.2917546854139557, "grad_norm": 0.10657402127981186, "learning_rate": 2.5969138480669936e-05, "loss": 0.07187724113464355, "step": 2158 }, { "epoch": 0.2918898822097916, "grad_norm": 0.24730943143367767, "learning_rate": 2.5964468878955614e-05, "loss": 0.07778358459472656, "step": 2159 }, { "epoch": 0.2920250790056276, "grad_norm": 0.19430230557918549, "learning_rate": 2.5959796994380397e-05, "loss": 0.05954170227050781, "step": 2160 }, { "epoch": 0.29216027580146353, "grad_norm": 0.09123359620571136, "learning_rate": 2.5955122827917004e-05, "loss": 0.06579208374023438, "step": 2161 }, { "epoch": 0.29229547259729943, "grad_norm": 0.06193273887038231, "learning_rate": 2.595044638053862e-05, "loss": 0.05091738700866699, "step": 2162 }, { "epoch": 0.2924306693931354, "grad_norm": 0.057271480560302734, "learning_rate": 2.59457676532189e-05, "loss": 0.05841255187988281, "step": 2163 }, { "epoch": 0.29256586618897135, "grad_norm": 0.3168664276599884, "learning_rate": 2.594108664693199e-05, "loss": 0.12117242813110352, "step": 2164 }, { "epoch": 0.29270106298480725, "grad_norm": 0.09441503137350082, "learning_rate": 2.5936403362652494e-05, "loss": 0.08960413932800293, "step": 2165 }, { "epoch": 0.2928362597806432, "grad_norm": 0.07435912638902664, "learning_rate": 2.5931717801355497e-05, "loss": 0.07468914985656738, "step": 2166 }, { "epoch": 0.29297145657647916, "grad_norm": 0.08187311887741089, "learning_rate": 2.5927029964016556e-05, "loss": 0.05230635404586792, "step": 2167 }, { "epoch": 0.29310665337231506, "grad_norm": 0.17817473411560059, "learning_rate": 2.592233985161171e-05, "loss": 0.09858036041259766, "step": 2168 }, { "epoch": 0.293241850168151, "grad_norm": 0.11683443933725357, "learning_rate": 2.5917647465117463e-05, "loss": 0.0915069580078125, "step": 2169 }, { "epoch": 0.293377046963987, "grad_norm": 0.29508888721466064, "learning_rate": 2.591295280551079e-05, "loss": 0.07468843460083008, "step": 2170 }, { "epoch": 0.2935122437598229, "grad_norm": 0.07180826365947723, "learning_rate": 2.590825587376915e-05, "loss": 0.06031370162963867, "step": 2171 }, { "epoch": 0.29364744055565883, "grad_norm": 0.06169212982058525, "learning_rate": 2.5903556670870464e-05, "loss": 0.0592656135559082, "step": 2172 }, { "epoch": 0.2937826373514948, "grad_norm": 0.06674641370773315, "learning_rate": 2.589885519779314e-05, "loss": 0.042227745056152344, "step": 2173 }, { "epoch": 0.2939178341473307, "grad_norm": 0.1836625039577484, "learning_rate": 2.5894151455516043e-05, "loss": 0.06458330154418945, "step": 2174 }, { "epoch": 0.29405303094316665, "grad_norm": 0.14718295633792877, "learning_rate": 2.5889445445018513e-05, "loss": 0.08520126342773438, "step": 2175 }, { "epoch": 0.2941882277390026, "grad_norm": 0.09587886184453964, "learning_rate": 2.5884737167280375e-05, "loss": 0.05840873718261719, "step": 2176 }, { "epoch": 0.2943234245348385, "grad_norm": 0.09669416397809982, "learning_rate": 2.5880026623281914e-05, "loss": 0.055062055587768555, "step": 2177 }, { "epoch": 0.29445862133067446, "grad_norm": 0.19759581983089447, "learning_rate": 2.5875313814003892e-05, "loss": 0.08341217041015625, "step": 2178 }, { "epoch": 0.2945938181265104, "grad_norm": 0.05674462392926216, "learning_rate": 2.587059874042754e-05, "loss": 0.042471885681152344, "step": 2179 }, { "epoch": 0.2947290149223463, "grad_norm": 0.049224868416786194, "learning_rate": 2.5865881403534557e-05, "loss": 0.04879045486450195, "step": 2180 }, { "epoch": 0.2948642117181823, "grad_norm": 0.09328607469797134, "learning_rate": 2.5861161804307124e-05, "loss": 0.06924581527709961, "step": 2181 }, { "epoch": 0.29499940851401824, "grad_norm": 0.15414372086524963, "learning_rate": 2.5856439943727886e-05, "loss": 0.10968542098999023, "step": 2182 }, { "epoch": 0.29513460530985414, "grad_norm": 0.06725557893514633, "learning_rate": 2.5851715822779954e-05, "loss": 0.05676007270812988, "step": 2183 }, { "epoch": 0.2952698021056901, "grad_norm": 0.23364093899726868, "learning_rate": 2.5846989442446926e-05, "loss": 0.06983709335327148, "step": 2184 }, { "epoch": 0.29540499890152605, "grad_norm": 0.1450379192829132, "learning_rate": 2.584226080371285e-05, "loss": 0.06979644298553467, "step": 2185 }, { "epoch": 0.29554019569736195, "grad_norm": 0.27563992142677307, "learning_rate": 2.5837529907562258e-05, "loss": 0.08946192264556885, "step": 2186 }, { "epoch": 0.2956753924931979, "grad_norm": 0.14683540165424347, "learning_rate": 2.5832796754980138e-05, "loss": 0.08282756805419922, "step": 2187 }, { "epoch": 0.29581058928903387, "grad_norm": 0.058543555438518524, "learning_rate": 2.5828061346951974e-05, "loss": 0.06068229675292969, "step": 2188 }, { "epoch": 0.29594578608486977, "grad_norm": 0.09753567725419998, "learning_rate": 2.5823323684463693e-05, "loss": 0.0587306022644043, "step": 2189 }, { "epoch": 0.2960809828807057, "grad_norm": 0.17675673961639404, "learning_rate": 2.5818583768501708e-05, "loss": 0.06389808654785156, "step": 2190 }, { "epoch": 0.2962161796765417, "grad_norm": 0.1600918173789978, "learning_rate": 2.5813841600052887e-05, "loss": 0.07679128646850586, "step": 2191 }, { "epoch": 0.2963513764723776, "grad_norm": 0.09405505657196045, "learning_rate": 2.580909718010458e-05, "loss": 0.056090354919433594, "step": 2192 }, { "epoch": 0.29648657326821354, "grad_norm": 0.07425408065319061, "learning_rate": 2.58043505096446e-05, "loss": 0.07295727729797363, "step": 2193 }, { "epoch": 0.2966217700640495, "grad_norm": 0.0767381340265274, "learning_rate": 2.5799601589661223e-05, "loss": 0.05618906021118164, "step": 2194 }, { "epoch": 0.2967569668598854, "grad_norm": 0.06611032783985138, "learning_rate": 2.579485042114321e-05, "loss": 0.05595207214355469, "step": 2195 }, { "epoch": 0.29689216365572135, "grad_norm": 0.09716714173555374, "learning_rate": 2.5790097005079766e-05, "loss": 0.0463939905166626, "step": 2196 }, { "epoch": 0.2970273604515573, "grad_norm": 0.0926392525434494, "learning_rate": 2.5785341342460595e-05, "loss": 0.05385637283325195, "step": 2197 }, { "epoch": 0.2971625572473932, "grad_norm": 0.04031377285718918, "learning_rate": 2.5780583434275837e-05, "loss": 0.05214262008666992, "step": 2198 }, { "epoch": 0.29729775404322917, "grad_norm": 0.05670323222875595, "learning_rate": 2.577582328151612e-05, "loss": 0.07280492782592773, "step": 2199 }, { "epoch": 0.2974329508390651, "grad_norm": 0.1620096117258072, "learning_rate": 2.5771060885172532e-05, "loss": 0.11318397521972656, "step": 2200 }, { "epoch": 0.297568147634901, "grad_norm": 0.07859797030687332, "learning_rate": 2.5766296246236628e-05, "loss": 0.06444263458251953, "step": 2201 }, { "epoch": 0.297703344430737, "grad_norm": 0.2573566138744354, "learning_rate": 2.5761529365700437e-05, "loss": 0.10247278213500977, "step": 2202 }, { "epoch": 0.29783854122657294, "grad_norm": 0.15654997527599335, "learning_rate": 2.5756760244556445e-05, "loss": 0.0964818000793457, "step": 2203 }, { "epoch": 0.2979737380224089, "grad_norm": 0.09162845462560654, "learning_rate": 2.5751988883797603e-05, "loss": 0.07657527923583984, "step": 2204 }, { "epoch": 0.2981089348182448, "grad_norm": 0.09175653010606766, "learning_rate": 2.574721528441734e-05, "loss": 0.040541887283325195, "step": 2205 }, { "epoch": 0.29824413161408075, "grad_norm": 0.1088048443198204, "learning_rate": 2.5742439447409545e-05, "loss": 0.045897722244262695, "step": 2206 }, { "epoch": 0.2983793284099167, "grad_norm": 0.11155156791210175, "learning_rate": 2.5737661373768568e-05, "loss": 0.05272102355957031, "step": 2207 }, { "epoch": 0.2985145252057526, "grad_norm": 0.03990326076745987, "learning_rate": 2.5732881064489237e-05, "loss": 0.06196737289428711, "step": 2208 }, { "epoch": 0.29864972200158857, "grad_norm": 0.23571626842021942, "learning_rate": 2.572809852056683e-05, "loss": 0.12282919883728027, "step": 2209 }, { "epoch": 0.2987849187974245, "grad_norm": 0.09622597694396973, "learning_rate": 2.572331374299711e-05, "loss": 0.091552734375, "step": 2210 }, { "epoch": 0.2989201155932604, "grad_norm": 0.1908119171857834, "learning_rate": 2.5718526732776276e-05, "loss": 0.09036707878112793, "step": 2211 }, { "epoch": 0.2990553123890964, "grad_norm": 0.13202886283397675, "learning_rate": 2.5713737490901023e-05, "loss": 0.0438840389251709, "step": 2212 }, { "epoch": 0.29919050918493234, "grad_norm": 0.12403219193220139, "learning_rate": 2.570894601836849e-05, "loss": 0.06312799453735352, "step": 2213 }, { "epoch": 0.29932570598076824, "grad_norm": 0.0662514716386795, "learning_rate": 2.5704152316176287e-05, "loss": 0.046802520751953125, "step": 2214 }, { "epoch": 0.2994609027766042, "grad_norm": 0.22417013347148895, "learning_rate": 2.5699356385322487e-05, "loss": 0.10979652404785156, "step": 2215 }, { "epoch": 0.29959609957244016, "grad_norm": 0.11969805508852005, "learning_rate": 2.5694558226805643e-05, "loss": 0.0529780387878418, "step": 2216 }, { "epoch": 0.29973129636827606, "grad_norm": 0.07081670314073563, "learning_rate": 2.568975784162474e-05, "loss": 0.07811403274536133, "step": 2217 }, { "epoch": 0.299866493164112, "grad_norm": 0.0757928416132927, "learning_rate": 2.5684955230779245e-05, "loss": 0.03953409194946289, "step": 2218 }, { "epoch": 0.30000168995994797, "grad_norm": 0.10730839520692825, "learning_rate": 2.5680150395269096e-05, "loss": 0.11053705215454102, "step": 2219 }, { "epoch": 0.30013688675578387, "grad_norm": 0.16285882890224457, "learning_rate": 2.5675343336094683e-05, "loss": 0.08858633041381836, "step": 2220 }, { "epoch": 0.30027208355161983, "grad_norm": 0.06795356422662735, "learning_rate": 2.5670534054256855e-05, "loss": 0.061885833740234375, "step": 2221 }, { "epoch": 0.3004072803474558, "grad_norm": 0.3278854489326477, "learning_rate": 2.5665722550756937e-05, "loss": 0.10415267944335938, "step": 2222 }, { "epoch": 0.3005424771432917, "grad_norm": 0.11619860678911209, "learning_rate": 2.5660908826596707e-05, "loss": 0.07813048362731934, "step": 2223 }, { "epoch": 0.30067767393912764, "grad_norm": 0.14091181755065918, "learning_rate": 2.5656092882778413e-05, "loss": 0.0955878496170044, "step": 2224 }, { "epoch": 0.3008128707349636, "grad_norm": 0.18230250477790833, "learning_rate": 2.565127472030475e-05, "loss": 0.06585180759429932, "step": 2225 }, { "epoch": 0.3009480675307995, "grad_norm": 0.20425178110599518, "learning_rate": 2.5646454340178894e-05, "loss": 0.06609654426574707, "step": 2226 }, { "epoch": 0.30108326432663546, "grad_norm": 0.11512142419815063, "learning_rate": 2.564163174340447e-05, "loss": 0.053675174713134766, "step": 2227 }, { "epoch": 0.3012184611224714, "grad_norm": 0.21589672565460205, "learning_rate": 2.5636806930985565e-05, "loss": 0.12194061279296875, "step": 2228 }, { "epoch": 0.3013536579183073, "grad_norm": 0.16532501578330994, "learning_rate": 2.5631979903926738e-05, "loss": 0.08331567049026489, "step": 2229 }, { "epoch": 0.3014888547141433, "grad_norm": 0.21849635243415833, "learning_rate": 2.5627150663233e-05, "loss": 0.055123329162597656, "step": 2230 }, { "epoch": 0.30162405150997923, "grad_norm": 0.1428455114364624, "learning_rate": 2.5622319209909817e-05, "loss": 0.055088043212890625, "step": 2231 }, { "epoch": 0.30175924830581513, "grad_norm": 0.47405576705932617, "learning_rate": 2.5617485544963135e-05, "loss": 0.09007501602172852, "step": 2232 }, { "epoch": 0.3018944451016511, "grad_norm": 0.1740884780883789, "learning_rate": 2.561264966939934e-05, "loss": 0.07647299766540527, "step": 2233 }, { "epoch": 0.30202964189748704, "grad_norm": 0.1323907971382141, "learning_rate": 2.5607811584225294e-05, "loss": 0.0838174819946289, "step": 2234 }, { "epoch": 0.30216483869332295, "grad_norm": 0.11019740253686905, "learning_rate": 2.5602971290448305e-05, "loss": 0.05591773986816406, "step": 2235 }, { "epoch": 0.3023000354891589, "grad_norm": 0.06115534156560898, "learning_rate": 2.5598128789076152e-05, "loss": 0.05503726005554199, "step": 2236 }, { "epoch": 0.30243523228499486, "grad_norm": 0.37740057706832886, "learning_rate": 2.559328408111707e-05, "loss": 0.0873861312866211, "step": 2237 }, { "epoch": 0.30257042908083076, "grad_norm": 0.23444798588752747, "learning_rate": 2.5588437167579755e-05, "loss": 0.061660319566726685, "step": 2238 }, { "epoch": 0.3027056258766667, "grad_norm": 0.10367490351200104, "learning_rate": 2.558358804947335e-05, "loss": 0.05496370792388916, "step": 2239 }, { "epoch": 0.3028408226725027, "grad_norm": 0.10610882192850113, "learning_rate": 2.557873672780748e-05, "loss": 0.09772443771362305, "step": 2240 }, { "epoch": 0.3029760194683386, "grad_norm": 0.0653059259057045, "learning_rate": 2.557388320359221e-05, "loss": 0.04547691345214844, "step": 2241 }, { "epoch": 0.30311121626417453, "grad_norm": 0.16897399723529816, "learning_rate": 2.5569027477838068e-05, "loss": 0.06138944625854492, "step": 2242 }, { "epoch": 0.3032464130600105, "grad_norm": 0.18280315399169922, "learning_rate": 2.5564169551556044e-05, "loss": 0.049242258071899414, "step": 2243 }, { "epoch": 0.3033816098558464, "grad_norm": 0.1963411420583725, "learning_rate": 2.5559309425757586e-05, "loss": 0.07587909698486328, "step": 2244 }, { "epoch": 0.30351680665168235, "grad_norm": 0.1461852341890335, "learning_rate": 2.5554447101454597e-05, "loss": 0.052052974700927734, "step": 2245 }, { "epoch": 0.3036520034475183, "grad_norm": 0.10520715266466141, "learning_rate": 2.554958257965944e-05, "loss": 0.06604957580566406, "step": 2246 }, { "epoch": 0.3037872002433542, "grad_norm": 0.11993642151355743, "learning_rate": 2.554471586138493e-05, "loss": 0.06366157531738281, "step": 2247 }, { "epoch": 0.30392239703919016, "grad_norm": 0.08100903034210205, "learning_rate": 2.5539846947644342e-05, "loss": 0.05422258377075195, "step": 2248 }, { "epoch": 0.3040575938350261, "grad_norm": 0.3330111801624298, "learning_rate": 2.5534975839451416e-05, "loss": 0.07745647430419922, "step": 2249 }, { "epoch": 0.3041927906308621, "grad_norm": 0.18529288470745087, "learning_rate": 2.5530102537820348e-05, "loss": 0.07784557342529297, "step": 2250 }, { "epoch": 0.304327987426698, "grad_norm": 0.11477598547935486, "learning_rate": 2.5525227043765774e-05, "loss": 0.10215377807617188, "step": 2251 }, { "epoch": 0.30446318422253393, "grad_norm": 0.10381592810153961, "learning_rate": 2.55203493583028e-05, "loss": 0.0888218879699707, "step": 2252 }, { "epoch": 0.3045983810183699, "grad_norm": 0.23445124924182892, "learning_rate": 2.551546948244699e-05, "loss": 0.11539363861083984, "step": 2253 }, { "epoch": 0.3047335778142058, "grad_norm": 0.18332181870937347, "learning_rate": 2.551058741721436e-05, "loss": 0.0734856128692627, "step": 2254 }, { "epoch": 0.30486877461004175, "grad_norm": 0.16522622108459473, "learning_rate": 2.550570316362138e-05, "loss": 0.10046911239624023, "step": 2255 }, { "epoch": 0.3050039714058777, "grad_norm": 0.09568028151988983, "learning_rate": 2.5500816722684975e-05, "loss": 0.07169008255004883, "step": 2256 }, { "epoch": 0.3051391682017136, "grad_norm": 0.051216576248407364, "learning_rate": 2.549592809542253e-05, "loss": 0.05153989791870117, "step": 2257 }, { "epoch": 0.30527436499754956, "grad_norm": 0.10579536110162735, "learning_rate": 2.549103728285189e-05, "loss": 0.08163261413574219, "step": 2258 }, { "epoch": 0.3054095617933855, "grad_norm": 0.0329514779150486, "learning_rate": 2.548614428599134e-05, "loss": 0.03496980667114258, "step": 2259 }, { "epoch": 0.3055447585892214, "grad_norm": 0.10999783873558044, "learning_rate": 2.5481249105859633e-05, "loss": 0.05714726448059082, "step": 2260 }, { "epoch": 0.3056799553850574, "grad_norm": 0.03867647051811218, "learning_rate": 2.5476351743475964e-05, "loss": 0.041094183921813965, "step": 2261 }, { "epoch": 0.30581515218089333, "grad_norm": 0.33016112446784973, "learning_rate": 2.547145219986e-05, "loss": 0.10236454010009766, "step": 2262 }, { "epoch": 0.30595034897672924, "grad_norm": 0.11598561704158783, "learning_rate": 2.5466550476031846e-05, "loss": 0.051011085510253906, "step": 2263 }, { "epoch": 0.3060855457725652, "grad_norm": 0.22420819103717804, "learning_rate": 2.5461646573012072e-05, "loss": 0.07788515090942383, "step": 2264 }, { "epoch": 0.30622074256840115, "grad_norm": 0.1614769548177719, "learning_rate": 2.5456740491821687e-05, "loss": 0.0509333610534668, "step": 2265 }, { "epoch": 0.30635593936423705, "grad_norm": 0.04684172198176384, "learning_rate": 2.5451832233482172e-05, "loss": 0.033049583435058594, "step": 2266 }, { "epoch": 0.306491136160073, "grad_norm": 0.21184338629245758, "learning_rate": 2.544692179901545e-05, "loss": 0.07379531860351562, "step": 2267 }, { "epoch": 0.30662633295590896, "grad_norm": 0.09742343425750732, "learning_rate": 2.5442009189443902e-05, "loss": 0.04856705665588379, "step": 2268 }, { "epoch": 0.30676152975174487, "grad_norm": 0.06952012330293655, "learning_rate": 2.5437094405790355e-05, "loss": 0.059391021728515625, "step": 2269 }, { "epoch": 0.3068967265475808, "grad_norm": 0.11396628618240356, "learning_rate": 2.5432177449078096e-05, "loss": 0.09135818481445312, "step": 2270 }, { "epoch": 0.3070319233434168, "grad_norm": 0.030981941148638725, "learning_rate": 2.5427258320330857e-05, "loss": 0.029935359954833984, "step": 2271 }, { "epoch": 0.3071671201392527, "grad_norm": 0.05656053125858307, "learning_rate": 2.5422337020572835e-05, "loss": 0.0642547607421875, "step": 2272 }, { "epoch": 0.30730231693508864, "grad_norm": 0.08299758285284042, "learning_rate": 2.5417413550828667e-05, "loss": 0.06255435943603516, "step": 2273 }, { "epoch": 0.3074375137309246, "grad_norm": 0.12040698528289795, "learning_rate": 2.5412487912123444e-05, "loss": 0.10099029541015625, "step": 2274 }, { "epoch": 0.3075727105267605, "grad_norm": 0.06496715545654297, "learning_rate": 2.5407560105482708e-05, "loss": 0.04317343235015869, "step": 2275 }, { "epoch": 0.30770790732259645, "grad_norm": 0.0502430722117424, "learning_rate": 2.540263013193246e-05, "loss": 0.047867536544799805, "step": 2276 }, { "epoch": 0.3078431041184324, "grad_norm": 0.1314105987548828, "learning_rate": 2.539769799249915e-05, "loss": 0.07472860813140869, "step": 2277 }, { "epoch": 0.3079783009142683, "grad_norm": 0.0870327427983284, "learning_rate": 2.5392763688209666e-05, "loss": 0.08386802673339844, "step": 2278 }, { "epoch": 0.30811349771010427, "grad_norm": 0.08614157140254974, "learning_rate": 2.5387827220091362e-05, "loss": 0.07688188552856445, "step": 2279 }, { "epoch": 0.3082486945059402, "grad_norm": 0.1127915009856224, "learning_rate": 2.538288858917204e-05, "loss": 0.10305452346801758, "step": 2280 }, { "epoch": 0.3083838913017761, "grad_norm": 0.11062409728765488, "learning_rate": 2.5377947796479936e-05, "loss": 0.08144855499267578, "step": 2281 }, { "epoch": 0.3085190880976121, "grad_norm": 0.05871828645467758, "learning_rate": 2.537300484304377e-05, "loss": 0.05708503723144531, "step": 2282 }, { "epoch": 0.30865428489344804, "grad_norm": 0.0803714320063591, "learning_rate": 2.536805972989267e-05, "loss": 0.06974267959594727, "step": 2283 }, { "epoch": 0.30878948168928394, "grad_norm": 0.08624116331338882, "learning_rate": 2.5363112458056252e-05, "loss": 0.04434061050415039, "step": 2284 }, { "epoch": 0.3089246784851199, "grad_norm": 0.08888224512338638, "learning_rate": 2.5358163028564552e-05, "loss": 0.045476555824279785, "step": 2285 }, { "epoch": 0.30905987528095585, "grad_norm": 0.09790392220020294, "learning_rate": 2.535321144244808e-05, "loss": 0.07386910915374756, "step": 2286 }, { "epoch": 0.30919507207679175, "grad_norm": 0.0618215836584568, "learning_rate": 2.534825770073777e-05, "loss": 0.09366035461425781, "step": 2287 }, { "epoch": 0.3093302688726277, "grad_norm": 0.2710959017276764, "learning_rate": 2.5343301804465026e-05, "loss": 0.07016468048095703, "step": 2288 }, { "epoch": 0.30946546566846367, "grad_norm": 0.09551559388637543, "learning_rate": 2.533834375466169e-05, "loss": 0.08010244369506836, "step": 2289 }, { "epoch": 0.30960066246429957, "grad_norm": 0.06421948969364166, "learning_rate": 2.533338355236005e-05, "loss": 0.06645393371582031, "step": 2290 }, { "epoch": 0.3097358592601355, "grad_norm": 0.03430100530385971, "learning_rate": 2.532842119859285e-05, "loss": 0.03856325149536133, "step": 2291 }, { "epoch": 0.3098710560559715, "grad_norm": 0.15019206702709198, "learning_rate": 2.532345669439328e-05, "loss": 0.07943582534790039, "step": 2292 }, { "epoch": 0.3100062528518074, "grad_norm": 0.08374845236539841, "learning_rate": 2.5318490040794975e-05, "loss": 0.04987788200378418, "step": 2293 }, { "epoch": 0.31014144964764334, "grad_norm": 0.14791464805603027, "learning_rate": 2.531352123883202e-05, "loss": 0.07583856582641602, "step": 2294 }, { "epoch": 0.3102766464434793, "grad_norm": 0.12051131576299667, "learning_rate": 2.530855028953894e-05, "loss": 0.10768318176269531, "step": 2295 }, { "epoch": 0.31041184323931525, "grad_norm": 0.06165115162730217, "learning_rate": 2.5303577193950724e-05, "loss": 0.04842996597290039, "step": 2296 }, { "epoch": 0.31054704003515116, "grad_norm": 0.04354666918516159, "learning_rate": 2.5298601953102785e-05, "loss": 0.041533470153808594, "step": 2297 }, { "epoch": 0.3106822368309871, "grad_norm": 0.05833108723163605, "learning_rate": 2.5293624568031008e-05, "loss": 0.04864239692687988, "step": 2298 }, { "epoch": 0.31081743362682307, "grad_norm": 0.31188562512397766, "learning_rate": 2.5288645039771697e-05, "loss": 0.11146879196166992, "step": 2299 }, { "epoch": 0.31095263042265897, "grad_norm": 0.040503934025764465, "learning_rate": 2.5283663369361624e-05, "loss": 0.04791879653930664, "step": 2300 }, { "epoch": 0.31108782721849493, "grad_norm": 0.05054056644439697, "learning_rate": 2.5278679557837998e-05, "loss": 0.047451019287109375, "step": 2301 }, { "epoch": 0.3112230240143309, "grad_norm": 0.09091488271951675, "learning_rate": 2.5273693606238474e-05, "loss": 0.08942079544067383, "step": 2302 }, { "epoch": 0.3113582208101668, "grad_norm": 0.06451370567083359, "learning_rate": 2.5268705515601164e-05, "loss": 0.048095703125, "step": 2303 }, { "epoch": 0.31149341760600274, "grad_norm": 0.19290512800216675, "learning_rate": 2.5263715286964596e-05, "loss": 0.1023675799369812, "step": 2304 }, { "epoch": 0.3116286144018387, "grad_norm": 0.062214192003011703, "learning_rate": 2.525872292136778e-05, "loss": 0.04499626159667969, "step": 2305 }, { "epoch": 0.3117638111976746, "grad_norm": 0.11830825358629227, "learning_rate": 2.525372841985014e-05, "loss": 0.07111459970474243, "step": 2306 }, { "epoch": 0.31189900799351056, "grad_norm": 0.11798103898763657, "learning_rate": 2.5248731783451567e-05, "loss": 0.0844111442565918, "step": 2307 }, { "epoch": 0.3120342047893465, "grad_norm": 0.0817176103591919, "learning_rate": 2.524373301321238e-05, "loss": 0.06750679016113281, "step": 2308 }, { "epoch": 0.3121694015851824, "grad_norm": 0.18090811371803284, "learning_rate": 2.5238732110173356e-05, "loss": 0.0880126953125, "step": 2309 }, { "epoch": 0.3123045983810184, "grad_norm": 0.12871138751506805, "learning_rate": 2.5233729075375708e-05, "loss": 0.05259895324707031, "step": 2310 }, { "epoch": 0.31243979517685433, "grad_norm": 0.10063536465167999, "learning_rate": 2.522872390986109e-05, "loss": 0.05362749099731445, "step": 2311 }, { "epoch": 0.31257499197269023, "grad_norm": 0.12158272415399551, "learning_rate": 2.522371661467161e-05, "loss": 0.07718062400817871, "step": 2312 }, { "epoch": 0.3127101887685262, "grad_norm": 0.06021657586097717, "learning_rate": 2.521870719084981e-05, "loss": 0.04691481590270996, "step": 2313 }, { "epoch": 0.31284538556436214, "grad_norm": 0.1747627556324005, "learning_rate": 2.5213695639438686e-05, "loss": 0.08052420616149902, "step": 2314 }, { "epoch": 0.31298058236019805, "grad_norm": 0.1785389631986618, "learning_rate": 2.5208681961481657e-05, "loss": 0.06527280807495117, "step": 2315 }, { "epoch": 0.313115779156034, "grad_norm": 0.17502829432487488, "learning_rate": 2.5203666158022607e-05, "loss": 0.07204568386077881, "step": 2316 }, { "epoch": 0.31325097595186996, "grad_norm": 0.08006101846694946, "learning_rate": 2.519864823010585e-05, "loss": 0.08469772338867188, "step": 2317 }, { "epoch": 0.31338617274770586, "grad_norm": 0.05874093621969223, "learning_rate": 2.5193628178776148e-05, "loss": 0.049321889877319336, "step": 2318 }, { "epoch": 0.3135213695435418, "grad_norm": 0.10377375036478043, "learning_rate": 2.5188606005078695e-05, "loss": 0.06112253665924072, "step": 2319 }, { "epoch": 0.3136565663393778, "grad_norm": 0.21512280404567719, "learning_rate": 2.518358171005914e-05, "loss": 0.09794676303863525, "step": 2320 }, { "epoch": 0.3137917631352137, "grad_norm": 0.11540849506855011, "learning_rate": 2.517855529476357e-05, "loss": 0.05525970458984375, "step": 2321 }, { "epoch": 0.31392695993104963, "grad_norm": 0.23029382526874542, "learning_rate": 2.517352676023851e-05, "loss": 0.07313638925552368, "step": 2322 }, { "epoch": 0.3140621567268856, "grad_norm": 0.08203113079071045, "learning_rate": 2.5168496107530925e-05, "loss": 0.09019255638122559, "step": 2323 }, { "epoch": 0.3141973535227215, "grad_norm": 0.1450749784708023, "learning_rate": 2.5163463337688224e-05, "loss": 0.069061279296875, "step": 2324 }, { "epoch": 0.31433255031855745, "grad_norm": 0.0744481086730957, "learning_rate": 2.515842845175826e-05, "loss": 0.052683472633361816, "step": 2325 }, { "epoch": 0.3144677471143934, "grad_norm": 0.14945203065872192, "learning_rate": 2.5153391450789326e-05, "loss": 0.08378362655639648, "step": 2326 }, { "epoch": 0.3146029439102293, "grad_norm": 0.06944262236356735, "learning_rate": 2.514835233583014e-05, "loss": 0.07415914535522461, "step": 2327 }, { "epoch": 0.31473814070606526, "grad_norm": 0.04126359522342682, "learning_rate": 2.514331110792988e-05, "loss": 0.05480104684829712, "step": 2328 }, { "epoch": 0.3148733375019012, "grad_norm": 0.09607455879449844, "learning_rate": 2.513826776813816e-05, "loss": 0.06066393852233887, "step": 2329 }, { "epoch": 0.3150085342977371, "grad_norm": 0.16600649058818817, "learning_rate": 2.5133222317505024e-05, "loss": 0.0763096809387207, "step": 2330 }, { "epoch": 0.3151437310935731, "grad_norm": 0.08547329902648926, "learning_rate": 2.5128174757080965e-05, "loss": 0.043604135513305664, "step": 2331 }, { "epoch": 0.31527892788940903, "grad_norm": 0.0659867525100708, "learning_rate": 2.5123125087916916e-05, "loss": 0.08096981048583984, "step": 2332 }, { "epoch": 0.31541412468524493, "grad_norm": 0.13382531702518463, "learning_rate": 2.5118073311064236e-05, "loss": 0.07327628135681152, "step": 2333 }, { "epoch": 0.3155493214810809, "grad_norm": 0.14889389276504517, "learning_rate": 2.5113019427574734e-05, "loss": 0.08733415603637695, "step": 2334 }, { "epoch": 0.31568451827691685, "grad_norm": 0.09740637242794037, "learning_rate": 2.5107963438500666e-05, "loss": 0.06246376037597656, "step": 2335 }, { "epoch": 0.31581971507275275, "grad_norm": 0.16206662356853485, "learning_rate": 2.51029053448947e-05, "loss": 0.07365751266479492, "step": 2336 }, { "epoch": 0.3159549118685887, "grad_norm": 0.07863498479127884, "learning_rate": 2.509784514780997e-05, "loss": 0.04028058052062988, "step": 2337 }, { "epoch": 0.31609010866442466, "grad_norm": 0.10697831958532333, "learning_rate": 2.5092782848300033e-05, "loss": 0.06625747680664062, "step": 2338 }, { "epoch": 0.3162253054602606, "grad_norm": 0.11231585592031479, "learning_rate": 2.5087718447418886e-05, "loss": 0.08422279357910156, "step": 2339 }, { "epoch": 0.3163605022560965, "grad_norm": 0.058244891464710236, "learning_rate": 2.5082651946220958e-05, "loss": 0.05232381820678711, "step": 2340 }, { "epoch": 0.3164956990519325, "grad_norm": 0.075192391872406, "learning_rate": 2.507758334576113e-05, "loss": 0.05902707576751709, "step": 2341 }, { "epoch": 0.31663089584776843, "grad_norm": 0.05973155051469803, "learning_rate": 2.5072512647094713e-05, "loss": 0.06212282180786133, "step": 2342 }, { "epoch": 0.31676609264360434, "grad_norm": 0.1123841404914856, "learning_rate": 2.506743985127745e-05, "loss": 0.06532955169677734, "step": 2343 }, { "epoch": 0.3169012894394403, "grad_norm": 0.0680035874247551, "learning_rate": 2.506236495936552e-05, "loss": 0.06137585639953613, "step": 2344 }, { "epoch": 0.31703648623527625, "grad_norm": 0.16798603534698486, "learning_rate": 2.5057287972415547e-05, "loss": 0.062421560287475586, "step": 2345 }, { "epoch": 0.31717168303111215, "grad_norm": 0.095027394592762, "learning_rate": 2.5052208891484588e-05, "loss": 0.054033756256103516, "step": 2346 }, { "epoch": 0.3173068798269481, "grad_norm": 0.07161493599414825, "learning_rate": 2.504712771763013e-05, "loss": 0.04774975776672363, "step": 2347 }, { "epoch": 0.31744207662278406, "grad_norm": 0.10778544098138809, "learning_rate": 2.5042044451910108e-05, "loss": 0.07732677459716797, "step": 2348 }, { "epoch": 0.31757727341861997, "grad_norm": 0.083549365401268, "learning_rate": 2.5036959095382875e-05, "loss": 0.049724578857421875, "step": 2349 }, { "epoch": 0.3177124702144559, "grad_norm": 0.0668855607509613, "learning_rate": 2.5031871649107233e-05, "loss": 0.04806923866271973, "step": 2350 }, { "epoch": 0.3178476670102919, "grad_norm": 0.07492992281913757, "learning_rate": 2.5026782114142426e-05, "loss": 0.05648326873779297, "step": 2351 }, { "epoch": 0.3179828638061278, "grad_norm": 0.07489626854658127, "learning_rate": 2.5021690491548107e-05, "loss": 0.0668635368347168, "step": 2352 }, { "epoch": 0.31811806060196374, "grad_norm": 0.03294162079691887, "learning_rate": 2.5016596782384387e-05, "loss": 0.035373568534851074, "step": 2353 }, { "epoch": 0.3182532573977997, "grad_norm": 0.12333255261182785, "learning_rate": 2.5011500987711804e-05, "loss": 0.06703853607177734, "step": 2354 }, { "epoch": 0.3183884541936356, "grad_norm": 0.07464449852705002, "learning_rate": 2.5006403108591325e-05, "loss": 0.04925203323364258, "step": 2355 }, { "epoch": 0.31852365098947155, "grad_norm": 0.040240079164505005, "learning_rate": 2.500130314608436e-05, "loss": 0.040866851806640625, "step": 2356 }, { "epoch": 0.3186588477853075, "grad_norm": 0.09679000824689865, "learning_rate": 2.4996201101252742e-05, "loss": 0.10651779174804688, "step": 2357 }, { "epoch": 0.3187940445811434, "grad_norm": 0.10267443209886551, "learning_rate": 2.4991096975158757e-05, "loss": 0.1043238639831543, "step": 2358 }, { "epoch": 0.31892924137697937, "grad_norm": 0.11303116381168365, "learning_rate": 2.4985990768865095e-05, "loss": 0.10076355934143066, "step": 2359 }, { "epoch": 0.3190644381728153, "grad_norm": 0.0692182406783104, "learning_rate": 2.4980882483434904e-05, "loss": 0.06462359428405762, "step": 2360 }, { "epoch": 0.3191996349686512, "grad_norm": 0.0846063494682312, "learning_rate": 2.497577211993176e-05, "loss": 0.04369497299194336, "step": 2361 }, { "epoch": 0.3193348317644872, "grad_norm": 0.022632962092757225, "learning_rate": 2.4970659679419658e-05, "loss": 0.025502681732177734, "step": 2362 }, { "epoch": 0.31947002856032314, "grad_norm": 0.11357161402702332, "learning_rate": 2.496554516296304e-05, "loss": 0.08234643936157227, "step": 2363 }, { "epoch": 0.31960522535615904, "grad_norm": 0.06699955463409424, "learning_rate": 2.4960428571626784e-05, "loss": 0.04887890815734863, "step": 2364 }, { "epoch": 0.319740422151995, "grad_norm": 0.10973644256591797, "learning_rate": 2.4955309906476177e-05, "loss": 0.08873987197875977, "step": 2365 }, { "epoch": 0.31987561894783095, "grad_norm": 0.16547797620296478, "learning_rate": 2.495018916857696e-05, "loss": 0.06520485877990723, "step": 2366 }, { "epoch": 0.32001081574366685, "grad_norm": 0.10857252776622772, "learning_rate": 2.4945066358995304e-05, "loss": 0.045226216316223145, "step": 2367 }, { "epoch": 0.3201460125395028, "grad_norm": 0.2102724313735962, "learning_rate": 2.493994147879779e-05, "loss": 0.07881355285644531, "step": 2368 }, { "epoch": 0.32028120933533877, "grad_norm": 0.09104569256305695, "learning_rate": 2.4934814529051458e-05, "loss": 0.05008077621459961, "step": 2369 }, { "epoch": 0.32041640613117467, "grad_norm": 0.06916666030883789, "learning_rate": 2.4929685510823763e-05, "loss": 0.06512880325317383, "step": 2370 }, { "epoch": 0.3205516029270106, "grad_norm": 0.08778955787420273, "learning_rate": 2.492455442518259e-05, "loss": 0.08102905750274658, "step": 2371 }, { "epoch": 0.3206867997228466, "grad_norm": 0.09968572854995728, "learning_rate": 2.4919421273196262e-05, "loss": 0.07334661483764648, "step": 2372 }, { "epoch": 0.3208219965186825, "grad_norm": 0.07584325224161148, "learning_rate": 2.4914286055933527e-05, "loss": 0.03999125957489014, "step": 2373 }, { "epoch": 0.32095719331451844, "grad_norm": 0.07697533816099167, "learning_rate": 2.4909148774463572e-05, "loss": 0.061720848083496094, "step": 2374 }, { "epoch": 0.3210923901103544, "grad_norm": 0.06482286006212234, "learning_rate": 2.4904009429855992e-05, "loss": 0.049068570137023926, "step": 2375 }, { "epoch": 0.3212275869061903, "grad_norm": 0.12293051183223724, "learning_rate": 2.4898868023180844e-05, "loss": 0.040645599365234375, "step": 2376 }, { "epoch": 0.32136278370202626, "grad_norm": 0.03160116448998451, "learning_rate": 2.4893724555508575e-05, "loss": 0.03198385238647461, "step": 2377 }, { "epoch": 0.3214979804978622, "grad_norm": 0.1189572736620903, "learning_rate": 2.4888579027910105e-05, "loss": 0.052210330963134766, "step": 2378 }, { "epoch": 0.3216331772936981, "grad_norm": 0.057750605046749115, "learning_rate": 2.4883431441456738e-05, "loss": 0.05643749237060547, "step": 2379 }, { "epoch": 0.32176837408953407, "grad_norm": 0.08462224155664444, "learning_rate": 2.4878281797220244e-05, "loss": 0.08265113830566406, "step": 2380 }, { "epoch": 0.32190357088537, "grad_norm": 0.1276833564043045, "learning_rate": 2.4873130096272805e-05, "loss": 0.061980247497558594, "step": 2381 }, { "epoch": 0.32203876768120593, "grad_norm": 0.2143513560295105, "learning_rate": 2.4867976339687026e-05, "loss": 0.08031105995178223, "step": 2382 }, { "epoch": 0.3221739644770419, "grad_norm": 0.03482074290513992, "learning_rate": 2.4862820528535955e-05, "loss": 0.04183077812194824, "step": 2383 }, { "epoch": 0.32230916127287784, "grad_norm": 0.07347599416971207, "learning_rate": 2.4857662663893054e-05, "loss": 0.09386968612670898, "step": 2384 }, { "epoch": 0.3224443580687138, "grad_norm": 0.09324394911527634, "learning_rate": 2.485250274683222e-05, "loss": 0.06085968017578125, "step": 2385 }, { "epoch": 0.3225795548645497, "grad_norm": 0.05152599513530731, "learning_rate": 2.4847340778427772e-05, "loss": 0.05889081954956055, "step": 2386 }, { "epoch": 0.32271475166038566, "grad_norm": 0.0555427223443985, "learning_rate": 2.484217675975446e-05, "loss": 0.039900779724121094, "step": 2387 }, { "epoch": 0.3228499484562216, "grad_norm": 0.07489274442195892, "learning_rate": 2.4837010691887466e-05, "loss": 0.05003166198730469, "step": 2388 }, { "epoch": 0.3229851452520575, "grad_norm": 0.06637747585773468, "learning_rate": 2.4831842575902383e-05, "loss": 0.08872222900390625, "step": 2389 }, { "epoch": 0.32312034204789347, "grad_norm": 0.05388886108994484, "learning_rate": 2.482667241287525e-05, "loss": 0.05496954917907715, "step": 2390 }, { "epoch": 0.32325553884372943, "grad_norm": 0.059940699487924576, "learning_rate": 2.4821500203882517e-05, "loss": 0.0766594409942627, "step": 2391 }, { "epoch": 0.32339073563956533, "grad_norm": 0.11315230280160904, "learning_rate": 2.4816325950001067e-05, "loss": 0.1268310546875, "step": 2392 }, { "epoch": 0.3235259324354013, "grad_norm": 0.09025148302316666, "learning_rate": 2.4811149652308205e-05, "loss": 0.06718683242797852, "step": 2393 }, { "epoch": 0.32366112923123724, "grad_norm": 0.1144208163022995, "learning_rate": 2.480597131188167e-05, "loss": 0.08830738067626953, "step": 2394 }, { "epoch": 0.32379632602707314, "grad_norm": 0.10639499872922897, "learning_rate": 2.4800790929799614e-05, "loss": 0.07596683502197266, "step": 2395 }, { "epoch": 0.3239315228229091, "grad_norm": 0.05590733513236046, "learning_rate": 2.4795608507140623e-05, "loss": 0.03989553451538086, "step": 2396 }, { "epoch": 0.32406671961874506, "grad_norm": 0.178768128156662, "learning_rate": 2.4790424044983705e-05, "loss": 0.0841226577758789, "step": 2397 }, { "epoch": 0.32420191641458096, "grad_norm": 0.09750822186470032, "learning_rate": 2.4785237544408288e-05, "loss": 0.08683586120605469, "step": 2398 }, { "epoch": 0.3243371132104169, "grad_norm": 0.2906476557254791, "learning_rate": 2.478004900649424e-05, "loss": 0.07411885261535645, "step": 2399 }, { "epoch": 0.3244723100062529, "grad_norm": 0.1681055724620819, "learning_rate": 2.477485843232183e-05, "loss": 0.05577874183654785, "step": 2400 }, { "epoch": 0.3246075068020888, "grad_norm": 0.055169619619846344, "learning_rate": 2.476966582297177e-05, "loss": 0.045427799224853516, "step": 2401 }, { "epoch": 0.32474270359792473, "grad_norm": 0.09917508810758591, "learning_rate": 2.4764471179525188e-05, "loss": 0.06054329872131348, "step": 2402 }, { "epoch": 0.3248779003937607, "grad_norm": 0.17921628057956696, "learning_rate": 2.4759274503063632e-05, "loss": 0.12224006652832031, "step": 2403 }, { "epoch": 0.3250130971895966, "grad_norm": 0.10945697873830795, "learning_rate": 2.4754075794669088e-05, "loss": 0.06194591522216797, "step": 2404 }, { "epoch": 0.32514829398543255, "grad_norm": 0.14536145329475403, "learning_rate": 2.4748875055423942e-05, "loss": 0.09374499320983887, "step": 2405 }, { "epoch": 0.3252834907812685, "grad_norm": 0.17457795143127441, "learning_rate": 2.4743672286411027e-05, "loss": 0.055704593658447266, "step": 2406 }, { "epoch": 0.3254186875771044, "grad_norm": 0.07643047720193863, "learning_rate": 2.4738467488713582e-05, "loss": 0.03910708427429199, "step": 2407 }, { "epoch": 0.32555388437294036, "grad_norm": 0.07738658785820007, "learning_rate": 2.473326066341527e-05, "loss": 0.06092846393585205, "step": 2408 }, { "epoch": 0.3256890811687763, "grad_norm": 0.19982022047042847, "learning_rate": 2.4728051811600184e-05, "loss": 0.10982227325439453, "step": 2409 }, { "epoch": 0.3258242779646122, "grad_norm": 0.09709753096103668, "learning_rate": 2.4722840934352838e-05, "loss": 0.08084249496459961, "step": 2410 }, { "epoch": 0.3259594747604482, "grad_norm": 0.1849898248910904, "learning_rate": 2.471762803275816e-05, "loss": 0.08124637603759766, "step": 2411 }, { "epoch": 0.32609467155628413, "grad_norm": 0.35742610692977905, "learning_rate": 2.4712413107901504e-05, "loss": 0.08498001098632812, "step": 2412 }, { "epoch": 0.32622986835212003, "grad_norm": 0.15750548243522644, "learning_rate": 2.470719616086865e-05, "loss": 0.08361291885375977, "step": 2413 }, { "epoch": 0.326365065147956, "grad_norm": 0.0812111347913742, "learning_rate": 2.4701977192745785e-05, "loss": 0.0506134033203125, "step": 2414 }, { "epoch": 0.32650026194379195, "grad_norm": 0.08652845025062561, "learning_rate": 2.4696756204619535e-05, "loss": 0.06915473937988281, "step": 2415 }, { "epoch": 0.32663545873962785, "grad_norm": 0.0650532990694046, "learning_rate": 2.469153319757693e-05, "loss": 0.05527293682098389, "step": 2416 }, { "epoch": 0.3267706555354638, "grad_norm": 0.06082440912723541, "learning_rate": 2.4686308172705433e-05, "loss": 0.06781196594238281, "step": 2417 }, { "epoch": 0.32690585233129976, "grad_norm": 0.10656819492578506, "learning_rate": 2.4681081131092926e-05, "loss": 0.044648170471191406, "step": 2418 }, { "epoch": 0.32704104912713566, "grad_norm": 0.07517208158969879, "learning_rate": 2.467585207382769e-05, "loss": 0.07143878936767578, "step": 2419 }, { "epoch": 0.3271762459229716, "grad_norm": 0.07864128798246384, "learning_rate": 2.4670621001998467e-05, "loss": 0.0643090009689331, "step": 2420 }, { "epoch": 0.3273114427188076, "grad_norm": 0.09423500299453735, "learning_rate": 2.466538791669437e-05, "loss": 0.050605058670043945, "step": 2421 }, { "epoch": 0.3274466395146435, "grad_norm": 0.08596308529376984, "learning_rate": 2.4660152819004973e-05, "loss": 0.06726741790771484, "step": 2422 }, { "epoch": 0.32758183631047944, "grad_norm": 0.08023789525032043, "learning_rate": 2.4654915710020246e-05, "loss": 0.0673479437828064, "step": 2423 }, { "epoch": 0.3277170331063154, "grad_norm": 0.06163188815116882, "learning_rate": 2.464967659083058e-05, "loss": 0.06528234481811523, "step": 2424 }, { "epoch": 0.3278522299021513, "grad_norm": 0.05535079538822174, "learning_rate": 2.464443546252679e-05, "loss": 0.0523221492767334, "step": 2425 }, { "epoch": 0.32798742669798725, "grad_norm": 0.08049971610307693, "learning_rate": 2.4639192326200104e-05, "loss": 0.07275652885437012, "step": 2426 }, { "epoch": 0.3281226234938232, "grad_norm": 0.2276969701051712, "learning_rate": 2.463394718294218e-05, "loss": 0.07629859447479248, "step": 2427 }, { "epoch": 0.3282578202896591, "grad_norm": 0.11948155611753464, "learning_rate": 2.4628700033845072e-05, "loss": 0.06624412536621094, "step": 2428 }, { "epoch": 0.32839301708549506, "grad_norm": 0.16820286214351654, "learning_rate": 2.4623450880001268e-05, "loss": 0.04695701599121094, "step": 2429 }, { "epoch": 0.328528213881331, "grad_norm": 0.07818910479545593, "learning_rate": 2.4618199722503676e-05, "loss": 0.05118513107299805, "step": 2430 }, { "epoch": 0.328663410677167, "grad_norm": 0.15611349046230316, "learning_rate": 2.4612946562445613e-05, "loss": 0.07812213897705078, "step": 2431 }, { "epoch": 0.3287986074730029, "grad_norm": 0.07964304089546204, "learning_rate": 2.460769140092081e-05, "loss": 0.05789899826049805, "step": 2432 }, { "epoch": 0.32893380426883884, "grad_norm": 0.16050387918949127, "learning_rate": 2.460243423902342e-05, "loss": 0.06040549278259277, "step": 2433 }, { "epoch": 0.3290690010646748, "grad_norm": 0.06264235824346542, "learning_rate": 2.459717507784802e-05, "loss": 0.05280160903930664, "step": 2434 }, { "epoch": 0.3292041978605107, "grad_norm": 0.16657042503356934, "learning_rate": 2.459191391848959e-05, "loss": 0.08240318298339844, "step": 2435 }, { "epoch": 0.32933939465634665, "grad_norm": 0.09569832682609558, "learning_rate": 2.4586650762043538e-05, "loss": 0.06727290153503418, "step": 2436 }, { "epoch": 0.3294745914521826, "grad_norm": 0.07165045291185379, "learning_rate": 2.4581385609605665e-05, "loss": 0.060944557189941406, "step": 2437 }, { "epoch": 0.3296097882480185, "grad_norm": 0.06087822467088699, "learning_rate": 2.4576118462272218e-05, "loss": 0.06873607635498047, "step": 2438 }, { "epoch": 0.32974498504385447, "grad_norm": 0.24831995368003845, "learning_rate": 2.4570849321139836e-05, "loss": 0.09560632705688477, "step": 2439 }, { "epoch": 0.3298801818396904, "grad_norm": 0.10931834578514099, "learning_rate": 2.4565578187305596e-05, "loss": 0.05682849884033203, "step": 2440 }, { "epoch": 0.3300153786355263, "grad_norm": 0.08153682947158813, "learning_rate": 2.456030506186696e-05, "loss": 0.07766056060791016, "step": 2441 }, { "epoch": 0.3301505754313623, "grad_norm": 0.06826280057430267, "learning_rate": 2.4555029945921832e-05, "loss": 0.07746005058288574, "step": 2442 }, { "epoch": 0.33028577222719824, "grad_norm": 0.08454151451587677, "learning_rate": 2.4549752840568516e-05, "loss": 0.07732677459716797, "step": 2443 }, { "epoch": 0.33042096902303414, "grad_norm": 0.12010058760643005, "learning_rate": 2.4544473746905733e-05, "loss": 0.06531429290771484, "step": 2444 }, { "epoch": 0.3305561658188701, "grad_norm": 0.09992405027151108, "learning_rate": 2.4539192666032617e-05, "loss": 0.07818913459777832, "step": 2445 }, { "epoch": 0.33069136261470605, "grad_norm": 0.18263866007328033, "learning_rate": 2.4533909599048718e-05, "loss": 0.090087890625, "step": 2446 }, { "epoch": 0.33082655941054195, "grad_norm": 0.28081822395324707, "learning_rate": 2.4528624547054003e-05, "loss": 0.10488128662109375, "step": 2447 }, { "epoch": 0.3309617562063779, "grad_norm": 0.1193016767501831, "learning_rate": 2.4523337511148843e-05, "loss": 0.05753660202026367, "step": 2448 }, { "epoch": 0.33109695300221387, "grad_norm": 0.1351795643568039, "learning_rate": 2.4518048492434028e-05, "loss": 0.0748896598815918, "step": 2449 }, { "epoch": 0.33123214979804977, "grad_norm": 0.07759327441453934, "learning_rate": 2.4512757492010762e-05, "loss": 0.04728102684020996, "step": 2450 }, { "epoch": 0.3313673465938857, "grad_norm": 0.07855939120054245, "learning_rate": 2.4507464510980652e-05, "loss": 0.037827253341674805, "step": 2451 }, { "epoch": 0.3315025433897217, "grad_norm": 0.04127610847353935, "learning_rate": 2.450216955044574e-05, "loss": 0.05664873123168945, "step": 2452 }, { "epoch": 0.3316377401855576, "grad_norm": 0.11055218428373337, "learning_rate": 2.449687261150845e-05, "loss": 0.07512664794921875, "step": 2453 }, { "epoch": 0.33177293698139354, "grad_norm": 0.06807762384414673, "learning_rate": 2.449157369527164e-05, "loss": 0.03923296928405762, "step": 2454 }, { "epoch": 0.3319081337772295, "grad_norm": 0.07681751996278763, "learning_rate": 2.448627280283857e-05, "loss": 0.05240321159362793, "step": 2455 }, { "epoch": 0.3320433305730654, "grad_norm": 0.053800538182258606, "learning_rate": 2.4480969935312917e-05, "loss": 0.04906940460205078, "step": 2456 }, { "epoch": 0.33217852736890136, "grad_norm": 0.12979096174240112, "learning_rate": 2.4475665093798766e-05, "loss": 0.08299636840820312, "step": 2457 }, { "epoch": 0.3323137241647373, "grad_norm": 0.05039919912815094, "learning_rate": 2.447035827940061e-05, "loss": 0.04596900939941406, "step": 2458 }, { "epoch": 0.3324489209605732, "grad_norm": 0.35557109117507935, "learning_rate": 2.4465049493223356e-05, "loss": 0.09499406814575195, "step": 2459 }, { "epoch": 0.33258411775640917, "grad_norm": 0.058588236570358276, "learning_rate": 2.4459738736372327e-05, "loss": 0.06646537780761719, "step": 2460 }, { "epoch": 0.3327193145522451, "grad_norm": 0.22273430228233337, "learning_rate": 2.4454426009953252e-05, "loss": 0.09099411964416504, "step": 2461 }, { "epoch": 0.33285451134808103, "grad_norm": 0.02887437492609024, "learning_rate": 2.4449111315072254e-05, "loss": 0.029601454734802246, "step": 2462 }, { "epoch": 0.332989708143917, "grad_norm": 0.03981492668390274, "learning_rate": 2.44437946528359e-05, "loss": 0.02615034580230713, "step": 2463 }, { "epoch": 0.33312490493975294, "grad_norm": 0.12644590437412262, "learning_rate": 2.4438476024351138e-05, "loss": 0.049398183822631836, "step": 2464 }, { "epoch": 0.33326010173558884, "grad_norm": 0.19335535168647766, "learning_rate": 2.4433155430725333e-05, "loss": 0.07953166961669922, "step": 2465 }, { "epoch": 0.3333952985314248, "grad_norm": 0.09825469553470612, "learning_rate": 2.4427832873066262e-05, "loss": 0.11213040351867676, "step": 2466 }, { "epoch": 0.33353049532726076, "grad_norm": 0.07603967934846878, "learning_rate": 2.4422508352482113e-05, "loss": 0.06043732166290283, "step": 2467 }, { "epoch": 0.33366569212309666, "grad_norm": 0.039789531379938126, "learning_rate": 2.441718187008148e-05, "loss": 0.037857770919799805, "step": 2468 }, { "epoch": 0.3338008889189326, "grad_norm": 0.07418537139892578, "learning_rate": 2.441185342697336e-05, "loss": 0.043689846992492676, "step": 2469 }, { "epoch": 0.33393608571476857, "grad_norm": 0.09936017543077469, "learning_rate": 2.440652302426717e-05, "loss": 0.08456897735595703, "step": 2470 }, { "epoch": 0.3340712825106045, "grad_norm": 0.0806683599948883, "learning_rate": 2.440119066307272e-05, "loss": 0.06488752365112305, "step": 2471 }, { "epoch": 0.33420647930644043, "grad_norm": 0.08230933547019958, "learning_rate": 2.4395856344500244e-05, "loss": 0.08169841766357422, "step": 2472 }, { "epoch": 0.3343416761022764, "grad_norm": 0.08933542668819427, "learning_rate": 2.4390520069660377e-05, "loss": 0.07372283935546875, "step": 2473 }, { "epoch": 0.3344768728981123, "grad_norm": 0.18069317936897278, "learning_rate": 2.4385181839664146e-05, "loss": 0.07696413993835449, "step": 2474 }, { "epoch": 0.33461206969394824, "grad_norm": 0.041141998022794724, "learning_rate": 2.437984165562301e-05, "loss": 0.05206632614135742, "step": 2475 }, { "epoch": 0.3347472664897842, "grad_norm": 0.10608562082052231, "learning_rate": 2.4374499518648827e-05, "loss": 0.08230805397033691, "step": 2476 }, { "epoch": 0.33488246328562016, "grad_norm": 0.13384272158145905, "learning_rate": 2.436915542985385e-05, "loss": 0.09208157658576965, "step": 2477 }, { "epoch": 0.33501766008145606, "grad_norm": 0.1401708722114563, "learning_rate": 2.436380939035075e-05, "loss": 0.0727243423461914, "step": 2478 }, { "epoch": 0.335152856877292, "grad_norm": 0.0704093724489212, "learning_rate": 2.43584614012526e-05, "loss": 0.05974757671356201, "step": 2479 }, { "epoch": 0.335288053673128, "grad_norm": 0.08460766077041626, "learning_rate": 2.4353111463672882e-05, "loss": 0.054102540016174316, "step": 2480 }, { "epoch": 0.3354232504689639, "grad_norm": 0.0492757223546505, "learning_rate": 2.4347759578725482e-05, "loss": 0.040642738342285156, "step": 2481 }, { "epoch": 0.33555844726479983, "grad_norm": 0.07080303132534027, "learning_rate": 2.4342405747524685e-05, "loss": 0.06881427764892578, "step": 2482 }, { "epoch": 0.3356936440606358, "grad_norm": 0.1043875589966774, "learning_rate": 2.4337049971185194e-05, "loss": 0.09662342071533203, "step": 2483 }, { "epoch": 0.3358288408564717, "grad_norm": 0.07296701520681381, "learning_rate": 2.433169225082211e-05, "loss": 0.057660818099975586, "step": 2484 }, { "epoch": 0.33596403765230765, "grad_norm": 0.07778462767601013, "learning_rate": 2.432633258755093e-05, "loss": 0.06607341766357422, "step": 2485 }, { "epoch": 0.3360992344481436, "grad_norm": 0.09076784551143646, "learning_rate": 2.432097098248758e-05, "loss": 0.06475687026977539, "step": 2486 }, { "epoch": 0.3362344312439795, "grad_norm": 0.09413054585456848, "learning_rate": 2.4315607436748362e-05, "loss": 0.053586483001708984, "step": 2487 }, { "epoch": 0.33636962803981546, "grad_norm": 0.08732490241527557, "learning_rate": 2.4310241951449997e-05, "loss": 0.049224853515625, "step": 2488 }, { "epoch": 0.3365048248356514, "grad_norm": 0.09103147685527802, "learning_rate": 2.4304874527709614e-05, "loss": 0.05713611841201782, "step": 2489 }, { "epoch": 0.3366400216314873, "grad_norm": 0.06709442287683487, "learning_rate": 2.429950516664473e-05, "loss": 0.05126011371612549, "step": 2490 }, { "epoch": 0.3367752184273233, "grad_norm": 0.12592142820358276, "learning_rate": 2.4294133869373284e-05, "loss": 0.05962419509887695, "step": 2491 }, { "epoch": 0.33691041522315923, "grad_norm": 0.055713240057229996, "learning_rate": 2.42887606370136e-05, "loss": 0.03740692138671875, "step": 2492 }, { "epoch": 0.33704561201899513, "grad_norm": 0.10131179541349411, "learning_rate": 2.428338547068442e-05, "loss": 0.057768821716308594, "step": 2493 }, { "epoch": 0.3371808088148311, "grad_norm": 0.0491848848760128, "learning_rate": 2.427800837150488e-05, "loss": 0.03424358367919922, "step": 2494 }, { "epoch": 0.33731600561066705, "grad_norm": 0.09536954760551453, "learning_rate": 2.4272629340594518e-05, "loss": 0.07404160499572754, "step": 2495 }, { "epoch": 0.33745120240650295, "grad_norm": 0.05238153040409088, "learning_rate": 2.426724837907328e-05, "loss": 0.06235027313232422, "step": 2496 }, { "epoch": 0.3375863992023389, "grad_norm": 0.037997812032699585, "learning_rate": 2.4261865488061512e-05, "loss": 0.03674960136413574, "step": 2497 }, { "epoch": 0.33772159599817486, "grad_norm": 0.08411721885204315, "learning_rate": 2.4256480668679958e-05, "loss": 0.031391799449920654, "step": 2498 }, { "epoch": 0.33785679279401076, "grad_norm": 0.06932103633880615, "learning_rate": 2.4251093922049766e-05, "loss": 0.052609920501708984, "step": 2499 }, { "epoch": 0.3379919895898467, "grad_norm": 0.08212162554264069, "learning_rate": 2.4245705249292494e-05, "loss": 0.06226414442062378, "step": 2500 }, { "epoch": 0.3381271863856827, "grad_norm": 0.17662891745567322, "learning_rate": 2.4240314651530073e-05, "loss": 0.07156133651733398, "step": 2501 }, { "epoch": 0.3382623831815186, "grad_norm": 0.07636614143848419, "learning_rate": 2.4234922129884873e-05, "loss": 0.06734883785247803, "step": 2502 }, { "epoch": 0.33839757997735453, "grad_norm": 0.09068209677934647, "learning_rate": 2.4229527685479644e-05, "loss": 0.058286190032958984, "step": 2503 }, { "epoch": 0.3385327767731905, "grad_norm": 0.10690157860517502, "learning_rate": 2.4224131319437523e-05, "loss": 0.09006214141845703, "step": 2504 }, { "epoch": 0.3386679735690264, "grad_norm": 0.15770073235034943, "learning_rate": 2.421873303288208e-05, "loss": 0.07337117195129395, "step": 2505 }, { "epoch": 0.33880317036486235, "grad_norm": 0.20079083740711212, "learning_rate": 2.4213332826937255e-05, "loss": 0.07252883911132812, "step": 2506 }, { "epoch": 0.3389383671606983, "grad_norm": 0.06770540773868561, "learning_rate": 2.4207930702727404e-05, "loss": 0.06073760986328125, "step": 2507 }, { "epoch": 0.3390735639565342, "grad_norm": 0.23615293204784393, "learning_rate": 2.420252666137728e-05, "loss": 0.09350407123565674, "step": 2508 }, { "epoch": 0.33920876075237016, "grad_norm": 0.08327589184045792, "learning_rate": 2.419712070401203e-05, "loss": 0.06562519073486328, "step": 2509 }, { "epoch": 0.3393439575482061, "grad_norm": 0.072517529129982, "learning_rate": 2.4191712831757203e-05, "loss": 0.06211280822753906, "step": 2510 }, { "epoch": 0.339479154344042, "grad_norm": 0.21241813898086548, "learning_rate": 2.418630304573875e-05, "loss": 0.0859375, "step": 2511 }, { "epoch": 0.339614351139878, "grad_norm": 0.11263559013605118, "learning_rate": 2.418089134708302e-05, "loss": 0.05048847198486328, "step": 2512 }, { "epoch": 0.33974954793571394, "grad_norm": 0.13258202373981476, "learning_rate": 2.4175477736916743e-05, "loss": 0.05666661262512207, "step": 2513 }, { "epoch": 0.33988474473154984, "grad_norm": 0.09894821047782898, "learning_rate": 2.4170062216367082e-05, "loss": 0.059726715087890625, "step": 2514 }, { "epoch": 0.3400199415273858, "grad_norm": 0.1272960901260376, "learning_rate": 2.416464478656156e-05, "loss": 0.07559537887573242, "step": 2515 }, { "epoch": 0.34015513832322175, "grad_norm": 0.2235560417175293, "learning_rate": 2.4159225448628123e-05, "loss": 0.0636606216430664, "step": 2516 }, { "epoch": 0.34029033511905765, "grad_norm": 0.11577052623033524, "learning_rate": 2.4153804203695103e-05, "loss": 0.07288074493408203, "step": 2517 }, { "epoch": 0.3404255319148936, "grad_norm": 0.22524523735046387, "learning_rate": 2.4148381052891236e-05, "loss": 0.08819818496704102, "step": 2518 }, { "epoch": 0.34056072871072957, "grad_norm": 0.2137891799211502, "learning_rate": 2.4142955997345648e-05, "loss": 0.07626008987426758, "step": 2519 }, { "epoch": 0.34069592550656547, "grad_norm": 0.08030815422534943, "learning_rate": 2.4137529038187864e-05, "loss": 0.05479228496551514, "step": 2520 }, { "epoch": 0.3408311223024014, "grad_norm": 0.20790722966194153, "learning_rate": 2.413210017654781e-05, "loss": 0.07788324356079102, "step": 2521 }, { "epoch": 0.3409663190982374, "grad_norm": 0.24542152881622314, "learning_rate": 2.4126669413555802e-05, "loss": 0.0837564468383789, "step": 2522 }, { "epoch": 0.34110151589407334, "grad_norm": 0.12429172545671463, "learning_rate": 2.4121236750342548e-05, "loss": 0.038735151290893555, "step": 2523 }, { "epoch": 0.34123671268990924, "grad_norm": 0.256369411945343, "learning_rate": 2.4115802188039165e-05, "loss": 0.07303023338317871, "step": 2524 }, { "epoch": 0.3413719094857452, "grad_norm": 0.1187695786356926, "learning_rate": 2.4110365727777156e-05, "loss": 0.037609100341796875, "step": 2525 }, { "epoch": 0.34150710628158115, "grad_norm": 0.11538417637348175, "learning_rate": 2.410492737068842e-05, "loss": 0.07685530185699463, "step": 2526 }, { "epoch": 0.34164230307741705, "grad_norm": 0.09548041969537735, "learning_rate": 2.409948711790525e-05, "loss": 0.06582450866699219, "step": 2527 }, { "epoch": 0.341777499873253, "grad_norm": 0.3541737198829651, "learning_rate": 2.4094044970560336e-05, "loss": 0.08763790130615234, "step": 2528 }, { "epoch": 0.34191269666908897, "grad_norm": 0.25250816345214844, "learning_rate": 2.4088600929786767e-05, "loss": 0.07401752471923828, "step": 2529 }, { "epoch": 0.34204789346492487, "grad_norm": 0.05376897752285004, "learning_rate": 2.408315499671802e-05, "loss": 0.041963815689086914, "step": 2530 }, { "epoch": 0.3421830902607608, "grad_norm": 0.05698872357606888, "learning_rate": 2.407770717248796e-05, "loss": 0.06352472305297852, "step": 2531 }, { "epoch": 0.3423182870565968, "grad_norm": 0.2583656907081604, "learning_rate": 2.407225745823086e-05, "loss": 0.07767629623413086, "step": 2532 }, { "epoch": 0.3424534838524327, "grad_norm": 0.11859942227602005, "learning_rate": 2.4066805855081378e-05, "loss": 0.0802459716796875, "step": 2533 }, { "epoch": 0.34258868064826864, "grad_norm": 0.20530535280704498, "learning_rate": 2.406135236417457e-05, "loss": 0.056284546852111816, "step": 2534 }, { "epoch": 0.3427238774441046, "grad_norm": 0.2839643955230713, "learning_rate": 2.4055896986645875e-05, "loss": 0.07913827896118164, "step": 2535 }, { "epoch": 0.3428590742399405, "grad_norm": 0.27108123898506165, "learning_rate": 2.4050439723631136e-05, "loss": 0.07863044738769531, "step": 2536 }, { "epoch": 0.34299427103577645, "grad_norm": 0.14025717973709106, "learning_rate": 2.404498057626659e-05, "loss": 0.07633376121520996, "step": 2537 }, { "epoch": 0.3431294678316124, "grad_norm": 0.21891368925571442, "learning_rate": 2.4039519545688848e-05, "loss": 0.07482755184173584, "step": 2538 }, { "epoch": 0.3432646646274483, "grad_norm": 0.07436855882406235, "learning_rate": 2.4034056633034932e-05, "loss": 0.06477475166320801, "step": 2539 }, { "epoch": 0.34339986142328427, "grad_norm": 0.11428110301494598, "learning_rate": 2.402859183944225e-05, "loss": 0.07420492172241211, "step": 2540 }, { "epoch": 0.3435350582191202, "grad_norm": 0.11608269065618515, "learning_rate": 2.4023125166048597e-05, "loss": 0.07308387756347656, "step": 2541 }, { "epoch": 0.3436702550149561, "grad_norm": 0.3174440860748291, "learning_rate": 2.401765661399218e-05, "loss": 0.08926451206207275, "step": 2542 }, { "epoch": 0.3438054518107921, "grad_norm": 0.13375575840473175, "learning_rate": 2.4012186184411556e-05, "loss": 0.0831151008605957, "step": 2543 }, { "epoch": 0.34394064860662804, "grad_norm": 0.12427858263254166, "learning_rate": 2.400671387844571e-05, "loss": 0.05663442611694336, "step": 2544 }, { "epoch": 0.34407584540246394, "grad_norm": 0.07700292766094208, "learning_rate": 2.4001239697234008e-05, "loss": 0.06965017318725586, "step": 2545 }, { "epoch": 0.3442110421982999, "grad_norm": 0.09709370881319046, "learning_rate": 2.3995763641916205e-05, "loss": 0.038106679916381836, "step": 2546 }, { "epoch": 0.34434623899413586, "grad_norm": 0.184953510761261, "learning_rate": 2.3990285713632436e-05, "loss": 0.0747835636138916, "step": 2547 }, { "epoch": 0.34448143578997176, "grad_norm": 0.1404159814119339, "learning_rate": 2.398480591352324e-05, "loss": 0.0558781623840332, "step": 2548 }, { "epoch": 0.3446166325858077, "grad_norm": 0.0346747562289238, "learning_rate": 2.3979324242729537e-05, "loss": 0.03399914503097534, "step": 2549 }, { "epoch": 0.34475182938164367, "grad_norm": 0.08864232152700424, "learning_rate": 2.3973840702392646e-05, "loss": 0.09303855895996094, "step": 2550 }, { "epoch": 0.34488702617747957, "grad_norm": 0.154584139585495, "learning_rate": 2.3968355293654267e-05, "loss": 0.06567645072937012, "step": 2551 }, { "epoch": 0.34502222297331553, "grad_norm": 0.17533770203590393, "learning_rate": 2.396286801765649e-05, "loss": 0.07946348190307617, "step": 2552 }, { "epoch": 0.3451574197691515, "grad_norm": 0.11433087289333344, "learning_rate": 2.3957378875541795e-05, "loss": 0.06172299385070801, "step": 2553 }, { "epoch": 0.3452926165649874, "grad_norm": 0.08917225152254105, "learning_rate": 2.395188786845305e-05, "loss": 0.05142402648925781, "step": 2554 }, { "epoch": 0.34542781336082334, "grad_norm": 0.11143828928470612, "learning_rate": 2.3946394997533516e-05, "loss": 0.0695028305053711, "step": 2555 }, { "epoch": 0.3455630101566593, "grad_norm": 0.19000935554504395, "learning_rate": 2.3940900263926833e-05, "loss": 0.11290311813354492, "step": 2556 }, { "epoch": 0.3456982069524952, "grad_norm": 0.09674094617366791, "learning_rate": 2.393540366877704e-05, "loss": 0.06610798835754395, "step": 2557 }, { "epoch": 0.34583340374833116, "grad_norm": 0.057298630475997925, "learning_rate": 2.392990521322855e-05, "loss": 0.04901587963104248, "step": 2558 }, { "epoch": 0.3459686005441671, "grad_norm": 0.09472622722387314, "learning_rate": 2.392440489842618e-05, "loss": 0.053554654121398926, "step": 2559 }, { "epoch": 0.346103797340003, "grad_norm": 0.2285137176513672, "learning_rate": 2.3918902725515118e-05, "loss": 0.04955458641052246, "step": 2560 }, { "epoch": 0.346238994135839, "grad_norm": 0.10635851323604584, "learning_rate": 2.391339869564094e-05, "loss": 0.09121417999267578, "step": 2561 }, { "epoch": 0.34637419093167493, "grad_norm": 0.19390016794204712, "learning_rate": 2.3907892809949628e-05, "loss": 0.079193115234375, "step": 2562 }, { "epoch": 0.34650938772751083, "grad_norm": 0.07630050927400589, "learning_rate": 2.390238506958753e-05, "loss": 0.0725393295288086, "step": 2563 }, { "epoch": 0.3466445845233468, "grad_norm": 0.05220980942249298, "learning_rate": 2.3896875475701387e-05, "loss": 0.043163299560546875, "step": 2564 }, { "epoch": 0.34677978131918274, "grad_norm": 0.14900819957256317, "learning_rate": 2.3891364029438323e-05, "loss": 0.06634807586669922, "step": 2565 }, { "epoch": 0.3469149781150187, "grad_norm": 0.30286872386932373, "learning_rate": 2.3885850731945857e-05, "loss": 0.09769010543823242, "step": 2566 }, { "epoch": 0.3470501749108546, "grad_norm": 0.2032707780599594, "learning_rate": 2.3880335584371884e-05, "loss": 0.08674049377441406, "step": 2567 }, { "epoch": 0.34718537170669056, "grad_norm": 0.06512822210788727, "learning_rate": 2.387481858786468e-05, "loss": 0.06437945365905762, "step": 2568 }, { "epoch": 0.3473205685025265, "grad_norm": 0.06642600893974304, "learning_rate": 2.386929974357293e-05, "loss": 0.0353083610534668, "step": 2569 }, { "epoch": 0.3474557652983624, "grad_norm": 0.22962428629398346, "learning_rate": 2.386377905264567e-05, "loss": 0.07955026626586914, "step": 2570 }, { "epoch": 0.3475909620941984, "grad_norm": 0.09827073663473129, "learning_rate": 2.3858256516232346e-05, "loss": 0.054482460021972656, "step": 2571 }, { "epoch": 0.34772615889003433, "grad_norm": 0.17681315541267395, "learning_rate": 2.3852732135482775e-05, "loss": 0.04503798484802246, "step": 2572 }, { "epoch": 0.34786135568587023, "grad_norm": 0.20490522682666779, "learning_rate": 2.3847205911547166e-05, "loss": 0.06481099128723145, "step": 2573 }, { "epoch": 0.3479965524817062, "grad_norm": 0.04753294214606285, "learning_rate": 2.3841677845576108e-05, "loss": 0.04919910430908203, "step": 2574 }, { "epoch": 0.34813174927754215, "grad_norm": 0.07276739925146103, "learning_rate": 2.383614793872057e-05, "loss": 0.042105913162231445, "step": 2575 }, { "epoch": 0.34826694607337805, "grad_norm": 0.05481953173875809, "learning_rate": 2.3830616192131913e-05, "loss": 0.06608033180236816, "step": 2576 }, { "epoch": 0.348402142869214, "grad_norm": 0.04325982928276062, "learning_rate": 2.3825082606961876e-05, "loss": 0.0615384578704834, "step": 2577 }, { "epoch": 0.34853733966504996, "grad_norm": 0.08279349654912949, "learning_rate": 2.3819547184362575e-05, "loss": 0.06657028198242188, "step": 2578 }, { "epoch": 0.34867253646088586, "grad_norm": 0.16935347020626068, "learning_rate": 2.3814009925486522e-05, "loss": 0.1102151870727539, "step": 2579 }, { "epoch": 0.3488077332567218, "grad_norm": 0.07301969826221466, "learning_rate": 2.38084708314866e-05, "loss": 0.04360055923461914, "step": 2580 }, { "epoch": 0.3489429300525578, "grad_norm": 0.2518143951892853, "learning_rate": 2.380292990351608e-05, "loss": 0.07262802124023438, "step": 2581 }, { "epoch": 0.3490781268483937, "grad_norm": 0.2129613310098648, "learning_rate": 2.3797387142728607e-05, "loss": 0.05638575553894043, "step": 2582 }, { "epoch": 0.34921332364422963, "grad_norm": 0.05965558439493179, "learning_rate": 2.379184255027822e-05, "loss": 0.06928062438964844, "step": 2583 }, { "epoch": 0.3493485204400656, "grad_norm": 0.04601344093680382, "learning_rate": 2.378629612731933e-05, "loss": 0.0615992546081543, "step": 2584 }, { "epoch": 0.3494837172359015, "grad_norm": 0.2759677767753601, "learning_rate": 2.3780747875006735e-05, "loss": 0.05584144592285156, "step": 2585 }, { "epoch": 0.34961891403173745, "grad_norm": 0.0999409556388855, "learning_rate": 2.37751977944956e-05, "loss": 0.07807350158691406, "step": 2586 }, { "epoch": 0.3497541108275734, "grad_norm": 0.3165963590145111, "learning_rate": 2.3769645886941497e-05, "loss": 0.09059429168701172, "step": 2587 }, { "epoch": 0.3498893076234093, "grad_norm": 0.1861148327589035, "learning_rate": 2.376409215350035e-05, "loss": 0.06132638454437256, "step": 2588 }, { "epoch": 0.35002450441924526, "grad_norm": 0.10965070128440857, "learning_rate": 2.3758536595328486e-05, "loss": 0.07873296737670898, "step": 2589 }, { "epoch": 0.3501597012150812, "grad_norm": 0.048266466706991196, "learning_rate": 2.375297921358259e-05, "loss": 0.046037644147872925, "step": 2590 }, { "epoch": 0.3502948980109171, "grad_norm": 0.06304934620857239, "learning_rate": 2.3747420009419745e-05, "loss": 0.07021570205688477, "step": 2591 }, { "epoch": 0.3504300948067531, "grad_norm": 0.0976991280913353, "learning_rate": 2.3741858983997415e-05, "loss": 0.04964160919189453, "step": 2592 }, { "epoch": 0.35056529160258904, "grad_norm": 0.12907104194164276, "learning_rate": 2.373629613847342e-05, "loss": 0.05902290344238281, "step": 2593 }, { "epoch": 0.35070048839842494, "grad_norm": 0.06459382176399231, "learning_rate": 2.3730731474005988e-05, "loss": 0.06478548049926758, "step": 2594 }, { "epoch": 0.3508356851942609, "grad_norm": 0.11330688744783401, "learning_rate": 2.37251649917537e-05, "loss": 0.04386448860168457, "step": 2595 }, { "epoch": 0.35097088199009685, "grad_norm": 0.19156739115715027, "learning_rate": 2.3719596692875534e-05, "loss": 0.05406102538108826, "step": 2596 }, { "epoch": 0.35110607878593275, "grad_norm": 0.09353367239236832, "learning_rate": 2.3714026578530836e-05, "loss": 0.05425453186035156, "step": 2597 }, { "epoch": 0.3512412755817687, "grad_norm": 0.09591637551784515, "learning_rate": 2.370845464987934e-05, "loss": 0.06257152557373047, "step": 2598 }, { "epoch": 0.35137647237760467, "grad_norm": 0.07256006449460983, "learning_rate": 2.370288090808114e-05, "loss": 0.0731973648071289, "step": 2599 }, { "epoch": 0.35151166917344057, "grad_norm": 0.08736106753349304, "learning_rate": 2.369730535429673e-05, "loss": 0.0936741828918457, "step": 2600 }, { "epoch": 0.3516468659692765, "grad_norm": 0.10256905853748322, "learning_rate": 2.369172798968697e-05, "loss": 0.0741281509399414, "step": 2601 }, { "epoch": 0.3517820627651125, "grad_norm": 0.14048469066619873, "learning_rate": 2.3686148815413083e-05, "loss": 0.0610661506652832, "step": 2602 }, { "epoch": 0.3519172595609484, "grad_norm": 0.05986129119992256, "learning_rate": 2.3680567832636695e-05, "loss": 0.04454541206359863, "step": 2603 }, { "epoch": 0.35205245635678434, "grad_norm": 0.10789672285318375, "learning_rate": 2.3674985042519795e-05, "loss": 0.08755874633789062, "step": 2604 }, { "epoch": 0.3521876531526203, "grad_norm": 0.05408640578389168, "learning_rate": 2.366940044622475e-05, "loss": 0.0374140739440918, "step": 2605 }, { "epoch": 0.3523228499484562, "grad_norm": 0.0820254236459732, "learning_rate": 2.3663814044914302e-05, "loss": 0.06505441665649414, "step": 2606 }, { "epoch": 0.35245804674429215, "grad_norm": 0.0746568962931633, "learning_rate": 2.3658225839751566e-05, "loss": 0.08930015563964844, "step": 2607 }, { "epoch": 0.3525932435401281, "grad_norm": 0.05777743086218834, "learning_rate": 2.3652635831900043e-05, "loss": 0.06343793869018555, "step": 2608 }, { "epoch": 0.352728440335964, "grad_norm": 0.08560241013765335, "learning_rate": 2.3647044022523595e-05, "loss": 0.09074974060058594, "step": 2609 }, { "epoch": 0.35286363713179997, "grad_norm": 0.08882021903991699, "learning_rate": 2.364145041278647e-05, "loss": 0.050927162170410156, "step": 2610 }, { "epoch": 0.3529988339276359, "grad_norm": 0.1459065079689026, "learning_rate": 2.3635855003853287e-05, "loss": 0.0890188217163086, "step": 2611 }, { "epoch": 0.3531340307234719, "grad_norm": 0.09583209455013275, "learning_rate": 2.363025779688904e-05, "loss": 0.05753755569458008, "step": 2612 }, { "epoch": 0.3532692275193078, "grad_norm": 0.05305046588182449, "learning_rate": 2.3624658793059103e-05, "loss": 0.051485300064086914, "step": 2613 }, { "epoch": 0.35340442431514374, "grad_norm": 0.05574241280555725, "learning_rate": 2.3619057993529204e-05, "loss": 0.041806817054748535, "step": 2614 }, { "epoch": 0.3535396211109797, "grad_norm": 0.04217883571982384, "learning_rate": 2.3613455399465475e-05, "loss": 0.04663550853729248, "step": 2615 }, { "epoch": 0.3536748179068156, "grad_norm": 0.0355115570127964, "learning_rate": 2.3607851012034394e-05, "loss": 0.04209566116333008, "step": 2616 }, { "epoch": 0.35381001470265155, "grad_norm": 0.10890758037567139, "learning_rate": 2.3602244832402838e-05, "loss": 0.06583833694458008, "step": 2617 }, { "epoch": 0.3539452114984875, "grad_norm": 0.14254949986934662, "learning_rate": 2.3596636861738024e-05, "loss": 0.062408447265625, "step": 2618 }, { "epoch": 0.3540804082943234, "grad_norm": 0.09131710231304169, "learning_rate": 2.3591027101207578e-05, "loss": 0.048073530197143555, "step": 2619 }, { "epoch": 0.35421560509015937, "grad_norm": 0.08882056921720505, "learning_rate": 2.3585415551979476e-05, "loss": 0.09334373474121094, "step": 2620 }, { "epoch": 0.3543508018859953, "grad_norm": 0.1009046882390976, "learning_rate": 2.3579802215222076e-05, "loss": 0.10957920551300049, "step": 2621 }, { "epoch": 0.3544859986818312, "grad_norm": 0.15729114413261414, "learning_rate": 2.35741870921041e-05, "loss": 0.06731593608856201, "step": 2622 }, { "epoch": 0.3546211954776672, "grad_norm": 0.19666698575019836, "learning_rate": 2.3568570183794645e-05, "loss": 0.04717528820037842, "step": 2623 }, { "epoch": 0.35475639227350314, "grad_norm": 0.09813804924488068, "learning_rate": 2.356295149146319e-05, "loss": 0.05095100402832031, "step": 2624 }, { "epoch": 0.35489158906933904, "grad_norm": 0.07537435740232468, "learning_rate": 2.3557331016279567e-05, "loss": 0.04773664474487305, "step": 2625 }, { "epoch": 0.355026785865175, "grad_norm": 0.10022012889385223, "learning_rate": 2.3551708759413998e-05, "loss": 0.086090087890625, "step": 2626 }, { "epoch": 0.35516198266101096, "grad_norm": 0.16503126919269562, "learning_rate": 2.354608472203706e-05, "loss": 0.06345498561859131, "step": 2627 }, { "epoch": 0.35529717945684686, "grad_norm": 0.07295048981904984, "learning_rate": 2.3540458905319705e-05, "loss": 0.04761457443237305, "step": 2628 }, { "epoch": 0.3554323762526828, "grad_norm": 0.08839529752731323, "learning_rate": 2.3534831310433264e-05, "loss": 0.07560676336288452, "step": 2629 }, { "epoch": 0.35556757304851877, "grad_norm": 0.10200095176696777, "learning_rate": 2.3529201938549434e-05, "loss": 0.0702185332775116, "step": 2630 }, { "epoch": 0.35570276984435467, "grad_norm": 0.05184308439493179, "learning_rate": 2.3523570790840274e-05, "loss": 0.04102891683578491, "step": 2631 }, { "epoch": 0.35583796664019063, "grad_norm": 0.23401685059070587, "learning_rate": 2.3517937868478228e-05, "loss": 0.05937385559082031, "step": 2632 }, { "epoch": 0.3559731634360266, "grad_norm": 0.15099912881851196, "learning_rate": 2.3512303172636092e-05, "loss": 0.06755036115646362, "step": 2633 }, { "epoch": 0.3561083602318625, "grad_norm": 0.07012380659580231, "learning_rate": 2.3506666704487033e-05, "loss": 0.0710906982421875, "step": 2634 }, { "epoch": 0.35624355702769844, "grad_norm": 0.14317156374454498, "learning_rate": 2.3501028465204614e-05, "loss": 0.08348512649536133, "step": 2635 }, { "epoch": 0.3563787538235344, "grad_norm": 0.0987994521856308, "learning_rate": 2.3495388455962734e-05, "loss": 0.06573677062988281, "step": 2636 }, { "epoch": 0.3565139506193703, "grad_norm": 0.12739720940589905, "learning_rate": 2.3489746677935673e-05, "loss": 0.0580441951751709, "step": 2637 }, { "epoch": 0.35664914741520626, "grad_norm": 0.23972631990909576, "learning_rate": 2.3484103132298082e-05, "loss": 0.06209278106689453, "step": 2638 }, { "epoch": 0.3567843442110422, "grad_norm": 0.08867891877889633, "learning_rate": 2.347845782022497e-05, "loss": 0.08517313003540039, "step": 2639 }, { "epoch": 0.3569195410068781, "grad_norm": 0.06530512869358063, "learning_rate": 2.3472810742891734e-05, "loss": 0.08191490173339844, "step": 2640 }, { "epoch": 0.3570547378027141, "grad_norm": 0.15433338284492493, "learning_rate": 2.3467161901474118e-05, "loss": 0.067047119140625, "step": 2641 }, { "epoch": 0.35718993459855003, "grad_norm": 0.06282784044742584, "learning_rate": 2.346151129714824e-05, "loss": 0.057749390602111816, "step": 2642 }, { "epoch": 0.35732513139438593, "grad_norm": 0.2099207192659378, "learning_rate": 2.3455858931090588e-05, "loss": 0.10221624374389648, "step": 2643 }, { "epoch": 0.3574603281902219, "grad_norm": 0.18579956889152527, "learning_rate": 2.3450204804478014e-05, "loss": 0.0904233455657959, "step": 2644 }, { "epoch": 0.35759552498605784, "grad_norm": 0.10013622045516968, "learning_rate": 2.344454891848774e-05, "loss": 0.055353641510009766, "step": 2645 }, { "epoch": 0.35773072178189375, "grad_norm": 0.07909790426492691, "learning_rate": 2.3438891274297348e-05, "loss": 0.07140171527862549, "step": 2646 }, { "epoch": 0.3578659185777297, "grad_norm": 0.0854388102889061, "learning_rate": 2.343323187308479e-05, "loss": 0.060881614685058594, "step": 2647 }, { "epoch": 0.35800111537356566, "grad_norm": 0.09499748051166534, "learning_rate": 2.342757071602839e-05, "loss": 0.08330488204956055, "step": 2648 }, { "epoch": 0.35813631216940156, "grad_norm": 0.11566922068595886, "learning_rate": 2.3421907804306816e-05, "loss": 0.10874366760253906, "step": 2649 }, { "epoch": 0.3582715089652375, "grad_norm": 0.15448719263076782, "learning_rate": 2.341624313909913e-05, "loss": 0.07314777374267578, "step": 2650 }, { "epoch": 0.3584067057610735, "grad_norm": 0.10072079300880432, "learning_rate": 2.3410576721584742e-05, "loss": 0.08783245086669922, "step": 2651 }, { "epoch": 0.3585419025569094, "grad_norm": 0.08502735942602158, "learning_rate": 2.3404908552943435e-05, "loss": 0.06700992584228516, "step": 2652 }, { "epoch": 0.35867709935274533, "grad_norm": 0.06627336144447327, "learning_rate": 2.339923863435534e-05, "loss": 0.05844402313232422, "step": 2653 }, { "epoch": 0.3588122961485813, "grad_norm": 0.06782086938619614, "learning_rate": 2.3393566967000974e-05, "loss": 0.04925274848937988, "step": 2654 }, { "epoch": 0.3589474929444172, "grad_norm": 0.0751691684126854, "learning_rate": 2.3387893552061202e-05, "loss": 0.06341791152954102, "step": 2655 }, { "epoch": 0.35908268974025315, "grad_norm": 0.09506940841674805, "learning_rate": 2.3382218390717268e-05, "loss": 0.0672452449798584, "step": 2656 }, { "epoch": 0.3592178865360891, "grad_norm": 0.1252928376197815, "learning_rate": 2.3376541484150762e-05, "loss": 0.08710122108459473, "step": 2657 }, { "epoch": 0.35935308333192506, "grad_norm": 0.08160462230443954, "learning_rate": 2.3370862833543652e-05, "loss": 0.09289407730102539, "step": 2658 }, { "epoch": 0.35948828012776096, "grad_norm": 0.06463818997144699, "learning_rate": 2.336518244007826e-05, "loss": 0.06690812110900879, "step": 2659 }, { "epoch": 0.3596234769235969, "grad_norm": 0.13034532964229584, "learning_rate": 2.3359500304937274e-05, "loss": 0.07425403594970703, "step": 2660 }, { "epoch": 0.3597586737194329, "grad_norm": 0.08017157763242722, "learning_rate": 2.335381642930375e-05, "loss": 0.08252096176147461, "step": 2661 }, { "epoch": 0.3598938705152688, "grad_norm": 0.10218983888626099, "learning_rate": 2.3348130814361094e-05, "loss": 0.06869041919708252, "step": 2662 }, { "epoch": 0.36002906731110473, "grad_norm": 0.05752688646316528, "learning_rate": 2.334244346129309e-05, "loss": 0.06130063533782959, "step": 2663 }, { "epoch": 0.3601642641069407, "grad_norm": 0.13094431161880493, "learning_rate": 2.3336754371283862e-05, "loss": 0.07358217239379883, "step": 2664 }, { "epoch": 0.3602994609027766, "grad_norm": 0.08713006973266602, "learning_rate": 2.333106354551792e-05, "loss": 0.05613231658935547, "step": 2665 }, { "epoch": 0.36043465769861255, "grad_norm": 0.08167698234319687, "learning_rate": 2.332537098518012e-05, "loss": 0.0674431324005127, "step": 2666 }, { "epoch": 0.3605698544944485, "grad_norm": 0.11628971248865128, "learning_rate": 2.3319676691455686e-05, "loss": 0.11261987686157227, "step": 2667 }, { "epoch": 0.3607050512902844, "grad_norm": 0.06777145713567734, "learning_rate": 2.3313980665530205e-05, "loss": 0.06354594230651855, "step": 2668 }, { "epoch": 0.36084024808612036, "grad_norm": 0.07502219080924988, "learning_rate": 2.3308282908589606e-05, "loss": 0.0854799747467041, "step": 2669 }, { "epoch": 0.3609754448819563, "grad_norm": 0.07987609505653381, "learning_rate": 2.330258342182021e-05, "loss": 0.07422924041748047, "step": 2670 }, { "epoch": 0.3611106416777922, "grad_norm": 0.05155773088335991, "learning_rate": 2.329688220640866e-05, "loss": 0.053259849548339844, "step": 2671 }, { "epoch": 0.3612458384736282, "grad_norm": 0.05814781412482262, "learning_rate": 2.329117926354199e-05, "loss": 0.08957481384277344, "step": 2672 }, { "epoch": 0.36138103526946413, "grad_norm": 0.09278398007154465, "learning_rate": 2.3285474594407588e-05, "loss": 0.06488543748855591, "step": 2673 }, { "epoch": 0.36151623206530004, "grad_norm": 0.08003925532102585, "learning_rate": 2.327976820019319e-05, "loss": 0.04104804992675781, "step": 2674 }, { "epoch": 0.361651428861136, "grad_norm": 0.0690421387553215, "learning_rate": 2.32740600820869e-05, "loss": 0.06993722915649414, "step": 2675 }, { "epoch": 0.36178662565697195, "grad_norm": 0.07715563476085663, "learning_rate": 2.326835024127718e-05, "loss": 0.06545495986938477, "step": 2676 }, { "epoch": 0.36192182245280785, "grad_norm": 0.11081211268901825, "learning_rate": 2.326263867895285e-05, "loss": 0.06154513359069824, "step": 2677 }, { "epoch": 0.3620570192486438, "grad_norm": 0.07325959205627441, "learning_rate": 2.3256925396303076e-05, "loss": 0.05494523048400879, "step": 2678 }, { "epoch": 0.36219221604447976, "grad_norm": 0.09612903743982315, "learning_rate": 2.3251210394517412e-05, "loss": 0.043145179748535156, "step": 2679 }, { "epoch": 0.36232741284031567, "grad_norm": 0.12016204744577408, "learning_rate": 2.3245493674785742e-05, "loss": 0.06176042556762695, "step": 2680 }, { "epoch": 0.3624626096361516, "grad_norm": 0.1272999346256256, "learning_rate": 2.3239775238298316e-05, "loss": 0.04294252395629883, "step": 2681 }, { "epoch": 0.3625978064319876, "grad_norm": 0.130104660987854, "learning_rate": 2.3234055086245744e-05, "loss": 0.07342720031738281, "step": 2682 }, { "epoch": 0.3627330032278235, "grad_norm": 0.18558073043823242, "learning_rate": 2.3228333219818998e-05, "loss": 0.06855964660644531, "step": 2683 }, { "epoch": 0.36286820002365944, "grad_norm": 0.09637975692749023, "learning_rate": 2.3222609640209397e-05, "loss": 0.05062079429626465, "step": 2684 }, { "epoch": 0.3630033968194954, "grad_norm": 0.10081814229488373, "learning_rate": 2.3216884348608614e-05, "loss": 0.07904863357543945, "step": 2685 }, { "epoch": 0.3631385936153313, "grad_norm": 0.08016641438007355, "learning_rate": 2.32111573462087e-05, "loss": 0.07809066772460938, "step": 2686 }, { "epoch": 0.36327379041116725, "grad_norm": 0.16184598207473755, "learning_rate": 2.3205428634202028e-05, "loss": 0.10135173797607422, "step": 2687 }, { "epoch": 0.3634089872070032, "grad_norm": 0.08866973221302032, "learning_rate": 2.3199698213781367e-05, "loss": 0.0719752311706543, "step": 2688 }, { "epoch": 0.3635441840028391, "grad_norm": 0.10324577242136002, "learning_rate": 2.319396608613981e-05, "loss": 0.050694942474365234, "step": 2689 }, { "epoch": 0.36367938079867507, "grad_norm": 0.04577205330133438, "learning_rate": 2.318823225247082e-05, "loss": 0.035382986068725586, "step": 2690 }, { "epoch": 0.363814577594511, "grad_norm": 0.09056251496076584, "learning_rate": 2.3182496713968208e-05, "loss": 0.05240058898925781, "step": 2691 }, { "epoch": 0.3639497743903469, "grad_norm": 0.08505533635616302, "learning_rate": 2.3176759471826143e-05, "loss": 0.05746889114379883, "step": 2692 }, { "epoch": 0.3640849711861829, "grad_norm": 0.2081272006034851, "learning_rate": 2.3171020527239155e-05, "loss": 0.07700604200363159, "step": 2693 }, { "epoch": 0.36422016798201884, "grad_norm": 0.0531369186937809, "learning_rate": 2.316527988140212e-05, "loss": 0.030387282371520996, "step": 2694 }, { "epoch": 0.36435536477785474, "grad_norm": 0.06013937667012215, "learning_rate": 2.315953753551027e-05, "loss": 0.055182039737701416, "step": 2695 }, { "epoch": 0.3644905615736907, "grad_norm": 0.10666341334581375, "learning_rate": 2.3153793490759197e-05, "loss": 0.06256580352783203, "step": 2696 }, { "epoch": 0.36462575836952665, "grad_norm": 0.08365563303232193, "learning_rate": 2.3148047748344835e-05, "loss": 0.06514883041381836, "step": 2697 }, { "epoch": 0.36476095516536255, "grad_norm": 0.08651472628116608, "learning_rate": 2.314230030946348e-05, "loss": 0.06864029169082642, "step": 2698 }, { "epoch": 0.3648961519611985, "grad_norm": 0.17710311710834503, "learning_rate": 2.3136551175311782e-05, "loss": 0.05901694297790527, "step": 2699 }, { "epoch": 0.36503134875703447, "grad_norm": 0.060504451394081116, "learning_rate": 2.313080034708674e-05, "loss": 0.05349087715148926, "step": 2700 }, { "epoch": 0.36516654555287037, "grad_norm": 0.06128257140517235, "learning_rate": 2.312504782598571e-05, "loss": 0.06322497129440308, "step": 2701 }, { "epoch": 0.3653017423487063, "grad_norm": 0.05907202884554863, "learning_rate": 2.311929361320639e-05, "loss": 0.03665876388549805, "step": 2702 }, { "epoch": 0.3654369391445423, "grad_norm": 0.09939534962177277, "learning_rate": 2.311353770994684e-05, "loss": 0.060498714447021484, "step": 2703 }, { "epoch": 0.36557213594037824, "grad_norm": 0.1539541482925415, "learning_rate": 2.310778011740548e-05, "loss": 0.04438591003417969, "step": 2704 }, { "epoch": 0.36570733273621414, "grad_norm": 0.07016557455062866, "learning_rate": 2.310202083678106e-05, "loss": 0.07504618167877197, "step": 2705 }, { "epoch": 0.3658425295320501, "grad_norm": 0.07573091238737106, "learning_rate": 2.3096259869272694e-05, "loss": 0.07481127977371216, "step": 2706 }, { "epoch": 0.36597772632788605, "grad_norm": 0.06745566427707672, "learning_rate": 2.309049721607985e-05, "loss": 0.05385398864746094, "step": 2707 }, { "epoch": 0.36611292312372196, "grad_norm": 0.15015491843223572, "learning_rate": 2.3084732878402342e-05, "loss": 0.0795435905456543, "step": 2708 }, { "epoch": 0.3662481199195579, "grad_norm": 0.09348881244659424, "learning_rate": 2.307896685744034e-05, "loss": 0.08438968658447266, "step": 2709 }, { "epoch": 0.36638331671539387, "grad_norm": 0.044678203761577606, "learning_rate": 2.3073199154394352e-05, "loss": 0.03957223892211914, "step": 2710 }, { "epoch": 0.36651851351122977, "grad_norm": 0.1181914433836937, "learning_rate": 2.3067429770465246e-05, "loss": 0.032999396324157715, "step": 2711 }, { "epoch": 0.3666537103070657, "grad_norm": 0.2017289400100708, "learning_rate": 2.3061658706854244e-05, "loss": 0.07780218124389648, "step": 2712 }, { "epoch": 0.3667889071029017, "grad_norm": 0.21630126237869263, "learning_rate": 2.3055885964762907e-05, "loss": 0.10248970985412598, "step": 2713 }, { "epoch": 0.3669241038987376, "grad_norm": 0.16515401005744934, "learning_rate": 2.3050111545393156e-05, "loss": 0.06472444534301758, "step": 2714 }, { "epoch": 0.36705930069457354, "grad_norm": 0.08267875760793686, "learning_rate": 2.304433544994725e-05, "loss": 0.06518292427062988, "step": 2715 }, { "epoch": 0.3671944974904095, "grad_norm": 0.09502299875020981, "learning_rate": 2.303855767962781e-05, "loss": 0.07845473289489746, "step": 2716 }, { "epoch": 0.3673296942862454, "grad_norm": 0.42943859100341797, "learning_rate": 2.303277823563779e-05, "loss": 0.08218860626220703, "step": 2717 }, { "epoch": 0.36746489108208136, "grad_norm": 0.16932396590709686, "learning_rate": 2.3026997119180507e-05, "loss": 0.0467226505279541, "step": 2718 }, { "epoch": 0.3676000878779173, "grad_norm": 0.05949682369828224, "learning_rate": 2.3021214331459616e-05, "loss": 0.06003618240356445, "step": 2719 }, { "epoch": 0.3677352846737532, "grad_norm": 0.19413945078849792, "learning_rate": 2.301542987367913e-05, "loss": 0.09903526306152344, "step": 2720 }, { "epoch": 0.3678704814695892, "grad_norm": 0.22097532451152802, "learning_rate": 2.3009643747043403e-05, "loss": 0.09497189521789551, "step": 2721 }, { "epoch": 0.36800567826542513, "grad_norm": 0.06210830435156822, "learning_rate": 2.3003855952757132e-05, "loss": 0.06505823135375977, "step": 2722 }, { "epoch": 0.36814087506126103, "grad_norm": 0.12661142647266388, "learning_rate": 2.2998066492025372e-05, "loss": 0.04744744300842285, "step": 2723 }, { "epoch": 0.368276071857097, "grad_norm": 0.24423353374004364, "learning_rate": 2.2992275366053513e-05, "loss": 0.11527371406555176, "step": 2724 }, { "epoch": 0.36841126865293294, "grad_norm": 0.1709631085395813, "learning_rate": 2.2986482576047305e-05, "loss": 0.051657795906066895, "step": 2725 }, { "epoch": 0.36854646544876885, "grad_norm": 0.17682389914989471, "learning_rate": 2.298068812321284e-05, "loss": 0.07861995697021484, "step": 2726 }, { "epoch": 0.3686816622446048, "grad_norm": 0.18474675714969635, "learning_rate": 2.297489200875654e-05, "loss": 0.08978843688964844, "step": 2727 }, { "epoch": 0.36881685904044076, "grad_norm": 0.16826172173023224, "learning_rate": 2.2969094233885204e-05, "loss": 0.07134509086608887, "step": 2728 }, { "epoch": 0.36895205583627666, "grad_norm": 0.09160614758729935, "learning_rate": 2.296329479980595e-05, "loss": 0.07286214828491211, "step": 2729 }, { "epoch": 0.3690872526321126, "grad_norm": 0.29583054780960083, "learning_rate": 2.2957493707726252e-05, "loss": 0.08674812316894531, "step": 2730 }, { "epoch": 0.3692224494279486, "grad_norm": 0.11967773735523224, "learning_rate": 2.2951690958853932e-05, "loss": 0.04410243034362793, "step": 2731 }, { "epoch": 0.3693576462237845, "grad_norm": 0.18442955613136292, "learning_rate": 2.2945886554397154e-05, "loss": 0.05983924865722656, "step": 2732 }, { "epoch": 0.36949284301962043, "grad_norm": 0.11077980697154999, "learning_rate": 2.294008049556441e-05, "loss": 0.04616779088973999, "step": 2733 }, { "epoch": 0.3696280398154564, "grad_norm": 0.10775760561227798, "learning_rate": 2.2934272783564577e-05, "loss": 0.036651611328125, "step": 2734 }, { "epoch": 0.3697632366112923, "grad_norm": 0.0744224488735199, "learning_rate": 2.2928463419606835e-05, "loss": 0.07147765159606934, "step": 2735 }, { "epoch": 0.36989843340712825, "grad_norm": 0.08086686581373215, "learning_rate": 2.292265240490073e-05, "loss": 0.06716775894165039, "step": 2736 }, { "epoch": 0.3700336302029642, "grad_norm": 0.08919425308704376, "learning_rate": 2.2916839740656154e-05, "loss": 0.07674074172973633, "step": 2737 }, { "epoch": 0.3701688269988001, "grad_norm": 0.07416735589504242, "learning_rate": 2.2911025428083316e-05, "loss": 0.03160429000854492, "step": 2738 }, { "epoch": 0.37030402379463606, "grad_norm": 0.12528888881206512, "learning_rate": 2.2905209468392798e-05, "loss": 0.06074643135070801, "step": 2739 }, { "epoch": 0.370439220590472, "grad_norm": 0.13408605754375458, "learning_rate": 2.2899391862795514e-05, "loss": 0.07269096374511719, "step": 2740 }, { "epoch": 0.3705744173863079, "grad_norm": 0.06715600937604904, "learning_rate": 2.2893572612502718e-05, "loss": 0.05186152458190918, "step": 2741 }, { "epoch": 0.3707096141821439, "grad_norm": 0.05913464352488518, "learning_rate": 2.2887751718726013e-05, "loss": 0.06685709953308105, "step": 2742 }, { "epoch": 0.37084481097797983, "grad_norm": 0.10640189796686172, "learning_rate": 2.288192918267734e-05, "loss": 0.0781707763671875, "step": 2743 }, { "epoch": 0.37098000777381573, "grad_norm": 0.1281871199607849, "learning_rate": 2.2876105005568974e-05, "loss": 0.06212592124938965, "step": 2744 }, { "epoch": 0.3711152045696517, "grad_norm": 0.08705922216176987, "learning_rate": 2.287027918861355e-05, "loss": 0.05160844326019287, "step": 2745 }, { "epoch": 0.37125040136548765, "grad_norm": 0.13004440069198608, "learning_rate": 2.2864451733024024e-05, "loss": 0.06316614151000977, "step": 2746 }, { "epoch": 0.37138559816132355, "grad_norm": 0.07214105129241943, "learning_rate": 2.2858622640013716e-05, "loss": 0.09713459014892578, "step": 2747 }, { "epoch": 0.3715207949571595, "grad_norm": 0.047702912241220474, "learning_rate": 2.285279191079626e-05, "loss": 0.05330824851989746, "step": 2748 }, { "epoch": 0.37165599175299546, "grad_norm": 0.12142839282751083, "learning_rate": 2.2846959546585656e-05, "loss": 0.0802164077758789, "step": 2749 }, { "epoch": 0.3717911885488314, "grad_norm": 0.07923062890768051, "learning_rate": 2.2841125548596225e-05, "loss": 0.058669328689575195, "step": 2750 }, { "epoch": 0.3719263853446673, "grad_norm": 0.1896916776895523, "learning_rate": 2.2835289918042648e-05, "loss": 0.06959748268127441, "step": 2751 }, { "epoch": 0.3720615821405033, "grad_norm": 0.0663958266377449, "learning_rate": 2.282945265613992e-05, "loss": 0.08017897605895996, "step": 2752 }, { "epoch": 0.37219677893633923, "grad_norm": 0.10044600814580917, "learning_rate": 2.2823613764103406e-05, "loss": 0.08410584926605225, "step": 2753 }, { "epoch": 0.37233197573217514, "grad_norm": 0.09321119636297226, "learning_rate": 2.2817773243148776e-05, "loss": 0.060943603515625, "step": 2754 }, { "epoch": 0.3724671725280111, "grad_norm": 0.06502675265073776, "learning_rate": 2.2811931094492074e-05, "loss": 0.04653823375701904, "step": 2755 }, { "epoch": 0.37260236932384705, "grad_norm": 0.14461436867713928, "learning_rate": 2.280608731934966e-05, "loss": 0.07020258903503418, "step": 2756 }, { "epoch": 0.37273756611968295, "grad_norm": 0.1345045417547226, "learning_rate": 2.280024191893823e-05, "loss": 0.08864402770996094, "step": 2757 }, { "epoch": 0.3728727629155189, "grad_norm": 0.100283183157444, "learning_rate": 2.279439489447485e-05, "loss": 0.09058094024658203, "step": 2758 }, { "epoch": 0.37300795971135486, "grad_norm": 0.09580633044242859, "learning_rate": 2.278854624717688e-05, "loss": 0.06604033708572388, "step": 2759 }, { "epoch": 0.37314315650719077, "grad_norm": 0.061638735234737396, "learning_rate": 2.2782695978262045e-05, "loss": 0.0608983039855957, "step": 2760 }, { "epoch": 0.3732783533030267, "grad_norm": 0.07221037894487381, "learning_rate": 2.2776844088948406e-05, "loss": 0.05460381507873535, "step": 2761 }, { "epoch": 0.3734135500988627, "grad_norm": 0.03720799461007118, "learning_rate": 2.2770990580454364e-05, "loss": 0.045108795166015625, "step": 2762 }, { "epoch": 0.3735487468946986, "grad_norm": 0.08576427400112152, "learning_rate": 2.276513545399864e-05, "loss": 0.06876599788665771, "step": 2763 }, { "epoch": 0.37368394369053454, "grad_norm": 0.08200296014547348, "learning_rate": 2.2759278710800306e-05, "loss": 0.040184736251831055, "step": 2764 }, { "epoch": 0.3738191404863705, "grad_norm": 0.07570790499448776, "learning_rate": 2.275342035207876e-05, "loss": 0.06896781921386719, "step": 2765 }, { "epoch": 0.3739543372822064, "grad_norm": 0.15543131530284882, "learning_rate": 2.2747560379053752e-05, "loss": 0.08238428831100464, "step": 2766 }, { "epoch": 0.37408953407804235, "grad_norm": 0.07087230682373047, "learning_rate": 2.2741698792945364e-05, "loss": 0.03446626663208008, "step": 2767 }, { "epoch": 0.3742247308738783, "grad_norm": 0.2016008049249649, "learning_rate": 2.2735835594974003e-05, "loss": 0.0862741470336914, "step": 2768 }, { "epoch": 0.3743599276697142, "grad_norm": 0.10248856246471405, "learning_rate": 2.272997078636042e-05, "loss": 0.041033267974853516, "step": 2769 }, { "epoch": 0.37449512446555017, "grad_norm": 0.10012223571538925, "learning_rate": 2.272410436832569e-05, "loss": 0.06709051132202148, "step": 2770 }, { "epoch": 0.3746303212613861, "grad_norm": 0.09525300562381744, "learning_rate": 2.2718236342091248e-05, "loss": 0.06319558620452881, "step": 2771 }, { "epoch": 0.374765518057222, "grad_norm": 0.0983513593673706, "learning_rate": 2.2712366708878838e-05, "loss": 0.07823848724365234, "step": 2772 }, { "epoch": 0.374900714853058, "grad_norm": 0.2814671993255615, "learning_rate": 2.2706495469910552e-05, "loss": 0.1147909164428711, "step": 2773 }, { "epoch": 0.37503591164889394, "grad_norm": 0.08254552632570267, "learning_rate": 2.2700622626408814e-05, "loss": 0.08213996887207031, "step": 2774 }, { "epoch": 0.37517110844472984, "grad_norm": 0.06676315516233444, "learning_rate": 2.2694748179596375e-05, "loss": 0.0569148063659668, "step": 2775 }, { "epoch": 0.3753063052405658, "grad_norm": 0.09155144542455673, "learning_rate": 2.2688872130696342e-05, "loss": 0.054955482482910156, "step": 2776 }, { "epoch": 0.37544150203640175, "grad_norm": 0.06272590160369873, "learning_rate": 2.268299448093212e-05, "loss": 0.05836343765258789, "step": 2777 }, { "epoch": 0.37557669883223765, "grad_norm": 0.14821811020374298, "learning_rate": 2.2677115231527482e-05, "loss": 0.09092545509338379, "step": 2778 }, { "epoch": 0.3757118956280736, "grad_norm": 0.08862105011940002, "learning_rate": 2.267123438370651e-05, "loss": 0.04809236526489258, "step": 2779 }, { "epoch": 0.37584709242390957, "grad_norm": 0.05294622853398323, "learning_rate": 2.266535193869363e-05, "loss": 0.05145597457885742, "step": 2780 }, { "epoch": 0.37598228921974547, "grad_norm": 0.1998557597398758, "learning_rate": 2.2659467897713604e-05, "loss": 0.07789409160614014, "step": 2781 }, { "epoch": 0.3761174860155814, "grad_norm": 0.06989549845457077, "learning_rate": 2.2653582261991516e-05, "loss": 0.04813957214355469, "step": 2782 }, { "epoch": 0.3762526828114174, "grad_norm": 0.05387307330965996, "learning_rate": 2.2647695032752785e-05, "loss": 0.0516359806060791, "step": 2783 }, { "epoch": 0.3763878796072533, "grad_norm": 0.10231353342533112, "learning_rate": 2.264180621122317e-05, "loss": 0.06594467163085938, "step": 2784 }, { "epoch": 0.37652307640308924, "grad_norm": 0.12177782505750656, "learning_rate": 2.2635915798628747e-05, "loss": 0.08141422271728516, "step": 2785 }, { "epoch": 0.3766582731989252, "grad_norm": 0.05815134942531586, "learning_rate": 2.2630023796195932e-05, "loss": 0.046163082122802734, "step": 2786 }, { "epoch": 0.3767934699947611, "grad_norm": 0.10425441712141037, "learning_rate": 2.262413020515148e-05, "loss": 0.08174657821655273, "step": 2787 }, { "epoch": 0.37692866679059706, "grad_norm": 0.18714912235736847, "learning_rate": 2.261823502672246e-05, "loss": 0.06075096130371094, "step": 2788 }, { "epoch": 0.377063863586433, "grad_norm": 0.24350875616073608, "learning_rate": 2.261233826213628e-05, "loss": 0.07392144203186035, "step": 2789 }, { "epoch": 0.3771990603822689, "grad_norm": 0.08834494650363922, "learning_rate": 2.2606439912620688e-05, "loss": 0.07562696933746338, "step": 2790 }, { "epoch": 0.37733425717810487, "grad_norm": 0.10013694316148758, "learning_rate": 2.2600539979403734e-05, "loss": 0.06572079658508301, "step": 2791 }, { "epoch": 0.3774694539739408, "grad_norm": 0.10506601631641388, "learning_rate": 2.259463846371383e-05, "loss": 0.0860595703125, "step": 2792 }, { "epoch": 0.3776046507697768, "grad_norm": 0.14454716444015503, "learning_rate": 2.2588735366779698e-05, "loss": 0.05851936340332031, "step": 2793 }, { "epoch": 0.3777398475656127, "grad_norm": 0.23578503727912903, "learning_rate": 2.2582830689830394e-05, "loss": 0.08013248443603516, "step": 2794 }, { "epoch": 0.37787504436144864, "grad_norm": 0.41214001178741455, "learning_rate": 2.2576924434095305e-05, "loss": 0.10672998428344727, "step": 2795 }, { "epoch": 0.3780102411572846, "grad_norm": 0.04705068841576576, "learning_rate": 2.257101660080414e-05, "loss": 0.047440528869628906, "step": 2796 }, { "epoch": 0.3781454379531205, "grad_norm": 0.08299770951271057, "learning_rate": 2.256510719118695e-05, "loss": 0.03977060317993164, "step": 2797 }, { "epoch": 0.37828063474895646, "grad_norm": 0.050721246749162674, "learning_rate": 2.2559196206474094e-05, "loss": 0.0630655288696289, "step": 2798 }, { "epoch": 0.3784158315447924, "grad_norm": 0.12276268750429153, "learning_rate": 2.2553283647896287e-05, "loss": 0.07132959365844727, "step": 2799 }, { "epoch": 0.3785510283406283, "grad_norm": 0.1889687329530716, "learning_rate": 2.254736951668454e-05, "loss": 0.09444999694824219, "step": 2800 }, { "epoch": 0.37868622513646427, "grad_norm": 0.11569591611623764, "learning_rate": 2.2541453814070212e-05, "loss": 0.06716674566268921, "step": 2801 }, { "epoch": 0.37882142193230023, "grad_norm": 0.20026740431785583, "learning_rate": 2.2535536541284983e-05, "loss": 0.09140205383300781, "step": 2802 }, { "epoch": 0.37895661872813613, "grad_norm": 0.2150438278913498, "learning_rate": 2.2529617699560857e-05, "loss": 0.06071138381958008, "step": 2803 }, { "epoch": 0.3790918155239721, "grad_norm": 0.1123405247926712, "learning_rate": 2.2523697290130185e-05, "loss": 0.049558281898498535, "step": 2804 }, { "epoch": 0.37922701231980804, "grad_norm": 0.061004046350717545, "learning_rate": 2.251777531422561e-05, "loss": 0.03796124458312988, "step": 2805 }, { "epoch": 0.37936220911564394, "grad_norm": 0.15213656425476074, "learning_rate": 2.2511851773080127e-05, "loss": 0.09109020233154297, "step": 2806 }, { "epoch": 0.3794974059114799, "grad_norm": 0.0711393654346466, "learning_rate": 2.2505926667927043e-05, "loss": 0.04447793960571289, "step": 2807 }, { "epoch": 0.37963260270731586, "grad_norm": 0.1593368947505951, "learning_rate": 2.25e-05, "loss": 0.08230447769165039, "step": 2808 }, { "epoch": 0.37976779950315176, "grad_norm": 0.06413017958402634, "learning_rate": 2.2494071770532966e-05, "loss": 0.10065078735351562, "step": 2809 }, { "epoch": 0.3799029962989877, "grad_norm": 0.11518452316522598, "learning_rate": 2.2488141980760223e-05, "loss": 0.038037776947021484, "step": 2810 }, { "epoch": 0.3800381930948237, "grad_norm": 0.05485963076353073, "learning_rate": 2.248221063191639e-05, "loss": 0.06107282638549805, "step": 2811 }, { "epoch": 0.3801733898906596, "grad_norm": 0.0578949972987175, "learning_rate": 2.24762777252364e-05, "loss": 0.05760788917541504, "step": 2812 }, { "epoch": 0.38030858668649553, "grad_norm": 0.09842949360609055, "learning_rate": 2.2470343261955525e-05, "loss": 0.10003852844238281, "step": 2813 }, { "epoch": 0.3804437834823315, "grad_norm": 0.06713572144508362, "learning_rate": 2.246440724330934e-05, "loss": 0.04203915596008301, "step": 2814 }, { "epoch": 0.3805789802781674, "grad_norm": 0.07310368120670319, "learning_rate": 2.2458469670533765e-05, "loss": 0.04777121543884277, "step": 2815 }, { "epoch": 0.38071417707400335, "grad_norm": 0.10394813865423203, "learning_rate": 2.2452530544865034e-05, "loss": 0.04501032829284668, "step": 2816 }, { "epoch": 0.3808493738698393, "grad_norm": 0.043789658695459366, "learning_rate": 2.24465898675397e-05, "loss": 0.05106019973754883, "step": 2817 }, { "epoch": 0.3809845706656752, "grad_norm": 0.1555924117565155, "learning_rate": 2.244064763979464e-05, "loss": 0.07743453979492188, "step": 2818 }, { "epoch": 0.38111976746151116, "grad_norm": 0.19059306383132935, "learning_rate": 2.2434703862867068e-05, "loss": 0.09229755401611328, "step": 2819 }, { "epoch": 0.3812549642573471, "grad_norm": 0.14083994925022125, "learning_rate": 2.2428758537994504e-05, "loss": 0.05932426452636719, "step": 2820 }, { "epoch": 0.381390161053183, "grad_norm": 0.07788024097681046, "learning_rate": 2.24228116664148e-05, "loss": 0.04738330841064453, "step": 2821 }, { "epoch": 0.381525357849019, "grad_norm": 0.19101087749004364, "learning_rate": 2.2416863249366125e-05, "loss": 0.06658792495727539, "step": 2822 }, { "epoch": 0.38166055464485493, "grad_norm": 0.12705843150615692, "learning_rate": 2.241091328808696e-05, "loss": 0.0597684383392334, "step": 2823 }, { "epoch": 0.38179575144069083, "grad_norm": 0.05747111886739731, "learning_rate": 2.240496178381614e-05, "loss": 0.04332387447357178, "step": 2824 }, { "epoch": 0.3819309482365268, "grad_norm": 0.12303254753351212, "learning_rate": 2.239900873779278e-05, "loss": 0.06458282470703125, "step": 2825 }, { "epoch": 0.38206614503236275, "grad_norm": 0.06540141254663467, "learning_rate": 2.2393054151256352e-05, "loss": 0.06014728546142578, "step": 2826 }, { "epoch": 0.38220134182819865, "grad_norm": 0.11612020432949066, "learning_rate": 2.238709802544662e-05, "loss": 0.08864045143127441, "step": 2827 }, { "epoch": 0.3823365386240346, "grad_norm": 0.0544077605009079, "learning_rate": 2.2381140361603686e-05, "loss": 0.05488467216491699, "step": 2828 }, { "epoch": 0.38247173541987056, "grad_norm": 0.05459945276379585, "learning_rate": 2.237518116096797e-05, "loss": 0.06433665752410889, "step": 2829 }, { "epoch": 0.38260693221570646, "grad_norm": 0.16710463166236877, "learning_rate": 2.2369220424780203e-05, "loss": 0.05536776781082153, "step": 2830 }, { "epoch": 0.3827421290115424, "grad_norm": 0.10125366598367691, "learning_rate": 2.2363258154281452e-05, "loss": 0.09418916702270508, "step": 2831 }, { "epoch": 0.3828773258073784, "grad_norm": 0.10701412707567215, "learning_rate": 2.2357294350713088e-05, "loss": 0.06345701217651367, "step": 2832 }, { "epoch": 0.3830125226032143, "grad_norm": 0.06601959466934204, "learning_rate": 2.2351329015316802e-05, "loss": 0.05744194984436035, "step": 2833 }, { "epoch": 0.38314771939905024, "grad_norm": 0.20217901468276978, "learning_rate": 2.2345362149334613e-05, "loss": 0.1077275276184082, "step": 2834 }, { "epoch": 0.3832829161948862, "grad_norm": 0.10598672926425934, "learning_rate": 2.2339393754008854e-05, "loss": 0.05259418487548828, "step": 2835 }, { "epoch": 0.3834181129907221, "grad_norm": 0.10022184252738953, "learning_rate": 2.233342383058218e-05, "loss": 0.0843663215637207, "step": 2836 }, { "epoch": 0.38355330978655805, "grad_norm": 0.21557487547397614, "learning_rate": 2.2327452380297554e-05, "loss": 0.0895071029663086, "step": 2837 }, { "epoch": 0.383688506582394, "grad_norm": 0.18631607294082642, "learning_rate": 2.232147940439827e-05, "loss": 0.06493210792541504, "step": 2838 }, { "epoch": 0.38382370337822996, "grad_norm": 0.14699497818946838, "learning_rate": 2.2315504904127936e-05, "loss": 0.09984707832336426, "step": 2839 }, { "epoch": 0.38395890017406586, "grad_norm": 0.08688609302043915, "learning_rate": 2.2309528880730463e-05, "loss": 0.08707427978515625, "step": 2840 }, { "epoch": 0.3840940969699018, "grad_norm": 0.09528959542512894, "learning_rate": 2.2303551335450096e-05, "loss": 0.06581449508666992, "step": 2841 }, { "epoch": 0.3842292937657378, "grad_norm": 0.05205417424440384, "learning_rate": 2.2297572269531398e-05, "loss": 0.04139566421508789, "step": 2842 }, { "epoch": 0.3843644905615737, "grad_norm": 0.07317586243152618, "learning_rate": 2.2291591684219243e-05, "loss": 0.05797839164733887, "step": 2843 }, { "epoch": 0.38449968735740964, "grad_norm": 0.1700967699289322, "learning_rate": 2.2285609580758806e-05, "loss": 0.0629042387008667, "step": 2844 }, { "epoch": 0.3846348841532456, "grad_norm": 0.38466769456863403, "learning_rate": 2.227962596039561e-05, "loss": 0.10962724685668945, "step": 2845 }, { "epoch": 0.3847700809490815, "grad_norm": 0.07835380733013153, "learning_rate": 2.2273640824375462e-05, "loss": 0.06114912033081055, "step": 2846 }, { "epoch": 0.38490527774491745, "grad_norm": 0.11597051471471786, "learning_rate": 2.2267654173944515e-05, "loss": 0.0951838493347168, "step": 2847 }, { "epoch": 0.3850404745407534, "grad_norm": 0.1270502507686615, "learning_rate": 2.2261666010349212e-05, "loss": 0.09619283676147461, "step": 2848 }, { "epoch": 0.3851756713365893, "grad_norm": 0.17541272938251495, "learning_rate": 2.2255676334836317e-05, "loss": 0.0588226318359375, "step": 2849 }, { "epoch": 0.38531086813242527, "grad_norm": 0.16797445714473724, "learning_rate": 2.2249685148652917e-05, "loss": 0.07074284553527832, "step": 2850 }, { "epoch": 0.3854460649282612, "grad_norm": 0.12969140708446503, "learning_rate": 2.224369245304641e-05, "loss": 0.08632183074951172, "step": 2851 }, { "epoch": 0.3855812617240971, "grad_norm": 0.0639800950884819, "learning_rate": 2.2237698249264507e-05, "loss": 0.04711794853210449, "step": 2852 }, { "epoch": 0.3857164585199331, "grad_norm": 0.2360343337059021, "learning_rate": 2.2231702538555235e-05, "loss": 0.0767362117767334, "step": 2853 }, { "epoch": 0.38585165531576904, "grad_norm": 0.05243297666311264, "learning_rate": 2.2225705322166928e-05, "loss": 0.07010650634765625, "step": 2854 }, { "epoch": 0.38598685211160494, "grad_norm": 0.16941840946674347, "learning_rate": 2.2219706601348242e-05, "loss": 0.10315513610839844, "step": 2855 }, { "epoch": 0.3861220489074409, "grad_norm": 0.07535373419523239, "learning_rate": 2.221370637734814e-05, "loss": 0.048774003982543945, "step": 2856 }, { "epoch": 0.38625724570327685, "grad_norm": 0.09462258219718933, "learning_rate": 2.22077046514159e-05, "loss": 0.07574272155761719, "step": 2857 }, { "epoch": 0.38639244249911275, "grad_norm": 0.11887585371732712, "learning_rate": 2.220170142480112e-05, "loss": 0.06749486923217773, "step": 2858 }, { "epoch": 0.3865276392949487, "grad_norm": 0.07648269832134247, "learning_rate": 2.2195696698753695e-05, "loss": 0.06076592206954956, "step": 2859 }, { "epoch": 0.38666283609078467, "grad_norm": 0.09027384966611862, "learning_rate": 2.2189690474523844e-05, "loss": 0.05429410934448242, "step": 2860 }, { "epoch": 0.38679803288662057, "grad_norm": 0.06682781130075455, "learning_rate": 2.21836827533621e-05, "loss": 0.054702043533325195, "step": 2861 }, { "epoch": 0.3869332296824565, "grad_norm": 0.1171586737036705, "learning_rate": 2.2177673536519297e-05, "loss": 0.07994914054870605, "step": 2862 }, { "epoch": 0.3870684264782925, "grad_norm": 0.10911373794078827, "learning_rate": 2.217166282524659e-05, "loss": 0.07723045349121094, "step": 2863 }, { "epoch": 0.3872036232741284, "grad_norm": 0.09084512293338776, "learning_rate": 2.216565062079544e-05, "loss": 0.06727838516235352, "step": 2864 }, { "epoch": 0.38733882006996434, "grad_norm": 0.09167740494012833, "learning_rate": 2.2159636924417612e-05, "loss": 0.0527501106262207, "step": 2865 }, { "epoch": 0.3874740168658003, "grad_norm": 0.23680470883846283, "learning_rate": 2.2153621737365205e-05, "loss": 0.08391380310058594, "step": 2866 }, { "epoch": 0.3876092136616362, "grad_norm": 0.10274053364992142, "learning_rate": 2.2147605060890598e-05, "loss": 0.08497810363769531, "step": 2867 }, { "epoch": 0.38774441045747216, "grad_norm": 0.17132432758808136, "learning_rate": 2.2141586896246503e-05, "loss": 0.09068691730499268, "step": 2868 }, { "epoch": 0.3878796072533081, "grad_norm": 0.11979129910469055, "learning_rate": 2.2135567244685933e-05, "loss": 0.07145345211029053, "step": 2869 }, { "epoch": 0.388014804049144, "grad_norm": 0.16250404715538025, "learning_rate": 2.2129546107462214e-05, "loss": 0.10976696014404297, "step": 2870 }, { "epoch": 0.38815000084497997, "grad_norm": 0.1045694574713707, "learning_rate": 2.212352348582897e-05, "loss": 0.08321857452392578, "step": 2871 }, { "epoch": 0.3882851976408159, "grad_norm": 0.16664868593215942, "learning_rate": 2.2117499381040157e-05, "loss": 0.061040401458740234, "step": 2872 }, { "epoch": 0.38842039443665183, "grad_norm": 0.08886837959289551, "learning_rate": 2.211147379435001e-05, "loss": 0.05218172073364258, "step": 2873 }, { "epoch": 0.3885555912324878, "grad_norm": 0.08197692036628723, "learning_rate": 2.2105446727013098e-05, "loss": 0.0597538948059082, "step": 2874 }, { "epoch": 0.38869078802832374, "grad_norm": 0.06315159797668457, "learning_rate": 2.209941818028429e-05, "loss": 0.06333255767822266, "step": 2875 }, { "epoch": 0.38882598482415964, "grad_norm": 0.09284429997205734, "learning_rate": 2.2093388155418757e-05, "loss": 0.05210423469543457, "step": 2876 }, { "epoch": 0.3889611816199956, "grad_norm": 0.0909295529127121, "learning_rate": 2.2087356653671982e-05, "loss": 0.05983614921569824, "step": 2877 }, { "epoch": 0.38909637841583156, "grad_norm": 0.1092667207121849, "learning_rate": 2.2081323676299756e-05, "loss": 0.05205661058425903, "step": 2878 }, { "epoch": 0.38923157521166746, "grad_norm": 0.11321708559989929, "learning_rate": 2.207528922455818e-05, "loss": 0.05749708414077759, "step": 2879 }, { "epoch": 0.3893667720075034, "grad_norm": 0.1219281479716301, "learning_rate": 2.206925329970366e-05, "loss": 0.060623884201049805, "step": 2880 }, { "epoch": 0.38950196880333937, "grad_norm": 0.07548338174819946, "learning_rate": 2.20632159029929e-05, "loss": 0.07660472393035889, "step": 2881 }, { "epoch": 0.3896371655991753, "grad_norm": 0.07393660396337509, "learning_rate": 2.2057177035682926e-05, "loss": 0.06603860855102539, "step": 2882 }, { "epoch": 0.38977236239501123, "grad_norm": 0.11670587211847305, "learning_rate": 2.2051136699031058e-05, "loss": 0.06527531147003174, "step": 2883 }, { "epoch": 0.3899075591908472, "grad_norm": 0.18693673610687256, "learning_rate": 2.2045094894294933e-05, "loss": 0.055972933769226074, "step": 2884 }, { "epoch": 0.39004275598668314, "grad_norm": 0.04825974255800247, "learning_rate": 2.203905162273248e-05, "loss": 0.04258167743682861, "step": 2885 }, { "epoch": 0.39017795278251904, "grad_norm": 0.05295131728053093, "learning_rate": 2.203300688560194e-05, "loss": 0.05836498737335205, "step": 2886 }, { "epoch": 0.390313149578355, "grad_norm": 0.14171721041202545, "learning_rate": 2.2026960684161862e-05, "loss": 0.07374858856201172, "step": 2887 }, { "epoch": 0.39044834637419096, "grad_norm": 0.08113564550876617, "learning_rate": 2.2020913019671097e-05, "loss": 0.05691087245941162, "step": 2888 }, { "epoch": 0.39058354317002686, "grad_norm": 0.05424632877111435, "learning_rate": 2.20148638933888e-05, "loss": 0.05599355697631836, "step": 2889 }, { "epoch": 0.3907187399658628, "grad_norm": 0.09617909789085388, "learning_rate": 2.2008813306574438e-05, "loss": 0.03911733627319336, "step": 2890 }, { "epoch": 0.3908539367616988, "grad_norm": 0.07980953902006149, "learning_rate": 2.200276126048777e-05, "loss": 0.09441471099853516, "step": 2891 }, { "epoch": 0.3909891335575347, "grad_norm": 0.0935797244310379, "learning_rate": 2.199670775638886e-05, "loss": 0.07928848266601562, "step": 2892 }, { "epoch": 0.39112433035337063, "grad_norm": 0.28658780455589294, "learning_rate": 2.1990652795538085e-05, "loss": 0.0719062089920044, "step": 2893 }, { "epoch": 0.3912595271492066, "grad_norm": 0.09717074036598206, "learning_rate": 2.1984596379196117e-05, "loss": 0.07060098648071289, "step": 2894 }, { "epoch": 0.3913947239450425, "grad_norm": 0.0727180764079094, "learning_rate": 2.1978538508623942e-05, "loss": 0.05077362060546875, "step": 2895 }, { "epoch": 0.39152992074087845, "grad_norm": 0.07455969601869583, "learning_rate": 2.197247918508283e-05, "loss": 0.037695884704589844, "step": 2896 }, { "epoch": 0.3916651175367144, "grad_norm": 0.1373928189277649, "learning_rate": 2.1966418409834374e-05, "loss": 0.06282782554626465, "step": 2897 }, { "epoch": 0.3918003143325503, "grad_norm": 0.08033270388841629, "learning_rate": 2.1960356184140453e-05, "loss": 0.07048797607421875, "step": 2898 }, { "epoch": 0.39193551112838626, "grad_norm": 0.2078501284122467, "learning_rate": 2.1954292509263258e-05, "loss": 0.07110238075256348, "step": 2899 }, { "epoch": 0.3920707079242222, "grad_norm": 0.08781122416257858, "learning_rate": 2.194822738646528e-05, "loss": 0.06780123710632324, "step": 2900 }, { "epoch": 0.3922059047200581, "grad_norm": 0.04870763048529625, "learning_rate": 2.1942160817009304e-05, "loss": 0.04882550239562988, "step": 2901 }, { "epoch": 0.3923411015158941, "grad_norm": 0.0994347333908081, "learning_rate": 2.193609280215843e-05, "loss": 0.07555484771728516, "step": 2902 }, { "epoch": 0.39247629831173003, "grad_norm": 0.12155533581972122, "learning_rate": 2.1930023343176044e-05, "loss": 0.061284780502319336, "step": 2903 }, { "epoch": 0.39261149510756593, "grad_norm": 0.04636900871992111, "learning_rate": 2.1923952441325837e-05, "loss": 0.053002357482910156, "step": 2904 }, { "epoch": 0.3927466919034019, "grad_norm": 0.06653093546628952, "learning_rate": 2.191788009787182e-05, "loss": 0.06837129592895508, "step": 2905 }, { "epoch": 0.39288188869923785, "grad_norm": 0.10176833719015121, "learning_rate": 2.1911806314078267e-05, "loss": 0.060544490814208984, "step": 2906 }, { "epoch": 0.39301708549507375, "grad_norm": 0.07731485366821289, "learning_rate": 2.1905731091209786e-05, "loss": 0.0642235279083252, "step": 2907 }, { "epoch": 0.3931522822909097, "grad_norm": 0.20840750634670258, "learning_rate": 2.1899654430531262e-05, "loss": 0.09018063545227051, "step": 2908 }, { "epoch": 0.39328747908674566, "grad_norm": 0.1256875842809677, "learning_rate": 2.18935763333079e-05, "loss": 0.04741537570953369, "step": 2909 }, { "epoch": 0.39342267588258156, "grad_norm": 0.09193024039268494, "learning_rate": 2.1887496800805175e-05, "loss": 0.07258749008178711, "step": 2910 }, { "epoch": 0.3935578726784175, "grad_norm": 0.05182574689388275, "learning_rate": 2.188141583428889e-05, "loss": 0.05204415321350098, "step": 2911 }, { "epoch": 0.3936930694742535, "grad_norm": 0.07833902537822723, "learning_rate": 2.1875333435025138e-05, "loss": 0.06772053241729736, "step": 2912 }, { "epoch": 0.3938282662700894, "grad_norm": 0.20408417284488678, "learning_rate": 2.1869249604280296e-05, "loss": 0.056893348693847656, "step": 2913 }, { "epoch": 0.39396346306592533, "grad_norm": 0.1315690279006958, "learning_rate": 2.1863164343321057e-05, "loss": 0.061517953872680664, "step": 2914 }, { "epoch": 0.3940986598617613, "grad_norm": 0.048998698592185974, "learning_rate": 2.1857077653414397e-05, "loss": 0.04927492141723633, "step": 2915 }, { "epoch": 0.3942338566575972, "grad_norm": 0.10806699842214584, "learning_rate": 2.185098953582761e-05, "loss": 0.06411170959472656, "step": 2916 }, { "epoch": 0.39436905345343315, "grad_norm": 0.07757361978292465, "learning_rate": 2.1844899991828265e-05, "loss": 0.06878209114074707, "step": 2917 }, { "epoch": 0.3945042502492691, "grad_norm": 0.0717562660574913, "learning_rate": 2.1838809022684247e-05, "loss": 0.07520771026611328, "step": 2918 }, { "epoch": 0.394639447045105, "grad_norm": 0.07770061492919922, "learning_rate": 2.1832716629663712e-05, "loss": 0.0619356632232666, "step": 2919 }, { "epoch": 0.39477464384094096, "grad_norm": 0.19647294282913208, "learning_rate": 2.1826622814035138e-05, "loss": 0.09703421592712402, "step": 2920 }, { "epoch": 0.3949098406367769, "grad_norm": 0.21972203254699707, "learning_rate": 2.1820527577067293e-05, "loss": 0.06958174705505371, "step": 2921 }, { "epoch": 0.3950450374326128, "grad_norm": 0.15243977308273315, "learning_rate": 2.1814430920029238e-05, "loss": 0.07746529579162598, "step": 2922 }, { "epoch": 0.3951802342284488, "grad_norm": 0.15289008617401123, "learning_rate": 2.1808332844190325e-05, "loss": 0.05689287185668945, "step": 2923 }, { "epoch": 0.39531543102428474, "grad_norm": 0.0488365963101387, "learning_rate": 2.1802233350820203e-05, "loss": 0.047292470932006836, "step": 2924 }, { "epoch": 0.39545062782012064, "grad_norm": 0.10398261994123459, "learning_rate": 2.179613244118883e-05, "loss": 0.06886279582977295, "step": 2925 }, { "epoch": 0.3955858246159566, "grad_norm": 0.09976505488157272, "learning_rate": 2.1790030116566436e-05, "loss": 0.03988444805145264, "step": 2926 }, { "epoch": 0.39572102141179255, "grad_norm": 0.09861832857131958, "learning_rate": 2.1783926378223563e-05, "loss": 0.075400710105896, "step": 2927 }, { "epoch": 0.39585621820762845, "grad_norm": 0.16030851006507874, "learning_rate": 2.1777821227431048e-05, "loss": 0.05587649345397949, "step": 2928 }, { "epoch": 0.3959914150034644, "grad_norm": 0.061079803854227066, "learning_rate": 2.1771714665460005e-05, "loss": 0.07207679748535156, "step": 2929 }, { "epoch": 0.39612661179930037, "grad_norm": 0.29675906896591187, "learning_rate": 2.1765606693581857e-05, "loss": 0.10237550735473633, "step": 2930 }, { "epoch": 0.3962618085951363, "grad_norm": 0.1858254224061966, "learning_rate": 2.1759497313068316e-05, "loss": 0.07250165939331055, "step": 2931 }, { "epoch": 0.3963970053909722, "grad_norm": 0.0911153256893158, "learning_rate": 2.175338652519139e-05, "loss": 0.08556985855102539, "step": 2932 }, { "epoch": 0.3965322021868082, "grad_norm": 0.10770085453987122, "learning_rate": 2.1747274331223377e-05, "loss": 0.08372950553894043, "step": 2933 }, { "epoch": 0.39666739898264414, "grad_norm": 0.07733134180307388, "learning_rate": 2.1741160732436865e-05, "loss": 0.044725894927978516, "step": 2934 }, { "epoch": 0.39680259577848004, "grad_norm": 0.2189149707555771, "learning_rate": 2.1735045730104746e-05, "loss": 0.07374417781829834, "step": 2935 }, { "epoch": 0.396937792574316, "grad_norm": 0.12623330950737, "learning_rate": 2.1728929325500183e-05, "loss": 0.08131217956542969, "step": 2936 }, { "epoch": 0.39707298937015195, "grad_norm": 0.08285139501094818, "learning_rate": 2.1722811519896654e-05, "loss": 0.08355093002319336, "step": 2937 }, { "epoch": 0.39720818616598785, "grad_norm": 0.08076751232147217, "learning_rate": 2.171669231456792e-05, "loss": 0.07858467102050781, "step": 2938 }, { "epoch": 0.3973433829618238, "grad_norm": 0.051190897822380066, "learning_rate": 2.1710571710788025e-05, "loss": 0.0547715425491333, "step": 2939 }, { "epoch": 0.39747857975765977, "grad_norm": 0.07591031491756439, "learning_rate": 2.1704449709831312e-05, "loss": 0.07062768936157227, "step": 2940 }, { "epoch": 0.39761377655349567, "grad_norm": 0.12319187074899673, "learning_rate": 2.1698326312972423e-05, "loss": 0.05763721466064453, "step": 2941 }, { "epoch": 0.3977489733493316, "grad_norm": 0.07787314802408218, "learning_rate": 2.1692201521486268e-05, "loss": 0.045623779296875, "step": 2942 }, { "epoch": 0.3978841701451676, "grad_norm": 0.1041032075881958, "learning_rate": 2.1686075336648075e-05, "loss": 0.07411837577819824, "step": 2943 }, { "epoch": 0.3980193669410035, "grad_norm": 0.09960998594760895, "learning_rate": 2.167994775973334e-05, "loss": 0.1112370491027832, "step": 2944 }, { "epoch": 0.39815456373683944, "grad_norm": 0.03396177664399147, "learning_rate": 2.167381879201786e-05, "loss": 0.03413820266723633, "step": 2945 }, { "epoch": 0.3982897605326754, "grad_norm": 0.16394257545471191, "learning_rate": 2.166768843477772e-05, "loss": 0.07999277114868164, "step": 2946 }, { "epoch": 0.3984249573285113, "grad_norm": 0.11818178743124008, "learning_rate": 2.166155668928929e-05, "loss": 0.05094313621520996, "step": 2947 }, { "epoch": 0.39856015412434725, "grad_norm": 0.05330347269773483, "learning_rate": 2.1655423556829233e-05, "loss": 0.06194496154785156, "step": 2948 }, { "epoch": 0.3986953509201832, "grad_norm": 0.05445524677634239, "learning_rate": 2.1649289038674504e-05, "loss": 0.06589770317077637, "step": 2949 }, { "epoch": 0.3988305477160191, "grad_norm": 0.11717861145734787, "learning_rate": 2.1643153136102333e-05, "loss": 0.08285784721374512, "step": 2950 }, { "epoch": 0.39896574451185507, "grad_norm": 0.09164632111787796, "learning_rate": 2.1637015850390255e-05, "loss": 0.0628896951675415, "step": 2951 }, { "epoch": 0.399100941307691, "grad_norm": 0.15552611649036407, "learning_rate": 2.1630877182816087e-05, "loss": 0.12502717971801758, "step": 2952 }, { "epoch": 0.3992361381035269, "grad_norm": 0.04887017980217934, "learning_rate": 2.162473713465793e-05, "loss": 0.05484938621520996, "step": 2953 }, { "epoch": 0.3993713348993629, "grad_norm": 0.1604534387588501, "learning_rate": 2.161859570719417e-05, "loss": 0.07022333145141602, "step": 2954 }, { "epoch": 0.39950653169519884, "grad_norm": 0.08292780071496964, "learning_rate": 2.161245290170349e-05, "loss": 0.06511497497558594, "step": 2955 }, { "epoch": 0.39964172849103474, "grad_norm": 0.09012793749570847, "learning_rate": 2.1606308719464858e-05, "loss": 0.07941818237304688, "step": 2956 }, { "epoch": 0.3997769252868707, "grad_norm": 0.06618606299161911, "learning_rate": 2.160016316175752e-05, "loss": 0.07085943222045898, "step": 2957 }, { "epoch": 0.39991212208270666, "grad_norm": 0.1177380159497261, "learning_rate": 2.159401622986101e-05, "loss": 0.0714254379272461, "step": 2958 }, { "epoch": 0.40004731887854256, "grad_norm": 0.18334154784679413, "learning_rate": 2.1587867925055165e-05, "loss": 0.0803985595703125, "step": 2959 }, { "epoch": 0.4001825156743785, "grad_norm": 0.12085236608982086, "learning_rate": 2.158171824862008e-05, "loss": 0.07714378833770752, "step": 2960 }, { "epoch": 0.40031771247021447, "grad_norm": 0.08710359036922455, "learning_rate": 2.157556720183616e-05, "loss": 0.04966241121292114, "step": 2961 }, { "epoch": 0.40045290926605037, "grad_norm": 0.05405594781041145, "learning_rate": 2.156941478598409e-05, "loss": 0.036066532135009766, "step": 2962 }, { "epoch": 0.40058810606188633, "grad_norm": 0.13081663846969604, "learning_rate": 2.156326100234482e-05, "loss": 0.0903012752532959, "step": 2963 }, { "epoch": 0.4007233028577223, "grad_norm": 0.08093271404504776, "learning_rate": 2.1557105852199612e-05, "loss": 0.10352659225463867, "step": 2964 }, { "epoch": 0.4008584996535582, "grad_norm": 0.07003793865442276, "learning_rate": 2.155094933683e-05, "loss": 0.0554126501083374, "step": 2965 }, { "epoch": 0.40099369644939414, "grad_norm": 0.1738732010126114, "learning_rate": 2.1544791457517802e-05, "loss": 0.07842874526977539, "step": 2966 }, { "epoch": 0.4011288932452301, "grad_norm": 0.06995101273059845, "learning_rate": 2.1538632215545126e-05, "loss": 0.06881844997406006, "step": 2967 }, { "epoch": 0.401264090041066, "grad_norm": 0.03176918253302574, "learning_rate": 2.153247161219435e-05, "loss": 0.03398263454437256, "step": 2968 }, { "epoch": 0.40139928683690196, "grad_norm": 0.07758156210184097, "learning_rate": 2.1526309648748147e-05, "loss": 0.04697573184967041, "step": 2969 }, { "epoch": 0.4015344836327379, "grad_norm": 0.05357353761792183, "learning_rate": 2.1520146326489476e-05, "loss": 0.049936771392822266, "step": 2970 }, { "epoch": 0.4016696804285738, "grad_norm": 0.08340733498334885, "learning_rate": 2.151398164670157e-05, "loss": 0.06646072864532471, "step": 2971 }, { "epoch": 0.4018048772244098, "grad_norm": 0.14851519465446472, "learning_rate": 2.1507815610667948e-05, "loss": 0.0814967155456543, "step": 2972 }, { "epoch": 0.40194007402024573, "grad_norm": 0.08107221871614456, "learning_rate": 2.1501648219672407e-05, "loss": 0.059036970138549805, "step": 2973 }, { "epoch": 0.40207527081608163, "grad_norm": 0.074784055352211, "learning_rate": 2.149547947499904e-05, "loss": 0.04662632942199707, "step": 2974 }, { "epoch": 0.4022104676119176, "grad_norm": 0.22925560176372528, "learning_rate": 2.1489309377932212e-05, "loss": 0.07279229164123535, "step": 2975 }, { "epoch": 0.40234566440775354, "grad_norm": 0.06163569912314415, "learning_rate": 2.1483137929756562e-05, "loss": 0.06318807601928711, "step": 2976 }, { "epoch": 0.4024808612035895, "grad_norm": 0.15652644634246826, "learning_rate": 2.147696513175702e-05, "loss": 0.07147181034088135, "step": 2977 }, { "epoch": 0.4026160579994254, "grad_norm": 0.14739644527435303, "learning_rate": 2.1470790985218804e-05, "loss": 0.04479563236236572, "step": 2978 }, { "epoch": 0.40275125479526136, "grad_norm": 0.0683426484465599, "learning_rate": 2.1464615491427393e-05, "loss": 0.06860971450805664, "step": 2979 }, { "epoch": 0.4028864515910973, "grad_norm": 0.20358186960220337, "learning_rate": 2.1458438651668567e-05, "loss": 0.05639910697937012, "step": 2980 }, { "epoch": 0.4030216483869332, "grad_norm": 0.07618564367294312, "learning_rate": 2.1452260467228376e-05, "loss": 0.07398104667663574, "step": 2981 }, { "epoch": 0.4031568451827692, "grad_norm": 0.10755223780870438, "learning_rate": 2.144608093939314e-05, "loss": 0.08925628662109375, "step": 2982 }, { "epoch": 0.40329204197860513, "grad_norm": 0.10787951201200485, "learning_rate": 2.1439900069449483e-05, "loss": 0.06861186027526855, "step": 2983 }, { "epoch": 0.40342723877444103, "grad_norm": 0.12498243898153305, "learning_rate": 2.1433717858684286e-05, "loss": 0.0492253303527832, "step": 2984 }, { "epoch": 0.403562435570277, "grad_norm": 0.15943868458271027, "learning_rate": 2.1427534308384724e-05, "loss": 0.07301950454711914, "step": 2985 }, { "epoch": 0.40369763236611295, "grad_norm": 0.31179332733154297, "learning_rate": 2.1421349419838245e-05, "loss": 0.10101604461669922, "step": 2986 }, { "epoch": 0.40383282916194885, "grad_norm": 0.06472857296466827, "learning_rate": 2.1415163194332574e-05, "loss": 0.0504913330078125, "step": 2987 }, { "epoch": 0.4039680259577848, "grad_norm": 0.10403521358966827, "learning_rate": 2.1408975633155715e-05, "loss": 0.06492233276367188, "step": 2988 }, { "epoch": 0.40410322275362076, "grad_norm": 0.22043445706367493, "learning_rate": 2.140278673759595e-05, "loss": 0.064263254404068, "step": 2989 }, { "epoch": 0.40423841954945666, "grad_norm": 0.024387631565332413, "learning_rate": 2.1396596508941847e-05, "loss": 0.023431196808815002, "step": 2990 }, { "epoch": 0.4043736163452926, "grad_norm": 0.09069811552762985, "learning_rate": 2.1390404948482238e-05, "loss": 0.057845115661621094, "step": 2991 }, { "epoch": 0.4045088131411286, "grad_norm": 0.0715799331665039, "learning_rate": 2.1384212057506243e-05, "loss": 0.06095367670059204, "step": 2992 }, { "epoch": 0.4046440099369645, "grad_norm": 0.10685287415981293, "learning_rate": 2.137801783730325e-05, "loss": 0.07075166702270508, "step": 2993 }, { "epoch": 0.40477920673280043, "grad_norm": 0.09337018430233002, "learning_rate": 2.137182228916293e-05, "loss": 0.06181478500366211, "step": 2994 }, { "epoch": 0.4049144035286364, "grad_norm": 0.09772127866744995, "learning_rate": 2.136562541437523e-05, "loss": 0.06584453582763672, "step": 2995 }, { "epoch": 0.4050496003244723, "grad_norm": 0.09671685099601746, "learning_rate": 2.135942721423038e-05, "loss": 0.06240963935852051, "step": 2996 }, { "epoch": 0.40518479712030825, "grad_norm": 0.03251372277736664, "learning_rate": 2.1353227690018865e-05, "loss": 0.03559398651123047, "step": 2997 }, { "epoch": 0.4053199939161442, "grad_norm": 0.07866518944501877, "learning_rate": 2.1347026843031467e-05, "loss": 0.05701172351837158, "step": 2998 }, { "epoch": 0.4054551907119801, "grad_norm": 0.061463821679353714, "learning_rate": 2.1340824674559238e-05, "loss": 0.06513738632202148, "step": 2999 }, { "epoch": 0.40559038750781606, "grad_norm": 0.06971432268619537, "learning_rate": 2.133462118589349e-05, "loss": 0.05636489391326904, "step": 3000 }, { "epoch": 0.405725584303652, "grad_norm": 0.08442901819944382, "learning_rate": 2.1328416378325837e-05, "loss": 0.0807790756225586, "step": 3001 }, { "epoch": 0.4058607810994879, "grad_norm": 0.07448405772447586, "learning_rate": 2.1322210253148144e-05, "loss": 0.05698251724243164, "step": 3002 }, { "epoch": 0.4059959778953239, "grad_norm": 0.05302705615758896, "learning_rate": 2.131600281165257e-05, "loss": 0.041414737701416016, "step": 3003 }, { "epoch": 0.40613117469115984, "grad_norm": 0.18194712698459625, "learning_rate": 2.130979405513152e-05, "loss": 0.07084739208221436, "step": 3004 }, { "epoch": 0.40626637148699574, "grad_norm": 0.08297908306121826, "learning_rate": 2.1303583984877697e-05, "loss": 0.05286884307861328, "step": 3005 }, { "epoch": 0.4064015682828317, "grad_norm": 0.10639271140098572, "learning_rate": 2.1297372602184085e-05, "loss": 0.08077192306518555, "step": 3006 }, { "epoch": 0.40653676507866765, "grad_norm": 0.14989924430847168, "learning_rate": 2.1291159908343907e-05, "loss": 0.06470870971679688, "step": 3007 }, { "epoch": 0.40667196187450355, "grad_norm": 0.07326214015483856, "learning_rate": 2.1284945904650693e-05, "loss": 0.04383516311645508, "step": 3008 }, { "epoch": 0.4068071586703395, "grad_norm": 0.12765485048294067, "learning_rate": 2.127873059239822e-05, "loss": 0.06995940208435059, "step": 3009 }, { "epoch": 0.40694235546617546, "grad_norm": 0.13427044451236725, "learning_rate": 2.127251397288056e-05, "loss": 0.081390380859375, "step": 3010 }, { "epoch": 0.40707755226201137, "grad_norm": 0.062342721968889236, "learning_rate": 2.126629604739204e-05, "loss": 0.06158982962369919, "step": 3011 }, { "epoch": 0.4072127490578473, "grad_norm": 0.10195564478635788, "learning_rate": 2.1260076817227268e-05, "loss": 0.09209346771240234, "step": 3012 }, { "epoch": 0.4073479458536833, "grad_norm": 0.07800912111997604, "learning_rate": 2.1253856283681122e-05, "loss": 0.03464120626449585, "step": 3013 }, { "epoch": 0.4074831426495192, "grad_norm": 0.08662211149930954, "learning_rate": 2.1247634448048743e-05, "loss": 0.05074167251586914, "step": 3014 }, { "epoch": 0.40761833944535514, "grad_norm": 0.07274608314037323, "learning_rate": 2.1241411311625562e-05, "loss": 0.05540144443511963, "step": 3015 }, { "epoch": 0.4077535362411911, "grad_norm": 0.08997748792171478, "learning_rate": 2.1235186875707257e-05, "loss": 0.04934501647949219, "step": 3016 }, { "epoch": 0.407888733037027, "grad_norm": 0.09159883111715317, "learning_rate": 2.1228961141589797e-05, "loss": 0.06996488571166992, "step": 3017 }, { "epoch": 0.40802392983286295, "grad_norm": 0.12758347392082214, "learning_rate": 2.122273411056941e-05, "loss": 0.05107855796813965, "step": 3018 }, { "epoch": 0.4081591266286989, "grad_norm": 0.09249735623598099, "learning_rate": 2.1216505783942592e-05, "loss": 0.06343221664428711, "step": 3019 }, { "epoch": 0.40829432342453487, "grad_norm": 0.056661177426576614, "learning_rate": 2.121027616300613e-05, "loss": 0.04540300369262695, "step": 3020 }, { "epoch": 0.40842952022037077, "grad_norm": 0.07007092237472534, "learning_rate": 2.1204045249057043e-05, "loss": 0.043569087982177734, "step": 3021 }, { "epoch": 0.4085647170162067, "grad_norm": 0.09861729294061661, "learning_rate": 2.119781304339266e-05, "loss": 0.041123151779174805, "step": 3022 }, { "epoch": 0.4086999138120427, "grad_norm": 0.18184809386730194, "learning_rate": 2.1191579547310547e-05, "loss": 0.11762666702270508, "step": 3023 }, { "epoch": 0.4088351106078786, "grad_norm": 0.09003068506717682, "learning_rate": 2.1185344762108556e-05, "loss": 0.0914621353149414, "step": 3024 }, { "epoch": 0.40897030740371454, "grad_norm": 0.09968771040439606, "learning_rate": 2.11791086890848e-05, "loss": 0.10293006896972656, "step": 3025 }, { "epoch": 0.4091055041995505, "grad_norm": 0.08220849186182022, "learning_rate": 2.1172871329537662e-05, "loss": 0.0676412582397461, "step": 3026 }, { "epoch": 0.4092407009953864, "grad_norm": 0.12748676538467407, "learning_rate": 2.1166632684765794e-05, "loss": 0.05331754684448242, "step": 3027 }, { "epoch": 0.40937589779122235, "grad_norm": 0.1278143674135208, "learning_rate": 2.1160392756068124e-05, "loss": 0.054271697998046875, "step": 3028 }, { "epoch": 0.4095110945870583, "grad_norm": 0.054544832557439804, "learning_rate": 2.1154151544743826e-05, "loss": 0.05579090118408203, "step": 3029 }, { "epoch": 0.4096462913828942, "grad_norm": 0.20833535492420197, "learning_rate": 2.114790905209236e-05, "loss": 0.09582304954528809, "step": 3030 }, { "epoch": 0.40978148817873017, "grad_norm": 0.15027658641338348, "learning_rate": 2.1141665279413444e-05, "loss": 0.10325431823730469, "step": 3031 }, { "epoch": 0.4099166849745661, "grad_norm": 0.10712064057588577, "learning_rate": 2.1135420228007062e-05, "loss": 0.0791773796081543, "step": 3032 }, { "epoch": 0.410051881770402, "grad_norm": 0.159246027469635, "learning_rate": 2.1129173899173474e-05, "loss": 0.10382843017578125, "step": 3033 }, { "epoch": 0.410187078566238, "grad_norm": 0.1293485313653946, "learning_rate": 2.11229262942132e-05, "loss": 0.09114742279052734, "step": 3034 }, { "epoch": 0.41032227536207394, "grad_norm": 0.06148974969983101, "learning_rate": 2.1116677414427008e-05, "loss": 0.07535743713378906, "step": 3035 }, { "epoch": 0.41045747215790984, "grad_norm": 0.17812968790531158, "learning_rate": 2.1110427261115972e-05, "loss": 0.08387422561645508, "step": 3036 }, { "epoch": 0.4105926689537458, "grad_norm": 0.11709120124578476, "learning_rate": 2.1104175835581386e-05, "loss": 0.0662384033203125, "step": 3037 }, { "epoch": 0.41072786574958176, "grad_norm": 0.041039351373910904, "learning_rate": 2.1097923139124846e-05, "loss": 0.03850674629211426, "step": 3038 }, { "epoch": 0.41086306254541766, "grad_norm": 0.0684625580906868, "learning_rate": 2.109166917304819e-05, "loss": 0.0749208927154541, "step": 3039 }, { "epoch": 0.4109982593412536, "grad_norm": 0.05245031788945198, "learning_rate": 2.1085413938653532e-05, "loss": 0.06904721260070801, "step": 3040 }, { "epoch": 0.41113345613708957, "grad_norm": 0.08892738819122314, "learning_rate": 2.107915743724323e-05, "loss": 0.07693004608154297, "step": 3041 }, { "epoch": 0.41126865293292547, "grad_norm": 0.11836464703083038, "learning_rate": 2.1072899670119935e-05, "loss": 0.07848429679870605, "step": 3042 }, { "epoch": 0.41140384972876143, "grad_norm": 0.09845896810293198, "learning_rate": 2.1066640638586543e-05, "loss": 0.09078264236450195, "step": 3043 }, { "epoch": 0.4115390465245974, "grad_norm": 0.04389701783657074, "learning_rate": 2.1060380343946223e-05, "loss": 0.04076647758483887, "step": 3044 }, { "epoch": 0.4116742433204333, "grad_norm": 0.07464593648910522, "learning_rate": 2.10541187875024e-05, "loss": 0.04718589782714844, "step": 3045 }, { "epoch": 0.41180944011626924, "grad_norm": 0.13523316383361816, "learning_rate": 2.1047855970558753e-05, "loss": 0.07450461387634277, "step": 3046 }, { "epoch": 0.4119446369121052, "grad_norm": 0.0772295892238617, "learning_rate": 2.1041591894419244e-05, "loss": 0.05384349822998047, "step": 3047 }, { "epoch": 0.4120798337079411, "grad_norm": 0.06032535433769226, "learning_rate": 2.1035326560388087e-05, "loss": 0.06676864624023438, "step": 3048 }, { "epoch": 0.41221503050377706, "grad_norm": 0.09851662069559097, "learning_rate": 2.1029059969769756e-05, "loss": 0.08440446853637695, "step": 3049 }, { "epoch": 0.412350227299613, "grad_norm": 0.0773877426981926, "learning_rate": 2.1022792123868986e-05, "loss": 0.05698060989379883, "step": 3050 }, { "epoch": 0.4124854240954489, "grad_norm": 0.19656893610954285, "learning_rate": 2.1016523023990783e-05, "loss": 0.10703563690185547, "step": 3051 }, { "epoch": 0.4126206208912849, "grad_norm": 0.07784394919872284, "learning_rate": 2.1010252671440398e-05, "loss": 0.06422150135040283, "step": 3052 }, { "epoch": 0.41275581768712083, "grad_norm": 0.05273876711726189, "learning_rate": 2.1003981067523358e-05, "loss": 0.04862689971923828, "step": 3053 }, { "epoch": 0.41289101448295673, "grad_norm": 0.11838555335998535, "learning_rate": 2.099770821354544e-05, "loss": 0.08938407897949219, "step": 3054 }, { "epoch": 0.4130262112787927, "grad_norm": 0.057645510882139206, "learning_rate": 2.0991434110812692e-05, "loss": 0.05691719055175781, "step": 3055 }, { "epoch": 0.41316140807462864, "grad_norm": 0.04171757772564888, "learning_rate": 2.0985158760631415e-05, "loss": 0.0449824333190918, "step": 3056 }, { "epoch": 0.41329660487046455, "grad_norm": 0.2051403671503067, "learning_rate": 2.0978882164308157e-05, "loss": 0.10453391075134277, "step": 3057 }, { "epoch": 0.4134318016663005, "grad_norm": 0.17624957859516144, "learning_rate": 2.0972604323149755e-05, "loss": 0.0975494384765625, "step": 3058 }, { "epoch": 0.41356699846213646, "grad_norm": 0.060458704829216, "learning_rate": 2.0966325238463283e-05, "loss": 0.061492919921875, "step": 3059 }, { "epoch": 0.41370219525797236, "grad_norm": 0.039016854017972946, "learning_rate": 2.096004491155608e-05, "loss": 0.0387115478515625, "step": 3060 }, { "epoch": 0.4138373920538083, "grad_norm": 0.05725531652569771, "learning_rate": 2.0953763343735746e-05, "loss": 0.06633472442626953, "step": 3061 }, { "epoch": 0.4139725888496443, "grad_norm": 0.1099138855934143, "learning_rate": 2.0947480536310133e-05, "loss": 0.03399014472961426, "step": 3062 }, { "epoch": 0.4141077856454802, "grad_norm": 0.1203671395778656, "learning_rate": 2.0941196490587352e-05, "loss": 0.04614830017089844, "step": 3063 }, { "epoch": 0.41424298244131613, "grad_norm": 0.08886481821537018, "learning_rate": 2.0934911207875782e-05, "loss": 0.05822920799255371, "step": 3064 }, { "epoch": 0.4143781792371521, "grad_norm": 0.05011489987373352, "learning_rate": 2.092862468948405e-05, "loss": 0.05697751045227051, "step": 3065 }, { "epoch": 0.41451337603298805, "grad_norm": 0.1723695546388626, "learning_rate": 2.0922336936721044e-05, "loss": 0.07545614242553711, "step": 3066 }, { "epoch": 0.41464857282882395, "grad_norm": 0.11021208763122559, "learning_rate": 2.0916047950895907e-05, "loss": 0.1034536361694336, "step": 3067 }, { "epoch": 0.4147837696246599, "grad_norm": 0.05389266088604927, "learning_rate": 2.0909757733318035e-05, "loss": 0.03635275363922119, "step": 3068 }, { "epoch": 0.41491896642049586, "grad_norm": 0.15880133211612701, "learning_rate": 2.090346628529709e-05, "loss": 0.08521342277526855, "step": 3069 }, { "epoch": 0.41505416321633176, "grad_norm": 0.05554840713739395, "learning_rate": 2.089717360814298e-05, "loss": 0.056429386138916016, "step": 3070 }, { "epoch": 0.4151893600121677, "grad_norm": 0.15616926550865173, "learning_rate": 2.0890879703165885e-05, "loss": 0.11056041717529297, "step": 3071 }, { "epoch": 0.4153245568080037, "grad_norm": 0.11596349626779556, "learning_rate": 2.0884584571676217e-05, "loss": 0.04823112487792969, "step": 3072 }, { "epoch": 0.4154597536038396, "grad_norm": 0.07576422393321991, "learning_rate": 2.0878288214984657e-05, "loss": 0.07398319244384766, "step": 3073 }, { "epoch": 0.41559495039967553, "grad_norm": 0.06998582184314728, "learning_rate": 2.0871990634402147e-05, "loss": 0.06373214721679688, "step": 3074 }, { "epoch": 0.4157301471955115, "grad_norm": 0.07030367106199265, "learning_rate": 2.0865691831239877e-05, "loss": 0.06789588928222656, "step": 3075 }, { "epoch": 0.4158653439913474, "grad_norm": 0.12691596150398254, "learning_rate": 2.0859391806809285e-05, "loss": 0.06635773181915283, "step": 3076 }, { "epoch": 0.41600054078718335, "grad_norm": 0.11216427385807037, "learning_rate": 2.0853090562422072e-05, "loss": 0.09018993377685547, "step": 3077 }, { "epoch": 0.4161357375830193, "grad_norm": 0.07276362180709839, "learning_rate": 2.084678809939019e-05, "loss": 0.06460797786712646, "step": 3078 }, { "epoch": 0.4162709343788552, "grad_norm": 0.057856395840644836, "learning_rate": 2.084048441902585e-05, "loss": 0.07272624969482422, "step": 3079 }, { "epoch": 0.41640613117469116, "grad_norm": 0.08151490241289139, "learning_rate": 2.0834179522641508e-05, "loss": 0.05999279022216797, "step": 3080 }, { "epoch": 0.4165413279705271, "grad_norm": 0.07579455524682999, "learning_rate": 2.0827873411549877e-05, "loss": 0.054164767265319824, "step": 3081 }, { "epoch": 0.416676524766363, "grad_norm": 0.04379725083708763, "learning_rate": 2.0821566087063926e-05, "loss": 0.04554009437561035, "step": 3082 }, { "epoch": 0.416811721562199, "grad_norm": 0.04158473014831543, "learning_rate": 2.081525755049687e-05, "loss": 0.04493856430053711, "step": 3083 }, { "epoch": 0.41694691835803493, "grad_norm": 0.06890937685966492, "learning_rate": 2.0808947803162182e-05, "loss": 0.0759425163269043, "step": 3084 }, { "epoch": 0.41708211515387084, "grad_norm": 0.12793216109275818, "learning_rate": 2.0802636846373578e-05, "loss": 0.060241103172302246, "step": 3085 }, { "epoch": 0.4172173119497068, "grad_norm": 0.06333158165216446, "learning_rate": 2.0796324681445045e-05, "loss": 0.05533003807067871, "step": 3086 }, { "epoch": 0.41735250874554275, "grad_norm": 0.04647061973810196, "learning_rate": 2.0790011309690806e-05, "loss": 0.05628824234008789, "step": 3087 }, { "epoch": 0.41748770554137865, "grad_norm": 0.08324585109949112, "learning_rate": 2.0783696732425332e-05, "loss": 0.07022237777709961, "step": 3088 }, { "epoch": 0.4176229023372146, "grad_norm": 0.08278854936361313, "learning_rate": 2.0777380950963355e-05, "loss": 0.05677366256713867, "step": 3089 }, { "epoch": 0.41775809913305056, "grad_norm": 0.11014974862337112, "learning_rate": 2.0771063966619854e-05, "loss": 0.059134483337402344, "step": 3090 }, { "epoch": 0.41789329592888647, "grad_norm": 0.0685177892446518, "learning_rate": 2.0764745780710065e-05, "loss": 0.06299197673797607, "step": 3091 }, { "epoch": 0.4180284927247224, "grad_norm": 0.06992512196302414, "learning_rate": 2.075842639454946e-05, "loss": 0.04918193817138672, "step": 3092 }, { "epoch": 0.4181636895205584, "grad_norm": 0.07779309898614883, "learning_rate": 2.075210580945378e-05, "loss": 0.042852044105529785, "step": 3093 }, { "epoch": 0.4182988863163943, "grad_norm": 0.10292708873748779, "learning_rate": 2.0745784026738984e-05, "loss": 0.08461833000183105, "step": 3094 }, { "epoch": 0.41843408311223024, "grad_norm": 0.06557980179786682, "learning_rate": 2.073946104772132e-05, "loss": 0.056448280811309814, "step": 3095 }, { "epoch": 0.4185692799080662, "grad_norm": 0.13443908095359802, "learning_rate": 2.0733136873717258e-05, "loss": 0.0901947021484375, "step": 3096 }, { "epoch": 0.4187044767039021, "grad_norm": 0.09556220471858978, "learning_rate": 2.0726811506043527e-05, "loss": 0.06488943099975586, "step": 3097 }, { "epoch": 0.41883967349973805, "grad_norm": 0.05614485219120979, "learning_rate": 2.0720484946017104e-05, "loss": 0.04627394676208496, "step": 3098 }, { "epoch": 0.418974870295574, "grad_norm": 0.06654493510723114, "learning_rate": 2.0714157194955202e-05, "loss": 0.07684820890426636, "step": 3099 }, { "epoch": 0.4191100670914099, "grad_norm": 0.1605784296989441, "learning_rate": 2.070782825417531e-05, "loss": 0.08035087585449219, "step": 3100 }, { "epoch": 0.41924526388724587, "grad_norm": 0.042471446096897125, "learning_rate": 2.0701498124995127e-05, "loss": 0.037381887435913086, "step": 3101 }, { "epoch": 0.4193804606830818, "grad_norm": 0.08313640207052231, "learning_rate": 2.069516680873264e-05, "loss": 0.08044379949569702, "step": 3102 }, { "epoch": 0.4195156574789177, "grad_norm": 0.08323860913515091, "learning_rate": 2.0688834306706047e-05, "loss": 0.07319927215576172, "step": 3103 }, { "epoch": 0.4196508542747537, "grad_norm": 0.14642830193042755, "learning_rate": 2.0682500620233815e-05, "loss": 0.06599903106689453, "step": 3104 }, { "epoch": 0.41978605107058964, "grad_norm": 0.06447198241949081, "learning_rate": 2.0676165750634656e-05, "loss": 0.06340980529785156, "step": 3105 }, { "epoch": 0.41992124786642554, "grad_norm": 0.11369089037179947, "learning_rate": 2.0669829699227513e-05, "loss": 0.049335360527038574, "step": 3106 }, { "epoch": 0.4200564446622615, "grad_norm": 0.12140129506587982, "learning_rate": 2.06634924673316e-05, "loss": 0.0637502670288086, "step": 3107 }, { "epoch": 0.42019164145809745, "grad_norm": 0.06305120140314102, "learning_rate": 2.0657154056266346e-05, "loss": 0.09333229064941406, "step": 3108 }, { "epoch": 0.42032683825393335, "grad_norm": 0.06813465803861618, "learning_rate": 2.0650814467351452e-05, "loss": 0.06344270706176758, "step": 3109 }, { "epoch": 0.4204620350497693, "grad_norm": 0.06209234148263931, "learning_rate": 2.064447370190685e-05, "loss": 0.06512045860290527, "step": 3110 }, { "epoch": 0.42059723184560527, "grad_norm": 0.07012920081615448, "learning_rate": 2.0638131761252724e-05, "loss": 0.053237199783325195, "step": 3111 }, { "epoch": 0.4207324286414412, "grad_norm": 0.056594476103782654, "learning_rate": 2.06317886467095e-05, "loss": 0.07568359375, "step": 3112 }, { "epoch": 0.4208676254372771, "grad_norm": 0.058603040874004364, "learning_rate": 2.0625444359597847e-05, "loss": 0.05091691017150879, "step": 3113 }, { "epoch": 0.4210028222331131, "grad_norm": 0.109632208943367, "learning_rate": 2.0619098901238684e-05, "loss": 0.11093711853027344, "step": 3114 }, { "epoch": 0.42113801902894904, "grad_norm": 0.11282766610383987, "learning_rate": 2.0612752272953158e-05, "loss": 0.10464859008789062, "step": 3115 }, { "epoch": 0.42127321582478494, "grad_norm": 0.03744662553071976, "learning_rate": 2.060640447606268e-05, "loss": 0.04370832443237305, "step": 3116 }, { "epoch": 0.4214084126206209, "grad_norm": 0.0604991540312767, "learning_rate": 2.0600055511888895e-05, "loss": 0.05349850654602051, "step": 3117 }, { "epoch": 0.42154360941645685, "grad_norm": 0.07266171276569366, "learning_rate": 2.059370538175369e-05, "loss": 0.055145978927612305, "step": 3118 }, { "epoch": 0.42167880621229276, "grad_norm": 0.044544413685798645, "learning_rate": 2.0587354086979194e-05, "loss": 0.06481552124023438, "step": 3119 }, { "epoch": 0.4218140030081287, "grad_norm": 0.0836453065276146, "learning_rate": 2.0581001628887785e-05, "loss": 0.06637883186340332, "step": 3120 }, { "epoch": 0.42194919980396467, "grad_norm": 0.04422401636838913, "learning_rate": 2.057464800880207e-05, "loss": 0.03454303741455078, "step": 3121 }, { "epoch": 0.42208439659980057, "grad_norm": 0.10634706169366837, "learning_rate": 2.0568293228044914e-05, "loss": 0.05202054977416992, "step": 3122 }, { "epoch": 0.4222195933956365, "grad_norm": 0.10487055778503418, "learning_rate": 2.0561937287939413e-05, "loss": 0.056520938873291016, "step": 3123 }, { "epoch": 0.4223547901914725, "grad_norm": 0.044058021157979965, "learning_rate": 2.055558018980891e-05, "loss": 0.04020202159881592, "step": 3124 }, { "epoch": 0.4224899869873084, "grad_norm": 0.03371758759021759, "learning_rate": 2.0549221934976987e-05, "loss": 0.03473806381225586, "step": 3125 }, { "epoch": 0.42262518378314434, "grad_norm": 0.10781261324882507, "learning_rate": 2.054286252476746e-05, "loss": 0.0739436149597168, "step": 3126 }, { "epoch": 0.4227603805789803, "grad_norm": 0.06343436241149902, "learning_rate": 2.05365019605044e-05, "loss": 0.05147576332092285, "step": 3127 }, { "epoch": 0.4228955773748162, "grad_norm": 0.10348464548587799, "learning_rate": 2.053014024351211e-05, "loss": 0.06627631187438965, "step": 3128 }, { "epoch": 0.42303077417065216, "grad_norm": 0.06274273991584778, "learning_rate": 2.0523777375115133e-05, "loss": 0.053835391998291016, "step": 3129 }, { "epoch": 0.4231659709664881, "grad_norm": 0.07987461984157562, "learning_rate": 2.0517413356638245e-05, "loss": 0.05454719066619873, "step": 3130 }, { "epoch": 0.423301167762324, "grad_norm": 0.08394049853086472, "learning_rate": 2.0511048189406472e-05, "loss": 0.04127311706542969, "step": 3131 }, { "epoch": 0.42343636455815997, "grad_norm": 0.08531752228736877, "learning_rate": 2.0504681874745082e-05, "loss": 0.07720017433166504, "step": 3132 }, { "epoch": 0.42357156135399593, "grad_norm": 0.13521486520767212, "learning_rate": 2.049831441397957e-05, "loss": 0.09921646118164062, "step": 3133 }, { "epoch": 0.42370675814983183, "grad_norm": 0.0751941055059433, "learning_rate": 2.0491945808435674e-05, "loss": 0.04781484603881836, "step": 3134 }, { "epoch": 0.4238419549456678, "grad_norm": 0.03657032176852226, "learning_rate": 2.048557605943938e-05, "loss": 0.040822505950927734, "step": 3135 }, { "epoch": 0.42397715174150374, "grad_norm": 0.17417150735855103, "learning_rate": 2.047920516831689e-05, "loss": 0.07330513000488281, "step": 3136 }, { "epoch": 0.42411234853733965, "grad_norm": 0.07247629016637802, "learning_rate": 2.047283313639467e-05, "loss": 0.05410957336425781, "step": 3137 }, { "epoch": 0.4242475453331756, "grad_norm": 0.13214074075222015, "learning_rate": 2.0466459964999408e-05, "loss": 0.06297755241394043, "step": 3138 }, { "epoch": 0.42438274212901156, "grad_norm": 0.0893910676240921, "learning_rate": 2.0460085655458025e-05, "loss": 0.05625462532043457, "step": 3139 }, { "epoch": 0.42451793892484746, "grad_norm": 0.07778891921043396, "learning_rate": 2.0453710209097697e-05, "loss": 0.0769956111907959, "step": 3140 }, { "epoch": 0.4246531357206834, "grad_norm": 0.16175420582294464, "learning_rate": 2.044733362724582e-05, "loss": 0.05966007709503174, "step": 3141 }, { "epoch": 0.4247883325165194, "grad_norm": 0.07387705147266388, "learning_rate": 2.0440955911230028e-05, "loss": 0.03690910339355469, "step": 3142 }, { "epoch": 0.4249235293123553, "grad_norm": 0.11351793259382248, "learning_rate": 2.0434577062378203e-05, "loss": 0.06286334991455078, "step": 3143 }, { "epoch": 0.42505872610819123, "grad_norm": 0.16056591272354126, "learning_rate": 2.0428197082018458e-05, "loss": 0.05270254611968994, "step": 3144 }, { "epoch": 0.4251939229040272, "grad_norm": 0.08143115788698196, "learning_rate": 2.042181597147913e-05, "loss": 0.05297136306762695, "step": 3145 }, { "epoch": 0.4253291196998631, "grad_norm": 0.11065438389778137, "learning_rate": 2.0415433732088806e-05, "loss": 0.08269381523132324, "step": 3146 }, { "epoch": 0.42546431649569905, "grad_norm": 0.053662966936826706, "learning_rate": 2.0409050365176294e-05, "loss": 0.049225687980651855, "step": 3147 }, { "epoch": 0.425599513291535, "grad_norm": 0.18104280531406403, "learning_rate": 2.0402665872070656e-05, "loss": 0.09186267852783203, "step": 3148 }, { "epoch": 0.4257347100873709, "grad_norm": 0.0901026725769043, "learning_rate": 2.0396280254101172e-05, "loss": 0.04329252243041992, "step": 3149 }, { "epoch": 0.42586990688320686, "grad_norm": 0.2154507040977478, "learning_rate": 2.0389893512597364e-05, "loss": 0.07400131225585938, "step": 3150 }, { "epoch": 0.4260051036790428, "grad_norm": 0.16561947762966156, "learning_rate": 2.0383505648888986e-05, "loss": 0.055444300174713135, "step": 3151 }, { "epoch": 0.4261403004748787, "grad_norm": 0.10395420342683792, "learning_rate": 2.037711666430602e-05, "loss": 0.07625532150268555, "step": 3152 }, { "epoch": 0.4262754972707147, "grad_norm": 0.09603693336248398, "learning_rate": 2.0370726560178693e-05, "loss": 0.04709172248840332, "step": 3153 }, { "epoch": 0.42641069406655063, "grad_norm": 0.06602743268013, "learning_rate": 2.036433533783745e-05, "loss": 0.05215644836425781, "step": 3154 }, { "epoch": 0.42654589086238653, "grad_norm": 0.10673072934150696, "learning_rate": 2.0357942998612988e-05, "loss": 0.06460332870483398, "step": 3155 }, { "epoch": 0.4266810876582225, "grad_norm": 0.08656342327594757, "learning_rate": 2.0351549543836224e-05, "loss": 0.07187271118164062, "step": 3156 }, { "epoch": 0.42681628445405845, "grad_norm": 0.11255121976137161, "learning_rate": 2.0345154974838307e-05, "loss": 0.047976016998291016, "step": 3157 }, { "epoch": 0.4269514812498944, "grad_norm": 0.08434443920850754, "learning_rate": 2.0338759292950618e-05, "loss": 0.049024343490600586, "step": 3158 }, { "epoch": 0.4270866780457303, "grad_norm": 0.2571490705013275, "learning_rate": 2.033236249950477e-05, "loss": 0.07988381385803223, "step": 3159 }, { "epoch": 0.42722187484156626, "grad_norm": 0.05089796707034111, "learning_rate": 2.0325964595832618e-05, "loss": 0.06427013874053955, "step": 3160 }, { "epoch": 0.4273570716374022, "grad_norm": 0.1525297909975052, "learning_rate": 2.031956558326624e-05, "loss": 0.08524465560913086, "step": 3161 }, { "epoch": 0.4274922684332381, "grad_norm": 0.0746731162071228, "learning_rate": 2.0313165463137935e-05, "loss": 0.07829952239990234, "step": 3162 }, { "epoch": 0.4276274652290741, "grad_norm": 0.08205119520425797, "learning_rate": 2.030676423678025e-05, "loss": 0.0600128173828125, "step": 3163 }, { "epoch": 0.42776266202491003, "grad_norm": 0.09793049097061157, "learning_rate": 2.030036190552595e-05, "loss": 0.03784537315368652, "step": 3164 }, { "epoch": 0.42789785882074594, "grad_norm": 0.1096339076757431, "learning_rate": 2.029395847070803e-05, "loss": 0.05259275436401367, "step": 3165 }, { "epoch": 0.4280330556165819, "grad_norm": 0.15999886393547058, "learning_rate": 2.0287553933659735e-05, "loss": 0.06393766403198242, "step": 3166 }, { "epoch": 0.42816825241241785, "grad_norm": 0.1600424200296402, "learning_rate": 2.0281148295714512e-05, "loss": 0.0844869613647461, "step": 3167 }, { "epoch": 0.42830344920825375, "grad_norm": 0.16639955341815948, "learning_rate": 2.027474155820605e-05, "loss": 0.06110811233520508, "step": 3168 }, { "epoch": 0.4284386460040897, "grad_norm": 0.06218131259083748, "learning_rate": 2.026833372246827e-05, "loss": 0.09666824340820312, "step": 3169 }, { "epoch": 0.42857384279992566, "grad_norm": 0.058496762067079544, "learning_rate": 2.026192478983531e-05, "loss": 0.05681562423706055, "step": 3170 }, { "epoch": 0.42870903959576157, "grad_norm": 0.10382314026355743, "learning_rate": 2.0255514761641555e-05, "loss": 0.07334446907043457, "step": 3171 }, { "epoch": 0.4288442363915975, "grad_norm": 0.1503184586763382, "learning_rate": 2.0249103639221597e-05, "loss": 0.08085298538208008, "step": 3172 }, { "epoch": 0.4289794331874335, "grad_norm": 0.13700683414936066, "learning_rate": 2.024269142391027e-05, "loss": 0.06449460983276367, "step": 3173 }, { "epoch": 0.4291146299832694, "grad_norm": 0.1608564853668213, "learning_rate": 2.023627811704263e-05, "loss": 0.08159399032592773, "step": 3174 }, { "epoch": 0.42924982677910534, "grad_norm": 0.12306592613458633, "learning_rate": 2.0229863719953963e-05, "loss": 0.07640337944030762, "step": 3175 }, { "epoch": 0.4293850235749413, "grad_norm": 0.0657508447766304, "learning_rate": 2.0223448233979785e-05, "loss": 0.050688326358795166, "step": 3176 }, { "epoch": 0.4295202203707772, "grad_norm": 0.22151495516300201, "learning_rate": 2.0217031660455825e-05, "loss": 0.09482574462890625, "step": 3177 }, { "epoch": 0.42965541716661315, "grad_norm": 0.09951341897249222, "learning_rate": 2.0210614000718054e-05, "loss": 0.0622866153717041, "step": 3178 }, { "epoch": 0.4297906139624491, "grad_norm": 0.12187718600034714, "learning_rate": 2.020419525610266e-05, "loss": 0.08787405490875244, "step": 3179 }, { "epoch": 0.429925810758285, "grad_norm": 0.09063244611024857, "learning_rate": 2.0197775427946066e-05, "loss": 0.09056663513183594, "step": 3180 }, { "epoch": 0.43006100755412097, "grad_norm": 0.04932304844260216, "learning_rate": 2.0191354517584902e-05, "loss": 0.06115436553955078, "step": 3181 }, { "epoch": 0.4301962043499569, "grad_norm": 0.20993757247924805, "learning_rate": 2.018493252635605e-05, "loss": 0.08200836181640625, "step": 3182 }, { "epoch": 0.4303314011457928, "grad_norm": 0.15816952288150787, "learning_rate": 2.0178509455596598e-05, "loss": 0.06627660989761353, "step": 3183 }, { "epoch": 0.4304665979416288, "grad_norm": 0.04997331649065018, "learning_rate": 2.017208530664386e-05, "loss": 0.057770729064941406, "step": 3184 }, { "epoch": 0.43060179473746474, "grad_norm": 0.06886984407901764, "learning_rate": 2.016566008083538e-05, "loss": 0.07120418548583984, "step": 3185 }, { "epoch": 0.43073699153330064, "grad_norm": 0.1024257019162178, "learning_rate": 2.0159233779508923e-05, "loss": 0.07142853736877441, "step": 3186 }, { "epoch": 0.4308721883291366, "grad_norm": 0.06449129432439804, "learning_rate": 2.0152806404002482e-05, "loss": 0.053707122802734375, "step": 3187 }, { "epoch": 0.43100738512497255, "grad_norm": 0.1538398265838623, "learning_rate": 2.014637795565427e-05, "loss": 0.08022022247314453, "step": 3188 }, { "epoch": 0.43114258192080845, "grad_norm": 0.1447172462940216, "learning_rate": 2.0139948435802722e-05, "loss": 0.0802949070930481, "step": 3189 }, { "epoch": 0.4312777787166444, "grad_norm": 0.05420884117484093, "learning_rate": 2.0133517845786504e-05, "loss": 0.06741786003112793, "step": 3190 }, { "epoch": 0.43141297551248037, "grad_norm": 0.0832863375544548, "learning_rate": 2.012708618694449e-05, "loss": 0.07708120346069336, "step": 3191 }, { "epoch": 0.43154817230831627, "grad_norm": 0.046726614236831665, "learning_rate": 2.0120653460615795e-05, "loss": 0.051119327545166016, "step": 3192 }, { "epoch": 0.4316833691041522, "grad_norm": 0.06133299320936203, "learning_rate": 2.011421966813974e-05, "loss": 0.04952883720397949, "step": 3193 }, { "epoch": 0.4318185658999882, "grad_norm": 0.10175460577011108, "learning_rate": 2.0107784810855882e-05, "loss": 0.06792628765106201, "step": 3194 }, { "epoch": 0.4319537626958241, "grad_norm": 0.0926133245229721, "learning_rate": 2.0101348890103985e-05, "loss": 0.054627180099487305, "step": 3195 }, { "epoch": 0.43208895949166004, "grad_norm": 0.05594244226813316, "learning_rate": 2.0094911907224043e-05, "loss": 0.06363487243652344, "step": 3196 }, { "epoch": 0.432224156287496, "grad_norm": 0.1287541687488556, "learning_rate": 2.008847386355628e-05, "loss": 0.07038569450378418, "step": 3197 }, { "epoch": 0.4323593530833319, "grad_norm": 0.1956535279750824, "learning_rate": 2.008203476044112e-05, "loss": 0.07847857475280762, "step": 3198 }, { "epoch": 0.43249454987916786, "grad_norm": 0.13023318350315094, "learning_rate": 2.007559459921922e-05, "loss": 0.07769918441772461, "step": 3199 }, { "epoch": 0.4326297466750038, "grad_norm": 0.12397795170545578, "learning_rate": 2.0069153381231456e-05, "loss": 0.060346126556396484, "step": 3200 }, { "epoch": 0.4327649434708397, "grad_norm": 0.08094791322946548, "learning_rate": 2.0062711107818933e-05, "loss": 0.06024932861328125, "step": 3201 }, { "epoch": 0.43290014026667567, "grad_norm": 0.0785946398973465, "learning_rate": 2.0056267780322953e-05, "loss": 0.046599388122558594, "step": 3202 }, { "epoch": 0.4330353370625116, "grad_norm": 0.08903083950281143, "learning_rate": 2.004982340008506e-05, "loss": 0.05499982833862305, "step": 3203 }, { "epoch": 0.4331705338583476, "grad_norm": 0.1369883418083191, "learning_rate": 2.004337796844701e-05, "loss": 0.04588675498962402, "step": 3204 }, { "epoch": 0.4333057306541835, "grad_norm": 0.09771862626075745, "learning_rate": 2.003693148675077e-05, "loss": 0.04064750671386719, "step": 3205 }, { "epoch": 0.43344092745001944, "grad_norm": 0.032162558287382126, "learning_rate": 2.003048395633853e-05, "loss": 0.03018641471862793, "step": 3206 }, { "epoch": 0.4335761242458554, "grad_norm": 0.10231100022792816, "learning_rate": 2.0024035378552708e-05, "loss": 0.11081218719482422, "step": 3207 }, { "epoch": 0.4337113210416913, "grad_norm": 0.08728881180286407, "learning_rate": 2.001758575473593e-05, "loss": 0.06610870361328125, "step": 3208 }, { "epoch": 0.43384651783752726, "grad_norm": 0.09966468065977097, "learning_rate": 2.0011135086231042e-05, "loss": 0.08336687088012695, "step": 3209 }, { "epoch": 0.4339817146333632, "grad_norm": 0.0684678927063942, "learning_rate": 2.0004683374381104e-05, "loss": 0.08991813659667969, "step": 3210 }, { "epoch": 0.4341169114291991, "grad_norm": 0.130214661359787, "learning_rate": 1.9998230620529395e-05, "loss": 0.07419872283935547, "step": 3211 }, { "epoch": 0.43425210822503507, "grad_norm": 0.16386649012565613, "learning_rate": 1.999177682601942e-05, "loss": 0.07621908187866211, "step": 3212 }, { "epoch": 0.43438730502087103, "grad_norm": 0.20553043484687805, "learning_rate": 1.9985321992194896e-05, "loss": 0.11010169982910156, "step": 3213 }, { "epoch": 0.43452250181670693, "grad_norm": 0.03922649845480919, "learning_rate": 1.9978866120399746e-05, "loss": 0.04037795960903168, "step": 3214 }, { "epoch": 0.4346576986125429, "grad_norm": 0.03622576594352722, "learning_rate": 1.9972409211978116e-05, "loss": 0.04348158836364746, "step": 3215 }, { "epoch": 0.43479289540837884, "grad_norm": 0.08675309270620346, "learning_rate": 1.9965951268274373e-05, "loss": 0.07869338989257812, "step": 3216 }, { "epoch": 0.43492809220421474, "grad_norm": 0.08813906461000443, "learning_rate": 1.9959492290633093e-05, "loss": 0.051762521266937256, "step": 3217 }, { "epoch": 0.4350632890000507, "grad_norm": 0.07535872608423233, "learning_rate": 1.995303228039907e-05, "loss": 0.06920510530471802, "step": 3218 }, { "epoch": 0.43519848579588666, "grad_norm": 0.09170832484960556, "learning_rate": 1.994657123891732e-05, "loss": 0.07438278198242188, "step": 3219 }, { "epoch": 0.43533368259172256, "grad_norm": 0.07532388716936111, "learning_rate": 1.9940109167533055e-05, "loss": 0.06286758184432983, "step": 3220 }, { "epoch": 0.4354688793875585, "grad_norm": 0.17018432915210724, "learning_rate": 1.9933646067591716e-05, "loss": 0.07802581787109375, "step": 3221 }, { "epoch": 0.4356040761833945, "grad_norm": 0.08761341124773026, "learning_rate": 1.992718194043896e-05, "loss": 0.06822872161865234, "step": 3222 }, { "epoch": 0.4357392729792304, "grad_norm": 0.1064404621720314, "learning_rate": 1.9920716787420643e-05, "loss": 0.06493830680847168, "step": 3223 }, { "epoch": 0.43587446977506633, "grad_norm": 0.0616588331758976, "learning_rate": 1.9914250609882858e-05, "loss": 0.06285524368286133, "step": 3224 }, { "epoch": 0.4360096665709023, "grad_norm": 0.19811654090881348, "learning_rate": 1.9907783409171885e-05, "loss": 0.09122323989868164, "step": 3225 }, { "epoch": 0.4361448633667382, "grad_norm": 0.0374748669564724, "learning_rate": 1.990131518663424e-05, "loss": 0.036103010177612305, "step": 3226 }, { "epoch": 0.43628006016257415, "grad_norm": 0.06356500834226608, "learning_rate": 1.9894845943616632e-05, "loss": 0.05052900314331055, "step": 3227 }, { "epoch": 0.4364152569584101, "grad_norm": 0.09023895114660263, "learning_rate": 1.988837568146599e-05, "loss": 0.057244300842285156, "step": 3228 }, { "epoch": 0.436550453754246, "grad_norm": 0.08295368403196335, "learning_rate": 1.988190440152947e-05, "loss": 0.07534408569335938, "step": 3229 }, { "epoch": 0.43668565055008196, "grad_norm": 0.17162348330020905, "learning_rate": 1.9875432105154424e-05, "loss": 0.08234882354736328, "step": 3230 }, { "epoch": 0.4368208473459179, "grad_norm": 0.1472318470478058, "learning_rate": 1.9868958793688412e-05, "loss": 0.09856009483337402, "step": 3231 }, { "epoch": 0.4369560441417538, "grad_norm": 0.11385153234004974, "learning_rate": 1.9862484468479213e-05, "loss": 0.07649481296539307, "step": 3232 }, { "epoch": 0.4370912409375898, "grad_norm": 0.0535292886197567, "learning_rate": 1.985600913087482e-05, "loss": 0.03593182563781738, "step": 3233 }, { "epoch": 0.43722643773342573, "grad_norm": 0.05281436815857887, "learning_rate": 1.9849532782223425e-05, "loss": 0.04095196723937988, "step": 3234 }, { "epoch": 0.43736163452926163, "grad_norm": 0.08454808592796326, "learning_rate": 1.9843055423873447e-05, "loss": 0.0487523078918457, "step": 3235 }, { "epoch": 0.4374968313250976, "grad_norm": 0.04947862774133682, "learning_rate": 1.9836577057173507e-05, "loss": 0.05003643035888672, "step": 3236 }, { "epoch": 0.43763202812093355, "grad_norm": 0.12157605588436127, "learning_rate": 1.9830097683472427e-05, "loss": 0.07188990712165833, "step": 3237 }, { "epoch": 0.43776722491676945, "grad_norm": 0.1331806778907776, "learning_rate": 1.9823617304119252e-05, "loss": 0.06160330772399902, "step": 3238 }, { "epoch": 0.4379024217126054, "grad_norm": 0.03333786129951477, "learning_rate": 1.9817135920463232e-05, "loss": 0.025844454765319824, "step": 3239 }, { "epoch": 0.43803761850844136, "grad_norm": 0.0759047269821167, "learning_rate": 1.9810653533853826e-05, "loss": 0.07703280448913574, "step": 3240 }, { "epoch": 0.43817281530427726, "grad_norm": 0.043087806552648544, "learning_rate": 1.9804170145640706e-05, "loss": 0.05115079879760742, "step": 3241 }, { "epoch": 0.4383080121001132, "grad_norm": 0.09004665911197662, "learning_rate": 1.9797685757173737e-05, "loss": 0.06872403621673584, "step": 3242 }, { "epoch": 0.4384432088959492, "grad_norm": 0.18874123692512512, "learning_rate": 1.979120036980301e-05, "loss": 0.06737768650054932, "step": 3243 }, { "epoch": 0.4385784056917851, "grad_norm": 0.11173944920301437, "learning_rate": 1.9784713984878814e-05, "loss": 0.04859170317649841, "step": 3244 }, { "epoch": 0.43871360248762103, "grad_norm": 0.046070925891399384, "learning_rate": 1.9778226603751652e-05, "loss": 0.05103862285614014, "step": 3245 }, { "epoch": 0.438848799283457, "grad_norm": 0.15152396261692047, "learning_rate": 1.9771738227772235e-05, "loss": 0.07271099090576172, "step": 3246 }, { "epoch": 0.4389839960792929, "grad_norm": 0.06952129304409027, "learning_rate": 1.976524885829147e-05, "loss": 0.05684375762939453, "step": 3247 }, { "epoch": 0.43911919287512885, "grad_norm": 0.08088558167219162, "learning_rate": 1.975875849666048e-05, "loss": 0.0633234977722168, "step": 3248 }, { "epoch": 0.4392543896709648, "grad_norm": 0.07711949944496155, "learning_rate": 1.9752267144230595e-05, "loss": 0.0728921890258789, "step": 3249 }, { "epoch": 0.43938958646680076, "grad_norm": 0.07124711573123932, "learning_rate": 1.9745774802353347e-05, "loss": 0.06157541275024414, "step": 3250 }, { "epoch": 0.43952478326263666, "grad_norm": 0.159086674451828, "learning_rate": 1.973928147238048e-05, "loss": 0.05113077163696289, "step": 3251 }, { "epoch": 0.4396599800584726, "grad_norm": 0.10507015138864517, "learning_rate": 1.973278715566394e-05, "loss": 0.049019575119018555, "step": 3252 }, { "epoch": 0.4397951768543086, "grad_norm": 0.07731273770332336, "learning_rate": 1.972629185355587e-05, "loss": 0.06448245048522949, "step": 3253 }, { "epoch": 0.4399303736501445, "grad_norm": 0.07900913059711456, "learning_rate": 1.971979556740864e-05, "loss": 0.05887889862060547, "step": 3254 }, { "epoch": 0.44006557044598044, "grad_norm": 0.07361288368701935, "learning_rate": 1.971329829857479e-05, "loss": 0.07146072387695312, "step": 3255 }, { "epoch": 0.4402007672418164, "grad_norm": 0.06809293478727341, "learning_rate": 1.9706800048407112e-05, "loss": 0.03605484962463379, "step": 3256 }, { "epoch": 0.4403359640376523, "grad_norm": 0.21496723592281342, "learning_rate": 1.9700300818258566e-05, "loss": 0.072593092918396, "step": 3257 }, { "epoch": 0.44047116083348825, "grad_norm": 0.05230354145169258, "learning_rate": 1.9693800609482318e-05, "loss": 0.05963575839996338, "step": 3258 }, { "epoch": 0.4406063576293242, "grad_norm": 0.056048981845378876, "learning_rate": 1.9687299423431754e-05, "loss": 0.05493319034576416, "step": 3259 }, { "epoch": 0.4407415544251601, "grad_norm": 0.12063869088888168, "learning_rate": 1.968079726146045e-05, "loss": 0.07966184616088867, "step": 3260 }, { "epoch": 0.44087675122099607, "grad_norm": 0.12758807837963104, "learning_rate": 1.9674294124922204e-05, "loss": 0.08736658096313477, "step": 3261 }, { "epoch": 0.441011948016832, "grad_norm": 0.05945262312889099, "learning_rate": 1.966779001517099e-05, "loss": 0.06276941299438477, "step": 3262 }, { "epoch": 0.4411471448126679, "grad_norm": 0.04829312488436699, "learning_rate": 1.9661284933561006e-05, "loss": 0.047865867614746094, "step": 3263 }, { "epoch": 0.4412823416085039, "grad_norm": 0.13132239878177643, "learning_rate": 1.9654778881446636e-05, "loss": 0.07096004486083984, "step": 3264 }, { "epoch": 0.44141753840433984, "grad_norm": 0.0730091780424118, "learning_rate": 1.9648271860182487e-05, "loss": 0.07902050018310547, "step": 3265 }, { "epoch": 0.44155273520017574, "grad_norm": 0.05608237534761429, "learning_rate": 1.9641763871123345e-05, "loss": 0.07141232490539551, "step": 3266 }, { "epoch": 0.4416879319960117, "grad_norm": 0.09248071163892746, "learning_rate": 1.963525491562421e-05, "loss": 0.07359836250543594, "step": 3267 }, { "epoch": 0.44182312879184765, "grad_norm": 0.0771331861615181, "learning_rate": 1.9628744995040287e-05, "loss": 0.0667567253112793, "step": 3268 }, { "epoch": 0.44195832558768355, "grad_norm": 0.099677175283432, "learning_rate": 1.9622234110726976e-05, "loss": 0.06462430953979492, "step": 3269 }, { "epoch": 0.4420935223835195, "grad_norm": 0.07637973129749298, "learning_rate": 1.9615722264039868e-05, "loss": 0.06608521938323975, "step": 3270 }, { "epoch": 0.44222871917935547, "grad_norm": 0.08768682926893234, "learning_rate": 1.9609209456334772e-05, "loss": 0.046433210372924805, "step": 3271 }, { "epoch": 0.44236391597519137, "grad_norm": 0.07879055291414261, "learning_rate": 1.960269568896769e-05, "loss": 0.06598556041717529, "step": 3272 }, { "epoch": 0.4424991127710273, "grad_norm": 0.062198273837566376, "learning_rate": 1.9596180963294822e-05, "loss": 0.04973268508911133, "step": 3273 }, { "epoch": 0.4426343095668633, "grad_norm": 0.06544417887926102, "learning_rate": 1.9589665280672564e-05, "loss": 0.04590177536010742, "step": 3274 }, { "epoch": 0.4427695063626992, "grad_norm": 0.07249431312084198, "learning_rate": 1.958314864245752e-05, "loss": 0.05933821201324463, "step": 3275 }, { "epoch": 0.44290470315853514, "grad_norm": 0.03899597004055977, "learning_rate": 1.957663105000649e-05, "loss": 0.03536057472229004, "step": 3276 }, { "epoch": 0.4430398999543711, "grad_norm": 0.11802103370428085, "learning_rate": 1.957011250467647e-05, "loss": 0.06655168533325195, "step": 3277 }, { "epoch": 0.443175096750207, "grad_norm": 0.06084742397069931, "learning_rate": 1.9563593007824658e-05, "loss": 0.0634080171585083, "step": 3278 }, { "epoch": 0.44331029354604296, "grad_norm": 0.05009220540523529, "learning_rate": 1.9557072560808442e-05, "loss": 0.0459136962890625, "step": 3279 }, { "epoch": 0.4434454903418789, "grad_norm": 0.1192793995141983, "learning_rate": 1.9550551164985418e-05, "loss": 0.04971790313720703, "step": 3280 }, { "epoch": 0.4435806871377148, "grad_norm": 0.1543719321489334, "learning_rate": 1.9544028821713372e-05, "loss": 0.07663488388061523, "step": 3281 }, { "epoch": 0.44371588393355077, "grad_norm": 0.04591715335845947, "learning_rate": 1.9537505532350298e-05, "loss": 0.043121337890625, "step": 3282 }, { "epoch": 0.4438510807293867, "grad_norm": 0.21003416180610657, "learning_rate": 1.9530981298254376e-05, "loss": 0.06580352783203125, "step": 3283 }, { "epoch": 0.44398627752522263, "grad_norm": 0.06348355859518051, "learning_rate": 1.9524456120783983e-05, "loss": 0.04552507400512695, "step": 3284 }, { "epoch": 0.4441214743210586, "grad_norm": 0.12992259860038757, "learning_rate": 1.95179300012977e-05, "loss": 0.08132648468017578, "step": 3285 }, { "epoch": 0.44425667111689454, "grad_norm": 0.06173097714781761, "learning_rate": 1.9511402941154296e-05, "loss": 0.05788033455610275, "step": 3286 }, { "epoch": 0.44439186791273044, "grad_norm": 0.09726239740848541, "learning_rate": 1.950487494171274e-05, "loss": 0.07055425643920898, "step": 3287 }, { "epoch": 0.4445270647085664, "grad_norm": 0.08492317795753479, "learning_rate": 1.9498346004332203e-05, "loss": 0.04618768393993378, "step": 3288 }, { "epoch": 0.44466226150440236, "grad_norm": 0.18086524307727814, "learning_rate": 1.949181613037204e-05, "loss": 0.0637044906616211, "step": 3289 }, { "epoch": 0.44479745830023826, "grad_norm": 0.18768538534641266, "learning_rate": 1.9485285321191804e-05, "loss": 0.07714366912841797, "step": 3290 }, { "epoch": 0.4449326550960742, "grad_norm": 0.06693324446678162, "learning_rate": 1.9478753578151244e-05, "loss": 0.05836009979248047, "step": 3291 }, { "epoch": 0.44506785189191017, "grad_norm": 0.17425455152988434, "learning_rate": 1.9472220902610304e-05, "loss": 0.06835460662841797, "step": 3292 }, { "epoch": 0.44520304868774613, "grad_norm": 0.24480070173740387, "learning_rate": 1.9465687295929127e-05, "loss": 0.0700838565826416, "step": 3293 }, { "epoch": 0.44533824548358203, "grad_norm": 0.12210825830698013, "learning_rate": 1.945915275946804e-05, "loss": 0.04314112663269043, "step": 3294 }, { "epoch": 0.445473442279418, "grad_norm": 0.06020324304699898, "learning_rate": 1.9452617294587573e-05, "loss": 0.05254793167114258, "step": 3295 }, { "epoch": 0.44560863907525394, "grad_norm": 0.12664490938186646, "learning_rate": 1.9446080902648435e-05, "loss": 0.07109546661376953, "step": 3296 }, { "epoch": 0.44574383587108984, "grad_norm": 0.038578931242227554, "learning_rate": 1.943954358501154e-05, "loss": 0.035285115242004395, "step": 3297 }, { "epoch": 0.4458790326669258, "grad_norm": 0.054506756365299225, "learning_rate": 1.943300534303801e-05, "loss": 0.0599825382232666, "step": 3298 }, { "epoch": 0.44601422946276176, "grad_norm": 0.08307227492332458, "learning_rate": 1.9426466178089116e-05, "loss": 0.07606077194213867, "step": 3299 }, { "epoch": 0.44614942625859766, "grad_norm": 0.06323505192995071, "learning_rate": 1.9419926091526367e-05, "loss": 0.08138561248779297, "step": 3300 }, { "epoch": 0.4462846230544336, "grad_norm": 0.04589042812585831, "learning_rate": 1.9413385084711425e-05, "loss": 0.06054115295410156, "step": 3301 }, { "epoch": 0.4464198198502696, "grad_norm": 0.09724168479442596, "learning_rate": 1.9406843159006183e-05, "loss": 0.07733547687530518, "step": 3302 }, { "epoch": 0.4465550166461055, "grad_norm": 0.10368669778108597, "learning_rate": 1.940030031577269e-05, "loss": 0.06560420989990234, "step": 3303 }, { "epoch": 0.44669021344194143, "grad_norm": 0.052629485726356506, "learning_rate": 1.9393756556373212e-05, "loss": 0.0666041374206543, "step": 3304 }, { "epoch": 0.4468254102377774, "grad_norm": 0.09566310793161392, "learning_rate": 1.9387211882170184e-05, "loss": 0.07320308685302734, "step": 3305 }, { "epoch": 0.4469606070336133, "grad_norm": 0.16783958673477173, "learning_rate": 1.9380666294526243e-05, "loss": 0.0665731430053711, "step": 3306 }, { "epoch": 0.44709580382944925, "grad_norm": 0.08109188824892044, "learning_rate": 1.9374119794804228e-05, "loss": 0.05227309465408325, "step": 3307 }, { "epoch": 0.4472310006252852, "grad_norm": 0.058383334428071976, "learning_rate": 1.936757238436714e-05, "loss": 0.0552821159362793, "step": 3308 }, { "epoch": 0.4473661974211211, "grad_norm": 0.10600022226572037, "learning_rate": 1.936102406457819e-05, "loss": 0.07664573192596436, "step": 3309 }, { "epoch": 0.44750139421695706, "grad_norm": 0.08078666031360626, "learning_rate": 1.935447483680078e-05, "loss": 0.05944967269897461, "step": 3310 }, { "epoch": 0.447636591012793, "grad_norm": 0.10073341429233551, "learning_rate": 1.9347924702398484e-05, "loss": 0.08360576629638672, "step": 3311 }, { "epoch": 0.4477717878086289, "grad_norm": 0.15922707319259644, "learning_rate": 1.9341373662735075e-05, "loss": 0.051326751708984375, "step": 3312 }, { "epoch": 0.4479069846044649, "grad_norm": 0.12030304223299026, "learning_rate": 1.9334821719174524e-05, "loss": 0.06579971313476562, "step": 3313 }, { "epoch": 0.44804218140030083, "grad_norm": 0.08145484328269958, "learning_rate": 1.9328268873080974e-05, "loss": 0.05148029327392578, "step": 3314 }, { "epoch": 0.44817737819613673, "grad_norm": 0.10939681529998779, "learning_rate": 1.9321715125818765e-05, "loss": 0.08198165893554688, "step": 3315 }, { "epoch": 0.4483125749919727, "grad_norm": 0.10112543404102325, "learning_rate": 1.931516047875242e-05, "loss": 0.04317474365234375, "step": 3316 }, { "epoch": 0.44844777178780865, "grad_norm": 0.163516566157341, "learning_rate": 1.930860493324665e-05, "loss": 0.10313796997070312, "step": 3317 }, { "epoch": 0.44858296858364455, "grad_norm": 0.24030812084674835, "learning_rate": 1.9302048490666356e-05, "loss": 0.14154672622680664, "step": 3318 }, { "epoch": 0.4487181653794805, "grad_norm": 0.07341719418764114, "learning_rate": 1.9295491152376623e-05, "loss": 0.06919002532958984, "step": 3319 }, { "epoch": 0.44885336217531646, "grad_norm": 0.1363794058561325, "learning_rate": 1.928893291974273e-05, "loss": 0.07803583145141602, "step": 3320 }, { "epoch": 0.44898855897115236, "grad_norm": 0.1386910378932953, "learning_rate": 1.9282373794130132e-05, "loss": 0.05220353603363037, "step": 3321 }, { "epoch": 0.4491237557669883, "grad_norm": 0.15536145865917206, "learning_rate": 1.9275813776904472e-05, "loss": 0.09165549278259277, "step": 3322 }, { "epoch": 0.4492589525628243, "grad_norm": 0.14397242665290833, "learning_rate": 1.9269252869431582e-05, "loss": 0.05765819549560547, "step": 3323 }, { "epoch": 0.4493941493586602, "grad_norm": 0.17620156705379486, "learning_rate": 1.9262691073077476e-05, "loss": 0.055515289306640625, "step": 3324 }, { "epoch": 0.44952934615449613, "grad_norm": 0.0667574480175972, "learning_rate": 1.9256128389208363e-05, "loss": 0.07583379745483398, "step": 3325 }, { "epoch": 0.4496645429503321, "grad_norm": 0.12930706143379211, "learning_rate": 1.924956481919062e-05, "loss": 0.052909016609191895, "step": 3326 }, { "epoch": 0.449799739746168, "grad_norm": 0.06159701943397522, "learning_rate": 1.9243000364390825e-05, "loss": 0.0617833137512207, "step": 3327 }, { "epoch": 0.44993493654200395, "grad_norm": 0.09984344989061356, "learning_rate": 1.9236435026175717e-05, "loss": 0.048177242279052734, "step": 3328 }, { "epoch": 0.4500701333378399, "grad_norm": 0.11977959424257278, "learning_rate": 1.9229868805912248e-05, "loss": 0.11803674697875977, "step": 3329 }, { "epoch": 0.4502053301336758, "grad_norm": 0.06340879201889038, "learning_rate": 1.9223301704967543e-05, "loss": 0.07313919067382812, "step": 3330 }, { "epoch": 0.45034052692951176, "grad_norm": 0.12955889105796814, "learning_rate": 1.92167337247089e-05, "loss": 0.08098793029785156, "step": 3331 }, { "epoch": 0.4504757237253477, "grad_norm": 0.15105193853378296, "learning_rate": 1.9210164866503808e-05, "loss": 0.07820415496826172, "step": 3332 }, { "epoch": 0.4506109205211836, "grad_norm": 0.061245497316122055, "learning_rate": 1.9203595131719935e-05, "loss": 0.06598091125488281, "step": 3333 }, { "epoch": 0.4507461173170196, "grad_norm": 0.08076949417591095, "learning_rate": 1.9197024521725148e-05, "loss": 0.10294008255004883, "step": 3334 }, { "epoch": 0.45088131411285554, "grad_norm": 0.14071492850780487, "learning_rate": 1.9190453037887465e-05, "loss": 0.07048368453979492, "step": 3335 }, { "epoch": 0.45101651090869144, "grad_norm": 0.15181340277194977, "learning_rate": 1.918388068157512e-05, "loss": 0.0891256332397461, "step": 3336 }, { "epoch": 0.4511517077045274, "grad_norm": 0.07215507328510284, "learning_rate": 1.9177307454156507e-05, "loss": 0.08237147331237793, "step": 3337 }, { "epoch": 0.45128690450036335, "grad_norm": 0.10343208909034729, "learning_rate": 1.9170733357000202e-05, "loss": 0.08262085914611816, "step": 3338 }, { "epoch": 0.4514221012961993, "grad_norm": 0.13351555168628693, "learning_rate": 1.916415839147497e-05, "loss": 0.048540592193603516, "step": 3339 }, { "epoch": 0.4515572980920352, "grad_norm": 0.07054270058870316, "learning_rate": 1.9157582558949756e-05, "loss": 0.0576019287109375, "step": 3340 }, { "epoch": 0.45169249488787117, "grad_norm": 0.14163419604301453, "learning_rate": 1.9151005860793682e-05, "loss": 0.07925033569335938, "step": 3341 }, { "epoch": 0.4518276916837071, "grad_norm": 0.07599087804555893, "learning_rate": 1.9144428298376056e-05, "loss": 0.06356644630432129, "step": 3342 }, { "epoch": 0.451962888479543, "grad_norm": 0.06390077620744705, "learning_rate": 1.9137849873066355e-05, "loss": 0.05053520202636719, "step": 3343 }, { "epoch": 0.452098085275379, "grad_norm": 0.11575238406658173, "learning_rate": 1.9131270586234243e-05, "loss": 0.09288692474365234, "step": 3344 }, { "epoch": 0.45223328207121494, "grad_norm": 0.28957614302635193, "learning_rate": 1.9124690439249564e-05, "loss": 0.08353424072265625, "step": 3345 }, { "epoch": 0.45236847886705084, "grad_norm": 0.07142768800258636, "learning_rate": 1.9118109433482342e-05, "loss": 0.06046032905578613, "step": 3346 }, { "epoch": 0.4525036756628868, "grad_norm": 0.1332925707101822, "learning_rate": 1.911152757030278e-05, "loss": 0.08690881729125977, "step": 3347 }, { "epoch": 0.45263887245872275, "grad_norm": 0.13871270418167114, "learning_rate": 1.9104944851081247e-05, "loss": 0.07982838153839111, "step": 3348 }, { "epoch": 0.45277406925455865, "grad_norm": 0.12735576927661896, "learning_rate": 1.9098361277188303e-05, "loss": 0.05886983871459961, "step": 3349 }, { "epoch": 0.4529092660503946, "grad_norm": 0.10669447481632233, "learning_rate": 1.909177684999469e-05, "loss": 0.0580788254737854, "step": 3350 }, { "epoch": 0.45304446284623057, "grad_norm": 0.05876397714018822, "learning_rate": 1.9085191570871316e-05, "loss": 0.030082225799560547, "step": 3351 }, { "epoch": 0.45317965964206647, "grad_norm": 0.06846889108419418, "learning_rate": 1.9078605441189275e-05, "loss": 0.056444644927978516, "step": 3352 }, { "epoch": 0.4533148564379024, "grad_norm": 0.10122275352478027, "learning_rate": 1.9072018462319828e-05, "loss": 0.07421910762786865, "step": 3353 }, { "epoch": 0.4534500532337384, "grad_norm": 0.044947437942028046, "learning_rate": 1.9065430635634422e-05, "loss": 0.04962897300720215, "step": 3354 }, { "epoch": 0.4535852500295743, "grad_norm": 0.05820563808083534, "learning_rate": 1.9058841962504677e-05, "loss": 0.048013925552368164, "step": 3355 }, { "epoch": 0.45372044682541024, "grad_norm": 0.055406201630830765, "learning_rate": 1.9052252444302394e-05, "loss": 0.06353473663330078, "step": 3356 }, { "epoch": 0.4538556436212462, "grad_norm": 0.08920296281576157, "learning_rate": 1.904566208239954e-05, "loss": 0.07845258712768555, "step": 3357 }, { "epoch": 0.4539908404170821, "grad_norm": 0.056996092200279236, "learning_rate": 1.903907087816827e-05, "loss": 0.056680917739868164, "step": 3358 }, { "epoch": 0.45412603721291805, "grad_norm": 0.08179796487092972, "learning_rate": 1.9032478832980902e-05, "loss": 0.060242652893066406, "step": 3359 }, { "epoch": 0.454261234008754, "grad_norm": 0.05451581999659538, "learning_rate": 1.9025885948209938e-05, "loss": 0.04308128356933594, "step": 3360 }, { "epoch": 0.4543964308045899, "grad_norm": 0.054087620228528976, "learning_rate": 1.901929222522805e-05, "loss": 0.058830857276916504, "step": 3361 }, { "epoch": 0.45453162760042587, "grad_norm": 0.14267101883888245, "learning_rate": 1.901269766540809e-05, "loss": 0.07298135757446289, "step": 3362 }, { "epoch": 0.4546668243962618, "grad_norm": 0.04137654975056648, "learning_rate": 1.9006102270123076e-05, "loss": 0.06249809265136719, "step": 3363 }, { "epoch": 0.4548020211920977, "grad_norm": 0.05987751483917236, "learning_rate": 1.8999506040746208e-05, "loss": 0.045018911361694336, "step": 3364 }, { "epoch": 0.4549372179879337, "grad_norm": 0.05372031778097153, "learning_rate": 1.899290897865085e-05, "loss": 0.05090665817260742, "step": 3365 }, { "epoch": 0.45507241478376964, "grad_norm": 0.0519489049911499, "learning_rate": 1.898631108521055e-05, "loss": 0.03508281707763672, "step": 3366 }, { "epoch": 0.45520761157960554, "grad_norm": 0.07589711993932724, "learning_rate": 1.8979712361799027e-05, "loss": 0.04430168867111206, "step": 3367 }, { "epoch": 0.4553428083754415, "grad_norm": 0.09291955828666687, "learning_rate": 1.8973112809790168e-05, "loss": 0.08121395111083984, "step": 3368 }, { "epoch": 0.45547800517127746, "grad_norm": 0.10260295867919922, "learning_rate": 1.8966512430558036e-05, "loss": 0.0785675048828125, "step": 3369 }, { "epoch": 0.45561320196711336, "grad_norm": 0.09186156094074249, "learning_rate": 1.8959911225476858e-05, "loss": 0.054290771484375, "step": 3370 }, { "epoch": 0.4557483987629493, "grad_norm": 0.11577325314283371, "learning_rate": 1.895330919592105e-05, "loss": 0.07844400405883789, "step": 3371 }, { "epoch": 0.45588359555878527, "grad_norm": 0.18349677324295044, "learning_rate": 1.8946706343265187e-05, "loss": 0.07850754261016846, "step": 3372 }, { "epoch": 0.45601879235462117, "grad_norm": 0.11858109384775162, "learning_rate": 1.8940102668884016e-05, "loss": 0.0690622329711914, "step": 3373 }, { "epoch": 0.45615398915045713, "grad_norm": 0.1004151925444603, "learning_rate": 1.893349817415246e-05, "loss": 0.05154871940612793, "step": 3374 }, { "epoch": 0.4562891859462931, "grad_norm": 0.07658327370882034, "learning_rate": 1.8926892860445607e-05, "loss": 0.0634160041809082, "step": 3375 }, { "epoch": 0.456424382742129, "grad_norm": 0.12076064944267273, "learning_rate": 1.8920286729138718e-05, "loss": 0.06859207153320312, "step": 3376 }, { "epoch": 0.45655957953796494, "grad_norm": 0.059752751141786575, "learning_rate": 1.8913679781607225e-05, "loss": 0.04063749313354492, "step": 3377 }, { "epoch": 0.4566947763338009, "grad_norm": 0.14601118862628937, "learning_rate": 1.8907072019226734e-05, "loss": 0.05247354507446289, "step": 3378 }, { "epoch": 0.4568299731296368, "grad_norm": 0.16539974510669708, "learning_rate": 1.8900463443373015e-05, "loss": 0.07305121421813965, "step": 3379 }, { "epoch": 0.45696516992547276, "grad_norm": 0.05158930644392967, "learning_rate": 1.889385405542201e-05, "loss": 0.058272361755371094, "step": 3380 }, { "epoch": 0.4571003667213087, "grad_norm": 0.05201015993952751, "learning_rate": 1.8887243856749816e-05, "loss": 0.055187225341796875, "step": 3381 }, { "epoch": 0.4572355635171446, "grad_norm": 0.06832972168922424, "learning_rate": 1.8880632848732723e-05, "loss": 0.06260538101196289, "step": 3382 }, { "epoch": 0.4573707603129806, "grad_norm": 0.10209842771291733, "learning_rate": 1.8874021032747185e-05, "loss": 0.07155990600585938, "step": 3383 }, { "epoch": 0.45750595710881653, "grad_norm": 0.20631210505962372, "learning_rate": 1.8867408410169803e-05, "loss": 0.13002777099609375, "step": 3384 }, { "epoch": 0.4576411539046525, "grad_norm": 0.17883168160915375, "learning_rate": 1.886079498237737e-05, "loss": 0.0935506820678711, "step": 3385 }, { "epoch": 0.4577763507004884, "grad_norm": 0.10258541256189346, "learning_rate": 1.885418075074683e-05, "loss": 0.045658111572265625, "step": 3386 }, { "epoch": 0.45791154749632434, "grad_norm": 0.14190711081027985, "learning_rate": 1.884756571665531e-05, "loss": 0.09167361259460449, "step": 3387 }, { "epoch": 0.4580467442921603, "grad_norm": 0.05444727838039398, "learning_rate": 1.8840949881480085e-05, "loss": 0.05582749843597412, "step": 3388 }, { "epoch": 0.4581819410879962, "grad_norm": 0.05539333075284958, "learning_rate": 1.8834333246598613e-05, "loss": 0.037569522857666016, "step": 3389 }, { "epoch": 0.45831713788383216, "grad_norm": 0.07443052530288696, "learning_rate": 1.8827715813388514e-05, "loss": 0.04635262489318848, "step": 3390 }, { "epoch": 0.4584523346796681, "grad_norm": 0.09747791290283203, "learning_rate": 1.8821097583227572e-05, "loss": 0.051010966300964355, "step": 3391 }, { "epoch": 0.458587531475504, "grad_norm": 0.09465256333351135, "learning_rate": 1.8814478557493732e-05, "loss": 0.06458258628845215, "step": 3392 }, { "epoch": 0.45872272827134, "grad_norm": 0.06467480957508087, "learning_rate": 1.8807858737565118e-05, "loss": 0.04518604278564453, "step": 3393 }, { "epoch": 0.45885792506717593, "grad_norm": 0.07349219918251038, "learning_rate": 1.880123812482001e-05, "loss": 0.06886053085327148, "step": 3394 }, { "epoch": 0.45899312186301183, "grad_norm": 0.11295178532600403, "learning_rate": 1.8794616720636853e-05, "loss": 0.07073509693145752, "step": 3395 }, { "epoch": 0.4591283186588478, "grad_norm": 0.08168105781078339, "learning_rate": 1.8787994526394257e-05, "loss": 0.08412361145019531, "step": 3396 }, { "epoch": 0.45926351545468375, "grad_norm": 0.10572415590286255, "learning_rate": 1.8781371543471e-05, "loss": 0.08240842819213867, "step": 3397 }, { "epoch": 0.45939871225051965, "grad_norm": 0.07295472174882889, "learning_rate": 1.8774747773246024e-05, "loss": 0.05265235900878906, "step": 3398 }, { "epoch": 0.4595339090463556, "grad_norm": 0.06520816683769226, "learning_rate": 1.8768123217098438e-05, "loss": 0.06548810005187988, "step": 3399 }, { "epoch": 0.45966910584219156, "grad_norm": 0.05021342262625694, "learning_rate": 1.8761497876407496e-05, "loss": 0.039754629135131836, "step": 3400 }, { "epoch": 0.45980430263802746, "grad_norm": 0.0390787348151207, "learning_rate": 1.8754871752552646e-05, "loss": 0.04110407829284668, "step": 3401 }, { "epoch": 0.4599394994338634, "grad_norm": 0.05425737425684929, "learning_rate": 1.8748244846913463e-05, "loss": 0.04517549276351929, "step": 3402 }, { "epoch": 0.4600746962296994, "grad_norm": 0.07912790775299072, "learning_rate": 1.874161716086972e-05, "loss": 0.056559622287750244, "step": 3403 }, { "epoch": 0.4602098930255353, "grad_norm": 0.10625887662172318, "learning_rate": 1.8734988695801333e-05, "loss": 0.07645535469055176, "step": 3404 }, { "epoch": 0.46034508982137123, "grad_norm": 0.05065944790840149, "learning_rate": 1.8728359453088382e-05, "loss": 0.042622387409210205, "step": 3405 }, { "epoch": 0.4604802866172072, "grad_norm": 0.16429084539413452, "learning_rate": 1.8721729434111108e-05, "loss": 0.10288619995117188, "step": 3406 }, { "epoch": 0.4606154834130431, "grad_norm": 0.06927911937236786, "learning_rate": 1.871509864024992e-05, "loss": 0.05532026290893555, "step": 3407 }, { "epoch": 0.46075068020887905, "grad_norm": 0.03308272361755371, "learning_rate": 1.8708467072885385e-05, "loss": 0.04797959327697754, "step": 3408 }, { "epoch": 0.460885877004715, "grad_norm": 0.07664691656827927, "learning_rate": 1.8701834733398227e-05, "loss": 0.04942655563354492, "step": 3409 }, { "epoch": 0.4610210738005509, "grad_norm": 0.0725528746843338, "learning_rate": 1.8695201623169335e-05, "loss": 0.05355715751647949, "step": 3410 }, { "epoch": 0.46115627059638686, "grad_norm": 0.08797306567430496, "learning_rate": 1.868856774357977e-05, "loss": 0.06735944747924805, "step": 3411 }, { "epoch": 0.4612914673922228, "grad_norm": 0.0751928761601448, "learning_rate": 1.868193309601072e-05, "loss": 0.042236328125, "step": 3412 }, { "epoch": 0.4614266641880587, "grad_norm": 0.1532754749059677, "learning_rate": 1.867529768184357e-05, "loss": 0.08893823623657227, "step": 3413 }, { "epoch": 0.4615618609838947, "grad_norm": 0.06271111220121384, "learning_rate": 1.8668661502459842e-05, "loss": 0.04842948913574219, "step": 3414 }, { "epoch": 0.46169705777973064, "grad_norm": 0.06832994520664215, "learning_rate": 1.866202455924123e-05, "loss": 0.02930617332458496, "step": 3415 }, { "epoch": 0.46183225457556654, "grad_norm": 0.0676761344075203, "learning_rate": 1.865538685356957e-05, "loss": 0.06079220771789551, "step": 3416 }, { "epoch": 0.4619674513714025, "grad_norm": 0.16096584498882294, "learning_rate": 1.864874838682688e-05, "loss": 0.08106279373168945, "step": 3417 }, { "epoch": 0.46210264816723845, "grad_norm": 0.06340809911489487, "learning_rate": 1.864210916039531e-05, "loss": 0.044806480407714844, "step": 3418 }, { "epoch": 0.46223784496307435, "grad_norm": 0.1108318641781807, "learning_rate": 1.86354691756572e-05, "loss": 0.08450126647949219, "step": 3419 }, { "epoch": 0.4623730417589103, "grad_norm": 0.0868648886680603, "learning_rate": 1.8628828433995013e-05, "loss": 0.06555521488189697, "step": 3420 }, { "epoch": 0.46250823855474626, "grad_norm": 0.13067664206027985, "learning_rate": 1.86221869367914e-05, "loss": 0.05691194534301758, "step": 3421 }, { "epoch": 0.46264343535058217, "grad_norm": 0.03764400631189346, "learning_rate": 1.8615544685429153e-05, "loss": 0.043553829193115234, "step": 3422 }, { "epoch": 0.4627786321464181, "grad_norm": 0.10746897757053375, "learning_rate": 1.860890168129122e-05, "loss": 0.04911398887634277, "step": 3423 }, { "epoch": 0.4629138289422541, "grad_norm": 0.0851382240653038, "learning_rate": 1.8602257925760708e-05, "loss": 0.07423734664916992, "step": 3424 }, { "epoch": 0.46304902573809, "grad_norm": 0.12360157072544098, "learning_rate": 1.859561342022089e-05, "loss": 0.08611297607421875, "step": 3425 }, { "epoch": 0.46318422253392594, "grad_norm": 0.09106816351413727, "learning_rate": 1.8588968166055185e-05, "loss": 0.07781457901000977, "step": 3426 }, { "epoch": 0.4633194193297619, "grad_norm": 0.05281224474310875, "learning_rate": 1.8582322164647166e-05, "loss": 0.06018543243408203, "step": 3427 }, { "epoch": 0.4634546161255978, "grad_norm": 0.14227604866027832, "learning_rate": 1.8575675417380568e-05, "loss": 0.07758688926696777, "step": 3428 }, { "epoch": 0.46358981292143375, "grad_norm": 0.17344899475574493, "learning_rate": 1.856902792563928e-05, "loss": 0.0871438980102539, "step": 3429 }, { "epoch": 0.4637250097172697, "grad_norm": 0.07698340713977814, "learning_rate": 1.856237969080735e-05, "loss": 0.06049633026123047, "step": 3430 }, { "epoch": 0.46386020651310567, "grad_norm": 0.05003563314676285, "learning_rate": 1.8555730714268967e-05, "loss": 0.04724407196044922, "step": 3431 }, { "epoch": 0.46399540330894157, "grad_norm": 0.15106461942195892, "learning_rate": 1.8549080997408492e-05, "loss": 0.09495067596435547, "step": 3432 }, { "epoch": 0.4641306001047775, "grad_norm": 0.041310567408800125, "learning_rate": 1.8542430541610426e-05, "loss": 0.03312993049621582, "step": 3433 }, { "epoch": 0.4642657969006135, "grad_norm": 0.1053837388753891, "learning_rate": 1.8535779348259428e-05, "loss": 0.06680011749267578, "step": 3434 }, { "epoch": 0.4644009936964494, "grad_norm": 0.047986049205064774, "learning_rate": 1.852912741874032e-05, "loss": 0.04587411880493164, "step": 3435 }, { "epoch": 0.46453619049228534, "grad_norm": 0.06248097121715546, "learning_rate": 1.8522474754438056e-05, "loss": 0.06231224536895752, "step": 3436 }, { "epoch": 0.4646713872881213, "grad_norm": 0.10099022090435028, "learning_rate": 1.851582135673777e-05, "loss": 0.0876932144165039, "step": 3437 }, { "epoch": 0.4648065840839572, "grad_norm": 0.09039631485939026, "learning_rate": 1.850916722702473e-05, "loss": 0.10105228424072266, "step": 3438 }, { "epoch": 0.46494178087979315, "grad_norm": 0.040435608476400375, "learning_rate": 1.8502512366684355e-05, "loss": 0.038915395736694336, "step": 3439 }, { "epoch": 0.4650769776756291, "grad_norm": 0.04880201071500778, "learning_rate": 1.8495856777102232e-05, "loss": 0.052845001220703125, "step": 3440 }, { "epoch": 0.465212174471465, "grad_norm": 0.07616909593343735, "learning_rate": 1.848920045966408e-05, "loss": 0.07064557075500488, "step": 3441 }, { "epoch": 0.46534737126730097, "grad_norm": 0.06871594488620758, "learning_rate": 1.8482543415755797e-05, "loss": 0.07067060470581055, "step": 3442 }, { "epoch": 0.4654825680631369, "grad_norm": 0.05334244668483734, "learning_rate": 1.8475885646763394e-05, "loss": 0.04635286331176758, "step": 3443 }, { "epoch": 0.4656177648589728, "grad_norm": 0.054341871291399, "learning_rate": 1.8469227154073064e-05, "loss": 0.059173583984375, "step": 3444 }, { "epoch": 0.4657529616548088, "grad_norm": 0.04777482897043228, "learning_rate": 1.8462567939071142e-05, "loss": 0.03973746299743652, "step": 3445 }, { "epoch": 0.46588815845064474, "grad_norm": 0.0668109804391861, "learning_rate": 1.845590800314411e-05, "loss": 0.054836273193359375, "step": 3446 }, { "epoch": 0.46602335524648064, "grad_norm": 0.14163623750209808, "learning_rate": 1.8449247347678607e-05, "loss": 0.06424188613891602, "step": 3447 }, { "epoch": 0.4661585520423166, "grad_norm": 0.0577327236533165, "learning_rate": 1.8442585974061405e-05, "loss": 0.06545388698577881, "step": 3448 }, { "epoch": 0.46629374883815256, "grad_norm": 0.03324039280414581, "learning_rate": 1.8435923883679452e-05, "loss": 0.04532194137573242, "step": 3449 }, { "epoch": 0.46642894563398846, "grad_norm": 0.09412823617458344, "learning_rate": 1.8429261077919818e-05, "loss": 0.08692502975463867, "step": 3450 }, { "epoch": 0.4665641424298244, "grad_norm": 0.05564672872424126, "learning_rate": 1.8422597558169742e-05, "loss": 0.055829405784606934, "step": 3451 }, { "epoch": 0.46669933922566037, "grad_norm": 0.06166764348745346, "learning_rate": 1.84159333258166e-05, "loss": 0.056166648864746094, "step": 3452 }, { "epoch": 0.46683453602149627, "grad_norm": 0.0592268742620945, "learning_rate": 1.8409268382247933e-05, "loss": 0.03865456581115723, "step": 3453 }, { "epoch": 0.46696973281733223, "grad_norm": 0.0823439359664917, "learning_rate": 1.8402602728851405e-05, "loss": 0.06898117065429688, "step": 3454 }, { "epoch": 0.4671049296131682, "grad_norm": 0.0611744150519371, "learning_rate": 1.839593636701484e-05, "loss": 0.038504183292388916, "step": 3455 }, { "epoch": 0.4672401264090041, "grad_norm": 0.054461125284433365, "learning_rate": 1.8389269298126214e-05, "loss": 0.047418832778930664, "step": 3456 }, { "epoch": 0.46737532320484004, "grad_norm": 0.058121565729379654, "learning_rate": 1.838260152357365e-05, "loss": 0.042220115661621094, "step": 3457 }, { "epoch": 0.467510520000676, "grad_norm": 0.08581023663282394, "learning_rate": 1.837593304474541e-05, "loss": 0.07930755615234375, "step": 3458 }, { "epoch": 0.4676457167965119, "grad_norm": 0.09980616718530655, "learning_rate": 1.836926386302991e-05, "loss": 0.08956289291381836, "step": 3459 }, { "epoch": 0.46778091359234786, "grad_norm": 0.07131228595972061, "learning_rate": 1.8362593979815696e-05, "loss": 0.06785130500793457, "step": 3460 }, { "epoch": 0.4679161103881838, "grad_norm": 0.10368010401725769, "learning_rate": 1.8355923396491496e-05, "loss": 0.10187339782714844, "step": 3461 }, { "epoch": 0.4680513071840197, "grad_norm": 0.08152949064970016, "learning_rate": 1.8349252114446138e-05, "loss": 0.056446075439453125, "step": 3462 }, { "epoch": 0.4681865039798557, "grad_norm": 0.04941851273179054, "learning_rate": 1.834258013506864e-05, "loss": 0.0626382827758789, "step": 3463 }, { "epoch": 0.46832170077569163, "grad_norm": 0.05576416850090027, "learning_rate": 1.833590745974813e-05, "loss": 0.058524370193481445, "step": 3464 }, { "epoch": 0.46845689757152753, "grad_norm": 0.10252301394939423, "learning_rate": 1.8329234089873892e-05, "loss": 0.04757833480834961, "step": 3465 }, { "epoch": 0.4685920943673635, "grad_norm": 0.028037160634994507, "learning_rate": 1.8322560026835366e-05, "loss": 0.023073315620422363, "step": 3466 }, { "epoch": 0.46872729116319944, "grad_norm": 0.11344929039478302, "learning_rate": 1.8315885272022125e-05, "loss": 0.08417391777038574, "step": 3467 }, { "epoch": 0.46886248795903535, "grad_norm": 0.05986790731549263, "learning_rate": 1.830920982682389e-05, "loss": 0.05260425806045532, "step": 3468 }, { "epoch": 0.4689976847548713, "grad_norm": 0.08218964189291, "learning_rate": 1.830253369263052e-05, "loss": 0.06722700595855713, "step": 3469 }, { "epoch": 0.46913288155070726, "grad_norm": 0.10893671959638596, "learning_rate": 1.8295856870832024e-05, "loss": 0.06206321716308594, "step": 3470 }, { "epoch": 0.46926807834654316, "grad_norm": 0.05701784789562225, "learning_rate": 1.828917936281855e-05, "loss": 0.050049543380737305, "step": 3471 }, { "epoch": 0.4694032751423791, "grad_norm": 0.11329787969589233, "learning_rate": 1.8282501169980396e-05, "loss": 0.0606992244720459, "step": 3472 }, { "epoch": 0.4695384719382151, "grad_norm": 0.09615732729434967, "learning_rate": 1.8275822293707992e-05, "loss": 0.07668781280517578, "step": 3473 }, { "epoch": 0.469673668734051, "grad_norm": 0.11335486173629761, "learning_rate": 1.8269142735391917e-05, "loss": 0.06651115417480469, "step": 3474 }, { "epoch": 0.46980886552988693, "grad_norm": 0.09174930304288864, "learning_rate": 1.8262462496422893e-05, "loss": 0.04760622978210449, "step": 3475 }, { "epoch": 0.4699440623257229, "grad_norm": 0.06920750439167023, "learning_rate": 1.8255781578191778e-05, "loss": 0.048304349184036255, "step": 3476 }, { "epoch": 0.47007925912155885, "grad_norm": 0.06450843065977097, "learning_rate": 1.824909998208958e-05, "loss": 0.05025315284729004, "step": 3477 }, { "epoch": 0.47021445591739475, "grad_norm": 0.20177200436592102, "learning_rate": 1.8242417709507434e-05, "loss": 0.07547140121459961, "step": 3478 }, { "epoch": 0.4703496527132307, "grad_norm": 0.08519097417593002, "learning_rate": 1.8235734761836637e-05, "loss": 0.06819629669189453, "step": 3479 }, { "epoch": 0.47048484950906666, "grad_norm": 0.09071063250303268, "learning_rate": 1.82290511404686e-05, "loss": 0.04694056510925293, "step": 3480 }, { "epoch": 0.47062004630490256, "grad_norm": 0.17191484570503235, "learning_rate": 1.8222366846794904e-05, "loss": 0.0582888126373291, "step": 3481 }, { "epoch": 0.4707552431007385, "grad_norm": 0.08369534462690353, "learning_rate": 1.8215681882207238e-05, "loss": 0.0454862117767334, "step": 3482 }, { "epoch": 0.4708904398965745, "grad_norm": 0.06966706365346909, "learning_rate": 1.8208996248097462e-05, "loss": 0.0385441780090332, "step": 3483 }, { "epoch": 0.4710256366924104, "grad_norm": 0.07693798840045929, "learning_rate": 1.8202309945857557e-05, "loss": 0.07320630550384521, "step": 3484 }, { "epoch": 0.47116083348824633, "grad_norm": 0.10992797464132309, "learning_rate": 1.8195622976879638e-05, "loss": 0.06183671951293945, "step": 3485 }, { "epoch": 0.4712960302840823, "grad_norm": 0.12138800323009491, "learning_rate": 1.8188935342555977e-05, "loss": 0.06736171245574951, "step": 3486 }, { "epoch": 0.4714312270799182, "grad_norm": 0.07128792256116867, "learning_rate": 1.818224704427897e-05, "loss": 0.054497480392456055, "step": 3487 }, { "epoch": 0.47156642387575415, "grad_norm": 0.06149104982614517, "learning_rate": 1.8175558083441162e-05, "loss": 0.06778943538665771, "step": 3488 }, { "epoch": 0.4717016206715901, "grad_norm": 0.2007308006286621, "learning_rate": 1.8168868461435225e-05, "loss": 0.05882704257965088, "step": 3489 }, { "epoch": 0.471836817467426, "grad_norm": 0.07799633592367172, "learning_rate": 1.8162178179653977e-05, "loss": 0.059824466705322266, "step": 3490 }, { "epoch": 0.47197201426326196, "grad_norm": 0.07294801622629166, "learning_rate": 1.815548723949037e-05, "loss": 0.06339430809020996, "step": 3491 }, { "epoch": 0.4721072110590979, "grad_norm": 0.08424577116966248, "learning_rate": 1.814879564233749e-05, "loss": 0.08004307746887207, "step": 3492 }, { "epoch": 0.4722424078549338, "grad_norm": 0.0833587795495987, "learning_rate": 1.8142103389588567e-05, "loss": 0.07729113101959229, "step": 3493 }, { "epoch": 0.4723776046507698, "grad_norm": 0.06454747915267944, "learning_rate": 1.813541048263696e-05, "loss": 0.05300617218017578, "step": 3494 }, { "epoch": 0.47251280144660573, "grad_norm": 0.1339472234249115, "learning_rate": 1.8128716922876178e-05, "loss": 0.08754396438598633, "step": 3495 }, { "epoch": 0.47264799824244164, "grad_norm": 0.09679068624973297, "learning_rate": 1.812202271169984e-05, "loss": 0.06735682487487793, "step": 3496 }, { "epoch": 0.4727831950382776, "grad_norm": 0.0711110383272171, "learning_rate": 1.8115327850501726e-05, "loss": 0.04534256458282471, "step": 3497 }, { "epoch": 0.47291839183411355, "grad_norm": 0.08141111582517624, "learning_rate": 1.8108632340675746e-05, "loss": 0.0625767707824707, "step": 3498 }, { "epoch": 0.47305358862994945, "grad_norm": 0.05167558416724205, "learning_rate": 1.810193618361593e-05, "loss": 0.055295467376708984, "step": 3499 }, { "epoch": 0.4731887854257854, "grad_norm": 0.12220493704080582, "learning_rate": 1.8095239380716464e-05, "loss": 0.04735064506530762, "step": 3500 }, { "epoch": 0.47332398222162136, "grad_norm": 0.06282928586006165, "learning_rate": 1.808854193337165e-05, "loss": 0.07062315940856934, "step": 3501 }, { "epoch": 0.47345917901745727, "grad_norm": 0.05195547640323639, "learning_rate": 1.8081843842975935e-05, "loss": 0.07199633121490479, "step": 3502 }, { "epoch": 0.4735943758132932, "grad_norm": 0.23887105286121368, "learning_rate": 1.80751451109239e-05, "loss": 0.08484089374542236, "step": 3503 }, { "epoch": 0.4737295726091292, "grad_norm": 0.05381393805146217, "learning_rate": 1.806844573861025e-05, "loss": 0.06839513778686523, "step": 3504 }, { "epoch": 0.4738647694049651, "grad_norm": 0.10352759063243866, "learning_rate": 1.806174572742984e-05, "loss": 0.06512832641601562, "step": 3505 }, { "epoch": 0.47399996620080104, "grad_norm": 0.0745445191860199, "learning_rate": 1.8055045078777634e-05, "loss": 0.08244776725769043, "step": 3506 }, { "epoch": 0.474135162996637, "grad_norm": 0.08982892334461212, "learning_rate": 1.8048343794048762e-05, "loss": 0.06277763843536377, "step": 3507 }, { "epoch": 0.4742703597924729, "grad_norm": 0.15886647999286652, "learning_rate": 1.8041641874638445e-05, "loss": 0.07700347900390625, "step": 3508 }, { "epoch": 0.47440555658830885, "grad_norm": 0.12980806827545166, "learning_rate": 1.8034939321942077e-05, "loss": 0.05527544021606445, "step": 3509 }, { "epoch": 0.4745407533841448, "grad_norm": 0.11058919876813889, "learning_rate": 1.8028236137355154e-05, "loss": 0.0814218521118164, "step": 3510 }, { "epoch": 0.4746759501799807, "grad_norm": 0.07259859889745712, "learning_rate": 1.8021532322273327e-05, "loss": 0.08646726608276367, "step": 3511 }, { "epoch": 0.47481114697581667, "grad_norm": 0.051616255193948746, "learning_rate": 1.8014827878092347e-05, "loss": 0.059804439544677734, "step": 3512 }, { "epoch": 0.4749463437716526, "grad_norm": 0.11216377466917038, "learning_rate": 1.800812280620813e-05, "loss": 0.09200799465179443, "step": 3513 }, { "epoch": 0.4750815405674885, "grad_norm": 0.07416464388370514, "learning_rate": 1.80014171080167e-05, "loss": 0.07598209381103516, "step": 3514 }, { "epoch": 0.4752167373633245, "grad_norm": 0.08573921769857407, "learning_rate": 1.7994710784914227e-05, "loss": 0.0655217170715332, "step": 3515 }, { "epoch": 0.47535193415916044, "grad_norm": 0.0943194255232811, "learning_rate": 1.7988003838297e-05, "loss": 0.04941558837890625, "step": 3516 }, { "epoch": 0.47548713095499634, "grad_norm": 0.13027246296405792, "learning_rate": 1.7981296269561438e-05, "loss": 0.0620427131652832, "step": 3517 }, { "epoch": 0.4756223277508323, "grad_norm": 0.065853551030159, "learning_rate": 1.7974588080104095e-05, "loss": 0.061742305755615234, "step": 3518 }, { "epoch": 0.47575752454666825, "grad_norm": 0.09798743575811386, "learning_rate": 1.7967879271321648e-05, "loss": 0.049071550369262695, "step": 3519 }, { "epoch": 0.4758927213425042, "grad_norm": 0.12141052633523941, "learning_rate": 1.7961169844610913e-05, "loss": 0.048730552196502686, "step": 3520 }, { "epoch": 0.4760279181383401, "grad_norm": 0.09053508192300797, "learning_rate": 1.795445980136883e-05, "loss": 0.05276894569396973, "step": 3521 }, { "epoch": 0.47616311493417607, "grad_norm": 0.048913341015577316, "learning_rate": 1.794774914299245e-05, "loss": 0.045694589614868164, "step": 3522 }, { "epoch": 0.476298311730012, "grad_norm": 0.08369025588035583, "learning_rate": 1.794103787087899e-05, "loss": 0.08330965042114258, "step": 3523 }, { "epoch": 0.4764335085258479, "grad_norm": 0.07855888456106186, "learning_rate": 1.7934325986425755e-05, "loss": 0.07700634002685547, "step": 3524 }, { "epoch": 0.4765687053216839, "grad_norm": 0.10310858488082886, "learning_rate": 1.7927613491030204e-05, "loss": 0.10139870643615723, "step": 3525 }, { "epoch": 0.47670390211751984, "grad_norm": 0.09178508818149567, "learning_rate": 1.7920900386089913e-05, "loss": 0.06978440284729004, "step": 3526 }, { "epoch": 0.47683909891335574, "grad_norm": 0.09025003015995026, "learning_rate": 1.7914186673002588e-05, "loss": 0.0522613525390625, "step": 3527 }, { "epoch": 0.4769742957091917, "grad_norm": 0.2030298262834549, "learning_rate": 1.790747235316605e-05, "loss": 0.05517292022705078, "step": 3528 }, { "epoch": 0.47710949250502765, "grad_norm": 0.09993169456720352, "learning_rate": 1.790075742797827e-05, "loss": 0.051212310791015625, "step": 3529 }, { "epoch": 0.47724468930086356, "grad_norm": 0.04517374932765961, "learning_rate": 1.789404189883732e-05, "loss": 0.05431985855102539, "step": 3530 }, { "epoch": 0.4773798860966995, "grad_norm": 0.10662773251533508, "learning_rate": 1.7887325767141415e-05, "loss": 0.05180820822715759, "step": 3531 }, { "epoch": 0.47751508289253547, "grad_norm": 0.1368734985589981, "learning_rate": 1.7880609034288894e-05, "loss": 0.06817197799682617, "step": 3532 }, { "epoch": 0.47765027968837137, "grad_norm": 0.06638755649328232, "learning_rate": 1.7873891701678208e-05, "loss": 0.05412650108337402, "step": 3533 }, { "epoch": 0.4777854764842073, "grad_norm": 0.08016375452280045, "learning_rate": 1.786717377070794e-05, "loss": 0.04256296157836914, "step": 3534 }, { "epoch": 0.4779206732800433, "grad_norm": 0.0387137345969677, "learning_rate": 1.7860455242776803e-05, "loss": 0.0393376350402832, "step": 3535 }, { "epoch": 0.4780558700758792, "grad_norm": 0.14212824404239655, "learning_rate": 1.7853736119283635e-05, "loss": 0.06944620609283447, "step": 3536 }, { "epoch": 0.47819106687171514, "grad_norm": 0.1294069141149521, "learning_rate": 1.7847016401627388e-05, "loss": 0.05991697311401367, "step": 3537 }, { "epoch": 0.4783262636675511, "grad_norm": 0.1399858444929123, "learning_rate": 1.7840296091207144e-05, "loss": 0.08116912841796875, "step": 3538 }, { "epoch": 0.478461460463387, "grad_norm": 0.1316068172454834, "learning_rate": 1.7833575189422107e-05, "loss": 0.0764765739440918, "step": 3539 }, { "epoch": 0.47859665725922296, "grad_norm": 0.04513904079794884, "learning_rate": 1.7826853697671604e-05, "loss": 0.0444035530090332, "step": 3540 }, { "epoch": 0.4787318540550589, "grad_norm": 0.06743033230304718, "learning_rate": 1.782013161735509e-05, "loss": 0.07162857055664062, "step": 3541 }, { "epoch": 0.4788670508508948, "grad_norm": 0.07776404917240143, "learning_rate": 1.781340894987213e-05, "loss": 0.05133962631225586, "step": 3542 }, { "epoch": 0.47900224764673077, "grad_norm": 0.08006428927183151, "learning_rate": 1.7806685696622426e-05, "loss": 0.0925908088684082, "step": 3543 }, { "epoch": 0.47913744444256673, "grad_norm": 0.06711641699075699, "learning_rate": 1.779996185900579e-05, "loss": 0.0480729341506958, "step": 3544 }, { "epoch": 0.47927264123840263, "grad_norm": 0.15275880694389343, "learning_rate": 1.7793237438422165e-05, "loss": 0.0650796890258789, "step": 3545 }, { "epoch": 0.4794078380342386, "grad_norm": 0.04742910712957382, "learning_rate": 1.7786512436271617e-05, "loss": 0.04808545112609863, "step": 3546 }, { "epoch": 0.47954303483007454, "grad_norm": 0.08539284020662308, "learning_rate": 1.777978685395431e-05, "loss": 0.07293939590454102, "step": 3547 }, { "epoch": 0.47967823162591045, "grad_norm": 0.05164426192641258, "learning_rate": 1.7773060692870564e-05, "loss": 0.024924039840698242, "step": 3548 }, { "epoch": 0.4798134284217464, "grad_norm": 0.07167036831378937, "learning_rate": 1.7766333954420794e-05, "loss": 0.05931425094604492, "step": 3549 }, { "epoch": 0.47994862521758236, "grad_norm": 0.1040789857506752, "learning_rate": 1.775960664000554e-05, "loss": 0.08452892303466797, "step": 3550 }, { "epoch": 0.48008382201341826, "grad_norm": 0.06301531940698624, "learning_rate": 1.7752878751025463e-05, "loss": 0.06488990783691406, "step": 3551 }, { "epoch": 0.4802190188092542, "grad_norm": 0.04951043426990509, "learning_rate": 1.7746150288881352e-05, "loss": 0.040463805198669434, "step": 3552 }, { "epoch": 0.4803542156050902, "grad_norm": 0.0803549736738205, "learning_rate": 1.7739421254974114e-05, "loss": 0.06554222106933594, "step": 3553 }, { "epoch": 0.4804894124009261, "grad_norm": 0.15805961191654205, "learning_rate": 1.7732691650704756e-05, "loss": 0.05463051795959473, "step": 3554 }, { "epoch": 0.48062460919676203, "grad_norm": 0.07525870949029922, "learning_rate": 1.7725961477474423e-05, "loss": 0.06330084800720215, "step": 3555 }, { "epoch": 0.480759805992598, "grad_norm": 0.06652756035327911, "learning_rate": 1.7719230736684375e-05, "loss": 0.06137204170227051, "step": 3556 }, { "epoch": 0.4808950027884339, "grad_norm": 0.05889417231082916, "learning_rate": 1.771249942973599e-05, "loss": 0.04092574119567871, "step": 3557 }, { "epoch": 0.48103019958426985, "grad_norm": 0.0636635571718216, "learning_rate": 1.7705767558030756e-05, "loss": 0.05011940002441406, "step": 3558 }, { "epoch": 0.4811653963801058, "grad_norm": 0.14814883470535278, "learning_rate": 1.769903512297029e-05, "loss": 0.0930180549621582, "step": 3559 }, { "epoch": 0.4813005931759417, "grad_norm": 0.053254399448633194, "learning_rate": 1.7692302125956315e-05, "loss": 0.04150569438934326, "step": 3560 }, { "epoch": 0.48143578997177766, "grad_norm": 0.17485874891281128, "learning_rate": 1.768556856839068e-05, "loss": 0.05882751941680908, "step": 3561 }, { "epoch": 0.4815709867676136, "grad_norm": 0.07868581265211105, "learning_rate": 1.767883445167535e-05, "loss": 0.04897665977478027, "step": 3562 }, { "epoch": 0.4817061835634495, "grad_norm": 0.10659178346395493, "learning_rate": 1.7672099777212398e-05, "loss": 0.07756972312927246, "step": 3563 }, { "epoch": 0.4818413803592855, "grad_norm": 0.20663602650165558, "learning_rate": 1.7665364546404034e-05, "loss": 0.08997774124145508, "step": 3564 }, { "epoch": 0.48197657715512143, "grad_norm": 0.13580411672592163, "learning_rate": 1.7658628760652548e-05, "loss": 0.06884914636611938, "step": 3565 }, { "epoch": 0.4821117739509574, "grad_norm": 0.09598405659198761, "learning_rate": 1.765189242136038e-05, "loss": 0.06521987915039062, "step": 3566 }, { "epoch": 0.4822469707467933, "grad_norm": 0.21622607111930847, "learning_rate": 1.7645155529930065e-05, "loss": 0.07096123695373535, "step": 3567 }, { "epoch": 0.48238216754262925, "grad_norm": 0.05366864427924156, "learning_rate": 1.763841808776426e-05, "loss": 0.03513383865356445, "step": 3568 }, { "epoch": 0.4825173643384652, "grad_norm": 0.1085478886961937, "learning_rate": 1.763168009626575e-05, "loss": 0.06385660171508789, "step": 3569 }, { "epoch": 0.4826525611343011, "grad_norm": 0.0757722407579422, "learning_rate": 1.7624941556837406e-05, "loss": 0.07055854797363281, "step": 3570 }, { "epoch": 0.48278775793013706, "grad_norm": 0.08150186389684677, "learning_rate": 1.7618202470882233e-05, "loss": 0.08629035949707031, "step": 3571 }, { "epoch": 0.482922954725973, "grad_norm": 0.08880028873682022, "learning_rate": 1.7611462839803336e-05, "loss": 0.08350419998168945, "step": 3572 }, { "epoch": 0.4830581515218089, "grad_norm": 0.07939331978559494, "learning_rate": 1.760472266500396e-05, "loss": 0.0688624382019043, "step": 3573 }, { "epoch": 0.4831933483176449, "grad_norm": 0.075597383081913, "learning_rate": 1.759798194788743e-05, "loss": 0.07290935516357422, "step": 3574 }, { "epoch": 0.48332854511348083, "grad_norm": 0.04727097600698471, "learning_rate": 1.75912406898572e-05, "loss": 0.057602882385253906, "step": 3575 }, { "epoch": 0.48346374190931674, "grad_norm": 0.13879527151584625, "learning_rate": 1.758449889231685e-05, "loss": 0.06453084945678711, "step": 3576 }, { "epoch": 0.4835989387051527, "grad_norm": 0.07641217857599258, "learning_rate": 1.757775655667004e-05, "loss": 0.08345484733581543, "step": 3577 }, { "epoch": 0.48373413550098865, "grad_norm": 0.07564567029476166, "learning_rate": 1.757101368432057e-05, "loss": 0.06930351257324219, "step": 3578 }, { "epoch": 0.48386933229682455, "grad_norm": 0.08055280148983002, "learning_rate": 1.7564270276672343e-05, "loss": 0.0822610855102539, "step": 3579 }, { "epoch": 0.4840045290926605, "grad_norm": 0.14254067838191986, "learning_rate": 1.7557526335129372e-05, "loss": 0.08722805976867676, "step": 3580 }, { "epoch": 0.48413972588849646, "grad_norm": 0.1177094429731369, "learning_rate": 1.7550781861095774e-05, "loss": 0.06356048583984375, "step": 3581 }, { "epoch": 0.48427492268433237, "grad_norm": 0.10753617435693741, "learning_rate": 1.754403685597579e-05, "loss": 0.05353355407714844, "step": 3582 }, { "epoch": 0.4844101194801683, "grad_norm": 0.09060823917388916, "learning_rate": 1.7537291321173773e-05, "loss": 0.05819261074066162, "step": 3583 }, { "epoch": 0.4845453162760043, "grad_norm": 0.10979875922203064, "learning_rate": 1.7530545258094165e-05, "loss": 0.08277839422225952, "step": 3584 }, { "epoch": 0.4846805130718402, "grad_norm": 0.058302171528339386, "learning_rate": 1.7523798668141548e-05, "loss": 0.049314022064208984, "step": 3585 }, { "epoch": 0.48481570986767614, "grad_norm": 0.03541146591305733, "learning_rate": 1.7517051552720584e-05, "loss": 0.03382599353790283, "step": 3586 }, { "epoch": 0.4849509066635121, "grad_norm": 0.048140544444322586, "learning_rate": 1.7510303913236066e-05, "loss": 0.06548905372619629, "step": 3587 }, { "epoch": 0.485086103459348, "grad_norm": 0.05853385105729103, "learning_rate": 1.7503555751092883e-05, "loss": 0.05188095569610596, "step": 3588 }, { "epoch": 0.48522130025518395, "grad_norm": 0.06466836482286453, "learning_rate": 1.7496807067696046e-05, "loss": 0.05787205696105957, "step": 3589 }, { "epoch": 0.4853564970510199, "grad_norm": 0.13970918953418732, "learning_rate": 1.7490057864450665e-05, "loss": 0.07719898223876953, "step": 3590 }, { "epoch": 0.4854916938468558, "grad_norm": 0.041163310408592224, "learning_rate": 1.748330814276195e-05, "loss": 0.04050576686859131, "step": 3591 }, { "epoch": 0.48562689064269177, "grad_norm": 0.09408541023731232, "learning_rate": 1.7476557904035243e-05, "loss": 0.06473207473754883, "step": 3592 }, { "epoch": 0.4857620874385277, "grad_norm": 0.08710946887731552, "learning_rate": 1.7469807149675973e-05, "loss": 0.08213043212890625, "step": 3593 }, { "epoch": 0.4858972842343636, "grad_norm": 0.05058041960000992, "learning_rate": 1.7463055881089685e-05, "loss": 0.02990889549255371, "step": 3594 }, { "epoch": 0.4860324810301996, "grad_norm": 0.05483940988779068, "learning_rate": 1.7456304099682024e-05, "loss": 0.05761528015136719, "step": 3595 }, { "epoch": 0.48616767782603554, "grad_norm": 0.10008472949266434, "learning_rate": 1.7449551806858756e-05, "loss": 0.0710608959197998, "step": 3596 }, { "epoch": 0.48630287462187144, "grad_norm": 0.06381729990243912, "learning_rate": 1.7442799004025733e-05, "loss": 0.06379079818725586, "step": 3597 }, { "epoch": 0.4864380714177074, "grad_norm": 0.06842538714408875, "learning_rate": 1.7436045692588934e-05, "loss": 0.038065433502197266, "step": 3598 }, { "epoch": 0.48657326821354335, "grad_norm": 0.08415386825799942, "learning_rate": 1.742929187395443e-05, "loss": 0.08133077621459961, "step": 3599 }, { "epoch": 0.48670846500937925, "grad_norm": 0.14112652838230133, "learning_rate": 1.7422537549528402e-05, "loss": 0.07901191711425781, "step": 3600 }, { "epoch": 0.4868436618052152, "grad_norm": 0.08802326023578644, "learning_rate": 1.7415782720717147e-05, "loss": 0.059496402740478516, "step": 3601 }, { "epoch": 0.48697885860105117, "grad_norm": 0.06313256919384003, "learning_rate": 1.740902738892704e-05, "loss": 0.04536402225494385, "step": 3602 }, { "epoch": 0.48711405539688707, "grad_norm": 0.05303634703159332, "learning_rate": 1.7402271555564585e-05, "loss": 0.059635162353515625, "step": 3603 }, { "epoch": 0.487249252192723, "grad_norm": 0.11761408299207687, "learning_rate": 1.739551522203638e-05, "loss": 0.06266450881958008, "step": 3604 }, { "epoch": 0.487384448988559, "grad_norm": 0.13394463062286377, "learning_rate": 1.738875838974913e-05, "loss": 0.06246042251586914, "step": 3605 }, { "epoch": 0.4875196457843949, "grad_norm": 0.07512262463569641, "learning_rate": 1.7382001060109652e-05, "loss": 0.05531167984008789, "step": 3606 }, { "epoch": 0.48765484258023084, "grad_norm": 0.04697803407907486, "learning_rate": 1.7375243234524843e-05, "loss": 0.05104684829711914, "step": 3607 }, { "epoch": 0.4877900393760668, "grad_norm": 0.06704489141702652, "learning_rate": 1.736848491440173e-05, "loss": 0.06655192375183105, "step": 3608 }, { "epoch": 0.4879252361719027, "grad_norm": 0.08053556829690933, "learning_rate": 1.7361726101147424e-05, "loss": 0.057255566120147705, "step": 3609 }, { "epoch": 0.48806043296773866, "grad_norm": 0.1130124032497406, "learning_rate": 1.7354966796169157e-05, "loss": 0.08051919937133789, "step": 3610 }, { "epoch": 0.4881956297635746, "grad_norm": 0.03626268357038498, "learning_rate": 1.734820700087424e-05, "loss": 0.03832656145095825, "step": 3611 }, { "epoch": 0.48833082655941057, "grad_norm": 0.05131611227989197, "learning_rate": 1.7341446716670103e-05, "loss": 0.05534648895263672, "step": 3612 }, { "epoch": 0.48846602335524647, "grad_norm": 0.04659602791070938, "learning_rate": 1.7334685944964272e-05, "loss": 0.05809736251831055, "step": 3613 }, { "epoch": 0.4886012201510824, "grad_norm": 0.03651859238743782, "learning_rate": 1.732792468716438e-05, "loss": 0.0412445068359375, "step": 3614 }, { "epoch": 0.4887364169469184, "grad_norm": 0.22989794611930847, "learning_rate": 1.7321162944678155e-05, "loss": 0.07867467403411865, "step": 3615 }, { "epoch": 0.4888716137427543, "grad_norm": 0.07495442777872086, "learning_rate": 1.7314400718913425e-05, "loss": 0.06592893600463867, "step": 3616 }, { "epoch": 0.48900681053859024, "grad_norm": 0.033692941069602966, "learning_rate": 1.7307638011278126e-05, "loss": 0.03146100044250488, "step": 3617 }, { "epoch": 0.4891420073344262, "grad_norm": 0.05789799615740776, "learning_rate": 1.7300874823180284e-05, "loss": 0.052855849266052246, "step": 3618 }, { "epoch": 0.4892772041302621, "grad_norm": 0.06451264023780823, "learning_rate": 1.7294111156028034e-05, "loss": 0.06437468528747559, "step": 3619 }, { "epoch": 0.48941240092609806, "grad_norm": 0.06568501144647598, "learning_rate": 1.7287347011229605e-05, "loss": 0.0803375244140625, "step": 3620 }, { "epoch": 0.489547597721934, "grad_norm": 0.1086663156747818, "learning_rate": 1.7280582390193333e-05, "loss": 0.06152153015136719, "step": 3621 }, { "epoch": 0.4896827945177699, "grad_norm": 0.08800483494997025, "learning_rate": 1.7273817294327653e-05, "loss": 0.055284976959228516, "step": 3622 }, { "epoch": 0.48981799131360587, "grad_norm": 0.04633105918765068, "learning_rate": 1.726705172504108e-05, "loss": 0.06363499164581299, "step": 3623 }, { "epoch": 0.48995318810944183, "grad_norm": 0.0646023079752922, "learning_rate": 1.7260285683742248e-05, "loss": 0.07406139373779297, "step": 3624 }, { "epoch": 0.49008838490527773, "grad_norm": 0.09981629252433777, "learning_rate": 1.7253519171839883e-05, "loss": 0.05177879333496094, "step": 3625 }, { "epoch": 0.4902235817011137, "grad_norm": 0.1757117360830307, "learning_rate": 1.724675219074281e-05, "loss": 0.06247490644454956, "step": 3626 }, { "epoch": 0.49035877849694964, "grad_norm": 0.06993858516216278, "learning_rate": 1.7239984741859957e-05, "loss": 0.06765270233154297, "step": 3627 }, { "epoch": 0.49049397529278554, "grad_norm": 0.12846733629703522, "learning_rate": 1.7233216826600324e-05, "loss": 0.08030715584754944, "step": 3628 }, { "epoch": 0.4906291720886215, "grad_norm": 0.07241996377706528, "learning_rate": 1.7226448446373047e-05, "loss": 0.05104684829711914, "step": 3629 }, { "epoch": 0.49076436888445746, "grad_norm": 0.07053696364164352, "learning_rate": 1.7219679602587326e-05, "loss": 0.06361579895019531, "step": 3630 }, { "epoch": 0.49089956568029336, "grad_norm": 0.1293111890554428, "learning_rate": 1.7212910296652476e-05, "loss": 0.06764888763427734, "step": 3631 }, { "epoch": 0.4910347624761293, "grad_norm": 0.13575351238250732, "learning_rate": 1.7206140529977904e-05, "loss": 0.06544160842895508, "step": 3632 }, { "epoch": 0.4911699592719653, "grad_norm": 0.09848814457654953, "learning_rate": 1.719937030397311e-05, "loss": 0.07082962989807129, "step": 3633 }, { "epoch": 0.4913051560678012, "grad_norm": 0.25757378339767456, "learning_rate": 1.7192599620047683e-05, "loss": 0.0867757797241211, "step": 3634 }, { "epoch": 0.49144035286363713, "grad_norm": 0.1287541389465332, "learning_rate": 1.718582847961133e-05, "loss": 0.06615257263183594, "step": 3635 }, { "epoch": 0.4915755496594731, "grad_norm": 0.1424255669116974, "learning_rate": 1.7179056884073826e-05, "loss": 0.07771587371826172, "step": 3636 }, { "epoch": 0.491710746455309, "grad_norm": 0.08382894843816757, "learning_rate": 1.717228483484506e-05, "loss": 0.06627535820007324, "step": 3637 }, { "epoch": 0.49184594325114495, "grad_norm": 0.09832364320755005, "learning_rate": 1.7165512333335013e-05, "loss": 0.06449282169342041, "step": 3638 }, { "epoch": 0.4919811400469809, "grad_norm": 0.14199407398700714, "learning_rate": 1.715873938095374e-05, "loss": 0.08585596084594727, "step": 3639 }, { "epoch": 0.4921163368428168, "grad_norm": 0.05253595486283302, "learning_rate": 1.7151965979111427e-05, "loss": 0.03324699401855469, "step": 3640 }, { "epoch": 0.49225153363865276, "grad_norm": 0.07695692032575607, "learning_rate": 1.7145192129218313e-05, "loss": 0.038543701171875, "step": 3641 }, { "epoch": 0.4923867304344887, "grad_norm": 0.1592692732810974, "learning_rate": 1.7138417832684763e-05, "loss": 0.08372068405151367, "step": 3642 }, { "epoch": 0.4925219272303246, "grad_norm": 0.04907463863492012, "learning_rate": 1.7131643090921216e-05, "loss": 0.06655740737915039, "step": 3643 }, { "epoch": 0.4926571240261606, "grad_norm": 0.24179919064044952, "learning_rate": 1.712486790533821e-05, "loss": 0.1024484634399414, "step": 3644 }, { "epoch": 0.49279232082199653, "grad_norm": 0.047212354838848114, "learning_rate": 1.7118092277346372e-05, "loss": 0.04586261510848999, "step": 3645 }, { "epoch": 0.49292751761783243, "grad_norm": 0.10214903205633163, "learning_rate": 1.7111316208356428e-05, "loss": 0.055303215980529785, "step": 3646 }, { "epoch": 0.4930627144136684, "grad_norm": 0.21530210971832275, "learning_rate": 1.7104539699779192e-05, "loss": 0.06785714626312256, "step": 3647 }, { "epoch": 0.49319791120950435, "grad_norm": 0.08425609767436981, "learning_rate": 1.709776275302557e-05, "loss": 0.049014151096343994, "step": 3648 }, { "epoch": 0.49333310800534025, "grad_norm": 0.2440614104270935, "learning_rate": 1.7090985369506555e-05, "loss": 0.07528328895568848, "step": 3649 }, { "epoch": 0.4934683048011762, "grad_norm": 0.07584003359079361, "learning_rate": 1.708420755063323e-05, "loss": 0.07402420043945312, "step": 3650 }, { "epoch": 0.49360350159701216, "grad_norm": 0.07362735271453857, "learning_rate": 1.707742929781678e-05, "loss": 0.05284547805786133, "step": 3651 }, { "epoch": 0.49373869839284806, "grad_norm": 0.10645918548107147, "learning_rate": 1.707065061246848e-05, "loss": 0.06648111343383789, "step": 3652 }, { "epoch": 0.493873895188684, "grad_norm": 0.09508925676345825, "learning_rate": 1.7063871495999677e-05, "loss": 0.07498025894165039, "step": 3653 }, { "epoch": 0.49400909198452, "grad_norm": 0.1568058729171753, "learning_rate": 1.705709194982182e-05, "loss": 0.05193781852722168, "step": 3654 }, { "epoch": 0.4941442887803559, "grad_norm": 0.07692322134971619, "learning_rate": 1.7050311975346447e-05, "loss": 0.06574797630310059, "step": 3655 }, { "epoch": 0.49427948557619183, "grad_norm": 0.20145468413829803, "learning_rate": 1.704353157398519e-05, "loss": 0.0744791030883789, "step": 3656 }, { "epoch": 0.4944146823720278, "grad_norm": 0.08423712104558945, "learning_rate": 1.7036750747149764e-05, "loss": 0.05287003517150879, "step": 3657 }, { "epoch": 0.49454987916786375, "grad_norm": 0.20200200378894806, "learning_rate": 1.702996949625197e-05, "loss": 0.08007049560546875, "step": 3658 }, { "epoch": 0.49468507596369965, "grad_norm": 0.10411909222602844, "learning_rate": 1.7023187822703702e-05, "loss": 0.05687999725341797, "step": 3659 }, { "epoch": 0.4948202727595356, "grad_norm": 0.10596667230129242, "learning_rate": 1.7016405727916936e-05, "loss": 0.08284330368041992, "step": 3660 }, { "epoch": 0.49495546955537156, "grad_norm": 0.0667446106672287, "learning_rate": 1.700962321330375e-05, "loss": 0.05919456481933594, "step": 3661 }, { "epoch": 0.49509066635120746, "grad_norm": 0.06044870615005493, "learning_rate": 1.700284028027629e-05, "loss": 0.04858824610710144, "step": 3662 }, { "epoch": 0.4952258631470434, "grad_norm": 0.20683208107948303, "learning_rate": 1.6996056930246807e-05, "loss": 0.07144784927368164, "step": 3663 }, { "epoch": 0.4953610599428794, "grad_norm": 0.11469860374927521, "learning_rate": 1.6989273164627626e-05, "loss": 0.09452939033508301, "step": 3664 }, { "epoch": 0.4954962567387153, "grad_norm": 0.12053488940000534, "learning_rate": 1.6982488984831163e-05, "loss": 0.06110095977783203, "step": 3665 }, { "epoch": 0.49563145353455124, "grad_norm": 0.10469727218151093, "learning_rate": 1.697570439226992e-05, "loss": 0.06834030151367188, "step": 3666 }, { "epoch": 0.4957666503303872, "grad_norm": 0.0829186663031578, "learning_rate": 1.6968919388356486e-05, "loss": 0.06743144989013672, "step": 3667 }, { "epoch": 0.4959018471262231, "grad_norm": 0.12245159596204758, "learning_rate": 1.696213397450354e-05, "loss": 0.07387471199035645, "step": 3668 }, { "epoch": 0.49603704392205905, "grad_norm": 0.06037118658423424, "learning_rate": 1.695534815212384e-05, "loss": 0.07586240768432617, "step": 3669 }, { "epoch": 0.496172240717895, "grad_norm": 0.055406391620635986, "learning_rate": 1.6948561922630223e-05, "loss": 0.061498045921325684, "step": 3670 }, { "epoch": 0.4963074375137309, "grad_norm": 0.0674186646938324, "learning_rate": 1.694177528743562e-05, "loss": 0.05174517631530762, "step": 3671 }, { "epoch": 0.49644263430956687, "grad_norm": 0.06779282540082932, "learning_rate": 1.6934988247953053e-05, "loss": 0.06182551383972168, "step": 3672 }, { "epoch": 0.4965778311054028, "grad_norm": 0.09276220947504044, "learning_rate": 1.6928200805595606e-05, "loss": 0.07191336154937744, "step": 3673 }, { "epoch": 0.4967130279012387, "grad_norm": 0.0914679765701294, "learning_rate": 1.6921412961776475e-05, "loss": 0.0748758316040039, "step": 3674 }, { "epoch": 0.4968482246970747, "grad_norm": 0.11273293942213058, "learning_rate": 1.6914624717908922e-05, "loss": 0.07414913177490234, "step": 3675 }, { "epoch": 0.49698342149291064, "grad_norm": 0.06812527775764465, "learning_rate": 1.6907836075406288e-05, "loss": 0.07167196273803711, "step": 3676 }, { "epoch": 0.49711861828874654, "grad_norm": 0.14902490377426147, "learning_rate": 1.690104703568201e-05, "loss": 0.05316972732543945, "step": 3677 }, { "epoch": 0.4972538150845825, "grad_norm": 0.05334148928523064, "learning_rate": 1.68942576001496e-05, "loss": 0.05182605981826782, "step": 3678 }, { "epoch": 0.49738901188041845, "grad_norm": 0.12150225043296814, "learning_rate": 1.6887467770222658e-05, "loss": 0.06699976325035095, "step": 3679 }, { "epoch": 0.49752420867625435, "grad_norm": 0.0888490304350853, "learning_rate": 1.6880677547314865e-05, "loss": 0.05602145195007324, "step": 3680 }, { "epoch": 0.4976594054720903, "grad_norm": 0.05888473615050316, "learning_rate": 1.6873886932839973e-05, "loss": 0.0517694354057312, "step": 3681 }, { "epoch": 0.49779460226792627, "grad_norm": 0.08135475963354111, "learning_rate": 1.686709592821183e-05, "loss": 0.08391571044921875, "step": 3682 }, { "epoch": 0.49792979906376217, "grad_norm": 0.09950217604637146, "learning_rate": 1.6860304534844355e-05, "loss": 0.07316875457763672, "step": 3683 }, { "epoch": 0.4980649958595981, "grad_norm": 0.1802351474761963, "learning_rate": 1.6853512754151556e-05, "loss": 0.08319240808486938, "step": 3684 }, { "epoch": 0.4982001926554341, "grad_norm": 0.03477051854133606, "learning_rate": 1.684672058754752e-05, "loss": 0.04135942459106445, "step": 3685 }, { "epoch": 0.49833538945127, "grad_norm": 0.10808668285608292, "learning_rate": 1.6839928036446416e-05, "loss": 0.07411050796508789, "step": 3686 }, { "epoch": 0.49847058624710594, "grad_norm": 0.12113899737596512, "learning_rate": 1.6833135102262473e-05, "loss": 0.07147717475891113, "step": 3687 }, { "epoch": 0.4986057830429419, "grad_norm": 0.11671570688486099, "learning_rate": 1.682634178641003e-05, "loss": 0.045951128005981445, "step": 3688 }, { "epoch": 0.4987409798387778, "grad_norm": 0.2194662094116211, "learning_rate": 1.6819548090303485e-05, "loss": 0.08658981323242188, "step": 3689 }, { "epoch": 0.49887617663461375, "grad_norm": 0.06902020424604416, "learning_rate": 1.6812754015357328e-05, "loss": 0.07514506578445435, "step": 3690 }, { "epoch": 0.4990113734304497, "grad_norm": 0.05978160351514816, "learning_rate": 1.680595956298612e-05, "loss": 0.07438373565673828, "step": 3691 }, { "epoch": 0.4991465702262856, "grad_norm": 0.06871736794710159, "learning_rate": 1.6799164734604497e-05, "loss": 0.07107377052307129, "step": 3692 }, { "epoch": 0.49928176702212157, "grad_norm": 0.09108826518058777, "learning_rate": 1.6792369531627186e-05, "loss": 0.057581424713134766, "step": 3693 }, { "epoch": 0.4994169638179575, "grad_norm": 0.12614762783050537, "learning_rate": 1.6785573955468974e-05, "loss": 0.04407024383544922, "step": 3694 }, { "epoch": 0.49955216061379343, "grad_norm": 0.1276492327451706, "learning_rate": 1.6778778007544745e-05, "loss": 0.06745529174804688, "step": 3695 }, { "epoch": 0.4996873574096294, "grad_norm": 0.16838513314723969, "learning_rate": 1.6771981689269452e-05, "loss": 0.0907735824584961, "step": 3696 }, { "epoch": 0.49982255420546534, "grad_norm": 0.092658132314682, "learning_rate": 1.6765185002058123e-05, "loss": 0.05747032165527344, "step": 3697 }, { "epoch": 0.49995775100130124, "grad_norm": 0.025818796828389168, "learning_rate": 1.6758387947325856e-05, "loss": 0.028505563735961914, "step": 3698 }, { "epoch": 0.5000929477971372, "grad_norm": 0.06602969020605087, "learning_rate": 1.6751590526487843e-05, "loss": 0.07781982421875, "step": 3699 }, { "epoch": 0.5002281445929732, "grad_norm": 0.20559480786323547, "learning_rate": 1.6744792740959347e-05, "loss": 0.08217430114746094, "step": 3700 }, { "epoch": 0.5003633413888091, "grad_norm": 0.14475670456886292, "learning_rate": 1.6737994592155697e-05, "loss": 0.05979299545288086, "step": 3701 }, { "epoch": 0.5004985381846451, "grad_norm": 0.08835698664188385, "learning_rate": 1.6731196081492307e-05, "loss": 0.05028557777404785, "step": 3702 }, { "epoch": 0.5006337349804809, "grad_norm": 0.07242413610219955, "learning_rate": 1.6724397210384655e-05, "loss": 0.05842924118041992, "step": 3703 }, { "epoch": 0.5007689317763169, "grad_norm": 0.08495945483446121, "learning_rate": 1.6717597980248308e-05, "loss": 0.06853437423706055, "step": 3704 }, { "epoch": 0.5009041285721528, "grad_norm": 0.13081209361553192, "learning_rate": 1.6710798392498904e-05, "loss": 0.07030391693115234, "step": 3705 }, { "epoch": 0.5010393253679888, "grad_norm": 0.040592074394226074, "learning_rate": 1.6703998448552154e-05, "loss": 0.04258108139038086, "step": 3706 }, { "epoch": 0.5011745221638247, "grad_norm": 0.056849293410778046, "learning_rate": 1.669719814982384e-05, "loss": 0.05554676055908203, "step": 3707 }, { "epoch": 0.5013097189596607, "grad_norm": 0.16090774536132812, "learning_rate": 1.6690397497729818e-05, "loss": 0.11242866516113281, "step": 3708 }, { "epoch": 0.5014449157554965, "grad_norm": 0.12958185374736786, "learning_rate": 1.6683596493686028e-05, "loss": 0.08212113380432129, "step": 3709 }, { "epoch": 0.5015801125513325, "grad_norm": 0.14468182623386383, "learning_rate": 1.667679513910846e-05, "loss": 0.06597638130187988, "step": 3710 }, { "epoch": 0.5017153093471685, "grad_norm": 0.06954637169837952, "learning_rate": 1.666999343541321e-05, "loss": 0.048244476318359375, "step": 3711 }, { "epoch": 0.5018505061430044, "grad_norm": 0.03504554554820061, "learning_rate": 1.6663191384016422e-05, "loss": 0.04031825065612793, "step": 3712 }, { "epoch": 0.5019857029388404, "grad_norm": 0.058307476341724396, "learning_rate": 1.6656388986334315e-05, "loss": 0.07852506637573242, "step": 3713 }, { "epoch": 0.5021208997346763, "grad_norm": 0.08634979277849197, "learning_rate": 1.6649586243783186e-05, "loss": 0.06403493881225586, "step": 3714 }, { "epoch": 0.5022560965305122, "grad_norm": 0.03979789838194847, "learning_rate": 1.6642783157779405e-05, "loss": 0.04753732681274414, "step": 3715 }, { "epoch": 0.5023912933263481, "grad_norm": 0.08048854023218155, "learning_rate": 1.6635979729739417e-05, "loss": 0.06908845901489258, "step": 3716 }, { "epoch": 0.5025264901221841, "grad_norm": 0.07217130064964294, "learning_rate": 1.662917596107972e-05, "loss": 0.07325267791748047, "step": 3717 }, { "epoch": 0.50266168691802, "grad_norm": 0.1316104680299759, "learning_rate": 1.6622371853216904e-05, "loss": 0.07314467430114746, "step": 3718 }, { "epoch": 0.502796883713856, "grad_norm": 0.09273051470518112, "learning_rate": 1.661556740756761e-05, "loss": 0.056643009185791016, "step": 3719 }, { "epoch": 0.502932080509692, "grad_norm": 0.10645793378353119, "learning_rate": 1.6608762625548572e-05, "loss": 0.060303688049316406, "step": 3720 }, { "epoch": 0.5030672773055278, "grad_norm": 0.05880333483219147, "learning_rate": 1.6601957508576573e-05, "loss": 0.02784597873687744, "step": 3721 }, { "epoch": 0.5032024741013638, "grad_norm": 0.051762621849775314, "learning_rate": 1.659515205806848e-05, "loss": 0.0576329231262207, "step": 3722 }, { "epoch": 0.5033376708971997, "grad_norm": 0.0424882210791111, "learning_rate": 1.6588346275441224e-05, "loss": 0.04112982749938965, "step": 3723 }, { "epoch": 0.5034728676930357, "grad_norm": 0.06540235131978989, "learning_rate": 1.65815401621118e-05, "loss": 0.06955790519714355, "step": 3724 }, { "epoch": 0.5036080644888716, "grad_norm": 0.08217894285917282, "learning_rate": 1.657473371949728e-05, "loss": 0.05295515060424805, "step": 3725 }, { "epoch": 0.5037432612847076, "grad_norm": 0.10725366324186325, "learning_rate": 1.6567926949014805e-05, "loss": 0.09947586059570312, "step": 3726 }, { "epoch": 0.5038784580805435, "grad_norm": 0.08663515746593475, "learning_rate": 1.6561119852081574e-05, "loss": 0.08110237121582031, "step": 3727 }, { "epoch": 0.5040136548763794, "grad_norm": 0.046510547399520874, "learning_rate": 1.6554312430114868e-05, "loss": 0.0476679801940918, "step": 3728 }, { "epoch": 0.5041488516722153, "grad_norm": 0.08510484546422958, "learning_rate": 1.6547504684532026e-05, "loss": 0.07415771484375, "step": 3729 }, { "epoch": 0.5042840484680513, "grad_norm": 0.10251069068908691, "learning_rate": 1.6540696616750454e-05, "loss": 0.07366853952407837, "step": 3730 }, { "epoch": 0.5044192452638873, "grad_norm": 0.05395134910941124, "learning_rate": 1.6533888228187628e-05, "loss": 0.05528545379638672, "step": 3731 }, { "epoch": 0.5045544420597232, "grad_norm": 0.06983628869056702, "learning_rate": 1.6527079520261103e-05, "loss": 0.05981945991516113, "step": 3732 }, { "epoch": 0.5046896388555592, "grad_norm": 0.060526005923748016, "learning_rate": 1.6520270494388472e-05, "loss": 0.0650949478149414, "step": 3733 }, { "epoch": 0.504824835651395, "grad_norm": 0.04767141863703728, "learning_rate": 1.6513461151987418e-05, "loss": 0.05769610404968262, "step": 3734 }, { "epoch": 0.504960032447231, "grad_norm": 0.036564599722623825, "learning_rate": 1.6506651494475678e-05, "loss": 0.038811683654785156, "step": 3735 }, { "epoch": 0.5050952292430669, "grad_norm": 0.07894016802310944, "learning_rate": 1.6499841523271062e-05, "loss": 0.0546497106552124, "step": 3736 }, { "epoch": 0.5052304260389029, "grad_norm": 0.10773193836212158, "learning_rate": 1.649303123979145e-05, "loss": 0.09120464324951172, "step": 3737 }, { "epoch": 0.5053656228347388, "grad_norm": 0.060023244470357895, "learning_rate": 1.648622064545477e-05, "loss": 0.06358909606933594, "step": 3738 }, { "epoch": 0.5055008196305748, "grad_norm": 0.0951104462146759, "learning_rate": 1.6479409741679025e-05, "loss": 0.05770683288574219, "step": 3739 }, { "epoch": 0.5056360164264107, "grad_norm": 0.07395204156637192, "learning_rate": 1.6472598529882277e-05, "loss": 0.06883478164672852, "step": 3740 }, { "epoch": 0.5057712132222466, "grad_norm": 0.07054848223924637, "learning_rate": 1.646578701148267e-05, "loss": 0.07321429252624512, "step": 3741 }, { "epoch": 0.5059064100180826, "grad_norm": 0.1394336223602295, "learning_rate": 1.6458975187898384e-05, "loss": 0.09001421928405762, "step": 3742 }, { "epoch": 0.5060416068139185, "grad_norm": 0.04635302722454071, "learning_rate": 1.6452163060547687e-05, "loss": 0.04274177551269531, "step": 3743 }, { "epoch": 0.5061768036097545, "grad_norm": 0.12482871115207672, "learning_rate": 1.64453506308489e-05, "loss": 0.05527004599571228, "step": 3744 }, { "epoch": 0.5063120004055904, "grad_norm": 0.19521453976631165, "learning_rate": 1.64385379002204e-05, "loss": 0.08733940124511719, "step": 3745 }, { "epoch": 0.5064471972014263, "grad_norm": 0.0754537582397461, "learning_rate": 1.643172487008064e-05, "loss": 0.054754823446273804, "step": 3746 }, { "epoch": 0.5065823939972622, "grad_norm": 0.1390322893857956, "learning_rate": 1.6424911541848124e-05, "loss": 0.05157053470611572, "step": 3747 }, { "epoch": 0.5067175907930982, "grad_norm": 0.05547186732292175, "learning_rate": 1.641809791694143e-05, "loss": 0.04738926887512207, "step": 3748 }, { "epoch": 0.5068527875889342, "grad_norm": 0.07509680092334747, "learning_rate": 1.6411283996779184e-05, "loss": 0.05946412682533264, "step": 3749 }, { "epoch": 0.5069879843847701, "grad_norm": 0.06471659243106842, "learning_rate": 1.6404469782780088e-05, "loss": 0.039231061935424805, "step": 3750 }, { "epoch": 0.5071231811806061, "grad_norm": 0.06595001369714737, "learning_rate": 1.639765527636289e-05, "loss": 0.08158111572265625, "step": 3751 }, { "epoch": 0.5072583779764419, "grad_norm": 0.07521500438451767, "learning_rate": 1.639084047894641e-05, "loss": 0.05309092998504639, "step": 3752 }, { "epoch": 0.5073935747722779, "grad_norm": 0.0801013633608818, "learning_rate": 1.638402539194953e-05, "loss": 0.054356396198272705, "step": 3753 }, { "epoch": 0.5075287715681138, "grad_norm": 0.05563773959875107, "learning_rate": 1.6377210016791182e-05, "loss": 0.033054351806640625, "step": 3754 }, { "epoch": 0.5076639683639498, "grad_norm": 0.19027762115001678, "learning_rate": 1.6370394354890364e-05, "loss": 0.0989912748336792, "step": 3755 }, { "epoch": 0.5077991651597857, "grad_norm": 0.07743227481842041, "learning_rate": 1.636357840766613e-05, "loss": 0.054848670959472656, "step": 3756 }, { "epoch": 0.5079343619556217, "grad_norm": 0.07018070667982101, "learning_rate": 1.6356762176537606e-05, "loss": 0.05977439880371094, "step": 3757 }, { "epoch": 0.5080695587514575, "grad_norm": 0.06427748501300812, "learning_rate": 1.6349945662923953e-05, "loss": 0.0861368179321289, "step": 3758 }, { "epoch": 0.5082047555472935, "grad_norm": 0.11053955554962158, "learning_rate": 1.634312886824442e-05, "loss": 0.05287642776966095, "step": 3759 }, { "epoch": 0.5083399523431295, "grad_norm": 0.06569293886423111, "learning_rate": 1.6336311793918298e-05, "loss": 0.07151281833648682, "step": 3760 }, { "epoch": 0.5084751491389654, "grad_norm": 0.07489728182554245, "learning_rate": 1.6329494441364925e-05, "loss": 0.04281347990036011, "step": 3761 }, { "epoch": 0.5086103459348014, "grad_norm": 0.08263158798217773, "learning_rate": 1.6322676812003727e-05, "loss": 0.06351804733276367, "step": 3762 }, { "epoch": 0.5087455427306373, "grad_norm": 0.0663142204284668, "learning_rate": 1.631585890725416e-05, "loss": 0.06160569190979004, "step": 3763 }, { "epoch": 0.5088807395264732, "grad_norm": 0.1277400702238083, "learning_rate": 1.630904072853575e-05, "loss": 0.09601640701293945, "step": 3764 }, { "epoch": 0.5090159363223091, "grad_norm": 0.054404523223638535, "learning_rate": 1.6302222277268085e-05, "loss": 0.05263924598693848, "step": 3765 }, { "epoch": 0.5091511331181451, "grad_norm": 0.04608814790844917, "learning_rate": 1.6295403554870794e-05, "loss": 0.04669177532196045, "step": 3766 }, { "epoch": 0.509286329913981, "grad_norm": 0.06972187012434006, "learning_rate": 1.6288584562763572e-05, "loss": 0.06536746025085449, "step": 3767 }, { "epoch": 0.509421526709817, "grad_norm": 0.07748934626579285, "learning_rate": 1.6281765302366176e-05, "loss": 0.10449600219726562, "step": 3768 }, { "epoch": 0.509556723505653, "grad_norm": 0.0524292066693306, "learning_rate": 1.6274945775098412e-05, "loss": 0.06367683410644531, "step": 3769 }, { "epoch": 0.5096919203014889, "grad_norm": 0.03959411755204201, "learning_rate": 1.6268125982380135e-05, "loss": 0.045714378356933594, "step": 3770 }, { "epoch": 0.5098271170973248, "grad_norm": 0.14590390026569366, "learning_rate": 1.626130592563127e-05, "loss": 0.06103801727294922, "step": 3771 }, { "epoch": 0.5099623138931607, "grad_norm": 0.0810171589255333, "learning_rate": 1.6254485606271778e-05, "loss": 0.04927778244018555, "step": 3772 }, { "epoch": 0.5100975106889967, "grad_norm": 0.057023655623197556, "learning_rate": 1.6247665025721698e-05, "loss": 0.04679107666015625, "step": 3773 }, { "epoch": 0.5102327074848326, "grad_norm": 0.07838837802410126, "learning_rate": 1.62408441854011e-05, "loss": 0.05614662170410156, "step": 3774 }, { "epoch": 0.5103679042806686, "grad_norm": 0.08667189627885818, "learning_rate": 1.6234023086730136e-05, "loss": 0.06268644332885742, "step": 3775 }, { "epoch": 0.5105031010765045, "grad_norm": 0.04647170752286911, "learning_rate": 1.622720173112898e-05, "loss": 0.06048393249511719, "step": 3776 }, { "epoch": 0.5106382978723404, "grad_norm": 0.07768401503562927, "learning_rate": 1.6220380120017874e-05, "loss": 0.05697762966156006, "step": 3777 }, { "epoch": 0.5107734946681763, "grad_norm": 0.11173009872436523, "learning_rate": 1.6213558254817128e-05, "loss": 0.05269980430603027, "step": 3778 }, { "epoch": 0.5109086914640123, "grad_norm": 0.15954984724521637, "learning_rate": 1.6206736136947074e-05, "loss": 0.08355236053466797, "step": 3779 }, { "epoch": 0.5110438882598483, "grad_norm": 0.06876694411039352, "learning_rate": 1.6199913767828126e-05, "loss": 0.05315274000167847, "step": 3780 }, { "epoch": 0.5111790850556842, "grad_norm": 0.04431116208434105, "learning_rate": 1.6193091148880733e-05, "loss": 0.060036659240722656, "step": 3781 }, { "epoch": 0.5113142818515202, "grad_norm": 0.029200371354818344, "learning_rate": 1.61862682815254e-05, "loss": 0.034078359603881836, "step": 3782 }, { "epoch": 0.511449478647356, "grad_norm": 0.042952120304107666, "learning_rate": 1.617944516718268e-05, "loss": 0.04280126094818115, "step": 3783 }, { "epoch": 0.511584675443192, "grad_norm": 0.07698074728250504, "learning_rate": 1.617262180727319e-05, "loss": 0.06352090835571289, "step": 3784 }, { "epoch": 0.5117198722390279, "grad_norm": 0.10133420675992966, "learning_rate": 1.6165798203217588e-05, "loss": 0.051175594329833984, "step": 3785 }, { "epoch": 0.5118550690348639, "grad_norm": 0.06485810875892639, "learning_rate": 1.6158974356436585e-05, "loss": 0.06676959991455078, "step": 3786 }, { "epoch": 0.5119902658306998, "grad_norm": 0.08356793224811554, "learning_rate": 1.6152150268350938e-05, "loss": 0.06603431701660156, "step": 3787 }, { "epoch": 0.5121254626265358, "grad_norm": 0.03928566351532936, "learning_rate": 1.6145325940381458e-05, "loss": 0.04350090026855469, "step": 3788 }, { "epoch": 0.5122606594223716, "grad_norm": 0.1155201643705368, "learning_rate": 1.6138501373949018e-05, "loss": 0.07770967483520508, "step": 3789 }, { "epoch": 0.5123958562182076, "grad_norm": 0.077711321413517, "learning_rate": 1.613167657047451e-05, "loss": 0.07222151756286621, "step": 3790 }, { "epoch": 0.5125310530140436, "grad_norm": 0.10896597802639008, "learning_rate": 1.612485153137891e-05, "loss": 0.0713735818862915, "step": 3791 }, { "epoch": 0.5126662498098795, "grad_norm": 0.09788154065608978, "learning_rate": 1.611802625808323e-05, "loss": 0.062265872955322266, "step": 3792 }, { "epoch": 0.5128014466057155, "grad_norm": 0.05886108800768852, "learning_rate": 1.611120075200851e-05, "loss": 0.05328443646430969, "step": 3793 }, { "epoch": 0.5129366434015514, "grad_norm": 0.058363646268844604, "learning_rate": 1.610437501457587e-05, "loss": 0.04350912570953369, "step": 3794 }, { "epoch": 0.5130718401973873, "grad_norm": 0.14264824986457825, "learning_rate": 1.6097549047206464e-05, "loss": 0.06175494194030762, "step": 3795 }, { "epoch": 0.5132070369932232, "grad_norm": 0.09353962540626526, "learning_rate": 1.6090722851321497e-05, "loss": 0.07037687301635742, "step": 3796 }, { "epoch": 0.5133422337890592, "grad_norm": 0.14305047690868378, "learning_rate": 1.6083896428342213e-05, "loss": 0.08239507675170898, "step": 3797 }, { "epoch": 0.5134774305848951, "grad_norm": 0.0963195413351059, "learning_rate": 1.6077069779689915e-05, "loss": 0.04875802993774414, "step": 3798 }, { "epoch": 0.5136126273807311, "grad_norm": 0.1686539500951767, "learning_rate": 1.607024290678594e-05, "loss": 0.0822446346282959, "step": 3799 }, { "epoch": 0.5137478241765671, "grad_norm": 0.0595044381916523, "learning_rate": 1.6063415811051686e-05, "loss": 0.061818599700927734, "step": 3800 }, { "epoch": 0.5138830209724029, "grad_norm": 0.11236492544412613, "learning_rate": 1.6056588493908596e-05, "loss": 0.08846747875213623, "step": 3801 }, { "epoch": 0.5140182177682389, "grad_norm": 0.049755096435546875, "learning_rate": 1.604976095677814e-05, "loss": 0.05275154113769531, "step": 3802 }, { "epoch": 0.5141534145640748, "grad_norm": 0.06233778968453407, "learning_rate": 1.604293320108186e-05, "loss": 0.06415486335754395, "step": 3803 }, { "epoch": 0.5142886113599108, "grad_norm": 0.11785244941711426, "learning_rate": 1.603610522824132e-05, "loss": 0.07109951972961426, "step": 3804 }, { "epoch": 0.5144238081557467, "grad_norm": 0.06423136591911316, "learning_rate": 1.6029277039678153e-05, "loss": 0.06672000885009766, "step": 3805 }, { "epoch": 0.5145590049515827, "grad_norm": 0.07795750349760056, "learning_rate": 1.602244863681401e-05, "loss": 0.08669614791870117, "step": 3806 }, { "epoch": 0.5146942017474185, "grad_norm": 0.06414157897233963, "learning_rate": 1.6015620021070613e-05, "loss": 0.06005525588989258, "step": 3807 }, { "epoch": 0.5148293985432545, "grad_norm": 0.06245529279112816, "learning_rate": 1.6008791193869714e-05, "loss": 0.06514453887939453, "step": 3808 }, { "epoch": 0.5149645953390904, "grad_norm": 0.043420981615781784, "learning_rate": 1.6001962156633102e-05, "loss": 0.042061805725097656, "step": 3809 }, { "epoch": 0.5150997921349264, "grad_norm": 0.08287721127271652, "learning_rate": 1.5995132910782632e-05, "loss": 0.061681151390075684, "step": 3810 }, { "epoch": 0.5152349889307624, "grad_norm": 0.051789380609989166, "learning_rate": 1.5988303457740178e-05, "loss": 0.040281474590301514, "step": 3811 }, { "epoch": 0.5153701857265983, "grad_norm": 0.06059059873223305, "learning_rate": 1.598147379892768e-05, "loss": 0.0915842056274414, "step": 3812 }, { "epoch": 0.5155053825224342, "grad_norm": 0.12110874056816101, "learning_rate": 1.5974643935767098e-05, "loss": 0.06841802597045898, "step": 3813 }, { "epoch": 0.5156405793182701, "grad_norm": 0.048348553478717804, "learning_rate": 1.5967813869680452e-05, "loss": 0.059702157974243164, "step": 3814 }, { "epoch": 0.5157757761141061, "grad_norm": 0.07379832118749619, "learning_rate": 1.59609836020898e-05, "loss": 0.053717851638793945, "step": 3815 }, { "epoch": 0.515910972909942, "grad_norm": 0.09354014694690704, "learning_rate": 1.5954153134417236e-05, "loss": 0.08050727844238281, "step": 3816 }, { "epoch": 0.516046169705778, "grad_norm": 0.04181956499814987, "learning_rate": 1.59473224680849e-05, "loss": 0.05160093307495117, "step": 3817 }, { "epoch": 0.516181366501614, "grad_norm": 0.08444039523601532, "learning_rate": 1.5940491604514976e-05, "loss": 0.04850679636001587, "step": 3818 }, { "epoch": 0.5163165632974499, "grad_norm": 0.08802709728479385, "learning_rate": 1.5933660545129683e-05, "loss": 0.055463552474975586, "step": 3819 }, { "epoch": 0.5164517600932857, "grad_norm": 0.05439211055636406, "learning_rate": 1.5926829291351288e-05, "loss": 0.0729532241821289, "step": 3820 }, { "epoch": 0.5165869568891217, "grad_norm": 0.07838249206542969, "learning_rate": 1.591999784460209e-05, "loss": 0.07324886322021484, "step": 3821 }, { "epoch": 0.5167221536849577, "grad_norm": 0.06595619022846222, "learning_rate": 1.5913166206304435e-05, "loss": 0.04148983955383301, "step": 3822 }, { "epoch": 0.5168573504807936, "grad_norm": 0.09841666370630264, "learning_rate": 1.5906334377880707e-05, "loss": 0.09315657615661621, "step": 3823 }, { "epoch": 0.5169925472766296, "grad_norm": 0.11421307176351547, "learning_rate": 1.589950236075333e-05, "loss": 0.0876319408416748, "step": 3824 }, { "epoch": 0.5171277440724655, "grad_norm": 0.05589543655514717, "learning_rate": 1.5892670156344764e-05, "loss": 0.06772851943969727, "step": 3825 }, { "epoch": 0.5172629408683014, "grad_norm": 0.052531734108924866, "learning_rate": 1.588583776607751e-05, "loss": 0.056597232818603516, "step": 3826 }, { "epoch": 0.5173981376641373, "grad_norm": 0.1090838611125946, "learning_rate": 1.5879005191374106e-05, "loss": 0.05065488815307617, "step": 3827 }, { "epoch": 0.5175333344599733, "grad_norm": 0.06877259165048599, "learning_rate": 1.587217243365714e-05, "loss": 0.053522706031799316, "step": 3828 }, { "epoch": 0.5176685312558092, "grad_norm": 0.06729893386363983, "learning_rate": 1.586533949434922e-05, "loss": 0.04121077060699463, "step": 3829 }, { "epoch": 0.5178037280516452, "grad_norm": 0.0439026914536953, "learning_rate": 1.5858506374872998e-05, "loss": 0.04289674758911133, "step": 3830 }, { "epoch": 0.5179389248474812, "grad_norm": 0.18009690940380096, "learning_rate": 1.5851673076651178e-05, "loss": 0.09005510807037354, "step": 3831 }, { "epoch": 0.518074121643317, "grad_norm": 0.16789914667606354, "learning_rate": 1.5844839601106477e-05, "loss": 0.07492780685424805, "step": 3832 }, { "epoch": 0.518209318439153, "grad_norm": 0.06930071860551834, "learning_rate": 1.583800594966167e-05, "loss": 0.05796194076538086, "step": 3833 }, { "epoch": 0.5183445152349889, "grad_norm": 0.12920333445072174, "learning_rate": 1.583117212373955e-05, "loss": 0.0737161636352539, "step": 3834 }, { "epoch": 0.5184797120308249, "grad_norm": 0.09299825876951218, "learning_rate": 1.5824338124762967e-05, "loss": 0.06281715631484985, "step": 3835 }, { "epoch": 0.5186149088266608, "grad_norm": 0.10455896705389023, "learning_rate": 1.581750395415479e-05, "loss": 0.0762324333190918, "step": 3836 }, { "epoch": 0.5187501056224968, "grad_norm": 0.09843981266021729, "learning_rate": 1.5810669613337922e-05, "loss": 0.07065558433532715, "step": 3837 }, { "epoch": 0.5188853024183326, "grad_norm": 0.062306541949510574, "learning_rate": 1.5803835103735327e-05, "loss": 0.059501975774765015, "step": 3838 }, { "epoch": 0.5190204992141686, "grad_norm": 0.09388948231935501, "learning_rate": 1.5797000426769973e-05, "loss": 0.041937828063964844, "step": 3839 }, { "epoch": 0.5191556960100046, "grad_norm": 0.08753836154937744, "learning_rate": 1.579016558386488e-05, "loss": 0.07376432418823242, "step": 3840 }, { "epoch": 0.5192908928058405, "grad_norm": 0.05474778264760971, "learning_rate": 1.5783330576443096e-05, "loss": 0.062407493591308594, "step": 3841 }, { "epoch": 0.5194260896016765, "grad_norm": 0.10432812571525574, "learning_rate": 1.5776495405927716e-05, "loss": 0.06308603286743164, "step": 3842 }, { "epoch": 0.5195612863975124, "grad_norm": 0.06874200701713562, "learning_rate": 1.5769660073741844e-05, "loss": 0.06066441535949707, "step": 3843 }, { "epoch": 0.5196964831933483, "grad_norm": 0.098721943795681, "learning_rate": 1.5762824581308645e-05, "loss": 0.06487464904785156, "step": 3844 }, { "epoch": 0.5198316799891842, "grad_norm": 0.14265795052051544, "learning_rate": 1.5755988930051304e-05, "loss": 0.06925880908966064, "step": 3845 }, { "epoch": 0.5199668767850202, "grad_norm": 0.09727565199136734, "learning_rate": 1.5749153121393025e-05, "loss": 0.07371652126312256, "step": 3846 }, { "epoch": 0.5201020735808561, "grad_norm": 0.12147000432014465, "learning_rate": 1.574231715675708e-05, "loss": 0.06255006790161133, "step": 3847 }, { "epoch": 0.5202372703766921, "grad_norm": 0.048076607286930084, "learning_rate": 1.573548103756674e-05, "loss": 0.04524517059326172, "step": 3848 }, { "epoch": 0.520372467172528, "grad_norm": 0.045020125806331635, "learning_rate": 1.572864476524533e-05, "loss": 0.06437432765960693, "step": 3849 }, { "epoch": 0.5205076639683639, "grad_norm": 0.09541922062635422, "learning_rate": 1.5721808341216195e-05, "loss": 0.08754968643188477, "step": 3850 }, { "epoch": 0.5206428607641999, "grad_norm": 0.14167389273643494, "learning_rate": 1.571497176690271e-05, "loss": 0.07863259315490723, "step": 3851 }, { "epoch": 0.5207780575600358, "grad_norm": 0.06314025819301605, "learning_rate": 1.570813504372829e-05, "loss": 0.07802295684814453, "step": 3852 }, { "epoch": 0.5209132543558718, "grad_norm": 0.09422006458044052, "learning_rate": 1.570129817311638e-05, "loss": 0.057814836502075195, "step": 3853 }, { "epoch": 0.5210484511517077, "grad_norm": 0.07615645229816437, "learning_rate": 1.5694461156490452e-05, "loss": 0.0606694221496582, "step": 3854 }, { "epoch": 0.5211836479475437, "grad_norm": 0.07192464917898178, "learning_rate": 1.5687623995274008e-05, "loss": 0.060402095317840576, "step": 3855 }, { "epoch": 0.5213188447433795, "grad_norm": 0.11170166730880737, "learning_rate": 1.568078669089058e-05, "loss": 0.07771480083465576, "step": 3856 }, { "epoch": 0.5214540415392155, "grad_norm": 0.18437263369560242, "learning_rate": 1.567394924476373e-05, "loss": 0.07514524459838867, "step": 3857 }, { "epoch": 0.5215892383350514, "grad_norm": 0.06449896842241287, "learning_rate": 1.5667111658317057e-05, "loss": 0.057717084884643555, "step": 3858 }, { "epoch": 0.5217244351308874, "grad_norm": 0.11347664147615433, "learning_rate": 1.5660273932974177e-05, "loss": 0.06470084190368652, "step": 3859 }, { "epoch": 0.5218596319267234, "grad_norm": 0.12989670038223267, "learning_rate": 1.5653436070158743e-05, "loss": 0.06696939468383789, "step": 3860 }, { "epoch": 0.5219948287225593, "grad_norm": 0.09307479858398438, "learning_rate": 1.564659807129444e-05, "loss": 0.05035877227783203, "step": 3861 }, { "epoch": 0.5221300255183953, "grad_norm": 0.06069663539528847, "learning_rate": 1.5639759937804962e-05, "loss": 0.049747586250305176, "step": 3862 }, { "epoch": 0.5222652223142311, "grad_norm": 0.07519473880529404, "learning_rate": 1.5632921671114055e-05, "loss": 0.041859984397888184, "step": 3863 }, { "epoch": 0.5224004191100671, "grad_norm": 0.16680215299129486, "learning_rate": 1.5626083272645485e-05, "loss": 0.09029364585876465, "step": 3864 }, { "epoch": 0.522535615905903, "grad_norm": 0.059998974204063416, "learning_rate": 1.5619244743823038e-05, "loss": 0.06173241138458252, "step": 3865 }, { "epoch": 0.522670812701739, "grad_norm": 0.07698603719472885, "learning_rate": 1.5612406086070534e-05, "loss": 0.09070968627929688, "step": 3866 }, { "epoch": 0.5228060094975749, "grad_norm": 0.06496132165193558, "learning_rate": 1.560556730081181e-05, "loss": 0.05671834945678711, "step": 3867 }, { "epoch": 0.5229412062934109, "grad_norm": 0.17899605631828308, "learning_rate": 1.5598728389470754e-05, "loss": 0.08618974685668945, "step": 3868 }, { "epoch": 0.5230764030892467, "grad_norm": 0.07173573970794678, "learning_rate": 1.5591889353471245e-05, "loss": 0.05062144994735718, "step": 3869 }, { "epoch": 0.5232115998850827, "grad_norm": 0.07148000597953796, "learning_rate": 1.5585050194237226e-05, "loss": 0.08269500732421875, "step": 3870 }, { "epoch": 0.5233467966809187, "grad_norm": 0.1372024267911911, "learning_rate": 1.557821091319263e-05, "loss": 0.05329108238220215, "step": 3871 }, { "epoch": 0.5234819934767546, "grad_norm": 0.05171942710876465, "learning_rate": 1.5571371511761446e-05, "loss": 0.0505826473236084, "step": 3872 }, { "epoch": 0.5236171902725906, "grad_norm": 0.05921654403209686, "learning_rate": 1.5564531991367658e-05, "loss": 0.07123398780822754, "step": 3873 }, { "epoch": 0.5237523870684265, "grad_norm": 0.04116828739643097, "learning_rate": 1.5557692353435302e-05, "loss": 0.03347063064575195, "step": 3874 }, { "epoch": 0.5238875838642624, "grad_norm": 0.04731554538011551, "learning_rate": 1.5550852599388424e-05, "loss": 0.05790519714355469, "step": 3875 }, { "epoch": 0.5240227806600983, "grad_norm": 0.07882832735776901, "learning_rate": 1.5544012730651096e-05, "loss": 0.07251739501953125, "step": 3876 }, { "epoch": 0.5241579774559343, "grad_norm": 0.16371144354343414, "learning_rate": 1.5537172748647422e-05, "loss": 0.06999373435974121, "step": 3877 }, { "epoch": 0.5242931742517702, "grad_norm": 0.07022348791360855, "learning_rate": 1.553033265480151e-05, "loss": 0.053800106048583984, "step": 3878 }, { "epoch": 0.5244283710476062, "grad_norm": 0.09335823357105255, "learning_rate": 1.552349245053752e-05, "loss": 0.07693004608154297, "step": 3879 }, { "epoch": 0.5245635678434422, "grad_norm": 0.16085992753505707, "learning_rate": 1.5516652137279597e-05, "loss": 0.0805203914642334, "step": 3880 }, { "epoch": 0.524698764639278, "grad_norm": 0.09730314463376999, "learning_rate": 1.5509811716451955e-05, "loss": 0.051087260246276855, "step": 3881 }, { "epoch": 0.524833961435114, "grad_norm": 0.13653609156608582, "learning_rate": 1.550297118947879e-05, "loss": 0.06461071968078613, "step": 3882 }, { "epoch": 0.5249691582309499, "grad_norm": 0.11773184686899185, "learning_rate": 1.5496130557784343e-05, "loss": 0.06762957572937012, "step": 3883 }, { "epoch": 0.5251043550267859, "grad_norm": 0.10872945934534073, "learning_rate": 1.5489289822792868e-05, "loss": 0.07314491271972656, "step": 3884 }, { "epoch": 0.5252395518226218, "grad_norm": 0.14969436824321747, "learning_rate": 1.5482448985928645e-05, "loss": 0.08587002754211426, "step": 3885 }, { "epoch": 0.5253747486184578, "grad_norm": 0.10076700896024704, "learning_rate": 1.5475608048615964e-05, "loss": 0.07973664999008179, "step": 3886 }, { "epoch": 0.5255099454142936, "grad_norm": 0.11089161783456802, "learning_rate": 1.546876701227916e-05, "loss": 0.05116748809814453, "step": 3887 }, { "epoch": 0.5256451422101296, "grad_norm": 0.06001724675297737, "learning_rate": 1.5461925878342558e-05, "loss": 0.05179119110107422, "step": 3888 }, { "epoch": 0.5257803390059655, "grad_norm": 0.07770363241434097, "learning_rate": 1.5455084648230527e-05, "loss": 0.03802156448364258, "step": 3889 }, { "epoch": 0.5259155358018015, "grad_norm": 0.23153872787952423, "learning_rate": 1.5448243323367438e-05, "loss": 0.0949256420135498, "step": 3890 }, { "epoch": 0.5260507325976375, "grad_norm": 0.06408070027828217, "learning_rate": 1.544140190517771e-05, "loss": 0.059844970703125, "step": 3891 }, { "epoch": 0.5261859293934734, "grad_norm": 0.10859911888837814, "learning_rate": 1.5434560395085745e-05, "loss": 0.06161069869995117, "step": 3892 }, { "epoch": 0.5263211261893093, "grad_norm": 0.0758669376373291, "learning_rate": 1.542771879451599e-05, "loss": 0.06345462799072266, "step": 3893 }, { "epoch": 0.5264563229851452, "grad_norm": 0.09149164706468582, "learning_rate": 1.54208771048929e-05, "loss": 0.07653465867042542, "step": 3894 }, { "epoch": 0.5265915197809812, "grad_norm": 0.06464672833681107, "learning_rate": 1.5414035327640958e-05, "loss": 0.06371927261352539, "step": 3895 }, { "epoch": 0.5267267165768171, "grad_norm": 0.10314838588237762, "learning_rate": 1.5407193464184644e-05, "loss": 0.08294963836669922, "step": 3896 }, { "epoch": 0.5268619133726531, "grad_norm": 0.09328769892454147, "learning_rate": 1.5400351515948485e-05, "loss": 0.037291765213012695, "step": 3897 }, { "epoch": 0.526997110168489, "grad_norm": 0.08296113461256027, "learning_rate": 1.5393509484357006e-05, "loss": 0.05197262763977051, "step": 3898 }, { "epoch": 0.5271323069643249, "grad_norm": 0.121526800096035, "learning_rate": 1.538666737083475e-05, "loss": 0.05584794282913208, "step": 3899 }, { "epoch": 0.5272675037601608, "grad_norm": 0.050672899931669235, "learning_rate": 1.537982517680629e-05, "loss": 0.04543423652648926, "step": 3900 }, { "epoch": 0.5274027005559968, "grad_norm": 0.07551522552967072, "learning_rate": 1.5372982903696196e-05, "loss": 0.0659552812576294, "step": 3901 }, { "epoch": 0.5275378973518328, "grad_norm": 0.040404438972473145, "learning_rate": 1.536614055292908e-05, "loss": 0.0595707893371582, "step": 3902 }, { "epoch": 0.5276730941476687, "grad_norm": 0.13261736929416656, "learning_rate": 1.535929812592955e-05, "loss": 0.05104660987854004, "step": 3903 }, { "epoch": 0.5278082909435047, "grad_norm": 0.08313792943954468, "learning_rate": 1.5352455624122227e-05, "loss": 0.062470436096191406, "step": 3904 }, { "epoch": 0.5279434877393405, "grad_norm": 0.058765213936567307, "learning_rate": 1.5345613048931765e-05, "loss": 0.061493873596191406, "step": 3905 }, { "epoch": 0.5280786845351765, "grad_norm": 0.0861872136592865, "learning_rate": 1.5338770401782822e-05, "loss": 0.08306741714477539, "step": 3906 }, { "epoch": 0.5282138813310124, "grad_norm": 0.02905181236565113, "learning_rate": 1.5331927684100077e-05, "loss": 0.03806662559509277, "step": 3907 }, { "epoch": 0.5283490781268484, "grad_norm": 0.0869058221578598, "learning_rate": 1.5325084897308218e-05, "loss": 0.06251716613769531, "step": 3908 }, { "epoch": 0.5284842749226843, "grad_norm": 0.15307335555553436, "learning_rate": 1.5318242042831952e-05, "loss": 0.0908346176147461, "step": 3909 }, { "epoch": 0.5286194717185203, "grad_norm": 0.1766705960035324, "learning_rate": 1.5311399122095992e-05, "loss": 0.0809323787689209, "step": 3910 }, { "epoch": 0.5287546685143563, "grad_norm": 0.11445577442646027, "learning_rate": 1.5304556136525074e-05, "loss": 0.042017459869384766, "step": 3911 }, { "epoch": 0.5288898653101921, "grad_norm": 0.10634942352771759, "learning_rate": 1.5297713087543948e-05, "loss": 0.06469142436981201, "step": 3912 }, { "epoch": 0.5290250621060281, "grad_norm": 0.08895298838615417, "learning_rate": 1.5290869976577365e-05, "loss": 0.09692764282226562, "step": 3913 }, { "epoch": 0.529160258901864, "grad_norm": 0.07691548019647598, "learning_rate": 1.5284026805050107e-05, "loss": 0.06772613525390625, "step": 3914 }, { "epoch": 0.5292954556977, "grad_norm": 0.09204553812742233, "learning_rate": 1.5277183574386947e-05, "loss": 0.06881999969482422, "step": 3915 }, { "epoch": 0.5294306524935359, "grad_norm": 0.06572859734296799, "learning_rate": 1.5270340286012694e-05, "loss": 0.05770134925842285, "step": 3916 }, { "epoch": 0.5295658492893719, "grad_norm": 0.12960654497146606, "learning_rate": 1.526349694135215e-05, "loss": 0.04240679740905762, "step": 3917 }, { "epoch": 0.5297010460852077, "grad_norm": 0.0583483912050724, "learning_rate": 1.525665354183014e-05, "loss": 0.060099124908447266, "step": 3918 }, { "epoch": 0.5298362428810437, "grad_norm": 0.14548785984516144, "learning_rate": 1.5249810088871493e-05, "loss": 0.08008337020874023, "step": 3919 }, { "epoch": 0.5299714396768797, "grad_norm": 0.1373683512210846, "learning_rate": 1.5242966583901052e-05, "loss": 0.07833051681518555, "step": 3920 }, { "epoch": 0.5301066364727156, "grad_norm": 0.14249083399772644, "learning_rate": 1.523612302834367e-05, "loss": 0.11403656005859375, "step": 3921 }, { "epoch": 0.5302418332685516, "grad_norm": 0.07043417543172836, "learning_rate": 1.5229279423624217e-05, "loss": 0.04156756401062012, "step": 3922 }, { "epoch": 0.5303770300643875, "grad_norm": 0.07135765999555588, "learning_rate": 1.5222435771167566e-05, "loss": 0.06475543975830078, "step": 3923 }, { "epoch": 0.5305122268602234, "grad_norm": 0.19948060810565948, "learning_rate": 1.5215592072398602e-05, "loss": 0.08450007438659668, "step": 3924 }, { "epoch": 0.5306474236560593, "grad_norm": 0.07923898100852966, "learning_rate": 1.520874832874222e-05, "loss": 0.032118797302246094, "step": 3925 }, { "epoch": 0.5307826204518953, "grad_norm": 0.0771009624004364, "learning_rate": 1.5201904541623318e-05, "loss": 0.05563783645629883, "step": 3926 }, { "epoch": 0.5309178172477312, "grad_norm": 0.046507254242897034, "learning_rate": 1.5195060712466817e-05, "loss": 0.05328008532524109, "step": 3927 }, { "epoch": 0.5310530140435672, "grad_norm": 0.06295576691627502, "learning_rate": 1.5188216842697635e-05, "loss": 0.05476522445678711, "step": 3928 }, { "epoch": 0.5311882108394032, "grad_norm": 0.1106257289648056, "learning_rate": 1.5181372933740703e-05, "loss": 0.05213189125061035, "step": 3929 }, { "epoch": 0.531323407635239, "grad_norm": 0.10583825409412384, "learning_rate": 1.5174528987020958e-05, "loss": 0.058868408203125, "step": 3930 }, { "epoch": 0.531458604431075, "grad_norm": 0.03729039803147316, "learning_rate": 1.5167685003963345e-05, "loss": 0.03513073921203613, "step": 3931 }, { "epoch": 0.5315938012269109, "grad_norm": 0.05793580785393715, "learning_rate": 1.5160840985992824e-05, "loss": 0.06871342658996582, "step": 3932 }, { "epoch": 0.5317289980227469, "grad_norm": 0.09303639084100723, "learning_rate": 1.515399693453435e-05, "loss": 0.06128692626953125, "step": 3933 }, { "epoch": 0.5318641948185828, "grad_norm": 0.08739953488111496, "learning_rate": 1.5147152851012894e-05, "loss": 0.05667126178741455, "step": 3934 }, { "epoch": 0.5319993916144188, "grad_norm": 0.047449346631765366, "learning_rate": 1.514030873685343e-05, "loss": 0.04632049798965454, "step": 3935 }, { "epoch": 0.5321345884102546, "grad_norm": 0.08154062926769257, "learning_rate": 1.513346459348094e-05, "loss": 0.05924797058105469, "step": 3936 }, { "epoch": 0.5322697852060906, "grad_norm": 0.09296199679374695, "learning_rate": 1.5126620422320405e-05, "loss": 0.07582998275756836, "step": 3937 }, { "epoch": 0.5324049820019265, "grad_norm": 0.0505727082490921, "learning_rate": 1.5119776224796823e-05, "loss": 0.04465341567993164, "step": 3938 }, { "epoch": 0.5325401787977625, "grad_norm": 0.12637023627758026, "learning_rate": 1.5112932002335195e-05, "loss": 0.061310768127441406, "step": 3939 }, { "epoch": 0.5326753755935985, "grad_norm": 0.07522363215684891, "learning_rate": 1.5106087756360524e-05, "loss": 0.06966352462768555, "step": 3940 }, { "epoch": 0.5328105723894344, "grad_norm": 0.10160709172487259, "learning_rate": 1.5099243488297816e-05, "loss": 0.0876927375793457, "step": 3941 }, { "epoch": 0.5329457691852703, "grad_norm": 0.08219432085752487, "learning_rate": 1.5092399199572083e-05, "loss": 0.0741729736328125, "step": 3942 }, { "epoch": 0.5330809659811062, "grad_norm": 0.045017704367637634, "learning_rate": 1.5085554891608343e-05, "loss": 0.04221534729003906, "step": 3943 }, { "epoch": 0.5332161627769422, "grad_norm": 0.07566311955451965, "learning_rate": 1.5078710565831616e-05, "loss": 0.07285451889038086, "step": 3944 }, { "epoch": 0.5333513595727781, "grad_norm": 0.04524838551878929, "learning_rate": 1.5071866223666935e-05, "loss": 0.03532552719116211, "step": 3945 }, { "epoch": 0.5334865563686141, "grad_norm": 0.06698434054851532, "learning_rate": 1.5065021866539323e-05, "loss": 0.05963277816772461, "step": 3946 }, { "epoch": 0.53362175316445, "grad_norm": 0.05195764824748039, "learning_rate": 1.5058177495873805e-05, "loss": 0.04464384913444519, "step": 3947 }, { "epoch": 0.5337569499602859, "grad_norm": 0.0745687261223793, "learning_rate": 1.5051333113095429e-05, "loss": 0.06804251670837402, "step": 3948 }, { "epoch": 0.5338921467561218, "grad_norm": 0.045073360204696655, "learning_rate": 1.5044488719629218e-05, "loss": 0.04778623580932617, "step": 3949 }, { "epoch": 0.5340273435519578, "grad_norm": 0.04042297601699829, "learning_rate": 1.5037644316900227e-05, "loss": 0.05254650115966797, "step": 3950 }, { "epoch": 0.5341625403477938, "grad_norm": 0.07153289765119553, "learning_rate": 1.5030799906333484e-05, "loss": 0.07042825222015381, "step": 3951 }, { "epoch": 0.5342977371436297, "grad_norm": 0.10275546461343765, "learning_rate": 1.5023955489354031e-05, "loss": 0.05800127983093262, "step": 3952 }, { "epoch": 0.5344329339394657, "grad_norm": 0.08324237167835236, "learning_rate": 1.5017111067386927e-05, "loss": 0.07979631423950195, "step": 3953 }, { "epoch": 0.5345681307353016, "grad_norm": 0.041641414165496826, "learning_rate": 1.50102666418572e-05, "loss": 0.039324164390563965, "step": 3954 }, { "epoch": 0.5347033275311375, "grad_norm": 0.10182870179414749, "learning_rate": 1.500342221418991e-05, "loss": 0.10242462158203125, "step": 3955 }, { "epoch": 0.5348385243269734, "grad_norm": 0.07581325620412827, "learning_rate": 1.4996577785810094e-05, "loss": 0.04960298538208008, "step": 3956 }, { "epoch": 0.5349737211228094, "grad_norm": 0.05332183092832565, "learning_rate": 1.4989733358142798e-05, "loss": 0.0499800443649292, "step": 3957 }, { "epoch": 0.5351089179186453, "grad_norm": 0.06088460981845856, "learning_rate": 1.498288893261308e-05, "loss": 0.05264925956726074, "step": 3958 }, { "epoch": 0.5352441147144813, "grad_norm": 0.089680977165699, "learning_rate": 1.497604451064597e-05, "loss": 0.052727460861206055, "step": 3959 }, { "epoch": 0.5353793115103173, "grad_norm": 0.06065080687403679, "learning_rate": 1.496920009366652e-05, "loss": 0.0589599609375, "step": 3960 }, { "epoch": 0.5355145083061531, "grad_norm": 0.03876560553908348, "learning_rate": 1.4962355683099777e-05, "loss": 0.038787841796875, "step": 3961 }, { "epoch": 0.5356497051019891, "grad_norm": 0.06510844081640244, "learning_rate": 1.4955511280370782e-05, "loss": 0.06097531318664551, "step": 3962 }, { "epoch": 0.535784901897825, "grad_norm": 0.11961193382740021, "learning_rate": 1.4948666886904579e-05, "loss": 0.08705329895019531, "step": 3963 }, { "epoch": 0.535920098693661, "grad_norm": 0.03729398921132088, "learning_rate": 1.4941822504126199e-05, "loss": 0.036171793937683105, "step": 3964 }, { "epoch": 0.5360552954894969, "grad_norm": 0.07781950384378433, "learning_rate": 1.4934978133460681e-05, "loss": 0.07162904739379883, "step": 3965 }, { "epoch": 0.5361904922853329, "grad_norm": 0.07768019288778305, "learning_rate": 1.4928133776333068e-05, "loss": 0.07037782669067383, "step": 3966 }, { "epoch": 0.5363256890811687, "grad_norm": 0.11429572105407715, "learning_rate": 1.4921289434168386e-05, "loss": 0.07644963264465332, "step": 3967 }, { "epoch": 0.5364608858770047, "grad_norm": 0.10944870859384537, "learning_rate": 1.4914445108391663e-05, "loss": 0.0532231330871582, "step": 3968 }, { "epoch": 0.5365960826728406, "grad_norm": 0.05272554233670235, "learning_rate": 1.4907600800427922e-05, "loss": 0.053615570068359375, "step": 3969 }, { "epoch": 0.5367312794686766, "grad_norm": 0.09222150593996048, "learning_rate": 1.4900756511702188e-05, "loss": 0.06598258018493652, "step": 3970 }, { "epoch": 0.5368664762645126, "grad_norm": 0.0834721103310585, "learning_rate": 1.4893912243639479e-05, "loss": 0.0625772476196289, "step": 3971 }, { "epoch": 0.5370016730603485, "grad_norm": 0.0897260308265686, "learning_rate": 1.4887067997664807e-05, "loss": 0.05955678969621658, "step": 3972 }, { "epoch": 0.5371368698561844, "grad_norm": 0.08779985457658768, "learning_rate": 1.488022377520318e-05, "loss": 0.09327077865600586, "step": 3973 }, { "epoch": 0.5372720666520203, "grad_norm": 0.09298571199178696, "learning_rate": 1.4873379577679599e-05, "loss": 0.06790244579315186, "step": 3974 }, { "epoch": 0.5374072634478563, "grad_norm": 0.05805501714348793, "learning_rate": 1.4866535406519063e-05, "loss": 0.06986933946609497, "step": 3975 }, { "epoch": 0.5375424602436922, "grad_norm": 0.056761037558317184, "learning_rate": 1.4859691263146574e-05, "loss": 0.05704069137573242, "step": 3976 }, { "epoch": 0.5376776570395282, "grad_norm": 0.08102252334356308, "learning_rate": 1.485284714898711e-05, "loss": 0.07594895362854004, "step": 3977 }, { "epoch": 0.5378128538353641, "grad_norm": 0.09888124465942383, "learning_rate": 1.4846003065465653e-05, "loss": 0.07868790626525879, "step": 3978 }, { "epoch": 0.5379480506312, "grad_norm": 0.12224207073450089, "learning_rate": 1.4839159014007179e-05, "loss": 0.08986568450927734, "step": 3979 }, { "epoch": 0.538083247427036, "grad_norm": 0.06923440843820572, "learning_rate": 1.4832314996036653e-05, "loss": 0.07303094863891602, "step": 3980 }, { "epoch": 0.5382184442228719, "grad_norm": 0.10873255133628845, "learning_rate": 1.4825471012979048e-05, "loss": 0.0881357192993164, "step": 3981 }, { "epoch": 0.5383536410187079, "grad_norm": 0.13190844655036926, "learning_rate": 1.4818627066259301e-05, "loss": 0.08095932006835938, "step": 3982 }, { "epoch": 0.5384888378145438, "grad_norm": 0.08680063486099243, "learning_rate": 1.481178315730237e-05, "loss": 0.07169675827026367, "step": 3983 }, { "epoch": 0.5386240346103798, "grad_norm": 0.13077495992183685, "learning_rate": 1.4804939287533184e-05, "loss": 0.06654715538024902, "step": 3984 }, { "epoch": 0.5387592314062156, "grad_norm": 0.09566811472177505, "learning_rate": 1.4798095458376682e-05, "loss": 0.0431288480758667, "step": 3985 }, { "epoch": 0.5388944282020516, "grad_norm": 0.08555074781179428, "learning_rate": 1.4791251671257788e-05, "loss": 0.05816817283630371, "step": 3986 }, { "epoch": 0.5390296249978875, "grad_norm": 0.09549107402563095, "learning_rate": 1.4784407927601401e-05, "loss": 0.0589756965637207, "step": 3987 }, { "epoch": 0.5391648217937235, "grad_norm": 0.06103307008743286, "learning_rate": 1.4777564228832436e-05, "loss": 0.058516621589660645, "step": 3988 }, { "epoch": 0.5393000185895594, "grad_norm": 0.08083301037549973, "learning_rate": 1.4770720576375782e-05, "loss": 0.06440114974975586, "step": 3989 }, { "epoch": 0.5394352153853954, "grad_norm": 0.16758571565151215, "learning_rate": 1.4763876971656334e-05, "loss": 0.08042621612548828, "step": 3990 }, { "epoch": 0.5395704121812313, "grad_norm": 0.04845372587442398, "learning_rate": 1.4757033416098953e-05, "loss": 0.04959309101104736, "step": 3991 }, { "epoch": 0.5397056089770672, "grad_norm": 0.11092467606067657, "learning_rate": 1.4750189911128511e-05, "loss": 0.09543991088867188, "step": 3992 }, { "epoch": 0.5398408057729032, "grad_norm": 0.07181750237941742, "learning_rate": 1.4743346458169863e-05, "loss": 0.0502493679523468, "step": 3993 }, { "epoch": 0.5399760025687391, "grad_norm": 0.07229512184858322, "learning_rate": 1.473650305864785e-05, "loss": 0.06245088577270508, "step": 3994 }, { "epoch": 0.5401111993645751, "grad_norm": 0.097012959420681, "learning_rate": 1.472965971398731e-05, "loss": 0.07404017448425293, "step": 3995 }, { "epoch": 0.540246396160411, "grad_norm": 0.06427924335002899, "learning_rate": 1.4722816425613054e-05, "loss": 0.049645185470581055, "step": 3996 }, { "epoch": 0.540381592956247, "grad_norm": 0.05618743970990181, "learning_rate": 1.4715973194949895e-05, "loss": 0.04946005344390869, "step": 3997 }, { "epoch": 0.5405167897520828, "grad_norm": 0.08045166730880737, "learning_rate": 1.4709130023422636e-05, "loss": 0.07694530487060547, "step": 3998 }, { "epoch": 0.5406519865479188, "grad_norm": 0.05502136051654816, "learning_rate": 1.4702286912456052e-05, "loss": 0.056183815002441406, "step": 3999 }, { "epoch": 0.5407871833437548, "grad_norm": 0.1206865906715393, "learning_rate": 1.4695443863474928e-05, "loss": 0.10046792030334473, "step": 4000 }, { "epoch": 0.5409223801395907, "grad_norm": 0.19072018563747406, "learning_rate": 1.4688600877904012e-05, "loss": 0.0827031135559082, "step": 4001 }, { "epoch": 0.5410575769354267, "grad_norm": 0.14215311408042908, "learning_rate": 1.468175795716805e-05, "loss": 0.07501697540283203, "step": 4002 }, { "epoch": 0.5411927737312626, "grad_norm": 0.06811357289552689, "learning_rate": 1.4674915102691783e-05, "loss": 0.05690455436706543, "step": 4003 }, { "epoch": 0.5413279705270985, "grad_norm": 0.1141452044248581, "learning_rate": 1.4668072315899926e-05, "loss": 0.09975820779800415, "step": 4004 }, { "epoch": 0.5414631673229344, "grad_norm": 0.07782881706953049, "learning_rate": 1.466122959821718e-05, "loss": 0.09650778770446777, "step": 4005 }, { "epoch": 0.5415983641187704, "grad_norm": 0.05322935804724693, "learning_rate": 1.4654386951068239e-05, "loss": 0.047977179288864136, "step": 4006 }, { "epoch": 0.5417335609146063, "grad_norm": 0.07051298767328262, "learning_rate": 1.4647544375877776e-05, "loss": 0.03043365478515625, "step": 4007 }, { "epoch": 0.5418687577104423, "grad_norm": 0.08823000639677048, "learning_rate": 1.4640701874070457e-05, "loss": 0.07546186447143555, "step": 4008 }, { "epoch": 0.5420039545062783, "grad_norm": 0.08165750652551651, "learning_rate": 1.4633859447070922e-05, "loss": 0.06728363037109375, "step": 4009 }, { "epoch": 0.5421391513021141, "grad_norm": 0.07753219455480576, "learning_rate": 1.4627017096303805e-05, "loss": 0.06643152236938477, "step": 4010 }, { "epoch": 0.54227434809795, "grad_norm": 0.14924810826778412, "learning_rate": 1.4620174823193711e-05, "loss": 0.07922667264938354, "step": 4011 }, { "epoch": 0.542409544893786, "grad_norm": 0.11316506564617157, "learning_rate": 1.4613332629165249e-05, "loss": 0.08025646209716797, "step": 4012 }, { "epoch": 0.542544741689622, "grad_norm": 0.07670755684375763, "learning_rate": 1.4606490515642998e-05, "loss": 0.07195925712585449, "step": 4013 }, { "epoch": 0.5426799384854579, "grad_norm": 0.09826210886240005, "learning_rate": 1.4599648484051516e-05, "loss": 0.09030818939208984, "step": 4014 }, { "epoch": 0.5428151352812939, "grad_norm": 0.05562635138630867, "learning_rate": 1.4592806535815358e-05, "loss": 0.052507877349853516, "step": 4015 }, { "epoch": 0.5429503320771297, "grad_norm": 0.06538919359445572, "learning_rate": 1.4585964672359045e-05, "loss": 0.06469321250915527, "step": 4016 }, { "epoch": 0.5430855288729657, "grad_norm": 0.06262054294347763, "learning_rate": 1.4579122895107098e-05, "loss": 0.06033515930175781, "step": 4017 }, { "epoch": 0.5432207256688016, "grad_norm": 0.12654909491539001, "learning_rate": 1.4572281205484012e-05, "loss": 0.05745267868041992, "step": 4018 }, { "epoch": 0.5433559224646376, "grad_norm": 0.0647176206111908, "learning_rate": 1.4565439604914256e-05, "loss": 0.060677528381347656, "step": 4019 }, { "epoch": 0.5434911192604736, "grad_norm": 0.09649985283613205, "learning_rate": 1.4558598094822294e-05, "loss": 0.06522369384765625, "step": 4020 }, { "epoch": 0.5436263160563095, "grad_norm": 0.14743003249168396, "learning_rate": 1.455175667663256e-05, "loss": 0.08967161178588867, "step": 4021 }, { "epoch": 0.5437615128521454, "grad_norm": 0.0617133192718029, "learning_rate": 1.4544915351769476e-05, "loss": 0.045554161071777344, "step": 4022 }, { "epoch": 0.5438967096479813, "grad_norm": 0.19207578897476196, "learning_rate": 1.4538074121657448e-05, "loss": 0.07785415649414062, "step": 4023 }, { "epoch": 0.5440319064438173, "grad_norm": 0.06239357218146324, "learning_rate": 1.4531232987720846e-05, "loss": 0.06808662414550781, "step": 4024 }, { "epoch": 0.5441671032396532, "grad_norm": 0.10362675786018372, "learning_rate": 1.4524391951384037e-05, "loss": 0.0351710319519043, "step": 4025 }, { "epoch": 0.5443023000354892, "grad_norm": 0.09004382789134979, "learning_rate": 1.4517551014071358e-05, "loss": 0.061960697174072266, "step": 4026 }, { "epoch": 0.5444374968313251, "grad_norm": 0.1107117235660553, "learning_rate": 1.4510710177207137e-05, "loss": 0.04803276062011719, "step": 4027 }, { "epoch": 0.544572693627161, "grad_norm": 0.1254362165927887, "learning_rate": 1.450386944221566e-05, "loss": 0.06754922866821289, "step": 4028 }, { "epoch": 0.5447078904229969, "grad_norm": 0.1771830916404724, "learning_rate": 1.449702881052121e-05, "loss": 0.08376109600067139, "step": 4029 }, { "epoch": 0.5448430872188329, "grad_norm": 0.18193966150283813, "learning_rate": 1.4490188283548048e-05, "loss": 0.08131217956542969, "step": 4030 }, { "epoch": 0.5449782840146689, "grad_norm": 0.14530053734779358, "learning_rate": 1.44833478627204e-05, "loss": 0.07862281799316406, "step": 4031 }, { "epoch": 0.5451134808105048, "grad_norm": 0.0912572368979454, "learning_rate": 1.447650754946249e-05, "loss": 0.03206348419189453, "step": 4032 }, { "epoch": 0.5452486776063408, "grad_norm": 0.15424300730228424, "learning_rate": 1.4469667345198492e-05, "loss": 0.05220985412597656, "step": 4033 }, { "epoch": 0.5453838744021766, "grad_norm": 0.09253767877817154, "learning_rate": 1.446282725135258e-05, "loss": 0.08451128005981445, "step": 4034 }, { "epoch": 0.5455190711980126, "grad_norm": 0.298917680978775, "learning_rate": 1.4455987269348904e-05, "loss": 0.10218369960784912, "step": 4035 }, { "epoch": 0.5456542679938485, "grad_norm": 0.22219686210155487, "learning_rate": 1.4449147400611578e-05, "loss": 0.09082603454589844, "step": 4036 }, { "epoch": 0.5457894647896845, "grad_norm": 0.11093132197856903, "learning_rate": 1.4442307646564702e-05, "loss": 0.06393581628799438, "step": 4037 }, { "epoch": 0.5459246615855204, "grad_norm": 0.134236678481102, "learning_rate": 1.4435468008632345e-05, "loss": 0.0842595100402832, "step": 4038 }, { "epoch": 0.5460598583813564, "grad_norm": 0.07306552678346634, "learning_rate": 1.4428628488238557e-05, "loss": 0.06939458847045898, "step": 4039 }, { "epoch": 0.5461950551771922, "grad_norm": 0.14223705232143402, "learning_rate": 1.442178908680737e-05, "loss": 0.05612373352050781, "step": 4040 }, { "epoch": 0.5463302519730282, "grad_norm": 0.15557502210140228, "learning_rate": 1.4414949805762779e-05, "loss": 0.07192802429199219, "step": 4041 }, { "epoch": 0.5464654487688642, "grad_norm": 0.06720432639122009, "learning_rate": 1.4408110646528757e-05, "loss": 0.08239865303039551, "step": 4042 }, { "epoch": 0.5466006455647001, "grad_norm": 0.0795508548617363, "learning_rate": 1.440127161052925e-05, "loss": 0.07013583183288574, "step": 4043 }, { "epoch": 0.5467358423605361, "grad_norm": 0.07922715693712234, "learning_rate": 1.4394432699188188e-05, "loss": 0.053696632385253906, "step": 4044 }, { "epoch": 0.546871039156372, "grad_norm": 0.1482291966676712, "learning_rate": 1.4387593913929472e-05, "loss": 0.07376480102539062, "step": 4045 }, { "epoch": 0.547006235952208, "grad_norm": 0.12175357341766357, "learning_rate": 1.4380755256176968e-05, "loss": 0.07642602920532227, "step": 4046 }, { "epoch": 0.5471414327480438, "grad_norm": 0.08537746965885162, "learning_rate": 1.437391672735452e-05, "loss": 0.0711512565612793, "step": 4047 }, { "epoch": 0.5472766295438798, "grad_norm": 0.07718467712402344, "learning_rate": 1.4367078328885946e-05, "loss": 0.07671165466308594, "step": 4048 }, { "epoch": 0.5474118263397157, "grad_norm": 0.06549594551324844, "learning_rate": 1.4360240062195039e-05, "loss": 0.08379745483398438, "step": 4049 }, { "epoch": 0.5475470231355517, "grad_norm": 0.09232236444950104, "learning_rate": 1.435340192870557e-05, "loss": 0.05181431770324707, "step": 4050 }, { "epoch": 0.5476822199313877, "grad_norm": 0.15960656106472015, "learning_rate": 1.434656392984126e-05, "loss": 0.07012367248535156, "step": 4051 }, { "epoch": 0.5478174167272236, "grad_norm": 0.06931684911251068, "learning_rate": 1.4339726067025828e-05, "loss": 0.058058738708496094, "step": 4052 }, { "epoch": 0.5479526135230595, "grad_norm": 0.150718092918396, "learning_rate": 1.4332888341682947e-05, "loss": 0.061393022537231445, "step": 4053 }, { "epoch": 0.5480878103188954, "grad_norm": 0.08669254183769226, "learning_rate": 1.432605075523627e-05, "loss": 0.08437573909759521, "step": 4054 }, { "epoch": 0.5482230071147314, "grad_norm": 0.07061182707548141, "learning_rate": 1.4319213309109426e-05, "loss": 0.07586526870727539, "step": 4055 }, { "epoch": 0.5483582039105673, "grad_norm": 0.06959249079227448, "learning_rate": 1.4312376004725996e-05, "loss": 0.05583477020263672, "step": 4056 }, { "epoch": 0.5484934007064033, "grad_norm": 0.059173211455345154, "learning_rate": 1.430553884350955e-05, "loss": 0.050989627838134766, "step": 4057 }, { "epoch": 0.5486285975022392, "grad_norm": 0.0690905749797821, "learning_rate": 1.429870182688362e-05, "loss": 0.03889751434326172, "step": 4058 }, { "epoch": 0.5487637942980751, "grad_norm": 0.11699472367763519, "learning_rate": 1.4291864956271713e-05, "loss": 0.07867813110351562, "step": 4059 }, { "epoch": 0.548898991093911, "grad_norm": 0.05598118528723717, "learning_rate": 1.4285028233097293e-05, "loss": 0.0629434585571289, "step": 4060 }, { "epoch": 0.549034187889747, "grad_norm": 0.05027598887681961, "learning_rate": 1.4278191658783809e-05, "loss": 0.04721999168395996, "step": 4061 }, { "epoch": 0.549169384685583, "grad_norm": 0.0698387622833252, "learning_rate": 1.427135523475467e-05, "loss": 0.06154298782348633, "step": 4062 }, { "epoch": 0.5493045814814189, "grad_norm": 0.18718264997005463, "learning_rate": 1.4264518962433258e-05, "loss": 0.07340455055236816, "step": 4063 }, { "epoch": 0.5494397782772549, "grad_norm": 0.10411976277828217, "learning_rate": 1.4257682843242925e-05, "loss": 0.0651845932006836, "step": 4064 }, { "epoch": 0.5495749750730907, "grad_norm": 0.06220874935388565, "learning_rate": 1.4250846878606974e-05, "loss": 0.0698246955871582, "step": 4065 }, { "epoch": 0.5497101718689267, "grad_norm": 0.0830516442656517, "learning_rate": 1.4244011069948702e-05, "loss": 0.06204843521118164, "step": 4066 }, { "epoch": 0.5498453686647626, "grad_norm": 0.03967805579304695, "learning_rate": 1.4237175418691357e-05, "loss": 0.04635214805603027, "step": 4067 }, { "epoch": 0.5499805654605986, "grad_norm": 0.060160860419273376, "learning_rate": 1.4230339926258153e-05, "loss": 0.0524139404296875, "step": 4068 }, { "epoch": 0.5501157622564345, "grad_norm": 0.12720409035682678, "learning_rate": 1.422350459407229e-05, "loss": 0.05837535858154297, "step": 4069 }, { "epoch": 0.5502509590522705, "grad_norm": 0.11871021240949631, "learning_rate": 1.4216669423556903e-05, "loss": 0.07346153259277344, "step": 4070 }, { "epoch": 0.5503861558481064, "grad_norm": 0.05642274394631386, "learning_rate": 1.420983441613512e-05, "loss": 0.054018616676330566, "step": 4071 }, { "epoch": 0.5505213526439423, "grad_norm": 0.09161009639501572, "learning_rate": 1.420299957323003e-05, "loss": 0.0765218734741211, "step": 4072 }, { "epoch": 0.5506565494397783, "grad_norm": 0.08621351420879364, "learning_rate": 1.4196164896264679e-05, "loss": 0.057952165603637695, "step": 4073 }, { "epoch": 0.5507917462356142, "grad_norm": 0.11336734890937805, "learning_rate": 1.418933038666208e-05, "loss": 0.04543304443359375, "step": 4074 }, { "epoch": 0.5509269430314502, "grad_norm": 0.09643787145614624, "learning_rate": 1.4182496045845217e-05, "loss": 0.07432794570922852, "step": 4075 }, { "epoch": 0.5510621398272861, "grad_norm": 0.06991898268461227, "learning_rate": 1.4175661875237036e-05, "loss": 0.05778098106384277, "step": 4076 }, { "epoch": 0.551197336623122, "grad_norm": 0.05274023860692978, "learning_rate": 1.416882787626045e-05, "loss": 0.030061841011047363, "step": 4077 }, { "epoch": 0.5513325334189579, "grad_norm": 0.10722766071557999, "learning_rate": 1.4161994050338334e-05, "loss": 0.0763922929763794, "step": 4078 }, { "epoch": 0.5514677302147939, "grad_norm": 0.07351508736610413, "learning_rate": 1.4155160398893528e-05, "loss": 0.07125234603881836, "step": 4079 }, { "epoch": 0.5516029270106299, "grad_norm": 0.12987740337848663, "learning_rate": 1.4148326923348824e-05, "loss": 0.0997319221496582, "step": 4080 }, { "epoch": 0.5517381238064658, "grad_norm": 0.07257214933633804, "learning_rate": 1.4141493625127e-05, "loss": 0.058835387229919434, "step": 4081 }, { "epoch": 0.5518733206023018, "grad_norm": 0.10118108987808228, "learning_rate": 1.4134660505650786e-05, "loss": 0.03758549690246582, "step": 4082 }, { "epoch": 0.5520085173981376, "grad_norm": 0.06019473075866699, "learning_rate": 1.4127827566342864e-05, "loss": 0.04579758644104004, "step": 4083 }, { "epoch": 0.5521437141939736, "grad_norm": 0.1526159793138504, "learning_rate": 1.4120994808625896e-05, "loss": 0.08717477321624756, "step": 4084 }, { "epoch": 0.5522789109898095, "grad_norm": 0.09764004498720169, "learning_rate": 1.4114162233922494e-05, "loss": 0.0828394889831543, "step": 4085 }, { "epoch": 0.5524141077856455, "grad_norm": 0.11772321164608002, "learning_rate": 1.4107329843655238e-05, "loss": 0.08561849594116211, "step": 4086 }, { "epoch": 0.5525493045814814, "grad_norm": 0.17571792006492615, "learning_rate": 1.4100497639246675e-05, "loss": 0.07077193260192871, "step": 4087 }, { "epoch": 0.5526845013773174, "grad_norm": 0.05513649433851242, "learning_rate": 1.4093665622119294e-05, "loss": 0.04854846000671387, "step": 4088 }, { "epoch": 0.5528196981731534, "grad_norm": 0.12004321068525314, "learning_rate": 1.4086833793695566e-05, "loss": 0.06998515129089355, "step": 4089 }, { "epoch": 0.5529548949689892, "grad_norm": 0.09957384318113327, "learning_rate": 1.408000215539791e-05, "loss": 0.07066822052001953, "step": 4090 }, { "epoch": 0.5530900917648252, "grad_norm": 0.08457494527101517, "learning_rate": 1.4073170708648711e-05, "loss": 0.06726408004760742, "step": 4091 }, { "epoch": 0.5532252885606611, "grad_norm": 0.05638938397169113, "learning_rate": 1.406633945487032e-05, "loss": 0.05702662467956543, "step": 4092 }, { "epoch": 0.5533604853564971, "grad_norm": 0.0509008951485157, "learning_rate": 1.4059508395485026e-05, "loss": 0.05689096450805664, "step": 4093 }, { "epoch": 0.553495682152333, "grad_norm": 0.06192107871174812, "learning_rate": 1.4052677531915102e-05, "loss": 0.059439241886138916, "step": 4094 }, { "epoch": 0.553630878948169, "grad_norm": 0.08555155992507935, "learning_rate": 1.4045846865582765e-05, "loss": 0.03972148895263672, "step": 4095 }, { "epoch": 0.5537660757440048, "grad_norm": 0.12039042264223099, "learning_rate": 1.4039016397910206e-05, "loss": 0.059500694274902344, "step": 4096 }, { "epoch": 0.5539012725398408, "grad_norm": 0.19533653557300568, "learning_rate": 1.403218613031955e-05, "loss": 0.07166004180908203, "step": 4097 }, { "epoch": 0.5540364693356767, "grad_norm": 0.0606880709528923, "learning_rate": 1.4025356064232903e-05, "loss": 0.05276799201965332, "step": 4098 }, { "epoch": 0.5541716661315127, "grad_norm": 0.05337110534310341, "learning_rate": 1.4018526201072324e-05, "loss": 0.049727439880371094, "step": 4099 }, { "epoch": 0.5543068629273487, "grad_norm": 0.08678770810365677, "learning_rate": 1.4011696542259821e-05, "loss": 0.08035659790039062, "step": 4100 }, { "epoch": 0.5544420597231846, "grad_norm": 0.04927872493863106, "learning_rate": 1.4004867089217376e-05, "loss": 0.04891204833984375, "step": 4101 }, { "epoch": 0.5545772565190205, "grad_norm": 0.0675642341375351, "learning_rate": 1.39980378433669e-05, "loss": 0.07493162155151367, "step": 4102 }, { "epoch": 0.5547124533148564, "grad_norm": 0.04178496077656746, "learning_rate": 1.399120880613029e-05, "loss": 0.05071115493774414, "step": 4103 }, { "epoch": 0.5548476501106924, "grad_norm": 0.09921976178884506, "learning_rate": 1.3984379978929388e-05, "loss": 0.09384346008300781, "step": 4104 }, { "epoch": 0.5549828469065283, "grad_norm": 0.1152753159403801, "learning_rate": 1.3977551363185995e-05, "loss": 0.059639692306518555, "step": 4105 }, { "epoch": 0.5551180437023643, "grad_norm": 0.18421974778175354, "learning_rate": 1.3970722960321854e-05, "loss": 0.09104394912719727, "step": 4106 }, { "epoch": 0.5552532404982002, "grad_norm": 0.1050596535205841, "learning_rate": 1.3963894771758682e-05, "loss": 0.06557977199554443, "step": 4107 }, { "epoch": 0.5553884372940361, "grad_norm": 0.049730006605386734, "learning_rate": 1.3957066798918143e-05, "loss": 0.0556025505065918, "step": 4108 }, { "epoch": 0.555523634089872, "grad_norm": 0.058101147413253784, "learning_rate": 1.3950239043221861e-05, "loss": 0.06447029113769531, "step": 4109 }, { "epoch": 0.555658830885708, "grad_norm": 0.15102678537368774, "learning_rate": 1.3943411506091408e-05, "loss": 0.09522128105163574, "step": 4110 }, { "epoch": 0.555794027681544, "grad_norm": 0.08406145125627518, "learning_rate": 1.3936584188948313e-05, "loss": 0.08793139457702637, "step": 4111 }, { "epoch": 0.5559292244773799, "grad_norm": 0.15618664026260376, "learning_rate": 1.3929757093214059e-05, "loss": 0.07349300384521484, "step": 4112 }, { "epoch": 0.5560644212732159, "grad_norm": 0.09589562565088272, "learning_rate": 1.3922930220310085e-05, "loss": 0.07369613647460938, "step": 4113 }, { "epoch": 0.5561996180690517, "grad_norm": 0.11526025086641312, "learning_rate": 1.3916103571657786e-05, "loss": 0.06490325927734375, "step": 4114 }, { "epoch": 0.5563348148648877, "grad_norm": 0.06394743174314499, "learning_rate": 1.3909277148678504e-05, "loss": 0.05130791664123535, "step": 4115 }, { "epoch": 0.5564700116607236, "grad_norm": 0.08251817524433136, "learning_rate": 1.3902450952793536e-05, "loss": 0.04411637783050537, "step": 4116 }, { "epoch": 0.5566052084565596, "grad_norm": 0.09122715145349503, "learning_rate": 1.389562498542413e-05, "loss": 0.06601238250732422, "step": 4117 }, { "epoch": 0.5567404052523955, "grad_norm": 0.07016460597515106, "learning_rate": 1.388879924799149e-05, "loss": 0.04880332946777344, "step": 4118 }, { "epoch": 0.5568756020482315, "grad_norm": 0.0685984194278717, "learning_rate": 1.388197374191678e-05, "loss": 0.06895732879638672, "step": 4119 }, { "epoch": 0.5570107988440673, "grad_norm": 0.060960136353969574, "learning_rate": 1.387514846862109e-05, "loss": 0.07156848907470703, "step": 4120 }, { "epoch": 0.5571459956399033, "grad_norm": 0.0268340315669775, "learning_rate": 1.3868323429525492e-05, "loss": 0.03031754493713379, "step": 4121 }, { "epoch": 0.5572811924357393, "grad_norm": 0.06605499237775803, "learning_rate": 1.3861498626050986e-05, "loss": 0.08360886573791504, "step": 4122 }, { "epoch": 0.5574163892315752, "grad_norm": 0.0721614882349968, "learning_rate": 1.385467405961854e-05, "loss": 0.06602191925048828, "step": 4123 }, { "epoch": 0.5575515860274112, "grad_norm": 0.13270018994808197, "learning_rate": 1.3847849731649066e-05, "loss": 0.08553099632263184, "step": 4124 }, { "epoch": 0.5576867828232471, "grad_norm": 0.10970934480428696, "learning_rate": 1.3841025643563418e-05, "loss": 0.07076334953308105, "step": 4125 }, { "epoch": 0.557821979619083, "grad_norm": 0.09613852947950363, "learning_rate": 1.3834201796782413e-05, "loss": 0.07680797576904297, "step": 4126 }, { "epoch": 0.5579571764149189, "grad_norm": 0.05349888280034065, "learning_rate": 1.3827378192726808e-05, "loss": 0.056195735931396484, "step": 4127 }, { "epoch": 0.5580923732107549, "grad_norm": 0.06799422204494476, "learning_rate": 1.3820554832817324e-05, "loss": 0.05526018142700195, "step": 4128 }, { "epoch": 0.5582275700065908, "grad_norm": 0.07790925353765488, "learning_rate": 1.3813731718474606e-05, "loss": 0.05934453010559082, "step": 4129 }, { "epoch": 0.5583627668024268, "grad_norm": 0.09974254667758942, "learning_rate": 1.380690885111927e-05, "loss": 0.059127092361450195, "step": 4130 }, { "epoch": 0.5584979635982628, "grad_norm": 0.1049327552318573, "learning_rate": 1.3800086232171877e-05, "loss": 0.05780649185180664, "step": 4131 }, { "epoch": 0.5586331603940986, "grad_norm": 0.2112324982881546, "learning_rate": 1.3793263863052926e-05, "loss": 0.10281717777252197, "step": 4132 }, { "epoch": 0.5587683571899346, "grad_norm": 0.1996569037437439, "learning_rate": 1.3786441745182881e-05, "loss": 0.08563566207885742, "step": 4133 }, { "epoch": 0.5589035539857705, "grad_norm": 0.07548871636390686, "learning_rate": 1.3779619879982127e-05, "loss": 0.054668426513671875, "step": 4134 }, { "epoch": 0.5590387507816065, "grad_norm": 0.09903180599212646, "learning_rate": 1.3772798268871025e-05, "loss": 0.05505460500717163, "step": 4135 }, { "epoch": 0.5591739475774424, "grad_norm": 0.06804535537958145, "learning_rate": 1.376597691326987e-05, "loss": 0.06219196319580078, "step": 4136 }, { "epoch": 0.5593091443732784, "grad_norm": 0.085710808634758, "learning_rate": 1.3759155814598898e-05, "loss": 0.052351951599121094, "step": 4137 }, { "epoch": 0.5594443411691143, "grad_norm": 0.0954032838344574, "learning_rate": 1.3752334974278308e-05, "loss": 0.056681811809539795, "step": 4138 }, { "epoch": 0.5595795379649502, "grad_norm": 0.09635797888040543, "learning_rate": 1.3745514393728225e-05, "loss": 0.081623375415802, "step": 4139 }, { "epoch": 0.5597147347607861, "grad_norm": 0.12712836265563965, "learning_rate": 1.3738694074368735e-05, "loss": 0.05469036102294922, "step": 4140 }, { "epoch": 0.5598499315566221, "grad_norm": 0.045935600996017456, "learning_rate": 1.3731874017619868e-05, "loss": 0.05090534687042236, "step": 4141 }, { "epoch": 0.5599851283524581, "grad_norm": 0.08144697546958923, "learning_rate": 1.3725054224901597e-05, "loss": 0.06747055053710938, "step": 4142 }, { "epoch": 0.560120325148294, "grad_norm": 0.11648909002542496, "learning_rate": 1.3718234697633826e-05, "loss": 0.05892801284790039, "step": 4143 }, { "epoch": 0.56025552194413, "grad_norm": 0.03973710536956787, "learning_rate": 1.3711415437236427e-05, "loss": 0.04181087017059326, "step": 4144 }, { "epoch": 0.5603907187399658, "grad_norm": 0.09953191876411438, "learning_rate": 1.3704596445129207e-05, "loss": 0.09266901016235352, "step": 4145 }, { "epoch": 0.5605259155358018, "grad_norm": 0.06498900800943375, "learning_rate": 1.369777772273192e-05, "loss": 0.06208086013793945, "step": 4146 }, { "epoch": 0.5606611123316377, "grad_norm": 0.162736177444458, "learning_rate": 1.369095927146425e-05, "loss": 0.07050085067749023, "step": 4147 }, { "epoch": 0.5607963091274737, "grad_norm": 0.11763712763786316, "learning_rate": 1.3684141092745846e-05, "loss": 0.09427165985107422, "step": 4148 }, { "epoch": 0.5609315059233096, "grad_norm": 0.0428580567240715, "learning_rate": 1.3677323187996276e-05, "loss": 0.03928828239440918, "step": 4149 }, { "epoch": 0.5610667027191456, "grad_norm": 0.08018385618925095, "learning_rate": 1.3670505558635074e-05, "loss": 0.061930060386657715, "step": 4150 }, { "epoch": 0.5612018995149815, "grad_norm": 0.11412448436021805, "learning_rate": 1.366368820608171e-05, "loss": 0.07166659832000732, "step": 4151 }, { "epoch": 0.5613370963108174, "grad_norm": 0.04032596945762634, "learning_rate": 1.365687113175558e-05, "loss": 0.044728755950927734, "step": 4152 }, { "epoch": 0.5614722931066534, "grad_norm": 0.06026893109083176, "learning_rate": 1.3650054337076049e-05, "loss": 0.07131719589233398, "step": 4153 }, { "epoch": 0.5616074899024893, "grad_norm": 0.05437815561890602, "learning_rate": 1.3643237823462398e-05, "loss": 0.05131793022155762, "step": 4154 }, { "epoch": 0.5617426866983253, "grad_norm": 0.08465273678302765, "learning_rate": 1.363642159233387e-05, "loss": 0.07694077491760254, "step": 4155 }, { "epoch": 0.5618778834941612, "grad_norm": 0.09877727180719376, "learning_rate": 1.3629605645109642e-05, "loss": 0.05626237392425537, "step": 4156 }, { "epoch": 0.5620130802899971, "grad_norm": 0.10937295854091644, "learning_rate": 1.362278998320882e-05, "loss": 0.08494925498962402, "step": 4157 }, { "epoch": 0.562148277085833, "grad_norm": 0.16738057136535645, "learning_rate": 1.3615974608050472e-05, "loss": 0.08388376235961914, "step": 4158 }, { "epoch": 0.562283473881669, "grad_norm": 0.07141170650720596, "learning_rate": 1.3609159521053588e-05, "loss": 0.06803560256958008, "step": 4159 }, { "epoch": 0.562418670677505, "grad_norm": 0.06486458331346512, "learning_rate": 1.3602344723637107e-05, "loss": 0.040422677993774414, "step": 4160 }, { "epoch": 0.5625538674733409, "grad_norm": 0.09215779602527618, "learning_rate": 1.3595530217219916e-05, "loss": 0.06354379653930664, "step": 4161 }, { "epoch": 0.5626890642691769, "grad_norm": 0.12979668378829956, "learning_rate": 1.3588716003220815e-05, "loss": 0.08090019226074219, "step": 4162 }, { "epoch": 0.5628242610650127, "grad_norm": 0.06881259381771088, "learning_rate": 1.3581902083058574e-05, "loss": 0.07419204711914062, "step": 4163 }, { "epoch": 0.5629594578608487, "grad_norm": 0.08449291437864304, "learning_rate": 1.3575088458151877e-05, "loss": 0.0575709342956543, "step": 4164 }, { "epoch": 0.5630946546566846, "grad_norm": 0.16275553405284882, "learning_rate": 1.3568275129919367e-05, "loss": 0.06701159477233887, "step": 4165 }, { "epoch": 0.5632298514525206, "grad_norm": 0.07035147398710251, "learning_rate": 1.3561462099779604e-05, "loss": 0.06704211235046387, "step": 4166 }, { "epoch": 0.5633650482483565, "grad_norm": 0.11745652556419373, "learning_rate": 1.3554649369151104e-05, "loss": 0.06928348541259766, "step": 4167 }, { "epoch": 0.5635002450441925, "grad_norm": 0.16336382925510406, "learning_rate": 1.3547836939452315e-05, "loss": 0.05987215042114258, "step": 4168 }, { "epoch": 0.5636354418400283, "grad_norm": 0.043371912091970444, "learning_rate": 1.3541024812101615e-05, "loss": 0.04890775680541992, "step": 4169 }, { "epoch": 0.5637706386358643, "grad_norm": 0.04410286992788315, "learning_rate": 1.3534212988517339e-05, "loss": 0.04256725311279297, "step": 4170 }, { "epoch": 0.5639058354317003, "grad_norm": 0.11862429976463318, "learning_rate": 1.3527401470117726e-05, "loss": 0.07047128677368164, "step": 4171 }, { "epoch": 0.5640410322275362, "grad_norm": 0.15285752713680267, "learning_rate": 1.3520590258320981e-05, "loss": 0.06650954484939575, "step": 4172 }, { "epoch": 0.5641762290233722, "grad_norm": 0.05751129612326622, "learning_rate": 1.3513779354545235e-05, "loss": 0.036438584327697754, "step": 4173 }, { "epoch": 0.5643114258192081, "grad_norm": 0.0486147366464138, "learning_rate": 1.3506968760208557e-05, "loss": 0.04313361644744873, "step": 4174 }, { "epoch": 0.564446622615044, "grad_norm": 0.10829327255487442, "learning_rate": 1.3500158476728938e-05, "loss": 0.05553328990936279, "step": 4175 }, { "epoch": 0.5645818194108799, "grad_norm": 0.06074419245123863, "learning_rate": 1.3493348505524325e-05, "loss": 0.05710744857788086, "step": 4176 }, { "epoch": 0.5647170162067159, "grad_norm": 0.13050898909568787, "learning_rate": 1.3486538848012586e-05, "loss": 0.06959700584411621, "step": 4177 }, { "epoch": 0.5648522130025518, "grad_norm": 0.06630608439445496, "learning_rate": 1.3479729505611532e-05, "loss": 0.06532549858093262, "step": 4178 }, { "epoch": 0.5649874097983878, "grad_norm": 0.1411292403936386, "learning_rate": 1.3472920479738906e-05, "loss": 0.10419750213623047, "step": 4179 }, { "epoch": 0.5651226065942238, "grad_norm": 0.0672093853354454, "learning_rate": 1.346611177181237e-05, "loss": 0.06861841678619385, "step": 4180 }, { "epoch": 0.5652578033900597, "grad_norm": 0.10425559431314468, "learning_rate": 1.3459303383249547e-05, "loss": 0.09084463119506836, "step": 4181 }, { "epoch": 0.5653930001858956, "grad_norm": 0.04203963279724121, "learning_rate": 1.3452495315467975e-05, "loss": 0.04257559776306152, "step": 4182 }, { "epoch": 0.5655281969817315, "grad_norm": 0.11237596720457077, "learning_rate": 1.3445687569885132e-05, "loss": 0.05130720138549805, "step": 4183 }, { "epoch": 0.5656633937775675, "grad_norm": 0.11588342487812042, "learning_rate": 1.3438880147918429e-05, "loss": 0.06269454956054688, "step": 4184 }, { "epoch": 0.5657985905734034, "grad_norm": 0.05830025300383568, "learning_rate": 1.3432073050985201e-05, "loss": 0.055112600326538086, "step": 4185 }, { "epoch": 0.5659337873692394, "grad_norm": 0.09931790828704834, "learning_rate": 1.3425266280502721e-05, "loss": 0.05435037612915039, "step": 4186 }, { "epoch": 0.5660689841650753, "grad_norm": 0.12272310256958008, "learning_rate": 1.3418459837888202e-05, "loss": 0.05464887619018555, "step": 4187 }, { "epoch": 0.5662041809609112, "grad_norm": 0.07456616312265396, "learning_rate": 1.3411653724558784e-05, "loss": 0.06966352462768555, "step": 4188 }, { "epoch": 0.5663393777567471, "grad_norm": 0.0517207533121109, "learning_rate": 1.3404847941931523e-05, "loss": 0.05562591552734375, "step": 4189 }, { "epoch": 0.5664745745525831, "grad_norm": 0.050687264651060104, "learning_rate": 1.339804249142343e-05, "loss": 0.040265798568725586, "step": 4190 }, { "epoch": 0.5666097713484191, "grad_norm": 0.058872658759355545, "learning_rate": 1.3391237374451429e-05, "loss": 0.05400204658508301, "step": 4191 }, { "epoch": 0.566744968144255, "grad_norm": 0.08813443034887314, "learning_rate": 1.3384432592432388e-05, "loss": 0.07317638397216797, "step": 4192 }, { "epoch": 0.566880164940091, "grad_norm": 0.11647678166627884, "learning_rate": 1.3377628146783102e-05, "loss": 0.06634187698364258, "step": 4193 }, { "epoch": 0.5670153617359268, "grad_norm": 0.10847894847393036, "learning_rate": 1.3370824038920281e-05, "loss": 0.062330007553100586, "step": 4194 }, { "epoch": 0.5671505585317628, "grad_norm": 0.05624714866280556, "learning_rate": 1.3364020270260586e-05, "loss": 0.054793477058410645, "step": 4195 }, { "epoch": 0.5672857553275987, "grad_norm": 0.11303391307592392, "learning_rate": 1.335721684222059e-05, "loss": 0.07008886337280273, "step": 4196 }, { "epoch": 0.5674209521234347, "grad_norm": 0.042140375822782516, "learning_rate": 1.3350413756216816e-05, "loss": 0.053290605545043945, "step": 4197 }, { "epoch": 0.5675561489192706, "grad_norm": 0.05239458754658699, "learning_rate": 1.334361101366569e-05, "loss": 0.041712045669555664, "step": 4198 }, { "epoch": 0.5676913457151066, "grad_norm": 0.16714490950107574, "learning_rate": 1.3336808615983582e-05, "loss": 0.08261919021606445, "step": 4199 }, { "epoch": 0.5678265425109424, "grad_norm": 0.08035875111818314, "learning_rate": 1.3330006564586791e-05, "loss": 0.07848501205444336, "step": 4200 }, { "epoch": 0.5679617393067784, "grad_norm": 0.09957578033208847, "learning_rate": 1.3323204860891539e-05, "loss": 0.06833028793334961, "step": 4201 }, { "epoch": 0.5680969361026144, "grad_norm": 0.08059883117675781, "learning_rate": 1.3316403506313981e-05, "loss": 0.051741600036621094, "step": 4202 }, { "epoch": 0.5682321328984503, "grad_norm": 0.05721670016646385, "learning_rate": 1.3309602502270184e-05, "loss": 0.04900932312011719, "step": 4203 }, { "epoch": 0.5683673296942863, "grad_norm": 0.06999655067920685, "learning_rate": 1.3302801850176161e-05, "loss": 0.056043148040771484, "step": 4204 }, { "epoch": 0.5685025264901222, "grad_norm": 0.063369520008564, "learning_rate": 1.3296001551447848e-05, "loss": 0.07485222816467285, "step": 4205 }, { "epoch": 0.5686377232859581, "grad_norm": 0.06795758008956909, "learning_rate": 1.32892016075011e-05, "loss": 0.07515382766723633, "step": 4206 }, { "epoch": 0.568772920081794, "grad_norm": 0.22410255670547485, "learning_rate": 1.3282402019751694e-05, "loss": 0.07778263092041016, "step": 4207 }, { "epoch": 0.56890811687763, "grad_norm": 0.07383868098258972, "learning_rate": 1.327560278961535e-05, "loss": 0.05089735984802246, "step": 4208 }, { "epoch": 0.569043313673466, "grad_norm": 0.09812065213918686, "learning_rate": 1.3268803918507699e-05, "loss": 0.07952666282653809, "step": 4209 }, { "epoch": 0.5691785104693019, "grad_norm": 0.08895250409841537, "learning_rate": 1.3262005407844306e-05, "loss": 0.06500053405761719, "step": 4210 }, { "epoch": 0.5693137072651379, "grad_norm": 0.11505065858364105, "learning_rate": 1.325520725904066e-05, "loss": 0.0758066177368164, "step": 4211 }, { "epoch": 0.5694489040609737, "grad_norm": 0.08348774164915085, "learning_rate": 1.3248409473512158e-05, "loss": 0.05151009559631348, "step": 4212 }, { "epoch": 0.5695841008568097, "grad_norm": 0.07074927538633347, "learning_rate": 1.3241612052674146e-05, "loss": 0.05847978591918945, "step": 4213 }, { "epoch": 0.5697192976526456, "grad_norm": 0.08385651558637619, "learning_rate": 1.3234814997941883e-05, "loss": 0.05460691452026367, "step": 4214 }, { "epoch": 0.5698544944484816, "grad_norm": 0.11835520714521408, "learning_rate": 1.322801831073055e-05, "loss": 0.07225751876831055, "step": 4215 }, { "epoch": 0.5699896912443175, "grad_norm": 0.0451829731464386, "learning_rate": 1.322122199245526e-05, "loss": 0.04921305179595947, "step": 4216 }, { "epoch": 0.5701248880401535, "grad_norm": 0.09435160458087921, "learning_rate": 1.321442604453103e-05, "loss": 0.0588705837726593, "step": 4217 }, { "epoch": 0.5702600848359893, "grad_norm": 0.11451330780982971, "learning_rate": 1.320763046837282e-05, "loss": 0.0531536340713501, "step": 4218 }, { "epoch": 0.5703952816318253, "grad_norm": 0.1146269217133522, "learning_rate": 1.3200835265395504e-05, "loss": 0.08288764953613281, "step": 4219 }, { "epoch": 0.5705304784276612, "grad_norm": 0.045473698526620865, "learning_rate": 1.3194040437013885e-05, "loss": 0.04354667663574219, "step": 4220 }, { "epoch": 0.5706656752234972, "grad_norm": 0.10906580090522766, "learning_rate": 1.3187245984642673e-05, "loss": 0.07678627967834473, "step": 4221 }, { "epoch": 0.5708008720193332, "grad_norm": 0.04136877506971359, "learning_rate": 1.3180451909696517e-05, "loss": 0.045319318771362305, "step": 4222 }, { "epoch": 0.5709360688151691, "grad_norm": 0.1619584858417511, "learning_rate": 1.3173658213589972e-05, "loss": 0.10235190391540527, "step": 4223 }, { "epoch": 0.5710712656110051, "grad_norm": 0.04617733135819435, "learning_rate": 1.3166864897737526e-05, "loss": 0.043419480323791504, "step": 4224 }, { "epoch": 0.5712064624068409, "grad_norm": 0.056700531393289566, "learning_rate": 1.3160071963553593e-05, "loss": 0.04490089416503906, "step": 4225 }, { "epoch": 0.5713416592026769, "grad_norm": 0.07429137825965881, "learning_rate": 1.315327941245248e-05, "loss": 0.08240365982055664, "step": 4226 }, { "epoch": 0.5714768559985128, "grad_norm": 0.09860310703516006, "learning_rate": 1.3146487245848445e-05, "loss": 0.05005502700805664, "step": 4227 }, { "epoch": 0.5716120527943488, "grad_norm": 0.06659059226512909, "learning_rate": 1.3139695465155645e-05, "loss": 0.04583883285522461, "step": 4228 }, { "epoch": 0.5717472495901847, "grad_norm": 0.09478210657835007, "learning_rate": 1.3132904071788177e-05, "loss": 0.0897223949432373, "step": 4229 }, { "epoch": 0.5718824463860207, "grad_norm": 0.07268854975700378, "learning_rate": 1.3126113067160031e-05, "loss": 0.052277326583862305, "step": 4230 }, { "epoch": 0.5720176431818565, "grad_norm": 0.09079897403717041, "learning_rate": 1.3119322452685139e-05, "loss": 0.051481008529663086, "step": 4231 }, { "epoch": 0.5721528399776925, "grad_norm": 0.08313412219285965, "learning_rate": 1.3112532229777344e-05, "loss": 0.06950795650482178, "step": 4232 }, { "epoch": 0.5722880367735285, "grad_norm": 0.12661270797252655, "learning_rate": 1.3105742399850399e-05, "loss": 0.05651652812957764, "step": 4233 }, { "epoch": 0.5724232335693644, "grad_norm": 0.1941634565591812, "learning_rate": 1.3098952964317996e-05, "loss": 0.07221412658691406, "step": 4234 }, { "epoch": 0.5725584303652004, "grad_norm": 0.05457120016217232, "learning_rate": 1.3092163924593717e-05, "loss": 0.07674026489257812, "step": 4235 }, { "epoch": 0.5726936271610363, "grad_norm": 0.1400396078824997, "learning_rate": 1.308537528209108e-05, "loss": 0.0699148178100586, "step": 4236 }, { "epoch": 0.5728288239568722, "grad_norm": 0.07919779419898987, "learning_rate": 1.3078587038223525e-05, "loss": 0.07209181785583496, "step": 4237 }, { "epoch": 0.5729640207527081, "grad_norm": 0.07076119631528854, "learning_rate": 1.3071799194404392e-05, "loss": 0.04361701011657715, "step": 4238 }, { "epoch": 0.5730992175485441, "grad_norm": 0.05911535769701004, "learning_rate": 1.3065011752046955e-05, "loss": 0.0661616325378418, "step": 4239 }, { "epoch": 0.57323441434438, "grad_norm": 0.11360391229391098, "learning_rate": 1.3058224712564382e-05, "loss": 0.10242629051208496, "step": 4240 }, { "epoch": 0.573369611140216, "grad_norm": 0.19428549706935883, "learning_rate": 1.305143807736978e-05, "loss": 0.08803272247314453, "step": 4241 }, { "epoch": 0.573504807936052, "grad_norm": 0.07120192050933838, "learning_rate": 1.3044651847876163e-05, "loss": 0.051282644271850586, "step": 4242 }, { "epoch": 0.5736400047318878, "grad_norm": 0.06361155956983566, "learning_rate": 1.3037866025496466e-05, "loss": 0.058825016021728516, "step": 4243 }, { "epoch": 0.5737752015277238, "grad_norm": 0.04544292762875557, "learning_rate": 1.3031080611643514e-05, "loss": 0.061600685119628906, "step": 4244 }, { "epoch": 0.5739103983235597, "grad_norm": 0.08656535297632217, "learning_rate": 1.3024295607730083e-05, "loss": 0.08835315704345703, "step": 4245 }, { "epoch": 0.5740455951193957, "grad_norm": 0.13732722401618958, "learning_rate": 1.301751101516884e-05, "loss": 0.06532704830169678, "step": 4246 }, { "epoch": 0.5741807919152316, "grad_norm": 0.08534975349903107, "learning_rate": 1.3010726835372377e-05, "loss": 0.06534075736999512, "step": 4247 }, { "epoch": 0.5743159887110676, "grad_norm": 0.06839689612388611, "learning_rate": 1.30039430697532e-05, "loss": 0.06026506423950195, "step": 4248 }, { "epoch": 0.5744511855069034, "grad_norm": 0.07596047967672348, "learning_rate": 1.2997159719723713e-05, "loss": 0.04868745803833008, "step": 4249 }, { "epoch": 0.5745863823027394, "grad_norm": 0.142812579870224, "learning_rate": 1.2990376786696254e-05, "loss": 0.06736147403717041, "step": 4250 }, { "epoch": 0.5747215790985754, "grad_norm": 0.11198151856660843, "learning_rate": 1.2983594272083063e-05, "loss": 0.06825399398803711, "step": 4251 }, { "epoch": 0.5748567758944113, "grad_norm": 0.06062844395637512, "learning_rate": 1.2976812177296307e-05, "loss": 0.0415073037147522, "step": 4252 }, { "epoch": 0.5749919726902473, "grad_norm": 0.2237558513879776, "learning_rate": 1.2970030503748039e-05, "loss": 0.08611869812011719, "step": 4253 }, { "epoch": 0.5751271694860832, "grad_norm": 0.0790284126996994, "learning_rate": 1.2963249252850242e-05, "loss": 0.053389549255371094, "step": 4254 }, { "epoch": 0.5752623662819191, "grad_norm": 0.08783169835805893, "learning_rate": 1.295646842601481e-05, "loss": 0.047675132751464844, "step": 4255 }, { "epoch": 0.575397563077755, "grad_norm": 0.11812038719654083, "learning_rate": 1.294968802465355e-05, "loss": 0.04704391956329346, "step": 4256 }, { "epoch": 0.575532759873591, "grad_norm": 0.06148853898048401, "learning_rate": 1.2942908050178187e-05, "loss": 0.05360066890716553, "step": 4257 }, { "epoch": 0.5756679566694269, "grad_norm": 0.11899429559707642, "learning_rate": 1.293612850400033e-05, "loss": 0.0918264389038086, "step": 4258 }, { "epoch": 0.5758031534652629, "grad_norm": 0.09223272651433945, "learning_rate": 1.2929349387531525e-05, "loss": 0.09097796678543091, "step": 4259 }, { "epoch": 0.5759383502610989, "grad_norm": 0.17729069292545319, "learning_rate": 1.2922570702183217e-05, "loss": 0.0759168490767479, "step": 4260 }, { "epoch": 0.5760735470569347, "grad_norm": 0.06010180711746216, "learning_rate": 1.2915792449366768e-05, "loss": 0.05062532424926758, "step": 4261 }, { "epoch": 0.5762087438527707, "grad_norm": 0.12164776772260666, "learning_rate": 1.2909014630493451e-05, "loss": 0.08553028106689453, "step": 4262 }, { "epoch": 0.5763439406486066, "grad_norm": 0.07825644314289093, "learning_rate": 1.2902237246974432e-05, "loss": 0.06135845184326172, "step": 4263 }, { "epoch": 0.5764791374444426, "grad_norm": 0.04814029857516289, "learning_rate": 1.289546030022081e-05, "loss": 0.03315305709838867, "step": 4264 }, { "epoch": 0.5766143342402785, "grad_norm": 0.0693979263305664, "learning_rate": 1.2888683791643572e-05, "loss": 0.04173469543457031, "step": 4265 }, { "epoch": 0.5767495310361145, "grad_norm": 0.10865487158298492, "learning_rate": 1.2881907722653633e-05, "loss": 0.06183052062988281, "step": 4266 }, { "epoch": 0.5768847278319503, "grad_norm": 0.09468737989664078, "learning_rate": 1.2875132094661796e-05, "loss": 0.04216897487640381, "step": 4267 }, { "epoch": 0.5770199246277863, "grad_norm": 0.10020890086889267, "learning_rate": 1.2868356909078787e-05, "loss": 0.07439160346984863, "step": 4268 }, { "epoch": 0.5771551214236222, "grad_norm": 0.07731208205223083, "learning_rate": 1.286158216731524e-05, "loss": 0.04565715789794922, "step": 4269 }, { "epoch": 0.5772903182194582, "grad_norm": 0.07695096731185913, "learning_rate": 1.2854807870781686e-05, "loss": 0.058754920959472656, "step": 4270 }, { "epoch": 0.5774255150152942, "grad_norm": 0.0921509712934494, "learning_rate": 1.284803402088858e-05, "loss": 0.06733107566833496, "step": 4271 }, { "epoch": 0.5775607118111301, "grad_norm": 0.17220793664455414, "learning_rate": 1.284126061904626e-05, "loss": 0.08247411251068115, "step": 4272 }, { "epoch": 0.5776959086069661, "grad_norm": 0.04844580218195915, "learning_rate": 1.283448766666499e-05, "loss": 0.05113554000854492, "step": 4273 }, { "epoch": 0.5778311054028019, "grad_norm": 0.0884275957942009, "learning_rate": 1.282771516515494e-05, "loss": 0.08338689804077148, "step": 4274 }, { "epoch": 0.5779663021986379, "grad_norm": 0.05612083524465561, "learning_rate": 1.282094311592618e-05, "loss": 0.0509493350982666, "step": 4275 }, { "epoch": 0.5781014989944738, "grad_norm": 0.0678568035364151, "learning_rate": 1.2814171520388676e-05, "loss": 0.04532456398010254, "step": 4276 }, { "epoch": 0.5782366957903098, "grad_norm": 0.08181630820035934, "learning_rate": 1.2807400379952318e-05, "loss": 0.05478405952453613, "step": 4277 }, { "epoch": 0.5783718925861457, "grad_norm": 0.0537908598780632, "learning_rate": 1.2800629696026895e-05, "loss": 0.06338620185852051, "step": 4278 }, { "epoch": 0.5785070893819817, "grad_norm": 0.15645410120487213, "learning_rate": 1.2793859470022098e-05, "loss": 0.08153152465820312, "step": 4279 }, { "epoch": 0.5786422861778175, "grad_norm": 0.05124976485967636, "learning_rate": 1.278708970334753e-05, "loss": 0.04568624496459961, "step": 4280 }, { "epoch": 0.5787774829736535, "grad_norm": 0.08156215399503708, "learning_rate": 1.2780320397412678e-05, "loss": 0.062059640884399414, "step": 4281 }, { "epoch": 0.5789126797694895, "grad_norm": 0.07532893121242523, "learning_rate": 1.2773551553626957e-05, "loss": 0.06192612648010254, "step": 4282 }, { "epoch": 0.5790478765653254, "grad_norm": 0.11657476425170898, "learning_rate": 1.2766783173399675e-05, "loss": 0.06608283519744873, "step": 4283 }, { "epoch": 0.5791830733611614, "grad_norm": 0.054533701390028, "learning_rate": 1.276001525814005e-05, "loss": 0.04305529594421387, "step": 4284 }, { "epoch": 0.5793182701569973, "grad_norm": 0.04621422290802002, "learning_rate": 1.2753247809257192e-05, "loss": 0.045116424560546875, "step": 4285 }, { "epoch": 0.5794534669528332, "grad_norm": 0.058378081768751144, "learning_rate": 1.2746480828160119e-05, "loss": 0.06225442886352539, "step": 4286 }, { "epoch": 0.5795886637486691, "grad_norm": 0.09055574238300323, "learning_rate": 1.2739714316257753e-05, "loss": 0.06014537811279297, "step": 4287 }, { "epoch": 0.5797238605445051, "grad_norm": 0.09533646702766418, "learning_rate": 1.273294827495892e-05, "loss": 0.07230591773986816, "step": 4288 }, { "epoch": 0.579859057340341, "grad_norm": 0.07357525825500488, "learning_rate": 1.2726182705672352e-05, "loss": 0.0585169792175293, "step": 4289 }, { "epoch": 0.579994254136177, "grad_norm": 0.09403366595506668, "learning_rate": 1.271941760980667e-05, "loss": 0.06914520263671875, "step": 4290 }, { "epoch": 0.580129450932013, "grad_norm": 0.08954782783985138, "learning_rate": 1.2712652988770396e-05, "loss": 0.07144689559936523, "step": 4291 }, { "epoch": 0.5802646477278488, "grad_norm": 0.11628621816635132, "learning_rate": 1.2705888843971967e-05, "loss": 0.07306385040283203, "step": 4292 }, { "epoch": 0.5803998445236848, "grad_norm": 0.07146918028593063, "learning_rate": 1.2699125176819717e-05, "loss": 0.05750393867492676, "step": 4293 }, { "epoch": 0.5805350413195207, "grad_norm": 0.0771651491522789, "learning_rate": 1.269236198872188e-05, "loss": 0.07769155502319336, "step": 4294 }, { "epoch": 0.5806702381153567, "grad_norm": 0.04131464287638664, "learning_rate": 1.2685599281086577e-05, "loss": 0.04571548104286194, "step": 4295 }, { "epoch": 0.5808054349111926, "grad_norm": 0.0727495551109314, "learning_rate": 1.2678837055321849e-05, "loss": 0.05162614583969116, "step": 4296 }, { "epoch": 0.5809406317070286, "grad_norm": 0.08859908580780029, "learning_rate": 1.267207531283562e-05, "loss": 0.07144522666931152, "step": 4297 }, { "epoch": 0.5810758285028644, "grad_norm": 0.12046671658754349, "learning_rate": 1.266531405503573e-05, "loss": 0.09143280982971191, "step": 4298 }, { "epoch": 0.5812110252987004, "grad_norm": 0.05022713169455528, "learning_rate": 1.26585532833299e-05, "loss": 0.045766353607177734, "step": 4299 }, { "epoch": 0.5813462220945363, "grad_norm": 0.05693650245666504, "learning_rate": 1.2651792999125763e-05, "loss": 0.07350161671638489, "step": 4300 }, { "epoch": 0.5814814188903723, "grad_norm": 0.08686058968305588, "learning_rate": 1.2645033203830846e-05, "loss": 0.06072640419006348, "step": 4301 }, { "epoch": 0.5816166156862083, "grad_norm": 0.0378505140542984, "learning_rate": 1.2638273898852573e-05, "loss": 0.03803896903991699, "step": 4302 }, { "epoch": 0.5817518124820442, "grad_norm": 0.07793077826499939, "learning_rate": 1.2631515085598275e-05, "loss": 0.06520962715148926, "step": 4303 }, { "epoch": 0.5818870092778801, "grad_norm": 0.07835107296705246, "learning_rate": 1.262475676547516e-05, "loss": 0.06730532646179199, "step": 4304 }, { "epoch": 0.582022206073716, "grad_norm": 0.04331303760409355, "learning_rate": 1.2617998939890352e-05, "loss": 0.03670799732208252, "step": 4305 }, { "epoch": 0.582157402869552, "grad_norm": 0.061630748212337494, "learning_rate": 1.261124161025087e-05, "loss": 0.07912278175354004, "step": 4306 }, { "epoch": 0.5822925996653879, "grad_norm": 0.08987826108932495, "learning_rate": 1.260448477796362e-05, "loss": 0.07572793960571289, "step": 4307 }, { "epoch": 0.5824277964612239, "grad_norm": 0.07333193719387054, "learning_rate": 1.259772844443542e-05, "loss": 0.05310225486755371, "step": 4308 }, { "epoch": 0.5825629932570598, "grad_norm": 0.07325128465890884, "learning_rate": 1.2590972611072964e-05, "loss": 0.060450196266174316, "step": 4309 }, { "epoch": 0.5826981900528957, "grad_norm": 0.07857516407966614, "learning_rate": 1.2584217279282855e-05, "loss": 0.06274294853210449, "step": 4310 }, { "epoch": 0.5828333868487316, "grad_norm": 0.21415475010871887, "learning_rate": 1.2577462450471593e-05, "loss": 0.07872295379638672, "step": 4311 }, { "epoch": 0.5829685836445676, "grad_norm": 0.06408897042274475, "learning_rate": 1.2570708126045574e-05, "loss": 0.06525969505310059, "step": 4312 }, { "epoch": 0.5831037804404036, "grad_norm": 0.08825203031301498, "learning_rate": 1.256395430741107e-05, "loss": 0.05959606170654297, "step": 4313 }, { "epoch": 0.5832389772362395, "grad_norm": 0.12151241302490234, "learning_rate": 1.2557200995974268e-05, "loss": 0.08298683166503906, "step": 4314 }, { "epoch": 0.5833741740320755, "grad_norm": 0.2041746973991394, "learning_rate": 1.2550448193141248e-05, "loss": 0.06635129451751709, "step": 4315 }, { "epoch": 0.5835093708279114, "grad_norm": 0.04379083216190338, "learning_rate": 1.2543695900317977e-05, "loss": 0.05576348304748535, "step": 4316 }, { "epoch": 0.5836445676237473, "grad_norm": 0.06385776400566101, "learning_rate": 1.2536944118910323e-05, "loss": 0.055294156074523926, "step": 4317 }, { "epoch": 0.5837797644195832, "grad_norm": 0.08738244324922562, "learning_rate": 1.2530192850324032e-05, "loss": 0.06669628620147705, "step": 4318 }, { "epoch": 0.5839149612154192, "grad_norm": 0.1625872254371643, "learning_rate": 1.252344209596476e-05, "loss": 0.07272851467132568, "step": 4319 }, { "epoch": 0.5840501580112551, "grad_norm": 0.05684758350253105, "learning_rate": 1.251669185723805e-05, "loss": 0.04969900846481323, "step": 4320 }, { "epoch": 0.5841853548070911, "grad_norm": 0.06663138419389725, "learning_rate": 1.2509942135549344e-05, "loss": 0.06862401962280273, "step": 4321 }, { "epoch": 0.5843205516029271, "grad_norm": 0.10185294598340988, "learning_rate": 1.250319293230396e-05, "loss": 0.03975176811218262, "step": 4322 }, { "epoch": 0.5844557483987629, "grad_norm": 0.16229085624217987, "learning_rate": 1.2496444248907121e-05, "loss": 0.053262948989868164, "step": 4323 }, { "epoch": 0.5845909451945989, "grad_norm": 0.22272610664367676, "learning_rate": 1.2489696086763939e-05, "loss": 0.10411542654037476, "step": 4324 }, { "epoch": 0.5847261419904348, "grad_norm": 0.15241949260234833, "learning_rate": 1.2482948447279417e-05, "loss": 0.07978057861328125, "step": 4325 }, { "epoch": 0.5848613387862708, "grad_norm": 0.14279307425022125, "learning_rate": 1.2476201331858458e-05, "loss": 0.07953023910522461, "step": 4326 }, { "epoch": 0.5849965355821067, "grad_norm": 0.03669939562678337, "learning_rate": 1.2469454741905839e-05, "loss": 0.045300424098968506, "step": 4327 }, { "epoch": 0.5851317323779427, "grad_norm": 0.06310693919658661, "learning_rate": 1.2462708678826233e-05, "loss": 0.08255577087402344, "step": 4328 }, { "epoch": 0.5852669291737785, "grad_norm": 0.07301875203847885, "learning_rate": 1.245596314402421e-05, "loss": 0.06145954132080078, "step": 4329 }, { "epoch": 0.5854021259696145, "grad_norm": 0.085129015147686, "learning_rate": 1.2449218138904225e-05, "loss": 0.06589889526367188, "step": 4330 }, { "epoch": 0.5855373227654505, "grad_norm": 0.06567202508449554, "learning_rate": 1.2442473664870636e-05, "loss": 0.06220436096191406, "step": 4331 }, { "epoch": 0.5856725195612864, "grad_norm": 0.11292267590761185, "learning_rate": 1.2435729723327661e-05, "loss": 0.07966041564941406, "step": 4332 }, { "epoch": 0.5858077163571224, "grad_norm": 0.11293935775756836, "learning_rate": 1.2428986315679433e-05, "loss": 0.10083329677581787, "step": 4333 }, { "epoch": 0.5859429131529583, "grad_norm": 0.06286844611167908, "learning_rate": 1.2422243443329962e-05, "loss": 0.07518863677978516, "step": 4334 }, { "epoch": 0.5860781099487942, "grad_norm": 0.13119444251060486, "learning_rate": 1.241550110768316e-05, "loss": 0.08473682403564453, "step": 4335 }, { "epoch": 0.5862133067446301, "grad_norm": 0.05801859125494957, "learning_rate": 1.2408759310142803e-05, "loss": 0.051717281341552734, "step": 4336 }, { "epoch": 0.5863485035404661, "grad_norm": 0.14584961533546448, "learning_rate": 1.2402018052112576e-05, "loss": 0.08333253860473633, "step": 4337 }, { "epoch": 0.586483700336302, "grad_norm": 0.08096270263195038, "learning_rate": 1.2395277334996045e-05, "loss": 0.0552973747253418, "step": 4338 }, { "epoch": 0.586618897132138, "grad_norm": 0.07098846137523651, "learning_rate": 1.2388537160196663e-05, "loss": 0.05774843692779541, "step": 4339 }, { "epoch": 0.586754093927974, "grad_norm": 0.06354069709777832, "learning_rate": 1.2381797529117776e-05, "loss": 0.060511231422424316, "step": 4340 }, { "epoch": 0.5868892907238098, "grad_norm": 0.09636250138282776, "learning_rate": 1.23750584431626e-05, "loss": 0.06908249855041504, "step": 4341 }, { "epoch": 0.5870244875196458, "grad_norm": 0.11914902925491333, "learning_rate": 1.236831990373425e-05, "loss": 0.09679985046386719, "step": 4342 }, { "epoch": 0.5871596843154817, "grad_norm": 0.0579880066215992, "learning_rate": 1.2361581912235736e-05, "loss": 0.03981423377990723, "step": 4343 }, { "epoch": 0.5872948811113177, "grad_norm": 0.0891919881105423, "learning_rate": 1.235484447006994e-05, "loss": 0.059975266456604004, "step": 4344 }, { "epoch": 0.5874300779071536, "grad_norm": 0.060457512736320496, "learning_rate": 1.2348107578639627e-05, "loss": 0.04759836196899414, "step": 4345 }, { "epoch": 0.5875652747029896, "grad_norm": 0.040965937077999115, "learning_rate": 1.2341371239347454e-05, "loss": 0.03705120086669922, "step": 4346 }, { "epoch": 0.5877004714988254, "grad_norm": 0.0886962041258812, "learning_rate": 1.233463545359597e-05, "loss": 0.08275675773620605, "step": 4347 }, { "epoch": 0.5878356682946614, "grad_norm": 0.054824456572532654, "learning_rate": 1.23279002227876e-05, "loss": 0.050445556640625, "step": 4348 }, { "epoch": 0.5879708650904973, "grad_norm": 0.06049030274152756, "learning_rate": 1.2321165548324655e-05, "loss": 0.04991459846496582, "step": 4349 }, { "epoch": 0.5881060618863333, "grad_norm": 0.082413449883461, "learning_rate": 1.2314431431609323e-05, "loss": 0.05001091957092285, "step": 4350 }, { "epoch": 0.5882412586821693, "grad_norm": 0.06685661524534225, "learning_rate": 1.2307697874043687e-05, "loss": 0.0806427001953125, "step": 4351 }, { "epoch": 0.5883764554780052, "grad_norm": 0.13062191009521484, "learning_rate": 1.2300964877029712e-05, "loss": 0.04336404800415039, "step": 4352 }, { "epoch": 0.5885116522738411, "grad_norm": 0.19655302166938782, "learning_rate": 1.2294232441969246e-05, "loss": 0.06818056106567383, "step": 4353 }, { "epoch": 0.588646849069677, "grad_norm": 0.12404762208461761, "learning_rate": 1.2287500570264017e-05, "loss": 0.061942100524902344, "step": 4354 }, { "epoch": 0.588782045865513, "grad_norm": 0.0574922151863575, "learning_rate": 1.2280769263315628e-05, "loss": 0.05584937334060669, "step": 4355 }, { "epoch": 0.5889172426613489, "grad_norm": 0.08373738825321198, "learning_rate": 1.2274038522525577e-05, "loss": 0.0830233097076416, "step": 4356 }, { "epoch": 0.5890524394571849, "grad_norm": 0.08427920192480087, "learning_rate": 1.2267308349295246e-05, "loss": 0.05469787120819092, "step": 4357 }, { "epoch": 0.5891876362530208, "grad_norm": 0.18004299700260162, "learning_rate": 1.2260578745025892e-05, "loss": 0.088470458984375, "step": 4358 }, { "epoch": 0.5893228330488567, "grad_norm": 0.04521116614341736, "learning_rate": 1.225384971111865e-05, "loss": 0.03809046745300293, "step": 4359 }, { "epoch": 0.5894580298446926, "grad_norm": 0.07919525355100632, "learning_rate": 1.224712124897454e-05, "loss": 0.04444409906864166, "step": 4360 }, { "epoch": 0.5895932266405286, "grad_norm": 0.0926135927438736, "learning_rate": 1.2240393359994466e-05, "loss": 0.04586225748062134, "step": 4361 }, { "epoch": 0.5897284234363646, "grad_norm": 0.04971746355295181, "learning_rate": 1.2233666045579209e-05, "loss": 0.040291011333465576, "step": 4362 }, { "epoch": 0.5898636202322005, "grad_norm": 0.12292158603668213, "learning_rate": 1.222693930712944e-05, "loss": 0.05761420726776123, "step": 4363 }, { "epoch": 0.5899988170280365, "grad_norm": 0.1523953378200531, "learning_rate": 1.2220213146045691e-05, "loss": 0.07309556007385254, "step": 4364 }, { "epoch": 0.5901340138238724, "grad_norm": 0.13882263004779816, "learning_rate": 1.2213487563728389e-05, "loss": 0.06578588485717773, "step": 4365 }, { "epoch": 0.5902692106197083, "grad_norm": 0.12734755873680115, "learning_rate": 1.220676256157783e-05, "loss": 0.05518484115600586, "step": 4366 }, { "epoch": 0.5904044074155442, "grad_norm": 0.0852704793214798, "learning_rate": 1.2200038140994212e-05, "loss": 0.06588315963745117, "step": 4367 }, { "epoch": 0.5905396042113802, "grad_norm": 0.12388197332620621, "learning_rate": 1.2193314303377578e-05, "loss": 0.06680738925933838, "step": 4368 }, { "epoch": 0.5906748010072161, "grad_norm": 0.07882075756788254, "learning_rate": 1.2186591050127874e-05, "loss": 0.08265852928161621, "step": 4369 }, { "epoch": 0.5908099978030521, "grad_norm": 0.11623174697160721, "learning_rate": 1.2179868382644916e-05, "loss": 0.11804866790771484, "step": 4370 }, { "epoch": 0.5909451945988881, "grad_norm": 0.15952181816101074, "learning_rate": 1.2173146302328396e-05, "loss": 0.06362748146057129, "step": 4371 }, { "epoch": 0.5910803913947239, "grad_norm": 0.12032456696033478, "learning_rate": 1.21664248105779e-05, "loss": 0.07953834533691406, "step": 4372 }, { "epoch": 0.5912155881905599, "grad_norm": 0.13278843462467194, "learning_rate": 1.2159703908792858e-05, "loss": 0.04867291450500488, "step": 4373 }, { "epoch": 0.5913507849863958, "grad_norm": 0.17047610878944397, "learning_rate": 1.2152983598372613e-05, "loss": 0.050318002700805664, "step": 4374 }, { "epoch": 0.5914859817822318, "grad_norm": 0.13333120942115784, "learning_rate": 1.2146263880716366e-05, "loss": 0.052118778228759766, "step": 4375 }, { "epoch": 0.5916211785780677, "grad_norm": 0.05238313227891922, "learning_rate": 1.2139544757223194e-05, "loss": 0.03557056188583374, "step": 4376 }, { "epoch": 0.5917563753739037, "grad_norm": 0.0734664723277092, "learning_rate": 1.2132826229292066e-05, "loss": 0.07580423355102539, "step": 4377 }, { "epoch": 0.5918915721697395, "grad_norm": 0.058580432087183, "learning_rate": 1.2126108298321798e-05, "loss": 0.04852795600891113, "step": 4378 }, { "epoch": 0.5920267689655755, "grad_norm": 0.1077832356095314, "learning_rate": 1.2119390965711107e-05, "loss": 0.047350525856018066, "step": 4379 }, { "epoch": 0.5921619657614114, "grad_norm": 0.12427591532468796, "learning_rate": 1.2112674232858582e-05, "loss": 0.06860542297363281, "step": 4380 }, { "epoch": 0.5922971625572474, "grad_norm": 0.06752648204565048, "learning_rate": 1.2105958101162684e-05, "loss": 0.0730905532836914, "step": 4381 }, { "epoch": 0.5924323593530834, "grad_norm": 0.10141565650701523, "learning_rate": 1.2099242572021735e-05, "loss": 0.057066917419433594, "step": 4382 }, { "epoch": 0.5925675561489193, "grad_norm": 0.05199457332491875, "learning_rate": 1.209252764683395e-05, "loss": 0.05041339993476868, "step": 4383 }, { "epoch": 0.5927027529447552, "grad_norm": 0.08452307432889938, "learning_rate": 1.2085813326997414e-05, "loss": 0.05943500995635986, "step": 4384 }, { "epoch": 0.5928379497405911, "grad_norm": 0.10068266093730927, "learning_rate": 1.2079099613910088e-05, "loss": 0.07964611053466797, "step": 4385 }, { "epoch": 0.5929731465364271, "grad_norm": 0.05352442339062691, "learning_rate": 1.20723865089698e-05, "loss": 0.051158905029296875, "step": 4386 }, { "epoch": 0.593108343332263, "grad_norm": 0.09588709473609924, "learning_rate": 1.2065674013574248e-05, "loss": 0.04647183418273926, "step": 4387 }, { "epoch": 0.593243540128099, "grad_norm": 0.06838831305503845, "learning_rate": 1.2058962129121013e-05, "loss": 0.0778799057006836, "step": 4388 }, { "epoch": 0.593378736923935, "grad_norm": 0.1471521109342575, "learning_rate": 1.2052250857007548e-05, "loss": 0.07695746421813965, "step": 4389 }, { "epoch": 0.5935139337197708, "grad_norm": 0.08080458641052246, "learning_rate": 1.2045540198631177e-05, "loss": 0.05729961395263672, "step": 4390 }, { "epoch": 0.5936491305156067, "grad_norm": 0.056590184569358826, "learning_rate": 1.2038830155389091e-05, "loss": 0.0617365837097168, "step": 4391 }, { "epoch": 0.5937843273114427, "grad_norm": 0.08045302331447601, "learning_rate": 1.2032120728678354e-05, "loss": 0.05838799476623535, "step": 4392 }, { "epoch": 0.5939195241072787, "grad_norm": 0.030840545892715454, "learning_rate": 1.2025411919895907e-05, "loss": 0.02690201997756958, "step": 4393 }, { "epoch": 0.5940547209031146, "grad_norm": 0.07777990400791168, "learning_rate": 1.2018703730438561e-05, "loss": 0.058644771575927734, "step": 4394 }, { "epoch": 0.5941899176989506, "grad_norm": 0.1365945041179657, "learning_rate": 1.2011996161703003e-05, "loss": 0.08714818954467773, "step": 4395 }, { "epoch": 0.5943251144947864, "grad_norm": 0.09489940106868744, "learning_rate": 1.2005289215085775e-05, "loss": 0.07715433835983276, "step": 4396 }, { "epoch": 0.5944603112906224, "grad_norm": 0.06490685045719147, "learning_rate": 1.19985828919833e-05, "loss": 0.038370609283447266, "step": 4397 }, { "epoch": 0.5945955080864583, "grad_norm": 0.06644526869058609, "learning_rate": 1.1991877193791872e-05, "loss": 0.06057024002075195, "step": 4398 }, { "epoch": 0.5947307048822943, "grad_norm": 0.057687729597091675, "learning_rate": 1.1985172121907653e-05, "loss": 0.06514382362365723, "step": 4399 }, { "epoch": 0.5948659016781302, "grad_norm": 0.07421575486660004, "learning_rate": 1.1978467677726682e-05, "loss": 0.06838130950927734, "step": 4400 }, { "epoch": 0.5950010984739662, "grad_norm": 0.06818335503339767, "learning_rate": 1.197176386264485e-05, "loss": 0.062157630920410156, "step": 4401 }, { "epoch": 0.595136295269802, "grad_norm": 0.08384193480014801, "learning_rate": 1.1965060678057927e-05, "loss": 0.06805944442749023, "step": 4402 }, { "epoch": 0.595271492065638, "grad_norm": 0.06513229757547379, "learning_rate": 1.1958358125361554e-05, "loss": 0.06832408905029297, "step": 4403 }, { "epoch": 0.595406688861474, "grad_norm": 0.16560900211334229, "learning_rate": 1.1951656205951247e-05, "loss": 0.063323974609375, "step": 4404 }, { "epoch": 0.5955418856573099, "grad_norm": 0.07044423371553421, "learning_rate": 1.1944954921222367e-05, "loss": 0.05042409896850586, "step": 4405 }, { "epoch": 0.5956770824531459, "grad_norm": 0.08897704631090164, "learning_rate": 1.1938254272570167e-05, "loss": 0.07789993286132812, "step": 4406 }, { "epoch": 0.5958122792489818, "grad_norm": 0.0321781188249588, "learning_rate": 1.1931554261389751e-05, "loss": 0.03359043598175049, "step": 4407 }, { "epoch": 0.5959474760448178, "grad_norm": 0.043969232589006424, "learning_rate": 1.1924854889076103e-05, "loss": 0.052018165588378906, "step": 4408 }, { "epoch": 0.5960826728406536, "grad_norm": 0.07983013987541199, "learning_rate": 1.191815615702407e-05, "loss": 0.04252278804779053, "step": 4409 }, { "epoch": 0.5962178696364896, "grad_norm": 0.08181300759315491, "learning_rate": 1.1911458066628353e-05, "loss": 0.067130446434021, "step": 4410 }, { "epoch": 0.5963530664323256, "grad_norm": 0.08677782863378525, "learning_rate": 1.1904760619283537e-05, "loss": 0.05368852615356445, "step": 4411 }, { "epoch": 0.5964882632281615, "grad_norm": 0.046128302812576294, "learning_rate": 1.1898063816384069e-05, "loss": 0.039492011070251465, "step": 4412 }, { "epoch": 0.5966234600239975, "grad_norm": 0.06046607345342636, "learning_rate": 1.189136765932426e-05, "loss": 0.0479738712310791, "step": 4413 }, { "epoch": 0.5967586568198334, "grad_norm": 0.11654657125473022, "learning_rate": 1.1884672149498276e-05, "loss": 0.06967872381210327, "step": 4414 }, { "epoch": 0.5968938536156693, "grad_norm": 0.08996270596981049, "learning_rate": 1.187797728830016e-05, "loss": 0.05943727493286133, "step": 4415 }, { "epoch": 0.5970290504115052, "grad_norm": 0.18222592771053314, "learning_rate": 1.1871283077123823e-05, "loss": 0.12794017791748047, "step": 4416 }, { "epoch": 0.5971642472073412, "grad_norm": 0.06251989305019379, "learning_rate": 1.1864589517363038e-05, "loss": 0.0666964054107666, "step": 4417 }, { "epoch": 0.5972994440031771, "grad_norm": 0.10987559705972672, "learning_rate": 1.185789661041144e-05, "loss": 0.06929570436477661, "step": 4418 }, { "epoch": 0.5974346407990131, "grad_norm": 0.1608314961194992, "learning_rate": 1.1851204357662513e-05, "loss": 0.07338333129882812, "step": 4419 }, { "epoch": 0.597569837594849, "grad_norm": 0.04701576754450798, "learning_rate": 1.1844512760509634e-05, "loss": 0.04197359085083008, "step": 4420 }, { "epoch": 0.5977050343906849, "grad_norm": 0.07407865673303604, "learning_rate": 1.1837821820346022e-05, "loss": 0.051410675048828125, "step": 4421 }, { "epoch": 0.5978402311865209, "grad_norm": 0.05823945626616478, "learning_rate": 1.1831131538564775e-05, "loss": 0.040430545806884766, "step": 4422 }, { "epoch": 0.5979754279823568, "grad_norm": 0.10350481420755386, "learning_rate": 1.1824441916558843e-05, "loss": 0.052404165267944336, "step": 4423 }, { "epoch": 0.5981106247781928, "grad_norm": 0.08278493583202362, "learning_rate": 1.1817752955721031e-05, "loss": 0.07575035095214844, "step": 4424 }, { "epoch": 0.5982458215740287, "grad_norm": 0.07234026491641998, "learning_rate": 1.1811064657444023e-05, "loss": 0.04076719284057617, "step": 4425 }, { "epoch": 0.5983810183698647, "grad_norm": 0.19123956561088562, "learning_rate": 1.1804377023120361e-05, "loss": 0.07475078105926514, "step": 4426 }, { "epoch": 0.5985162151657005, "grad_norm": 0.046525053679943085, "learning_rate": 1.1797690054142451e-05, "loss": 0.0392451286315918, "step": 4427 }, { "epoch": 0.5986514119615365, "grad_norm": 0.14990352094173431, "learning_rate": 1.1791003751902542e-05, "loss": 0.057016611099243164, "step": 4428 }, { "epoch": 0.5987866087573724, "grad_norm": 0.07029904425144196, "learning_rate": 1.1784318117792763e-05, "loss": 0.06714379787445068, "step": 4429 }, { "epoch": 0.5989218055532084, "grad_norm": 0.07516420632600784, "learning_rate": 1.17776331532051e-05, "loss": 0.04988747835159302, "step": 4430 }, { "epoch": 0.5990570023490444, "grad_norm": 0.061831362545490265, "learning_rate": 1.1770948859531397e-05, "loss": 0.057512760162353516, "step": 4431 }, { "epoch": 0.5991921991448803, "grad_norm": 0.07346495240926743, "learning_rate": 1.1764265238163369e-05, "loss": 0.05953788757324219, "step": 4432 }, { "epoch": 0.5993273959407162, "grad_norm": 0.05526241287589073, "learning_rate": 1.1757582290492568e-05, "loss": 0.05492663383483887, "step": 4433 }, { "epoch": 0.5994625927365521, "grad_norm": 0.04413831979036331, "learning_rate": 1.1750900017910425e-05, "loss": 0.04280579090118408, "step": 4434 }, { "epoch": 0.5995977895323881, "grad_norm": 0.05408065766096115, "learning_rate": 1.1744218421808221e-05, "loss": 0.052460670471191406, "step": 4435 }, { "epoch": 0.599732986328224, "grad_norm": 0.04588092491030693, "learning_rate": 1.1737537503577112e-05, "loss": 0.043430209159851074, "step": 4436 }, { "epoch": 0.59986818312406, "grad_norm": 0.07211904227733612, "learning_rate": 1.1730857264608086e-05, "loss": 0.05210256576538086, "step": 4437 }, { "epoch": 0.6000033799198959, "grad_norm": 0.1481742560863495, "learning_rate": 1.1724177706292013e-05, "loss": 0.0910649299621582, "step": 4438 }, { "epoch": 0.6001385767157318, "grad_norm": 0.09262997657060623, "learning_rate": 1.1717498830019607e-05, "loss": 0.07965517044067383, "step": 4439 }, { "epoch": 0.6002737735115677, "grad_norm": 0.10517065227031708, "learning_rate": 1.1710820637181449e-05, "loss": 0.06760859489440918, "step": 4440 }, { "epoch": 0.6004089703074037, "grad_norm": 0.12969042360782623, "learning_rate": 1.170414312916798e-05, "loss": 0.10483169555664062, "step": 4441 }, { "epoch": 0.6005441671032397, "grad_norm": 0.0682075098156929, "learning_rate": 1.1697466307369484e-05, "loss": 0.04946744441986084, "step": 4442 }, { "epoch": 0.6006793638990756, "grad_norm": 0.09153049439191818, "learning_rate": 1.1690790173176116e-05, "loss": 0.07303977012634277, "step": 4443 }, { "epoch": 0.6008145606949116, "grad_norm": 0.0307060144841671, "learning_rate": 1.1684114727977876e-05, "loss": 0.02816760540008545, "step": 4444 }, { "epoch": 0.6009497574907474, "grad_norm": 0.19880400598049164, "learning_rate": 1.167743997316464e-05, "loss": 0.08561182022094727, "step": 4445 }, { "epoch": 0.6010849542865834, "grad_norm": 0.08696088194847107, "learning_rate": 1.1670765910126112e-05, "loss": 0.054329514503479004, "step": 4446 }, { "epoch": 0.6012201510824193, "grad_norm": 0.09676600247621536, "learning_rate": 1.1664092540251877e-05, "loss": 0.0719614028930664, "step": 4447 }, { "epoch": 0.6013553478782553, "grad_norm": 0.04374127835035324, "learning_rate": 1.1657419864931361e-05, "loss": 0.04409217834472656, "step": 4448 }, { "epoch": 0.6014905446740912, "grad_norm": 0.05726258084177971, "learning_rate": 1.165074788555386e-05, "loss": 0.04376363754272461, "step": 4449 }, { "epoch": 0.6016257414699272, "grad_norm": 0.08017420768737793, "learning_rate": 1.1644076603508514e-05, "loss": 0.04785299301147461, "step": 4450 }, { "epoch": 0.6017609382657632, "grad_norm": 0.08526419848203659, "learning_rate": 1.1637406020184305e-05, "loss": 0.06665301322937012, "step": 4451 }, { "epoch": 0.601896135061599, "grad_norm": 0.08559117466211319, "learning_rate": 1.1630736136970097e-05, "loss": 0.08066320419311523, "step": 4452 }, { "epoch": 0.602031331857435, "grad_norm": 0.03953111544251442, "learning_rate": 1.162406695525459e-05, "loss": 0.04483795166015625, "step": 4453 }, { "epoch": 0.6021665286532709, "grad_norm": 0.036057956516742706, "learning_rate": 1.161739847642635e-05, "loss": 0.05193614959716797, "step": 4454 }, { "epoch": 0.6023017254491069, "grad_norm": 0.13869522511959076, "learning_rate": 1.1610730701873788e-05, "loss": 0.06614875793457031, "step": 4455 }, { "epoch": 0.6024369222449428, "grad_norm": 0.07303797453641891, "learning_rate": 1.1604063632985163e-05, "loss": 0.09101521968841553, "step": 4456 }, { "epoch": 0.6025721190407788, "grad_norm": 0.18419502675533295, "learning_rate": 1.1597397271148598e-05, "loss": 0.06749451160430908, "step": 4457 }, { "epoch": 0.6027073158366146, "grad_norm": 0.06704884022474289, "learning_rate": 1.1590731617752067e-05, "loss": 0.04151320457458496, "step": 4458 }, { "epoch": 0.6028425126324506, "grad_norm": 0.07047439366579056, "learning_rate": 1.1584066674183398e-05, "loss": 0.051407575607299805, "step": 4459 }, { "epoch": 0.6029777094282865, "grad_norm": 0.07154999673366547, "learning_rate": 1.1577402441830262e-05, "loss": 0.08313798904418945, "step": 4460 }, { "epoch": 0.6031129062241225, "grad_norm": 0.10697804391384125, "learning_rate": 1.1570738922080185e-05, "loss": 0.08496499061584473, "step": 4461 }, { "epoch": 0.6032481030199585, "grad_norm": 0.07778322696685791, "learning_rate": 1.1564076116320552e-05, "loss": 0.06845259666442871, "step": 4462 }, { "epoch": 0.6033832998157944, "grad_norm": 0.12543387711048126, "learning_rate": 1.1557414025938592e-05, "loss": 0.08098649978637695, "step": 4463 }, { "epoch": 0.6035184966116303, "grad_norm": 0.06336898356676102, "learning_rate": 1.15507526523214e-05, "loss": 0.0354960560798645, "step": 4464 }, { "epoch": 0.6036536934074662, "grad_norm": 0.2825378477573395, "learning_rate": 1.1544091996855895e-05, "loss": 0.12302923202514648, "step": 4465 }, { "epoch": 0.6037888902033022, "grad_norm": 0.08610116690397263, "learning_rate": 1.153743206092886e-05, "loss": 0.06659555435180664, "step": 4466 }, { "epoch": 0.6039240869991381, "grad_norm": 0.06296218931674957, "learning_rate": 1.1530772845926936e-05, "loss": 0.03441047668457031, "step": 4467 }, { "epoch": 0.6040592837949741, "grad_norm": 0.17660225927829742, "learning_rate": 1.1524114353236614e-05, "loss": 0.07161283493041992, "step": 4468 }, { "epoch": 0.60419448059081, "grad_norm": 0.0970006138086319, "learning_rate": 1.151745658424421e-05, "loss": 0.07809591293334961, "step": 4469 }, { "epoch": 0.6043296773866459, "grad_norm": 0.04494573548436165, "learning_rate": 1.151079954033592e-05, "loss": 0.029191136360168457, "step": 4470 }, { "epoch": 0.6044648741824818, "grad_norm": 0.06157716363668442, "learning_rate": 1.150414322289777e-05, "loss": 0.05384635925292969, "step": 4471 }, { "epoch": 0.6046000709783178, "grad_norm": 0.0951310321688652, "learning_rate": 1.1497487633315643e-05, "loss": 0.07082986831665039, "step": 4472 }, { "epoch": 0.6047352677741538, "grad_norm": 0.08796324580907822, "learning_rate": 1.1490832772975275e-05, "loss": 0.06610298156738281, "step": 4473 }, { "epoch": 0.6048704645699897, "grad_norm": 0.11102192848920822, "learning_rate": 1.148417864326223e-05, "loss": 0.06828904151916504, "step": 4474 }, { "epoch": 0.6050056613658257, "grad_norm": 0.07584069669246674, "learning_rate": 1.1477525245561944e-05, "loss": 0.049087464809417725, "step": 4475 }, { "epoch": 0.6051408581616615, "grad_norm": 0.05231916904449463, "learning_rate": 1.1470872581259684e-05, "loss": 0.07267236709594727, "step": 4476 }, { "epoch": 0.6052760549574975, "grad_norm": 0.1191301941871643, "learning_rate": 1.146422065174057e-05, "loss": 0.09348361939191818, "step": 4477 }, { "epoch": 0.6054112517533334, "grad_norm": 0.12056214362382889, "learning_rate": 1.1457569458389578e-05, "loss": 0.0738821029663086, "step": 4478 }, { "epoch": 0.6055464485491694, "grad_norm": 0.09845026582479477, "learning_rate": 1.145091900259151e-05, "loss": 0.08072614669799805, "step": 4479 }, { "epoch": 0.6056816453450053, "grad_norm": 0.07445214688777924, "learning_rate": 1.1444269285731032e-05, "loss": 0.04471850395202637, "step": 4480 }, { "epoch": 0.6058168421408413, "grad_norm": 0.06066924333572388, "learning_rate": 1.1437620309192652e-05, "loss": 0.03565335273742676, "step": 4481 }, { "epoch": 0.6059520389366772, "grad_norm": 0.07676257938146591, "learning_rate": 1.1430972074360722e-05, "loss": 0.07148265838623047, "step": 4482 }, { "epoch": 0.6060872357325131, "grad_norm": 0.08411075919866562, "learning_rate": 1.1424324582619435e-05, "loss": 0.06795120239257812, "step": 4483 }, { "epoch": 0.6062224325283491, "grad_norm": 0.05896037071943283, "learning_rate": 1.1417677835352837e-05, "loss": 0.05142498016357422, "step": 4484 }, { "epoch": 0.606357629324185, "grad_norm": 0.12102647870779037, "learning_rate": 1.1411031833944816e-05, "loss": 0.08036601543426514, "step": 4485 }, { "epoch": 0.606492826120021, "grad_norm": 0.08629456162452698, "learning_rate": 1.1404386579779111e-05, "loss": 0.05320119857788086, "step": 4486 }, { "epoch": 0.6066280229158569, "grad_norm": 0.12007882446050644, "learning_rate": 1.1397742074239296e-05, "loss": 0.07620716094970703, "step": 4487 }, { "epoch": 0.6067632197116928, "grad_norm": 0.08685746043920517, "learning_rate": 1.1391098318708785e-05, "loss": 0.07518482208251953, "step": 4488 }, { "epoch": 0.6068984165075287, "grad_norm": 0.07086971402168274, "learning_rate": 1.1384455314570848e-05, "loss": 0.04566526412963867, "step": 4489 }, { "epoch": 0.6070336133033647, "grad_norm": 0.06558715552091599, "learning_rate": 1.1377813063208596e-05, "loss": 0.07185649871826172, "step": 4490 }, { "epoch": 0.6071688100992007, "grad_norm": 0.08730585128068924, "learning_rate": 1.1371171566004986e-05, "loss": 0.0811614990234375, "step": 4491 }, { "epoch": 0.6073040068950366, "grad_norm": 0.12361347675323486, "learning_rate": 1.1364530824342806e-05, "loss": 0.05371090769767761, "step": 4492 }, { "epoch": 0.6074392036908726, "grad_norm": 0.05922313779592514, "learning_rate": 1.1357890839604688e-05, "loss": 0.05019420385360718, "step": 4493 }, { "epoch": 0.6075744004867084, "grad_norm": 0.06025974079966545, "learning_rate": 1.1351251613173122e-05, "loss": 0.055617570877075195, "step": 4494 }, { "epoch": 0.6077095972825444, "grad_norm": 0.06857304275035858, "learning_rate": 1.1344613146430428e-05, "loss": 0.06400120258331299, "step": 4495 }, { "epoch": 0.6078447940783803, "grad_norm": 0.04857001453638077, "learning_rate": 1.1337975440758775e-05, "loss": 0.048676252365112305, "step": 4496 }, { "epoch": 0.6079799908742163, "grad_norm": 0.1769881695508957, "learning_rate": 1.133133849754016e-05, "loss": 0.09433364868164062, "step": 4497 }, { "epoch": 0.6081151876700522, "grad_norm": 0.07416752725839615, "learning_rate": 1.1324702318156431e-05, "loss": 0.06578731536865234, "step": 4498 }, { "epoch": 0.6082503844658882, "grad_norm": 0.053321123123168945, "learning_rate": 1.1318066903989279e-05, "loss": 0.06825423240661621, "step": 4499 }, { "epoch": 0.6083855812617242, "grad_norm": 0.10447041690349579, "learning_rate": 1.1311432256420232e-05, "loss": 0.06466418504714966, "step": 4500 }, { "epoch": 0.60852077805756, "grad_norm": 0.04495958983898163, "learning_rate": 1.1304798376830664e-05, "loss": 0.0367891788482666, "step": 4501 }, { "epoch": 0.608655974853396, "grad_norm": 0.11137567460536957, "learning_rate": 1.1298165266601778e-05, "loss": 0.09295177459716797, "step": 4502 }, { "epoch": 0.6087911716492319, "grad_norm": 0.08697705715894699, "learning_rate": 1.129153292711462e-05, "loss": 0.06599617004394531, "step": 4503 }, { "epoch": 0.6089263684450679, "grad_norm": 0.042401522397994995, "learning_rate": 1.1284901359750082e-05, "loss": 0.04012654721736908, "step": 4504 }, { "epoch": 0.6090615652409038, "grad_norm": 0.06264140456914902, "learning_rate": 1.1278270565888897e-05, "loss": 0.04436802864074707, "step": 4505 }, { "epoch": 0.6091967620367398, "grad_norm": 0.13806472718715668, "learning_rate": 1.1271640546911624e-05, "loss": 0.0663565993309021, "step": 4506 }, { "epoch": 0.6093319588325756, "grad_norm": 0.08638143539428711, "learning_rate": 1.1265011304198672e-05, "loss": 0.08188581466674805, "step": 4507 }, { "epoch": 0.6094671556284116, "grad_norm": 0.17412403225898743, "learning_rate": 1.1258382839130282e-05, "loss": 0.08156490325927734, "step": 4508 }, { "epoch": 0.6096023524242475, "grad_norm": 0.04546007141470909, "learning_rate": 1.1251755153086536e-05, "loss": 0.040462493896484375, "step": 4509 }, { "epoch": 0.6097375492200835, "grad_norm": 0.10680299252271652, "learning_rate": 1.1245128247447362e-05, "loss": 0.07860314846038818, "step": 4510 }, { "epoch": 0.6098727460159195, "grad_norm": 0.05734534561634064, "learning_rate": 1.1238502123592507e-05, "loss": 0.05141758918762207, "step": 4511 }, { "epoch": 0.6100079428117554, "grad_norm": 0.08302834630012512, "learning_rate": 1.1231876782901568e-05, "loss": 0.04714542627334595, "step": 4512 }, { "epoch": 0.6101431396075913, "grad_norm": 0.055085211992263794, "learning_rate": 1.1225252226753975e-05, "loss": 0.05603361129760742, "step": 4513 }, { "epoch": 0.6102783364034272, "grad_norm": 0.10334553569555283, "learning_rate": 1.1218628456529005e-05, "loss": 0.07889223098754883, "step": 4514 }, { "epoch": 0.6104135331992632, "grad_norm": 0.08391764014959335, "learning_rate": 1.1212005473605746e-05, "loss": 0.07118511199951172, "step": 4515 }, { "epoch": 0.6105487299950991, "grad_norm": 0.1853853464126587, "learning_rate": 1.120538327936315e-05, "loss": 0.07809734344482422, "step": 4516 }, { "epoch": 0.6106839267909351, "grad_norm": 0.05819907411932945, "learning_rate": 1.1198761875179993e-05, "loss": 0.04647541046142578, "step": 4517 }, { "epoch": 0.610819123586771, "grad_norm": 0.09502635896205902, "learning_rate": 1.1192141262434883e-05, "loss": 0.05236482620239258, "step": 4518 }, { "epoch": 0.6109543203826069, "grad_norm": 0.03877397999167442, "learning_rate": 1.1185521442506272e-05, "loss": 0.05437469482421875, "step": 4519 }, { "epoch": 0.6110895171784428, "grad_norm": 0.0829184502363205, "learning_rate": 1.1178902416772432e-05, "loss": 0.04833507537841797, "step": 4520 }, { "epoch": 0.6112247139742788, "grad_norm": 0.11189968883991241, "learning_rate": 1.1172284186611485e-05, "loss": 0.06994009017944336, "step": 4521 }, { "epoch": 0.6113599107701148, "grad_norm": 0.05897166579961777, "learning_rate": 1.1165666753401384e-05, "loss": 0.04030972719192505, "step": 4522 }, { "epoch": 0.6114951075659507, "grad_norm": 0.09178470820188522, "learning_rate": 1.1159050118519914e-05, "loss": 0.08080816268920898, "step": 4523 }, { "epoch": 0.6116303043617867, "grad_norm": 0.16161687672138214, "learning_rate": 1.1152434283344696e-05, "loss": 0.0764613151550293, "step": 4524 }, { "epoch": 0.6117655011576225, "grad_norm": 0.06385630369186401, "learning_rate": 1.114581924925317e-05, "loss": 0.056527018547058105, "step": 4525 }, { "epoch": 0.6119006979534585, "grad_norm": 0.04277735948562622, "learning_rate": 1.113920501762263e-05, "loss": 0.041338324546813965, "step": 4526 }, { "epoch": 0.6120358947492944, "grad_norm": 0.043355513364076614, "learning_rate": 1.1132591589830193e-05, "loss": 0.052962303161621094, "step": 4527 }, { "epoch": 0.6121710915451304, "grad_norm": 0.09569446742534637, "learning_rate": 1.1125978967252818e-05, "loss": 0.06783771514892578, "step": 4528 }, { "epoch": 0.6123062883409663, "grad_norm": 0.058598004281520844, "learning_rate": 1.1119367151267278e-05, "loss": 0.046468138694763184, "step": 4529 }, { "epoch": 0.6124414851368023, "grad_norm": 0.12105164676904678, "learning_rate": 1.1112756143250186e-05, "loss": 0.08240771293640137, "step": 4530 }, { "epoch": 0.6125766819326381, "grad_norm": 0.07010209560394287, "learning_rate": 1.1106145944577995e-05, "loss": 0.05855751037597656, "step": 4531 }, { "epoch": 0.6127118787284741, "grad_norm": 0.12166358530521393, "learning_rate": 1.1099536556626984e-05, "loss": 0.05544114112854004, "step": 4532 }, { "epoch": 0.6128470755243101, "grad_norm": 0.0898999348282814, "learning_rate": 1.1092927980773269e-05, "loss": 0.05614280700683594, "step": 4533 }, { "epoch": 0.612982272320146, "grad_norm": 0.1359388381242752, "learning_rate": 1.1086320218392777e-05, "loss": 0.06669783592224121, "step": 4534 }, { "epoch": 0.613117469115982, "grad_norm": 0.04480629414319992, "learning_rate": 1.1079713270861286e-05, "loss": 0.049057722091674805, "step": 4535 }, { "epoch": 0.6132526659118179, "grad_norm": 0.13082189857959747, "learning_rate": 1.1073107139554395e-05, "loss": 0.04634416103363037, "step": 4536 }, { "epoch": 0.6133878627076538, "grad_norm": 0.05390363559126854, "learning_rate": 1.1066501825847545e-05, "loss": 0.054264068603515625, "step": 4537 }, { "epoch": 0.6135230595034897, "grad_norm": 0.08407049626111984, "learning_rate": 1.1059897331115985e-05, "loss": 0.04398465156555176, "step": 4538 }, { "epoch": 0.6136582562993257, "grad_norm": 0.1293017566204071, "learning_rate": 1.1053293656734816e-05, "loss": 0.0730581283569336, "step": 4539 }, { "epoch": 0.6137934530951616, "grad_norm": 0.09226568043231964, "learning_rate": 1.1046690804078949e-05, "loss": 0.05648469924926758, "step": 4540 }, { "epoch": 0.6139286498909976, "grad_norm": 0.052317120134830475, "learning_rate": 1.1040088774523139e-05, "loss": 0.047637939453125, "step": 4541 }, { "epoch": 0.6140638466868336, "grad_norm": 0.07901492714881897, "learning_rate": 1.1033487569441971e-05, "loss": 0.0620880126953125, "step": 4542 }, { "epoch": 0.6141990434826695, "grad_norm": 0.09862032532691956, "learning_rate": 1.1026887190209834e-05, "loss": 0.05237436294555664, "step": 4543 }, { "epoch": 0.6143342402785054, "grad_norm": 0.06221865862607956, "learning_rate": 1.1020287638200977e-05, "loss": 0.0640573501586914, "step": 4544 }, { "epoch": 0.6144694370743413, "grad_norm": 0.05767063423991203, "learning_rate": 1.1013688914789452e-05, "loss": 0.056105852127075195, "step": 4545 }, { "epoch": 0.6146046338701773, "grad_norm": 0.055492449551820755, "learning_rate": 1.100709102134915e-05, "loss": 0.057866811752319336, "step": 4546 }, { "epoch": 0.6147398306660132, "grad_norm": 0.06277579069137573, "learning_rate": 1.10004939592538e-05, "loss": 0.05402016639709473, "step": 4547 }, { "epoch": 0.6148750274618492, "grad_norm": 0.0807250514626503, "learning_rate": 1.0993897729876927e-05, "loss": 0.0560833215713501, "step": 4548 }, { "epoch": 0.6150102242576851, "grad_norm": 0.09189426898956299, "learning_rate": 1.0987302334591915e-05, "loss": 0.04750269651412964, "step": 4549 }, { "epoch": 0.615145421053521, "grad_norm": 0.12089020758867264, "learning_rate": 1.098070777477195e-05, "loss": 0.05227017402648926, "step": 4550 }, { "epoch": 0.615280617849357, "grad_norm": 0.09360329061746597, "learning_rate": 1.0974114051790067e-05, "loss": 0.06433185935020447, "step": 4551 }, { "epoch": 0.6154158146451929, "grad_norm": 0.11711576581001282, "learning_rate": 1.09675211670191e-05, "loss": 0.0613856315612793, "step": 4552 }, { "epoch": 0.6155510114410289, "grad_norm": 0.12193598598241806, "learning_rate": 1.0960929121831732e-05, "loss": 0.044788360595703125, "step": 4553 }, { "epoch": 0.6156862082368648, "grad_norm": 0.10480480641126633, "learning_rate": 1.095433791760046e-05, "loss": 0.05182170867919922, "step": 4554 }, { "epoch": 0.6158214050327008, "grad_norm": 0.08455322682857513, "learning_rate": 1.0947747555697609e-05, "loss": 0.0470576286315918, "step": 4555 }, { "epoch": 0.6159566018285366, "grad_norm": 0.06877885013818741, "learning_rate": 1.0941158037495328e-05, "loss": 0.059427738189697266, "step": 4556 }, { "epoch": 0.6160917986243726, "grad_norm": 0.06504160910844803, "learning_rate": 1.0934569364365583e-05, "loss": 0.06248807907104492, "step": 4557 }, { "epoch": 0.6162269954202085, "grad_norm": 0.11980217695236206, "learning_rate": 1.0927981537680176e-05, "loss": 0.05568826198577881, "step": 4558 }, { "epoch": 0.6163621922160445, "grad_norm": 0.10692892968654633, "learning_rate": 1.0921394558810726e-05, "loss": 0.08085012435913086, "step": 4559 }, { "epoch": 0.6164973890118804, "grad_norm": 0.09154141694307327, "learning_rate": 1.0914808429128688e-05, "loss": 0.054042696952819824, "step": 4560 }, { "epoch": 0.6166325858077164, "grad_norm": 0.08877961337566376, "learning_rate": 1.0908223150005315e-05, "loss": 0.05475306510925293, "step": 4561 }, { "epoch": 0.6167677826035523, "grad_norm": 0.14868059754371643, "learning_rate": 1.09016387228117e-05, "loss": 0.06053924560546875, "step": 4562 }, { "epoch": 0.6169029793993882, "grad_norm": 0.08445591479539871, "learning_rate": 1.0895055148918758e-05, "loss": 0.06797933578491211, "step": 4563 }, { "epoch": 0.6170381761952242, "grad_norm": 0.10985692590475082, "learning_rate": 1.0888472429697223e-05, "loss": 0.07233119010925293, "step": 4564 }, { "epoch": 0.6171733729910601, "grad_norm": 0.14809873700141907, "learning_rate": 1.088189056651766e-05, "loss": 0.08217453956604004, "step": 4565 }, { "epoch": 0.6173085697868961, "grad_norm": 0.09050686657428741, "learning_rate": 1.0875309560750438e-05, "loss": 0.07004356384277344, "step": 4566 }, { "epoch": 0.617443766582732, "grad_norm": 0.06252928823232651, "learning_rate": 1.086872941376576e-05, "loss": 0.057417869567871094, "step": 4567 }, { "epoch": 0.6175789633785679, "grad_norm": 0.09707442671060562, "learning_rate": 1.0862150126933648e-05, "loss": 0.047460198402404785, "step": 4568 }, { "epoch": 0.6177141601744038, "grad_norm": 0.08752568811178207, "learning_rate": 1.0855571701623942e-05, "loss": 0.08448314666748047, "step": 4569 }, { "epoch": 0.6178493569702398, "grad_norm": 0.12507696449756622, "learning_rate": 1.0848994139206317e-05, "loss": 0.09156608581542969, "step": 4570 }, { "epoch": 0.6179845537660758, "grad_norm": 0.10488781332969666, "learning_rate": 1.0842417441050247e-05, "loss": 0.07348823547363281, "step": 4571 }, { "epoch": 0.6181197505619117, "grad_norm": 0.07688906788825989, "learning_rate": 1.0835841608525031e-05, "loss": 0.07751977443695068, "step": 4572 }, { "epoch": 0.6182549473577477, "grad_norm": 0.05180235207080841, "learning_rate": 1.08292666429998e-05, "loss": 0.055185794830322266, "step": 4573 }, { "epoch": 0.6183901441535835, "grad_norm": 0.11347298324108124, "learning_rate": 1.08226925458435e-05, "loss": 0.06383049488067627, "step": 4574 }, { "epoch": 0.6185253409494195, "grad_norm": 0.11888216435909271, "learning_rate": 1.0816119318424882e-05, "loss": 0.10407376289367676, "step": 4575 }, { "epoch": 0.6186605377452554, "grad_norm": 0.05760718509554863, "learning_rate": 1.0809546962112535e-05, "loss": 0.0504530668258667, "step": 4576 }, { "epoch": 0.6187957345410914, "grad_norm": 0.061022959649562836, "learning_rate": 1.0802975478274856e-05, "loss": 0.069732666015625, "step": 4577 }, { "epoch": 0.6189309313369273, "grad_norm": 0.07595275342464447, "learning_rate": 1.0796404868280062e-05, "loss": 0.08083534240722656, "step": 4578 }, { "epoch": 0.6190661281327633, "grad_norm": 0.05080515518784523, "learning_rate": 1.07898351334962e-05, "loss": 0.046851396560668945, "step": 4579 }, { "epoch": 0.6192013249285991, "grad_norm": 0.21546253561973572, "learning_rate": 1.0783266275291103e-05, "loss": 0.09590339660644531, "step": 4580 }, { "epoch": 0.6193365217244351, "grad_norm": 0.06499781459569931, "learning_rate": 1.077669829503246e-05, "loss": 0.04685091972351074, "step": 4581 }, { "epoch": 0.619471718520271, "grad_norm": 0.0577646940946579, "learning_rate": 1.077013119408775e-05, "loss": 0.05171847343444824, "step": 4582 }, { "epoch": 0.619606915316107, "grad_norm": 0.0627833753824234, "learning_rate": 1.0763564973824289e-05, "loss": 0.06477808952331543, "step": 4583 }, { "epoch": 0.619742112111943, "grad_norm": 0.08142044395208359, "learning_rate": 1.0756999635609185e-05, "loss": 0.07486128807067871, "step": 4584 }, { "epoch": 0.6198773089077789, "grad_norm": 0.04036731645464897, "learning_rate": 1.0750435180809381e-05, "loss": 0.033394813537597656, "step": 4585 }, { "epoch": 0.6200125057036148, "grad_norm": 0.08377616852521896, "learning_rate": 1.074387161079164e-05, "loss": 0.07164502143859863, "step": 4586 }, { "epoch": 0.6201477024994507, "grad_norm": 0.05724209547042847, "learning_rate": 1.0737308926922521e-05, "loss": 0.056397438049316406, "step": 4587 }, { "epoch": 0.6202828992952867, "grad_norm": 0.06017300859093666, "learning_rate": 1.0730747130568424e-05, "loss": 0.0733184814453125, "step": 4588 }, { "epoch": 0.6204180960911226, "grad_norm": 0.07337550818920135, "learning_rate": 1.0724186223095532e-05, "loss": 0.056066691875457764, "step": 4589 }, { "epoch": 0.6205532928869586, "grad_norm": 0.06339246034622192, "learning_rate": 1.071762620586987e-05, "loss": 0.06686973571777344, "step": 4590 }, { "epoch": 0.6206884896827946, "grad_norm": 0.15413643419742584, "learning_rate": 1.0711067080257273e-05, "loss": 0.051012277603149414, "step": 4591 }, { "epoch": 0.6208236864786305, "grad_norm": 0.09538403153419495, "learning_rate": 1.0704508847623374e-05, "loss": 0.06360960006713867, "step": 4592 }, { "epoch": 0.6209588832744664, "grad_norm": 0.04824884980916977, "learning_rate": 1.069795150933365e-05, "loss": 0.04549694061279297, "step": 4593 }, { "epoch": 0.6210940800703023, "grad_norm": 0.07252566516399384, "learning_rate": 1.0691395066753357e-05, "loss": 0.08558177947998047, "step": 4594 }, { "epoch": 0.6212292768661383, "grad_norm": 0.1259952038526535, "learning_rate": 1.0684839521247584e-05, "loss": 0.0791010856628418, "step": 4595 }, { "epoch": 0.6213644736619742, "grad_norm": 0.07317842543125153, "learning_rate": 1.0678284874181234e-05, "loss": 0.06471514701843262, "step": 4596 }, { "epoch": 0.6214996704578102, "grad_norm": 0.06666328758001328, "learning_rate": 1.0671731126919028e-05, "loss": 0.048066139221191406, "step": 4597 }, { "epoch": 0.6216348672536461, "grad_norm": 0.041755132377147675, "learning_rate": 1.066517828082548e-05, "loss": 0.027606487274169922, "step": 4598 }, { "epoch": 0.621770064049482, "grad_norm": 0.07600902020931244, "learning_rate": 1.0658626337264926e-05, "loss": 0.04816699028015137, "step": 4599 }, { "epoch": 0.6219052608453179, "grad_norm": 0.08347147703170776, "learning_rate": 1.0652075297601518e-05, "loss": 0.05012869834899902, "step": 4600 }, { "epoch": 0.6220404576411539, "grad_norm": 0.032536547631025314, "learning_rate": 1.0645525163199222e-05, "loss": 0.03523588180541992, "step": 4601 }, { "epoch": 0.6221756544369899, "grad_norm": 0.029898855835199356, "learning_rate": 1.063897593542181e-05, "loss": 0.039261817932128906, "step": 4602 }, { "epoch": 0.6223108512328258, "grad_norm": 0.08544182777404785, "learning_rate": 1.0632427615632864e-05, "loss": 0.05735349655151367, "step": 4603 }, { "epoch": 0.6224460480286618, "grad_norm": 0.10907492786645889, "learning_rate": 1.0625880205195776e-05, "loss": 0.06522655487060547, "step": 4604 }, { "epoch": 0.6225812448244976, "grad_norm": 0.07812304049730301, "learning_rate": 1.0619333705473754e-05, "loss": 0.06149721145629883, "step": 4605 }, { "epoch": 0.6227164416203336, "grad_norm": 0.06729117035865784, "learning_rate": 1.0612788117829821e-05, "loss": 0.06447267532348633, "step": 4606 }, { "epoch": 0.6228516384161695, "grad_norm": 0.12640045583248138, "learning_rate": 1.0606243443626792e-05, "loss": 0.06893444061279297, "step": 4607 }, { "epoch": 0.6229868352120055, "grad_norm": 0.0570698007941246, "learning_rate": 1.0599699684227313e-05, "loss": 0.03769737482070923, "step": 4608 }, { "epoch": 0.6231220320078414, "grad_norm": 0.08291788399219513, "learning_rate": 1.0593156840993818e-05, "loss": 0.07607030868530273, "step": 4609 }, { "epoch": 0.6232572288036774, "grad_norm": 0.12094594538211823, "learning_rate": 1.0586614915288571e-05, "loss": 0.06901097297668457, "step": 4610 }, { "epoch": 0.6233924255995132, "grad_norm": 0.12530755996704102, "learning_rate": 1.0580073908473641e-05, "loss": 0.09278249740600586, "step": 4611 }, { "epoch": 0.6235276223953492, "grad_norm": 0.06343226134777069, "learning_rate": 1.0573533821910885e-05, "loss": 0.05980968475341797, "step": 4612 }, { "epoch": 0.6236628191911852, "grad_norm": 0.05784863233566284, "learning_rate": 1.0566994656961997e-05, "loss": 0.04554760456085205, "step": 4613 }, { "epoch": 0.6237980159870211, "grad_norm": 0.07663862407207489, "learning_rate": 1.0560456414988456e-05, "loss": 0.08053302764892578, "step": 4614 }, { "epoch": 0.6239332127828571, "grad_norm": 0.043838586658239365, "learning_rate": 1.0553919097351564e-05, "loss": 0.03961610794067383, "step": 4615 }, { "epoch": 0.624068409578693, "grad_norm": 0.10038360953330994, "learning_rate": 1.0547382705412434e-05, "loss": 0.07228636741638184, "step": 4616 }, { "epoch": 0.6242036063745289, "grad_norm": 0.05642108991742134, "learning_rate": 1.054084724053196e-05, "loss": 0.044862300157547, "step": 4617 }, { "epoch": 0.6243388031703648, "grad_norm": 0.07895555347204208, "learning_rate": 1.0534312704070875e-05, "loss": 0.08383417129516602, "step": 4618 }, { "epoch": 0.6244739999662008, "grad_norm": 0.06675758957862854, "learning_rate": 1.0527779097389695e-05, "loss": 0.06104004383087158, "step": 4619 }, { "epoch": 0.6246091967620367, "grad_norm": 0.09647714346647263, "learning_rate": 1.0521246421848762e-05, "loss": 0.06209111213684082, "step": 4620 }, { "epoch": 0.6247443935578727, "grad_norm": 0.06091001629829407, "learning_rate": 1.0514714678808202e-05, "loss": 0.04690009355545044, "step": 4621 }, { "epoch": 0.6248795903537087, "grad_norm": 0.1445750892162323, "learning_rate": 1.0508183869627962e-05, "loss": 0.07294130325317383, "step": 4622 }, { "epoch": 0.6250147871495445, "grad_norm": 0.11366762965917587, "learning_rate": 1.0501653995667798e-05, "loss": 0.03745102882385254, "step": 4623 }, { "epoch": 0.6251499839453805, "grad_norm": 0.09455694258213043, "learning_rate": 1.0495125058287258e-05, "loss": 0.07582062482833862, "step": 4624 }, { "epoch": 0.6252851807412164, "grad_norm": 0.09008247405290604, "learning_rate": 1.0488597058845708e-05, "loss": 0.05620574951171875, "step": 4625 }, { "epoch": 0.6254203775370524, "grad_norm": 0.07328398525714874, "learning_rate": 1.0482069998702304e-05, "loss": 0.061090558767318726, "step": 4626 }, { "epoch": 0.6255555743328883, "grad_norm": 0.034278471022844315, "learning_rate": 1.0475543879216017e-05, "loss": 0.034066200256347656, "step": 4627 }, { "epoch": 0.6256907711287243, "grad_norm": 0.09474608302116394, "learning_rate": 1.0469018701745626e-05, "loss": 0.08124363422393799, "step": 4628 }, { "epoch": 0.6258259679245601, "grad_norm": 0.08546648919582367, "learning_rate": 1.0462494467649704e-05, "loss": 0.06053125858306885, "step": 4629 }, { "epoch": 0.6259611647203961, "grad_norm": 0.04647248983383179, "learning_rate": 1.045597117828663e-05, "loss": 0.03316211700439453, "step": 4630 }, { "epoch": 0.626096361516232, "grad_norm": 0.07093106210231781, "learning_rate": 1.0449448835014586e-05, "loss": 0.04970729351043701, "step": 4631 }, { "epoch": 0.626231558312068, "grad_norm": 0.1595800518989563, "learning_rate": 1.044292743919156e-05, "loss": 0.10310888290405273, "step": 4632 }, { "epoch": 0.626366755107904, "grad_norm": 0.05669156461954117, "learning_rate": 1.0436406992175343e-05, "loss": 0.05315113067626953, "step": 4633 }, { "epoch": 0.6265019519037399, "grad_norm": 0.07586748152971268, "learning_rate": 1.0429887495323532e-05, "loss": 0.05605113506317139, "step": 4634 }, { "epoch": 0.6266371486995759, "grad_norm": 0.08573278039693832, "learning_rate": 1.0423368949993512e-05, "loss": 0.07182550430297852, "step": 4635 }, { "epoch": 0.6267723454954117, "grad_norm": 0.05033598095178604, "learning_rate": 1.041685135754248e-05, "loss": 0.04836010932922363, "step": 4636 }, { "epoch": 0.6269075422912477, "grad_norm": 0.0657183974981308, "learning_rate": 1.0410334719327435e-05, "loss": 0.06773090362548828, "step": 4637 }, { "epoch": 0.6270427390870836, "grad_norm": 0.047336284071207047, "learning_rate": 1.0403819036705177e-05, "loss": 0.05188131332397461, "step": 4638 }, { "epoch": 0.6271779358829196, "grad_norm": 0.0654551237821579, "learning_rate": 1.0397304311032311e-05, "loss": 0.056011199951171875, "step": 4639 }, { "epoch": 0.6273131326787555, "grad_norm": 0.07031180709600449, "learning_rate": 1.039079054366523e-05, "loss": 0.051982879638671875, "step": 4640 }, { "epoch": 0.6274483294745915, "grad_norm": 0.10409411042928696, "learning_rate": 1.0384277735960133e-05, "loss": 0.05213010311126709, "step": 4641 }, { "epoch": 0.6275835262704273, "grad_norm": 0.09999171644449234, "learning_rate": 1.0377765889273025e-05, "loss": 0.049260616302490234, "step": 4642 }, { "epoch": 0.6277187230662633, "grad_norm": 0.09111247956752777, "learning_rate": 1.0371255004959715e-05, "loss": 0.042534828186035156, "step": 4643 }, { "epoch": 0.6278539198620993, "grad_norm": 0.052284471690654755, "learning_rate": 1.036474508437579e-05, "loss": 0.03229665756225586, "step": 4644 }, { "epoch": 0.6279891166579352, "grad_norm": 0.08931080996990204, "learning_rate": 1.035823612887666e-05, "loss": 0.05841207504272461, "step": 4645 }, { "epoch": 0.6281243134537712, "grad_norm": 0.06335139274597168, "learning_rate": 1.0351728139817517e-05, "loss": 0.04215601086616516, "step": 4646 }, { "epoch": 0.6282595102496071, "grad_norm": 0.04505191743373871, "learning_rate": 1.0345221118553362e-05, "loss": 0.04367262125015259, "step": 4647 }, { "epoch": 0.628394707045443, "grad_norm": 0.10204634815454483, "learning_rate": 1.0338715066439002e-05, "loss": 0.06146645545959473, "step": 4648 }, { "epoch": 0.6285299038412789, "grad_norm": 0.18705184757709503, "learning_rate": 1.0332209984829013e-05, "loss": 0.07594788074493408, "step": 4649 }, { "epoch": 0.6286651006371149, "grad_norm": 0.04279199615120888, "learning_rate": 1.03257058750778e-05, "loss": 0.04190528392791748, "step": 4650 }, { "epoch": 0.6288002974329508, "grad_norm": 0.12377715110778809, "learning_rate": 1.0319202738539548e-05, "loss": 0.056028127670288086, "step": 4651 }, { "epoch": 0.6289354942287868, "grad_norm": 0.04761936143040657, "learning_rate": 1.0312700576568253e-05, "loss": 0.05487704277038574, "step": 4652 }, { "epoch": 0.6290706910246228, "grad_norm": 0.18021810054779053, "learning_rate": 1.0306199390517688e-05, "loss": 0.08748221397399902, "step": 4653 }, { "epoch": 0.6292058878204586, "grad_norm": 0.06973965466022491, "learning_rate": 1.0299699181741439e-05, "loss": 0.05473470687866211, "step": 4654 }, { "epoch": 0.6293410846162946, "grad_norm": 0.06991768628358841, "learning_rate": 1.0293199951592889e-05, "loss": 0.058176517486572266, "step": 4655 }, { "epoch": 0.6294762814121305, "grad_norm": 0.07701300084590912, "learning_rate": 1.0286701701425206e-05, "loss": 0.06687712669372559, "step": 4656 }, { "epoch": 0.6296114782079665, "grad_norm": 0.08925246447324753, "learning_rate": 1.0280204432591369e-05, "loss": 0.07634258270263672, "step": 4657 }, { "epoch": 0.6297466750038024, "grad_norm": 0.05631409212946892, "learning_rate": 1.0273708146444133e-05, "loss": 0.07979965209960938, "step": 4658 }, { "epoch": 0.6298818717996384, "grad_norm": 0.06966651231050491, "learning_rate": 1.0267212844336062e-05, "loss": 0.07070302963256836, "step": 4659 }, { "epoch": 0.6300170685954742, "grad_norm": 0.10738211870193481, "learning_rate": 1.026071852761952e-05, "loss": 0.09098625183105469, "step": 4660 }, { "epoch": 0.6301522653913102, "grad_norm": 0.06925952434539795, "learning_rate": 1.025422519764665e-05, "loss": 0.043718695640563965, "step": 4661 }, { "epoch": 0.6302874621871462, "grad_norm": 0.11434145271778107, "learning_rate": 1.024773285576941e-05, "loss": 0.06419801712036133, "step": 4662 }, { "epoch": 0.6304226589829821, "grad_norm": 0.12941968441009521, "learning_rate": 1.0241241503339524e-05, "loss": 0.045635223388671875, "step": 4663 }, { "epoch": 0.6305578557788181, "grad_norm": 0.04785580933094025, "learning_rate": 1.023475114170853e-05, "loss": 0.059967994689941406, "step": 4664 }, { "epoch": 0.630693052574654, "grad_norm": 0.08671826124191284, "learning_rate": 1.0228261772227768e-05, "loss": 0.06833600997924805, "step": 4665 }, { "epoch": 0.6308282493704899, "grad_norm": 0.0740731880068779, "learning_rate": 1.0221773396248349e-05, "loss": 0.08113479614257812, "step": 4666 }, { "epoch": 0.6309634461663258, "grad_norm": 0.07166953384876251, "learning_rate": 1.021528601512119e-05, "loss": 0.0814390778541565, "step": 4667 }, { "epoch": 0.6310986429621618, "grad_norm": 0.049806226044893265, "learning_rate": 1.0208799630196994e-05, "loss": 0.052498817443847656, "step": 4668 }, { "epoch": 0.6312338397579977, "grad_norm": 0.047655995935201645, "learning_rate": 1.0202314242826264e-05, "loss": 0.035086870193481445, "step": 4669 }, { "epoch": 0.6313690365538337, "grad_norm": 0.09570282697677612, "learning_rate": 1.0195829854359299e-05, "loss": 0.0708608627319336, "step": 4670 }, { "epoch": 0.6315042333496697, "grad_norm": 0.13582776486873627, "learning_rate": 1.0189346466146175e-05, "loss": 0.08343559503555298, "step": 4671 }, { "epoch": 0.6316394301455055, "grad_norm": 0.08645089715719223, "learning_rate": 1.018286407953677e-05, "loss": 0.06045341491699219, "step": 4672 }, { "epoch": 0.6317746269413415, "grad_norm": 0.11129418760538101, "learning_rate": 1.017638269588075e-05, "loss": 0.062251150608062744, "step": 4673 }, { "epoch": 0.6319098237371774, "grad_norm": 0.19079650938510895, "learning_rate": 1.0169902316527575e-05, "loss": 0.0704948902130127, "step": 4674 }, { "epoch": 0.6320450205330134, "grad_norm": 0.06261169165372849, "learning_rate": 1.0163422942826502e-05, "loss": 0.047031402587890625, "step": 4675 }, { "epoch": 0.6321802173288493, "grad_norm": 0.16144797205924988, "learning_rate": 1.0156944576126555e-05, "loss": 0.06773519515991211, "step": 4676 }, { "epoch": 0.6323154141246853, "grad_norm": 0.04278594255447388, "learning_rate": 1.0150467217776579e-05, "loss": 0.03280520439147949, "step": 4677 }, { "epoch": 0.6324506109205212, "grad_norm": 0.05383704602718353, "learning_rate": 1.0143990869125185e-05, "loss": 0.05450177192687988, "step": 4678 }, { "epoch": 0.6325858077163571, "grad_norm": 0.05481462553143501, "learning_rate": 1.013751553152079e-05, "loss": 0.04588794708251953, "step": 4679 }, { "epoch": 0.632721004512193, "grad_norm": 0.12561596930027008, "learning_rate": 1.0131041206311594e-05, "loss": 0.06728601455688477, "step": 4680 }, { "epoch": 0.632856201308029, "grad_norm": 0.07858829200267792, "learning_rate": 1.0124567894845578e-05, "loss": 0.0653071403503418, "step": 4681 }, { "epoch": 0.632991398103865, "grad_norm": 0.12705333530902863, "learning_rate": 1.0118095598470528e-05, "loss": 0.06161785125732422, "step": 4682 }, { "epoch": 0.6331265948997009, "grad_norm": 0.20888195931911469, "learning_rate": 1.0111624318534006e-05, "loss": 0.08056330680847168, "step": 4683 }, { "epoch": 0.6332617916955369, "grad_norm": 0.07739520817995071, "learning_rate": 1.0105154056383377e-05, "loss": 0.08420372009277344, "step": 4684 }, { "epoch": 0.6333969884913727, "grad_norm": 0.07199637591838837, "learning_rate": 1.0098684813365764e-05, "loss": 0.060961246490478516, "step": 4685 }, { "epoch": 0.6335321852872087, "grad_norm": 0.10835880786180496, "learning_rate": 1.0092216590828115e-05, "loss": 0.050191402435302734, "step": 4686 }, { "epoch": 0.6336673820830446, "grad_norm": 0.07479794323444366, "learning_rate": 1.0085749390117146e-05, "loss": 0.07352566719055176, "step": 4687 }, { "epoch": 0.6338025788788806, "grad_norm": 0.08301626145839691, "learning_rate": 1.0079283212579354e-05, "loss": 0.06337213516235352, "step": 4688 }, { "epoch": 0.6339377756747165, "grad_norm": 0.1601741462945938, "learning_rate": 1.0072818059561045e-05, "loss": 0.05661177635192871, "step": 4689 }, { "epoch": 0.6340729724705525, "grad_norm": 0.17615368962287903, "learning_rate": 1.0066353932408285e-05, "loss": 0.09593796730041504, "step": 4690 }, { "epoch": 0.6342081692663883, "grad_norm": 0.0910477340221405, "learning_rate": 1.0059890832466948e-05, "loss": 0.04729163646697998, "step": 4691 }, { "epoch": 0.6343433660622243, "grad_norm": 0.12613417208194733, "learning_rate": 1.0053428761082684e-05, "loss": 0.07843419909477234, "step": 4692 }, { "epoch": 0.6344785628580603, "grad_norm": 0.08855003118515015, "learning_rate": 1.0046967719600927e-05, "loss": 0.04972577095031738, "step": 4693 }, { "epoch": 0.6346137596538962, "grad_norm": 0.06535353511571884, "learning_rate": 1.0040507709366912e-05, "loss": 0.06455039978027344, "step": 4694 }, { "epoch": 0.6347489564497322, "grad_norm": 0.13490630686283112, "learning_rate": 1.0034048731725631e-05, "loss": 0.056884169578552246, "step": 4695 }, { "epoch": 0.6348841532455681, "grad_norm": 0.057094454765319824, "learning_rate": 1.0027590788021886e-05, "loss": 0.06293630599975586, "step": 4696 }, { "epoch": 0.635019350041404, "grad_norm": 0.08362656831741333, "learning_rate": 1.0021133879600258e-05, "loss": 0.05770599842071533, "step": 4697 }, { "epoch": 0.6351545468372399, "grad_norm": 0.04646025970578194, "learning_rate": 1.0014678007805108e-05, "loss": 0.05965447425842285, "step": 4698 }, { "epoch": 0.6352897436330759, "grad_norm": 0.08744431287050247, "learning_rate": 1.0008223173980579e-05, "loss": 0.06365203857421875, "step": 4699 }, { "epoch": 0.6354249404289118, "grad_norm": 0.05192157253623009, "learning_rate": 1.0001769379470604e-05, "loss": 0.03719377517700195, "step": 4700 }, { "epoch": 0.6355601372247478, "grad_norm": 0.05070551484823227, "learning_rate": 9.995316625618898e-06, "loss": 0.06077384948730469, "step": 4701 }, { "epoch": 0.6356953340205838, "grad_norm": 0.16018208861351013, "learning_rate": 9.988864913768962e-06, "loss": 0.07527637481689453, "step": 4702 }, { "epoch": 0.6358305308164196, "grad_norm": 0.0906892940402031, "learning_rate": 9.982414245264071e-06, "loss": 0.07805728912353516, "step": 4703 }, { "epoch": 0.6359657276122556, "grad_norm": 0.05629304423928261, "learning_rate": 9.975964621447293e-06, "loss": 0.032604217529296875, "step": 4704 }, { "epoch": 0.6361009244080915, "grad_norm": 0.07576316595077515, "learning_rate": 9.96951604366147e-06, "loss": 0.059985995292663574, "step": 4705 }, { "epoch": 0.6362361212039275, "grad_norm": 0.051404230296611786, "learning_rate": 9.963068513249233e-06, "loss": 0.06557893753051758, "step": 4706 }, { "epoch": 0.6363713179997634, "grad_norm": 0.08788328617811203, "learning_rate": 9.956622031552996e-06, "loss": 0.07783031463623047, "step": 4707 }, { "epoch": 0.6365065147955994, "grad_norm": 0.06323837488889694, "learning_rate": 9.950176599914942e-06, "loss": 0.0667886734008789, "step": 4708 }, { "epoch": 0.6366417115914352, "grad_norm": 0.09150069952011108, "learning_rate": 9.943732219677048e-06, "loss": 0.05652618408203125, "step": 4709 }, { "epoch": 0.6367769083872712, "grad_norm": 0.0968921035528183, "learning_rate": 9.93728889218107e-06, "loss": 0.05686187744140625, "step": 4710 }, { "epoch": 0.6369121051831071, "grad_norm": 0.08614809066057205, "learning_rate": 9.930846618768543e-06, "loss": 0.07626223564147949, "step": 4711 }, { "epoch": 0.6370473019789431, "grad_norm": 0.04705547168850899, "learning_rate": 9.924405400780784e-06, "loss": 0.05328822135925293, "step": 4712 }, { "epoch": 0.6371824987747791, "grad_norm": 0.05670127645134926, "learning_rate": 9.917965239558885e-06, "loss": 0.05498456954956055, "step": 4713 }, { "epoch": 0.637317695570615, "grad_norm": 0.10341071337461472, "learning_rate": 9.911526136443726e-06, "loss": 0.07686924934387207, "step": 4714 }, { "epoch": 0.6374528923664509, "grad_norm": 0.11412568390369415, "learning_rate": 9.905088092775956e-06, "loss": 0.08795738220214844, "step": 4715 }, { "epoch": 0.6375880891622868, "grad_norm": 0.1375456154346466, "learning_rate": 9.898651109896015e-06, "loss": 0.07765698432922363, "step": 4716 }, { "epoch": 0.6377232859581228, "grad_norm": 0.0671454593539238, "learning_rate": 9.892215189144123e-06, "loss": 0.03809666633605957, "step": 4717 }, { "epoch": 0.6378584827539587, "grad_norm": 0.03875022381544113, "learning_rate": 9.88578033186026e-06, "loss": 0.036783814430236816, "step": 4718 }, { "epoch": 0.6379936795497947, "grad_norm": 0.09829258173704147, "learning_rate": 9.879346539384207e-06, "loss": 0.05499982833862305, "step": 4719 }, { "epoch": 0.6381288763456306, "grad_norm": 0.07232803851366043, "learning_rate": 9.87291381305551e-06, "loss": 0.037989139556884766, "step": 4720 }, { "epoch": 0.6382640731414665, "grad_norm": 0.12916040420532227, "learning_rate": 9.866482154213502e-06, "loss": 0.0659339427947998, "step": 4721 }, { "epoch": 0.6383992699373024, "grad_norm": 0.04350670799612999, "learning_rate": 9.86005156419728e-06, "loss": 0.03740239143371582, "step": 4722 }, { "epoch": 0.6385344667331384, "grad_norm": 0.09592660516500473, "learning_rate": 9.853622044345732e-06, "loss": 0.08411908149719238, "step": 4723 }, { "epoch": 0.6386696635289744, "grad_norm": 0.09439944475889206, "learning_rate": 9.847193595997522e-06, "loss": 0.08109784126281738, "step": 4724 }, { "epoch": 0.6388048603248103, "grad_norm": 0.07170064002275467, "learning_rate": 9.840766220491078e-06, "loss": 0.07858061790466309, "step": 4725 }, { "epoch": 0.6389400571206463, "grad_norm": 0.0726851373910904, "learning_rate": 9.834339919164625e-06, "loss": 0.07430505752563477, "step": 4726 }, { "epoch": 0.6390752539164822, "grad_norm": 0.09529086947441101, "learning_rate": 9.827914693356145e-06, "loss": 0.06363117694854736, "step": 4727 }, { "epoch": 0.6392104507123181, "grad_norm": 0.054350197315216064, "learning_rate": 9.821490544403403e-06, "loss": 0.05079984664916992, "step": 4728 }, { "epoch": 0.639345647508154, "grad_norm": 0.07870441675186157, "learning_rate": 9.815067473643951e-06, "loss": 0.05780029296875, "step": 4729 }, { "epoch": 0.63948084430399, "grad_norm": 0.0771501362323761, "learning_rate": 9.808645482415097e-06, "loss": 0.06372389942407608, "step": 4730 }, { "epoch": 0.639616041099826, "grad_norm": 0.05503477156162262, "learning_rate": 9.80222457205394e-06, "loss": 0.05171859264373779, "step": 4731 }, { "epoch": 0.6397512378956619, "grad_norm": 0.3246934413909912, "learning_rate": 9.795804743897341e-06, "loss": 0.06792831420898438, "step": 4732 }, { "epoch": 0.6398864346914979, "grad_norm": 0.09514765441417694, "learning_rate": 9.789385999281948e-06, "loss": 0.06976509094238281, "step": 4733 }, { "epoch": 0.6400216314873337, "grad_norm": 0.07772054523229599, "learning_rate": 9.782968339544179e-06, "loss": 0.05619454383850098, "step": 4734 }, { "epoch": 0.6401568282831697, "grad_norm": 0.07579687237739563, "learning_rate": 9.776551766020219e-06, "loss": 0.04684770107269287, "step": 4735 }, { "epoch": 0.6402920250790056, "grad_norm": 0.0779886320233345, "learning_rate": 9.77013628004604e-06, "loss": 0.05618441104888916, "step": 4736 }, { "epoch": 0.6404272218748416, "grad_norm": 0.06135810911655426, "learning_rate": 9.763721882957371e-06, "loss": 0.042951107025146484, "step": 4737 }, { "epoch": 0.6405624186706775, "grad_norm": 0.08087282627820969, "learning_rate": 9.757308576089732e-06, "loss": 0.0594707727432251, "step": 4738 }, { "epoch": 0.6406976154665135, "grad_norm": 0.033737912774086, "learning_rate": 9.750896360778404e-06, "loss": 0.032074689865112305, "step": 4739 }, { "epoch": 0.6408328122623493, "grad_norm": 0.04383016377687454, "learning_rate": 9.744485238358448e-06, "loss": 0.06212806701660156, "step": 4740 }, { "epoch": 0.6409680090581853, "grad_norm": 0.0744631439447403, "learning_rate": 9.73807521016469e-06, "loss": 0.054350078105926514, "step": 4741 }, { "epoch": 0.6411032058540213, "grad_norm": 0.106045201420784, "learning_rate": 9.731666277531732e-06, "loss": 0.05135154724121094, "step": 4742 }, { "epoch": 0.6412384026498572, "grad_norm": 0.11143618077039719, "learning_rate": 9.725258441793947e-06, "loss": 0.060022830963134766, "step": 4743 }, { "epoch": 0.6413735994456932, "grad_norm": 0.059104084968566895, "learning_rate": 9.71885170428549e-06, "loss": 0.04802215099334717, "step": 4744 }, { "epoch": 0.6415087962415291, "grad_norm": 0.11476949602365494, "learning_rate": 9.712446066340265e-06, "loss": 0.06240034103393555, "step": 4745 }, { "epoch": 0.641643993037365, "grad_norm": 0.06269510835409164, "learning_rate": 9.70604152929197e-06, "loss": 0.04493749141693115, "step": 4746 }, { "epoch": 0.6417791898332009, "grad_norm": 0.059973087161779404, "learning_rate": 9.699638094474054e-06, "loss": 0.0621793270111084, "step": 4747 }, { "epoch": 0.6419143866290369, "grad_norm": 0.04422726109623909, "learning_rate": 9.693235763219752e-06, "loss": 0.04668474197387695, "step": 4748 }, { "epoch": 0.6420495834248728, "grad_norm": 0.07597751170396805, "learning_rate": 9.68683453686207e-06, "loss": 0.0742950439453125, "step": 4749 }, { "epoch": 0.6421847802207088, "grad_norm": 0.0901431292295456, "learning_rate": 9.680434416733763e-06, "loss": 0.06837058067321777, "step": 4750 }, { "epoch": 0.6423199770165448, "grad_norm": 0.11406173557043076, "learning_rate": 9.674035404167381e-06, "loss": 0.04448425769805908, "step": 4751 }, { "epoch": 0.6424551738123806, "grad_norm": 0.09654321521520615, "learning_rate": 9.66763750049523e-06, "loss": 0.04701948165893555, "step": 4752 }, { "epoch": 0.6425903706082166, "grad_norm": 0.060939475893974304, "learning_rate": 9.66124070704939e-06, "loss": 0.05481863021850586, "step": 4753 }, { "epoch": 0.6427255674040525, "grad_norm": 0.12973052263259888, "learning_rate": 9.654845025161699e-06, "loss": 0.06844782829284668, "step": 4754 }, { "epoch": 0.6428607641998885, "grad_norm": 0.06413166970014572, "learning_rate": 9.648450456163777e-06, "loss": 0.04809761047363281, "step": 4755 }, { "epoch": 0.6429959609957244, "grad_norm": 0.05641203373670578, "learning_rate": 9.64205700138701e-06, "loss": 0.05434608459472656, "step": 4756 }, { "epoch": 0.6431311577915604, "grad_norm": 0.062255438417196274, "learning_rate": 9.635664662162548e-06, "loss": 0.06367373466491699, "step": 4757 }, { "epoch": 0.6432663545873962, "grad_norm": 0.19724592566490173, "learning_rate": 9.629273439821315e-06, "loss": 0.05742764472961426, "step": 4758 }, { "epoch": 0.6434015513832322, "grad_norm": 0.10318677127361298, "learning_rate": 9.622883335693984e-06, "loss": 0.07251644134521484, "step": 4759 }, { "epoch": 0.6435367481790681, "grad_norm": 0.1288115382194519, "learning_rate": 9.616494351111017e-06, "loss": 0.06653308868408203, "step": 4760 }, { "epoch": 0.6436719449749041, "grad_norm": 0.03664591163396835, "learning_rate": 9.610106487402637e-06, "loss": 0.04697155952453613, "step": 4761 }, { "epoch": 0.64380714177074, "grad_norm": 0.04664827510714531, "learning_rate": 9.603719745898826e-06, "loss": 0.047150611877441406, "step": 4762 }, { "epoch": 0.643942338566576, "grad_norm": 0.08848141878843307, "learning_rate": 9.597334127929346e-06, "loss": 0.07023659348487854, "step": 4763 }, { "epoch": 0.6440775353624119, "grad_norm": 0.09126049280166626, "learning_rate": 9.590949634823707e-06, "loss": 0.07262587547302246, "step": 4764 }, { "epoch": 0.6442127321582478, "grad_norm": 0.12153051048517227, "learning_rate": 9.584566267911198e-06, "loss": 0.05871462821960449, "step": 4765 }, { "epoch": 0.6443479289540838, "grad_norm": 0.1530318707227707, "learning_rate": 9.578184028520874e-06, "loss": 0.05263471603393555, "step": 4766 }, { "epoch": 0.6444831257499197, "grad_norm": 0.06433980911970139, "learning_rate": 9.571802917981548e-06, "loss": 0.05490922927856445, "step": 4767 }, { "epoch": 0.6446183225457557, "grad_norm": 0.050201933830976486, "learning_rate": 9.565422937621798e-06, "loss": 0.03552103042602539, "step": 4768 }, { "epoch": 0.6447535193415916, "grad_norm": 0.07437662780284882, "learning_rate": 9.559044088769971e-06, "loss": 0.07383871078491211, "step": 4769 }, { "epoch": 0.6448887161374276, "grad_norm": 0.05894550308585167, "learning_rate": 9.552666372754182e-06, "loss": 0.05084085464477539, "step": 4770 }, { "epoch": 0.6450239129332634, "grad_norm": 0.05779692530632019, "learning_rate": 9.546289790902307e-06, "loss": 0.03959548473358154, "step": 4771 }, { "epoch": 0.6451591097290994, "grad_norm": 0.09428199380636215, "learning_rate": 9.539914344541976e-06, "loss": 0.07280898094177246, "step": 4772 }, { "epoch": 0.6452943065249354, "grad_norm": 0.0767393484711647, "learning_rate": 9.533540035000598e-06, "loss": 0.055715084075927734, "step": 4773 }, { "epoch": 0.6454295033207713, "grad_norm": 0.07694047689437866, "learning_rate": 9.52716686360533e-06, "loss": 0.0643010139465332, "step": 4774 }, { "epoch": 0.6455647001166073, "grad_norm": 0.09121976047754288, "learning_rate": 9.520794831683108e-06, "loss": 0.0471339225769043, "step": 4775 }, { "epoch": 0.6456998969124432, "grad_norm": 0.08535441756248474, "learning_rate": 9.514423940560627e-06, "loss": 0.07198810577392578, "step": 4776 }, { "epoch": 0.6458350937082791, "grad_norm": 0.17966727912425995, "learning_rate": 9.508054191564326e-06, "loss": 0.07330751419067383, "step": 4777 }, { "epoch": 0.645970290504115, "grad_norm": 0.049681130796670914, "learning_rate": 9.501685586020434e-06, "loss": 0.055103421211242676, "step": 4778 }, { "epoch": 0.646105487299951, "grad_norm": 0.07365203648805618, "learning_rate": 9.495318125254919e-06, "loss": 0.06869173049926758, "step": 4779 }, { "epoch": 0.6462406840957869, "grad_norm": 0.15162532031536102, "learning_rate": 9.488951810593527e-06, "loss": 0.0923604965209961, "step": 4780 }, { "epoch": 0.6463758808916229, "grad_norm": 0.04366796091198921, "learning_rate": 9.48258664336176e-06, "loss": 0.04380369186401367, "step": 4781 }, { "epoch": 0.6465110776874589, "grad_norm": 0.1019766628742218, "learning_rate": 9.476222624884873e-06, "loss": 0.05139732360839844, "step": 4782 }, { "epoch": 0.6466462744832947, "grad_norm": 0.11347633600234985, "learning_rate": 9.469859756487893e-06, "loss": 0.06167197227478027, "step": 4783 }, { "epoch": 0.6467814712791307, "grad_norm": 0.11291565746068954, "learning_rate": 9.463498039495598e-06, "loss": 0.06554603576660156, "step": 4784 }, { "epoch": 0.6469166680749666, "grad_norm": 0.21300140023231506, "learning_rate": 9.457137475232537e-06, "loss": 0.08475494384765625, "step": 4785 }, { "epoch": 0.6470518648708026, "grad_norm": 0.1279429942369461, "learning_rate": 9.450778065023019e-06, "loss": 0.06005287170410156, "step": 4786 }, { "epoch": 0.6471870616666385, "grad_norm": 0.05140487477183342, "learning_rate": 9.444419810191091e-06, "loss": 0.0693502426147461, "step": 4787 }, { "epoch": 0.6473222584624745, "grad_norm": 0.10644088685512543, "learning_rate": 9.43806271206059e-06, "loss": 0.06517815589904785, "step": 4788 }, { "epoch": 0.6474574552583103, "grad_norm": 0.07961923629045486, "learning_rate": 9.431706771955089e-06, "loss": 0.05404472351074219, "step": 4789 }, { "epoch": 0.6475926520541463, "grad_norm": 0.07635562121868134, "learning_rate": 9.425351991197937e-06, "loss": 0.08744347095489502, "step": 4790 }, { "epoch": 0.6477278488499822, "grad_norm": 0.16630281507968903, "learning_rate": 9.418998371112221e-06, "loss": 0.05486869812011719, "step": 4791 }, { "epoch": 0.6478630456458182, "grad_norm": 0.10429415106773376, "learning_rate": 9.412645913020807e-06, "loss": 0.07294023036956787, "step": 4792 }, { "epoch": 0.6479982424416542, "grad_norm": 0.07620232552289963, "learning_rate": 9.406294618246313e-06, "loss": 0.06562268733978271, "step": 4793 }, { "epoch": 0.6481334392374901, "grad_norm": 0.10241834074258804, "learning_rate": 9.399944488111103e-06, "loss": 0.03882145881652832, "step": 4794 }, { "epoch": 0.648268636033326, "grad_norm": 0.07540863007307053, "learning_rate": 9.39359552393732e-06, "loss": 0.056829214096069336, "step": 4795 }, { "epoch": 0.6484038328291619, "grad_norm": 0.06442712992429733, "learning_rate": 9.387247727046845e-06, "loss": 0.06419014930725098, "step": 4796 }, { "epoch": 0.6485390296249979, "grad_norm": 0.07219398766756058, "learning_rate": 9.380901098761319e-06, "loss": 0.05280756950378418, "step": 4797 }, { "epoch": 0.6486742264208338, "grad_norm": 0.0553305558860302, "learning_rate": 9.374555640402153e-06, "loss": 0.05795645713806152, "step": 4798 }, { "epoch": 0.6488094232166698, "grad_norm": 0.18707898259162903, "learning_rate": 9.368211353290503e-06, "loss": 0.07580041885375977, "step": 4799 }, { "epoch": 0.6489446200125057, "grad_norm": 0.045232661068439484, "learning_rate": 9.36186823874728e-06, "loss": 0.03824043273925781, "step": 4800 }, { "epoch": 0.6490798168083416, "grad_norm": 0.10402955114841461, "learning_rate": 9.355526298093152e-06, "loss": 0.05919694900512695, "step": 4801 }, { "epoch": 0.6492150136041775, "grad_norm": 0.10305744409561157, "learning_rate": 9.34918553264855e-06, "loss": 0.07936334609985352, "step": 4802 }, { "epoch": 0.6493502104000135, "grad_norm": 0.08143419772386551, "learning_rate": 9.342845943733658e-06, "loss": 0.07880687713623047, "step": 4803 }, { "epoch": 0.6494854071958495, "grad_norm": 0.07584871351718903, "learning_rate": 9.336507532668407e-06, "loss": 0.05534029006958008, "step": 4804 }, { "epoch": 0.6496206039916854, "grad_norm": 0.08206778019666672, "learning_rate": 9.33017030077249e-06, "loss": 0.06379318237304688, "step": 4805 }, { "epoch": 0.6497558007875214, "grad_norm": 0.116139255464077, "learning_rate": 9.323834249365346e-06, "loss": 0.04954695701599121, "step": 4806 }, { "epoch": 0.6498909975833572, "grad_norm": 0.1587880253791809, "learning_rate": 9.317499379766183e-06, "loss": 0.07483458518981934, "step": 4807 }, { "epoch": 0.6500261943791932, "grad_norm": 0.1243368610739708, "learning_rate": 9.311165693293954e-06, "loss": 0.06823205947875977, "step": 4808 }, { "epoch": 0.6501613911750291, "grad_norm": 0.05966208875179291, "learning_rate": 9.304833191267364e-06, "loss": 0.054343223571777344, "step": 4809 }, { "epoch": 0.6502965879708651, "grad_norm": 0.10272936522960663, "learning_rate": 9.298501875004874e-06, "loss": 0.0591428279876709, "step": 4810 }, { "epoch": 0.650431784766701, "grad_norm": 0.0742470920085907, "learning_rate": 9.292171745824695e-06, "loss": 0.05108910799026489, "step": 4811 }, { "epoch": 0.650566981562537, "grad_norm": 0.07919621467590332, "learning_rate": 9.285842805044797e-06, "loss": 0.07143473625183105, "step": 4812 }, { "epoch": 0.6507021783583729, "grad_norm": 0.06798703223466873, "learning_rate": 9.279515053982905e-06, "loss": 0.054782867431640625, "step": 4813 }, { "epoch": 0.6508373751542088, "grad_norm": 0.10217496752738953, "learning_rate": 9.273188493956476e-06, "loss": 0.043814897537231445, "step": 4814 }, { "epoch": 0.6509725719500448, "grad_norm": 0.08980832248926163, "learning_rate": 9.266863126282746e-06, "loss": 0.060454368591308594, "step": 4815 }, { "epoch": 0.6511077687458807, "grad_norm": 0.09616213291883469, "learning_rate": 9.260538952278683e-06, "loss": 0.03709614276885986, "step": 4816 }, { "epoch": 0.6512429655417167, "grad_norm": 0.13874880969524384, "learning_rate": 9.254215973261014e-06, "loss": 0.09170341491699219, "step": 4817 }, { "epoch": 0.6513781623375526, "grad_norm": 0.2109307050704956, "learning_rate": 9.247894190546228e-06, "loss": 0.06992048025131226, "step": 4818 }, { "epoch": 0.6515133591333886, "grad_norm": 0.12446895986795425, "learning_rate": 9.241573605450539e-06, "loss": 0.07599878311157227, "step": 4819 }, { "epoch": 0.6516485559292244, "grad_norm": 0.06491612643003464, "learning_rate": 9.235254219289937e-06, "loss": 0.04820096492767334, "step": 4820 }, { "epoch": 0.6517837527250604, "grad_norm": 0.07671085000038147, "learning_rate": 9.228936033380143e-06, "loss": 0.07025575637817383, "step": 4821 }, { "epoch": 0.6519189495208964, "grad_norm": 0.08806153386831284, "learning_rate": 9.222619049036649e-06, "loss": 0.0580744743347168, "step": 4822 }, { "epoch": 0.6520541463167323, "grad_norm": 0.09657975286245346, "learning_rate": 9.216303267574674e-06, "loss": 0.07407474517822266, "step": 4823 }, { "epoch": 0.6521893431125683, "grad_norm": 0.05526379495859146, "learning_rate": 9.209988690309198e-06, "loss": 0.050994873046875, "step": 4824 }, { "epoch": 0.6523245399084042, "grad_norm": 0.12516328692436218, "learning_rate": 9.203675318554956e-06, "loss": 0.0677499771118164, "step": 4825 }, { "epoch": 0.6524597367042401, "grad_norm": 0.04505413398146629, "learning_rate": 9.19736315362642e-06, "loss": 0.05249142646789551, "step": 4826 }, { "epoch": 0.652594933500076, "grad_norm": 0.10846035927534103, "learning_rate": 9.191052196837825e-06, "loss": 0.05478870868682861, "step": 4827 }, { "epoch": 0.652730130295912, "grad_norm": 0.1005919873714447, "learning_rate": 9.184742449503135e-06, "loss": 0.05190730094909668, "step": 4828 }, { "epoch": 0.6528653270917479, "grad_norm": 0.08012963831424713, "learning_rate": 9.178433912936077e-06, "loss": 0.0807640552520752, "step": 4829 }, { "epoch": 0.6530005238875839, "grad_norm": 0.03739846125245094, "learning_rate": 9.172126588450125e-06, "loss": 0.035629332065582275, "step": 4830 }, { "epoch": 0.6531357206834199, "grad_norm": 0.07629495114088058, "learning_rate": 9.165820477358491e-06, "loss": 0.052832841873168945, "step": 4831 }, { "epoch": 0.6532709174792557, "grad_norm": 0.05624593794345856, "learning_rate": 9.159515580974154e-06, "loss": 0.04668146371841431, "step": 4832 }, { "epoch": 0.6534061142750917, "grad_norm": 0.09051258116960526, "learning_rate": 9.15321190060981e-06, "loss": 0.05543529987335205, "step": 4833 }, { "epoch": 0.6535413110709276, "grad_norm": 0.08451498299837112, "learning_rate": 9.14690943757793e-06, "loss": 0.05232357978820801, "step": 4834 }, { "epoch": 0.6536765078667636, "grad_norm": 0.07455456256866455, "learning_rate": 9.14060819319072e-06, "loss": 0.0569191575050354, "step": 4835 }, { "epoch": 0.6538117046625995, "grad_norm": 0.06281017512083054, "learning_rate": 9.134308168760127e-06, "loss": 0.051880478858947754, "step": 4836 }, { "epoch": 0.6539469014584355, "grad_norm": 0.08092804998159409, "learning_rate": 9.128009365597854e-06, "loss": 0.0421660840511322, "step": 4837 }, { "epoch": 0.6540820982542713, "grad_norm": 0.06607811152935028, "learning_rate": 9.121711785015342e-06, "loss": 0.04873204231262207, "step": 4838 }, { "epoch": 0.6542172950501073, "grad_norm": 0.044848911464214325, "learning_rate": 9.115415428323787e-06, "loss": 0.042281270027160645, "step": 4839 }, { "epoch": 0.6543524918459432, "grad_norm": 0.11027204990386963, "learning_rate": 9.109120296834118e-06, "loss": 0.08812141418457031, "step": 4840 }, { "epoch": 0.6544876886417792, "grad_norm": 0.09283500164747238, "learning_rate": 9.10282639185702e-06, "loss": 0.07068729400634766, "step": 4841 }, { "epoch": 0.6546228854376152, "grad_norm": 0.09839151054620743, "learning_rate": 9.096533714702913e-06, "loss": 0.060366153717041016, "step": 4842 }, { "epoch": 0.6547580822334511, "grad_norm": 0.06751798093318939, "learning_rate": 9.090242266681967e-06, "loss": 0.05329442024230957, "step": 4843 }, { "epoch": 0.654893279029287, "grad_norm": 0.0747271478176117, "learning_rate": 9.083952049104094e-06, "loss": 0.05246615409851074, "step": 4844 }, { "epoch": 0.6550284758251229, "grad_norm": 0.1263909637928009, "learning_rate": 9.07766306327896e-06, "loss": 0.0943707823753357, "step": 4845 }, { "epoch": 0.6551636726209589, "grad_norm": 0.08197560161352158, "learning_rate": 9.071375310515949e-06, "loss": 0.060854434967041016, "step": 4846 }, { "epoch": 0.6552988694167948, "grad_norm": 0.06990229338407516, "learning_rate": 9.065088792124219e-06, "loss": 0.03728199005126953, "step": 4847 }, { "epoch": 0.6554340662126308, "grad_norm": 0.093485027551651, "learning_rate": 9.058803509412647e-06, "loss": 0.07423925399780273, "step": 4848 }, { "epoch": 0.6555692630084667, "grad_norm": 0.09205955266952515, "learning_rate": 9.05251946368987e-06, "loss": 0.05419516563415527, "step": 4849 }, { "epoch": 0.6557044598043026, "grad_norm": 0.05168626829981804, "learning_rate": 9.046236656264258e-06, "loss": 0.05808258056640625, "step": 4850 }, { "epoch": 0.6558396566001385, "grad_norm": 0.0883084386587143, "learning_rate": 9.03995508844392e-06, "loss": 0.04830360412597656, "step": 4851 }, { "epoch": 0.6559748533959745, "grad_norm": 0.06716379523277283, "learning_rate": 9.033674761536718e-06, "loss": 0.07019400596618652, "step": 4852 }, { "epoch": 0.6561100501918105, "grad_norm": 0.03799884393811226, "learning_rate": 9.027395676850244e-06, "loss": 0.04934489727020264, "step": 4853 }, { "epoch": 0.6562452469876464, "grad_norm": 0.06861934065818787, "learning_rate": 9.02111783569184e-06, "loss": 0.06679368019104004, "step": 4854 }, { "epoch": 0.6563804437834824, "grad_norm": 0.09711926430463791, "learning_rate": 9.014841239368591e-06, "loss": 0.06822347640991211, "step": 4855 }, { "epoch": 0.6565156405793182, "grad_norm": 0.05366342514753342, "learning_rate": 9.008565889187308e-06, "loss": 0.05695343017578125, "step": 4856 }, { "epoch": 0.6566508373751542, "grad_norm": 0.11509646475315094, "learning_rate": 9.00229178645456e-06, "loss": 0.0469319224357605, "step": 4857 }, { "epoch": 0.6567860341709901, "grad_norm": 0.0437554195523262, "learning_rate": 8.996018932476641e-06, "loss": 0.03499004244804382, "step": 4858 }, { "epoch": 0.6569212309668261, "grad_norm": 0.06808724999427795, "learning_rate": 8.989747328559606e-06, "loss": 0.05707836151123047, "step": 4859 }, { "epoch": 0.657056427762662, "grad_norm": 0.06786102801561356, "learning_rate": 8.98347697600922e-06, "loss": 0.06307046115398407, "step": 4860 }, { "epoch": 0.657191624558498, "grad_norm": 0.05690736323595047, "learning_rate": 8.977207876131013e-06, "loss": 0.05515623092651367, "step": 4861 }, { "epoch": 0.657326821354334, "grad_norm": 0.09086936712265015, "learning_rate": 8.970940030230245e-06, "loss": 0.053468942642211914, "step": 4862 }, { "epoch": 0.6574620181501698, "grad_norm": 0.05574815720319748, "learning_rate": 8.96467343961191e-06, "loss": 0.05734062194824219, "step": 4863 }, { "epoch": 0.6575972149460058, "grad_norm": 0.12143168598413467, "learning_rate": 8.958408105580759e-06, "loss": 0.06808352470397949, "step": 4864 }, { "epoch": 0.6577324117418417, "grad_norm": 0.09449589997529984, "learning_rate": 8.952144029441248e-06, "loss": 0.07207393646240234, "step": 4865 }, { "epoch": 0.6578676085376777, "grad_norm": 0.12222263962030411, "learning_rate": 8.945881212497603e-06, "loss": 0.04599571228027344, "step": 4866 }, { "epoch": 0.6580028053335136, "grad_norm": 0.06164616718888283, "learning_rate": 8.939619656053777e-06, "loss": 0.06098055839538574, "step": 4867 }, { "epoch": 0.6581380021293496, "grad_norm": 0.1222454309463501, "learning_rate": 8.933359361413456e-06, "loss": 0.09478718042373657, "step": 4868 }, { "epoch": 0.6582731989251854, "grad_norm": 0.09668613970279694, "learning_rate": 8.92710032988007e-06, "loss": 0.07592487335205078, "step": 4869 }, { "epoch": 0.6584083957210214, "grad_norm": 0.09866383671760559, "learning_rate": 8.920842562756773e-06, "loss": 0.08246517181396484, "step": 4870 }, { "epoch": 0.6585435925168573, "grad_norm": 0.055106084793806076, "learning_rate": 8.914586061346474e-06, "loss": 0.04721784591674805, "step": 4871 }, { "epoch": 0.6586787893126933, "grad_norm": 0.06283453851938248, "learning_rate": 8.908330826951811e-06, "loss": 0.06782341003417969, "step": 4872 }, { "epoch": 0.6588139861085293, "grad_norm": 0.0724920928478241, "learning_rate": 8.902076860875155e-06, "loss": 0.04080760478973389, "step": 4873 }, { "epoch": 0.6589491829043652, "grad_norm": 0.1315796822309494, "learning_rate": 8.895824164418615e-06, "loss": 0.06891417503356934, "step": 4874 }, { "epoch": 0.6590843797002011, "grad_norm": 0.15078124403953552, "learning_rate": 8.889572738884033e-06, "loss": 0.06255936622619629, "step": 4875 }, { "epoch": 0.659219576496037, "grad_norm": 0.0863567441701889, "learning_rate": 8.88332258557299e-06, "loss": 0.07738113403320312, "step": 4876 }, { "epoch": 0.659354773291873, "grad_norm": 0.04448531195521355, "learning_rate": 8.877073705786806e-06, "loss": 0.0442737340927124, "step": 4877 }, { "epoch": 0.6594899700877089, "grad_norm": 0.17885813117027283, "learning_rate": 8.870826100826527e-06, "loss": 0.07945680618286133, "step": 4878 }, { "epoch": 0.6596251668835449, "grad_norm": 0.038998864591121674, "learning_rate": 8.86457977199294e-06, "loss": 0.031073331832885742, "step": 4879 }, { "epoch": 0.6597603636793808, "grad_norm": 0.1214047223329544, "learning_rate": 8.85833472058656e-06, "loss": 0.08460617065429688, "step": 4880 }, { "epoch": 0.6598955604752167, "grad_norm": 0.1492588222026825, "learning_rate": 8.852090947907643e-06, "loss": 0.053884267807006836, "step": 4881 }, { "epoch": 0.6600307572710526, "grad_norm": 0.046563033014535904, "learning_rate": 8.84584845525618e-06, "loss": 0.05501675605773926, "step": 4882 }, { "epoch": 0.6601659540668886, "grad_norm": 0.08040779829025269, "learning_rate": 8.83960724393188e-06, "loss": 0.08896112442016602, "step": 4883 }, { "epoch": 0.6603011508627246, "grad_norm": 0.05154357850551605, "learning_rate": 8.833367315234206e-06, "loss": 0.05368471145629883, "step": 4884 }, { "epoch": 0.6604363476585605, "grad_norm": 0.05111148953437805, "learning_rate": 8.82712867046234e-06, "loss": 0.03934288024902344, "step": 4885 }, { "epoch": 0.6605715444543965, "grad_norm": 0.05213287100195885, "learning_rate": 8.820891310915203e-06, "loss": 0.046380043029785156, "step": 4886 }, { "epoch": 0.6607067412502323, "grad_norm": 0.06324321776628494, "learning_rate": 8.81465523789145e-06, "loss": 0.042552947998046875, "step": 4887 }, { "epoch": 0.6608419380460683, "grad_norm": 0.07806697487831116, "learning_rate": 8.808420452689455e-06, "loss": 0.06439042091369629, "step": 4888 }, { "epoch": 0.6609771348419042, "grad_norm": 0.17124710977077484, "learning_rate": 8.802186956607344e-06, "loss": 0.06840074062347412, "step": 4889 }, { "epoch": 0.6611123316377402, "grad_norm": 0.10869485884904861, "learning_rate": 8.795954750942954e-06, "loss": 0.04557037353515625, "step": 4890 }, { "epoch": 0.6612475284335761, "grad_norm": 0.09238927066326141, "learning_rate": 8.789723836993878e-06, "loss": 0.09480142593383789, "step": 4891 }, { "epoch": 0.6613827252294121, "grad_norm": 0.11820916086435318, "learning_rate": 8.783494216057407e-06, "loss": 0.06707632541656494, "step": 4892 }, { "epoch": 0.661517922025248, "grad_norm": 0.033634837716817856, "learning_rate": 8.777265889430593e-06, "loss": 0.03417396545410156, "step": 4893 }, { "epoch": 0.6616531188210839, "grad_norm": 0.15732374787330627, "learning_rate": 8.771038858410206e-06, "loss": 0.0848923921585083, "step": 4894 }, { "epoch": 0.6617883156169199, "grad_norm": 0.0708540603518486, "learning_rate": 8.764813124292744e-06, "loss": 0.05016779899597168, "step": 4895 }, { "epoch": 0.6619235124127558, "grad_norm": 0.058807123452425, "learning_rate": 8.758588688374445e-06, "loss": 0.054766178131103516, "step": 4896 }, { "epoch": 0.6620587092085918, "grad_norm": 0.11723329871892929, "learning_rate": 8.752365551951262e-06, "loss": 0.1027536392211914, "step": 4897 }, { "epoch": 0.6621939060044277, "grad_norm": 0.0490725077688694, "learning_rate": 8.74614371631888e-06, "loss": 0.03446251153945923, "step": 4898 }, { "epoch": 0.6623291028002636, "grad_norm": 0.07183082401752472, "learning_rate": 8.739923182772732e-06, "loss": 0.05862855911254883, "step": 4899 }, { "epoch": 0.6624642995960995, "grad_norm": 0.07355359196662903, "learning_rate": 8.733703952607956e-06, "loss": 0.0664055347442627, "step": 4900 }, { "epoch": 0.6625994963919355, "grad_norm": 0.042284250259399414, "learning_rate": 8.727486027119443e-06, "loss": 0.04697680473327637, "step": 4901 }, { "epoch": 0.6627346931877715, "grad_norm": 0.09456198662519455, "learning_rate": 8.721269407601783e-06, "loss": 0.08613348007202148, "step": 4902 }, { "epoch": 0.6628698899836074, "grad_norm": 0.04701028764247894, "learning_rate": 8.71505409534931e-06, "loss": 0.042190492153167725, "step": 4903 }, { "epoch": 0.6630050867794434, "grad_norm": 0.0853782370686531, "learning_rate": 8.708840091656093e-06, "loss": 0.05188995599746704, "step": 4904 }, { "epoch": 0.6631402835752793, "grad_norm": 0.0897306501865387, "learning_rate": 8.70262739781592e-06, "loss": 0.09391683340072632, "step": 4905 }, { "epoch": 0.6632754803711152, "grad_norm": 0.08579965680837631, "learning_rate": 8.696416015122302e-06, "loss": 0.05850362777709961, "step": 4906 }, { "epoch": 0.6634106771669511, "grad_norm": 0.06838425993919373, "learning_rate": 8.690205944868487e-06, "loss": 0.04645967483520508, "step": 4907 }, { "epoch": 0.6635458739627871, "grad_norm": 0.06270729005336761, "learning_rate": 8.683997188347436e-06, "loss": 0.04364466667175293, "step": 4908 }, { "epoch": 0.663681070758623, "grad_norm": 0.14725305140018463, "learning_rate": 8.677789746851855e-06, "loss": 0.07762765884399414, "step": 4909 }, { "epoch": 0.663816267554459, "grad_norm": 0.07702852040529251, "learning_rate": 8.671583621674167e-06, "loss": 0.04913806915283203, "step": 4910 }, { "epoch": 0.663951464350295, "grad_norm": 0.04927091673016548, "learning_rate": 8.665378814106512e-06, "loss": 0.049920082092285156, "step": 4911 }, { "epoch": 0.6640866611461308, "grad_norm": 0.09376531839370728, "learning_rate": 8.65917532544077e-06, "loss": 0.057477474212646484, "step": 4912 }, { "epoch": 0.6642218579419668, "grad_norm": 0.06192830204963684, "learning_rate": 8.652973156968532e-06, "loss": 0.04173433780670166, "step": 4913 }, { "epoch": 0.6643570547378027, "grad_norm": 0.13013476133346558, "learning_rate": 8.646772309981141e-06, "loss": 0.08666729927062988, "step": 4914 }, { "epoch": 0.6644922515336387, "grad_norm": 0.0691540464758873, "learning_rate": 8.640572785769624e-06, "loss": 0.07049989700317383, "step": 4915 }, { "epoch": 0.6646274483294746, "grad_norm": 0.10392516106367111, "learning_rate": 8.63437458562477e-06, "loss": 0.037362098693847656, "step": 4916 }, { "epoch": 0.6647626451253106, "grad_norm": 0.11799994111061096, "learning_rate": 8.628177710837068e-06, "loss": 0.09870147705078125, "step": 4917 }, { "epoch": 0.6648978419211464, "grad_norm": 0.10603231191635132, "learning_rate": 8.621982162696752e-06, "loss": 0.08280134201049805, "step": 4918 }, { "epoch": 0.6650330387169824, "grad_norm": 0.11542531847953796, "learning_rate": 8.615787942493766e-06, "loss": 0.03516983985900879, "step": 4919 }, { "epoch": 0.6651682355128183, "grad_norm": 0.07931727170944214, "learning_rate": 8.609595051517765e-06, "loss": 0.07717704772949219, "step": 4920 }, { "epoch": 0.6653034323086543, "grad_norm": 0.09853526204824448, "learning_rate": 8.603403491058157e-06, "loss": 0.0735853910446167, "step": 4921 }, { "epoch": 0.6654386291044903, "grad_norm": 0.06230878457427025, "learning_rate": 8.597213262404046e-06, "loss": 0.05725669860839844, "step": 4922 }, { "epoch": 0.6655738259003262, "grad_norm": 0.048624128103256226, "learning_rate": 8.591024366844291e-06, "loss": 0.044798851013183594, "step": 4923 }, { "epoch": 0.6657090226961621, "grad_norm": 0.1617489457130432, "learning_rate": 8.584836805667434e-06, "loss": 0.06394720077514648, "step": 4924 }, { "epoch": 0.665844219491998, "grad_norm": 0.0880943313241005, "learning_rate": 8.578650580161754e-06, "loss": 0.07224702835083008, "step": 4925 }, { "epoch": 0.665979416287834, "grad_norm": 0.10280217230319977, "learning_rate": 8.572465691615275e-06, "loss": 0.06000781059265137, "step": 4926 }, { "epoch": 0.6661146130836699, "grad_norm": 0.055835746228694916, "learning_rate": 8.56628214131571e-06, "loss": 0.0520939826965332, "step": 4927 }, { "epoch": 0.6662498098795059, "grad_norm": 0.1812639683485031, "learning_rate": 8.560099930550523e-06, "loss": 0.08518028259277344, "step": 4928 }, { "epoch": 0.6663850066753418, "grad_norm": 0.07087575644254684, "learning_rate": 8.553919060606866e-06, "loss": 0.03808313608169556, "step": 4929 }, { "epoch": 0.6665202034711777, "grad_norm": 0.0676916167140007, "learning_rate": 8.54773953277163e-06, "loss": 0.06779193878173828, "step": 4930 }, { "epoch": 0.6666554002670136, "grad_norm": 0.12563782930374146, "learning_rate": 8.541561348331433e-06, "loss": 0.08685779571533203, "step": 4931 }, { "epoch": 0.6667905970628496, "grad_norm": 0.1221809834241867, "learning_rate": 8.535384508572603e-06, "loss": 0.08554840087890625, "step": 4932 }, { "epoch": 0.6669257938586856, "grad_norm": 0.06465268135070801, "learning_rate": 8.529209014781202e-06, "loss": 0.0711984634399414, "step": 4933 }, { "epoch": 0.6670609906545215, "grad_norm": 0.0423555001616478, "learning_rate": 8.523034868242984e-06, "loss": 0.04691123962402344, "step": 4934 }, { "epoch": 0.6671961874503575, "grad_norm": 0.07108409702777863, "learning_rate": 8.51686207024344e-06, "loss": 0.07406044006347656, "step": 4935 }, { "epoch": 0.6673313842461933, "grad_norm": 0.04676260054111481, "learning_rate": 8.510690622067792e-06, "loss": 0.03845500946044922, "step": 4936 }, { "epoch": 0.6674665810420293, "grad_norm": 0.09336749464273453, "learning_rate": 8.50452052500096e-06, "loss": 0.06414878368377686, "step": 4937 }, { "epoch": 0.6676017778378652, "grad_norm": 0.0990876704454422, "learning_rate": 8.498351780327594e-06, "loss": 0.08987903594970703, "step": 4938 }, { "epoch": 0.6677369746337012, "grad_norm": 0.05231555551290512, "learning_rate": 8.492184389332061e-06, "loss": 0.05912339687347412, "step": 4939 }, { "epoch": 0.6678721714295371, "grad_norm": 0.061920735985040665, "learning_rate": 8.486018353298432e-06, "loss": 0.08337268233299255, "step": 4940 }, { "epoch": 0.6680073682253731, "grad_norm": 0.044133953750133514, "learning_rate": 8.479853673510528e-06, "loss": 0.04243624210357666, "step": 4941 }, { "epoch": 0.668142565021209, "grad_norm": 0.11150945723056793, "learning_rate": 8.473690351251855e-06, "loss": 0.04516524076461792, "step": 4942 }, { "epoch": 0.6682777618170449, "grad_norm": 0.04301859810948372, "learning_rate": 8.467528387805656e-06, "loss": 0.03423190116882324, "step": 4943 }, { "epoch": 0.6684129586128809, "grad_norm": 0.0725431889295578, "learning_rate": 8.461367784454881e-06, "loss": 0.050350189208984375, "step": 4944 }, { "epoch": 0.6685481554087168, "grad_norm": 0.12706536054611206, "learning_rate": 8.455208542482195e-06, "loss": 0.06219625473022461, "step": 4945 }, { "epoch": 0.6686833522045528, "grad_norm": 0.0744287371635437, "learning_rate": 8.449050663170004e-06, "loss": 0.07586669921875, "step": 4946 }, { "epoch": 0.6688185490003887, "grad_norm": 0.08442191779613495, "learning_rate": 8.442894147800387e-06, "loss": 0.048914432525634766, "step": 4947 }, { "epoch": 0.6689537457962246, "grad_norm": 0.0932261198759079, "learning_rate": 8.436738997655184e-06, "loss": 0.05852067470550537, "step": 4948 }, { "epoch": 0.6690889425920605, "grad_norm": 0.04948500916361809, "learning_rate": 8.430585214015918e-06, "loss": 0.03929734230041504, "step": 4949 }, { "epoch": 0.6692241393878965, "grad_norm": 0.07685403525829315, "learning_rate": 8.424432798163838e-06, "loss": 0.06699171662330627, "step": 4950 }, { "epoch": 0.6693593361837324, "grad_norm": 0.12221293896436691, "learning_rate": 8.418281751379926e-06, "loss": 0.06083989143371582, "step": 4951 }, { "epoch": 0.6694945329795684, "grad_norm": 0.2653440833091736, "learning_rate": 8.41213207494484e-06, "loss": 0.09709548950195312, "step": 4952 }, { "epoch": 0.6696297297754044, "grad_norm": 0.09595558047294617, "learning_rate": 8.405983770138992e-06, "loss": 0.07072973251342773, "step": 4953 }, { "epoch": 0.6697649265712403, "grad_norm": 0.11023281514644623, "learning_rate": 8.399836838242479e-06, "loss": 0.06504392623901367, "step": 4954 }, { "epoch": 0.6699001233670762, "grad_norm": 0.06484043598175049, "learning_rate": 8.393691280535143e-06, "loss": 0.05286455154418945, "step": 4955 }, { "epoch": 0.6700353201629121, "grad_norm": 0.21476531028747559, "learning_rate": 8.387547098296516e-06, "loss": 0.1045083999633789, "step": 4956 }, { "epoch": 0.6701705169587481, "grad_norm": 0.06728379428386688, "learning_rate": 8.38140429280583e-06, "loss": 0.05539655685424805, "step": 4957 }, { "epoch": 0.670305713754584, "grad_norm": 0.11947528272867203, "learning_rate": 8.375262865342073e-06, "loss": 0.07096648216247559, "step": 4958 }, { "epoch": 0.67044091055042, "grad_norm": 0.07799821346998215, "learning_rate": 8.36912281718391e-06, "loss": 0.05708050727844238, "step": 4959 }, { "epoch": 0.670576107346256, "grad_norm": 0.07748235017061234, "learning_rate": 8.362984149609748e-06, "loss": 0.0420374870300293, "step": 4960 }, { "epoch": 0.6707113041420918, "grad_norm": 0.14528732001781464, "learning_rate": 8.356846863897672e-06, "loss": 0.06159478425979614, "step": 4961 }, { "epoch": 0.6708465009379277, "grad_norm": 0.05735364928841591, "learning_rate": 8.350710961325498e-06, "loss": 0.031278252601623535, "step": 4962 }, { "epoch": 0.6709816977337637, "grad_norm": 0.04464547336101532, "learning_rate": 8.344576443170768e-06, "loss": 0.041180968284606934, "step": 4963 }, { "epoch": 0.6711168945295997, "grad_norm": 0.07714742422103882, "learning_rate": 8.338443310710708e-06, "loss": 0.05145072937011719, "step": 4964 }, { "epoch": 0.6712520913254356, "grad_norm": 0.1096891239285469, "learning_rate": 8.332311565222284e-06, "loss": 0.0773930549621582, "step": 4965 }, { "epoch": 0.6713872881212716, "grad_norm": 0.16796119511127472, "learning_rate": 8.326181207982145e-06, "loss": 0.08898353576660156, "step": 4966 }, { "epoch": 0.6715224849171074, "grad_norm": 0.07012584805488586, "learning_rate": 8.32005224026666e-06, "loss": 0.06497669219970703, "step": 4967 }, { "epoch": 0.6716576817129434, "grad_norm": 0.0804951936006546, "learning_rate": 8.313924663351927e-06, "loss": 0.05633258819580078, "step": 4968 }, { "epoch": 0.6717928785087793, "grad_norm": 0.1588028073310852, "learning_rate": 8.307798478513733e-06, "loss": 0.07164812088012695, "step": 4969 }, { "epoch": 0.6719280753046153, "grad_norm": 0.0795266330242157, "learning_rate": 8.301673687027583e-06, "loss": 0.03998684883117676, "step": 4970 }, { "epoch": 0.6720632721004512, "grad_norm": 0.03827924281358719, "learning_rate": 8.295550290168692e-06, "loss": 0.04121851921081543, "step": 4971 }, { "epoch": 0.6721984688962872, "grad_norm": 0.041952770203351974, "learning_rate": 8.289428289211977e-06, "loss": 0.0473254919052124, "step": 4972 }, { "epoch": 0.672333665692123, "grad_norm": 0.12119283527135849, "learning_rate": 8.283307685432083e-06, "loss": 0.08006525039672852, "step": 4973 }, { "epoch": 0.672468862487959, "grad_norm": 0.06962171941995621, "learning_rate": 8.277188480103348e-06, "loss": 0.08458590507507324, "step": 4974 }, { "epoch": 0.672604059283795, "grad_norm": 0.079900823533535, "learning_rate": 8.271070674499821e-06, "loss": 0.06687116622924805, "step": 4975 }, { "epoch": 0.6727392560796309, "grad_norm": 0.08159684389829636, "learning_rate": 8.264954269895262e-06, "loss": 0.048860788345336914, "step": 4976 }, { "epoch": 0.6728744528754669, "grad_norm": 0.07078345119953156, "learning_rate": 8.258839267563134e-06, "loss": 0.049520134925842285, "step": 4977 }, { "epoch": 0.6730096496713028, "grad_norm": 0.049564849585294724, "learning_rate": 8.252725668776623e-06, "loss": 0.05319809913635254, "step": 4978 }, { "epoch": 0.6731448464671387, "grad_norm": 0.06402891129255295, "learning_rate": 8.24661347480861e-06, "loss": 0.0640106201171875, "step": 4979 }, { "epoch": 0.6732800432629746, "grad_norm": 0.07140089571475983, "learning_rate": 8.240502686931686e-06, "loss": 0.05462515354156494, "step": 4980 }, { "epoch": 0.6734152400588106, "grad_norm": 0.038319412618875504, "learning_rate": 8.234393306418148e-06, "loss": 0.03282272815704346, "step": 4981 }, { "epoch": 0.6735504368546466, "grad_norm": 0.14451679587364197, "learning_rate": 8.228285334539995e-06, "loss": 0.08681714534759521, "step": 4982 }, { "epoch": 0.6736856336504825, "grad_norm": 0.1048111841082573, "learning_rate": 8.22217877256896e-06, "loss": 0.0899057388305664, "step": 4983 }, { "epoch": 0.6738208304463185, "grad_norm": 0.08846127241849899, "learning_rate": 8.216073621776436e-06, "loss": 0.07339954376220703, "step": 4984 }, { "epoch": 0.6739560272421543, "grad_norm": 0.06530094891786575, "learning_rate": 8.209969883433566e-06, "loss": 0.06296944618225098, "step": 4985 }, { "epoch": 0.6740912240379903, "grad_norm": 0.06321869790554047, "learning_rate": 8.203867558811177e-06, "loss": 0.06943702697753906, "step": 4986 }, { "epoch": 0.6742264208338262, "grad_norm": 0.0455254502594471, "learning_rate": 8.197766649179795e-06, "loss": 0.050469398498535156, "step": 4987 }, { "epoch": 0.6743616176296622, "grad_norm": 0.051120419055223465, "learning_rate": 8.191667155809684e-06, "loss": 0.05274152755737305, "step": 4988 }, { "epoch": 0.6744968144254981, "grad_norm": 0.16293583810329437, "learning_rate": 8.185569079970764e-06, "loss": 0.05103790760040283, "step": 4989 }, { "epoch": 0.6746320112213341, "grad_norm": 0.055799953639507294, "learning_rate": 8.179472422932709e-06, "loss": 0.058223724365234375, "step": 4990 }, { "epoch": 0.6747672080171699, "grad_norm": 0.09432735294103622, "learning_rate": 8.17337718596486e-06, "loss": 0.0534052848815918, "step": 4991 }, { "epoch": 0.6749024048130059, "grad_norm": 0.09030844271183014, "learning_rate": 8.167283370336295e-06, "loss": 0.08411085605621338, "step": 4992 }, { "epoch": 0.6750376016088419, "grad_norm": 0.05092301219701767, "learning_rate": 8.161190977315766e-06, "loss": 0.05570089817047119, "step": 4993 }, { "epoch": 0.6751727984046778, "grad_norm": 0.10943484306335449, "learning_rate": 8.155100008171736e-06, "loss": 0.07501763105392456, "step": 4994 }, { "epoch": 0.6753079952005138, "grad_norm": 0.04953750595450401, "learning_rate": 8.149010464172392e-06, "loss": 0.03264760971069336, "step": 4995 }, { "epoch": 0.6754431919963497, "grad_norm": 0.06535929441452026, "learning_rate": 8.142922346585597e-06, "loss": 0.04820537567138672, "step": 4996 }, { "epoch": 0.6755783887921857, "grad_norm": 0.08695446699857712, "learning_rate": 8.13683565667895e-06, "loss": 0.06612396240234375, "step": 4997 }, { "epoch": 0.6757135855880215, "grad_norm": 0.05927109345793724, "learning_rate": 8.13075039571971e-06, "loss": 0.043105125427246094, "step": 4998 }, { "epoch": 0.6758487823838575, "grad_norm": 0.04301923140883446, "learning_rate": 8.124666564974864e-06, "loss": 0.04664754867553711, "step": 4999 }, { "epoch": 0.6759839791796934, "grad_norm": 0.11336946487426758, "learning_rate": 8.11858416571111e-06, "loss": 0.07625558972358704, "step": 5000 }, { "epoch": 0.6761191759755294, "grad_norm": 0.08768303692340851, "learning_rate": 8.112503199194821e-06, "loss": 0.0694434642791748, "step": 5001 }, { "epoch": 0.6762543727713654, "grad_norm": 0.0627063512802124, "learning_rate": 8.106423666692108e-06, "loss": 0.07008993625640869, "step": 5002 }, { "epoch": 0.6763895695672013, "grad_norm": 0.07504633814096451, "learning_rate": 8.100345569468742e-06, "loss": 0.07243967056274414, "step": 5003 }, { "epoch": 0.6765247663630372, "grad_norm": 0.05151161924004555, "learning_rate": 8.094268908790215e-06, "loss": 0.05637025833129883, "step": 5004 }, { "epoch": 0.6766599631588731, "grad_norm": 0.04521086439490318, "learning_rate": 8.088193685921733e-06, "loss": 0.04199337959289551, "step": 5005 }, { "epoch": 0.6767951599547091, "grad_norm": 0.06515444070100784, "learning_rate": 8.082119902128185e-06, "loss": 0.07120251655578613, "step": 5006 }, { "epoch": 0.676930356750545, "grad_norm": 0.11023542284965515, "learning_rate": 8.076047558674164e-06, "loss": 0.09229302406311035, "step": 5007 }, { "epoch": 0.677065553546381, "grad_norm": 0.08682873845100403, "learning_rate": 8.069976656823964e-06, "loss": 0.055080413818359375, "step": 5008 }, { "epoch": 0.6772007503422169, "grad_norm": 0.1625545471906662, "learning_rate": 8.063907197841574e-06, "loss": 0.040492117404937744, "step": 5009 }, { "epoch": 0.6773359471380528, "grad_norm": 0.07702849805355072, "learning_rate": 8.057839182990698e-06, "loss": 0.05395984649658203, "step": 5010 }, { "epoch": 0.6774711439338887, "grad_norm": 0.17600449919700623, "learning_rate": 8.051772613534725e-06, "loss": 0.05693554878234863, "step": 5011 }, { "epoch": 0.6776063407297247, "grad_norm": 0.07110217213630676, "learning_rate": 8.045707490736745e-06, "loss": 0.05278158187866211, "step": 5012 }, { "epoch": 0.6777415375255607, "grad_norm": 0.0685405284166336, "learning_rate": 8.039643815859552e-06, "loss": 0.055429697036743164, "step": 5013 }, { "epoch": 0.6778767343213966, "grad_norm": 0.13837657868862152, "learning_rate": 8.033581590165627e-06, "loss": 0.08815836906433105, "step": 5014 }, { "epoch": 0.6780119311172326, "grad_norm": 0.051498956978321075, "learning_rate": 8.027520814917175e-06, "loss": 0.047371864318847656, "step": 5015 }, { "epoch": 0.6781471279130684, "grad_norm": 0.12761028110980988, "learning_rate": 8.021461491376064e-06, "loss": 0.07303357124328613, "step": 5016 }, { "epoch": 0.6782823247089044, "grad_norm": 0.09460553526878357, "learning_rate": 8.015403620803885e-06, "loss": 0.06608045101165771, "step": 5017 }, { "epoch": 0.6784175215047403, "grad_norm": 0.05743256211280823, "learning_rate": 8.009347204461922e-06, "loss": 0.062085628509521484, "step": 5018 }, { "epoch": 0.6785527183005763, "grad_norm": 0.0879201740026474, "learning_rate": 8.003292243611143e-06, "loss": 0.05473756790161133, "step": 5019 }, { "epoch": 0.6786879150964122, "grad_norm": 0.07949728518724442, "learning_rate": 7.99723873951224e-06, "loss": 0.06790757179260254, "step": 5020 }, { "epoch": 0.6788231118922482, "grad_norm": 0.06531021744012833, "learning_rate": 7.991186693425563e-06, "loss": 0.05360287427902222, "step": 5021 }, { "epoch": 0.678958308688084, "grad_norm": 0.08996099978685379, "learning_rate": 7.9851361066112e-06, "loss": 0.0458034873008728, "step": 5022 }, { "epoch": 0.67909350548392, "grad_norm": 0.11217587441205978, "learning_rate": 7.979086980328907e-06, "loss": 0.05580878257751465, "step": 5023 }, { "epoch": 0.679228702279756, "grad_norm": 0.058305565267801285, "learning_rate": 7.973039315838137e-06, "loss": 0.05259418487548828, "step": 5024 }, { "epoch": 0.6793638990755919, "grad_norm": 0.11344636976718903, "learning_rate": 7.966993114398067e-06, "loss": 0.07076263427734375, "step": 5025 }, { "epoch": 0.6794990958714279, "grad_norm": 0.043777696788311005, "learning_rate": 7.960948377267524e-06, "loss": 0.048508420586586, "step": 5026 }, { "epoch": 0.6796342926672638, "grad_norm": 0.06955604255199432, "learning_rate": 7.954905105705071e-06, "loss": 0.05074763298034668, "step": 5027 }, { "epoch": 0.6797694894630997, "grad_norm": 0.08960990607738495, "learning_rate": 7.948863300968938e-06, "loss": 0.08656072616577148, "step": 5028 }, { "epoch": 0.6799046862589356, "grad_norm": 0.10875927656888962, "learning_rate": 7.942822964317078e-06, "loss": 0.05288103222846985, "step": 5029 }, { "epoch": 0.6800398830547716, "grad_norm": 0.09737157076597214, "learning_rate": 7.936784097007105e-06, "loss": 0.053118228912353516, "step": 5030 }, { "epoch": 0.6801750798506075, "grad_norm": 0.10650312155485153, "learning_rate": 7.930746700296344e-06, "loss": 0.08737468719482422, "step": 5031 }, { "epoch": 0.6803102766464435, "grad_norm": 0.07064100354909897, "learning_rate": 7.924710775441822e-06, "loss": 0.06871414184570312, "step": 5032 }, { "epoch": 0.6804454734422795, "grad_norm": 0.057951413094997406, "learning_rate": 7.918676323700241e-06, "loss": 0.06516039371490479, "step": 5033 }, { "epoch": 0.6805806702381153, "grad_norm": 0.07951295375823975, "learning_rate": 7.912643346328023e-06, "loss": 0.06445860862731934, "step": 5034 }, { "epoch": 0.6807158670339513, "grad_norm": 0.045828066766262054, "learning_rate": 7.906611844581251e-06, "loss": 0.04909694194793701, "step": 5035 }, { "epoch": 0.6808510638297872, "grad_norm": 0.17410415410995483, "learning_rate": 7.900581819715713e-06, "loss": 0.08871793746948242, "step": 5036 }, { "epoch": 0.6809862606256232, "grad_norm": 0.04508494213223457, "learning_rate": 7.894553272986901e-06, "loss": 0.040786564350128174, "step": 5037 }, { "epoch": 0.6811214574214591, "grad_norm": 0.06835804134607315, "learning_rate": 7.888526205649993e-06, "loss": 0.0617070198059082, "step": 5038 }, { "epoch": 0.6812566542172951, "grad_norm": 0.10027843713760376, "learning_rate": 7.882500618959849e-06, "loss": 0.08986115455627441, "step": 5039 }, { "epoch": 0.6813918510131309, "grad_norm": 0.06253597140312195, "learning_rate": 7.876476514171033e-06, "loss": 0.0515742301940918, "step": 5040 }, { "epoch": 0.6815270478089669, "grad_norm": 0.07670264691114426, "learning_rate": 7.870453892537788e-06, "loss": 0.07343602180480957, "step": 5041 }, { "epoch": 0.6816622446048028, "grad_norm": 0.06232365965843201, "learning_rate": 7.864432755314068e-06, "loss": 0.05206248164176941, "step": 5042 }, { "epoch": 0.6817974414006388, "grad_norm": 0.059800885617733, "learning_rate": 7.858413103753499e-06, "loss": 0.037760019302368164, "step": 5043 }, { "epoch": 0.6819326381964748, "grad_norm": 0.12328537553548813, "learning_rate": 7.852394939109408e-06, "loss": 0.054387569427490234, "step": 5044 }, { "epoch": 0.6820678349923107, "grad_norm": 0.14490880072116852, "learning_rate": 7.846378262634803e-06, "loss": 0.061079978942871094, "step": 5045 }, { "epoch": 0.6822030317881467, "grad_norm": 0.06602545827627182, "learning_rate": 7.840363075582385e-06, "loss": 0.07086324691772461, "step": 5046 }, { "epoch": 0.6823382285839825, "grad_norm": 0.09575103968381882, "learning_rate": 7.834349379204565e-06, "loss": 0.07340621948242188, "step": 5047 }, { "epoch": 0.6824734253798185, "grad_norm": 0.104802705347538, "learning_rate": 7.828337174753411e-06, "loss": 0.07211112976074219, "step": 5048 }, { "epoch": 0.6826086221756544, "grad_norm": 0.07989746332168579, "learning_rate": 7.822326463480703e-06, "loss": 0.06185269355773926, "step": 5049 }, { "epoch": 0.6827438189714904, "grad_norm": 0.05287535861134529, "learning_rate": 7.816317246637901e-06, "loss": 0.04005555808544159, "step": 5050 }, { "epoch": 0.6828790157673263, "grad_norm": 0.1011776402592659, "learning_rate": 7.810309525476152e-06, "loss": 0.052993178367614746, "step": 5051 }, { "epoch": 0.6830142125631623, "grad_norm": 0.16065378487110138, "learning_rate": 7.804303301246311e-06, "loss": 0.0652306079864502, "step": 5052 }, { "epoch": 0.6831494093589981, "grad_norm": 0.08364777266979218, "learning_rate": 7.798298575198884e-06, "loss": 0.09554576873779297, "step": 5053 }, { "epoch": 0.6832846061548341, "grad_norm": 0.11256513744592667, "learning_rate": 7.792295348584103e-06, "loss": 0.04108166694641113, "step": 5054 }, { "epoch": 0.6834198029506701, "grad_norm": 0.09159068018198013, "learning_rate": 7.786293622651866e-06, "loss": 0.06287813186645508, "step": 5055 }, { "epoch": 0.683554999746506, "grad_norm": 0.08099980652332306, "learning_rate": 7.78029339865176e-06, "loss": 0.04396533966064453, "step": 5056 }, { "epoch": 0.683690196542342, "grad_norm": 0.055911049246788025, "learning_rate": 7.774294677833078e-06, "loss": 0.05603682994842529, "step": 5057 }, { "epoch": 0.6838253933381779, "grad_norm": 0.0674651563167572, "learning_rate": 7.768297461444766e-06, "loss": 0.05206373333930969, "step": 5058 }, { "epoch": 0.6839605901340138, "grad_norm": 0.11200419813394547, "learning_rate": 7.762301750735494e-06, "loss": 0.07729125022888184, "step": 5059 }, { "epoch": 0.6840957869298497, "grad_norm": 0.0715426579117775, "learning_rate": 7.756307546953592e-06, "loss": 0.046530961990356445, "step": 5060 }, { "epoch": 0.6842309837256857, "grad_norm": 0.04778808727860451, "learning_rate": 7.750314851347087e-06, "loss": 0.03265070915222168, "step": 5061 }, { "epoch": 0.6843661805215216, "grad_norm": 0.06253280490636826, "learning_rate": 7.74432366516369e-06, "loss": 0.05382585525512695, "step": 5062 }, { "epoch": 0.6845013773173576, "grad_norm": 0.027975067496299744, "learning_rate": 7.738333989650794e-06, "loss": 0.022824764251708984, "step": 5063 }, { "epoch": 0.6846365741131936, "grad_norm": 0.06765957176685333, "learning_rate": 7.732345826055487e-06, "loss": 0.058557212352752686, "step": 5064 }, { "epoch": 0.6847717709090294, "grad_norm": 0.12224598973989487, "learning_rate": 7.726359175624537e-06, "loss": 0.07012653350830078, "step": 5065 }, { "epoch": 0.6849069677048654, "grad_norm": 0.08704402297735214, "learning_rate": 7.720374039604395e-06, "loss": 0.0554652214050293, "step": 5066 }, { "epoch": 0.6850421645007013, "grad_norm": 0.0807432010769844, "learning_rate": 7.714390419241198e-06, "loss": 0.06577324867248535, "step": 5067 }, { "epoch": 0.6851773612965373, "grad_norm": 0.06077314540743828, "learning_rate": 7.70840831578076e-06, "loss": 0.04194331169128418, "step": 5068 }, { "epoch": 0.6853125580923732, "grad_norm": 0.047380853444337845, "learning_rate": 7.702427730468601e-06, "loss": 0.05104851722717285, "step": 5069 }, { "epoch": 0.6854477548882092, "grad_norm": 0.056827180087566376, "learning_rate": 7.696448664549898e-06, "loss": 0.06889700889587402, "step": 5070 }, { "epoch": 0.685582951684045, "grad_norm": 0.07652460783720016, "learning_rate": 7.690471119269541e-06, "loss": 0.0346217155456543, "step": 5071 }, { "epoch": 0.685718148479881, "grad_norm": 0.06559426337480545, "learning_rate": 7.684495095872073e-06, "loss": 0.052144765853881836, "step": 5072 }, { "epoch": 0.685853345275717, "grad_norm": 0.09552326798439026, "learning_rate": 7.678520595601728e-06, "loss": 0.06816208362579346, "step": 5073 }, { "epoch": 0.6859885420715529, "grad_norm": 0.03336964547634125, "learning_rate": 7.672547619702445e-06, "loss": 0.033823758363723755, "step": 5074 }, { "epoch": 0.6861237388673889, "grad_norm": 0.18504440784454346, "learning_rate": 7.666576169417823e-06, "loss": 0.0859217643737793, "step": 5075 }, { "epoch": 0.6862589356632248, "grad_norm": 0.048577431589365005, "learning_rate": 7.660606245991147e-06, "loss": 0.04616965353488922, "step": 5076 }, { "epoch": 0.6863941324590607, "grad_norm": 0.1842295229434967, "learning_rate": 7.654637850665393e-06, "loss": 0.07582733035087585, "step": 5077 }, { "epoch": 0.6865293292548966, "grad_norm": 0.07542487233877182, "learning_rate": 7.648670984683199e-06, "loss": 0.05849027633666992, "step": 5078 }, { "epoch": 0.6866645260507326, "grad_norm": 0.07645473629236221, "learning_rate": 7.642705649286916e-06, "loss": 0.049231886863708496, "step": 5079 }, { "epoch": 0.6867997228465685, "grad_norm": 0.10021381825208664, "learning_rate": 7.63674184571855e-06, "loss": 0.06332027912139893, "step": 5080 }, { "epoch": 0.6869349196424045, "grad_norm": 0.09316113591194153, "learning_rate": 7.630779575219797e-06, "loss": 0.046317338943481445, "step": 5081 }, { "epoch": 0.6870701164382405, "grad_norm": 0.06710349768400192, "learning_rate": 7.6248188390320344e-06, "loss": 0.0571141242980957, "step": 5082 }, { "epoch": 0.6872053132340763, "grad_norm": 0.051020462065935135, "learning_rate": 7.6188596383963135e-06, "loss": 0.04548037052154541, "step": 5083 }, { "epoch": 0.6873405100299123, "grad_norm": 0.0944100096821785, "learning_rate": 7.612901974553388e-06, "loss": 0.05563795566558838, "step": 5084 }, { "epoch": 0.6874757068257482, "grad_norm": 0.04953437298536301, "learning_rate": 7.606945848743653e-06, "loss": 0.05012011528015137, "step": 5085 }, { "epoch": 0.6876109036215842, "grad_norm": 0.18776196241378784, "learning_rate": 7.600991262207221e-06, "loss": 0.09292605519294739, "step": 5086 }, { "epoch": 0.6877461004174201, "grad_norm": 0.045769114047288895, "learning_rate": 7.595038216183867e-06, "loss": 0.05554145574569702, "step": 5087 }, { "epoch": 0.6878812972132561, "grad_norm": 0.06939801573753357, "learning_rate": 7.589086711913037e-06, "loss": 0.04562854766845703, "step": 5088 }, { "epoch": 0.688016494009092, "grad_norm": 0.0660228580236435, "learning_rate": 7.583136750633885e-06, "loss": 0.05246230959892273, "step": 5089 }, { "epoch": 0.6881516908049279, "grad_norm": 0.1277281939983368, "learning_rate": 7.577188333585202e-06, "loss": 0.06319618225097656, "step": 5090 }, { "epoch": 0.6882868876007638, "grad_norm": 0.14541186392307281, "learning_rate": 7.5712414620054975e-06, "loss": 0.09457850456237793, "step": 5091 }, { "epoch": 0.6884220843965998, "grad_norm": 0.11648689210414886, "learning_rate": 7.565296137132935e-06, "loss": 0.07001566886901855, "step": 5092 }, { "epoch": 0.6885572811924358, "grad_norm": 0.2234610915184021, "learning_rate": 7.559352360205357e-06, "loss": 0.06101638078689575, "step": 5093 }, { "epoch": 0.6886924779882717, "grad_norm": 0.044088516384363174, "learning_rate": 7.553410132460308e-06, "loss": 0.03093719482421875, "step": 5094 }, { "epoch": 0.6888276747841077, "grad_norm": 0.0879405215382576, "learning_rate": 7.547469455134968e-06, "loss": 0.06865859031677246, "step": 5095 }, { "epoch": 0.6889628715799435, "grad_norm": 0.152337446808815, "learning_rate": 7.541530329466236e-06, "loss": 0.06282424926757812, "step": 5096 }, { "epoch": 0.6890980683757795, "grad_norm": 0.12875361740589142, "learning_rate": 7.535592756690661e-06, "loss": 0.09005355834960938, "step": 5097 }, { "epoch": 0.6892332651716154, "grad_norm": 0.07957500219345093, "learning_rate": 7.52965673804448e-06, "loss": 0.05158579349517822, "step": 5098 }, { "epoch": 0.6893684619674514, "grad_norm": 0.16104726493358612, "learning_rate": 7.5237222747636025e-06, "loss": 0.07291078567504883, "step": 5099 }, { "epoch": 0.6895036587632873, "grad_norm": 0.07990613579750061, "learning_rate": 7.517789368083611e-06, "loss": 0.0559384822845459, "step": 5100 }, { "epoch": 0.6896388555591233, "grad_norm": 0.07068870961666107, "learning_rate": 7.511858019239778e-06, "loss": 0.07599782943725586, "step": 5101 }, { "epoch": 0.6897740523549591, "grad_norm": 0.0743715837597847, "learning_rate": 7.505928229467038e-06, "loss": 0.05978125333786011, "step": 5102 }, { "epoch": 0.6899092491507951, "grad_norm": 0.09362413734197617, "learning_rate": 7.500000000000004e-06, "loss": 0.05211925506591797, "step": 5103 }, { "epoch": 0.6900444459466311, "grad_norm": 0.07234194129705429, "learning_rate": 7.494073332072963e-06, "loss": 0.0369877815246582, "step": 5104 }, { "epoch": 0.690179642742467, "grad_norm": 0.08082889765501022, "learning_rate": 7.488148226919877e-06, "loss": 0.05261269211769104, "step": 5105 }, { "epoch": 0.690314839538303, "grad_norm": 0.17888988554477692, "learning_rate": 7.482224685774393e-06, "loss": 0.09072732925415039, "step": 5106 }, { "epoch": 0.6904500363341389, "grad_norm": 0.11682732403278351, "learning_rate": 7.4763027098698184e-06, "loss": 0.0584789514541626, "step": 5107 }, { "epoch": 0.6905852331299748, "grad_norm": 0.07673139125108719, "learning_rate": 7.470382300439143e-06, "loss": 0.04524785280227661, "step": 5108 }, { "epoch": 0.6907204299258107, "grad_norm": 0.08818185329437256, "learning_rate": 7.4644634587150225e-06, "loss": 0.06822824478149414, "step": 5109 }, { "epoch": 0.6908556267216467, "grad_norm": 0.09656547009944916, "learning_rate": 7.4585461859297906e-06, "loss": 0.04591912031173706, "step": 5110 }, { "epoch": 0.6909908235174826, "grad_norm": 0.05788983032107353, "learning_rate": 7.452630483315463e-06, "loss": 0.04120028018951416, "step": 5111 }, { "epoch": 0.6911260203133186, "grad_norm": 0.1802745908498764, "learning_rate": 7.4467163521037186e-06, "loss": 0.09113550186157227, "step": 5112 }, { "epoch": 0.6912612171091546, "grad_norm": 0.12040071934461594, "learning_rate": 7.440803793525907e-06, "loss": 0.059673309326171875, "step": 5113 }, { "epoch": 0.6913964139049904, "grad_norm": 0.11113721132278442, "learning_rate": 7.434892808813056e-06, "loss": 0.06353449821472168, "step": 5114 }, { "epoch": 0.6915316107008264, "grad_norm": 0.13186430931091309, "learning_rate": 7.42898339919586e-06, "loss": 0.08459639549255371, "step": 5115 }, { "epoch": 0.6916668074966623, "grad_norm": 0.07785635441541672, "learning_rate": 7.423075565904698e-06, "loss": 0.04776430130004883, "step": 5116 }, { "epoch": 0.6918020042924983, "grad_norm": 0.08863280713558197, "learning_rate": 7.417169310169609e-06, "loss": 0.05180215835571289, "step": 5117 }, { "epoch": 0.6919372010883342, "grad_norm": 0.07045657187700272, "learning_rate": 7.411264633220305e-06, "loss": 0.04566240310668945, "step": 5118 }, { "epoch": 0.6920723978841702, "grad_norm": 0.04311591014266014, "learning_rate": 7.405361536286174e-06, "loss": 0.0449676513671875, "step": 5119 }, { "epoch": 0.692207594680006, "grad_norm": 0.09630683809518814, "learning_rate": 7.399460020596266e-06, "loss": 0.08295631408691406, "step": 5120 }, { "epoch": 0.692342791475842, "grad_norm": 0.07007215917110443, "learning_rate": 7.393560087379322e-06, "loss": 0.05119359493255615, "step": 5121 }, { "epoch": 0.692477988271678, "grad_norm": 0.06707391887903214, "learning_rate": 7.3876617378637195e-06, "loss": 0.04883003234863281, "step": 5122 }, { "epoch": 0.6926131850675139, "grad_norm": 0.07459626346826553, "learning_rate": 7.381764973277543e-06, "loss": 0.045946478843688965, "step": 5123 }, { "epoch": 0.6927483818633499, "grad_norm": 0.08041670173406601, "learning_rate": 7.375869794848525e-06, "loss": 0.07621145248413086, "step": 5124 }, { "epoch": 0.6928835786591858, "grad_norm": 0.0551561675965786, "learning_rate": 7.3699762038040654e-06, "loss": 0.05382108688354492, "step": 5125 }, { "epoch": 0.6930187754550217, "grad_norm": 0.17855863273143768, "learning_rate": 7.364084201371261e-06, "loss": 0.1080636978149414, "step": 5126 }, { "epoch": 0.6931539722508576, "grad_norm": 0.036853570491075516, "learning_rate": 7.3581937887768334e-06, "loss": 0.03311949968338013, "step": 5127 }, { "epoch": 0.6932891690466936, "grad_norm": 0.08787119388580322, "learning_rate": 7.352304967247217e-06, "loss": 0.04837656021118164, "step": 5128 }, { "epoch": 0.6934243658425295, "grad_norm": 0.056589942425489426, "learning_rate": 7.346417738008487e-06, "loss": 0.0705728530883789, "step": 5129 }, { "epoch": 0.6935595626383655, "grad_norm": 0.15202593803405762, "learning_rate": 7.340532102286399e-06, "loss": 0.07158017158508301, "step": 5130 }, { "epoch": 0.6936947594342014, "grad_norm": 0.0418815053999424, "learning_rate": 7.3346480613063725e-06, "loss": 0.03671407699584961, "step": 5131 }, { "epoch": 0.6938299562300374, "grad_norm": 0.10494381189346313, "learning_rate": 7.328765616293491e-06, "loss": 0.04256272315979004, "step": 5132 }, { "epoch": 0.6939651530258732, "grad_norm": 0.06368820369243622, "learning_rate": 7.322884768472521e-06, "loss": 0.06847715377807617, "step": 5133 }, { "epoch": 0.6941003498217092, "grad_norm": 0.044152189046144485, "learning_rate": 7.317005519067881e-06, "loss": 0.05760622024536133, "step": 5134 }, { "epoch": 0.6942355466175452, "grad_norm": 0.08222747594118118, "learning_rate": 7.311127869303665e-06, "loss": 0.039581298828125, "step": 5135 }, { "epoch": 0.6943707434133811, "grad_norm": 0.07158730924129486, "learning_rate": 7.305251820403628e-06, "loss": 0.051753997802734375, "step": 5136 }, { "epoch": 0.6945059402092171, "grad_norm": 0.08473771065473557, "learning_rate": 7.299377373591188e-06, "loss": 0.05568647384643555, "step": 5137 }, { "epoch": 0.694641137005053, "grad_norm": 0.09255312383174896, "learning_rate": 7.29350453008945e-06, "loss": 0.06441974639892578, "step": 5138 }, { "epoch": 0.6947763338008889, "grad_norm": 0.08644882589578629, "learning_rate": 7.287633291121166e-06, "loss": 0.0827341079711914, "step": 5139 }, { "epoch": 0.6949115305967248, "grad_norm": 0.07177211344242096, "learning_rate": 7.281763657908756e-06, "loss": 0.07648968696594238, "step": 5140 }, { "epoch": 0.6950467273925608, "grad_norm": 0.10879196971654892, "learning_rate": 7.275895631674313e-06, "loss": 0.0838766098022461, "step": 5141 }, { "epoch": 0.6951819241883967, "grad_norm": 0.033908288925886154, "learning_rate": 7.2700292136395826e-06, "loss": 0.029471397399902344, "step": 5142 }, { "epoch": 0.6953171209842327, "grad_norm": 0.11807725578546524, "learning_rate": 7.264164405025997e-06, "loss": 0.07389593124389648, "step": 5143 }, { "epoch": 0.6954523177800687, "grad_norm": 0.05082758143544197, "learning_rate": 7.2583012070546364e-06, "loss": 0.052376389503479004, "step": 5144 }, { "epoch": 0.6955875145759045, "grad_norm": 0.04344399645924568, "learning_rate": 7.252439620946247e-06, "loss": 0.05011838674545288, "step": 5145 }, { "epoch": 0.6957227113717405, "grad_norm": 0.09419479966163635, "learning_rate": 7.246579647921243e-06, "loss": 0.05389326810836792, "step": 5146 }, { "epoch": 0.6958579081675764, "grad_norm": 0.05912099406123161, "learning_rate": 7.240721289199699e-06, "loss": 0.043886661529541016, "step": 5147 }, { "epoch": 0.6959931049634124, "grad_norm": 0.051456380635499954, "learning_rate": 7.234864546001364e-06, "loss": 0.05203723907470703, "step": 5148 }, { "epoch": 0.6961283017592483, "grad_norm": 0.07783391326665878, "learning_rate": 7.229009419545638e-06, "loss": 0.061969757080078125, "step": 5149 }, { "epoch": 0.6962634985550843, "grad_norm": 0.053779236972332, "learning_rate": 7.223155911051593e-06, "loss": 0.053030967712402344, "step": 5150 }, { "epoch": 0.6963986953509201, "grad_norm": 0.12143230438232422, "learning_rate": 7.2173040217379575e-06, "loss": 0.07530605792999268, "step": 5151 }, { "epoch": 0.6965338921467561, "grad_norm": 0.061984140425920486, "learning_rate": 7.211453752823122e-06, "loss": 0.055178046226501465, "step": 5152 }, { "epoch": 0.696669088942592, "grad_norm": 0.11437245458364487, "learning_rate": 7.205605105525161e-06, "loss": 0.06825780868530273, "step": 5153 }, { "epoch": 0.696804285738428, "grad_norm": 0.05304133892059326, "learning_rate": 7.19975808106177e-06, "loss": 0.050585269927978516, "step": 5154 }, { "epoch": 0.696939482534264, "grad_norm": 0.0624159537255764, "learning_rate": 7.193912680650346e-06, "loss": 0.05103921890258789, "step": 5155 }, { "epoch": 0.6970746793300999, "grad_norm": 0.05065198987722397, "learning_rate": 7.188068905507931e-06, "loss": 0.06281852722167969, "step": 5156 }, { "epoch": 0.6972098761259358, "grad_norm": 0.10954926162958145, "learning_rate": 7.182226756851223e-06, "loss": 0.050206899642944336, "step": 5157 }, { "epoch": 0.6973450729217717, "grad_norm": 0.0819028839468956, "learning_rate": 7.176386235896603e-06, "loss": 0.06219756603240967, "step": 5158 }, { "epoch": 0.6974802697176077, "grad_norm": 0.05428193509578705, "learning_rate": 7.170547343860079e-06, "loss": 0.04502427577972412, "step": 5159 }, { "epoch": 0.6976154665134436, "grad_norm": 0.05840172991156578, "learning_rate": 7.164710081957355e-06, "loss": 0.056485652923583984, "step": 5160 }, { "epoch": 0.6977506633092796, "grad_norm": 0.11079628765583038, "learning_rate": 7.158874451403777e-06, "loss": 0.07830274105072021, "step": 5161 }, { "epoch": 0.6978858601051156, "grad_norm": 0.04507223516702652, "learning_rate": 7.15304045341435e-06, "loss": 0.039655983448028564, "step": 5162 }, { "epoch": 0.6980210569009514, "grad_norm": 0.049603331834077835, "learning_rate": 7.147208089203745e-06, "loss": 0.05478668212890625, "step": 5163 }, { "epoch": 0.6981562536967874, "grad_norm": 0.07919736951589584, "learning_rate": 7.141377359986288e-06, "loss": 0.06793379783630371, "step": 5164 }, { "epoch": 0.6982914504926233, "grad_norm": 0.06492088735103607, "learning_rate": 7.135548266975978e-06, "loss": 0.06708502769470215, "step": 5165 }, { "epoch": 0.6984266472884593, "grad_norm": 0.14459918439388275, "learning_rate": 7.129720811386456e-06, "loss": 0.06500530242919922, "step": 5166 }, { "epoch": 0.6985618440842952, "grad_norm": 0.06155581399798393, "learning_rate": 7.12389499443103e-06, "loss": 0.07036352157592773, "step": 5167 }, { "epoch": 0.6986970408801312, "grad_norm": 0.10594793409109116, "learning_rate": 7.118070817322668e-06, "loss": 0.04835796356201172, "step": 5168 }, { "epoch": 0.698832237675967, "grad_norm": 0.06809194386005402, "learning_rate": 7.1122482812739885e-06, "loss": 0.06902766227722168, "step": 5169 }, { "epoch": 0.698967434471803, "grad_norm": 0.07082079350948334, "learning_rate": 7.106427387497283e-06, "loss": 0.07391858100891113, "step": 5170 }, { "epoch": 0.6991026312676389, "grad_norm": 0.05770654231309891, "learning_rate": 7.10060813720449e-06, "loss": 0.05979514122009277, "step": 5171 }, { "epoch": 0.6992378280634749, "grad_norm": 0.10694356262683868, "learning_rate": 7.094790531607207e-06, "loss": 0.0682300329208374, "step": 5172 }, { "epoch": 0.6993730248593109, "grad_norm": 0.06676455587148666, "learning_rate": 7.088974571916692e-06, "loss": 0.04688286781311035, "step": 5173 }, { "epoch": 0.6995082216551468, "grad_norm": 0.09390151500701904, "learning_rate": 7.0831602593438515e-06, "loss": 0.05468571186065674, "step": 5174 }, { "epoch": 0.6996434184509827, "grad_norm": 0.05963120609521866, "learning_rate": 7.077347595099269e-06, "loss": 0.06016874313354492, "step": 5175 }, { "epoch": 0.6997786152468186, "grad_norm": 0.1497652530670166, "learning_rate": 7.071536580393166e-06, "loss": 0.06478071212768555, "step": 5176 }, { "epoch": 0.6999138120426546, "grad_norm": 0.05569586902856827, "learning_rate": 7.065727216435426e-06, "loss": 0.03982280194759369, "step": 5177 }, { "epoch": 0.7000490088384905, "grad_norm": 0.14853443205356598, "learning_rate": 7.05991950443559e-06, "loss": 0.07564735412597656, "step": 5178 }, { "epoch": 0.7001842056343265, "grad_norm": 0.04694928228855133, "learning_rate": 7.05411344560285e-06, "loss": 0.0429036021232605, "step": 5179 }, { "epoch": 0.7003194024301624, "grad_norm": 0.15553955733776093, "learning_rate": 7.048309041146069e-06, "loss": 0.08684754371643066, "step": 5180 }, { "epoch": 0.7004545992259984, "grad_norm": 0.0598907433450222, "learning_rate": 7.0425062922737495e-06, "loss": 0.07079172134399414, "step": 5181 }, { "epoch": 0.7005897960218342, "grad_norm": 0.10334526002407074, "learning_rate": 7.036705200194053e-06, "loss": 0.05799150466918945, "step": 5182 }, { "epoch": 0.7007249928176702, "grad_norm": 0.1398114264011383, "learning_rate": 7.0309057661148e-06, "loss": 0.07380008697509766, "step": 5183 }, { "epoch": 0.7008601896135062, "grad_norm": 0.09317490458488464, "learning_rate": 7.0251079912434565e-06, "loss": 0.06856918334960938, "step": 5184 }, { "epoch": 0.7009953864093421, "grad_norm": 0.09354618936777115, "learning_rate": 7.019311876787169e-06, "loss": 0.03395271301269531, "step": 5185 }, { "epoch": 0.7011305832051781, "grad_norm": 0.08463387936353683, "learning_rate": 7.013517423952696e-06, "loss": 0.0663614273071289, "step": 5186 }, { "epoch": 0.701265780001014, "grad_norm": 0.10186035931110382, "learning_rate": 7.0077246339464904e-06, "loss": 0.0855398178100586, "step": 5187 }, { "epoch": 0.7014009767968499, "grad_norm": 0.08779264986515045, "learning_rate": 7.001933507974635e-06, "loss": 0.05159878730773926, "step": 5188 }, { "epoch": 0.7015361735926858, "grad_norm": 0.07597818225622177, "learning_rate": 6.996144047242868e-06, "loss": 0.06801390647888184, "step": 5189 }, { "epoch": 0.7016713703885218, "grad_norm": 0.08194998651742935, "learning_rate": 6.9903562529566044e-06, "loss": 0.0679391622543335, "step": 5190 }, { "epoch": 0.7018065671843577, "grad_norm": 0.047737594693899155, "learning_rate": 6.984570126320869e-06, "loss": 0.042282700538635254, "step": 5191 }, { "epoch": 0.7019417639801937, "grad_norm": 0.052308209240436554, "learning_rate": 6.978785668540384e-06, "loss": 0.02779725193977356, "step": 5192 }, { "epoch": 0.7020769607760297, "grad_norm": 0.10460575670003891, "learning_rate": 6.973002880819496e-06, "loss": 0.07366108894348145, "step": 5193 }, { "epoch": 0.7022121575718655, "grad_norm": 0.12668466567993164, "learning_rate": 6.96722176436221e-06, "loss": 0.055776357650756836, "step": 5194 }, { "epoch": 0.7023473543677015, "grad_norm": 0.09756961464881897, "learning_rate": 6.9614423203721975e-06, "loss": 0.06728655099868774, "step": 5195 }, { "epoch": 0.7024825511635374, "grad_norm": 0.07483081519603729, "learning_rate": 6.955664550052749e-06, "loss": 0.04792451858520508, "step": 5196 }, { "epoch": 0.7026177479593734, "grad_norm": 0.049773622304201126, "learning_rate": 6.949888454606847e-06, "loss": 0.05487394332885742, "step": 5197 }, { "epoch": 0.7027529447552093, "grad_norm": 0.043582089245319366, "learning_rate": 6.944114035237095e-06, "loss": 0.041115760803222656, "step": 5198 }, { "epoch": 0.7028881415510453, "grad_norm": 0.059224683791399, "learning_rate": 6.93834129314576e-06, "loss": 0.052294254302978516, "step": 5199 }, { "epoch": 0.7030233383468811, "grad_norm": 0.20177409052848816, "learning_rate": 6.932570229534759e-06, "loss": 0.11620330810546875, "step": 5200 }, { "epoch": 0.7031585351427171, "grad_norm": 0.06465593725442886, "learning_rate": 6.9268008456056505e-06, "loss": 0.032606661319732666, "step": 5201 }, { "epoch": 0.703293731938553, "grad_norm": 0.07341928035020828, "learning_rate": 6.921033142559664e-06, "loss": 0.05399465560913086, "step": 5202 }, { "epoch": 0.703428928734389, "grad_norm": 0.08364728093147278, "learning_rate": 6.915267121597659e-06, "loss": 0.06726408004760742, "step": 5203 }, { "epoch": 0.703564125530225, "grad_norm": 0.09734231233596802, "learning_rate": 6.909502783920153e-06, "loss": 0.07239216566085815, "step": 5204 }, { "epoch": 0.7036993223260609, "grad_norm": 0.07396171987056732, "learning_rate": 6.903740130727312e-06, "loss": 0.07080841064453125, "step": 5205 }, { "epoch": 0.7038345191218968, "grad_norm": 0.1027669683098793, "learning_rate": 6.8979791632189425e-06, "loss": 0.08009147644042969, "step": 5206 }, { "epoch": 0.7039697159177327, "grad_norm": 0.15998680889606476, "learning_rate": 6.892219882594523e-06, "loss": 0.06155216693878174, "step": 5207 }, { "epoch": 0.7041049127135687, "grad_norm": 0.10081665217876434, "learning_rate": 6.886462290053159e-06, "loss": 0.0668749213218689, "step": 5208 }, { "epoch": 0.7042401095094046, "grad_norm": 0.06833750009536743, "learning_rate": 6.880706386793614e-06, "loss": 0.052366167306900024, "step": 5209 }, { "epoch": 0.7043753063052406, "grad_norm": 0.17746584117412567, "learning_rate": 6.874952174014298e-06, "loss": 0.08236157894134521, "step": 5210 }, { "epoch": 0.7045105031010765, "grad_norm": 0.05688696727156639, "learning_rate": 6.8691996529132585e-06, "loss": 0.05448341369628906, "step": 5211 }, { "epoch": 0.7046456998969124, "grad_norm": 0.11775568127632141, "learning_rate": 6.863448824688217e-06, "loss": 0.07089114189147949, "step": 5212 }, { "epoch": 0.7047808966927483, "grad_norm": 0.0966481864452362, "learning_rate": 6.857699690536521e-06, "loss": 0.054990530014038086, "step": 5213 }, { "epoch": 0.7049160934885843, "grad_norm": 0.06725800037384033, "learning_rate": 6.8519522516551685e-06, "loss": 0.05025172233581543, "step": 5214 }, { "epoch": 0.7050512902844203, "grad_norm": 0.07729680091142654, "learning_rate": 6.846206509240807e-06, "loss": 0.08900260925292969, "step": 5215 }, { "epoch": 0.7051864870802562, "grad_norm": 0.051595691591501236, "learning_rate": 6.840462464489726e-06, "loss": 0.04011797904968262, "step": 5216 }, { "epoch": 0.7053216838760922, "grad_norm": 0.1215374693274498, "learning_rate": 6.834720118597879e-06, "loss": 0.06545579433441162, "step": 5217 }, { "epoch": 0.705456880671928, "grad_norm": 0.08781389892101288, "learning_rate": 6.828979472760846e-06, "loss": 0.07096457481384277, "step": 5218 }, { "epoch": 0.705592077467764, "grad_norm": 0.06484188139438629, "learning_rate": 6.823240528173858e-06, "loss": 0.06389856338500977, "step": 5219 }, { "epoch": 0.7057272742635999, "grad_norm": 0.16192342340946198, "learning_rate": 6.817503286031797e-06, "loss": 0.061789512634277344, "step": 5220 }, { "epoch": 0.7058624710594359, "grad_norm": 0.05168599635362625, "learning_rate": 6.811767747529181e-06, "loss": 0.04684257507324219, "step": 5221 }, { "epoch": 0.7059976678552718, "grad_norm": 0.0708392783999443, "learning_rate": 6.806033913860195e-06, "loss": 0.06925821304321289, "step": 5222 }, { "epoch": 0.7061328646511078, "grad_norm": 0.08808889985084534, "learning_rate": 6.800301786218634e-06, "loss": 0.04602479934692383, "step": 5223 }, { "epoch": 0.7062680614469438, "grad_norm": 0.08793748915195465, "learning_rate": 6.794571365797971e-06, "loss": 0.04866921901702881, "step": 5224 }, { "epoch": 0.7064032582427796, "grad_norm": 0.1303250938653946, "learning_rate": 6.788842653791308e-06, "loss": 0.07110881805419922, "step": 5225 }, { "epoch": 0.7065384550386156, "grad_norm": 0.05211092531681061, "learning_rate": 6.7831156513913864e-06, "loss": 0.0505070686340332, "step": 5226 }, { "epoch": 0.7066736518344515, "grad_norm": 0.13809941709041595, "learning_rate": 6.777390359790614e-06, "loss": 0.08930253982543945, "step": 5227 }, { "epoch": 0.7068088486302875, "grad_norm": 0.16696114838123322, "learning_rate": 6.771666780181004e-06, "loss": 0.06232607364654541, "step": 5228 }, { "epoch": 0.7069440454261234, "grad_norm": 0.09382566064596176, "learning_rate": 6.765944913754258e-06, "loss": 0.02843475341796875, "step": 5229 }, { "epoch": 0.7070792422219594, "grad_norm": 0.07554718852043152, "learning_rate": 6.7602247617016885e-06, "loss": 0.053112149238586426, "step": 5230 }, { "epoch": 0.7072144390177952, "grad_norm": 0.12718260288238525, "learning_rate": 6.754506325214265e-06, "loss": 0.05199909210205078, "step": 5231 }, { "epoch": 0.7073496358136312, "grad_norm": 0.1223500519990921, "learning_rate": 6.748789605482593e-06, "loss": 0.08435928821563721, "step": 5232 }, { "epoch": 0.7074848326094672, "grad_norm": 0.17492032051086426, "learning_rate": 6.743074603696922e-06, "loss": 0.06446027755737305, "step": 5233 }, { "epoch": 0.7076200294053031, "grad_norm": 0.14729131758213043, "learning_rate": 6.737361321047155e-06, "loss": 0.0760049819946289, "step": 5234 }, { "epoch": 0.7077552262011391, "grad_norm": 0.051025256514549255, "learning_rate": 6.731649758722823e-06, "loss": 0.03641080856323242, "step": 5235 }, { "epoch": 0.707890422996975, "grad_norm": 0.16203363239765167, "learning_rate": 6.725939917913102e-06, "loss": 0.0928335189819336, "step": 5236 }, { "epoch": 0.7080256197928109, "grad_norm": 0.04803748428821564, "learning_rate": 6.720231799806814e-06, "loss": 0.03930628299713135, "step": 5237 }, { "epoch": 0.7081608165886468, "grad_norm": 0.07853997498750687, "learning_rate": 6.7145254055924136e-06, "loss": 0.056729793548583984, "step": 5238 }, { "epoch": 0.7082960133844828, "grad_norm": 0.06909221410751343, "learning_rate": 6.70882073645801e-06, "loss": 0.06695842742919922, "step": 5239 }, { "epoch": 0.7084312101803187, "grad_norm": 0.1401662975549698, "learning_rate": 6.703117793591346e-06, "loss": 0.06996715068817139, "step": 5240 }, { "epoch": 0.7085664069761547, "grad_norm": 0.08216295391321182, "learning_rate": 6.6974165781798e-06, "loss": 0.08788108825683594, "step": 5241 }, { "epoch": 0.7087016037719907, "grad_norm": 0.08670076727867126, "learning_rate": 6.691717091410398e-06, "loss": 0.08666515350341797, "step": 5242 }, { "epoch": 0.7088368005678265, "grad_norm": 0.08967714756727219, "learning_rate": 6.686019334469797e-06, "loss": 0.06845802068710327, "step": 5243 }, { "epoch": 0.7089719973636625, "grad_norm": 0.07060317695140839, "learning_rate": 6.680323308544312e-06, "loss": 0.06550812721252441, "step": 5244 }, { "epoch": 0.7091071941594984, "grad_norm": 0.07658001035451889, "learning_rate": 6.674629014819879e-06, "loss": 0.055979013442993164, "step": 5245 }, { "epoch": 0.7092423909553344, "grad_norm": 0.13820207118988037, "learning_rate": 6.668936454482082e-06, "loss": 0.07737338542938232, "step": 5246 }, { "epoch": 0.7093775877511703, "grad_norm": 0.12385187298059464, "learning_rate": 6.6632456287161426e-06, "loss": 0.06941461563110352, "step": 5247 }, { "epoch": 0.7095127845470063, "grad_norm": 0.14194141328334808, "learning_rate": 6.657556538706914e-06, "loss": 0.09146249294281006, "step": 5248 }, { "epoch": 0.7096479813428421, "grad_norm": 0.07196071743965149, "learning_rate": 6.651869185638907e-06, "loss": 0.0751657485961914, "step": 5249 }, { "epoch": 0.7097831781386781, "grad_norm": 0.07490207999944687, "learning_rate": 6.646183570696253e-06, "loss": 0.08531475067138672, "step": 5250 }, { "epoch": 0.709918374934514, "grad_norm": 0.11638922244310379, "learning_rate": 6.6404996950627275e-06, "loss": 0.06609213352203369, "step": 5251 }, { "epoch": 0.71005357173035, "grad_norm": 0.08221335709095001, "learning_rate": 6.634817559921744e-06, "loss": 0.06448519229888916, "step": 5252 }, { "epoch": 0.710188768526186, "grad_norm": 0.09152976423501968, "learning_rate": 6.629137166456348e-06, "loss": 0.057424187660217285, "step": 5253 }, { "epoch": 0.7103239653220219, "grad_norm": 0.10937793552875519, "learning_rate": 6.623458515849244e-06, "loss": 0.08165574073791504, "step": 5254 }, { "epoch": 0.7104591621178578, "grad_norm": 0.11498243361711502, "learning_rate": 6.6177816092827354e-06, "loss": 0.04826921224594116, "step": 5255 }, { "epoch": 0.7105943589136937, "grad_norm": 0.04066452011466026, "learning_rate": 6.6121064479388e-06, "loss": 0.03294992446899414, "step": 5256 }, { "epoch": 0.7107295557095297, "grad_norm": 0.14823363721370697, "learning_rate": 6.606433032999031e-06, "loss": 0.07320308685302734, "step": 5257 }, { "epoch": 0.7108647525053656, "grad_norm": 0.0708981454372406, "learning_rate": 6.60076136564466e-06, "loss": 0.04673290252685547, "step": 5258 }, { "epoch": 0.7109999493012016, "grad_norm": 0.07480026036500931, "learning_rate": 6.595091447056574e-06, "loss": 0.07643890380859375, "step": 5259 }, { "epoch": 0.7111351460970375, "grad_norm": 0.06935848295688629, "learning_rate": 6.589423278415259e-06, "loss": 0.052535563707351685, "step": 5260 }, { "epoch": 0.7112703428928734, "grad_norm": 0.07488243281841278, "learning_rate": 6.583756860900872e-06, "loss": 0.061678290367126465, "step": 5261 }, { "epoch": 0.7114055396887093, "grad_norm": 0.03968992084264755, "learning_rate": 6.578092195693187e-06, "loss": 0.031808674335479736, "step": 5262 }, { "epoch": 0.7115407364845453, "grad_norm": 0.12007761746644974, "learning_rate": 6.572429283971614e-06, "loss": 0.08109569549560547, "step": 5263 }, { "epoch": 0.7116759332803813, "grad_norm": 0.07446523755788803, "learning_rate": 6.566768126915215e-06, "loss": 0.06934309005737305, "step": 5264 }, { "epoch": 0.7118111300762172, "grad_norm": 0.06147582828998566, "learning_rate": 6.561108725702653e-06, "loss": 0.06867790222167969, "step": 5265 }, { "epoch": 0.7119463268720532, "grad_norm": 0.05361782759428024, "learning_rate": 6.555451081512262e-06, "loss": 0.03309512138366699, "step": 5266 }, { "epoch": 0.712081523667889, "grad_norm": 0.09725134074687958, "learning_rate": 6.549795195521988e-06, "loss": 0.05037188529968262, "step": 5267 }, { "epoch": 0.712216720463725, "grad_norm": 0.06769993156194687, "learning_rate": 6.544141068909416e-06, "loss": 0.06625199317932129, "step": 5268 }, { "epoch": 0.7123519172595609, "grad_norm": 0.09040994197130203, "learning_rate": 6.5384887028517645e-06, "loss": 0.0701906681060791, "step": 5269 }, { "epoch": 0.7124871140553969, "grad_norm": 0.09078021347522736, "learning_rate": 6.532838098525883e-06, "loss": 0.07329720258712769, "step": 5270 }, { "epoch": 0.7126223108512328, "grad_norm": 0.10193806886672974, "learning_rate": 6.5271892571082655e-06, "loss": 0.05244690179824829, "step": 5271 }, { "epoch": 0.7127575076470688, "grad_norm": 0.05036259442567825, "learning_rate": 6.521542179775029e-06, "loss": 0.061924099922180176, "step": 5272 }, { "epoch": 0.7128927044429048, "grad_norm": 0.04565594345331192, "learning_rate": 6.515896867701924e-06, "loss": 0.033681511878967285, "step": 5273 }, { "epoch": 0.7130279012387406, "grad_norm": 0.06286714971065521, "learning_rate": 6.510253322064333e-06, "loss": 0.057758212089538574, "step": 5274 }, { "epoch": 0.7131630980345766, "grad_norm": 0.09807729721069336, "learning_rate": 6.504611544037267e-06, "loss": 0.07482337951660156, "step": 5275 }, { "epoch": 0.7132982948304125, "grad_norm": 0.08780847489833832, "learning_rate": 6.498971534795387e-06, "loss": 0.06775462627410889, "step": 5276 }, { "epoch": 0.7134334916262485, "grad_norm": 0.0908341109752655, "learning_rate": 6.493333295512965e-06, "loss": 0.07049250602722168, "step": 5277 }, { "epoch": 0.7135686884220844, "grad_norm": 0.07731003314256668, "learning_rate": 6.487696827363916e-06, "loss": 0.06020742654800415, "step": 5278 }, { "epoch": 0.7137038852179204, "grad_norm": 0.08584830164909363, "learning_rate": 6.48206213152178e-06, "loss": 0.04250943660736084, "step": 5279 }, { "epoch": 0.7138390820137562, "grad_norm": 0.08685700595378876, "learning_rate": 6.476429209159725e-06, "loss": 0.08098793029785156, "step": 5280 }, { "epoch": 0.7139742788095922, "grad_norm": 0.06242678314447403, "learning_rate": 6.470798061450568e-06, "loss": 0.0533905029296875, "step": 5281 }, { "epoch": 0.7141094756054281, "grad_norm": 0.09067007154226303, "learning_rate": 6.465168689566738e-06, "loss": 0.07322447001934052, "step": 5282 }, { "epoch": 0.7142446724012641, "grad_norm": 0.05833905190229416, "learning_rate": 6.4595410946803e-06, "loss": 0.062474608421325684, "step": 5283 }, { "epoch": 0.7143798691971001, "grad_norm": 0.10698240250349045, "learning_rate": 6.453915277962948e-06, "loss": 0.052127957344055176, "step": 5284 }, { "epoch": 0.714515065992936, "grad_norm": 0.0889761745929718, "learning_rate": 6.4482912405860055e-06, "loss": 0.05313444137573242, "step": 5285 }, { "epoch": 0.7146502627887719, "grad_norm": 0.08653388172388077, "learning_rate": 6.442668983720434e-06, "loss": 0.08327579498291016, "step": 5286 }, { "epoch": 0.7147854595846078, "grad_norm": 0.14286509156227112, "learning_rate": 6.437048508536813e-06, "loss": 0.08691072463989258, "step": 5287 }, { "epoch": 0.7149206563804438, "grad_norm": 0.05450690537691116, "learning_rate": 6.431429816205357e-06, "loss": 0.0380479097366333, "step": 5288 }, { "epoch": 0.7150558531762797, "grad_norm": 0.1602146029472351, "learning_rate": 6.425812907895904e-06, "loss": 0.07602882385253906, "step": 5289 }, { "epoch": 0.7151910499721157, "grad_norm": 0.04617540165781975, "learning_rate": 6.420197784777925e-06, "loss": 0.036473244428634644, "step": 5290 }, { "epoch": 0.7153262467679516, "grad_norm": 0.06446775794029236, "learning_rate": 6.414584448020528e-06, "loss": 0.046510934829711914, "step": 5291 }, { "epoch": 0.7154614435637875, "grad_norm": 0.18855026364326477, "learning_rate": 6.408972898792423e-06, "loss": 0.07486724853515625, "step": 5292 }, { "epoch": 0.7155966403596234, "grad_norm": 0.0894274190068245, "learning_rate": 6.4033631382619766e-06, "loss": 0.05085396766662598, "step": 5293 }, { "epoch": 0.7157318371554594, "grad_norm": 0.09192391484975815, "learning_rate": 6.397755167597171e-06, "loss": 0.05952167510986328, "step": 5294 }, { "epoch": 0.7158670339512954, "grad_norm": 0.07320386916399002, "learning_rate": 6.392148987965603e-06, "loss": 0.06919527053833008, "step": 5295 }, { "epoch": 0.7160022307471313, "grad_norm": 0.16711179912090302, "learning_rate": 6.386544600534532e-06, "loss": 0.06976819038391113, "step": 5296 }, { "epoch": 0.7161374275429673, "grad_norm": 0.07418635487556458, "learning_rate": 6.3809420064707965e-06, "loss": 0.05153822898864746, "step": 5297 }, { "epoch": 0.7162726243388031, "grad_norm": 0.05701493099331856, "learning_rate": 6.375341206940902e-06, "loss": 0.04898262023925781, "step": 5298 }, { "epoch": 0.7164078211346391, "grad_norm": 0.07073450833559036, "learning_rate": 6.369742203110962e-06, "loss": 0.07201194763183594, "step": 5299 }, { "epoch": 0.716543017930475, "grad_norm": 0.05002342909574509, "learning_rate": 6.364144996146716e-06, "loss": 0.04228997230529785, "step": 5300 }, { "epoch": 0.716678214726311, "grad_norm": 0.10059481114149094, "learning_rate": 6.358549587213534e-06, "loss": 0.05995523929595947, "step": 5301 }, { "epoch": 0.716813411522147, "grad_norm": 0.10729820281267166, "learning_rate": 6.352955977476405e-06, "loss": 0.05074715614318848, "step": 5302 }, { "epoch": 0.7169486083179829, "grad_norm": 0.042587559670209885, "learning_rate": 6.347364168099959e-06, "loss": 0.043564677238464355, "step": 5303 }, { "epoch": 0.7170838051138188, "grad_norm": 0.0937381163239479, "learning_rate": 6.341774160248435e-06, "loss": 0.06353425979614258, "step": 5304 }, { "epoch": 0.7172190019096547, "grad_norm": 0.10774174332618713, "learning_rate": 6.3361859550857e-06, "loss": 0.06760931015014648, "step": 5305 }, { "epoch": 0.7173541987054907, "grad_norm": 0.09925224632024765, "learning_rate": 6.330599553775252e-06, "loss": 0.06612062454223633, "step": 5306 }, { "epoch": 0.7174893955013266, "grad_norm": 0.0786658525466919, "learning_rate": 6.325014957480203e-06, "loss": 0.04143184423446655, "step": 5307 }, { "epoch": 0.7176245922971626, "grad_norm": 0.1537683606147766, "learning_rate": 6.319432167363305e-06, "loss": 0.08494281768798828, "step": 5308 }, { "epoch": 0.7177597890929985, "grad_norm": 0.0969962477684021, "learning_rate": 6.313851184586918e-06, "loss": 0.0869135856628418, "step": 5309 }, { "epoch": 0.7178949858888344, "grad_norm": 0.07230785489082336, "learning_rate": 6.308272010313037e-06, "loss": 0.05916094779968262, "step": 5310 }, { "epoch": 0.7180301826846703, "grad_norm": 0.04013494774699211, "learning_rate": 6.302694645703273e-06, "loss": 0.039933621883392334, "step": 5311 }, { "epoch": 0.7181653794805063, "grad_norm": 0.2185848355293274, "learning_rate": 6.297119091918857e-06, "loss": 0.09819304943084717, "step": 5312 }, { "epoch": 0.7183005762763423, "grad_norm": 0.09592147916555405, "learning_rate": 6.2915453501206634e-06, "loss": 0.04529118537902832, "step": 5313 }, { "epoch": 0.7184357730721782, "grad_norm": 0.08197664469480515, "learning_rate": 6.285973421469166e-06, "loss": 0.07849514484405518, "step": 5314 }, { "epoch": 0.7185709698680142, "grad_norm": 0.2049938142299652, "learning_rate": 6.28040330712447e-06, "loss": 0.08316230773925781, "step": 5315 }, { "epoch": 0.7187061666638501, "grad_norm": 0.06143787503242493, "learning_rate": 6.274835008246304e-06, "loss": 0.04585576057434082, "step": 5316 }, { "epoch": 0.718841363459686, "grad_norm": 0.061775028705596924, "learning_rate": 6.269268525994013e-06, "loss": 0.06304037570953369, "step": 5317 }, { "epoch": 0.7189765602555219, "grad_norm": 0.11862292885780334, "learning_rate": 6.263703861526578e-06, "loss": 0.07857632637023926, "step": 5318 }, { "epoch": 0.7191117570513579, "grad_norm": 0.037855781614780426, "learning_rate": 6.258141016002587e-06, "loss": 0.035109519958496094, "step": 5319 }, { "epoch": 0.7192469538471938, "grad_norm": 0.09372851252555847, "learning_rate": 6.252579990580254e-06, "loss": 0.057793617248535156, "step": 5320 }, { "epoch": 0.7193821506430298, "grad_norm": 0.055855777114629745, "learning_rate": 6.247020786417412e-06, "loss": 0.06108808517456055, "step": 5321 }, { "epoch": 0.7195173474388658, "grad_norm": 0.10095803439617157, "learning_rate": 6.241463404671516e-06, "loss": 0.06770330667495728, "step": 5322 }, { "epoch": 0.7196525442347016, "grad_norm": 0.061530306935310364, "learning_rate": 6.235907846499655e-06, "loss": 0.06612920761108398, "step": 5323 }, { "epoch": 0.7197877410305376, "grad_norm": 0.044040828943252563, "learning_rate": 6.230354113058505e-06, "loss": 0.04344582557678223, "step": 5324 }, { "epoch": 0.7199229378263735, "grad_norm": 0.06790877878665924, "learning_rate": 6.2248022055044e-06, "loss": 0.04950857162475586, "step": 5325 }, { "epoch": 0.7200581346222095, "grad_norm": 0.07887686043977737, "learning_rate": 6.219252124993271e-06, "loss": 0.05790352821350098, "step": 5326 }, { "epoch": 0.7201933314180454, "grad_norm": 0.05709953233599663, "learning_rate": 6.213703872680668e-06, "loss": 0.07248306274414062, "step": 5327 }, { "epoch": 0.7203285282138814, "grad_norm": 0.09998508542776108, "learning_rate": 6.208157449721785e-06, "loss": 0.07018446922302246, "step": 5328 }, { "epoch": 0.7204637250097172, "grad_norm": 0.14174021780490875, "learning_rate": 6.202612857271393e-06, "loss": 0.08308422565460205, "step": 5329 }, { "epoch": 0.7205989218055532, "grad_norm": 0.05510513857007027, "learning_rate": 6.197070096483923e-06, "loss": 0.05639457702636719, "step": 5330 }, { "epoch": 0.7207341186013891, "grad_norm": 0.06637461483478546, "learning_rate": 6.191529168513403e-06, "loss": 0.06119728088378906, "step": 5331 }, { "epoch": 0.7208693153972251, "grad_norm": 0.07741259783506393, "learning_rate": 6.1859900745134755e-06, "loss": 0.0739288330078125, "step": 5332 }, { "epoch": 0.721004512193061, "grad_norm": 0.0876217633485794, "learning_rate": 6.180452815637429e-06, "loss": 0.07354676723480225, "step": 5333 }, { "epoch": 0.721139708988897, "grad_norm": 0.1901162564754486, "learning_rate": 6.174917393038126e-06, "loss": 0.07827091217041016, "step": 5334 }, { "epoch": 0.7212749057847329, "grad_norm": 0.08351711928844452, "learning_rate": 6.169383807868088e-06, "loss": 0.06247431039810181, "step": 5335 }, { "epoch": 0.7214101025805688, "grad_norm": 0.10454121977090836, "learning_rate": 6.163852061279432e-06, "loss": 0.05945134162902832, "step": 5336 }, { "epoch": 0.7215452993764048, "grad_norm": 0.09074094146490097, "learning_rate": 6.158322154423897e-06, "loss": 0.06362414360046387, "step": 5337 }, { "epoch": 0.7216804961722407, "grad_norm": 0.041065674275159836, "learning_rate": 6.15279408845284e-06, "loss": 0.03328752517700195, "step": 5338 }, { "epoch": 0.7218156929680767, "grad_norm": 0.1519712507724762, "learning_rate": 6.147267864517226e-06, "loss": 0.05492144823074341, "step": 5339 }, { "epoch": 0.7219508897639126, "grad_norm": 0.06271402537822723, "learning_rate": 6.141743483767658e-06, "loss": 0.03983864188194275, "step": 5340 }, { "epoch": 0.7220860865597485, "grad_norm": 0.070847287774086, "learning_rate": 6.136220947354333e-06, "loss": 0.07993173599243164, "step": 5341 }, { "epoch": 0.7222212833555844, "grad_norm": 0.10266253352165222, "learning_rate": 6.130700256427075e-06, "loss": 0.0762782096862793, "step": 5342 }, { "epoch": 0.7223564801514204, "grad_norm": 0.0735422745347023, "learning_rate": 6.1251814121353204e-06, "loss": 0.06935369968414307, "step": 5343 }, { "epoch": 0.7224916769472564, "grad_norm": 0.05423269793391228, "learning_rate": 6.1196644156281175e-06, "loss": 0.05670738220214844, "step": 5344 }, { "epoch": 0.7226268737430923, "grad_norm": 0.04924255609512329, "learning_rate": 6.114149268054143e-06, "loss": 0.035392045974731445, "step": 5345 }, { "epoch": 0.7227620705389283, "grad_norm": 0.17496107518672943, "learning_rate": 6.108635970561679e-06, "loss": 0.09158384799957275, "step": 5346 }, { "epoch": 0.7228972673347641, "grad_norm": 0.04416352137923241, "learning_rate": 6.103124524298617e-06, "loss": 0.04716068506240845, "step": 5347 }, { "epoch": 0.7230324641306001, "grad_norm": 0.05858367681503296, "learning_rate": 6.097614930412475e-06, "loss": 0.050646618008613586, "step": 5348 }, { "epoch": 0.723167660926436, "grad_norm": 0.09099132567644119, "learning_rate": 6.092107190050371e-06, "loss": 0.06693851947784424, "step": 5349 }, { "epoch": 0.723302857722272, "grad_norm": 0.07344671338796616, "learning_rate": 6.086601304359059e-06, "loss": 0.05541038513183594, "step": 5350 }, { "epoch": 0.7234380545181079, "grad_norm": 0.04452231898903847, "learning_rate": 6.081097274484887e-06, "loss": 0.04966890811920166, "step": 5351 }, { "epoch": 0.7235732513139439, "grad_norm": 0.047814320772886276, "learning_rate": 6.075595101573825e-06, "loss": 0.04716920852661133, "step": 5352 }, { "epoch": 0.7237084481097797, "grad_norm": 0.07035131752490997, "learning_rate": 6.070094786771451e-06, "loss": 0.05995357036590576, "step": 5353 }, { "epoch": 0.7238436449056157, "grad_norm": 0.09810557216405869, "learning_rate": 6.06459633122296e-06, "loss": 0.07535481452941895, "step": 5354 }, { "epoch": 0.7239788417014517, "grad_norm": 0.05407467484474182, "learning_rate": 6.059099736073166e-06, "loss": 0.054631948471069336, "step": 5355 }, { "epoch": 0.7241140384972876, "grad_norm": 0.0503917932510376, "learning_rate": 6.0536050024664865e-06, "loss": 0.05037069320678711, "step": 5356 }, { "epoch": 0.7242492352931236, "grad_norm": 0.086210235953331, "learning_rate": 6.048112131546953e-06, "loss": 0.07773375511169434, "step": 5357 }, { "epoch": 0.7243844320889595, "grad_norm": 0.10230488330125809, "learning_rate": 6.0426211244582105e-06, "loss": 0.08366107940673828, "step": 5358 }, { "epoch": 0.7245196288847955, "grad_norm": 0.09285559505224228, "learning_rate": 6.03713198234351e-06, "loss": 0.08132076263427734, "step": 5359 }, { "epoch": 0.7246548256806313, "grad_norm": 0.06652617454528809, "learning_rate": 6.0316447063457395e-06, "loss": 0.05439925193786621, "step": 5360 }, { "epoch": 0.7247900224764673, "grad_norm": 0.09366998821496964, "learning_rate": 6.026159297607356e-06, "loss": 0.07218027114868164, "step": 5361 }, { "epoch": 0.7249252192723032, "grad_norm": 0.06269343197345734, "learning_rate": 6.020675757270466e-06, "loss": 0.05474060773849487, "step": 5362 }, { "epoch": 0.7250604160681392, "grad_norm": 0.05139464512467384, "learning_rate": 6.015194086476766e-06, "loss": 0.04461979866027832, "step": 5363 }, { "epoch": 0.7251956128639752, "grad_norm": 0.09796824306249619, "learning_rate": 6.009714286367565e-06, "loss": 0.06590306758880615, "step": 5364 }, { "epoch": 0.7253308096598111, "grad_norm": 0.062123388051986694, "learning_rate": 6.004236358083802e-06, "loss": 0.04872941970825195, "step": 5365 }, { "epoch": 0.725466006455647, "grad_norm": 0.10065522789955139, "learning_rate": 5.998760302765989e-06, "loss": 0.04776954650878906, "step": 5366 }, { "epoch": 0.7256012032514829, "grad_norm": 0.12137159705162048, "learning_rate": 5.993286121554289e-06, "loss": 0.09652739763259888, "step": 5367 }, { "epoch": 0.7257364000473189, "grad_norm": 0.10050668567419052, "learning_rate": 5.987813815588447e-06, "loss": 0.07247316837310791, "step": 5368 }, { "epoch": 0.7258715968431548, "grad_norm": 0.10011740028858185, "learning_rate": 5.982343386007827e-06, "loss": 0.06283712387084961, "step": 5369 }, { "epoch": 0.7260067936389908, "grad_norm": 0.06181425228714943, "learning_rate": 5.976874833951404e-06, "loss": 0.037781357765197754, "step": 5370 }, { "epoch": 0.7261419904348267, "grad_norm": 0.046439073979854584, "learning_rate": 5.971408160557751e-06, "loss": 0.043087005615234375, "step": 5371 }, { "epoch": 0.7262771872306626, "grad_norm": 0.12095876038074493, "learning_rate": 5.965943366965069e-06, "loss": 0.07113271951675415, "step": 5372 }, { "epoch": 0.7264123840264985, "grad_norm": 0.09663762897253036, "learning_rate": 5.960480454311155e-06, "loss": 0.08069181442260742, "step": 5373 }, { "epoch": 0.7265475808223345, "grad_norm": 0.11518411338329315, "learning_rate": 5.955019423733416e-06, "loss": 0.06019997596740723, "step": 5374 }, { "epoch": 0.7266827776181705, "grad_norm": 0.05858995020389557, "learning_rate": 5.949560276368866e-06, "loss": 0.06565618515014648, "step": 5375 }, { "epoch": 0.7268179744140064, "grad_norm": 0.1236773282289505, "learning_rate": 5.9441030133541235e-06, "loss": 0.050461649894714355, "step": 5376 }, { "epoch": 0.7269531712098424, "grad_norm": 0.13728676736354828, "learning_rate": 5.938647635825432e-06, "loss": 0.07996439933776855, "step": 5377 }, { "epoch": 0.7270883680056782, "grad_norm": 0.1156599372625351, "learning_rate": 5.933194144918623e-06, "loss": 0.1054983139038086, "step": 5378 }, { "epoch": 0.7272235648015142, "grad_norm": 0.0730336382985115, "learning_rate": 5.927742541769142e-06, "loss": 0.07008910179138184, "step": 5379 }, { "epoch": 0.7273587615973501, "grad_norm": 0.07192105799913406, "learning_rate": 5.9222928275120445e-06, "loss": 0.04196673631668091, "step": 5380 }, { "epoch": 0.7274939583931861, "grad_norm": 0.054765958338975906, "learning_rate": 5.916845003281983e-06, "loss": 0.061878979206085205, "step": 5381 }, { "epoch": 0.727629155189022, "grad_norm": 0.08369892090559006, "learning_rate": 5.911399070213234e-06, "loss": 0.053739070892333984, "step": 5382 }, { "epoch": 0.727764351984858, "grad_norm": 0.08738262206315994, "learning_rate": 5.905955029439665e-06, "loss": 0.06627261638641357, "step": 5383 }, { "epoch": 0.7278995487806939, "grad_norm": 0.05693286657333374, "learning_rate": 5.900512882094754e-06, "loss": 0.04614543914794922, "step": 5384 }, { "epoch": 0.7280347455765298, "grad_norm": 0.07425574213266373, "learning_rate": 5.8950726293115855e-06, "loss": 0.07261180877685547, "step": 5385 }, { "epoch": 0.7281699423723658, "grad_norm": 0.061569489538669586, "learning_rate": 5.889634272222844e-06, "loss": 0.06549835205078125, "step": 5386 }, { "epoch": 0.7283051391682017, "grad_norm": 0.08389081805944443, "learning_rate": 5.8841978119608345e-06, "loss": 0.05438733100891113, "step": 5387 }, { "epoch": 0.7284403359640377, "grad_norm": 0.04788355156779289, "learning_rate": 5.878763249657452e-06, "loss": 0.04955792427062988, "step": 5388 }, { "epoch": 0.7285755327598736, "grad_norm": 0.09368545562028885, "learning_rate": 5.873330586444202e-06, "loss": 0.06662511825561523, "step": 5389 }, { "epoch": 0.7287107295557095, "grad_norm": 0.08902400732040405, "learning_rate": 5.867899823452193e-06, "loss": 0.06015467643737793, "step": 5390 }, { "epoch": 0.7288459263515454, "grad_norm": 0.046715009957551956, "learning_rate": 5.862470961812133e-06, "loss": 0.03995227813720703, "step": 5391 }, { "epoch": 0.7289811231473814, "grad_norm": 0.17600621283054352, "learning_rate": 5.857044002654357e-06, "loss": 0.06038618087768555, "step": 5392 }, { "epoch": 0.7291163199432174, "grad_norm": 0.052368585020303726, "learning_rate": 5.851618947108764e-06, "loss": 0.04511260986328125, "step": 5393 }, { "epoch": 0.7292515167390533, "grad_norm": 0.0867835059762001, "learning_rate": 5.8461957963048984e-06, "loss": 0.046033382415771484, "step": 5394 }, { "epoch": 0.7293867135348893, "grad_norm": 0.09164764732122421, "learning_rate": 5.840774551371882e-06, "loss": 0.06015515327453613, "step": 5395 }, { "epoch": 0.7295219103307251, "grad_norm": 0.06385641545057297, "learning_rate": 5.8353552134384405e-06, "loss": 0.0616612434387207, "step": 5396 }, { "epoch": 0.7296571071265611, "grad_norm": 0.03947180137038231, "learning_rate": 5.829937783632926e-06, "loss": 0.04448610544204712, "step": 5397 }, { "epoch": 0.729792303922397, "grad_norm": 0.11268360167741776, "learning_rate": 5.824522263083256e-06, "loss": 0.05181407928466797, "step": 5398 }, { "epoch": 0.729927500718233, "grad_norm": 0.08491433411836624, "learning_rate": 5.8191086529169855e-06, "loss": 0.06472170352935791, "step": 5399 }, { "epoch": 0.7300626975140689, "grad_norm": 0.12871123850345612, "learning_rate": 5.813696954261253e-06, "loss": 0.05559062957763672, "step": 5400 }, { "epoch": 0.7301978943099049, "grad_norm": 0.08232863247394562, "learning_rate": 5.8082871682428e-06, "loss": 0.06604957580566406, "step": 5401 }, { "epoch": 0.7303330911057407, "grad_norm": 0.15112295746803284, "learning_rate": 5.802879295987975e-06, "loss": 0.06478571891784668, "step": 5402 }, { "epoch": 0.7304682879015767, "grad_norm": 0.10919829457998276, "learning_rate": 5.797473338622722e-06, "loss": 0.08009648323059082, "step": 5403 }, { "epoch": 0.7306034846974127, "grad_norm": 0.10116737335920334, "learning_rate": 5.792069297272599e-06, "loss": 0.06067085266113281, "step": 5404 }, { "epoch": 0.7307386814932486, "grad_norm": 0.07250949740409851, "learning_rate": 5.7866671730627485e-06, "loss": 0.04123795032501221, "step": 5405 }, { "epoch": 0.7308738782890846, "grad_norm": 0.06884024292230606, "learning_rate": 5.781266967117925e-06, "loss": 0.06620335578918457, "step": 5406 }, { "epoch": 0.7310090750849205, "grad_norm": 0.06492038071155548, "learning_rate": 5.7758686805624815e-06, "loss": 0.06308764219284058, "step": 5407 }, { "epoch": 0.7311442718807565, "grad_norm": 0.08034491539001465, "learning_rate": 5.7704723145203605e-06, "loss": 0.05358099937438965, "step": 5408 }, { "epoch": 0.7312794686765923, "grad_norm": 0.08367525041103363, "learning_rate": 5.765077870115126e-06, "loss": 0.05752992630004883, "step": 5409 }, { "epoch": 0.7314146654724283, "grad_norm": 0.07757303863763809, "learning_rate": 5.759685348469928e-06, "loss": 0.07065486907958984, "step": 5410 }, { "epoch": 0.7315498622682642, "grad_norm": 0.06057950481772423, "learning_rate": 5.754294750707514e-06, "loss": 0.04633328318595886, "step": 5411 }, { "epoch": 0.7316850590641002, "grad_norm": 0.07146965712308884, "learning_rate": 5.748906077950237e-06, "loss": 0.07584881782531738, "step": 5412 }, { "epoch": 0.7318202558599362, "grad_norm": 0.11002994328737259, "learning_rate": 5.743519331320042e-06, "loss": 0.06497859954833984, "step": 5413 }, { "epoch": 0.7319554526557721, "grad_norm": 0.037900302559137344, "learning_rate": 5.73813451193849e-06, "loss": 0.03794050216674805, "step": 5414 }, { "epoch": 0.732090649451608, "grad_norm": 0.05841132253408432, "learning_rate": 5.7327516209267225e-06, "loss": 0.052388131618499756, "step": 5415 }, { "epoch": 0.7322258462474439, "grad_norm": 0.06615713983774185, "learning_rate": 5.727370659405486e-06, "loss": 0.06970357894897461, "step": 5416 }, { "epoch": 0.7323610430432799, "grad_norm": 0.1121114194393158, "learning_rate": 5.7219916284951265e-06, "loss": 0.07740664482116699, "step": 5417 }, { "epoch": 0.7324962398391158, "grad_norm": 0.04503386840224266, "learning_rate": 5.716614529315582e-06, "loss": 0.043839067220687866, "step": 5418 }, { "epoch": 0.7326314366349518, "grad_norm": 0.08000829815864563, "learning_rate": 5.711239362986401e-06, "loss": 0.06727242469787598, "step": 5419 }, { "epoch": 0.7327666334307877, "grad_norm": 0.06890582293272018, "learning_rate": 5.705866130626719e-06, "loss": 0.05676770210266113, "step": 5420 }, { "epoch": 0.7329018302266236, "grad_norm": 0.11593379080295563, "learning_rate": 5.700494833355271e-06, "loss": 0.0728616714477539, "step": 5421 }, { "epoch": 0.7330370270224595, "grad_norm": 0.10198880732059479, "learning_rate": 5.69512547229039e-06, "loss": 0.07434606552124023, "step": 5422 }, { "epoch": 0.7331722238182955, "grad_norm": 0.03199901804327965, "learning_rate": 5.689758048550001e-06, "loss": 0.02387791872024536, "step": 5423 }, { "epoch": 0.7333074206141315, "grad_norm": 0.07188189029693604, "learning_rate": 5.684392563251644e-06, "loss": 0.07494354248046875, "step": 5424 }, { "epoch": 0.7334426174099674, "grad_norm": 0.08367427438497543, "learning_rate": 5.679029017512422e-06, "loss": 0.06900453567504883, "step": 5425 }, { "epoch": 0.7335778142058034, "grad_norm": 0.045845434069633484, "learning_rate": 5.6736674124490684e-06, "loss": 0.04075121879577637, "step": 5426 }, { "epoch": 0.7337130110016392, "grad_norm": 0.06971453130245209, "learning_rate": 5.6683077491778935e-06, "loss": 0.058294057846069336, "step": 5427 }, { "epoch": 0.7338482077974752, "grad_norm": 0.08261123299598694, "learning_rate": 5.6629500288148044e-06, "loss": 0.0534512996673584, "step": 5428 }, { "epoch": 0.7339834045933111, "grad_norm": 0.07943519204854965, "learning_rate": 5.657594252475319e-06, "loss": 0.06360459327697754, "step": 5429 }, { "epoch": 0.7341186013891471, "grad_norm": 0.058413002640008926, "learning_rate": 5.652240421274521e-06, "loss": 0.06244230270385742, "step": 5430 }, { "epoch": 0.734253798184983, "grad_norm": 0.1283576786518097, "learning_rate": 5.646888536327121e-06, "loss": 0.09134674072265625, "step": 5431 }, { "epoch": 0.734388994980819, "grad_norm": 0.0840093344449997, "learning_rate": 5.641538598747403e-06, "loss": 0.06548523902893066, "step": 5432 }, { "epoch": 0.7345241917766548, "grad_norm": 0.07205220311880112, "learning_rate": 5.6361906096492495e-06, "loss": 0.03668212890625, "step": 5433 }, { "epoch": 0.7346593885724908, "grad_norm": 0.06774645298719406, "learning_rate": 5.630844570146157e-06, "loss": 0.04929041862487793, "step": 5434 }, { "epoch": 0.7347945853683268, "grad_norm": 0.15659502148628235, "learning_rate": 5.625500481351176e-06, "loss": 0.06994763016700745, "step": 5435 }, { "epoch": 0.7349297821641627, "grad_norm": 0.09570881724357605, "learning_rate": 5.6201583443769895e-06, "loss": 0.06996774673461914, "step": 5436 }, { "epoch": 0.7350649789599987, "grad_norm": 0.07199430465698242, "learning_rate": 5.614818160335857e-06, "loss": 0.05830782651901245, "step": 5437 }, { "epoch": 0.7352001757558346, "grad_norm": 0.15612199902534485, "learning_rate": 5.6094799303396315e-06, "loss": 0.09366416931152344, "step": 5438 }, { "epoch": 0.7353353725516705, "grad_norm": 0.1137339323759079, "learning_rate": 5.6041436554997595e-06, "loss": 0.04423785209655762, "step": 5439 }, { "epoch": 0.7354705693475064, "grad_norm": 0.077876515686512, "learning_rate": 5.598809336927278e-06, "loss": 0.06204056739807129, "step": 5440 }, { "epoch": 0.7356057661433424, "grad_norm": 0.06212661787867546, "learning_rate": 5.5934769757328325e-06, "loss": 0.048281192779541016, "step": 5441 }, { "epoch": 0.7357409629391783, "grad_norm": 0.04554282873868942, "learning_rate": 5.588146573026642e-06, "loss": 0.044075578451156616, "step": 5442 }, { "epoch": 0.7358761597350143, "grad_norm": 0.10456150025129318, "learning_rate": 5.582818129918525e-06, "loss": 0.08704376220703125, "step": 5443 }, { "epoch": 0.7360113565308503, "grad_norm": 0.14092624187469482, "learning_rate": 5.5774916475178915e-06, "loss": 0.1082000732421875, "step": 5444 }, { "epoch": 0.7361465533266861, "grad_norm": 0.07932636141777039, "learning_rate": 5.572167126933738e-06, "loss": 0.04230976104736328, "step": 5445 }, { "epoch": 0.7362817501225221, "grad_norm": 0.07973039895296097, "learning_rate": 5.566844569274669e-06, "loss": 0.062274277210235596, "step": 5446 }, { "epoch": 0.736416946918358, "grad_norm": 0.06242867186665535, "learning_rate": 5.5615239756488665e-06, "loss": 0.04746294021606445, "step": 5447 }, { "epoch": 0.736552143714194, "grad_norm": 0.05520540103316307, "learning_rate": 5.556205347164104e-06, "loss": 0.0515783429145813, "step": 5448 }, { "epoch": 0.7366873405100299, "grad_norm": 0.2551308870315552, "learning_rate": 5.550888684927746e-06, "loss": 0.08831024169921875, "step": 5449 }, { "epoch": 0.7368225373058659, "grad_norm": 0.07695929706096649, "learning_rate": 5.545573990046752e-06, "loss": 0.048056602478027344, "step": 5450 }, { "epoch": 0.7369577341017018, "grad_norm": 0.05442466214299202, "learning_rate": 5.540261263627672e-06, "loss": 0.05111503601074219, "step": 5451 }, { "epoch": 0.7370929308975377, "grad_norm": 0.06580621004104614, "learning_rate": 5.534950506776644e-06, "loss": 0.06391990184783936, "step": 5452 }, { "epoch": 0.7372281276933736, "grad_norm": 0.08065614104270935, "learning_rate": 5.529641720599393e-06, "loss": 0.053240060806274414, "step": 5453 }, { "epoch": 0.7373633244892096, "grad_norm": 0.06250309199094772, "learning_rate": 5.52433490620124e-06, "loss": 0.05691862106323242, "step": 5454 }, { "epoch": 0.7374985212850456, "grad_norm": 0.09920760989189148, "learning_rate": 5.519030064687082e-06, "loss": 0.07890653610229492, "step": 5455 }, { "epoch": 0.7376337180808815, "grad_norm": 0.04598747938871384, "learning_rate": 5.51372719716143e-06, "loss": 0.04904282093048096, "step": 5456 }, { "epoch": 0.7377689148767175, "grad_norm": 0.08992001414299011, "learning_rate": 5.508426304728363e-06, "loss": 0.04861760139465332, "step": 5457 }, { "epoch": 0.7379041116725533, "grad_norm": 0.08290814608335495, "learning_rate": 5.503127388491552e-06, "loss": 0.07291316986083984, "step": 5458 }, { "epoch": 0.7380393084683893, "grad_norm": 0.06295759230852127, "learning_rate": 5.497830449554266e-06, "loss": 0.06519508361816406, "step": 5459 }, { "epoch": 0.7381745052642252, "grad_norm": 0.1149107962846756, "learning_rate": 5.492535489019344e-06, "loss": 0.09826409816741943, "step": 5460 }, { "epoch": 0.7383097020600612, "grad_norm": 0.13201308250427246, "learning_rate": 5.4872425079892454e-06, "loss": 0.07710552215576172, "step": 5461 }, { "epoch": 0.7384448988558971, "grad_norm": 0.08112886548042297, "learning_rate": 5.481951507565973e-06, "loss": 0.06615877151489258, "step": 5462 }, { "epoch": 0.7385800956517331, "grad_norm": 0.04119957610964775, "learning_rate": 5.476662488851159e-06, "loss": 0.040248751640319824, "step": 5463 }, { "epoch": 0.738715292447569, "grad_norm": 0.04800136387348175, "learning_rate": 5.471375452946e-06, "loss": 0.0457518994808197, "step": 5464 }, { "epoch": 0.7388504892434049, "grad_norm": 0.0868966206908226, "learning_rate": 5.466090400951279e-06, "loss": 0.07658815383911133, "step": 5465 }, { "epoch": 0.7389856860392409, "grad_norm": 0.05991123616695404, "learning_rate": 5.460807333967387e-06, "loss": 0.03929173946380615, "step": 5466 }, { "epoch": 0.7391208828350768, "grad_norm": 0.09085798263549805, "learning_rate": 5.455526253094267e-06, "loss": 0.05458378791809082, "step": 5467 }, { "epoch": 0.7392560796309128, "grad_norm": 0.08786561340093613, "learning_rate": 5.450247159431486e-06, "loss": 0.044530630111694336, "step": 5468 }, { "epoch": 0.7393912764267487, "grad_norm": 0.039743077009916306, "learning_rate": 5.44497005407817e-06, "loss": 0.0347599983215332, "step": 5469 }, { "epoch": 0.7395264732225846, "grad_norm": 0.13131289184093475, "learning_rate": 5.439694938133042e-06, "loss": 0.07515549659729004, "step": 5470 }, { "epoch": 0.7396616700184205, "grad_norm": 0.04205523803830147, "learning_rate": 5.434421812694409e-06, "loss": 0.04130291938781738, "step": 5471 }, { "epoch": 0.7397968668142565, "grad_norm": 0.1523616909980774, "learning_rate": 5.4291506788601624e-06, "loss": 0.07195305824279785, "step": 5472 }, { "epoch": 0.7399320636100924, "grad_norm": 0.049770746380090714, "learning_rate": 5.423881537727785e-06, "loss": 0.039507508277893066, "step": 5473 }, { "epoch": 0.7400672604059284, "grad_norm": 0.15474846959114075, "learning_rate": 5.418614390394338e-06, "loss": 0.07103705406188965, "step": 5474 }, { "epoch": 0.7402024572017644, "grad_norm": 0.09364216774702072, "learning_rate": 5.413349237956469e-06, "loss": 0.054895877838134766, "step": 5475 }, { "epoch": 0.7403376539976002, "grad_norm": 0.10518147796392441, "learning_rate": 5.4080860815104125e-06, "loss": 0.06545042991638184, "step": 5476 }, { "epoch": 0.7404728507934362, "grad_norm": 0.10757801681756973, "learning_rate": 5.402824922151977e-06, "loss": 0.07328128814697266, "step": 5477 }, { "epoch": 0.7406080475892721, "grad_norm": 0.045876652002334595, "learning_rate": 5.397565760976577e-06, "loss": 0.03791801631450653, "step": 5478 }, { "epoch": 0.7407432443851081, "grad_norm": 0.09045185893774033, "learning_rate": 5.392308599079193e-06, "loss": 0.053752899169921875, "step": 5479 }, { "epoch": 0.740878441180944, "grad_norm": 0.09355717152357101, "learning_rate": 5.3870534375543916e-06, "loss": 0.05192971229553223, "step": 5480 }, { "epoch": 0.74101363797678, "grad_norm": 0.056373097002506256, "learning_rate": 5.381800277496328e-06, "loss": 0.05078864097595215, "step": 5481 }, { "epoch": 0.7411488347726158, "grad_norm": 0.21935756504535675, "learning_rate": 5.376549119998731e-06, "loss": 0.08102989196777344, "step": 5482 }, { "epoch": 0.7412840315684518, "grad_norm": 0.07524493336677551, "learning_rate": 5.3712999661549314e-06, "loss": 0.06669723987579346, "step": 5483 }, { "epoch": 0.7414192283642878, "grad_norm": 0.057628776878118515, "learning_rate": 5.366052817057826e-06, "loss": 0.053884029388427734, "step": 5484 }, { "epoch": 0.7415544251601237, "grad_norm": 0.0970100685954094, "learning_rate": 5.360807673799899e-06, "loss": 0.07878828048706055, "step": 5485 }, { "epoch": 0.7416896219559597, "grad_norm": 0.09406261146068573, "learning_rate": 5.355564537473214e-06, "loss": 0.043989211320877075, "step": 5486 }, { "epoch": 0.7418248187517956, "grad_norm": 0.10842932015657425, "learning_rate": 5.35032340916942e-06, "loss": 0.07433564215898514, "step": 5487 }, { "epoch": 0.7419600155476315, "grad_norm": 0.060150325298309326, "learning_rate": 5.345084289979755e-06, "loss": 0.03793051838874817, "step": 5488 }, { "epoch": 0.7420952123434674, "grad_norm": 0.08368875086307526, "learning_rate": 5.339847180995026e-06, "loss": 0.07368636131286621, "step": 5489 }, { "epoch": 0.7422304091393034, "grad_norm": 0.07861552387475967, "learning_rate": 5.33461208330563e-06, "loss": 0.06901264190673828, "step": 5490 }, { "epoch": 0.7423656059351393, "grad_norm": 0.06586482375860214, "learning_rate": 5.32937899800154e-06, "loss": 0.062071025371551514, "step": 5491 }, { "epoch": 0.7425008027309753, "grad_norm": 0.060190703719854355, "learning_rate": 5.324147926172307e-06, "loss": 0.049607276916503906, "step": 5492 }, { "epoch": 0.7426359995268113, "grad_norm": 0.055766988545656204, "learning_rate": 5.318918868907084e-06, "loss": 0.043805062770843506, "step": 5493 }, { "epoch": 0.7427711963226471, "grad_norm": 0.09595475345849991, "learning_rate": 5.313691827294568e-06, "loss": 0.04592317342758179, "step": 5494 }, { "epoch": 0.742906393118483, "grad_norm": 0.04057767987251282, "learning_rate": 5.308466802423072e-06, "loss": 0.034522950649261475, "step": 5495 }, { "epoch": 0.743041589914319, "grad_norm": 0.09693080186843872, "learning_rate": 5.303243795380471e-06, "loss": 0.07604217529296875, "step": 5496 }, { "epoch": 0.743176786710155, "grad_norm": 0.12563791871070862, "learning_rate": 5.298022807254215e-06, "loss": 0.052136898040771484, "step": 5497 }, { "epoch": 0.7433119835059909, "grad_norm": 0.0906025841832161, "learning_rate": 5.292803839131358e-06, "loss": 0.0597529411315918, "step": 5498 }, { "epoch": 0.7434471803018269, "grad_norm": 0.045552872121334076, "learning_rate": 5.287586892098496e-06, "loss": 0.04845905303955078, "step": 5499 }, { "epoch": 0.7435823770976628, "grad_norm": 0.09599911421537399, "learning_rate": 5.282371967241842e-06, "loss": 0.0832982063293457, "step": 5500 }, { "epoch": 0.7437175738934987, "grad_norm": 0.0929669588804245, "learning_rate": 5.277159065647164e-06, "loss": 0.06981372833251953, "step": 5501 }, { "epoch": 0.7438527706893346, "grad_norm": 0.06944774091243744, "learning_rate": 5.271948188399814e-06, "loss": 0.03140115737915039, "step": 5502 }, { "epoch": 0.7439879674851706, "grad_norm": 0.06827398389577866, "learning_rate": 5.266739336584735e-06, "loss": 0.04185876250267029, "step": 5503 }, { "epoch": 0.7441231642810066, "grad_norm": 0.09269287437200546, "learning_rate": 5.261532511286422e-06, "loss": 0.07217788696289062, "step": 5504 }, { "epoch": 0.7442583610768425, "grad_norm": 0.15041735768318176, "learning_rate": 5.256327713588977e-06, "loss": 0.07729315757751465, "step": 5505 }, { "epoch": 0.7443935578726785, "grad_norm": 0.04381795600056648, "learning_rate": 5.25112494457606e-06, "loss": 0.03384733200073242, "step": 5506 }, { "epoch": 0.7445287546685143, "grad_norm": 0.09961852431297302, "learning_rate": 5.245924205330919e-06, "loss": 0.05482053756713867, "step": 5507 }, { "epoch": 0.7446639514643503, "grad_norm": 0.06379222869873047, "learning_rate": 5.240725496936373e-06, "loss": 0.047136589884757996, "step": 5508 }, { "epoch": 0.7447991482601862, "grad_norm": 0.04229654371738434, "learning_rate": 5.2355288204748145e-06, "loss": 0.03293609619140625, "step": 5509 }, { "epoch": 0.7449343450560222, "grad_norm": 0.06408032774925232, "learning_rate": 5.230334177028233e-06, "loss": 0.047388315200805664, "step": 5510 }, { "epoch": 0.7450695418518581, "grad_norm": 0.052778828889131546, "learning_rate": 5.2251415676781726e-06, "loss": 0.041927993297576904, "step": 5511 }, { "epoch": 0.7452047386476941, "grad_norm": 0.12281057238578796, "learning_rate": 5.2199509935057655e-06, "loss": 0.0704350471496582, "step": 5512 }, { "epoch": 0.7453399354435299, "grad_norm": 0.141086608171463, "learning_rate": 5.214762455591713e-06, "loss": 0.08241963386535645, "step": 5513 }, { "epoch": 0.7454751322393659, "grad_norm": 0.0525985062122345, "learning_rate": 5.209575955016295e-06, "loss": 0.05631065368652344, "step": 5514 }, { "epoch": 0.7456103290352019, "grad_norm": 0.06778184324502945, "learning_rate": 5.204391492859377e-06, "loss": 0.06846237182617188, "step": 5515 }, { "epoch": 0.7457455258310378, "grad_norm": 0.1181851178407669, "learning_rate": 5.199209070200388e-06, "loss": 0.06189322471618652, "step": 5516 }, { "epoch": 0.7458807226268738, "grad_norm": 0.06505578011274338, "learning_rate": 5.194028688118332e-06, "loss": 0.06946802139282227, "step": 5517 }, { "epoch": 0.7460159194227097, "grad_norm": 0.16072849929332733, "learning_rate": 5.188850347691797e-06, "loss": 0.09738695621490479, "step": 5518 }, { "epoch": 0.7461511162185456, "grad_norm": 0.049335628747940063, "learning_rate": 5.183674049998934e-06, "loss": 0.04713273048400879, "step": 5519 }, { "epoch": 0.7462863130143815, "grad_norm": 0.1295800805091858, "learning_rate": 5.178499796117485e-06, "loss": 0.06938743591308594, "step": 5520 }, { "epoch": 0.7464215098102175, "grad_norm": 0.10742707550525665, "learning_rate": 5.173327587124753e-06, "loss": 0.08169126510620117, "step": 5521 }, { "epoch": 0.7465567066060534, "grad_norm": 0.07736071199178696, "learning_rate": 5.16815742409762e-06, "loss": 0.04987215995788574, "step": 5522 }, { "epoch": 0.7466919034018894, "grad_norm": 0.047543223947286606, "learning_rate": 5.16298930811254e-06, "loss": 0.054995596408843994, "step": 5523 }, { "epoch": 0.7468271001977254, "grad_norm": 0.08714444935321808, "learning_rate": 5.15782324024554e-06, "loss": 0.057285845279693604, "step": 5524 }, { "epoch": 0.7469622969935612, "grad_norm": 0.07695301622152328, "learning_rate": 5.152659221572231e-06, "loss": 0.052234649658203125, "step": 5525 }, { "epoch": 0.7470974937893972, "grad_norm": 0.08860433846712112, "learning_rate": 5.147497253167784e-06, "loss": 0.07405328750610352, "step": 5526 }, { "epoch": 0.7472326905852331, "grad_norm": 0.0780363604426384, "learning_rate": 5.142337336106948e-06, "loss": 0.0707550048828125, "step": 5527 }, { "epoch": 0.7473678873810691, "grad_norm": 0.20865824818611145, "learning_rate": 5.137179471464047e-06, "loss": 0.07080549001693726, "step": 5528 }, { "epoch": 0.747503084176905, "grad_norm": 0.10193055868148804, "learning_rate": 5.13202366031297e-06, "loss": 0.0694284439086914, "step": 5529 }, { "epoch": 0.747638280972741, "grad_norm": 0.10590600967407227, "learning_rate": 5.1268699037272e-06, "loss": 0.05528116226196289, "step": 5530 }, { "epoch": 0.7477734777685768, "grad_norm": 0.07347662001848221, "learning_rate": 5.121718202779756e-06, "loss": 0.06485557556152344, "step": 5531 }, { "epoch": 0.7479086745644128, "grad_norm": 0.0719623938202858, "learning_rate": 5.116568558543264e-06, "loss": 0.05587053298950195, "step": 5532 }, { "epoch": 0.7480438713602487, "grad_norm": 0.09866522997617722, "learning_rate": 5.1114209720899025e-06, "loss": 0.054007112979888916, "step": 5533 }, { "epoch": 0.7481790681560847, "grad_norm": 0.06220245361328125, "learning_rate": 5.106275444491423e-06, "loss": 0.04083812236785889, "step": 5534 }, { "epoch": 0.7483142649519207, "grad_norm": 0.05143401399254799, "learning_rate": 5.101131976819165e-06, "loss": 0.05747842788696289, "step": 5535 }, { "epoch": 0.7484494617477566, "grad_norm": 0.0980193242430687, "learning_rate": 5.095990570144008e-06, "loss": 0.044384002685546875, "step": 5536 }, { "epoch": 0.7485846585435925, "grad_norm": 0.07927174866199493, "learning_rate": 5.090851225536432e-06, "loss": 0.07108783721923828, "step": 5537 }, { "epoch": 0.7487198553394284, "grad_norm": 0.09014839679002762, "learning_rate": 5.085713944066474e-06, "loss": 0.06176948547363281, "step": 5538 }, { "epoch": 0.7488550521352644, "grad_norm": 0.07132940739393234, "learning_rate": 5.080578726803741e-06, "loss": 0.05525040626525879, "step": 5539 }, { "epoch": 0.7489902489311003, "grad_norm": 0.04437018558382988, "learning_rate": 5.075445574817415e-06, "loss": 0.040592730045318604, "step": 5540 }, { "epoch": 0.7491254457269363, "grad_norm": 0.09692610800266266, "learning_rate": 5.07031448917624e-06, "loss": 0.05897831916809082, "step": 5541 }, { "epoch": 0.7492606425227722, "grad_norm": 0.08678141981363297, "learning_rate": 5.065185470948544e-06, "loss": 0.0537571907043457, "step": 5542 }, { "epoch": 0.7493958393186082, "grad_norm": 0.11594772338867188, "learning_rate": 5.060058521202211e-06, "loss": 0.05883944034576416, "step": 5543 }, { "epoch": 0.749531036114444, "grad_norm": 0.06255055218935013, "learning_rate": 5.054933641004703e-06, "loss": 0.059362053871154785, "step": 5544 }, { "epoch": 0.74966623291028, "grad_norm": 0.057039447128772736, "learning_rate": 5.0498108314230425e-06, "loss": 0.03361295908689499, "step": 5545 }, { "epoch": 0.749801429706116, "grad_norm": 0.07067342847585678, "learning_rate": 5.044690093523823e-06, "loss": 0.04484063386917114, "step": 5546 }, { "epoch": 0.7499366265019519, "grad_norm": 0.061600882560014725, "learning_rate": 5.039571428373219e-06, "loss": 0.06914234161376953, "step": 5547 }, { "epoch": 0.7500718232977879, "grad_norm": 0.08802131563425064, "learning_rate": 5.034454837036959e-06, "loss": 0.05873298645019531, "step": 5548 }, { "epoch": 0.7502070200936238, "grad_norm": 0.19037026166915894, "learning_rate": 5.0293403205803455e-06, "loss": 0.09025657176971436, "step": 5549 }, { "epoch": 0.7503422168894597, "grad_norm": 0.05234413966536522, "learning_rate": 5.024227880068247e-06, "loss": 0.04296457767486572, "step": 5550 }, { "epoch": 0.7504774136852956, "grad_norm": 0.061269741505384445, "learning_rate": 5.019117516565096e-06, "loss": 0.047121524810791016, "step": 5551 }, { "epoch": 0.7506126104811316, "grad_norm": 0.2578270733356476, "learning_rate": 5.014009231134908e-06, "loss": 0.06601262092590332, "step": 5552 }, { "epoch": 0.7507478072769675, "grad_norm": 0.0900583490729332, "learning_rate": 5.008903024841248e-06, "loss": 0.09714412689208984, "step": 5553 }, { "epoch": 0.7508830040728035, "grad_norm": 0.06084698066115379, "learning_rate": 5.0037988987472595e-06, "loss": 0.05105161666870117, "step": 5554 }, { "epoch": 0.7510182008686395, "grad_norm": 0.1035371646285057, "learning_rate": 4.998696853915646e-06, "loss": 0.05336737632751465, "step": 5555 }, { "epoch": 0.7511533976644753, "grad_norm": 0.04608193039894104, "learning_rate": 4.993596891408676e-06, "loss": 0.033961713314056396, "step": 5556 }, { "epoch": 0.7512885944603113, "grad_norm": 0.13474397361278534, "learning_rate": 4.988499012288198e-06, "loss": 0.07436126470565796, "step": 5557 }, { "epoch": 0.7514237912561472, "grad_norm": 0.09331358969211578, "learning_rate": 4.983403217615614e-06, "loss": 0.07677650451660156, "step": 5558 }, { "epoch": 0.7515589880519832, "grad_norm": 0.13320577144622803, "learning_rate": 4.978309508451896e-06, "loss": 0.06755125522613525, "step": 5559 }, { "epoch": 0.7516941848478191, "grad_norm": 0.16972249746322632, "learning_rate": 4.973217885857578e-06, "loss": 0.06012868881225586, "step": 5560 }, { "epoch": 0.7518293816436551, "grad_norm": 0.06380508840084076, "learning_rate": 4.968128350892763e-06, "loss": 0.07260751724243164, "step": 5561 }, { "epoch": 0.7519645784394909, "grad_norm": 0.09126947075128555, "learning_rate": 4.963040904617131e-06, "loss": 0.04449129104614258, "step": 5562 }, { "epoch": 0.7520997752353269, "grad_norm": 0.11580614745616913, "learning_rate": 4.9579555480898955e-06, "loss": 0.10945892333984375, "step": 5563 }, { "epoch": 0.7522349720311629, "grad_norm": 0.05059579759836197, "learning_rate": 4.952872282369873e-06, "loss": 0.037107110023498535, "step": 5564 }, { "epoch": 0.7523701688269988, "grad_norm": 0.06567502021789551, "learning_rate": 4.947791108515417e-06, "loss": 0.05544617772102356, "step": 5565 }, { "epoch": 0.7525053656228348, "grad_norm": 0.04168238863348961, "learning_rate": 4.942712027584453e-06, "loss": 0.036566972732543945, "step": 5566 }, { "epoch": 0.7526405624186707, "grad_norm": 0.054161179810762405, "learning_rate": 4.937635040634485e-06, "loss": 0.052449941635131836, "step": 5567 }, { "epoch": 0.7527757592145066, "grad_norm": 0.12100677192211151, "learning_rate": 4.9325601487225545e-06, "loss": 0.06344461441040039, "step": 5568 }, { "epoch": 0.7529109560103425, "grad_norm": 0.0906938910484314, "learning_rate": 4.927487352905289e-06, "loss": 0.040997982025146484, "step": 5569 }, { "epoch": 0.7530461528061785, "grad_norm": 0.06256604194641113, "learning_rate": 4.92241665423887e-06, "loss": 0.05134701728820801, "step": 5570 }, { "epoch": 0.7531813496020144, "grad_norm": 0.09261125326156616, "learning_rate": 4.917348053779039e-06, "loss": 0.08533334732055664, "step": 5571 }, { "epoch": 0.7533165463978504, "grad_norm": 0.052472133189439774, "learning_rate": 4.912281552581122e-06, "loss": 0.059172868728637695, "step": 5572 }, { "epoch": 0.7534517431936864, "grad_norm": 0.06928516924381256, "learning_rate": 4.907217151699969e-06, "loss": 0.06076943874359131, "step": 5573 }, { "epoch": 0.7535869399895222, "grad_norm": 0.09416227042675018, "learning_rate": 4.9021548521900305e-06, "loss": 0.08258247375488281, "step": 5574 }, { "epoch": 0.7537221367853582, "grad_norm": 0.0987280085682869, "learning_rate": 4.8970946551053005e-06, "loss": 0.0695432722568512, "step": 5575 }, { "epoch": 0.7538573335811941, "grad_norm": 0.11186367273330688, "learning_rate": 4.892036561499339e-06, "loss": 0.06056022644042969, "step": 5576 }, { "epoch": 0.7539925303770301, "grad_norm": 0.06027604639530182, "learning_rate": 4.8869805724252675e-06, "loss": 0.04197561740875244, "step": 5577 }, { "epoch": 0.754127727172866, "grad_norm": 0.1207454651594162, "learning_rate": 4.8819266889357665e-06, "loss": 0.06840872764587402, "step": 5578 }, { "epoch": 0.754262923968702, "grad_norm": 0.11013337224721909, "learning_rate": 4.876874912083088e-06, "loss": 0.05472058057785034, "step": 5579 }, { "epoch": 0.7543981207645378, "grad_norm": 0.12484157085418701, "learning_rate": 4.871825242919037e-06, "loss": 0.08231687545776367, "step": 5580 }, { "epoch": 0.7545333175603738, "grad_norm": 0.07946226000785828, "learning_rate": 4.866777682494978e-06, "loss": 0.07387256622314453, "step": 5581 }, { "epoch": 0.7546685143562097, "grad_norm": 0.054265640676021576, "learning_rate": 4.861732231861845e-06, "loss": 0.0603179931640625, "step": 5582 }, { "epoch": 0.7548037111520457, "grad_norm": 0.049201589077711105, "learning_rate": 4.85668889207012e-06, "loss": 0.04238533973693848, "step": 5583 }, { "epoch": 0.7549389079478817, "grad_norm": 0.08540099114179611, "learning_rate": 4.851647664169862e-06, "loss": 0.06142902374267578, "step": 5584 }, { "epoch": 0.7550741047437176, "grad_norm": 0.13426458835601807, "learning_rate": 4.846608549210679e-06, "loss": 0.11732864379882812, "step": 5585 }, { "epoch": 0.7552093015395536, "grad_norm": 0.13599397242069244, "learning_rate": 4.841571548241741e-06, "loss": 0.07292807102203369, "step": 5586 }, { "epoch": 0.7553444983353894, "grad_norm": 0.09144137799739838, "learning_rate": 4.836536662311777e-06, "loss": 0.07636666297912598, "step": 5587 }, { "epoch": 0.7554796951312254, "grad_norm": 0.12282189726829529, "learning_rate": 4.8315038924690745e-06, "loss": 0.07147359848022461, "step": 5588 }, { "epoch": 0.7556148919270613, "grad_norm": 0.04325420781970024, "learning_rate": 4.82647323976149e-06, "loss": 0.051477909088134766, "step": 5589 }, { "epoch": 0.7557500887228973, "grad_norm": 0.06746050715446472, "learning_rate": 4.821444705236429e-06, "loss": 0.05562758445739746, "step": 5590 }, { "epoch": 0.7558852855187332, "grad_norm": 0.05664766579866409, "learning_rate": 4.81641828994086e-06, "loss": 0.07956457138061523, "step": 5591 }, { "epoch": 0.7560204823145692, "grad_norm": 0.07822947204113007, "learning_rate": 4.811393994921308e-06, "loss": 0.06314229965209961, "step": 5592 }, { "epoch": 0.756155679110405, "grad_norm": 0.046915702521800995, "learning_rate": 4.806371821223854e-06, "loss": 0.04811561107635498, "step": 5593 }, { "epoch": 0.756290875906241, "grad_norm": 0.04373643547296524, "learning_rate": 4.801351769894151e-06, "loss": 0.0394287109375, "step": 5594 }, { "epoch": 0.756426072702077, "grad_norm": 0.07949721068143845, "learning_rate": 4.796333841977394e-06, "loss": 0.05352514982223511, "step": 5595 }, { "epoch": 0.7565612694979129, "grad_norm": 0.0772608295083046, "learning_rate": 4.791318038518345e-06, "loss": 0.053855180740356445, "step": 5596 }, { "epoch": 0.7566964662937489, "grad_norm": 0.053752753883600235, "learning_rate": 4.7863043605613185e-06, "loss": 0.04370427131652832, "step": 5597 }, { "epoch": 0.7568316630895848, "grad_norm": 0.05103258043527603, "learning_rate": 4.7812928091501865e-06, "loss": 0.04986906051635742, "step": 5598 }, { "epoch": 0.7569668598854207, "grad_norm": 0.05900655314326286, "learning_rate": 4.7762833853283935e-06, "loss": 0.04721975326538086, "step": 5599 }, { "epoch": 0.7571020566812566, "grad_norm": 0.09072911739349365, "learning_rate": 4.77127609013891e-06, "loss": 0.07277536392211914, "step": 5600 }, { "epoch": 0.7572372534770926, "grad_norm": 0.13973002135753632, "learning_rate": 4.766270924624295e-06, "loss": 0.04611790180206299, "step": 5601 }, { "epoch": 0.7573724502729285, "grad_norm": 0.05370303615927696, "learning_rate": 4.761267889826647e-06, "loss": 0.055280208587646484, "step": 5602 }, { "epoch": 0.7575076470687645, "grad_norm": 0.06365011632442474, "learning_rate": 4.756266986787619e-06, "loss": 0.057503461837768555, "step": 5603 }, { "epoch": 0.7576428438646005, "grad_norm": 0.0598599836230278, "learning_rate": 4.751268216548439e-06, "loss": 0.04886817932128906, "step": 5604 }, { "epoch": 0.7577780406604363, "grad_norm": 0.13633494079113007, "learning_rate": 4.746271580149861e-06, "loss": 0.0834503173828125, "step": 5605 }, { "epoch": 0.7579132374562723, "grad_norm": 0.14826372265815735, "learning_rate": 4.7412770786322244e-06, "loss": 0.06831932067871094, "step": 5606 }, { "epoch": 0.7580484342521082, "grad_norm": 0.13457536697387695, "learning_rate": 4.736284713035406e-06, "loss": 0.08717679977416992, "step": 5607 }, { "epoch": 0.7581836310479442, "grad_norm": 0.06402499973773956, "learning_rate": 4.731294484398843e-06, "loss": 0.06055939197540283, "step": 5608 }, { "epoch": 0.7583188278437801, "grad_norm": 0.057129230350255966, "learning_rate": 4.726306393761526e-06, "loss": 0.039793968200683594, "step": 5609 }, { "epoch": 0.7584540246396161, "grad_norm": 0.05173155292868614, "learning_rate": 4.721320442162001e-06, "loss": 0.03661537170410156, "step": 5610 }, { "epoch": 0.7585892214354519, "grad_norm": 0.059408728033304214, "learning_rate": 4.716336630638378e-06, "loss": 0.031701087951660156, "step": 5611 }, { "epoch": 0.7587244182312879, "grad_norm": 0.07806425541639328, "learning_rate": 4.711354960228306e-06, "loss": 0.04264932870864868, "step": 5612 }, { "epoch": 0.7588596150271238, "grad_norm": 0.048639487475156784, "learning_rate": 4.706375431968998e-06, "loss": 0.028697893023490906, "step": 5613 }, { "epoch": 0.7589948118229598, "grad_norm": 0.05570409819483757, "learning_rate": 4.701398046897218e-06, "loss": 0.05447578430175781, "step": 5614 }, { "epoch": 0.7591300086187958, "grad_norm": 0.06081291660666466, "learning_rate": 4.696422806049277e-06, "loss": 0.039641499519348145, "step": 5615 }, { "epoch": 0.7592652054146317, "grad_norm": 0.11350715905427933, "learning_rate": 4.69144971046106e-06, "loss": 0.05987238883972168, "step": 5616 }, { "epoch": 0.7594004022104676, "grad_norm": 0.06956437230110168, "learning_rate": 4.686478761167984e-06, "loss": 0.06370258331298828, "step": 5617 }, { "epoch": 0.7595355990063035, "grad_norm": 0.05556348338723183, "learning_rate": 4.681509959205028e-06, "loss": 0.046308040618896484, "step": 5618 }, { "epoch": 0.7596707958021395, "grad_norm": 0.05913687124848366, "learning_rate": 4.676543305606724e-06, "loss": 0.04073485732078552, "step": 5619 }, { "epoch": 0.7598059925979754, "grad_norm": 0.07346321642398834, "learning_rate": 4.67157880140715e-06, "loss": 0.06905460357666016, "step": 5620 }, { "epoch": 0.7599411893938114, "grad_norm": 0.07231054455041885, "learning_rate": 4.666616447639952e-06, "loss": 0.0376744270324707, "step": 5621 }, { "epoch": 0.7600763861896473, "grad_norm": 0.10149803012609482, "learning_rate": 4.661656245338314e-06, "loss": 0.07171380519866943, "step": 5622 }, { "epoch": 0.7602115829854832, "grad_norm": 0.04784693196415901, "learning_rate": 4.656698195534978e-06, "loss": 0.05109739303588867, "step": 5623 }, { "epoch": 0.7603467797813191, "grad_norm": 0.07004769891500473, "learning_rate": 4.651742299262233e-06, "loss": 0.07720184326171875, "step": 5624 }, { "epoch": 0.7604819765771551, "grad_norm": 0.10608218610286713, "learning_rate": 4.646788557551921e-06, "loss": 0.08008909225463867, "step": 5625 }, { "epoch": 0.7606171733729911, "grad_norm": 0.1579318791627884, "learning_rate": 4.641836971435445e-06, "loss": 0.07655477523803711, "step": 5626 }, { "epoch": 0.760752370168827, "grad_norm": 0.09159674495458603, "learning_rate": 4.63688754194375e-06, "loss": 0.0635063648223877, "step": 5627 }, { "epoch": 0.760887566964663, "grad_norm": 0.07548218220472336, "learning_rate": 4.6319402701073295e-06, "loss": 0.06471824645996094, "step": 5628 }, { "epoch": 0.7610227637604988, "grad_norm": 0.11820026487112045, "learning_rate": 4.6269951569562355e-06, "loss": 0.06072878837585449, "step": 5629 }, { "epoch": 0.7611579605563348, "grad_norm": 0.07385329902172089, "learning_rate": 4.622052203520061e-06, "loss": 0.04602551460266113, "step": 5630 }, { "epoch": 0.7612931573521707, "grad_norm": 0.14370004832744598, "learning_rate": 4.617111410827968e-06, "loss": 0.07100248336791992, "step": 5631 }, { "epoch": 0.7614283541480067, "grad_norm": 0.06431196630001068, "learning_rate": 4.612172779908639e-06, "loss": 0.06270313262939453, "step": 5632 }, { "epoch": 0.7615635509438426, "grad_norm": 0.1484082192182541, "learning_rate": 4.607236311790335e-06, "loss": 0.06161355972290039, "step": 5633 }, { "epoch": 0.7616987477396786, "grad_norm": 0.1067439615726471, "learning_rate": 4.602302007500854e-06, "loss": 0.07177305221557617, "step": 5634 }, { "epoch": 0.7618339445355146, "grad_norm": 0.0956447571516037, "learning_rate": 4.597369868067537e-06, "loss": 0.0683283805847168, "step": 5635 }, { "epoch": 0.7619691413313504, "grad_norm": 0.07363173365592957, "learning_rate": 4.592439894517296e-06, "loss": 0.05650901794433594, "step": 5636 }, { "epoch": 0.7621043381271864, "grad_norm": 0.11286943405866623, "learning_rate": 4.587512087876559e-06, "loss": 0.05732917785644531, "step": 5637 }, { "epoch": 0.7622395349230223, "grad_norm": 0.0913057029247284, "learning_rate": 4.582586449171336e-06, "loss": 0.06699621677398682, "step": 5638 }, { "epoch": 0.7623747317188583, "grad_norm": 0.0982796847820282, "learning_rate": 4.577662979427168e-06, "loss": 0.07278728485107422, "step": 5639 }, { "epoch": 0.7625099285146942, "grad_norm": 0.052754923701286316, "learning_rate": 4.572741679669147e-06, "loss": 0.04321455955505371, "step": 5640 }, { "epoch": 0.7626451253105302, "grad_norm": 0.1420690417289734, "learning_rate": 4.567822550921912e-06, "loss": 0.059764981269836426, "step": 5641 }, { "epoch": 0.762780322106366, "grad_norm": 0.07855550199747086, "learning_rate": 4.562905594209647e-06, "loss": 0.09063005447387695, "step": 5642 }, { "epoch": 0.762915518902202, "grad_norm": 0.07625294476747513, "learning_rate": 4.557990810556102e-06, "loss": 0.07136201858520508, "step": 5643 }, { "epoch": 0.763050715698038, "grad_norm": 0.13812348246574402, "learning_rate": 4.553078200984553e-06, "loss": 0.0558469295501709, "step": 5644 }, { "epoch": 0.7631859124938739, "grad_norm": 0.13974198698997498, "learning_rate": 4.548167766517832e-06, "loss": 0.07493537664413452, "step": 5645 }, { "epoch": 0.7633211092897099, "grad_norm": 0.06511951237916946, "learning_rate": 4.543259508178318e-06, "loss": 0.055921971797943115, "step": 5646 }, { "epoch": 0.7634563060855458, "grad_norm": 0.13474225997924805, "learning_rate": 4.538353426987931e-06, "loss": 0.05825614929199219, "step": 5647 }, { "epoch": 0.7635915028813817, "grad_norm": 0.11956040561199188, "learning_rate": 4.533449523968154e-06, "loss": 0.0710597038269043, "step": 5648 }, { "epoch": 0.7637266996772176, "grad_norm": 0.08503083139657974, "learning_rate": 4.528547800140001e-06, "loss": 0.06499481201171875, "step": 5649 }, { "epoch": 0.7638618964730536, "grad_norm": 0.06388535350561142, "learning_rate": 4.523648256524037e-06, "loss": 0.06755924224853516, "step": 5650 }, { "epoch": 0.7639970932688895, "grad_norm": 0.18131384253501892, "learning_rate": 4.518750894140372e-06, "loss": 0.07106781005859375, "step": 5651 }, { "epoch": 0.7641322900647255, "grad_norm": 0.06497541069984436, "learning_rate": 4.513855714008659e-06, "loss": 0.03839111328125, "step": 5652 }, { "epoch": 0.7642674868605615, "grad_norm": 0.14690041542053223, "learning_rate": 4.508962717148111e-06, "loss": 0.06930422782897949, "step": 5653 }, { "epoch": 0.7644026836563973, "grad_norm": 0.07699696719646454, "learning_rate": 4.504071904577469e-06, "loss": 0.07210493087768555, "step": 5654 }, { "epoch": 0.7645378804522333, "grad_norm": 0.19169102609157562, "learning_rate": 4.499183277315027e-06, "loss": 0.06988239288330078, "step": 5655 }, { "epoch": 0.7646730772480692, "grad_norm": 0.046422988176345825, "learning_rate": 4.494296836378625e-06, "loss": 0.03894376754760742, "step": 5656 }, { "epoch": 0.7648082740439052, "grad_norm": 0.09825270622968674, "learning_rate": 4.4894125827856415e-06, "loss": 0.08876895904541016, "step": 5657 }, { "epoch": 0.7649434708397411, "grad_norm": 0.05888034403324127, "learning_rate": 4.4845305175530105e-06, "loss": 0.05085289478302002, "step": 5658 }, { "epoch": 0.7650786676355771, "grad_norm": 0.14505313336849213, "learning_rate": 4.479650641697201e-06, "loss": 0.09961342811584473, "step": 5659 }, { "epoch": 0.7652138644314129, "grad_norm": 0.06909554451704025, "learning_rate": 4.4747729562342305e-06, "loss": 0.07560539245605469, "step": 5660 }, { "epoch": 0.7653490612272489, "grad_norm": 0.11348436027765274, "learning_rate": 4.469897462179656e-06, "loss": 0.05191612243652344, "step": 5661 }, { "epoch": 0.7654842580230848, "grad_norm": 0.0610688254237175, "learning_rate": 4.46502416054858e-06, "loss": 0.031334519386291504, "step": 5662 }, { "epoch": 0.7656194548189208, "grad_norm": 0.15756669640541077, "learning_rate": 4.460153052355663e-06, "loss": 0.05693340301513672, "step": 5663 }, { "epoch": 0.7657546516147568, "grad_norm": 0.06645017117261887, "learning_rate": 4.455284138615074e-06, "loss": 0.051447510719299316, "step": 5664 }, { "epoch": 0.7658898484105927, "grad_norm": 0.14653916656970978, "learning_rate": 4.4504174203405656e-06, "loss": 0.09816455841064453, "step": 5665 }, { "epoch": 0.7660250452064286, "grad_norm": 0.18517173826694489, "learning_rate": 4.445552898545407e-06, "loss": 0.07475399971008301, "step": 5666 }, { "epoch": 0.7661602420022645, "grad_norm": 0.06996867805719376, "learning_rate": 4.440690574242413e-06, "loss": 0.053424060344696045, "step": 5667 }, { "epoch": 0.7662954387981005, "grad_norm": 0.06849701702594757, "learning_rate": 4.435830448443961e-06, "loss": 0.03186267614364624, "step": 5668 }, { "epoch": 0.7664306355939364, "grad_norm": 0.039524782449007034, "learning_rate": 4.430972522161934e-06, "loss": 0.03809213638305664, "step": 5669 }, { "epoch": 0.7665658323897724, "grad_norm": 0.11341464519500732, "learning_rate": 4.426116796407794e-06, "loss": 0.08158540725708008, "step": 5670 }, { "epoch": 0.7667010291856083, "grad_norm": 0.14792455732822418, "learning_rate": 4.421263272192523e-06, "loss": 0.07742452621459961, "step": 5671 }, { "epoch": 0.7668362259814442, "grad_norm": 0.08348523825407028, "learning_rate": 4.416411950526648e-06, "loss": 0.04569399356842041, "step": 5672 }, { "epoch": 0.7669714227772801, "grad_norm": 0.07707400619983673, "learning_rate": 4.411562832420252e-06, "loss": 0.06798934936523438, "step": 5673 }, { "epoch": 0.7671066195731161, "grad_norm": 0.12139245122671127, "learning_rate": 4.406715918882929e-06, "loss": 0.05596733093261719, "step": 5674 }, { "epoch": 0.7672418163689521, "grad_norm": 0.07738848775625229, "learning_rate": 4.4018712109238475e-06, "loss": 0.08188116550445557, "step": 5675 }, { "epoch": 0.767377013164788, "grad_norm": 0.12631875276565552, "learning_rate": 4.3970287095516965e-06, "loss": 0.06354045867919922, "step": 5676 }, { "epoch": 0.767512209960624, "grad_norm": 0.056322433054447174, "learning_rate": 4.39218841577471e-06, "loss": 0.049623727798461914, "step": 5677 }, { "epoch": 0.7676474067564599, "grad_norm": 0.062164556235075, "learning_rate": 4.387350330600662e-06, "loss": 0.0751807689666748, "step": 5678 }, { "epoch": 0.7677826035522958, "grad_norm": 0.10083353519439697, "learning_rate": 4.382514455036864e-06, "loss": 0.036834120750427246, "step": 5679 }, { "epoch": 0.7679178003481317, "grad_norm": 0.16395795345306396, "learning_rate": 4.377680790090182e-06, "loss": 0.06953191757202148, "step": 5680 }, { "epoch": 0.7680529971439677, "grad_norm": 0.07130637764930725, "learning_rate": 4.372849336767004e-06, "loss": 0.044980525970458984, "step": 5681 }, { "epoch": 0.7681881939398036, "grad_norm": 0.08591113239526749, "learning_rate": 4.3680200960732645e-06, "loss": 0.0762636661529541, "step": 5682 }, { "epoch": 0.7683233907356396, "grad_norm": 0.09558533132076263, "learning_rate": 4.363193069014439e-06, "loss": 0.11683797836303711, "step": 5683 }, { "epoch": 0.7684585875314756, "grad_norm": 0.03490419313311577, "learning_rate": 4.3583682565955325e-06, "loss": 0.02942836284637451, "step": 5684 }, { "epoch": 0.7685937843273114, "grad_norm": 0.059083644300699234, "learning_rate": 4.3535456598211074e-06, "loss": 0.05377912521362305, "step": 5685 }, { "epoch": 0.7687289811231474, "grad_norm": 0.14577992260456085, "learning_rate": 4.348725279695251e-06, "loss": 0.06428909301757812, "step": 5686 }, { "epoch": 0.7688641779189833, "grad_norm": 0.04511864483356476, "learning_rate": 4.343907117221591e-06, "loss": 0.04201316833496094, "step": 5687 }, { "epoch": 0.7689993747148193, "grad_norm": 0.08045602589845657, "learning_rate": 4.339091173403294e-06, "loss": 0.06159019470214844, "step": 5688 }, { "epoch": 0.7691345715106552, "grad_norm": 0.08565692603588104, "learning_rate": 4.334277449243061e-06, "loss": 0.07035970687866211, "step": 5689 }, { "epoch": 0.7692697683064912, "grad_norm": 0.17514848709106445, "learning_rate": 4.329465945743144e-06, "loss": 0.07039403915405273, "step": 5690 }, { "epoch": 0.769404965102327, "grad_norm": 0.048192404210567474, "learning_rate": 4.32465666390532e-06, "loss": 0.043877601623535156, "step": 5691 }, { "epoch": 0.769540161898163, "grad_norm": 0.08604114502668381, "learning_rate": 4.319849604730905e-06, "loss": 0.054634809494018555, "step": 5692 }, { "epoch": 0.769675358693999, "grad_norm": 0.09068559110164642, "learning_rate": 4.315044769220758e-06, "loss": 0.039613962173461914, "step": 5693 }, { "epoch": 0.7698105554898349, "grad_norm": 0.08562308549880981, "learning_rate": 4.310242158375264e-06, "loss": 0.053876399993896484, "step": 5694 }, { "epoch": 0.7699457522856709, "grad_norm": 0.1034131795167923, "learning_rate": 4.30544177319436e-06, "loss": 0.07596015930175781, "step": 5695 }, { "epoch": 0.7700809490815068, "grad_norm": 0.04676998406648636, "learning_rate": 4.300643614677511e-06, "loss": 0.05100703239440918, "step": 5696 }, { "epoch": 0.7702161458773427, "grad_norm": 0.052511557936668396, "learning_rate": 4.2958476838237165e-06, "loss": 0.053226470947265625, "step": 5697 }, { "epoch": 0.7703513426731786, "grad_norm": 0.1213599443435669, "learning_rate": 4.2910539816315166e-06, "loss": 0.06881570816040039, "step": 5698 }, { "epoch": 0.7704865394690146, "grad_norm": 0.08902842551469803, "learning_rate": 4.286262509098979e-06, "loss": 0.07626307010650635, "step": 5699 }, { "epoch": 0.7706217362648505, "grad_norm": 0.12481430917978287, "learning_rate": 4.28147326722373e-06, "loss": 0.05967378616333008, "step": 5700 }, { "epoch": 0.7707569330606865, "grad_norm": 0.09361330419778824, "learning_rate": 4.2766862570028965e-06, "loss": 0.0902797281742096, "step": 5701 }, { "epoch": 0.7708921298565224, "grad_norm": 0.11980674415826797, "learning_rate": 4.2719014794331715e-06, "loss": 0.08904170989990234, "step": 5702 }, { "epoch": 0.7710273266523583, "grad_norm": 0.13160812854766846, "learning_rate": 4.267118935510767e-06, "loss": 0.08312225341796875, "step": 5703 }, { "epoch": 0.7711625234481942, "grad_norm": 0.11950559914112091, "learning_rate": 4.2623386262314306e-06, "loss": 0.06126868724822998, "step": 5704 }, { "epoch": 0.7712977202440302, "grad_norm": 0.12375812232494354, "learning_rate": 4.257560552590461e-06, "loss": 0.07834815979003906, "step": 5705 }, { "epoch": 0.7714329170398662, "grad_norm": 0.1631339192390442, "learning_rate": 4.252784715582661e-06, "loss": 0.08001017570495605, "step": 5706 }, { "epoch": 0.7715681138357021, "grad_norm": 0.09016553312540054, "learning_rate": 4.2480111162024e-06, "loss": 0.0696326494216919, "step": 5707 }, { "epoch": 0.7717033106315381, "grad_norm": 0.09198438376188278, "learning_rate": 4.243239755443561e-06, "loss": 0.05337882041931152, "step": 5708 }, { "epoch": 0.7718385074273739, "grad_norm": 0.06559540331363678, "learning_rate": 4.238470634299567e-06, "loss": 0.05505228042602539, "step": 5709 }, { "epoch": 0.7719737042232099, "grad_norm": 0.13903583586215973, "learning_rate": 4.233703753763375e-06, "loss": 0.0497514009475708, "step": 5710 }, { "epoch": 0.7721089010190458, "grad_norm": 0.06737703830003738, "learning_rate": 4.228939114827469e-06, "loss": 0.07395601272583008, "step": 5711 }, { "epoch": 0.7722440978148818, "grad_norm": 0.10658776015043259, "learning_rate": 4.224176718483881e-06, "loss": 0.06396293640136719, "step": 5712 }, { "epoch": 0.7723792946107177, "grad_norm": 0.03172273933887482, "learning_rate": 4.219416565724165e-06, "loss": 0.02954268455505371, "step": 5713 }, { "epoch": 0.7725144914065537, "grad_norm": 0.13991527259349823, "learning_rate": 4.21465865753941e-06, "loss": 0.09024268388748169, "step": 5714 }, { "epoch": 0.7726496882023896, "grad_norm": 0.08384308964014053, "learning_rate": 4.209902994920236e-06, "loss": 0.05019557476043701, "step": 5715 }, { "epoch": 0.7727848849982255, "grad_norm": 0.12114990502595901, "learning_rate": 4.205149578856794e-06, "loss": 0.04989200830459595, "step": 5716 }, { "epoch": 0.7729200817940615, "grad_norm": 0.04591487720608711, "learning_rate": 4.200398410338779e-06, "loss": 0.03940868377685547, "step": 5717 }, { "epoch": 0.7730552785898974, "grad_norm": 0.07248803973197937, "learning_rate": 4.1956494903554056e-06, "loss": 0.04620814323425293, "step": 5718 }, { "epoch": 0.7731904753857334, "grad_norm": 0.11163653433322906, "learning_rate": 4.190902819895425e-06, "loss": 0.05672287940979004, "step": 5719 }, { "epoch": 0.7733256721815693, "grad_norm": 0.06271634250879288, "learning_rate": 4.186158399947118e-06, "loss": 0.057398319244384766, "step": 5720 }, { "epoch": 0.7734608689774052, "grad_norm": 0.10149148106575012, "learning_rate": 4.181416231498292e-06, "loss": 0.07166898250579834, "step": 5721 }, { "epoch": 0.7735960657732411, "grad_norm": 0.0582280233502388, "learning_rate": 4.176676315536306e-06, "loss": 0.057489871978759766, "step": 5722 }, { "epoch": 0.7737312625690771, "grad_norm": 0.0627729594707489, "learning_rate": 4.171938653048027e-06, "loss": 0.05167412757873535, "step": 5723 }, { "epoch": 0.773866459364913, "grad_norm": 0.11052908003330231, "learning_rate": 4.1672032450198616e-06, "loss": 0.07015776634216309, "step": 5724 }, { "epoch": 0.774001656160749, "grad_norm": 0.06602715700864792, "learning_rate": 4.16247009243775e-06, "loss": 0.05773043632507324, "step": 5725 }, { "epoch": 0.774136852956585, "grad_norm": 0.08838232606649399, "learning_rate": 4.1577391962871504e-06, "loss": 0.07994794845581055, "step": 5726 }, { "epoch": 0.7742720497524209, "grad_norm": 0.08086957037448883, "learning_rate": 4.153010557553076e-06, "loss": 0.0540698766708374, "step": 5727 }, { "epoch": 0.7744072465482568, "grad_norm": 0.07766497880220413, "learning_rate": 4.148284177220045e-06, "loss": 0.04623603820800781, "step": 5728 }, { "epoch": 0.7745424433440927, "grad_norm": 0.18808479607105255, "learning_rate": 4.143560056272117e-06, "loss": 0.06416220963001251, "step": 5729 }, { "epoch": 0.7746776401399287, "grad_norm": 0.1262831836938858, "learning_rate": 4.1388381956928796e-06, "loss": 0.07453316450119019, "step": 5730 }, { "epoch": 0.7748128369357646, "grad_norm": 0.13727988302707672, "learning_rate": 4.134118596465443e-06, "loss": 0.0624697208404541, "step": 5731 }, { "epoch": 0.7749480337316006, "grad_norm": 0.038737379014492035, "learning_rate": 4.1294012595724675e-06, "loss": 0.04214668273925781, "step": 5732 }, { "epoch": 0.7750832305274366, "grad_norm": 0.07069750875234604, "learning_rate": 4.1246861859961114e-06, "loss": 0.04573345184326172, "step": 5733 }, { "epoch": 0.7752184273232724, "grad_norm": 0.10714850574731827, "learning_rate": 4.119973376718089e-06, "loss": 0.06184267997741699, "step": 5734 }, { "epoch": 0.7753536241191084, "grad_norm": 0.08067939430475235, "learning_rate": 4.115262832719628e-06, "loss": 0.06285619735717773, "step": 5735 }, { "epoch": 0.7754888209149443, "grad_norm": 0.07119523733854294, "learning_rate": 4.110554554981486e-06, "loss": 0.03945732116699219, "step": 5736 }, { "epoch": 0.7756240177107803, "grad_norm": 0.07371142506599426, "learning_rate": 4.1058485444839655e-06, "loss": 0.07352876663208008, "step": 5737 }, { "epoch": 0.7757592145066162, "grad_norm": 0.0705307349562645, "learning_rate": 4.101144802206862e-06, "loss": 0.05722975730895996, "step": 5738 }, { "epoch": 0.7758944113024522, "grad_norm": 0.04387529566884041, "learning_rate": 4.096443329129535e-06, "loss": 0.026026010513305664, "step": 5739 }, { "epoch": 0.776029608098288, "grad_norm": 0.0630331039428711, "learning_rate": 4.091744126230853e-06, "loss": 0.05737662315368652, "step": 5740 }, { "epoch": 0.776164804894124, "grad_norm": 0.05152873322367668, "learning_rate": 4.08704719448921e-06, "loss": 0.04090714454650879, "step": 5741 }, { "epoch": 0.7763000016899599, "grad_norm": 0.09099595993757248, "learning_rate": 4.082352534882543e-06, "loss": 0.047368526458740234, "step": 5742 }, { "epoch": 0.7764351984857959, "grad_norm": 0.047396838665008545, "learning_rate": 4.07766014838829e-06, "loss": 0.03992140293121338, "step": 5743 }, { "epoch": 0.7765703952816319, "grad_norm": 0.05859273299574852, "learning_rate": 4.072970035983443e-06, "loss": 0.054872751235961914, "step": 5744 }, { "epoch": 0.7767055920774678, "grad_norm": 0.058849774301052094, "learning_rate": 4.068282198644505e-06, "loss": 0.061443328857421875, "step": 5745 }, { "epoch": 0.7768407888733037, "grad_norm": 0.0690573900938034, "learning_rate": 4.06359663734751e-06, "loss": 0.06186199188232422, "step": 5746 }, { "epoch": 0.7769759856691396, "grad_norm": 0.10576310753822327, "learning_rate": 4.058913353068013e-06, "loss": 0.08347606658935547, "step": 5747 }, { "epoch": 0.7771111824649756, "grad_norm": 0.07534656673669815, "learning_rate": 4.0542323467810985e-06, "loss": 0.054612159729003906, "step": 5748 }, { "epoch": 0.7772463792608115, "grad_norm": 0.07083319872617722, "learning_rate": 4.049553619461381e-06, "loss": 0.04709815979003906, "step": 5749 }, { "epoch": 0.7773815760566475, "grad_norm": 0.0577218234539032, "learning_rate": 4.044877172082997e-06, "loss": 0.04214411973953247, "step": 5750 }, { "epoch": 0.7775167728524834, "grad_norm": 0.07540381699800491, "learning_rate": 4.040203005619604e-06, "loss": 0.06586265563964844, "step": 5751 }, { "epoch": 0.7776519696483193, "grad_norm": 0.1522982269525528, "learning_rate": 4.035531121044392e-06, "loss": 0.07368206977844238, "step": 5752 }, { "epoch": 0.7777871664441552, "grad_norm": 0.049702271819114685, "learning_rate": 4.030861519330065e-06, "loss": 0.04225802421569824, "step": 5753 }, { "epoch": 0.7779223632399912, "grad_norm": 0.14703910052776337, "learning_rate": 4.026194201448868e-06, "loss": 0.086081862449646, "step": 5754 }, { "epoch": 0.7780575600358272, "grad_norm": 0.09344411641359329, "learning_rate": 4.021529168372558e-06, "loss": 0.06851387023925781, "step": 5755 }, { "epoch": 0.7781927568316631, "grad_norm": 0.10253384709358215, "learning_rate": 4.01686642107242e-06, "loss": 0.05027914047241211, "step": 5756 }, { "epoch": 0.7783279536274991, "grad_norm": 0.0766432136297226, "learning_rate": 4.0122059605192624e-06, "loss": 0.06454163789749146, "step": 5757 }, { "epoch": 0.7784631504233349, "grad_norm": 0.11253654956817627, "learning_rate": 4.007547787683412e-06, "loss": 0.05473829805850983, "step": 5758 }, { "epoch": 0.7785983472191709, "grad_norm": 0.09446777403354645, "learning_rate": 4.002891903534736e-06, "loss": 0.06635713577270508, "step": 5759 }, { "epoch": 0.7787335440150068, "grad_norm": 0.0888349637389183, "learning_rate": 3.998238309042611e-06, "loss": 0.07001912593841553, "step": 5760 }, { "epoch": 0.7788687408108428, "grad_norm": 0.10076601058244705, "learning_rate": 3.993587005175937e-06, "loss": 0.08518075942993164, "step": 5761 }, { "epoch": 0.7790039376066787, "grad_norm": 0.06220710277557373, "learning_rate": 3.988937992903144e-06, "loss": 0.04938507080078125, "step": 5762 }, { "epoch": 0.7791391344025147, "grad_norm": 0.14608749747276306, "learning_rate": 3.9842912731921716e-06, "loss": 0.07331264019012451, "step": 5763 }, { "epoch": 0.7792743311983505, "grad_norm": 0.09500111639499664, "learning_rate": 3.979646847010506e-06, "loss": 0.07750976085662842, "step": 5764 }, { "epoch": 0.7794095279941865, "grad_norm": 0.10417024791240692, "learning_rate": 3.975004715325134e-06, "loss": 0.09924459457397461, "step": 5765 }, { "epoch": 0.7795447247900225, "grad_norm": 0.07900304347276688, "learning_rate": 3.970364879102572e-06, "loss": 0.053186893463134766, "step": 5766 }, { "epoch": 0.7796799215858584, "grad_norm": 0.06283389776945114, "learning_rate": 3.96572733930886e-06, "loss": 0.07232850790023804, "step": 5767 }, { "epoch": 0.7798151183816944, "grad_norm": 0.09084179997444153, "learning_rate": 3.961092096909552e-06, "loss": 0.042107582092285156, "step": 5768 }, { "epoch": 0.7799503151775303, "grad_norm": 0.09927485883235931, "learning_rate": 3.9564591528697455e-06, "loss": 0.056203603744506836, "step": 5769 }, { "epoch": 0.7800855119733663, "grad_norm": 0.0700564756989479, "learning_rate": 3.9518285081540275e-06, "loss": 0.052378058433532715, "step": 5770 }, { "epoch": 0.7802207087692021, "grad_norm": 0.056838974356651306, "learning_rate": 3.947200163726534e-06, "loss": 0.0572657585144043, "step": 5771 }, { "epoch": 0.7803559055650381, "grad_norm": 0.08624331653118134, "learning_rate": 3.9425741205509055e-06, "loss": 0.051546335220336914, "step": 5772 }, { "epoch": 0.780491102360874, "grad_norm": 0.06352335959672928, "learning_rate": 3.9379503795903065e-06, "loss": 0.0782475471496582, "step": 5773 }, { "epoch": 0.78062629915671, "grad_norm": 0.04690715670585632, "learning_rate": 3.933328941807439e-06, "loss": 0.04123187065124512, "step": 5774 }, { "epoch": 0.780761495952546, "grad_norm": 0.0718836709856987, "learning_rate": 3.928709808164491e-06, "loss": 0.05680561065673828, "step": 5775 }, { "epoch": 0.7808966927483819, "grad_norm": 0.12922799587249756, "learning_rate": 3.924092979623203e-06, "loss": 0.07539081573486328, "step": 5776 }, { "epoch": 0.7810318895442178, "grad_norm": 0.10052436590194702, "learning_rate": 3.919478457144824e-06, "loss": 0.07405269145965576, "step": 5777 }, { "epoch": 0.7811670863400537, "grad_norm": 0.12398896366357803, "learning_rate": 3.914866241690115e-06, "loss": 0.05944347381591797, "step": 5778 }, { "epoch": 0.7813022831358897, "grad_norm": 0.08246397227048874, "learning_rate": 3.9102563342193695e-06, "loss": 0.07052463293075562, "step": 5779 }, { "epoch": 0.7814374799317256, "grad_norm": 0.09794431924819946, "learning_rate": 3.905648735692389e-06, "loss": 0.07231402397155762, "step": 5780 }, { "epoch": 0.7815726767275616, "grad_norm": 0.1034655049443245, "learning_rate": 3.901043447068508e-06, "loss": 0.08643817901611328, "step": 5781 }, { "epoch": 0.7817078735233975, "grad_norm": 0.06430243700742722, "learning_rate": 3.896440469306567e-06, "loss": 0.05180072784423828, "step": 5782 }, { "epoch": 0.7818430703192334, "grad_norm": 0.10815218091011047, "learning_rate": 3.891839803364934e-06, "loss": 0.05089712142944336, "step": 5783 }, { "epoch": 0.7819782671150693, "grad_norm": 0.08369754254817963, "learning_rate": 3.887241450201487e-06, "loss": 0.04656982421875, "step": 5784 }, { "epoch": 0.7821134639109053, "grad_norm": 0.04828212782740593, "learning_rate": 3.882645410773629e-06, "loss": 0.04306960105895996, "step": 5785 }, { "epoch": 0.7822486607067413, "grad_norm": 0.1344897449016571, "learning_rate": 3.878051686038284e-06, "loss": 0.09119057655334473, "step": 5786 }, { "epoch": 0.7823838575025772, "grad_norm": 0.08893289417028427, "learning_rate": 3.873460276951889e-06, "loss": 0.06274032592773438, "step": 5787 }, { "epoch": 0.7825190542984132, "grad_norm": 0.0842614695429802, "learning_rate": 3.868871184470397e-06, "loss": 0.060044050216674805, "step": 5788 }, { "epoch": 0.782654251094249, "grad_norm": 0.11243007332086563, "learning_rate": 3.864284409549282e-06, "loss": 0.07468581199645996, "step": 5789 }, { "epoch": 0.782789447890085, "grad_norm": 0.06746405363082886, "learning_rate": 3.859699953143532e-06, "loss": 0.05449056625366211, "step": 5790 }, { "epoch": 0.7829246446859209, "grad_norm": 0.2180185467004776, "learning_rate": 3.855117816207665e-06, "loss": 0.07177698612213135, "step": 5791 }, { "epoch": 0.7830598414817569, "grad_norm": 0.07927478104829788, "learning_rate": 3.850537999695699e-06, "loss": 0.06865692138671875, "step": 5792 }, { "epoch": 0.7831950382775928, "grad_norm": 0.11424429714679718, "learning_rate": 3.845960504561179e-06, "loss": 0.06290602684020996, "step": 5793 }, { "epoch": 0.7833302350734288, "grad_norm": 0.0978156253695488, "learning_rate": 3.841385331757161e-06, "loss": 0.058560967445373535, "step": 5794 }, { "epoch": 0.7834654318692647, "grad_norm": 0.07030512392520905, "learning_rate": 3.8368124822362184e-06, "loss": 0.06477928161621094, "step": 5795 }, { "epoch": 0.7836006286651006, "grad_norm": 0.15952721238136292, "learning_rate": 3.832241956950449e-06, "loss": 0.07527303695678711, "step": 5796 }, { "epoch": 0.7837358254609366, "grad_norm": 0.08921771496534348, "learning_rate": 3.82767375685146e-06, "loss": 0.0729837417602539, "step": 5797 }, { "epoch": 0.7838710222567725, "grad_norm": 0.06520732492208481, "learning_rate": 3.823107882890373e-06, "loss": 0.06862860918045044, "step": 5798 }, { "epoch": 0.7840062190526085, "grad_norm": 0.04068349674344063, "learning_rate": 3.8185443360178265e-06, "loss": 0.030504941940307617, "step": 5799 }, { "epoch": 0.7841414158484444, "grad_norm": 0.1395060122013092, "learning_rate": 3.813983117183973e-06, "loss": 0.07504129409790039, "step": 5800 }, { "epoch": 0.7842766126442803, "grad_norm": 0.1359846144914627, "learning_rate": 3.8094242273384932e-06, "loss": 0.09117484092712402, "step": 5801 }, { "epoch": 0.7844118094401162, "grad_norm": 0.04848155751824379, "learning_rate": 3.804867667430555e-06, "loss": 0.05596351623535156, "step": 5802 }, { "epoch": 0.7845470062359522, "grad_norm": 0.09631361067295074, "learning_rate": 3.800313438408874e-06, "loss": 0.06917047500610352, "step": 5803 }, { "epoch": 0.7846822030317882, "grad_norm": 0.06552135944366455, "learning_rate": 3.7957615412216582e-06, "loss": 0.057166337966918945, "step": 5804 }, { "epoch": 0.7848173998276241, "grad_norm": 0.05808146297931671, "learning_rate": 3.791211976816634e-06, "loss": 0.05141258239746094, "step": 5805 }, { "epoch": 0.7849525966234601, "grad_norm": 0.06210657209157944, "learning_rate": 3.786664746141057e-06, "loss": 0.03990602493286133, "step": 5806 }, { "epoch": 0.7850877934192959, "grad_norm": 0.07927499711513519, "learning_rate": 3.782119850141669e-06, "loss": 0.0732259750366211, "step": 5807 }, { "epoch": 0.7852229902151319, "grad_norm": 0.07797801494598389, "learning_rate": 3.777577289764752e-06, "loss": 0.052412986755371094, "step": 5808 }, { "epoch": 0.7853581870109678, "grad_norm": 0.058375872671604156, "learning_rate": 3.7730370659560904e-06, "loss": 0.03686520457267761, "step": 5809 }, { "epoch": 0.7854933838068038, "grad_norm": 0.10792986303567886, "learning_rate": 3.7684991796609746e-06, "loss": 0.061506032943725586, "step": 5810 }, { "epoch": 0.7856285806026397, "grad_norm": 0.11434817314147949, "learning_rate": 3.7639636318242344e-06, "loss": 0.07185375690460205, "step": 5811 }, { "epoch": 0.7857637773984757, "grad_norm": 0.06701134890317917, "learning_rate": 3.7594304233901738e-06, "loss": 0.05864572525024414, "step": 5812 }, { "epoch": 0.7858989741943117, "grad_norm": 0.12902435660362244, "learning_rate": 3.754899555302645e-06, "loss": 0.05607867240905762, "step": 5813 }, { "epoch": 0.7860341709901475, "grad_norm": 0.05804478004574776, "learning_rate": 3.7503710285049964e-06, "loss": 0.04204225540161133, "step": 5814 }, { "epoch": 0.7861693677859835, "grad_norm": 0.0753183662891388, "learning_rate": 3.7458448439400888e-06, "loss": 0.06431722640991211, "step": 5815 }, { "epoch": 0.7863045645818194, "grad_norm": 0.11462990939617157, "learning_rate": 3.7413210025502985e-06, "loss": 0.08178341388702393, "step": 5816 }, { "epoch": 0.7864397613776554, "grad_norm": 0.07708786427974701, "learning_rate": 3.7367995052775123e-06, "loss": 0.05583548545837402, "step": 5817 }, { "epoch": 0.7865749581734913, "grad_norm": 0.07676519453525543, "learning_rate": 3.732280353063133e-06, "loss": 0.05231192708015442, "step": 5818 }, { "epoch": 0.7867101549693273, "grad_norm": 0.09140397608280182, "learning_rate": 3.727763546848074e-06, "loss": 0.06257450580596924, "step": 5819 }, { "epoch": 0.7868453517651631, "grad_norm": 0.09161291271448135, "learning_rate": 3.7232490875727544e-06, "loss": 0.06683731079101562, "step": 5820 }, { "epoch": 0.7869805485609991, "grad_norm": 0.1485453099012375, "learning_rate": 3.718736976177108e-06, "loss": 0.0742807388305664, "step": 5821 }, { "epoch": 0.787115745356835, "grad_norm": 0.06495513021945953, "learning_rate": 3.71422721360058e-06, "loss": 0.06864309310913086, "step": 5822 }, { "epoch": 0.787250942152671, "grad_norm": 0.047900985926389694, "learning_rate": 3.709719800782133e-06, "loss": 0.03637409210205078, "step": 5823 }, { "epoch": 0.787386138948507, "grad_norm": 0.07076506316661835, "learning_rate": 3.7052147386602304e-06, "loss": 0.03696131706237793, "step": 5824 }, { "epoch": 0.7875213357443429, "grad_norm": 0.1019882783293724, "learning_rate": 3.700712028172851e-06, "loss": 0.06559419631958008, "step": 5825 }, { "epoch": 0.7876565325401788, "grad_norm": 0.0746975913643837, "learning_rate": 3.696211670257481e-06, "loss": 0.061260342597961426, "step": 5826 }, { "epoch": 0.7877917293360147, "grad_norm": 0.05356961488723755, "learning_rate": 3.691713665851117e-06, "loss": 0.05988669395446777, "step": 5827 }, { "epoch": 0.7879269261318507, "grad_norm": 0.08384816348552704, "learning_rate": 3.6872180158902764e-06, "loss": 0.05983622372150421, "step": 5828 }, { "epoch": 0.7880621229276866, "grad_norm": 0.06372924149036407, "learning_rate": 3.6827247213109705e-06, "loss": 0.05476784706115723, "step": 5829 }, { "epoch": 0.7881973197235226, "grad_norm": 0.08513446152210236, "learning_rate": 3.6782337830487294e-06, "loss": 0.05332779884338379, "step": 5830 }, { "epoch": 0.7883325165193585, "grad_norm": 0.10654087364673615, "learning_rate": 3.6737452020385886e-06, "loss": 0.09494352340698242, "step": 5831 }, { "epoch": 0.7884677133151944, "grad_norm": 0.07302692532539368, "learning_rate": 3.6692589792150923e-06, "loss": 0.04778182506561279, "step": 5832 }, { "epoch": 0.7886029101110303, "grad_norm": 0.07393590360879898, "learning_rate": 3.6647751155123026e-06, "loss": 0.06770825386047363, "step": 5833 }, { "epoch": 0.7887381069068663, "grad_norm": 0.0764707699418068, "learning_rate": 3.660293611863782e-06, "loss": 0.040668368339538574, "step": 5834 }, { "epoch": 0.7888733037027023, "grad_norm": 0.06927548348903656, "learning_rate": 3.655814469202602e-06, "loss": 0.04694366455078125, "step": 5835 }, { "epoch": 0.7890085004985382, "grad_norm": 0.10794918239116669, "learning_rate": 3.6513376884613446e-06, "loss": 0.06515288352966309, "step": 5836 }, { "epoch": 0.7891436972943742, "grad_norm": 0.1458577811717987, "learning_rate": 3.6468632705720934e-06, "loss": 0.08035993576049805, "step": 5837 }, { "epoch": 0.78927889409021, "grad_norm": 0.07329337298870087, "learning_rate": 3.6423912164664606e-06, "loss": 0.048241376876831055, "step": 5838 }, { "epoch": 0.789414090886046, "grad_norm": 0.06068097800016403, "learning_rate": 3.637921527075534e-06, "loss": 0.052973270416259766, "step": 5839 }, { "epoch": 0.7895492876818819, "grad_norm": 0.04826470836997032, "learning_rate": 3.63345420332994e-06, "loss": 0.031844139099121094, "step": 5840 }, { "epoch": 0.7896844844777179, "grad_norm": 0.0977715328335762, "learning_rate": 3.628989246159795e-06, "loss": 0.08012515306472778, "step": 5841 }, { "epoch": 0.7898196812735538, "grad_norm": 0.11876091361045837, "learning_rate": 3.6245266564947205e-06, "loss": 0.057781100273132324, "step": 5842 }, { "epoch": 0.7899548780693898, "grad_norm": 0.044788289815187454, "learning_rate": 3.620066435263868e-06, "loss": 0.024888038635253906, "step": 5843 }, { "epoch": 0.7900900748652256, "grad_norm": 0.08737047016620636, "learning_rate": 3.6156085833958596e-06, "loss": 0.05821573734283447, "step": 5844 }, { "epoch": 0.7902252716610616, "grad_norm": 0.11403119564056396, "learning_rate": 3.6111531018188584e-06, "loss": 0.0505141019821167, "step": 5845 }, { "epoch": 0.7903604684568976, "grad_norm": 0.07073387503623962, "learning_rate": 3.606699991460513e-06, "loss": 0.061865806579589844, "step": 5846 }, { "epoch": 0.7904956652527335, "grad_norm": 0.11276034265756607, "learning_rate": 3.602249253247986e-06, "loss": 0.07218074798583984, "step": 5847 }, { "epoch": 0.7906308620485695, "grad_norm": 0.09160439670085907, "learning_rate": 3.5978008881079445e-06, "loss": 0.07404851913452148, "step": 5848 }, { "epoch": 0.7907660588444054, "grad_norm": 0.1586209386587143, "learning_rate": 3.5933548969665587e-06, "loss": 0.08043265342712402, "step": 5849 }, { "epoch": 0.7909012556402413, "grad_norm": 0.06926486641168594, "learning_rate": 3.5889112807495152e-06, "loss": 0.06469619274139404, "step": 5850 }, { "epoch": 0.7910364524360772, "grad_norm": 0.08617951720952988, "learning_rate": 3.5844700403819935e-06, "loss": 0.08688092231750488, "step": 5851 }, { "epoch": 0.7911716492319132, "grad_norm": 0.059393443167209625, "learning_rate": 3.5800311767886847e-06, "loss": 0.04292649030685425, "step": 5852 }, { "epoch": 0.7913068460277491, "grad_norm": 0.09488843381404877, "learning_rate": 3.575594690893784e-06, "loss": 0.06721115112304688, "step": 5853 }, { "epoch": 0.7914420428235851, "grad_norm": 0.11273609101772308, "learning_rate": 3.5711605836209853e-06, "loss": 0.07346487045288086, "step": 5854 }, { "epoch": 0.7915772396194211, "grad_norm": 0.09509149938821793, "learning_rate": 3.566728855893505e-06, "loss": 0.04930388927459717, "step": 5855 }, { "epoch": 0.7917124364152569, "grad_norm": 0.059821393340826035, "learning_rate": 3.5622995086340466e-06, "loss": 0.054166436195373535, "step": 5856 }, { "epoch": 0.7918476332110929, "grad_norm": 0.1228511705994606, "learning_rate": 3.5578725427648233e-06, "loss": 0.051804184913635254, "step": 5857 }, { "epoch": 0.7919828300069288, "grad_norm": 0.05270153284072876, "learning_rate": 3.553447959207553e-06, "loss": 0.03704023361206055, "step": 5858 }, { "epoch": 0.7921180268027648, "grad_norm": 0.0815044641494751, "learning_rate": 3.5490257588834552e-06, "loss": 0.04775381088256836, "step": 5859 }, { "epoch": 0.7922532235986007, "grad_norm": 0.10000798851251602, "learning_rate": 3.5446059427132615e-06, "loss": 0.09478378295898438, "step": 5860 }, { "epoch": 0.7923884203944367, "grad_norm": 0.08404769748449326, "learning_rate": 3.5401885116171977e-06, "loss": 0.05052304267883301, "step": 5861 }, { "epoch": 0.7925236171902726, "grad_norm": 0.04907684773206711, "learning_rate": 3.5357734665149983e-06, "loss": 0.040949106216430664, "step": 5862 }, { "epoch": 0.7926588139861085, "grad_norm": 0.07489892095327377, "learning_rate": 3.5313608083258975e-06, "loss": 0.06591272354125977, "step": 5863 }, { "epoch": 0.7927940107819444, "grad_norm": 0.07882266491651535, "learning_rate": 3.526950537968629e-06, "loss": 0.0416867733001709, "step": 5864 }, { "epoch": 0.7929292075777804, "grad_norm": 0.14749272167682648, "learning_rate": 3.5225426563614466e-06, "loss": 0.09202718734741211, "step": 5865 }, { "epoch": 0.7930644043736164, "grad_norm": 0.13158512115478516, "learning_rate": 3.518137164422088e-06, "loss": 0.07324403524398804, "step": 5866 }, { "epoch": 0.7931996011694523, "grad_norm": 0.09891493618488312, "learning_rate": 3.513734063067799e-06, "loss": 0.046997666358947754, "step": 5867 }, { "epoch": 0.7933347979652883, "grad_norm": 0.11289020627737045, "learning_rate": 3.5093333532153316e-06, "loss": 0.06782937049865723, "step": 5868 }, { "epoch": 0.7934699947611241, "grad_norm": 0.06969927251338959, "learning_rate": 3.504935035780931e-06, "loss": 0.06851321458816528, "step": 5869 }, { "epoch": 0.7936051915569601, "grad_norm": 0.05267447233200073, "learning_rate": 3.500539111680364e-06, "loss": 0.06720972061157227, "step": 5870 }, { "epoch": 0.793740388352796, "grad_norm": 0.13022063672542572, "learning_rate": 3.4961455818288683e-06, "loss": 0.048854708671569824, "step": 5871 }, { "epoch": 0.793875585148632, "grad_norm": 0.08528923243284225, "learning_rate": 3.491754447141212e-06, "loss": 0.0514446496963501, "step": 5872 }, { "epoch": 0.794010781944468, "grad_norm": 0.1936180740594864, "learning_rate": 3.4873657085316504e-06, "loss": 0.06984138488769531, "step": 5873 }, { "epoch": 0.7941459787403039, "grad_norm": 0.2576288878917694, "learning_rate": 3.482979366913935e-06, "loss": 0.12042355537414551, "step": 5874 }, { "epoch": 0.7942811755361397, "grad_norm": 0.0762060359120369, "learning_rate": 3.4785954232013423e-06, "loss": 0.0587085485458374, "step": 5875 }, { "epoch": 0.7944163723319757, "grad_norm": 0.07142416387796402, "learning_rate": 3.4742138783066122e-06, "loss": 0.054024696350097656, "step": 5876 }, { "epoch": 0.7945515691278117, "grad_norm": 0.0944850817322731, "learning_rate": 3.4698347331420206e-06, "loss": 0.05852842330932617, "step": 5877 }, { "epoch": 0.7946867659236476, "grad_norm": 0.16262933611869812, "learning_rate": 3.4654579886193223e-06, "loss": 0.07511472702026367, "step": 5878 }, { "epoch": 0.7948219627194836, "grad_norm": 0.06424234807491302, "learning_rate": 3.461083645649782e-06, "loss": 0.060793161392211914, "step": 5879 }, { "epoch": 0.7949571595153195, "grad_norm": 0.05788527801632881, "learning_rate": 3.4567117051441594e-06, "loss": 0.043811798095703125, "step": 5880 }, { "epoch": 0.7950923563111554, "grad_norm": 0.07045552134513855, "learning_rate": 3.4523421680127115e-06, "loss": 0.05317676067352295, "step": 5881 }, { "epoch": 0.7952275531069913, "grad_norm": 0.06306800246238708, "learning_rate": 3.447975035165209e-06, "loss": 0.052356839179992676, "step": 5882 }, { "epoch": 0.7953627499028273, "grad_norm": 0.17548410594463348, "learning_rate": 3.4436103075109076e-06, "loss": 0.07306051254272461, "step": 5883 }, { "epoch": 0.7954979466986632, "grad_norm": 0.09498739242553711, "learning_rate": 3.4392479859585642e-06, "loss": 0.05462980270385742, "step": 5884 }, { "epoch": 0.7956331434944992, "grad_norm": 0.137440025806427, "learning_rate": 3.4348880714164416e-06, "loss": 0.06501126289367676, "step": 5885 }, { "epoch": 0.7957683402903352, "grad_norm": 0.060025379061698914, "learning_rate": 3.430530564792289e-06, "loss": 0.060994505882263184, "step": 5886 }, { "epoch": 0.795903537086171, "grad_norm": 0.053567856550216675, "learning_rate": 3.426175466993374e-06, "loss": 0.0558621883392334, "step": 5887 }, { "epoch": 0.796038733882007, "grad_norm": 0.060433726757764816, "learning_rate": 3.4218227789264468e-06, "loss": 0.06391000747680664, "step": 5888 }, { "epoch": 0.7961739306778429, "grad_norm": 0.08559460192918777, "learning_rate": 3.417472501497758e-06, "loss": 0.07879400253295898, "step": 5889 }, { "epoch": 0.7963091274736789, "grad_norm": 0.11101758480072021, "learning_rate": 3.413124635613061e-06, "loss": 0.08178916573524475, "step": 5890 }, { "epoch": 0.7964443242695148, "grad_norm": 0.20513050258159637, "learning_rate": 3.4087791821775986e-06, "loss": 0.10080024600028992, "step": 5891 }, { "epoch": 0.7965795210653508, "grad_norm": 0.04593917727470398, "learning_rate": 3.4044361420961285e-06, "loss": 0.040120840072631836, "step": 5892 }, { "epoch": 0.7967147178611866, "grad_norm": 0.05298652499914169, "learning_rate": 3.4000955162728866e-06, "loss": 0.032567501068115234, "step": 5893 }, { "epoch": 0.7968499146570226, "grad_norm": 0.05706121027469635, "learning_rate": 3.3957573056116164e-06, "loss": 0.05450248718261719, "step": 5894 }, { "epoch": 0.7969851114528586, "grad_norm": 0.07949705421924591, "learning_rate": 3.391421511015558e-06, "loss": 0.06602859497070312, "step": 5895 }, { "epoch": 0.7971203082486945, "grad_norm": 0.07784634083509445, "learning_rate": 3.38708813338744e-06, "loss": 0.056526631116867065, "step": 5896 }, { "epoch": 0.7972555050445305, "grad_norm": 0.1227860227227211, "learning_rate": 3.382757173629506e-06, "loss": 0.07503247261047363, "step": 5897 }, { "epoch": 0.7973907018403664, "grad_norm": 0.0445844866335392, "learning_rate": 3.378428632643478e-06, "loss": 0.03827035427093506, "step": 5898 }, { "epoch": 0.7975258986362023, "grad_norm": 0.10407643020153046, "learning_rate": 3.3741025113305825e-06, "loss": 0.07637548446655273, "step": 5899 }, { "epoch": 0.7976610954320382, "grad_norm": 0.09840883314609528, "learning_rate": 3.369778810591541e-06, "loss": 0.053110599517822266, "step": 5900 }, { "epoch": 0.7977962922278742, "grad_norm": 0.06578446924686432, "learning_rate": 3.3654575313265664e-06, "loss": 0.04084807634353638, "step": 5901 }, { "epoch": 0.7979314890237101, "grad_norm": 0.05395715311169624, "learning_rate": 3.361138674435386e-06, "loss": 0.05305075645446777, "step": 5902 }, { "epoch": 0.7980666858195461, "grad_norm": 0.11053182184696198, "learning_rate": 3.35682224081719e-06, "loss": 0.06735897064208984, "step": 5903 }, { "epoch": 0.798201882615382, "grad_norm": 0.08346085250377655, "learning_rate": 3.352508231370699e-06, "loss": 0.059014320373535156, "step": 5904 }, { "epoch": 0.798337079411218, "grad_norm": 0.10122118145227432, "learning_rate": 3.3481966469941044e-06, "loss": 0.06972360610961914, "step": 5905 }, { "epoch": 0.7984722762070539, "grad_norm": 0.10055501013994217, "learning_rate": 3.3438874885850984e-06, "loss": 0.060906171798706055, "step": 5906 }, { "epoch": 0.7986074730028898, "grad_norm": 0.0458751805126667, "learning_rate": 3.3395807570408847e-06, "loss": 0.04475867748260498, "step": 5907 }, { "epoch": 0.7987426697987258, "grad_norm": 0.07080793380737305, "learning_rate": 3.33527645325813e-06, "loss": 0.05774796009063721, "step": 5908 }, { "epoch": 0.7988778665945617, "grad_norm": 0.07462490350008011, "learning_rate": 3.3309745781330247e-06, "loss": 0.05011320114135742, "step": 5909 }, { "epoch": 0.7990130633903977, "grad_norm": 0.06588814407587051, "learning_rate": 3.32667513256124e-06, "loss": 0.03573489189147949, "step": 5910 }, { "epoch": 0.7991482601862336, "grad_norm": 0.08016342669725418, "learning_rate": 3.3223781174379375e-06, "loss": 0.07845830917358398, "step": 5911 }, { "epoch": 0.7992834569820695, "grad_norm": 0.06420721858739853, "learning_rate": 3.3180835336577917e-06, "loss": 0.06180620193481445, "step": 5912 }, { "epoch": 0.7994186537779054, "grad_norm": 0.08457471430301666, "learning_rate": 3.313791382114943e-06, "loss": 0.07882165908813477, "step": 5913 }, { "epoch": 0.7995538505737414, "grad_norm": 0.19721674919128418, "learning_rate": 3.3095016637030505e-06, "loss": 0.06384015083312988, "step": 5914 }, { "epoch": 0.7996890473695774, "grad_norm": 0.11330549418926239, "learning_rate": 3.3052143793152524e-06, "loss": 0.0427393913269043, "step": 5915 }, { "epoch": 0.7998242441654133, "grad_norm": 0.042108796536922455, "learning_rate": 3.3009295298441855e-06, "loss": 0.03156137466430664, "step": 5916 }, { "epoch": 0.7999594409612493, "grad_norm": 0.13222014904022217, "learning_rate": 3.2966471161819767e-06, "loss": 0.07802689075469971, "step": 5917 }, { "epoch": 0.8000946377570851, "grad_norm": 0.05636896938085556, "learning_rate": 3.292367139220246e-06, "loss": 0.05374807119369507, "step": 5918 }, { "epoch": 0.8002298345529211, "grad_norm": 0.07512359321117401, "learning_rate": 3.288089599850112e-06, "loss": 0.04730796813964844, "step": 5919 }, { "epoch": 0.800365031348757, "grad_norm": 0.1327861249446869, "learning_rate": 3.2838144989621795e-06, "loss": 0.08629894256591797, "step": 5920 }, { "epoch": 0.800500228144593, "grad_norm": 0.05734610557556152, "learning_rate": 3.2795418374465458e-06, "loss": 0.049720048904418945, "step": 5921 }, { "epoch": 0.8006354249404289, "grad_norm": 0.04889432713389397, "learning_rate": 3.275271616192803e-06, "loss": 0.04104304313659668, "step": 5922 }, { "epoch": 0.8007706217362649, "grad_norm": 0.03819684684276581, "learning_rate": 3.2710038360900303e-06, "loss": 0.040823400020599365, "step": 5923 }, { "epoch": 0.8009058185321007, "grad_norm": 0.07306928187608719, "learning_rate": 3.266738498026808e-06, "loss": 0.05711174011230469, "step": 5924 }, { "epoch": 0.8010410153279367, "grad_norm": 0.09495928138494492, "learning_rate": 3.2624756028912005e-06, "loss": 0.08249831199645996, "step": 5925 }, { "epoch": 0.8011762121237727, "grad_norm": 0.10753341764211655, "learning_rate": 3.2582151515707655e-06, "loss": 0.06158852577209473, "step": 5926 }, { "epoch": 0.8013114089196086, "grad_norm": 0.09272406250238419, "learning_rate": 3.253957144952551e-06, "loss": 0.08705449104309082, "step": 5927 }, { "epoch": 0.8014466057154446, "grad_norm": 0.13662089407444, "learning_rate": 3.249701583923091e-06, "loss": 0.09015393257141113, "step": 5928 }, { "epoch": 0.8015818025112805, "grad_norm": 0.14579512178897858, "learning_rate": 3.2454484693684257e-06, "loss": 0.060903072357177734, "step": 5929 }, { "epoch": 0.8017169993071164, "grad_norm": 0.11067908257246017, "learning_rate": 3.2411978021740727e-06, "loss": 0.07764029502868652, "step": 5930 }, { "epoch": 0.8018521961029523, "grad_norm": 0.05757344886660576, "learning_rate": 3.2369495832250434e-06, "loss": 0.04710733890533447, "step": 5931 }, { "epoch": 0.8019873928987883, "grad_norm": 0.06984589993953705, "learning_rate": 3.2327038134058378e-06, "loss": 0.0804290771484375, "step": 5932 }, { "epoch": 0.8021225896946242, "grad_norm": 0.061518795788288116, "learning_rate": 3.228460493600446e-06, "loss": 0.05602109432220459, "step": 5933 }, { "epoch": 0.8022577864904602, "grad_norm": 0.04481853172183037, "learning_rate": 3.2242196246923554e-06, "loss": 0.04060816764831543, "step": 5934 }, { "epoch": 0.8023929832862962, "grad_norm": 0.04452382028102875, "learning_rate": 3.2199812075645375e-06, "loss": 0.04606294631958008, "step": 5935 }, { "epoch": 0.802528180082132, "grad_norm": 0.10836662352085114, "learning_rate": 3.2157452430994487e-06, "loss": 0.07996106147766113, "step": 5936 }, { "epoch": 0.802663376877968, "grad_norm": 0.08266950398683548, "learning_rate": 3.2115117321790427e-06, "loss": 0.05531609058380127, "step": 5937 }, { "epoch": 0.8027985736738039, "grad_norm": 0.09042800217866898, "learning_rate": 3.207280675684754e-06, "loss": 0.07632696628570557, "step": 5938 }, { "epoch": 0.8029337704696399, "grad_norm": 0.095298171043396, "learning_rate": 3.203052074497523e-06, "loss": 0.0833730697631836, "step": 5939 }, { "epoch": 0.8030689672654758, "grad_norm": 0.0649867132306099, "learning_rate": 3.198825929497752e-06, "loss": 0.0504755973815918, "step": 5940 }, { "epoch": 0.8032041640613118, "grad_norm": 0.08923760801553726, "learning_rate": 3.194602241565357e-06, "loss": 0.048790931701660156, "step": 5941 }, { "epoch": 0.8033393608571476, "grad_norm": 0.07723937183618546, "learning_rate": 3.1903810115797282e-06, "loss": 0.06285953521728516, "step": 5942 }, { "epoch": 0.8034745576529836, "grad_norm": 0.10952747613191605, "learning_rate": 3.1861622404197475e-06, "loss": 0.06120920181274414, "step": 5943 }, { "epoch": 0.8036097544488195, "grad_norm": 0.04562440514564514, "learning_rate": 3.181945928963794e-06, "loss": 0.05415225028991699, "step": 5944 }, { "epoch": 0.8037449512446555, "grad_norm": 0.07636386901140213, "learning_rate": 3.1777320780897124e-06, "loss": 0.06434714794158936, "step": 5945 }, { "epoch": 0.8038801480404915, "grad_norm": 0.040571264922618866, "learning_rate": 3.1735206886748602e-06, "loss": 0.04030346870422363, "step": 5946 }, { "epoch": 0.8040153448363274, "grad_norm": 0.10323947668075562, "learning_rate": 3.1693117615960665e-06, "loss": 0.08153200149536133, "step": 5947 }, { "epoch": 0.8041505416321633, "grad_norm": 0.07518929243087769, "learning_rate": 3.1651052977296537e-06, "loss": 0.0575709342956543, "step": 5948 }, { "epoch": 0.8042857384279992, "grad_norm": 0.08393549919128418, "learning_rate": 3.1609012979514273e-06, "loss": 0.07520294189453125, "step": 5949 }, { "epoch": 0.8044209352238352, "grad_norm": 0.05883118510246277, "learning_rate": 3.156699763136683e-06, "loss": 0.052559852600097656, "step": 5950 }, { "epoch": 0.8045561320196711, "grad_norm": 0.06830005347728729, "learning_rate": 3.152500694160207e-06, "loss": 0.056876182556152344, "step": 5951 }, { "epoch": 0.8046913288155071, "grad_norm": 0.06434983760118484, "learning_rate": 3.148304091896265e-06, "loss": 0.053955793380737305, "step": 5952 }, { "epoch": 0.804826525611343, "grad_norm": 0.09729620069265366, "learning_rate": 3.144109957218612e-06, "loss": 0.0799567699432373, "step": 5953 }, { "epoch": 0.804961722407179, "grad_norm": 0.05446004122495651, "learning_rate": 3.1399182910004893e-06, "loss": 0.04692339897155762, "step": 5954 }, { "epoch": 0.8050969192030148, "grad_norm": 0.0832701027393341, "learning_rate": 3.1357290941146215e-06, "loss": 0.07751703262329102, "step": 5955 }, { "epoch": 0.8052321159988508, "grad_norm": 0.0588974803686142, "learning_rate": 3.1315423674332265e-06, "loss": 0.04872506856918335, "step": 5956 }, { "epoch": 0.8053673127946868, "grad_norm": 0.0907578393816948, "learning_rate": 3.127358111828002e-06, "loss": 0.06001472473144531, "step": 5957 }, { "epoch": 0.8055025095905227, "grad_norm": 0.10768931359052658, "learning_rate": 3.123176328170131e-06, "loss": 0.0764617919921875, "step": 5958 }, { "epoch": 0.8056377063863587, "grad_norm": 0.1117824986577034, "learning_rate": 3.1189970173302816e-06, "loss": 0.0704648494720459, "step": 5959 }, { "epoch": 0.8057729031821946, "grad_norm": 0.12021458894014359, "learning_rate": 3.1148201801786085e-06, "loss": 0.050488829612731934, "step": 5960 }, { "epoch": 0.8059080999780305, "grad_norm": 0.08330684155225754, "learning_rate": 3.1106458175847572e-06, "loss": 0.07120692729949951, "step": 5961 }, { "epoch": 0.8060432967738664, "grad_norm": 0.07720736414194107, "learning_rate": 3.106473930417848e-06, "loss": 0.05233168601989746, "step": 5962 }, { "epoch": 0.8061784935697024, "grad_norm": 0.02804306335747242, "learning_rate": 3.1023045195464903e-06, "loss": 0.023141920566558838, "step": 5963 }, { "epoch": 0.8063136903655383, "grad_norm": 0.06017075479030609, "learning_rate": 3.098137585838779e-06, "loss": 0.05536526441574097, "step": 5964 }, { "epoch": 0.8064488871613743, "grad_norm": 0.07462954521179199, "learning_rate": 3.093973130162286e-06, "loss": 0.05035829544067383, "step": 5965 }, { "epoch": 0.8065840839572103, "grad_norm": 0.027970924973487854, "learning_rate": 3.089811153384083e-06, "loss": 0.02471745014190674, "step": 5966 }, { "epoch": 0.8067192807530461, "grad_norm": 0.09911976754665375, "learning_rate": 3.08565165637071e-06, "loss": 0.07069277763366699, "step": 5967 }, { "epoch": 0.8068544775488821, "grad_norm": 0.03710552304983139, "learning_rate": 3.081494639988196e-06, "loss": 0.03911447525024414, "step": 5968 }, { "epoch": 0.806989674344718, "grad_norm": 0.06144268065690994, "learning_rate": 3.077340105102057e-06, "loss": 0.06378602981567383, "step": 5969 }, { "epoch": 0.807124871140554, "grad_norm": 0.09934115409851074, "learning_rate": 3.0731880525772817e-06, "loss": 0.07390356063842773, "step": 5970 }, { "epoch": 0.8072600679363899, "grad_norm": 0.06829255819320679, "learning_rate": 3.069038483278364e-06, "loss": 0.05382704734802246, "step": 5971 }, { "epoch": 0.8073952647322259, "grad_norm": 0.06651218980550766, "learning_rate": 3.0648913980692505e-06, "loss": 0.05577385425567627, "step": 5972 }, { "epoch": 0.8075304615280617, "grad_norm": 0.07062459737062454, "learning_rate": 3.0607467978133985e-06, "loss": 0.04932081699371338, "step": 5973 }, { "epoch": 0.8076656583238977, "grad_norm": 0.07902832329273224, "learning_rate": 3.0566046833737294e-06, "loss": 0.05445671081542969, "step": 5974 }, { "epoch": 0.8078008551197337, "grad_norm": 0.07453961670398712, "learning_rate": 3.0524650556126517e-06, "loss": 0.042623937129974365, "step": 5975 }, { "epoch": 0.8079360519155696, "grad_norm": 0.04572852700948715, "learning_rate": 3.048327915392069e-06, "loss": 0.04386827349662781, "step": 5976 }, { "epoch": 0.8080712487114056, "grad_norm": 0.07879126071929932, "learning_rate": 3.044193263573341e-06, "loss": 0.07558250427246094, "step": 5977 }, { "epoch": 0.8082064455072415, "grad_norm": 0.12469901889562607, "learning_rate": 3.0400611010173355e-06, "loss": 0.07771492004394531, "step": 5978 }, { "epoch": 0.8083416423030774, "grad_norm": 0.058566123247146606, "learning_rate": 3.0359314285843863e-06, "loss": 0.04871082305908203, "step": 5979 }, { "epoch": 0.8084768390989133, "grad_norm": 0.09537943452596664, "learning_rate": 3.0318042471343104e-06, "loss": 0.07441234588623047, "step": 5980 }, { "epoch": 0.8086120358947493, "grad_norm": 0.09384571760892868, "learning_rate": 3.027679557526422e-06, "loss": 0.0855410099029541, "step": 5981 }, { "epoch": 0.8087472326905852, "grad_norm": 0.06223554164171219, "learning_rate": 3.0235573606194844e-06, "loss": 0.04994767904281616, "step": 5982 }, { "epoch": 0.8088824294864212, "grad_norm": 0.06187053024768829, "learning_rate": 3.0194376572717743e-06, "loss": 0.04442010819911957, "step": 5983 }, { "epoch": 0.8090176262822572, "grad_norm": 0.16885416209697723, "learning_rate": 3.0153204483410318e-06, "loss": 0.07637345790863037, "step": 5984 }, { "epoch": 0.809152823078093, "grad_norm": 0.07922068983316422, "learning_rate": 3.0112057346844834e-06, "loss": 0.06919288635253906, "step": 5985 }, { "epoch": 0.809288019873929, "grad_norm": 0.10322743654251099, "learning_rate": 3.007093517158832e-06, "loss": 0.06879043579101562, "step": 5986 }, { "epoch": 0.8094232166697649, "grad_norm": 0.17313022911548615, "learning_rate": 3.002983796620261e-06, "loss": 0.12035131454467773, "step": 5987 }, { "epoch": 0.8095584134656009, "grad_norm": 0.17304308712482452, "learning_rate": 2.9988765739244427e-06, "loss": 0.07915401458740234, "step": 5988 }, { "epoch": 0.8096936102614368, "grad_norm": 0.07025691866874695, "learning_rate": 2.9947718499265197e-06, "loss": 0.05477166175842285, "step": 5989 }, { "epoch": 0.8098288070572728, "grad_norm": 0.11004645377397537, "learning_rate": 2.9906696254811184e-06, "loss": 0.07807785272598267, "step": 5990 }, { "epoch": 0.8099640038531086, "grad_norm": 0.062981016933918, "learning_rate": 2.9865699014423404e-06, "loss": 0.056607723236083984, "step": 5991 }, { "epoch": 0.8100992006489446, "grad_norm": 0.13426287472248077, "learning_rate": 2.9824726786637698e-06, "loss": 0.06773710250854492, "step": 5992 }, { "epoch": 0.8102343974447805, "grad_norm": 0.06862668693065643, "learning_rate": 2.978377957998477e-06, "loss": 0.06562089920043945, "step": 5993 }, { "epoch": 0.8103695942406165, "grad_norm": 0.07917889952659607, "learning_rate": 2.974285740299001e-06, "loss": 0.08538436889648438, "step": 5994 }, { "epoch": 0.8105047910364525, "grad_norm": 0.07612813264131546, "learning_rate": 2.9701960264173612e-06, "loss": 0.05460333824157715, "step": 5995 }, { "epoch": 0.8106399878322884, "grad_norm": 0.07773082703351974, "learning_rate": 2.96610881720506e-06, "loss": 0.06387847661972046, "step": 5996 }, { "epoch": 0.8107751846281244, "grad_norm": 0.04482792317867279, "learning_rate": 2.9620241135130715e-06, "loss": 0.040294647216796875, "step": 5997 }, { "epoch": 0.8109103814239602, "grad_norm": 0.12301840633153915, "learning_rate": 2.9579419161918607e-06, "loss": 0.07533693313598633, "step": 5998 }, { "epoch": 0.8110455782197962, "grad_norm": 0.06225501745939255, "learning_rate": 2.9538622260913595e-06, "loss": 0.05575919151306152, "step": 5999 }, { "epoch": 0.8111807750156321, "grad_norm": 0.11108613014221191, "learning_rate": 2.9497850440609814e-06, "loss": 0.08737683296203613, "step": 6000 }, { "epoch": 0.8113159718114681, "grad_norm": 0.05581967905163765, "learning_rate": 2.945710370949616e-06, "loss": 0.05908411741256714, "step": 6001 }, { "epoch": 0.811451168607304, "grad_norm": 0.10604329407215118, "learning_rate": 2.941638207605629e-06, "loss": 0.07580089569091797, "step": 6002 }, { "epoch": 0.81158636540314, "grad_norm": 0.12798796594142914, "learning_rate": 2.937568554876873e-06, "loss": 0.07222437858581543, "step": 6003 }, { "epoch": 0.8117215621989758, "grad_norm": 0.06994862109422684, "learning_rate": 2.9335014136106704e-06, "loss": 0.053401947021484375, "step": 6004 }, { "epoch": 0.8118567589948118, "grad_norm": 0.04690518230199814, "learning_rate": 2.929436784653818e-06, "loss": 0.03975629806518555, "step": 6005 }, { "epoch": 0.8119919557906478, "grad_norm": 0.08309400081634521, "learning_rate": 2.925374668852597e-06, "loss": 0.08277511596679688, "step": 6006 }, { "epoch": 0.8121271525864837, "grad_norm": 0.07683993875980377, "learning_rate": 2.921315067052754e-06, "loss": 0.052703857421875, "step": 6007 }, { "epoch": 0.8122623493823197, "grad_norm": 0.07410429418087006, "learning_rate": 2.917257980099535e-06, "loss": 0.06193804740905762, "step": 6008 }, { "epoch": 0.8123975461781556, "grad_norm": 0.1447361558675766, "learning_rate": 2.913203408837629e-06, "loss": 0.07332003116607666, "step": 6009 }, { "epoch": 0.8125327429739915, "grad_norm": 0.093193918466568, "learning_rate": 2.909151354111232e-06, "loss": 0.056601524353027344, "step": 6010 }, { "epoch": 0.8126679397698274, "grad_norm": 0.08866371959447861, "learning_rate": 2.905101816763998e-06, "loss": 0.0764303207397461, "step": 6011 }, { "epoch": 0.8128031365656634, "grad_norm": 0.09341947734355927, "learning_rate": 2.9010547976390617e-06, "loss": 0.057110607624053955, "step": 6012 }, { "epoch": 0.8129383333614993, "grad_norm": 0.043275635689496994, "learning_rate": 2.897010297579042e-06, "loss": 0.04230165481567383, "step": 6013 }, { "epoch": 0.8130735301573353, "grad_norm": 0.0816507637500763, "learning_rate": 2.8929683174260133e-06, "loss": 0.06053817272186279, "step": 6014 }, { "epoch": 0.8132087269531713, "grad_norm": 0.04000457748770714, "learning_rate": 2.8889288580215467e-06, "loss": 0.04054450988769531, "step": 6015 }, { "epoch": 0.8133439237490071, "grad_norm": 0.08455421030521393, "learning_rate": 2.8848919202066752e-06, "loss": 0.07124114036560059, "step": 6016 }, { "epoch": 0.8134791205448431, "grad_norm": 0.11735472828149796, "learning_rate": 2.8808575048219123e-06, "loss": 0.08161544799804688, "step": 6017 }, { "epoch": 0.813614317340679, "grad_norm": 0.0470954105257988, "learning_rate": 2.8768256127072436e-06, "loss": 0.042212843894958496, "step": 6018 }, { "epoch": 0.813749514136515, "grad_norm": 0.08306185156106949, "learning_rate": 2.872796244702128e-06, "loss": 0.039395809173583984, "step": 6019 }, { "epoch": 0.8138847109323509, "grad_norm": 0.07860999554395676, "learning_rate": 2.8687694016455075e-06, "loss": 0.05400872230529785, "step": 6020 }, { "epoch": 0.8140199077281869, "grad_norm": 0.09967145323753357, "learning_rate": 2.86474508437579e-06, "loss": 0.06613636016845703, "step": 6021 }, { "epoch": 0.8141551045240227, "grad_norm": 0.13687734305858612, "learning_rate": 2.8607232937308587e-06, "loss": 0.08966779708862305, "step": 6022 }, { "epoch": 0.8142903013198587, "grad_norm": 0.09404417127370834, "learning_rate": 2.856704030548072e-06, "loss": 0.03868138790130615, "step": 6023 }, { "epoch": 0.8144254981156946, "grad_norm": 0.07717647403478622, "learning_rate": 2.8526872956642568e-06, "loss": 0.056162476539611816, "step": 6024 }, { "epoch": 0.8145606949115306, "grad_norm": 0.1365281641483307, "learning_rate": 2.84867308991573e-06, "loss": 0.043724775314331055, "step": 6025 }, { "epoch": 0.8146958917073666, "grad_norm": 0.08922185748815536, "learning_rate": 2.8446614141382638e-06, "loss": 0.05934906005859375, "step": 6026 }, { "epoch": 0.8148310885032025, "grad_norm": 0.062307439744472504, "learning_rate": 2.8406522691671104e-06, "loss": 0.04979896545410156, "step": 6027 }, { "epoch": 0.8149662852990384, "grad_norm": 0.06810615956783295, "learning_rate": 2.8366456558369975e-06, "loss": 0.057108163833618164, "step": 6028 }, { "epoch": 0.8151014820948743, "grad_norm": 0.08926712721586227, "learning_rate": 2.8326415749821186e-06, "loss": 0.044838547706604004, "step": 6029 }, { "epoch": 0.8152366788907103, "grad_norm": 0.0773458480834961, "learning_rate": 2.828640027436151e-06, "loss": 0.04747915267944336, "step": 6030 }, { "epoch": 0.8153718756865462, "grad_norm": 0.08920859545469284, "learning_rate": 2.824641014032235e-06, "loss": 0.05875211954116821, "step": 6031 }, { "epoch": 0.8155070724823822, "grad_norm": 0.11277290433645248, "learning_rate": 2.820644535602987e-06, "loss": 0.05080556869506836, "step": 6032 }, { "epoch": 0.8156422692782181, "grad_norm": 0.14920265972614288, "learning_rate": 2.8166505929804953e-06, "loss": 0.06355094909667969, "step": 6033 }, { "epoch": 0.815777466074054, "grad_norm": 0.06301344186067581, "learning_rate": 2.8126591869963163e-06, "loss": 0.04443478584289551, "step": 6034 }, { "epoch": 0.81591266286989, "grad_norm": 0.060130201280117035, "learning_rate": 2.8086703184814887e-06, "loss": 0.05647540092468262, "step": 6035 }, { "epoch": 0.8160478596657259, "grad_norm": 0.05849962681531906, "learning_rate": 2.8046839882665134e-06, "loss": 0.04903674125671387, "step": 6036 }, { "epoch": 0.8161830564615619, "grad_norm": 0.08409731835126877, "learning_rate": 2.800700197181364e-06, "loss": 0.06807661056518555, "step": 6037 }, { "epoch": 0.8163182532573978, "grad_norm": 0.07239754498004913, "learning_rate": 2.7967189460554876e-06, "loss": 0.05629897117614746, "step": 6038 }, { "epoch": 0.8164534500532338, "grad_norm": 0.10625198483467102, "learning_rate": 2.792740235717801e-06, "loss": 0.07801389694213867, "step": 6039 }, { "epoch": 0.8165886468490697, "grad_norm": 0.07800500839948654, "learning_rate": 2.7887640669967e-06, "loss": 0.06755685806274414, "step": 6040 }, { "epoch": 0.8167238436449056, "grad_norm": 0.049376923590898514, "learning_rate": 2.7847904407200327e-06, "loss": 0.05147850513458252, "step": 6041 }, { "epoch": 0.8168590404407415, "grad_norm": 0.14100128412246704, "learning_rate": 2.7808193577151363e-06, "loss": 0.08207482099533081, "step": 6042 }, { "epoch": 0.8169942372365775, "grad_norm": 0.08680377900600433, "learning_rate": 2.776850818808812e-06, "loss": 0.06275999546051025, "step": 6043 }, { "epoch": 0.8171294340324134, "grad_norm": 0.06518331915140152, "learning_rate": 2.772884824827325e-06, "loss": 0.05167031288146973, "step": 6044 }, { "epoch": 0.8172646308282494, "grad_norm": 0.08073441684246063, "learning_rate": 2.768921376596429e-06, "loss": 0.03640162944793701, "step": 6045 }, { "epoch": 0.8173998276240854, "grad_norm": 0.06530150026082993, "learning_rate": 2.7649604749413176e-06, "loss": 0.052735328674316406, "step": 6046 }, { "epoch": 0.8175350244199212, "grad_norm": 0.15498989820480347, "learning_rate": 2.7610021206866837e-06, "loss": 0.06781244277954102, "step": 6047 }, { "epoch": 0.8176702212157572, "grad_norm": 0.1281179040670395, "learning_rate": 2.757046314656676e-06, "loss": 0.05950215458869934, "step": 6048 }, { "epoch": 0.8178054180115931, "grad_norm": 0.05508748069405556, "learning_rate": 2.753093057674909e-06, "loss": 0.053448550403118134, "step": 6049 }, { "epoch": 0.8179406148074291, "grad_norm": 0.13670194149017334, "learning_rate": 2.749142350564483e-06, "loss": 0.09100627899169922, "step": 6050 }, { "epoch": 0.818075811603265, "grad_norm": 0.045564886182546616, "learning_rate": 2.7451941941479414e-06, "loss": 0.04336047172546387, "step": 6051 }, { "epoch": 0.818211008399101, "grad_norm": 0.07601013779640198, "learning_rate": 2.741248589247323e-06, "loss": 0.06634902954101562, "step": 6052 }, { "epoch": 0.8183462051949368, "grad_norm": 0.0758441761136055, "learning_rate": 2.73730553668412e-06, "loss": 0.05973625183105469, "step": 6053 }, { "epoch": 0.8184814019907728, "grad_norm": 0.05449908599257469, "learning_rate": 2.7333650372792978e-06, "loss": 0.054160118103027344, "step": 6054 }, { "epoch": 0.8186165987866088, "grad_norm": 0.08510931581258774, "learning_rate": 2.7294270918532876e-06, "loss": 0.05118298530578613, "step": 6055 }, { "epoch": 0.8187517955824447, "grad_norm": 0.13163307309150696, "learning_rate": 2.7254917012259882e-06, "loss": 0.11231327056884766, "step": 6056 }, { "epoch": 0.8188869923782807, "grad_norm": 0.07046449184417725, "learning_rate": 2.721558866216776e-06, "loss": 0.049848198890686035, "step": 6057 }, { "epoch": 0.8190221891741166, "grad_norm": 0.17723996937274933, "learning_rate": 2.7176285876444846e-06, "loss": 0.06799310445785522, "step": 6058 }, { "epoch": 0.8191573859699525, "grad_norm": 0.09139125794172287, "learning_rate": 2.713700866327417e-06, "loss": 0.05922532081604004, "step": 6059 }, { "epoch": 0.8192925827657884, "grad_norm": 0.07579890638589859, "learning_rate": 2.7097757030833497e-06, "loss": 0.05732536315917969, "step": 6060 }, { "epoch": 0.8194277795616244, "grad_norm": 0.194512277841568, "learning_rate": 2.705853098729517e-06, "loss": 0.07489776611328125, "step": 6061 }, { "epoch": 0.8195629763574603, "grad_norm": 0.16335757076740265, "learning_rate": 2.7019330540826325e-06, "loss": 0.0741119384765625, "step": 6062 }, { "epoch": 0.8196981731532963, "grad_norm": 0.047551579773426056, "learning_rate": 2.6980155699588666e-06, "loss": 0.03277790546417236, "step": 6063 }, { "epoch": 0.8198333699491323, "grad_norm": 0.1278848499059677, "learning_rate": 2.6941006471738633e-06, "loss": 0.055891990661621094, "step": 6064 }, { "epoch": 0.8199685667449681, "grad_norm": 0.07446646690368652, "learning_rate": 2.690188286542726e-06, "loss": 0.06618189811706543, "step": 6065 }, { "epoch": 0.820103763540804, "grad_norm": 0.13082048296928406, "learning_rate": 2.686278488880029e-06, "loss": 0.08866214752197266, "step": 6066 }, { "epoch": 0.82023896033664, "grad_norm": 0.0632932186126709, "learning_rate": 2.6823712549998187e-06, "loss": 0.057805657386779785, "step": 6067 }, { "epoch": 0.820374157132476, "grad_norm": 0.0960746705532074, "learning_rate": 2.678466585715599e-06, "loss": 0.05298125743865967, "step": 6068 }, { "epoch": 0.8205093539283119, "grad_norm": 0.11340481042861938, "learning_rate": 2.6745644818403426e-06, "loss": 0.07090479135513306, "step": 6069 }, { "epoch": 0.8206445507241479, "grad_norm": 0.20877237617969513, "learning_rate": 2.6706649441864883e-06, "loss": 0.06981277465820312, "step": 6070 }, { "epoch": 0.8207797475199837, "grad_norm": 0.06653616577386856, "learning_rate": 2.666767973565937e-06, "loss": 0.055539608001708984, "step": 6071 }, { "epoch": 0.8209149443158197, "grad_norm": 0.18026667833328247, "learning_rate": 2.6628735707900653e-06, "loss": 0.07924997806549072, "step": 6072 }, { "epoch": 0.8210501411116556, "grad_norm": 0.11764676868915558, "learning_rate": 2.658981736669707e-06, "loss": 0.045154869556427, "step": 6073 }, { "epoch": 0.8211853379074916, "grad_norm": 0.11506257951259613, "learning_rate": 2.655092472015161e-06, "loss": 0.04538559913635254, "step": 6074 }, { "epoch": 0.8213205347033276, "grad_norm": 0.08606728911399841, "learning_rate": 2.6512057776361935e-06, "loss": 0.06519508361816406, "step": 6075 }, { "epoch": 0.8214557314991635, "grad_norm": 0.10523980110883713, "learning_rate": 2.64732165434203e-06, "loss": 0.0547870397567749, "step": 6076 }, { "epoch": 0.8215909282949994, "grad_norm": 0.08119869977235794, "learning_rate": 2.6434401029413792e-06, "loss": 0.042222678661346436, "step": 6077 }, { "epoch": 0.8217261250908353, "grad_norm": 0.11899159848690033, "learning_rate": 2.639561124242385e-06, "loss": 0.06535935401916504, "step": 6078 }, { "epoch": 0.8218613218866713, "grad_norm": 0.052903711795806885, "learning_rate": 2.635684719052682e-06, "loss": 0.03343915939331055, "step": 6079 }, { "epoch": 0.8219965186825072, "grad_norm": 0.038406386971473694, "learning_rate": 2.631810888179355e-06, "loss": 0.030523747205734253, "step": 6080 }, { "epoch": 0.8221317154783432, "grad_norm": 0.040950872004032135, "learning_rate": 2.627939632428952e-06, "loss": 0.04197734594345093, "step": 6081 }, { "epoch": 0.8222669122741791, "grad_norm": 0.06389142572879791, "learning_rate": 2.624070952607502e-06, "loss": 0.04342985153198242, "step": 6082 }, { "epoch": 0.822402109070015, "grad_norm": 0.12661327421665192, "learning_rate": 2.620204849520468e-06, "loss": 0.07553958892822266, "step": 6083 }, { "epoch": 0.8225373058658509, "grad_norm": 0.06931939721107483, "learning_rate": 2.616341323972806e-06, "loss": 0.053159356117248535, "step": 6084 }, { "epoch": 0.8226725026616869, "grad_norm": 0.09741483628749847, "learning_rate": 2.612480376768917e-06, "loss": 0.0687553882598877, "step": 6085 }, { "epoch": 0.8228076994575229, "grad_norm": 0.06940137594938278, "learning_rate": 2.608622008712672e-06, "loss": 0.059928178787231445, "step": 6086 }, { "epoch": 0.8229428962533588, "grad_norm": 0.07559097558259964, "learning_rate": 2.6047662206074034e-06, "loss": 0.053426504135131836, "step": 6087 }, { "epoch": 0.8230780930491948, "grad_norm": 0.12496233731508255, "learning_rate": 2.600913013255904e-06, "loss": 0.0745849609375, "step": 6088 }, { "epoch": 0.8232132898450307, "grad_norm": 0.1043851375579834, "learning_rate": 2.59706238746044e-06, "loss": 0.06103801727294922, "step": 6089 }, { "epoch": 0.8233484866408666, "grad_norm": 0.11068032681941986, "learning_rate": 2.593214344022725e-06, "loss": 0.06898415088653564, "step": 6090 }, { "epoch": 0.8234836834367025, "grad_norm": 0.07048270106315613, "learning_rate": 2.5893688837439474e-06, "loss": 0.06960821151733398, "step": 6091 }, { "epoch": 0.8236188802325385, "grad_norm": 0.061021458357572556, "learning_rate": 2.5855260074247473e-06, "loss": 0.03547525405883789, "step": 6092 }, { "epoch": 0.8237540770283744, "grad_norm": 0.05903881415724754, "learning_rate": 2.581685715865232e-06, "loss": 0.0526430606842041, "step": 6093 }, { "epoch": 0.8238892738242104, "grad_norm": 0.05299198627471924, "learning_rate": 2.5778480098649766e-06, "loss": 0.05292820930480957, "step": 6094 }, { "epoch": 0.8240244706200464, "grad_norm": 0.06044149771332741, "learning_rate": 2.5740128902230087e-06, "loss": 0.05222296714782715, "step": 6095 }, { "epoch": 0.8241596674158822, "grad_norm": 0.07073352485895157, "learning_rate": 2.5701803577378214e-06, "loss": 0.06006669998168945, "step": 6096 }, { "epoch": 0.8242948642117182, "grad_norm": 0.08059313148260117, "learning_rate": 2.566350413207366e-06, "loss": 0.0693356990814209, "step": 6097 }, { "epoch": 0.8244300610075541, "grad_norm": 0.03012383170425892, "learning_rate": 2.5625230574290554e-06, "loss": 0.026161015033721924, "step": 6098 }, { "epoch": 0.8245652578033901, "grad_norm": 0.04499491676688194, "learning_rate": 2.558698291199773e-06, "loss": 0.03758955001831055, "step": 6099 }, { "epoch": 0.824700454599226, "grad_norm": 0.06370619684457779, "learning_rate": 2.5548761153158524e-06, "loss": 0.06589460372924805, "step": 6100 }, { "epoch": 0.824835651395062, "grad_norm": 0.0985814705491066, "learning_rate": 2.55105653057309e-06, "loss": 0.05526965856552124, "step": 6101 }, { "epoch": 0.8249708481908978, "grad_norm": 0.13217344880104065, "learning_rate": 2.547239537766743e-06, "loss": 0.0590825080871582, "step": 6102 }, { "epoch": 0.8251060449867338, "grad_norm": 0.1189265102148056, "learning_rate": 2.543425137691526e-06, "loss": 0.08219194412231445, "step": 6103 }, { "epoch": 0.8252412417825697, "grad_norm": 0.2378210425376892, "learning_rate": 2.5396133311416264e-06, "loss": 0.10416507720947266, "step": 6104 }, { "epoch": 0.8253764385784057, "grad_norm": 0.06367621570825577, "learning_rate": 2.5358041189106784e-06, "loss": 0.06879687309265137, "step": 6105 }, { "epoch": 0.8255116353742417, "grad_norm": 0.04631015658378601, "learning_rate": 2.531997501791779e-06, "loss": 0.05221867561340332, "step": 6106 }, { "epoch": 0.8256468321700776, "grad_norm": 0.055972620844841, "learning_rate": 2.528193480577489e-06, "loss": 0.05498814582824707, "step": 6107 }, { "epoch": 0.8257820289659135, "grad_norm": 0.11147332191467285, "learning_rate": 2.5243920560598186e-06, "loss": 0.04958057403564453, "step": 6108 }, { "epoch": 0.8259172257617494, "grad_norm": 0.16497240960597992, "learning_rate": 2.5205932290302598e-06, "loss": 0.06731081008911133, "step": 6109 }, { "epoch": 0.8260524225575854, "grad_norm": 0.08538535237312317, "learning_rate": 2.516797000279729e-06, "loss": 0.06237339973449707, "step": 6110 }, { "epoch": 0.8261876193534213, "grad_norm": 0.08002668619155884, "learning_rate": 2.513003370598637e-06, "loss": 0.04990202188491821, "step": 6111 }, { "epoch": 0.8263228161492573, "grad_norm": 0.18474170565605164, "learning_rate": 2.509212340776832e-06, "loss": 0.07783794403076172, "step": 6112 }, { "epoch": 0.8264580129450932, "grad_norm": 0.06811453402042389, "learning_rate": 2.505423911603622e-06, "loss": 0.05213165283203125, "step": 6113 }, { "epoch": 0.8265932097409291, "grad_norm": 0.10158013552427292, "learning_rate": 2.501638083867789e-06, "loss": 0.05647468566894531, "step": 6114 }, { "epoch": 0.826728406536765, "grad_norm": 0.1357419490814209, "learning_rate": 2.497854858357552e-06, "loss": 0.058693885803222656, "step": 6115 }, { "epoch": 0.826863603332601, "grad_norm": 0.08465646952390671, "learning_rate": 2.494074235860604e-06, "loss": 0.06606149673461914, "step": 6116 }, { "epoch": 0.826998800128437, "grad_norm": 0.04319896176457405, "learning_rate": 2.4902962171640913e-06, "loss": 0.02218031883239746, "step": 6117 }, { "epoch": 0.8271339969242729, "grad_norm": 0.09505002200603485, "learning_rate": 2.4865208030546167e-06, "loss": 0.04378557205200195, "step": 6118 }, { "epoch": 0.8272691937201089, "grad_norm": 0.05450539290904999, "learning_rate": 2.482747994318239e-06, "loss": 0.06740474700927734, "step": 6119 }, { "epoch": 0.8274043905159447, "grad_norm": 0.11607981473207474, "learning_rate": 2.478977791740477e-06, "loss": 0.08897721767425537, "step": 6120 }, { "epoch": 0.8275395873117807, "grad_norm": 0.11895623058080673, "learning_rate": 2.475210196106313e-06, "loss": 0.051215410232543945, "step": 6121 }, { "epoch": 0.8276747841076166, "grad_norm": 0.08639207482337952, "learning_rate": 2.4714452082001753e-06, "loss": 0.05059981346130371, "step": 6122 }, { "epoch": 0.8278099809034526, "grad_norm": 0.07842420041561127, "learning_rate": 2.467682828805956e-06, "loss": 0.05503344535827637, "step": 6123 }, { "epoch": 0.8279451776992885, "grad_norm": 0.07524443417787552, "learning_rate": 2.4639230587070017e-06, "loss": 0.08451175689697266, "step": 6124 }, { "epoch": 0.8280803744951245, "grad_norm": 0.06326291710138321, "learning_rate": 2.460165898686114e-06, "loss": 0.050694823265075684, "step": 6125 }, { "epoch": 0.8282155712909604, "grad_norm": 0.033216092735528946, "learning_rate": 2.4564113495255597e-06, "loss": 0.03252220153808594, "step": 6126 }, { "epoch": 0.8283507680867963, "grad_norm": 0.10565914958715439, "learning_rate": 2.4526594120070545e-06, "loss": 0.04973626136779785, "step": 6127 }, { "epoch": 0.8284859648826323, "grad_norm": 0.03650711104273796, "learning_rate": 2.4489100869117686e-06, "loss": 0.04173707962036133, "step": 6128 }, { "epoch": 0.8286211616784682, "grad_norm": 0.11496257036924362, "learning_rate": 2.4451633750203344e-06, "loss": 0.05859875679016113, "step": 6129 }, { "epoch": 0.8287563584743042, "grad_norm": 0.05917206034064293, "learning_rate": 2.441419277112831e-06, "loss": 0.047881126403808594, "step": 6130 }, { "epoch": 0.8288915552701401, "grad_norm": 0.08097994327545166, "learning_rate": 2.4376777939688107e-06, "loss": 0.07488274574279785, "step": 6131 }, { "epoch": 0.8290267520659761, "grad_norm": 0.08778148889541626, "learning_rate": 2.4339389263672625e-06, "loss": 0.052522242069244385, "step": 6132 }, { "epoch": 0.8291619488618119, "grad_norm": 0.06195569410920143, "learning_rate": 2.4302026750866406e-06, "loss": 0.05515170097351074, "step": 6133 }, { "epoch": 0.8292971456576479, "grad_norm": 0.07184108346700668, "learning_rate": 2.4264690409048517e-06, "loss": 0.06892275810241699, "step": 6134 }, { "epoch": 0.8294323424534839, "grad_norm": 0.1113007664680481, "learning_rate": 2.4227380245992555e-06, "loss": 0.06133723258972168, "step": 6135 }, { "epoch": 0.8295675392493198, "grad_norm": 0.13878749310970306, "learning_rate": 2.4190096269466767e-06, "loss": 0.09042072296142578, "step": 6136 }, { "epoch": 0.8297027360451558, "grad_norm": 0.1017458587884903, "learning_rate": 2.415283848723383e-06, "loss": 0.056163787841796875, "step": 6137 }, { "epoch": 0.8298379328409917, "grad_norm": 0.07465185970067978, "learning_rate": 2.411560690705101e-06, "loss": 0.06628525257110596, "step": 6138 }, { "epoch": 0.8299731296368276, "grad_norm": 0.10816925019025803, "learning_rate": 2.4078401536670146e-06, "loss": 0.0725405216217041, "step": 6139 }, { "epoch": 0.8301083264326635, "grad_norm": 0.07475408166646957, "learning_rate": 2.4041222383837538e-06, "loss": 0.04389333724975586, "step": 6140 }, { "epoch": 0.8302435232284995, "grad_norm": 0.052984319627285004, "learning_rate": 2.400406945629418e-06, "loss": 0.04824090003967285, "step": 6141 }, { "epoch": 0.8303787200243354, "grad_norm": 0.08021537214517593, "learning_rate": 2.3966942761775396e-06, "loss": 0.04873466491699219, "step": 6142 }, { "epoch": 0.8305139168201714, "grad_norm": 0.11845871806144714, "learning_rate": 2.3929842308011263e-06, "loss": 0.056719303131103516, "step": 6143 }, { "epoch": 0.8306491136160074, "grad_norm": 0.06073852255940437, "learning_rate": 2.3892768102726236e-06, "loss": 0.06802797317504883, "step": 6144 }, { "epoch": 0.8307843104118432, "grad_norm": 0.07287821173667908, "learning_rate": 2.3855720153639344e-06, "loss": 0.06714892387390137, "step": 6145 }, { "epoch": 0.8309195072076792, "grad_norm": 0.04477251321077347, "learning_rate": 2.381869846846428e-06, "loss": 0.04327821731567383, "step": 6146 }, { "epoch": 0.8310547040035151, "grad_norm": 0.059387944638729095, "learning_rate": 2.3781703054908993e-06, "loss": 0.05419564247131348, "step": 6147 }, { "epoch": 0.8311899007993511, "grad_norm": 0.11517562717199326, "learning_rate": 2.374473392067624e-06, "loss": 0.06175410747528076, "step": 6148 }, { "epoch": 0.831325097595187, "grad_norm": 0.11997115612030029, "learning_rate": 2.370779107346317e-06, "loss": 0.06570863723754883, "step": 6149 }, { "epoch": 0.831460294391023, "grad_norm": 0.04856313019990921, "learning_rate": 2.3670874520961437e-06, "loss": 0.0464855432510376, "step": 6150 }, { "epoch": 0.8315954911868588, "grad_norm": 0.04417033493518829, "learning_rate": 2.3633984270857367e-06, "loss": 0.04998207092285156, "step": 6151 }, { "epoch": 0.8317306879826948, "grad_norm": 0.07728380709886551, "learning_rate": 2.359712033083156e-06, "loss": 0.03751516342163086, "step": 6152 }, { "epoch": 0.8318658847785307, "grad_norm": 0.058518558740615845, "learning_rate": 2.35602827085594e-06, "loss": 0.05121469497680664, "step": 6153 }, { "epoch": 0.8320010815743667, "grad_norm": 0.10029186308383942, "learning_rate": 2.3523471411710644e-06, "loss": 0.05831766128540039, "step": 6154 }, { "epoch": 0.8321362783702027, "grad_norm": 0.15154419839382172, "learning_rate": 2.3486686447949585e-06, "loss": 0.07819652557373047, "step": 6155 }, { "epoch": 0.8322714751660386, "grad_norm": 0.14425918459892273, "learning_rate": 2.3449927824935075e-06, "loss": 0.04434826970100403, "step": 6156 }, { "epoch": 0.8324066719618745, "grad_norm": 0.044169846922159195, "learning_rate": 2.3413195550320393e-06, "loss": 0.048297882080078125, "step": 6157 }, { "epoch": 0.8325418687577104, "grad_norm": 0.08879756927490234, "learning_rate": 2.3376489631753474e-06, "loss": 0.0571284294128418, "step": 6158 }, { "epoch": 0.8326770655535464, "grad_norm": 0.1313953548669815, "learning_rate": 2.3339810076876665e-06, "loss": 0.06466960906982422, "step": 6159 }, { "epoch": 0.8328122623493823, "grad_norm": 0.04046588018536568, "learning_rate": 2.3303156893326815e-06, "loss": 0.025317251682281494, "step": 6160 }, { "epoch": 0.8329474591452183, "grad_norm": 0.05012265220284462, "learning_rate": 2.326653008873535e-06, "loss": 0.04676949977874756, "step": 6161 }, { "epoch": 0.8330826559410542, "grad_norm": 0.064298115670681, "learning_rate": 2.3229929670728085e-06, "loss": 0.049768269062042236, "step": 6162 }, { "epoch": 0.8332178527368901, "grad_norm": 0.12205102294683456, "learning_rate": 2.319335564692554e-06, "loss": 0.06771159172058105, "step": 6163 }, { "epoch": 0.833353049532726, "grad_norm": 0.08381735533475876, "learning_rate": 2.315680802494256e-06, "loss": 0.06295493245124817, "step": 6164 }, { "epoch": 0.833488246328562, "grad_norm": 0.08064660429954529, "learning_rate": 2.312028681238856e-06, "loss": 0.04182124137878418, "step": 6165 }, { "epoch": 0.833623443124398, "grad_norm": 0.1880762130022049, "learning_rate": 2.3083792016867434e-06, "loss": 0.09366559982299805, "step": 6166 }, { "epoch": 0.8337586399202339, "grad_norm": 0.05527402088046074, "learning_rate": 2.304732364597759e-06, "loss": 0.04879117012023926, "step": 6167 }, { "epoch": 0.8338938367160699, "grad_norm": 0.041861142963171005, "learning_rate": 2.3010881707311994e-06, "loss": 0.03336071968078613, "step": 6168 }, { "epoch": 0.8340290335119057, "grad_norm": 0.12642940878868103, "learning_rate": 2.2974466208458017e-06, "loss": 0.06938660144805908, "step": 6169 }, { "epoch": 0.8341642303077417, "grad_norm": 0.037987321615219116, "learning_rate": 2.293807715699755e-06, "loss": 0.03653538227081299, "step": 6170 }, { "epoch": 0.8342994271035776, "grad_norm": 0.11393680423498154, "learning_rate": 2.2901714560507e-06, "loss": 0.08897686004638672, "step": 6171 }, { "epoch": 0.8344346238994136, "grad_norm": 0.16335955262184143, "learning_rate": 2.286537842655722e-06, "loss": 0.07434940338134766, "step": 6172 }, { "epoch": 0.8345698206952495, "grad_norm": 0.06670046597719193, "learning_rate": 2.2829068762713633e-06, "loss": 0.0550236701965332, "step": 6173 }, { "epoch": 0.8347050174910855, "grad_norm": 0.05221809074282646, "learning_rate": 2.279278557653611e-06, "loss": 0.041936516761779785, "step": 6174 }, { "epoch": 0.8348402142869213, "grad_norm": 0.08448124676942825, "learning_rate": 2.2756528875578965e-06, "loss": 0.07308614253997803, "step": 6175 }, { "epoch": 0.8349754110827573, "grad_norm": 0.07125779986381531, "learning_rate": 2.2720298667391067e-06, "loss": 0.06439208984375, "step": 6176 }, { "epoch": 0.8351106078785933, "grad_norm": 0.08707048743963242, "learning_rate": 2.268409495951568e-06, "loss": 0.06947469711303711, "step": 6177 }, { "epoch": 0.8352458046744292, "grad_norm": 0.06138402596116066, "learning_rate": 2.2647917759490723e-06, "loss": 0.04711627960205078, "step": 6178 }, { "epoch": 0.8353810014702652, "grad_norm": 0.06254766136407852, "learning_rate": 2.261176707484834e-06, "loss": 0.060301780700683594, "step": 6179 }, { "epoch": 0.8355161982661011, "grad_norm": 0.0709356889128685, "learning_rate": 2.2575642913115408e-06, "loss": 0.05669832229614258, "step": 6180 }, { "epoch": 0.8356513950619371, "grad_norm": 0.0727134570479393, "learning_rate": 2.253954528181313e-06, "loss": 0.05334728956222534, "step": 6181 }, { "epoch": 0.8357865918577729, "grad_norm": 0.06224825978279114, "learning_rate": 2.2503474188457206e-06, "loss": 0.046790242195129395, "step": 6182 }, { "epoch": 0.8359217886536089, "grad_norm": 0.06544957309961319, "learning_rate": 2.2467429640557903e-06, "loss": 0.054503440856933594, "step": 6183 }, { "epoch": 0.8360569854494448, "grad_norm": 0.07928641885519028, "learning_rate": 2.2431411645619776e-06, "loss": 0.04765033721923828, "step": 6184 }, { "epoch": 0.8361921822452808, "grad_norm": 0.07874953746795654, "learning_rate": 2.239542021114205e-06, "loss": 0.05323934555053711, "step": 6185 }, { "epoch": 0.8363273790411168, "grad_norm": 0.07254187017679214, "learning_rate": 2.2359455344618306e-06, "loss": 0.06038475036621094, "step": 6186 }, { "epoch": 0.8364625758369527, "grad_norm": 0.04952237010002136, "learning_rate": 2.232351705353663e-06, "loss": 0.045972347259521484, "step": 6187 }, { "epoch": 0.8365977726327886, "grad_norm": 0.08560050278902054, "learning_rate": 2.228760534537955e-06, "loss": 0.08045077323913574, "step": 6188 }, { "epoch": 0.8367329694286245, "grad_norm": 0.051411427557468414, "learning_rate": 2.2251720227624044e-06, "loss": 0.04520010948181152, "step": 6189 }, { "epoch": 0.8368681662244605, "grad_norm": 0.07734411209821701, "learning_rate": 2.2215861707741666e-06, "loss": 0.049101829528808594, "step": 6190 }, { "epoch": 0.8370033630202964, "grad_norm": 0.08254475146532059, "learning_rate": 2.2180029793198313e-06, "loss": 0.053664207458496094, "step": 6191 }, { "epoch": 0.8371385598161324, "grad_norm": 0.06961311399936676, "learning_rate": 2.2144224491454363e-06, "loss": 0.05651545524597168, "step": 6192 }, { "epoch": 0.8372737566119683, "grad_norm": 0.06746058166027069, "learning_rate": 2.2108445809964695e-06, "loss": 0.05730557441711426, "step": 6193 }, { "epoch": 0.8374089534078042, "grad_norm": 0.04513616859912872, "learning_rate": 2.2072693756178567e-06, "loss": 0.0493779182434082, "step": 6194 }, { "epoch": 0.8375441502036401, "grad_norm": 0.16110411286354065, "learning_rate": 2.203696833753983e-06, "loss": 0.0833125114440918, "step": 6195 }, { "epoch": 0.8376793469994761, "grad_norm": 0.07493987679481506, "learning_rate": 2.200126956148668e-06, "loss": 0.07624554634094238, "step": 6196 }, { "epoch": 0.8378145437953121, "grad_norm": 0.08896756172180176, "learning_rate": 2.196559743545177e-06, "loss": 0.058403462171554565, "step": 6197 }, { "epoch": 0.837949740591148, "grad_norm": 0.09199562668800354, "learning_rate": 2.1929951966862233e-06, "loss": 0.10968255996704102, "step": 6198 }, { "epoch": 0.838084937386984, "grad_norm": 0.055772487074136734, "learning_rate": 2.1894333163139607e-06, "loss": 0.05327785015106201, "step": 6199 }, { "epoch": 0.8382201341828198, "grad_norm": 0.07642515748739243, "learning_rate": 2.1858741031700015e-06, "loss": 0.06313419342041016, "step": 6200 }, { "epoch": 0.8383553309786558, "grad_norm": 0.07300259917974472, "learning_rate": 2.1823175579953856e-06, "loss": 0.0383075475692749, "step": 6201 }, { "epoch": 0.8384905277744917, "grad_norm": 0.09861123561859131, "learning_rate": 2.1787636815306065e-06, "loss": 0.06454706192016602, "step": 6202 }, { "epoch": 0.8386257245703277, "grad_norm": 0.06772708147764206, "learning_rate": 2.1752124745156005e-06, "loss": 0.05844491720199585, "step": 6203 }, { "epoch": 0.8387609213661636, "grad_norm": 0.07884043455123901, "learning_rate": 2.171663937689744e-06, "loss": 0.0663137435913086, "step": 6204 }, { "epoch": 0.8388961181619996, "grad_norm": 0.09211120009422302, "learning_rate": 2.168118071791868e-06, "loss": 0.049735426902770996, "step": 6205 }, { "epoch": 0.8390313149578355, "grad_norm": 0.08327385038137436, "learning_rate": 2.164574877560237e-06, "loss": 0.06398111581802368, "step": 6206 }, { "epoch": 0.8391665117536714, "grad_norm": 0.06487874686717987, "learning_rate": 2.161034355732564e-06, "loss": 0.062338173389434814, "step": 6207 }, { "epoch": 0.8393017085495074, "grad_norm": 0.0806300938129425, "learning_rate": 2.1574965070460047e-06, "loss": 0.06329292058944702, "step": 6208 }, { "epoch": 0.8394369053453433, "grad_norm": 0.05296044051647186, "learning_rate": 2.1539613322371527e-06, "loss": 0.057851314544677734, "step": 6209 }, { "epoch": 0.8395721021411793, "grad_norm": 0.04556573927402496, "learning_rate": 2.1504288320420613e-06, "loss": 0.03304433822631836, "step": 6210 }, { "epoch": 0.8397072989370152, "grad_norm": 0.05280722677707672, "learning_rate": 2.1468990071962038e-06, "loss": 0.03946375846862793, "step": 6211 }, { "epoch": 0.8398424957328511, "grad_norm": 0.05114326253533363, "learning_rate": 2.143371858434515e-06, "loss": 0.03976273536682129, "step": 6212 }, { "epoch": 0.839977692528687, "grad_norm": 0.14617249369621277, "learning_rate": 2.139847386491367e-06, "loss": 0.07262492179870605, "step": 6213 }, { "epoch": 0.840112889324523, "grad_norm": 0.0926634669303894, "learning_rate": 2.1363255921005685e-06, "loss": 0.07397842407226562, "step": 6214 }, { "epoch": 0.840248086120359, "grad_norm": 0.08349807560443878, "learning_rate": 2.1328064759953853e-06, "loss": 0.054540157318115234, "step": 6215 }, { "epoch": 0.8403832829161949, "grad_norm": 0.08934856951236725, "learning_rate": 2.129290038908504e-06, "loss": 0.06272006034851074, "step": 6216 }, { "epoch": 0.8405184797120309, "grad_norm": 0.12437047809362411, "learning_rate": 2.1257762815720745e-06, "loss": 0.054378509521484375, "step": 6217 }, { "epoch": 0.8406536765078667, "grad_norm": 0.10967876762151718, "learning_rate": 2.122265204717678e-06, "loss": 0.07696533203125, "step": 6218 }, { "epoch": 0.8407888733037027, "grad_norm": 0.07253522425889969, "learning_rate": 2.1187568090763328e-06, "loss": 0.062096595764160156, "step": 6219 }, { "epoch": 0.8409240700995386, "grad_norm": 0.14087271690368652, "learning_rate": 2.1152510953785196e-06, "loss": 0.07767057418823242, "step": 6220 }, { "epoch": 0.8410592668953746, "grad_norm": 0.10102976113557816, "learning_rate": 2.1117480643541304e-06, "loss": 0.0737464427947998, "step": 6221 }, { "epoch": 0.8411944636912105, "grad_norm": 0.06434816122055054, "learning_rate": 2.1082477167325275e-06, "loss": 0.05290699005126953, "step": 6222 }, { "epoch": 0.8413296604870465, "grad_norm": 0.06449396163225174, "learning_rate": 2.1047500532424968e-06, "loss": 0.04849052429199219, "step": 6223 }, { "epoch": 0.8414648572828825, "grad_norm": 0.06327880918979645, "learning_rate": 2.1012550746122705e-06, "loss": 0.07477855682373047, "step": 6224 }, { "epoch": 0.8416000540787183, "grad_norm": 0.10428311675786972, "learning_rate": 2.0977627815695217e-06, "loss": 0.07810711860656738, "step": 6225 }, { "epoch": 0.8417352508745543, "grad_norm": 0.11010656505823135, "learning_rate": 2.094273174841362e-06, "loss": 0.07221651077270508, "step": 6226 }, { "epoch": 0.8418704476703902, "grad_norm": 0.05010322853922844, "learning_rate": 2.0907862551543516e-06, "loss": 0.03895598649978638, "step": 6227 }, { "epoch": 0.8420056444662262, "grad_norm": 0.09927906095981598, "learning_rate": 2.087302023234485e-06, "loss": 0.0524754524230957, "step": 6228 }, { "epoch": 0.8421408412620621, "grad_norm": 0.09309618920087814, "learning_rate": 2.083820479807194e-06, "loss": 0.071075439453125, "step": 6229 }, { "epoch": 0.8422760380578981, "grad_norm": 0.0662274956703186, "learning_rate": 2.0803416255973585e-06, "loss": 0.048351287841796875, "step": 6230 }, { "epoch": 0.8424112348537339, "grad_norm": 0.030056042596697807, "learning_rate": 2.0768654613292887e-06, "loss": 0.0256117582321167, "step": 6231 }, { "epoch": 0.8425464316495699, "grad_norm": 0.1632716804742813, "learning_rate": 2.0733919877267477e-06, "loss": 0.06250828504562378, "step": 6232 }, { "epoch": 0.8426816284454058, "grad_norm": 0.10840299725532532, "learning_rate": 2.0699212055129268e-06, "loss": 0.05613970756530762, "step": 6233 }, { "epoch": 0.8428168252412418, "grad_norm": 0.15623316168785095, "learning_rate": 2.066453115410463e-06, "loss": 0.08043575286865234, "step": 6234 }, { "epoch": 0.8429520220370778, "grad_norm": 0.07265260815620422, "learning_rate": 2.062987718141431e-06, "loss": 0.0694270133972168, "step": 6235 }, { "epoch": 0.8430872188329137, "grad_norm": 0.08338449150323868, "learning_rate": 2.0595250144273423e-06, "loss": 0.04454910755157471, "step": 6236 }, { "epoch": 0.8432224156287496, "grad_norm": 0.11968682706356049, "learning_rate": 2.056065004989155e-06, "loss": 0.06780791282653809, "step": 6237 }, { "epoch": 0.8433576124245855, "grad_norm": 0.10412868857383728, "learning_rate": 2.0526076905472585e-06, "loss": 0.045495063066482544, "step": 6238 }, { "epoch": 0.8434928092204215, "grad_norm": 0.18360310792922974, "learning_rate": 2.0491530718214855e-06, "loss": 0.06943023204803467, "step": 6239 }, { "epoch": 0.8436280060162574, "grad_norm": 0.10384724289178848, "learning_rate": 2.0457011495311045e-06, "loss": 0.05608558654785156, "step": 6240 }, { "epoch": 0.8437632028120934, "grad_norm": 0.12235202640295029, "learning_rate": 2.0422519243948232e-06, "loss": 0.04145188629627228, "step": 6241 }, { "epoch": 0.8438983996079293, "grad_norm": 0.08759697526693344, "learning_rate": 2.0388053971307927e-06, "loss": 0.08816301822662354, "step": 6242 }, { "epoch": 0.8440335964037652, "grad_norm": 0.07537052780389786, "learning_rate": 2.0353615684565956e-06, "loss": 0.06231236457824707, "step": 6243 }, { "epoch": 0.8441687931996011, "grad_norm": 0.1309131532907486, "learning_rate": 2.0319204390892566e-06, "loss": 0.09835195541381836, "step": 6244 }, { "epoch": 0.8443039899954371, "grad_norm": 0.14323389530181885, "learning_rate": 2.0284820097452374e-06, "loss": 0.08635735511779785, "step": 6245 }, { "epoch": 0.844439186791273, "grad_norm": 0.06707777827978134, "learning_rate": 2.02504628114043e-06, "loss": 0.06563258171081543, "step": 6246 }, { "epoch": 0.844574383587109, "grad_norm": 0.109011709690094, "learning_rate": 2.0216132539901865e-06, "loss": 0.06651854515075684, "step": 6247 }, { "epoch": 0.844709580382945, "grad_norm": 0.09222259372472763, "learning_rate": 2.0181829290092663e-06, "loss": 0.07601046562194824, "step": 6248 }, { "epoch": 0.8448447771787808, "grad_norm": 0.08063499629497528, "learning_rate": 2.014755306911891e-06, "loss": 0.054407358169555664, "step": 6249 }, { "epoch": 0.8449799739746168, "grad_norm": 0.11135166138410568, "learning_rate": 2.0113303884117057e-06, "loss": 0.06512022018432617, "step": 6250 }, { "epoch": 0.8451151707704527, "grad_norm": 0.07940854877233505, "learning_rate": 2.0079081742217957e-06, "loss": 0.08141922950744629, "step": 6251 }, { "epoch": 0.8452503675662887, "grad_norm": 0.06298718601465225, "learning_rate": 2.0044886650546915e-06, "loss": 0.06259512901306152, "step": 6252 }, { "epoch": 0.8453855643621246, "grad_norm": 0.15344993770122528, "learning_rate": 2.0010718616223406e-06, "loss": 0.061038196086883545, "step": 6253 }, { "epoch": 0.8455207611579606, "grad_norm": 0.07286316156387329, "learning_rate": 1.9976577646361514e-06, "loss": 0.06543761491775513, "step": 6254 }, { "epoch": 0.8456559579537964, "grad_norm": 0.09404855221509933, "learning_rate": 1.994246374806953e-06, "loss": 0.07211899757385254, "step": 6255 }, { "epoch": 0.8457911547496324, "grad_norm": 0.13926230370998383, "learning_rate": 1.9908376928450128e-06, "loss": 0.06549692153930664, "step": 6256 }, { "epoch": 0.8459263515454684, "grad_norm": 0.07288368046283722, "learning_rate": 1.987431719460039e-06, "loss": 0.056478023529052734, "step": 6257 }, { "epoch": 0.8460615483413043, "grad_norm": 0.10324602574110031, "learning_rate": 1.9840284553611706e-06, "loss": 0.04634881019592285, "step": 6258 }, { "epoch": 0.8461967451371403, "grad_norm": 0.1018214002251625, "learning_rate": 1.980627901256989e-06, "loss": 0.065582275390625, "step": 6259 }, { "epoch": 0.8463319419329762, "grad_norm": 0.1337098628282547, "learning_rate": 1.9772300578555062e-06, "loss": 0.09366035461425781, "step": 6260 }, { "epoch": 0.8464671387288121, "grad_norm": 0.05762092024087906, "learning_rate": 1.973834925864172e-06, "loss": 0.059086740016937256, "step": 6261 }, { "epoch": 0.846602335524648, "grad_norm": 0.06379427015781403, "learning_rate": 1.97044250598987e-06, "loss": 0.047879695892333984, "step": 6262 }, { "epoch": 0.846737532320484, "grad_norm": 0.1311216652393341, "learning_rate": 1.9670527989389177e-06, "loss": 0.0744023323059082, "step": 6263 }, { "epoch": 0.8468727291163199, "grad_norm": 0.06466415524482727, "learning_rate": 1.9636658054170747e-06, "loss": 0.05891752243041992, "step": 6264 }, { "epoch": 0.8470079259121559, "grad_norm": 0.09935913980007172, "learning_rate": 1.960281526129531e-06, "loss": 0.053803443908691406, "step": 6265 }, { "epoch": 0.8471431227079919, "grad_norm": 0.10352463275194168, "learning_rate": 1.9568999617809077e-06, "loss": 0.0675363540649414, "step": 6266 }, { "epoch": 0.8472783195038278, "grad_norm": 0.06425938755273819, "learning_rate": 1.9535211130752676e-06, "loss": 0.04772520065307617, "step": 6267 }, { "epoch": 0.8474135162996637, "grad_norm": 0.07881362736225128, "learning_rate": 1.950144980716101e-06, "loss": 0.049092769622802734, "step": 6268 }, { "epoch": 0.8475487130954996, "grad_norm": 0.09278977662324905, "learning_rate": 1.9467715654063444e-06, "loss": 0.0947115421295166, "step": 6269 }, { "epoch": 0.8476839098913356, "grad_norm": 0.1020590290427208, "learning_rate": 1.9434008678483532e-06, "loss": 0.08096301555633545, "step": 6270 }, { "epoch": 0.8478191066871715, "grad_norm": 0.07810697704553604, "learning_rate": 1.9400328887439295e-06, "loss": 0.06392109394073486, "step": 6271 }, { "epoch": 0.8479543034830075, "grad_norm": 0.10360649228096008, "learning_rate": 1.9366676287943038e-06, "loss": 0.06831574440002441, "step": 6272 }, { "epoch": 0.8480895002788434, "grad_norm": 0.07456852495670319, "learning_rate": 1.9333050887001337e-06, "loss": 0.0719747543334961, "step": 6273 }, { "epoch": 0.8482246970746793, "grad_norm": 0.08693388104438782, "learning_rate": 1.9299452691615293e-06, "loss": 0.09015655517578125, "step": 6274 }, { "epoch": 0.8483598938705152, "grad_norm": 0.08228273689746857, "learning_rate": 1.9265881708780182e-06, "loss": 0.07655012607574463, "step": 6275 }, { "epoch": 0.8484950906663512, "grad_norm": 0.12034351378679276, "learning_rate": 1.9232337945485657e-06, "loss": 0.07366728782653809, "step": 6276 }, { "epoch": 0.8486302874621872, "grad_norm": 0.07807251065969467, "learning_rate": 1.91988214087157e-06, "loss": 0.06461310386657715, "step": 6277 }, { "epoch": 0.8487654842580231, "grad_norm": 0.11119145900011063, "learning_rate": 1.9165332105448613e-06, "loss": 0.05874037742614746, "step": 6278 }, { "epoch": 0.8489006810538591, "grad_norm": 0.048088837414979935, "learning_rate": 1.913187004265715e-06, "loss": 0.041738033294677734, "step": 6279 }, { "epoch": 0.8490358778496949, "grad_norm": 0.06721097975969315, "learning_rate": 1.909843522730814e-06, "loss": 0.05828624963760376, "step": 6280 }, { "epoch": 0.8491710746455309, "grad_norm": 0.07519472390413284, "learning_rate": 1.9065027666363017e-06, "loss": 0.054174184799194336, "step": 6281 }, { "epoch": 0.8493062714413668, "grad_norm": 0.06168925389647484, "learning_rate": 1.903164736677736e-06, "loss": 0.045952796936035156, "step": 6282 }, { "epoch": 0.8494414682372028, "grad_norm": 0.09956198930740356, "learning_rate": 1.8998294335501082e-06, "loss": 0.060864925384521484, "step": 6283 }, { "epoch": 0.8495766650330387, "grad_norm": 0.08205647021532059, "learning_rate": 1.8964968579478592e-06, "loss": 0.07142376899719238, "step": 6284 }, { "epoch": 0.8497118618288747, "grad_norm": 0.07485213875770569, "learning_rate": 1.893167010564834e-06, "loss": 0.0645444393157959, "step": 6285 }, { "epoch": 0.8498470586247105, "grad_norm": 0.08847309648990631, "learning_rate": 1.8898398920943349e-06, "loss": 0.07096385955810547, "step": 6286 }, { "epoch": 0.8499822554205465, "grad_norm": 0.1267227977514267, "learning_rate": 1.886515503229081e-06, "loss": 0.06325006484985352, "step": 6287 }, { "epoch": 0.8501174522163825, "grad_norm": 0.14188162982463837, "learning_rate": 1.8831938446612269e-06, "loss": 0.06595194339752197, "step": 6288 }, { "epoch": 0.8502526490122184, "grad_norm": 0.07252778112888336, "learning_rate": 1.8798749170823676e-06, "loss": 0.05689281225204468, "step": 6289 }, { "epoch": 0.8503878458080544, "grad_norm": 0.07427436113357544, "learning_rate": 1.8765587211835089e-06, "loss": 0.05604982376098633, "step": 6290 }, { "epoch": 0.8505230426038903, "grad_norm": 0.03143913671374321, "learning_rate": 1.8732452576551102e-06, "loss": 0.03462386131286621, "step": 6291 }, { "epoch": 0.8506582393997262, "grad_norm": 0.12276497483253479, "learning_rate": 1.8699345271870493e-06, "loss": 0.05977284908294678, "step": 6292 }, { "epoch": 0.8507934361955621, "grad_norm": 0.14457136392593384, "learning_rate": 1.8666265304686387e-06, "loss": 0.0800027847290039, "step": 6293 }, { "epoch": 0.8509286329913981, "grad_norm": 0.13843873143196106, "learning_rate": 1.8633212681886203e-06, "loss": 0.0555652379989624, "step": 6294 }, { "epoch": 0.851063829787234, "grad_norm": 0.06576929986476898, "learning_rate": 1.8600187410351621e-06, "loss": 0.05843544006347656, "step": 6295 }, { "epoch": 0.85119902658307, "grad_norm": 0.07701294869184494, "learning_rate": 1.8567189496958776e-06, "loss": 0.0521770715713501, "step": 6296 }, { "epoch": 0.851334223378906, "grad_norm": 0.0984310656785965, "learning_rate": 1.853421894857797e-06, "loss": 0.06432175636291504, "step": 6297 }, { "epoch": 0.8514694201747418, "grad_norm": 0.11677698791027069, "learning_rate": 1.8501275772073827e-06, "loss": 0.06746864318847656, "step": 6298 }, { "epoch": 0.8516046169705778, "grad_norm": 0.04813215509057045, "learning_rate": 1.8468359974305315e-06, "loss": 0.04853618144989014, "step": 6299 }, { "epoch": 0.8517398137664137, "grad_norm": 0.09483543783426285, "learning_rate": 1.8435471562125633e-06, "loss": 0.07366073131561279, "step": 6300 }, { "epoch": 0.8518750105622497, "grad_norm": 0.08742405474185944, "learning_rate": 1.8402610542382386e-06, "loss": 0.055742740631103516, "step": 6301 }, { "epoch": 0.8520102073580856, "grad_norm": 0.09750664979219437, "learning_rate": 1.836977692191742e-06, "loss": 0.062215328216552734, "step": 6302 }, { "epoch": 0.8521454041539216, "grad_norm": 0.05253782495856285, "learning_rate": 1.8336970707566781e-06, "loss": 0.052700042724609375, "step": 6303 }, { "epoch": 0.8522806009497574, "grad_norm": 0.0742323100566864, "learning_rate": 1.8304191906160973e-06, "loss": 0.03688383102416992, "step": 6304 }, { "epoch": 0.8524157977455934, "grad_norm": 0.052266329526901245, "learning_rate": 1.8271440524524668e-06, "loss": 0.0567936897277832, "step": 6305 }, { "epoch": 0.8525509945414294, "grad_norm": 0.05437502637505531, "learning_rate": 1.8238716569476949e-06, "loss": 0.03851604461669922, "step": 6306 }, { "epoch": 0.8526861913372653, "grad_norm": 0.07193050533533096, "learning_rate": 1.8206020047831078e-06, "loss": 0.07477569580078125, "step": 6307 }, { "epoch": 0.8528213881331013, "grad_norm": 0.06978517770767212, "learning_rate": 1.8173350966394648e-06, "loss": 0.05048036575317383, "step": 6308 }, { "epoch": 0.8529565849289372, "grad_norm": 0.029680214822292328, "learning_rate": 1.8140709331969513e-06, "loss": 0.02732035517692566, "step": 6309 }, { "epoch": 0.8530917817247731, "grad_norm": 0.05021276697516441, "learning_rate": 1.810809515135184e-06, "loss": 0.04528331756591797, "step": 6310 }, { "epoch": 0.853226978520609, "grad_norm": 0.09833616763353348, "learning_rate": 1.8075508431332111e-06, "loss": 0.04064047336578369, "step": 6311 }, { "epoch": 0.853362175316445, "grad_norm": 0.07684347778558731, "learning_rate": 1.8042949178695034e-06, "loss": 0.0632781982421875, "step": 6312 }, { "epoch": 0.8534973721122809, "grad_norm": 0.1029210090637207, "learning_rate": 1.8010417400219636e-06, "loss": 0.06871858239173889, "step": 6313 }, { "epoch": 0.8536325689081169, "grad_norm": 0.10687116533517838, "learning_rate": 1.7977913102679167e-06, "loss": 0.09267044067382812, "step": 6314 }, { "epoch": 0.8537677657039529, "grad_norm": 0.09061265736818314, "learning_rate": 1.7945436292841193e-06, "loss": 0.06876087188720703, "step": 6315 }, { "epoch": 0.8539029624997888, "grad_norm": 0.06640708446502686, "learning_rate": 1.791298697746766e-06, "loss": 0.04834812879562378, "step": 6316 }, { "epoch": 0.8540381592956247, "grad_norm": 0.11356284469366074, "learning_rate": 1.7880565163314545e-06, "loss": 0.07348084449768066, "step": 6317 }, { "epoch": 0.8541733560914606, "grad_norm": 0.0762079507112503, "learning_rate": 1.784817085713233e-06, "loss": 0.0552525520324707, "step": 6318 }, { "epoch": 0.8543085528872966, "grad_norm": 0.09714668989181519, "learning_rate": 1.7815804065665669e-06, "loss": 0.05569267272949219, "step": 6319 }, { "epoch": 0.8544437496831325, "grad_norm": 0.0934680625796318, "learning_rate": 1.778346479565346e-06, "loss": 0.06911420822143555, "step": 6320 }, { "epoch": 0.8545789464789685, "grad_norm": 0.061427902430295944, "learning_rate": 1.7751153053829011e-06, "loss": 0.06157207489013672, "step": 6321 }, { "epoch": 0.8547141432748044, "grad_norm": 0.04562776908278465, "learning_rate": 1.7718868846919662e-06, "loss": 0.03926849365234375, "step": 6322 }, { "epoch": 0.8548493400706403, "grad_norm": 0.14218325912952423, "learning_rate": 1.7686612181647266e-06, "loss": 0.06679058074951172, "step": 6323 }, { "epoch": 0.8549845368664762, "grad_norm": 0.08945848792791367, "learning_rate": 1.7654383064727802e-06, "loss": 0.04896807670593262, "step": 6324 }, { "epoch": 0.8551197336623122, "grad_norm": 0.05994424223899841, "learning_rate": 1.762218150287152e-06, "loss": 0.0772860050201416, "step": 6325 }, { "epoch": 0.8552549304581482, "grad_norm": 0.09649288654327393, "learning_rate": 1.759000750278299e-06, "loss": 0.05411577224731445, "step": 6326 }, { "epoch": 0.8553901272539841, "grad_norm": 0.11805935204029083, "learning_rate": 1.7557861071160953e-06, "loss": 0.04920238256454468, "step": 6327 }, { "epoch": 0.8555253240498201, "grad_norm": 0.09620372205972672, "learning_rate": 1.7525742214698538e-06, "loss": 0.06359004974365234, "step": 6328 }, { "epoch": 0.8556605208456559, "grad_norm": 0.05717959627509117, "learning_rate": 1.7493650940083045e-06, "loss": 0.061995506286621094, "step": 6329 }, { "epoch": 0.8557957176414919, "grad_norm": 0.05568467453122139, "learning_rate": 1.746158725399603e-06, "loss": 0.05309653282165527, "step": 6330 }, { "epoch": 0.8559309144373278, "grad_norm": 0.0946793407201767, "learning_rate": 1.7429551163113322e-06, "loss": 0.042881131172180176, "step": 6331 }, { "epoch": 0.8560661112331638, "grad_norm": 0.11048036813735962, "learning_rate": 1.7397542674105e-06, "loss": 0.05759406089782715, "step": 6332 }, { "epoch": 0.8562013080289997, "grad_norm": 0.1003682091832161, "learning_rate": 1.7365561793635431e-06, "loss": 0.0689697265625, "step": 6333 }, { "epoch": 0.8563365048248357, "grad_norm": 0.09098163992166519, "learning_rate": 1.7333608528363227e-06, "loss": 0.07697200775146484, "step": 6334 }, { "epoch": 0.8564717016206715, "grad_norm": 0.08087503165006638, "learning_rate": 1.7301682884941128e-06, "loss": 0.06306719779968262, "step": 6335 }, { "epoch": 0.8566068984165075, "grad_norm": 0.0822829082608223, "learning_rate": 1.726978487001632e-06, "loss": 0.06019258499145508, "step": 6336 }, { "epoch": 0.8567420952123435, "grad_norm": 0.07111406326293945, "learning_rate": 1.7237914490230072e-06, "loss": 0.06962203979492188, "step": 6337 }, { "epoch": 0.8568772920081794, "grad_norm": 0.06223228573799133, "learning_rate": 1.7206071752218027e-06, "loss": 0.051291823387145996, "step": 6338 }, { "epoch": 0.8570124888040154, "grad_norm": 0.035153474658727646, "learning_rate": 1.7174256662610032e-06, "loss": 0.03708934783935547, "step": 6339 }, { "epoch": 0.8571476855998513, "grad_norm": 0.0890684425830841, "learning_rate": 1.714246922803004e-06, "loss": 0.06419539451599121, "step": 6340 }, { "epoch": 0.8572828823956872, "grad_norm": 0.04762372374534607, "learning_rate": 1.7110709455096468e-06, "loss": 0.038701534271240234, "step": 6341 }, { "epoch": 0.8574180791915231, "grad_norm": 0.12369276583194733, "learning_rate": 1.7078977350421815e-06, "loss": 0.0657840371131897, "step": 6342 }, { "epoch": 0.8575532759873591, "grad_norm": 0.026443932205438614, "learning_rate": 1.7047272920612926e-06, "loss": 0.020498961210250854, "step": 6343 }, { "epoch": 0.857688472783195, "grad_norm": 0.041917845606803894, "learning_rate": 1.7015596172270841e-06, "loss": 0.03945636749267578, "step": 6344 }, { "epoch": 0.857823669579031, "grad_norm": 0.041961077600717545, "learning_rate": 1.6983947111990717e-06, "loss": 0.03614044189453125, "step": 6345 }, { "epoch": 0.857958866374867, "grad_norm": 0.06169293448328972, "learning_rate": 1.695232574636218e-06, "loss": 0.05241173505783081, "step": 6346 }, { "epoch": 0.8580940631707028, "grad_norm": 0.1265294998884201, "learning_rate": 1.6920732081968882e-06, "loss": 0.0695028305053711, "step": 6347 }, { "epoch": 0.8582292599665388, "grad_norm": 0.08882235735654831, "learning_rate": 1.6889166125388878e-06, "loss": 0.05205345153808594, "step": 6348 }, { "epoch": 0.8583644567623747, "grad_norm": 0.07851480692625046, "learning_rate": 1.6857627883194277e-06, "loss": 0.041811466217041016, "step": 6349 }, { "epoch": 0.8584996535582107, "grad_norm": 0.10772155225276947, "learning_rate": 1.6826117361951577e-06, "loss": 0.08319354057312012, "step": 6350 }, { "epoch": 0.8586348503540466, "grad_norm": 0.0804390013217926, "learning_rate": 1.6794634568221412e-06, "loss": 0.06261491775512695, "step": 6351 }, { "epoch": 0.8587700471498826, "grad_norm": 0.04552323743700981, "learning_rate": 1.676317950855864e-06, "loss": 0.04698586463928223, "step": 6352 }, { "epoch": 0.8589052439457184, "grad_norm": 0.15698979794979095, "learning_rate": 1.6731752189512456e-06, "loss": 0.08278107643127441, "step": 6353 }, { "epoch": 0.8590404407415544, "grad_norm": 0.05993194878101349, "learning_rate": 1.6700352617626092e-06, "loss": 0.054193854331970215, "step": 6354 }, { "epoch": 0.8591756375373903, "grad_norm": 0.07977376878261566, "learning_rate": 1.6668980799437167e-06, "loss": 0.04984641075134277, "step": 6355 }, { "epoch": 0.8593108343332263, "grad_norm": 0.05026770383119583, "learning_rate": 1.6637636741477458e-06, "loss": 0.03841531276702881, "step": 6356 }, { "epoch": 0.8594460311290623, "grad_norm": 0.1296905130147934, "learning_rate": 1.6606320450272943e-06, "loss": 0.0711016058921814, "step": 6357 }, { "epoch": 0.8595812279248982, "grad_norm": 0.06973902136087418, "learning_rate": 1.657503193234386e-06, "loss": 0.05986309051513672, "step": 6358 }, { "epoch": 0.8597164247207342, "grad_norm": 0.06751244515180588, "learning_rate": 1.654377119420461e-06, "loss": 0.049954235553741455, "step": 6359 }, { "epoch": 0.85985162151657, "grad_norm": 0.17090043425559998, "learning_rate": 1.6512538242363889e-06, "loss": 0.08775246143341064, "step": 6360 }, { "epoch": 0.859986818312406, "grad_norm": 0.14085212349891663, "learning_rate": 1.6481333083324563e-06, "loss": 0.06416988372802734, "step": 6361 }, { "epoch": 0.8601220151082419, "grad_norm": 0.0892537534236908, "learning_rate": 1.6450155723583698e-06, "loss": 0.07040572166442871, "step": 6362 }, { "epoch": 0.8602572119040779, "grad_norm": 0.08346202969551086, "learning_rate": 1.6419006169632573e-06, "loss": 0.07629728317260742, "step": 6363 }, { "epoch": 0.8603924086999138, "grad_norm": 0.11694836616516113, "learning_rate": 1.638788442795668e-06, "loss": 0.0595853328704834, "step": 6364 }, { "epoch": 0.8605276054957498, "grad_norm": 0.07121625542640686, "learning_rate": 1.6356790505035785e-06, "loss": 0.06560654938220978, "step": 6365 }, { "epoch": 0.8606628022915856, "grad_norm": 0.042880576103925705, "learning_rate": 1.6325724407343795e-06, "loss": 0.03971362113952637, "step": 6366 }, { "epoch": 0.8607979990874216, "grad_norm": 0.0610007643699646, "learning_rate": 1.6294686141348801e-06, "loss": 0.04184424877166748, "step": 6367 }, { "epoch": 0.8609331958832576, "grad_norm": 0.1306169331073761, "learning_rate": 1.626367571351317e-06, "loss": 0.047617197036743164, "step": 6368 }, { "epoch": 0.8610683926790935, "grad_norm": 0.0728248730301857, "learning_rate": 1.6232693130293386e-06, "loss": 0.0598452091217041, "step": 6369 }, { "epoch": 0.8612035894749295, "grad_norm": 0.1365279257297516, "learning_rate": 1.6201738398140254e-06, "loss": 0.08390998840332031, "step": 6370 }, { "epoch": 0.8613387862707654, "grad_norm": 0.06906488537788391, "learning_rate": 1.6170811523498718e-06, "loss": 0.05446815490722656, "step": 6371 }, { "epoch": 0.8614739830666013, "grad_norm": 0.052067436277866364, "learning_rate": 1.613991251280783e-06, "loss": 0.04620945453643799, "step": 6372 }, { "epoch": 0.8616091798624372, "grad_norm": 0.05181831866502762, "learning_rate": 1.6109041372501028e-06, "loss": 0.045969247817993164, "step": 6373 }, { "epoch": 0.8617443766582732, "grad_norm": 0.1120200976729393, "learning_rate": 1.6078198109005766e-06, "loss": 0.06043815612792969, "step": 6374 }, { "epoch": 0.8618795734541091, "grad_norm": 0.09435294568538666, "learning_rate": 1.6047382728743843e-06, "loss": 0.06208467483520508, "step": 6375 }, { "epoch": 0.8620147702499451, "grad_norm": 0.0897468775510788, "learning_rate": 1.6016595238131176e-06, "loss": 0.06417274475097656, "step": 6376 }, { "epoch": 0.8621499670457811, "grad_norm": 0.17501172423362732, "learning_rate": 1.5985835643577824e-06, "loss": 0.08631551265716553, "step": 6377 }, { "epoch": 0.8622851638416169, "grad_norm": 0.0604604072868824, "learning_rate": 1.5955103951488177e-06, "loss": 0.05453217029571533, "step": 6378 }, { "epoch": 0.8624203606374529, "grad_norm": 0.07583752274513245, "learning_rate": 1.5924400168260666e-06, "loss": 0.07736921310424805, "step": 6379 }, { "epoch": 0.8625555574332888, "grad_norm": 0.23592795431613922, "learning_rate": 1.5893724300288064e-06, "loss": 0.07904243469238281, "step": 6380 }, { "epoch": 0.8626907542291248, "grad_norm": 0.051680997014045715, "learning_rate": 1.5863076353957196e-06, "loss": 0.03864121437072754, "step": 6381 }, { "epoch": 0.8628259510249607, "grad_norm": 0.07241277396678925, "learning_rate": 1.5832456335649104e-06, "loss": 0.05378365516662598, "step": 6382 }, { "epoch": 0.8629611478207967, "grad_norm": 0.05904010683298111, "learning_rate": 1.580186425173909e-06, "loss": 0.04015928506851196, "step": 6383 }, { "epoch": 0.8630963446166325, "grad_norm": 0.10654803365468979, "learning_rate": 1.5771300108596543e-06, "loss": 0.06052899360656738, "step": 6384 }, { "epoch": 0.8632315414124685, "grad_norm": 0.08163385838270187, "learning_rate": 1.5740763912585171e-06, "loss": 0.06293773651123047, "step": 6385 }, { "epoch": 0.8633667382083045, "grad_norm": 0.06008554995059967, "learning_rate": 1.5710255670062657e-06, "loss": 0.06254291534423828, "step": 6386 }, { "epoch": 0.8635019350041404, "grad_norm": 0.049865126609802246, "learning_rate": 1.567977538738105e-06, "loss": 0.049284160137176514, "step": 6387 }, { "epoch": 0.8636371317999764, "grad_norm": 0.16868528723716736, "learning_rate": 1.5649323070886494e-06, "loss": 0.05782385170459747, "step": 6388 }, { "epoch": 0.8637723285958123, "grad_norm": 0.04562008008360863, "learning_rate": 1.5618898726919284e-06, "loss": 0.036471009254455566, "step": 6389 }, { "epoch": 0.8639075253916482, "grad_norm": 0.041728463023900986, "learning_rate": 1.5588502361814032e-06, "loss": 0.033078670501708984, "step": 6390 }, { "epoch": 0.8640427221874841, "grad_norm": 0.06650735437870026, "learning_rate": 1.5558133981899314e-06, "loss": 0.05904310941696167, "step": 6391 }, { "epoch": 0.8641779189833201, "grad_norm": 0.12570209801197052, "learning_rate": 1.5527793593498053e-06, "loss": 0.08055615425109863, "step": 6392 }, { "epoch": 0.864313115779156, "grad_norm": 0.056440144777297974, "learning_rate": 1.5497481202927244e-06, "loss": 0.053415536880493164, "step": 6393 }, { "epoch": 0.864448312574992, "grad_norm": 0.06267638504505157, "learning_rate": 1.5467196816498107e-06, "loss": 0.052117109298706055, "step": 6394 }, { "epoch": 0.864583509370828, "grad_norm": 0.09015851467847824, "learning_rate": 1.5436940440516018e-06, "loss": 0.046578384935855865, "step": 6395 }, { "epoch": 0.8647187061666638, "grad_norm": 0.041786979883909225, "learning_rate": 1.5406712081280484e-06, "loss": 0.03513932228088379, "step": 6396 }, { "epoch": 0.8648539029624998, "grad_norm": 0.12346234917640686, "learning_rate": 1.5376511745085254e-06, "loss": 0.05023002624511719, "step": 6397 }, { "epoch": 0.8649890997583357, "grad_norm": 0.0752951055765152, "learning_rate": 1.5346339438218181e-06, "loss": 0.06355094909667969, "step": 6398 }, { "epoch": 0.8651242965541717, "grad_norm": 0.0592183843255043, "learning_rate": 1.5316195166961295e-06, "loss": 0.04720258712768555, "step": 6399 }, { "epoch": 0.8652594933500076, "grad_norm": 0.10795643925666809, "learning_rate": 1.5286078937590802e-06, "loss": 0.07311928272247314, "step": 6400 }, { "epoch": 0.8653946901458436, "grad_norm": 0.08860747516155243, "learning_rate": 1.5255990756377025e-06, "loss": 0.07887554168701172, "step": 6401 }, { "epoch": 0.8655298869416794, "grad_norm": 0.0760936439037323, "learning_rate": 1.5225930629584534e-06, "loss": 0.06229209899902344, "step": 6402 }, { "epoch": 0.8656650837375154, "grad_norm": 0.0736214891076088, "learning_rate": 1.5195898563472038e-06, "loss": 0.06769466400146484, "step": 6403 }, { "epoch": 0.8658002805333513, "grad_norm": 0.06015823781490326, "learning_rate": 1.5165894564292254e-06, "loss": 0.05713927745819092, "step": 6404 }, { "epoch": 0.8659354773291873, "grad_norm": 0.07352651655673981, "learning_rate": 1.5135918638292269e-06, "loss": 0.061180710792541504, "step": 6405 }, { "epoch": 0.8660706741250233, "grad_norm": 0.10576921701431274, "learning_rate": 1.5105970791713186e-06, "loss": 0.06670761108398438, "step": 6406 }, { "epoch": 0.8662058709208592, "grad_norm": 0.25882086157798767, "learning_rate": 1.5076051030790355e-06, "loss": 0.07758545875549316, "step": 6407 }, { "epoch": 0.8663410677166952, "grad_norm": 0.0488014779984951, "learning_rate": 1.5046159361753226e-06, "loss": 0.06250643730163574, "step": 6408 }, { "epoch": 0.866476264512531, "grad_norm": 0.07846884429454803, "learning_rate": 1.5016295790825336e-06, "loss": 0.04989957809448242, "step": 6409 }, { "epoch": 0.866611461308367, "grad_norm": 0.13717509806156158, "learning_rate": 1.4986460324224493e-06, "loss": 0.07029587030410767, "step": 6410 }, { "epoch": 0.8667466581042029, "grad_norm": 0.079444520175457, "learning_rate": 1.4956652968162582e-06, "loss": 0.08028125762939453, "step": 6411 }, { "epoch": 0.8668818549000389, "grad_norm": 0.08889906853437424, "learning_rate": 1.492687372884567e-06, "loss": 0.0611722469329834, "step": 6412 }, { "epoch": 0.8670170516958748, "grad_norm": 0.155953511595726, "learning_rate": 1.4897122612473978e-06, "loss": 0.07591962814331055, "step": 6413 }, { "epoch": 0.8671522484917108, "grad_norm": 0.17730297148227692, "learning_rate": 1.4867399625241772e-06, "loss": 0.06012523174285889, "step": 6414 }, { "epoch": 0.8672874452875466, "grad_norm": 0.06775970011949539, "learning_rate": 1.4837704773337602e-06, "loss": 0.05423814058303833, "step": 6415 }, { "epoch": 0.8674226420833826, "grad_norm": 0.0893474817276001, "learning_rate": 1.4808038062944036e-06, "loss": 0.07478094100952148, "step": 6416 }, { "epoch": 0.8675578388792186, "grad_norm": 0.07795003056526184, "learning_rate": 1.4778399500237933e-06, "loss": 0.040270090103149414, "step": 6417 }, { "epoch": 0.8676930356750545, "grad_norm": 0.09488378465175629, "learning_rate": 1.4748789091390124e-06, "loss": 0.07558703422546387, "step": 6418 }, { "epoch": 0.8678282324708905, "grad_norm": 0.07098179310560226, "learning_rate": 1.471920684256563e-06, "loss": 0.07665181159973145, "step": 6419 }, { "epoch": 0.8679634292667264, "grad_norm": 0.08094755560159683, "learning_rate": 1.4689652759923721e-06, "loss": 0.06972360610961914, "step": 6420 }, { "epoch": 0.8680986260625623, "grad_norm": 0.22221647202968597, "learning_rate": 1.4660126849617645e-06, "loss": 0.0952761173248291, "step": 6421 }, { "epoch": 0.8682338228583982, "grad_norm": 0.09138244390487671, "learning_rate": 1.4630629117794914e-06, "loss": 0.0722116231918335, "step": 6422 }, { "epoch": 0.8683690196542342, "grad_norm": 0.06013472378253937, "learning_rate": 1.4601159570597033e-06, "loss": 0.04598188400268555, "step": 6423 }, { "epoch": 0.8685042164500701, "grad_norm": 0.08180181682109833, "learning_rate": 1.4571718214159795e-06, "loss": 0.059500694274902344, "step": 6424 }, { "epoch": 0.8686394132459061, "grad_norm": 0.11993712931871414, "learning_rate": 1.454230505461303e-06, "loss": 0.0618743896484375, "step": 6425 }, { "epoch": 0.8687746100417421, "grad_norm": 0.06103155016899109, "learning_rate": 1.4512920098080672e-06, "loss": 0.03406292200088501, "step": 6426 }, { "epoch": 0.8689098068375779, "grad_norm": 0.05530266836285591, "learning_rate": 1.4483563350680878e-06, "loss": 0.05251955986022949, "step": 6427 }, { "epoch": 0.8690450036334139, "grad_norm": 0.07514005899429321, "learning_rate": 1.4454234818525824e-06, "loss": 0.05506563186645508, "step": 6428 }, { "epoch": 0.8691802004292498, "grad_norm": 0.04610864445567131, "learning_rate": 1.4424934507721926e-06, "loss": 0.031284451484680176, "step": 6429 }, { "epoch": 0.8693153972250858, "grad_norm": 0.1004101112484932, "learning_rate": 1.4395662424369622e-06, "loss": 0.057344913482666016, "step": 6430 }, { "epoch": 0.8694505940209217, "grad_norm": 0.0901050791144371, "learning_rate": 1.436641857456355e-06, "loss": 0.08454418182373047, "step": 6431 }, { "epoch": 0.8695857908167577, "grad_norm": 0.12006206065416336, "learning_rate": 1.4337202964392409e-06, "loss": 0.06084930896759033, "step": 6432 }, { "epoch": 0.8697209876125935, "grad_norm": 0.038087278604507446, "learning_rate": 1.4308015599939033e-06, "loss": 0.024875640869140625, "step": 6433 }, { "epoch": 0.8698561844084295, "grad_norm": 0.08178307116031647, "learning_rate": 1.4278856487280428e-06, "loss": 0.07812666893005371, "step": 6434 }, { "epoch": 0.8699913812042654, "grad_norm": 0.059284958988428116, "learning_rate": 1.4249725632487653e-06, "loss": 0.0552448034286499, "step": 6435 }, { "epoch": 0.8701265780001014, "grad_norm": 0.082722507417202, "learning_rate": 1.4220623041625924e-06, "loss": 0.05972862243652344, "step": 6436 }, { "epoch": 0.8702617747959374, "grad_norm": 0.11636929214000702, "learning_rate": 1.4191548720754527e-06, "loss": 0.0633622407913208, "step": 6437 }, { "epoch": 0.8703969715917733, "grad_norm": 0.060393013060092926, "learning_rate": 1.4162502675926887e-06, "loss": 0.05115801841020584, "step": 6438 }, { "epoch": 0.8705321683876092, "grad_norm": 0.06341095268726349, "learning_rate": 1.4133484913190596e-06, "loss": 0.05683088302612305, "step": 6439 }, { "epoch": 0.8706673651834451, "grad_norm": 0.059537675231695175, "learning_rate": 1.4104495438587295e-06, "loss": 0.035642027854919434, "step": 6440 }, { "epoch": 0.8708025619792811, "grad_norm": 0.08261965215206146, "learning_rate": 1.4075534258152667e-06, "loss": 0.05670452117919922, "step": 6441 }, { "epoch": 0.870937758775117, "grad_norm": 0.0936427116394043, "learning_rate": 1.4046601377916673e-06, "loss": 0.08454561233520508, "step": 6442 }, { "epoch": 0.871072955570953, "grad_norm": 0.18029776215553284, "learning_rate": 1.4017696803903246e-06, "loss": 0.07367730140686035, "step": 6443 }, { "epoch": 0.871208152366789, "grad_norm": 0.13922595977783203, "learning_rate": 1.3988820542130504e-06, "loss": 0.08683896064758301, "step": 6444 }, { "epoch": 0.8713433491626248, "grad_norm": 0.12686164677143097, "learning_rate": 1.395997259861067e-06, "loss": 0.07768511772155762, "step": 6445 }, { "epoch": 0.8714785459584607, "grad_norm": 0.14138749241828918, "learning_rate": 1.3931152979349926e-06, "loss": 0.07920169830322266, "step": 6446 }, { "epoch": 0.8716137427542967, "grad_norm": 0.13059298694133759, "learning_rate": 1.3902361690348769e-06, "loss": 0.06594228744506836, "step": 6447 }, { "epoch": 0.8717489395501327, "grad_norm": 0.05394219607114792, "learning_rate": 1.3873598737601639e-06, "loss": 0.042543888092041016, "step": 6448 }, { "epoch": 0.8718841363459686, "grad_norm": 0.055598847568035126, "learning_rate": 1.3844864127097229e-06, "loss": 0.05002450942993164, "step": 6449 }, { "epoch": 0.8720193331418046, "grad_norm": 0.12051721662282944, "learning_rate": 1.3816157864818151e-06, "loss": 0.06903493404388428, "step": 6450 }, { "epoch": 0.8721545299376405, "grad_norm": 0.040695756673812866, "learning_rate": 1.3787479956741194e-06, "loss": 0.049883127212524414, "step": 6451 }, { "epoch": 0.8722897267334764, "grad_norm": 0.07189293950796127, "learning_rate": 1.3758830408837314e-06, "loss": 0.06341159343719482, "step": 6452 }, { "epoch": 0.8724249235293123, "grad_norm": 0.0494973286986351, "learning_rate": 1.3730209227071439e-06, "loss": 0.04201161861419678, "step": 6453 }, { "epoch": 0.8725601203251483, "grad_norm": 0.13717636466026306, "learning_rate": 1.3701616417402734e-06, "loss": 0.09896183013916016, "step": 6454 }, { "epoch": 0.8726953171209842, "grad_norm": 0.06824973225593567, "learning_rate": 1.367305198578429e-06, "loss": 0.0618937611579895, "step": 6455 }, { "epoch": 0.8728305139168202, "grad_norm": 0.07186117023229599, "learning_rate": 1.36445159381634e-06, "loss": 0.05238616466522217, "step": 6456 }, { "epoch": 0.8729657107126562, "grad_norm": 0.16033010184764862, "learning_rate": 1.361600828048144e-06, "loss": 0.07703065872192383, "step": 6457 }, { "epoch": 0.873100907508492, "grad_norm": 0.06869401782751083, "learning_rate": 1.3587529018673816e-06, "loss": 0.05274546146392822, "step": 6458 }, { "epoch": 0.873236104304328, "grad_norm": 0.1351899653673172, "learning_rate": 1.3559078158670152e-06, "loss": 0.07019662857055664, "step": 6459 }, { "epoch": 0.8733713011001639, "grad_norm": 0.12785902619361877, "learning_rate": 1.353065570639394e-06, "loss": 0.06765604019165039, "step": 6460 }, { "epoch": 0.8735064978959999, "grad_norm": 0.094843789935112, "learning_rate": 1.3502261667763e-06, "loss": 0.04578077793121338, "step": 6461 }, { "epoch": 0.8736416946918358, "grad_norm": 0.0506066232919693, "learning_rate": 1.3473896048689067e-06, "loss": 0.04484367370605469, "step": 6462 }, { "epoch": 0.8737768914876718, "grad_norm": 0.05722508579492569, "learning_rate": 1.3445558855078017e-06, "loss": 0.04959213733673096, "step": 6463 }, { "epoch": 0.8739120882835076, "grad_norm": 0.07660440355539322, "learning_rate": 1.3417250092829814e-06, "loss": 0.0594487190246582, "step": 6464 }, { "epoch": 0.8740472850793436, "grad_norm": 0.09044113755226135, "learning_rate": 1.338896976783846e-06, "loss": 0.058452486991882324, "step": 6465 }, { "epoch": 0.8741824818751796, "grad_norm": 0.1256711184978485, "learning_rate": 1.336071788599213e-06, "loss": 0.06017589569091797, "step": 6466 }, { "epoch": 0.8743176786710155, "grad_norm": 0.09011678397655487, "learning_rate": 1.3332494453172982e-06, "loss": 0.08049631118774414, "step": 6467 }, { "epoch": 0.8744528754668515, "grad_norm": 0.07490069419145584, "learning_rate": 1.3304299475257287e-06, "loss": 0.06297492980957031, "step": 6468 }, { "epoch": 0.8745880722626874, "grad_norm": 0.09171540290117264, "learning_rate": 1.3276132958115394e-06, "loss": 0.058758556842803955, "step": 6469 }, { "epoch": 0.8747232690585233, "grad_norm": 0.05838421359658241, "learning_rate": 1.32479949076117e-06, "loss": 0.05766165256500244, "step": 6470 }, { "epoch": 0.8748584658543592, "grad_norm": 0.11151295155286789, "learning_rate": 1.3219885329604747e-06, "loss": 0.05652773380279541, "step": 6471 }, { "epoch": 0.8749936626501952, "grad_norm": 0.060704734176397324, "learning_rate": 1.319180422994709e-06, "loss": 0.03921103477478027, "step": 6472 }, { "epoch": 0.8751288594460311, "grad_norm": 0.07868202775716782, "learning_rate": 1.3163751614485287e-06, "loss": 0.07724672555923462, "step": 6473 }, { "epoch": 0.8752640562418671, "grad_norm": 0.07434491068124771, "learning_rate": 1.3135727489060113e-06, "loss": 0.058353424072265625, "step": 6474 }, { "epoch": 0.875399253037703, "grad_norm": 0.09557139873504639, "learning_rate": 1.3107731859506317e-06, "loss": 0.07402229309082031, "step": 6475 }, { "epoch": 0.8755344498335389, "grad_norm": 0.09994301199913025, "learning_rate": 1.3079764731652772e-06, "loss": 0.05762887001037598, "step": 6476 }, { "epoch": 0.8756696466293749, "grad_norm": 0.07793942093849182, "learning_rate": 1.3051826111322368e-06, "loss": 0.07263338565826416, "step": 6477 }, { "epoch": 0.8758048434252108, "grad_norm": 0.07906635850667953, "learning_rate": 1.3023916004332021e-06, "loss": 0.06643915176391602, "step": 6478 }, { "epoch": 0.8759400402210468, "grad_norm": 0.09959713369607925, "learning_rate": 1.2996034416492847e-06, "loss": 0.06354546546936035, "step": 6479 }, { "epoch": 0.8760752370168827, "grad_norm": 0.055056583136320114, "learning_rate": 1.2968181353609854e-06, "loss": 0.05894780158996582, "step": 6480 }, { "epoch": 0.8762104338127187, "grad_norm": 0.07187693566083908, "learning_rate": 1.2940356821482285e-06, "loss": 0.04204225540161133, "step": 6481 }, { "epoch": 0.8763456306085545, "grad_norm": 0.06411078572273254, "learning_rate": 1.291256082590334e-06, "loss": 0.05649673938751221, "step": 6482 }, { "epoch": 0.8764808274043905, "grad_norm": 0.14113785326480865, "learning_rate": 1.2884793372660208e-06, "loss": 0.07549786567687988, "step": 6483 }, { "epoch": 0.8766160242002264, "grad_norm": 0.05166800692677498, "learning_rate": 1.285705446753433e-06, "loss": 0.0476759672164917, "step": 6484 }, { "epoch": 0.8767512209960624, "grad_norm": 0.07620629668235779, "learning_rate": 1.2829344116301e-06, "loss": 0.06698322296142578, "step": 6485 }, { "epoch": 0.8768864177918984, "grad_norm": 0.11518441140651703, "learning_rate": 1.2801662324729774e-06, "loss": 0.06765782833099365, "step": 6486 }, { "epoch": 0.8770216145877343, "grad_norm": 0.06900651752948761, "learning_rate": 1.2774009098584055e-06, "loss": 0.05993431806564331, "step": 6487 }, { "epoch": 0.8771568113835702, "grad_norm": 0.09508582204580307, "learning_rate": 1.274638444362139e-06, "loss": 0.08182764053344727, "step": 6488 }, { "epoch": 0.8772920081794061, "grad_norm": 0.07269113510847092, "learning_rate": 1.2718788365593443e-06, "loss": 0.06250470876693726, "step": 6489 }, { "epoch": 0.8774272049752421, "grad_norm": 0.05016093701124191, "learning_rate": 1.26912208702458e-06, "loss": 0.04906296730041504, "step": 6490 }, { "epoch": 0.877562401771078, "grad_norm": 0.12695634365081787, "learning_rate": 1.2663681963318242e-06, "loss": 0.09316396713256836, "step": 6491 }, { "epoch": 0.877697598566914, "grad_norm": 0.06621631234884262, "learning_rate": 1.2636171650544443e-06, "loss": 0.0663766860961914, "step": 6492 }, { "epoch": 0.8778327953627499, "grad_norm": 0.11906833946704865, "learning_rate": 1.260868993765219e-06, "loss": 0.08048009872436523, "step": 6493 }, { "epoch": 0.8779679921585858, "grad_norm": 0.10896174609661102, "learning_rate": 1.258123683036339e-06, "loss": 0.06722664833068848, "step": 6494 }, { "epoch": 0.8781031889544217, "grad_norm": 0.0913810133934021, "learning_rate": 1.2553812334393872e-06, "loss": 0.0606541633605957, "step": 6495 }, { "epoch": 0.8782383857502577, "grad_norm": 0.0818769559264183, "learning_rate": 1.2526416455453582e-06, "loss": 0.058750152587890625, "step": 6496 }, { "epoch": 0.8783735825460937, "grad_norm": 0.1103515625, "learning_rate": 1.249904919924646e-06, "loss": 0.06089746952056885, "step": 6497 }, { "epoch": 0.8785087793419296, "grad_norm": 0.06198696419596672, "learning_rate": 1.2471710571470579e-06, "loss": 0.044019997119903564, "step": 6498 }, { "epoch": 0.8786439761377656, "grad_norm": 0.0762113630771637, "learning_rate": 1.2444400577817922e-06, "loss": 0.0538862943649292, "step": 6499 }, { "epoch": 0.8787791729336015, "grad_norm": 0.08310767263174057, "learning_rate": 1.2417119223974621e-06, "loss": 0.042129844427108765, "step": 6500 }, { "epoch": 0.8789143697294374, "grad_norm": 0.1790885329246521, "learning_rate": 1.2389866515620768e-06, "loss": 0.07005803287029266, "step": 6501 }, { "epoch": 0.8790495665252733, "grad_norm": 0.1959482580423355, "learning_rate": 1.2362642458430505e-06, "loss": 0.10361224412918091, "step": 6502 }, { "epoch": 0.8791847633211093, "grad_norm": 0.08510959893465042, "learning_rate": 1.2335447058072103e-06, "loss": 0.05635261535644531, "step": 6503 }, { "epoch": 0.8793199601169452, "grad_norm": 0.04887179285287857, "learning_rate": 1.230828032020771e-06, "loss": 0.03641188144683838, "step": 6504 }, { "epoch": 0.8794551569127812, "grad_norm": 0.07791148126125336, "learning_rate": 1.2281142250493638e-06, "loss": 0.05992138385772705, "step": 6505 }, { "epoch": 0.8795903537086172, "grad_norm": 0.07690058648586273, "learning_rate": 1.225403285458015e-06, "loss": 0.07168054580688477, "step": 6506 }, { "epoch": 0.879725550504453, "grad_norm": 0.1491979956626892, "learning_rate": 1.2226952138111546e-06, "loss": 0.07484722137451172, "step": 6507 }, { "epoch": 0.879860747300289, "grad_norm": 0.13239221274852753, "learning_rate": 1.219990010672622e-06, "loss": 0.06789350509643555, "step": 6508 }, { "epoch": 0.8799959440961249, "grad_norm": 0.13040296733379364, "learning_rate": 1.2172876766056562e-06, "loss": 0.05312696099281311, "step": 6509 }, { "epoch": 0.8801311408919609, "grad_norm": 0.12148870527744293, "learning_rate": 1.2145882121728906e-06, "loss": 0.059514760971069336, "step": 6510 }, { "epoch": 0.8802663376877968, "grad_norm": 0.1337987184524536, "learning_rate": 1.2118916179363727e-06, "loss": 0.08131563663482666, "step": 6511 }, { "epoch": 0.8804015344836328, "grad_norm": 0.0752175822854042, "learning_rate": 1.209197894457546e-06, "loss": 0.046537160873413086, "step": 6512 }, { "epoch": 0.8805367312794686, "grad_norm": 0.11400206387042999, "learning_rate": 1.2065070422972606e-06, "loss": 0.05353587865829468, "step": 6513 }, { "epoch": 0.8806719280753046, "grad_norm": 0.10037486255168915, "learning_rate": 1.2038190620157685e-06, "loss": 0.07340002059936523, "step": 6514 }, { "epoch": 0.8808071248711405, "grad_norm": 0.061386581510305405, "learning_rate": 1.2011339541727117e-06, "loss": 0.04145979881286621, "step": 6515 }, { "epoch": 0.8809423216669765, "grad_norm": 0.05132085084915161, "learning_rate": 1.198451719327155e-06, "loss": 0.049624860286712646, "step": 6516 }, { "epoch": 0.8810775184628125, "grad_norm": 0.06391947716474533, "learning_rate": 1.1957723580375447e-06, "loss": 0.047849178314208984, "step": 6517 }, { "epoch": 0.8812127152586484, "grad_norm": 0.04655515402555466, "learning_rate": 1.193095870861748e-06, "loss": 0.04912972450256348, "step": 6518 }, { "epoch": 0.8813479120544843, "grad_norm": 0.10650839656591415, "learning_rate": 1.1904222583570156e-06, "loss": 0.04141688346862793, "step": 6519 }, { "epoch": 0.8814831088503202, "grad_norm": 0.10529980808496475, "learning_rate": 1.1877515210800077e-06, "loss": 0.0585789680480957, "step": 6520 }, { "epoch": 0.8816183056461562, "grad_norm": 0.09063461422920227, "learning_rate": 1.1850836595867925e-06, "loss": 0.04475545883178711, "step": 6521 }, { "epoch": 0.8817535024419921, "grad_norm": 0.11571851372718811, "learning_rate": 1.1824186744328259e-06, "loss": 0.08102607727050781, "step": 6522 }, { "epoch": 0.8818886992378281, "grad_norm": 0.12886452674865723, "learning_rate": 1.179756566172982e-06, "loss": 0.07580780982971191, "step": 6523 }, { "epoch": 0.882023896033664, "grad_norm": 0.05057654529809952, "learning_rate": 1.177097335361516e-06, "loss": 0.03611111640930176, "step": 6524 }, { "epoch": 0.8821590928294999, "grad_norm": 0.08624257147312164, "learning_rate": 1.1744409825520969e-06, "loss": 0.07086181640625, "step": 6525 }, { "epoch": 0.8822942896253358, "grad_norm": 0.08988473564386368, "learning_rate": 1.171787508297792e-06, "loss": 0.07788324356079102, "step": 6526 }, { "epoch": 0.8824294864211718, "grad_norm": 0.07482783496379852, "learning_rate": 1.1691369131510676e-06, "loss": 0.039360642433166504, "step": 6527 }, { "epoch": 0.8825646832170078, "grad_norm": 0.03648466616868973, "learning_rate": 1.1664891976637992e-06, "loss": 0.02965986728668213, "step": 6528 }, { "epoch": 0.8826998800128437, "grad_norm": 0.0596202127635479, "learning_rate": 1.1638443623872442e-06, "loss": 0.05133655667304993, "step": 6529 }, { "epoch": 0.8828350768086797, "grad_norm": 0.11942271143198013, "learning_rate": 1.1612024078720752e-06, "loss": 0.054498091340065, "step": 6530 }, { "epoch": 0.8829702736045155, "grad_norm": 0.05281819775700569, "learning_rate": 1.1585633346683655e-06, "loss": 0.05314207077026367, "step": 6531 }, { "epoch": 0.8831054704003515, "grad_norm": 0.048382170498371124, "learning_rate": 1.155927143325579e-06, "loss": 0.04397740960121155, "step": 6532 }, { "epoch": 0.8832406671961874, "grad_norm": 0.05771726742386818, "learning_rate": 1.1532938343925887e-06, "loss": 0.049910902976989746, "step": 6533 }, { "epoch": 0.8833758639920234, "grad_norm": 0.04804328456521034, "learning_rate": 1.1506634084176587e-06, "loss": 0.04077553749084473, "step": 6534 }, { "epoch": 0.8835110607878593, "grad_norm": 0.0726543515920639, "learning_rate": 1.148035865948463e-06, "loss": 0.06297606229782104, "step": 6535 }, { "epoch": 0.8836462575836953, "grad_norm": 0.11173998564481735, "learning_rate": 1.1454112075320688e-06, "loss": 0.05508708953857422, "step": 6536 }, { "epoch": 0.8837814543795312, "grad_norm": 0.10670069605112076, "learning_rate": 1.1427894337149426e-06, "loss": 0.09070062637329102, "step": 6537 }, { "epoch": 0.8839166511753671, "grad_norm": 0.0660172700881958, "learning_rate": 1.1401705450429506e-06, "loss": 0.07214498519897461, "step": 6538 }, { "epoch": 0.8840518479712031, "grad_norm": 0.022525470703840256, "learning_rate": 1.1375545420613586e-06, "loss": 0.019761621952056885, "step": 6539 }, { "epoch": 0.884187044767039, "grad_norm": 0.07742978632450104, "learning_rate": 1.1349414253148377e-06, "loss": 0.051624417304992676, "step": 6540 }, { "epoch": 0.884322241562875, "grad_norm": 0.08196750283241272, "learning_rate": 1.1323311953474524e-06, "loss": 0.07137250900268555, "step": 6541 }, { "epoch": 0.8844574383587109, "grad_norm": 0.07884689420461655, "learning_rate": 1.1297238527026582e-06, "loss": 0.0605010986328125, "step": 6542 }, { "epoch": 0.8845926351545469, "grad_norm": 0.05428411066532135, "learning_rate": 1.1271193979233258e-06, "loss": 0.056774139404296875, "step": 6543 }, { "epoch": 0.8847278319503827, "grad_norm": 0.04940522834658623, "learning_rate": 1.1245178315517113e-06, "loss": 0.05476069450378418, "step": 6544 }, { "epoch": 0.8848630287462187, "grad_norm": 0.10771366953849792, "learning_rate": 1.1219191541294798e-06, "loss": 0.07926285266876221, "step": 6545 }, { "epoch": 0.8849982255420547, "grad_norm": 0.12758027017116547, "learning_rate": 1.1193233661976887e-06, "loss": 0.0677649974822998, "step": 6546 }, { "epoch": 0.8851334223378906, "grad_norm": 0.15208247303962708, "learning_rate": 1.1167304682967904e-06, "loss": 0.06079322099685669, "step": 6547 }, { "epoch": 0.8852686191337266, "grad_norm": 0.05892002582550049, "learning_rate": 1.114140460966645e-06, "loss": 0.05365419387817383, "step": 6548 }, { "epoch": 0.8854038159295625, "grad_norm": 0.07697014510631561, "learning_rate": 1.111553344746501e-06, "loss": 0.06839609146118164, "step": 6549 }, { "epoch": 0.8855390127253984, "grad_norm": 0.0700211301445961, "learning_rate": 1.1089691201750174e-06, "loss": 0.045278072357177734, "step": 6550 }, { "epoch": 0.8856742095212343, "grad_norm": 0.1417781412601471, "learning_rate": 1.106387787790239e-06, "loss": 0.08942627906799316, "step": 6551 }, { "epoch": 0.8858094063170703, "grad_norm": 0.1318359375, "learning_rate": 1.1038093481296091e-06, "loss": 0.09242439270019531, "step": 6552 }, { "epoch": 0.8859446031129062, "grad_norm": 0.055270396173000336, "learning_rate": 1.10123380172998e-06, "loss": 0.04508829116821289, "step": 6553 }, { "epoch": 0.8860797999087422, "grad_norm": 0.08247637003660202, "learning_rate": 1.098661149127586e-06, "loss": 0.05121803283691406, "step": 6554 }, { "epoch": 0.8862149967045782, "grad_norm": 0.05553044006228447, "learning_rate": 1.0960913908580788e-06, "loss": 0.050912559032440186, "step": 6555 }, { "epoch": 0.886350193500414, "grad_norm": 0.04795118793845177, "learning_rate": 1.0935245274564852e-06, "loss": 0.04163360595703125, "step": 6556 }, { "epoch": 0.88648539029625, "grad_norm": 0.12914790213108063, "learning_rate": 1.0909605594572413e-06, "loss": 0.050554513931274414, "step": 6557 }, { "epoch": 0.8866205870920859, "grad_norm": 0.1704612374305725, "learning_rate": 1.0883994873941816e-06, "loss": 0.09965455532073975, "step": 6558 }, { "epoch": 0.8867557838879219, "grad_norm": 0.06700973957777023, "learning_rate": 1.0858413118005345e-06, "loss": 0.0470738410949707, "step": 6559 }, { "epoch": 0.8868909806837578, "grad_norm": 0.19251349568367004, "learning_rate": 1.0832860332089288e-06, "loss": 0.06895112991333008, "step": 6560 }, { "epoch": 0.8870261774795938, "grad_norm": 0.10217111557722092, "learning_rate": 1.0807336521513828e-06, "loss": 0.043393611907958984, "step": 6561 }, { "epoch": 0.8871613742754296, "grad_norm": 0.07085638493299484, "learning_rate": 1.0781841691593142e-06, "loss": 0.047635555267333984, "step": 6562 }, { "epoch": 0.8872965710712656, "grad_norm": 0.11534271389245987, "learning_rate": 1.0756375847635435e-06, "loss": 0.08743667602539062, "step": 6563 }, { "epoch": 0.8874317678671015, "grad_norm": 0.042759209871292114, "learning_rate": 1.0730938994942818e-06, "loss": 0.04030489921569824, "step": 6564 }, { "epoch": 0.8875669646629375, "grad_norm": 0.07410503923892975, "learning_rate": 1.070553113881137e-06, "loss": 0.06687331199645996, "step": 6565 }, { "epoch": 0.8877021614587735, "grad_norm": 0.06275312602519989, "learning_rate": 1.0680152284531158e-06, "loss": 0.040627479553222656, "step": 6566 }, { "epoch": 0.8878373582546094, "grad_norm": 0.0754169449210167, "learning_rate": 1.0654802437386157e-06, "loss": 0.045983314514160156, "step": 6567 }, { "epoch": 0.8879725550504453, "grad_norm": 0.06962469220161438, "learning_rate": 1.062948160265438e-06, "loss": 0.057654619216918945, "step": 6568 }, { "epoch": 0.8881077518462812, "grad_norm": 0.0985807329416275, "learning_rate": 1.0604189785607772e-06, "loss": 0.06784486770629883, "step": 6569 }, { "epoch": 0.8882429486421172, "grad_norm": 0.13257592916488647, "learning_rate": 1.0578926991512171e-06, "loss": 0.07983052730560303, "step": 6570 }, { "epoch": 0.8883781454379531, "grad_norm": 0.08442649245262146, "learning_rate": 1.0553693225627458e-06, "loss": 0.05161643028259277, "step": 6571 }, { "epoch": 0.8885133422337891, "grad_norm": 0.09726759046316147, "learning_rate": 1.0528488493207444e-06, "loss": 0.06853289902210236, "step": 6572 }, { "epoch": 0.888648539029625, "grad_norm": 0.08419055491685867, "learning_rate": 1.0503312799499898e-06, "loss": 0.08247542381286621, "step": 6573 }, { "epoch": 0.8887837358254609, "grad_norm": 0.06069283187389374, "learning_rate": 1.0478166149746476e-06, "loss": 0.052207231521606445, "step": 6574 }, { "epoch": 0.8889189326212968, "grad_norm": 0.05220550671219826, "learning_rate": 1.0453048549182892e-06, "loss": 0.040143370628356934, "step": 6575 }, { "epoch": 0.8890541294171328, "grad_norm": 0.12548846006393433, "learning_rate": 1.0427960003038744e-06, "loss": 0.05970048904418945, "step": 6576 }, { "epoch": 0.8891893262129688, "grad_norm": 0.10632126778364182, "learning_rate": 1.040290051653764e-06, "loss": 0.06353044509887695, "step": 6577 }, { "epoch": 0.8893245230088047, "grad_norm": 0.05560656636953354, "learning_rate": 1.0377870094897085e-06, "loss": 0.04810154438018799, "step": 6578 }, { "epoch": 0.8894597198046407, "grad_norm": 0.07954412698745728, "learning_rate": 1.0352868743328497e-06, "loss": 0.048441171646118164, "step": 6579 }, { "epoch": 0.8895949166004765, "grad_norm": 0.06586475670337677, "learning_rate": 1.032789646703733e-06, "loss": 0.04835247993469238, "step": 6580 }, { "epoch": 0.8897301133963125, "grad_norm": 0.10117357224225998, "learning_rate": 1.0302953271222938e-06, "loss": 0.07866621017456055, "step": 6581 }, { "epoch": 0.8898653101921484, "grad_norm": 0.12333240360021591, "learning_rate": 1.0278039161078634e-06, "loss": 0.07080721855163574, "step": 6582 }, { "epoch": 0.8900005069879844, "grad_norm": 0.10532978177070618, "learning_rate": 1.0253154141791705e-06, "loss": 0.09575176239013672, "step": 6583 }, { "epoch": 0.8901357037838203, "grad_norm": 0.06972696632146835, "learning_rate": 1.0228298218543253e-06, "loss": 0.04833173751831055, "step": 6584 }, { "epoch": 0.8902709005796563, "grad_norm": 0.10652590543031693, "learning_rate": 1.020347139650849e-06, "loss": 0.0778203010559082, "step": 6585 }, { "epoch": 0.8904060973754923, "grad_norm": 0.08605887740850449, "learning_rate": 1.0178673680856448e-06, "loss": 0.05930304527282715, "step": 6586 }, { "epoch": 0.8905412941713281, "grad_norm": 0.07626832276582718, "learning_rate": 1.0153905076750196e-06, "loss": 0.07395672798156738, "step": 6587 }, { "epoch": 0.8906764909671641, "grad_norm": 0.06566493213176727, "learning_rate": 1.0129165589346644e-06, "loss": 0.05273330211639404, "step": 6588 }, { "epoch": 0.890811687763, "grad_norm": 0.06652422994375229, "learning_rate": 1.0104455223796688e-06, "loss": 0.060160696506500244, "step": 6589 }, { "epoch": 0.890946884558836, "grad_norm": 0.07430104911327362, "learning_rate": 1.0079773985245178e-06, "loss": 0.05275726318359375, "step": 6590 }, { "epoch": 0.8910820813546719, "grad_norm": 0.10063283890485764, "learning_rate": 1.0055121878830837e-06, "loss": 0.0899662971496582, "step": 6591 }, { "epoch": 0.8912172781505079, "grad_norm": 0.06059126555919647, "learning_rate": 1.0030498909686458e-06, "loss": 0.05206775665283203, "step": 6592 }, { "epoch": 0.8913524749463437, "grad_norm": 0.05247807502746582, "learning_rate": 1.0005905082938593e-06, "loss": 0.03754448890686035, "step": 6593 }, { "epoch": 0.8914876717421797, "grad_norm": 0.05807942524552345, "learning_rate": 9.981340403707794e-07, "loss": 0.03816002607345581, "step": 6594 }, { "epoch": 0.8916228685380156, "grad_norm": 0.061262134462594986, "learning_rate": 9.956804877108638e-07, "loss": 0.043181657791137695, "step": 6595 }, { "epoch": 0.8917580653338516, "grad_norm": 0.07644299417734146, "learning_rate": 9.932298508249488e-07, "loss": 0.06563282012939453, "step": 6596 }, { "epoch": 0.8918932621296876, "grad_norm": 0.07854171842336655, "learning_rate": 9.907821302232729e-07, "loss": 0.06135678291320801, "step": 6597 }, { "epoch": 0.8920284589255235, "grad_norm": 0.0681791827082634, "learning_rate": 9.883373264154633e-07, "loss": 0.046141624450683594, "step": 6598 }, { "epoch": 0.8921636557213594, "grad_norm": 0.055947039276361465, "learning_rate": 9.858954399105397e-07, "loss": 0.04207766056060791, "step": 6599 }, { "epoch": 0.8922988525171953, "grad_norm": 0.06317632645368576, "learning_rate": 9.834564712169202e-07, "loss": 0.05024838447570801, "step": 6600 }, { "epoch": 0.8924340493130313, "grad_norm": 0.09453734755516052, "learning_rate": 9.81020420842409e-07, "loss": 0.062131643295288086, "step": 6601 }, { "epoch": 0.8925692461088672, "grad_norm": 0.0783371552824974, "learning_rate": 9.785872892942033e-07, "loss": 0.055925726890563965, "step": 6602 }, { "epoch": 0.8927044429047032, "grad_norm": 0.09139455109834671, "learning_rate": 9.761570770788964e-07, "loss": 0.07199573516845703, "step": 6603 }, { "epoch": 0.8928396397005391, "grad_norm": 0.055934008210897446, "learning_rate": 9.737297847024685e-07, "loss": 0.05993682146072388, "step": 6604 }, { "epoch": 0.892974836496375, "grad_norm": 0.11721453815698624, "learning_rate": 9.713054126702968e-07, "loss": 0.04857802391052246, "step": 6605 }, { "epoch": 0.893110033292211, "grad_norm": 0.09238698333501816, "learning_rate": 9.688839614871497e-07, "loss": 0.08888101577758789, "step": 6606 }, { "epoch": 0.8932452300880469, "grad_norm": 0.10019996017217636, "learning_rate": 9.664654316571852e-07, "loss": 0.10535812377929688, "step": 6607 }, { "epoch": 0.8933804268838829, "grad_norm": 0.04973414167761803, "learning_rate": 9.640498236839507e-07, "loss": 0.05350446701049805, "step": 6608 }, { "epoch": 0.8935156236797188, "grad_norm": 0.09560219943523407, "learning_rate": 9.616371380703953e-07, "loss": 0.06395101547241211, "step": 6609 }, { "epoch": 0.8936508204755548, "grad_norm": 0.06275220215320587, "learning_rate": 9.592273753188507e-07, "loss": 0.06680488586425781, "step": 6610 }, { "epoch": 0.8937860172713906, "grad_norm": 0.065501369535923, "learning_rate": 9.568205359310372e-07, "loss": 0.04054737091064453, "step": 6611 }, { "epoch": 0.8939212140672266, "grad_norm": 0.12545578181743622, "learning_rate": 9.544166204080772e-07, "loss": 0.054447293281555176, "step": 6612 }, { "epoch": 0.8940564108630625, "grad_norm": 0.10772591829299927, "learning_rate": 9.520156292504739e-07, "loss": 0.0805930495262146, "step": 6613 }, { "epoch": 0.8941916076588985, "grad_norm": 0.07085982710123062, "learning_rate": 9.496175629581322e-07, "loss": 0.04218930006027222, "step": 6614 }, { "epoch": 0.8943268044547344, "grad_norm": 0.10710301250219345, "learning_rate": 9.472224220303427e-07, "loss": 0.059938594698905945, "step": 6615 }, { "epoch": 0.8944620012505704, "grad_norm": 0.06642382591962814, "learning_rate": 9.448302069657799e-07, "loss": 0.048637986183166504, "step": 6616 }, { "epoch": 0.8945971980464063, "grad_norm": 0.07268780469894409, "learning_rate": 9.424409182625205e-07, "loss": 0.06392055749893188, "step": 6617 }, { "epoch": 0.8947323948422422, "grad_norm": 0.1987529844045639, "learning_rate": 9.40054556418023e-07, "loss": 0.07520723342895508, "step": 6618 }, { "epoch": 0.8948675916380782, "grad_norm": 0.09070106595754623, "learning_rate": 9.376711219291483e-07, "loss": 0.05606275796890259, "step": 6619 }, { "epoch": 0.8950027884339141, "grad_norm": 0.0709957405924797, "learning_rate": 9.352906152921348e-07, "loss": 0.060788631439208984, "step": 6620 }, { "epoch": 0.8951379852297501, "grad_norm": 0.09190468490123749, "learning_rate": 9.32913037002614e-07, "loss": 0.04943346977233887, "step": 6621 }, { "epoch": 0.895273182025586, "grad_norm": 0.15285420417785645, "learning_rate": 9.30538387555615e-07, "loss": 0.049030959606170654, "step": 6622 }, { "epoch": 0.8954083788214219, "grad_norm": 0.0674080178141594, "learning_rate": 9.281666674455508e-07, "loss": 0.046922922134399414, "step": 6623 }, { "epoch": 0.8955435756172578, "grad_norm": 0.14944292604923248, "learning_rate": 9.257978771662295e-07, "loss": 0.06735897064208984, "step": 6624 }, { "epoch": 0.8956787724130938, "grad_norm": 0.07170566916465759, "learning_rate": 9.234320172108418e-07, "loss": 0.07200336456298828, "step": 6625 }, { "epoch": 0.8958139692089298, "grad_norm": 0.0958380252122879, "learning_rate": 9.210690880719719e-07, "loss": 0.0538938045501709, "step": 6626 }, { "epoch": 0.8959491660047657, "grad_norm": 0.13218814134597778, "learning_rate": 9.187090902415962e-07, "loss": 0.08578038215637207, "step": 6627 }, { "epoch": 0.8960843628006017, "grad_norm": 0.1322334110736847, "learning_rate": 9.163520242110784e-07, "loss": 0.06138420104980469, "step": 6628 }, { "epoch": 0.8962195595964375, "grad_norm": 0.041208699345588684, "learning_rate": 9.13997890471176e-07, "loss": 0.03258466720581055, "step": 6629 }, { "epoch": 0.8963547563922735, "grad_norm": 0.0809289962053299, "learning_rate": 9.116466895120251e-07, "loss": 0.06803727149963379, "step": 6630 }, { "epoch": 0.8964899531881094, "grad_norm": 0.04746970906853676, "learning_rate": 9.092984218231609e-07, "loss": 0.044019997119903564, "step": 6631 }, { "epoch": 0.8966251499839454, "grad_norm": 0.07196468114852905, "learning_rate": 9.069530878935072e-07, "loss": 0.059203267097473145, "step": 6632 }, { "epoch": 0.8967603467797813, "grad_norm": 0.18243137001991272, "learning_rate": 9.046106882113753e-07, "loss": 0.09470769762992859, "step": 6633 }, { "epoch": 0.8968955435756173, "grad_norm": 0.05943652614951134, "learning_rate": 9.022712232644631e-07, "loss": 0.03992493450641632, "step": 6634 }, { "epoch": 0.8970307403714533, "grad_norm": 0.04794257879257202, "learning_rate": 8.999346935398611e-07, "loss": 0.05938720703125, "step": 6635 }, { "epoch": 0.8971659371672891, "grad_norm": 0.07320133596658707, "learning_rate": 8.976010995240436e-07, "loss": 0.07886743545532227, "step": 6636 }, { "epoch": 0.897301133963125, "grad_norm": 0.0737534761428833, "learning_rate": 8.952704417028818e-07, "loss": 0.05039691925048828, "step": 6637 }, { "epoch": 0.897436330758961, "grad_norm": 0.07039651274681091, "learning_rate": 8.929427205616308e-07, "loss": 0.055694580078125, "step": 6638 }, { "epoch": 0.897571527554797, "grad_norm": 0.05775744095444679, "learning_rate": 8.906179365849332e-07, "loss": 0.04315638542175293, "step": 6639 }, { "epoch": 0.8977067243506329, "grad_norm": 0.06370531767606735, "learning_rate": 8.882960902568216e-07, "loss": 0.027716398239135742, "step": 6640 }, { "epoch": 0.8978419211464689, "grad_norm": 0.06135125458240509, "learning_rate": 8.85977182060716e-07, "loss": 0.05271148681640625, "step": 6641 }, { "epoch": 0.8979771179423047, "grad_norm": 0.0692668929696083, "learning_rate": 8.836612124794285e-07, "loss": 0.05684030055999756, "step": 6642 }, { "epoch": 0.8981123147381407, "grad_norm": 0.060235388576984406, "learning_rate": 8.813481819951502e-07, "loss": 0.051661014556884766, "step": 6643 }, { "epoch": 0.8982475115339766, "grad_norm": 0.16336274147033691, "learning_rate": 8.790380910894724e-07, "loss": 0.06594669818878174, "step": 6644 }, { "epoch": 0.8983827083298126, "grad_norm": 0.10090608149766922, "learning_rate": 8.767309402433671e-07, "loss": 0.07398748397827148, "step": 6645 }, { "epoch": 0.8985179051256486, "grad_norm": 0.09646004438400269, "learning_rate": 8.744267299371917e-07, "loss": 0.05701184272766113, "step": 6646 }, { "epoch": 0.8986531019214845, "grad_norm": 0.18584474921226501, "learning_rate": 8.721254606507023e-07, "loss": 0.09387397766113281, "step": 6647 }, { "epoch": 0.8987882987173204, "grad_norm": 0.060422707349061966, "learning_rate": 8.698271328630275e-07, "loss": 0.06333351135253906, "step": 6648 }, { "epoch": 0.8989234955131563, "grad_norm": 0.07320088893175125, "learning_rate": 8.675317470526961e-07, "loss": 0.06865859031677246, "step": 6649 }, { "epoch": 0.8990586923089923, "grad_norm": 0.09771262109279633, "learning_rate": 8.652393036976159e-07, "loss": 0.06720256805419922, "step": 6650 }, { "epoch": 0.8991938891048282, "grad_norm": 0.11702706664800644, "learning_rate": 8.629498032750916e-07, "loss": 0.05758178234100342, "step": 6651 }, { "epoch": 0.8993290859006642, "grad_norm": 0.08316123485565186, "learning_rate": 8.606632462618069e-07, "loss": 0.06345510482788086, "step": 6652 }, { "epoch": 0.8994642826965001, "grad_norm": 0.05326274037361145, "learning_rate": 8.583796331338311e-07, "loss": 0.04036378860473633, "step": 6653 }, { "epoch": 0.899599479492336, "grad_norm": 0.07759150117635727, "learning_rate": 8.560989643666306e-07, "loss": 0.06870555877685547, "step": 6654 }, { "epoch": 0.8997346762881719, "grad_norm": 0.07183264940977097, "learning_rate": 8.538212404350471e-07, "loss": 0.05275440216064453, "step": 6655 }, { "epoch": 0.8998698730840079, "grad_norm": 0.0721798911690712, "learning_rate": 8.515464618133228e-07, "loss": 0.07120466232299805, "step": 6656 }, { "epoch": 0.9000050698798439, "grad_norm": 0.12253816425800323, "learning_rate": 8.492746289750725e-07, "loss": 0.06766670942306519, "step": 6657 }, { "epoch": 0.9001402666756798, "grad_norm": 0.09368474781513214, "learning_rate": 8.470057423933026e-07, "loss": 0.05730867385864258, "step": 6658 }, { "epoch": 0.9002754634715158, "grad_norm": 0.04538887366652489, "learning_rate": 8.447398025404118e-07, "loss": 0.04840803146362305, "step": 6659 }, { "epoch": 0.9004106602673516, "grad_norm": 0.06308309733867645, "learning_rate": 8.42476809888178e-07, "loss": 0.046575188636779785, "step": 6660 }, { "epoch": 0.9005458570631876, "grad_norm": 0.05168364197015762, "learning_rate": 8.402167649077725e-07, "loss": 0.04607181251049042, "step": 6661 }, { "epoch": 0.9006810538590235, "grad_norm": 0.05424867570400238, "learning_rate": 8.379596680697454e-07, "loss": 0.04422760009765625, "step": 6662 }, { "epoch": 0.9008162506548595, "grad_norm": 0.11579970270395279, "learning_rate": 8.357055198440328e-07, "loss": 0.05030703544616699, "step": 6663 }, { "epoch": 0.9009514474506954, "grad_norm": 0.0502345971763134, "learning_rate": 8.334543206999673e-07, "loss": 0.04666423797607422, "step": 6664 }, { "epoch": 0.9010866442465314, "grad_norm": 0.1272699236869812, "learning_rate": 8.312060711062558e-07, "loss": 0.06950902938842773, "step": 6665 }, { "epoch": 0.9012218410423672, "grad_norm": 0.11976861953735352, "learning_rate": 8.289607715309988e-07, "loss": 0.06149446964263916, "step": 6666 }, { "epoch": 0.9013570378382032, "grad_norm": 0.054975979030132294, "learning_rate": 8.267184224416791e-07, "loss": 0.05180993676185608, "step": 6667 }, { "epoch": 0.9014922346340392, "grad_norm": 0.08684058487415314, "learning_rate": 8.244790243051614e-07, "loss": 0.07228899002075195, "step": 6668 }, { "epoch": 0.9016274314298751, "grad_norm": 0.05990104377269745, "learning_rate": 8.222425775877079e-07, "loss": 0.029060959815979004, "step": 6669 }, { "epoch": 0.9017626282257111, "grad_norm": 0.050528429448604584, "learning_rate": 8.200090827549527e-07, "loss": 0.052945494651794434, "step": 6670 }, { "epoch": 0.901897825021547, "grad_norm": 0.16809502243995667, "learning_rate": 8.17778540271924e-07, "loss": 0.0878763198852539, "step": 6671 }, { "epoch": 0.9020330218173829, "grad_norm": 0.11046073585748672, "learning_rate": 8.155509506030334e-07, "loss": 0.08097012341022491, "step": 6672 }, { "epoch": 0.9021682186132188, "grad_norm": 0.057846855372190475, "learning_rate": 8.133263142120717e-07, "loss": 0.0437619686126709, "step": 6673 }, { "epoch": 0.9023034154090548, "grad_norm": 0.09421905130147934, "learning_rate": 8.111046315622284e-07, "loss": 0.06803774833679199, "step": 6674 }, { "epoch": 0.9024386122048907, "grad_norm": 0.06651435047388077, "learning_rate": 8.088859031160633e-07, "loss": 0.06949138641357422, "step": 6675 }, { "epoch": 0.9025738090007267, "grad_norm": 0.11255306750535965, "learning_rate": 8.066701293355288e-07, "loss": 0.05358874797821045, "step": 6676 }, { "epoch": 0.9027090057965627, "grad_norm": 0.08829524368047714, "learning_rate": 8.044573106819625e-07, "loss": 0.08324098587036133, "step": 6677 }, { "epoch": 0.9028442025923986, "grad_norm": 0.04944612830877304, "learning_rate": 8.022474476160824e-07, "loss": 0.045194387435913086, "step": 6678 }, { "epoch": 0.9029793993882345, "grad_norm": 0.08622890710830688, "learning_rate": 8.000405405979988e-07, "loss": 0.07186532020568848, "step": 6679 }, { "epoch": 0.9031145961840704, "grad_norm": 0.057132501155138016, "learning_rate": 7.978365900871943e-07, "loss": 0.040940821170806885, "step": 6680 }, { "epoch": 0.9032497929799064, "grad_norm": 0.06894759088754654, "learning_rate": 7.956355965425482e-07, "loss": 0.0367276668548584, "step": 6681 }, { "epoch": 0.9033849897757423, "grad_norm": 0.044041674584150314, "learning_rate": 7.934375604223193e-07, "loss": 0.039381980895996094, "step": 6682 }, { "epoch": 0.9035201865715783, "grad_norm": 0.1334766298532486, "learning_rate": 7.912424821841463e-07, "loss": 0.06312200427055359, "step": 6683 }, { "epoch": 0.9036553833674142, "grad_norm": 0.1010698527097702, "learning_rate": 7.89050362285062e-07, "loss": 0.060697078704833984, "step": 6684 }, { "epoch": 0.9037905801632501, "grad_norm": 0.039662402123212814, "learning_rate": 7.868612011814713e-07, "loss": 0.038825154304504395, "step": 6685 }, { "epoch": 0.903925776959086, "grad_norm": 0.08480498194694519, "learning_rate": 7.846749993291746e-07, "loss": 0.06257915496826172, "step": 6686 }, { "epoch": 0.904060973754922, "grad_norm": 0.04940733313560486, "learning_rate": 7.824917571833445e-07, "loss": 0.04669523239135742, "step": 6687 }, { "epoch": 0.904196170550758, "grad_norm": 0.08223103731870651, "learning_rate": 7.80311475198554e-07, "loss": 0.05759322643280029, "step": 6688 }, { "epoch": 0.9043313673465939, "grad_norm": 0.07242028415203094, "learning_rate": 7.781341538287384e-07, "loss": 0.0670386552810669, "step": 6689 }, { "epoch": 0.9044665641424299, "grad_norm": 0.09505094587802887, "learning_rate": 7.759597935272316e-07, "loss": 0.07269632816314697, "step": 6690 }, { "epoch": 0.9046017609382657, "grad_norm": 0.06608331948518753, "learning_rate": 7.7378839474675e-07, "loss": 0.07195568084716797, "step": 6691 }, { "epoch": 0.9047369577341017, "grad_norm": 0.05378718301653862, "learning_rate": 7.716199579393851e-07, "loss": 0.03840494155883789, "step": 6692 }, { "epoch": 0.9048721545299376, "grad_norm": 0.07324188202619553, "learning_rate": 7.694544835566259e-07, "loss": 0.0505528450012207, "step": 6693 }, { "epoch": 0.9050073513257736, "grad_norm": 0.04650648683309555, "learning_rate": 7.672919720493249e-07, "loss": 0.041406214237213135, "step": 6694 }, { "epoch": 0.9051425481216095, "grad_norm": 0.11132923513650894, "learning_rate": 7.651324238677338e-07, "loss": 0.06971263885498047, "step": 6695 }, { "epoch": 0.9052777449174455, "grad_norm": 0.061683230102062225, "learning_rate": 7.629758394614828e-07, "loss": 0.055091023445129395, "step": 6696 }, { "epoch": 0.9054129417132813, "grad_norm": 0.11126754432916641, "learning_rate": 7.608222192795794e-07, "loss": 0.06579005718231201, "step": 6697 }, { "epoch": 0.9055481385091173, "grad_norm": 0.08219487965106964, "learning_rate": 7.586715637704284e-07, "loss": 0.08167386054992676, "step": 6698 }, { "epoch": 0.9056833353049533, "grad_norm": 0.058290623128414154, "learning_rate": 7.565238733817998e-07, "loss": 0.056861162185668945, "step": 6699 }, { "epoch": 0.9058185321007892, "grad_norm": 0.03432619944214821, "learning_rate": 7.543791485608542e-07, "loss": 0.03377723693847656, "step": 6700 }, { "epoch": 0.9059537288966252, "grad_norm": 0.0552808940410614, "learning_rate": 7.52237389754138e-07, "loss": 0.04391968250274658, "step": 6701 }, { "epoch": 0.9060889256924611, "grad_norm": 0.04706662893295288, "learning_rate": 7.500985974075758e-07, "loss": 0.03809022903442383, "step": 6702 }, { "epoch": 0.906224122488297, "grad_norm": 0.06526406854391098, "learning_rate": 7.479627719664767e-07, "loss": 0.04655647277832031, "step": 6703 }, { "epoch": 0.9063593192841329, "grad_norm": 0.06305108964443207, "learning_rate": 7.458299138755281e-07, "loss": 0.06414985656738281, "step": 6704 }, { "epoch": 0.9064945160799689, "grad_norm": 0.050837915390729904, "learning_rate": 7.437000235788033e-07, "loss": 0.04151761531829834, "step": 6705 }, { "epoch": 0.9066297128758048, "grad_norm": 0.139189675450325, "learning_rate": 7.415731015197575e-07, "loss": 0.06122446060180664, "step": 6706 }, { "epoch": 0.9067649096716408, "grad_norm": 0.11274229735136032, "learning_rate": 7.39449148141228e-07, "loss": 0.06790328025817871, "step": 6707 }, { "epoch": 0.9069001064674768, "grad_norm": 0.04873769357800484, "learning_rate": 7.373281638854329e-07, "loss": 0.04820966720581055, "step": 6708 }, { "epoch": 0.9070353032633126, "grad_norm": 0.08780638873577118, "learning_rate": 7.352101491939722e-07, "loss": 0.05083608627319336, "step": 6709 }, { "epoch": 0.9071705000591486, "grad_norm": 0.07825469970703125, "learning_rate": 7.330951045078249e-07, "loss": 0.0728602409362793, "step": 6710 }, { "epoch": 0.9073056968549845, "grad_norm": 0.096316859126091, "learning_rate": 7.309830302673621e-07, "loss": 0.04883098602294922, "step": 6711 }, { "epoch": 0.9074408936508205, "grad_norm": 0.20904502272605896, "learning_rate": 7.288739269123184e-07, "loss": 0.07873964309692383, "step": 6712 }, { "epoch": 0.9075760904466564, "grad_norm": 0.08129238337278366, "learning_rate": 7.267677948818296e-07, "loss": 0.062062978744506836, "step": 6713 }, { "epoch": 0.9077112872424924, "grad_norm": 0.14217080175876617, "learning_rate": 7.246646346143997e-07, "loss": 0.0724191665649414, "step": 6714 }, { "epoch": 0.9078464840383282, "grad_norm": 0.15996962785720825, "learning_rate": 7.225644465479153e-07, "loss": 0.07497239112854004, "step": 6715 }, { "epoch": 0.9079816808341642, "grad_norm": 0.045987971127033234, "learning_rate": 7.204672311196547e-07, "loss": 0.03400075435638428, "step": 6716 }, { "epoch": 0.9081168776300002, "grad_norm": 0.07036244124174118, "learning_rate": 7.183729887662604e-07, "loss": 0.04816460609436035, "step": 6717 }, { "epoch": 0.9082520744258361, "grad_norm": 0.0665932148694992, "learning_rate": 7.162817199237703e-07, "loss": 0.07290220260620117, "step": 6718 }, { "epoch": 0.9083872712216721, "grad_norm": 0.13764077425003052, "learning_rate": 7.141934250275978e-07, "loss": 0.08014583587646484, "step": 6719 }, { "epoch": 0.908522468017508, "grad_norm": 0.1037042960524559, "learning_rate": 7.121081045125316e-07, "loss": 0.06541967391967773, "step": 6720 }, { "epoch": 0.9086576648133439, "grad_norm": 0.060645900666713715, "learning_rate": 7.100257588127545e-07, "loss": 0.04391634464263916, "step": 6721 }, { "epoch": 0.9087928616091798, "grad_norm": 0.08745758980512619, "learning_rate": 7.079463883618148e-07, "loss": 0.07028853893280029, "step": 6722 }, { "epoch": 0.9089280584050158, "grad_norm": 0.09235255420207977, "learning_rate": 7.058699935926527e-07, "loss": 0.052414894104003906, "step": 6723 }, { "epoch": 0.9090632552008517, "grad_norm": 0.114885613322258, "learning_rate": 7.037965749375808e-07, "loss": 0.061815500259399414, "step": 6724 }, { "epoch": 0.9091984519966877, "grad_norm": 0.09272823482751846, "learning_rate": 7.017261328283037e-07, "loss": 0.08160018920898438, "step": 6725 }, { "epoch": 0.9093336487925237, "grad_norm": 0.15473969280719757, "learning_rate": 6.996586676958916e-07, "loss": 0.0699617862701416, "step": 6726 }, { "epoch": 0.9094688455883596, "grad_norm": 0.09846507012844086, "learning_rate": 6.975941799708019e-07, "loss": 0.07828950881958008, "step": 6727 }, { "epoch": 0.9096040423841955, "grad_norm": 0.12600812315940857, "learning_rate": 6.955326700828757e-07, "loss": 0.08341217041015625, "step": 6728 }, { "epoch": 0.9097392391800314, "grad_norm": 0.18116658926010132, "learning_rate": 6.934741384613246e-07, "loss": 0.06686651706695557, "step": 6729 }, { "epoch": 0.9098744359758674, "grad_norm": 0.060926783829927444, "learning_rate": 6.91418585534756e-07, "loss": 0.062319278717041016, "step": 6730 }, { "epoch": 0.9100096327717033, "grad_norm": 0.08466867357492447, "learning_rate": 6.893660117311373e-07, "loss": 0.0571293830871582, "step": 6731 }, { "epoch": 0.9101448295675393, "grad_norm": 0.1459810733795166, "learning_rate": 6.873164174778252e-07, "loss": 0.0804172158241272, "step": 6732 }, { "epoch": 0.9102800263633752, "grad_norm": 0.095429927110672, "learning_rate": 6.852698032015631e-07, "loss": 0.07612800598144531, "step": 6733 }, { "epoch": 0.9104152231592111, "grad_norm": 0.12787973880767822, "learning_rate": 6.832261693284636e-07, "loss": 0.07410478591918945, "step": 6734 }, { "epoch": 0.910550419955047, "grad_norm": 0.11708753556013107, "learning_rate": 6.811855162840214e-07, "loss": 0.0609288215637207, "step": 6735 }, { "epoch": 0.910685616750883, "grad_norm": 0.08623882383108139, "learning_rate": 6.791478444931132e-07, "loss": 0.06495475769042969, "step": 6736 }, { "epoch": 0.910820813546719, "grad_norm": 0.08381275087594986, "learning_rate": 6.77113154379988e-07, "loss": 0.04236018657684326, "step": 6737 }, { "epoch": 0.9109560103425549, "grad_norm": 0.05358347296714783, "learning_rate": 6.75081446368287e-07, "loss": 0.050638437271118164, "step": 6738 }, { "epoch": 0.9110912071383909, "grad_norm": 0.07180771231651306, "learning_rate": 6.730527208810166e-07, "loss": 0.04754924774169922, "step": 6739 }, { "epoch": 0.9112264039342267, "grad_norm": 0.07693973183631897, "learning_rate": 6.710269783405709e-07, "loss": 0.05288201570510864, "step": 6740 }, { "epoch": 0.9113616007300627, "grad_norm": 0.056572265923023224, "learning_rate": 6.690042191687206e-07, "loss": 0.04134058952331543, "step": 6741 }, { "epoch": 0.9114967975258986, "grad_norm": 0.11207717657089233, "learning_rate": 6.669844437866124e-07, "loss": 0.06454777717590332, "step": 6742 }, { "epoch": 0.9116319943217346, "grad_norm": 0.08345456421375275, "learning_rate": 6.649676526147764e-07, "loss": 0.05972456932067871, "step": 6743 }, { "epoch": 0.9117671911175705, "grad_norm": 0.03956246376037598, "learning_rate": 6.629538460731199e-07, "loss": 0.02511155605316162, "step": 6744 }, { "epoch": 0.9119023879134065, "grad_norm": 0.08143890649080276, "learning_rate": 6.609430245809261e-07, "loss": 0.06749105453491211, "step": 6745 }, { "epoch": 0.9120375847092423, "grad_norm": 0.09006833285093307, "learning_rate": 6.589351885568617e-07, "loss": 0.08936500549316406, "step": 6746 }, { "epoch": 0.9121727815050783, "grad_norm": 0.07447297126054764, "learning_rate": 6.569303384189624e-07, "loss": 0.07627391815185547, "step": 6747 }, { "epoch": 0.9123079783009143, "grad_norm": 0.108567014336586, "learning_rate": 6.54928474584659e-07, "loss": 0.0623016357421875, "step": 6748 }, { "epoch": 0.9124431750967502, "grad_norm": 0.08347965031862259, "learning_rate": 6.5292959747074e-07, "loss": 0.04192352294921875, "step": 6749 }, { "epoch": 0.9125783718925862, "grad_norm": 0.03402784839272499, "learning_rate": 6.509337074933891e-07, "loss": 0.037055015563964844, "step": 6750 }, { "epoch": 0.9127135686884221, "grad_norm": 0.04279724135994911, "learning_rate": 6.489408050681589e-07, "loss": 0.03884100914001465, "step": 6751 }, { "epoch": 0.912848765484258, "grad_norm": 0.06605985015630722, "learning_rate": 6.469508906099792e-07, "loss": 0.05292713642120361, "step": 6752 }, { "epoch": 0.9129839622800939, "grad_norm": 0.06558471918106079, "learning_rate": 6.449639645331684e-07, "loss": 0.06713080406188965, "step": 6753 }, { "epoch": 0.9131191590759299, "grad_norm": 0.07589532434940338, "learning_rate": 6.429800272514058e-07, "loss": 0.048555850982666016, "step": 6754 }, { "epoch": 0.9132543558717658, "grad_norm": 0.043322283774614334, "learning_rate": 6.409990791777659e-07, "loss": 0.04274141788482666, "step": 6755 }, { "epoch": 0.9133895526676018, "grad_norm": 0.08356264978647232, "learning_rate": 6.390211207246888e-07, "loss": 0.07425069808959961, "step": 6756 }, { "epoch": 0.9135247494634378, "grad_norm": 0.06733854860067368, "learning_rate": 6.370461523039967e-07, "loss": 0.05004429817199707, "step": 6757 }, { "epoch": 0.9136599462592736, "grad_norm": 0.06644568592309952, "learning_rate": 6.350741743268873e-07, "loss": 0.054903388023376465, "step": 6758 }, { "epoch": 0.9137951430551096, "grad_norm": 0.07641942799091339, "learning_rate": 6.331051872039373e-07, "loss": 0.044489383697509766, "step": 6759 }, { "epoch": 0.9139303398509455, "grad_norm": 0.07348938286304474, "learning_rate": 6.31139191345102e-07, "loss": 0.05042374134063721, "step": 6760 }, { "epoch": 0.9140655366467815, "grad_norm": 0.06905170530080795, "learning_rate": 6.291761871597091e-07, "loss": 0.047342449426651, "step": 6761 }, { "epoch": 0.9142007334426174, "grad_norm": 0.0952875167131424, "learning_rate": 6.272161750564731e-07, "loss": 0.06904077529907227, "step": 6762 }, { "epoch": 0.9143359302384534, "grad_norm": 0.059866953641176224, "learning_rate": 6.252591554434728e-07, "loss": 0.05153632164001465, "step": 6763 }, { "epoch": 0.9144711270342892, "grad_norm": 0.06745777279138565, "learning_rate": 6.233051287281688e-07, "loss": 0.062015533447265625, "step": 6764 }, { "epoch": 0.9146063238301252, "grad_norm": 0.062436845153570175, "learning_rate": 6.213540953174057e-07, "loss": 0.0528794527053833, "step": 6765 }, { "epoch": 0.9147415206259611, "grad_norm": 0.0822429433465004, "learning_rate": 6.194060556173953e-07, "loss": 0.07905292510986328, "step": 6766 }, { "epoch": 0.9148767174217971, "grad_norm": 0.044968072324991226, "learning_rate": 6.17461010033733e-07, "loss": 0.0505375862121582, "step": 6767 }, { "epoch": 0.9150119142176331, "grad_norm": 0.09746953845024109, "learning_rate": 6.155189589713833e-07, "loss": 0.05556917190551758, "step": 6768 }, { "epoch": 0.915147111013469, "grad_norm": 0.15062448382377625, "learning_rate": 6.135799028346928e-07, "loss": 0.06459987163543701, "step": 6769 }, { "epoch": 0.915282307809305, "grad_norm": 0.08286046236753464, "learning_rate": 6.116438420273868e-07, "loss": 0.06889796257019043, "step": 6770 }, { "epoch": 0.9154175046051408, "grad_norm": 0.09522316604852676, "learning_rate": 6.097107769525595e-07, "loss": 0.06338739395141602, "step": 6771 }, { "epoch": 0.9155527014009768, "grad_norm": 0.1612064093351364, "learning_rate": 6.077807080126873e-07, "loss": 0.06690263748168945, "step": 6772 }, { "epoch": 0.9156878981968127, "grad_norm": 0.07434094697237015, "learning_rate": 6.058536356096206e-07, "loss": 0.06382691860198975, "step": 6773 }, { "epoch": 0.9158230949926487, "grad_norm": 0.07352715730667114, "learning_rate": 6.039295601445833e-07, "loss": 0.06329339742660522, "step": 6774 }, { "epoch": 0.9159582917884846, "grad_norm": 0.155765563249588, "learning_rate": 6.020084820181831e-07, "loss": 0.07581257820129395, "step": 6775 }, { "epoch": 0.9160934885843206, "grad_norm": 0.04926585033535957, "learning_rate": 6.000904016303971e-07, "loss": 0.0404854416847229, "step": 6776 }, { "epoch": 0.9162286853801564, "grad_norm": 0.0913032665848732, "learning_rate": 5.981753193805789e-07, "loss": 0.08167791366577148, "step": 6777 }, { "epoch": 0.9163638821759924, "grad_norm": 0.0839015319943428, "learning_rate": 5.962632356674597e-07, "loss": 0.05323457717895508, "step": 6778 }, { "epoch": 0.9164990789718284, "grad_norm": 0.11051979660987854, "learning_rate": 5.94354150889141e-07, "loss": 0.07772564888000488, "step": 6779 }, { "epoch": 0.9166342757676643, "grad_norm": 0.0768633633852005, "learning_rate": 5.924480654431147e-07, "loss": 0.05799245834350586, "step": 6780 }, { "epoch": 0.9167694725635003, "grad_norm": 0.1127280592918396, "learning_rate": 5.905449797262252e-07, "loss": 0.06021428108215332, "step": 6781 }, { "epoch": 0.9169046693593362, "grad_norm": 0.07609863579273224, "learning_rate": 5.886448941347156e-07, "loss": 0.06479433178901672, "step": 6782 }, { "epoch": 0.9170398661551721, "grad_norm": 0.04923299327492714, "learning_rate": 5.867478090641892e-07, "loss": 0.040313392877578735, "step": 6783 }, { "epoch": 0.917175062951008, "grad_norm": 0.09888669848442078, "learning_rate": 5.848537249096269e-07, "loss": 0.05931282043457031, "step": 6784 }, { "epoch": 0.917310259746844, "grad_norm": 0.12415360659360886, "learning_rate": 5.829626420653949e-07, "loss": 0.05547034740447998, "step": 6785 }, { "epoch": 0.91744545654268, "grad_norm": 0.08682490885257721, "learning_rate": 5.810745609252166e-07, "loss": 0.0454595685005188, "step": 6786 }, { "epoch": 0.9175806533385159, "grad_norm": 0.11459515243768692, "learning_rate": 5.791894818822091e-07, "loss": 0.05810356140136719, "step": 6787 }, { "epoch": 0.9177158501343519, "grad_norm": 0.09491605311632156, "learning_rate": 5.773074053288519e-07, "loss": 0.06572043895721436, "step": 6788 }, { "epoch": 0.9178510469301877, "grad_norm": 0.09550150483846664, "learning_rate": 5.75428331657003e-07, "loss": 0.08484911918640137, "step": 6789 }, { "epoch": 0.9179862437260237, "grad_norm": 0.07265599817037582, "learning_rate": 5.735522612578998e-07, "loss": 0.07644820213317871, "step": 6790 }, { "epoch": 0.9181214405218596, "grad_norm": 0.0837293341755867, "learning_rate": 5.716791945221444e-07, "loss": 0.06304502487182617, "step": 6791 }, { "epoch": 0.9182566373176956, "grad_norm": 0.03523504361510277, "learning_rate": 5.698091318397219e-07, "loss": 0.028102397918701172, "step": 6792 }, { "epoch": 0.9183918341135315, "grad_norm": 0.05463401600718498, "learning_rate": 5.679420735999908e-07, "loss": 0.05178022384643555, "step": 6793 }, { "epoch": 0.9185270309093675, "grad_norm": 0.06810581684112549, "learning_rate": 5.660780201916799e-07, "loss": 0.030712023377418518, "step": 6794 }, { "epoch": 0.9186622277052033, "grad_norm": 0.052742671221494675, "learning_rate": 5.642169720028973e-07, "loss": 0.04141056537628174, "step": 6795 }, { "epoch": 0.9187974245010393, "grad_norm": 0.10572075843811035, "learning_rate": 5.623589294211196e-07, "loss": 0.06541228294372559, "step": 6796 }, { "epoch": 0.9189326212968753, "grad_norm": 0.05111096799373627, "learning_rate": 5.605038928332057e-07, "loss": 0.04832315444946289, "step": 6797 }, { "epoch": 0.9190678180927112, "grad_norm": 0.044135306030511856, "learning_rate": 5.586518626253817e-07, "loss": 0.037868618965148926, "step": 6798 }, { "epoch": 0.9192030148885472, "grad_norm": 0.06819167733192444, "learning_rate": 5.568028391832524e-07, "loss": 0.057631611824035645, "step": 6799 }, { "epoch": 0.9193382116843831, "grad_norm": 0.070487380027771, "learning_rate": 5.549568228917917e-07, "loss": 0.05653238296508789, "step": 6800 }, { "epoch": 0.919473408480219, "grad_norm": 0.1173807829618454, "learning_rate": 5.531138141353486e-07, "loss": 0.061377644538879395, "step": 6801 }, { "epoch": 0.9196086052760549, "grad_norm": 0.12492447346448898, "learning_rate": 5.512738132976514e-07, "loss": 0.05899500846862793, "step": 6802 }, { "epoch": 0.9197438020718909, "grad_norm": 0.14777909219264984, "learning_rate": 5.49436820761795e-07, "loss": 0.06536829471588135, "step": 6803 }, { "epoch": 0.9198789988677268, "grad_norm": 0.09018542617559433, "learning_rate": 5.476028369102537e-07, "loss": 0.06111180782318115, "step": 6804 }, { "epoch": 0.9200141956635628, "grad_norm": 0.07368019223213196, "learning_rate": 5.45771862124872e-07, "loss": 0.061571359634399414, "step": 6805 }, { "epoch": 0.9201493924593988, "grad_norm": 0.14074435830116272, "learning_rate": 5.439438967868649e-07, "loss": 0.08659911155700684, "step": 6806 }, { "epoch": 0.9202845892552346, "grad_norm": 0.08623439073562622, "learning_rate": 5.421189412768296e-07, "loss": 0.04534733295440674, "step": 6807 }, { "epoch": 0.9204197860510706, "grad_norm": 0.12841518223285675, "learning_rate": 5.402969959747306e-07, "loss": 0.062029361724853516, "step": 6808 }, { "epoch": 0.9205549828469065, "grad_norm": 0.11448809504508972, "learning_rate": 5.384780612599044e-07, "loss": 0.0379372239112854, "step": 6809 }, { "epoch": 0.9206901796427425, "grad_norm": 0.05587799474596977, "learning_rate": 5.366621375110647e-07, "loss": 0.03373980522155762, "step": 6810 }, { "epoch": 0.9208253764385784, "grad_norm": 0.06052003055810928, "learning_rate": 5.348492251062942e-07, "loss": 0.04782605171203613, "step": 6811 }, { "epoch": 0.9209605732344144, "grad_norm": 0.07805603742599487, "learning_rate": 5.330393244230558e-07, "loss": 0.053964436054229736, "step": 6812 }, { "epoch": 0.9210957700302503, "grad_norm": 0.10421637445688248, "learning_rate": 5.312324358381731e-07, "loss": 0.07691431045532227, "step": 6813 }, { "epoch": 0.9212309668260862, "grad_norm": 0.10782796889543533, "learning_rate": 5.29428559727857e-07, "loss": 0.08362793922424316, "step": 6814 }, { "epoch": 0.9213661636219221, "grad_norm": 0.09339733421802521, "learning_rate": 5.276276964676802e-07, "loss": 0.06957411766052246, "step": 6815 }, { "epoch": 0.9215013604177581, "grad_norm": 0.05293793976306915, "learning_rate": 5.258298464325928e-07, "loss": 0.03976625204086304, "step": 6816 }, { "epoch": 0.921636557213594, "grad_norm": 0.06883855164051056, "learning_rate": 5.240350099969204e-07, "loss": 0.05500668287277222, "step": 6817 }, { "epoch": 0.92177175400943, "grad_norm": 0.10639632493257523, "learning_rate": 5.222431875343492e-07, "loss": 0.05985760688781738, "step": 6818 }, { "epoch": 0.921906950805266, "grad_norm": 0.08619681745767593, "learning_rate": 5.204543794179539e-07, "loss": 0.0613057017326355, "step": 6819 }, { "epoch": 0.9220421476011018, "grad_norm": 0.06661764532327652, "learning_rate": 5.186685860201717e-07, "loss": 0.05211687088012695, "step": 6820 }, { "epoch": 0.9221773443969378, "grad_norm": 0.09220027923583984, "learning_rate": 5.16885807712812e-07, "loss": 0.04319643974304199, "step": 6821 }, { "epoch": 0.9223125411927737, "grad_norm": 0.09631132334470749, "learning_rate": 5.151060448670625e-07, "loss": 0.05782175064086914, "step": 6822 }, { "epoch": 0.9224477379886097, "grad_norm": 0.053621068596839905, "learning_rate": 5.133292978534754e-07, "loss": 0.04122114181518555, "step": 6823 }, { "epoch": 0.9225829347844456, "grad_norm": 0.07459413260221481, "learning_rate": 5.115555670419814e-07, "loss": 0.05349469184875488, "step": 6824 }, { "epoch": 0.9227181315802816, "grad_norm": 0.08251022547483444, "learning_rate": 5.097848528018817e-07, "loss": 0.046590209007263184, "step": 6825 }, { "epoch": 0.9228533283761174, "grad_norm": 0.10208684206008911, "learning_rate": 5.080171555018448e-07, "loss": 0.08421897888183594, "step": 6826 }, { "epoch": 0.9229885251719534, "grad_norm": 0.07252252101898193, "learning_rate": 5.06252475509918e-07, "loss": 0.05518531799316406, "step": 6827 }, { "epoch": 0.9231237219677894, "grad_norm": 0.07434801757335663, "learning_rate": 5.044908131935139e-07, "loss": 0.052265167236328125, "step": 6828 }, { "epoch": 0.9232589187636253, "grad_norm": 0.08358155936002731, "learning_rate": 5.027321689194242e-07, "loss": 0.053508758544921875, "step": 6829 }, { "epoch": 0.9233941155594613, "grad_norm": 0.06201353296637535, "learning_rate": 5.009765430538061e-07, "loss": 0.06040000915527344, "step": 6830 }, { "epoch": 0.9235293123552972, "grad_norm": 0.07067056000232697, "learning_rate": 4.992239359621886e-07, "loss": 0.05200427770614624, "step": 6831 }, { "epoch": 0.9236645091511331, "grad_norm": 0.07980097830295563, "learning_rate": 4.974743480094767e-07, "loss": 0.06742334365844727, "step": 6832 }, { "epoch": 0.923799705946969, "grad_norm": 0.10068795830011368, "learning_rate": 4.957277795599407e-07, "loss": 0.06462322175502777, "step": 6833 }, { "epoch": 0.923934902742805, "grad_norm": 0.056041549891233444, "learning_rate": 4.93984230977228e-07, "loss": 0.05373716354370117, "step": 6834 }, { "epoch": 0.9240700995386409, "grad_norm": 0.0804627314209938, "learning_rate": 4.922437026243531e-07, "loss": 0.06502079963684082, "step": 6835 }, { "epoch": 0.9242052963344769, "grad_norm": 0.12691618502140045, "learning_rate": 4.905061948637063e-07, "loss": 0.060770273208618164, "step": 6836 }, { "epoch": 0.9243404931303129, "grad_norm": 0.09311460703611374, "learning_rate": 4.887717080570431e-07, "loss": 0.06514084339141846, "step": 6837 }, { "epoch": 0.9244756899261487, "grad_norm": 0.08764898031949997, "learning_rate": 4.870402425654913e-07, "loss": 0.0783529281616211, "step": 6838 }, { "epoch": 0.9246108867219847, "grad_norm": 0.08409794420003891, "learning_rate": 4.853117987495542e-07, "loss": 0.06356537342071533, "step": 6839 }, { "epoch": 0.9247460835178206, "grad_norm": 0.0501297190785408, "learning_rate": 4.83586376969104e-07, "loss": 0.04085803031921387, "step": 6840 }, { "epoch": 0.9248812803136566, "grad_norm": 0.05269839987158775, "learning_rate": 4.818639775833816e-07, "loss": 0.04741021245718002, "step": 6841 }, { "epoch": 0.9250164771094925, "grad_norm": 0.05799153074622154, "learning_rate": 4.801446009509969e-07, "loss": 0.03713059425354004, "step": 6842 }, { "epoch": 0.9251516739053285, "grad_norm": 0.07219300419092178, "learning_rate": 4.784282474299367e-07, "loss": 0.06656515598297119, "step": 6843 }, { "epoch": 0.9252868707011643, "grad_norm": 0.09508062154054642, "learning_rate": 4.767149173775537e-07, "loss": 0.042938947677612305, "step": 6844 }, { "epoch": 0.9254220674970003, "grad_norm": 0.09607056528329849, "learning_rate": 4.750046111505724e-07, "loss": 0.06508302688598633, "step": 6845 }, { "epoch": 0.9255572642928362, "grad_norm": 0.061726950109004974, "learning_rate": 4.732973291050896e-07, "loss": 0.04613304138183594, "step": 6846 }, { "epoch": 0.9256924610886722, "grad_norm": 0.06372092664241791, "learning_rate": 4.7159307159656607e-07, "loss": 0.04676032066345215, "step": 6847 }, { "epoch": 0.9258276578845082, "grad_norm": 0.17753368616104126, "learning_rate": 4.6989183897983954e-07, "loss": 0.05688464641571045, "step": 6848 }, { "epoch": 0.9259628546803441, "grad_norm": 0.08127326518297195, "learning_rate": 4.681936316091201e-07, "loss": 0.05523126572370529, "step": 6849 }, { "epoch": 0.92609805147618, "grad_norm": 0.07534569501876831, "learning_rate": 4.664984498379765e-07, "loss": 0.05183809995651245, "step": 6850 }, { "epoch": 0.9262332482720159, "grad_norm": 0.043646253645420074, "learning_rate": 4.6480629401935814e-07, "loss": 0.03409457206726074, "step": 6851 }, { "epoch": 0.9263684450678519, "grad_norm": 0.06272473931312561, "learning_rate": 4.631171645055815e-07, "loss": 0.04832053184509277, "step": 6852 }, { "epoch": 0.9265036418636878, "grad_norm": 0.08324024826288223, "learning_rate": 4.614310616483286e-07, "loss": 0.05918073654174805, "step": 6853 }, { "epoch": 0.9266388386595238, "grad_norm": 0.07095614820718765, "learning_rate": 4.5974798579866193e-07, "loss": 0.04324913024902344, "step": 6854 }, { "epoch": 0.9267740354553597, "grad_norm": 0.0709993913769722, "learning_rate": 4.580679373069996e-07, "loss": 0.0503043532371521, "step": 6855 }, { "epoch": 0.9269092322511956, "grad_norm": 0.10518515110015869, "learning_rate": 4.5639091652314e-07, "loss": 0.06734371185302734, "step": 6856 }, { "epoch": 0.9270444290470315, "grad_norm": 0.10777423530817032, "learning_rate": 4.54716923796249e-07, "loss": 0.05843234062194824, "step": 6857 }, { "epoch": 0.9271796258428675, "grad_norm": 0.09842325001955032, "learning_rate": 4.5304595947485927e-07, "loss": 0.06998658180236816, "step": 6858 }, { "epoch": 0.9273148226387035, "grad_norm": 0.13332384824752808, "learning_rate": 4.5137802390687433e-07, "loss": 0.06135225296020508, "step": 6859 }, { "epoch": 0.9274500194345394, "grad_norm": 0.12472131103277206, "learning_rate": 4.497131174395663e-07, "loss": 0.06375408172607422, "step": 6860 }, { "epoch": 0.9275852162303754, "grad_norm": 0.08886054158210754, "learning_rate": 4.4805124041957967e-07, "loss": 0.057554006576538086, "step": 6861 }, { "epoch": 0.9277204130262113, "grad_norm": 0.15123821794986725, "learning_rate": 4.463923931929259e-07, "loss": 0.06347322463989258, "step": 6862 }, { "epoch": 0.9278556098220472, "grad_norm": 0.050435133278369904, "learning_rate": 4.4473657610498377e-07, "loss": 0.035222649574279785, "step": 6863 }, { "epoch": 0.9279908066178831, "grad_norm": 0.12263201177120209, "learning_rate": 4.430837895005058e-07, "loss": 0.07786369323730469, "step": 6864 }, { "epoch": 0.9281260034137191, "grad_norm": 0.07072308659553528, "learning_rate": 4.4143403372360836e-07, "loss": 0.04534858465194702, "step": 6865 }, { "epoch": 0.928261200209555, "grad_norm": 0.08886685967445374, "learning_rate": 4.3978730911778176e-07, "loss": 0.05535316467285156, "step": 6866 }, { "epoch": 0.928396397005391, "grad_norm": 0.06547877192497253, "learning_rate": 4.381436160258834e-07, "loss": 0.059301525354385376, "step": 6867 }, { "epoch": 0.928531593801227, "grad_norm": 0.055500734597444534, "learning_rate": 4.3650295479013615e-07, "loss": 0.04136532545089722, "step": 6868 }, { "epoch": 0.9286667905970628, "grad_norm": 0.22133472561836243, "learning_rate": 4.348653257521351e-07, "loss": 0.07832002639770508, "step": 6869 }, { "epoch": 0.9288019873928988, "grad_norm": 0.0687526986002922, "learning_rate": 4.332307292528442e-07, "loss": 0.057385802268981934, "step": 6870 }, { "epoch": 0.9289371841887347, "grad_norm": 0.1347644031047821, "learning_rate": 4.315991656325946e-07, "loss": 0.08509588241577148, "step": 6871 }, { "epoch": 0.9290723809845707, "grad_norm": 0.10149525851011276, "learning_rate": 4.299706352310895e-07, "loss": 0.08035612106323242, "step": 6872 }, { "epoch": 0.9292075777804066, "grad_norm": 0.11910875886678696, "learning_rate": 4.283451383873926e-07, "loss": 0.07304835319519043, "step": 6873 }, { "epoch": 0.9293427745762426, "grad_norm": 0.10757680982351303, "learning_rate": 4.26722675439945e-07, "loss": 0.0601048469543457, "step": 6874 }, { "epoch": 0.9294779713720784, "grad_norm": 0.130685493350029, "learning_rate": 4.251032467265481e-07, "loss": 0.08267974853515625, "step": 6875 }, { "epoch": 0.9296131681679144, "grad_norm": 0.07935380935668945, "learning_rate": 4.234868525843805e-07, "loss": 0.05322980880737305, "step": 6876 }, { "epoch": 0.9297483649637504, "grad_norm": 0.12427707016468048, "learning_rate": 4.218734933499796e-07, "loss": 0.06458759307861328, "step": 6877 }, { "epoch": 0.9298835617595863, "grad_norm": 0.08099223673343658, "learning_rate": 4.202631693592601e-07, "loss": 0.04376697540283203, "step": 6878 }, { "epoch": 0.9300187585554223, "grad_norm": 0.051493365317583084, "learning_rate": 4.186558809474955e-07, "loss": 0.055303096771240234, "step": 6879 }, { "epoch": 0.9301539553512582, "grad_norm": 0.0659814402461052, "learning_rate": 4.170516284493331e-07, "loss": 0.06276798248291016, "step": 6880 }, { "epoch": 0.9302891521470941, "grad_norm": 0.13775216042995453, "learning_rate": 4.1545041219879063e-07, "loss": 0.044480085372924805, "step": 6881 }, { "epoch": 0.93042434894293, "grad_norm": 0.055290453135967255, "learning_rate": 4.138522325292432e-07, "loss": 0.03249400854110718, "step": 6882 }, { "epoch": 0.930559545738766, "grad_norm": 0.039650361984968185, "learning_rate": 4.1225708977344457e-07, "loss": 0.027518153190612793, "step": 6883 }, { "epoch": 0.9306947425346019, "grad_norm": 0.12958823144435883, "learning_rate": 4.106649842635124e-07, "loss": 0.06265556812286377, "step": 6884 }, { "epoch": 0.9308299393304379, "grad_norm": 0.043656568974256516, "learning_rate": 4.090759163309282e-07, "loss": 0.043683528900146484, "step": 6885 }, { "epoch": 0.9309651361262739, "grad_norm": 0.07034581899642944, "learning_rate": 4.07489886306549e-07, "loss": 0.05775117874145508, "step": 6886 }, { "epoch": 0.9311003329221097, "grad_norm": 0.06452146917581558, "learning_rate": 4.059068945205907e-07, "loss": 0.034664273262023926, "step": 6887 }, { "epoch": 0.9312355297179457, "grad_norm": 0.05761135369539261, "learning_rate": 4.043269413026429e-07, "loss": 0.03112006187438965, "step": 6888 }, { "epoch": 0.9313707265137816, "grad_norm": 0.1013440415263176, "learning_rate": 4.027500269816592e-07, "loss": 0.05026751756668091, "step": 6889 }, { "epoch": 0.9315059233096176, "grad_norm": 0.07188749313354492, "learning_rate": 4.011761518859619e-07, "loss": 0.05354803800582886, "step": 6890 }, { "epoch": 0.9316411201054535, "grad_norm": 0.10590389370918274, "learning_rate": 3.996053163432406e-07, "loss": 0.05647069215774536, "step": 6891 }, { "epoch": 0.9317763169012895, "grad_norm": 0.060343142598867416, "learning_rate": 3.980375206805503e-07, "loss": 0.04913735389709473, "step": 6892 }, { "epoch": 0.9319115136971253, "grad_norm": 0.06492726504802704, "learning_rate": 3.9647276522431664e-07, "loss": 0.05971336364746094, "step": 6893 }, { "epoch": 0.9320467104929613, "grad_norm": 0.11738261580467224, "learning_rate": 3.949110503003289e-07, "loss": 0.05980730056762695, "step": 6894 }, { "epoch": 0.9321819072887972, "grad_norm": 0.06924591213464737, "learning_rate": 3.9335237623374377e-07, "loss": 0.0476231575012207, "step": 6895 }, { "epoch": 0.9323171040846332, "grad_norm": 0.08711355179548264, "learning_rate": 3.917967433490849e-07, "loss": 0.08286428451538086, "step": 6896 }, { "epoch": 0.9324523008804692, "grad_norm": 0.04757753759622574, "learning_rate": 3.902441519702449e-07, "loss": 0.050853729248046875, "step": 6897 }, { "epoch": 0.9325874976763051, "grad_norm": 0.059648267924785614, "learning_rate": 3.886946024204818e-07, "loss": 0.052712440490722656, "step": 6898 }, { "epoch": 0.932722694472141, "grad_norm": 0.07008446007966995, "learning_rate": 3.871480950224193e-07, "loss": 0.058917999267578125, "step": 6899 }, { "epoch": 0.9328578912679769, "grad_norm": 0.028207402676343918, "learning_rate": 3.856046300980498e-07, "loss": 0.032725900411605835, "step": 6900 }, { "epoch": 0.9329930880638129, "grad_norm": 0.11810152232646942, "learning_rate": 3.8406420796872953e-07, "loss": 0.0660848617553711, "step": 6901 }, { "epoch": 0.9331282848596488, "grad_norm": 0.07528826594352722, "learning_rate": 3.825268289551803e-07, "loss": 0.04598945379257202, "step": 6902 }, { "epoch": 0.9332634816554848, "grad_norm": 0.06875009089708328, "learning_rate": 3.8099249337749777e-07, "loss": 0.050786495208740234, "step": 6903 }, { "epoch": 0.9333986784513207, "grad_norm": 0.029056215658783913, "learning_rate": 3.7946120155513465e-07, "loss": 0.0377044677734375, "step": 6904 }, { "epoch": 0.9335338752471567, "grad_norm": 0.08003580570220947, "learning_rate": 3.7793295380691595e-07, "loss": 0.05276012420654297, "step": 6905 }, { "epoch": 0.9336690720429925, "grad_norm": 0.05763528496026993, "learning_rate": 3.7640775045103214e-07, "loss": 0.052684903144836426, "step": 6906 }, { "epoch": 0.9338042688388285, "grad_norm": 0.08067308366298676, "learning_rate": 3.7488559180503423e-07, "loss": 0.039466023445129395, "step": 6907 }, { "epoch": 0.9339394656346645, "grad_norm": 0.11801900714635849, "learning_rate": 3.7336647818584866e-07, "loss": 0.07549607753753662, "step": 6908 }, { "epoch": 0.9340746624305004, "grad_norm": 0.1064031645655632, "learning_rate": 3.718504099097625e-07, "loss": 0.057752132415771484, "step": 6909 }, { "epoch": 0.9342098592263364, "grad_norm": 0.071253702044487, "learning_rate": 3.703373872924265e-07, "loss": 0.04210996627807617, "step": 6910 }, { "epoch": 0.9343450560221723, "grad_norm": 0.05311444774270058, "learning_rate": 3.688274106488604e-07, "loss": 0.04536402225494385, "step": 6911 }, { "epoch": 0.9344802528180082, "grad_norm": 0.11050664633512497, "learning_rate": 3.67320480293451e-07, "loss": 0.07140803337097168, "step": 6912 }, { "epoch": 0.9346154496138441, "grad_norm": 0.12018724530935287, "learning_rate": 3.6581659653994736e-07, "loss": 0.08607006072998047, "step": 6913 }, { "epoch": 0.9347506464096801, "grad_norm": 0.050158578902482986, "learning_rate": 3.64315759701469e-07, "loss": 0.037581801414489746, "step": 6914 }, { "epoch": 0.934885843205516, "grad_norm": 0.04933745041489601, "learning_rate": 3.6281797009049765e-07, "loss": 0.03782397508621216, "step": 6915 }, { "epoch": 0.935021040001352, "grad_norm": 0.04649464040994644, "learning_rate": 3.613232280188772e-07, "loss": 0.034948378801345825, "step": 6916 }, { "epoch": 0.935156236797188, "grad_norm": 0.08013904094696045, "learning_rate": 3.5983153379782363e-07, "loss": 0.06444597244262695, "step": 6917 }, { "epoch": 0.9352914335930238, "grad_norm": 0.08652915805578232, "learning_rate": 3.5834288773791854e-07, "loss": 0.05016756057739258, "step": 6918 }, { "epoch": 0.9354266303888598, "grad_norm": 0.11209945380687714, "learning_rate": 3.568572901491007e-07, "loss": 0.06284534931182861, "step": 6919 }, { "epoch": 0.9355618271846957, "grad_norm": 0.08809870481491089, "learning_rate": 3.553747413406827e-07, "loss": 0.062105655670166016, "step": 6920 }, { "epoch": 0.9356970239805317, "grad_norm": 0.10317903012037277, "learning_rate": 3.538952416213376e-07, "loss": 0.0722661018371582, "step": 6921 }, { "epoch": 0.9358322207763676, "grad_norm": 0.09842576831579208, "learning_rate": 3.524187912991056e-07, "loss": 0.046496033668518066, "step": 6922 }, { "epoch": 0.9359674175722036, "grad_norm": 0.07721572369337082, "learning_rate": 3.5094539068139254e-07, "loss": 0.06457924842834473, "step": 6923 }, { "epoch": 0.9361026143680394, "grad_norm": 0.10088503360748291, "learning_rate": 3.494750400749663e-07, "loss": 0.06267070770263672, "step": 6924 }, { "epoch": 0.9362378111638754, "grad_norm": 0.09097261726856232, "learning_rate": 3.480077397859638e-07, "loss": 0.07631969451904297, "step": 6925 }, { "epoch": 0.9363730079597113, "grad_norm": 0.07021842896938324, "learning_rate": 3.4654349011988384e-07, "loss": 0.05778908729553223, "step": 6926 }, { "epoch": 0.9365082047555473, "grad_norm": 0.14088305830955505, "learning_rate": 3.4508229138159095e-07, "loss": 0.07998228073120117, "step": 6927 }, { "epoch": 0.9366434015513833, "grad_norm": 0.061945971101522446, "learning_rate": 3.4362414387531516e-07, "loss": 0.05021810531616211, "step": 6928 }, { "epoch": 0.9367785983472192, "grad_norm": 0.1050914004445076, "learning_rate": 3.4216904790464854e-07, "loss": 0.053131103515625, "step": 6929 }, { "epoch": 0.9369137951430551, "grad_norm": 0.05285362899303436, "learning_rate": 3.407170037725521e-07, "loss": 0.049791574478149414, "step": 6930 }, { "epoch": 0.937048991938891, "grad_norm": 0.07455403357744217, "learning_rate": 3.3926801178134737e-07, "loss": 0.058953285217285156, "step": 6931 }, { "epoch": 0.937184188734727, "grad_norm": 0.16067960858345032, "learning_rate": 3.3782207223272467e-07, "loss": 0.06723785400390625, "step": 6932 }, { "epoch": 0.9373193855305629, "grad_norm": 0.11488193273544312, "learning_rate": 3.363791854277348e-07, "loss": 0.07103681564331055, "step": 6933 }, { "epoch": 0.9374545823263989, "grad_norm": 0.10198970884084702, "learning_rate": 3.349393516667926e-07, "loss": 0.07474517822265625, "step": 6934 }, { "epoch": 0.9375897791222348, "grad_norm": 0.06418319046497345, "learning_rate": 3.335025712496814e-07, "loss": 0.05176424980163574, "step": 6935 }, { "epoch": 0.9377249759180707, "grad_norm": 0.07827525585889816, "learning_rate": 3.320688444755471e-07, "loss": 0.05374348163604736, "step": 6936 }, { "epoch": 0.9378601727139066, "grad_norm": 0.12284183502197266, "learning_rate": 3.306381716428991e-07, "loss": 0.08468794822692871, "step": 6937 }, { "epoch": 0.9379953695097426, "grad_norm": 0.09308009594678879, "learning_rate": 3.2921055304960925e-07, "loss": 0.05273151397705078, "step": 6938 }, { "epoch": 0.9381305663055786, "grad_norm": 0.11022285372018814, "learning_rate": 3.277859889929147e-07, "loss": 0.053481101989746094, "step": 6939 }, { "epoch": 0.9382657631014145, "grad_norm": 0.1451026350259781, "learning_rate": 3.263644797694215e-07, "loss": 0.08204197883605957, "step": 6940 }, { "epoch": 0.9384009598972505, "grad_norm": 0.057752303779125214, "learning_rate": 3.2494602567509303e-07, "loss": 0.031233549118041992, "step": 6941 }, { "epoch": 0.9385361566930863, "grad_norm": 0.07729972153902054, "learning_rate": 3.2353062700525794e-07, "loss": 0.060292959213256836, "step": 6942 }, { "epoch": 0.9386713534889223, "grad_norm": 0.0540657602250576, "learning_rate": 3.221182840546122e-07, "loss": 0.04839235544204712, "step": 6943 }, { "epoch": 0.9388065502847582, "grad_norm": 0.08703777194023132, "learning_rate": 3.207089971172089e-07, "loss": 0.06039083003997803, "step": 6944 }, { "epoch": 0.9389417470805942, "grad_norm": 0.0702759325504303, "learning_rate": 3.1930276648647504e-07, "loss": 0.05310368537902832, "step": 6945 }, { "epoch": 0.9390769438764301, "grad_norm": 0.05793435126543045, "learning_rate": 3.178995924551914e-07, "loss": 0.0605621337890625, "step": 6946 }, { "epoch": 0.9392121406722661, "grad_norm": 0.18370160460472107, "learning_rate": 3.164994753155059e-07, "loss": 0.051070213317871094, "step": 6947 }, { "epoch": 0.939347337468102, "grad_norm": 0.1674375832080841, "learning_rate": 3.1510241535893215e-07, "loss": 0.07889413833618164, "step": 6948 }, { "epoch": 0.9394825342639379, "grad_norm": 0.10024214535951614, "learning_rate": 3.1370841287634567e-07, "loss": 0.04334592819213867, "step": 6949 }, { "epoch": 0.9396177310597739, "grad_norm": 0.06585827469825745, "learning_rate": 3.1231746815798436e-07, "loss": 0.05802345275878906, "step": 6950 }, { "epoch": 0.9397529278556098, "grad_norm": 0.08680598437786102, "learning_rate": 3.1092958149344985e-07, "loss": 0.0465472936630249, "step": 6951 }, { "epoch": 0.9398881246514458, "grad_norm": 0.12279269844293594, "learning_rate": 3.095447531717077e-07, "loss": 0.07993555068969727, "step": 6952 }, { "epoch": 0.9400233214472817, "grad_norm": 0.11014531552791595, "learning_rate": 3.08162983481089e-07, "loss": 0.08346843719482422, "step": 6953 }, { "epoch": 0.9401585182431177, "grad_norm": 0.04653956741094589, "learning_rate": 3.067842727092801e-07, "loss": 0.043128013610839844, "step": 6954 }, { "epoch": 0.9402937150389535, "grad_norm": 0.07728352397680283, "learning_rate": 3.0540862114334323e-07, "loss": 0.0655207633972168, "step": 6955 }, { "epoch": 0.9404289118347895, "grad_norm": 0.07413399964570999, "learning_rate": 3.0403602906969086e-07, "loss": 0.07045590877532959, "step": 6956 }, { "epoch": 0.9405641086306255, "grad_norm": 0.07928022742271423, "learning_rate": 3.0266649677410605e-07, "loss": 0.06980705261230469, "step": 6957 }, { "epoch": 0.9406993054264614, "grad_norm": 0.09311087429523468, "learning_rate": 3.0130002454173243e-07, "loss": 0.06402349472045898, "step": 6958 }, { "epoch": 0.9408345022222974, "grad_norm": 0.10275434702634811, "learning_rate": 2.9993661265707407e-07, "loss": 0.07254600524902344, "step": 6959 }, { "epoch": 0.9409696990181333, "grad_norm": 0.04765182361006737, "learning_rate": 2.985762614040072e-07, "loss": 0.034528255462646484, "step": 6960 }, { "epoch": 0.9411048958139692, "grad_norm": 0.08414024859666824, "learning_rate": 2.972189710657586e-07, "loss": 0.054224252700805664, "step": 6961 }, { "epoch": 0.9412400926098051, "grad_norm": 0.05579359829425812, "learning_rate": 2.958647419249255e-07, "loss": 0.0525822639465332, "step": 6962 }, { "epoch": 0.9413752894056411, "grad_norm": 0.06046384572982788, "learning_rate": 2.9451357426346415e-07, "loss": 0.05457305908203125, "step": 6963 }, { "epoch": 0.941510486201477, "grad_norm": 0.04738999530673027, "learning_rate": 2.9316546836269776e-07, "loss": 0.030657470226287842, "step": 6964 }, { "epoch": 0.941645682997313, "grad_norm": 0.08934573084115982, "learning_rate": 2.9182042450330516e-07, "loss": 0.0614393949508667, "step": 6965 }, { "epoch": 0.941780879793149, "grad_norm": 0.06653844565153122, "learning_rate": 2.9047844296533397e-07, "loss": 0.049520254135131836, "step": 6966 }, { "epoch": 0.9419160765889848, "grad_norm": 0.06392962485551834, "learning_rate": 2.8913952402819246e-07, "loss": 0.06250190734863281, "step": 6967 }, { "epoch": 0.9420512733848208, "grad_norm": 0.06784596294164658, "learning_rate": 2.878036679706492e-07, "loss": 0.0459897518157959, "step": 6968 }, { "epoch": 0.9421864701806567, "grad_norm": 0.061562154442071915, "learning_rate": 2.8647087507083837e-07, "loss": 0.05291271209716797, "step": 6969 }, { "epoch": 0.9423216669764927, "grad_norm": 0.058297812938690186, "learning_rate": 2.8514114560625303e-07, "loss": 0.05229926109313965, "step": 6970 }, { "epoch": 0.9424568637723286, "grad_norm": 0.0412035770714283, "learning_rate": 2.8381447985375007e-07, "loss": 0.037505149841308594, "step": 6971 }, { "epoch": 0.9425920605681646, "grad_norm": 0.050766970962285995, "learning_rate": 2.8249087808954853e-07, "loss": 0.03810882568359375, "step": 6972 }, { "epoch": 0.9427272573640004, "grad_norm": 0.07791668176651001, "learning_rate": 2.811703405892296e-07, "loss": 0.07019102573394775, "step": 6973 }, { "epoch": 0.9428624541598364, "grad_norm": 0.054396599531173706, "learning_rate": 2.798528676277368e-07, "loss": 0.04919242858886719, "step": 6974 }, { "epoch": 0.9429976509556723, "grad_norm": 0.07416316121816635, "learning_rate": 2.785384594793738e-07, "loss": 0.05912590026855469, "step": 6975 }, { "epoch": 0.9431328477515083, "grad_norm": 0.08746141940355301, "learning_rate": 2.772271164178086e-07, "loss": 0.044594764709472656, "step": 6976 }, { "epoch": 0.9432680445473443, "grad_norm": 0.07813379168510437, "learning_rate": 2.759188387160677e-07, "loss": 0.05049169063568115, "step": 6977 }, { "epoch": 0.9434032413431802, "grad_norm": 0.04530896618962288, "learning_rate": 2.746136266465449e-07, "loss": 0.03863561153411865, "step": 6978 }, { "epoch": 0.9435384381390161, "grad_norm": 0.06993469595909119, "learning_rate": 2.7331148048098943e-07, "loss": 0.054784297943115234, "step": 6979 }, { "epoch": 0.943673634934852, "grad_norm": 0.06468288600444794, "learning_rate": 2.7201240049051613e-07, "loss": 0.05142354965209961, "step": 6980 }, { "epoch": 0.943808831730688, "grad_norm": 0.043773747980594635, "learning_rate": 2.707163869455986e-07, "loss": 0.03817009925842285, "step": 6981 }, { "epoch": 0.9439440285265239, "grad_norm": 0.11098359525203705, "learning_rate": 2.694234401160778e-07, "loss": 0.07822735607624054, "step": 6982 }, { "epoch": 0.9440792253223599, "grad_norm": 0.12570399045944214, "learning_rate": 2.6813356027114986e-07, "loss": 0.08304531872272491, "step": 6983 }, { "epoch": 0.9442144221181958, "grad_norm": 0.0534651055932045, "learning_rate": 2.6684674767937346e-07, "loss": 0.04551506042480469, "step": 6984 }, { "epoch": 0.9443496189140317, "grad_norm": 0.13213485479354858, "learning_rate": 2.655630026086708e-07, "loss": 0.09651803970336914, "step": 6985 }, { "epoch": 0.9444848157098676, "grad_norm": 0.06385689973831177, "learning_rate": 2.642823253263249e-07, "loss": 0.054630815982818604, "step": 6986 }, { "epoch": 0.9446200125057036, "grad_norm": 0.07354943454265594, "learning_rate": 2.630047160989807e-07, "loss": 0.0584794282913208, "step": 6987 }, { "epoch": 0.9447552093015396, "grad_norm": 0.04315639287233353, "learning_rate": 2.6173017519263875e-07, "loss": 0.04550975561141968, "step": 6988 }, { "epoch": 0.9448904060973755, "grad_norm": 0.0740494504570961, "learning_rate": 2.6045870287267014e-07, "loss": 0.053692519664764404, "step": 6989 }, { "epoch": 0.9450256028932115, "grad_norm": 0.05050640553236008, "learning_rate": 2.5919029940380147e-07, "loss": 0.03628945350646973, "step": 6990 }, { "epoch": 0.9451607996890473, "grad_norm": 0.09610525518655777, "learning_rate": 2.5792496505011807e-07, "loss": 0.08017885684967041, "step": 6991 }, { "epoch": 0.9452959964848833, "grad_norm": 0.14941200613975525, "learning_rate": 2.5666270007507266e-07, "loss": 0.07337033748626709, "step": 6992 }, { "epoch": 0.9454311932807192, "grad_norm": 0.057521216571331024, "learning_rate": 2.5540350474147324e-07, "loss": 0.04524034261703491, "step": 6993 }, { "epoch": 0.9455663900765552, "grad_norm": 0.05158543586730957, "learning_rate": 2.5414737931149346e-07, "loss": 0.03907042741775513, "step": 6994 }, { "epoch": 0.9457015868723911, "grad_norm": 0.06468634307384491, "learning_rate": 2.5289432404666246e-07, "loss": 0.05111527442932129, "step": 6995 }, { "epoch": 0.9458367836682271, "grad_norm": 0.07391882687807083, "learning_rate": 2.5164433920787487e-07, "loss": 0.07205140590667725, "step": 6996 }, { "epoch": 0.9459719804640631, "grad_norm": 0.03957660496234894, "learning_rate": 2.503974250553842e-07, "loss": 0.04009866714477539, "step": 6997 }, { "epoch": 0.9461071772598989, "grad_norm": 0.06391185522079468, "learning_rate": 2.491535818488011e-07, "loss": 0.04989814758300781, "step": 6998 }, { "epoch": 0.9462423740557349, "grad_norm": 0.16935226321220398, "learning_rate": 2.479128098471067e-07, "loss": 0.060219258069992065, "step": 6999 }, { "epoch": 0.9463775708515708, "grad_norm": 0.08015145361423492, "learning_rate": 2.466751093086328e-07, "loss": 0.05099630355834961, "step": 7000 }, { "epoch": 0.9465127676474068, "grad_norm": 0.07341624796390533, "learning_rate": 2.454404804910748e-07, "loss": 0.050408005714416504, "step": 7001 }, { "epoch": 0.9466479644432427, "grad_norm": 0.045486945658922195, "learning_rate": 2.442089236514888e-07, "loss": 0.03636839985847473, "step": 7002 }, { "epoch": 0.9467831612390787, "grad_norm": 0.06731517612934113, "learning_rate": 2.429804390462931e-07, "loss": 0.06496882438659668, "step": 7003 }, { "epoch": 0.9469183580349145, "grad_norm": 0.057919710874557495, "learning_rate": 2.4175502693126293e-07, "loss": 0.04881696403026581, "step": 7004 }, { "epoch": 0.9470535548307505, "grad_norm": 0.10030993819236755, "learning_rate": 2.4053268756153933e-07, "loss": 0.049046725034713745, "step": 7005 }, { "epoch": 0.9471887516265864, "grad_norm": 0.11372458189725876, "learning_rate": 2.393134211916154e-07, "loss": 0.09297668933868408, "step": 7006 }, { "epoch": 0.9473239484224224, "grad_norm": 0.13003067672252655, "learning_rate": 2.3809722807535128e-07, "loss": 0.05327713489532471, "step": 7007 }, { "epoch": 0.9474591452182584, "grad_norm": 0.13583219051361084, "learning_rate": 2.3688410846596287e-07, "loss": 0.06853389739990234, "step": 7008 }, { "epoch": 0.9475943420140943, "grad_norm": 0.08847765624523163, "learning_rate": 2.3567406261603143e-07, "loss": 0.05989217758178711, "step": 7009 }, { "epoch": 0.9477295388099302, "grad_norm": 0.05565788596868515, "learning_rate": 2.3446709077749206e-07, "loss": 0.0533231645822525, "step": 7010 }, { "epoch": 0.9478647356057661, "grad_norm": 0.07357075065374374, "learning_rate": 2.3326319320164546e-07, "loss": 0.08249294757843018, "step": 7011 }, { "epoch": 0.9479999324016021, "grad_norm": 0.11841505765914917, "learning_rate": 2.320623701391461e-07, "loss": 0.05325794219970703, "step": 7012 }, { "epoch": 0.948135129197438, "grad_norm": 0.04237016290426254, "learning_rate": 2.30864621840014e-07, "loss": 0.04903078079223633, "step": 7013 }, { "epoch": 0.948270325993274, "grad_norm": 0.04357681795954704, "learning_rate": 2.2966994855362633e-07, "loss": 0.043715476989746094, "step": 7014 }, { "epoch": 0.94840552278911, "grad_norm": 0.07724309712648392, "learning_rate": 2.2847835052872079e-07, "loss": 0.053927481174468994, "step": 7015 }, { "epoch": 0.9485407195849458, "grad_norm": 0.05171968787908554, "learning_rate": 2.2728982801339392e-07, "loss": 0.04001206159591675, "step": 7016 }, { "epoch": 0.9486759163807817, "grad_norm": 0.05697864294052124, "learning_rate": 2.261043812551028e-07, "loss": 0.04075813293457031, "step": 7017 }, { "epoch": 0.9488111131766177, "grad_norm": 0.08187760412693024, "learning_rate": 2.249220105006633e-07, "loss": 0.05548381805419922, "step": 7018 }, { "epoch": 0.9489463099724537, "grad_norm": 0.07538948953151703, "learning_rate": 2.2374271599625185e-07, "loss": 0.0672069787979126, "step": 7019 }, { "epoch": 0.9490815067682896, "grad_norm": 0.08061212301254272, "learning_rate": 2.2256649798740204e-07, "loss": 0.04513835906982422, "step": 7020 }, { "epoch": 0.9492167035641256, "grad_norm": 0.16609831154346466, "learning_rate": 2.2139335671901294e-07, "loss": 0.09791260957717896, "step": 7021 }, { "epoch": 0.9493519003599614, "grad_norm": 0.03898888826370239, "learning_rate": 2.2022329243533422e-07, "loss": 0.033918142318725586, "step": 7022 }, { "epoch": 0.9494870971557974, "grad_norm": 0.07824122160673141, "learning_rate": 2.19056305379981e-07, "loss": 0.049491167068481445, "step": 7023 }, { "epoch": 0.9496222939516333, "grad_norm": 0.09263777732849121, "learning_rate": 2.178923957959289e-07, "loss": 0.05721890926361084, "step": 7024 }, { "epoch": 0.9497574907474693, "grad_norm": 0.05878986418247223, "learning_rate": 2.1673156392550408e-07, "loss": 0.05939221382141113, "step": 7025 }, { "epoch": 0.9498926875433052, "grad_norm": 0.16350974142551422, "learning_rate": 2.155738100104049e-07, "loss": 0.08903127908706665, "step": 7026 }, { "epoch": 0.9500278843391412, "grad_norm": 0.05151699110865593, "learning_rate": 2.1441913429167682e-07, "loss": 0.02555510401725769, "step": 7027 }, { "epoch": 0.950163081134977, "grad_norm": 0.1628103256225586, "learning_rate": 2.1326753700973256e-07, "loss": 0.0980062484741211, "step": 7028 }, { "epoch": 0.950298277930813, "grad_norm": 0.15239852666854858, "learning_rate": 2.1211901840434034e-07, "loss": 0.0710453987121582, "step": 7029 }, { "epoch": 0.950433474726649, "grad_norm": 0.06128983572125435, "learning_rate": 2.1097357871462386e-07, "loss": 0.037694454193115234, "step": 7030 }, { "epoch": 0.9505686715224849, "grad_norm": 0.08963966369628906, "learning_rate": 2.098312181790757e-07, "loss": 0.06391239166259766, "step": 7031 }, { "epoch": 0.9507038683183209, "grad_norm": 0.0813160240650177, "learning_rate": 2.086919370355389e-07, "loss": 0.06886947154998779, "step": 7032 }, { "epoch": 0.9508390651141568, "grad_norm": 0.08002682775259018, "learning_rate": 2.075557355212171e-07, "loss": 0.06606996059417725, "step": 7033 }, { "epoch": 0.9509742619099927, "grad_norm": 0.1222519651055336, "learning_rate": 2.0642261387267268e-07, "loss": 0.06932210922241211, "step": 7034 }, { "epoch": 0.9511094587058286, "grad_norm": 0.03813016042113304, "learning_rate": 2.0529257232583033e-07, "loss": 0.03692770004272461, "step": 7035 }, { "epoch": 0.9512446555016646, "grad_norm": 0.05937309190630913, "learning_rate": 2.0416561111596844e-07, "loss": 0.044332027435302734, "step": 7036 }, { "epoch": 0.9513798522975006, "grad_norm": 0.07218378037214279, "learning_rate": 2.0304173047772933e-07, "loss": 0.05002550780773163, "step": 7037 }, { "epoch": 0.9515150490933365, "grad_norm": 0.06665070354938507, "learning_rate": 2.0192093064510753e-07, "loss": 0.04206490516662598, "step": 7038 }, { "epoch": 0.9516502458891725, "grad_norm": 0.038208600133657455, "learning_rate": 2.0080321185146134e-07, "loss": 0.027156829833984375, "step": 7039 }, { "epoch": 0.9517854426850084, "grad_norm": 0.07616526633501053, "learning_rate": 1.996885743295046e-07, "loss": 0.06332600116729736, "step": 7040 }, { "epoch": 0.9519206394808443, "grad_norm": 0.08870632946491241, "learning_rate": 1.985770183113117e-07, "loss": 0.060047268867492676, "step": 7041 }, { "epoch": 0.9520558362766802, "grad_norm": 0.05216154828667641, "learning_rate": 1.9746854402831583e-07, "loss": 0.0407564640045166, "step": 7042 }, { "epoch": 0.9521910330725162, "grad_norm": 0.09871078282594681, "learning_rate": 1.963631517113057e-07, "loss": 0.08051782846450806, "step": 7043 }, { "epoch": 0.9523262298683521, "grad_norm": 0.0778815969824791, "learning_rate": 1.952608415904289e-07, "loss": 0.053549885749816895, "step": 7044 }, { "epoch": 0.9524614266641881, "grad_norm": 0.05686617270112038, "learning_rate": 1.9416161389519348e-07, "loss": 0.04177820682525635, "step": 7045 }, { "epoch": 0.952596623460024, "grad_norm": 0.05548636242747307, "learning_rate": 1.9306546885446475e-07, "loss": 0.04991555213928223, "step": 7046 }, { "epoch": 0.9527318202558599, "grad_norm": 0.09127151221036911, "learning_rate": 1.919724066964651e-07, "loss": 0.04975247383117676, "step": 7047 }, { "epoch": 0.9528670170516959, "grad_norm": 0.0802159234881401, "learning_rate": 1.908824276487775e-07, "loss": 0.05361223220825195, "step": 7048 }, { "epoch": 0.9530022138475318, "grad_norm": 0.0906638652086258, "learning_rate": 1.8979553193833876e-07, "loss": 0.0714186429977417, "step": 7049 }, { "epoch": 0.9531374106433678, "grad_norm": 0.10248968005180359, "learning_rate": 1.8871171979144786e-07, "loss": 0.04005098342895508, "step": 7050 }, { "epoch": 0.9532726074392037, "grad_norm": 0.06315653771162033, "learning_rate": 1.8763099143376262e-07, "loss": 0.04148685932159424, "step": 7051 }, { "epoch": 0.9534078042350397, "grad_norm": 0.11445050686597824, "learning_rate": 1.8655334709029303e-07, "loss": 0.0642225444316864, "step": 7052 }, { "epoch": 0.9535430010308755, "grad_norm": 0.11468757688999176, "learning_rate": 1.8547878698541132e-07, "loss": 0.07507681846618652, "step": 7053 }, { "epoch": 0.9536781978267115, "grad_norm": 0.08256664872169495, "learning_rate": 1.8440731134284684e-07, "loss": 0.05866289138793945, "step": 7054 }, { "epoch": 0.9538133946225474, "grad_norm": 0.07681503891944885, "learning_rate": 1.833389203856861e-07, "loss": 0.06564569473266602, "step": 7055 }, { "epoch": 0.9539485914183834, "grad_norm": 0.04535171762108803, "learning_rate": 1.8227361433637625e-07, "loss": 0.04410219192504883, "step": 7056 }, { "epoch": 0.9540837882142194, "grad_norm": 0.07874813675880432, "learning_rate": 1.812113934167148e-07, "loss": 0.05748295783996582, "step": 7057 }, { "epoch": 0.9542189850100553, "grad_norm": 0.06298123300075531, "learning_rate": 1.8015225784786483e-07, "loss": 0.05262655019760132, "step": 7058 }, { "epoch": 0.9543541818058912, "grad_norm": 0.1091485545039177, "learning_rate": 1.7909620785034663e-07, "loss": 0.0746760368347168, "step": 7059 }, { "epoch": 0.9544893786017271, "grad_norm": 0.10086274147033691, "learning_rate": 1.7804324364402936e-07, "loss": 0.04511404037475586, "step": 7060 }, { "epoch": 0.9546245753975631, "grad_norm": 0.0677308589220047, "learning_rate": 1.769933654481526e-07, "loss": 0.05417579412460327, "step": 7061 }, { "epoch": 0.954759772193399, "grad_norm": 0.18758665025234222, "learning_rate": 1.7594657348129984e-07, "loss": 0.07589542865753174, "step": 7062 }, { "epoch": 0.954894968989235, "grad_norm": 0.07013189792633057, "learning_rate": 1.749028679614234e-07, "loss": 0.0611720085144043, "step": 7063 }, { "epoch": 0.9550301657850709, "grad_norm": 0.1402924507856369, "learning_rate": 1.7386224910582615e-07, "loss": 0.06402111053466797, "step": 7064 }, { "epoch": 0.9551653625809068, "grad_norm": 0.11010725051164627, "learning_rate": 1.728247171311731e-07, "loss": 0.10113859176635742, "step": 7065 }, { "epoch": 0.9553005593767427, "grad_norm": 0.06461682170629501, "learning_rate": 1.7179027225348142e-07, "loss": 0.056375741958618164, "step": 7066 }, { "epoch": 0.9554357561725787, "grad_norm": 0.11112015694379807, "learning_rate": 1.7075891468812722e-07, "loss": 0.08144569396972656, "step": 7067 }, { "epoch": 0.9555709529684147, "grad_norm": 0.08525556325912476, "learning_rate": 1.69730644649847e-07, "loss": 0.05287277698516846, "step": 7068 }, { "epoch": 0.9557061497642506, "grad_norm": 0.1605376899242401, "learning_rate": 1.687054623527312e-07, "loss": 0.06701302528381348, "step": 7069 }, { "epoch": 0.9558413465600866, "grad_norm": 0.11374951153993607, "learning_rate": 1.676833680102291e-07, "loss": 0.07854437828063965, "step": 7070 }, { "epoch": 0.9559765433559224, "grad_norm": 0.10113842040300369, "learning_rate": 1.6666436183514378e-07, "loss": 0.049597740173339844, "step": 7071 }, { "epoch": 0.9561117401517584, "grad_norm": 0.06007857620716095, "learning_rate": 1.6564844403964053e-07, "loss": 0.048806190490722656, "step": 7072 }, { "epoch": 0.9562469369475943, "grad_norm": 0.04048439860343933, "learning_rate": 1.6463561483523682e-07, "loss": 0.0379909873008728, "step": 7073 }, { "epoch": 0.9563821337434303, "grad_norm": 0.06618840247392654, "learning_rate": 1.6362587443281063e-07, "loss": 0.05173051357269287, "step": 7074 }, { "epoch": 0.9565173305392662, "grad_norm": 0.09910482168197632, "learning_rate": 1.626192230425938e-07, "loss": 0.05704766511917114, "step": 7075 }, { "epoch": 0.9566525273351022, "grad_norm": 0.050393156707286835, "learning_rate": 1.6161566087417868e-07, "loss": 0.04679848253726959, "step": 7076 }, { "epoch": 0.956787724130938, "grad_norm": 0.05030618980526924, "learning_rate": 1.6061518813650977e-07, "loss": 0.05124664306640625, "step": 7077 }, { "epoch": 0.956922920926774, "grad_norm": 0.04689155891537666, "learning_rate": 1.5961780503789215e-07, "loss": 0.04890674352645874, "step": 7078 }, { "epoch": 0.95705811772261, "grad_norm": 0.13164012134075165, "learning_rate": 1.5862351178598633e-07, "loss": 0.06755131483078003, "step": 7079 }, { "epoch": 0.9571933145184459, "grad_norm": 0.06548308581113815, "learning_rate": 1.5763230858781008e-07, "loss": 0.04771256446838379, "step": 7080 }, { "epoch": 0.9573285113142819, "grad_norm": 0.05163513869047165, "learning_rate": 1.5664419564973497e-07, "loss": 0.0486447811126709, "step": 7081 }, { "epoch": 0.9574637081101178, "grad_norm": 0.08825210481882095, "learning_rate": 1.5565917317749146e-07, "loss": 0.05896306037902832, "step": 7082 }, { "epoch": 0.9575989049059537, "grad_norm": 0.053509511053562164, "learning_rate": 1.5467724137617046e-07, "loss": 0.0438838005065918, "step": 7083 }, { "epoch": 0.9577341017017896, "grad_norm": 0.08421257138252258, "learning_rate": 1.5369840045021178e-07, "loss": 0.08101487159729004, "step": 7084 }, { "epoch": 0.9578692984976256, "grad_norm": 0.08983268588781357, "learning_rate": 1.5272265060341572e-07, "loss": 0.06287050247192383, "step": 7085 }, { "epoch": 0.9580044952934615, "grad_norm": 0.06738763302564621, "learning_rate": 1.517499920389398e-07, "loss": 0.06264352798461914, "step": 7086 }, { "epoch": 0.9581396920892975, "grad_norm": 0.0920373946428299, "learning_rate": 1.5078042495929534e-07, "loss": 0.05201005935668945, "step": 7087 }, { "epoch": 0.9582748888851335, "grad_norm": 0.07492781430482864, "learning_rate": 1.498139495663542e-07, "loss": 0.04308652877807617, "step": 7088 }, { "epoch": 0.9584100856809694, "grad_norm": 0.10955853015184402, "learning_rate": 1.4885056606133707e-07, "loss": 0.06994962692260742, "step": 7089 }, { "epoch": 0.9585452824768053, "grad_norm": 0.06498569995164871, "learning_rate": 1.478902746448302e-07, "loss": 0.0503995418548584, "step": 7090 }, { "epoch": 0.9586804792726412, "grad_norm": 0.09128726273775101, "learning_rate": 1.469330755167686e-07, "loss": 0.06974124908447266, "step": 7091 }, { "epoch": 0.9588156760684772, "grad_norm": 0.13307435810565948, "learning_rate": 1.4597896887644458e-07, "loss": 0.0832834243774414, "step": 7092 }, { "epoch": 0.9589508728643131, "grad_norm": 0.07883147150278091, "learning_rate": 1.4502795492251418e-07, "loss": 0.04476213455200195, "step": 7093 }, { "epoch": 0.9590860696601491, "grad_norm": 0.0650823637843132, "learning_rate": 1.4408003385297742e-07, "loss": 0.029877454042434692, "step": 7094 }, { "epoch": 0.959221266455985, "grad_norm": 0.11174848675727844, "learning_rate": 1.4313520586519968e-07, "loss": 0.0674666166305542, "step": 7095 }, { "epoch": 0.9593564632518209, "grad_norm": 0.057634565979242325, "learning_rate": 1.4219347115589863e-07, "loss": 0.04873943328857422, "step": 7096 }, { "epoch": 0.9594916600476568, "grad_norm": 0.09915723651647568, "learning_rate": 1.4125482992114914e-07, "loss": 0.07328510284423828, "step": 7097 }, { "epoch": 0.9596268568434928, "grad_norm": 0.09499924629926682, "learning_rate": 1.403192823563798e-07, "loss": 0.05839717388153076, "step": 7098 }, { "epoch": 0.9597620536393288, "grad_norm": 0.13537491858005524, "learning_rate": 1.3938682865637654e-07, "loss": 0.08152484893798828, "step": 7099 }, { "epoch": 0.9598972504351647, "grad_norm": 0.10001429915428162, "learning_rate": 1.38457469015284e-07, "loss": 0.06018853187561035, "step": 7100 }, { "epoch": 0.9600324472310007, "grad_norm": 0.06562701612710953, "learning_rate": 1.3753120362659576e-07, "loss": 0.0664825439453125, "step": 7101 }, { "epoch": 0.9601676440268365, "grad_norm": 0.03771786764264107, "learning_rate": 1.3660803268316925e-07, "loss": 0.03746604919433594, "step": 7102 }, { "epoch": 0.9603028408226725, "grad_norm": 0.044692859053611755, "learning_rate": 1.3568795637721065e-07, "loss": 0.0363612174987793, "step": 7103 }, { "epoch": 0.9604380376185084, "grad_norm": 0.04847042262554169, "learning_rate": 1.347709749002851e-07, "loss": 0.040572166442871094, "step": 7104 }, { "epoch": 0.9605732344143444, "grad_norm": 0.16118532419204712, "learning_rate": 1.338570884433149e-07, "loss": 0.07378178834915161, "step": 7105 }, { "epoch": 0.9607084312101803, "grad_norm": 0.048179082572460175, "learning_rate": 1.3294629719657448e-07, "loss": 0.05052661895751953, "step": 7106 }, { "epoch": 0.9608436280060163, "grad_norm": 0.06481017917394638, "learning_rate": 1.3203860134969548e-07, "loss": 0.034099578857421875, "step": 7107 }, { "epoch": 0.9609788248018521, "grad_norm": 0.09291870146989822, "learning_rate": 1.3113400109166508e-07, "loss": 0.04570817947387695, "step": 7108 }, { "epoch": 0.9611140215976881, "grad_norm": 0.07057782262563705, "learning_rate": 1.3023249661082592e-07, "loss": 0.06121492385864258, "step": 7109 }, { "epoch": 0.9612492183935241, "grad_norm": 0.07935141772031784, "learning_rate": 1.2933408809487623e-07, "loss": 0.0702776312828064, "step": 7110 }, { "epoch": 0.96138441518936, "grad_norm": 0.08906565606594086, "learning_rate": 1.2843877573086972e-07, "loss": 0.08137989044189453, "step": 7111 }, { "epoch": 0.961519611985196, "grad_norm": 0.1408509612083435, "learning_rate": 1.2754655970521556e-07, "loss": 0.08360910415649414, "step": 7112 }, { "epoch": 0.9616548087810319, "grad_norm": 0.12053748965263367, "learning_rate": 1.2665744020367686e-07, "loss": 0.06547069549560547, "step": 7113 }, { "epoch": 0.9617900055768678, "grad_norm": 0.0661163479089737, "learning_rate": 1.2577141741137388e-07, "loss": 0.06647157669067383, "step": 7114 }, { "epoch": 0.9619252023727037, "grad_norm": 0.11322446167469025, "learning_rate": 1.248884915127807e-07, "loss": 0.06463050842285156, "step": 7115 }, { "epoch": 0.9620603991685397, "grad_norm": 0.08344893902540207, "learning_rate": 1.2400866269172694e-07, "loss": 0.055196285247802734, "step": 7116 }, { "epoch": 0.9621955959643756, "grad_norm": 0.165816068649292, "learning_rate": 1.2313193113139777e-07, "loss": 0.06582069396972656, "step": 7117 }, { "epoch": 0.9623307927602116, "grad_norm": 0.047542229294776917, "learning_rate": 1.2225829701433545e-07, "loss": 0.038216590881347656, "step": 7118 }, { "epoch": 0.9624659895560476, "grad_norm": 0.14484909176826477, "learning_rate": 1.2138776052243116e-07, "loss": 0.055254220962524414, "step": 7119 }, { "epoch": 0.9626011863518834, "grad_norm": 0.0934133380651474, "learning_rate": 1.2052032183693996e-07, "loss": 0.06642961502075195, "step": 7120 }, { "epoch": 0.9627363831477194, "grad_norm": 0.10766567289829254, "learning_rate": 1.196559811384623e-07, "loss": 0.06594440340995789, "step": 7121 }, { "epoch": 0.9628715799435553, "grad_norm": 0.0728660449385643, "learning_rate": 1.1879473860696266e-07, "loss": 0.04430907964706421, "step": 7122 }, { "epoch": 0.9630067767393913, "grad_norm": 0.10184944421052933, "learning_rate": 1.179365944217542e-07, "loss": 0.053729116916656494, "step": 7123 }, { "epoch": 0.9631419735352272, "grad_norm": 0.13168060779571533, "learning_rate": 1.1708154876150735e-07, "loss": 0.08052492141723633, "step": 7124 }, { "epoch": 0.9632771703310632, "grad_norm": 0.09283110499382019, "learning_rate": 1.1622960180424801e-07, "loss": 0.054796457290649414, "step": 7125 }, { "epoch": 0.963412367126899, "grad_norm": 0.09164842963218689, "learning_rate": 1.1538075372735435e-07, "loss": 0.051995038986206055, "step": 7126 }, { "epoch": 0.963547563922735, "grad_norm": 0.11548691987991333, "learning_rate": 1.1453500470756328e-07, "loss": 0.06072866916656494, "step": 7127 }, { "epoch": 0.963682760718571, "grad_norm": 0.171317458152771, "learning_rate": 1.1369235492096397e-07, "loss": 0.08099818229675293, "step": 7128 }, { "epoch": 0.9638179575144069, "grad_norm": 0.0761416107416153, "learning_rate": 1.1285280454299774e-07, "loss": 0.05479097366333008, "step": 7129 }, { "epoch": 0.9639531543102429, "grad_norm": 0.05584999918937683, "learning_rate": 1.1201635374846808e-07, "loss": 0.05953693389892578, "step": 7130 }, { "epoch": 0.9640883511060788, "grad_norm": 0.06278429925441742, "learning_rate": 1.1118300271152404e-07, "loss": 0.04982280731201172, "step": 7131 }, { "epoch": 0.9642235479019148, "grad_norm": 0.12395094335079193, "learning_rate": 1.1035275160567682e-07, "loss": 0.06191062927246094, "step": 7132 }, { "epoch": 0.9643587446977506, "grad_norm": 0.07014553248882294, "learning_rate": 1.0952560060378813e-07, "loss": 0.05559396743774414, "step": 7133 }, { "epoch": 0.9644939414935866, "grad_norm": 0.15083228051662445, "learning_rate": 1.0870154987807523e-07, "loss": 0.04974627494812012, "step": 7134 }, { "epoch": 0.9646291382894225, "grad_norm": 0.09887823462486267, "learning_rate": 1.0788059960010921e-07, "loss": 0.07526826858520508, "step": 7135 }, { "epoch": 0.9647643350852585, "grad_norm": 0.08582594245672226, "learning_rate": 1.0706274994081499e-07, "loss": 0.05914449691772461, "step": 7136 }, { "epoch": 0.9648995318810945, "grad_norm": 0.103627510368824, "learning_rate": 1.0624800107047805e-07, "loss": 0.06508827209472656, "step": 7137 }, { "epoch": 0.9650347286769304, "grad_norm": 0.08308898657560349, "learning_rate": 1.0543635315872934e-07, "loss": 0.05675673484802246, "step": 7138 }, { "epoch": 0.9651699254727663, "grad_norm": 0.12995609641075134, "learning_rate": 1.0462780637455871e-07, "loss": 0.04163527488708496, "step": 7139 }, { "epoch": 0.9653051222686022, "grad_norm": 0.04397675767540932, "learning_rate": 1.0382236088631148e-07, "loss": 0.041150569915771484, "step": 7140 }, { "epoch": 0.9654403190644382, "grad_norm": 0.05513301119208336, "learning_rate": 1.0302001686168349e-07, "loss": 0.05726146697998047, "step": 7141 }, { "epoch": 0.9655755158602741, "grad_norm": 0.06386985629796982, "learning_rate": 1.0222077446772949e-07, "loss": 0.060245513916015625, "step": 7142 }, { "epoch": 0.9657107126561101, "grad_norm": 0.11923275887966156, "learning_rate": 1.0142463387085465e-07, "loss": 0.0731656551361084, "step": 7143 }, { "epoch": 0.965845909451946, "grad_norm": 0.09917903691530228, "learning_rate": 1.0063159523682142e-07, "loss": 0.0786084234714508, "step": 7144 }, { "epoch": 0.9659811062477819, "grad_norm": 0.13281050324440002, "learning_rate": 9.984165873074102e-08, "loss": 0.0727696418762207, "step": 7145 }, { "epoch": 0.9661163030436178, "grad_norm": 0.1297747641801834, "learning_rate": 9.905482451708526e-08, "loss": 0.07542753219604492, "step": 7146 }, { "epoch": 0.9662514998394538, "grad_norm": 0.07161790132522583, "learning_rate": 9.827109275967638e-08, "loss": 0.05749034881591797, "step": 7147 }, { "epoch": 0.9663866966352898, "grad_norm": 0.09546342492103577, "learning_rate": 9.749046362169223e-08, "loss": 0.06731534004211426, "step": 7148 }, { "epoch": 0.9665218934311257, "grad_norm": 0.055850736796855927, "learning_rate": 9.671293726566443e-08, "loss": 0.045067548751831055, "step": 7149 }, { "epoch": 0.9666570902269617, "grad_norm": 0.11278649419546127, "learning_rate": 9.593851385347518e-08, "loss": 0.07315880060195923, "step": 7150 }, { "epoch": 0.9667922870227975, "grad_norm": 0.05858650803565979, "learning_rate": 9.516719354636716e-08, "loss": 0.032311201095581055, "step": 7151 }, { "epoch": 0.9669274838186335, "grad_norm": 0.05344470962882042, "learning_rate": 9.439897650493024e-08, "loss": 0.04370051622390747, "step": 7152 }, { "epoch": 0.9670626806144694, "grad_norm": 0.053297895938158035, "learning_rate": 9.363386288911313e-08, "loss": 0.0451812744140625, "step": 7153 }, { "epoch": 0.9671978774103054, "grad_norm": 0.08843020349740982, "learning_rate": 9.287185285821675e-08, "loss": 0.07363772392272949, "step": 7154 }, { "epoch": 0.9673330742061413, "grad_norm": 0.10848035663366318, "learning_rate": 9.211294657089587e-08, "loss": 0.0557703971862793, "step": 7155 }, { "epoch": 0.9674682710019773, "grad_norm": 0.08803831040859222, "learning_rate": 9.135714418515573e-08, "loss": 0.04780101776123047, "step": 7156 }, { "epoch": 0.9676034677978131, "grad_norm": 0.08944506198167801, "learning_rate": 9.060444585836381e-08, "loss": 0.06505727767944336, "step": 7157 }, { "epoch": 0.9677386645936491, "grad_norm": 0.10320509970188141, "learning_rate": 8.985485174722974e-08, "loss": 0.05461621284484863, "step": 7158 }, { "epoch": 0.9678738613894851, "grad_norm": 0.08219880610704422, "learning_rate": 8.910836200782868e-08, "loss": 0.060118675231933594, "step": 7159 }, { "epoch": 0.968009058185321, "grad_norm": 0.09920164197683334, "learning_rate": 8.836497679557964e-08, "loss": 0.06039631366729736, "step": 7160 }, { "epoch": 0.968144254981157, "grad_norm": 0.11348406970500946, "learning_rate": 8.762469626526048e-08, "loss": 0.07171249389648438, "step": 7161 }, { "epoch": 0.9682794517769929, "grad_norm": 0.10537512600421906, "learning_rate": 8.688752057100457e-08, "loss": 0.09157943725585938, "step": 7162 }, { "epoch": 0.9684146485728288, "grad_norm": 0.05739142373204231, "learning_rate": 8.615344986629082e-08, "loss": 0.043684959411621094, "step": 7163 }, { "epoch": 0.9685498453686647, "grad_norm": 0.12338952720165253, "learning_rate": 8.542248430396027e-08, "loss": 0.056375980377197266, "step": 7164 }, { "epoch": 0.9686850421645007, "grad_norm": 0.07579125463962555, "learning_rate": 8.469462403620287e-08, "loss": 0.041066765785217285, "step": 7165 }, { "epoch": 0.9688202389603366, "grad_norm": 0.13266021013259888, "learning_rate": 8.39698692145624e-08, "loss": 0.04218494892120361, "step": 7166 }, { "epoch": 0.9689554357561726, "grad_norm": 0.04894666746258736, "learning_rate": 8.324821998993648e-08, "loss": 0.03298592567443848, "step": 7167 }, { "epoch": 0.9690906325520086, "grad_norm": 0.059127770364284515, "learning_rate": 8.252967651257826e-08, "loss": 0.044711530208587646, "step": 7168 }, { "epoch": 0.9692258293478444, "grad_norm": 0.07320144027471542, "learning_rate": 8.181423893208973e-08, "loss": 0.06138050556182861, "step": 7169 }, { "epoch": 0.9693610261436804, "grad_norm": 0.08178849518299103, "learning_rate": 8.110190739743172e-08, "loss": 0.04873943328857422, "step": 7170 }, { "epoch": 0.9694962229395163, "grad_norm": 0.06717972457408905, "learning_rate": 8.03926820569123e-08, "loss": 0.04816484451293945, "step": 7171 }, { "epoch": 0.9696314197353523, "grad_norm": 0.15655025839805603, "learning_rate": 7.968656305819833e-08, "loss": 0.05584096908569336, "step": 7172 }, { "epoch": 0.9697666165311882, "grad_norm": 0.06611785292625427, "learning_rate": 7.898355054830719e-08, "loss": 0.05482077598571777, "step": 7173 }, { "epoch": 0.9699018133270242, "grad_norm": 0.10130004584789276, "learning_rate": 7.828364467360849e-08, "loss": 0.06627640128135681, "step": 7174 }, { "epoch": 0.97003701012286, "grad_norm": 0.07988695800304413, "learning_rate": 7.758684557982731e-08, "loss": 0.056477874517440796, "step": 7175 }, { "epoch": 0.970172206918696, "grad_norm": 0.07316162437200546, "learning_rate": 7.689315341204262e-08, "loss": 0.06038975715637207, "step": 7176 }, { "epoch": 0.970307403714532, "grad_norm": 0.02777114138007164, "learning_rate": 7.62025683146822e-08, "loss": 0.02889883518218994, "step": 7177 }, { "epoch": 0.9704426005103679, "grad_norm": 0.05920135974884033, "learning_rate": 7.551509043152937e-08, "loss": 0.0374600887298584, "step": 7178 }, { "epoch": 0.9705777973062039, "grad_norm": 0.08617579936981201, "learning_rate": 7.483071990572132e-08, "loss": 0.05197763442993164, "step": 7179 }, { "epoch": 0.9707129941020398, "grad_norm": 0.07711437344551086, "learning_rate": 7.414945687975072e-08, "loss": 0.06940889358520508, "step": 7180 }, { "epoch": 0.9708481908978758, "grad_norm": 0.15267902612686157, "learning_rate": 7.347130149545578e-08, "loss": 0.09007930755615234, "step": 7181 }, { "epoch": 0.9709833876937116, "grad_norm": 0.11504143476486206, "learning_rate": 7.279625389403355e-08, "loss": 0.07199335098266602, "step": 7182 }, { "epoch": 0.9711185844895476, "grad_norm": 0.09618354588747025, "learning_rate": 7.212431421603327e-08, "loss": 0.0663142204284668, "step": 7183 }, { "epoch": 0.9712537812853835, "grad_norm": 0.06710486859083176, "learning_rate": 7.145548260135638e-08, "loss": 0.056787967681884766, "step": 7184 }, { "epoch": 0.9713889780812195, "grad_norm": 0.10158078372478485, "learning_rate": 7.078975918925645e-08, "loss": 0.0723709836602211, "step": 7185 }, { "epoch": 0.9715241748770554, "grad_norm": 0.05166007578372955, "learning_rate": 7.012714411834098e-08, "loss": 0.04442107677459717, "step": 7186 }, { "epoch": 0.9716593716728914, "grad_norm": 0.04055127128958702, "learning_rate": 6.946763752656959e-08, "loss": 0.04286599159240723, "step": 7187 }, { "epoch": 0.9717945684687272, "grad_norm": 0.1107621118426323, "learning_rate": 6.881123955125579e-08, "loss": 0.08385276794433594, "step": 7188 }, { "epoch": 0.9719297652645632, "grad_norm": 0.06057797744870186, "learning_rate": 6.815795032906524e-08, "loss": 0.05335426330566406, "step": 7189 }, { "epoch": 0.9720649620603992, "grad_norm": 0.07267526537179947, "learning_rate": 6.750776999601415e-08, "loss": 0.05660605430603027, "step": 7190 }, { "epoch": 0.9722001588562351, "grad_norm": 0.1337149441242218, "learning_rate": 6.68606986874759e-08, "loss": 0.05895906686782837, "step": 7191 }, { "epoch": 0.9723353556520711, "grad_norm": 0.08872835338115692, "learning_rate": 6.62167365381744e-08, "loss": 0.05801975727081299, "step": 7192 }, { "epoch": 0.972470552447907, "grad_norm": 0.0661938264966011, "learning_rate": 6.557588368218237e-08, "loss": 0.05499553680419922, "step": 7193 }, { "epoch": 0.9726057492437429, "grad_norm": 0.12666253745555878, "learning_rate": 6.493814025293476e-08, "loss": 0.06466269493103027, "step": 7194 }, { "epoch": 0.9727409460395788, "grad_norm": 0.07209885120391846, "learning_rate": 6.430350638320704e-08, "loss": 0.05181649327278137, "step": 7195 }, { "epoch": 0.9728761428354148, "grad_norm": 0.08253423869609833, "learning_rate": 6.367198220513848e-08, "loss": 0.05649292469024658, "step": 7196 }, { "epoch": 0.9730113396312507, "grad_norm": 0.07808000594377518, "learning_rate": 6.304356785021226e-08, "loss": 0.07116007804870605, "step": 7197 }, { "epoch": 0.9731465364270867, "grad_norm": 0.05046825483441353, "learning_rate": 6.241826344926704e-08, "loss": 0.0363239049911499, "step": 7198 }, { "epoch": 0.9732817332229227, "grad_norm": 0.08713020384311676, "learning_rate": 6.17960691324987e-08, "loss": 0.054728031158447266, "step": 7199 }, { "epoch": 0.9734169300187585, "grad_norm": 0.06168987601995468, "learning_rate": 6.117698502944857e-08, "loss": 0.03146451711654663, "step": 7200 }, { "epoch": 0.9735521268145945, "grad_norm": 0.13072951138019562, "learning_rate": 6.056101126901358e-08, "loss": 0.07768011093139648, "step": 7201 }, { "epoch": 0.9736873236104304, "grad_norm": 0.056231435388326645, "learning_rate": 5.994814797944281e-08, "loss": 0.055208683013916016, "step": 7202 }, { "epoch": 0.9738225204062664, "grad_norm": 0.15364541113376617, "learning_rate": 5.933839528833751e-08, "loss": 0.06423664093017578, "step": 7203 }, { "epoch": 0.9739577172021023, "grad_norm": 0.09420917928218842, "learning_rate": 5.873175332265279e-08, "loss": 0.0685586929321289, "step": 7204 }, { "epoch": 0.9740929139979383, "grad_norm": 0.1318870633840561, "learning_rate": 5.812822220869096e-08, "loss": 0.07795560359954834, "step": 7205 }, { "epoch": 0.9742281107937741, "grad_norm": 0.1434108316898346, "learning_rate": 5.752780207211483e-08, "loss": 0.08306550979614258, "step": 7206 }, { "epoch": 0.9743633075896101, "grad_norm": 0.07491429895162582, "learning_rate": 5.693049303793274e-08, "loss": 0.04940158128738403, "step": 7207 }, { "epoch": 0.974498504385446, "grad_norm": 0.048310086131095886, "learning_rate": 5.6336295230508536e-08, "loss": 0.036936283111572266, "step": 7208 }, { "epoch": 0.974633701181282, "grad_norm": 0.12229932099580765, "learning_rate": 5.5745208773558266e-08, "loss": 0.045446574687957764, "step": 7209 }, { "epoch": 0.974768897977118, "grad_norm": 0.13875699043273926, "learning_rate": 5.515723379014681e-08, "loss": 0.05692625045776367, "step": 7210 }, { "epoch": 0.9749040947729539, "grad_norm": 0.08782761543989182, "learning_rate": 5.4572370402694583e-08, "loss": 0.07181024551391602, "step": 7211 }, { "epoch": 0.9750392915687898, "grad_norm": 0.07965756952762604, "learning_rate": 5.399061873297417e-08, "loss": 0.049305230379104614, "step": 7212 }, { "epoch": 0.9751744883646257, "grad_norm": 0.05086463689804077, "learning_rate": 5.341197890210869e-08, "loss": 0.03241884708404541, "step": 7213 }, { "epoch": 0.9753096851604617, "grad_norm": 0.06518615037202835, "learning_rate": 5.283645103057344e-08, "loss": 0.04001426696777344, "step": 7214 }, { "epoch": 0.9754448819562976, "grad_norm": 0.08768096566200256, "learning_rate": 5.226403523819756e-08, "loss": 0.07132196426391602, "step": 7215 }, { "epoch": 0.9755800787521336, "grad_norm": 0.13590385019779205, "learning_rate": 5.169473164416072e-08, "loss": 0.0567096471786499, "step": 7216 }, { "epoch": 0.9757152755479696, "grad_norm": 0.04079766944050789, "learning_rate": 5.112854036699477e-08, "loss": 0.03612542152404785, "step": 7217 }, { "epoch": 0.9758504723438054, "grad_norm": 0.1411398947238922, "learning_rate": 5.0565461524583745e-08, "loss": 0.09985828399658203, "step": 7218 }, { "epoch": 0.9759856691396414, "grad_norm": 0.06892736256122589, "learning_rate": 5.0005495234163865e-08, "loss": 0.04550325870513916, "step": 7219 }, { "epoch": 0.9761208659354773, "grad_norm": 0.058781880885362625, "learning_rate": 4.9448641612321874e-08, "loss": 0.044287681579589844, "step": 7220 }, { "epoch": 0.9762560627313133, "grad_norm": 0.14731939136981964, "learning_rate": 4.889490077500003e-08, "loss": 0.058156371116638184, "step": 7221 }, { "epoch": 0.9763912595271492, "grad_norm": 0.12105114758014679, "learning_rate": 4.8344272837489434e-08, "loss": 0.08028936386108398, "step": 7222 }, { "epoch": 0.9765264563229852, "grad_norm": 0.09475019574165344, "learning_rate": 4.779675791443172e-08, "loss": 0.07539916038513184, "step": 7223 }, { "epoch": 0.9766616531188211, "grad_norm": 0.06494379788637161, "learning_rate": 4.72523561198257e-08, "loss": 0.05502772331237793, "step": 7224 }, { "epoch": 0.976796849914657, "grad_norm": 0.051857080310583115, "learning_rate": 4.6711067567014044e-08, "loss": 0.0483931303024292, "step": 7225 }, { "epoch": 0.9769320467104929, "grad_norm": 0.04452521353960037, "learning_rate": 4.6172892368701595e-08, "loss": 0.03325977921485901, "step": 7226 }, { "epoch": 0.9770672435063289, "grad_norm": 0.06195031851530075, "learning_rate": 4.5637830636935385e-08, "loss": 0.055794715881347656, "step": 7227 }, { "epoch": 0.9772024403021649, "grad_norm": 0.05458105355501175, "learning_rate": 4.5105882483119643e-08, "loss": 0.050904035568237305, "step": 7228 }, { "epoch": 0.9773376370980008, "grad_norm": 0.07816591113805771, "learning_rate": 4.4577048018007436e-08, "loss": 0.07050323486328125, "step": 7229 }, { "epoch": 0.9774728338938368, "grad_norm": 0.05761956423521042, "learning_rate": 4.405132735170569e-08, "loss": 0.05326282978057861, "step": 7230 }, { "epoch": 0.9776080306896726, "grad_norm": 0.0911867767572403, "learning_rate": 4.3528720593675184e-08, "loss": 0.0633094310760498, "step": 7231 }, { "epoch": 0.9777432274855086, "grad_norm": 0.13230380415916443, "learning_rate": 4.300922785271888e-08, "loss": 0.07651877403259277, "step": 7232 }, { "epoch": 0.9778784242813445, "grad_norm": 0.14072942733764648, "learning_rate": 4.249284923700358e-08, "loss": 0.048138707876205444, "step": 7233 }, { "epoch": 0.9780136210771805, "grad_norm": 0.11088144779205322, "learning_rate": 4.197958485404163e-08, "loss": 0.0700385570526123, "step": 7234 }, { "epoch": 0.9781488178730164, "grad_norm": 0.06729786098003387, "learning_rate": 4.1469434810694206e-08, "loss": 0.054015517234802246, "step": 7235 }, { "epoch": 0.9782840146688524, "grad_norm": 0.05143892765045166, "learning_rate": 4.096239921317968e-08, "loss": 0.04444146156311035, "step": 7236 }, { "epoch": 0.9784192114646882, "grad_norm": 0.16051821410655975, "learning_rate": 4.045847816706361e-08, "loss": 0.06583833694458008, "step": 7237 }, { "epoch": 0.9785544082605242, "grad_norm": 0.0900055542588234, "learning_rate": 3.9957671777268724e-08, "loss": 0.04785585403442383, "step": 7238 }, { "epoch": 0.9786896050563602, "grad_norm": 0.08714839816093445, "learning_rate": 3.945998014806163e-08, "loss": 0.04713749885559082, "step": 7239 }, { "epoch": 0.9788248018521961, "grad_norm": 0.06403005123138428, "learning_rate": 3.896540338306609e-08, "loss": 0.0555572509765625, "step": 7240 }, { "epoch": 0.9789599986480321, "grad_norm": 0.04254044219851494, "learning_rate": 3.847394158525641e-08, "loss": 0.02935624122619629, "step": 7241 }, { "epoch": 0.979095195443868, "grad_norm": 0.09209254384040833, "learning_rate": 3.798559485695574e-08, "loss": 0.07053327560424805, "step": 7242 }, { "epoch": 0.9792303922397039, "grad_norm": 0.10998386144638062, "learning_rate": 3.7500363299842746e-08, "loss": 0.07876110076904297, "step": 7243 }, { "epoch": 0.9793655890355398, "grad_norm": 0.04358939826488495, "learning_rate": 3.701824701494327e-08, "loss": 0.0397646427154541, "step": 7244 }, { "epoch": 0.9795007858313758, "grad_norm": 0.08134984970092773, "learning_rate": 3.653924610263703e-08, "loss": 0.051084041595458984, "step": 7245 }, { "epoch": 0.9796359826272117, "grad_norm": 0.16544319689273834, "learning_rate": 3.6063360662654255e-08, "loss": 0.06447172164916992, "step": 7246 }, { "epoch": 0.9797711794230477, "grad_norm": 0.060077276080846786, "learning_rate": 3.559059079407734e-08, "loss": 0.05467081069946289, "step": 7247 }, { "epoch": 0.9799063762188837, "grad_norm": 0.06446938961744308, "learning_rate": 3.512093659533922e-08, "loss": 0.05666780471801758, "step": 7248 }, { "epoch": 0.9800415730147195, "grad_norm": 0.06144725903868675, "learning_rate": 3.4654398164225e-08, "loss": 0.04402303695678711, "step": 7249 }, { "epoch": 0.9801767698105555, "grad_norm": 0.06942480802536011, "learning_rate": 3.4190975597870325e-08, "loss": 0.06115114688873291, "step": 7250 }, { "epoch": 0.9803119666063914, "grad_norm": 0.10614340752363205, "learning_rate": 3.373066899276134e-08, "loss": 0.0576479434967041, "step": 7251 }, { "epoch": 0.9804471634022274, "grad_norm": 0.10466573387384415, "learning_rate": 3.3273478444736386e-08, "loss": 0.04949367046356201, "step": 7252 }, { "epoch": 0.9805823601980633, "grad_norm": 0.08140348643064499, "learning_rate": 3.281940404898764e-08, "loss": 0.065673828125, "step": 7253 }, { "epoch": 0.9807175569938993, "grad_norm": 0.12110307067632675, "learning_rate": 3.236844590005117e-08, "loss": 0.06766033172607422, "step": 7254 }, { "epoch": 0.9808527537897351, "grad_norm": 0.06678981333971024, "learning_rate": 3.192060409182351e-08, "loss": 0.050559282302856445, "step": 7255 }, { "epoch": 0.9809879505855711, "grad_norm": 0.09222646057605743, "learning_rate": 3.147587871754509e-08, "loss": 0.07135415077209473, "step": 7256 }, { "epoch": 0.981123147381407, "grad_norm": 0.0893658846616745, "learning_rate": 3.1034269869810174e-08, "loss": 0.034998536109924316, "step": 7257 }, { "epoch": 0.981258344177243, "grad_norm": 0.13266150653362274, "learning_rate": 3.05957776405652e-08, "loss": 0.046663522720336914, "step": 7258 }, { "epoch": 0.981393540973079, "grad_norm": 0.07934768497943878, "learning_rate": 3.016040212110549e-08, "loss": 0.0372769832611084, "step": 7259 }, { "epoch": 0.9815287377689149, "grad_norm": 0.07053276896476746, "learning_rate": 2.9728143402078522e-08, "loss": 0.052707672119140625, "step": 7260 }, { "epoch": 0.9816639345647508, "grad_norm": 0.06684913486242294, "learning_rate": 2.9299001573483975e-08, "loss": 0.0597834587097168, "step": 7261 }, { "epoch": 0.9817991313605867, "grad_norm": 0.06725136935710907, "learning_rate": 2.8872976724670375e-08, "loss": 0.050567626953125, "step": 7262 }, { "epoch": 0.9819343281564227, "grad_norm": 0.06243680417537689, "learning_rate": 2.8450068944338436e-08, "loss": 0.05402660369873047, "step": 7263 }, { "epoch": 0.9820695249522586, "grad_norm": 0.06280216574668884, "learning_rate": 2.803027832054106e-08, "loss": 0.04726457595825195, "step": 7264 }, { "epoch": 0.9822047217480946, "grad_norm": 0.12313113361597061, "learning_rate": 2.7613604940679995e-08, "loss": 0.06346559524536133, "step": 7265 }, { "epoch": 0.9823399185439305, "grad_norm": 0.2555335462093353, "learning_rate": 2.7200048891509176e-08, "loss": 0.10686159133911133, "step": 7266 }, { "epoch": 0.9824751153397665, "grad_norm": 0.05700648948550224, "learning_rate": 2.67896102591314e-08, "loss": 0.052342891693115234, "step": 7267 }, { "epoch": 0.9826103121356023, "grad_norm": 0.053727101534605026, "learning_rate": 2.6382289129004978e-08, "loss": 0.04517078399658203, "step": 7268 }, { "epoch": 0.9827455089314383, "grad_norm": 0.0781005397439003, "learning_rate": 2.5978085585935395e-08, "loss": 0.07580232620239258, "step": 7269 }, { "epoch": 0.9828807057272743, "grad_norm": 0.034053657203912735, "learning_rate": 2.5576999714078676e-08, "loss": 0.0352177619934082, "step": 7270 }, { "epoch": 0.9830159025231102, "grad_norm": 0.07882603257894516, "learning_rate": 2.517903159694468e-08, "loss": 0.047789573669433594, "step": 7271 }, { "epoch": 0.9831510993189462, "grad_norm": 0.04295378923416138, "learning_rate": 2.4784181317390465e-08, "loss": 0.027587890625, "step": 7272 }, { "epoch": 0.9832862961147821, "grad_norm": 0.1099075973033905, "learning_rate": 2.4392448957628598e-08, "loss": 0.0524439811706543, "step": 7273 }, { "epoch": 0.983421492910618, "grad_norm": 0.15199990570545197, "learning_rate": 2.4003834599217177e-08, "loss": 0.08845829963684082, "step": 7274 }, { "epoch": 0.9835566897064539, "grad_norm": 0.13113896548748016, "learning_rate": 2.3618338323071474e-08, "loss": 0.08320808410644531, "step": 7275 }, { "epoch": 0.9836918865022899, "grad_norm": 0.09326287358999252, "learning_rate": 2.3235960209448958e-08, "loss": 0.05952954292297363, "step": 7276 }, { "epoch": 0.9838270832981258, "grad_norm": 0.07457846403121948, "learning_rate": 2.2856700337967606e-08, "loss": 0.045603036880493164, "step": 7277 }, { "epoch": 0.9839622800939618, "grad_norm": 0.09696845710277557, "learning_rate": 2.2480558787587592e-08, "loss": 0.06147873401641846, "step": 7278 }, { "epoch": 0.9840974768897978, "grad_norm": 0.036029696464538574, "learning_rate": 2.2107535636626263e-08, "loss": 0.02816241979598999, "step": 7279 }, { "epoch": 0.9842326736856336, "grad_norm": 0.10190917551517487, "learning_rate": 2.1737630962746502e-08, "loss": 0.07305526733398438, "step": 7280 }, { "epoch": 0.9843678704814696, "grad_norm": 0.07059134542942047, "learning_rate": 2.1370844842966696e-08, "loss": 0.0533447265625, "step": 7281 }, { "epoch": 0.9845030672773055, "grad_norm": 0.08832582086324692, "learning_rate": 2.100717735365243e-08, "loss": 0.051555335521698, "step": 7282 }, { "epoch": 0.9846382640731415, "grad_norm": 0.1104244813323021, "learning_rate": 2.0646628570521464e-08, "loss": 0.08354133367538452, "step": 7283 }, { "epoch": 0.9847734608689774, "grad_norm": 0.0623931810259819, "learning_rate": 2.028919856864375e-08, "loss": 0.04960620403289795, "step": 7284 }, { "epoch": 0.9849086576648134, "grad_norm": 0.09466513246297836, "learning_rate": 1.9934887422434766e-08, "loss": 0.06949377059936523, "step": 7285 }, { "epoch": 0.9850438544606492, "grad_norm": 0.03106377087533474, "learning_rate": 1.9583695205665496e-08, "loss": 0.02700960636138916, "step": 7286 }, { "epoch": 0.9851790512564852, "grad_norm": 0.05648316070437431, "learning_rate": 1.9235621991457454e-08, "loss": 0.03772091865539551, "step": 7287 }, { "epoch": 0.9853142480523212, "grad_norm": 0.07968435436487198, "learning_rate": 1.889066785227933e-08, "loss": 0.04428505897521973, "step": 7288 }, { "epoch": 0.9854494448481571, "grad_norm": 0.07778004556894302, "learning_rate": 1.854883285995368e-08, "loss": 0.06160151958465576, "step": 7289 }, { "epoch": 0.9855846416439931, "grad_norm": 0.05391272157430649, "learning_rate": 1.8210117085651902e-08, "loss": 0.03964868187904358, "step": 7290 }, { "epoch": 0.985719838439829, "grad_norm": 0.08631722629070282, "learning_rate": 1.7874520599894252e-08, "loss": 0.0598507821559906, "step": 7291 }, { "epoch": 0.9858550352356649, "grad_norm": 0.08468089252710342, "learning_rate": 1.7542043472558166e-08, "loss": 0.07365989685058594, "step": 7292 }, { "epoch": 0.9859902320315008, "grad_norm": 0.12107174098491669, "learning_rate": 1.7212685772864945e-08, "loss": 0.07382214069366455, "step": 7293 }, { "epoch": 0.9861254288273368, "grad_norm": 0.07340415567159653, "learning_rate": 1.68864475693864e-08, "loss": 0.04373335838317871, "step": 7294 }, { "epoch": 0.9862606256231727, "grad_norm": 0.11149155348539352, "learning_rate": 1.6563328930051526e-08, "loss": 0.0391998291015625, "step": 7295 }, { "epoch": 0.9863958224190087, "grad_norm": 0.12067710608243942, "learning_rate": 1.624332992213151e-08, "loss": 0.07052433490753174, "step": 7296 }, { "epoch": 0.9865310192148447, "grad_norm": 0.06118866801261902, "learning_rate": 1.5926450612254728e-08, "loss": 0.03546231985092163, "step": 7297 }, { "epoch": 0.9866662160106805, "grad_norm": 0.06422483921051025, "learning_rate": 1.5612691066395068e-08, "loss": 0.049187541007995605, "step": 7298 }, { "epoch": 0.9868014128065165, "grad_norm": 0.08040238171815872, "learning_rate": 1.530205134987861e-08, "loss": 0.07235908508300781, "step": 7299 }, { "epoch": 0.9869366096023524, "grad_norm": 0.048238355666399, "learning_rate": 1.499453152738528e-08, "loss": 0.04775857925415039, "step": 7300 }, { "epoch": 0.9870718063981884, "grad_norm": 0.11366164684295654, "learning_rate": 1.4690131662938866e-08, "loss": 0.09452676773071289, "step": 7301 }, { "epoch": 0.9872070031940243, "grad_norm": 0.07032579183578491, "learning_rate": 1.438885181991867e-08, "loss": 0.05037975311279297, "step": 7302 }, { "epoch": 0.9873421999898603, "grad_norm": 0.052110254764556885, "learning_rate": 1.4090692061052846e-08, "loss": 0.05149391293525696, "step": 7303 }, { "epoch": 0.9874773967856961, "grad_norm": 0.11958946287631989, "learning_rate": 1.3795652448420071e-08, "loss": 0.08651185035705566, "step": 7304 }, { "epoch": 0.9876125935815321, "grad_norm": 0.03916904330253601, "learning_rate": 1.3503733043447874e-08, "loss": 0.029096364974975586, "step": 7305 }, { "epoch": 0.987747790377368, "grad_norm": 0.07844559848308563, "learning_rate": 1.3214933906915971e-08, "loss": 0.0634145438671112, "step": 7306 }, { "epoch": 0.987882987173204, "grad_norm": 0.1720583587884903, "learning_rate": 1.2929255098954596e-08, "loss": 0.060106754302978516, "step": 7307 }, { "epoch": 0.98801818396904, "grad_norm": 0.05431899055838585, "learning_rate": 1.2646696679042835e-08, "loss": 0.041239380836486816, "step": 7308 }, { "epoch": 0.9881533807648759, "grad_norm": 0.05242586508393288, "learning_rate": 1.2367258706010298e-08, "loss": 0.041638731956481934, "step": 7309 }, { "epoch": 0.9882885775607118, "grad_norm": 0.0778360441327095, "learning_rate": 1.2090941238040443e-08, "loss": 0.05116933584213257, "step": 7310 }, { "epoch": 0.9884237743565477, "grad_norm": 0.16519494354724884, "learning_rate": 1.1817744332660584e-08, "loss": 0.05723142623901367, "step": 7311 }, { "epoch": 0.9885589711523837, "grad_norm": 0.024900518357753754, "learning_rate": 1.1547668046751891e-08, "loss": 0.022734999656677246, "step": 7312 }, { "epoch": 0.9886941679482196, "grad_norm": 0.120774045586586, "learning_rate": 1.1280712436549379e-08, "loss": 0.0833730697631836, "step": 7313 }, { "epoch": 0.9888293647440556, "grad_norm": 0.08230368047952652, "learning_rate": 1.1016877557630257e-08, "loss": 0.04598128795623779, "step": 7314 }, { "epoch": 0.9889645615398915, "grad_norm": 0.06282849609851837, "learning_rate": 1.0756163464928915e-08, "loss": 0.05870342254638672, "step": 7315 }, { "epoch": 0.9890997583357275, "grad_norm": 0.0756852999329567, "learning_rate": 1.0498570212726932e-08, "loss": 0.07147502899169922, "step": 7316 }, { "epoch": 0.9892349551315633, "grad_norm": 0.04308709874749184, "learning_rate": 1.024409785465641e-08, "loss": 0.042134761810302734, "step": 7317 }, { "epoch": 0.9893701519273993, "grad_norm": 0.11743681877851486, "learning_rate": 9.992746443699962e-09, "loss": 0.06059384346008301, "step": 7318 }, { "epoch": 0.9895053487232353, "grad_norm": 0.05181999132037163, "learning_rate": 9.744516032190731e-09, "loss": 0.043380022048950195, "step": 7319 }, { "epoch": 0.9896405455190712, "grad_norm": 0.07927172631025314, "learning_rate": 9.499406671809041e-09, "loss": 0.06056690216064453, "step": 7320 }, { "epoch": 0.9897757423149072, "grad_norm": 0.09364833682775497, "learning_rate": 9.2574184135924e-09, "loss": 0.05971479415893555, "step": 7321 }, { "epoch": 0.9899109391107431, "grad_norm": 0.05973111838102341, "learning_rate": 9.018551307920508e-09, "loss": 0.035719871520996094, "step": 7322 }, { "epoch": 0.990046135906579, "grad_norm": 0.06893175095319748, "learning_rate": 8.782805404526917e-09, "loss": 0.04376935958862305, "step": 7323 }, { "epoch": 0.9901813327024149, "grad_norm": 0.07715661078691483, "learning_rate": 8.55018075249736e-09, "loss": 0.044313669204711914, "step": 7324 }, { "epoch": 0.9903165294982509, "grad_norm": 0.08267417550086975, "learning_rate": 8.320677400264764e-09, "loss": 0.0799022912979126, "step": 7325 }, { "epoch": 0.9904517262940868, "grad_norm": 0.08510856330394745, "learning_rate": 8.094295395610906e-09, "loss": 0.047110795974731445, "step": 7326 }, { "epoch": 0.9905869230899228, "grad_norm": 0.10011366754770279, "learning_rate": 7.87103478567308e-09, "loss": 0.06752943992614746, "step": 7327 }, { "epoch": 0.9907221198857588, "grad_norm": 0.11906718462705612, "learning_rate": 7.65089561693244e-09, "loss": 0.09389448165893555, "step": 7328 }, { "epoch": 0.9908573166815946, "grad_norm": 0.0777163952589035, "learning_rate": 7.433877935225652e-09, "loss": 0.05770304799079895, "step": 7329 }, { "epoch": 0.9909925134774306, "grad_norm": 0.0878908783197403, "learning_rate": 7.219981785733243e-09, "loss": 0.06152153015136719, "step": 7330 }, { "epoch": 0.9911277102732665, "grad_norm": 0.13056887686252594, "learning_rate": 7.009207212992919e-09, "loss": 0.08936762809753418, "step": 7331 }, { "epoch": 0.9912629070691025, "grad_norm": 0.07207076251506805, "learning_rate": 6.801554260889575e-09, "loss": 0.057196855545043945, "step": 7332 }, { "epoch": 0.9913981038649384, "grad_norm": 0.09439665824174881, "learning_rate": 6.5970229726552976e-09, "loss": 0.07920801639556885, "step": 7333 }, { "epoch": 0.9915333006607744, "grad_norm": 0.08448266237974167, "learning_rate": 6.3956133908743556e-09, "loss": 0.0589594841003418, "step": 7334 }, { "epoch": 0.9916684974566102, "grad_norm": 0.07885945588350296, "learning_rate": 6.197325557483202e-09, "loss": 0.060319721698760986, "step": 7335 }, { "epoch": 0.9918036942524462, "grad_norm": 0.07197452336549759, "learning_rate": 6.002159513765482e-09, "loss": 0.04823002219200134, "step": 7336 }, { "epoch": 0.9919388910482821, "grad_norm": 0.04160919785499573, "learning_rate": 5.810115300355357e-09, "loss": 0.03542327880859375, "step": 7337 }, { "epoch": 0.9920740878441181, "grad_norm": 0.08904135972261429, "learning_rate": 5.621192957239174e-09, "loss": 0.07356023788452148, "step": 7338 }, { "epoch": 0.9922092846399541, "grad_norm": 0.06244876980781555, "learning_rate": 5.435392523748806e-09, "loss": 0.05868411064147949, "step": 7339 }, { "epoch": 0.99234448143579, "grad_norm": 0.0832950696349144, "learning_rate": 5.252714038571638e-09, "loss": 0.07088655233383179, "step": 7340 }, { "epoch": 0.9924796782316259, "grad_norm": 0.10306744277477264, "learning_rate": 5.073157539742246e-09, "loss": 0.06284856796264648, "step": 7341 }, { "epoch": 0.9926148750274618, "grad_norm": 0.06996965408325195, "learning_rate": 4.896723064642394e-09, "loss": 0.0495152473449707, "step": 7342 }, { "epoch": 0.9927500718232978, "grad_norm": 0.09587699919939041, "learning_rate": 4.723410650009363e-09, "loss": 0.05898404121398926, "step": 7343 }, { "epoch": 0.9928852686191337, "grad_norm": 0.05208943411707878, "learning_rate": 4.553220331925956e-09, "loss": 0.05413079261779785, "step": 7344 }, { "epoch": 0.9930204654149697, "grad_norm": 0.05242899805307388, "learning_rate": 4.38615214582716e-09, "loss": 0.03993582725524902, "step": 7345 }, { "epoch": 0.9931556622108056, "grad_norm": 0.07335025072097778, "learning_rate": 4.2222061265001496e-09, "loss": 0.05894613265991211, "step": 7346 }, { "epoch": 0.9932908590066415, "grad_norm": 0.09117382764816284, "learning_rate": 4.0613823080742905e-09, "loss": 0.06613492965698242, "step": 7347 }, { "epoch": 0.9934260558024774, "grad_norm": 0.042938247323036194, "learning_rate": 3.903680724037795e-09, "loss": 0.04477345943450928, "step": 7348 }, { "epoch": 0.9935612525983134, "grad_norm": 0.058746881783008575, "learning_rate": 3.749101407224398e-09, "loss": 0.06394344568252563, "step": 7349 }, { "epoch": 0.9936964493941494, "grad_norm": 0.13351649045944214, "learning_rate": 3.597644389818355e-09, "loss": 0.09245014190673828, "step": 7350 }, { "epoch": 0.9938316461899853, "grad_norm": 0.034742143005132675, "learning_rate": 3.4493097033527767e-09, "loss": 0.03508591651916504, "step": 7351 }, { "epoch": 0.9939668429858213, "grad_norm": 0.08260580152273178, "learning_rate": 3.3040973787112904e-09, "loss": 0.08246707916259766, "step": 7352 }, { "epoch": 0.9941020397816571, "grad_norm": 0.085362009704113, "learning_rate": 3.162007446129711e-09, "loss": 0.05268669128417969, "step": 7353 }, { "epoch": 0.9942372365774931, "grad_norm": 0.04928789660334587, "learning_rate": 3.023039935191041e-09, "loss": 0.04291635751724243, "step": 7354 }, { "epoch": 0.994372433373329, "grad_norm": 0.11875756829977036, "learning_rate": 2.887194874830468e-09, "loss": 0.064117431640625, "step": 7355 }, { "epoch": 0.994507630169165, "grad_norm": 0.060207534581422806, "learning_rate": 2.7544722933287026e-09, "loss": 0.05113565921783447, "step": 7356 }, { "epoch": 0.994642826965001, "grad_norm": 0.08969463407993317, "learning_rate": 2.6248722183203066e-09, "loss": 0.07146716117858887, "step": 7357 }, { "epoch": 0.9947780237608369, "grad_norm": 0.08677563071250916, "learning_rate": 2.498394676790361e-09, "loss": 0.07406949996948242, "step": 7358 }, { "epoch": 0.9949132205566729, "grad_norm": 0.15845677256584167, "learning_rate": 2.375039695071135e-09, "loss": 0.05764305591583252, "step": 7359 }, { "epoch": 0.9950484173525087, "grad_norm": 0.10074788331985474, "learning_rate": 2.2548072988454184e-09, "loss": 0.06560428440570831, "step": 7360 }, { "epoch": 0.9951836141483447, "grad_norm": 0.08231482654809952, "learning_rate": 2.1376975131465194e-09, "loss": 0.05889749526977539, "step": 7361 }, { "epoch": 0.9953188109441806, "grad_norm": 0.12215745449066162, "learning_rate": 2.023710362356601e-09, "loss": 0.07542967796325684, "step": 7362 }, { "epoch": 0.9954540077400166, "grad_norm": 0.07525450736284256, "learning_rate": 1.9128458702100117e-09, "loss": 0.05368232727050781, "step": 7363 }, { "epoch": 0.9955892045358525, "grad_norm": 0.07393708825111389, "learning_rate": 1.8051040597882873e-09, "loss": 0.03982853889465332, "step": 7364 }, { "epoch": 0.9957244013316885, "grad_norm": 0.06943808495998383, "learning_rate": 1.70048495352515e-09, "loss": 0.05326399207115173, "step": 7365 }, { "epoch": 0.9958595981275243, "grad_norm": 0.11843971163034439, "learning_rate": 1.5989885731998443e-09, "loss": 0.0825052261352539, "step": 7366 }, { "epoch": 0.9959947949233603, "grad_norm": 0.0996103286743164, "learning_rate": 1.5006149399487966e-09, "loss": 0.0654592514038086, "step": 7367 }, { "epoch": 0.9961299917191963, "grad_norm": 0.05979285389184952, "learning_rate": 1.4053640742489604e-09, "loss": 0.04976773262023926, "step": 7368 }, { "epoch": 0.9962651885150322, "grad_norm": 0.13997714221477509, "learning_rate": 1.3132359959361351e-09, "loss": 0.08460056781768799, "step": 7369 }, { "epoch": 0.9964003853108682, "grad_norm": 0.06448747217655182, "learning_rate": 1.2242307241899787e-09, "loss": 0.052953124046325684, "step": 7370 }, { "epoch": 0.9965355821067041, "grad_norm": 0.041787318885326385, "learning_rate": 1.1383482775406685e-09, "loss": 0.03504984825849533, "step": 7371 }, { "epoch": 0.99667077890254, "grad_norm": 0.12908028066158295, "learning_rate": 1.0555886738738973e-09, "loss": 0.0784159004688263, "step": 7372 }, { "epoch": 0.9968059756983759, "grad_norm": 0.08244071900844574, "learning_rate": 9.75951930415886e-10, "loss": 0.06368780136108398, "step": 7373 }, { "epoch": 0.9969411724942119, "grad_norm": 0.10129138082265854, "learning_rate": 8.994380637483701e-10, "loss": 0.052173614501953125, "step": 7374 }, { "epoch": 0.9970763692900478, "grad_norm": 0.05334750562906265, "learning_rate": 8.260470898036054e-10, "loss": 0.045610785484313965, "step": 7375 }, { "epoch": 0.9972115660858838, "grad_norm": 0.10708335041999817, "learning_rate": 7.557790238627016e-10, "loss": 0.054087936878204346, "step": 7376 }, { "epoch": 0.9973467628817198, "grad_norm": 0.07844416797161102, "learning_rate": 6.886338805522918e-10, "loss": 0.04304170608520508, "step": 7377 }, { "epoch": 0.9974819596775556, "grad_norm": 0.0769147202372551, "learning_rate": 6.246116738561903e-10, "loss": 0.061806440353393555, "step": 7378 }, { "epoch": 0.9976171564733916, "grad_norm": 0.07373236864805222, "learning_rate": 5.637124171004038e-10, "loss": 0.04380142688751221, "step": 7379 }, { "epoch": 0.9977523532692275, "grad_norm": 0.11770644038915634, "learning_rate": 5.059361229681203e-10, "loss": 0.08809971809387207, "step": 7380 }, { "epoch": 0.9978875500650635, "grad_norm": 0.09737664461135864, "learning_rate": 4.5128280348638583e-10, "loss": 0.0631260871887207, "step": 7381 }, { "epoch": 0.9980227468608994, "grad_norm": 0.04032571613788605, "learning_rate": 3.9975247003443127e-10, "loss": 0.03821921348571777, "step": 7382 }, { "epoch": 0.9981579436567354, "grad_norm": 0.06047150120139122, "learning_rate": 3.51345133342007e-10, "loss": 0.03913068771362305, "step": 7383 }, { "epoch": 0.9982931404525712, "grad_norm": 0.10665332525968552, "learning_rate": 3.060608034877177e-10, "loss": 0.06272637844085693, "step": 7384 }, { "epoch": 0.9984283372484072, "grad_norm": 0.0563480518758297, "learning_rate": 2.638994898990221e-10, "loss": 0.041876569390296936, "step": 7385 }, { "epoch": 0.9985635340442431, "grad_norm": 0.05935134366154671, "learning_rate": 2.2486120135556398e-10, "loss": 0.05381488800048828, "step": 7386 }, { "epoch": 0.9986987308400791, "grad_norm": 0.08404790610074997, "learning_rate": 1.889459459841758e-10, "loss": 0.06423234939575195, "step": 7387 }, { "epoch": 0.998833927635915, "grad_norm": 0.0969076156616211, "learning_rate": 1.56153731263875e-10, "loss": 0.05673551559448242, "step": 7388 }, { "epoch": 0.998969124431751, "grad_norm": 0.05608352646231651, "learning_rate": 1.2648456402086784e-10, "loss": 0.050678133964538574, "step": 7389 }, { "epoch": 0.9991043212275869, "grad_norm": 0.03873895853757858, "learning_rate": 9.99384504318801e-11, "loss": 0.03996634483337402, "step": 7390 }, { "epoch": 0.9992395180234228, "grad_norm": 0.07401720434427261, "learning_rate": 7.651539602582247e-11, "loss": 0.04620383679866791, "step": 7391 }, { "epoch": 0.9993747148192588, "grad_norm": 0.15651960670948029, "learning_rate": 5.6215405678794464e-11, "loss": 0.07623988389968872, "step": 7392 }, { "epoch": 0.9995099116150947, "grad_norm": 0.10791472345590591, "learning_rate": 3.9038483615749795e-11, "loss": 0.051717519760131836, "step": 7393 }, { "epoch": 0.9996451084109307, "grad_norm": 0.06786425411701202, "learning_rate": 2.4984633415492398e-11, "loss": 0.06241750717163086, "step": 7394 }, { "epoch": 0.9997803052067666, "grad_norm": 0.033510491251945496, "learning_rate": 1.4053858004015041e-11, "loss": 0.03623461723327637, "step": 7395 }, { "epoch": 0.9999155020026025, "grad_norm": 0.08831936866044998, "learning_rate": 6.246159654499373e-12, "loss": 0.06389164924621582, "step": 7396 }, { "epoch": 1.0, "grad_norm": 0.08253154158592224, "learning_rate": 1.561539995642569e-12, "loss": 0.07079315185546875, "step": 7397 }, { "epoch": 1.0, "step": 7397, "total_flos": 2.893062630172682e+19, "train_loss": 0.011039156519602775, "train_runtime": 12905.1847, "train_samples_per_second": 146.725, "train_steps_per_second": 0.573 } ], "logging_steps": 1.0, "max_steps": 7397, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.893062630172682e+19, "train_batch_size": 4, "trial_name": null, "trial_params": null }