{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 4.0, "eval_steps": 100, "global_step": 2408, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0016637204949568473, "grad_norm": 2.482388973236084, "learning_rate": 0.0, "loss": 4.477285861968994, "step": 1 }, { "epoch": 0.0033274409899136945, "grad_norm": 2.6023879051208496, "learning_rate": 4.132231404958678e-08, "loss": 4.723363876342773, "step": 2 }, { "epoch": 0.004991161484870541, "grad_norm": 2.5578877925872803, "learning_rate": 8.264462809917357e-08, "loss": 4.560671806335449, "step": 3 }, { "epoch": 0.006654881979827389, "grad_norm": 2.4592692852020264, "learning_rate": 1.2396694214876034e-07, "loss": 4.479525089263916, "step": 4 }, { "epoch": 0.008318602474784236, "grad_norm": 2.3753788471221924, "learning_rate": 1.6528925619834713e-07, "loss": 4.508234024047852, "step": 5 }, { "epoch": 0.009982322969741083, "grad_norm": 2.473618984222412, "learning_rate": 2.066115702479339e-07, "loss": 4.489030361175537, "step": 6 }, { "epoch": 0.011646043464697931, "grad_norm": 2.4029815196990967, "learning_rate": 2.4793388429752067e-07, "loss": 4.410640716552734, "step": 7 }, { "epoch": 0.013309763959654778, "grad_norm": 2.378934621810913, "learning_rate": 2.892561983471075e-07, "loss": 4.521676063537598, "step": 8 }, { "epoch": 0.014973484454611625, "grad_norm": 2.234776496887207, "learning_rate": 3.3057851239669426e-07, "loss": 4.27446174621582, "step": 9 }, { "epoch": 0.01663720494956847, "grad_norm": 2.0892786979675293, "learning_rate": 3.7190082644628103e-07, "loss": 4.092978000640869, "step": 10 }, { "epoch": 0.01830092544452532, "grad_norm": 2.2632219791412354, "learning_rate": 4.132231404958678e-07, "loss": 4.533838272094727, "step": 11 }, { "epoch": 0.019964645939482165, "grad_norm": 1.433074712753296, "learning_rate": 4.5454545454545457e-07, "loss": 3.4296646118164062, "step": 12 }, { "epoch": 0.021628366434439016, "grad_norm": 1.3564655780792236, "learning_rate": 4.958677685950413e-07, "loss": 3.300870895385742, "step": 13 }, { "epoch": 0.023292086929395862, "grad_norm": 1.2193964719772339, "learning_rate": 5.371900826446281e-07, "loss": 3.117865800857544, "step": 14 }, { "epoch": 0.02495580742435271, "grad_norm": 1.2911186218261719, "learning_rate": 5.78512396694215e-07, "loss": 3.2935025691986084, "step": 15 }, { "epoch": 0.026619527919309556, "grad_norm": 0.36625346541404724, "learning_rate": 6.198347107438018e-07, "loss": 2.1773428916931152, "step": 16 }, { "epoch": 0.028283248414266403, "grad_norm": 0.3553527593612671, "learning_rate": 6.611570247933885e-07, "loss": 2.2286384105682373, "step": 17 }, { "epoch": 0.02994696890922325, "grad_norm": 0.3257206380367279, "learning_rate": 7.024793388429753e-07, "loss": 2.212399959564209, "step": 18 }, { "epoch": 0.031610689404180096, "grad_norm": 0.28514525294303894, "learning_rate": 7.438016528925621e-07, "loss": 2.16679048538208, "step": 19 }, { "epoch": 0.03327440989913694, "grad_norm": 0.28953635692596436, "learning_rate": 7.851239669421488e-07, "loss": 2.240231990814209, "step": 20 }, { "epoch": 0.03493813039409379, "grad_norm": 0.2675462067127228, "learning_rate": 8.264462809917356e-07, "loss": 2.0887300968170166, "step": 21 }, { "epoch": 0.03660185088905064, "grad_norm": 0.2351558357477188, "learning_rate": 8.677685950413224e-07, "loss": 1.871799111366272, "step": 22 }, { "epoch": 0.038265571384007484, "grad_norm": 0.2161591798067093, "learning_rate": 9.090909090909091e-07, "loss": 1.8348283767700195, "step": 23 }, { "epoch": 0.03992929187896433, "grad_norm": 0.2249748706817627, "learning_rate": 9.50413223140496e-07, "loss": 1.9596117734909058, "step": 24 }, { "epoch": 0.041593012373921184, "grad_norm": 0.21401794254779816, "learning_rate": 9.917355371900827e-07, "loss": 1.9094750881195068, "step": 25 }, { "epoch": 0.04325673286887803, "grad_norm": 0.2133910208940506, "learning_rate": 1.0330578512396695e-06, "loss": 1.9063758850097656, "step": 26 }, { "epoch": 0.04492045336383488, "grad_norm": 0.20730602741241455, "learning_rate": 1.0743801652892562e-06, "loss": 1.8436386585235596, "step": 27 }, { "epoch": 0.046584173858791725, "grad_norm": 0.20181401073932648, "learning_rate": 1.115702479338843e-06, "loss": 1.8216067552566528, "step": 28 }, { "epoch": 0.04824789435374857, "grad_norm": 0.2049887478351593, "learning_rate": 1.15702479338843e-06, "loss": 1.8613402843475342, "step": 29 }, { "epoch": 0.04991161484870542, "grad_norm": 0.18033020198345184, "learning_rate": 1.1983471074380167e-06, "loss": 1.6658318042755127, "step": 30 }, { "epoch": 0.051575335343662265, "grad_norm": 0.15923167765140533, "learning_rate": 1.2396694214876035e-06, "loss": 1.532716989517212, "step": 31 }, { "epoch": 0.05323905583861911, "grad_norm": 0.1302931010723114, "learning_rate": 1.28099173553719e-06, "loss": 1.4400815963745117, "step": 32 }, { "epoch": 0.05490277633357596, "grad_norm": 0.11385945230722427, "learning_rate": 1.322314049586777e-06, "loss": 1.3887157440185547, "step": 33 }, { "epoch": 0.056566496828532806, "grad_norm": 0.11334631592035294, "learning_rate": 1.3636363636363636e-06, "loss": 1.3524459600448608, "step": 34 }, { "epoch": 0.05823021732348965, "grad_norm": 0.10479549318552017, "learning_rate": 1.4049586776859506e-06, "loss": 1.292414903640747, "step": 35 }, { "epoch": 0.0598939378184465, "grad_norm": 0.09981797635555267, "learning_rate": 1.4462809917355372e-06, "loss": 1.2074761390686035, "step": 36 }, { "epoch": 0.061557658313403346, "grad_norm": 0.10237134248018265, "learning_rate": 1.4876033057851241e-06, "loss": 1.2024866342544556, "step": 37 }, { "epoch": 0.06322137880836019, "grad_norm": 0.09421642869710922, "learning_rate": 1.5289256198347107e-06, "loss": 1.1445802450180054, "step": 38 }, { "epoch": 0.06488509930331704, "grad_norm": 0.08208701759576797, "learning_rate": 1.5702479338842977e-06, "loss": 1.114914894104004, "step": 39 }, { "epoch": 0.06654881979827389, "grad_norm": 0.0781988576054573, "learning_rate": 1.6115702479338842e-06, "loss": 1.1362968683242798, "step": 40 }, { "epoch": 0.06821254029323073, "grad_norm": 0.06956593692302704, "learning_rate": 1.6528925619834712e-06, "loss": 1.050331950187683, "step": 41 }, { "epoch": 0.06987626078818758, "grad_norm": 0.07109206914901733, "learning_rate": 1.694214876033058e-06, "loss": 1.0830775499343872, "step": 42 }, { "epoch": 0.07153998128314443, "grad_norm": 0.06287192553281784, "learning_rate": 1.7355371900826448e-06, "loss": 1.0035347938537598, "step": 43 }, { "epoch": 0.07320370177810127, "grad_norm": 0.060101497918367386, "learning_rate": 1.7768595041322315e-06, "loss": 0.9792177677154541, "step": 44 }, { "epoch": 0.07486742227305812, "grad_norm": 0.05643174424767494, "learning_rate": 1.8181818181818183e-06, "loss": 0.9080207347869873, "step": 45 }, { "epoch": 0.07653114276801497, "grad_norm": 0.05656706541776657, "learning_rate": 1.859504132231405e-06, "loss": 0.9312663078308105, "step": 46 }, { "epoch": 0.07819486326297181, "grad_norm": 0.04832266643643379, "learning_rate": 1.900826446280992e-06, "loss": 0.8578022718429565, "step": 47 }, { "epoch": 0.07985858375792866, "grad_norm": 0.051747020334005356, "learning_rate": 1.9421487603305786e-06, "loss": 0.9018316268920898, "step": 48 }, { "epoch": 0.08152230425288552, "grad_norm": 0.04696454107761383, "learning_rate": 1.9834710743801654e-06, "loss": 0.8227735757827759, "step": 49 }, { "epoch": 0.08318602474784237, "grad_norm": 0.04074595496058464, "learning_rate": 2.024793388429752e-06, "loss": 0.7895236015319824, "step": 50 }, { "epoch": 0.08484974524279922, "grad_norm": 0.041805122047662735, "learning_rate": 2.066115702479339e-06, "loss": 0.7831016778945923, "step": 51 }, { "epoch": 0.08651346573775606, "grad_norm": 0.03949985280632973, "learning_rate": 2.1074380165289257e-06, "loss": 0.7389830350875854, "step": 52 }, { "epoch": 0.08817718623271291, "grad_norm": 0.041112784296274185, "learning_rate": 2.1487603305785124e-06, "loss": 0.7490829229354858, "step": 53 }, { "epoch": 0.08984090672766976, "grad_norm": 0.044654764235019684, "learning_rate": 2.1900826446280992e-06, "loss": 0.7309138774871826, "step": 54 }, { "epoch": 0.0915046272226266, "grad_norm": 0.05389357730746269, "learning_rate": 2.231404958677686e-06, "loss": 0.7365468740463257, "step": 55 }, { "epoch": 0.09316834771758345, "grad_norm": 0.044067952781915665, "learning_rate": 2.2727272727272728e-06, "loss": 0.7065483927726746, "step": 56 }, { "epoch": 0.0948320682125403, "grad_norm": 0.03494361788034439, "learning_rate": 2.31404958677686e-06, "loss": 0.7050533294677734, "step": 57 }, { "epoch": 0.09649578870749714, "grad_norm": 0.027442242950201035, "learning_rate": 2.3553719008264463e-06, "loss": 0.6887029409408569, "step": 58 }, { "epoch": 0.09815950920245399, "grad_norm": 0.031428925693035126, "learning_rate": 2.3966942148760335e-06, "loss": 0.6625376343727112, "step": 59 }, { "epoch": 0.09982322969741084, "grad_norm": 0.032608762383461, "learning_rate": 2.43801652892562e-06, "loss": 0.6581825017929077, "step": 60 }, { "epoch": 0.10148695019236768, "grad_norm": 0.02459588088095188, "learning_rate": 2.479338842975207e-06, "loss": 0.6294938325881958, "step": 61 }, { "epoch": 0.10315067068732453, "grad_norm": 0.01995225064456463, "learning_rate": 2.5206611570247934e-06, "loss": 0.6105325222015381, "step": 62 }, { "epoch": 0.10481439118228138, "grad_norm": 0.028617018833756447, "learning_rate": 2.56198347107438e-06, "loss": 0.6707576513290405, "step": 63 }, { "epoch": 0.10647811167723822, "grad_norm": 0.02642955631017685, "learning_rate": 2.6033057851239673e-06, "loss": 0.624178409576416, "step": 64 }, { "epoch": 0.10814183217219507, "grad_norm": 0.025090189650654793, "learning_rate": 2.644628099173554e-06, "loss": 0.6277080774307251, "step": 65 }, { "epoch": 0.10980555266715192, "grad_norm": 0.03007493168115616, "learning_rate": 2.6859504132231405e-06, "loss": 0.6461099982261658, "step": 66 }, { "epoch": 0.11146927316210876, "grad_norm": 0.026639141142368317, "learning_rate": 2.7272727272727272e-06, "loss": 0.6154550909996033, "step": 67 }, { "epoch": 0.11313299365706561, "grad_norm": 0.029324783012270927, "learning_rate": 2.7685950413223144e-06, "loss": 0.6163111329078674, "step": 68 }, { "epoch": 0.11479671415202246, "grad_norm": 0.02367994748055935, "learning_rate": 2.809917355371901e-06, "loss": 0.6023034453392029, "step": 69 }, { "epoch": 0.1164604346469793, "grad_norm": 0.02719041146337986, "learning_rate": 2.851239669421488e-06, "loss": 0.6273308992385864, "step": 70 }, { "epoch": 0.11812415514193615, "grad_norm": 0.02549588494002819, "learning_rate": 2.8925619834710743e-06, "loss": 0.5701053142547607, "step": 71 }, { "epoch": 0.119787875636893, "grad_norm": 0.022750338539481163, "learning_rate": 2.9338842975206615e-06, "loss": 0.5723459720611572, "step": 72 }, { "epoch": 0.12145159613184985, "grad_norm": 0.025367699563503265, "learning_rate": 2.9752066115702483e-06, "loss": 0.5892397165298462, "step": 73 }, { "epoch": 0.12311531662680669, "grad_norm": 0.0319160595536232, "learning_rate": 3.016528925619835e-06, "loss": 0.5924912691116333, "step": 74 }, { "epoch": 0.12477903712176354, "grad_norm": 0.021769287064671516, "learning_rate": 3.0578512396694214e-06, "loss": 0.5665951371192932, "step": 75 }, { "epoch": 0.12644275761672039, "grad_norm": 0.027082569897174835, "learning_rate": 3.0991735537190086e-06, "loss": 0.563019871711731, "step": 76 }, { "epoch": 0.12810647811167725, "grad_norm": 0.022641802206635475, "learning_rate": 3.1404958677685953e-06, "loss": 0.5703021883964539, "step": 77 }, { "epoch": 0.12977019860663408, "grad_norm": 0.021977661177515984, "learning_rate": 3.181818181818182e-06, "loss": 0.5566818118095398, "step": 78 }, { "epoch": 0.13143391910159094, "grad_norm": 0.02996179461479187, "learning_rate": 3.2231404958677685e-06, "loss": 0.566483199596405, "step": 79 }, { "epoch": 0.13309763959654777, "grad_norm": 0.02014978975057602, "learning_rate": 3.264462809917356e-06, "loss": 0.5580374002456665, "step": 80 }, { "epoch": 0.13476136009150463, "grad_norm": 0.03401867672801018, "learning_rate": 3.3057851239669424e-06, "loss": 0.5588016510009766, "step": 81 }, { "epoch": 0.13642508058646147, "grad_norm": 0.022355899214744568, "learning_rate": 3.347107438016529e-06, "loss": 0.5484458208084106, "step": 82 }, { "epoch": 0.13808880108141833, "grad_norm": 0.02415446750819683, "learning_rate": 3.388429752066116e-06, "loss": 0.5682621002197266, "step": 83 }, { "epoch": 0.13975252157637516, "grad_norm": 0.023742761462926865, "learning_rate": 3.429752066115703e-06, "loss": 0.5533846616744995, "step": 84 }, { "epoch": 0.14141624207133202, "grad_norm": 0.022532276809215546, "learning_rate": 3.4710743801652895e-06, "loss": 0.5288005471229553, "step": 85 }, { "epoch": 0.14307996256628885, "grad_norm": 0.019874311983585358, "learning_rate": 3.5123966942148763e-06, "loss": 0.5370491743087769, "step": 86 }, { "epoch": 0.14474368306124571, "grad_norm": 0.022755231708288193, "learning_rate": 3.553719008264463e-06, "loss": 0.511932373046875, "step": 87 }, { "epoch": 0.14640740355620255, "grad_norm": 0.02134534902870655, "learning_rate": 3.5950413223140502e-06, "loss": 0.49379733204841614, "step": 88 }, { "epoch": 0.1480711240511594, "grad_norm": 0.021491173654794693, "learning_rate": 3.6363636363636366e-06, "loss": 0.4925214648246765, "step": 89 }, { "epoch": 0.14973484454611624, "grad_norm": 0.02258123643696308, "learning_rate": 3.6776859504132234e-06, "loss": 0.5131945610046387, "step": 90 }, { "epoch": 0.1513985650410731, "grad_norm": 0.025489268824458122, "learning_rate": 3.71900826446281e-06, "loss": 0.4968642592430115, "step": 91 }, { "epoch": 0.15306228553602994, "grad_norm": 0.02109348215162754, "learning_rate": 3.7603305785123973e-06, "loss": 0.49502578377723694, "step": 92 }, { "epoch": 0.1547260060309868, "grad_norm": 0.025884000584483147, "learning_rate": 3.801652892561984e-06, "loss": 0.4799806475639343, "step": 93 }, { "epoch": 0.15638972652594363, "grad_norm": 0.023585375398397446, "learning_rate": 3.842975206611571e-06, "loss": 0.49822694063186646, "step": 94 }, { "epoch": 0.1580534470209005, "grad_norm": 0.031796254217624664, "learning_rate": 3.884297520661157e-06, "loss": 0.49923157691955566, "step": 95 }, { "epoch": 0.15971716751585732, "grad_norm": 0.03561358526349068, "learning_rate": 3.925619834710744e-06, "loss": 0.4884190261363983, "step": 96 }, { "epoch": 0.16138088801081418, "grad_norm": 0.021946346387267113, "learning_rate": 3.966942148760331e-06, "loss": 0.4643127918243408, "step": 97 }, { "epoch": 0.16304460850577104, "grad_norm": 0.028085844591259956, "learning_rate": 4.008264462809918e-06, "loss": 0.4594374895095825, "step": 98 }, { "epoch": 0.16470832900072788, "grad_norm": 0.027761919423937798, "learning_rate": 4.049586776859504e-06, "loss": 0.4878089725971222, "step": 99 }, { "epoch": 0.16637204949568474, "grad_norm": 0.02483516000211239, "learning_rate": 4.0909090909090915e-06, "loss": 0.4365704357624054, "step": 100 }, { "epoch": 0.16637204949568474, "eval_loss": 0.4434735178947449, "eval_runtime": 398.423, "eval_samples_per_second": 12.068, "eval_steps_per_second": 3.017, "step": 100 }, { "epoch": 0.16803576999064157, "grad_norm": 0.026426592841744423, "learning_rate": 4.132231404958678e-06, "loss": 0.42997342348098755, "step": 101 }, { "epoch": 0.16969949048559843, "grad_norm": 0.02919408679008484, "learning_rate": 4.173553719008265e-06, "loss": 0.4375656843185425, "step": 102 }, { "epoch": 0.17136321098055526, "grad_norm": 0.03021477535367012, "learning_rate": 4.214876033057851e-06, "loss": 0.4166721999645233, "step": 103 }, { "epoch": 0.17302693147551212, "grad_norm": 0.0274681244045496, "learning_rate": 4.2561983471074386e-06, "loss": 0.41593968868255615, "step": 104 }, { "epoch": 0.17469065197046896, "grad_norm": 0.026254650205373764, "learning_rate": 4.297520661157025e-06, "loss": 0.42187049984931946, "step": 105 }, { "epoch": 0.17635437246542582, "grad_norm": 0.04353519156575203, "learning_rate": 4.338842975206612e-06, "loss": 0.41187649965286255, "step": 106 }, { "epoch": 0.17801809296038265, "grad_norm": 0.02748814970254898, "learning_rate": 4.3801652892561984e-06, "loss": 0.4013650715351105, "step": 107 }, { "epoch": 0.1796818134553395, "grad_norm": 0.053706467151641846, "learning_rate": 4.421487603305786e-06, "loss": 0.4117678701877594, "step": 108 }, { "epoch": 0.18134553395029634, "grad_norm": 0.04221956431865692, "learning_rate": 4.462809917355372e-06, "loss": 0.3744428753852844, "step": 109 }, { "epoch": 0.1830092544452532, "grad_norm": 0.04144236072897911, "learning_rate": 4.504132231404959e-06, "loss": 0.37903785705566406, "step": 110 }, { "epoch": 0.18467297494021004, "grad_norm": 0.038312312215566635, "learning_rate": 4.5454545454545455e-06, "loss": 0.3629874885082245, "step": 111 }, { "epoch": 0.1863366954351669, "grad_norm": 0.0359899066388607, "learning_rate": 4.586776859504133e-06, "loss": 0.37735822796821594, "step": 112 }, { "epoch": 0.18800041593012373, "grad_norm": 0.030285200104117393, "learning_rate": 4.62809917355372e-06, "loss": 0.3504555821418762, "step": 113 }, { "epoch": 0.1896641364250806, "grad_norm": 0.04384183883666992, "learning_rate": 4.669421487603306e-06, "loss": 0.36508214473724365, "step": 114 }, { "epoch": 0.19132785692003743, "grad_norm": 0.039419692009687424, "learning_rate": 4.710743801652893e-06, "loss": 0.3422982096672058, "step": 115 }, { "epoch": 0.1929915774149943, "grad_norm": 0.0356181338429451, "learning_rate": 4.75206611570248e-06, "loss": 0.3440757989883423, "step": 116 }, { "epoch": 0.19465529790995112, "grad_norm": 0.041206084191799164, "learning_rate": 4.793388429752067e-06, "loss": 0.32975438237190247, "step": 117 }, { "epoch": 0.19631901840490798, "grad_norm": 0.025390787050127983, "learning_rate": 4.834710743801653e-06, "loss": 0.33478546142578125, "step": 118 }, { "epoch": 0.1979827388998648, "grad_norm": 0.03742411360144615, "learning_rate": 4.87603305785124e-06, "loss": 0.30944791436195374, "step": 119 }, { "epoch": 0.19964645939482167, "grad_norm": 0.02903994731605053, "learning_rate": 4.917355371900827e-06, "loss": 0.2991634011268616, "step": 120 }, { "epoch": 0.2013101798897785, "grad_norm": 0.03681575134396553, "learning_rate": 4.958677685950414e-06, "loss": 0.28756508231163025, "step": 121 }, { "epoch": 0.20297390038473537, "grad_norm": 0.031143750995397568, "learning_rate": 5e-06, "loss": 0.286681592464447, "step": 122 }, { "epoch": 0.2046376208796922, "grad_norm": 0.030605066567659378, "learning_rate": 4.999997641274725e-06, "loss": 0.31048089265823364, "step": 123 }, { "epoch": 0.20630134137464906, "grad_norm": 0.03684311732649803, "learning_rate": 4.999990565103349e-06, "loss": 0.2921098470687866, "step": 124 }, { "epoch": 0.2079650618696059, "grad_norm": 0.05223636329174042, "learning_rate": 4.999978771499224e-06, "loss": 0.2894522249698639, "step": 125 }, { "epoch": 0.20962878236456275, "grad_norm": 0.02680801972746849, "learning_rate": 4.999962260484607e-06, "loss": 0.2657696604728699, "step": 126 }, { "epoch": 0.2112925028595196, "grad_norm": 0.028682326897978783, "learning_rate": 4.999941032090652e-06, "loss": 0.27642691135406494, "step": 127 }, { "epoch": 0.21295622335447645, "grad_norm": 0.059440821409225464, "learning_rate": 4.999915086357417e-06, "loss": 0.27616798877716064, "step": 128 }, { "epoch": 0.2146199438494333, "grad_norm": 0.038857702165842056, "learning_rate": 4.99988442333386e-06, "loss": 0.26294657588005066, "step": 129 }, { "epoch": 0.21628366434439014, "grad_norm": 0.03957323357462883, "learning_rate": 4.999849043077843e-06, "loss": 0.2662772536277771, "step": 130 }, { "epoch": 0.217947384839347, "grad_norm": 0.03721378371119499, "learning_rate": 4.999808945656128e-06, "loss": 0.25390905141830444, "step": 131 }, { "epoch": 0.21961110533430384, "grad_norm": 0.037432651966810226, "learning_rate": 4.999764131144377e-06, "loss": 0.24143514037132263, "step": 132 }, { "epoch": 0.2212748258292607, "grad_norm": 0.029236387461423874, "learning_rate": 4.999714599627155e-06, "loss": 0.2406114637851715, "step": 133 }, { "epoch": 0.22293854632421753, "grad_norm": 0.04499300941824913, "learning_rate": 4.999660351197926e-06, "loss": 0.24877291917800903, "step": 134 }, { "epoch": 0.2246022668191744, "grad_norm": 0.026736732572317123, "learning_rate": 4.999601385959056e-06, "loss": 0.22672352194786072, "step": 135 }, { "epoch": 0.22626598731413122, "grad_norm": 0.046254824846982956, "learning_rate": 4.999537704021812e-06, "loss": 0.2213183343410492, "step": 136 }, { "epoch": 0.22792970780908808, "grad_norm": 0.02913353592157364, "learning_rate": 4.99946930550636e-06, "loss": 0.21404042840003967, "step": 137 }, { "epoch": 0.22959342830404492, "grad_norm": 0.05235866829752922, "learning_rate": 4.999396190541766e-06, "loss": 0.23582462966442108, "step": 138 }, { "epoch": 0.23125714879900178, "grad_norm": 0.030388308688998222, "learning_rate": 4.999318359265998e-06, "loss": 0.22350290417671204, "step": 139 }, { "epoch": 0.2329208692939586, "grad_norm": 0.04241563007235527, "learning_rate": 4.999235811825921e-06, "loss": 0.2113513946533203, "step": 140 }, { "epoch": 0.23458458978891547, "grad_norm": 0.0259566493332386, "learning_rate": 4.9991485483773e-06, "loss": 0.2061123251914978, "step": 141 }, { "epoch": 0.2362483102838723, "grad_norm": 0.04808220639824867, "learning_rate": 4.999056569084801e-06, "loss": 0.24066820740699768, "step": 142 }, { "epoch": 0.23791203077882916, "grad_norm": 0.027925971895456314, "learning_rate": 4.998959874121986e-06, "loss": 0.2096669226884842, "step": 143 }, { "epoch": 0.239575751273786, "grad_norm": 0.04741431772708893, "learning_rate": 4.998858463671316e-06, "loss": 0.2042136937379837, "step": 144 }, { "epoch": 0.24123947176874286, "grad_norm": 0.030547140166163445, "learning_rate": 4.998752337924152e-06, "loss": 0.16891774535179138, "step": 145 }, { "epoch": 0.2429031922636997, "grad_norm": 0.041504278779029846, "learning_rate": 4.998641497080749e-06, "loss": 0.1880994737148285, "step": 146 }, { "epoch": 0.24456691275865655, "grad_norm": 0.023324500769376755, "learning_rate": 4.998525941350264e-06, "loss": 0.18950693309307098, "step": 147 }, { "epoch": 0.24623063325361338, "grad_norm": 0.037083689123392105, "learning_rate": 4.998405670950747e-06, "loss": 0.18887647986412048, "step": 148 }, { "epoch": 0.24789435374857025, "grad_norm": 0.023503053933382034, "learning_rate": 4.998280686109146e-06, "loss": 0.16628959774971008, "step": 149 }, { "epoch": 0.24955807424352708, "grad_norm": 0.030192900449037552, "learning_rate": 4.998150987061304e-06, "loss": 0.1572273075580597, "step": 150 }, { "epoch": 0.25122179473848394, "grad_norm": 0.03396950662136078, "learning_rate": 4.9980165740519625e-06, "loss": 0.16307464241981506, "step": 151 }, { "epoch": 0.25288551523344077, "grad_norm": 0.0340605154633522, "learning_rate": 4.997877447334754e-06, "loss": 0.16034038364887238, "step": 152 }, { "epoch": 0.2545492357283976, "grad_norm": 0.03912411257624626, "learning_rate": 4.99773360717221e-06, "loss": 0.14050956070423126, "step": 153 }, { "epoch": 0.2562129562233545, "grad_norm": 0.03576822578907013, "learning_rate": 4.997585053835754e-06, "loss": 0.13379324972629547, "step": 154 }, { "epoch": 0.2578766767183113, "grad_norm": 0.03452559560537338, "learning_rate": 4.997431787605701e-06, "loss": 0.1446094661951065, "step": 155 }, { "epoch": 0.25954039721326816, "grad_norm": 0.037528105080127716, "learning_rate": 4.997273808771263e-06, "loss": 0.15133655071258545, "step": 156 }, { "epoch": 0.261204117708225, "grad_norm": 0.04468218609690666, "learning_rate": 4.997111117630543e-06, "loss": 0.1459997594356537, "step": 157 }, { "epoch": 0.2628678382031819, "grad_norm": 0.034537848085165024, "learning_rate": 4.996943714490535e-06, "loss": 0.15663175284862518, "step": 158 }, { "epoch": 0.2645315586981387, "grad_norm": 0.04926777631044388, "learning_rate": 4.996771599667126e-06, "loss": 0.1453685462474823, "step": 159 }, { "epoch": 0.26619527919309555, "grad_norm": 0.02621885947883129, "learning_rate": 4.996594773485093e-06, "loss": 0.12285202741622925, "step": 160 }, { "epoch": 0.26785899968805243, "grad_norm": 0.046555280685424805, "learning_rate": 4.996413236278104e-06, "loss": 0.1389390230178833, "step": 161 }, { "epoch": 0.26952272018300927, "grad_norm": 0.023526031523942947, "learning_rate": 4.996226988388716e-06, "loss": 0.13394689559936523, "step": 162 }, { "epoch": 0.2711864406779661, "grad_norm": 0.046248551458120346, "learning_rate": 4.9960360301683755e-06, "loss": 0.14260414242744446, "step": 163 }, { "epoch": 0.27285016117292293, "grad_norm": 0.02372513897716999, "learning_rate": 4.995840361977416e-06, "loss": 0.11465226113796234, "step": 164 }, { "epoch": 0.2745138816678798, "grad_norm": 0.03185545653104782, "learning_rate": 4.995639984185059e-06, "loss": 0.12264078855514526, "step": 165 }, { "epoch": 0.27617760216283666, "grad_norm": 0.03572575002908707, "learning_rate": 4.9954348971694146e-06, "loss": 0.11168736964464188, "step": 166 }, { "epoch": 0.2778413226577935, "grad_norm": 0.02216448448598385, "learning_rate": 4.995225101317478e-06, "loss": 0.11359298229217529, "step": 167 }, { "epoch": 0.2795050431527503, "grad_norm": 0.035710953176021576, "learning_rate": 4.99501059702513e-06, "loss": 0.12145813554525375, "step": 168 }, { "epoch": 0.2811687636477072, "grad_norm": 0.0302723441272974, "learning_rate": 4.9947913846971345e-06, "loss": 0.09259926527738571, "step": 169 }, { "epoch": 0.28283248414266404, "grad_norm": 0.018867801874876022, "learning_rate": 4.994567464747141e-06, "loss": 0.1060592383146286, "step": 170 }, { "epoch": 0.2844962046376209, "grad_norm": 0.022908039391040802, "learning_rate": 4.994338837597683e-06, "loss": 0.09731046855449677, "step": 171 }, { "epoch": 0.2861599251325777, "grad_norm": 0.024193648248910904, "learning_rate": 4.994105503680176e-06, "loss": 0.09389811009168625, "step": 172 }, { "epoch": 0.2878236456275346, "grad_norm": 0.01756378822028637, "learning_rate": 4.993867463434916e-06, "loss": 0.09718440473079681, "step": 173 }, { "epoch": 0.28948736612249143, "grad_norm": 0.0198496226221323, "learning_rate": 4.9936247173110785e-06, "loss": 0.0892348438501358, "step": 174 }, { "epoch": 0.29115108661744826, "grad_norm": 0.023847704753279686, "learning_rate": 4.993377265766723e-06, "loss": 0.07874070107936859, "step": 175 }, { "epoch": 0.2928148071124051, "grad_norm": 0.02083473838865757, "learning_rate": 4.993125109268784e-06, "loss": 0.09188570082187653, "step": 176 }, { "epoch": 0.294478527607362, "grad_norm": 0.023864734917879105, "learning_rate": 4.992868248293077e-06, "loss": 0.08353804051876068, "step": 177 }, { "epoch": 0.2961422481023188, "grad_norm": 0.01996394246816635, "learning_rate": 4.9926066833242926e-06, "loss": 0.07662493735551834, "step": 178 }, { "epoch": 0.29780596859727565, "grad_norm": 0.01867249421775341, "learning_rate": 4.9923404148559995e-06, "loss": 0.06500700116157532, "step": 179 }, { "epoch": 0.2994696890922325, "grad_norm": 0.020658332854509354, "learning_rate": 4.992069443390641e-06, "loss": 0.08731688559055328, "step": 180 }, { "epoch": 0.30113340958718937, "grad_norm": 0.019181935116648674, "learning_rate": 4.991793769439534e-06, "loss": 0.06426648795604706, "step": 181 }, { "epoch": 0.3027971300821462, "grad_norm": 0.02977469190955162, "learning_rate": 4.991513393522871e-06, "loss": 0.07214197516441345, "step": 182 }, { "epoch": 0.30446085057710304, "grad_norm": 0.01673358865082264, "learning_rate": 4.991228316169715e-06, "loss": 0.07281684875488281, "step": 183 }, { "epoch": 0.30612457107205987, "grad_norm": 0.01972797140479088, "learning_rate": 4.990938537918001e-06, "loss": 0.06969514489173889, "step": 184 }, { "epoch": 0.30778829156701676, "grad_norm": 0.026684733107686043, "learning_rate": 4.990644059314536e-06, "loss": 0.07704263925552368, "step": 185 }, { "epoch": 0.3094520120619736, "grad_norm": 0.01813516579568386, "learning_rate": 4.990344880914994e-06, "loss": 0.06715594232082367, "step": 186 }, { "epoch": 0.3111157325569304, "grad_norm": 0.021501829847693443, "learning_rate": 4.990041003283921e-06, "loss": 0.06312450766563416, "step": 187 }, { "epoch": 0.31277945305188726, "grad_norm": 0.023688537999987602, "learning_rate": 4.989732426994725e-06, "loss": 0.07297050952911377, "step": 188 }, { "epoch": 0.31444317354684415, "grad_norm": 0.020870845764875412, "learning_rate": 4.989419152629685e-06, "loss": 0.05973198264837265, "step": 189 }, { "epoch": 0.316106894041801, "grad_norm": 0.031512096524238586, "learning_rate": 4.9891011807799435e-06, "loss": 0.06264424324035645, "step": 190 }, { "epoch": 0.3177706145367578, "grad_norm": 0.013584833592176437, "learning_rate": 4.988778512045507e-06, "loss": 0.05872301757335663, "step": 191 }, { "epoch": 0.31943433503171464, "grad_norm": 0.022901279851794243, "learning_rate": 4.9884511470352456e-06, "loss": 0.058490075170993805, "step": 192 }, { "epoch": 0.32109805552667153, "grad_norm": 0.023982392624020576, "learning_rate": 4.9881190863668895e-06, "loss": 0.05945117026567459, "step": 193 }, { "epoch": 0.32276177602162837, "grad_norm": 0.014951342716813087, "learning_rate": 4.98778233066703e-06, "loss": 0.04956051707267761, "step": 194 }, { "epoch": 0.3244254965165852, "grad_norm": 0.034372977912425995, "learning_rate": 4.987440880571121e-06, "loss": 0.06177012622356415, "step": 195 }, { "epoch": 0.3260892170115421, "grad_norm": 0.01750808022916317, "learning_rate": 4.98709473672347e-06, "loss": 0.04706805199384689, "step": 196 }, { "epoch": 0.3277529375064989, "grad_norm": 0.017525361850857735, "learning_rate": 4.986743899777244e-06, "loss": 0.05709604173898697, "step": 197 }, { "epoch": 0.32941665800145575, "grad_norm": 0.026951247826218605, "learning_rate": 4.986388370394466e-06, "loss": 0.05336014926433563, "step": 198 }, { "epoch": 0.3310803784964126, "grad_norm": 0.019481414929032326, "learning_rate": 4.986028149246013e-06, "loss": 0.04981919378042221, "step": 199 }, { "epoch": 0.3327440989913695, "grad_norm": 0.017069054767489433, "learning_rate": 4.985663237011614e-06, "loss": 0.04121176898479462, "step": 200 }, { "epoch": 0.3327440989913695, "eval_loss": 0.04646755009889603, "eval_runtime": 402.3664, "eval_samples_per_second": 11.949, "eval_steps_per_second": 2.987, "step": 200 }, { "epoch": 0.3344078194863263, "grad_norm": 0.01519837137311697, "learning_rate": 4.985293634379852e-06, "loss": 0.05122241750359535, "step": 201 }, { "epoch": 0.33607153998128314, "grad_norm": 0.018817614763975143, "learning_rate": 4.984919342048159e-06, "loss": 0.052712492644786835, "step": 202 }, { "epoch": 0.33773526047624, "grad_norm": 0.015790430828928947, "learning_rate": 4.984540360722819e-06, "loss": 0.041432932019233704, "step": 203 }, { "epoch": 0.33939898097119686, "grad_norm": 0.013813985511660576, "learning_rate": 4.98415669111896e-06, "loss": 0.044672828167676926, "step": 204 }, { "epoch": 0.3410627014661537, "grad_norm": 0.018357550725340843, "learning_rate": 4.9837683339605615e-06, "loss": 0.05308464914560318, "step": 205 }, { "epoch": 0.34272642196111053, "grad_norm": 0.013275988399982452, "learning_rate": 4.983375289980443e-06, "loss": 0.05014698952436447, "step": 206 }, { "epoch": 0.34439014245606736, "grad_norm": 0.014597652480006218, "learning_rate": 4.982977559920273e-06, "loss": 0.04254170134663582, "step": 207 }, { "epoch": 0.34605386295102425, "grad_norm": 0.014993159100413322, "learning_rate": 4.982575144530559e-06, "loss": 0.040170349180698395, "step": 208 }, { "epoch": 0.3477175834459811, "grad_norm": 0.011311318725347519, "learning_rate": 4.982168044570652e-06, "loss": 0.04351898282766342, "step": 209 }, { "epoch": 0.3493813039409379, "grad_norm": 0.01357428077608347, "learning_rate": 4.981756260808741e-06, "loss": 0.045017972588539124, "step": 210 }, { "epoch": 0.35104502443589475, "grad_norm": 0.011063991114497185, "learning_rate": 4.981339794021853e-06, "loss": 0.036168403923511505, "step": 211 }, { "epoch": 0.35270874493085164, "grad_norm": 0.015618620440363884, "learning_rate": 4.9809186449958536e-06, "loss": 0.03904163837432861, "step": 212 }, { "epoch": 0.35437246542580847, "grad_norm": 0.010496679693460464, "learning_rate": 4.980492814525442e-06, "loss": 0.026133833453059196, "step": 213 }, { "epoch": 0.3560361859207653, "grad_norm": 0.013019095174968243, "learning_rate": 4.980062303414152e-06, "loss": 0.03268534690141678, "step": 214 }, { "epoch": 0.35769990641572214, "grad_norm": 0.013511061668395996, "learning_rate": 4.97962711247435e-06, "loss": 0.03932594135403633, "step": 215 }, { "epoch": 0.359363626910679, "grad_norm": 0.013383673503994942, "learning_rate": 4.979187242527233e-06, "loss": 0.0299623254686594, "step": 216 }, { "epoch": 0.36102734740563586, "grad_norm": 0.030445793643593788, "learning_rate": 4.978742694402825e-06, "loss": 0.03553349897265434, "step": 217 }, { "epoch": 0.3626910679005927, "grad_norm": 0.013040585443377495, "learning_rate": 4.978293468939982e-06, "loss": 0.035122938454151154, "step": 218 }, { "epoch": 0.3643547883955495, "grad_norm": 0.015449252910912037, "learning_rate": 4.977839566986382e-06, "loss": 0.0295286662876606, "step": 219 }, { "epoch": 0.3660185088905064, "grad_norm": 0.026898793876171112, "learning_rate": 4.977380989398529e-06, "loss": 0.047140851616859436, "step": 220 }, { "epoch": 0.36768222938546324, "grad_norm": 0.018636824563145638, "learning_rate": 4.976917737041751e-06, "loss": 0.03673876076936722, "step": 221 }, { "epoch": 0.3693459498804201, "grad_norm": 0.0145517997443676, "learning_rate": 4.976449810790196e-06, "loss": 0.037538688629865646, "step": 222 }, { "epoch": 0.3710096703753769, "grad_norm": 0.012170245870947838, "learning_rate": 4.97597721152683e-06, "loss": 0.02781190723180771, "step": 223 }, { "epoch": 0.3726733908703338, "grad_norm": 0.01407319400459528, "learning_rate": 4.975499940143439e-06, "loss": 0.03755120187997818, "step": 224 }, { "epoch": 0.37433711136529063, "grad_norm": 0.012709653936326504, "learning_rate": 4.975017997540625e-06, "loss": 0.04252926632761955, "step": 225 }, { "epoch": 0.37600083186024746, "grad_norm": 0.013191037811338902, "learning_rate": 4.974531384627805e-06, "loss": 0.030559688806533813, "step": 226 }, { "epoch": 0.37766455235520435, "grad_norm": 0.018450967967510223, "learning_rate": 4.974040102323207e-06, "loss": 0.030250580981373787, "step": 227 }, { "epoch": 0.3793282728501612, "grad_norm": 0.009675616398453712, "learning_rate": 4.973544151553869e-06, "loss": 0.022407352924346924, "step": 228 }, { "epoch": 0.380991993345118, "grad_norm": 0.01245384756475687, "learning_rate": 4.973043533255645e-06, "loss": 0.027813564985990524, "step": 229 }, { "epoch": 0.38265571384007485, "grad_norm": 0.013367368839681149, "learning_rate": 4.972538248373188e-06, "loss": 0.04025476798415184, "step": 230 }, { "epoch": 0.38431943433503174, "grad_norm": 0.011741326190531254, "learning_rate": 4.9720282978599625e-06, "loss": 0.030507639050483704, "step": 231 }, { "epoch": 0.3859831548299886, "grad_norm": 0.009616893716156483, "learning_rate": 4.971513682678234e-06, "loss": 0.030176818370819092, "step": 232 }, { "epoch": 0.3876468753249454, "grad_norm": 0.009078212082386017, "learning_rate": 4.970994403799072e-06, "loss": 0.021030820906162262, "step": 233 }, { "epoch": 0.38931059581990224, "grad_norm": 0.008414865471422672, "learning_rate": 4.970470462202343e-06, "loss": 0.022331478074193, "step": 234 }, { "epoch": 0.3909743163148591, "grad_norm": 0.010737700387835503, "learning_rate": 4.969941858876719e-06, "loss": 0.026705848053097725, "step": 235 }, { "epoch": 0.39263803680981596, "grad_norm": 0.009779262356460094, "learning_rate": 4.96940859481966e-06, "loss": 0.02494858205318451, "step": 236 }, { "epoch": 0.3943017573047728, "grad_norm": 0.012061214074492455, "learning_rate": 4.968870671037427e-06, "loss": 0.02954268828034401, "step": 237 }, { "epoch": 0.3959654777997296, "grad_norm": 0.011517210863530636, "learning_rate": 4.96832808854507e-06, "loss": 0.025342591106891632, "step": 238 }, { "epoch": 0.3976291982946865, "grad_norm": 0.009201574139297009, "learning_rate": 4.967780848366432e-06, "loss": 0.026798926293849945, "step": 239 }, { "epoch": 0.39929291878964335, "grad_norm": 0.015082907862961292, "learning_rate": 4.967228951534144e-06, "loss": 0.02584957890212536, "step": 240 }, { "epoch": 0.4009566392846002, "grad_norm": 0.017343197017908096, "learning_rate": 4.966672399089626e-06, "loss": 0.030483413487672806, "step": 241 }, { "epoch": 0.402620359779557, "grad_norm": 0.00961192324757576, "learning_rate": 4.966111192083081e-06, "loss": 0.029386430978775024, "step": 242 }, { "epoch": 0.4042840802745139, "grad_norm": 0.011751300655305386, "learning_rate": 4.965545331573493e-06, "loss": 0.02346094697713852, "step": 243 }, { "epoch": 0.40594780076947073, "grad_norm": 0.013170548714697361, "learning_rate": 4.964974818628633e-06, "loss": 0.02498645707964897, "step": 244 }, { "epoch": 0.40761152126442757, "grad_norm": 0.008634310215711594, "learning_rate": 4.964399654325045e-06, "loss": 0.02685185708105564, "step": 245 }, { "epoch": 0.4092752417593844, "grad_norm": 0.010694021359086037, "learning_rate": 4.963819839748055e-06, "loss": 0.028527498245239258, "step": 246 }, { "epoch": 0.4109389622543413, "grad_norm": 0.01221507042646408, "learning_rate": 4.96323537599176e-06, "loss": 0.0299068596214056, "step": 247 }, { "epoch": 0.4126026827492981, "grad_norm": 0.009258517995476723, "learning_rate": 4.962646264159031e-06, "loss": 0.026110809296369553, "step": 248 }, { "epoch": 0.41426640324425495, "grad_norm": 0.009516320191323757, "learning_rate": 4.962052505361512e-06, "loss": 0.03480090573430061, "step": 249 }, { "epoch": 0.4159301237392118, "grad_norm": 0.009755507111549377, "learning_rate": 4.9614541007196136e-06, "loss": 0.02209116891026497, "step": 250 }, { "epoch": 0.4175938442341687, "grad_norm": 0.009852356277406216, "learning_rate": 4.960851051362514e-06, "loss": 0.01930748112499714, "step": 251 }, { "epoch": 0.4192575647291255, "grad_norm": 0.011041955091059208, "learning_rate": 4.960243358428154e-06, "loss": 0.029225628823041916, "step": 252 }, { "epoch": 0.42092128522408234, "grad_norm": 0.00869595818221569, "learning_rate": 4.959631023063238e-06, "loss": 0.016868792474269867, "step": 253 }, { "epoch": 0.4225850057190392, "grad_norm": 0.016593001782894135, "learning_rate": 4.959014046423233e-06, "loss": 0.02676609717309475, "step": 254 }, { "epoch": 0.42424872621399606, "grad_norm": 0.00895164255052805, "learning_rate": 4.9583924296723606e-06, "loss": 0.016503386199474335, "step": 255 }, { "epoch": 0.4259124467089529, "grad_norm": 0.009723796509206295, "learning_rate": 4.957766173983598e-06, "loss": 0.025092676281929016, "step": 256 }, { "epoch": 0.42757616720390973, "grad_norm": 0.011724260635674, "learning_rate": 4.9571352805386795e-06, "loss": 0.018208272755146027, "step": 257 }, { "epoch": 0.4292398876988666, "grad_norm": 0.016934333369135857, "learning_rate": 4.956499750528086e-06, "loss": 0.04804690182209015, "step": 258 }, { "epoch": 0.43090360819382345, "grad_norm": 0.009145854972302914, "learning_rate": 4.955859585151054e-06, "loss": 0.017941124737262726, "step": 259 }, { "epoch": 0.4325673286887803, "grad_norm": 0.010602661408483982, "learning_rate": 4.955214785615558e-06, "loss": 0.027120577171444893, "step": 260 }, { "epoch": 0.4342310491837371, "grad_norm": 0.007691461127251387, "learning_rate": 4.9545653531383255e-06, "loss": 0.017808586359024048, "step": 261 }, { "epoch": 0.435894769678694, "grad_norm": 0.007216070778667927, "learning_rate": 4.953911288944821e-06, "loss": 0.02255088835954666, "step": 262 }, { "epoch": 0.43755849017365084, "grad_norm": 0.010287751443684101, "learning_rate": 4.953252594269252e-06, "loss": 0.02571818232536316, "step": 263 }, { "epoch": 0.43922221066860767, "grad_norm": 0.012414435856044292, "learning_rate": 4.9525892703545604e-06, "loss": 0.03532583266496658, "step": 264 }, { "epoch": 0.4408859311635645, "grad_norm": 0.007705547381192446, "learning_rate": 4.951921318452428e-06, "loss": 0.0289824940264225, "step": 265 }, { "epoch": 0.4425496516585214, "grad_norm": 0.009268342517316341, "learning_rate": 4.951248739823264e-06, "loss": 0.020963333547115326, "step": 266 }, { "epoch": 0.4442133721534782, "grad_norm": 0.009462636895477772, "learning_rate": 4.950571535736214e-06, "loss": 0.02270900085568428, "step": 267 }, { "epoch": 0.44587709264843506, "grad_norm": 0.009764865040779114, "learning_rate": 4.949889707469145e-06, "loss": 0.020391490310430527, "step": 268 }, { "epoch": 0.4475408131433919, "grad_norm": 0.013523961417376995, "learning_rate": 4.949203256308658e-06, "loss": 0.021415244787931442, "step": 269 }, { "epoch": 0.4492045336383488, "grad_norm": 0.014478780329227448, "learning_rate": 4.948512183550068e-06, "loss": 0.027904432266950607, "step": 270 }, { "epoch": 0.4508682541333056, "grad_norm": 0.0173338670283556, "learning_rate": 4.947816490497419e-06, "loss": 0.01066598016768694, "step": 271 }, { "epoch": 0.45253197462826245, "grad_norm": 0.01282422710210085, "learning_rate": 4.947116178463469e-06, "loss": 0.027157828211784363, "step": 272 }, { "epoch": 0.4541956951232193, "grad_norm": 0.012170843780040741, "learning_rate": 4.946411248769693e-06, "loss": 0.031752899289131165, "step": 273 }, { "epoch": 0.45585941561817617, "grad_norm": 0.011489557102322578, "learning_rate": 4.945701702746279e-06, "loss": 0.0212496779859066, "step": 274 }, { "epoch": 0.457523136113133, "grad_norm": 0.017554108053445816, "learning_rate": 4.944987541732126e-06, "loss": 0.024693092331290245, "step": 275 }, { "epoch": 0.45918685660808983, "grad_norm": 0.015555243007838726, "learning_rate": 4.944268767074842e-06, "loss": 0.020125292241573334, "step": 276 }, { "epoch": 0.46085057710304667, "grad_norm": 0.011039176024496555, "learning_rate": 4.943545380130742e-06, "loss": 0.024579813703894615, "step": 277 }, { "epoch": 0.46251429759800355, "grad_norm": 0.011976576410233974, "learning_rate": 4.942817382264842e-06, "loss": 0.014248369261622429, "step": 278 }, { "epoch": 0.4641780180929604, "grad_norm": 0.015246872790157795, "learning_rate": 4.942084774850858e-06, "loss": 0.027225427329540253, "step": 279 }, { "epoch": 0.4658417385879172, "grad_norm": 0.013577229343354702, "learning_rate": 4.941347559271208e-06, "loss": 0.024211157113313675, "step": 280 }, { "epoch": 0.46750545908287405, "grad_norm": 0.013304123654961586, "learning_rate": 4.9406057369170015e-06, "loss": 0.020554721355438232, "step": 281 }, { "epoch": 0.46916917957783094, "grad_norm": 0.019589155912399292, "learning_rate": 4.939859309188044e-06, "loss": 0.027771135792136192, "step": 282 }, { "epoch": 0.4708329000727878, "grad_norm": 0.01041374821215868, "learning_rate": 4.939108277492829e-06, "loss": 0.020846642553806305, "step": 283 }, { "epoch": 0.4724966205677446, "grad_norm": 0.013426681980490685, "learning_rate": 4.9383526432485375e-06, "loss": 0.02354995533823967, "step": 284 }, { "epoch": 0.47416034106270144, "grad_norm": 0.012636139057576656, "learning_rate": 4.937592407881039e-06, "loss": 0.01683727838099003, "step": 285 }, { "epoch": 0.47582406155765833, "grad_norm": 0.0078094336204230785, "learning_rate": 4.93682757282488e-06, "loss": 0.02656428888440132, "step": 286 }, { "epoch": 0.47748778205261516, "grad_norm": 0.014173673465847969, "learning_rate": 4.936058139523291e-06, "loss": 0.02244233526289463, "step": 287 }, { "epoch": 0.479151502547572, "grad_norm": 0.013154593296349049, "learning_rate": 4.935284109428177e-06, "loss": 0.014531275257468224, "step": 288 }, { "epoch": 0.4808152230425288, "grad_norm": 0.0084519749507308, "learning_rate": 4.934505484000116e-06, "loss": 0.013687445782124996, "step": 289 }, { "epoch": 0.4824789435374857, "grad_norm": 0.008050220087170601, "learning_rate": 4.93372226470836e-06, "loss": 0.022462567314505577, "step": 290 }, { "epoch": 0.48414266403244255, "grad_norm": 0.008466185070574284, "learning_rate": 4.932934453030829e-06, "loss": 0.01815696805715561, "step": 291 }, { "epoch": 0.4858063845273994, "grad_norm": 0.01022345945239067, "learning_rate": 4.932142050454107e-06, "loss": 0.0166168250143528, "step": 292 }, { "epoch": 0.48747010502235627, "grad_norm": 0.010604492388665676, "learning_rate": 4.931345058473443e-06, "loss": 0.01394139975309372, "step": 293 }, { "epoch": 0.4891338255173131, "grad_norm": 0.007332377601414919, "learning_rate": 4.930543478592743e-06, "loss": 0.017569545656442642, "step": 294 }, { "epoch": 0.49079754601226994, "grad_norm": 0.010255846194922924, "learning_rate": 4.929737312324574e-06, "loss": 0.019131770357489586, "step": 295 }, { "epoch": 0.49246126650722677, "grad_norm": 0.007818172685801983, "learning_rate": 4.928926561190155e-06, "loss": 0.01540689542889595, "step": 296 }, { "epoch": 0.49412498700218366, "grad_norm": 0.01050427183508873, "learning_rate": 4.928111226719359e-06, "loss": 0.018683919683098793, "step": 297 }, { "epoch": 0.4957887074971405, "grad_norm": 0.007114852778613567, "learning_rate": 4.927291310450705e-06, "loss": 0.01786038652062416, "step": 298 }, { "epoch": 0.4974524279920973, "grad_norm": 0.008571256883442402, "learning_rate": 4.926466813931358e-06, "loss": 0.018031764775514603, "step": 299 }, { "epoch": 0.49911614848705416, "grad_norm": 0.009803226217627525, "learning_rate": 4.925637738717127e-06, "loss": 0.020492907613515854, "step": 300 }, { "epoch": 0.49911614848705416, "eval_loss": 0.016338517889380455, "eval_runtime": 403.1488, "eval_samples_per_second": 11.926, "eval_steps_per_second": 2.982, "step": 300 }, { "epoch": 0.500779868982011, "grad_norm": 0.005585647653788328, "learning_rate": 4.924804086372462e-06, "loss": 0.013384521938860416, "step": 301 }, { "epoch": 0.5024435894769679, "grad_norm": 0.006244071293622255, "learning_rate": 4.9239658584704466e-06, "loss": 0.015450171194970608, "step": 302 }, { "epoch": 0.5041073099719248, "grad_norm": 0.006051548756659031, "learning_rate": 4.923123056592801e-06, "loss": 0.00981193408370018, "step": 303 }, { "epoch": 0.5057710304668815, "grad_norm": 0.0061712805181741714, "learning_rate": 4.922275682329876e-06, "loss": 0.015286393463611603, "step": 304 }, { "epoch": 0.5074347509618384, "grad_norm": 0.009269554167985916, "learning_rate": 4.921423737280649e-06, "loss": 0.014119069091975689, "step": 305 }, { "epoch": 0.5090984714567952, "grad_norm": 0.00824077520519495, "learning_rate": 4.9205672230527254e-06, "loss": 0.011779182590544224, "step": 306 }, { "epoch": 0.5107621919517521, "grad_norm": 0.007063789293169975, "learning_rate": 4.919706141262329e-06, "loss": 0.011469569057226181, "step": 307 }, { "epoch": 0.512425912446709, "grad_norm": 0.009946978650987148, "learning_rate": 4.918840493534305e-06, "loss": 0.020134299993515015, "step": 308 }, { "epoch": 0.5140896329416658, "grad_norm": 0.007792671676725149, "learning_rate": 4.917970281502112e-06, "loss": 0.014403749257326126, "step": 309 }, { "epoch": 0.5157533534366227, "grad_norm": 0.010617299936711788, "learning_rate": 4.917095506807824e-06, "loss": 0.015932071954011917, "step": 310 }, { "epoch": 0.5174170739315795, "grad_norm": 0.008703233674168587, "learning_rate": 4.916216171102124e-06, "loss": 0.022535301744937897, "step": 311 }, { "epoch": 0.5190807944265363, "grad_norm": 0.006629075389355421, "learning_rate": 4.9153322760443015e-06, "loss": 0.013775508850812912, "step": 312 }, { "epoch": 0.5207445149214932, "grad_norm": 0.007406014949083328, "learning_rate": 4.914443823302246e-06, "loss": 0.01799451746046543, "step": 313 }, { "epoch": 0.52240823541645, "grad_norm": 0.00845777615904808, "learning_rate": 4.913550814552454e-06, "loss": 0.018990565091371536, "step": 314 }, { "epoch": 0.5240719559114069, "grad_norm": 0.007644719909876585, "learning_rate": 4.912653251480013e-06, "loss": 0.016380393877625465, "step": 315 }, { "epoch": 0.5257356764063638, "grad_norm": 0.005281711462885141, "learning_rate": 4.9117511357786075e-06, "loss": 0.011785021051764488, "step": 316 }, { "epoch": 0.5273993969013205, "grad_norm": 0.004629249218851328, "learning_rate": 4.910844469150512e-06, "loss": 0.014201750978827477, "step": 317 }, { "epoch": 0.5290631173962774, "grad_norm": 0.006540986709296703, "learning_rate": 4.909933253306588e-06, "loss": 0.016089923679828644, "step": 318 }, { "epoch": 0.5307268378912343, "grad_norm": 0.007417282089591026, "learning_rate": 4.909017489966283e-06, "loss": 0.015274600125849247, "step": 319 }, { "epoch": 0.5323905583861911, "grad_norm": 0.006533796899020672, "learning_rate": 4.9080971808576226e-06, "loss": 0.014022988267242908, "step": 320 }, { "epoch": 0.534054278881148, "grad_norm": 0.005975611042231321, "learning_rate": 4.907172327717214e-06, "loss": 0.009007789194583893, "step": 321 }, { "epoch": 0.5357179993761049, "grad_norm": 0.0060996185056865215, "learning_rate": 4.906242932290234e-06, "loss": 0.013682247139513493, "step": 322 }, { "epoch": 0.5373817198710616, "grad_norm": 0.004868973977863789, "learning_rate": 4.905308996330437e-06, "loss": 0.013650172390043736, "step": 323 }, { "epoch": 0.5390454403660185, "grad_norm": 0.005156499799340963, "learning_rate": 4.904370521600138e-06, "loss": 0.008532417006790638, "step": 324 }, { "epoch": 0.5407091608609753, "grad_norm": 0.007405860815197229, "learning_rate": 4.903427509870222e-06, "loss": 0.01490192674100399, "step": 325 }, { "epoch": 0.5423728813559322, "grad_norm": 0.009611138142645359, "learning_rate": 4.902479962920134e-06, "loss": 0.012371081858873367, "step": 326 }, { "epoch": 0.5440366018508891, "grad_norm": 0.011518461629748344, "learning_rate": 4.901527882537876e-06, "loss": 0.014121288433670998, "step": 327 }, { "epoch": 0.5457003223458459, "grad_norm": 0.006337135564535856, "learning_rate": 4.900571270520004e-06, "loss": 0.01352706365287304, "step": 328 }, { "epoch": 0.5473640428408028, "grad_norm": 0.006071313749998808, "learning_rate": 4.899610128671626e-06, "loss": 0.012974245473742485, "step": 329 }, { "epoch": 0.5490277633357596, "grad_norm": 0.007717214059084654, "learning_rate": 4.898644458806398e-06, "loss": 0.010514363646507263, "step": 330 }, { "epoch": 0.5506914838307164, "grad_norm": 0.011666782200336456, "learning_rate": 4.897674262746522e-06, "loss": 0.01974363438785076, "step": 331 }, { "epoch": 0.5523552043256733, "grad_norm": 0.006378008518368006, "learning_rate": 4.896699542322736e-06, "loss": 0.01286529004573822, "step": 332 }, { "epoch": 0.5540189248206301, "grad_norm": 0.018436580896377563, "learning_rate": 4.895720299374319e-06, "loss": 0.011777098290622234, "step": 333 }, { "epoch": 0.555682645315587, "grad_norm": 0.008302860893309116, "learning_rate": 4.894736535749083e-06, "loss": 0.014747174456715584, "step": 334 }, { "epoch": 0.5573463658105439, "grad_norm": 0.02435437962412834, "learning_rate": 4.89374825330337e-06, "loss": 0.02497844025492668, "step": 335 }, { "epoch": 0.5590100863055006, "grad_norm": 0.007081752642989159, "learning_rate": 4.892755453902051e-06, "loss": 0.016742464154958725, "step": 336 }, { "epoch": 0.5606738068004575, "grad_norm": 0.007361928932368755, "learning_rate": 4.8917581394185175e-06, "loss": 0.013661217875778675, "step": 337 }, { "epoch": 0.5623375272954144, "grad_norm": 0.008742809295654297, "learning_rate": 4.890756311734683e-06, "loss": 0.014807759784162045, "step": 338 }, { "epoch": 0.5640012477903712, "grad_norm": 0.0050587523728609085, "learning_rate": 4.8897499727409755e-06, "loss": 0.013927068561315536, "step": 339 }, { "epoch": 0.5656649682853281, "grad_norm": 0.008532224223017693, "learning_rate": 4.888739124336338e-06, "loss": 0.012824540957808495, "step": 340 }, { "epoch": 0.5673286887802849, "grad_norm": 0.014145714230835438, "learning_rate": 4.8877237684282205e-06, "loss": 0.019548412412405014, "step": 341 }, { "epoch": 0.5689924092752418, "grad_norm": 0.00613539619371295, "learning_rate": 4.8867039069325804e-06, "loss": 0.011379221454262733, "step": 342 }, { "epoch": 0.5706561297701986, "grad_norm": 0.006859153043478727, "learning_rate": 4.8856795417738754e-06, "loss": 0.013419722206890583, "step": 343 }, { "epoch": 0.5723198502651554, "grad_norm": 0.006972996052354574, "learning_rate": 4.884650674885062e-06, "loss": 0.017972735688090324, "step": 344 }, { "epoch": 0.5739835707601123, "grad_norm": 0.007068189792335033, "learning_rate": 4.883617308207592e-06, "loss": 0.013157499022781849, "step": 345 }, { "epoch": 0.5756472912550692, "grad_norm": 0.00740961916744709, "learning_rate": 4.88257944369141e-06, "loss": 0.011037548072636127, "step": 346 }, { "epoch": 0.577311011750026, "grad_norm": 0.008075796999037266, "learning_rate": 4.8815370832949425e-06, "loss": 0.01143650058656931, "step": 347 }, { "epoch": 0.5789747322449829, "grad_norm": 0.005925778299570084, "learning_rate": 4.880490228985104e-06, "loss": 0.014801234006881714, "step": 348 }, { "epoch": 0.5806384527399396, "grad_norm": 0.005958295427262783, "learning_rate": 4.8794388827372884e-06, "loss": 0.018268218263983727, "step": 349 }, { "epoch": 0.5823021732348965, "grad_norm": 0.005769642069935799, "learning_rate": 4.878383046535366e-06, "loss": 0.015677250921726227, "step": 350 }, { "epoch": 0.5839658937298534, "grad_norm": 0.003173237666487694, "learning_rate": 4.877322722371677e-06, "loss": 0.007178253494203091, "step": 351 }, { "epoch": 0.5856296142248102, "grad_norm": 0.005019443575292826, "learning_rate": 4.876257912247033e-06, "loss": 0.009780826047062874, "step": 352 }, { "epoch": 0.5872933347197671, "grad_norm": 0.0037745118606835604, "learning_rate": 4.8751886181707105e-06, "loss": 0.010863647796213627, "step": 353 }, { "epoch": 0.588957055214724, "grad_norm": 0.0066299536265432835, "learning_rate": 4.874114842160445e-06, "loss": 0.014221194200217724, "step": 354 }, { "epoch": 0.5906207757096807, "grad_norm": 0.005365578457713127, "learning_rate": 4.873036586242431e-06, "loss": 0.018708698451519012, "step": 355 }, { "epoch": 0.5922844962046376, "grad_norm": 0.00685572624206543, "learning_rate": 4.871953852451316e-06, "loss": 0.014886384829878807, "step": 356 }, { "epoch": 0.5939482166995945, "grad_norm": 0.006036871578544378, "learning_rate": 4.8708666428301975e-06, "loss": 0.015632428228855133, "step": 357 }, { "epoch": 0.5956119371945513, "grad_norm": 0.005431063938885927, "learning_rate": 4.869774959430619e-06, "loss": 0.010408837348222733, "step": 358 }, { "epoch": 0.5972756576895082, "grad_norm": 0.0059727029874920845, "learning_rate": 4.868678804312565e-06, "loss": 0.018925290554761887, "step": 359 }, { "epoch": 0.598939378184465, "grad_norm": 0.004846646916121244, "learning_rate": 4.867578179544457e-06, "loss": 0.010519372299313545, "step": 360 }, { "epoch": 0.6006030986794219, "grad_norm": 0.005863612052053213, "learning_rate": 4.866473087203154e-06, "loss": 0.018922332674264908, "step": 361 }, { "epoch": 0.6022668191743787, "grad_norm": 0.004887004382908344, "learning_rate": 4.865363529373944e-06, "loss": 0.008048690855503082, "step": 362 }, { "epoch": 0.6039305396693355, "grad_norm": 0.003912640269845724, "learning_rate": 4.864249508150539e-06, "loss": 0.008222410455346107, "step": 363 }, { "epoch": 0.6055942601642924, "grad_norm": 0.005772759206593037, "learning_rate": 4.863131025635076e-06, "loss": 0.01382957212626934, "step": 364 }, { "epoch": 0.6072579806592493, "grad_norm": 0.006792543921619654, "learning_rate": 4.862008083938109e-06, "loss": 0.01627694070339203, "step": 365 }, { "epoch": 0.6089217011542061, "grad_norm": 0.004902017302811146, "learning_rate": 4.8608806851786075e-06, "loss": 0.009956824593245983, "step": 366 }, { "epoch": 0.610585421649163, "grad_norm": 0.006868287455290556, "learning_rate": 4.859748831483949e-06, "loss": 0.008180022239685059, "step": 367 }, { "epoch": 0.6122491421441197, "grad_norm": 0.008069746196269989, "learning_rate": 4.858612524989921e-06, "loss": 0.01568032242357731, "step": 368 }, { "epoch": 0.6139128626390766, "grad_norm": 0.005045538302510977, "learning_rate": 4.857471767840709e-06, "loss": 0.008955922909080982, "step": 369 }, { "epoch": 0.6155765831340335, "grad_norm": 0.005875969305634499, "learning_rate": 4.856326562188902e-06, "loss": 0.01971004158258438, "step": 370 }, { "epoch": 0.6172403036289903, "grad_norm": 0.005288930144160986, "learning_rate": 4.855176910195479e-06, "loss": 0.010098019614815712, "step": 371 }, { "epoch": 0.6189040241239472, "grad_norm": 0.00646026199683547, "learning_rate": 4.854022814029809e-06, "loss": 0.014291519299149513, "step": 372 }, { "epoch": 0.6205677446189041, "grad_norm": 0.004560984671115875, "learning_rate": 4.852864275869652e-06, "loss": 0.012375302612781525, "step": 373 }, { "epoch": 0.6222314651138608, "grad_norm": 0.007243362721055746, "learning_rate": 4.851701297901144e-06, "loss": 0.012917522341012955, "step": 374 }, { "epoch": 0.6238951856088177, "grad_norm": 0.006706541404128075, "learning_rate": 4.850533882318803e-06, "loss": 0.011306913569569588, "step": 375 }, { "epoch": 0.6255589061037745, "grad_norm": 0.005922200623899698, "learning_rate": 4.849362031325518e-06, "loss": 0.009204890578985214, "step": 376 }, { "epoch": 0.6272226265987314, "grad_norm": 0.005413861479610205, "learning_rate": 4.8481857471325485e-06, "loss": 0.010358748957514763, "step": 377 }, { "epoch": 0.6288863470936883, "grad_norm": 0.005771263036876917, "learning_rate": 4.847005031959521e-06, "loss": 0.007761461194604635, "step": 378 }, { "epoch": 0.6305500675886451, "grad_norm": 0.007504676003009081, "learning_rate": 4.84581988803442e-06, "loss": 0.0132828364148736, "step": 379 }, { "epoch": 0.632213788083602, "grad_norm": 0.007642901036888361, "learning_rate": 4.84463031759359e-06, "loss": 0.008316827937960625, "step": 380 }, { "epoch": 0.6338775085785588, "grad_norm": 0.004544087685644627, "learning_rate": 4.843436322881725e-06, "loss": 0.00889534130692482, "step": 381 }, { "epoch": 0.6355412290735156, "grad_norm": 0.007763517554849386, "learning_rate": 4.8422379061518705e-06, "loss": 0.010410662740468979, "step": 382 }, { "epoch": 0.6372049495684725, "grad_norm": 0.004702461417764425, "learning_rate": 4.841035069665416e-06, "loss": 0.012421442195773125, "step": 383 }, { "epoch": 0.6388686700634293, "grad_norm": 0.004728985484689474, "learning_rate": 4.83982781569209e-06, "loss": 0.009557276964187622, "step": 384 }, { "epoch": 0.6405323905583862, "grad_norm": 0.006002399139106274, "learning_rate": 4.838616146509956e-06, "loss": 0.008976309560239315, "step": 385 }, { "epoch": 0.6421961110533431, "grad_norm": 0.0073576439172029495, "learning_rate": 4.83740006440541e-06, "loss": 0.01807243376970291, "step": 386 }, { "epoch": 0.6438598315482998, "grad_norm": 0.004097748547792435, "learning_rate": 4.8361795716731744e-06, "loss": 0.009704215452075005, "step": 387 }, { "epoch": 0.6455235520432567, "grad_norm": 0.0038165440782904625, "learning_rate": 4.8349546706162965e-06, "loss": 0.0072257500141859055, "step": 388 }, { "epoch": 0.6471872725382136, "grad_norm": 0.007018840406090021, "learning_rate": 4.833725363546139e-06, "loss": 0.008374448865652084, "step": 389 }, { "epoch": 0.6488509930331704, "grad_norm": 0.006301086861640215, "learning_rate": 4.8324916527823795e-06, "loss": 0.022184694185853004, "step": 390 }, { "epoch": 0.6505147135281273, "grad_norm": 0.004370077047497034, "learning_rate": 4.831253540653007e-06, "loss": 0.01324211061000824, "step": 391 }, { "epoch": 0.6521784340230842, "grad_norm": 0.0047884974628686905, "learning_rate": 4.8300110294943145e-06, "loss": 0.011861605569720268, "step": 392 }, { "epoch": 0.653842154518041, "grad_norm": 0.0038174018263816833, "learning_rate": 4.828764121650896e-06, "loss": 0.006326655857264996, "step": 393 }, { "epoch": 0.6555058750129978, "grad_norm": 0.004340061917901039, "learning_rate": 4.827512819475641e-06, "loss": 0.012556311674416065, "step": 394 }, { "epoch": 0.6571695955079546, "grad_norm": 0.0038324350025504827, "learning_rate": 4.826257125329733e-06, "loss": 0.007697253488004208, "step": 395 }, { "epoch": 0.6588333160029115, "grad_norm": 0.006832764483988285, "learning_rate": 4.824997041582641e-06, "loss": 0.01178714819252491, "step": 396 }, { "epoch": 0.6604970364978684, "grad_norm": 0.004772037733346224, "learning_rate": 4.82373257061212e-06, "loss": 0.012505128979682922, "step": 397 }, { "epoch": 0.6621607569928252, "grad_norm": 0.0030513901729136705, "learning_rate": 4.8224637148042e-06, "loss": 0.007846292108297348, "step": 398 }, { "epoch": 0.6638244774877821, "grad_norm": 0.0031088057439774275, "learning_rate": 4.821190476553186e-06, "loss": 0.004913134500384331, "step": 399 }, { "epoch": 0.665488197982739, "grad_norm": 0.0029906572308391333, "learning_rate": 4.819912858261656e-06, "loss": 0.008030945435166359, "step": 400 }, { "epoch": 0.665488197982739, "eval_loss": 0.008412709459662437, "eval_runtime": 407.1614, "eval_samples_per_second": 11.809, "eval_steps_per_second": 2.952, "step": 400 }, { "epoch": 0.6671519184776957, "grad_norm": 0.003743760986253619, "learning_rate": 4.818630862340449e-06, "loss": 0.009701243601739407, "step": 401 }, { "epoch": 0.6688156389726526, "grad_norm": 0.003921910189092159, "learning_rate": 4.817344491208665e-06, "loss": 0.006662596948444843, "step": 402 }, { "epoch": 0.6704793594676094, "grad_norm": 0.00469661969691515, "learning_rate": 4.816053747293663e-06, "loss": 0.008967725560069084, "step": 403 }, { "epoch": 0.6721430799625663, "grad_norm": 0.00589489983394742, "learning_rate": 4.814758633031049e-06, "loss": 0.013237478211522102, "step": 404 }, { "epoch": 0.6738068004575232, "grad_norm": 0.00632744375616312, "learning_rate": 4.813459150864681e-06, "loss": 0.0064522032625973225, "step": 405 }, { "epoch": 0.67547052095248, "grad_norm": 0.004063498694449663, "learning_rate": 4.812155303246653e-06, "loss": 0.0071722520515322685, "step": 406 }, { "epoch": 0.6771342414474368, "grad_norm": 0.005490950774401426, "learning_rate": 4.810847092637301e-06, "loss": 0.00855968426913023, "step": 407 }, { "epoch": 0.6787979619423937, "grad_norm": 0.004029491916298866, "learning_rate": 4.809534521505192e-06, "loss": 0.0076716807670891285, "step": 408 }, { "epoch": 0.6804616824373505, "grad_norm": 0.004423109348863363, "learning_rate": 4.8082175923271235e-06, "loss": 0.009134914726018906, "step": 409 }, { "epoch": 0.6821254029323074, "grad_norm": 0.003607613267377019, "learning_rate": 4.806896307588113e-06, "loss": 0.007768969517201185, "step": 410 }, { "epoch": 0.6837891234272642, "grad_norm": 0.0037080564070492983, "learning_rate": 4.805570669781399e-06, "loss": 0.008052971214056015, "step": 411 }, { "epoch": 0.6854528439222211, "grad_norm": 0.006682890933007002, "learning_rate": 4.804240681408434e-06, "loss": 0.011350871995091438, "step": 412 }, { "epoch": 0.6871165644171779, "grad_norm": 0.00619282154366374, "learning_rate": 4.802906344978881e-06, "loss": 0.008059765212237835, "step": 413 }, { "epoch": 0.6887802849121347, "grad_norm": 0.004879550542682409, "learning_rate": 4.801567663010605e-06, "loss": 0.011704351752996445, "step": 414 }, { "epoch": 0.6904440054070916, "grad_norm": 0.0034507052041590214, "learning_rate": 4.800224638029672e-06, "loss": 0.006550830323249102, "step": 415 }, { "epoch": 0.6921077259020485, "grad_norm": 0.0060535939410328865, "learning_rate": 4.798877272570343e-06, "loss": 0.012915357947349548, "step": 416 }, { "epoch": 0.6937714463970053, "grad_norm": 0.0054816617630422115, "learning_rate": 4.797525569175073e-06, "loss": 0.008053340017795563, "step": 417 }, { "epoch": 0.6954351668919622, "grad_norm": 0.0040940698236227036, "learning_rate": 4.796169530394498e-06, "loss": 0.008299745619297028, "step": 418 }, { "epoch": 0.697098887386919, "grad_norm": 0.003937297500669956, "learning_rate": 4.7948091587874355e-06, "loss": 0.006818284280598164, "step": 419 }, { "epoch": 0.6987626078818758, "grad_norm": 0.0057077184319496155, "learning_rate": 4.793444456920881e-06, "loss": 0.008323611691594124, "step": 420 }, { "epoch": 0.7004263283768327, "grad_norm": 0.006937435362488031, "learning_rate": 4.7920754273699985e-06, "loss": 0.011208346113562584, "step": 421 }, { "epoch": 0.7020900488717895, "grad_norm": 0.00426529860123992, "learning_rate": 4.790702072718121e-06, "loss": 0.006764519028365612, "step": 422 }, { "epoch": 0.7037537693667464, "grad_norm": 0.005305912345647812, "learning_rate": 4.789324395556741e-06, "loss": 0.014592036604881287, "step": 423 }, { "epoch": 0.7054174898617033, "grad_norm": 0.005696433130651712, "learning_rate": 4.7879423984855085e-06, "loss": 0.007089572958648205, "step": 424 }, { "epoch": 0.70708121035666, "grad_norm": 0.005689716432243586, "learning_rate": 4.786556084112224e-06, "loss": 0.008991958573460579, "step": 425 }, { "epoch": 0.7087449308516169, "grad_norm": 0.005220255348831415, "learning_rate": 4.785165455052836e-06, "loss": 0.007952387444674969, "step": 426 }, { "epoch": 0.7104086513465738, "grad_norm": 0.0035842740908265114, "learning_rate": 4.783770513931433e-06, "loss": 0.005702752619981766, "step": 427 }, { "epoch": 0.7120723718415306, "grad_norm": 0.003953936044126749, "learning_rate": 4.782371263380242e-06, "loss": 0.008399353362619877, "step": 428 }, { "epoch": 0.7137360923364875, "grad_norm": 0.004692696966230869, "learning_rate": 4.780967706039622e-06, "loss": 0.0066901338286697865, "step": 429 }, { "epoch": 0.7153998128314443, "grad_norm": 0.0040604835376143456, "learning_rate": 4.779559844558056e-06, "loss": 0.005349840968847275, "step": 430 }, { "epoch": 0.7170635333264012, "grad_norm": 0.005523476283997297, "learning_rate": 4.778147681592152e-06, "loss": 0.008249819278717041, "step": 431 }, { "epoch": 0.718727253821358, "grad_norm": 0.004006467759609222, "learning_rate": 4.776731219806634e-06, "loss": 0.006997613701969385, "step": 432 }, { "epoch": 0.7203909743163148, "grad_norm": 0.0044905198737978935, "learning_rate": 4.775310461874337e-06, "loss": 0.004558880813419819, "step": 433 }, { "epoch": 0.7220546948112717, "grad_norm": 0.004894842393696308, "learning_rate": 4.773885410476202e-06, "loss": 0.0108189033344388, "step": 434 }, { "epoch": 0.7237184153062286, "grad_norm": 0.01109437644481659, "learning_rate": 4.7724560683012735e-06, "loss": 0.010264476761221886, "step": 435 }, { "epoch": 0.7253821358011854, "grad_norm": 0.005047748796641827, "learning_rate": 4.771022438046693e-06, "loss": 0.010578740388154984, "step": 436 }, { "epoch": 0.7270458562961423, "grad_norm": 0.0065911151468753815, "learning_rate": 4.769584522417691e-06, "loss": 0.011462481692433357, "step": 437 }, { "epoch": 0.728709576791099, "grad_norm": 0.008467431180179119, "learning_rate": 4.768142324127586e-06, "loss": 0.012880068272352219, "step": 438 }, { "epoch": 0.7303732972860559, "grad_norm": 0.004129709675908089, "learning_rate": 4.766695845897778e-06, "loss": 0.004490282386541367, "step": 439 }, { "epoch": 0.7320370177810128, "grad_norm": 0.00397163350135088, "learning_rate": 4.765245090457744e-06, "loss": 0.006365049630403519, "step": 440 }, { "epoch": 0.7337007382759696, "grad_norm": 0.009726262651383877, "learning_rate": 4.763790060545028e-06, "loss": 0.005823507439345121, "step": 441 }, { "epoch": 0.7353644587709265, "grad_norm": 0.00505091343075037, "learning_rate": 4.762330758905246e-06, "loss": 0.01049719750881195, "step": 442 }, { "epoch": 0.7370281792658834, "grad_norm": 0.005302008707076311, "learning_rate": 4.760867188292068e-06, "loss": 0.012403653934597969, "step": 443 }, { "epoch": 0.7386918997608402, "grad_norm": 0.002863701432943344, "learning_rate": 4.7593993514672255e-06, "loss": 0.004971321672201157, "step": 444 }, { "epoch": 0.740355620255797, "grad_norm": 0.0034518903121352196, "learning_rate": 4.757927251200497e-06, "loss": 0.00744717987254262, "step": 445 }, { "epoch": 0.7420193407507538, "grad_norm": 0.009052886627614498, "learning_rate": 4.756450890269705e-06, "loss": 0.010446527972817421, "step": 446 }, { "epoch": 0.7436830612457107, "grad_norm": 0.005404800642281771, "learning_rate": 4.754970271460714e-06, "loss": 0.007915949448943138, "step": 447 }, { "epoch": 0.7453467817406676, "grad_norm": 0.005113918334245682, "learning_rate": 4.753485397567424e-06, "loss": 0.011439811438322067, "step": 448 }, { "epoch": 0.7470105022356244, "grad_norm": 0.013223345391452312, "learning_rate": 4.751996271391761e-06, "loss": 0.008794520981609821, "step": 449 }, { "epoch": 0.7486742227305813, "grad_norm": 0.0056835380382835865, "learning_rate": 4.750502895743677e-06, "loss": 0.010445352643728256, "step": 450 }, { "epoch": 0.7503379432255382, "grad_norm": 0.0067290510050952435, "learning_rate": 4.749005273441143e-06, "loss": 0.015891049057245255, "step": 451 }, { "epoch": 0.7520016637204949, "grad_norm": 0.00862638559192419, "learning_rate": 4.747503407310142e-06, "loss": 0.014896569773554802, "step": 452 }, { "epoch": 0.7536653842154518, "grad_norm": 0.004024629946798086, "learning_rate": 4.745997300184666e-06, "loss": 0.0071207070723176, "step": 453 }, { "epoch": 0.7553291047104087, "grad_norm": 0.00700328778475523, "learning_rate": 4.744486954906709e-06, "loss": 0.014192566275596619, "step": 454 }, { "epoch": 0.7569928252053655, "grad_norm": 0.006798639427870512, "learning_rate": 4.742972374326262e-06, "loss": 0.011122412979602814, "step": 455 }, { "epoch": 0.7586565457003224, "grad_norm": 0.0045930189080536366, "learning_rate": 4.74145356130131e-06, "loss": 0.009697200730443, "step": 456 }, { "epoch": 0.7603202661952791, "grad_norm": 0.0071418690495193005, "learning_rate": 4.739930518697823e-06, "loss": 0.012642932124435902, "step": 457 }, { "epoch": 0.761983986690236, "grad_norm": 0.00647162739187479, "learning_rate": 4.738403249389752e-06, "loss": 0.013247480615973473, "step": 458 }, { "epoch": 0.7636477071851929, "grad_norm": 0.004375535994768143, "learning_rate": 4.736871756259023e-06, "loss": 0.0069526853039860725, "step": 459 }, { "epoch": 0.7653114276801497, "grad_norm": 0.005763936322182417, "learning_rate": 4.7353360421955345e-06, "loss": 0.01148962415754795, "step": 460 }, { "epoch": 0.7669751481751066, "grad_norm": 0.004523094277828932, "learning_rate": 4.733796110097148e-06, "loss": 0.011522913351655006, "step": 461 }, { "epoch": 0.7686388686700635, "grad_norm": 0.004391341470181942, "learning_rate": 4.732251962869685e-06, "loss": 0.00787794217467308, "step": 462 }, { "epoch": 0.7703025891650203, "grad_norm": 0.00304810656234622, "learning_rate": 4.730703603426921e-06, "loss": 0.005599565804004669, "step": 463 }, { "epoch": 0.7719663096599771, "grad_norm": 0.00340471975505352, "learning_rate": 4.729151034690579e-06, "loss": 0.004527213517576456, "step": 464 }, { "epoch": 0.7736300301549339, "grad_norm": 0.004050911404192448, "learning_rate": 4.727594259590326e-06, "loss": 0.0071553378365933895, "step": 465 }, { "epoch": 0.7752937506498908, "grad_norm": 0.0033549219369888306, "learning_rate": 4.726033281063766e-06, "loss": 0.0059393346309661865, "step": 466 }, { "epoch": 0.7769574711448477, "grad_norm": 0.004678431898355484, "learning_rate": 4.724468102056434e-06, "loss": 0.007732603698968887, "step": 467 }, { "epoch": 0.7786211916398045, "grad_norm": 0.002846264746040106, "learning_rate": 4.722898725521793e-06, "loss": 0.00735393725335598, "step": 468 }, { "epoch": 0.7802849121347614, "grad_norm": 0.005869272630661726, "learning_rate": 4.721325154421224e-06, "loss": 0.011904164217412472, "step": 469 }, { "epoch": 0.7819486326297183, "grad_norm": 0.004066616762429476, "learning_rate": 4.7197473917240255e-06, "loss": 0.005801680032163858, "step": 470 }, { "epoch": 0.783612353124675, "grad_norm": 0.0035934511106461287, "learning_rate": 4.718165440407404e-06, "loss": 0.005442335270345211, "step": 471 }, { "epoch": 0.7852760736196319, "grad_norm": 0.0047324420884251595, "learning_rate": 4.716579303456471e-06, "loss": 0.00940224714577198, "step": 472 }, { "epoch": 0.7869397941145887, "grad_norm": 0.003724937792867422, "learning_rate": 4.714988983864235e-06, "loss": 0.009013586677610874, "step": 473 }, { "epoch": 0.7886035146095456, "grad_norm": 0.004245929419994354, "learning_rate": 4.713394484631598e-06, "loss": 0.006094337906688452, "step": 474 }, { "epoch": 0.7902672351045025, "grad_norm": 0.0049941991455852985, "learning_rate": 4.711795808767348e-06, "loss": 0.011073598638176918, "step": 475 }, { "epoch": 0.7919309555994593, "grad_norm": 0.0026603529695421457, "learning_rate": 4.7101929592881545e-06, "loss": 0.0036363271065056324, "step": 476 }, { "epoch": 0.7935946760944161, "grad_norm": 0.004774894565343857, "learning_rate": 4.708585939218564e-06, "loss": 0.01305093988776207, "step": 477 }, { "epoch": 0.795258396589373, "grad_norm": 0.00325093069113791, "learning_rate": 4.7069747515909905e-06, "loss": 0.009489022195339203, "step": 478 }, { "epoch": 0.7969221170843298, "grad_norm": 0.006839904002845287, "learning_rate": 4.7053593994457135e-06, "loss": 0.012426530010998249, "step": 479 }, { "epoch": 0.7985858375792867, "grad_norm": 0.002349321963265538, "learning_rate": 4.70373988583087e-06, "loss": 0.004337068181484938, "step": 480 }, { "epoch": 0.8002495580742435, "grad_norm": 0.0030962666496634483, "learning_rate": 4.7021162138024524e-06, "loss": 0.008469605818390846, "step": 481 }, { "epoch": 0.8019132785692004, "grad_norm": 0.003444101894274354, "learning_rate": 4.700488386424294e-06, "loss": 0.0059173512272536755, "step": 482 }, { "epoch": 0.8035769990641572, "grad_norm": 0.006231680046766996, "learning_rate": 4.698856406768076e-06, "loss": 0.009944312274456024, "step": 483 }, { "epoch": 0.805240719559114, "grad_norm": 0.005668159108608961, "learning_rate": 4.697220277913311e-06, "loss": 0.009258732199668884, "step": 484 }, { "epoch": 0.8069044400540709, "grad_norm": 0.005199704319238663, "learning_rate": 4.695580002947341e-06, "loss": 0.00513799674808979, "step": 485 }, { "epoch": 0.8085681605490278, "grad_norm": 0.0023570070043206215, "learning_rate": 4.6939355849653325e-06, "loss": 0.005083505064249039, "step": 486 }, { "epoch": 0.8102318810439846, "grad_norm": 0.0046600759960711, "learning_rate": 4.69228702707027e-06, "loss": 0.005510847084224224, "step": 487 }, { "epoch": 0.8118956015389415, "grad_norm": 0.0029210986103862524, "learning_rate": 4.69063433237295e-06, "loss": 0.008221244439482689, "step": 488 }, { "epoch": 0.8135593220338984, "grad_norm": 0.0037554206792265177, "learning_rate": 4.688977503991975e-06, "loss": 0.008256680332124233, "step": 489 }, { "epoch": 0.8152230425288551, "grad_norm": 0.003074623178690672, "learning_rate": 4.687316545053746e-06, "loss": 0.006913225632160902, "step": 490 }, { "epoch": 0.816886763023812, "grad_norm": 0.002423194469884038, "learning_rate": 4.68565145869246e-06, "loss": 0.0056283967569470406, "step": 491 }, { "epoch": 0.8185504835187688, "grad_norm": 0.0032063298858702183, "learning_rate": 4.683982248050103e-06, "loss": 0.007283125072717667, "step": 492 }, { "epoch": 0.8202142040137257, "grad_norm": 0.0034963127691298723, "learning_rate": 4.6823089162764425e-06, "loss": 0.009975203312933445, "step": 493 }, { "epoch": 0.8218779245086826, "grad_norm": 0.0035684818867594004, "learning_rate": 4.6806314665290205e-06, "loss": 0.006531362421810627, "step": 494 }, { "epoch": 0.8235416450036394, "grad_norm": 0.002319071441888809, "learning_rate": 4.678949901973154e-06, "loss": 0.006020743865519762, "step": 495 }, { "epoch": 0.8252053654985962, "grad_norm": 0.0032957608345896006, "learning_rate": 4.677264225781921e-06, "loss": 0.006968480534851551, "step": 496 }, { "epoch": 0.8268690859935531, "grad_norm": 0.006399734411388636, "learning_rate": 4.6755744411361585e-06, "loss": 0.005256010685116053, "step": 497 }, { "epoch": 0.8285328064885099, "grad_norm": 0.002357139950618148, "learning_rate": 4.6738805512244575e-06, "loss": 0.004715315066277981, "step": 498 }, { "epoch": 0.8301965269834668, "grad_norm": 0.002225536620244384, "learning_rate": 4.672182559243155e-06, "loss": 0.003946832846850157, "step": 499 }, { "epoch": 0.8318602474784236, "grad_norm": 0.007506450638175011, "learning_rate": 4.670480468396327e-06, "loss": 0.007973461411893368, "step": 500 }, { "epoch": 0.8318602474784236, "eval_loss": 0.006512382533401251, "eval_runtime": 404.3517, "eval_samples_per_second": 11.891, "eval_steps_per_second": 2.973, "step": 500 }, { "epoch": 0.8335239679733805, "grad_norm": 0.004953257739543915, "learning_rate": 4.668774281895786e-06, "loss": 0.007463683374226093, "step": 501 }, { "epoch": 0.8351876884683374, "grad_norm": 0.002295607700943947, "learning_rate": 4.667064002961073e-06, "loss": 0.0029762398917227983, "step": 502 }, { "epoch": 0.8368514089632941, "grad_norm": 0.005000600591301918, "learning_rate": 4.66534963481945e-06, "loss": 0.00649390509352088, "step": 503 }, { "epoch": 0.838515129458251, "grad_norm": 0.003147828159853816, "learning_rate": 4.663631180705894e-06, "loss": 0.007842538878321648, "step": 504 }, { "epoch": 0.8401788499532079, "grad_norm": 0.003913369495421648, "learning_rate": 4.661908643863096e-06, "loss": 0.005828930996358395, "step": 505 }, { "epoch": 0.8418425704481647, "grad_norm": 0.004644430708140135, "learning_rate": 4.66018202754145e-06, "loss": 0.008422689512372017, "step": 506 }, { "epoch": 0.8435062909431216, "grad_norm": 0.0037597171030938625, "learning_rate": 4.658451334999043e-06, "loss": 0.0072099887765944, "step": 507 }, { "epoch": 0.8451700114380784, "grad_norm": 0.007217934355139732, "learning_rate": 4.656716569501661e-06, "loss": 0.005425385665148497, "step": 508 }, { "epoch": 0.8468337319330352, "grad_norm": 0.004139721859246492, "learning_rate": 4.654977734322772e-06, "loss": 0.005886915139853954, "step": 509 }, { "epoch": 0.8484974524279921, "grad_norm": 0.003015086753293872, "learning_rate": 4.653234832743521e-06, "loss": 0.008951975964009762, "step": 510 }, { "epoch": 0.8501611729229489, "grad_norm": 0.0028621021192520857, "learning_rate": 4.651487868052731e-06, "loss": 0.0064186276867985725, "step": 511 }, { "epoch": 0.8518248934179058, "grad_norm": 0.0029435402248054743, "learning_rate": 4.64973684354689e-06, "loss": 0.005053696688264608, "step": 512 }, { "epoch": 0.8534886139128627, "grad_norm": 0.00349103263579309, "learning_rate": 4.647981762530145e-06, "loss": 0.005872088484466076, "step": 513 }, { "epoch": 0.8551523344078195, "grad_norm": 0.005280966870486736, "learning_rate": 4.6462226283143e-06, "loss": 0.005002675112336874, "step": 514 }, { "epoch": 0.8568160549027763, "grad_norm": 0.006165878847241402, "learning_rate": 4.644459444218807e-06, "loss": 0.00517700519412756, "step": 515 }, { "epoch": 0.8584797753977332, "grad_norm": 0.003584573045372963, "learning_rate": 4.642692213570759e-06, "loss": 0.011866070330142975, "step": 516 }, { "epoch": 0.86014349589269, "grad_norm": 0.003298037452623248, "learning_rate": 4.640920939704885e-06, "loss": 0.005188156384974718, "step": 517 }, { "epoch": 0.8618072163876469, "grad_norm": 0.00396351283416152, "learning_rate": 4.639145625963544e-06, "loss": 0.008152484893798828, "step": 518 }, { "epoch": 0.8634709368826037, "grad_norm": 0.0071251713670790195, "learning_rate": 4.637366275696718e-06, "loss": 0.006925216875970364, "step": 519 }, { "epoch": 0.8651346573775606, "grad_norm": 0.006336830090731382, "learning_rate": 4.635582892262006e-06, "loss": 0.0077257608063519, "step": 520 }, { "epoch": 0.8667983778725175, "grad_norm": 0.004709393717348576, "learning_rate": 4.633795479024616e-06, "loss": 0.007717994041740894, "step": 521 }, { "epoch": 0.8684620983674742, "grad_norm": 0.003954755142331123, "learning_rate": 4.632004039357364e-06, "loss": 0.0050251539796590805, "step": 522 }, { "epoch": 0.8701258188624311, "grad_norm": 0.00402754545211792, "learning_rate": 4.630208576640659e-06, "loss": 0.00838618166744709, "step": 523 }, { "epoch": 0.871789539357388, "grad_norm": 0.003631069790571928, "learning_rate": 4.628409094262504e-06, "loss": 0.006958806421607733, "step": 524 }, { "epoch": 0.8734532598523448, "grad_norm": 0.0026326472871005535, "learning_rate": 4.6266055956184865e-06, "loss": 0.005749491509050131, "step": 525 }, { "epoch": 0.8751169803473017, "grad_norm": 0.0023095738142728806, "learning_rate": 4.624798084111773e-06, "loss": 0.004408619366586208, "step": 526 }, { "epoch": 0.8767807008422585, "grad_norm": 0.003138443687930703, "learning_rate": 4.622986563153104e-06, "loss": 0.0046043386682868, "step": 527 }, { "epoch": 0.8784444213372153, "grad_norm": 0.0023057162761688232, "learning_rate": 4.621171036160781e-06, "loss": 0.00428372249007225, "step": 528 }, { "epoch": 0.8801081418321722, "grad_norm": 0.004754107911139727, "learning_rate": 4.6193515065606675e-06, "loss": 0.01612052507698536, "step": 529 }, { "epoch": 0.881771862327129, "grad_norm": 0.004191739484667778, "learning_rate": 4.617527977786182e-06, "loss": 0.007633868604898453, "step": 530 }, { "epoch": 0.8834355828220859, "grad_norm": 0.003158772364258766, "learning_rate": 4.615700453278285e-06, "loss": 0.004591359756886959, "step": 531 }, { "epoch": 0.8850993033170428, "grad_norm": 0.0042033726349473, "learning_rate": 4.61386893648548e-06, "loss": 0.005673332139849663, "step": 532 }, { "epoch": 0.8867630238119996, "grad_norm": 0.003734700381755829, "learning_rate": 4.612033430863804e-06, "loss": 0.005909692961722612, "step": 533 }, { "epoch": 0.8884267443069565, "grad_norm": 0.0027026941534131765, "learning_rate": 4.610193939876818e-06, "loss": 0.006266783457249403, "step": 534 }, { "epoch": 0.8900904648019132, "grad_norm": 0.00583456689491868, "learning_rate": 4.608350466995606e-06, "loss": 0.010947637259960175, "step": 535 }, { "epoch": 0.8917541852968701, "grad_norm": 0.0036986898630857468, "learning_rate": 4.606503015698765e-06, "loss": 0.00387162109836936, "step": 536 }, { "epoch": 0.893417905791827, "grad_norm": 0.004644804168492556, "learning_rate": 4.6046515894723985e-06, "loss": 0.004419853910803795, "step": 537 }, { "epoch": 0.8950816262867838, "grad_norm": 0.005302789155393839, "learning_rate": 4.602796191810113e-06, "loss": 0.00521225668489933, "step": 538 }, { "epoch": 0.8967453467817407, "grad_norm": 0.004432398360222578, "learning_rate": 4.600936826213004e-06, "loss": 0.0068048653192818165, "step": 539 }, { "epoch": 0.8984090672766976, "grad_norm": 0.0038318608421832323, "learning_rate": 4.59907349618966e-06, "loss": 0.006457334849983454, "step": 540 }, { "epoch": 0.9000727877716543, "grad_norm": 0.004747864790260792, "learning_rate": 4.597206205256147e-06, "loss": 0.00862425658851862, "step": 541 }, { "epoch": 0.9017365082666112, "grad_norm": 0.002571320626884699, "learning_rate": 4.595334956936007e-06, "loss": 0.008329037576913834, "step": 542 }, { "epoch": 0.903400228761568, "grad_norm": 0.004095368552953005, "learning_rate": 4.593459754760248e-06, "loss": 0.006220923736691475, "step": 543 }, { "epoch": 0.9050639492565249, "grad_norm": 0.004221968352794647, "learning_rate": 4.591580602267338e-06, "loss": 0.006353544071316719, "step": 544 }, { "epoch": 0.9067276697514818, "grad_norm": 0.006588945630937815, "learning_rate": 4.589697503003203e-06, "loss": 0.005587139166891575, "step": 545 }, { "epoch": 0.9083913902464386, "grad_norm": 0.002770567312836647, "learning_rate": 4.587810460521213e-06, "loss": 0.006217245943844318, "step": 546 }, { "epoch": 0.9100551107413954, "grad_norm": 0.0035460866056382656, "learning_rate": 4.585919478382178e-06, "loss": 0.005783476866781712, "step": 547 }, { "epoch": 0.9117188312363523, "grad_norm": 0.0028954341541975737, "learning_rate": 4.584024560154348e-06, "loss": 0.005333957262337208, "step": 548 }, { "epoch": 0.9133825517313091, "grad_norm": 0.0033749546855688095, "learning_rate": 4.582125709413392e-06, "loss": 0.004037967883050442, "step": 549 }, { "epoch": 0.915046272226266, "grad_norm": 0.004284190014004707, "learning_rate": 4.580222929742407e-06, "loss": 0.0068781087175011635, "step": 550 }, { "epoch": 0.9167099927212229, "grad_norm": 0.004720574710518122, "learning_rate": 4.5783162247318986e-06, "loss": 0.005199882667511702, "step": 551 }, { "epoch": 0.9183737132161797, "grad_norm": 0.002596945036202669, "learning_rate": 4.576405597979782e-06, "loss": 0.005764994770288467, "step": 552 }, { "epoch": 0.9200374337111366, "grad_norm": 0.0030985120683908463, "learning_rate": 4.5744910530913725e-06, "loss": 0.004004698246717453, "step": 553 }, { "epoch": 0.9217011542060933, "grad_norm": 0.0047891330905258656, "learning_rate": 4.572572593679379e-06, "loss": 0.009581279940903187, "step": 554 }, { "epoch": 0.9233648747010502, "grad_norm": 0.002359537873417139, "learning_rate": 4.5706502233638935e-06, "loss": 0.0038718758150935173, "step": 555 }, { "epoch": 0.9250285951960071, "grad_norm": 0.0029691008385270834, "learning_rate": 4.568723945772394e-06, "loss": 0.0046098604798316956, "step": 556 }, { "epoch": 0.9266923156909639, "grad_norm": 0.003485879860818386, "learning_rate": 4.5667937645397276e-06, "loss": 0.004569823853671551, "step": 557 }, { "epoch": 0.9283560361859208, "grad_norm": 0.010340786539018154, "learning_rate": 4.564859683308107e-06, "loss": 0.007273489609360695, "step": 558 }, { "epoch": 0.9300197566808777, "grad_norm": 0.003216391894966364, "learning_rate": 4.562921705727106e-06, "loss": 0.005173889454454184, "step": 559 }, { "epoch": 0.9316834771758344, "grad_norm": 0.0023886016570031643, "learning_rate": 4.5609798354536495e-06, "loss": 0.0035293418914079666, "step": 560 }, { "epoch": 0.9333471976707913, "grad_norm": 0.0051674325950443745, "learning_rate": 4.559034076152009e-06, "loss": 0.007310433778911829, "step": 561 }, { "epoch": 0.9350109181657481, "grad_norm": 0.0061208331026136875, "learning_rate": 4.557084431493793e-06, "loss": 0.01674576662480831, "step": 562 }, { "epoch": 0.936674638660705, "grad_norm": 0.0033059692941606045, "learning_rate": 4.555130905157943e-06, "loss": 0.00993238016963005, "step": 563 }, { "epoch": 0.9383383591556619, "grad_norm": 0.002794879488646984, "learning_rate": 4.553173500830724e-06, "loss": 0.004958215169608593, "step": 564 }, { "epoch": 0.9400020796506187, "grad_norm": 0.006112914066761732, "learning_rate": 4.55121222220572e-06, "loss": 0.009362846612930298, "step": 565 }, { "epoch": 0.9416658001455755, "grad_norm": 0.0030292437877506018, "learning_rate": 4.549247072983825e-06, "loss": 0.00661174301058054, "step": 566 }, { "epoch": 0.9433295206405324, "grad_norm": 0.002375996205955744, "learning_rate": 4.5472780568732356e-06, "loss": 0.0036187791265547276, "step": 567 }, { "epoch": 0.9449932411354892, "grad_norm": 0.0034390664659440517, "learning_rate": 4.545305177589448e-06, "loss": 0.0048618377186357975, "step": 568 }, { "epoch": 0.9466569616304461, "grad_norm": 0.003132295561954379, "learning_rate": 4.5433284388552435e-06, "loss": 0.004123467952013016, "step": 569 }, { "epoch": 0.9483206821254029, "grad_norm": 0.002180474577471614, "learning_rate": 4.541347844400692e-06, "loss": 0.004508224315941334, "step": 570 }, { "epoch": 0.9499844026203598, "grad_norm": 0.0022491244599223137, "learning_rate": 4.539363397963134e-06, "loss": 0.004375042859464884, "step": 571 }, { "epoch": 0.9516481231153167, "grad_norm": 0.001735589117743075, "learning_rate": 4.537375103287183e-06, "loss": 0.002892045769840479, "step": 572 }, { "epoch": 0.9533118436102734, "grad_norm": 0.0044294800609350204, "learning_rate": 4.53538296412471e-06, "loss": 0.006532138213515282, "step": 573 }, { "epoch": 0.9549755641052303, "grad_norm": 0.007447395473718643, "learning_rate": 4.533386984234841e-06, "loss": 0.012636320665478706, "step": 574 }, { "epoch": 0.9566392846001872, "grad_norm": 0.0024636040907353163, "learning_rate": 4.5313871673839525e-06, "loss": 0.004774153232574463, "step": 575 }, { "epoch": 0.958303005095144, "grad_norm": 0.004148229956626892, "learning_rate": 4.52938351734566e-06, "loss": 0.007745377719402313, "step": 576 }, { "epoch": 0.9599667255901009, "grad_norm": 0.0033451179042458534, "learning_rate": 4.52737603790081e-06, "loss": 0.00580889917910099, "step": 577 }, { "epoch": 0.9616304460850577, "grad_norm": 0.0032389156986027956, "learning_rate": 4.525364732837476e-06, "loss": 0.008575042709708214, "step": 578 }, { "epoch": 0.9632941665800145, "grad_norm": 0.0024506989866495132, "learning_rate": 4.523349605950953e-06, "loss": 0.00456388620659709, "step": 579 }, { "epoch": 0.9649578870749714, "grad_norm": 0.002531093545258045, "learning_rate": 4.521330661043744e-06, "loss": 0.004104273393750191, "step": 580 }, { "epoch": 0.9666216075699282, "grad_norm": 0.00328632234595716, "learning_rate": 4.519307901925558e-06, "loss": 0.007781069725751877, "step": 581 }, { "epoch": 0.9682853280648851, "grad_norm": 0.0028874180279672146, "learning_rate": 4.517281332413302e-06, "loss": 0.004761362448334694, "step": 582 }, { "epoch": 0.969949048559842, "grad_norm": 0.0018867182079702616, "learning_rate": 4.515250956331072e-06, "loss": 0.00552754569798708, "step": 583 }, { "epoch": 0.9716127690547988, "grad_norm": 0.002540376503020525, "learning_rate": 4.513216777510149e-06, "loss": 0.004502263385802507, "step": 584 }, { "epoch": 0.9732764895497557, "grad_norm": 0.0025226708967238665, "learning_rate": 4.511178799788987e-06, "loss": 0.004058694466948509, "step": 585 }, { "epoch": 0.9749402100447125, "grad_norm": 0.0028099576011300087, "learning_rate": 4.50913702701321e-06, "loss": 0.004276437684893608, "step": 586 }, { "epoch": 0.9766039305396693, "grad_norm": 0.0035055428743362427, "learning_rate": 4.507091463035601e-06, "loss": 0.006080637685954571, "step": 587 }, { "epoch": 0.9782676510346262, "grad_norm": 0.0025878252927213907, "learning_rate": 4.505042111716103e-06, "loss": 0.0048270029947161674, "step": 588 }, { "epoch": 0.979931371529583, "grad_norm": 0.0019265188602730632, "learning_rate": 4.502988976921797e-06, "loss": 0.0017514314968138933, "step": 589 }, { "epoch": 0.9815950920245399, "grad_norm": 0.004005370195955038, "learning_rate": 4.50093206252691e-06, "loss": 0.0036932427901774645, "step": 590 }, { "epoch": 0.9832588125194968, "grad_norm": 0.004234121646732092, "learning_rate": 4.498871372412798e-06, "loss": 0.005153659265488386, "step": 591 }, { "epoch": 0.9849225330144535, "grad_norm": 0.0056281927973032, "learning_rate": 4.496806910467944e-06, "loss": 0.006571756675839424, "step": 592 }, { "epoch": 0.9865862535094104, "grad_norm": 0.0052912007085978985, "learning_rate": 4.494738680587946e-06, "loss": 0.012203991413116455, "step": 593 }, { "epoch": 0.9882499740043673, "grad_norm": 0.008162816055119038, "learning_rate": 4.492666686675511e-06, "loss": 0.0037072275299578905, "step": 594 }, { "epoch": 0.9899136944993241, "grad_norm": 0.002978774020448327, "learning_rate": 4.490590932640453e-06, "loss": 0.004548577591776848, "step": 595 }, { "epoch": 0.991577414994281, "grad_norm": 0.005547270644456148, "learning_rate": 4.488511422399677e-06, "loss": 0.0054444968700408936, "step": 596 }, { "epoch": 0.9932411354892378, "grad_norm": 0.006614347919821739, "learning_rate": 4.48642815987718e-06, "loss": 0.011927146464586258, "step": 597 }, { "epoch": 0.9949048559841946, "grad_norm": 0.0036901719868183136, "learning_rate": 4.484341149004035e-06, "loss": 0.00477896211668849, "step": 598 }, { "epoch": 0.9965685764791515, "grad_norm": 0.004520885646343231, "learning_rate": 4.482250393718392e-06, "loss": 0.007755149155855179, "step": 599 }, { "epoch": 0.9982322969741083, "grad_norm": 0.0036596583668142557, "learning_rate": 4.480155897965463e-06, "loss": 0.007180670741945505, "step": 600 }, { "epoch": 0.9982322969741083, "eval_loss": 0.007219767663627863, "eval_runtime": 401.9917, "eval_samples_per_second": 11.96, "eval_steps_per_second": 2.99, "step": 600 }, { "epoch": 0.9998960174690652, "grad_norm": 0.0034411894157528877, "learning_rate": 4.47805766569752e-06, "loss": 0.007506140973418951, "step": 601 }, { "epoch": 1.0, "grad_norm": 0.04566219821572304, "learning_rate": 4.475955700873888e-06, "loss": 0.020605536177754402, "step": 602 }, { "epoch": 1.0016637204949568, "grad_norm": 0.0035529271699488163, "learning_rate": 4.473850007460932e-06, "loss": 0.00510389544069767, "step": 603 }, { "epoch": 1.0033274409899138, "grad_norm": 0.008444433100521564, "learning_rate": 4.471740589432053e-06, "loss": 0.013971086591482162, "step": 604 }, { "epoch": 1.0049911614848706, "grad_norm": 0.016102664172649384, "learning_rate": 4.469627450767682e-06, "loss": 0.04315765202045441, "step": 605 }, { "epoch": 1.0066548819798273, "grad_norm": 0.009336776100099087, "learning_rate": 4.46751059545527e-06, "loss": 0.018110543489456177, "step": 606 }, { "epoch": 1.0083186024747843, "grad_norm": 0.003961662296205759, "learning_rate": 4.465390027489279e-06, "loss": 0.009482689201831818, "step": 607 }, { "epoch": 1.009982322969741, "grad_norm": 0.0029724633786827326, "learning_rate": 4.463265750871182e-06, "loss": 0.004566639196127653, "step": 608 }, { "epoch": 1.0116460434646979, "grad_norm": 0.010566702112555504, "learning_rate": 4.461137769609445e-06, "loss": 0.01625455915927887, "step": 609 }, { "epoch": 1.0133097639596549, "grad_norm": 0.0051901922561228275, "learning_rate": 4.459006087719527e-06, "loss": 0.008888859301805496, "step": 610 }, { "epoch": 1.0149734844546117, "grad_norm": 0.004209975246340036, "learning_rate": 4.45687070922387e-06, "loss": 0.005752148572355509, "step": 611 }, { "epoch": 1.0166372049495684, "grad_norm": 0.006711136549711227, "learning_rate": 4.4547316381518905e-06, "loss": 0.004719719290733337, "step": 612 }, { "epoch": 1.0183009254445252, "grad_norm": 0.0029179907869547606, "learning_rate": 4.4525888785399725e-06, "loss": 0.006970624905079603, "step": 613 }, { "epoch": 1.0199646459394822, "grad_norm": 0.002345362678170204, "learning_rate": 4.450442434431463e-06, "loss": 0.0034522705245763063, "step": 614 }, { "epoch": 1.021628366434439, "grad_norm": 0.004603294190019369, "learning_rate": 4.448292309876657e-06, "loss": 0.004663745407015085, "step": 615 }, { "epoch": 1.0232920869293958, "grad_norm": 0.0023862654343247414, "learning_rate": 4.4461385089328e-06, "loss": 0.003139633685350418, "step": 616 }, { "epoch": 1.0249558074243528, "grad_norm": 0.0021421839483082294, "learning_rate": 4.44398103566407e-06, "loss": 0.004743538796901703, "step": 617 }, { "epoch": 1.0266195279193095, "grad_norm": 0.0013927406398579478, "learning_rate": 4.4418198941415756e-06, "loss": 0.0038331456016749144, "step": 618 }, { "epoch": 1.0282832484142663, "grad_norm": 0.0032657769042998552, "learning_rate": 4.4396550884433495e-06, "loss": 0.0025713355280458927, "step": 619 }, { "epoch": 1.0299469689092233, "grad_norm": 0.0034895415883511305, "learning_rate": 4.437486622654337e-06, "loss": 0.005330443382263184, "step": 620 }, { "epoch": 1.03161068940418, "grad_norm": 0.014472505077719688, "learning_rate": 4.43531450086639e-06, "loss": 0.007480642758309841, "step": 621 }, { "epoch": 1.0332744098991369, "grad_norm": 0.0033677637111395597, "learning_rate": 4.433138727178259e-06, "loss": 0.004082255996763706, "step": 622 }, { "epoch": 1.0349381303940939, "grad_norm": 0.0026485221460461617, "learning_rate": 4.4309593056955865e-06, "loss": 0.0040733059868216515, "step": 623 }, { "epoch": 1.0366018508890507, "grad_norm": 0.0036252597346901894, "learning_rate": 4.4287762405308974e-06, "loss": 0.0023388115223497152, "step": 624 }, { "epoch": 1.0382655713840074, "grad_norm": 0.003635956672951579, "learning_rate": 4.426589535803593e-06, "loss": 0.00605718232691288, "step": 625 }, { "epoch": 1.0399292918789644, "grad_norm": 0.007902245968580246, "learning_rate": 4.424399195639941e-06, "loss": 0.009553415700793266, "step": 626 }, { "epoch": 1.0415930123739212, "grad_norm": 0.003865851555019617, "learning_rate": 4.422205224173071e-06, "loss": 0.008454103022813797, "step": 627 }, { "epoch": 1.043256732868878, "grad_norm": 0.003319124225527048, "learning_rate": 4.420007625542963e-06, "loss": 0.0036944844760000706, "step": 628 }, { "epoch": 1.0449204533638348, "grad_norm": 0.0050729489885270596, "learning_rate": 4.417806403896442e-06, "loss": 0.005002877209335566, "step": 629 }, { "epoch": 1.0465841738587918, "grad_norm": 0.0038563492707908154, "learning_rate": 4.41560156338717e-06, "loss": 0.007914157584309578, "step": 630 }, { "epoch": 1.0482478943537485, "grad_norm": 0.004178495611995459, "learning_rate": 4.413393108175637e-06, "loss": 0.004781308118253946, "step": 631 }, { "epoch": 1.0499116148487053, "grad_norm": 0.004806080367416143, "learning_rate": 4.411181042429156e-06, "loss": 0.004015027079731226, "step": 632 }, { "epoch": 1.0515753353436623, "grad_norm": 0.0037014191038906574, "learning_rate": 4.40896537032185e-06, "loss": 0.004629138857126236, "step": 633 }, { "epoch": 1.053239055838619, "grad_norm": 0.003243440529331565, "learning_rate": 4.406746096034647e-06, "loss": 0.005871012806892395, "step": 634 }, { "epoch": 1.0549027763335759, "grad_norm": 0.0017317542806267738, "learning_rate": 4.4045232237552756e-06, "loss": 0.003214429132640362, "step": 635 }, { "epoch": 1.0565664968285329, "grad_norm": 0.00374778569675982, "learning_rate": 4.4022967576782525e-06, "loss": 0.004213203210383654, "step": 636 }, { "epoch": 1.0582302173234897, "grad_norm": 0.0024602594785392284, "learning_rate": 4.400066702004874e-06, "loss": 0.007073926739394665, "step": 637 }, { "epoch": 1.0598939378184464, "grad_norm": 0.002045772271230817, "learning_rate": 4.39783306094321e-06, "loss": 0.0070329755544662476, "step": 638 }, { "epoch": 1.0615576583134034, "grad_norm": 0.0015325051499530673, "learning_rate": 4.395595838708099e-06, "loss": 0.0028078206814825535, "step": 639 }, { "epoch": 1.0632213788083602, "grad_norm": 0.002612901385873556, "learning_rate": 4.393355039521134e-06, "loss": 0.004282036330550909, "step": 640 }, { "epoch": 1.064885099303317, "grad_norm": 0.0035253753885626793, "learning_rate": 4.391110667610658e-06, "loss": 0.004646215122193098, "step": 641 }, { "epoch": 1.066548819798274, "grad_norm": 0.0026419060304760933, "learning_rate": 4.388862727211759e-06, "loss": 0.006227807141840458, "step": 642 }, { "epoch": 1.0682125402932308, "grad_norm": 0.0018523207399994135, "learning_rate": 4.386611222566254e-06, "loss": 0.00431039510294795, "step": 643 }, { "epoch": 1.0698762607881875, "grad_norm": 0.002442911732941866, "learning_rate": 4.384356157922688e-06, "loss": 0.003634359687566757, "step": 644 }, { "epoch": 1.0715399812831445, "grad_norm": 0.0021309482399374247, "learning_rate": 4.382097537536322e-06, "loss": 0.005421295762062073, "step": 645 }, { "epoch": 1.0732037017781013, "grad_norm": 0.0033649380784481764, "learning_rate": 4.379835365669132e-06, "loss": 0.009508009068667889, "step": 646 }, { "epoch": 1.074867422273058, "grad_norm": 0.0021837574895471334, "learning_rate": 4.377569646589789e-06, "loss": 0.004345447290688753, "step": 647 }, { "epoch": 1.0765311427680149, "grad_norm": 0.0032484072726219893, "learning_rate": 4.375300384573659e-06, "loss": 0.0040743970312178135, "step": 648 }, { "epoch": 1.0781948632629719, "grad_norm": 0.0024877728428691626, "learning_rate": 4.373027583902796e-06, "loss": 0.006332829128950834, "step": 649 }, { "epoch": 1.0798585837579286, "grad_norm": 0.0017824380192905664, "learning_rate": 4.370751248865929e-06, "loss": 0.0035579982213675976, "step": 650 }, { "epoch": 1.0815223042528854, "grad_norm": 0.0031106146052479744, "learning_rate": 4.368471383758459e-06, "loss": 0.007911598309874535, "step": 651 }, { "epoch": 1.0831860247478424, "grad_norm": 0.0014041169779375196, "learning_rate": 4.366187992882444e-06, "loss": 0.003576268907636404, "step": 652 }, { "epoch": 1.0848497452427992, "grad_norm": 0.0017640722217038274, "learning_rate": 4.3639010805466e-06, "loss": 0.004032103344798088, "step": 653 }, { "epoch": 1.086513465737756, "grad_norm": 0.0037411150988191366, "learning_rate": 4.361610651066283e-06, "loss": 0.004132252652198076, "step": 654 }, { "epoch": 1.088177186232713, "grad_norm": 0.0016895364969968796, "learning_rate": 4.35931670876349e-06, "loss": 0.0023081060498952866, "step": 655 }, { "epoch": 1.0898409067276698, "grad_norm": 0.00220140116289258, "learning_rate": 4.357019257966844e-06, "loss": 0.005424368195235729, "step": 656 }, { "epoch": 1.0915046272226265, "grad_norm": 0.0015891756629571319, "learning_rate": 4.354718303011588e-06, "loss": 0.004366081207990646, "step": 657 }, { "epoch": 1.0931683477175835, "grad_norm": 0.0026955781504511833, "learning_rate": 4.352413848239579e-06, "loss": 0.0043494547717273235, "step": 658 }, { "epoch": 1.0948320682125403, "grad_norm": 0.0034271839540451765, "learning_rate": 4.35010589799928e-06, "loss": 0.0057737985625863075, "step": 659 }, { "epoch": 1.096495788707497, "grad_norm": 0.002068024128675461, "learning_rate": 4.347794456645744e-06, "loss": 0.005561623256653547, "step": 660 }, { "epoch": 1.098159509202454, "grad_norm": 0.0031483988277614117, "learning_rate": 4.345479528540618e-06, "loss": 0.003334908513352275, "step": 661 }, { "epoch": 1.0998232296974109, "grad_norm": 0.001954516861587763, "learning_rate": 4.343161118052123e-06, "loss": 0.003126378171145916, "step": 662 }, { "epoch": 1.1014869501923676, "grad_norm": 0.002192142652347684, "learning_rate": 4.340839229555056e-06, "loss": 0.0036504999734461308, "step": 663 }, { "epoch": 1.1031506706873246, "grad_norm": 0.0024770835880190134, "learning_rate": 4.338513867430773e-06, "loss": 0.005833543371409178, "step": 664 }, { "epoch": 1.1048143911822814, "grad_norm": 0.0008247373043559492, "learning_rate": 4.336185036067187e-06, "loss": 0.0029306081123650074, "step": 665 }, { "epoch": 1.1064781116772382, "grad_norm": 0.002471225569024682, "learning_rate": 4.3338527398587575e-06, "loss": 0.0034327320754528046, "step": 666 }, { "epoch": 1.108141832172195, "grad_norm": 0.0027939577121287584, "learning_rate": 4.33151698320648e-06, "loss": 0.0042039137333631516, "step": 667 }, { "epoch": 1.109805552667152, "grad_norm": 0.003027573460713029, "learning_rate": 4.329177770517881e-06, "loss": 0.005466432776302099, "step": 668 }, { "epoch": 1.1114692731621088, "grad_norm": 0.0014210582012310624, "learning_rate": 4.32683510620701e-06, "loss": 0.0019816220737993717, "step": 669 }, { "epoch": 1.1131329936570655, "grad_norm": 0.0018567409133538604, "learning_rate": 4.324488994694427e-06, "loss": 0.0032230927608907223, "step": 670 }, { "epoch": 1.1147967141520225, "grad_norm": 0.0019692997448146343, "learning_rate": 4.322139440407198e-06, "loss": 0.0035078530199825764, "step": 671 }, { "epoch": 1.1164604346469793, "grad_norm": 0.002447731327265501, "learning_rate": 4.319786447778887e-06, "loss": 0.00550186587497592, "step": 672 }, { "epoch": 1.118124155141936, "grad_norm": 0.004330574534833431, "learning_rate": 4.317430021249543e-06, "loss": 0.004071483854204416, "step": 673 }, { "epoch": 1.119787875636893, "grad_norm": 0.0016852137632668018, "learning_rate": 4.315070165265695e-06, "loss": 0.0034770139027386904, "step": 674 }, { "epoch": 1.1214515961318499, "grad_norm": 0.003290498396381736, "learning_rate": 4.312706884280349e-06, "loss": 0.004757840186357498, "step": 675 }, { "epoch": 1.1231153166268066, "grad_norm": 0.0012338663218542933, "learning_rate": 4.310340182752965e-06, "loss": 0.0025315401144325733, "step": 676 }, { "epoch": 1.1247790371217636, "grad_norm": 0.0045959739945828915, "learning_rate": 4.307970065149464e-06, "loss": 0.006023851688951254, "step": 677 }, { "epoch": 1.1264427576167204, "grad_norm": 0.004816236440092325, "learning_rate": 4.305596535942211e-06, "loss": 0.004382929764688015, "step": 678 }, { "epoch": 1.1281064781116772, "grad_norm": 0.002231198363006115, "learning_rate": 4.303219599610009e-06, "loss": 0.004849039483815432, "step": 679 }, { "epoch": 1.129770198606634, "grad_norm": 0.004572253208607435, "learning_rate": 4.300839260638089e-06, "loss": 0.004676291719079018, "step": 680 }, { "epoch": 1.131433919101591, "grad_norm": 0.0013777704443782568, "learning_rate": 4.298455523518102e-06, "loss": 0.0025581195950508118, "step": 681 }, { "epoch": 1.1330976395965477, "grad_norm": 0.0034841045271605253, "learning_rate": 4.296068392748116e-06, "loss": 0.006778066046535969, "step": 682 }, { "epoch": 1.1347613600915047, "grad_norm": 0.0038167042657732964, "learning_rate": 4.293677872832599e-06, "loss": 0.005039317067712545, "step": 683 }, { "epoch": 1.1364250805864615, "grad_norm": 0.002462555654346943, "learning_rate": 4.291283968282413e-06, "loss": 0.004252985585480928, "step": 684 }, { "epoch": 1.1380888010814183, "grad_norm": 0.002770144259557128, "learning_rate": 4.288886683614809e-06, "loss": 0.005728703923523426, "step": 685 }, { "epoch": 1.139752521576375, "grad_norm": 0.00589687330648303, "learning_rate": 4.286486023353417e-06, "loss": 0.004119477234780788, "step": 686 }, { "epoch": 1.141416242071332, "grad_norm": 0.0018810235196724534, "learning_rate": 4.284081992028235e-06, "loss": 0.0024635386653244495, "step": 687 }, { "epoch": 1.1430799625662889, "grad_norm": 0.0035444023087620735, "learning_rate": 4.281674594175621e-06, "loss": 0.005115187726914883, "step": 688 }, { "epoch": 1.1447436830612456, "grad_norm": 0.003653658786788583, "learning_rate": 4.2792638343382894e-06, "loss": 0.004692722111940384, "step": 689 }, { "epoch": 1.1464074035562026, "grad_norm": 0.0026509996969252825, "learning_rate": 4.276849717065295e-06, "loss": 0.004140852950513363, "step": 690 }, { "epoch": 1.1480711240511594, "grad_norm": 0.002603845437988639, "learning_rate": 4.2744322469120296e-06, "loss": 0.004025573376566172, "step": 691 }, { "epoch": 1.1497348445461162, "grad_norm": 0.0027512742672115564, "learning_rate": 4.272011428440212e-06, "loss": 0.004732303321361542, "step": 692 }, { "epoch": 1.1513985650410732, "grad_norm": 0.002622104249894619, "learning_rate": 4.269587266217878e-06, "loss": 0.007063106633722782, "step": 693 }, { "epoch": 1.15306228553603, "grad_norm": 0.0021548105869442225, "learning_rate": 4.2671597648193745e-06, "loss": 0.0035763587802648544, "step": 694 }, { "epoch": 1.1547260060309867, "grad_norm": 0.002422524616122246, "learning_rate": 4.264728928825347e-06, "loss": 0.007466421462595463, "step": 695 }, { "epoch": 1.1563897265259437, "grad_norm": 0.0017905524000525475, "learning_rate": 4.262294762822738e-06, "loss": 0.0030708981212228537, "step": 696 }, { "epoch": 1.1580534470209005, "grad_norm": 0.0022994892206043005, "learning_rate": 4.259857271404767e-06, "loss": 0.0022998168133199215, "step": 697 }, { "epoch": 1.1597171675158573, "grad_norm": 0.0017049030866473913, "learning_rate": 4.257416459170935e-06, "loss": 0.0031123387161642313, "step": 698 }, { "epoch": 1.161380888010814, "grad_norm": 0.002179944422096014, "learning_rate": 4.254972330727004e-06, "loss": 0.003777222940698266, "step": 699 }, { "epoch": 1.163044608505771, "grad_norm": 0.004692301619797945, "learning_rate": 4.252524890685e-06, "loss": 0.009404093027114868, "step": 700 }, { "epoch": 1.163044608505771, "eval_loss": 0.004640148486942053, "eval_runtime": 424.8823, "eval_samples_per_second": 11.316, "eval_steps_per_second": 2.829, "step": 700 }, { "epoch": 1.1647083290007278, "grad_norm": 0.002540596527978778, "learning_rate": 4.250074143663189e-06, "loss": 0.006953324191272259, "step": 701 }, { "epoch": 1.1663720494956848, "grad_norm": 0.006684183143079281, "learning_rate": 4.247620094286085e-06, "loss": 0.004303854890167713, "step": 702 }, { "epoch": 1.1680357699906416, "grad_norm": 0.002960954559966922, "learning_rate": 4.2451627471844305e-06, "loss": 0.0035925274714827538, "step": 703 }, { "epoch": 1.1696994904855984, "grad_norm": 0.003732532262802124, "learning_rate": 4.24270210699519e-06, "loss": 0.008118562400341034, "step": 704 }, { "epoch": 1.1713632109805552, "grad_norm": 0.004170422907918692, "learning_rate": 4.240238178361543e-06, "loss": 0.003138990141451359, "step": 705 }, { "epoch": 1.1730269314755122, "grad_norm": 0.002722041215747595, "learning_rate": 4.237770965932875e-06, "loss": 0.0035777364391833544, "step": 706 }, { "epoch": 1.174690651970469, "grad_norm": 0.00289748003706336, "learning_rate": 4.235300474364766e-06, "loss": 0.005866762716323137, "step": 707 }, { "epoch": 1.1763543724654257, "grad_norm": 0.0030904258601367474, "learning_rate": 4.232826708318985e-06, "loss": 0.0064615714363753796, "step": 708 }, { "epoch": 1.1780180929603827, "grad_norm": 0.004810953047126532, "learning_rate": 4.230349672463481e-06, "loss": 0.004663039464503527, "step": 709 }, { "epoch": 1.1796818134553395, "grad_norm": 0.0038745985366404057, "learning_rate": 4.22786937147237e-06, "loss": 0.007224446628242731, "step": 710 }, { "epoch": 1.1813455339502963, "grad_norm": 0.0015594467986375093, "learning_rate": 4.2253858100259304e-06, "loss": 0.0034856877755373716, "step": 711 }, { "epoch": 1.1830092544452533, "grad_norm": 0.0020052522886544466, "learning_rate": 4.222898992810596e-06, "loss": 0.004635176621377468, "step": 712 }, { "epoch": 1.18467297494021, "grad_norm": 0.001965244999155402, "learning_rate": 4.220408924518939e-06, "loss": 0.0020197900012135506, "step": 713 }, { "epoch": 1.1863366954351668, "grad_norm": 0.002565849805250764, "learning_rate": 4.217915609849671e-06, "loss": 0.005158688873052597, "step": 714 }, { "epoch": 1.1880004159301238, "grad_norm": 0.0020221301820129156, "learning_rate": 4.215419053507626e-06, "loss": 0.004507225006818771, "step": 715 }, { "epoch": 1.1896641364250806, "grad_norm": 0.004605746828019619, "learning_rate": 4.212919260203757e-06, "loss": 0.006004433147609234, "step": 716 }, { "epoch": 1.1913278569200374, "grad_norm": 0.0044348943047225475, "learning_rate": 4.210416234655125e-06, "loss": 0.007470245473086834, "step": 717 }, { "epoch": 1.1929915774149942, "grad_norm": 0.0030243340879678726, "learning_rate": 4.207909981584889e-06, "loss": 0.0035587972961366177, "step": 718 }, { "epoch": 1.1946552979099512, "grad_norm": 0.0022278032265603542, "learning_rate": 4.2054005057223e-06, "loss": 0.006495712324976921, "step": 719 }, { "epoch": 1.196319018404908, "grad_norm": 0.002851974219083786, "learning_rate": 4.202887811802687e-06, "loss": 0.004013349302113056, "step": 720 }, { "epoch": 1.1979827388998647, "grad_norm": 0.0021993438713252544, "learning_rate": 4.200371904567457e-06, "loss": 0.0037416350096464157, "step": 721 }, { "epoch": 1.1996464593948217, "grad_norm": 0.004688817076385021, "learning_rate": 4.197852788764075e-06, "loss": 0.00362020474858582, "step": 722 }, { "epoch": 1.2013101798897785, "grad_norm": 0.004926290363073349, "learning_rate": 4.195330469146063e-06, "loss": 0.007041393779218197, "step": 723 }, { "epoch": 1.2029739003847353, "grad_norm": 0.0027303057722747326, "learning_rate": 4.1928049504729886e-06, "loss": 0.004923125728964806, "step": 724 }, { "epoch": 1.2046376208796923, "grad_norm": 0.0019440047908574343, "learning_rate": 4.1902762375104555e-06, "loss": 0.0022577960044145584, "step": 725 }, { "epoch": 1.206301341374649, "grad_norm": 0.0027244926895946264, "learning_rate": 4.187744335030095e-06, "loss": 0.0067812055349349976, "step": 726 }, { "epoch": 1.2079650618696058, "grad_norm": 0.002273574937134981, "learning_rate": 4.185209247809557e-06, "loss": 0.003740078303962946, "step": 727 }, { "epoch": 1.2096287823645628, "grad_norm": 0.0034744152799248695, "learning_rate": 4.182670980632501e-06, "loss": 0.013247332535684109, "step": 728 }, { "epoch": 1.2112925028595196, "grad_norm": 0.004806975368410349, "learning_rate": 4.180129538288587e-06, "loss": 0.006336216814815998, "step": 729 }, { "epoch": 1.2129562233544764, "grad_norm": 0.0007596082286909223, "learning_rate": 4.177584925573466e-06, "loss": 0.0027835089713335037, "step": 730 }, { "epoch": 1.2146199438494334, "grad_norm": 0.002162647433578968, "learning_rate": 4.175037147288772e-06, "loss": 0.005248047411441803, "step": 731 }, { "epoch": 1.2162836643443902, "grad_norm": 0.0020637372508645058, "learning_rate": 4.172486208242113e-06, "loss": 0.0023893651086837053, "step": 732 }, { "epoch": 1.217947384839347, "grad_norm": 0.004547535441815853, "learning_rate": 4.169932113247059e-06, "loss": 0.004172275308519602, "step": 733 }, { "epoch": 1.219611105334304, "grad_norm": 0.0015233661979436874, "learning_rate": 4.167374867123138e-06, "loss": 0.0022961003705859184, "step": 734 }, { "epoch": 1.2212748258292607, "grad_norm": 0.001560075324960053, "learning_rate": 4.164814474695823e-06, "loss": 0.0037685995921492577, "step": 735 }, { "epoch": 1.2229385463242175, "grad_norm": 0.002407171530649066, "learning_rate": 4.162250940796523e-06, "loss": 0.003144836286082864, "step": 736 }, { "epoch": 1.2246022668191743, "grad_norm": 0.0025342016015201807, "learning_rate": 4.159684270262576e-06, "loss": 0.005925535224378109, "step": 737 }, { "epoch": 1.2262659873141313, "grad_norm": 0.0020147510804235935, "learning_rate": 4.157114467937239e-06, "loss": 0.002619993407279253, "step": 738 }, { "epoch": 1.227929707809088, "grad_norm": 0.002741551026701927, "learning_rate": 4.154541538669677e-06, "loss": 0.0038237329572439194, "step": 739 }, { "epoch": 1.2295934283040448, "grad_norm": 0.0026750387623906136, "learning_rate": 4.151965487314959e-06, "loss": 0.00299008353613317, "step": 740 }, { "epoch": 1.2312571487990018, "grad_norm": 0.0017631236696615815, "learning_rate": 4.1493863187340415e-06, "loss": 0.00323181739076972, "step": 741 }, { "epoch": 1.2329208692939586, "grad_norm": 0.00393708935007453, "learning_rate": 4.146804037793763e-06, "loss": 0.0074392640963196754, "step": 742 }, { "epoch": 1.2345845897889154, "grad_norm": 0.002047119429334998, "learning_rate": 4.144218649366839e-06, "loss": 0.003371204948052764, "step": 743 }, { "epoch": 1.2362483102838724, "grad_norm": 0.002852058270946145, "learning_rate": 4.141630158331845e-06, "loss": 0.005799049511551857, "step": 744 }, { "epoch": 1.2379120307788292, "grad_norm": 0.002589620416983962, "learning_rate": 4.139038569573213e-06, "loss": 0.0037557545583695173, "step": 745 }, { "epoch": 1.239575751273786, "grad_norm": 0.0013611821923404932, "learning_rate": 4.1364438879812194e-06, "loss": 0.0026148685719817877, "step": 746 }, { "epoch": 1.241239471768743, "grad_norm": 0.0026877254713326693, "learning_rate": 4.1338461184519776e-06, "loss": 0.00639910064637661, "step": 747 }, { "epoch": 1.2429031922636997, "grad_norm": 0.004890333395451307, "learning_rate": 4.131245265887426e-06, "loss": 0.014384834095835686, "step": 748 }, { "epoch": 1.2445669127586565, "grad_norm": 0.0029951855540275574, "learning_rate": 4.1286413351953235e-06, "loss": 0.00565043231472373, "step": 749 }, { "epoch": 1.2462306332536133, "grad_norm": 0.0024212165735661983, "learning_rate": 4.126034331289235e-06, "loss": 0.004203692078590393, "step": 750 }, { "epoch": 1.2478943537485703, "grad_norm": 0.0020162512082606554, "learning_rate": 4.123424259088525e-06, "loss": 0.0035710574593394995, "step": 751 }, { "epoch": 1.249558074243527, "grad_norm": 0.0020386476535350084, "learning_rate": 4.120811123518349e-06, "loss": 0.004487013444304466, "step": 752 }, { "epoch": 1.251221794738484, "grad_norm": 0.002236759290099144, "learning_rate": 4.1181949295096415e-06, "loss": 0.004523813258856535, "step": 753 }, { "epoch": 1.2528855152334408, "grad_norm": 0.0019163981778547168, "learning_rate": 4.11557568199911e-06, "loss": 0.003014134708791971, "step": 754 }, { "epoch": 1.2545492357283976, "grad_norm": 0.001937209046445787, "learning_rate": 4.112953385929221e-06, "loss": 0.003906419035047293, "step": 755 }, { "epoch": 1.2562129562233544, "grad_norm": 0.0027945186011493206, "learning_rate": 4.110328046248196e-06, "loss": 0.005981692112982273, "step": 756 }, { "epoch": 1.2578766767183114, "grad_norm": 0.0011745948577299714, "learning_rate": 4.107699667909999e-06, "loss": 0.0016988021088764071, "step": 757 }, { "epoch": 1.2595403972132682, "grad_norm": 0.001418607891537249, "learning_rate": 4.105068255874328e-06, "loss": 0.003487003967165947, "step": 758 }, { "epoch": 1.261204117708225, "grad_norm": 0.0013838058803230524, "learning_rate": 4.102433815106606e-06, "loss": 0.0015933482209220529, "step": 759 }, { "epoch": 1.262867838203182, "grad_norm": 0.002978533273562789, "learning_rate": 4.09979635057797e-06, "loss": 0.00687625166028738, "step": 760 }, { "epoch": 1.2645315586981387, "grad_norm": 0.001718941261060536, "learning_rate": 4.097155867265264e-06, "loss": 0.003526041517034173, "step": 761 }, { "epoch": 1.2661952791930955, "grad_norm": 0.003133240854367614, "learning_rate": 4.094512370151027e-06, "loss": 0.003928614780306816, "step": 762 }, { "epoch": 1.2678589996880525, "grad_norm": 0.0022098422050476074, "learning_rate": 4.091865864223487e-06, "loss": 0.0026667716447263956, "step": 763 }, { "epoch": 1.2695227201830093, "grad_norm": 0.0017531696939840913, "learning_rate": 4.089216354476545e-06, "loss": 0.001974761486053467, "step": 764 }, { "epoch": 1.271186440677966, "grad_norm": 0.0009637777111493051, "learning_rate": 4.086563845909779e-06, "loss": 0.0017917206278070807, "step": 765 }, { "epoch": 1.272850161172923, "grad_norm": 0.0018993336707353592, "learning_rate": 4.083908343528415e-06, "loss": 0.00392130296677351, "step": 766 }, { "epoch": 1.2745138816678798, "grad_norm": 0.0016326892655342817, "learning_rate": 4.081249852343336e-06, "loss": 0.003777984995394945, "step": 767 }, { "epoch": 1.2761776021628366, "grad_norm": 0.0037745193112641573, "learning_rate": 4.078588377371062e-06, "loss": 0.003253435716032982, "step": 768 }, { "epoch": 1.2778413226577934, "grad_norm": 0.005981610622256994, "learning_rate": 4.075923923633745e-06, "loss": 0.006038327235728502, "step": 769 }, { "epoch": 1.2795050431527504, "grad_norm": 0.004553422797471285, "learning_rate": 4.073256496159153e-06, "loss": 0.0040462627075612545, "step": 770 }, { "epoch": 1.2811687636477072, "grad_norm": 0.0017073902999982238, "learning_rate": 4.070586099980672e-06, "loss": 0.0034780718851834536, "step": 771 }, { "epoch": 1.2828324841426642, "grad_norm": 0.0033805659040808678, "learning_rate": 4.067912740137285e-06, "loss": 0.00390542671084404, "step": 772 }, { "epoch": 1.284496204637621, "grad_norm": 0.0035299034789204597, "learning_rate": 4.06523642167357e-06, "loss": 0.003843412036076188, "step": 773 }, { "epoch": 1.2861599251325777, "grad_norm": 0.004187435377389193, "learning_rate": 4.062557149639688e-06, "loss": 0.004727951250970364, "step": 774 }, { "epoch": 1.2878236456275345, "grad_norm": 0.0027708059642463923, "learning_rate": 4.059874929091369e-06, "loss": 0.0035429196432232857, "step": 775 }, { "epoch": 1.2894873661224915, "grad_norm": 0.0021549989469349384, "learning_rate": 4.057189765089914e-06, "loss": 0.0030433572828769684, "step": 776 }, { "epoch": 1.2911510866174483, "grad_norm": 0.0018761793617159128, "learning_rate": 4.054501662702172e-06, "loss": 0.0032656132243573666, "step": 777 }, { "epoch": 1.292814807112405, "grad_norm": 0.0019973707385361195, "learning_rate": 4.05181062700054e-06, "loss": 0.0048477197997272015, "step": 778 }, { "epoch": 1.294478527607362, "grad_norm": 0.002293492667376995, "learning_rate": 4.049116663062949e-06, "loss": 0.002738622482866049, "step": 779 }, { "epoch": 1.2961422481023188, "grad_norm": 0.0017012613825500011, "learning_rate": 4.046419775972855e-06, "loss": 0.002342861844226718, "step": 780 }, { "epoch": 1.2978059685972756, "grad_norm": 0.0008426354033872485, "learning_rate": 4.043719970819231e-06, "loss": 0.0015461614821106195, "step": 781 }, { "epoch": 1.2994696890922324, "grad_norm": 0.0017862239619717002, "learning_rate": 4.041017252696556e-06, "loss": 0.003129967488348484, "step": 782 }, { "epoch": 1.3011334095871894, "grad_norm": 0.0016204863786697388, "learning_rate": 4.038311626704806e-06, "loss": 0.0013765785843133926, "step": 783 }, { "epoch": 1.3027971300821461, "grad_norm": 0.0023099181707948446, "learning_rate": 4.035603097949444e-06, "loss": 0.0042774975299835205, "step": 784 }, { "epoch": 1.3044608505771031, "grad_norm": 0.0017065017018467188, "learning_rate": 4.032891671541409e-06, "loss": 0.00256139412522316, "step": 785 }, { "epoch": 1.30612457107206, "grad_norm": 0.0013910774141550064, "learning_rate": 4.030177352597109e-06, "loss": 0.003075619228184223, "step": 786 }, { "epoch": 1.3077882915670167, "grad_norm": 0.003909314516931772, "learning_rate": 4.027460146238411e-06, "loss": 0.006473972462117672, "step": 787 }, { "epoch": 1.3094520120619735, "grad_norm": 0.015759041532874107, "learning_rate": 4.02474005759263e-06, "loss": 0.007488309871405363, "step": 788 }, { "epoch": 1.3111157325569305, "grad_norm": 0.002802561968564987, "learning_rate": 4.022017091792518e-06, "loss": 0.0052157179452478886, "step": 789 }, { "epoch": 1.3127794530518873, "grad_norm": 0.0033724752720445395, "learning_rate": 4.01929125397626e-06, "loss": 0.004162366967648268, "step": 790 }, { "epoch": 1.3144431735468443, "grad_norm": 0.0031333763618022203, "learning_rate": 4.016562549287455e-06, "loss": 0.004047158639878035, "step": 791 }, { "epoch": 1.316106894041801, "grad_norm": 0.0016262867720797658, "learning_rate": 4.013830982875117e-06, "loss": 0.0020962050184607506, "step": 792 }, { "epoch": 1.3177706145367578, "grad_norm": 0.002156744012609124, "learning_rate": 4.0110965598936565e-06, "loss": 0.003945136908441782, "step": 793 }, { "epoch": 1.3194343350317146, "grad_norm": 0.003021260490640998, "learning_rate": 4.008359285502877e-06, "loss": 0.00483740633353591, "step": 794 }, { "epoch": 1.3210980555266716, "grad_norm": 0.0018034290987998247, "learning_rate": 4.005619164867959e-06, "loss": 0.002895489800721407, "step": 795 }, { "epoch": 1.3227617760216284, "grad_norm": 0.0027613251004368067, "learning_rate": 4.002876203159458e-06, "loss": 0.004742368124425411, "step": 796 }, { "epoch": 1.3244254965165851, "grad_norm": 0.004044833593070507, "learning_rate": 4.000130405553287e-06, "loss": 0.0023626082111150026, "step": 797 }, { "epoch": 1.3260892170115421, "grad_norm": 0.0029203095473349094, "learning_rate": 3.997381777230714e-06, "loss": 0.0063598062843084335, "step": 798 }, { "epoch": 1.327752937506499, "grad_norm": 0.0028201239183545113, "learning_rate": 3.994630323378344e-06, "loss": 0.004977297969162464, "step": 799 }, { "epoch": 1.3294166580014557, "grad_norm": 0.0036451423075050116, "learning_rate": 3.991876049188116e-06, "loss": 0.0038258880376815796, "step": 800 }, { "epoch": 1.3294166580014557, "eval_loss": 0.004822524730116129, "eval_runtime": 402.1932, "eval_samples_per_second": 11.954, "eval_steps_per_second": 2.989, "step": 800 }, { "epoch": 1.3310803784964125, "grad_norm": 0.0028423636686056852, "learning_rate": 3.989118959857293e-06, "loss": 0.0061726756393909454, "step": 801 }, { "epoch": 1.3327440989913695, "grad_norm": 0.002013375284150243, "learning_rate": 3.986359060588446e-06, "loss": 0.00220022676512599, "step": 802 }, { "epoch": 1.3344078194863263, "grad_norm": 0.00471561448648572, "learning_rate": 3.983596356589452e-06, "loss": 0.0031772053334861994, "step": 803 }, { "epoch": 1.3360715399812833, "grad_norm": 0.002453225664794445, "learning_rate": 3.980830853073476e-06, "loss": 0.004086391534656286, "step": 804 }, { "epoch": 1.33773526047624, "grad_norm": 0.002689510118216276, "learning_rate": 3.978062555258972e-06, "loss": 0.0053067514672875404, "step": 805 }, { "epoch": 1.3393989809711968, "grad_norm": 0.002451070351526141, "learning_rate": 3.975291468369661e-06, "loss": 0.0021515078842639923, "step": 806 }, { "epoch": 1.3410627014661536, "grad_norm": 0.0024362937547266483, "learning_rate": 3.97251759763453e-06, "loss": 0.004400534089654684, "step": 807 }, { "epoch": 1.3427264219611106, "grad_norm": 0.0022519261110574007, "learning_rate": 3.969740948287817e-06, "loss": 0.0035801767371594906, "step": 808 }, { "epoch": 1.3443901424560674, "grad_norm": 0.004936764016747475, "learning_rate": 3.966961525569005e-06, "loss": 0.007511000614613295, "step": 809 }, { "epoch": 1.3460538629510244, "grad_norm": 0.004098286386579275, "learning_rate": 3.964179334722811e-06, "loss": 0.0044007147662341595, "step": 810 }, { "epoch": 1.3477175834459811, "grad_norm": 0.004742534831166267, "learning_rate": 3.961394380999173e-06, "loss": 0.0067534856498241425, "step": 811 }, { "epoch": 1.349381303940938, "grad_norm": 0.001244925195351243, "learning_rate": 3.958606669653243e-06, "loss": 0.0035037680063396692, "step": 812 }, { "epoch": 1.3510450244358947, "grad_norm": 0.0022341671865433455, "learning_rate": 3.955816205945378e-06, "loss": 0.005220518913120031, "step": 813 }, { "epoch": 1.3527087449308517, "grad_norm": 0.002156665548682213, "learning_rate": 3.953022995141128e-06, "loss": 0.005255310796201229, "step": 814 }, { "epoch": 1.3543724654258085, "grad_norm": 0.0027065190952271223, "learning_rate": 3.950227042511226e-06, "loss": 0.004826219752430916, "step": 815 }, { "epoch": 1.3560361859207652, "grad_norm": 0.001812028931453824, "learning_rate": 3.947428353331579e-06, "loss": 0.005146780516952276, "step": 816 }, { "epoch": 1.3576999064157222, "grad_norm": 0.0019188732840120792, "learning_rate": 3.94462693288326e-06, "loss": 0.002942196559160948, "step": 817 }, { "epoch": 1.359363626910679, "grad_norm": 0.0029397630132734776, "learning_rate": 3.941822786452491e-06, "loss": 0.0025993953458964825, "step": 818 }, { "epoch": 1.3610273474056358, "grad_norm": 0.002346345689147711, "learning_rate": 3.939015919330643e-06, "loss": 0.003963056020438671, "step": 819 }, { "epoch": 1.3626910679005926, "grad_norm": 0.003207869129255414, "learning_rate": 3.936206336814219e-06, "loss": 0.005814023315906525, "step": 820 }, { "epoch": 1.3643547883955496, "grad_norm": 0.0013737737899646163, "learning_rate": 3.933394044204843e-06, "loss": 0.0030613031703978777, "step": 821 }, { "epoch": 1.3660185088905064, "grad_norm": 0.0035271679516881704, "learning_rate": 3.930579046809259e-06, "loss": 0.005814910866320133, "step": 822 }, { "epoch": 1.3676822293854634, "grad_norm": 0.0025771872606128454, "learning_rate": 3.92776134993931e-06, "loss": 0.005833122879266739, "step": 823 }, { "epoch": 1.3693459498804201, "grad_norm": 0.002427369588986039, "learning_rate": 3.924940958911933e-06, "loss": 0.004450193606317043, "step": 824 }, { "epoch": 1.371009670375377, "grad_norm": 0.0019530977588146925, "learning_rate": 3.922117879049152e-06, "loss": 0.003191176801919937, "step": 825 }, { "epoch": 1.3726733908703337, "grad_norm": 0.003067216370254755, "learning_rate": 3.91929211567806e-06, "loss": 0.0036342469975352287, "step": 826 }, { "epoch": 1.3743371113652907, "grad_norm": 0.0018951890524476767, "learning_rate": 3.916463674130821e-06, "loss": 0.0027206242084503174, "step": 827 }, { "epoch": 1.3760008318602475, "grad_norm": 0.004039694555103779, "learning_rate": 3.913632559744645e-06, "loss": 0.006162284407764673, "step": 828 }, { "epoch": 1.3776645523552045, "grad_norm": 0.003776967292651534, "learning_rate": 3.910798777861788e-06, "loss": 0.01263501588255167, "step": 829 }, { "epoch": 1.3793282728501612, "grad_norm": 0.00310622900724411, "learning_rate": 3.9079623338295436e-06, "loss": 0.004743027966469526, "step": 830 }, { "epoch": 1.380991993345118, "grad_norm": 0.003913409076631069, "learning_rate": 3.9051232330002245e-06, "loss": 0.0044493465684354305, "step": 831 }, { "epoch": 1.3826557138400748, "grad_norm": 0.0017946249572560191, "learning_rate": 3.902281480731156e-06, "loss": 0.0026314181741327047, "step": 832 }, { "epoch": 1.3843194343350318, "grad_norm": 0.0020571874920278788, "learning_rate": 3.899437082384671e-06, "loss": 0.004028120543807745, "step": 833 }, { "epoch": 1.3859831548299886, "grad_norm": 0.0026965150609612465, "learning_rate": 3.89659004332809e-06, "loss": 0.004003521986305714, "step": 834 }, { "epoch": 1.3876468753249454, "grad_norm": 0.003063056617975235, "learning_rate": 3.893740368933722e-06, "loss": 0.0029916935600340366, "step": 835 }, { "epoch": 1.3893105958199023, "grad_norm": 0.0036277994513511658, "learning_rate": 3.8908880645788464e-06, "loss": 0.007052903063595295, "step": 836 }, { "epoch": 1.3909743163148591, "grad_norm": 0.0017010894371196628, "learning_rate": 3.888033135645702e-06, "loss": 0.0030756453052163124, "step": 837 }, { "epoch": 1.392638036809816, "grad_norm": 0.0024809178430587053, "learning_rate": 3.885175587521486e-06, "loss": 0.007132468745112419, "step": 838 }, { "epoch": 1.3943017573047727, "grad_norm": 0.0016055714804679155, "learning_rate": 3.882315425598334e-06, "loss": 0.0038319770246744156, "step": 839 }, { "epoch": 1.3959654777997297, "grad_norm": 0.0018153567798435688, "learning_rate": 3.879452655273316e-06, "loss": 0.001998928841203451, "step": 840 }, { "epoch": 1.3976291982946865, "grad_norm": 0.0028074143920093775, "learning_rate": 3.876587281948422e-06, "loss": 0.005024431273341179, "step": 841 }, { "epoch": 1.3992929187896435, "grad_norm": 0.0022321913857012987, "learning_rate": 3.873719311030556e-06, "loss": 0.002615418517962098, "step": 842 }, { "epoch": 1.4009566392846002, "grad_norm": 0.0021348358131945133, "learning_rate": 3.8708487479315204e-06, "loss": 0.0033859461545944214, "step": 843 }, { "epoch": 1.402620359779557, "grad_norm": 0.005225852597504854, "learning_rate": 3.867975598068012e-06, "loss": 0.002719119656831026, "step": 844 }, { "epoch": 1.4042840802745138, "grad_norm": 0.0009146020747721195, "learning_rate": 3.8650998668616085e-06, "loss": 0.0021196685265749693, "step": 845 }, { "epoch": 1.4059478007694708, "grad_norm": 0.0017917942022904754, "learning_rate": 3.862221559738757e-06, "loss": 0.0072954390197992325, "step": 846 }, { "epoch": 1.4076115212644276, "grad_norm": 0.002280753804370761, "learning_rate": 3.859340682130766e-06, "loss": 0.004195714369416237, "step": 847 }, { "epoch": 1.4092752417593843, "grad_norm": 0.0031667782459408045, "learning_rate": 3.856457239473795e-06, "loss": 0.002995600923895836, "step": 848 }, { "epoch": 1.4109389622543413, "grad_norm": 0.00300840032286942, "learning_rate": 3.853571237208843e-06, "loss": 0.006693383678793907, "step": 849 }, { "epoch": 1.4126026827492981, "grad_norm": 0.0032315480057150126, "learning_rate": 3.8506826807817395e-06, "loss": 0.008001086302101612, "step": 850 }, { "epoch": 1.414266403244255, "grad_norm": 0.0031034990679472685, "learning_rate": 3.847791575643134e-06, "loss": 0.005453841295093298, "step": 851 }, { "epoch": 1.4159301237392117, "grad_norm": 0.0038969414308667183, "learning_rate": 3.844897927248483e-06, "loss": 0.0055399080738425255, "step": 852 }, { "epoch": 1.4175938442341687, "grad_norm": 0.002615910256281495, "learning_rate": 3.842001741058045e-06, "loss": 0.0037770913913846016, "step": 853 }, { "epoch": 1.4192575647291255, "grad_norm": 0.001906339661218226, "learning_rate": 3.839103022536865e-06, "loss": 0.005109758581966162, "step": 854 }, { "epoch": 1.4209212852240825, "grad_norm": 0.0017247337382286787, "learning_rate": 3.836201777154769e-06, "loss": 0.004554691258817911, "step": 855 }, { "epoch": 1.4225850057190392, "grad_norm": 0.0023999211844056845, "learning_rate": 3.833298010386347e-06, "loss": 0.006145539693534374, "step": 856 }, { "epoch": 1.424248726213996, "grad_norm": 0.0028406865894794464, "learning_rate": 3.830391727710954e-06, "loss": 0.006535790394991636, "step": 857 }, { "epoch": 1.4259124467089528, "grad_norm": 0.002053679432719946, "learning_rate": 3.827482934612684e-06, "loss": 0.006250452250242233, "step": 858 }, { "epoch": 1.4275761672039098, "grad_norm": 0.001439629471860826, "learning_rate": 3.824571636580372e-06, "loss": 0.0042776912450790405, "step": 859 }, { "epoch": 1.4292398876988666, "grad_norm": 0.0013407040387392044, "learning_rate": 3.821657839107583e-06, "loss": 0.002986511215567589, "step": 860 }, { "epoch": 1.4309036081938236, "grad_norm": 0.0020117536187171936, "learning_rate": 3.818741547692593e-06, "loss": 0.003501957282423973, "step": 861 }, { "epoch": 1.4325673286887803, "grad_norm": 0.0011241771280765533, "learning_rate": 3.815822767838386e-06, "loss": 0.003273077541962266, "step": 862 }, { "epoch": 1.4342310491837371, "grad_norm": 0.0014655692502856255, "learning_rate": 3.812901505052642e-06, "loss": 0.002575968625023961, "step": 863 }, { "epoch": 1.435894769678694, "grad_norm": 0.0026631092187017202, "learning_rate": 3.8099777648477264e-06, "loss": 0.004115140065550804, "step": 864 }, { "epoch": 1.437558490173651, "grad_norm": 0.0026415600441396236, "learning_rate": 3.8070515527406803e-06, "loss": 0.006131789647042751, "step": 865 }, { "epoch": 1.4392222106686077, "grad_norm": 0.0013655349612236023, "learning_rate": 3.8041228742532064e-06, "loss": 0.004735004622489214, "step": 866 }, { "epoch": 1.4408859311635644, "grad_norm": 0.0008041858091019094, "learning_rate": 3.8011917349116633e-06, "loss": 0.0017425359692424536, "step": 867 }, { "epoch": 1.4425496516585214, "grad_norm": 0.0026084021665155888, "learning_rate": 3.7982581402470536e-06, "loss": 0.003985221032053232, "step": 868 }, { "epoch": 1.4442133721534782, "grad_norm": 0.0017706190701574087, "learning_rate": 3.795322095795012e-06, "loss": 0.005514065269380808, "step": 869 }, { "epoch": 1.445877092648435, "grad_norm": 0.0013553850585594773, "learning_rate": 3.7923836070957963e-06, "loss": 0.0028120880015194416, "step": 870 }, { "epoch": 1.4475408131433918, "grad_norm": 0.0017212049569934607, "learning_rate": 3.7894426796942773e-06, "loss": 0.003140087705105543, "step": 871 }, { "epoch": 1.4492045336383488, "grad_norm": 0.002766014775261283, "learning_rate": 3.786499319139926e-06, "loss": 0.0036492799408733845, "step": 872 }, { "epoch": 1.4508682541333056, "grad_norm": 0.0016567467246204615, "learning_rate": 3.7835535309868055e-06, "loss": 0.0021711941808462143, "step": 873 }, { "epoch": 1.4525319746282626, "grad_norm": 0.001408170210197568, "learning_rate": 3.78060532079356e-06, "loss": 0.002645156579092145, "step": 874 }, { "epoch": 1.4541956951232193, "grad_norm": 0.0050718155689537525, "learning_rate": 3.777654694123404e-06, "loss": 0.0038958254735916853, "step": 875 }, { "epoch": 1.4558594156181761, "grad_norm": 0.0029584639705717564, "learning_rate": 3.7747016565441112e-06, "loss": 0.004871165845543146, "step": 876 }, { "epoch": 1.4575231361131329, "grad_norm": 0.001699764747172594, "learning_rate": 3.771746213628006e-06, "loss": 0.0027038324624300003, "step": 877 }, { "epoch": 1.4591868566080899, "grad_norm": 0.0031087801326066256, "learning_rate": 3.7687883709519496e-06, "loss": 0.004658531863242388, "step": 878 }, { "epoch": 1.4608505771030467, "grad_norm": 0.0026078419759869576, "learning_rate": 3.7658281340973336e-06, "loss": 0.0025535072200000286, "step": 879 }, { "epoch": 1.4625142975980037, "grad_norm": 0.002413343172520399, "learning_rate": 3.7628655086500654e-06, "loss": 0.0027896517422050238, "step": 880 }, { "epoch": 1.4641780180929604, "grad_norm": 0.001798558863811195, "learning_rate": 3.7599005002005616e-06, "loss": 0.0048567005433142185, "step": 881 }, { "epoch": 1.4658417385879172, "grad_norm": 0.002467783633619547, "learning_rate": 3.7569331143437336e-06, "loss": 0.009476818144321442, "step": 882 }, { "epoch": 1.467505459082874, "grad_norm": 0.002080975566059351, "learning_rate": 3.7539633566789812e-06, "loss": 0.0033652521669864655, "step": 883 }, { "epoch": 1.469169179577831, "grad_norm": 0.003472168231382966, "learning_rate": 3.750991232810177e-06, "loss": 0.00317693455144763, "step": 884 }, { "epoch": 1.4708329000727878, "grad_norm": 0.0024736488703638315, "learning_rate": 3.7480167483456603e-06, "loss": 0.0052384547889232635, "step": 885 }, { "epoch": 1.4724966205677446, "grad_norm": 0.0017958751413971186, "learning_rate": 3.7450399088982247e-06, "loss": 0.001992971170693636, "step": 886 }, { "epoch": 1.4741603410627016, "grad_norm": 0.003060857066884637, "learning_rate": 3.742060720085107e-06, "loss": 0.005137836094945669, "step": 887 }, { "epoch": 1.4758240615576583, "grad_norm": 0.0017445053672417998, "learning_rate": 3.739079187527978e-06, "loss": 0.002876036800444126, "step": 888 }, { "epoch": 1.477487782052615, "grad_norm": 0.002199475420638919, "learning_rate": 3.73609531685293e-06, "loss": 0.0028552846051752567, "step": 889 }, { "epoch": 1.4791515025475719, "grad_norm": 0.0037617129273712635, "learning_rate": 3.733109113690469e-06, "loss": 0.009398862719535828, "step": 890 }, { "epoch": 1.4808152230425289, "grad_norm": 0.0013438520254567266, "learning_rate": 3.7301205836755006e-06, "loss": 0.0023825191892683506, "step": 891 }, { "epoch": 1.4824789435374857, "grad_norm": 0.00315672904253006, "learning_rate": 3.727129732447322e-06, "loss": 0.004341416992247105, "step": 892 }, { "epoch": 1.4841426640324427, "grad_norm": 0.0012259150389581919, "learning_rate": 3.7241365656496103e-06, "loss": 0.0016382039757445455, "step": 893 }, { "epoch": 1.4858063845273994, "grad_norm": 0.0030357290524989367, "learning_rate": 3.7211410889304117e-06, "loss": 0.003687058575451374, "step": 894 }, { "epoch": 1.4874701050223562, "grad_norm": 0.002371704438701272, "learning_rate": 3.7181433079421316e-06, "loss": 0.0053842682391405106, "step": 895 }, { "epoch": 1.489133825517313, "grad_norm": 0.0021484766621142626, "learning_rate": 3.7151432283415244e-06, "loss": 0.002064719330519438, "step": 896 }, { "epoch": 1.49079754601227, "grad_norm": 0.0012575231958180666, "learning_rate": 3.712140855789679e-06, "loss": 0.002172749023884535, "step": 897 }, { "epoch": 1.4924612665072268, "grad_norm": 0.0008468807209283113, "learning_rate": 3.709136195952015e-06, "loss": 0.002431442029774189, "step": 898 }, { "epoch": 1.4941249870021838, "grad_norm": 0.0007434627623297274, "learning_rate": 3.706129254498266e-06, "loss": 0.0017739442409947515, "step": 899 }, { "epoch": 1.4957887074971405, "grad_norm": 0.001650674152188003, "learning_rate": 3.703120037102469e-06, "loss": 0.0070024230517446995, "step": 900 }, { "epoch": 1.4957887074971405, "eval_loss": 0.004188983701169491, "eval_runtime": 395.6086, "eval_samples_per_second": 12.153, "eval_steps_per_second": 3.038, "step": 900 }, { "epoch": 1.4974524279920973, "grad_norm": 0.0013096841285005212, "learning_rate": 3.7001085494429596e-06, "loss": 0.0030500690918415785, "step": 901 }, { "epoch": 1.499116148487054, "grad_norm": 0.0012824746081605554, "learning_rate": 3.697094797202355e-06, "loss": 0.001866386504843831, "step": 902 }, { "epoch": 1.5007798689820109, "grad_norm": 0.0028050485998392105, "learning_rate": 3.694078786067546e-06, "loss": 0.005148045253008604, "step": 903 }, { "epoch": 1.5024435894769679, "grad_norm": 0.0022381776943802834, "learning_rate": 3.691060521729686e-06, "loss": 0.004079541191458702, "step": 904 }, { "epoch": 1.5041073099719249, "grad_norm": 0.0034186223056167364, "learning_rate": 3.6880400098841794e-06, "loss": 0.00605606148019433, "step": 905 }, { "epoch": 1.5057710304668817, "grad_norm": 0.0015580811304971576, "learning_rate": 3.6850172562306735e-06, "loss": 0.0045418585650622845, "step": 906 }, { "epoch": 1.5074347509618384, "grad_norm": 0.002241261536255479, "learning_rate": 3.681992266473044e-06, "loss": 0.00643511675298214, "step": 907 }, { "epoch": 1.5090984714567952, "grad_norm": 0.002026507630944252, "learning_rate": 3.6789650463193864e-06, "loss": 0.003510694019496441, "step": 908 }, { "epoch": 1.510762191951752, "grad_norm": 0.001170490519143641, "learning_rate": 3.675935601482006e-06, "loss": 0.0016428721137344837, "step": 909 }, { "epoch": 1.512425912446709, "grad_norm": 0.0025286872405558825, "learning_rate": 3.6729039376774055e-06, "loss": 0.003139745444059372, "step": 910 }, { "epoch": 1.5140896329416658, "grad_norm": 0.002121243393048644, "learning_rate": 3.6698700606262733e-06, "loss": 0.0036724861711263657, "step": 911 }, { "epoch": 1.5157533534366228, "grad_norm": 0.0019067934481427073, "learning_rate": 3.6668339760534768e-06, "loss": 0.0035516428761184216, "step": 912 }, { "epoch": 1.5174170739315795, "grad_norm": 0.002206754172220826, "learning_rate": 3.6637956896880465e-06, "loss": 0.0029350381810218096, "step": 913 }, { "epoch": 1.5190807944265363, "grad_norm": 0.0027417195960879326, "learning_rate": 3.6607552072631685e-06, "loss": 0.0031718441750854254, "step": 914 }, { "epoch": 1.520744514921493, "grad_norm": 0.002210053149610758, "learning_rate": 3.6577125345161748e-06, "loss": 0.003069754224270582, "step": 915 }, { "epoch": 1.5224082354164499, "grad_norm": 0.0014066651929169893, "learning_rate": 3.6546676771885257e-06, "loss": 0.0033890805207192898, "step": 916 }, { "epoch": 1.5240719559114069, "grad_norm": 0.0011378617491573095, "learning_rate": 3.6516206410258092e-06, "loss": 0.002046656096354127, "step": 917 }, { "epoch": 1.5257356764063639, "grad_norm": 0.0013073384761810303, "learning_rate": 3.6485714317777223e-06, "loss": 0.002027935115620494, "step": 918 }, { "epoch": 1.5273993969013206, "grad_norm": 0.001576530165039003, "learning_rate": 3.6455200551980605e-06, "loss": 0.0013289693742990494, "step": 919 }, { "epoch": 1.5290631173962774, "grad_norm": 0.0018064347095787525, "learning_rate": 3.642466517044713e-06, "loss": 0.002037541940808296, "step": 920 }, { "epoch": 1.5307268378912342, "grad_norm": 0.002317433012649417, "learning_rate": 3.6394108230796455e-06, "loss": 0.005125693045556545, "step": 921 }, { "epoch": 1.532390558386191, "grad_norm": 0.000983527977950871, "learning_rate": 3.636352979068891e-06, "loss": 0.0022681993432343006, "step": 922 }, { "epoch": 1.534054278881148, "grad_norm": 0.0016607396537438035, "learning_rate": 3.6332929907825426e-06, "loss": 0.0016774425748735666, "step": 923 }, { "epoch": 1.535717999376105, "grad_norm": 0.002821697387844324, "learning_rate": 3.630230863994736e-06, "loss": 0.003204245353117585, "step": 924 }, { "epoch": 1.5373817198710618, "grad_norm": 0.0008679834427312016, "learning_rate": 3.6271666044836433e-06, "loss": 0.001236174488440156, "step": 925 }, { "epoch": 1.5390454403660185, "grad_norm": 0.0009674489265307784, "learning_rate": 3.624100218031464e-06, "loss": 0.001415852690115571, "step": 926 }, { "epoch": 1.5407091608609753, "grad_norm": 0.0015830635093152523, "learning_rate": 3.621031710424407e-06, "loss": 0.0019250408513471484, "step": 927 }, { "epoch": 1.542372881355932, "grad_norm": 0.002309768460690975, "learning_rate": 3.6179610874526856e-06, "loss": 0.004901132546365261, "step": 928 }, { "epoch": 1.544036601850889, "grad_norm": 0.0043786936439573765, "learning_rate": 3.614888354910505e-06, "loss": 0.0032006818801164627, "step": 929 }, { "epoch": 1.5457003223458459, "grad_norm": 0.0012186798267066479, "learning_rate": 3.6118135185960507e-06, "loss": 0.002999824471771717, "step": 930 }, { "epoch": 1.5473640428408029, "grad_norm": 0.0018106299685314298, "learning_rate": 3.6087365843114773e-06, "loss": 0.0023737032897770405, "step": 931 }, { "epoch": 1.5490277633357596, "grad_norm": 0.0017335520824417472, "learning_rate": 3.6056575578629006e-06, "loss": 0.0030579883605241776, "step": 932 }, { "epoch": 1.5506914838307164, "grad_norm": 0.004123002290725708, "learning_rate": 3.6025764450603808e-06, "loss": 0.003146671922877431, "step": 933 }, { "epoch": 1.5523552043256732, "grad_norm": 0.004050532355904579, "learning_rate": 3.5994932517179182e-06, "loss": 0.005657460540533066, "step": 934 }, { "epoch": 1.55401892482063, "grad_norm": 0.0015287046553567052, "learning_rate": 3.596407983653436e-06, "loss": 0.0027307921554893255, "step": 935 }, { "epoch": 1.555682645315587, "grad_norm": 0.0017233432736247778, "learning_rate": 3.5933206466887755e-06, "loss": 0.002712239045649767, "step": 936 }, { "epoch": 1.557346365810544, "grad_norm": 0.001591035514138639, "learning_rate": 3.59023124664968e-06, "loss": 0.004103376530110836, "step": 937 }, { "epoch": 1.5590100863055008, "grad_norm": 0.0024902494624257088, "learning_rate": 3.5871397893657867e-06, "loss": 0.0025554909370839596, "step": 938 }, { "epoch": 1.5606738068004575, "grad_norm": 0.0032940879464149475, "learning_rate": 3.5840462806706126e-06, "loss": 0.004046164453029633, "step": 939 }, { "epoch": 1.5623375272954143, "grad_norm": 0.002541156252846122, "learning_rate": 3.5809507264015502e-06, "loss": 0.0029931843746453524, "step": 940 }, { "epoch": 1.564001247790371, "grad_norm": 0.0018427117029204965, "learning_rate": 3.5778531323998465e-06, "loss": 0.003148130141198635, "step": 941 }, { "epoch": 1.565664968285328, "grad_norm": 0.0030140052549540997, "learning_rate": 3.574753504510602e-06, "loss": 0.0034573671873658895, "step": 942 }, { "epoch": 1.5673286887802849, "grad_norm": 0.001244527637027204, "learning_rate": 3.571651848582753e-06, "loss": 0.003539226483553648, "step": 943 }, { "epoch": 1.5689924092752419, "grad_norm": 0.002971372799947858, "learning_rate": 3.5685481704690617e-06, "loss": 0.003306737169623375, "step": 944 }, { "epoch": 1.5706561297701986, "grad_norm": 0.0023480779491364956, "learning_rate": 3.5654424760261082e-06, "loss": 0.0052779726684093475, "step": 945 }, { "epoch": 1.5723198502651554, "grad_norm": 0.0023597951512783766, "learning_rate": 3.5623347711142764e-06, "loss": 0.0025963601656258106, "step": 946 }, { "epoch": 1.5739835707601122, "grad_norm": 0.001831749570555985, "learning_rate": 3.5592250615977434e-06, "loss": 0.00302727147936821, "step": 947 }, { "epoch": 1.5756472912550692, "grad_norm": 0.0016239514807239175, "learning_rate": 3.5561133533444703e-06, "loss": 0.0025464145001024008, "step": 948 }, { "epoch": 1.577311011750026, "grad_norm": 0.00151860062032938, "learning_rate": 3.552999652226189e-06, "loss": 0.0034899080637842417, "step": 949 }, { "epoch": 1.578974732244983, "grad_norm": 0.0012699831277132034, "learning_rate": 3.549883964118392e-06, "loss": 0.001983851194381714, "step": 950 }, { "epoch": 1.5806384527399397, "grad_norm": 0.0034781943541020155, "learning_rate": 3.54676629490032e-06, "loss": 0.003068584017455578, "step": 951 }, { "epoch": 1.5823021732348965, "grad_norm": 0.002003491623327136, "learning_rate": 3.543646650454955e-06, "loss": 0.0037051017861813307, "step": 952 }, { "epoch": 1.5839658937298533, "grad_norm": 0.0011521793203428388, "learning_rate": 3.5405250366690023e-06, "loss": 0.00235694064758718, "step": 953 }, { "epoch": 1.58562961422481, "grad_norm": 0.0016058378387242556, "learning_rate": 3.5374014594328877e-06, "loss": 0.0022253813222050667, "step": 954 }, { "epoch": 1.587293334719767, "grad_norm": 0.0031109461560845375, "learning_rate": 3.5342759246407378e-06, "loss": 0.008151844143867493, "step": 955 }, { "epoch": 1.588957055214724, "grad_norm": 0.0023460695520043373, "learning_rate": 3.5311484381903754e-06, "loss": 0.006699217949062586, "step": 956 }, { "epoch": 1.5906207757096809, "grad_norm": 0.0008898034575395286, "learning_rate": 3.528019005983306e-06, "loss": 0.0008910002070479095, "step": 957 }, { "epoch": 1.5922844962046376, "grad_norm": 0.0015714402543380857, "learning_rate": 3.5248876339247053e-06, "loss": 0.002021456602960825, "step": 958 }, { "epoch": 1.5939482166995944, "grad_norm": 0.0008527041645720601, "learning_rate": 3.521754327923412e-06, "loss": 0.0010136617347598076, "step": 959 }, { "epoch": 1.5956119371945512, "grad_norm": 0.0016987391281872988, "learning_rate": 3.5186190938919106e-06, "loss": 0.002373310038819909, "step": 960 }, { "epoch": 1.5972756576895082, "grad_norm": 0.0019705703016370535, "learning_rate": 3.515481937746327e-06, "loss": 0.0033961357548832893, "step": 961 }, { "epoch": 1.598939378184465, "grad_norm": 0.0015687725972384214, "learning_rate": 3.5123428654064134e-06, "loss": 0.0030006766319274902, "step": 962 }, { "epoch": 1.600603098679422, "grad_norm": 0.0016754200914874673, "learning_rate": 3.509201882795536e-06, "loss": 0.002887715119868517, "step": 963 }, { "epoch": 1.6022668191743787, "grad_norm": 0.0008902286062948406, "learning_rate": 3.5060589958406677e-06, "loss": 0.0015371054178103805, "step": 964 }, { "epoch": 1.6039305396693355, "grad_norm": 0.0012788430321961641, "learning_rate": 3.5029142104723725e-06, "loss": 0.0015881682047620416, "step": 965 }, { "epoch": 1.6055942601642923, "grad_norm": 0.001669449033215642, "learning_rate": 3.4997675326247993e-06, "loss": 0.0019540064968168736, "step": 966 }, { "epoch": 1.6072579806592493, "grad_norm": 0.0008664173074066639, "learning_rate": 3.4966189682356677e-06, "loss": 0.0009041058365255594, "step": 967 }, { "epoch": 1.608921701154206, "grad_norm": 0.0017394538735970855, "learning_rate": 3.493468523246255e-06, "loss": 0.0033552986569702625, "step": 968 }, { "epoch": 1.610585421649163, "grad_norm": 0.0015175569569692016, "learning_rate": 3.4903162036013894e-06, "loss": 0.004245993215590715, "step": 969 }, { "epoch": 1.6122491421441199, "grad_norm": 0.0025712379720062017, "learning_rate": 3.487162015249436e-06, "loss": 0.006007185205817223, "step": 970 }, { "epoch": 1.6139128626390766, "grad_norm": 0.0024991503451019526, "learning_rate": 3.484005964142285e-06, "loss": 0.0040008206851780415, "step": 971 }, { "epoch": 1.6155765831340334, "grad_norm": 0.0014480374520644546, "learning_rate": 3.4808480562353426e-06, "loss": 0.0017751908162608743, "step": 972 }, { "epoch": 1.6172403036289902, "grad_norm": 0.0022420494351536036, "learning_rate": 3.477688297487519e-06, "loss": 0.002922196639701724, "step": 973 }, { "epoch": 1.6189040241239472, "grad_norm": 0.0012288083089515567, "learning_rate": 3.474526693861216e-06, "loss": 0.0026381013449281454, "step": 974 }, { "epoch": 1.6205677446189042, "grad_norm": 0.00684167118743062, "learning_rate": 3.4713632513223178e-06, "loss": 0.009229960851371288, "step": 975 }, { "epoch": 1.622231465113861, "grad_norm": 0.0032470698934048414, "learning_rate": 3.4681979758401767e-06, "loss": 0.00305845495313406, "step": 976 }, { "epoch": 1.6238951856088177, "grad_norm": 0.001162294065579772, "learning_rate": 3.465030873387606e-06, "loss": 0.0036955790128558874, "step": 977 }, { "epoch": 1.6255589061037745, "grad_norm": 0.0008722259080968797, "learning_rate": 3.461861949940865e-06, "loss": 0.0015360023826360703, "step": 978 }, { "epoch": 1.6272226265987313, "grad_norm": 0.0019653120543807745, "learning_rate": 3.458691211479649e-06, "loss": 0.0034643567632883787, "step": 979 }, { "epoch": 1.6288863470936883, "grad_norm": 0.0019016037695109844, "learning_rate": 3.4555186639870795e-06, "loss": 0.00340990349650383, "step": 980 }, { "epoch": 1.630550067588645, "grad_norm": 0.002036824356764555, "learning_rate": 3.4523443134496916e-06, "loss": 0.0035561122931540012, "step": 981 }, { "epoch": 1.632213788083602, "grad_norm": 0.0014058734523132443, "learning_rate": 3.4491681658574205e-06, "loss": 0.0019305794266983867, "step": 982 }, { "epoch": 1.6338775085785588, "grad_norm": 0.0015478340210393071, "learning_rate": 3.445990227203594e-06, "loss": 0.0024740060325711966, "step": 983 }, { "epoch": 1.6355412290735156, "grad_norm": 0.0011504017747938633, "learning_rate": 3.442810503484921e-06, "loss": 0.0033877063542604446, "step": 984 }, { "epoch": 1.6372049495684724, "grad_norm": 0.002352361800149083, "learning_rate": 3.4396290007014752e-06, "loss": 0.0032197399996221066, "step": 985 }, { "epoch": 1.6388686700634292, "grad_norm": 0.0008741469937376678, "learning_rate": 3.4364457248566913e-06, "loss": 0.0018215301679447293, "step": 986 }, { "epoch": 1.6405323905583862, "grad_norm": 0.0013750221114605665, "learning_rate": 3.433260681957346e-06, "loss": 0.00448352936655283, "step": 987 }, { "epoch": 1.6421961110533432, "grad_norm": 0.0018812837079167366, "learning_rate": 3.430073878013554e-06, "loss": 0.002898102393373847, "step": 988 }, { "epoch": 1.6438598315483, "grad_norm": 0.0017209555953741074, "learning_rate": 3.4268853190387496e-06, "loss": 0.003182763233780861, "step": 989 }, { "epoch": 1.6455235520432567, "grad_norm": 0.00356616685166955, "learning_rate": 3.423695011049683e-06, "loss": 0.006524332799017429, "step": 990 }, { "epoch": 1.6471872725382135, "grad_norm": 0.0020170877687633038, "learning_rate": 3.4205029600663996e-06, "loss": 0.0026602193247526884, "step": 991 }, { "epoch": 1.6488509930331703, "grad_norm": 0.0013372714165598154, "learning_rate": 3.4173091721122375e-06, "loss": 0.0021778345108032227, "step": 992 }, { "epoch": 1.6505147135281273, "grad_norm": 0.0016821332974359393, "learning_rate": 3.414113653213812e-06, "loss": 0.0026598330587148666, "step": 993 }, { "epoch": 1.6521784340230843, "grad_norm": 0.0012136506848037243, "learning_rate": 3.410916409401004e-06, "loss": 0.0019583716057240963, "step": 994 }, { "epoch": 1.653842154518041, "grad_norm": 0.003831377485767007, "learning_rate": 3.407717446706948e-06, "loss": 0.00612489553168416, "step": 995 }, { "epoch": 1.6555058750129978, "grad_norm": 0.002008938929066062, "learning_rate": 3.4045167711680244e-06, "loss": 0.0036759148351848125, "step": 996 }, { "epoch": 1.6571695955079546, "grad_norm": 0.0018878724658861756, "learning_rate": 3.4013143888238455e-06, "loss": 0.0036940951831638813, "step": 997 }, { "epoch": 1.6588333160029114, "grad_norm": 0.0028713566716760397, "learning_rate": 3.398110305717241e-06, "loss": 0.0030482043512165546, "step": 998 }, { "epoch": 1.6604970364978684, "grad_norm": 0.0028156277257949114, "learning_rate": 3.3949045278942545e-06, "loss": 0.0030587539076805115, "step": 999 }, { "epoch": 1.6621607569928252, "grad_norm": 0.0009347902378067374, "learning_rate": 3.3916970614041244e-06, "loss": 0.0013425936922430992, "step": 1000 }, { "epoch": 1.6621607569928252, "eval_loss": 0.0032257065176963806, "eval_runtime": 490.8151, "eval_samples_per_second": 9.796, "eval_steps_per_second": 2.449, "step": 1000 }, { "epoch": 1.6638244774877822, "grad_norm": 0.001645615790039301, "learning_rate": 3.3884879122992762e-06, "loss": 0.004788869060575962, "step": 1001 }, { "epoch": 1.665488197982739, "grad_norm": 0.0014216083800420165, "learning_rate": 3.3852770866353125e-06, "loss": 0.00593174621462822, "step": 1002 }, { "epoch": 1.6671519184776957, "grad_norm": 0.002447444712743163, "learning_rate": 3.382064590470996e-06, "loss": 0.0037248479202389717, "step": 1003 }, { "epoch": 1.6688156389726525, "grad_norm": 0.002557245083153248, "learning_rate": 3.378850429868244e-06, "loss": 0.0015867094043642282, "step": 1004 }, { "epoch": 1.6704793594676093, "grad_norm": 0.0016936559695750475, "learning_rate": 3.3756346108921145e-06, "loss": 0.00401366176083684, "step": 1005 }, { "epoch": 1.6721430799625663, "grad_norm": 0.0017694927519187331, "learning_rate": 3.372417139610793e-06, "loss": 0.004327396862208843, "step": 1006 }, { "epoch": 1.6738068004575233, "grad_norm": 0.002197755267843604, "learning_rate": 3.369198022095585e-06, "loss": 0.002049348782747984, "step": 1007 }, { "epoch": 1.67547052095248, "grad_norm": 0.0014030742458999157, "learning_rate": 3.3659772644209023e-06, "loss": 0.002163403434678912, "step": 1008 }, { "epoch": 1.6771342414474368, "grad_norm": 0.001984333386644721, "learning_rate": 3.36275487266425e-06, "loss": 0.00507196132093668, "step": 1009 }, { "epoch": 1.6787979619423936, "grad_norm": 0.0017958490643650293, "learning_rate": 3.3595308529062176e-06, "loss": 0.003381962887942791, "step": 1010 }, { "epoch": 1.6804616824373504, "grad_norm": 0.0017129637999460101, "learning_rate": 3.3563052112304674e-06, "loss": 0.007319668307900429, "step": 1011 }, { "epoch": 1.6821254029323074, "grad_norm": 0.0027047405019402504, "learning_rate": 3.3530779537237194e-06, "loss": 0.004077594727277756, "step": 1012 }, { "epoch": 1.6837891234272642, "grad_norm": 0.0012556860456243157, "learning_rate": 3.349849086475747e-06, "loss": 0.0024214675650000572, "step": 1013 }, { "epoch": 1.6854528439222212, "grad_norm": 0.002287906827405095, "learning_rate": 3.346618615579359e-06, "loss": 0.0034168255515396595, "step": 1014 }, { "epoch": 1.687116564417178, "grad_norm": 0.0010098470374941826, "learning_rate": 3.3433865471303876e-06, "loss": 0.0009828556794673204, "step": 1015 }, { "epoch": 1.6887802849121347, "grad_norm": 0.0012711078161373734, "learning_rate": 3.3401528872276847e-06, "loss": 0.0018059266731142998, "step": 1016 }, { "epoch": 1.6904440054070915, "grad_norm": 0.0008985710446722806, "learning_rate": 3.3369176419731004e-06, "loss": 0.0007820754544809461, "step": 1017 }, { "epoch": 1.6921077259020485, "grad_norm": 0.0005346555262804031, "learning_rate": 3.33368081747148e-06, "loss": 0.0012233923189342022, "step": 1018 }, { "epoch": 1.6937714463970053, "grad_norm": 0.002259831642732024, "learning_rate": 3.3304424198306464e-06, "loss": 0.006927793845534325, "step": 1019 }, { "epoch": 1.6954351668919623, "grad_norm": 0.0024394611828029156, "learning_rate": 3.3272024551613926e-06, "loss": 0.0051927645690739155, "step": 1020 }, { "epoch": 1.697098887386919, "grad_norm": 0.0016627139411866665, "learning_rate": 3.3239609295774667e-06, "loss": 0.0044672624208033085, "step": 1021 }, { "epoch": 1.6987626078818758, "grad_norm": 0.002487657591700554, "learning_rate": 3.3207178491955656e-06, "loss": 0.005251655355095863, "step": 1022 }, { "epoch": 1.7004263283768326, "grad_norm": 0.0012484157923609018, "learning_rate": 3.3174732201353155e-06, "loss": 0.0025723387952893972, "step": 1023 }, { "epoch": 1.7020900488717894, "grad_norm": 0.0019987961277365685, "learning_rate": 3.3142270485192683e-06, "loss": 0.0029333368875086308, "step": 1024 }, { "epoch": 1.7037537693667464, "grad_norm": 0.0024946676567196846, "learning_rate": 3.3109793404728855e-06, "loss": 0.002550342585891485, "step": 1025 }, { "epoch": 1.7054174898617034, "grad_norm": 0.0016121126245707273, "learning_rate": 3.3077301021245285e-06, "loss": 0.0028350313659757376, "step": 1026 }, { "epoch": 1.7070812103566602, "grad_norm": 0.0029108431190252304, "learning_rate": 3.3044793396054447e-06, "loss": 0.004767598584294319, "step": 1027 }, { "epoch": 1.708744930851617, "grad_norm": 0.001531237387098372, "learning_rate": 3.3012270590497596e-06, "loss": 0.00230689300224185, "step": 1028 }, { "epoch": 1.7104086513465737, "grad_norm": 0.0027158576995134354, "learning_rate": 3.2979732665944615e-06, "loss": 0.007252411916851997, "step": 1029 }, { "epoch": 1.7120723718415305, "grad_norm": 0.001231711357831955, "learning_rate": 3.2947179683793928e-06, "loss": 0.001815581344999373, "step": 1030 }, { "epoch": 1.7137360923364875, "grad_norm": 0.002293652854859829, "learning_rate": 3.291461170547237e-06, "loss": 0.004373159259557724, "step": 1031 }, { "epoch": 1.7153998128314443, "grad_norm": 0.0017853020690381527, "learning_rate": 3.2882028792435072e-06, "loss": 0.004501050338149071, "step": 1032 }, { "epoch": 1.7170635333264013, "grad_norm": 0.0012384027941152453, "learning_rate": 3.2849431006165343e-06, "loss": 0.0017943148268386722, "step": 1033 }, { "epoch": 1.718727253821358, "grad_norm": 0.0015722275711596012, "learning_rate": 3.2816818408174567e-06, "loss": 0.002293921774253249, "step": 1034 }, { "epoch": 1.7203909743163148, "grad_norm": 0.0022003373596817255, "learning_rate": 3.278419106000206e-06, "loss": 0.0037125535309314728, "step": 1035 }, { "epoch": 1.7220546948112716, "grad_norm": 0.0007672826759517193, "learning_rate": 3.2751549023214995e-06, "loss": 0.0020989507902413607, "step": 1036 }, { "epoch": 1.7237184153062286, "grad_norm": 0.0009453165694139898, "learning_rate": 3.2718892359408245e-06, "loss": 0.003136922139674425, "step": 1037 }, { "epoch": 1.7253821358011854, "grad_norm": 0.0012431323993951082, "learning_rate": 3.2686221130204287e-06, "loss": 0.0036514741368591785, "step": 1038 }, { "epoch": 1.7270458562961424, "grad_norm": 0.0009162081405520439, "learning_rate": 3.265353539725309e-06, "loss": 0.0027881120331585407, "step": 1039 }, { "epoch": 1.7287095767910992, "grad_norm": 0.0009822389110922813, "learning_rate": 3.2620835222231972e-06, "loss": 0.002034775447100401, "step": 1040 }, { "epoch": 1.730373297286056, "grad_norm": 0.001126887509599328, "learning_rate": 3.2588120666845534e-06, "loss": 0.003957305569201708, "step": 1041 }, { "epoch": 1.7320370177810127, "grad_norm": 0.001378571498207748, "learning_rate": 3.255539179282548e-06, "loss": 0.0029517512302845716, "step": 1042 }, { "epoch": 1.7337007382759695, "grad_norm": 0.0016041906783357263, "learning_rate": 3.2522648661930558e-06, "loss": 0.0043833935633301735, "step": 1043 }, { "epoch": 1.7353644587709265, "grad_norm": 0.0011807052651420236, "learning_rate": 3.2489891335946413e-06, "loss": 0.0019419104792177677, "step": 1044 }, { "epoch": 1.7370281792658835, "grad_norm": 0.0009768899763002992, "learning_rate": 3.245711987668545e-06, "loss": 0.002313793171197176, "step": 1045 }, { "epoch": 1.7386918997608403, "grad_norm": 0.0015365976141765714, "learning_rate": 3.2424334345986787e-06, "loss": 0.002823270158842206, "step": 1046 }, { "epoch": 1.740355620255797, "grad_norm": 0.0016022294294089079, "learning_rate": 3.239153480571605e-06, "loss": 0.0011497450759634376, "step": 1047 }, { "epoch": 1.7420193407507538, "grad_norm": 0.002018695930019021, "learning_rate": 3.2358721317765344e-06, "loss": 0.0035544075071811676, "step": 1048 }, { "epoch": 1.7436830612457106, "grad_norm": 0.002275625476613641, "learning_rate": 3.2325893944053066e-06, "loss": 0.0036176592111587524, "step": 1049 }, { "epoch": 1.7453467817406676, "grad_norm": 0.0009655337198637426, "learning_rate": 3.2293052746523814e-06, "loss": 0.0015187448589131236, "step": 1050 }, { "epoch": 1.7470105022356244, "grad_norm": 0.0012670238502323627, "learning_rate": 3.2260197787148277e-06, "loss": 0.0028458700980991125, "step": 1051 }, { "epoch": 1.7486742227305814, "grad_norm": 0.002225345466285944, "learning_rate": 3.222732912792313e-06, "loss": 0.0032379874028265476, "step": 1052 }, { "epoch": 1.7503379432255382, "grad_norm": 0.00402183597907424, "learning_rate": 3.2194446830870865e-06, "loss": 0.0032164682634174824, "step": 1053 }, { "epoch": 1.752001663720495, "grad_norm": 0.0012006743345409632, "learning_rate": 3.2161550958039732e-06, "loss": 0.004632690455764532, "step": 1054 }, { "epoch": 1.7536653842154517, "grad_norm": 0.005980351008474827, "learning_rate": 3.2128641571503594e-06, "loss": 0.003166868118569255, "step": 1055 }, { "epoch": 1.7553291047104087, "grad_norm": 0.002437678398564458, "learning_rate": 3.2095718733361803e-06, "loss": 0.008781610988080502, "step": 1056 }, { "epoch": 1.7569928252053655, "grad_norm": 0.0013743985909968615, "learning_rate": 3.2062782505739125e-06, "loss": 0.0027862025890499353, "step": 1057 }, { "epoch": 1.7586565457003225, "grad_norm": 0.0014083994319662452, "learning_rate": 3.202983295078555e-06, "loss": 0.004423089791089296, "step": 1058 }, { "epoch": 1.7603202661952793, "grad_norm": 0.0012103941990062594, "learning_rate": 3.199687013067624e-06, "loss": 0.0016060488997027278, "step": 1059 }, { "epoch": 1.761983986690236, "grad_norm": 0.001576002687215805, "learning_rate": 3.1963894107611395e-06, "loss": 0.002927867928519845, "step": 1060 }, { "epoch": 1.7636477071851928, "grad_norm": 0.0017253441037610173, "learning_rate": 3.1930904943816104e-06, "loss": 0.0021498226560652256, "step": 1061 }, { "epoch": 1.7653114276801496, "grad_norm": 0.0037647176068276167, "learning_rate": 3.189790270154028e-06, "loss": 0.004621579311788082, "step": 1062 }, { "epoch": 1.7669751481751066, "grad_norm": 0.0019720825366675854, "learning_rate": 3.186488744305849e-06, "loss": 0.0062108938582241535, "step": 1063 }, { "epoch": 1.7686388686700636, "grad_norm": 0.0019118713680654764, "learning_rate": 3.183185923066988e-06, "loss": 0.003158648731186986, "step": 1064 }, { "epoch": 1.7703025891650204, "grad_norm": 0.0035292403772473335, "learning_rate": 3.179881812669804e-06, "loss": 0.003806828986853361, "step": 1065 }, { "epoch": 1.7719663096599771, "grad_norm": 0.0009236359037458897, "learning_rate": 3.1765764193490863e-06, "loss": 0.0024340436793863773, "step": 1066 }, { "epoch": 1.773630030154934, "grad_norm": 0.0012912286911159754, "learning_rate": 3.173269749342047e-06, "loss": 0.0013527884148061275, "step": 1067 }, { "epoch": 1.7752937506498907, "grad_norm": 0.0019723335281014442, "learning_rate": 3.1699618088883094e-06, "loss": 0.0035508442670106888, "step": 1068 }, { "epoch": 1.7769574711448477, "grad_norm": 0.0014623295282945037, "learning_rate": 3.1666526042298883e-06, "loss": 0.00359426811337471, "step": 1069 }, { "epoch": 1.7786211916398045, "grad_norm": 0.0019803629256784916, "learning_rate": 3.16334214161119e-06, "loss": 0.0036465353332459927, "step": 1070 }, { "epoch": 1.7802849121347615, "grad_norm": 0.0022365187760442495, "learning_rate": 3.1600304272789904e-06, "loss": 0.002207161160185933, "step": 1071 }, { "epoch": 1.7819486326297183, "grad_norm": 0.0012209509732201695, "learning_rate": 3.1567174674824303e-06, "loss": 0.0025660775136202574, "step": 1072 }, { "epoch": 1.783612353124675, "grad_norm": 0.0028881127946078777, "learning_rate": 3.1534032684729978e-06, "loss": 0.004914230667054653, "step": 1073 }, { "epoch": 1.7852760736196318, "grad_norm": 0.001741714426316321, "learning_rate": 3.1500878365045217e-06, "loss": 0.0034641423262655735, "step": 1074 }, { "epoch": 1.7869397941145886, "grad_norm": 0.00211711716838181, "learning_rate": 3.1467711778331573e-06, "loss": 0.004626632202416658, "step": 1075 }, { "epoch": 1.7886035146095456, "grad_norm": 0.002563687739893794, "learning_rate": 3.143453298717373e-06, "loss": 0.00369662931188941, "step": 1076 }, { "epoch": 1.7902672351045026, "grad_norm": 0.001437278580851853, "learning_rate": 3.14013420541794e-06, "loss": 0.0028063696809113026, "step": 1077 }, { "epoch": 1.7919309555994594, "grad_norm": 0.003704370930790901, "learning_rate": 3.1368139041979235e-06, "loss": 0.0021443888545036316, "step": 1078 }, { "epoch": 1.7935946760944161, "grad_norm": 0.0024365736171603203, "learning_rate": 3.133492401322666e-06, "loss": 0.0016761664301156998, "step": 1079 }, { "epoch": 1.795258396589373, "grad_norm": 0.001835328177548945, "learning_rate": 3.1301697030597772e-06, "loss": 0.003327090060338378, "step": 1080 }, { "epoch": 1.7969221170843297, "grad_norm": 0.0011926194420084357, "learning_rate": 3.126845815679123e-06, "loss": 0.002464383840560913, "step": 1081 }, { "epoch": 1.7985858375792867, "grad_norm": 0.0010406651999801397, "learning_rate": 3.1235207454528137e-06, "loss": 0.0008981494465842843, "step": 1082 }, { "epoch": 1.8002495580742435, "grad_norm": 0.0010186567669734359, "learning_rate": 3.12019449865519e-06, "loss": 0.003182903630658984, "step": 1083 }, { "epoch": 1.8019132785692005, "grad_norm": 0.0012448715278878808, "learning_rate": 3.116867081562815e-06, "loss": 0.0023764080833643675, "step": 1084 }, { "epoch": 1.8035769990641572, "grad_norm": 0.007940222509205341, "learning_rate": 3.1135385004544584e-06, "loss": 0.0053514945320785046, "step": 1085 }, { "epoch": 1.805240719559114, "grad_norm": 0.0014136300887912512, "learning_rate": 3.1102087616110866e-06, "loss": 0.00209840782918036, "step": 1086 }, { "epoch": 1.8069044400540708, "grad_norm": 0.002945336978882551, "learning_rate": 3.1068778713158515e-06, "loss": 0.002496186876669526, "step": 1087 }, { "epoch": 1.8085681605490278, "grad_norm": 0.001605421188287437, "learning_rate": 3.1035458358540764e-06, "loss": 0.002253869315609336, "step": 1088 }, { "epoch": 1.8102318810439846, "grad_norm": 0.0010100161889567971, "learning_rate": 3.100212661513247e-06, "loss": 0.001364353229291737, "step": 1089 }, { "epoch": 1.8118956015389416, "grad_norm": 0.001074383850209415, "learning_rate": 3.096878354582998e-06, "loss": 0.0016808705404400826, "step": 1090 }, { "epoch": 1.8135593220338984, "grad_norm": 0.0030826677102595568, "learning_rate": 3.093542921355099e-06, "loss": 0.0047744084149599075, "step": 1091 }, { "epoch": 1.8152230425288551, "grad_norm": 0.0025080074556171894, "learning_rate": 3.0902063681234473e-06, "loss": 0.0018723970279097557, "step": 1092 }, { "epoch": 1.816886763023812, "grad_norm": 0.0015788818709552288, "learning_rate": 3.086868701184054e-06, "loss": 0.001651267521083355, "step": 1093 }, { "epoch": 1.8185504835187687, "grad_norm": 0.0016100323991850019, "learning_rate": 3.083529926835028e-06, "loss": 0.003768754657357931, "step": 1094 }, { "epoch": 1.8202142040137257, "grad_norm": 0.003728320589289069, "learning_rate": 3.0801900513765732e-06, "loss": 0.0027151263784617186, "step": 1095 }, { "epoch": 1.8218779245086827, "grad_norm": 0.0030173619743436575, "learning_rate": 3.076849081110967e-06, "loss": 0.003926532343029976, "step": 1096 }, { "epoch": 1.8235416450036395, "grad_norm": 0.002205587923526764, "learning_rate": 3.073507022342554e-06, "loss": 0.002268116921186447, "step": 1097 }, { "epoch": 1.8252053654985962, "grad_norm": 0.002099486766383052, "learning_rate": 3.070163881377734e-06, "loss": 0.0035694725811481476, "step": 1098 }, { "epoch": 1.826869085993553, "grad_norm": 0.0023339507170021534, "learning_rate": 3.066819664524947e-06, "loss": 0.0021657149773091078, "step": 1099 }, { "epoch": 1.8285328064885098, "grad_norm": 0.0017025353154167533, "learning_rate": 3.063474378094665e-06, "loss": 0.001973213627934456, "step": 1100 }, { "epoch": 1.8285328064885098, "eval_loss": 0.0031945928931236267, "eval_runtime": 492.7979, "eval_samples_per_second": 9.757, "eval_steps_per_second": 2.439, "step": 1100 }, { "epoch": 1.8301965269834668, "grad_norm": 0.001167317503131926, "learning_rate": 3.060128028399376e-06, "loss": 0.002630096860229969, "step": 1101 }, { "epoch": 1.8318602474784236, "grad_norm": 0.001953288447111845, "learning_rate": 3.056780621753577e-06, "loss": 0.0033156988210976124, "step": 1102 }, { "epoch": 1.8335239679733806, "grad_norm": 0.0020119582768529654, "learning_rate": 3.0534321644737574e-06, "loss": 0.001855833805166185, "step": 1103 }, { "epoch": 1.8351876884683374, "grad_norm": 0.002142886398360133, "learning_rate": 3.0500826628783903e-06, "loss": 0.002585946349427104, "step": 1104 }, { "epoch": 1.8368514089632941, "grad_norm": 0.0028928187675774097, "learning_rate": 3.046732123287918e-06, "loss": 0.005204905290156603, "step": 1105 }, { "epoch": 1.838515129458251, "grad_norm": 0.0011265117209404707, "learning_rate": 3.043380552024744e-06, "loss": 0.0032711224630475044, "step": 1106 }, { "epoch": 1.840178849953208, "grad_norm": 0.0027929674834012985, "learning_rate": 3.0400279554132157e-06, "loss": 0.0024414071813225746, "step": 1107 }, { "epoch": 1.8418425704481647, "grad_norm": 0.0014338293112814426, "learning_rate": 3.0366743397796166e-06, "loss": 0.002678456250578165, "step": 1108 }, { "epoch": 1.8435062909431217, "grad_norm": 0.0025491593405604362, "learning_rate": 3.033319711452154e-06, "loss": 0.0031573877204209566, "step": 1109 }, { "epoch": 1.8451700114380785, "grad_norm": 0.0013114232569932938, "learning_rate": 3.0299640767609447e-06, "loss": 0.002688421867787838, "step": 1110 }, { "epoch": 1.8468337319330352, "grad_norm": 0.002950844122096896, "learning_rate": 3.0266074420380043e-06, "loss": 0.0028441757895052433, "step": 1111 }, { "epoch": 1.848497452427992, "grad_norm": 0.002961844904348254, "learning_rate": 3.023249813617238e-06, "loss": 0.0035082674585282803, "step": 1112 }, { "epoch": 1.8501611729229488, "grad_norm": 0.001897338661365211, "learning_rate": 3.0198911978344213e-06, "loss": 0.0033026670571416616, "step": 1113 }, { "epoch": 1.8518248934179058, "grad_norm": 0.0018700726795941591, "learning_rate": 3.0165316010271982e-06, "loss": 0.0023832167498767376, "step": 1114 }, { "epoch": 1.8534886139128628, "grad_norm": 0.0016329766949638724, "learning_rate": 3.0131710295350615e-06, "loss": 0.0044321902096271515, "step": 1115 }, { "epoch": 1.8551523344078196, "grad_norm": 0.00122320675291121, "learning_rate": 3.0098094896993413e-06, "loss": 0.0025666982401162386, "step": 1116 }, { "epoch": 1.8568160549027763, "grad_norm": 0.0013701791176572442, "learning_rate": 3.0064469878631986e-06, "loss": 0.0031274936627596617, "step": 1117 }, { "epoch": 1.8584797753977331, "grad_norm": 0.0023116201627999544, "learning_rate": 3.003083530371606e-06, "loss": 0.0033771577291190624, "step": 1118 }, { "epoch": 1.86014349589269, "grad_norm": 0.0016470644623041153, "learning_rate": 2.9997191235713435e-06, "loss": 0.004839047789573669, "step": 1119 }, { "epoch": 1.861807216387647, "grad_norm": 0.002336474135518074, "learning_rate": 2.9963537738109783e-06, "loss": 0.008031599223613739, "step": 1120 }, { "epoch": 1.8634709368826037, "grad_norm": 0.0022251568734645844, "learning_rate": 2.9929874874408595e-06, "loss": 0.0022900504991412163, "step": 1121 }, { "epoch": 1.8651346573775607, "grad_norm": 0.00211269105784595, "learning_rate": 2.9896202708131027e-06, "loss": 0.00309790950268507, "step": 1122 }, { "epoch": 1.8667983778725175, "grad_norm": 0.002572975354269147, "learning_rate": 2.98625213028158e-06, "loss": 0.0036967257037758827, "step": 1123 }, { "epoch": 1.8684620983674742, "grad_norm": 0.0022551484871655703, "learning_rate": 2.9828830722019046e-06, "loss": 0.00239252345636487, "step": 1124 }, { "epoch": 1.870125818862431, "grad_norm": 0.0015825015725567937, "learning_rate": 2.979513102931424e-06, "loss": 0.002202708274126053, "step": 1125 }, { "epoch": 1.871789539357388, "grad_norm": 0.0018934940453618765, "learning_rate": 2.9761422288292017e-06, "loss": 0.002752728760242462, "step": 1126 }, { "epoch": 1.8734532598523448, "grad_norm": 0.002726193517446518, "learning_rate": 2.9727704562560124e-06, "loss": 0.003436418017372489, "step": 1127 }, { "epoch": 1.8751169803473018, "grad_norm": 0.0008292084676213562, "learning_rate": 2.9693977915743227e-06, "loss": 0.0028455648571252823, "step": 1128 }, { "epoch": 1.8767807008422586, "grad_norm": 0.00249700085259974, "learning_rate": 2.9660242411482848e-06, "loss": 0.0030478155240416527, "step": 1129 }, { "epoch": 1.8784444213372153, "grad_norm": 0.0016656204825267196, "learning_rate": 2.9626498113437215e-06, "loss": 0.0029142191633582115, "step": 1130 }, { "epoch": 1.8801081418321721, "grad_norm": 0.0011652596294879913, "learning_rate": 2.9592745085281154e-06, "loss": 0.0027298261411488056, "step": 1131 }, { "epoch": 1.881771862327129, "grad_norm": 0.00196881964802742, "learning_rate": 2.955898339070596e-06, "loss": 0.0025863386690616608, "step": 1132 }, { "epoch": 1.883435582822086, "grad_norm": 0.0018504951149225235, "learning_rate": 2.9525213093419275e-06, "loss": 0.002715812064707279, "step": 1133 }, { "epoch": 1.885099303317043, "grad_norm": 0.0023180446587502956, "learning_rate": 2.9491434257144995e-06, "loss": 0.0040229130536317825, "step": 1134 }, { "epoch": 1.8867630238119997, "grad_norm": 0.002672442002221942, "learning_rate": 2.9457646945623107e-06, "loss": 0.0018784047570079565, "step": 1135 }, { "epoch": 1.8884267443069565, "grad_norm": 0.0018968256190419197, "learning_rate": 2.9423851222609607e-06, "loss": 0.002349269110709429, "step": 1136 }, { "epoch": 1.8900904648019132, "grad_norm": 0.0006651251460425556, "learning_rate": 2.939004715187635e-06, "loss": 0.0016391770914196968, "step": 1137 }, { "epoch": 1.89175418529687, "grad_norm": 0.0026850176509469748, "learning_rate": 2.935623479721095e-06, "loss": 0.004446036648005247, "step": 1138 }, { "epoch": 1.893417905791827, "grad_norm": 0.001719061634503305, "learning_rate": 2.932241422241665e-06, "loss": 0.00394339207559824, "step": 1139 }, { "epoch": 1.8950816262867838, "grad_norm": 0.0012696747435256839, "learning_rate": 2.9288585491312206e-06, "loss": 0.0010805041529238224, "step": 1140 }, { "epoch": 1.8967453467817408, "grad_norm": 0.0013430069666355848, "learning_rate": 2.925474866773176e-06, "loss": 0.0016727771144360304, "step": 1141 }, { "epoch": 1.8984090672766976, "grad_norm": 0.0015959268203005195, "learning_rate": 2.922090381552475e-06, "loss": 0.0031905556097626686, "step": 1142 }, { "epoch": 1.9000727877716543, "grad_norm": 0.0014629323268309236, "learning_rate": 2.9187050998555715e-06, "loss": 0.002370205009356141, "step": 1143 }, { "epoch": 1.9017365082666111, "grad_norm": 0.0007864710642024875, "learning_rate": 2.915319028070427e-06, "loss": 0.0023790618870407343, "step": 1144 }, { "epoch": 1.903400228761568, "grad_norm": 0.0021873966325074434, "learning_rate": 2.9119321725864914e-06, "loss": 0.00319493655115366, "step": 1145 }, { "epoch": 1.905063949256525, "grad_norm": 0.0016217492520809174, "learning_rate": 2.908544539794693e-06, "loss": 0.0021452787332236767, "step": 1146 }, { "epoch": 1.906727669751482, "grad_norm": 0.0013570383889600635, "learning_rate": 2.9051561360874297e-06, "loss": 0.004207832273095846, "step": 1147 }, { "epoch": 1.9083913902464387, "grad_norm": 0.0018747537396848202, "learning_rate": 2.901766967858551e-06, "loss": 0.003897372167557478, "step": 1148 }, { "epoch": 1.9100551107413954, "grad_norm": 0.001689862459897995, "learning_rate": 2.8983770415033507e-06, "loss": 0.002389749512076378, "step": 1149 }, { "epoch": 1.9117188312363522, "grad_norm": 0.0020829078275710344, "learning_rate": 2.8949863634185533e-06, "loss": 0.004125273786485195, "step": 1150 }, { "epoch": 1.913382551731309, "grad_norm": 0.0015352623304352164, "learning_rate": 2.8915949400022995e-06, "loss": 0.002619950333610177, "step": 1151 }, { "epoch": 1.915046272226266, "grad_norm": 0.0009680234943516552, "learning_rate": 2.8882027776541406e-06, "loss": 0.0014335473533719778, "step": 1152 }, { "epoch": 1.916709992721223, "grad_norm": 0.0008594352402724326, "learning_rate": 2.8848098827750186e-06, "loss": 0.0028461103793233633, "step": 1153 }, { "epoch": 1.9183737132161798, "grad_norm": 0.0011779307387769222, "learning_rate": 2.8814162617672586e-06, "loss": 0.0027607521042227745, "step": 1154 }, { "epoch": 1.9200374337111366, "grad_norm": 0.0011340143391862512, "learning_rate": 2.8780219210345573e-06, "loss": 0.0025814450345933437, "step": 1155 }, { "epoch": 1.9217011542060933, "grad_norm": 0.0008464404963888228, "learning_rate": 2.8746268669819676e-06, "loss": 0.0018523922190070152, "step": 1156 }, { "epoch": 1.92336487470105, "grad_norm": 0.001362015143968165, "learning_rate": 2.8712311060158904e-06, "loss": 0.0011739463079720736, "step": 1157 }, { "epoch": 1.925028595196007, "grad_norm": 0.0014083880232647061, "learning_rate": 2.8678346445440588e-06, "loss": 0.003657055553048849, "step": 1158 }, { "epoch": 1.9266923156909639, "grad_norm": 0.0016203102422878146, "learning_rate": 2.8644374889755284e-06, "loss": 0.0018270399887114763, "step": 1159 }, { "epoch": 1.9283560361859209, "grad_norm": 0.0005126083851791918, "learning_rate": 2.861039645720664e-06, "loss": 0.0013917206088081002, "step": 1160 }, { "epoch": 1.9300197566808777, "grad_norm": 0.0013629321474581957, "learning_rate": 2.85764112119113e-06, "loss": 0.00415899557992816, "step": 1161 }, { "epoch": 1.9316834771758344, "grad_norm": 0.0008958380785770714, "learning_rate": 2.854241921799874e-06, "loss": 0.0022279848344624043, "step": 1162 }, { "epoch": 1.9333471976707912, "grad_norm": 0.000760416267439723, "learning_rate": 2.850842053961119e-06, "loss": 0.0014205033658072352, "step": 1163 }, { "epoch": 1.935010918165748, "grad_norm": 0.0006858188426122069, "learning_rate": 2.847441524090347e-06, "loss": 0.0008534875814802945, "step": 1164 }, { "epoch": 1.936674638660705, "grad_norm": 0.0022949844133108854, "learning_rate": 2.844040338604291e-06, "loss": 0.003076274413615465, "step": 1165 }, { "epoch": 1.938338359155662, "grad_norm": 0.0010439804755151272, "learning_rate": 2.8406385039209217e-06, "loss": 0.004112100228667259, "step": 1166 }, { "epoch": 1.9400020796506188, "grad_norm": 0.002190642524510622, "learning_rate": 2.837236026459432e-06, "loss": 0.007742525544017553, "step": 1167 }, { "epoch": 1.9416658001455755, "grad_norm": 0.0006106036016717553, "learning_rate": 2.833832912640232e-06, "loss": 0.0012337150983512402, "step": 1168 }, { "epoch": 1.9433295206405323, "grad_norm": 0.0016449900576844811, "learning_rate": 2.8304291688849283e-06, "loss": 0.002717619063332677, "step": 1169 }, { "epoch": 1.944993241135489, "grad_norm": 0.0008701571496203542, "learning_rate": 2.827024801616319e-06, "loss": 0.002019080799072981, "step": 1170 }, { "epoch": 1.946656961630446, "grad_norm": 0.0016879767645150423, "learning_rate": 2.8236198172583765e-06, "loss": 0.004312856588512659, "step": 1171 }, { "epoch": 1.9483206821254029, "grad_norm": 0.0020904159173369408, "learning_rate": 2.820214222236241e-06, "loss": 0.0037140892818570137, "step": 1172 }, { "epoch": 1.9499844026203599, "grad_norm": 0.002312425058335066, "learning_rate": 2.816808022976201e-06, "loss": 0.003156473394483328, "step": 1173 }, { "epoch": 1.9516481231153167, "grad_norm": 0.00271781743504107, "learning_rate": 2.813401225905688e-06, "loss": 0.005529695190489292, "step": 1174 }, { "epoch": 1.9533118436102734, "grad_norm": 0.001697467640042305, "learning_rate": 2.8099938374532615e-06, "loss": 0.004889095202088356, "step": 1175 }, { "epoch": 1.9549755641052302, "grad_norm": 0.0007833896670490503, "learning_rate": 2.806585864048594e-06, "loss": 0.0024441727437078953, "step": 1176 }, { "epoch": 1.9566392846001872, "grad_norm": 0.0011363000376150012, "learning_rate": 2.8031773121224665e-06, "loss": 0.0024421929847449064, "step": 1177 }, { "epoch": 1.958303005095144, "grad_norm": 0.001958192326128483, "learning_rate": 2.799768188106747e-06, "loss": 0.0029815295711159706, "step": 1178 }, { "epoch": 1.959966725590101, "grad_norm": 0.0009831757051870227, "learning_rate": 2.7963584984343856e-06, "loss": 0.0018389209872111678, "step": 1179 }, { "epoch": 1.9616304460850578, "grad_norm": 0.0021405015140771866, "learning_rate": 2.7929482495393995e-06, "loss": 0.002608387963846326, "step": 1180 }, { "epoch": 1.9632941665800145, "grad_norm": 0.0015074997209012508, "learning_rate": 2.7895374478568608e-06, "loss": 0.0015620214398950338, "step": 1181 }, { "epoch": 1.9649578870749713, "grad_norm": 0.0005247404333204031, "learning_rate": 2.786126099822885e-06, "loss": 0.0012147929519414902, "step": 1182 }, { "epoch": 1.966621607569928, "grad_norm": 0.0006081117899157107, "learning_rate": 2.7827142118746187e-06, "loss": 0.00113436218816787, "step": 1183 }, { "epoch": 1.968285328064885, "grad_norm": 0.0008387586567550898, "learning_rate": 2.779301790450226e-06, "loss": 0.0012308708392083645, "step": 1184 }, { "epoch": 1.969949048559842, "grad_norm": 0.003745946567505598, "learning_rate": 2.7758888419888797e-06, "loss": 0.012694332748651505, "step": 1185 }, { "epoch": 1.9716127690547989, "grad_norm": 0.0008437780779786408, "learning_rate": 2.7724753729307454e-06, "loss": 0.0010596851352602243, "step": 1186 }, { "epoch": 1.9732764895497557, "grad_norm": 0.0026630330830812454, "learning_rate": 2.769061389716971e-06, "loss": 0.0029301990289241076, "step": 1187 }, { "epoch": 1.9749402100447124, "grad_norm": 0.0033500911667943, "learning_rate": 2.765646898789677e-06, "loss": 0.006291859783232212, "step": 1188 }, { "epoch": 1.9766039305396692, "grad_norm": 0.0017212156672030687, "learning_rate": 2.762231906591939e-06, "loss": 0.004028333351016045, "step": 1189 }, { "epoch": 1.9782676510346262, "grad_norm": 0.0007014305447228253, "learning_rate": 2.75881641956778e-06, "loss": 0.0016968459822237492, "step": 1190 }, { "epoch": 1.979931371529583, "grad_norm": 0.0030681570060551167, "learning_rate": 2.7554004441621562e-06, "loss": 0.0026011753361672163, "step": 1191 }, { "epoch": 1.98159509202454, "grad_norm": 0.002008343581110239, "learning_rate": 2.7519839868209462e-06, "loss": 0.005046532489359379, "step": 1192 }, { "epoch": 1.9832588125194968, "grad_norm": 0.0019504876108840108, "learning_rate": 2.748567053990937e-06, "loss": 0.003577637020498514, "step": 1193 }, { "epoch": 1.9849225330144535, "grad_norm": 0.002227432793006301, "learning_rate": 2.7451496521198144e-06, "loss": 0.005140396300703287, "step": 1194 }, { "epoch": 1.9865862535094103, "grad_norm": 0.0017220035661011934, "learning_rate": 2.741731787656146e-06, "loss": 0.0027806637808680534, "step": 1195 }, { "epoch": 1.9882499740043673, "grad_norm": 0.0010862553026527166, "learning_rate": 2.7383134670493765e-06, "loss": 0.0017553982324898243, "step": 1196 }, { "epoch": 1.989913694499324, "grad_norm": 0.0007901210919953883, "learning_rate": 2.734894696749808e-06, "loss": 0.0015418350230902433, "step": 1197 }, { "epoch": 1.991577414994281, "grad_norm": 0.0011503990972414613, "learning_rate": 2.7314754832085926e-06, "loss": 0.0023582661524415016, "step": 1198 }, { "epoch": 1.9932411354892379, "grad_norm": 0.002424992388114333, "learning_rate": 2.728055832877719e-06, "loss": 0.0024170917458832264, "step": 1199 }, { "epoch": 1.9949048559841946, "grad_norm": 0.0009666327969171107, "learning_rate": 2.7246357522099996e-06, "loss": 0.0011229885276407003, "step": 1200 }, { "epoch": 1.9949048559841946, "eval_loss": 0.0028202177491039038, "eval_runtime": 492.8314, "eval_samples_per_second": 9.756, "eval_steps_per_second": 2.439, "step": 1200 }, { "epoch": 1.9965685764791514, "grad_norm": 0.0008989392081275582, "learning_rate": 2.721215247659059e-06, "loss": 0.0006293777259998024, "step": 1201 }, { "epoch": 1.9982322969741082, "grad_norm": 0.0019021104089915752, "learning_rate": 2.7177943256793214e-06, "loss": 0.0019024854991585016, "step": 1202 }, { "epoch": 1.9998960174690652, "grad_norm": 0.001411781762726605, "learning_rate": 2.7143729927259992e-06, "loss": 0.003327167360112071, "step": 1203 }, { "epoch": 2.0, "grad_norm": 0.000364909996278584, "learning_rate": 2.7109512552550804e-06, "loss": 0.00020373324514366686, "step": 1204 }, { "epoch": 2.0016637204949568, "grad_norm": 0.0010985415428876877, "learning_rate": 2.707529119723315e-06, "loss": 0.0023868954740464687, "step": 1205 }, { "epoch": 2.0033274409899136, "grad_norm": 0.0006375758093781769, "learning_rate": 2.7041065925882054e-06, "loss": 0.0005520120030269027, "step": 1206 }, { "epoch": 2.0049911614848703, "grad_norm": 0.0005418790387921035, "learning_rate": 2.7006836803079934e-06, "loss": 0.0009355536894872785, "step": 1207 }, { "epoch": 2.0066548819798276, "grad_norm": 0.0011295054573565722, "learning_rate": 2.697260389341645e-06, "loss": 0.0026254295371472836, "step": 1208 }, { "epoch": 2.0083186024747843, "grad_norm": 0.00038410667912103236, "learning_rate": 2.693836726148844e-06, "loss": 0.0011726864613592625, "step": 1209 }, { "epoch": 2.009982322969741, "grad_norm": 0.00015486196207348257, "learning_rate": 2.6904126971899754e-06, "loss": 0.00015572525444440544, "step": 1210 }, { "epoch": 2.011646043464698, "grad_norm": 0.001346680917777121, "learning_rate": 2.686988308926112e-06, "loss": 0.0015534437261521816, "step": 1211 }, { "epoch": 2.0133097639596547, "grad_norm": 0.0009657160262577236, "learning_rate": 2.68356356781901e-06, "loss": 0.003846096573397517, "step": 1212 }, { "epoch": 2.0149734844546114, "grad_norm": 0.0003977494779974222, "learning_rate": 2.6801384803310855e-06, "loss": 0.0006163832149468362, "step": 1213 }, { "epoch": 2.0166372049495687, "grad_norm": 0.000773407518863678, "learning_rate": 2.676713052925411e-06, "loss": 0.0017007028218358755, "step": 1214 }, { "epoch": 2.0183009254445254, "grad_norm": 0.0014128134353086352, "learning_rate": 2.6732872920657018e-06, "loss": 0.0023884056136012077, "step": 1215 }, { "epoch": 2.019964645939482, "grad_norm": 0.0012431552167981863, "learning_rate": 2.6698612042162995e-06, "loss": 0.00187435292173177, "step": 1216 }, { "epoch": 2.021628366434439, "grad_norm": 0.0012625644449144602, "learning_rate": 2.6664347958421647e-06, "loss": 0.0011748441029340029, "step": 1217 }, { "epoch": 2.0232920869293958, "grad_norm": 0.0012824207078665495, "learning_rate": 2.6630080734088625e-06, "loss": 0.001920399023219943, "step": 1218 }, { "epoch": 2.0249558074243525, "grad_norm": 0.0007549470174126327, "learning_rate": 2.6595810433825496e-06, "loss": 0.0016656176885589957, "step": 1219 }, { "epoch": 2.0266195279193098, "grad_norm": 0.001832195557653904, "learning_rate": 2.6561537122299647e-06, "loss": 0.002855651779100299, "step": 1220 }, { "epoch": 2.0282832484142665, "grad_norm": 0.0010537878843024373, "learning_rate": 2.6527260864184135e-06, "loss": 0.0023729479871690273, "step": 1221 }, { "epoch": 2.0299469689092233, "grad_norm": 0.0011735305888578296, "learning_rate": 2.6492981724157576e-06, "loss": 0.0042937640100717545, "step": 1222 }, { "epoch": 2.03161068940418, "grad_norm": 0.0014465588610619307, "learning_rate": 2.6458699766904033e-06, "loss": 0.0026445563416928053, "step": 1223 }, { "epoch": 2.033274409899137, "grad_norm": 0.001032729516737163, "learning_rate": 2.6424415057112883e-06, "loss": 0.0027413975913077593, "step": 1224 }, { "epoch": 2.0349381303940937, "grad_norm": 0.000898035941645503, "learning_rate": 2.6390127659478698e-06, "loss": 0.0032835854217410088, "step": 1225 }, { "epoch": 2.0366018508890504, "grad_norm": 0.0013310214271768928, "learning_rate": 2.6355837638701115e-06, "loss": 0.0020950636826455593, "step": 1226 }, { "epoch": 2.0382655713840077, "grad_norm": 0.00045725522795692086, "learning_rate": 2.632154505948472e-06, "loss": 0.0014126868918538094, "step": 1227 }, { "epoch": 2.0399292918789644, "grad_norm": 0.0009511184180155396, "learning_rate": 2.6287249986538944e-06, "loss": 0.0018513341201469302, "step": 1228 }, { "epoch": 2.041593012373921, "grad_norm": 0.0011876067146658897, "learning_rate": 2.62529524845779e-06, "loss": 0.002044444438070059, "step": 1229 }, { "epoch": 2.043256732868878, "grad_norm": 0.0030956417322158813, "learning_rate": 2.6218652618320306e-06, "loss": 0.002602633088827133, "step": 1230 }, { "epoch": 2.0449204533638348, "grad_norm": 0.0008830548031255603, "learning_rate": 2.6184350452489317e-06, "loss": 0.0022057429887354374, "step": 1231 }, { "epoch": 2.0465841738587915, "grad_norm": 0.0007592173642478883, "learning_rate": 2.615004605181246e-06, "loss": 0.001316930167376995, "step": 1232 }, { "epoch": 2.0482478943537488, "grad_norm": 0.0010199942626059055, "learning_rate": 2.611573948102144e-06, "loss": 0.0023947772569954395, "step": 1233 }, { "epoch": 2.0499116148487055, "grad_norm": 0.000911121373064816, "learning_rate": 2.6081430804852093e-06, "loss": 0.001219225930981338, "step": 1234 }, { "epoch": 2.0515753353436623, "grad_norm": 0.0014884383417665958, "learning_rate": 2.604712008804421e-06, "loss": 0.0033853487111628056, "step": 1235 }, { "epoch": 2.053239055838619, "grad_norm": 0.0007127722492441535, "learning_rate": 2.601280739534143e-06, "loss": 0.0009231525473296642, "step": 1236 }, { "epoch": 2.054902776333576, "grad_norm": 0.0017329002730548382, "learning_rate": 2.5978492791491126e-06, "loss": 0.003366638207808137, "step": 1237 }, { "epoch": 2.0565664968285327, "grad_norm": 0.0020664972253143787, "learning_rate": 2.594417634124428e-06, "loss": 0.00315756443887949, "step": 1238 }, { "epoch": 2.05823021732349, "grad_norm": 0.0015665602404624224, "learning_rate": 2.590985810935535e-06, "loss": 0.001807854394428432, "step": 1239 }, { "epoch": 2.0598939378184467, "grad_norm": 0.0004127174324821681, "learning_rate": 2.5875538160582176e-06, "loss": 0.0014860054943710566, "step": 1240 }, { "epoch": 2.0615576583134034, "grad_norm": 0.0011286535300314426, "learning_rate": 2.58412165596858e-06, "loss": 0.0014345969539135695, "step": 1241 }, { "epoch": 2.06322137880836, "grad_norm": 0.0011797641636803746, "learning_rate": 2.5806893371430413e-06, "loss": 0.0024553739931434393, "step": 1242 }, { "epoch": 2.064885099303317, "grad_norm": 0.002415080787613988, "learning_rate": 2.57725686605832e-06, "loss": 0.003503277897834778, "step": 1243 }, { "epoch": 2.0665488197982738, "grad_norm": 0.001448194496333599, "learning_rate": 2.5738242491914206e-06, "loss": 0.0021282355301082134, "step": 1244 }, { "epoch": 2.0682125402932305, "grad_norm": 0.0005141524598002434, "learning_rate": 2.5703914930196227e-06, "loss": 0.0011137824039906263, "step": 1245 }, { "epoch": 2.0698762607881878, "grad_norm": 0.0024350290186703205, "learning_rate": 2.5669586040204697e-06, "loss": 0.002408391796052456, "step": 1246 }, { "epoch": 2.0715399812831445, "grad_norm": 0.0010828844970092177, "learning_rate": 2.5635255886717553e-06, "loss": 0.0017078034579753876, "step": 1247 }, { "epoch": 2.0732037017781013, "grad_norm": 0.0007732762023806572, "learning_rate": 2.560092453451512e-06, "loss": 0.0011814554454758763, "step": 1248 }, { "epoch": 2.074867422273058, "grad_norm": 0.004280226770788431, "learning_rate": 2.5566592048379975e-06, "loss": 0.004877451341599226, "step": 1249 }, { "epoch": 2.076531142768015, "grad_norm": 0.0010332359233871102, "learning_rate": 2.553225849309684e-06, "loss": 0.0014195141848176718, "step": 1250 }, { "epoch": 2.0781948632629716, "grad_norm": 0.0026121155824512243, "learning_rate": 2.5497923933452464e-06, "loss": 0.001718388288281858, "step": 1251 }, { "epoch": 2.079858583757929, "grad_norm": 0.0010297087719663978, "learning_rate": 2.5463588434235463e-06, "loss": 0.002603790257126093, "step": 1252 }, { "epoch": 2.0815223042528856, "grad_norm": 0.0010890235425904393, "learning_rate": 2.542925206023626e-06, "loss": 0.001935492386110127, "step": 1253 }, { "epoch": 2.0831860247478424, "grad_norm": 0.0012174684088677168, "learning_rate": 2.5394914876246916e-06, "loss": 0.0012549603125080466, "step": 1254 }, { "epoch": 2.084849745242799, "grad_norm": 0.0012653002049773932, "learning_rate": 2.5360576947061004e-06, "loss": 0.0020303502678871155, "step": 1255 }, { "epoch": 2.086513465737756, "grad_norm": 0.001454854034818709, "learning_rate": 2.5326238337473537e-06, "loss": 0.001309222192503512, "step": 1256 }, { "epoch": 2.0881771862327128, "grad_norm": 0.001833008136600256, "learning_rate": 2.5291899112280765e-06, "loss": 0.002758257556706667, "step": 1257 }, { "epoch": 2.0898409067276695, "grad_norm": 0.0009782977867871523, "learning_rate": 2.5257559336280145e-06, "loss": 0.0014265577774494886, "step": 1258 }, { "epoch": 2.0915046272226268, "grad_norm": 0.0008711738046258688, "learning_rate": 2.522321907427016e-06, "loss": 0.0021401201374828815, "step": 1259 }, { "epoch": 2.0931683477175835, "grad_norm": 0.0015375674702227116, "learning_rate": 2.5188878391050187e-06, "loss": 0.003357475157827139, "step": 1260 }, { "epoch": 2.0948320682125403, "grad_norm": 0.0019423742778599262, "learning_rate": 2.515453735142043e-06, "loss": 0.0037136247847229242, "step": 1261 }, { "epoch": 2.096495788707497, "grad_norm": 0.0012821252457797527, "learning_rate": 2.5120196020181752e-06, "loss": 0.0030806218273937702, "step": 1262 }, { "epoch": 2.098159509202454, "grad_norm": 0.00048500695265829563, "learning_rate": 2.5085854462135556e-06, "loss": 0.001238503959029913, "step": 1263 }, { "epoch": 2.0998232296974106, "grad_norm": 0.0020267192739993334, "learning_rate": 2.505151274208369e-06, "loss": 0.002566360402852297, "step": 1264 }, { "epoch": 2.101486950192368, "grad_norm": 0.0007626867154613137, "learning_rate": 2.50171709248283e-06, "loss": 0.0007100152433849871, "step": 1265 }, { "epoch": 2.1031506706873246, "grad_norm": 0.001000856515020132, "learning_rate": 2.4982829075171714e-06, "loss": 0.001992270816117525, "step": 1266 }, { "epoch": 2.1048143911822814, "grad_norm": 0.0008598253480158746, "learning_rate": 2.494848725791632e-06, "loss": 0.0017945771105587482, "step": 1267 }, { "epoch": 2.106478111677238, "grad_norm": 0.00042779420618899167, "learning_rate": 2.4914145537864453e-06, "loss": 0.001541482168249786, "step": 1268 }, { "epoch": 2.108141832172195, "grad_norm": 0.0016860082978382707, "learning_rate": 2.4879803979818256e-06, "loss": 0.002277143532410264, "step": 1269 }, { "epoch": 2.1098055526671518, "grad_norm": 0.0016471690032631159, "learning_rate": 2.4845462648579573e-06, "loss": 0.0015763354022055864, "step": 1270 }, { "epoch": 2.111469273162109, "grad_norm": 0.006739214528352022, "learning_rate": 2.481112160894982e-06, "loss": 0.0013695925008505583, "step": 1271 }, { "epoch": 2.1131329936570658, "grad_norm": 0.000667403161060065, "learning_rate": 2.4776780925729853e-06, "loss": 0.001070688245818019, "step": 1272 }, { "epoch": 2.1147967141520225, "grad_norm": 0.0034296319354325533, "learning_rate": 2.474244066371986e-06, "loss": 0.0019356642151251435, "step": 1273 }, { "epoch": 2.1164604346469793, "grad_norm": 0.0010742850136011839, "learning_rate": 2.4708100887719243e-06, "loss": 0.00449890922755003, "step": 1274 }, { "epoch": 2.118124155141936, "grad_norm": 0.0008409210131503642, "learning_rate": 2.4673761662526475e-06, "loss": 0.0014720135368406773, "step": 1275 }, { "epoch": 2.119787875636893, "grad_norm": 0.0013643614947795868, "learning_rate": 2.4639423052938995e-06, "loss": 0.0017583542503416538, "step": 1276 }, { "epoch": 2.1214515961318496, "grad_norm": 0.001211738446727395, "learning_rate": 2.4605085123753097e-06, "loss": 0.0019427321385592222, "step": 1277 }, { "epoch": 2.123115316626807, "grad_norm": 0.0016727972542867064, "learning_rate": 2.4570747939763745e-06, "loss": 0.0026699069421738386, "step": 1278 }, { "epoch": 2.1247790371217636, "grad_norm": 0.0009743532282300293, "learning_rate": 2.453641156576454e-06, "loss": 0.0024324795231223106, "step": 1279 }, { "epoch": 2.1264427576167204, "grad_norm": 0.0011676120338961482, "learning_rate": 2.4502076066547545e-06, "loss": 0.001394494785927236, "step": 1280 }, { "epoch": 2.128106478111677, "grad_norm": 0.001042691059410572, "learning_rate": 2.4467741506903162e-06, "loss": 0.0019729523919522762, "step": 1281 }, { "epoch": 2.129770198606634, "grad_norm": 0.0010410549584776163, "learning_rate": 2.443340795162003e-06, "loss": 0.0013870508410036564, "step": 1282 }, { "epoch": 2.1314339191015907, "grad_norm": 0.0010129284346476197, "learning_rate": 2.4399075465484883e-06, "loss": 0.001577532384544611, "step": 1283 }, { "epoch": 2.133097639596548, "grad_norm": 0.0017204082105308771, "learning_rate": 2.4364744113282455e-06, "loss": 0.0015860439743846655, "step": 1284 }, { "epoch": 2.1347613600915047, "grad_norm": 0.002252376638352871, "learning_rate": 2.433041395979531e-06, "loss": 0.0027545292396098375, "step": 1285 }, { "epoch": 2.1364250805864615, "grad_norm": 0.0013701152056455612, "learning_rate": 2.429608506980378e-06, "loss": 0.0038595260120928288, "step": 1286 }, { "epoch": 2.1380888010814183, "grad_norm": 0.0010166936554014683, "learning_rate": 2.4261757508085803e-06, "loss": 0.0021340944804251194, "step": 1287 }, { "epoch": 2.139752521576375, "grad_norm": 0.001493407879024744, "learning_rate": 2.422743133941681e-06, "loss": 0.00273123593069613, "step": 1288 }, { "epoch": 2.141416242071332, "grad_norm": 0.0010118827922269702, "learning_rate": 2.419310662856959e-06, "loss": 0.0008498326642438769, "step": 1289 }, { "epoch": 2.143079962566289, "grad_norm": 0.002250266494229436, "learning_rate": 2.415878344031421e-06, "loss": 0.0029660004656761885, "step": 1290 }, { "epoch": 2.144743683061246, "grad_norm": 0.0014006498968228698, "learning_rate": 2.4124461839417832e-06, "loss": 0.0014974691439419985, "step": 1291 }, { "epoch": 2.1464074035562026, "grad_norm": 0.0015807535964995623, "learning_rate": 2.4090141890644654e-06, "loss": 0.0029796557500958443, "step": 1292 }, { "epoch": 2.1480711240511594, "grad_norm": 0.0005720490007661283, "learning_rate": 2.405582365875573e-06, "loss": 0.0011643368052318692, "step": 1293 }, { "epoch": 2.149734844546116, "grad_norm": 0.0018117536092177033, "learning_rate": 2.4021507208508882e-06, "loss": 0.002712543122470379, "step": 1294 }, { "epoch": 2.151398565041073, "grad_norm": 0.0020061598625034094, "learning_rate": 2.398719260465858e-06, "loss": 0.0019743884913623333, "step": 1295 }, { "epoch": 2.1530622855360297, "grad_norm": 0.0008923659333959222, "learning_rate": 2.3952879911955794e-06, "loss": 0.0012087312061339617, "step": 1296 }, { "epoch": 2.154726006030987, "grad_norm": 0.00046620931243523955, "learning_rate": 2.391856919514791e-06, "loss": 0.0005710732657462358, "step": 1297 }, { "epoch": 2.1563897265259437, "grad_norm": 0.000859422842040658, "learning_rate": 2.3884260518978562e-06, "loss": 0.0024305065162479877, "step": 1298 }, { "epoch": 2.1580534470209005, "grad_norm": 0.00048544761375524104, "learning_rate": 2.3849953948187552e-06, "loss": 0.002606534631922841, "step": 1299 }, { "epoch": 2.1597171675158573, "grad_norm": 0.0009777328232303262, "learning_rate": 2.3815649547510687e-06, "loss": 0.0029517700895667076, "step": 1300 }, { "epoch": 2.1597171675158573, "eval_loss": 0.0023095491342246532, "eval_runtime": 491.4412, "eval_samples_per_second": 9.783, "eval_steps_per_second": 2.446, "step": 1300 }, { "epoch": 2.161380888010814, "grad_norm": 0.000683385122101754, "learning_rate": 2.37813473816797e-06, "loss": 0.0020315079018473625, "step": 1301 }, { "epoch": 2.163044608505771, "grad_norm": 0.000580743420869112, "learning_rate": 2.3747047515422102e-06, "loss": 0.0006980924517847598, "step": 1302 }, { "epoch": 2.164708329000728, "grad_norm": 0.0011160477297380567, "learning_rate": 2.371275001346106e-06, "loss": 0.0017467639409005642, "step": 1303 }, { "epoch": 2.166372049495685, "grad_norm": 0.0030428199097514153, "learning_rate": 2.367845494051529e-06, "loss": 0.0016722860746085644, "step": 1304 }, { "epoch": 2.1680357699906416, "grad_norm": 0.0008094862569123507, "learning_rate": 2.3644162361298897e-06, "loss": 0.0006268250290304422, "step": 1305 }, { "epoch": 2.1696994904855984, "grad_norm": 0.0016655492363497615, "learning_rate": 2.360987234052131e-06, "loss": 0.0021564862690865993, "step": 1306 }, { "epoch": 2.171363210980555, "grad_norm": 0.0011375686153769493, "learning_rate": 2.357558494288712e-06, "loss": 0.0009511542157270014, "step": 1307 }, { "epoch": 2.173026931475512, "grad_norm": 0.0017173878150060773, "learning_rate": 2.354130023309597e-06, "loss": 0.0010811176616698503, "step": 1308 }, { "epoch": 2.1746906519704687, "grad_norm": 0.0007098483620211482, "learning_rate": 2.350701827584243e-06, "loss": 0.0018661936046555638, "step": 1309 }, { "epoch": 2.176354372465426, "grad_norm": 0.0013119137147441506, "learning_rate": 2.3472739135815877e-06, "loss": 0.0024188230745494366, "step": 1310 }, { "epoch": 2.1780180929603827, "grad_norm": 0.0021531626116484404, "learning_rate": 2.343846287770036e-06, "loss": 0.001036312896758318, "step": 1311 }, { "epoch": 2.1796818134553395, "grad_norm": 0.0008549646590836346, "learning_rate": 2.340418956617451e-06, "loss": 0.0008003504481166601, "step": 1312 }, { "epoch": 2.1813455339502963, "grad_norm": 0.00044107611756771803, "learning_rate": 2.336991926591138e-06, "loss": 0.001624225522391498, "step": 1313 }, { "epoch": 2.183009254445253, "grad_norm": 0.0014437201898545027, "learning_rate": 2.3335652041578352e-06, "loss": 0.001267612329684198, "step": 1314 }, { "epoch": 2.18467297494021, "grad_norm": 0.0021631629206240177, "learning_rate": 2.3301387957837017e-06, "loss": 0.001268639462068677, "step": 1315 }, { "epoch": 2.186336695435167, "grad_norm": 0.000520045985467732, "learning_rate": 2.326712707934299e-06, "loss": 0.0013215752551332116, "step": 1316 }, { "epoch": 2.188000415930124, "grad_norm": 0.0011714874999597669, "learning_rate": 2.3232869470745893e-06, "loss": 0.001322682248428464, "step": 1317 }, { "epoch": 2.1896641364250806, "grad_norm": 0.0010334184626117349, "learning_rate": 2.3198615196689153e-06, "loss": 0.001695943996310234, "step": 1318 }, { "epoch": 2.1913278569200374, "grad_norm": 0.0021170424297451973, "learning_rate": 2.3164364321809906e-06, "loss": 0.0025002877227962017, "step": 1319 }, { "epoch": 2.192991577414994, "grad_norm": 0.0027872321661561728, "learning_rate": 2.3130116910738874e-06, "loss": 0.0025419231969863176, "step": 1320 }, { "epoch": 2.194655297909951, "grad_norm": 0.0006817941321060061, "learning_rate": 2.309587302810026e-06, "loss": 0.0017084216233342886, "step": 1321 }, { "epoch": 2.196319018404908, "grad_norm": 0.001300262869335711, "learning_rate": 2.306163273851157e-06, "loss": 0.0018340634414926171, "step": 1322 }, { "epoch": 2.197982738899865, "grad_norm": 0.000826711009722203, "learning_rate": 2.302739610658356e-06, "loss": 0.0016389775555580854, "step": 1323 }, { "epoch": 2.1996464593948217, "grad_norm": 0.005244811996817589, "learning_rate": 2.2993163196920075e-06, "loss": 0.005631591659039259, "step": 1324 }, { "epoch": 2.2013101798897785, "grad_norm": 0.0012326426804065704, "learning_rate": 2.295893407411795e-06, "loss": 0.0012298778165131807, "step": 1325 }, { "epoch": 2.2029739003847353, "grad_norm": 0.0007586965220980346, "learning_rate": 2.2924708802766857e-06, "loss": 0.000672422640491277, "step": 1326 }, { "epoch": 2.204637620879692, "grad_norm": 0.0018974530976265669, "learning_rate": 2.2890487447449204e-06, "loss": 0.00189477507956326, "step": 1327 }, { "epoch": 2.2063013413746493, "grad_norm": 0.001914669992402196, "learning_rate": 2.285627007274001e-06, "loss": 0.0016375902341678739, "step": 1328 }, { "epoch": 2.207965061869606, "grad_norm": 0.0006555395666509867, "learning_rate": 2.282205674320679e-06, "loss": 0.0009445602772757411, "step": 1329 }, { "epoch": 2.209628782364563, "grad_norm": 0.0006078650476410985, "learning_rate": 2.2787847523409416e-06, "loss": 0.0010109645081683993, "step": 1330 }, { "epoch": 2.2112925028595196, "grad_norm": 0.0008630360243842006, "learning_rate": 2.2753642477900012e-06, "loss": 0.0009613513248041272, "step": 1331 }, { "epoch": 2.2129562233544764, "grad_norm": 0.0013055541785433888, "learning_rate": 2.2719441671222815e-06, "loss": 0.0035888648126274347, "step": 1332 }, { "epoch": 2.214619943849433, "grad_norm": 0.0025269987527281046, "learning_rate": 2.268524516791408e-06, "loss": 0.003346838988363743, "step": 1333 }, { "epoch": 2.21628366434439, "grad_norm": 0.0008320605847984552, "learning_rate": 2.2651053032501928e-06, "loss": 0.002192307496443391, "step": 1334 }, { "epoch": 2.217947384839347, "grad_norm": 0.0015165675431489944, "learning_rate": 2.261686532950624e-06, "loss": 0.0017252475954592228, "step": 1335 }, { "epoch": 2.219611105334304, "grad_norm": 0.0014292318373918533, "learning_rate": 2.2582682123438547e-06, "loss": 0.002852159086614847, "step": 1336 }, { "epoch": 2.2212748258292607, "grad_norm": 0.002281657885760069, "learning_rate": 2.254850347880187e-06, "loss": 0.004102081526070833, "step": 1337 }, { "epoch": 2.2229385463242175, "grad_norm": 0.0006204011733643711, "learning_rate": 2.2514329460090633e-06, "loss": 0.000904565560631454, "step": 1338 }, { "epoch": 2.2246022668191743, "grad_norm": 0.0009091427782550454, "learning_rate": 2.248016013179054e-06, "loss": 0.0014529551845043898, "step": 1339 }, { "epoch": 2.226265987314131, "grad_norm": 0.0004098558274563402, "learning_rate": 2.244599555837844e-06, "loss": 0.0008046920411288738, "step": 1340 }, { "epoch": 2.2279297078090883, "grad_norm": 0.000955933821387589, "learning_rate": 2.2411835804322206e-06, "loss": 0.0019180045928806067, "step": 1341 }, { "epoch": 2.229593428304045, "grad_norm": 0.0008697609300725162, "learning_rate": 2.2377680934080625e-06, "loss": 0.0011718597961589694, "step": 1342 }, { "epoch": 2.231257148799002, "grad_norm": 0.0013697652611881495, "learning_rate": 2.2343531012103244e-06, "loss": 0.0017025491688400507, "step": 1343 }, { "epoch": 2.2329208692939586, "grad_norm": 0.001512395218014717, "learning_rate": 2.2309386102830295e-06, "loss": 0.004064799752086401, "step": 1344 }, { "epoch": 2.2345845897889154, "grad_norm": 0.0010302969021722674, "learning_rate": 2.227524627069256e-06, "loss": 0.0012793681817129254, "step": 1345 }, { "epoch": 2.236248310283872, "grad_norm": 0.0011541899293661118, "learning_rate": 2.2241111580111207e-06, "loss": 0.0014875237829983234, "step": 1346 }, { "epoch": 2.237912030778829, "grad_norm": 0.0012230722932145, "learning_rate": 2.220698209549774e-06, "loss": 0.0022882595658302307, "step": 1347 }, { "epoch": 2.239575751273786, "grad_norm": 0.0009680510847829282, "learning_rate": 2.2172857881253825e-06, "loss": 0.0015226060058921576, "step": 1348 }, { "epoch": 2.241239471768743, "grad_norm": 0.0006873685633763671, "learning_rate": 2.2138739001771157e-06, "loss": 0.0015360141405835748, "step": 1349 }, { "epoch": 2.2429031922636997, "grad_norm": 0.0008744509541429579, "learning_rate": 2.2104625521431396e-06, "loss": 0.00249713147059083, "step": 1350 }, { "epoch": 2.2445669127586565, "grad_norm": 0.0005692843697033823, "learning_rate": 2.207051750460601e-06, "loss": 0.0018285006517544389, "step": 1351 }, { "epoch": 2.2462306332536133, "grad_norm": 0.0012063119793310761, "learning_rate": 2.2036415015656148e-06, "loss": 0.001575134927406907, "step": 1352 }, { "epoch": 2.24789435374857, "grad_norm": 0.0007556597120128572, "learning_rate": 2.2002318118932543e-06, "loss": 0.0016872566193342209, "step": 1353 }, { "epoch": 2.2495580742435273, "grad_norm": 0.0036170482635498047, "learning_rate": 2.1968226878775347e-06, "loss": 0.004833456128835678, "step": 1354 }, { "epoch": 2.251221794738484, "grad_norm": 0.0002517940883990377, "learning_rate": 2.1934141359514062e-06, "loss": 0.0006511223036795855, "step": 1355 }, { "epoch": 2.252885515233441, "grad_norm": 0.0014082285342738032, "learning_rate": 2.1900061625467393e-06, "loss": 0.003841613419353962, "step": 1356 }, { "epoch": 2.2545492357283976, "grad_norm": 0.0004671132192015648, "learning_rate": 2.1865987740943116e-06, "loss": 0.000753692933358252, "step": 1357 }, { "epoch": 2.2562129562233544, "grad_norm": 0.0010657520033419132, "learning_rate": 2.183191977023799e-06, "loss": 0.0016676551895216107, "step": 1358 }, { "epoch": 2.257876676718311, "grad_norm": 0.0010002084309235215, "learning_rate": 2.17978577776376e-06, "loss": 0.0006241835653781891, "step": 1359 }, { "epoch": 2.259540397213268, "grad_norm": 0.0013890896225348115, "learning_rate": 2.176380182741624e-06, "loss": 0.0022400356829166412, "step": 1360 }, { "epoch": 2.261204117708225, "grad_norm": 0.00048793599125929177, "learning_rate": 2.172975198383682e-06, "loss": 0.0003696032799780369, "step": 1361 }, { "epoch": 2.262867838203182, "grad_norm": 0.002617779653519392, "learning_rate": 2.169570831115072e-06, "loss": 0.002739534480497241, "step": 1362 }, { "epoch": 2.2645315586981387, "grad_norm": 0.0011962675489485264, "learning_rate": 2.1661670873597686e-06, "loss": 0.002317224396392703, "step": 1363 }, { "epoch": 2.2661952791930955, "grad_norm": 0.0007817652076482773, "learning_rate": 2.1627639735405683e-06, "loss": 0.0005807623383589089, "step": 1364 }, { "epoch": 2.2678589996880523, "grad_norm": 0.0009937586728483438, "learning_rate": 2.1593614960790795e-06, "loss": 0.0020293928682804108, "step": 1365 }, { "epoch": 2.2695227201830095, "grad_norm": 0.0014677675208076835, "learning_rate": 2.15595966139571e-06, "loss": 0.0034731775522232056, "step": 1366 }, { "epoch": 2.2711864406779663, "grad_norm": 0.0005615411209873855, "learning_rate": 2.152558475909654e-06, "loss": 0.0013579119695350528, "step": 1367 }, { "epoch": 2.272850161172923, "grad_norm": 0.001657299930229783, "learning_rate": 2.149157946038882e-06, "loss": 0.0017475045751780272, "step": 1368 }, { "epoch": 2.27451388166788, "grad_norm": 0.0018001466523855925, "learning_rate": 2.145758078200126e-06, "loss": 0.0034491694532334805, "step": 1369 }, { "epoch": 2.2761776021628366, "grad_norm": 0.0018540103919804096, "learning_rate": 2.1423588788088704e-06, "loss": 0.00254809926263988, "step": 1370 }, { "epoch": 2.2778413226577934, "grad_norm": 0.0010992720490321517, "learning_rate": 2.1389603542793364e-06, "loss": 0.0014880215749144554, "step": 1371 }, { "epoch": 2.27950504315275, "grad_norm": 0.0004704767488874495, "learning_rate": 2.1355625110244725e-06, "loss": 0.0011282748309895396, "step": 1372 }, { "epoch": 2.2811687636477074, "grad_norm": 0.0004566149436868727, "learning_rate": 2.1321653554559425e-06, "loss": 0.0008396217599511147, "step": 1373 }, { "epoch": 2.282832484142664, "grad_norm": 0.0017184752505272627, "learning_rate": 2.1287688939841104e-06, "loss": 0.0032348756212741137, "step": 1374 }, { "epoch": 2.284496204637621, "grad_norm": 0.0005679796449840069, "learning_rate": 2.125373133018033e-06, "loss": 0.0008454375201836228, "step": 1375 }, { "epoch": 2.2861599251325777, "grad_norm": 0.0016176571371033788, "learning_rate": 2.1219780789654436e-06, "loss": 0.0013313901145011187, "step": 1376 }, { "epoch": 2.2878236456275345, "grad_norm": 0.0033084191381931305, "learning_rate": 2.1185837382327422e-06, "loss": 0.0016603253316134214, "step": 1377 }, { "epoch": 2.2894873661224913, "grad_norm": 0.0013860368635505438, "learning_rate": 2.1151901172249823e-06, "loss": 0.0009522234322503209, "step": 1378 }, { "epoch": 2.2911510866174485, "grad_norm": 0.0012617844622582197, "learning_rate": 2.1117972223458598e-06, "loss": 0.00269622216001153, "step": 1379 }, { "epoch": 2.2928148071124053, "grad_norm": 0.0007739314460195601, "learning_rate": 2.108405059997701e-06, "loss": 0.0008796498877927661, "step": 1380 }, { "epoch": 2.294478527607362, "grad_norm": 0.0017251380486413836, "learning_rate": 2.1050136365814484e-06, "loss": 0.0015982100740075111, "step": 1381 }, { "epoch": 2.296142248102319, "grad_norm": 0.0005771266296505928, "learning_rate": 2.10162295849665e-06, "loss": 0.0005673667183145881, "step": 1382 }, { "epoch": 2.2978059685972756, "grad_norm": 0.0017698346637189388, "learning_rate": 2.0982330321414495e-06, "loss": 0.0014651466626673937, "step": 1383 }, { "epoch": 2.2994696890922324, "grad_norm": 0.0010502157965674996, "learning_rate": 2.094843863912571e-06, "loss": 0.002775599481537938, "step": 1384 }, { "epoch": 2.301133409587189, "grad_norm": 0.00099456706084311, "learning_rate": 2.0914554602053072e-06, "loss": 0.0009711871389299631, "step": 1385 }, { "epoch": 2.3027971300821464, "grad_norm": 0.0018341030227020383, "learning_rate": 2.0880678274135103e-06, "loss": 0.002917747711762786, "step": 1386 }, { "epoch": 2.304460850577103, "grad_norm": 0.0009427666082046926, "learning_rate": 2.084680971929574e-06, "loss": 0.0017029845621436834, "step": 1387 }, { "epoch": 2.30612457107206, "grad_norm": 0.004653818905353546, "learning_rate": 2.0812949001444293e-06, "loss": 0.002260258886963129, "step": 1388 }, { "epoch": 2.3077882915670167, "grad_norm": 0.0009278209763579071, "learning_rate": 2.077909618447526e-06, "loss": 0.0015303139807656407, "step": 1389 }, { "epoch": 2.3094520120619735, "grad_norm": 0.002043406944721937, "learning_rate": 2.0745251332268238e-06, "loss": 0.0017000572988763452, "step": 1390 }, { "epoch": 2.3111157325569303, "grad_norm": 0.0007454622536897659, "learning_rate": 2.07114145086878e-06, "loss": 0.001288210623897612, "step": 1391 }, { "epoch": 2.3127794530518875, "grad_norm": 0.0007290096255019307, "learning_rate": 2.0677585777583366e-06, "loss": 0.0012519609881564975, "step": 1392 }, { "epoch": 2.3144431735468443, "grad_norm": 0.001200285041704774, "learning_rate": 2.0643765202789064e-06, "loss": 0.0011761067435145378, "step": 1393 }, { "epoch": 2.316106894041801, "grad_norm": 0.00094900670228526, "learning_rate": 2.060995284812366e-06, "loss": 0.0020300294272601604, "step": 1394 }, { "epoch": 2.317770614536758, "grad_norm": 0.00196209829300642, "learning_rate": 2.0576148777390397e-06, "loss": 0.0012100592721253633, "step": 1395 }, { "epoch": 2.3194343350317146, "grad_norm": 0.0006102774059399962, "learning_rate": 2.0542353054376893e-06, "loss": 0.0009437316912226379, "step": 1396 }, { "epoch": 2.3210980555266714, "grad_norm": 0.001464645960368216, "learning_rate": 2.0508565742855017e-06, "loss": 0.0018985284259542823, "step": 1397 }, { "epoch": 2.322761776021628, "grad_norm": 0.0013890875270590186, "learning_rate": 2.0474786906580733e-06, "loss": 0.0029032896272838116, "step": 1398 }, { "epoch": 2.3244254965165854, "grad_norm": 0.0017763370415195823, "learning_rate": 2.044101660929405e-06, "loss": 0.0016929400153458118, "step": 1399 }, { "epoch": 2.326089217011542, "grad_norm": 0.000577530125156045, "learning_rate": 2.040725491471885e-06, "loss": 0.0017563023138791323, "step": 1400 }, { "epoch": 2.326089217011542, "eval_loss": 0.002368729095906019, "eval_runtime": 491.5856, "eval_samples_per_second": 9.781, "eval_steps_per_second": 2.445, "step": 1400 }, { "epoch": 2.327752937506499, "grad_norm": 0.00043292154441587627, "learning_rate": 2.037350188656279e-06, "loss": 0.0007160247769206762, "step": 1401 }, { "epoch": 2.3294166580014557, "grad_norm": 0.0012572737177833915, "learning_rate": 2.0339757588517165e-06, "loss": 0.0022942391224205494, "step": 1402 }, { "epoch": 2.3310803784964125, "grad_norm": 0.0006292902980931103, "learning_rate": 2.0306022084256786e-06, "loss": 0.0004353314870968461, "step": 1403 }, { "epoch": 2.3327440989913697, "grad_norm": 0.001153657678514719, "learning_rate": 2.027229543743989e-06, "loss": 0.0024223560467362404, "step": 1404 }, { "epoch": 2.3344078194863265, "grad_norm": 0.000907072564586997, "learning_rate": 2.0238577711707987e-06, "loss": 0.0014202623860910535, "step": 1405 }, { "epoch": 2.3360715399812833, "grad_norm": 0.0012798805255442858, "learning_rate": 2.0204868970685764e-06, "loss": 0.0024821199476718903, "step": 1406 }, { "epoch": 2.33773526047624, "grad_norm": 0.0017148812767118216, "learning_rate": 2.0171169277980954e-06, "loss": 0.0014755388256162405, "step": 1407 }, { "epoch": 2.339398980971197, "grad_norm": 0.0031123829539865255, "learning_rate": 2.0137478697184205e-06, "loss": 0.004206657875329256, "step": 1408 }, { "epoch": 2.3410627014661536, "grad_norm": 0.001084084389731288, "learning_rate": 2.0103797291868977e-06, "loss": 0.0021153988782316446, "step": 1409 }, { "epoch": 2.3427264219611104, "grad_norm": 0.002781338058412075, "learning_rate": 2.0070125125591414e-06, "loss": 0.001565725775435567, "step": 1410 }, { "epoch": 2.344390142456067, "grad_norm": 0.0021625482477247715, "learning_rate": 2.0036462261890225e-06, "loss": 0.003212597919628024, "step": 1411 }, { "epoch": 2.3460538629510244, "grad_norm": 0.0009133850689977407, "learning_rate": 2.0002808764286573e-06, "loss": 0.0007037279428914189, "step": 1412 }, { "epoch": 2.347717583445981, "grad_norm": 0.0021134146954864264, "learning_rate": 1.9969164696283945e-06, "loss": 0.0028274108190089464, "step": 1413 }, { "epoch": 2.349381303940938, "grad_norm": 0.0014362254878506064, "learning_rate": 1.9935530121368023e-06, "loss": 0.002772002946585417, "step": 1414 }, { "epoch": 2.3510450244358947, "grad_norm": 0.001166126225143671, "learning_rate": 1.990190510300659e-06, "loss": 0.0007662165444344282, "step": 1415 }, { "epoch": 2.3527087449308515, "grad_norm": 0.007451492827385664, "learning_rate": 1.986828970464939e-06, "loss": 0.0043607125990092754, "step": 1416 }, { "epoch": 2.3543724654258087, "grad_norm": 0.0006244085379876196, "learning_rate": 1.983468398972802e-06, "loss": 0.001204540254548192, "step": 1417 }, { "epoch": 2.3560361859207655, "grad_norm": 0.0016244313446804881, "learning_rate": 1.980108802165579e-06, "loss": 0.003739001462236047, "step": 1418 }, { "epoch": 2.3576999064157222, "grad_norm": 0.0021900099236518145, "learning_rate": 1.976750186382764e-06, "loss": 0.0014113308861851692, "step": 1419 }, { "epoch": 2.359363626910679, "grad_norm": 0.0023189790081232786, "learning_rate": 1.9733925579619965e-06, "loss": 0.003480505431070924, "step": 1420 }, { "epoch": 2.361027347405636, "grad_norm": 0.002194036729633808, "learning_rate": 1.970035923239056e-06, "loss": 0.002109128050506115, "step": 1421 }, { "epoch": 2.3626910679005926, "grad_norm": 0.0006388880428858101, "learning_rate": 1.9666802885478463e-06, "loss": 0.00182803301140666, "step": 1422 }, { "epoch": 2.3643547883955494, "grad_norm": 0.0022078445181250572, "learning_rate": 1.963325660220384e-06, "loss": 0.0020670914091169834, "step": 1423 }, { "epoch": 2.3660185088905066, "grad_norm": 0.008101390674710274, "learning_rate": 1.9599720445867856e-06, "loss": 0.0016147526912391186, "step": 1424 }, { "epoch": 2.3676822293854634, "grad_norm": 0.0006417955737560987, "learning_rate": 1.956619447975257e-06, "loss": 0.001665701624006033, "step": 1425 }, { "epoch": 2.36934594988042, "grad_norm": 0.001856435090303421, "learning_rate": 1.9532678767120827e-06, "loss": 0.001060395734384656, "step": 1426 }, { "epoch": 2.371009670375377, "grad_norm": 0.002327260095626116, "learning_rate": 1.9499173371216105e-06, "loss": 0.005265223328024149, "step": 1427 }, { "epoch": 2.3726733908703337, "grad_norm": 0.0020023256074637175, "learning_rate": 1.946567835526243e-06, "loss": 0.0014220771845430136, "step": 1428 }, { "epoch": 2.3743371113652905, "grad_norm": 0.0010369986994192004, "learning_rate": 1.943219378246423e-06, "loss": 0.0021268511191010475, "step": 1429 }, { "epoch": 2.3760008318602477, "grad_norm": 0.001503292704001069, "learning_rate": 1.9398719716006246e-06, "loss": 0.0016041295602917671, "step": 1430 }, { "epoch": 2.3776645523552045, "grad_norm": 0.0012758244993165135, "learning_rate": 1.936525621905336e-06, "loss": 0.002271541627123952, "step": 1431 }, { "epoch": 2.3793282728501612, "grad_norm": 0.0009672525920905173, "learning_rate": 1.9331803354750537e-06, "loss": 0.0027922196313738823, "step": 1432 }, { "epoch": 2.380991993345118, "grad_norm": 0.0017020419472828507, "learning_rate": 1.9298361186222665e-06, "loss": 0.003419260261580348, "step": 1433 }, { "epoch": 2.382655713840075, "grad_norm": 0.0012927661882713437, "learning_rate": 1.926492977657446e-06, "loss": 0.0022287052124738693, "step": 1434 }, { "epoch": 2.3843194343350316, "grad_norm": 0.0008178834686987102, "learning_rate": 1.9231509188890345e-06, "loss": 0.001787499408237636, "step": 1435 }, { "epoch": 2.3859831548299884, "grad_norm": 0.0016086464747786522, "learning_rate": 1.919809948623428e-06, "loss": 0.001454781275242567, "step": 1436 }, { "epoch": 2.3876468753249456, "grad_norm": 0.0008009792654775083, "learning_rate": 1.9164700731649723e-06, "loss": 0.0010338013526052237, "step": 1437 }, { "epoch": 2.3893105958199023, "grad_norm": 0.0006302039255388081, "learning_rate": 1.913131298815947e-06, "loss": 0.0007497619953937829, "step": 1438 }, { "epoch": 2.390974316314859, "grad_norm": 0.00047956148046068847, "learning_rate": 1.9097936318765527e-06, "loss": 0.0007028962718322873, "step": 1439 }, { "epoch": 2.392638036809816, "grad_norm": 0.0009208683623000979, "learning_rate": 1.906457078644901e-06, "loss": 0.0025395380798727274, "step": 1440 }, { "epoch": 2.3943017573047727, "grad_norm": 0.0004860176413785666, "learning_rate": 1.903121645417003e-06, "loss": 0.0011189526412636042, "step": 1441 }, { "epoch": 2.3959654777997295, "grad_norm": 0.0008918407256715, "learning_rate": 1.8997873384867534e-06, "loss": 0.0019672035705298185, "step": 1442 }, { "epoch": 2.3976291982946867, "grad_norm": 0.0006723460974171758, "learning_rate": 1.8964541641459242e-06, "loss": 0.001565326121635735, "step": 1443 }, { "epoch": 2.3992929187896435, "grad_norm": 0.0010983988177031279, "learning_rate": 1.893122128684149e-06, "loss": 0.0020677861757576466, "step": 1444 }, { "epoch": 2.4009566392846002, "grad_norm": 0.000979145523160696, "learning_rate": 1.8897912383889138e-06, "loss": 0.0024629514664411545, "step": 1445 }, { "epoch": 2.402620359779557, "grad_norm": 0.0004037956823594868, "learning_rate": 1.886461499545543e-06, "loss": 0.001216419623233378, "step": 1446 }, { "epoch": 2.404284080274514, "grad_norm": 0.003060317598283291, "learning_rate": 1.883132918437186e-06, "loss": 0.003621977288275957, "step": 1447 }, { "epoch": 2.4059478007694706, "grad_norm": 0.0002842900576069951, "learning_rate": 1.8798055013448105e-06, "loss": 0.0003931760147679597, "step": 1448 }, { "epoch": 2.4076115212644273, "grad_norm": 0.0008548618061468005, "learning_rate": 1.8764792545471872e-06, "loss": 0.0012658205814659595, "step": 1449 }, { "epoch": 2.4092752417593846, "grad_norm": 0.001995617290958762, "learning_rate": 1.8731541843208772e-06, "loss": 0.004006143659353256, "step": 1450 }, { "epoch": 2.4109389622543413, "grad_norm": 0.000583532324526459, "learning_rate": 1.869830296940223e-06, "loss": 0.0003809987974818796, "step": 1451 }, { "epoch": 2.412602682749298, "grad_norm": 0.0008577098487876356, "learning_rate": 1.8665075986773346e-06, "loss": 0.0008792573353275657, "step": 1452 }, { "epoch": 2.414266403244255, "grad_norm": 0.0004475556779652834, "learning_rate": 1.863186095802077e-06, "loss": 0.0011853489559143782, "step": 1453 }, { "epoch": 2.4159301237392117, "grad_norm": 0.00038343455526046455, "learning_rate": 1.8598657945820605e-06, "loss": 0.00043216568883508444, "step": 1454 }, { "epoch": 2.417593844234169, "grad_norm": 0.0008230686653405428, "learning_rate": 1.8565467012826282e-06, "loss": 0.0017967612948268652, "step": 1455 }, { "epoch": 2.4192575647291257, "grad_norm": 0.00038567421142943203, "learning_rate": 1.853228822166843e-06, "loss": 0.0010309724602848291, "step": 1456 }, { "epoch": 2.4209212852240825, "grad_norm": 0.0013990384759381413, "learning_rate": 1.849912163495479e-06, "loss": 0.0018748044967651367, "step": 1457 }, { "epoch": 2.4225850057190392, "grad_norm": 0.002193356631323695, "learning_rate": 1.8465967315270029e-06, "loss": 0.002720393007621169, "step": 1458 }, { "epoch": 2.424248726213996, "grad_norm": 0.000335589109454304, "learning_rate": 1.8432825325175707e-06, "loss": 0.00045209412928670645, "step": 1459 }, { "epoch": 2.425912446708953, "grad_norm": 0.001692134770564735, "learning_rate": 1.8399695727210098e-06, "loss": 0.005261164158582687, "step": 1460 }, { "epoch": 2.4275761672039096, "grad_norm": 0.0018994332058355212, "learning_rate": 1.836657858388811e-06, "loss": 0.0019658301025629044, "step": 1461 }, { "epoch": 2.429239887698867, "grad_norm": 0.0006664514658041298, "learning_rate": 1.8333473957701126e-06, "loss": 0.0028958944603800774, "step": 1462 }, { "epoch": 2.4309036081938236, "grad_norm": 0.0017626987537369132, "learning_rate": 1.830038191111692e-06, "loss": 0.0033536856062710285, "step": 1463 }, { "epoch": 2.4325673286887803, "grad_norm": 0.0005109024350531399, "learning_rate": 1.8267302506579532e-06, "loss": 0.0013197731459513307, "step": 1464 }, { "epoch": 2.434231049183737, "grad_norm": 0.0014629487413913012, "learning_rate": 1.8234235806509145e-06, "loss": 0.0009660832583904266, "step": 1465 }, { "epoch": 2.435894769678694, "grad_norm": 0.0006256970809772611, "learning_rate": 1.8201181873301967e-06, "loss": 0.0014558584662154317, "step": 1466 }, { "epoch": 2.4375584901736507, "grad_norm": 0.0007523475214838982, "learning_rate": 1.816814076933012e-06, "loss": 0.000893545220606029, "step": 1467 }, { "epoch": 2.439222210668608, "grad_norm": 0.0005765758687630296, "learning_rate": 1.813511255694152e-06, "loss": 0.0009665571851655841, "step": 1468 }, { "epoch": 2.4408859311635647, "grad_norm": 0.0018482616869732738, "learning_rate": 1.8102097298459732e-06, "loss": 0.002785992342978716, "step": 1469 }, { "epoch": 2.4425496516585214, "grad_norm": 0.0007372192339971662, "learning_rate": 1.80690950561839e-06, "loss": 0.0010839628521353006, "step": 1470 }, { "epoch": 2.4442133721534782, "grad_norm": 0.0007215483929030597, "learning_rate": 1.8036105892388611e-06, "loss": 0.001313893124461174, "step": 1471 }, { "epoch": 2.445877092648435, "grad_norm": 0.001106858253479004, "learning_rate": 1.800312986932376e-06, "loss": 0.0017755581066012383, "step": 1472 }, { "epoch": 2.447540813143392, "grad_norm": 0.0011772233992815018, "learning_rate": 1.7970167049214466e-06, "loss": 0.002397167030721903, "step": 1473 }, { "epoch": 2.4492045336383486, "grad_norm": 0.0008078587707132101, "learning_rate": 1.7937217494260888e-06, "loss": 0.0017798661720007658, "step": 1474 }, { "epoch": 2.450868254133306, "grad_norm": 0.0008612503297626972, "learning_rate": 1.7904281266638201e-06, "loss": 0.002131294459104538, "step": 1475 }, { "epoch": 2.4525319746282626, "grad_norm": 0.000806903641205281, "learning_rate": 1.7871358428496416e-06, "loss": 0.0010623048292472959, "step": 1476 }, { "epoch": 2.4541956951232193, "grad_norm": 0.0006560089532285929, "learning_rate": 1.7838449041960276e-06, "loss": 0.0005079662660136819, "step": 1477 }, { "epoch": 2.455859415618176, "grad_norm": 0.0010120128281414509, "learning_rate": 1.7805553169129142e-06, "loss": 0.001596860121935606, "step": 1478 }, { "epoch": 2.457523136113133, "grad_norm": 0.0013024996733292937, "learning_rate": 1.7772670872076883e-06, "loss": 0.004441859200596809, "step": 1479 }, { "epoch": 2.4591868566080897, "grad_norm": 0.0021134542766958475, "learning_rate": 1.773980221285173e-06, "loss": 0.0017620328580960631, "step": 1480 }, { "epoch": 2.460850577103047, "grad_norm": 0.0005604978650808334, "learning_rate": 1.7706947253476194e-06, "loss": 0.0015471463557332754, "step": 1481 }, { "epoch": 2.4625142975980037, "grad_norm": 0.0011455026688054204, "learning_rate": 1.767410605594694e-06, "loss": 0.0016314920503646135, "step": 1482 }, { "epoch": 2.4641780180929604, "grad_norm": 0.0012556511210277677, "learning_rate": 1.7641278682234658e-06, "loss": 0.0021901875734329224, "step": 1483 }, { "epoch": 2.465841738587917, "grad_norm": 0.0010981862433254719, "learning_rate": 1.7608465194283958e-06, "loss": 0.002816542750224471, "step": 1484 }, { "epoch": 2.467505459082874, "grad_norm": 0.001629547798074782, "learning_rate": 1.757566565401323e-06, "loss": 0.003777154488489032, "step": 1485 }, { "epoch": 2.4691691795778308, "grad_norm": 0.0017756317974999547, "learning_rate": 1.7542880123314559e-06, "loss": 0.0037420601584017277, "step": 1486 }, { "epoch": 2.4708329000727876, "grad_norm": 0.0009217644692398608, "learning_rate": 1.75101086640536e-06, "loss": 0.0017659610603004694, "step": 1487 }, { "epoch": 2.4724966205677448, "grad_norm": 0.0008854199550114572, "learning_rate": 1.7477351338069442e-06, "loss": 0.0025656609795987606, "step": 1488 }, { "epoch": 2.4741603410627016, "grad_norm": 0.0008711411501280963, "learning_rate": 1.7444608207174519e-06, "loss": 0.001252155052497983, "step": 1489 }, { "epoch": 2.4758240615576583, "grad_norm": 0.0007815634598955512, "learning_rate": 1.741187933315448e-06, "loss": 0.0015573770506307483, "step": 1490 }, { "epoch": 2.477487782052615, "grad_norm": 0.0009990883991122246, "learning_rate": 1.7379164777768038e-06, "loss": 0.0016516413306817412, "step": 1491 }, { "epoch": 2.479151502547572, "grad_norm": 0.0006764251738786697, "learning_rate": 1.734646460274692e-06, "loss": 0.0020175199024379253, "step": 1492 }, { "epoch": 2.4808152230425287, "grad_norm": 0.0008993525989353657, "learning_rate": 1.7313778869795717e-06, "loss": 0.0015614933799952269, "step": 1493 }, { "epoch": 2.482478943537486, "grad_norm": 0.001278067473322153, "learning_rate": 1.728110764059176e-06, "loss": 0.0010163411498069763, "step": 1494 }, { "epoch": 2.4841426640324427, "grad_norm": 0.0007695929380133748, "learning_rate": 1.7248450976785011e-06, "loss": 0.001716417958959937, "step": 1495 }, { "epoch": 2.4858063845273994, "grad_norm": 0.00044241335126571357, "learning_rate": 1.7215808939997945e-06, "loss": 0.002265610732138157, "step": 1496 }, { "epoch": 2.487470105022356, "grad_norm": 0.0005241918261162937, "learning_rate": 1.7183181591825437e-06, "loss": 0.0009319846867583692, "step": 1497 }, { "epoch": 2.489133825517313, "grad_norm": 0.0007762466557323933, "learning_rate": 1.7150568993834666e-06, "loss": 0.001524902181699872, "step": 1498 }, { "epoch": 2.4907975460122698, "grad_norm": 0.0008093323558568954, "learning_rate": 1.7117971207564934e-06, "loss": 0.0015047211199998856, "step": 1499 }, { "epoch": 2.4924612665072265, "grad_norm": 0.0007091707666404545, "learning_rate": 1.7085388294527632e-06, "loss": 0.0006919258739799261, "step": 1500 }, { "epoch": 2.4924612665072265, "eval_loss": 0.0021714933682233095, "eval_runtime": 490.2004, "eval_samples_per_second": 9.808, "eval_steps_per_second": 2.452, "step": 1500 }, { "epoch": 2.4941249870021838, "grad_norm": 0.0005224543274380267, "learning_rate": 1.705282031620608e-06, "loss": 0.0010630788747221231, "step": 1501 }, { "epoch": 2.4957887074971405, "grad_norm": 0.0009647620609030128, "learning_rate": 1.7020267334055393e-06, "loss": 0.0013143199030309916, "step": 1502 }, { "epoch": 2.4974524279920973, "grad_norm": 0.0020491466857492924, "learning_rate": 1.6987729409502412e-06, "loss": 0.0006549229728989303, "step": 1503 }, { "epoch": 2.499116148487054, "grad_norm": 0.0010662437416613102, "learning_rate": 1.6955206603945557e-06, "loss": 0.0024829788599163294, "step": 1504 }, { "epoch": 2.500779868982011, "grad_norm": 0.0021262792870402336, "learning_rate": 1.6922698978754726e-06, "loss": 0.0022876132279634476, "step": 1505 }, { "epoch": 2.502443589476968, "grad_norm": 0.0037959532346576452, "learning_rate": 1.6890206595271153e-06, "loss": 0.0065306369215250015, "step": 1506 }, { "epoch": 2.504107309971925, "grad_norm": 0.0008538314723409712, "learning_rate": 1.6857729514807325e-06, "loss": 0.0015486277407035232, "step": 1507 }, { "epoch": 2.5057710304668817, "grad_norm": 0.0008208471699617803, "learning_rate": 1.6825267798646851e-06, "loss": 0.0010096883634105325, "step": 1508 }, { "epoch": 2.5074347509618384, "grad_norm": 0.0006657431949861348, "learning_rate": 1.6792821508044352e-06, "loss": 0.0013219774700701237, "step": 1509 }, { "epoch": 2.509098471456795, "grad_norm": 0.0006319488165900111, "learning_rate": 1.6760390704225333e-06, "loss": 0.0018334081396460533, "step": 1510 }, { "epoch": 2.510762191951752, "grad_norm": 0.0003473910328466445, "learning_rate": 1.672797544838608e-06, "loss": 0.0008822108502499759, "step": 1511 }, { "epoch": 2.5124259124467088, "grad_norm": 0.001212777686305344, "learning_rate": 1.6695575801693549e-06, "loss": 0.0028206915594637394, "step": 1512 }, { "epoch": 2.5140896329416655, "grad_norm": 0.0005771344294771552, "learning_rate": 1.6663191825285214e-06, "loss": 0.0005732635036110878, "step": 1513 }, { "epoch": 2.5157533534366228, "grad_norm": 0.0009036052506417036, "learning_rate": 1.6630823580269005e-06, "loss": 0.0025500047486275434, "step": 1514 }, { "epoch": 2.5174170739315795, "grad_norm": 0.000790099729783833, "learning_rate": 1.6598471127723162e-06, "loss": 0.00228320574387908, "step": 1515 }, { "epoch": 2.5190807944265363, "grad_norm": 0.0009197267354466021, "learning_rate": 1.6566134528696126e-06, "loss": 0.0007472310098819435, "step": 1516 }, { "epoch": 2.520744514921493, "grad_norm": 0.0008512712665833533, "learning_rate": 1.6533813844206426e-06, "loss": 0.001149134011939168, "step": 1517 }, { "epoch": 2.52240823541645, "grad_norm": 0.0008269053651019931, "learning_rate": 1.6501509135242533e-06, "loss": 0.0011230545351281762, "step": 1518 }, { "epoch": 2.524071955911407, "grad_norm": 0.0018875693203881383, "learning_rate": 1.6469220462762807e-06, "loss": 0.005237270146608353, "step": 1519 }, { "epoch": 2.525735676406364, "grad_norm": 0.0010742016602307558, "learning_rate": 1.6436947887695336e-06, "loss": 0.0016066599637269974, "step": 1520 }, { "epoch": 2.5273993969013206, "grad_norm": 0.0008471138426102698, "learning_rate": 1.6404691470937829e-06, "loss": 0.0014250362291932106, "step": 1521 }, { "epoch": 2.5290631173962774, "grad_norm": 0.00104066904168576, "learning_rate": 1.6372451273357504e-06, "loss": 0.0016985085094347596, "step": 1522 }, { "epoch": 2.530726837891234, "grad_norm": 0.0012702214298769832, "learning_rate": 1.6340227355790988e-06, "loss": 0.001974192913621664, "step": 1523 }, { "epoch": 2.532390558386191, "grad_norm": 0.00034718395909294486, "learning_rate": 1.6308019779044154e-06, "loss": 0.000869637297000736, "step": 1524 }, { "epoch": 2.5340542788811478, "grad_norm": 0.000563792185857892, "learning_rate": 1.6275828603892078e-06, "loss": 0.001410376513376832, "step": 1525 }, { "epoch": 2.535717999376105, "grad_norm": 0.0015897756675258279, "learning_rate": 1.6243653891078864e-06, "loss": 0.0031209930311888456, "step": 1526 }, { "epoch": 2.5373817198710618, "grad_norm": 0.0014952553901821375, "learning_rate": 1.6211495701317565e-06, "loss": 0.002497080946341157, "step": 1527 }, { "epoch": 2.5390454403660185, "grad_norm": 0.0002881517866626382, "learning_rate": 1.6179354095290051e-06, "loss": 0.00020394183229655027, "step": 1528 }, { "epoch": 2.5407091608609753, "grad_norm": 0.0009688301361165941, "learning_rate": 1.6147229133646885e-06, "loss": 0.001731213997118175, "step": 1529 }, { "epoch": 2.542372881355932, "grad_norm": 0.0005621787277050316, "learning_rate": 1.611512087700724e-06, "loss": 0.001863375655375421, "step": 1530 }, { "epoch": 2.5440366018508893, "grad_norm": 0.0005942243151366711, "learning_rate": 1.6083029385958762e-06, "loss": 0.001412746263667941, "step": 1531 }, { "epoch": 2.545700322345846, "grad_norm": 0.0009608308901078999, "learning_rate": 1.6050954721057461e-06, "loss": 0.002480145078152418, "step": 1532 }, { "epoch": 2.547364042840803, "grad_norm": 0.00087307597277686, "learning_rate": 1.6018896942827595e-06, "loss": 0.0018921629525721073, "step": 1533 }, { "epoch": 2.5490277633357596, "grad_norm": 0.0017327595269307494, "learning_rate": 1.5986856111761562e-06, "loss": 0.004610971547663212, "step": 1534 }, { "epoch": 2.5506914838307164, "grad_norm": 0.000762734969612211, "learning_rate": 1.595483228831976e-06, "loss": 0.002128410153090954, "step": 1535 }, { "epoch": 2.552355204325673, "grad_norm": 0.0002462914271745831, "learning_rate": 1.5922825532930526e-06, "loss": 0.0005376354092732072, "step": 1536 }, { "epoch": 2.55401892482063, "grad_norm": 0.001250227796845138, "learning_rate": 1.5890835905989969e-06, "loss": 0.0013491283170878887, "step": 1537 }, { "epoch": 2.5556826453155868, "grad_norm": 0.0010119171347469091, "learning_rate": 1.5858863467861882e-06, "loss": 0.0018797065131366253, "step": 1538 }, { "epoch": 2.557346365810544, "grad_norm": 0.0011957940878346562, "learning_rate": 1.582690827887763e-06, "loss": 0.000866060727275908, "step": 1539 }, { "epoch": 2.5590100863055008, "grad_norm": 0.0012566793011501431, "learning_rate": 1.5794970399336012e-06, "loss": 0.0031211625318974257, "step": 1540 }, { "epoch": 2.5606738068004575, "grad_norm": 0.0006489444640465081, "learning_rate": 1.576304988950318e-06, "loss": 0.0026007932610809803, "step": 1541 }, { "epoch": 2.5623375272954143, "grad_norm": 0.001092761754989624, "learning_rate": 1.5731146809612508e-06, "loss": 0.001698095933534205, "step": 1542 }, { "epoch": 2.564001247790371, "grad_norm": 0.0014394314493983984, "learning_rate": 1.569926121986447e-06, "loss": 0.0019551399163901806, "step": 1543 }, { "epoch": 2.5656649682853283, "grad_norm": 0.001035700086504221, "learning_rate": 1.566739318042655e-06, "loss": 0.002465068595483899, "step": 1544 }, { "epoch": 2.567328688780285, "grad_norm": 0.0009444122551940382, "learning_rate": 1.56355427514331e-06, "loss": 0.001828762935474515, "step": 1545 }, { "epoch": 2.568992409275242, "grad_norm": 0.0007945806719362736, "learning_rate": 1.5603709992985256e-06, "loss": 0.0018355362117290497, "step": 1546 }, { "epoch": 2.5706561297701986, "grad_norm": 0.0015775883803144097, "learning_rate": 1.5571894965150796e-06, "loss": 0.002111088950186968, "step": 1547 }, { "epoch": 2.5723198502651554, "grad_norm": 0.001175825484097004, "learning_rate": 1.554009772796406e-06, "loss": 0.0020981403067708015, "step": 1548 }, { "epoch": 2.573983570760112, "grad_norm": 0.0011401629308238626, "learning_rate": 1.55083183414258e-06, "loss": 0.0018020938150584698, "step": 1549 }, { "epoch": 2.575647291255069, "grad_norm": 0.00028235308127477765, "learning_rate": 1.5476556865503095e-06, "loss": 0.0011889156885445118, "step": 1550 }, { "epoch": 2.5773110117500257, "grad_norm": 0.0009733039187267423, "learning_rate": 1.5444813360129207e-06, "loss": 0.0014338747132569551, "step": 1551 }, { "epoch": 2.578974732244983, "grad_norm": 0.0006195545429363847, "learning_rate": 1.5413087885203515e-06, "loss": 0.0003205559041816741, "step": 1552 }, { "epoch": 2.5806384527399397, "grad_norm": 0.000734661181923002, "learning_rate": 1.538138050059136e-06, "loss": 0.0014326430391520262, "step": 1553 }, { "epoch": 2.5823021732348965, "grad_norm": 0.0009669451974332333, "learning_rate": 1.5349691266123946e-06, "loss": 0.0034975572489202023, "step": 1554 }, { "epoch": 2.5839658937298533, "grad_norm": 0.0009032751549966633, "learning_rate": 1.5318020241598248e-06, "loss": 0.0009406222961843014, "step": 1555 }, { "epoch": 2.58562961422481, "grad_norm": 0.0013129219878464937, "learning_rate": 1.5286367486776835e-06, "loss": 0.003194645280018449, "step": 1556 }, { "epoch": 2.5872933347197673, "grad_norm": 0.0017833646852523088, "learning_rate": 1.5254733061387846e-06, "loss": 0.0013546156696975231, "step": 1557 }, { "epoch": 2.588957055214724, "grad_norm": 0.0005147892516106367, "learning_rate": 1.5223117025124817e-06, "loss": 0.0015783794224262238, "step": 1558 }, { "epoch": 2.590620775709681, "grad_norm": 0.0008144800085574389, "learning_rate": 1.5191519437646576e-06, "loss": 0.0038211564533412457, "step": 1559 }, { "epoch": 2.5922844962046376, "grad_norm": 0.000820773362647742, "learning_rate": 1.5159940358577151e-06, "loss": 0.0019291674252599478, "step": 1560 }, { "epoch": 2.5939482166995944, "grad_norm": 0.001188530120998621, "learning_rate": 1.512837984750565e-06, "loss": 0.0013952332083135843, "step": 1561 }, { "epoch": 2.595611937194551, "grad_norm": 0.0013983040116727352, "learning_rate": 1.5096837963986112e-06, "loss": 0.0018704570829868317, "step": 1562 }, { "epoch": 2.597275657689508, "grad_norm": 0.0008498321985825896, "learning_rate": 1.5065314767537453e-06, "loss": 0.0011971730273216963, "step": 1563 }, { "epoch": 2.5989393781844647, "grad_norm": 0.00019363206229172647, "learning_rate": 1.5033810317643327e-06, "loss": 0.00046173634473234415, "step": 1564 }, { "epoch": 2.600603098679422, "grad_norm": 0.0007203574641607702, "learning_rate": 1.5002324673752006e-06, "loss": 0.0010821465402841568, "step": 1565 }, { "epoch": 2.6022668191743787, "grad_norm": 0.00038083441904745996, "learning_rate": 1.4970857895276285e-06, "loss": 0.0011750578414648771, "step": 1566 }, { "epoch": 2.6039305396693355, "grad_norm": 0.0007648352766409516, "learning_rate": 1.4939410041593338e-06, "loss": 0.0026476874481886625, "step": 1567 }, { "epoch": 2.6055942601642923, "grad_norm": 0.0018421369604766369, "learning_rate": 1.4907981172044647e-06, "loss": 0.002846265211701393, "step": 1568 }, { "epoch": 2.6072579806592495, "grad_norm": 0.0003029352519661188, "learning_rate": 1.487657134593587e-06, "loss": 0.0008935644291341305, "step": 1569 }, { "epoch": 2.6089217011542063, "grad_norm": 0.0004757127317134291, "learning_rate": 1.4845180622536728e-06, "loss": 0.0011599808931350708, "step": 1570 }, { "epoch": 2.610585421649163, "grad_norm": 0.0005109715275466442, "learning_rate": 1.4813809061080893e-06, "loss": 0.0024393724743276834, "step": 1571 }, { "epoch": 2.61224914214412, "grad_norm": 0.0002291719283675775, "learning_rate": 1.4782456720765895e-06, "loss": 0.00032998324604704976, "step": 1572 }, { "epoch": 2.6139128626390766, "grad_norm": 0.0007486707763746381, "learning_rate": 1.4751123660752955e-06, "loss": 0.002003737259656191, "step": 1573 }, { "epoch": 2.6155765831340334, "grad_norm": 0.0005735998274758458, "learning_rate": 1.4719809940166952e-06, "loss": 0.001227132510393858, "step": 1574 }, { "epoch": 2.61724030362899, "grad_norm": 0.00040597538463771343, "learning_rate": 1.4688515618096252e-06, "loss": 0.0011647650972008705, "step": 1575 }, { "epoch": 2.618904024123947, "grad_norm": 0.0011245380155742168, "learning_rate": 1.4657240753592627e-06, "loss": 0.000804451119620353, "step": 1576 }, { "epoch": 2.620567744618904, "grad_norm": 0.00036305448156781495, "learning_rate": 1.462598540567113e-06, "loss": 0.00045339000644162297, "step": 1577 }, { "epoch": 2.622231465113861, "grad_norm": 0.0013733444502577186, "learning_rate": 1.4594749633309981e-06, "loss": 0.00537478644400835, "step": 1578 }, { "epoch": 2.6238951856088177, "grad_norm": 0.0005763823282904923, "learning_rate": 1.456353349545046e-06, "loss": 0.0016458909958600998, "step": 1579 }, { "epoch": 2.6255589061037745, "grad_norm": 0.0003702778776641935, "learning_rate": 1.4532337050996804e-06, "loss": 0.0006556995795108378, "step": 1580 }, { "epoch": 2.6272226265987313, "grad_norm": 0.00021504143660422415, "learning_rate": 1.4501160358816085e-06, "loss": 0.0008495253277942538, "step": 1581 }, { "epoch": 2.6288863470936885, "grad_norm": 0.0007969992002472281, "learning_rate": 1.4470003477738111e-06, "loss": 0.0007396379369311035, "step": 1582 }, { "epoch": 2.6305500675886453, "grad_norm": 0.0006778067909181118, "learning_rate": 1.4438866466555308e-06, "loss": 0.00211876118555665, "step": 1583 }, { "epoch": 2.632213788083602, "grad_norm": 0.0008974986267276108, "learning_rate": 1.4407749384022576e-06, "loss": 0.0029981655534356833, "step": 1584 }, { "epoch": 2.633877508578559, "grad_norm": 0.0011483733542263508, "learning_rate": 1.4376652288857249e-06, "loss": 0.000901217048522085, "step": 1585 }, { "epoch": 2.6355412290735156, "grad_norm": 0.0005572153022512794, "learning_rate": 1.4345575239738928e-06, "loss": 0.0009018393466249108, "step": 1586 }, { "epoch": 2.6372049495684724, "grad_norm": 0.000704800128005445, "learning_rate": 1.431451829530939e-06, "loss": 0.0024085640907287598, "step": 1587 }, { "epoch": 2.638868670063429, "grad_norm": 0.0006277773063629866, "learning_rate": 1.4283481514172487e-06, "loss": 0.0006163071375340223, "step": 1588 }, { "epoch": 2.640532390558386, "grad_norm": 0.0006596968742087483, "learning_rate": 1.425246495489399e-06, "loss": 0.0014800459612160921, "step": 1589 }, { "epoch": 2.642196111053343, "grad_norm": 0.0012964921770617366, "learning_rate": 1.4221468676001544e-06, "loss": 0.0024225322995334864, "step": 1590 }, { "epoch": 2.6438598315483, "grad_norm": 0.0008854639600031078, "learning_rate": 1.419049273598451e-06, "loss": 0.0015666240360587835, "step": 1591 }, { "epoch": 2.6455235520432567, "grad_norm": 0.00041664467426016927, "learning_rate": 1.4159537193293876e-06, "loss": 0.0004793188127223402, "step": 1592 }, { "epoch": 2.6471872725382135, "grad_norm": 0.00037037362926639616, "learning_rate": 1.4128602106342154e-06, "loss": 0.0007164277485571802, "step": 1593 }, { "epoch": 2.6488509930331703, "grad_norm": 0.0005810911534354091, "learning_rate": 1.4097687533503213e-06, "loss": 0.0017024344997480512, "step": 1594 }, { "epoch": 2.6505147135281275, "grad_norm": 0.0015127704245969653, "learning_rate": 1.4066793533112255e-06, "loss": 0.003262751968577504, "step": 1595 }, { "epoch": 2.6521784340230843, "grad_norm": 0.0008967657340690494, "learning_rate": 1.4035920163465648e-06, "loss": 0.0025144058745354414, "step": 1596 }, { "epoch": 2.653842154518041, "grad_norm": 0.0009657472255639732, "learning_rate": 1.400506748282083e-06, "loss": 0.002190806670114398, "step": 1597 }, { "epoch": 2.655505875012998, "grad_norm": 0.0011267064837738872, "learning_rate": 1.3974235549396198e-06, "loss": 0.0018630200065672398, "step": 1598 }, { "epoch": 2.6571695955079546, "grad_norm": 0.0006663509993813932, "learning_rate": 1.3943424421370998e-06, "loss": 0.0017727947561070323, "step": 1599 }, { "epoch": 2.6588333160029114, "grad_norm": 0.0015259669162333012, "learning_rate": 1.3912634156885235e-06, "loss": 0.004836976062506437, "step": 1600 }, { "epoch": 2.6588333160029114, "eval_loss": 0.002130915177986026, "eval_runtime": 491.4387, "eval_samples_per_second": 9.784, "eval_steps_per_second": 2.446, "step": 1600 }, { "epoch": 2.660497036497868, "grad_norm": 0.0005084536387585104, "learning_rate": 1.3881864814039503e-06, "loss": 0.0009671523002907634, "step": 1601 }, { "epoch": 2.662160756992825, "grad_norm": 0.000593173666857183, "learning_rate": 1.3851116450894959e-06, "loss": 0.0017778316978365183, "step": 1602 }, { "epoch": 2.663824477487782, "grad_norm": 0.0002884052519220859, "learning_rate": 1.382038912547315e-06, "loss": 0.0005450017051771283, "step": 1603 }, { "epoch": 2.665488197982739, "grad_norm": 0.0009035166003741324, "learning_rate": 1.3789682895755935e-06, "loss": 0.0014512368943542242, "step": 1604 }, { "epoch": 2.6671519184776957, "grad_norm": 0.0010885243536904454, "learning_rate": 1.3758997819685366e-06, "loss": 0.002957823220640421, "step": 1605 }, { "epoch": 2.6688156389726525, "grad_norm": 0.0012139276368543506, "learning_rate": 1.3728333955163565e-06, "loss": 0.0007539030630141497, "step": 1606 }, { "epoch": 2.6704793594676093, "grad_norm": 0.0011482624104246497, "learning_rate": 1.3697691360052646e-06, "loss": 0.0011043865233659744, "step": 1607 }, { "epoch": 2.6721430799625665, "grad_norm": 0.00046592974103987217, "learning_rate": 1.3667070092174587e-06, "loss": 0.0020422644447535276, "step": 1608 }, { "epoch": 2.6738068004575233, "grad_norm": 0.0017777342582121491, "learning_rate": 1.3636470209311093e-06, "loss": 0.0010365403722971678, "step": 1609 }, { "epoch": 2.67547052095248, "grad_norm": 0.001754077966324985, "learning_rate": 1.360589176920355e-06, "loss": 0.0025688004679977894, "step": 1610 }, { "epoch": 2.677134241447437, "grad_norm": 0.0008552870131097734, "learning_rate": 1.357533482955287e-06, "loss": 0.0021906231995671988, "step": 1611 }, { "epoch": 2.6787979619423936, "grad_norm": 0.0007275700918398798, "learning_rate": 1.354479944801939e-06, "loss": 0.0016428211238235235, "step": 1612 }, { "epoch": 2.6804616824373504, "grad_norm": 0.0017000288935378194, "learning_rate": 1.3514285682222777e-06, "loss": 0.0026704478077590466, "step": 1613 }, { "epoch": 2.682125402932307, "grad_norm": 0.0007832667324692011, "learning_rate": 1.3483793589741901e-06, "loss": 0.001527699758298695, "step": 1614 }, { "epoch": 2.683789123427264, "grad_norm": 0.0010062839137390256, "learning_rate": 1.3453323228114745e-06, "loss": 0.0007239821134135127, "step": 1615 }, { "epoch": 2.685452843922221, "grad_norm": 0.0005412401515059173, "learning_rate": 1.3422874654838263e-06, "loss": 0.0009945163037627935, "step": 1616 }, { "epoch": 2.687116564417178, "grad_norm": 0.0015882287407293916, "learning_rate": 1.3392447927368315e-06, "loss": 0.001623464049771428, "step": 1617 }, { "epoch": 2.6887802849121347, "grad_norm": 0.0002611389209050685, "learning_rate": 1.3362043103119537e-06, "loss": 0.0006480780430138111, "step": 1618 }, { "epoch": 2.6904440054070915, "grad_norm": 0.00046936588478274643, "learning_rate": 1.3331660239465232e-06, "loss": 0.0008513991488143802, "step": 1619 }, { "epoch": 2.6921077259020487, "grad_norm": 0.0010908697731792927, "learning_rate": 1.3301299393737262e-06, "loss": 0.003656618529930711, "step": 1620 }, { "epoch": 2.6937714463970055, "grad_norm": 0.0030978077556937933, "learning_rate": 1.3270960623225953e-06, "loss": 0.0021632835268974304, "step": 1621 }, { "epoch": 2.6954351668919623, "grad_norm": 0.0015169283142313361, "learning_rate": 1.324064398517994e-06, "loss": 0.0006730949389748275, "step": 1622 }, { "epoch": 2.697098887386919, "grad_norm": 0.0005174805992282927, "learning_rate": 1.3210349536806138e-06, "loss": 0.0017688910011202097, "step": 1623 }, { "epoch": 2.698762607881876, "grad_norm": 0.00047087905113585293, "learning_rate": 1.3180077335269565e-06, "loss": 0.0015057477867230773, "step": 1624 }, { "epoch": 2.7004263283768326, "grad_norm": 0.0007247462053783238, "learning_rate": 1.3149827437693267e-06, "loss": 0.0026692950632423162, "step": 1625 }, { "epoch": 2.7020900488717894, "grad_norm": 0.0009361693519167602, "learning_rate": 1.3119599901158214e-06, "loss": 0.001273240428417921, "step": 1626 }, { "epoch": 2.703753769366746, "grad_norm": 0.0006839700508862734, "learning_rate": 1.3089394782703152e-06, "loss": 0.002276037121191621, "step": 1627 }, { "epoch": 2.7054174898617034, "grad_norm": 0.00048088282346725464, "learning_rate": 1.3059212139324548e-06, "loss": 0.0010588760487735271, "step": 1628 }, { "epoch": 2.70708121035666, "grad_norm": 0.0007159346132539213, "learning_rate": 1.3029052027976457e-06, "loss": 0.0015596949961036444, "step": 1629 }, { "epoch": 2.708744930851617, "grad_norm": 0.0007666469318792224, "learning_rate": 1.299891450557041e-06, "loss": 0.003373862709850073, "step": 1630 }, { "epoch": 2.7104086513465737, "grad_norm": 0.0009571753907948732, "learning_rate": 1.2968799628975311e-06, "loss": 0.0008542872965335846, "step": 1631 }, { "epoch": 2.7120723718415305, "grad_norm": 0.002131329383701086, "learning_rate": 1.2938707455017358e-06, "loss": 0.002327835885807872, "step": 1632 }, { "epoch": 2.7137360923364877, "grad_norm": 0.0014738694299012423, "learning_rate": 1.2908638040479855e-06, "loss": 0.0012567926896736026, "step": 1633 }, { "epoch": 2.7153998128314445, "grad_norm": 0.0009184948867186904, "learning_rate": 1.2878591442103215e-06, "loss": 0.0025412426330149174, "step": 1634 }, { "epoch": 2.7170635333264013, "grad_norm": 0.0003600319323595613, "learning_rate": 1.2848567716584764e-06, "loss": 0.0004601909895427525, "step": 1635 }, { "epoch": 2.718727253821358, "grad_norm": 0.0003036624111700803, "learning_rate": 1.2818566920578684e-06, "loss": 0.0005614445544779301, "step": 1636 }, { "epoch": 2.720390974316315, "grad_norm": 0.0017151448410004377, "learning_rate": 1.2788589110695896e-06, "loss": 0.0019237557426095009, "step": 1637 }, { "epoch": 2.7220546948112716, "grad_norm": 0.00035357868182472885, "learning_rate": 1.275863434350391e-06, "loss": 0.0011692875996232033, "step": 1638 }, { "epoch": 2.7237184153062284, "grad_norm": 0.0005569895729422569, "learning_rate": 1.2728702675526788e-06, "loss": 0.001136164297349751, "step": 1639 }, { "epoch": 2.725382135801185, "grad_norm": 0.0008662780746817589, "learning_rate": 1.2698794163244998e-06, "loss": 0.00196397234685719, "step": 1640 }, { "epoch": 2.7270458562961424, "grad_norm": 0.0009904677281156182, "learning_rate": 1.2668908863095311e-06, "loss": 0.00174479850102216, "step": 1641 }, { "epoch": 2.728709576791099, "grad_norm": 0.001673137187026441, "learning_rate": 1.2639046831470697e-06, "loss": 0.0008900794200599194, "step": 1642 }, { "epoch": 2.730373297286056, "grad_norm": 0.0008164556929841638, "learning_rate": 1.2609208124720228e-06, "loss": 0.0016282034339383245, "step": 1643 }, { "epoch": 2.7320370177810127, "grad_norm": 0.000763962569180876, "learning_rate": 1.2579392799148938e-06, "loss": 0.00045784079702571034, "step": 1644 }, { "epoch": 2.7337007382759695, "grad_norm": 0.0025477285962551832, "learning_rate": 1.2549600911017761e-06, "loss": 0.002230012556537986, "step": 1645 }, { "epoch": 2.7353644587709267, "grad_norm": 0.0005968649056740105, "learning_rate": 1.25198325165434e-06, "loss": 0.0012856629909947515, "step": 1646 }, { "epoch": 2.7370281792658835, "grad_norm": 0.0015948619693517685, "learning_rate": 1.2490087671898234e-06, "loss": 0.003677614266052842, "step": 1647 }, { "epoch": 2.7386918997608403, "grad_norm": 0.0007306319894269109, "learning_rate": 1.24603664332102e-06, "loss": 0.0014641217421740294, "step": 1648 }, { "epoch": 2.740355620255797, "grad_norm": 0.0003455659607425332, "learning_rate": 1.243066885656267e-06, "loss": 0.0004623204004019499, "step": 1649 }, { "epoch": 2.742019340750754, "grad_norm": 0.0006565743242390454, "learning_rate": 1.240099499799439e-06, "loss": 0.0017107607563957572, "step": 1650 }, { "epoch": 2.7436830612457106, "grad_norm": 0.0005443418631330132, "learning_rate": 1.237134491349935e-06, "loss": 0.001200504251755774, "step": 1651 }, { "epoch": 2.7453467817406674, "grad_norm": 0.000680884812027216, "learning_rate": 1.234171865902667e-06, "loss": 0.0012294517364352942, "step": 1652 }, { "epoch": 2.747010502235624, "grad_norm": 0.0007219392573460937, "learning_rate": 1.2312116290480506e-06, "loss": 0.001784635242074728, "step": 1653 }, { "epoch": 2.7486742227305814, "grad_norm": 0.001259760232642293, "learning_rate": 1.228253786371995e-06, "loss": 0.002943810075521469, "step": 1654 }, { "epoch": 2.750337943225538, "grad_norm": 0.0005466834991239011, "learning_rate": 1.2252983434558894e-06, "loss": 0.0013657533563673496, "step": 1655 }, { "epoch": 2.752001663720495, "grad_norm": 0.0007785367197357118, "learning_rate": 1.2223453058765966e-06, "loss": 0.0010540152434259653, "step": 1656 }, { "epoch": 2.7536653842154517, "grad_norm": 0.0008473260677419603, "learning_rate": 1.2193946792064403e-06, "loss": 0.0014812792651355267, "step": 1657 }, { "epoch": 2.755329104710409, "grad_norm": 0.0009911999804899096, "learning_rate": 1.2164464690131947e-06, "loss": 0.0032296220306307077, "step": 1658 }, { "epoch": 2.7569928252053657, "grad_norm": 0.0007574434275738895, "learning_rate": 1.2135006808600752e-06, "loss": 0.001332910731434822, "step": 1659 }, { "epoch": 2.7586565457003225, "grad_norm": 0.00160769815556705, "learning_rate": 1.2105573203057233e-06, "loss": 0.0020911500323563814, "step": 1660 }, { "epoch": 2.7603202661952793, "grad_norm": 0.000817932712379843, "learning_rate": 1.207616392904204e-06, "loss": 0.0013863914646208286, "step": 1661 }, { "epoch": 2.761983986690236, "grad_norm": 0.000839824031572789, "learning_rate": 1.2046779042049883e-06, "loss": 0.002464557997882366, "step": 1662 }, { "epoch": 2.763647707185193, "grad_norm": 0.0009035566472448409, "learning_rate": 1.2017418597529464e-06, "loss": 0.0007109399302862585, "step": 1663 }, { "epoch": 2.7653114276801496, "grad_norm": 0.0004544042458292097, "learning_rate": 1.1988082650883376e-06, "loss": 0.0015377543168142438, "step": 1664 }, { "epoch": 2.7669751481751064, "grad_norm": 0.0002455816720612347, "learning_rate": 1.1958771257467946e-06, "loss": 0.0011990536004304886, "step": 1665 }, { "epoch": 2.7686388686700636, "grad_norm": 0.00031565967947244644, "learning_rate": 1.1929484472593205e-06, "loss": 0.0004553858598228544, "step": 1666 }, { "epoch": 2.7703025891650204, "grad_norm": 0.0012521336320787668, "learning_rate": 1.190022235152274e-06, "loss": 0.0022942747455090284, "step": 1667 }, { "epoch": 2.771966309659977, "grad_norm": 0.0004633648495655507, "learning_rate": 1.1870984949473586e-06, "loss": 0.0014716461300849915, "step": 1668 }, { "epoch": 2.773630030154934, "grad_norm": 0.0006363751017488539, "learning_rate": 1.184177232161615e-06, "loss": 0.0014855836052447557, "step": 1669 }, { "epoch": 2.7752937506498907, "grad_norm": 0.0014003062387928367, "learning_rate": 1.1812584523074089e-06, "loss": 0.002873154589906335, "step": 1670 }, { "epoch": 2.776957471144848, "grad_norm": 0.0004970761365257204, "learning_rate": 1.1783421608924183e-06, "loss": 0.0014596062246710062, "step": 1671 }, { "epoch": 2.7786211916398047, "grad_norm": 0.0008416569326072931, "learning_rate": 1.1754283634196285e-06, "loss": 0.0005961301503702998, "step": 1672 }, { "epoch": 2.7802849121347615, "grad_norm": 0.000693576701451093, "learning_rate": 1.1725170653873174e-06, "loss": 0.0010480983182787895, "step": 1673 }, { "epoch": 2.7819486326297183, "grad_norm": 0.0005453251651488245, "learning_rate": 1.1696082722890474e-06, "loss": 0.0005093438667245209, "step": 1674 }, { "epoch": 2.783612353124675, "grad_norm": 0.001792008988559246, "learning_rate": 1.1667019896136539e-06, "loss": 0.004601791501045227, "step": 1675 }, { "epoch": 2.785276073619632, "grad_norm": 0.0014838258503004909, "learning_rate": 1.1637982228452329e-06, "loss": 0.002853119745850563, "step": 1676 }, { "epoch": 2.7869397941145886, "grad_norm": 0.0009499151492491364, "learning_rate": 1.1608969774631366e-06, "loss": 0.002817405853420496, "step": 1677 }, { "epoch": 2.7886035146095454, "grad_norm": 0.0015111935790628195, "learning_rate": 1.1579982589419568e-06, "loss": 0.0013510992284864187, "step": 1678 }, { "epoch": 2.7902672351045026, "grad_norm": 0.0015526446513831615, "learning_rate": 1.155102072751518e-06, "loss": 0.0025683874264359474, "step": 1679 }, { "epoch": 2.7919309555994594, "grad_norm": 0.0003567964013200253, "learning_rate": 1.152208424356867e-06, "loss": 0.0005328885745257139, "step": 1680 }, { "epoch": 2.793594676094416, "grad_norm": 0.0012884798925369978, "learning_rate": 1.1493173192182613e-06, "loss": 0.0021837963722646236, "step": 1681 }, { "epoch": 2.795258396589373, "grad_norm": 0.0006896915147081017, "learning_rate": 1.1464287627911577e-06, "loss": 0.0025080733466893435, "step": 1682 }, { "epoch": 2.7969221170843297, "grad_norm": 0.001546475337818265, "learning_rate": 1.1435427605262057e-06, "loss": 0.0012556775473058224, "step": 1683 }, { "epoch": 2.798585837579287, "grad_norm": 0.0012773580383509398, "learning_rate": 1.1406593178692346e-06, "loss": 0.0026327914092689753, "step": 1684 }, { "epoch": 2.8002495580742437, "grad_norm": 0.0011847744463011622, "learning_rate": 1.1377784402612439e-06, "loss": 0.002403012476861477, "step": 1685 }, { "epoch": 2.8019132785692005, "grad_norm": 0.0005747679970227182, "learning_rate": 1.1349001331383921e-06, "loss": 0.001767489593476057, "step": 1686 }, { "epoch": 2.8035769990641572, "grad_norm": 0.0015463011804968119, "learning_rate": 1.132024401931988e-06, "loss": 0.002241876907646656, "step": 1687 }, { "epoch": 2.805240719559114, "grad_norm": 0.0010779668809846044, "learning_rate": 1.12915125206848e-06, "loss": 0.0010287202894687653, "step": 1688 }, { "epoch": 2.806904440054071, "grad_norm": 0.0007248444017022848, "learning_rate": 1.1262806889694455e-06, "loss": 0.0018740869127213955, "step": 1689 }, { "epoch": 2.8085681605490276, "grad_norm": 0.000944844214245677, "learning_rate": 1.1234127180515787e-06, "loss": 0.0012290524318814278, "step": 1690 }, { "epoch": 2.8102318810439844, "grad_norm": 0.0008783553494140506, "learning_rate": 1.1205473447266843e-06, "loss": 0.0016621832037344575, "step": 1691 }, { "epoch": 2.8118956015389416, "grad_norm": 0.0011562372092157602, "learning_rate": 1.117684574401666e-06, "loss": 0.0015901480801403522, "step": 1692 }, { "epoch": 2.8135593220338984, "grad_norm": 0.0003519030869938433, "learning_rate": 1.1148244124785143e-06, "loss": 0.0010819558519870043, "step": 1693 }, { "epoch": 2.815223042528855, "grad_norm": 0.0007611211040057242, "learning_rate": 1.111966864354298e-06, "loss": 0.0018490708898752928, "step": 1694 }, { "epoch": 2.816886763023812, "grad_norm": 0.000434171553933993, "learning_rate": 1.1091119354211544e-06, "loss": 0.0010143747786059976, "step": 1695 }, { "epoch": 2.8185504835187687, "grad_norm": 0.0005083135329186916, "learning_rate": 1.1062596310662775e-06, "loss": 0.0012547963997349143, "step": 1696 }, { "epoch": 2.820214204013726, "grad_norm": 0.000701703829690814, "learning_rate": 1.1034099566719104e-06, "loss": 0.001071410602889955, "step": 1697 }, { "epoch": 2.8218779245086827, "grad_norm": 0.00045851178583689034, "learning_rate": 1.1005629176153302e-06, "loss": 0.0013043745420873165, "step": 1698 }, { "epoch": 2.8235416450036395, "grad_norm": 0.0009692832245491445, "learning_rate": 1.097718519268844e-06, "loss": 0.0008316121529787779, "step": 1699 }, { "epoch": 2.8252053654985962, "grad_norm": 0.0006598404725082219, "learning_rate": 1.0948767669997762e-06, "loss": 0.0016597459325566888, "step": 1700 }, { "epoch": 2.8252053654985962, "eval_loss": 0.0020048110745847225, "eval_runtime": 491.3675, "eval_samples_per_second": 9.785, "eval_steps_per_second": 2.446, "step": 1700 }, { "epoch": 2.826869085993553, "grad_norm": 0.001109412987716496, "learning_rate": 1.092037666170456e-06, "loss": 0.004444441292434931, "step": 1701 }, { "epoch": 2.82853280648851, "grad_norm": 0.0004603521665558219, "learning_rate": 1.0892012221382115e-06, "loss": 0.0005218712612986565, "step": 1702 }, { "epoch": 2.8301965269834666, "grad_norm": 0.0006472832174040377, "learning_rate": 1.0863674402553564e-06, "loss": 0.0018780764658004045, "step": 1703 }, { "epoch": 2.8318602474784234, "grad_norm": 0.00071156449848786, "learning_rate": 1.08353632586918e-06, "loss": 0.0007375496788881719, "step": 1704 }, { "epoch": 2.8335239679733806, "grad_norm": 0.000391888344893232, "learning_rate": 1.0807078843219395e-06, "loss": 0.0014355393359437585, "step": 1705 }, { "epoch": 2.8351876884683374, "grad_norm": 0.0008743875660002232, "learning_rate": 1.077882120950849e-06, "loss": 0.001603996381163597, "step": 1706 }, { "epoch": 2.836851408963294, "grad_norm": 0.0015048839850351214, "learning_rate": 1.0750590410880671e-06, "loss": 0.001089327735826373, "step": 1707 }, { "epoch": 2.838515129458251, "grad_norm": 0.0005386308184824884, "learning_rate": 1.072238650060691e-06, "loss": 0.0015150413382798433, "step": 1708 }, { "epoch": 2.840178849953208, "grad_norm": 0.0011673681437969208, "learning_rate": 1.0694209531907412e-06, "loss": 0.0028161397203803062, "step": 1709 }, { "epoch": 2.841842570448165, "grad_norm": 0.0003765317960642278, "learning_rate": 1.0666059557951566e-06, "loss": 0.0004952827002853155, "step": 1710 }, { "epoch": 2.8435062909431217, "grad_norm": 0.0005612285458482802, "learning_rate": 1.0637936631857815e-06, "loss": 0.001651897095143795, "step": 1711 }, { "epoch": 2.8451700114380785, "grad_norm": 0.0003087786608375609, "learning_rate": 1.0609840806693567e-06, "loss": 0.0002589740033727139, "step": 1712 }, { "epoch": 2.8468337319330352, "grad_norm": 0.00024726163246668875, "learning_rate": 1.0581772135475089e-06, "loss": 0.0003322141710668802, "step": 1713 }, { "epoch": 2.848497452427992, "grad_norm": 0.0005495923687703907, "learning_rate": 1.0553730671167412e-06, "loss": 0.000860578496940434, "step": 1714 }, { "epoch": 2.850161172922949, "grad_norm": 0.0003994822036474943, "learning_rate": 1.052571646668421e-06, "loss": 0.00059411337133497, "step": 1715 }, { "epoch": 2.8518248934179056, "grad_norm": 0.0005885502323508263, "learning_rate": 1.0497729574887744e-06, "loss": 0.0014929859898984432, "step": 1716 }, { "epoch": 2.853488613912863, "grad_norm": 0.000602265412453562, "learning_rate": 1.0469770048588723e-06, "loss": 0.0020361102651804686, "step": 1717 }, { "epoch": 2.8551523344078196, "grad_norm": 0.000675926567055285, "learning_rate": 1.0441837940546217e-06, "loss": 0.002011209260672331, "step": 1718 }, { "epoch": 2.8568160549027763, "grad_norm": 0.0006539134192280471, "learning_rate": 1.0413933303467578e-06, "loss": 0.001942803617566824, "step": 1719 }, { "epoch": 2.858479775397733, "grad_norm": 0.0003630979626905173, "learning_rate": 1.038605619000828e-06, "loss": 0.0002884640416596085, "step": 1720 }, { "epoch": 2.86014349589269, "grad_norm": 0.00048402079846709967, "learning_rate": 1.0358206652771896e-06, "loss": 0.0006971699185669422, "step": 1721 }, { "epoch": 2.861807216387647, "grad_norm": 0.000931443297304213, "learning_rate": 1.033038474430995e-06, "loss": 0.0030923194717615843, "step": 1722 }, { "epoch": 2.863470936882604, "grad_norm": 0.0006461223820224404, "learning_rate": 1.0302590517121835e-06, "loss": 0.0008790866122581065, "step": 1723 }, { "epoch": 2.8651346573775607, "grad_norm": 0.0007613158086314797, "learning_rate": 1.0274824023654717e-06, "loss": 0.001279466669075191, "step": 1724 }, { "epoch": 2.8667983778725175, "grad_norm": 0.010180974379181862, "learning_rate": 1.0247085316303401e-06, "loss": 0.007522291969507933, "step": 1725 }, { "epoch": 2.8684620983674742, "grad_norm": 0.00044024086673744023, "learning_rate": 1.0219374447410289e-06, "loss": 0.0016961991786956787, "step": 1726 }, { "epoch": 2.870125818862431, "grad_norm": 0.00041707613854669034, "learning_rate": 1.019169146926524e-06, "loss": 0.0007171843899413943, "step": 1727 }, { "epoch": 2.871789539357388, "grad_norm": 0.0010140544036403298, "learning_rate": 1.016403643410549e-06, "loss": 0.002007360104471445, "step": 1728 }, { "epoch": 2.8734532598523446, "grad_norm": 0.0014105895534157753, "learning_rate": 1.013640939411554e-06, "loss": 0.0018892575753852725, "step": 1729 }, { "epoch": 2.875116980347302, "grad_norm": 0.0012860961724072695, "learning_rate": 1.010881040142708e-06, "loss": 0.0023204863537102938, "step": 1730 }, { "epoch": 2.8767807008422586, "grad_norm": 0.0006491722888313234, "learning_rate": 1.0081239508118842e-06, "loss": 0.0016327498015016317, "step": 1731 }, { "epoch": 2.8784444213372153, "grad_norm": 0.0014453501207754016, "learning_rate": 1.0053696766216566e-06, "loss": 0.002087503205984831, "step": 1732 }, { "epoch": 2.880108141832172, "grad_norm": 0.00037019955925643444, "learning_rate": 1.0026182227692865e-06, "loss": 0.0004213819629512727, "step": 1733 }, { "epoch": 2.881771862327129, "grad_norm": 0.0007157890941016376, "learning_rate": 9.998695944467127e-07, "loss": 0.0005501360283233225, "step": 1734 }, { "epoch": 2.883435582822086, "grad_norm": 0.000919701240491122, "learning_rate": 9.97123796840543e-07, "loss": 0.0014758924953639507, "step": 1735 }, { "epoch": 2.885099303317043, "grad_norm": 0.000745898752938956, "learning_rate": 9.943808351320418e-07, "loss": 0.0012118774466216564, "step": 1736 }, { "epoch": 2.8867630238119997, "grad_norm": 0.0008136593387462199, "learning_rate": 9.916407144971245e-07, "loss": 0.0015056411502882838, "step": 1737 }, { "epoch": 2.8884267443069565, "grad_norm": 0.0004307371564209461, "learning_rate": 9.889034401063443e-07, "loss": 0.0003619653289206326, "step": 1738 }, { "epoch": 2.8900904648019132, "grad_norm": 0.0006141100311651826, "learning_rate": 9.861690171248841e-07, "loss": 0.001559469848871231, "step": 1739 }, { "epoch": 2.89175418529687, "grad_norm": 0.0005701344343833625, "learning_rate": 9.834374507125458e-07, "loss": 0.0010183728300035, "step": 1740 }, { "epoch": 2.893417905791827, "grad_norm": 0.0004537877975963056, "learning_rate": 9.807087460237419e-07, "loss": 0.000840195338241756, "step": 1741 }, { "epoch": 2.8950816262867836, "grad_norm": 0.00036333585740067065, "learning_rate": 9.779829082074827e-07, "loss": 0.0005079791881144047, "step": 1742 }, { "epoch": 2.896745346781741, "grad_norm": 0.0007203708519227803, "learning_rate": 9.752599424073707e-07, "loss": 0.0018293685279786587, "step": 1743 }, { "epoch": 2.8984090672766976, "grad_norm": 0.0016104558017104864, "learning_rate": 9.725398537615894e-07, "loss": 0.0006681864615529776, "step": 1744 }, { "epoch": 2.9000727877716543, "grad_norm": 0.00028292901697568595, "learning_rate": 9.698226474028913e-07, "loss": 0.0003127449890598655, "step": 1745 }, { "epoch": 2.901736508266611, "grad_norm": 0.0007463365909643471, "learning_rate": 9.671083284585925e-07, "loss": 0.0006199987255968153, "step": 1746 }, { "epoch": 2.903400228761568, "grad_norm": 0.0015855298843234777, "learning_rate": 9.643969020505573e-07, "loss": 0.0009128288365900517, "step": 1747 }, { "epoch": 2.905063949256525, "grad_norm": 0.0008573647937737405, "learning_rate": 9.616883732951945e-07, "loss": 0.002327641937881708, "step": 1748 }, { "epoch": 2.906727669751482, "grad_norm": 0.0010641596745699644, "learning_rate": 9.589827473034443e-07, "loss": 0.0026898360811173916, "step": 1749 }, { "epoch": 2.9083913902464387, "grad_norm": 0.00045034498907625675, "learning_rate": 9.562800291807695e-07, "loss": 0.000989622320048511, "step": 1750 }, { "epoch": 2.9100551107413954, "grad_norm": 0.00027441600104793906, "learning_rate": 9.535802240271455e-07, "loss": 0.0004929627757519484, "step": 1751 }, { "epoch": 2.9117188312363522, "grad_norm": 0.000503926130477339, "learning_rate": 9.508833369370524e-07, "loss": 0.0007410001708194613, "step": 1752 }, { "epoch": 2.913382551731309, "grad_norm": 0.00037136339233256876, "learning_rate": 9.481893729994609e-07, "loss": 0.0009830060880631208, "step": 1753 }, { "epoch": 2.9150462722262658, "grad_norm": 0.0011238381266593933, "learning_rate": 9.454983372978288e-07, "loss": 0.0027857953682541847, "step": 1754 }, { "epoch": 2.916709992721223, "grad_norm": 0.0020219890866428614, "learning_rate": 9.428102349100868e-07, "loss": 0.002705494174733758, "step": 1755 }, { "epoch": 2.9183737132161798, "grad_norm": 0.0005653185653500259, "learning_rate": 9.40125070908631e-07, "loss": 0.001744154840707779, "step": 1756 }, { "epoch": 2.9200374337111366, "grad_norm": 0.0004890793352387846, "learning_rate": 9.374428503603139e-07, "loss": 0.0008477336959913373, "step": 1757 }, { "epoch": 2.9217011542060933, "grad_norm": 0.0013079852797091007, "learning_rate": 9.347635783264309e-07, "loss": 0.004897422157227993, "step": 1758 }, { "epoch": 2.92336487470105, "grad_norm": 0.000551872537471354, "learning_rate": 9.32087259862716e-07, "loss": 0.0006481015589088202, "step": 1759 }, { "epoch": 2.9250285951960073, "grad_norm": 0.0008821063674986362, "learning_rate": 9.294139000193292e-07, "loss": 0.0007773377583362162, "step": 1760 }, { "epoch": 2.926692315690964, "grad_norm": 0.0018709349678829312, "learning_rate": 9.267435038408479e-07, "loss": 0.002509012818336487, "step": 1761 }, { "epoch": 2.928356036185921, "grad_norm": 0.0023014312610030174, "learning_rate": 9.240760763662562e-07, "loss": 0.0031186698470264673, "step": 1762 }, { "epoch": 2.9300197566808777, "grad_norm": 0.0033249291591346264, "learning_rate": 9.214116226289388e-07, "loss": 0.004345408175140619, "step": 1763 }, { "epoch": 2.9316834771758344, "grad_norm": 0.0006871186196804047, "learning_rate": 9.187501476566648e-07, "loss": 0.0013219594256952405, "step": 1764 }, { "epoch": 2.933347197670791, "grad_norm": 0.0013599384110420942, "learning_rate": 9.16091656471586e-07, "loss": 0.0026010123547166586, "step": 1765 }, { "epoch": 2.935010918165748, "grad_norm": 0.001112336409278214, "learning_rate": 9.134361540902225e-07, "loss": 0.001636276487261057, "step": 1766 }, { "epoch": 2.9366746386607048, "grad_norm": 0.00034071304253302515, "learning_rate": 9.10783645523455e-07, "loss": 0.0011249319650232792, "step": 1767 }, { "epoch": 2.938338359155662, "grad_norm": 0.0002553804370108992, "learning_rate": 9.081341357765145e-07, "loss": 0.0011264397762715816, "step": 1768 }, { "epoch": 2.9400020796506188, "grad_norm": 0.0016837348230183125, "learning_rate": 9.054876298489742e-07, "loss": 0.0035460232757031918, "step": 1769 }, { "epoch": 2.9416658001455755, "grad_norm": 0.0009260933729819953, "learning_rate": 9.02844132734737e-07, "loss": 0.0019019779283553362, "step": 1770 }, { "epoch": 2.9433295206405323, "grad_norm": 0.0003123321512248367, "learning_rate": 9.002036494220306e-07, "loss": 0.00030813238117843866, "step": 1771 }, { "epoch": 2.944993241135489, "grad_norm": 0.0014501853147521615, "learning_rate": 8.975661848933945e-07, "loss": 0.0015067444182932377, "step": 1772 }, { "epoch": 2.9466569616304463, "grad_norm": 0.0007959870272316039, "learning_rate": 8.949317441256724e-07, "loss": 0.0015829142648726702, "step": 1773 }, { "epoch": 2.948320682125403, "grad_norm": 0.0006247299024835229, "learning_rate": 8.923003320900014e-07, "loss": 0.0010385081404820085, "step": 1774 }, { "epoch": 2.94998440262036, "grad_norm": 0.0006999396136961877, "learning_rate": 8.896719537518048e-07, "loss": 0.0003225843538530171, "step": 1775 }, { "epoch": 2.9516481231153167, "grad_norm": 0.001017800415866077, "learning_rate": 8.870466140707795e-07, "loss": 0.0015760234091430902, "step": 1776 }, { "epoch": 2.9533118436102734, "grad_norm": 0.0008578760316595435, "learning_rate": 8.844243180008913e-07, "loss": 0.00260445149615407, "step": 1777 }, { "epoch": 2.95497556410523, "grad_norm": 0.0011144928866997361, "learning_rate": 8.818050704903589e-07, "loss": 0.0008055091602727771, "step": 1778 }, { "epoch": 2.956639284600187, "grad_norm": 0.0011398998321965337, "learning_rate": 8.791888764816514e-07, "loss": 0.0029183195438236, "step": 1779 }, { "epoch": 2.9583030050951438, "grad_norm": 0.0003928740625269711, "learning_rate": 8.765757409114753e-07, "loss": 0.0009769850876182318, "step": 1780 }, { "epoch": 2.959966725590101, "grad_norm": 0.0003030920051969588, "learning_rate": 8.739656687107656e-07, "loss": 0.00029227673076093197, "step": 1781 }, { "epoch": 2.9616304460850578, "grad_norm": 0.00041401237831451, "learning_rate": 8.713586648046768e-07, "loss": 0.001756420824676752, "step": 1782 }, { "epoch": 2.9632941665800145, "grad_norm": 0.0010973252356052399, "learning_rate": 8.68754734112574e-07, "loss": 0.004302047658711672, "step": 1783 }, { "epoch": 2.9649578870749713, "grad_norm": 0.0006523855263367295, "learning_rate": 8.661538815480228e-07, "loss": 0.0008866323623806238, "step": 1784 }, { "epoch": 2.966621607569928, "grad_norm": 0.0012584852520376444, "learning_rate": 8.635561120187813e-07, "loss": 0.001941533526405692, "step": 1785 }, { "epoch": 2.9682853280648853, "grad_norm": 0.0008230031817220151, "learning_rate": 8.609614304267877e-07, "loss": 0.002135834889486432, "step": 1786 }, { "epoch": 2.969949048559842, "grad_norm": 0.0012836785754188895, "learning_rate": 8.583698416681555e-07, "loss": 0.0027004701551049948, "step": 1787 }, { "epoch": 2.971612769054799, "grad_norm": 0.00196822895668447, "learning_rate": 8.557813506331616e-07, "loss": 0.0021796643268316984, "step": 1788 }, { "epoch": 2.9732764895497557, "grad_norm": 0.00046909775119274855, "learning_rate": 8.531959622062372e-07, "loss": 0.0008232493419200182, "step": 1789 }, { "epoch": 2.9749402100447124, "grad_norm": 0.0011132569052278996, "learning_rate": 8.506136812659601e-07, "loss": 0.001676748739555478, "step": 1790 }, { "epoch": 2.976603930539669, "grad_norm": 0.001434841426089406, "learning_rate": 8.480345126850414e-07, "loss": 0.0022043860517442226, "step": 1791 }, { "epoch": 2.978267651034626, "grad_norm": 0.0004792559484485537, "learning_rate": 8.454584613303227e-07, "loss": 0.0009898152202367783, "step": 1792 }, { "epoch": 2.9799313715295828, "grad_norm": 0.0005377150373533368, "learning_rate": 8.428855320627613e-07, "loss": 0.0008193760877475142, "step": 1793 }, { "epoch": 2.98159509202454, "grad_norm": 0.00031275334185920656, "learning_rate": 8.403157297374239e-07, "loss": 0.0009223963716067374, "step": 1794 }, { "epoch": 2.9832588125194968, "grad_norm": 0.0010333925019949675, "learning_rate": 8.377490592034779e-07, "loss": 0.0013084551319479942, "step": 1795 }, { "epoch": 2.9849225330144535, "grad_norm": 0.0006100983009673655, "learning_rate": 8.35185525304178e-07, "loss": 0.0019747803453356028, "step": 1796 }, { "epoch": 2.9865862535094103, "grad_norm": 0.000807909993454814, "learning_rate": 8.326251328768626e-07, "loss": 0.0023377123288810253, "step": 1797 }, { "epoch": 2.9882499740043675, "grad_norm": 0.0010610126191750169, "learning_rate": 8.300678867529415e-07, "loss": 0.001045091892592609, "step": 1798 }, { "epoch": 2.9899136944993243, "grad_norm": 0.0007304865284822881, "learning_rate": 8.275137917578879e-07, "loss": 0.0006219320348463953, "step": 1799 }, { "epoch": 2.991577414994281, "grad_norm": 0.0004164436541032046, "learning_rate": 8.249628527112282e-07, "loss": 0.0018955947598442435, "step": 1800 }, { "epoch": 2.991577414994281, "eval_loss": 0.0018649041885510087, "eval_runtime": 490.6057, "eval_samples_per_second": 9.8, "eval_steps_per_second": 2.45, "step": 1800 }, { "epoch": 2.993241135489238, "grad_norm": 0.000529412820469588, "learning_rate": 8.224150744265352e-07, "loss": 0.0011494626523926854, "step": 1801 }, { "epoch": 2.9949048559841946, "grad_norm": 0.0014844761462882161, "learning_rate": 8.198704617114143e-07, "loss": 0.001377538195811212, "step": 1802 }, { "epoch": 2.9965685764791514, "grad_norm": 0.0004270290955901146, "learning_rate": 8.173290193674996e-07, "loss": 0.0015947625506669283, "step": 1803 }, { "epoch": 2.998232296974108, "grad_norm": 0.00046261935494840145, "learning_rate": 8.147907521904433e-07, "loss": 0.0006508635706268251, "step": 1804 }, { "epoch": 2.999896017469065, "grad_norm": 0.0010813989210873842, "learning_rate": 8.122556649699051e-07, "loss": 0.0012900270521640778, "step": 1805 }, { "epoch": 3.0, "grad_norm": 0.0009801322594285011, "learning_rate": 8.097237624895452e-07, "loss": 0.0008373880991712213, "step": 1806 }, { "epoch": 3.0016637204949568, "grad_norm": 0.0006377885001711547, "learning_rate": 8.07195049527012e-07, "loss": 0.0012004249729216099, "step": 1807 }, { "epoch": 3.0033274409899136, "grad_norm": 0.0005789811257272959, "learning_rate": 8.046695308539376e-07, "loss": 0.0004920345381833613, "step": 1808 }, { "epoch": 3.0049911614848703, "grad_norm": 0.0002769168349914253, "learning_rate": 8.021472112359255e-07, "loss": 0.0006314446218311787, "step": 1809 }, { "epoch": 3.0066548819798276, "grad_norm": 0.00032566802110522985, "learning_rate": 7.996280954325433e-07, "loss": 0.0009013297967612743, "step": 1810 }, { "epoch": 3.0083186024747843, "grad_norm": 0.0004201479896437377, "learning_rate": 7.971121881973126e-07, "loss": 0.0005803474923595786, "step": 1811 }, { "epoch": 3.009982322969741, "grad_norm": 0.0005800558137707412, "learning_rate": 7.945994942777016e-07, "loss": 0.0009451241930946708, "step": 1812 }, { "epoch": 3.011646043464698, "grad_norm": 0.000676999450661242, "learning_rate": 7.92090018415112e-07, "loss": 0.0016937021864578128, "step": 1813 }, { "epoch": 3.0133097639596547, "grad_norm": 0.0013515023747459054, "learning_rate": 7.895837653448759e-07, "loss": 0.0018922430463135242, "step": 1814 }, { "epoch": 3.0149734844546114, "grad_norm": 0.0002872826880775392, "learning_rate": 7.870807397962438e-07, "loss": 0.0008894064230844378, "step": 1815 }, { "epoch": 3.0166372049495687, "grad_norm": 0.0007774510886520147, "learning_rate": 7.845809464923748e-07, "loss": 0.0020803227089345455, "step": 1816 }, { "epoch": 3.0183009254445254, "grad_norm": 0.0004965619882568717, "learning_rate": 7.820843901503308e-07, "loss": 0.001645309617742896, "step": 1817 }, { "epoch": 3.019964645939482, "grad_norm": 0.00048233833513222635, "learning_rate": 7.79591075481062e-07, "loss": 0.002242596121504903, "step": 1818 }, { "epoch": 3.021628366434439, "grad_norm": 0.0013223245041444898, "learning_rate": 7.771010071894052e-07, "loss": 0.0007967266719788313, "step": 1819 }, { "epoch": 3.0232920869293958, "grad_norm": 0.00044782142504118383, "learning_rate": 7.7461418997407e-07, "loss": 0.0008925136062316597, "step": 1820 }, { "epoch": 3.0249558074243525, "grad_norm": 0.0006108682719059289, "learning_rate": 7.721306285276309e-07, "loss": 0.000799420871771872, "step": 1821 }, { "epoch": 3.0266195279193098, "grad_norm": 0.0005464836140163243, "learning_rate": 7.696503275365194e-07, "loss": 0.0025266679003834724, "step": 1822 }, { "epoch": 3.0282832484142665, "grad_norm": 0.00018666086543817073, "learning_rate": 7.671732916810154e-07, "loss": 0.00021197514433879405, "step": 1823 }, { "epoch": 3.0299469689092233, "grad_norm": 0.00032212541555054486, "learning_rate": 7.646995256352346e-07, "loss": 0.001422622473910451, "step": 1824 }, { "epoch": 3.03161068940418, "grad_norm": 0.0002873683115467429, "learning_rate": 7.622290340671256e-07, "loss": 0.0008930754847824574, "step": 1825 }, { "epoch": 3.033274409899137, "grad_norm": 0.0004030826676171273, "learning_rate": 7.597618216384576e-07, "loss": 0.0012631858699023724, "step": 1826 }, { "epoch": 3.0349381303940937, "grad_norm": 0.0005824784748256207, "learning_rate": 7.572978930048108e-07, "loss": 0.000932716007810086, "step": 1827 }, { "epoch": 3.0366018508890504, "grad_norm": 0.0015588699607178569, "learning_rate": 7.54837252815571e-07, "loss": 0.0011526403250172734, "step": 1828 }, { "epoch": 3.0382655713840077, "grad_norm": 0.0006636729813180864, "learning_rate": 7.523799057139158e-07, "loss": 0.0007361185271292925, "step": 1829 }, { "epoch": 3.0399292918789644, "grad_norm": 0.0008180178119800985, "learning_rate": 7.49925856336812e-07, "loss": 0.0009815450757741928, "step": 1830 }, { "epoch": 3.041593012373921, "grad_norm": 0.0004374012351036072, "learning_rate": 7.474751093150015e-07, "loss": 0.0007682805880904198, "step": 1831 }, { "epoch": 3.043256732868878, "grad_norm": 0.00045620554010383785, "learning_rate": 7.450276692729957e-07, "loss": 0.00044830897240899503, "step": 1832 }, { "epoch": 3.0449204533638348, "grad_norm": 0.00020274176495149732, "learning_rate": 7.425835408290655e-07, "loss": 0.0008629217045381665, "step": 1833 }, { "epoch": 3.0465841738587915, "grad_norm": 0.0003887891652993858, "learning_rate": 7.40142728595234e-07, "loss": 0.0013906953390687704, "step": 1834 }, { "epoch": 3.0482478943537488, "grad_norm": 0.0005733828875236213, "learning_rate": 7.377052371772637e-07, "loss": 0.0016839816235005856, "step": 1835 }, { "epoch": 3.0499116148487055, "grad_norm": 0.0010170930763706565, "learning_rate": 7.352710711746536e-07, "loss": 0.0007546496344730258, "step": 1836 }, { "epoch": 3.0515753353436623, "grad_norm": 0.0003123862552456558, "learning_rate": 7.328402351806269e-07, "loss": 0.0006829963531345129, "step": 1837 }, { "epoch": 3.053239055838619, "grad_norm": 0.0003148913965560496, "learning_rate": 7.304127337821229e-07, "loss": 0.000566673232242465, "step": 1838 }, { "epoch": 3.054902776333576, "grad_norm": 0.00047699068090878427, "learning_rate": 7.279885715597896e-07, "loss": 0.0013908733380958438, "step": 1839 }, { "epoch": 3.0565664968285327, "grad_norm": 0.0006750078173354268, "learning_rate": 7.255677530879713e-07, "loss": 0.0002647516957949847, "step": 1840 }, { "epoch": 3.05823021732349, "grad_norm": 0.0004247261967975646, "learning_rate": 7.231502829347056e-07, "loss": 0.001544360420666635, "step": 1841 }, { "epoch": 3.0598939378184467, "grad_norm": 0.00041044820682145655, "learning_rate": 7.207361656617112e-07, "loss": 0.0008406200795434415, "step": 1842 }, { "epoch": 3.0615576583134034, "grad_norm": 0.0009824121370911598, "learning_rate": 7.183254058243791e-07, "loss": 0.0012988644884899259, "step": 1843 }, { "epoch": 3.06322137880836, "grad_norm": 0.0019123097881674767, "learning_rate": 7.159180079717656e-07, "loss": 0.0016793514369055629, "step": 1844 }, { "epoch": 3.064885099303317, "grad_norm": 0.000979103846475482, "learning_rate": 7.135139766465838e-07, "loss": 0.001676307525485754, "step": 1845 }, { "epoch": 3.0665488197982738, "grad_norm": 0.0005059174727648497, "learning_rate": 7.111133163851916e-07, "loss": 0.001257180469110608, "step": 1846 }, { "epoch": 3.0682125402932305, "grad_norm": 0.00040546999662183225, "learning_rate": 7.087160317175881e-07, "loss": 0.0006301425164565444, "step": 1847 }, { "epoch": 3.0698762607881878, "grad_norm": 0.0005103031289763749, "learning_rate": 7.06322127167402e-07, "loss": 0.000969672342762351, "step": 1848 }, { "epoch": 3.0715399812831445, "grad_norm": 0.0009169050026685, "learning_rate": 7.03931607251884e-07, "loss": 0.000986608793027699, "step": 1849 }, { "epoch": 3.0732037017781013, "grad_norm": 0.0005506612360477448, "learning_rate": 7.015444764818988e-07, "loss": 0.0008717355667613447, "step": 1850 }, { "epoch": 3.074867422273058, "grad_norm": 0.0004976927884854376, "learning_rate": 6.991607393619129e-07, "loss": 0.000616822624579072, "step": 1851 }, { "epoch": 3.076531142768015, "grad_norm": 0.00012596679152920842, "learning_rate": 6.967804003899925e-07, "loss": 0.00013015244621783495, "step": 1852 }, { "epoch": 3.0781948632629716, "grad_norm": 0.0006843673181720078, "learning_rate": 6.944034640577896e-07, "loss": 0.001166867557913065, "step": 1853 }, { "epoch": 3.079858583757929, "grad_norm": 0.0006668114219792187, "learning_rate": 6.920299348505365e-07, "loss": 0.0013408659724518657, "step": 1854 }, { "epoch": 3.0815223042528856, "grad_norm": 0.0005010065506212413, "learning_rate": 6.896598172470356e-07, "loss": 0.0011945900041610003, "step": 1855 }, { "epoch": 3.0831860247478424, "grad_norm": 0.00048501609126105905, "learning_rate": 6.872931157196519e-07, "loss": 0.0008071433403529227, "step": 1856 }, { "epoch": 3.084849745242799, "grad_norm": 0.00019211566541343927, "learning_rate": 6.849298347343044e-07, "loss": 0.00031206224230118096, "step": 1857 }, { "epoch": 3.086513465737756, "grad_norm": 0.0005477959057316184, "learning_rate": 6.825699787504586e-07, "loss": 0.0014917771331965923, "step": 1858 }, { "epoch": 3.0881771862327128, "grad_norm": 0.0004196336667519063, "learning_rate": 6.802135522211142e-07, "loss": 0.0007323448662646115, "step": 1859 }, { "epoch": 3.0898409067276695, "grad_norm": 0.0008467534207738936, "learning_rate": 6.778605595928025e-07, "loss": 0.0009000095305964351, "step": 1860 }, { "epoch": 3.0915046272226268, "grad_norm": 0.0008993221563287079, "learning_rate": 6.755110053055738e-07, "loss": 0.0020232279784977436, "step": 1861 }, { "epoch": 3.0931683477175835, "grad_norm": 0.0012050456134602427, "learning_rate": 6.731648937929911e-07, "loss": 0.002171237487345934, "step": 1862 }, { "epoch": 3.0948320682125403, "grad_norm": 0.000659894838463515, "learning_rate": 6.708222294821196e-07, "loss": 0.0014221647288650274, "step": 1863 }, { "epoch": 3.096495788707497, "grad_norm": 0.0002312004507984966, "learning_rate": 6.684830167935207e-07, "loss": 0.0005635049310512841, "step": 1864 }, { "epoch": 3.098159509202454, "grad_norm": 0.0006758279632776976, "learning_rate": 6.66147260141243e-07, "loss": 0.0010983545798808336, "step": 1865 }, { "epoch": 3.0998232296974106, "grad_norm": 0.0014547808095812798, "learning_rate": 6.638149639328134e-07, "loss": 0.0019277246901765466, "step": 1866 }, { "epoch": 3.101486950192368, "grad_norm": 0.0007520727231167257, "learning_rate": 6.614861325692277e-07, "loss": 0.0018726043635979295, "step": 1867 }, { "epoch": 3.1031506706873246, "grad_norm": 0.0008481996483169496, "learning_rate": 6.591607704449446e-07, "loss": 0.0021393303759396076, "step": 1868 }, { "epoch": 3.1048143911822814, "grad_norm": 0.0004936470650136471, "learning_rate": 6.568388819478769e-07, "loss": 0.0007193313795141876, "step": 1869 }, { "epoch": 3.106478111677238, "grad_norm": 0.0017772550927475095, "learning_rate": 6.545204714593825e-07, "loss": 0.0028288543689996004, "step": 1870 }, { "epoch": 3.108141832172195, "grad_norm": 0.0007597859366796911, "learning_rate": 6.522055433542557e-07, "loss": 0.002685034414753318, "step": 1871 }, { "epoch": 3.1098055526671518, "grad_norm": 0.0007303763413801789, "learning_rate": 6.49894102000721e-07, "loss": 0.0016073291189968586, "step": 1872 }, { "epoch": 3.111469273162109, "grad_norm": 0.0007561154197901487, "learning_rate": 6.47586151760421e-07, "loss": 0.0009440925787203014, "step": 1873 }, { "epoch": 3.1131329936570658, "grad_norm": 0.0003546578227542341, "learning_rate": 6.452816969884127e-07, "loss": 0.0004244074516464025, "step": 1874 }, { "epoch": 3.1147967141520225, "grad_norm": 0.00033709831768646836, "learning_rate": 6.429807420331568e-07, "loss": 0.00048436602810397744, "step": 1875 }, { "epoch": 3.1164604346469793, "grad_norm": 0.0008623041212558746, "learning_rate": 6.406832912365101e-07, "loss": 0.0015984228812158108, "step": 1876 }, { "epoch": 3.118124155141936, "grad_norm": 0.000693855807185173, "learning_rate": 6.383893489337172e-07, "loss": 0.0021479884162545204, "step": 1877 }, { "epoch": 3.119787875636893, "grad_norm": 0.0004894212470389903, "learning_rate": 6.360989194534004e-07, "loss": 0.0007753630052320659, "step": 1878 }, { "epoch": 3.1214515961318496, "grad_norm": 0.0005258218152448535, "learning_rate": 6.338120071175558e-07, "loss": 0.0011958642862737179, "step": 1879 }, { "epoch": 3.123115316626807, "grad_norm": 0.00035203678999096155, "learning_rate": 6.315286162415412e-07, "loss": 0.0011395043693482876, "step": 1880 }, { "epoch": 3.1247790371217636, "grad_norm": 0.0011336497263982892, "learning_rate": 6.292487511340709e-07, "loss": 0.0007257706602104008, "step": 1881 }, { "epoch": 3.1264427576167204, "grad_norm": 0.0004972171154804528, "learning_rate": 6.269724160972043e-07, "loss": 0.0012949982192367315, "step": 1882 }, { "epoch": 3.128106478111677, "grad_norm": 0.00041867265827022493, "learning_rate": 6.246996154263421e-07, "loss": 0.0004262304864823818, "step": 1883 }, { "epoch": 3.129770198606634, "grad_norm": 0.0005103400908410549, "learning_rate": 6.224303534102125e-07, "loss": 0.0006815506494604051, "step": 1884 }, { "epoch": 3.1314339191015907, "grad_norm": 0.0010413973359391093, "learning_rate": 6.201646343308685e-07, "loss": 0.0015371940098702908, "step": 1885 }, { "epoch": 3.133097639596548, "grad_norm": 0.0015511265955865383, "learning_rate": 6.179024624636772e-07, "loss": 0.0027551804669201374, "step": 1886 }, { "epoch": 3.1347613600915047, "grad_norm": 0.000395275535993278, "learning_rate": 6.156438420773125e-07, "loss": 0.0005117086693644524, "step": 1887 }, { "epoch": 3.1364250805864615, "grad_norm": 0.0002733297587838024, "learning_rate": 6.133887774337471e-07, "loss": 0.0005519022815860808, "step": 1888 }, { "epoch": 3.1380888010814183, "grad_norm": 0.0009390793857164681, "learning_rate": 6.111372727882417e-07, "loss": 0.0010190055472776294, "step": 1889 }, { "epoch": 3.139752521576375, "grad_norm": 0.0008794846362434328, "learning_rate": 6.088893323893419e-07, "loss": 0.0028912704437971115, "step": 1890 }, { "epoch": 3.141416242071332, "grad_norm": 0.00033567583886906505, "learning_rate": 6.066449604788666e-07, "loss": 0.0005888398736715317, "step": 1891 }, { "epoch": 3.143079962566289, "grad_norm": 0.0005849747103638947, "learning_rate": 6.044041612919016e-07, "loss": 0.00027158469310961664, "step": 1892 }, { "epoch": 3.144743683061246, "grad_norm": 0.0003590272390283644, "learning_rate": 6.021669390567902e-07, "loss": 0.0011505287839099765, "step": 1893 }, { "epoch": 3.1464074035562026, "grad_norm": 0.00037003972101956606, "learning_rate": 5.999332979951272e-07, "loss": 0.001266230596229434, "step": 1894 }, { "epoch": 3.1480711240511594, "grad_norm": 0.0006911141099408269, "learning_rate": 5.977032423217482e-07, "loss": 0.0023922715336084366, "step": 1895 }, { "epoch": 3.149734844546116, "grad_norm": 0.00020799963385798037, "learning_rate": 5.954767762447244e-07, "loss": 0.0005746656679548323, "step": 1896 }, { "epoch": 3.151398565041073, "grad_norm": 0.0002363747189519927, "learning_rate": 5.932539039653535e-07, "loss": 0.0006022070301696658, "step": 1897 }, { "epoch": 3.1530622855360297, "grad_norm": 0.000486257195007056, "learning_rate": 5.910346296781511e-07, "loss": 0.0011896261712536216, "step": 1898 }, { "epoch": 3.154726006030987, "grad_norm": 0.0006207071710377932, "learning_rate": 5.888189575708453e-07, "loss": 0.0014273447450250387, "step": 1899 }, { "epoch": 3.1563897265259437, "grad_norm": 0.0006188419065438211, "learning_rate": 5.866068918243634e-07, "loss": 0.0006204223609529436, "step": 1900 }, { "epoch": 3.1563897265259437, "eval_loss": 0.0018834135262295604, "eval_runtime": 491.4511, "eval_samples_per_second": 9.783, "eval_steps_per_second": 2.446, "step": 1900 }, { "epoch": 3.1580534470209005, "grad_norm": 0.0005833645700477064, "learning_rate": 5.843984366128308e-07, "loss": 0.0017470639431849122, "step": 1901 }, { "epoch": 3.1597171675158573, "grad_norm": 0.0005380320362746716, "learning_rate": 5.821935961035589e-07, "loss": 0.0008071457268670201, "step": 1902 }, { "epoch": 3.161380888010814, "grad_norm": 0.0004519163048826158, "learning_rate": 5.799923744570376e-07, "loss": 0.0010613186750561, "step": 1903 }, { "epoch": 3.163044608505771, "grad_norm": 0.0016710092313587666, "learning_rate": 5.777947758269295e-07, "loss": 0.0009790394688025117, "step": 1904 }, { "epoch": 3.164708329000728, "grad_norm": 0.0010518544586375356, "learning_rate": 5.756008043600594e-07, "loss": 0.0018751542083919048, "step": 1905 }, { "epoch": 3.166372049495685, "grad_norm": 0.00041189463809132576, "learning_rate": 5.734104641964075e-07, "loss": 0.0011019426165148616, "step": 1906 }, { "epoch": 3.1680357699906416, "grad_norm": 0.0005437190993689001, "learning_rate": 5.712237594691028e-07, "loss": 0.0007574702030979097, "step": 1907 }, { "epoch": 3.1696994904855984, "grad_norm": 0.0003270143934059888, "learning_rate": 5.690406943044138e-07, "loss": 0.00037680071545764804, "step": 1908 }, { "epoch": 3.171363210980555, "grad_norm": 0.00015396956587210298, "learning_rate": 5.668612728217412e-07, "loss": 0.0007919893832877278, "step": 1909 }, { "epoch": 3.173026931475512, "grad_norm": 0.0005151602090336382, "learning_rate": 5.646854991336112e-07, "loss": 0.0012475823750719428, "step": 1910 }, { "epoch": 3.1746906519704687, "grad_norm": 0.000625112559646368, "learning_rate": 5.625133773456639e-07, "loss": 0.0008614555117674172, "step": 1911 }, { "epoch": 3.176354372465426, "grad_norm": 0.0008656211430206895, "learning_rate": 5.603449115566511e-07, "loss": 0.0023587134201079607, "step": 1912 }, { "epoch": 3.1780180929603827, "grad_norm": 0.0003512735420372337, "learning_rate": 5.581801058584252e-07, "loss": 0.00029785794322378933, "step": 1913 }, { "epoch": 3.1796818134553395, "grad_norm": 0.00034385884646326303, "learning_rate": 5.560189643359312e-07, "loss": 0.0004459562769625336, "step": 1914 }, { "epoch": 3.1813455339502963, "grad_norm": 0.0002447869919706136, "learning_rate": 5.538614910672005e-07, "loss": 0.00018510760855861008, "step": 1915 }, { "epoch": 3.183009254445253, "grad_norm": 0.0010491593275219202, "learning_rate": 5.517076901233434e-07, "loss": 0.0005361376097425818, "step": 1916 }, { "epoch": 3.18467297494021, "grad_norm": 0.0007156440988183022, "learning_rate": 5.495575655685382e-07, "loss": 0.0016970396973192692, "step": 1917 }, { "epoch": 3.186336695435167, "grad_norm": 0.001344272750429809, "learning_rate": 5.474111214600278e-07, "loss": 0.0010842399206012487, "step": 1918 }, { "epoch": 3.188000415930124, "grad_norm": 0.0005655754939652979, "learning_rate": 5.452683618481103e-07, "loss": 0.001635242602787912, "step": 1919 }, { "epoch": 3.1896641364250806, "grad_norm": 0.00028636507340706885, "learning_rate": 5.431292907761305e-07, "loss": 0.0009821431012824178, "step": 1920 }, { "epoch": 3.1913278569200374, "grad_norm": 0.00041335669811815023, "learning_rate": 5.409939122804736e-07, "loss": 0.0004407725646160543, "step": 1921 }, { "epoch": 3.192991577414994, "grad_norm": 0.0008381790830753744, "learning_rate": 5.388622303905558e-07, "loss": 0.0011460300302132964, "step": 1922 }, { "epoch": 3.194655297909951, "grad_norm": 0.0005317120812833309, "learning_rate": 5.367342491288186e-07, "loss": 0.00032618624391034245, "step": 1923 }, { "epoch": 3.196319018404908, "grad_norm": 0.0011242582695558667, "learning_rate": 5.346099725107213e-07, "loss": 0.002115800976753235, "step": 1924 }, { "epoch": 3.197982738899865, "grad_norm": 0.00022066081874072552, "learning_rate": 5.324894045447312e-07, "loss": 0.0003675711923278868, "step": 1925 }, { "epoch": 3.1996464593948217, "grad_norm": 0.00032584325526840985, "learning_rate": 5.303725492323194e-07, "loss": 0.0008598530548624694, "step": 1926 }, { "epoch": 3.2013101798897785, "grad_norm": 0.0003403827140573412, "learning_rate": 5.282594105679481e-07, "loss": 0.0009377440437674522, "step": 1927 }, { "epoch": 3.2029739003847353, "grad_norm": 0.000445442070486024, "learning_rate": 5.261499925390692e-07, "loss": 0.0010707492474466562, "step": 1928 }, { "epoch": 3.204637620879692, "grad_norm": 0.00046939656022004783, "learning_rate": 5.240442991261127e-07, "loss": 0.0007110872538760304, "step": 1929 }, { "epoch": 3.2063013413746493, "grad_norm": 0.00022708141477778554, "learning_rate": 5.219423343024804e-07, "loss": 0.000206417273147963, "step": 1930 }, { "epoch": 3.207965061869606, "grad_norm": 0.0007522849482484162, "learning_rate": 5.198441020345382e-07, "loss": 0.00209865253418684, "step": 1931 }, { "epoch": 3.209628782364563, "grad_norm": 0.0005657677538692951, "learning_rate": 5.177496062816101e-07, "loss": 0.0007334444671869278, "step": 1932 }, { "epoch": 3.2112925028595196, "grad_norm": 0.00067489332286641, "learning_rate": 5.156588509959659e-07, "loss": 0.0007933019660413265, "step": 1933 }, { "epoch": 3.2129562233544764, "grad_norm": 0.0009653370943851769, "learning_rate": 5.13571840122821e-07, "loss": 0.0015348546439781785, "step": 1934 }, { "epoch": 3.214619943849433, "grad_norm": 0.0008980388520285487, "learning_rate": 5.114885776003234e-07, "loss": 0.002591199241578579, "step": 1935 }, { "epoch": 3.21628366434439, "grad_norm": 0.0007487988914363086, "learning_rate": 5.094090673595478e-07, "loss": 0.0008501316187903285, "step": 1936 }, { "epoch": 3.217947384839347, "grad_norm": 0.0011044201673939824, "learning_rate": 5.073333133244896e-07, "loss": 0.002894491655752063, "step": 1937 }, { "epoch": 3.219611105334304, "grad_norm": 0.0007839900208637118, "learning_rate": 5.052613194120554e-07, "loss": 0.0013367477804422379, "step": 1938 }, { "epoch": 3.2212748258292607, "grad_norm": 0.000503472751006484, "learning_rate": 5.031930895320569e-07, "loss": 0.000841394648887217, "step": 1939 }, { "epoch": 3.2229385463242175, "grad_norm": 0.002264294307678938, "learning_rate": 5.011286275872021e-07, "loss": 0.0027972303796559572, "step": 1940 }, { "epoch": 3.2246022668191743, "grad_norm": 0.00025088360416702926, "learning_rate": 4.990679374730905e-07, "loss": 0.0010514447931200266, "step": 1941 }, { "epoch": 3.226265987314131, "grad_norm": 0.0012186086969450116, "learning_rate": 4.970110230782035e-07, "loss": 0.00212580687366426, "step": 1942 }, { "epoch": 3.2279297078090883, "grad_norm": 0.0005759792984463274, "learning_rate": 4.949578882838982e-07, "loss": 0.0012910933000966907, "step": 1943 }, { "epoch": 3.229593428304045, "grad_norm": 0.0003267624997533858, "learning_rate": 4.929085369643988e-07, "loss": 0.0011039895471185446, "step": 1944 }, { "epoch": 3.231257148799002, "grad_norm": 0.00042506324825808406, "learning_rate": 4.908629729867908e-07, "loss": 0.0007888518157415092, "step": 1945 }, { "epoch": 3.2329208692939586, "grad_norm": 0.0010529316496104002, "learning_rate": 4.88821200211014e-07, "loss": 0.0011536410311236978, "step": 1946 }, { "epoch": 3.2345845897889154, "grad_norm": 0.0015189244877547026, "learning_rate": 4.867832224898517e-07, "loss": 0.0011142066214233637, "step": 1947 }, { "epoch": 3.236248310283872, "grad_norm": 0.00029045421979390085, "learning_rate": 4.847490436689281e-07, "loss": 0.000906751025468111, "step": 1948 }, { "epoch": 3.237912030778829, "grad_norm": 0.00041969105950556695, "learning_rate": 4.827186675866985e-07, "loss": 0.001455049030482769, "step": 1949 }, { "epoch": 3.239575751273786, "grad_norm": 0.00031135749304667115, "learning_rate": 4.806920980744426e-07, "loss": 0.00046373007353395224, "step": 1950 }, { "epoch": 3.241239471768743, "grad_norm": 0.0017488739686086774, "learning_rate": 4.786693389562566e-07, "loss": 0.003784562461078167, "step": 1951 }, { "epoch": 3.2429031922636997, "grad_norm": 0.0002576555125415325, "learning_rate": 4.7665039404904747e-07, "loss": 0.0005821231752634048, "step": 1952 }, { "epoch": 3.2445669127586565, "grad_norm": 0.0005598700372502208, "learning_rate": 4.746352671625237e-07, "loss": 0.0012865583412349224, "step": 1953 }, { "epoch": 3.2462306332536133, "grad_norm": 0.0003530400281306356, "learning_rate": 4.72623962099191e-07, "loss": 0.0007247515022754669, "step": 1954 }, { "epoch": 3.24789435374857, "grad_norm": 0.0004015358863398433, "learning_rate": 4.7061648265434053e-07, "loss": 0.00036378641379997134, "step": 1955 }, { "epoch": 3.2495580742435273, "grad_norm": 0.0006843737210147083, "learning_rate": 4.6861283261604745e-07, "loss": 0.0021625924855470657, "step": 1956 }, { "epoch": 3.251221794738484, "grad_norm": 0.0014490418834611773, "learning_rate": 4.666130157651594e-07, "loss": 0.0012110623065382242, "step": 1957 }, { "epoch": 3.252885515233441, "grad_norm": 0.0009911368833854795, "learning_rate": 4.6461703587529106e-07, "loss": 0.0018840692937374115, "step": 1958 }, { "epoch": 3.2545492357283976, "grad_norm": 0.00027222695644013584, "learning_rate": 4.62624896712818e-07, "loss": 0.0001918284106068313, "step": 1959 }, { "epoch": 3.2562129562233544, "grad_norm": 0.0007084309472702444, "learning_rate": 4.6063660203686635e-07, "loss": 0.0016576352063566446, "step": 1960 }, { "epoch": 3.257876676718311, "grad_norm": 0.00021952651150058955, "learning_rate": 4.586521555993087e-07, "loss": 0.0004061336221639067, "step": 1961 }, { "epoch": 3.259540397213268, "grad_norm": 0.0003330149920657277, "learning_rate": 4.5667156114475695e-07, "loss": 0.0006381084094755352, "step": 1962 }, { "epoch": 3.261204117708225, "grad_norm": 0.0007844572537578642, "learning_rate": 4.5469482241055324e-07, "loss": 0.002945750020444393, "step": 1963 }, { "epoch": 3.262867838203182, "grad_norm": 0.0007807101937942207, "learning_rate": 4.527219431267646e-07, "loss": 0.0010229629697278142, "step": 1964 }, { "epoch": 3.2645315586981387, "grad_norm": 0.002043907530605793, "learning_rate": 4.507529270161759e-07, "loss": 0.004027180373668671, "step": 1965 }, { "epoch": 3.2661952791930955, "grad_norm": 0.0008136004325933754, "learning_rate": 4.4878777779428034e-07, "loss": 0.0009637029143050313, "step": 1966 }, { "epoch": 3.2678589996880523, "grad_norm": 0.0036568024661391973, "learning_rate": 4.4682649916927614e-07, "loss": 0.0013857954181730747, "step": 1967 }, { "epoch": 3.2695227201830095, "grad_norm": 0.0008362821536138654, "learning_rate": 4.4486909484205725e-07, "loss": 0.0020736800506711006, "step": 1968 }, { "epoch": 3.2711864406779663, "grad_norm": 0.0007145930430851877, "learning_rate": 4.429155685062073e-07, "loss": 0.00082812225446105, "step": 1969 }, { "epoch": 3.272850161172923, "grad_norm": 0.0006530311075039208, "learning_rate": 4.409659238479919e-07, "loss": 0.0017145945457741618, "step": 1970 }, { "epoch": 3.27451388166788, "grad_norm": 0.00036001464468427, "learning_rate": 4.39020164546351e-07, "loss": 0.0006193873705342412, "step": 1971 }, { "epoch": 3.2761776021628366, "grad_norm": 0.0005106754833832383, "learning_rate": 4.370782942728946e-07, "loss": 0.0015185193624347448, "step": 1972 }, { "epoch": 3.2778413226577934, "grad_norm": 0.0010290957288816571, "learning_rate": 4.3514031669189325e-07, "loss": 0.0004486526013351977, "step": 1973 }, { "epoch": 3.27950504315275, "grad_norm": 0.000586140900850296, "learning_rate": 4.3320623546027283e-07, "loss": 0.00030024960869923234, "step": 1974 }, { "epoch": 3.2811687636477074, "grad_norm": 0.002518952591344714, "learning_rate": 4.312760542276059e-07, "loss": 0.0011890861205756664, "step": 1975 }, { "epoch": 3.282832484142664, "grad_norm": 0.0009471022640354931, "learning_rate": 4.293497766361068e-07, "loss": 0.002298705279827118, "step": 1976 }, { "epoch": 3.284496204637621, "grad_norm": 0.00030820450047031045, "learning_rate": 4.2742740632062243e-07, "loss": 0.0003215964825358242, "step": 1977 }, { "epoch": 3.2861599251325777, "grad_norm": 0.0006197311449795961, "learning_rate": 4.255089469086279e-07, "loss": 0.0013243852881714702, "step": 1978 }, { "epoch": 3.2878236456275345, "grad_norm": 0.0003548903623595834, "learning_rate": 4.235944020202182e-07, "loss": 0.0005627534701488912, "step": 1979 }, { "epoch": 3.2894873661224913, "grad_norm": 0.000911902985535562, "learning_rate": 4.216837752681019e-07, "loss": 0.0016927756369113922, "step": 1980 }, { "epoch": 3.2911510866174485, "grad_norm": 0.00043464102782309055, "learning_rate": 4.19777070257594e-07, "loss": 0.001056357054039836, "step": 1981 }, { "epoch": 3.2928148071124053, "grad_norm": 0.0005667249206453562, "learning_rate": 4.1787429058660845e-07, "loss": 0.0005767525872215629, "step": 1982 }, { "epoch": 3.294478527607362, "grad_norm": 0.0024056362453848124, "learning_rate": 4.159754398456531e-07, "loss": 0.0006031446973793209, "step": 1983 }, { "epoch": 3.296142248102319, "grad_norm": 0.0004384003404993564, "learning_rate": 4.14080521617822e-07, "loss": 0.0008626229246146977, "step": 1984 }, { "epoch": 3.2978059685972756, "grad_norm": 0.0007387603400275111, "learning_rate": 4.121895394787881e-07, "loss": 0.0014089690521359444, "step": 1985 }, { "epoch": 3.2994696890922324, "grad_norm": 0.0007006418891251087, "learning_rate": 4.103024969967981e-07, "loss": 0.0016673547215759754, "step": 1986 }, { "epoch": 3.301133409587189, "grad_norm": 0.0003609545237850398, "learning_rate": 4.084193977326625e-07, "loss": 0.0004953519091941416, "step": 1987 }, { "epoch": 3.3027971300821464, "grad_norm": 0.0007110030273906887, "learning_rate": 4.0654024523975323e-07, "loss": 0.0015487212222069502, "step": 1988 }, { "epoch": 3.304460850577103, "grad_norm": 0.0011667604558169842, "learning_rate": 4.0466504306399366e-07, "loss": 0.0027107703499495983, "step": 1989 }, { "epoch": 3.30612457107206, "grad_norm": 0.00033490470377728343, "learning_rate": 4.027937947438532e-07, "loss": 0.0012670408468693495, "step": 1990 }, { "epoch": 3.3077882915670167, "grad_norm": 0.0005540425190702081, "learning_rate": 4.009265038103402e-07, "loss": 0.0018498932477086782, "step": 1991 }, { "epoch": 3.3094520120619735, "grad_norm": 0.0005201280582696199, "learning_rate": 3.9906317378699684e-07, "loss": 0.0012013057712465525, "step": 1992 }, { "epoch": 3.3111157325569303, "grad_norm": 0.0007871100679039955, "learning_rate": 3.972038081898885e-07, "loss": 0.0019484572112560272, "step": 1993 }, { "epoch": 3.3127794530518875, "grad_norm": 0.0008555342210456729, "learning_rate": 3.9534841052760174e-07, "loss": 0.0034242752008140087, "step": 1994 }, { "epoch": 3.3144431735468443, "grad_norm": 0.0008200041484087706, "learning_rate": 3.9349698430123566e-07, "loss": 0.0005676429718732834, "step": 1995 }, { "epoch": 3.316106894041801, "grad_norm": 0.0006836490356363356, "learning_rate": 3.9164953300439456e-07, "loss": 0.0011167812626808882, "step": 1996 }, { "epoch": 3.317770614536758, "grad_norm": 0.0015542430337518454, "learning_rate": 3.898060601231832e-07, "loss": 0.0025796834379434586, "step": 1997 }, { "epoch": 3.3194343350317146, "grad_norm": 0.0003486530913505703, "learning_rate": 3.879665691361975e-07, "loss": 0.00042451033368706703, "step": 1998 }, { "epoch": 3.3210980555266714, "grad_norm": 0.000579525250941515, "learning_rate": 3.861310635145207e-07, "loss": 0.0010016821324825287, "step": 1999 }, { "epoch": 3.322761776021628, "grad_norm": 0.0004743239260278642, "learning_rate": 3.8429954672171613e-07, "loss": 0.00039713463047519326, "step": 2000 }, { "epoch": 3.322761776021628, "eval_loss": 0.00184919114690274, "eval_runtime": 491.8051, "eval_samples_per_second": 9.776, "eval_steps_per_second": 2.444, "step": 2000 }, { "epoch": 3.3244254965165854, "grad_norm": 0.0007103593670763075, "learning_rate": 3.824720222138192e-07, "loss": 0.0009300188394263387, "step": 2001 }, { "epoch": 3.326089217011542, "grad_norm": 0.0003298052179161459, "learning_rate": 3.806484934393331e-07, "loss": 0.0002536668907850981, "step": 2002 }, { "epoch": 3.327752937506499, "grad_norm": 0.00027529304497875273, "learning_rate": 3.788289638392206e-07, "loss": 0.000571264186874032, "step": 2003 }, { "epoch": 3.3294166580014557, "grad_norm": 0.0007259314879775047, "learning_rate": 3.7701343684689725e-07, "loss": 0.0005468014860525727, "step": 2004 }, { "epoch": 3.3310803784964125, "grad_norm": 0.0003917735884897411, "learning_rate": 3.7520191588822695e-07, "loss": 0.0009275685297325253, "step": 2005 }, { "epoch": 3.3327440989913697, "grad_norm": 0.0007926534744910896, "learning_rate": 3.7339440438151383e-07, "loss": 0.0013022706843912601, "step": 2006 }, { "epoch": 3.3344078194863265, "grad_norm": 0.0012599979527294636, "learning_rate": 3.7159090573749693e-07, "loss": 0.000500794849358499, "step": 2007 }, { "epoch": 3.3360715399812833, "grad_norm": 0.00033590139355510473, "learning_rate": 3.6979142335934246e-07, "loss": 0.0007862839847803116, "step": 2008 }, { "epoch": 3.33773526047624, "grad_norm": 0.0002938685938715935, "learning_rate": 3.67995960642637e-07, "loss": 0.00045850701280869544, "step": 2009 }, { "epoch": 3.339398980971197, "grad_norm": 0.00036255698068998754, "learning_rate": 3.6620452097538424e-07, "loss": 0.001345583237707615, "step": 2010 }, { "epoch": 3.3410627014661536, "grad_norm": 0.0007394360145553946, "learning_rate": 3.644171077379949e-07, "loss": 0.001691554207354784, "step": 2011 }, { "epoch": 3.3427264219611104, "grad_norm": 0.00044334522681310773, "learning_rate": 3.6263372430328266e-07, "loss": 0.001117645064368844, "step": 2012 }, { "epoch": 3.344390142456067, "grad_norm": 0.00046220130752772093, "learning_rate": 3.6085437403645645e-07, "loss": 0.0003471467352937907, "step": 2013 }, { "epoch": 3.3460538629510244, "grad_norm": 0.00021781664690934122, "learning_rate": 3.5907906029511606e-07, "loss": 0.00023465295089408755, "step": 2014 }, { "epoch": 3.347717583445981, "grad_norm": 0.00014978775288909674, "learning_rate": 3.573077864292421e-07, "loss": 0.00019691628403961658, "step": 2015 }, { "epoch": 3.349381303940938, "grad_norm": 0.000697939598467201, "learning_rate": 3.555405557811936e-07, "loss": 0.0011822873493656516, "step": 2016 }, { "epoch": 3.3510450244358947, "grad_norm": 0.0019957164768129587, "learning_rate": 3.537773716857004e-07, "loss": 0.0014215171104297042, "step": 2017 }, { "epoch": 3.3527087449308515, "grad_norm": 0.00011045035353163257, "learning_rate": 3.5201823746985554e-07, "loss": 8.753546717343852e-05, "step": 2018 }, { "epoch": 3.3543724654258087, "grad_norm": 0.00021515815751627088, "learning_rate": 3.5026315645311114e-07, "loss": 0.0002639327722135931, "step": 2019 }, { "epoch": 3.3560361859207655, "grad_norm": 0.00012338242959231138, "learning_rate": 3.485121319472695e-07, "loss": 0.00012567051453515887, "step": 2020 }, { "epoch": 3.3576999064157222, "grad_norm": 0.00023669407528359443, "learning_rate": 3.4676516725647953e-07, "loss": 0.0004816576838493347, "step": 2021 }, { "epoch": 3.359363626910679, "grad_norm": 0.0004548949364107102, "learning_rate": 3.450222656772292e-07, "loss": 0.0005232831463217735, "step": 2022 }, { "epoch": 3.361027347405636, "grad_norm": 0.0003678028006106615, "learning_rate": 3.43283430498339e-07, "loss": 0.0003801046696025878, "step": 2023 }, { "epoch": 3.3626910679005926, "grad_norm": 0.00035056608612649143, "learning_rate": 3.4154866500095695e-07, "loss": 0.0007555793272331357, "step": 2024 }, { "epoch": 3.3643547883955494, "grad_norm": 0.0005938123795203865, "learning_rate": 3.3981797245855096e-07, "loss": 0.001063125324435532, "step": 2025 }, { "epoch": 3.3660185088905066, "grad_norm": 0.0010471873683854938, "learning_rate": 3.380913561369037e-07, "loss": 0.0012568510137498379, "step": 2026 }, { "epoch": 3.3676822293854634, "grad_norm": 0.000528757693246007, "learning_rate": 3.363688192941067e-07, "loss": 0.0007248009787872434, "step": 2027 }, { "epoch": 3.36934594988042, "grad_norm": 0.0006465297774411738, "learning_rate": 3.346503651805513e-07, "loss": 0.0006219932110980153, "step": 2028 }, { "epoch": 3.371009670375377, "grad_norm": 0.001121693872846663, "learning_rate": 3.329359970389279e-07, "loss": 0.0017972507048398256, "step": 2029 }, { "epoch": 3.3726733908703337, "grad_norm": 0.0007588271400891244, "learning_rate": 3.312257181042142e-07, "loss": 0.0011564313899725676, "step": 2030 }, { "epoch": 3.3743371113652905, "grad_norm": 0.00013510511780623347, "learning_rate": 3.2951953160367365e-07, "loss": 0.00013225802103988826, "step": 2031 }, { "epoch": 3.3760008318602477, "grad_norm": 0.000917671131901443, "learning_rate": 3.2781744075684576e-07, "loss": 0.0008554297965019941, "step": 2032 }, { "epoch": 3.3776645523552045, "grad_norm": 0.0003718238731380552, "learning_rate": 3.261194487755426e-07, "loss": 0.00021981318423058838, "step": 2033 }, { "epoch": 3.3793282728501612, "grad_norm": 0.00033087420160882175, "learning_rate": 3.2442555886384145e-07, "loss": 0.0009502323227934539, "step": 2034 }, { "epoch": 3.380991993345118, "grad_norm": 0.0007806571666151285, "learning_rate": 3.2273577421807976e-07, "loss": 0.0017600739374756813, "step": 2035 }, { "epoch": 3.382655713840075, "grad_norm": 0.0003112622071057558, "learning_rate": 3.2105009802684636e-07, "loss": 0.0007383651682175696, "step": 2036 }, { "epoch": 3.3843194343350316, "grad_norm": 0.0006200448842719197, "learning_rate": 3.1936853347097923e-07, "loss": 0.0013696863315999508, "step": 2037 }, { "epoch": 3.3859831548299884, "grad_norm": 0.0008885011775419116, "learning_rate": 3.1769108372355804e-07, "loss": 0.001769607188180089, "step": 2038 }, { "epoch": 3.3876468753249456, "grad_norm": 0.000550445111002773, "learning_rate": 3.1601775194989693e-07, "loss": 0.0013523256639018655, "step": 2039 }, { "epoch": 3.3893105958199023, "grad_norm": 0.00022062704374548048, "learning_rate": 3.143485413075398e-07, "loss": 0.0003513278206810355, "step": 2040 }, { "epoch": 3.390974316314859, "grad_norm": 0.0005184001056477427, "learning_rate": 3.1268345494625486e-07, "loss": 0.0006000545108690858, "step": 2041 }, { "epoch": 3.392638036809816, "grad_norm": 0.000451189698651433, "learning_rate": 3.1102249600802573e-07, "loss": 0.0012537793954834342, "step": 2042 }, { "epoch": 3.3943017573047727, "grad_norm": 0.0005222734180279076, "learning_rate": 3.093656676270501e-07, "loss": 0.0004908693372271955, "step": 2043 }, { "epoch": 3.3959654777997295, "grad_norm": 0.0006829127087257802, "learning_rate": 3.0771297292972986e-07, "loss": 0.0016357006970793009, "step": 2044 }, { "epoch": 3.3976291982946867, "grad_norm": 0.00043566894601099193, "learning_rate": 3.0606441503466753e-07, "loss": 0.0005723870126530528, "step": 2045 }, { "epoch": 3.3992929187896435, "grad_norm": 0.00031372217927128077, "learning_rate": 3.044199970526593e-07, "loss": 0.0005421612877398729, "step": 2046 }, { "epoch": 3.4009566392846002, "grad_norm": 0.0005682411137968302, "learning_rate": 3.027797220866896e-07, "loss": 0.0015814367216080427, "step": 2047 }, { "epoch": 3.402620359779557, "grad_norm": 0.00031971544376574457, "learning_rate": 3.01143593231924e-07, "loss": 0.0011364194797351956, "step": 2048 }, { "epoch": 3.404284080274514, "grad_norm": 0.0009130490943789482, "learning_rate": 2.995116135757059e-07, "loss": 0.00183593831025064, "step": 2049 }, { "epoch": 3.4059478007694706, "grad_norm": 0.0004094933101441711, "learning_rate": 2.978837861975484e-07, "loss": 0.0007758493884466588, "step": 2050 }, { "epoch": 3.4076115212644273, "grad_norm": 0.00035802522324956954, "learning_rate": 2.962601141691296e-07, "loss": 0.0007563665276393294, "step": 2051 }, { "epoch": 3.4092752417593846, "grad_norm": 0.00038640270940959454, "learning_rate": 2.9464060055428703e-07, "loss": 0.0003179998602718115, "step": 2052 }, { "epoch": 3.4109389622543413, "grad_norm": 0.0005746350507251918, "learning_rate": 2.930252484090101e-07, "loss": 0.0010296654654666781, "step": 2053 }, { "epoch": 3.412602682749298, "grad_norm": 0.00018986756913363934, "learning_rate": 2.9141406078143644e-07, "loss": 0.0005916217342019081, "step": 2054 }, { "epoch": 3.414266403244255, "grad_norm": 0.00030949199572205544, "learning_rate": 2.8980704071184557e-07, "loss": 0.0005941985873505473, "step": 2055 }, { "epoch": 3.4159301237392117, "grad_norm": 0.000416825816500932, "learning_rate": 2.882041912326525e-07, "loss": 0.0003663770330604166, "step": 2056 }, { "epoch": 3.417593844234169, "grad_norm": 0.0006616072496399283, "learning_rate": 2.8660551536840277e-07, "loss": 0.0013288824120536447, "step": 2057 }, { "epoch": 3.4192575647291257, "grad_norm": 0.0004903053632006049, "learning_rate": 2.8501101613576526e-07, "loss": 0.0007218238897621632, "step": 2058 }, { "epoch": 3.4209212852240825, "grad_norm": 0.0002515607629902661, "learning_rate": 2.834206965435293e-07, "loss": 0.00033488430199213326, "step": 2059 }, { "epoch": 3.4225850057190392, "grad_norm": 0.0004987838328815997, "learning_rate": 2.818345595925959e-07, "loss": 0.0005008272128179669, "step": 2060 }, { "epoch": 3.424248726213996, "grad_norm": 0.0004810976970475167, "learning_rate": 2.8025260827597463e-07, "loss": 0.0014125597663223743, "step": 2061 }, { "epoch": 3.425912446708953, "grad_norm": 0.0006654640892520547, "learning_rate": 2.7867484557877607e-07, "loss": 0.0020283989142626524, "step": 2062 }, { "epoch": 3.4275761672039096, "grad_norm": 0.0005379806971177459, "learning_rate": 2.7710127447820783e-07, "loss": 0.0003175054444000125, "step": 2063 }, { "epoch": 3.429239887698867, "grad_norm": 0.0003664498508442193, "learning_rate": 2.7553189794356615e-07, "loss": 0.0009711123420856893, "step": 2064 }, { "epoch": 3.4309036081938236, "grad_norm": 0.0006012935773469508, "learning_rate": 2.739667189362347e-07, "loss": 0.0008616911945864558, "step": 2065 }, { "epoch": 3.4325673286887803, "grad_norm": 0.0003376624081283808, "learning_rate": 2.724057404096744e-07, "loss": 0.0007702240254729986, "step": 2066 }, { "epoch": 3.434231049183737, "grad_norm": 0.0005580992437899113, "learning_rate": 2.708489653094218e-07, "loss": 0.00029471275047399104, "step": 2067 }, { "epoch": 3.435894769678694, "grad_norm": 0.0006376696401275694, "learning_rate": 2.692963965730805e-07, "loss": 0.0015568721573799849, "step": 2068 }, { "epoch": 3.4375584901736507, "grad_norm": 0.00039179300074465573, "learning_rate": 2.677480371303162e-07, "loss": 0.0004968352150171995, "step": 2069 }, { "epoch": 3.439222210668608, "grad_norm": 0.0004967460408806801, "learning_rate": 2.662038899028532e-07, "loss": 0.0016945095267146826, "step": 2070 }, { "epoch": 3.4408859311635647, "grad_norm": 0.0004912624717690051, "learning_rate": 2.6466395780446657e-07, "loss": 0.00030252139549702406, "step": 2071 }, { "epoch": 3.4425496516585214, "grad_norm": 0.0005200419691391289, "learning_rate": 2.6312824374097794e-07, "loss": 0.0014322304632514715, "step": 2072 }, { "epoch": 3.4442133721534782, "grad_norm": 0.0004998520016670227, "learning_rate": 2.6159675061024905e-07, "loss": 0.0020267111249268055, "step": 2073 }, { "epoch": 3.445877092648435, "grad_norm": 0.00022210314637050033, "learning_rate": 2.6006948130217815e-07, "loss": 0.00020690905512310565, "step": 2074 }, { "epoch": 3.447540813143392, "grad_norm": 0.0008716708398424089, "learning_rate": 2.585464386986908e-07, "loss": 0.001405320130288601, "step": 2075 }, { "epoch": 3.4492045336383486, "grad_norm": 0.0002139442804036662, "learning_rate": 2.570276256737386e-07, "loss": 0.0003679142100736499, "step": 2076 }, { "epoch": 3.450868254133306, "grad_norm": 0.0001617825764697045, "learning_rate": 2.555130450932922e-07, "loss": 0.00012385935406200588, "step": 2077 }, { "epoch": 3.4525319746282626, "grad_norm": 0.000477365858387202, "learning_rate": 2.54002699815335e-07, "loss": 0.0016203195555135608, "step": 2078 }, { "epoch": 3.4541956951232193, "grad_norm": 0.0006356213707476854, "learning_rate": 2.52496592689859e-07, "loss": 0.0019338249694555998, "step": 2079 }, { "epoch": 3.455859415618176, "grad_norm": 0.0006331139011308551, "learning_rate": 2.5099472655885777e-07, "loss": 0.0004027165996376425, "step": 2080 }, { "epoch": 3.457523136113133, "grad_norm": 0.00026520664687268436, "learning_rate": 2.4949710425632353e-07, "loss": 0.0004268680640961975, "step": 2081 }, { "epoch": 3.4591868566080897, "grad_norm": 0.0012745996937155724, "learning_rate": 2.4800372860823956e-07, "loss": 0.00493782851845026, "step": 2082 }, { "epoch": 3.460850577103047, "grad_norm": 0.001241783844307065, "learning_rate": 2.465146024325765e-07, "loss": 0.001954779028892517, "step": 2083 }, { "epoch": 3.4625142975980037, "grad_norm": 0.0007233137730509043, "learning_rate": 2.4502972853928606e-07, "loss": 0.003158936742693186, "step": 2084 }, { "epoch": 3.4641780180929604, "grad_norm": 0.0004373241390567273, "learning_rate": 2.435491097302961e-07, "loss": 0.0005493911448866129, "step": 2085 }, { "epoch": 3.465841738587917, "grad_norm": 0.00048215530114248395, "learning_rate": 2.420727487995045e-07, "loss": 0.0003778711543418467, "step": 2086 }, { "epoch": 3.467505459082874, "grad_norm": 0.0008155179093591869, "learning_rate": 2.40600648532775e-07, "loss": 0.0015070312656462193, "step": 2087 }, { "epoch": 3.4691691795778308, "grad_norm": 0.0006131388363428414, "learning_rate": 2.3913281170793196e-07, "loss": 0.0013324524043127894, "step": 2088 }, { "epoch": 3.4708329000727876, "grad_norm": 0.0007873099530115724, "learning_rate": 2.376692410947548e-07, "loss": 0.0010801044991239905, "step": 2089 }, { "epoch": 3.4724966205677448, "grad_norm": 0.0003397899563424289, "learning_rate": 2.3620993945497217e-07, "loss": 0.0005977149703539908, "step": 2090 }, { "epoch": 3.4741603410627016, "grad_norm": 0.000378866825485602, "learning_rate": 2.347549095422569e-07, "loss": 0.0011136323446407914, "step": 2091 }, { "epoch": 3.4758240615576583, "grad_norm": 0.0004786884237546474, "learning_rate": 2.3330415410222212e-07, "loss": 0.0007871107663959265, "step": 2092 }, { "epoch": 3.477487782052615, "grad_norm": 0.0007962996023707092, "learning_rate": 2.3185767587241447e-07, "loss": 0.001519782468676567, "step": 2093 }, { "epoch": 3.479151502547572, "grad_norm": 0.00041777914157137275, "learning_rate": 2.3041547758230977e-07, "loss": 0.001065521501004696, "step": 2094 }, { "epoch": 3.4808152230425287, "grad_norm": 0.0003879894211422652, "learning_rate": 2.2897756195330773e-07, "loss": 0.001152183162048459, "step": 2095 }, { "epoch": 3.482478943537486, "grad_norm": 0.0003022203454747796, "learning_rate": 2.2754393169872685e-07, "loss": 0.0007047376129776239, "step": 2096 }, { "epoch": 3.4841426640324427, "grad_norm": 0.0007938887574709952, "learning_rate": 2.2611458952379872e-07, "loss": 0.001837533782236278, "step": 2097 }, { "epoch": 3.4858063845273994, "grad_norm": 0.0003543386119417846, "learning_rate": 2.246895381256639e-07, "loss": 0.0008702923660166562, "step": 2098 }, { "epoch": 3.487470105022356, "grad_norm": 0.0016991429729387164, "learning_rate": 2.232687801933664e-07, "loss": 0.002679019933566451, "step": 2099 }, { "epoch": 3.489133825517313, "grad_norm": 0.0003651150909718126, "learning_rate": 2.2185231840784778e-07, "loss": 0.0006635597092099488, "step": 2100 }, { "epoch": 3.489133825517313, "eval_loss": 0.001800627913326025, "eval_runtime": 491.0251, "eval_samples_per_second": 9.792, "eval_steps_per_second": 2.448, "step": 2100 }, { "epoch": 3.4907975460122698, "grad_norm": 0.0009423141600564122, "learning_rate": 2.204401554419444e-07, "loss": 0.0022930323611944914, "step": 2101 }, { "epoch": 3.4924612665072265, "grad_norm": 0.00046992572606541216, "learning_rate": 2.1903229396037896e-07, "loss": 0.0006827316246926785, "step": 2102 }, { "epoch": 3.4941249870021838, "grad_norm": 0.00031514756847172976, "learning_rate": 2.1762873661975825e-07, "loss": 0.001143246074207127, "step": 2103 }, { "epoch": 3.4957887074971405, "grad_norm": 0.0005974940140731633, "learning_rate": 2.1622948606856765e-07, "loss": 0.0005027040606364608, "step": 2104 }, { "epoch": 3.4974524279920973, "grad_norm": 0.0007181466207839549, "learning_rate": 2.1483454494716504e-07, "loss": 0.0008611283265054226, "step": 2105 }, { "epoch": 3.499116148487054, "grad_norm": 0.0006843828014098108, "learning_rate": 2.1344391588777658e-07, "loss": 0.0006472602253779769, "step": 2106 }, { "epoch": 3.500779868982011, "grad_norm": 0.0006242678500711918, "learning_rate": 2.1205760151449206e-07, "loss": 0.0026610223576426506, "step": 2107 }, { "epoch": 3.502443589476968, "grad_norm": 0.0006409132038243115, "learning_rate": 2.106756044432598e-07, "loss": 0.0014607177581638098, "step": 2108 }, { "epoch": 3.504107309971925, "grad_norm": 0.00031891578692011535, "learning_rate": 2.0929792728187986e-07, "loss": 0.0004305794136598706, "step": 2109 }, { "epoch": 3.5057710304668817, "grad_norm": 0.00021313800243660808, "learning_rate": 2.079245726300022e-07, "loss": 0.0005588196218013763, "step": 2110 }, { "epoch": 3.5074347509618384, "grad_norm": 0.0005837749922648072, "learning_rate": 2.0655554307911997e-07, "loss": 0.0011885692365467548, "step": 2111 }, { "epoch": 3.509098471456795, "grad_norm": 0.00048057257663458586, "learning_rate": 2.05190841212565e-07, "loss": 0.0007919726194813848, "step": 2112 }, { "epoch": 3.510762191951752, "grad_norm": 0.0004323796893004328, "learning_rate": 2.038304696055024e-07, "loss": 0.0009170786943286657, "step": 2113 }, { "epoch": 3.5124259124467088, "grad_norm": 0.00035488922731019557, "learning_rate": 2.0247443082492686e-07, "loss": 0.0006726681021973491, "step": 2114 }, { "epoch": 3.5140896329416655, "grad_norm": 0.0003869618522003293, "learning_rate": 2.0112272742965678e-07, "loss": 0.0006815298111177981, "step": 2115 }, { "epoch": 3.5157533534366228, "grad_norm": 0.00034062107442878187, "learning_rate": 1.997753619703291e-07, "loss": 0.0008145941537804902, "step": 2116 }, { "epoch": 3.5174170739315795, "grad_norm": 0.0014651084784418344, "learning_rate": 1.9843233698939617e-07, "loss": 0.000923621584661305, "step": 2117 }, { "epoch": 3.5190807944265363, "grad_norm": 0.0005609922809526324, "learning_rate": 1.9709365502111944e-07, "loss": 0.0010176009964197874, "step": 2118 }, { "epoch": 3.520744514921493, "grad_norm": 0.0006454312242567539, "learning_rate": 1.957593185915657e-07, "loss": 0.0017813025042414665, "step": 2119 }, { "epoch": 3.52240823541645, "grad_norm": 0.00036230773548595607, "learning_rate": 1.9442933021860095e-07, "loss": 0.0005436648498289287, "step": 2120 }, { "epoch": 3.524071955911407, "grad_norm": 0.0003991083358414471, "learning_rate": 1.9310369241188732e-07, "loss": 0.0004445482627488673, "step": 2121 }, { "epoch": 3.525735676406364, "grad_norm": 0.000983903999440372, "learning_rate": 1.9178240767287666e-07, "loss": 0.0016248344909399748, "step": 2122 }, { "epoch": 3.5273993969013206, "grad_norm": 0.00019582314416766167, "learning_rate": 1.904654784948079e-07, "loss": 0.0008514729561284184, "step": 2123 }, { "epoch": 3.5290631173962774, "grad_norm": 0.0004321737796999514, "learning_rate": 1.8915290736269965e-07, "loss": 0.0014025006676092744, "step": 2124 }, { "epoch": 3.530726837891234, "grad_norm": 0.00023967149900272489, "learning_rate": 1.878446967533476e-07, "loss": 0.0005360299255698919, "step": 2125 }, { "epoch": 3.532390558386191, "grad_norm": 0.00028346243198029697, "learning_rate": 1.865408491353199e-07, "loss": 0.0008053178898990154, "step": 2126 }, { "epoch": 3.5340542788811478, "grad_norm": 0.0009381191339343786, "learning_rate": 1.8524136696895068e-07, "loss": 0.00040692309266887605, "step": 2127 }, { "epoch": 3.535717999376105, "grad_norm": 0.00023939047241583467, "learning_rate": 1.8394625270633793e-07, "loss": 0.0006541266338899732, "step": 2128 }, { "epoch": 3.5373817198710618, "grad_norm": 0.000494911044370383, "learning_rate": 1.8265550879133538e-07, "loss": 0.0005820249207317829, "step": 2129 }, { "epoch": 3.5390454403660185, "grad_norm": 0.00031910924008116126, "learning_rate": 1.8136913765955195e-07, "loss": 0.00022852102119941264, "step": 2130 }, { "epoch": 3.5407091608609753, "grad_norm": 0.0003672874590847641, "learning_rate": 1.8008714173834456e-07, "loss": 0.0016192167531698942, "step": 2131 }, { "epoch": 3.542372881355932, "grad_norm": 0.0003836368559859693, "learning_rate": 1.7880952344681402e-07, "loss": 0.0009387480677105486, "step": 2132 }, { "epoch": 3.5440366018508893, "grad_norm": 0.0003523621999192983, "learning_rate": 1.7753628519580097e-07, "loss": 0.0005441518733277917, "step": 2133 }, { "epoch": 3.545700322345846, "grad_norm": 0.0005042285774834454, "learning_rate": 1.7626742938788105e-07, "loss": 0.000680170429404825, "step": 2134 }, { "epoch": 3.547364042840803, "grad_norm": 0.0006183210061863065, "learning_rate": 1.7500295841735905e-07, "loss": 0.0006854024832136929, "step": 2135 }, { "epoch": 3.5490277633357596, "grad_norm": 0.0004747119382955134, "learning_rate": 1.7374287467026767e-07, "loss": 0.0008972194045782089, "step": 2136 }, { "epoch": 3.5506914838307164, "grad_norm": 0.00047969879233278334, "learning_rate": 1.7248718052435942e-07, "loss": 0.000863411754835397, "step": 2137 }, { "epoch": 3.552355204325673, "grad_norm": 0.00033154431730508804, "learning_rate": 1.712358783491047e-07, "loss": 0.00024044688325375319, "step": 2138 }, { "epoch": 3.55401892482063, "grad_norm": 0.00040625245310366154, "learning_rate": 1.6998897050568618e-07, "loss": 0.00100037083029747, "step": 2139 }, { "epoch": 3.5556826453155868, "grad_norm": 0.002271288773044944, "learning_rate": 1.6874645934699342e-07, "loss": 0.0009121758048422635, "step": 2140 }, { "epoch": 3.557346365810544, "grad_norm": 0.0003006242332048714, "learning_rate": 1.6750834721762117e-07, "loss": 0.00045710286940447986, "step": 2141 }, { "epoch": 3.5590100863055008, "grad_norm": 0.00031541401403956115, "learning_rate": 1.6627463645386199e-07, "loss": 0.0009236993500962853, "step": 2142 }, { "epoch": 3.5606738068004575, "grad_norm": 0.0012363658752292395, "learning_rate": 1.6504532938370427e-07, "loss": 0.0014181968290358782, "step": 2143 }, { "epoch": 3.5623375272954143, "grad_norm": 0.0006429841741919518, "learning_rate": 1.6382042832682577e-07, "loss": 0.002108179498463869, "step": 2144 }, { "epoch": 3.564001247790371, "grad_norm": 0.0008274697465822101, "learning_rate": 1.6259993559459091e-07, "loss": 0.0012376433005556464, "step": 2145 }, { "epoch": 3.5656649682853283, "grad_norm": 0.00020373162988107651, "learning_rate": 1.613838534900447e-07, "loss": 0.0004814681888092309, "step": 2146 }, { "epoch": 3.567328688780285, "grad_norm": 0.0003222871746402234, "learning_rate": 1.601721843079107e-07, "loss": 0.00038049567956477404, "step": 2147 }, { "epoch": 3.568992409275242, "grad_norm": 0.00017242447938770056, "learning_rate": 1.5896493033458416e-07, "loss": 0.00020702739129774272, "step": 2148 }, { "epoch": 3.5706561297701986, "grad_norm": 0.0004510879225563258, "learning_rate": 1.5776209384812946e-07, "loss": 0.0011956410016864538, "step": 2149 }, { "epoch": 3.5723198502651554, "grad_norm": 0.0005464230780489743, "learning_rate": 1.5656367711827602e-07, "loss": 0.0018636614549905062, "step": 2150 }, { "epoch": 3.573983570760112, "grad_norm": 0.0003373275394551456, "learning_rate": 1.553696824064116e-07, "loss": 0.0006840836722403765, "step": 2151 }, { "epoch": 3.575647291255069, "grad_norm": 0.000650634232442826, "learning_rate": 1.5418011196558085e-07, "loss": 0.0015320621896535158, "step": 2152 }, { "epoch": 3.5773110117500257, "grad_norm": 0.0008834419422782958, "learning_rate": 1.529949680404799e-07, "loss": 0.0005219897720962763, "step": 2153 }, { "epoch": 3.578974732244983, "grad_norm": 0.00017787229444365948, "learning_rate": 1.5181425286745155e-07, "loss": 0.0001635367952985689, "step": 2154 }, { "epoch": 3.5806384527399397, "grad_norm": 0.0002696933224797249, "learning_rate": 1.5063796867448243e-07, "loss": 0.0004062880761921406, "step": 2155 }, { "epoch": 3.5823021732348965, "grad_norm": 0.000314901873935014, "learning_rate": 1.4946611768119763e-07, "loss": 0.0002677102165762335, "step": 2156 }, { "epoch": 3.5839658937298533, "grad_norm": 0.0017086658626794815, "learning_rate": 1.4829870209885605e-07, "loss": 0.003296004142612219, "step": 2157 }, { "epoch": 3.58562961422481, "grad_norm": 0.0006046981434337795, "learning_rate": 1.471357241303481e-07, "loss": 0.0017653173999860883, "step": 2158 }, { "epoch": 3.5872933347197673, "grad_norm": 0.0014973668148741126, "learning_rate": 1.4597718597019055e-07, "loss": 0.0022984712850302458, "step": 2159 }, { "epoch": 3.588957055214724, "grad_norm": 0.000747077923733741, "learning_rate": 1.4482308980452164e-07, "loss": 0.0016120221698656678, "step": 2160 }, { "epoch": 3.590620775709681, "grad_norm": 0.0008344343514181674, "learning_rate": 1.436734378110985e-07, "loss": 0.001747865928336978, "step": 2161 }, { "epoch": 3.5922844962046376, "grad_norm": 0.0002902126288972795, "learning_rate": 1.425282321592908e-07, "loss": 0.0007139051449485123, "step": 2162 }, { "epoch": 3.5939482166995944, "grad_norm": 0.00047769720549695194, "learning_rate": 1.4138747501007966e-07, "loss": 0.00109275639988482, "step": 2163 }, { "epoch": 3.595611937194551, "grad_norm": 0.0005414061597548425, "learning_rate": 1.4025116851605125e-07, "loss": 0.001188867725431919, "step": 2164 }, { "epoch": 3.597275657689508, "grad_norm": 0.0002681358018890023, "learning_rate": 1.3911931482139317e-07, "loss": 0.0007024380029179156, "step": 2165 }, { "epoch": 3.5989393781844647, "grad_norm": 0.00043560643098317087, "learning_rate": 1.379919160618909e-07, "loss": 0.0007490735151804984, "step": 2166 }, { "epoch": 3.600603098679422, "grad_norm": 0.0005028645973652601, "learning_rate": 1.368689743649243e-07, "loss": 0.0008132288348861039, "step": 2167 }, { "epoch": 3.6022668191743787, "grad_norm": 0.0006175213493406773, "learning_rate": 1.3575049184946122e-07, "loss": 0.0014601877192035317, "step": 2168 }, { "epoch": 3.6039305396693355, "grad_norm": 0.0002806995762512088, "learning_rate": 1.346364706260564e-07, "loss": 0.0002837897918652743, "step": 2169 }, { "epoch": 3.6055942601642923, "grad_norm": 0.0003279212396591902, "learning_rate": 1.3352691279684582e-07, "loss": 0.0011922994162887335, "step": 2170 }, { "epoch": 3.6072579806592495, "grad_norm": 0.00029519712552428246, "learning_rate": 1.324218204555433e-07, "loss": 0.0010329822544008493, "step": 2171 }, { "epoch": 3.6089217011542063, "grad_norm": 0.0012609957484528422, "learning_rate": 1.3132119568743662e-07, "loss": 0.0010717506520450115, "step": 2172 }, { "epoch": 3.610585421649163, "grad_norm": 0.000270798773271963, "learning_rate": 1.3022504056938196e-07, "loss": 0.0009245692635886371, "step": 2173 }, { "epoch": 3.61224914214412, "grad_norm": 0.0003105451469309628, "learning_rate": 1.2913335716980307e-07, "loss": 0.000851097924169153, "step": 2174 }, { "epoch": 3.6139128626390766, "grad_norm": 0.0004231147759128362, "learning_rate": 1.2804614754868466e-07, "loss": 0.001083697541616857, "step": 2175 }, { "epoch": 3.6155765831340334, "grad_norm": 0.0006465599872171879, "learning_rate": 1.2696341375756982e-07, "loss": 0.0018808020977303386, "step": 2176 }, { "epoch": 3.61724030362899, "grad_norm": 0.00040738159441389143, "learning_rate": 1.2588515783955564e-07, "loss": 0.0010184545535594225, "step": 2177 }, { "epoch": 3.618904024123947, "grad_norm": 0.0005552941584028304, "learning_rate": 1.2481138182929065e-07, "loss": 0.0013670467305928469, "step": 2178 }, { "epoch": 3.620567744618904, "grad_norm": 0.0006208255654200912, "learning_rate": 1.2374208775296742e-07, "loss": 0.0009191983845084906, "step": 2179 }, { "epoch": 3.622231465113861, "grad_norm": 0.0006596720195375383, "learning_rate": 1.2267727762832388e-07, "loss": 0.0017082467675209045, "step": 2180 }, { "epoch": 3.6238951856088177, "grad_norm": 0.0013268449110910296, "learning_rate": 1.2161695346463498e-07, "loss": 0.002551913959905505, "step": 2181 }, { "epoch": 3.6255589061037745, "grad_norm": 0.0002721058845054358, "learning_rate": 1.2056111726271192e-07, "loss": 0.0005901381373405457, "step": 2182 }, { "epoch": 3.6272226265987313, "grad_norm": 0.0006825258606113493, "learning_rate": 1.195097710148968e-07, "loss": 0.0011782599613070488, "step": 2183 }, { "epoch": 3.6288863470936885, "grad_norm": 0.00038542994298040867, "learning_rate": 1.1846291670505855e-07, "loss": 0.0003281470271758735, "step": 2184 }, { "epoch": 3.6305500675886453, "grad_norm": 0.0006535347201861441, "learning_rate": 1.1742055630859117e-07, "loss": 0.0009970370447263122, "step": 2185 }, { "epoch": 3.632213788083602, "grad_norm": 0.0002451266336720437, "learning_rate": 1.1638269179240796e-07, "loss": 0.0005679831374436617, "step": 2186 }, { "epoch": 3.633877508578559, "grad_norm": 0.00044009933480992913, "learning_rate": 1.1534932511493846e-07, "loss": 0.0011061355471611023, "step": 2187 }, { "epoch": 3.6355412290735156, "grad_norm": 0.00046958556049503386, "learning_rate": 1.1432045822612564e-07, "loss": 0.00047064467798918486, "step": 2188 }, { "epoch": 3.6372049495684724, "grad_norm": 0.000384511862648651, "learning_rate": 1.132960930674204e-07, "loss": 0.001256321556866169, "step": 2189 }, { "epoch": 3.638868670063429, "grad_norm": 0.0004960609949193895, "learning_rate": 1.1227623157177986e-07, "loss": 0.001438457635231316, "step": 2190 }, { "epoch": 3.640532390558386, "grad_norm": 0.0005817452911287546, "learning_rate": 1.1126087566366266e-07, "loss": 0.0007497647311538458, "step": 2191 }, { "epoch": 3.642196111053343, "grad_norm": 0.0005362757365219295, "learning_rate": 1.1025002725902484e-07, "loss": 0.0009658780181780457, "step": 2192 }, { "epoch": 3.6438598315483, "grad_norm": 0.0004179330717306584, "learning_rate": 1.0924368826531751e-07, "loss": 0.00042840588139370084, "step": 2193 }, { "epoch": 3.6455235520432567, "grad_norm": 0.00041192653588950634, "learning_rate": 1.0824186058148278e-07, "loss": 0.00033956009428948164, "step": 2194 }, { "epoch": 3.6471872725382135, "grad_norm": 0.0011990912025794387, "learning_rate": 1.0724454609794931e-07, "loss": 0.0005998517153784633, "step": 2195 }, { "epoch": 3.6488509930331703, "grad_norm": 0.000372793001588434, "learning_rate": 1.0625174669663036e-07, "loss": 0.000853288802318275, "step": 2196 }, { "epoch": 3.6505147135281275, "grad_norm": 0.0004351623938418925, "learning_rate": 1.0526346425091815e-07, "loss": 0.0008543959120288491, "step": 2197 }, { "epoch": 3.6521784340230843, "grad_norm": 0.0024484992027282715, "learning_rate": 1.042797006256821e-07, "loss": 0.0028258298989385366, "step": 2198 }, { "epoch": 3.653842154518041, "grad_norm": 0.00040658764191903174, "learning_rate": 1.0330045767726504e-07, "loss": 0.0016119135543704033, "step": 2199 }, { "epoch": 3.655505875012998, "grad_norm": 0.0003821366699412465, "learning_rate": 1.023257372534786e-07, "loss": 0.0004272300866432488, "step": 2200 }, { "epoch": 3.655505875012998, "eval_loss": 0.0017772268038243055, "eval_runtime": 491.272, "eval_samples_per_second": 9.787, "eval_steps_per_second": 2.447, "step": 2200 }, { "epoch": 3.6571695955079546, "grad_norm": 0.0009488234645687044, "learning_rate": 1.0135554119360153e-07, "loss": 0.0017828824929893017, "step": 2201 }, { "epoch": 3.6588333160029114, "grad_norm": 0.00019926000095438212, "learning_rate": 1.0038987132837435e-07, "loss": 0.000897647812962532, "step": 2202 }, { "epoch": 3.660497036497868, "grad_norm": 0.0006051113014109433, "learning_rate": 9.942872947999672e-08, "loss": 0.0019816546700894833, "step": 2203 }, { "epoch": 3.662160756992825, "grad_norm": 0.00035018715425394475, "learning_rate": 9.847211746212504e-08, "loss": 0.0008289602119475603, "step": 2204 }, { "epoch": 3.663824477487782, "grad_norm": 0.0005620543961413205, "learning_rate": 9.752003707986652e-08, "loss": 0.0011024302802979946, "step": 2205 }, { "epoch": 3.665488197982739, "grad_norm": 0.00018053792882710695, "learning_rate": 9.657249012977821e-08, "loss": 0.0006109848036430776, "step": 2206 }, { "epoch": 3.6671519184776957, "grad_norm": 0.000838694570120424, "learning_rate": 9.562947839986264e-08, "loss": 0.0028103888034820557, "step": 2207 }, { "epoch": 3.6688156389726525, "grad_norm": 0.0004535977204795927, "learning_rate": 9.469100366956391e-08, "loss": 0.000537961081136018, "step": 2208 }, { "epoch": 3.6704793594676093, "grad_norm": 0.0015345853753387928, "learning_rate": 9.375706770976573e-08, "loss": 0.0016995491459965706, "step": 2209 }, { "epoch": 3.6721430799625665, "grad_norm": 0.00038879108615219593, "learning_rate": 9.282767228278672e-08, "loss": 0.0005763666122220457, "step": 2210 }, { "epoch": 3.6738068004575233, "grad_norm": 0.00027304861578159034, "learning_rate": 9.190281914237736e-08, "loss": 0.00019859435269609094, "step": 2211 }, { "epoch": 3.67547052095248, "grad_norm": 0.00028828351059928536, "learning_rate": 9.09825100337175e-08, "loss": 0.00034817203413695097, "step": 2212 }, { "epoch": 3.677134241447437, "grad_norm": 0.0007686371682211757, "learning_rate": 9.006674669341214e-08, "loss": 0.0016276794485747814, "step": 2213 }, { "epoch": 3.6787979619423936, "grad_norm": 0.000721732503734529, "learning_rate": 8.915553084948847e-08, "loss": 0.0017058923840522766, "step": 2214 }, { "epoch": 3.6804616824373504, "grad_norm": 0.0006037505227141082, "learning_rate": 8.824886422139273e-08, "loss": 0.001759952399879694, "step": 2215 }, { "epoch": 3.682125402932307, "grad_norm": 0.00021061238658148795, "learning_rate": 8.734674851998748e-08, "loss": 0.0008622463792562485, "step": 2216 }, { "epoch": 3.683789123427264, "grad_norm": 0.0003846586332656443, "learning_rate": 8.64491854475466e-08, "loss": 0.0010997102363035083, "step": 2217 }, { "epoch": 3.685452843922221, "grad_norm": 0.000777195324189961, "learning_rate": 8.55561766977539e-08, "loss": 0.00318273832090199, "step": 2218 }, { "epoch": 3.687116564417178, "grad_norm": 0.0010364966001361609, "learning_rate": 8.46677239556995e-08, "loss": 0.001554339425638318, "step": 2219 }, { "epoch": 3.6887802849121347, "grad_norm": 0.0006480670999735594, "learning_rate": 8.378382889787596e-08, "loss": 0.0019870519172400236, "step": 2220 }, { "epoch": 3.6904440054070915, "grad_norm": 0.0005411705933511257, "learning_rate": 8.290449319217603e-08, "loss": 0.001172770163975656, "step": 2221 }, { "epoch": 3.6921077259020487, "grad_norm": 0.0001723592431517318, "learning_rate": 8.202971849788854e-08, "loss": 0.000567898852750659, "step": 2222 }, { "epoch": 3.6937714463970055, "grad_norm": 0.0002687703527044505, "learning_rate": 8.115950646569587e-08, "loss": 0.0005061675328761339, "step": 2223 }, { "epoch": 3.6954351668919623, "grad_norm": 0.0002580997534096241, "learning_rate": 8.029385873767115e-08, "loss": 0.0005549566121771932, "step": 2224 }, { "epoch": 3.697098887386919, "grad_norm": 0.0005996919353492558, "learning_rate": 7.943277694727469e-08, "loss": 0.0011159824207425117, "step": 2225 }, { "epoch": 3.698762607881876, "grad_norm": 0.00021931962692178786, "learning_rate": 7.857626271935037e-08, "loss": 0.00018094456754624844, "step": 2226 }, { "epoch": 3.7004263283768326, "grad_norm": 0.0003673741302918643, "learning_rate": 7.772431767012423e-08, "loss": 0.0005871973698958755, "step": 2227 }, { "epoch": 3.7020900488717894, "grad_norm": 0.0002457849041093141, "learning_rate": 7.68769434071992e-08, "loss": 0.0002511520578991622, "step": 2228 }, { "epoch": 3.703753769366746, "grad_norm": 0.000532071222551167, "learning_rate": 7.603414152955374e-08, "loss": 0.001324426382780075, "step": 2229 }, { "epoch": 3.7054174898617034, "grad_norm": 0.00074821210000664, "learning_rate": 7.519591362753848e-08, "loss": 0.0009245446999557316, "step": 2230 }, { "epoch": 3.70708121035666, "grad_norm": 0.000523492693901062, "learning_rate": 7.436226128287288e-08, "loss": 0.0013367345090955496, "step": 2231 }, { "epoch": 3.708744930851617, "grad_norm": 0.0005014461930841208, "learning_rate": 7.35331860686428e-08, "loss": 0.00041717401472851634, "step": 2232 }, { "epoch": 3.7104086513465737, "grad_norm": 0.00034521002089604735, "learning_rate": 7.270868954929595e-08, "loss": 0.0006486955680884421, "step": 2233 }, { "epoch": 3.7120723718415305, "grad_norm": 0.0002621794992592186, "learning_rate": 7.188877328064142e-08, "loss": 0.0001665848249103874, "step": 2234 }, { "epoch": 3.7137360923364877, "grad_norm": 0.000527933007106185, "learning_rate": 7.107343880984496e-08, "loss": 0.0015387427993118763, "step": 2235 }, { "epoch": 3.7153998128314445, "grad_norm": 0.0006754556088708341, "learning_rate": 7.026268767542671e-08, "loss": 0.0019412613473832607, "step": 2236 }, { "epoch": 3.7170635333264013, "grad_norm": 0.00033381491084583104, "learning_rate": 6.94565214072579e-08, "loss": 0.000849235279019922, "step": 2237 }, { "epoch": 3.718727253821358, "grad_norm": 0.00031778853735886514, "learning_rate": 6.86549415265586e-08, "loss": 0.0003436643164604902, "step": 2238 }, { "epoch": 3.720390974316315, "grad_norm": 0.000337104604113847, "learning_rate": 6.785794954589365e-08, "loss": 0.00036453024949878454, "step": 2239 }, { "epoch": 3.7220546948112716, "grad_norm": 0.0006949629751034081, "learning_rate": 6.706554696917139e-08, "loss": 0.0010551823070272803, "step": 2240 }, { "epoch": 3.7237184153062284, "grad_norm": 0.00045205987407825887, "learning_rate": 6.627773529163994e-08, "loss": 0.00048298074398189783, "step": 2241 }, { "epoch": 3.725382135801185, "grad_norm": 0.0003112118865828961, "learning_rate": 6.549451599988432e-08, "loss": 0.0006346139125525951, "step": 2242 }, { "epoch": 3.7270458562961424, "grad_norm": 0.0003499843296594918, "learning_rate": 6.471589057182398e-08, "loss": 0.001009125029668212, "step": 2243 }, { "epoch": 3.728709576791099, "grad_norm": 0.00034769653575494885, "learning_rate": 6.394186047670947e-08, "loss": 0.0003636126348283142, "step": 2244 }, { "epoch": 3.730373297286056, "grad_norm": 0.00042841010144911706, "learning_rate": 6.317242717511995e-08, "loss": 0.00031774997478350997, "step": 2245 }, { "epoch": 3.7320370177810127, "grad_norm": 0.000637256249319762, "learning_rate": 6.240759211896153e-08, "loss": 0.000800920941401273, "step": 2246 }, { "epoch": 3.7337007382759695, "grad_norm": 0.00034591983421705663, "learning_rate": 6.16473567514625e-08, "loss": 0.0005393112078309059, "step": 2247 }, { "epoch": 3.7353644587709267, "grad_norm": 0.001043170690536499, "learning_rate": 6.089172250717201e-08, "loss": 0.002363135339692235, "step": 2248 }, { "epoch": 3.7370281792658835, "grad_norm": 0.0002499793190509081, "learning_rate": 6.014069081195673e-08, "loss": 0.0002837787615135312, "step": 2249 }, { "epoch": 3.7386918997608403, "grad_norm": 0.0007483753724955022, "learning_rate": 5.9394263082998836e-08, "loss": 0.0016868056263774633, "step": 2250 }, { "epoch": 3.740355620255797, "grad_norm": 0.0009630986023694277, "learning_rate": 5.8652440728792504e-08, "loss": 0.0023214765824377537, "step": 2251 }, { "epoch": 3.742019340750754, "grad_norm": 0.0007593136979267001, "learning_rate": 5.791522514914216e-08, "loss": 0.0014121567364782095, "step": 2252 }, { "epoch": 3.7436830612457106, "grad_norm": 0.00046497341827489436, "learning_rate": 5.718261773515865e-08, "loss": 0.001239665667526424, "step": 2253 }, { "epoch": 3.7453467817406674, "grad_norm": 0.0004559413646347821, "learning_rate": 5.64546198692581e-08, "loss": 0.0008746258681640029, "step": 2254 }, { "epoch": 3.747010502235624, "grad_norm": 0.00031668288283981383, "learning_rate": 5.573123292515775e-08, "loss": 0.00044546768185682595, "step": 2255 }, { "epoch": 3.7486742227305814, "grad_norm": 0.0006626700051128864, "learning_rate": 5.50124582678746e-08, "loss": 0.0009660154464654624, "step": 2256 }, { "epoch": 3.750337943225538, "grad_norm": 0.0002030782779911533, "learning_rate": 5.429829725372204e-08, "loss": 0.00020031498570460826, "step": 2257 }, { "epoch": 3.752001663720495, "grad_norm": 0.0021811239421367645, "learning_rate": 5.3588751230307935e-08, "loss": 0.0007013549329712987, "step": 2258 }, { "epoch": 3.7536653842154517, "grad_norm": 0.0006810147897340357, "learning_rate": 5.2883821536531545e-08, "loss": 0.0013047631364315748, "step": 2259 }, { "epoch": 3.755329104710409, "grad_norm": 0.0003946752694901079, "learning_rate": 5.218350950258133e-08, "loss": 0.00029323113267309964, "step": 2260 }, { "epoch": 3.7569928252053657, "grad_norm": 0.0013763107126578689, "learning_rate": 5.1487816449932174e-08, "loss": 0.0007352272514253855, "step": 2261 }, { "epoch": 3.7586565457003225, "grad_norm": 0.000568808987736702, "learning_rate": 5.079674369134313e-08, "loss": 0.0008357037440873682, "step": 2262 }, { "epoch": 3.7603202661952793, "grad_norm": 0.0008823948446661234, "learning_rate": 5.0110292530854696e-08, "loss": 0.002022108295932412, "step": 2263 }, { "epoch": 3.761983986690236, "grad_norm": 0.00047194812214002013, "learning_rate": 4.942846426378683e-08, "loss": 0.0012379256077110767, "step": 2264 }, { "epoch": 3.763647707185193, "grad_norm": 0.00037526569212786853, "learning_rate": 4.875126017673593e-08, "loss": 0.0010932236909866333, "step": 2265 }, { "epoch": 3.7653114276801496, "grad_norm": 0.001681567169725895, "learning_rate": 4.807868154757284e-08, "loss": 0.001502260216511786, "step": 2266 }, { "epoch": 3.7669751481751064, "grad_norm": 0.00038177220267243683, "learning_rate": 4.741072964543958e-08, "loss": 0.0011915108188986778, "step": 2267 }, { "epoch": 3.7686388686700636, "grad_norm": 0.0006360916304402053, "learning_rate": 4.6747405730748765e-08, "loss": 0.0012110350653529167, "step": 2268 }, { "epoch": 3.7703025891650204, "grad_norm": 0.0014272902626544237, "learning_rate": 4.6088711055179426e-08, "loss": 0.0026597613468766212, "step": 2269 }, { "epoch": 3.771966309659977, "grad_norm": 0.0002763984084594995, "learning_rate": 4.543464686167537e-08, "loss": 0.0004325263435021043, "step": 2270 }, { "epoch": 3.773630030154934, "grad_norm": 0.00036100976285524666, "learning_rate": 4.478521438444267e-08, "loss": 0.001015659305267036, "step": 2271 }, { "epoch": 3.7752937506498907, "grad_norm": 0.00018730417650658637, "learning_rate": 4.414041484894743e-08, "loss": 0.00018422273569740355, "step": 2272 }, { "epoch": 3.776957471144848, "grad_norm": 0.0004674112715292722, "learning_rate": 4.3500249471913616e-08, "loss": 0.001334009226411581, "step": 2273 }, { "epoch": 3.7786211916398047, "grad_norm": 0.0006042693858034909, "learning_rate": 4.2864719461321036e-08, "loss": 0.0009468183270655572, "step": 2274 }, { "epoch": 3.7802849121347615, "grad_norm": 0.000509067380335182, "learning_rate": 4.223382601640208e-08, "loss": 0.0006977926241233945, "step": 2275 }, { "epoch": 3.7819486326297183, "grad_norm": 0.0004617350932676345, "learning_rate": 4.160757032764001e-08, "loss": 0.001105066854506731, "step": 2276 }, { "epoch": 3.783612353124675, "grad_norm": 0.0007248525507748127, "learning_rate": 4.098595357676732e-08, "loss": 0.002765103243291378, "step": 2277 }, { "epoch": 3.785276073619632, "grad_norm": 0.0004243562580086291, "learning_rate": 4.036897693676184e-08, "loss": 0.0010327072814106941, "step": 2278 }, { "epoch": 3.7869397941145886, "grad_norm": 0.0010116234188899398, "learning_rate": 3.9756641571847e-08, "loss": 0.0029219533316791058, "step": 2279 }, { "epoch": 3.7886035146095454, "grad_norm": 0.00012117379083065316, "learning_rate": 3.914894863748714e-08, "loss": 0.0004345136694610119, "step": 2280 }, { "epoch": 3.7902672351045026, "grad_norm": 0.0007400370086543262, "learning_rate": 3.854589928038666e-08, "loss": 0.0018152652774006128, "step": 2281 }, { "epoch": 3.7919309555994594, "grad_norm": 0.0004178027156740427, "learning_rate": 3.794749463848835e-08, "loss": 0.0008192003006115556, "step": 2282 }, { "epoch": 3.793594676094416, "grad_norm": 0.00044253907981328666, "learning_rate": 3.735373584096924e-08, "loss": 0.0011070969048887491, "step": 2283 }, { "epoch": 3.795258396589373, "grad_norm": 0.0009567950037308037, "learning_rate": 3.676462400824088e-08, "loss": 0.0016953707672655582, "step": 2284 }, { "epoch": 3.7969221170843297, "grad_norm": 0.0004708846681751311, "learning_rate": 3.618016025194598e-08, "loss": 0.0005484845023602247, "step": 2285 }, { "epoch": 3.798585837579287, "grad_norm": 0.0005545560852624476, "learning_rate": 3.560034567495513e-08, "loss": 0.0006607613759115338, "step": 2286 }, { "epoch": 3.8002495580742437, "grad_norm": 0.0006822629366070032, "learning_rate": 3.5025181371367844e-08, "loss": 0.0019367439672350883, "step": 2287 }, { "epoch": 3.8019132785692005, "grad_norm": 0.0006036330014467239, "learning_rate": 3.4454668426507076e-08, "loss": 0.000651317008305341, "step": 2288 }, { "epoch": 3.8035769990641572, "grad_norm": 0.00024399223912041634, "learning_rate": 3.388880791692001e-08, "loss": 0.0004422190831974149, "step": 2289 }, { "epoch": 3.805240719559114, "grad_norm": 0.0008544662268832326, "learning_rate": 3.33276009103739e-08, "loss": 0.0014506196603178978, "step": 2290 }, { "epoch": 3.806904440054071, "grad_norm": 0.0008960445993579924, "learning_rate": 3.2771048465855546e-08, "loss": 0.0016973534366115928, "step": 2291 }, { "epoch": 3.8085681605490276, "grad_norm": 0.0002961951831821352, "learning_rate": 3.221915163356848e-08, "loss": 0.0009070704109035432, "step": 2292 }, { "epoch": 3.8102318810439844, "grad_norm": 0.0005132612423039973, "learning_rate": 3.167191145493076e-08, "loss": 0.001032351516187191, "step": 2293 }, { "epoch": 3.8118956015389416, "grad_norm": 0.00018734767218120396, "learning_rate": 3.1129328962573865e-08, "loss": 0.00034245982533320785, "step": 2294 }, { "epoch": 3.8135593220338984, "grad_norm": 0.0005684709176421165, "learning_rate": 3.05914051803402e-08, "loss": 0.0010232322383672, "step": 2295 }, { "epoch": 3.815223042528855, "grad_norm": 0.0007894489681348205, "learning_rate": 3.005814112328143e-08, "loss": 0.0005177940474823117, "step": 2296 }, { "epoch": 3.816886763023812, "grad_norm": 0.0005052572232671082, "learning_rate": 2.9529537797656215e-08, "loss": 0.00123726692982018, "step": 2297 }, { "epoch": 3.8185504835187687, "grad_norm": 0.0003198084014002234, "learning_rate": 2.900559620092891e-08, "loss": 0.0007540383376181126, "step": 2298 }, { "epoch": 3.820214204013726, "grad_norm": 0.00037152034929022193, "learning_rate": 2.8486317321766432e-08, "loss": 0.0010864018695428967, "step": 2299 }, { "epoch": 3.8218779245086827, "grad_norm": 0.0003372343198861927, "learning_rate": 2.797170214003775e-08, "loss": 0.0008511084015481174, "step": 2300 }, { "epoch": 3.8218779245086827, "eval_loss": 0.0017695488641038537, "eval_runtime": 490.2862, "eval_samples_per_second": 9.807, "eval_steps_per_second": 2.452, "step": 2300 }, { "epoch": 3.8235416450036395, "grad_norm": 0.0007120806840248406, "learning_rate": 2.7461751626811916e-08, "loss": 0.0017852610908448696, "step": 2301 }, { "epoch": 3.8252053654985962, "grad_norm": 0.0023090168833732605, "learning_rate": 2.6956466744355315e-08, "loss": 0.0010553663596510887, "step": 2302 }, { "epoch": 3.826869085993553, "grad_norm": 0.000529763288795948, "learning_rate": 2.6455848446130526e-08, "loss": 0.0010059673804789782, "step": 2303 }, { "epoch": 3.82853280648851, "grad_norm": 0.0005984203307889402, "learning_rate": 2.5959897676794134e-08, "loss": 0.0006512810941785574, "step": 2304 }, { "epoch": 3.8301965269834666, "grad_norm": 0.0004964310792274773, "learning_rate": 2.546861537219586e-08, "loss": 0.0004773701657541096, "step": 2305 }, { "epoch": 3.8318602474784234, "grad_norm": 0.0003719302185345441, "learning_rate": 2.4982002459375265e-08, "loss": 0.0005666849319823086, "step": 2306 }, { "epoch": 3.8335239679733806, "grad_norm": 0.0004134979972150177, "learning_rate": 2.450005985656173e-08, "loss": 0.0005266037187539041, "step": 2307 }, { "epoch": 3.8351876884683374, "grad_norm": 0.0006398731493391097, "learning_rate": 2.4022788473170853e-08, "loss": 0.0011300542391836643, "step": 2308 }, { "epoch": 3.836851408963294, "grad_norm": 0.0009114894201047719, "learning_rate": 2.355018920980501e-08, "loss": 0.0019518604967743158, "step": 2309 }, { "epoch": 3.838515129458251, "grad_norm": 0.0005688403616659343, "learning_rate": 2.308226295824917e-08, "loss": 0.00036758609348908067, "step": 2310 }, { "epoch": 3.840178849953208, "grad_norm": 0.0004458473122213036, "learning_rate": 2.2619010601470925e-08, "loss": 0.0006280520465224981, "step": 2311 }, { "epoch": 3.841842570448165, "grad_norm": 0.00043085412471555173, "learning_rate": 2.2160433013618533e-08, "loss": 0.0002646467764861882, "step": 2312 }, { "epoch": 3.8435062909431217, "grad_norm": 0.00022731930948793888, "learning_rate": 2.170653106001841e-08, "loss": 0.0002789350401144475, "step": 2313 }, { "epoch": 3.8451700114380785, "grad_norm": 0.0002457225928083062, "learning_rate": 2.1257305597175428e-08, "loss": 0.0006970633985474706, "step": 2314 }, { "epoch": 3.8468337319330352, "grad_norm": 0.00033074014936573803, "learning_rate": 2.0812757472768175e-08, "loss": 0.0007895432645455003, "step": 2315 }, { "epoch": 3.848497452427992, "grad_norm": 0.0004209664766676724, "learning_rate": 2.037288752565064e-08, "loss": 0.0005390758742578328, "step": 2316 }, { "epoch": 3.850161172922949, "grad_norm": 0.0006175689049996436, "learning_rate": 1.99376965858486e-08, "loss": 0.00044948633876629174, "step": 2317 }, { "epoch": 3.8518248934179056, "grad_norm": 0.0005152714438736439, "learning_rate": 1.9507185474558765e-08, "loss": 0.00044237388647161424, "step": 2318 }, { "epoch": 3.853488613912863, "grad_norm": 0.00018733202887233347, "learning_rate": 1.908135500414743e-08, "loss": 0.0006890572840347886, "step": 2319 }, { "epoch": 3.8551523344078196, "grad_norm": 0.0010485963430255651, "learning_rate": 1.866020597814766e-08, "loss": 0.0014118121471256018, "step": 2320 }, { "epoch": 3.8568160549027763, "grad_norm": 0.0004583117552101612, "learning_rate": 1.8243739191259603e-08, "loss": 0.0010312426602467895, "step": 2321 }, { "epoch": 3.858479775397733, "grad_norm": 0.00041788682574406266, "learning_rate": 1.7831955429348235e-08, "loss": 0.0004907081602141261, "step": 2322 }, { "epoch": 3.86014349589269, "grad_norm": 0.0006742252153344452, "learning_rate": 1.7424855469440617e-08, "loss": 0.00199217745102942, "step": 2323 }, { "epoch": 3.861807216387647, "grad_norm": 0.0006253820611163974, "learning_rate": 1.7022440079726976e-08, "loss": 0.0010176701471209526, "step": 2324 }, { "epoch": 3.863470936882604, "grad_norm": 0.0006420851568691432, "learning_rate": 1.6624710019556844e-08, "loss": 0.000928575056605041, "step": 2325 }, { "epoch": 3.8651346573775607, "grad_norm": 0.0007003938080742955, "learning_rate": 1.623166603943932e-08, "loss": 0.0018214373849332333, "step": 2326 }, { "epoch": 3.8667983778725175, "grad_norm": 0.0003563383943401277, "learning_rate": 1.584330888104002e-08, "loss": 0.00042250618571415544, "step": 2327 }, { "epoch": 3.8684620983674742, "grad_norm": 0.0006026191986165941, "learning_rate": 1.5459639277181637e-08, "loss": 0.0012118013110011816, "step": 2328 }, { "epoch": 3.870125818862431, "grad_norm": 0.0019928766414523125, "learning_rate": 1.508065795184116e-08, "loss": 0.001427660696208477, "step": 2329 }, { "epoch": 3.871789539357388, "grad_norm": 0.0011689052917063236, "learning_rate": 1.4706365620149043e-08, "loss": 0.0008310202974826097, "step": 2330 }, { "epoch": 3.8734532598523446, "grad_norm": 0.00027448718901723623, "learning_rate": 1.433676298838671e-08, "loss": 0.0003550715046003461, "step": 2331 }, { "epoch": 3.875116980347302, "grad_norm": 0.00040693397750146687, "learning_rate": 1.3971850753987936e-08, "loss": 0.0009889578213915229, "step": 2332 }, { "epoch": 3.8767807008422586, "grad_norm": 0.00041427675751037896, "learning_rate": 1.3611629605534139e-08, "loss": 0.000923509243875742, "step": 2333 }, { "epoch": 3.8784444213372153, "grad_norm": 0.0012977409642189741, "learning_rate": 1.325610022275603e-08, "loss": 0.0011391546577215195, "step": 2334 }, { "epoch": 3.880108141832172, "grad_norm": 0.0003560408076737076, "learning_rate": 1.29052632765303e-08, "loss": 0.00028428417863324285, "step": 2335 }, { "epoch": 3.881771862327129, "grad_norm": 0.0002894706267397851, "learning_rate": 1.2559119428879607e-08, "loss": 0.0006873887032270432, "step": 2336 }, { "epoch": 3.883435582822086, "grad_norm": 0.0006624959642067552, "learning_rate": 1.2217669332970084e-08, "loss": 0.0011978786205872893, "step": 2337 }, { "epoch": 3.885099303317043, "grad_norm": 0.0004509827704168856, "learning_rate": 1.1880913633111335e-08, "loss": 0.0003517848963383585, "step": 2338 }, { "epoch": 3.8867630238119997, "grad_norm": 0.0004664452571887523, "learning_rate": 1.1548852964755053e-08, "loss": 0.0006236121407710016, "step": 2339 }, { "epoch": 3.8884267443069565, "grad_norm": 0.0010952012380585074, "learning_rate": 1.122148795449307e-08, "loss": 0.001222100923769176, "step": 2340 }, { "epoch": 3.8900904648019132, "grad_norm": 0.0004181310359854251, "learning_rate": 1.0898819220056811e-08, "loss": 0.0012167840031906962, "step": 2341 }, { "epoch": 3.89175418529687, "grad_norm": 0.0009198611951433122, "learning_rate": 1.058084737031534e-08, "loss": 0.001886456273496151, "step": 2342 }, { "epoch": 3.893417905791827, "grad_norm": 0.0006522826733998954, "learning_rate": 1.0267573005275645e-08, "loss": 0.002321903593838215, "step": 2343 }, { "epoch": 3.8950816262867836, "grad_norm": 0.0005509674083441496, "learning_rate": 9.95899671607986e-09, "loss": 0.0020275390706956387, "step": 2344 }, { "epoch": 3.896745346781741, "grad_norm": 0.0004306949267629534, "learning_rate": 9.655119085005827e-09, "loss": 0.0005395016050897539, "step": 2345 }, { "epoch": 3.8984090672766976, "grad_norm": 0.0005223198095336556, "learning_rate": 9.355940685464305e-09, "loss": 0.0008524530567228794, "step": 2346 }, { "epoch": 3.9000727877716543, "grad_norm": 0.0007850914262235165, "learning_rate": 9.061462081999262e-09, "loss": 0.001647283905185759, "step": 2347 }, { "epoch": 3.901736508266611, "grad_norm": 0.0004000430053565651, "learning_rate": 8.771683830285649e-09, "loss": 0.0007437191670760512, "step": 2348 }, { "epoch": 3.903400228761568, "grad_norm": 0.0003827679029200226, "learning_rate": 8.486606477129677e-09, "loss": 0.00033460953272879124, "step": 2349 }, { "epoch": 3.905063949256525, "grad_norm": 0.0010400597238913178, "learning_rate": 8.206230560466322e-09, "loss": 0.001289241947233677, "step": 2350 }, { "epoch": 3.906727669751482, "grad_norm": 0.0002617063873913139, "learning_rate": 7.930556609359596e-09, "loss": 0.00028740576817654073, "step": 2351 }, { "epoch": 3.9083913902464387, "grad_norm": 0.00038715035771019757, "learning_rate": 7.659585144000892e-09, "loss": 0.0006691685412079096, "step": 2352 }, { "epoch": 3.9100551107413954, "grad_norm": 0.00024767228751443326, "learning_rate": 7.393316675707584e-09, "loss": 0.0009464296163059771, "step": 2353 }, { "epoch": 3.9117188312363522, "grad_norm": 0.0005090117338113487, "learning_rate": 7.131751706923595e-09, "loss": 0.001270594191737473, "step": 2354 }, { "epoch": 3.913382551731309, "grad_norm": 0.0008436394273303449, "learning_rate": 6.8748907312163325e-09, "loss": 0.0014107085298746824, "step": 2355 }, { "epoch": 3.9150462722262658, "grad_norm": 0.00042344772373326123, "learning_rate": 6.622734233277528e-09, "loss": 0.00029759103199467063, "step": 2356 }, { "epoch": 3.916709992721223, "grad_norm": 0.0011615968542173505, "learning_rate": 6.375282688921569e-09, "loss": 0.0027657081373035908, "step": 2357 }, { "epoch": 3.9183737132161798, "grad_norm": 0.0006488868384622037, "learning_rate": 6.132536565084945e-09, "loss": 0.0007090591825544834, "step": 2358 }, { "epoch": 3.9200374337111366, "grad_norm": 0.00039018920506350696, "learning_rate": 5.894496319824306e-09, "loss": 0.0009499025763943791, "step": 2359 }, { "epoch": 3.9217011542060933, "grad_norm": 0.0005614451947622001, "learning_rate": 5.661162402316733e-09, "loss": 0.0015096960123628378, "step": 2360 }, { "epoch": 3.92336487470105, "grad_norm": 0.0004525469848886132, "learning_rate": 5.432535252859472e-09, "loss": 0.0007028505788184702, "step": 2361 }, { "epoch": 3.9250285951960073, "grad_norm": 0.000415037851780653, "learning_rate": 5.208615302866593e-09, "loss": 0.0008837031782604754, "step": 2362 }, { "epoch": 3.926692315690964, "grad_norm": 0.0017863917164504528, "learning_rate": 4.989402974871216e-09, "loss": 0.0014681301545351744, "step": 2363 }, { "epoch": 3.928356036185921, "grad_norm": 0.000572762219235301, "learning_rate": 4.774898682522455e-09, "loss": 0.0012490102089941502, "step": 2364 }, { "epoch": 3.9300197566808777, "grad_norm": 0.0006480145384557545, "learning_rate": 4.565102830585699e-09, "loss": 0.000560228421818465, "step": 2365 }, { "epoch": 3.9316834771758344, "grad_norm": 0.0008436389616690576, "learning_rate": 4.360015814941498e-09, "loss": 0.0010321622248739004, "step": 2366 }, { "epoch": 3.933347197670791, "grad_norm": 0.0009871098445728421, "learning_rate": 4.159638022585011e-09, "loss": 0.001806100714020431, "step": 2367 }, { "epoch": 3.935010918165748, "grad_norm": 0.0009362440323457122, "learning_rate": 3.96396983162517e-09, "loss": 0.0003677202039398253, "step": 2368 }, { "epoch": 3.9366746386607048, "grad_norm": 0.0002989772765431553, "learning_rate": 3.773011611284128e-09, "loss": 0.0011360242497175932, "step": 2369 }, { "epoch": 3.938338359155662, "grad_norm": 0.0009438424604013562, "learning_rate": 3.586763721896147e-09, "loss": 0.0016448276583105326, "step": 2370 }, { "epoch": 3.9400020796506188, "grad_norm": 0.0011332824360579252, "learning_rate": 3.4052265149070453e-09, "loss": 0.0027621465269476175, "step": 2371 }, { "epoch": 3.9416658001455755, "grad_norm": 0.0005722562782466412, "learning_rate": 3.2284003328744706e-09, "loss": 0.0005765999085269868, "step": 2372 }, { "epoch": 3.9433295206405323, "grad_norm": 0.00041134137427434325, "learning_rate": 3.056285509465684e-09, "loss": 0.00039809662848711014, "step": 2373 }, { "epoch": 3.944993241135489, "grad_norm": 0.002205124357715249, "learning_rate": 2.888882369457835e-09, "loss": 0.0011762338690459728, "step": 2374 }, { "epoch": 3.9466569616304463, "grad_norm": 0.0006662149680778384, "learning_rate": 2.726191228737407e-09, "loss": 0.0012037127744406462, "step": 2375 }, { "epoch": 3.948320682125403, "grad_norm": 0.00028259915416128933, "learning_rate": 2.5682123942993852e-09, "loss": 0.0002108765474986285, "step": 2376 }, { "epoch": 3.94998440262036, "grad_norm": 0.00035649127676151693, "learning_rate": 2.414946164246701e-09, "loss": 0.0012281450908631086, "step": 2377 }, { "epoch": 3.9516481231153167, "grad_norm": 0.0008716229349374771, "learning_rate": 2.2663928277896763e-09, "loss": 0.0019710322376340628, "step": 2378 }, { "epoch": 3.9533118436102734, "grad_norm": 0.00026112596970051527, "learning_rate": 2.122552665245747e-09, "loss": 0.0003657359629869461, "step": 2379 }, { "epoch": 3.95497556410523, "grad_norm": 0.0003548185050021857, "learning_rate": 1.9834259480380756e-09, "loss": 0.00039860629476606846, "step": 2380 }, { "epoch": 3.956639284600187, "grad_norm": 0.0009071942768059671, "learning_rate": 1.8490129386963818e-09, "loss": 0.00256812060251832, "step": 2381 }, { "epoch": 3.9583030050951438, "grad_norm": 0.00014368511619977653, "learning_rate": 1.719313890855001e-09, "loss": 0.000299682782497257, "step": 2382 }, { "epoch": 3.959966725590101, "grad_norm": 0.0008304541115649045, "learning_rate": 1.5943290492539953e-09, "loss": 0.0019825128838419914, "step": 2383 }, { "epoch": 3.9616304460850578, "grad_norm": 0.0004863161302637309, "learning_rate": 1.4740586497366538e-09, "loss": 0.0006778284441679716, "step": 2384 }, { "epoch": 3.9632941665800145, "grad_norm": 0.00045845622662454844, "learning_rate": 1.358502919251159e-09, "loss": 0.0009040733566507697, "step": 2385 }, { "epoch": 3.9649578870749713, "grad_norm": 0.0006530190003104508, "learning_rate": 1.247662075848921e-09, "loss": 0.0013933960581198335, "step": 2386 }, { "epoch": 3.966621607569928, "grad_norm": 0.00040783261647447944, "learning_rate": 1.1415363286843007e-09, "loss": 0.0008399755461141467, "step": 2387 }, { "epoch": 3.9682853280648853, "grad_norm": 0.0008198352297767997, "learning_rate": 1.0401258780146084e-09, "loss": 0.001252037356607616, "step": 2388 }, { "epoch": 3.969949048559842, "grad_norm": 0.0005774135352112353, "learning_rate": 9.434309151992727e-10, "loss": 0.001550138695165515, "step": 2389 }, { "epoch": 3.971612769054799, "grad_norm": 0.001226370339281857, "learning_rate": 8.514516226998393e-10, "loss": 0.001360030728392303, "step": 2390 }, { "epoch": 3.9732764895497557, "grad_norm": 0.0003287496219854802, "learning_rate": 7.641881740794166e-10, "loss": 0.00078690325608477, "step": 2391 }, { "epoch": 3.9749402100447124, "grad_norm": 0.0006232394371181726, "learning_rate": 6.816407340023978e-10, "loss": 0.0008975286036729813, "step": 2392 }, { "epoch": 3.976603930539669, "grad_norm": 0.000568083138205111, "learning_rate": 6.03809458233906e-10, "loss": 0.0012704274849966168, "step": 2393 }, { "epoch": 3.978267651034626, "grad_norm": 0.00043166911927983165, "learning_rate": 5.306944936406266e-10, "loss": 0.0010767162311822176, "step": 2394 }, { "epoch": 3.9799313715295828, "grad_norm": 0.0003333722706884146, "learning_rate": 4.622959781883096e-10, "loss": 0.0009152543498203158, "step": 2395 }, { "epoch": 3.98159509202454, "grad_norm": 0.0008719568722881377, "learning_rate": 3.9861404094426734e-10, "loss": 0.0020166838075965643, "step": 2396 }, { "epoch": 3.9832588125194968, "grad_norm": 0.00061134371208027, "learning_rate": 3.3964880207459916e-10, "loss": 0.0016927790129557252, "step": 2397 }, { "epoch": 3.9849225330144535, "grad_norm": 0.00016088728443719447, "learning_rate": 2.8540037284557897e-10, "loss": 0.0004834794672206044, "step": 2398 }, { "epoch": 3.9865862535094103, "grad_norm": 0.00041522938408888876, "learning_rate": 2.358688556233779e-10, "loss": 0.0015406750608235598, "step": 2399 }, { "epoch": 3.9882499740043675, "grad_norm": 0.0003348875616211444, "learning_rate": 1.9105434387239886e-10, "loss": 0.0004655818920582533, "step": 2400 }, { "epoch": 3.9882499740043675, "eval_loss": 0.001770942471921444, "eval_runtime": 490.0651, "eval_samples_per_second": 9.811, "eval_steps_per_second": 2.453, "step": 2400 }, { "epoch": 3.9899136944993243, "grad_norm": 0.0005287445383146405, "learning_rate": 1.509569221569418e-10, "loss": 0.0008558098925277591, "step": 2401 }, { "epoch": 3.991577414994281, "grad_norm": 0.000635623000562191, "learning_rate": 1.1557666614037122e-10, "loss": 0.001685410039499402, "step": 2402 }, { "epoch": 3.993241135489238, "grad_norm": 0.001174519187770784, "learning_rate": 8.49136425840058e-11, "loss": 0.0027791480533778667, "step": 2403 }, { "epoch": 3.9949048559841946, "grad_norm": 0.0006644176319241524, "learning_rate": 5.896790934878383e-11, "loss": 0.0005844932748004794, "step": 2404 }, { "epoch": 3.9965685764791514, "grad_norm": 0.0004365496861282736, "learning_rate": 3.7739515393320215e-11, "loss": 0.0005024238489568233, "step": 2405 }, { "epoch": 3.998232296974108, "grad_norm": 0.00023726493236608803, "learning_rate": 2.122850077584948e-11, "loss": 0.00023512027109973133, "step": 2406 }, { "epoch": 3.999896017469065, "grad_norm": 0.0007440268527716398, "learning_rate": 9.434896651727699e-12, "loss": 0.0026000516954809427, "step": 2407 }, { "epoch": 4.0, "grad_norm": 0.0002130132488673553, "learning_rate": 2.358725275652951e-12, "loss": 9.920789307216182e-05, "step": 2408 }, { "epoch": 4.0, "eval_loss": 0.0017681519966572523, "eval_runtime": 491.0341, "eval_samples_per_second": 9.792, "eval_steps_per_second": 2.448, "step": 2408 }, { "epoch": 4.0, "step": 2408, "total_flos": 6.475362492187935e+17, "train_loss": 0.0011171495650682802, "train_runtime": 68951.8007, "train_samples_per_second": 2.231, "train_steps_per_second": 0.035 } ], "logging_steps": 1, "max_steps": 2408, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 6.475362492187935e+17, "train_batch_size": 1, "trial_name": null, "trial_params": null }