{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.6584924113449486, "eval_steps": 500, "global_step": 909000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 4.9985712158349534e-05, "loss": 2.2557, "step": 500 }, { "epoch": 0.0, "learning_rate": 4.997133807218206e-05, "loss": 2.049, "step": 1000 }, { "epoch": 0.01, "learning_rate": 4.995699273418692e-05, "loss": 2.0487, "step": 1500 }, { "epoch": 0.01, "learning_rate": 4.9942618648019454e-05, "loss": 2.0357, "step": 2000 }, { "epoch": 0.01, "learning_rate": 4.9928244561851985e-05, "loss": 2.0048, "step": 2500 }, { "epoch": 0.01, "learning_rate": 4.991387047568451e-05, "loss": 1.919, "step": 3000 }, { "epoch": 0.01, "learning_rate": 4.989949638951703e-05, "loss": 1.9217, "step": 3500 }, { "epoch": 0.02, "learning_rate": 4.988512230334957e-05, "loss": 1.825, "step": 4000 }, { "epoch": 0.02, "learning_rate": 4.9870748217182095e-05, "loss": 1.8015, "step": 4500 }, { "epoch": 0.02, "learning_rate": 4.985637413101462e-05, "loss": 1.7936, "step": 5000 }, { "epoch": 0.02, "learning_rate": 4.984200004484715e-05, "loss": 1.7456, "step": 5500 }, { "epoch": 0.02, "learning_rate": 4.9827654706852015e-05, "loss": 1.7414, "step": 6000 }, { "epoch": 0.03, "learning_rate": 4.9813280620684546e-05, "loss": 1.728, "step": 6500 }, { "epoch": 0.03, "learning_rate": 4.979890653451707e-05, "loss": 1.7282, "step": 7000 }, { "epoch": 0.03, "learning_rate": 4.9784532448349594e-05, "loss": 1.7469, "step": 7500 }, { "epoch": 0.03, "learning_rate": 4.97702158585268e-05, "loss": 1.6573, "step": 8000 }, { "epoch": 0.03, "learning_rate": 4.975584177235932e-05, "loss": 1.7118, "step": 8500 }, { "epoch": 0.04, "learning_rate": 4.974146768619185e-05, "loss": 1.6161, "step": 9000 }, { "epoch": 0.04, "learning_rate": 4.9727093600024385e-05, "loss": 1.6275, "step": 9500 }, { "epoch": 0.04, "learning_rate": 4.971271951385691e-05, "loss": 1.6875, "step": 10000 }, { "epoch": 0.04, "learning_rate": 4.9698345427689434e-05, "loss": 1.6544, "step": 10500 }, { "epoch": 0.04, "learning_rate": 4.9683971341521965e-05, "loss": 1.6431, "step": 11000 }, { "epoch": 0.05, "learning_rate": 4.9669597255354495e-05, "loss": 1.6536, "step": 11500 }, { "epoch": 0.05, "learning_rate": 4.965522316918702e-05, "loss": 1.6471, "step": 12000 }, { "epoch": 0.05, "learning_rate": 4.9640877831191884e-05, "loss": 1.5942, "step": 12500 }, { "epoch": 0.05, "learning_rate": 4.962650374502441e-05, "loss": 1.6318, "step": 13000 }, { "epoch": 0.05, "learning_rate": 4.9612129658856946e-05, "loss": 1.5797, "step": 13500 }, { "epoch": 0.06, "learning_rate": 4.959775557268947e-05, "loss": 1.5792, "step": 14000 }, { "epoch": 0.06, "learning_rate": 4.9583410234694335e-05, "loss": 1.5361, "step": 14500 }, { "epoch": 0.06, "learning_rate": 4.956903614852686e-05, "loss": 1.6009, "step": 15000 }, { "epoch": 0.06, "learning_rate": 4.9554690810531724e-05, "loss": 1.5841, "step": 15500 }, { "epoch": 0.06, "learning_rate": 4.954031672436425e-05, "loss": 1.5948, "step": 16000 }, { "epoch": 0.07, "learning_rate": 4.952594263819678e-05, "loss": 1.6366, "step": 16500 }, { "epoch": 0.07, "learning_rate": 4.951156855202931e-05, "loss": 1.5873, "step": 17000 }, { "epoch": 0.07, "learning_rate": 4.9497194465861834e-05, "loss": 1.6046, "step": 17500 }, { "epoch": 0.07, "learning_rate": 4.94828491278667e-05, "loss": 1.6193, "step": 18000 }, { "epoch": 0.07, "learning_rate": 4.946847504169922e-05, "loss": 1.5759, "step": 18500 }, { "epoch": 0.08, "learning_rate": 4.9454100955531754e-05, "loss": 1.5702, "step": 19000 }, { "epoch": 0.08, "learning_rate": 4.9439726869364285e-05, "loss": 1.5179, "step": 19500 }, { "epoch": 0.08, "learning_rate": 4.942535278319681e-05, "loss": 1.5536, "step": 20000 }, { "epoch": 0.08, "learning_rate": 4.941097869702934e-05, "loss": 1.5235, "step": 20500 }, { "epoch": 0.08, "learning_rate": 4.939660461086187e-05, "loss": 1.5325, "step": 21000 }, { "epoch": 0.09, "learning_rate": 4.9382230524694395e-05, "loss": 1.5385, "step": 21500 }, { "epoch": 0.09, "learning_rate": 4.936788518669926e-05, "loss": 1.5189, "step": 22000 }, { "epoch": 0.09, "learning_rate": 4.9353511100531784e-05, "loss": 1.548, "step": 22500 }, { "epoch": 0.09, "learning_rate": 4.9339137014364315e-05, "loss": 1.5116, "step": 23000 }, { "epoch": 0.09, "learning_rate": 4.9324762928196846e-05, "loss": 1.5404, "step": 23500 }, { "epoch": 0.1, "learning_rate": 4.931041759020171e-05, "loss": 1.5425, "step": 24000 }, { "epoch": 0.1, "learning_rate": 4.929607225220657e-05, "loss": 1.5627, "step": 24500 }, { "epoch": 0.1, "learning_rate": 4.92816981660391e-05, "loss": 1.5173, "step": 25000 }, { "epoch": 0.1, "learning_rate": 4.926732407987162e-05, "loss": 1.5033, "step": 25500 }, { "epoch": 0.1, "learning_rate": 4.925294999370415e-05, "loss": 1.4979, "step": 26000 }, { "epoch": 0.11, "learning_rate": 4.9238575907536685e-05, "loss": 1.5482, "step": 26500 }, { "epoch": 0.11, "learning_rate": 4.922420182136921e-05, "loss": 1.5178, "step": 27000 }, { "epoch": 0.11, "learning_rate": 4.920982773520173e-05, "loss": 1.5487, "step": 27500 }, { "epoch": 0.11, "learning_rate": 4.919545364903427e-05, "loss": 1.5682, "step": 28000 }, { "epoch": 0.11, "learning_rate": 4.918110831103913e-05, "loss": 1.5289, "step": 28500 }, { "epoch": 0.12, "learning_rate": 4.916673422487166e-05, "loss": 1.5002, "step": 29000 }, { "epoch": 0.12, "learning_rate": 4.9152360138704184e-05, "loss": 1.5392, "step": 29500 }, { "epoch": 0.12, "learning_rate": 4.913798605253671e-05, "loss": 1.5209, "step": 30000 }, { "epoch": 0.12, "learning_rate": 4.9123611966369246e-05, "loss": 1.4972, "step": 30500 }, { "epoch": 0.12, "learning_rate": 4.910923788020177e-05, "loss": 1.4937, "step": 31000 }, { "epoch": 0.13, "learning_rate": 4.9094863794034294e-05, "loss": 1.4849, "step": 31500 }, { "epoch": 0.13, "learning_rate": 4.9080489707866825e-05, "loss": 1.4916, "step": 32000 }, { "epoch": 0.13, "learning_rate": 4.9066115621699356e-05, "loss": 1.4975, "step": 32500 }, { "epoch": 0.13, "learning_rate": 4.905174153553188e-05, "loss": 1.4753, "step": 33000 }, { "epoch": 0.13, "learning_rate": 4.903742494570908e-05, "loss": 1.4382, "step": 33500 }, { "epoch": 0.14, "learning_rate": 4.902305085954161e-05, "loss": 1.5305, "step": 34000 }, { "epoch": 0.14, "learning_rate": 4.9008676773374134e-05, "loss": 1.4588, "step": 34500 }, { "epoch": 0.14, "learning_rate": 4.8994302687206665e-05, "loss": 1.4831, "step": 35000 }, { "epoch": 0.14, "learning_rate": 4.897995734921152e-05, "loss": 1.5033, "step": 35500 }, { "epoch": 0.14, "learning_rate": 4.896558326304406e-05, "loss": 1.5217, "step": 36000 }, { "epoch": 0.15, "learning_rate": 4.8951209176876584e-05, "loss": 1.4881, "step": 36500 }, { "epoch": 0.15, "learning_rate": 4.893683509070911e-05, "loss": 1.4689, "step": 37000 }, { "epoch": 0.15, "learning_rate": 4.892246100454164e-05, "loss": 1.5241, "step": 37500 }, { "epoch": 0.15, "learning_rate": 4.890808691837417e-05, "loss": 1.5023, "step": 38000 }, { "epoch": 0.15, "learning_rate": 4.8893712832206695e-05, "loss": 1.4979, "step": 38500 }, { "epoch": 0.16, "learning_rate": 4.887933874603922e-05, "loss": 1.4906, "step": 39000 }, { "epoch": 0.16, "learning_rate": 4.8864964659871757e-05, "loss": 1.4677, "step": 39500 }, { "epoch": 0.16, "learning_rate": 4.8850619321876614e-05, "loss": 1.4625, "step": 40000 }, { "epoch": 0.16, "learning_rate": 4.8836245235709145e-05, "loss": 1.4539, "step": 40500 }, { "epoch": 0.17, "learning_rate": 4.882187114954167e-05, "loss": 1.456, "step": 41000 }, { "epoch": 0.17, "learning_rate": 4.8807497063374194e-05, "loss": 1.4562, "step": 41500 }, { "epoch": 0.17, "learning_rate": 4.879315172537906e-05, "loss": 1.4844, "step": 42000 }, { "epoch": 0.17, "learning_rate": 4.877877763921159e-05, "loss": 1.4885, "step": 42500 }, { "epoch": 0.17, "learning_rate": 4.876440355304412e-05, "loss": 1.4746, "step": 43000 }, { "epoch": 0.18, "learning_rate": 4.8750029466876644e-05, "loss": 1.4728, "step": 43500 }, { "epoch": 0.18, "learning_rate": 4.873568412888151e-05, "loss": 1.4834, "step": 44000 }, { "epoch": 0.18, "learning_rate": 4.872131004271403e-05, "loss": 1.4744, "step": 44500 }, { "epoch": 0.18, "learning_rate": 4.8706935956546564e-05, "loss": 1.496, "step": 45000 }, { "epoch": 0.18, "learning_rate": 4.8692561870379095e-05, "loss": 1.4814, "step": 45500 }, { "epoch": 0.19, "learning_rate": 4.867821653238396e-05, "loss": 1.479, "step": 46000 }, { "epoch": 0.19, "learning_rate": 4.8663842446216484e-05, "loss": 1.4949, "step": 46500 }, { "epoch": 0.19, "learning_rate": 4.864946836004901e-05, "loss": 1.4751, "step": 47000 }, { "epoch": 0.19, "learning_rate": 4.8635094273881546e-05, "loss": 1.4633, "step": 47500 }, { "epoch": 0.19, "learning_rate": 4.8620748935886403e-05, "loss": 1.419, "step": 48000 }, { "epoch": 0.2, "learning_rate": 4.8606374849718934e-05, "loss": 1.4838, "step": 48500 }, { "epoch": 0.2, "learning_rate": 4.859200076355146e-05, "loss": 1.4311, "step": 49000 }, { "epoch": 0.2, "learning_rate": 4.857765542555632e-05, "loss": 1.5105, "step": 49500 }, { "epoch": 0.2, "learning_rate": 4.856328133938885e-05, "loss": 1.4417, "step": 50000 }, { "epoch": 0.2, "learning_rate": 4.854890725322138e-05, "loss": 1.4828, "step": 50500 }, { "epoch": 0.21, "learning_rate": 4.853453316705391e-05, "loss": 1.4745, "step": 51000 }, { "epoch": 0.21, "learning_rate": 4.8520159080886433e-05, "loss": 1.4476, "step": 51500 }, { "epoch": 0.21, "learning_rate": 4.8505784994718964e-05, "loss": 1.4907, "step": 52000 }, { "epoch": 0.21, "learning_rate": 4.849143965672382e-05, "loss": 1.4435, "step": 52500 }, { "epoch": 0.21, "learning_rate": 4.847706557055635e-05, "loss": 1.4265, "step": 53000 }, { "epoch": 0.22, "learning_rate": 4.8462691484388884e-05, "loss": 1.4731, "step": 53500 }, { "epoch": 0.22, "learning_rate": 4.844831739822141e-05, "loss": 1.4475, "step": 54000 }, { "epoch": 0.22, "learning_rate": 4.843394331205394e-05, "loss": 1.4976, "step": 54500 }, { "epoch": 0.22, "learning_rate": 4.841956922588647e-05, "loss": 1.4172, "step": 55000 }, { "epoch": 0.22, "learning_rate": 4.8405223887891335e-05, "loss": 1.4651, "step": 55500 }, { "epoch": 0.23, "learning_rate": 4.839084980172386e-05, "loss": 1.4469, "step": 56000 }, { "epoch": 0.23, "learning_rate": 4.837647571555638e-05, "loss": 1.4732, "step": 56500 }, { "epoch": 0.23, "learning_rate": 4.8362101629388914e-05, "loss": 1.4417, "step": 57000 }, { "epoch": 0.23, "learning_rate": 4.8347727543221445e-05, "loss": 1.4229, "step": 57500 }, { "epoch": 0.23, "learning_rate": 4.833335345705397e-05, "loss": 1.4677, "step": 58000 }, { "epoch": 0.24, "learning_rate": 4.83189793708865e-05, "loss": 1.4511, "step": 58500 }, { "epoch": 0.24, "learning_rate": 4.830460528471903e-05, "loss": 1.4449, "step": 59000 }, { "epoch": 0.24, "learning_rate": 4.8290231198551555e-05, "loss": 1.4342, "step": 59500 }, { "epoch": 0.24, "learning_rate": 4.827588586055642e-05, "loss": 1.4212, "step": 60000 }, { "epoch": 0.24, "learning_rate": 4.8261511774388944e-05, "loss": 1.4428, "step": 60500 }, { "epoch": 0.25, "learning_rate": 4.824716643639381e-05, "loss": 1.4625, "step": 61000 }, { "epoch": 0.25, "learning_rate": 4.823279235022633e-05, "loss": 1.4529, "step": 61500 }, { "epoch": 0.25, "learning_rate": 4.8218418264058864e-05, "loss": 1.4375, "step": 62000 }, { "epoch": 0.25, "learning_rate": 4.8204044177891395e-05, "loss": 1.4338, "step": 62500 }, { "epoch": 0.25, "learning_rate": 4.818967009172392e-05, "loss": 1.4556, "step": 63000 }, { "epoch": 0.26, "learning_rate": 4.817529600555645e-05, "loss": 1.4141, "step": 63500 }, { "epoch": 0.26, "learning_rate": 4.816092191938898e-05, "loss": 1.4264, "step": 64000 }, { "epoch": 0.26, "learning_rate": 4.8146547833221505e-05, "loss": 1.4223, "step": 64500 }, { "epoch": 0.26, "learning_rate": 4.813220249522637e-05, "loss": 1.4082, "step": 65000 }, { "epoch": 0.26, "learning_rate": 4.8117828409058894e-05, "loss": 1.4546, "step": 65500 }, { "epoch": 0.27, "learning_rate": 4.8103454322891425e-05, "loss": 1.4737, "step": 66000 }, { "epoch": 0.27, "learning_rate": 4.808910898489629e-05, "loss": 1.4299, "step": 66500 }, { "epoch": 0.27, "learning_rate": 4.807473489872882e-05, "loss": 1.4541, "step": 67000 }, { "epoch": 0.27, "learning_rate": 4.8060360812561344e-05, "loss": 1.444, "step": 67500 }, { "epoch": 0.27, "learning_rate": 4.804598672639387e-05, "loss": 1.4292, "step": 68000 }, { "epoch": 0.28, "learning_rate": 4.80316126402264e-05, "loss": 1.3933, "step": 68500 }, { "epoch": 0.28, "learning_rate": 4.801723855405893e-05, "loss": 1.4456, "step": 69000 }, { "epoch": 0.28, "learning_rate": 4.8002864467891455e-05, "loss": 1.4617, "step": 69500 }, { "epoch": 0.28, "learning_rate": 4.7988490381723986e-05, "loss": 1.4573, "step": 70000 }, { "epoch": 0.28, "learning_rate": 4.7974145043728843e-05, "loss": 1.4704, "step": 70500 }, { "epoch": 0.29, "learning_rate": 4.7959770957561374e-05, "loss": 1.4401, "step": 71000 }, { "epoch": 0.29, "learning_rate": 4.794542561956624e-05, "loss": 1.4131, "step": 71500 }, { "epoch": 0.29, "learning_rate": 4.793105153339877e-05, "loss": 1.4041, "step": 72000 }, { "epoch": 0.29, "learning_rate": 4.7916677447231294e-05, "loss": 1.4603, "step": 72500 }, { "epoch": 0.29, "learning_rate": 4.7902303361063825e-05, "loss": 1.444, "step": 73000 }, { "epoch": 0.3, "learning_rate": 4.788792927489635e-05, "loss": 1.4544, "step": 73500 }, { "epoch": 0.3, "learning_rate": 4.787355518872888e-05, "loss": 1.4553, "step": 74000 }, { "epoch": 0.3, "learning_rate": 4.7859181102561404e-05, "loss": 1.417, "step": 74500 }, { "epoch": 0.3, "learning_rate": 4.784483576456627e-05, "loss": 1.4277, "step": 75000 }, { "epoch": 0.3, "learning_rate": 4.78304616783988e-05, "loss": 1.4746, "step": 75500 }, { "epoch": 0.31, "learning_rate": 4.781608759223133e-05, "loss": 1.4245, "step": 76000 }, { "epoch": 0.31, "learning_rate": 4.7801713506063855e-05, "loss": 1.429, "step": 76500 }, { "epoch": 0.31, "learning_rate": 4.778733941989638e-05, "loss": 1.39, "step": 77000 }, { "epoch": 0.31, "learning_rate": 4.777296533372891e-05, "loss": 1.4128, "step": 77500 }, { "epoch": 0.31, "learning_rate": 4.775859124756144e-05, "loss": 1.4054, "step": 78000 }, { "epoch": 0.32, "learning_rate": 4.7744217161393965e-05, "loss": 1.4737, "step": 78500 }, { "epoch": 0.32, "learning_rate": 4.772987182339883e-05, "loss": 1.4217, "step": 79000 }, { "epoch": 0.32, "learning_rate": 4.7715497737231354e-05, "loss": 1.4723, "step": 79500 }, { "epoch": 0.32, "learning_rate": 4.770115239923622e-05, "loss": 1.4435, "step": 80000 }, { "epoch": 0.32, "learning_rate": 4.768677831306875e-05, "loss": 1.438, "step": 80500 }, { "epoch": 0.33, "learning_rate": 4.767240422690128e-05, "loss": 1.4572, "step": 81000 }, { "epoch": 0.33, "learning_rate": 4.7658030140733805e-05, "loss": 1.3813, "step": 81500 }, { "epoch": 0.33, "learning_rate": 4.7643656054566336e-05, "loss": 1.45, "step": 82000 }, { "epoch": 0.33, "learning_rate": 4.7629310716571193e-05, "loss": 1.4768, "step": 82500 }, { "epoch": 0.33, "learning_rate": 4.7614936630403724e-05, "loss": 1.3856, "step": 83000 }, { "epoch": 0.34, "learning_rate": 4.7600562544236255e-05, "loss": 1.4351, "step": 83500 }, { "epoch": 0.34, "learning_rate": 4.758618845806878e-05, "loss": 1.4182, "step": 84000 }, { "epoch": 0.34, "learning_rate": 4.757181437190131e-05, "loss": 1.4368, "step": 84500 }, { "epoch": 0.34, "learning_rate": 4.7557440285733835e-05, "loss": 1.4184, "step": 85000 }, { "epoch": 0.34, "learning_rate": 4.7543066199566366e-05, "loss": 1.432, "step": 85500 }, { "epoch": 0.35, "learning_rate": 4.752869211339889e-05, "loss": 1.4598, "step": 86000 }, { "epoch": 0.35, "learning_rate": 4.7514346775403754e-05, "loss": 1.4246, "step": 86500 }, { "epoch": 0.35, "learning_rate": 4.7499972689236285e-05, "loss": 1.4359, "step": 87000 }, { "epoch": 0.35, "learning_rate": 4.7485598603068816e-05, "loss": 1.4266, "step": 87500 }, { "epoch": 0.35, "learning_rate": 4.747122451690134e-05, "loss": 1.4182, "step": 88000 }, { "epoch": 0.36, "learning_rate": 4.7456879178906205e-05, "loss": 1.4571, "step": 88500 }, { "epoch": 0.36, "learning_rate": 4.744250509273873e-05, "loss": 1.4457, "step": 89000 }, { "epoch": 0.36, "learning_rate": 4.742813100657126e-05, "loss": 1.4522, "step": 89500 }, { "epoch": 0.36, "learning_rate": 4.741375692040379e-05, "loss": 1.4193, "step": 90000 }, { "epoch": 0.36, "learning_rate": 4.7399382834236315e-05, "loss": 1.3954, "step": 90500 }, { "epoch": 0.37, "learning_rate": 4.7385008748068846e-05, "loss": 1.4555, "step": 91000 }, { "epoch": 0.37, "learning_rate": 4.7370663410073704e-05, "loss": 1.3915, "step": 91500 }, { "epoch": 0.37, "learning_rate": 4.7356289323906235e-05, "loss": 1.4263, "step": 92000 }, { "epoch": 0.37, "learning_rate": 4.7341915237738766e-05, "loss": 1.4067, "step": 92500 }, { "epoch": 0.37, "learning_rate": 4.732754115157129e-05, "loss": 1.4035, "step": 93000 }, { "epoch": 0.38, "learning_rate": 4.7313195813576155e-05, "loss": 1.4041, "step": 93500 }, { "epoch": 0.38, "learning_rate": 4.729882172740868e-05, "loss": 1.4253, "step": 94000 }, { "epoch": 0.38, "learning_rate": 4.728444764124121e-05, "loss": 1.4418, "step": 94500 }, { "epoch": 0.38, "learning_rate": 4.727007355507374e-05, "loss": 1.4457, "step": 95000 }, { "epoch": 0.38, "learning_rate": 4.7255728217078605e-05, "loss": 1.4415, "step": 95500 }, { "epoch": 0.39, "learning_rate": 4.724135413091113e-05, "loss": 1.4302, "step": 96000 }, { "epoch": 0.39, "learning_rate": 4.722698004474366e-05, "loss": 1.4659, "step": 96500 }, { "epoch": 0.39, "learning_rate": 4.7212605958576185e-05, "loss": 1.411, "step": 97000 }, { "epoch": 0.39, "learning_rate": 4.7198231872408716e-05, "loss": 1.4336, "step": 97500 }, { "epoch": 0.39, "learning_rate": 4.718385778624124e-05, "loss": 1.3472, "step": 98000 }, { "epoch": 0.4, "learning_rate": 4.716948370007377e-05, "loss": 1.4557, "step": 98500 }, { "epoch": 0.4, "learning_rate": 4.71551096139063e-05, "loss": 1.3925, "step": 99000 }, { "epoch": 0.4, "learning_rate": 4.714076427591116e-05, "loss": 1.3936, "step": 99500 }, { "epoch": 0.4, "learning_rate": 4.712639018974369e-05, "loss": 1.427, "step": 100000 }, { "epoch": 0.4, "learning_rate": 4.7112016103576215e-05, "loss": 1.4013, "step": 100500 }, { "epoch": 0.41, "learning_rate": 4.7097642017408746e-05, "loss": 1.4052, "step": 101000 }, { "epoch": 0.41, "learning_rate": 4.708329667941361e-05, "loss": 1.4341, "step": 101500 }, { "epoch": 0.41, "learning_rate": 4.7068922593246134e-05, "loss": 1.3709, "step": 102000 }, { "epoch": 0.41, "learning_rate": 4.7054548507078665e-05, "loss": 1.3834, "step": 102500 }, { "epoch": 0.41, "learning_rate": 4.704017442091119e-05, "loss": 1.4163, "step": 103000 }, { "epoch": 0.42, "learning_rate": 4.702580033474372e-05, "loss": 1.4131, "step": 103500 }, { "epoch": 0.42, "learning_rate": 4.701142624857625e-05, "loss": 1.4448, "step": 104000 }, { "epoch": 0.42, "learning_rate": 4.699708091058111e-05, "loss": 1.4091, "step": 104500 }, { "epoch": 0.42, "learning_rate": 4.698270682441364e-05, "loss": 1.3743, "step": 105000 }, { "epoch": 0.42, "learning_rate": 4.6968361486418505e-05, "loss": 1.4378, "step": 105500 }, { "epoch": 0.43, "learning_rate": 4.695398740025103e-05, "loss": 1.3978, "step": 106000 }, { "epoch": 0.43, "learning_rate": 4.693961331408356e-05, "loss": 1.4279, "step": 106500 }, { "epoch": 0.43, "learning_rate": 4.692523922791609e-05, "loss": 1.4038, "step": 107000 }, { "epoch": 0.43, "learning_rate": 4.691089388992095e-05, "loss": 1.3691, "step": 107500 }, { "epoch": 0.43, "learning_rate": 4.689651980375348e-05, "loss": 1.4098, "step": 108000 }, { "epoch": 0.44, "learning_rate": 4.6882145717586004e-05, "loss": 1.4601, "step": 108500 }, { "epoch": 0.44, "learning_rate": 4.6867771631418535e-05, "loss": 1.4051, "step": 109000 }, { "epoch": 0.44, "learning_rate": 4.6853397545251066e-05, "loss": 1.3787, "step": 109500 }, { "epoch": 0.44, "learning_rate": 4.683902345908359e-05, "loss": 1.3862, "step": 110000 }, { "epoch": 0.44, "learning_rate": 4.682464937291612e-05, "loss": 1.3977, "step": 110500 }, { "epoch": 0.45, "learning_rate": 4.6810275286748645e-05, "loss": 1.4151, "step": 111000 }, { "epoch": 0.45, "learning_rate": 4.679592994875351e-05, "loss": 1.4034, "step": 111500 }, { "epoch": 0.45, "learning_rate": 4.678155586258604e-05, "loss": 1.3965, "step": 112000 }, { "epoch": 0.45, "learning_rate": 4.6767181776418565e-05, "loss": 1.4523, "step": 112500 }, { "epoch": 0.45, "learning_rate": 4.6752807690251096e-05, "loss": 1.4077, "step": 113000 }, { "epoch": 0.46, "learning_rate": 4.673843360408362e-05, "loss": 1.4365, "step": 113500 }, { "epoch": 0.46, "learning_rate": 4.672405951791615e-05, "loss": 1.371, "step": 114000 }, { "epoch": 0.46, "learning_rate": 4.670968543174868e-05, "loss": 1.3935, "step": 114500 }, { "epoch": 0.46, "learning_rate": 4.6695311345581206e-05, "loss": 1.4184, "step": 115000 }, { "epoch": 0.46, "learning_rate": 4.668096600758607e-05, "loss": 1.394, "step": 115500 }, { "epoch": 0.47, "learning_rate": 4.6666591921418595e-05, "loss": 1.3801, "step": 116000 }, { "epoch": 0.47, "learning_rate": 4.6652217835251126e-05, "loss": 1.3832, "step": 116500 }, { "epoch": 0.47, "learning_rate": 4.663784374908366e-05, "loss": 1.4032, "step": 117000 }, { "epoch": 0.47, "learning_rate": 4.662346966291618e-05, "loss": 1.4205, "step": 117500 }, { "epoch": 0.47, "learning_rate": 4.660909557674871e-05, "loss": 1.3854, "step": 118000 }, { "epoch": 0.48, "learning_rate": 4.6594750238753576e-05, "loss": 1.3982, "step": 118500 }, { "epoch": 0.48, "learning_rate": 4.65803761525861e-05, "loss": 1.3992, "step": 119000 }, { "epoch": 0.48, "learning_rate": 4.656600206641863e-05, "loss": 1.348, "step": 119500 }, { "epoch": 0.48, "learning_rate": 4.6551627980251156e-05, "loss": 1.4251, "step": 120000 }, { "epoch": 0.48, "learning_rate": 4.653725389408369e-05, "loss": 1.3713, "step": 120500 }, { "epoch": 0.49, "learning_rate": 4.652290855608855e-05, "loss": 1.4152, "step": 121000 }, { "epoch": 0.49, "learning_rate": 4.6508534469921075e-05, "loss": 1.4396, "step": 121500 }, { "epoch": 0.49, "learning_rate": 4.6494160383753606e-05, "loss": 1.412, "step": 122000 }, { "epoch": 0.49, "learning_rate": 4.647978629758613e-05, "loss": 1.4279, "step": 122500 }, { "epoch": 0.5, "learning_rate": 4.646541221141866e-05, "loss": 1.4773, "step": 123000 }, { "epoch": 0.5, "learning_rate": 4.645103812525119e-05, "loss": 1.4025, "step": 123500 }, { "epoch": 0.5, "learning_rate": 4.643666403908372e-05, "loss": 1.4126, "step": 124000 }, { "epoch": 0.5, "learning_rate": 4.642228995291625e-05, "loss": 1.3657, "step": 124500 }, { "epoch": 0.5, "learning_rate": 4.6407944614921105e-05, "loss": 1.4284, "step": 125000 }, { "epoch": 0.51, "learning_rate": 4.639359927692597e-05, "loss": 1.4351, "step": 125500 }, { "epoch": 0.51, "learning_rate": 4.63792251907585e-05, "loss": 1.4265, "step": 126000 }, { "epoch": 0.51, "learning_rate": 4.6364851104591025e-05, "loss": 1.3681, "step": 126500 }, { "epoch": 0.51, "learning_rate": 4.6350477018423556e-05, "loss": 1.3806, "step": 127000 }, { "epoch": 0.51, "learning_rate": 4.633610293225608e-05, "loss": 1.3462, "step": 127500 }, { "epoch": 0.52, "learning_rate": 4.632172884608861e-05, "loss": 1.4283, "step": 128000 }, { "epoch": 0.52, "learning_rate": 4.630735475992114e-05, "loss": 1.4105, "step": 128500 }, { "epoch": 0.52, "learning_rate": 4.6292980673753666e-05, "loss": 1.3929, "step": 129000 }, { "epoch": 0.52, "learning_rate": 4.627863533575853e-05, "loss": 1.4379, "step": 129500 }, { "epoch": 0.52, "learning_rate": 4.626426124959106e-05, "loss": 1.4175, "step": 130000 }, { "epoch": 0.53, "learning_rate": 4.6249887163423586e-05, "loss": 1.3625, "step": 130500 }, { "epoch": 0.53, "learning_rate": 4.623551307725612e-05, "loss": 1.4202, "step": 131000 }, { "epoch": 0.53, "learning_rate": 4.622113899108864e-05, "loss": 1.4135, "step": 131500 }, { "epoch": 0.53, "learning_rate": 4.620676490492117e-05, "loss": 1.4075, "step": 132000 }, { "epoch": 0.53, "learning_rate": 4.61923908187537e-05, "loss": 1.3812, "step": 132500 }, { "epoch": 0.54, "learning_rate": 4.617804548075856e-05, "loss": 1.3856, "step": 133000 }, { "epoch": 0.54, "learning_rate": 4.616367139459109e-05, "loss": 1.3601, "step": 133500 }, { "epoch": 0.54, "learning_rate": 4.6149297308423616e-05, "loss": 1.3832, "step": 134000 }, { "epoch": 0.54, "learning_rate": 4.613492322225615e-05, "loss": 1.3954, "step": 134500 }, { "epoch": 0.54, "learning_rate": 4.612057788426101e-05, "loss": 1.3669, "step": 135000 }, { "epoch": 0.55, "learning_rate": 4.6106203798093536e-05, "loss": 1.4357, "step": 135500 }, { "epoch": 0.55, "learning_rate": 4.609182971192607e-05, "loss": 1.4157, "step": 136000 }, { "epoch": 0.55, "learning_rate": 4.607745562575859e-05, "loss": 1.3635, "step": 136500 }, { "epoch": 0.55, "learning_rate": 4.606308153959112e-05, "loss": 1.4251, "step": 137000 }, { "epoch": 0.55, "learning_rate": 4.604870745342365e-05, "loss": 1.3557, "step": 137500 }, { "epoch": 0.56, "learning_rate": 4.603436211542852e-05, "loss": 1.3947, "step": 138000 }, { "epoch": 0.56, "learning_rate": 4.601998802926104e-05, "loss": 1.3881, "step": 138500 }, { "epoch": 0.56, "learning_rate": 4.6005613943093566e-05, "loss": 1.3912, "step": 139000 }, { "epoch": 0.56, "learning_rate": 4.59912398569261e-05, "loss": 1.3857, "step": 139500 }, { "epoch": 0.56, "learning_rate": 4.597689451893096e-05, "loss": 1.4202, "step": 140000 }, { "epoch": 0.57, "learning_rate": 4.596252043276349e-05, "loss": 1.3871, "step": 140500 }, { "epoch": 0.57, "learning_rate": 4.5948146346596016e-05, "loss": 1.3563, "step": 141000 }, { "epoch": 0.57, "learning_rate": 4.593380100860088e-05, "loss": 1.4027, "step": 141500 }, { "epoch": 0.57, "learning_rate": 4.5919455670605745e-05, "loss": 1.399, "step": 142000 }, { "epoch": 0.57, "learning_rate": 4.590508158443827e-05, "loss": 1.4522, "step": 142500 }, { "epoch": 0.58, "learning_rate": 4.58907074982708e-05, "loss": 1.4005, "step": 143000 }, { "epoch": 0.58, "learning_rate": 4.587633341210333e-05, "loss": 1.3592, "step": 143500 }, { "epoch": 0.58, "learning_rate": 4.5861959325935856e-05, "loss": 1.401, "step": 144000 }, { "epoch": 0.58, "learning_rate": 4.584758523976838e-05, "loss": 1.3642, "step": 144500 }, { "epoch": 0.58, "learning_rate": 4.583321115360091e-05, "loss": 1.3673, "step": 145000 }, { "epoch": 0.59, "learning_rate": 4.581883706743344e-05, "loss": 1.3732, "step": 145500 }, { "epoch": 0.59, "learning_rate": 4.5804491729438306e-05, "loss": 1.3784, "step": 146000 }, { "epoch": 0.59, "learning_rate": 4.579011764327083e-05, "loss": 1.3617, "step": 146500 }, { "epoch": 0.59, "learning_rate": 4.5775743557103355e-05, "loss": 1.4072, "step": 147000 }, { "epoch": 0.59, "learning_rate": 4.5761369470935886e-05, "loss": 1.387, "step": 147500 }, { "epoch": 0.6, "learning_rate": 4.574699538476842e-05, "loss": 1.4189, "step": 148000 }, { "epoch": 0.6, "learning_rate": 4.573262129860094e-05, "loss": 1.4086, "step": 148500 }, { "epoch": 0.6, "learning_rate": 4.571824721243347e-05, "loss": 1.4118, "step": 149000 }, { "epoch": 0.6, "learning_rate": 4.5703873126266e-05, "loss": 1.3505, "step": 149500 }, { "epoch": 0.6, "learning_rate": 4.568949904009853e-05, "loss": 1.3993, "step": 150000 }, { "epoch": 0.61, "learning_rate": 4.567515370210339e-05, "loss": 1.3766, "step": 150500 }, { "epoch": 0.61, "learning_rate": 4.5660779615935916e-05, "loss": 1.3773, "step": 151000 }, { "epoch": 0.61, "learning_rate": 4.564640552976845e-05, "loss": 1.3799, "step": 151500 }, { "epoch": 0.61, "learning_rate": 4.563203144360098e-05, "loss": 1.4175, "step": 152000 }, { "epoch": 0.61, "learning_rate": 4.56176573574335e-05, "loss": 1.3748, "step": 152500 }, { "epoch": 0.62, "learning_rate": 4.560328327126603e-05, "loss": 1.4009, "step": 153000 }, { "epoch": 0.62, "learning_rate": 4.558893793327089e-05, "loss": 1.3603, "step": 153500 }, { "epoch": 0.62, "learning_rate": 4.557456384710342e-05, "loss": 1.3207, "step": 154000 }, { "epoch": 0.62, "learning_rate": 4.556018976093595e-05, "loss": 1.4082, "step": 154500 }, { "epoch": 0.62, "learning_rate": 4.554581567476848e-05, "loss": 1.3811, "step": 155000 }, { "epoch": 0.63, "learning_rate": 4.553144158860101e-05, "loss": 1.4071, "step": 155500 }, { "epoch": 0.63, "learning_rate": 4.551706750243354e-05, "loss": 1.3726, "step": 156000 }, { "epoch": 0.63, "learning_rate": 4.550269341626606e-05, "loss": 1.4198, "step": 156500 }, { "epoch": 0.63, "learning_rate": 4.548831933009859e-05, "loss": 1.3945, "step": 157000 }, { "epoch": 0.63, "learning_rate": 4.547394524393112e-05, "loss": 1.4092, "step": 157500 }, { "epoch": 0.64, "learning_rate": 4.545957115776365e-05, "loss": 1.3685, "step": 158000 }, { "epoch": 0.64, "learning_rate": 4.5445225819768513e-05, "loss": 1.3967, "step": 158500 }, { "epoch": 0.64, "learning_rate": 4.543085173360104e-05, "loss": 1.3583, "step": 159000 }, { "epoch": 0.64, "learning_rate": 4.541647764743356e-05, "loss": 1.3435, "step": 159500 }, { "epoch": 0.64, "learning_rate": 4.5402132309438426e-05, "loss": 1.3974, "step": 160000 }, { "epoch": 0.65, "learning_rate": 4.538775822327096e-05, "loss": 1.3452, "step": 160500 }, { "epoch": 0.65, "learning_rate": 4.537338413710349e-05, "loss": 1.3704, "step": 161000 }, { "epoch": 0.65, "learning_rate": 4.535901005093601e-05, "loss": 1.4065, "step": 161500 }, { "epoch": 0.65, "learning_rate": 4.534463596476854e-05, "loss": 1.3301, "step": 162000 }, { "epoch": 0.65, "learning_rate": 4.5330261878601074e-05, "loss": 1.4155, "step": 162500 }, { "epoch": 0.66, "learning_rate": 4.53158877924336e-05, "loss": 1.3922, "step": 163000 }, { "epoch": 0.66, "learning_rate": 4.530151370626612e-05, "loss": 1.424, "step": 163500 }, { "epoch": 0.66, "learning_rate": 4.5287139620098654e-05, "loss": 1.3749, "step": 164000 }, { "epoch": 0.66, "learning_rate": 4.5272765533931185e-05, "loss": 1.3787, "step": 164500 }, { "epoch": 0.66, "learning_rate": 4.525839144776371e-05, "loss": 1.4292, "step": 165000 }, { "epoch": 0.67, "learning_rate": 4.5244046109768573e-05, "loss": 1.3708, "step": 165500 }, { "epoch": 0.67, "learning_rate": 4.52296720236011e-05, "loss": 1.3503, "step": 166000 }, { "epoch": 0.67, "learning_rate": 4.521529793743363e-05, "loss": 1.3939, "step": 166500 }, { "epoch": 0.67, "learning_rate": 4.520092385126616e-05, "loss": 1.3795, "step": 167000 }, { "epoch": 0.67, "learning_rate": 4.5186578513271024e-05, "loss": 1.317, "step": 167500 }, { "epoch": 0.68, "learning_rate": 4.517223317527589e-05, "loss": 1.3537, "step": 168000 }, { "epoch": 0.68, "learning_rate": 4.515785908910841e-05, "loss": 1.3657, "step": 168500 }, { "epoch": 0.68, "learning_rate": 4.514348500294094e-05, "loss": 1.3708, "step": 169000 }, { "epoch": 0.68, "learning_rate": 4.512911091677347e-05, "loss": 1.4122, "step": 169500 }, { "epoch": 0.68, "learning_rate": 4.5114736830606e-05, "loss": 1.3816, "step": 170000 }, { "epoch": 0.69, "learning_rate": 4.510036274443852e-05, "loss": 1.3861, "step": 170500 }, { "epoch": 0.69, "learning_rate": 4.508598865827105e-05, "loss": 1.3518, "step": 171000 }, { "epoch": 0.69, "learning_rate": 4.5071614572103585e-05, "loss": 1.3967, "step": 171500 }, { "epoch": 0.69, "learning_rate": 4.505726923410844e-05, "loss": 1.3642, "step": 172000 }, { "epoch": 0.69, "learning_rate": 4.504292389611331e-05, "loss": 1.353, "step": 172500 }, { "epoch": 0.7, "learning_rate": 4.502854980994584e-05, "loss": 1.3855, "step": 173000 }, { "epoch": 0.7, "learning_rate": 4.501417572377836e-05, "loss": 1.3736, "step": 173500 }, { "epoch": 0.7, "learning_rate": 4.499980163761089e-05, "loss": 1.3663, "step": 174000 }, { "epoch": 0.7, "learning_rate": 4.498542755144342e-05, "loss": 1.4056, "step": 174500 }, { "epoch": 0.7, "learning_rate": 4.497105346527595e-05, "loss": 1.3703, "step": 175000 }, { "epoch": 0.71, "learning_rate": 4.495667937910847e-05, "loss": 1.3636, "step": 175500 }, { "epoch": 0.71, "learning_rate": 4.4942305292941004e-05, "loss": 1.3887, "step": 176000 }, { "epoch": 0.71, "learning_rate": 4.4927931206773535e-05, "loss": 1.3568, "step": 176500 }, { "epoch": 0.71, "learning_rate": 4.49135858687784e-05, "loss": 1.4145, "step": 177000 }, { "epoch": 0.71, "learning_rate": 4.4899211782610923e-05, "loss": 1.3641, "step": 177500 }, { "epoch": 0.72, "learning_rate": 4.488483769644345e-05, "loss": 1.3802, "step": 178000 }, { "epoch": 0.72, "learning_rate": 4.487046361027598e-05, "loss": 1.3609, "step": 178500 }, { "epoch": 0.72, "learning_rate": 4.4856118272280836e-05, "loss": 1.3743, "step": 179000 }, { "epoch": 0.72, "learning_rate": 4.48417729342857e-05, "loss": 1.3967, "step": 179500 }, { "epoch": 0.72, "learning_rate": 4.482739884811823e-05, "loss": 1.3887, "step": 180000 }, { "epoch": 0.73, "learning_rate": 4.481302476195076e-05, "loss": 1.3803, "step": 180500 }, { "epoch": 0.73, "learning_rate": 4.479865067578329e-05, "loss": 1.3843, "step": 181000 }, { "epoch": 0.73, "learning_rate": 4.478427658961582e-05, "loss": 1.368, "step": 181500 }, { "epoch": 0.73, "learning_rate": 4.4769931251620676e-05, "loss": 1.4185, "step": 182000 }, { "epoch": 0.73, "learning_rate": 4.4755557165453214e-05, "loss": 1.3608, "step": 182500 }, { "epoch": 0.74, "learning_rate": 4.474118307928574e-05, "loss": 1.3724, "step": 183000 }, { "epoch": 0.74, "learning_rate": 4.472680899311826e-05, "loss": 1.3864, "step": 183500 }, { "epoch": 0.74, "learning_rate": 4.471243490695079e-05, "loss": 1.3729, "step": 184000 }, { "epoch": 0.74, "learning_rate": 4.4698060820783324e-05, "loss": 1.3833, "step": 184500 }, { "epoch": 0.74, "learning_rate": 4.468368673461585e-05, "loss": 1.3593, "step": 185000 }, { "epoch": 0.75, "learning_rate": 4.466931264844837e-05, "loss": 1.3443, "step": 185500 }, { "epoch": 0.75, "learning_rate": 4.465496731045324e-05, "loss": 1.3494, "step": 186000 }, { "epoch": 0.75, "learning_rate": 4.464059322428577e-05, "loss": 1.3314, "step": 186500 }, { "epoch": 0.75, "learning_rate": 4.46262191381183e-05, "loss": 1.3666, "step": 187000 }, { "epoch": 0.75, "learning_rate": 4.461184505195082e-05, "loss": 1.4079, "step": 187500 }, { "epoch": 0.76, "learning_rate": 4.459749971395569e-05, "loss": 1.4069, "step": 188000 }, { "epoch": 0.76, "learning_rate": 4.458312562778821e-05, "loss": 1.3429, "step": 188500 }, { "epoch": 0.76, "learning_rate": 4.4568780289793076e-05, "loss": 1.365, "step": 189000 }, { "epoch": 0.76, "learning_rate": 4.455440620362561e-05, "loss": 1.4167, "step": 189500 }, { "epoch": 0.76, "learning_rate": 4.454003211745814e-05, "loss": 1.3715, "step": 190000 }, { "epoch": 0.77, "learning_rate": 4.452565803129066e-05, "loss": 1.3698, "step": 190500 }, { "epoch": 0.77, "learning_rate": 4.4511283945123186e-05, "loss": 1.3936, "step": 191000 }, { "epoch": 0.77, "learning_rate": 4.4496909858955724e-05, "loss": 1.3526, "step": 191500 }, { "epoch": 0.77, "learning_rate": 4.448253577278825e-05, "loss": 1.3401, "step": 192000 }, { "epoch": 0.77, "learning_rate": 4.446819043479311e-05, "loss": 1.4083, "step": 192500 }, { "epoch": 0.78, "learning_rate": 4.445381634862564e-05, "loss": 1.3706, "step": 193000 }, { "epoch": 0.78, "learning_rate": 4.443944226245816e-05, "loss": 1.3702, "step": 193500 }, { "epoch": 0.78, "learning_rate": 4.44250681762907e-05, "loss": 1.3915, "step": 194000 }, { "epoch": 0.78, "learning_rate": 4.441069409012322e-05, "loss": 1.3347, "step": 194500 }, { "epoch": 0.78, "learning_rate": 4.439632000395575e-05, "loss": 1.4214, "step": 195000 }, { "epoch": 0.79, "learning_rate": 4.438197466596061e-05, "loss": 1.3836, "step": 195500 }, { "epoch": 0.79, "learning_rate": 4.4367600579793136e-05, "loss": 1.3125, "step": 196000 }, { "epoch": 0.79, "learning_rate": 4.4353226493625674e-05, "loss": 1.3719, "step": 196500 }, { "epoch": 0.79, "learning_rate": 4.43388524074582e-05, "loss": 1.388, "step": 197000 }, { "epoch": 0.79, "learning_rate": 4.432447832129072e-05, "loss": 1.3519, "step": 197500 }, { "epoch": 0.8, "learning_rate": 4.431010423512325e-05, "loss": 1.352, "step": 198000 }, { "epoch": 0.8, "learning_rate": 4.4295730148955784e-05, "loss": 1.3897, "step": 198500 }, { "epoch": 0.8, "learning_rate": 4.428135606278831e-05, "loss": 1.3947, "step": 199000 }, { "epoch": 0.8, "learning_rate": 4.426701072479317e-05, "loss": 1.4083, "step": 199500 }, { "epoch": 0.8, "learning_rate": 4.42526366386257e-05, "loss": 1.3361, "step": 200000 }, { "epoch": 0.81, "learning_rate": 4.4238262552458235e-05, "loss": 1.3415, "step": 200500 }, { "epoch": 0.81, "learning_rate": 4.422388846629076e-05, "loss": 1.4054, "step": 201000 }, { "epoch": 0.81, "learning_rate": 4.420951438012328e-05, "loss": 1.3871, "step": 201500 }, { "epoch": 0.81, "learning_rate": 4.419516904212815e-05, "loss": 1.3916, "step": 202000 }, { "epoch": 0.82, "learning_rate": 4.418079495596067e-05, "loss": 1.3449, "step": 202500 }, { "epoch": 0.82, "learning_rate": 4.416642086979321e-05, "loss": 1.4411, "step": 203000 }, { "epoch": 0.82, "learning_rate": 4.4152046783625734e-05, "loss": 1.3792, "step": 203500 }, { "epoch": 0.82, "learning_rate": 4.41377014456306e-05, "loss": 1.3843, "step": 204000 }, { "epoch": 0.82, "learning_rate": 4.412332735946312e-05, "loss": 1.3514, "step": 204500 }, { "epoch": 0.83, "learning_rate": 4.410895327329565e-05, "loss": 1.3531, "step": 205000 }, { "epoch": 0.83, "learning_rate": 4.4094579187128184e-05, "loss": 1.3575, "step": 205500 }, { "epoch": 0.83, "learning_rate": 4.408023384913305e-05, "loss": 1.3671, "step": 206000 }, { "epoch": 0.83, "learning_rate": 4.406585976296557e-05, "loss": 1.3818, "step": 206500 }, { "epoch": 0.83, "learning_rate": 4.40514856767981e-05, "loss": 1.3625, "step": 207000 }, { "epoch": 0.84, "learning_rate": 4.403711159063063e-05, "loss": 1.2734, "step": 207500 }, { "epoch": 0.84, "learning_rate": 4.402273750446316e-05, "loss": 1.4119, "step": 208000 }, { "epoch": 0.84, "learning_rate": 4.4008392166468024e-05, "loss": 1.3444, "step": 208500 }, { "epoch": 0.84, "learning_rate": 4.399401808030055e-05, "loss": 1.3591, "step": 209000 }, { "epoch": 0.84, "learning_rate": 4.397964399413307e-05, "loss": 1.3544, "step": 209500 }, { "epoch": 0.85, "learning_rate": 4.39652699079656e-05, "loss": 1.3843, "step": 210000 }, { "epoch": 0.85, "learning_rate": 4.3950895821798134e-05, "loss": 1.361, "step": 210500 }, { "epoch": 0.85, "learning_rate": 4.393652173563066e-05, "loss": 1.3929, "step": 211000 }, { "epoch": 0.85, "learning_rate": 4.392217639763552e-05, "loss": 1.3484, "step": 211500 }, { "epoch": 0.85, "learning_rate": 4.390780231146805e-05, "loss": 1.3612, "step": 212000 }, { "epoch": 0.86, "learning_rate": 4.389342822530058e-05, "loss": 1.3857, "step": 212500 }, { "epoch": 0.86, "learning_rate": 4.387905413913311e-05, "loss": 1.3092, "step": 213000 }, { "epoch": 0.86, "learning_rate": 4.386468005296563e-05, "loss": 1.3349, "step": 213500 }, { "epoch": 0.86, "learning_rate": 4.38503347149705e-05, "loss": 1.3484, "step": 214000 }, { "epoch": 0.86, "learning_rate": 4.383596062880302e-05, "loss": 1.3718, "step": 214500 }, { "epoch": 0.87, "learning_rate": 4.3821615290807887e-05, "loss": 1.371, "step": 215000 }, { "epoch": 0.87, "learning_rate": 4.380724120464042e-05, "loss": 1.3532, "step": 215500 }, { "epoch": 0.87, "learning_rate": 4.379286711847295e-05, "loss": 1.3346, "step": 216000 }, { "epoch": 0.87, "learning_rate": 4.377849303230547e-05, "loss": 1.4022, "step": 216500 }, { "epoch": 0.87, "learning_rate": 4.3764118946138e-05, "loss": 1.2966, "step": 217000 }, { "epoch": 0.88, "learning_rate": 4.3749744859970535e-05, "loss": 1.3618, "step": 217500 }, { "epoch": 0.88, "learning_rate": 4.373537077380306e-05, "loss": 1.3389, "step": 218000 }, { "epoch": 0.88, "learning_rate": 4.372099668763558e-05, "loss": 1.4182, "step": 218500 }, { "epoch": 0.88, "learning_rate": 4.370665134964045e-05, "loss": 1.3107, "step": 219000 }, { "epoch": 0.88, "learning_rate": 4.369230601164531e-05, "loss": 1.3784, "step": 219500 }, { "epoch": 0.89, "learning_rate": 4.3677931925477836e-05, "loss": 1.3293, "step": 220000 }, { "epoch": 0.89, "learning_rate": 4.3663557839310374e-05, "loss": 1.3381, "step": 220500 }, { "epoch": 0.89, "learning_rate": 4.36491837531429e-05, "loss": 1.3526, "step": 221000 }, { "epoch": 0.89, "learning_rate": 4.363483841514776e-05, "loss": 1.3405, "step": 221500 }, { "epoch": 0.89, "learning_rate": 4.362046432898029e-05, "loss": 1.3746, "step": 222000 }, { "epoch": 0.9, "learning_rate": 4.360609024281281e-05, "loss": 1.3648, "step": 222500 }, { "epoch": 0.9, "learning_rate": 4.359171615664535e-05, "loss": 1.413, "step": 223000 }, { "epoch": 0.9, "learning_rate": 4.357734207047787e-05, "loss": 1.3731, "step": 223500 }, { "epoch": 0.9, "learning_rate": 4.356299673248274e-05, "loss": 1.3905, "step": 224000 }, { "epoch": 0.9, "learning_rate": 4.354862264631526e-05, "loss": 1.3992, "step": 224500 }, { "epoch": 0.91, "learning_rate": 4.3534248560147786e-05, "loss": 1.3332, "step": 225000 }, { "epoch": 0.91, "learning_rate": 4.3519874473980324e-05, "loss": 1.3735, "step": 225500 }, { "epoch": 0.91, "learning_rate": 4.350550038781285e-05, "loss": 1.3732, "step": 226000 }, { "epoch": 0.91, "learning_rate": 4.349112630164537e-05, "loss": 1.3131, "step": 226500 }, { "epoch": 0.91, "learning_rate": 4.3476780963650237e-05, "loss": 1.3553, "step": 227000 }, { "epoch": 0.92, "learning_rate": 4.346240687748277e-05, "loss": 1.3415, "step": 227500 }, { "epoch": 0.92, "learning_rate": 4.34480327913153e-05, "loss": 1.4036, "step": 228000 }, { "epoch": 0.92, "learning_rate": 4.343365870514782e-05, "loss": 1.3283, "step": 228500 }, { "epoch": 0.92, "learning_rate": 4.341928461898035e-05, "loss": 1.368, "step": 229000 }, { "epoch": 0.92, "learning_rate": 4.340491053281288e-05, "loss": 1.3641, "step": 229500 }, { "epoch": 0.93, "learning_rate": 4.339053644664541e-05, "loss": 1.3759, "step": 230000 }, { "epoch": 0.93, "learning_rate": 4.337616236047793e-05, "loss": 1.3909, "step": 230500 }, { "epoch": 0.93, "learning_rate": 4.3361788274310464e-05, "loss": 1.337, "step": 231000 }, { "epoch": 0.93, "learning_rate": 4.334744293631532e-05, "loss": 1.3323, "step": 231500 }, { "epoch": 0.93, "learning_rate": 4.333306885014786e-05, "loss": 1.3528, "step": 232000 }, { "epoch": 0.94, "learning_rate": 4.3318694763980384e-05, "loss": 1.3617, "step": 232500 }, { "epoch": 0.94, "learning_rate": 4.330434942598525e-05, "loss": 1.353, "step": 233000 }, { "epoch": 0.94, "learning_rate": 4.328997533981777e-05, "loss": 1.3511, "step": 233500 }, { "epoch": 0.94, "learning_rate": 4.3275601253650297e-05, "loss": 1.3907, "step": 234000 }, { "epoch": 0.94, "learning_rate": 4.3261227167482834e-05, "loss": 1.3828, "step": 234500 }, { "epoch": 0.95, "learning_rate": 4.324685308131536e-05, "loss": 1.3867, "step": 235000 }, { "epoch": 0.95, "learning_rate": 4.323250774332022e-05, "loss": 1.4029, "step": 235500 }, { "epoch": 0.95, "learning_rate": 4.321813365715275e-05, "loss": 1.336, "step": 236000 }, { "epoch": 0.95, "learning_rate": 4.320375957098528e-05, "loss": 1.3467, "step": 236500 }, { "epoch": 0.95, "learning_rate": 4.318938548481781e-05, "loss": 1.3369, "step": 237000 }, { "epoch": 0.96, "learning_rate": 4.317501139865033e-05, "loss": 1.3248, "step": 237500 }, { "epoch": 0.96, "learning_rate": 4.316063731248286e-05, "loss": 1.3209, "step": 238000 }, { "epoch": 0.96, "learning_rate": 4.314626322631539e-05, "loss": 1.3802, "step": 238500 }, { "epoch": 0.96, "learning_rate": 4.313188914014792e-05, "loss": 1.3491, "step": 239000 }, { "epoch": 0.96, "learning_rate": 4.3117543802152784e-05, "loss": 1.3705, "step": 239500 }, { "epoch": 0.97, "learning_rate": 4.310319846415765e-05, "loss": 1.3495, "step": 240000 }, { "epoch": 0.97, "learning_rate": 4.308882437799017e-05, "loss": 1.3667, "step": 240500 }, { "epoch": 0.97, "learning_rate": 4.30744502918227e-05, "loss": 1.3464, "step": 241000 }, { "epoch": 0.97, "learning_rate": 4.306007620565523e-05, "loss": 1.3318, "step": 241500 }, { "epoch": 0.97, "learning_rate": 4.304570211948776e-05, "loss": 1.3779, "step": 242000 }, { "epoch": 0.98, "learning_rate": 4.303132803332028e-05, "loss": 1.3211, "step": 242500 }, { "epoch": 0.98, "learning_rate": 4.301695394715281e-05, "loss": 1.3281, "step": 243000 }, { "epoch": 0.98, "learning_rate": 4.3002579860985345e-05, "loss": 1.4052, "step": 243500 }, { "epoch": 0.98, "learning_rate": 4.298820577481787e-05, "loss": 1.357, "step": 244000 }, { "epoch": 0.98, "learning_rate": 4.297383168865039e-05, "loss": 1.3943, "step": 244500 }, { "epoch": 0.99, "learning_rate": 4.295948635065526e-05, "loss": 1.338, "step": 245000 }, { "epoch": 0.99, "learning_rate": 4.294511226448779e-05, "loss": 1.3568, "step": 245500 }, { "epoch": 0.99, "learning_rate": 4.2930766926492647e-05, "loss": 1.3254, "step": 246000 }, { "epoch": 0.99, "learning_rate": 4.291639284032518e-05, "loss": 1.3355, "step": 246500 }, { "epoch": 0.99, "learning_rate": 4.290201875415771e-05, "loss": 1.3979, "step": 247000 }, { "epoch": 1.0, "learning_rate": 4.288764466799023e-05, "loss": 1.3396, "step": 247500 }, { "epoch": 1.0, "learning_rate": 4.2873270581822764e-05, "loss": 1.3746, "step": 248000 }, { "epoch": 1.0, "eval_cer": 0.2530746846497492, "eval_loss": 1.026394009590149, "eval_runtime": 10837.1839, "eval_samples_per_second": 9.02, "eval_steps_per_second": 1.128, "step": 248463 }, { "epoch": 1.0, "learning_rate": 4.2858896495655295e-05, "loss": 1.3325, "step": 248500 }, { "epoch": 1.0, "learning_rate": 4.284452240948782e-05, "loss": 1.3003, "step": 249000 }, { "epoch": 1.0, "learning_rate": 4.283014832332034e-05, "loss": 1.3294, "step": 249500 }, { "epoch": 1.01, "learning_rate": 4.2815774237152874e-05, "loss": 1.3038, "step": 250000 }, { "epoch": 1.01, "learning_rate": 4.280142889915774e-05, "loss": 1.3098, "step": 250500 }, { "epoch": 1.01, "learning_rate": 4.278705481299027e-05, "loss": 1.2883, "step": 251000 }, { "epoch": 1.01, "learning_rate": 4.2772709474995134e-05, "loss": 1.3391, "step": 251500 }, { "epoch": 1.01, "learning_rate": 4.275833538882766e-05, "loss": 1.342, "step": 252000 }, { "epoch": 1.02, "learning_rate": 4.274396130266018e-05, "loss": 1.2917, "step": 252500 }, { "epoch": 1.02, "learning_rate": 4.272958721649271e-05, "loss": 1.3136, "step": 253000 }, { "epoch": 1.02, "learning_rate": 4.2715213130325244e-05, "loss": 1.2974, "step": 253500 }, { "epoch": 1.02, "learning_rate": 4.270083904415777e-05, "loss": 1.2851, "step": 254000 }, { "epoch": 1.02, "learning_rate": 4.26864649579903e-05, "loss": 1.2778, "step": 254500 }, { "epoch": 1.03, "learning_rate": 4.267209087182283e-05, "loss": 1.3019, "step": 255000 }, { "epoch": 1.03, "learning_rate": 4.265774553382769e-05, "loss": 1.2946, "step": 255500 }, { "epoch": 1.03, "learning_rate": 4.264337144766022e-05, "loss": 1.3452, "step": 256000 }, { "epoch": 1.03, "learning_rate": 4.262899736149274e-05, "loss": 1.3797, "step": 256500 }, { "epoch": 1.03, "learning_rate": 4.2614623275325274e-05, "loss": 1.3143, "step": 257000 }, { "epoch": 1.04, "learning_rate": 4.260027793733013e-05, "loss": 1.2851, "step": 257500 }, { "epoch": 1.04, "learning_rate": 4.258590385116266e-05, "loss": 1.3339, "step": 258000 }, { "epoch": 1.04, "learning_rate": 4.2571529764995194e-05, "loss": 1.327, "step": 258500 }, { "epoch": 1.04, "learning_rate": 4.255718442700006e-05, "loss": 1.3036, "step": 259000 }, { "epoch": 1.04, "learning_rate": 4.254281034083258e-05, "loss": 1.2892, "step": 259500 }, { "epoch": 1.05, "learning_rate": 4.2528436254665114e-05, "loss": 1.3411, "step": 260000 }, { "epoch": 1.05, "learning_rate": 4.2514062168497645e-05, "loss": 1.3209, "step": 260500 }, { "epoch": 1.05, "learning_rate": 4.24997168305025e-05, "loss": 1.321, "step": 261000 }, { "epoch": 1.05, "learning_rate": 4.248534274433503e-05, "loss": 1.3048, "step": 261500 }, { "epoch": 1.05, "learning_rate": 4.247096865816756e-05, "loss": 1.2998, "step": 262000 }, { "epoch": 1.06, "learning_rate": 4.245659457200009e-05, "loss": 1.3151, "step": 262500 }, { "epoch": 1.06, "learning_rate": 4.244222048583262e-05, "loss": 1.3393, "step": 263000 }, { "epoch": 1.06, "learning_rate": 4.2427846399665144e-05, "loss": 1.3427, "step": 263500 }, { "epoch": 1.06, "learning_rate": 4.241347231349767e-05, "loss": 1.3462, "step": 264000 }, { "epoch": 1.06, "learning_rate": 4.23990982273302e-05, "loss": 1.2871, "step": 264500 }, { "epoch": 1.07, "learning_rate": 4.238472414116273e-05, "loss": 1.3217, "step": 265000 }, { "epoch": 1.07, "learning_rate": 4.2370378803167594e-05, "loss": 1.3224, "step": 265500 }, { "epoch": 1.07, "learning_rate": 4.235600471700012e-05, "loss": 1.3642, "step": 266000 }, { "epoch": 1.07, "learning_rate": 4.234163063083264e-05, "loss": 1.2768, "step": 266500 }, { "epoch": 1.07, "learning_rate": 4.2327256544665174e-05, "loss": 1.3201, "step": 267000 }, { "epoch": 1.08, "learning_rate": 4.231291120667004e-05, "loss": 1.3396, "step": 267500 }, { "epoch": 1.08, "learning_rate": 4.22985658686749e-05, "loss": 1.3086, "step": 268000 }, { "epoch": 1.08, "learning_rate": 4.2284191782507434e-05, "loss": 1.3079, "step": 268500 }, { "epoch": 1.08, "learning_rate": 4.226981769633996e-05, "loss": 1.3209, "step": 269000 }, { "epoch": 1.08, "learning_rate": 4.225544361017248e-05, "loss": 1.3255, "step": 269500 }, { "epoch": 1.09, "learning_rate": 4.224106952400501e-05, "loss": 1.3124, "step": 270000 }, { "epoch": 1.09, "learning_rate": 4.2226695437837544e-05, "loss": 1.3312, "step": 270500 }, { "epoch": 1.09, "learning_rate": 4.221232135167007e-05, "loss": 1.3202, "step": 271000 }, { "epoch": 1.09, "learning_rate": 4.21979472655026e-05, "loss": 1.3048, "step": 271500 }, { "epoch": 1.09, "learning_rate": 4.218357317933513e-05, "loss": 1.2913, "step": 272000 }, { "epoch": 1.1, "learning_rate": 4.216922784133999e-05, "loss": 1.3403, "step": 272500 }, { "epoch": 1.1, "learning_rate": 4.215485375517252e-05, "loss": 1.3155, "step": 273000 }, { "epoch": 1.1, "learning_rate": 4.214047966900504e-05, "loss": 1.3483, "step": 273500 }, { "epoch": 1.1, "learning_rate": 4.2126105582837574e-05, "loss": 1.2946, "step": 274000 }, { "epoch": 1.1, "learning_rate": 4.211176024484244e-05, "loss": 1.3293, "step": 274500 }, { "epoch": 1.11, "learning_rate": 4.209738615867496e-05, "loss": 1.3216, "step": 275000 }, { "epoch": 1.11, "learning_rate": 4.2083012072507494e-05, "loss": 1.3089, "step": 275500 }, { "epoch": 1.11, "learning_rate": 4.206863798634002e-05, "loss": 1.2769, "step": 276000 }, { "epoch": 1.11, "learning_rate": 4.205429264834488e-05, "loss": 1.3592, "step": 276500 }, { "epoch": 1.11, "learning_rate": 4.203994731034975e-05, "loss": 1.3546, "step": 277000 }, { "epoch": 1.12, "learning_rate": 4.202557322418227e-05, "loss": 1.2827, "step": 277500 }, { "epoch": 1.12, "learning_rate": 4.20111991380148e-05, "loss": 1.3685, "step": 278000 }, { "epoch": 1.12, "learning_rate": 4.199682505184733e-05, "loss": 1.3143, "step": 278500 }, { "epoch": 1.12, "learning_rate": 4.198245096567986e-05, "loss": 1.3236, "step": 279000 }, { "epoch": 1.12, "learning_rate": 4.196810562768472e-05, "loss": 1.3198, "step": 279500 }, { "epoch": 1.13, "learning_rate": 4.1953731541517246e-05, "loss": 1.298, "step": 280000 }, { "epoch": 1.13, "learning_rate": 4.193935745534978e-05, "loss": 1.2766, "step": 280500 }, { "epoch": 1.13, "learning_rate": 4.192498336918231e-05, "loss": 1.2751, "step": 281000 }, { "epoch": 1.13, "learning_rate": 4.191060928301483e-05, "loss": 1.3115, "step": 281500 }, { "epoch": 1.13, "learning_rate": 4.189629269319203e-05, "loss": 1.3157, "step": 282000 }, { "epoch": 1.14, "learning_rate": 4.188191860702456e-05, "loss": 1.3184, "step": 282500 }, { "epoch": 1.14, "learning_rate": 4.1867544520857085e-05, "loss": 1.3392, "step": 283000 }, { "epoch": 1.14, "learning_rate": 4.1853170434689616e-05, "loss": 1.3014, "step": 283500 }, { "epoch": 1.14, "learning_rate": 4.183879634852215e-05, "loss": 1.3342, "step": 284000 }, { "epoch": 1.15, "learning_rate": 4.182442226235467e-05, "loss": 1.2772, "step": 284500 }, { "epoch": 1.15, "learning_rate": 4.18100481761872e-05, "loss": 1.3493, "step": 285000 }, { "epoch": 1.15, "learning_rate": 4.179567409001973e-05, "loss": 1.3381, "step": 285500 }, { "epoch": 1.15, "learning_rate": 4.178130000385226e-05, "loss": 1.3231, "step": 286000 }, { "epoch": 1.15, "learning_rate": 4.176695466585712e-05, "loss": 1.3612, "step": 286500 }, { "epoch": 1.16, "learning_rate": 4.1752580579689646e-05, "loss": 1.3584, "step": 287000 }, { "epoch": 1.16, "learning_rate": 4.173820649352218e-05, "loss": 1.3164, "step": 287500 }, { "epoch": 1.16, "learning_rate": 4.172383240735471e-05, "loss": 1.3132, "step": 288000 }, { "epoch": 1.16, "learning_rate": 4.170945832118723e-05, "loss": 1.3136, "step": 288500 }, { "epoch": 1.16, "learning_rate": 4.1695084235019763e-05, "loss": 1.3275, "step": 289000 }, { "epoch": 1.17, "learning_rate": 4.168076764519696e-05, "loss": 1.3111, "step": 289500 }, { "epoch": 1.17, "learning_rate": 4.1666393559029486e-05, "loss": 1.3326, "step": 290000 }, { "epoch": 1.17, "learning_rate": 4.165201947286202e-05, "loss": 1.3284, "step": 290500 }, { "epoch": 1.17, "learning_rate": 4.163764538669454e-05, "loss": 1.3265, "step": 291000 }, { "epoch": 1.17, "learning_rate": 4.162327130052707e-05, "loss": 1.3103, "step": 291500 }, { "epoch": 1.18, "learning_rate": 4.1608897214359596e-05, "loss": 1.2961, "step": 292000 }, { "epoch": 1.18, "learning_rate": 4.159452312819213e-05, "loss": 1.2697, "step": 292500 }, { "epoch": 1.18, "learning_rate": 4.158014904202466e-05, "loss": 1.3562, "step": 293000 }, { "epoch": 1.18, "learning_rate": 4.156577495585718e-05, "loss": 1.3253, "step": 293500 }, { "epoch": 1.18, "learning_rate": 4.155142961786205e-05, "loss": 1.3282, "step": 294000 }, { "epoch": 1.19, "learning_rate": 4.153705553169457e-05, "loss": 1.3074, "step": 294500 }, { "epoch": 1.19, "learning_rate": 4.15226814455271e-05, "loss": 1.2868, "step": 295000 }, { "epoch": 1.19, "learning_rate": 4.150830735935963e-05, "loss": 1.3628, "step": 295500 }, { "epoch": 1.19, "learning_rate": 4.149393327319216e-05, "loss": 1.3665, "step": 296000 }, { "epoch": 1.19, "learning_rate": 4.147955918702469e-05, "loss": 1.2772, "step": 296500 }, { "epoch": 1.2, "learning_rate": 4.146518510085721e-05, "loss": 1.3005, "step": 297000 }, { "epoch": 1.2, "learning_rate": 4.145081101468974e-05, "loss": 1.3234, "step": 297500 }, { "epoch": 1.2, "learning_rate": 4.1436436928522274e-05, "loss": 1.3169, "step": 298000 }, { "epoch": 1.2, "learning_rate": 4.142209159052713e-05, "loss": 1.3383, "step": 298500 }, { "epoch": 1.2, "learning_rate": 4.140771750435966e-05, "loss": 1.3168, "step": 299000 }, { "epoch": 1.21, "learning_rate": 4.1393343418192194e-05, "loss": 1.3092, "step": 299500 }, { "epoch": 1.21, "learning_rate": 4.137896933202472e-05, "loss": 1.2855, "step": 300000 }, { "epoch": 1.21, "learning_rate": 4.136462399402958e-05, "loss": 1.3017, "step": 300500 }, { "epoch": 1.21, "learning_rate": 4.135024990786211e-05, "loss": 1.3185, "step": 301000 }, { "epoch": 1.21, "learning_rate": 4.133587582169464e-05, "loss": 1.3136, "step": 301500 }, { "epoch": 1.22, "learning_rate": 4.132150173552717e-05, "loss": 1.302, "step": 302000 }, { "epoch": 1.22, "learning_rate": 4.130712764935969e-05, "loss": 1.3137, "step": 302500 }, { "epoch": 1.22, "learning_rate": 4.1292753563192224e-05, "loss": 1.3458, "step": 303000 }, { "epoch": 1.22, "learning_rate": 4.127840822519708e-05, "loss": 1.3394, "step": 303500 }, { "epoch": 1.22, "learning_rate": 4.126403413902961e-05, "loss": 1.3058, "step": 304000 }, { "epoch": 1.23, "learning_rate": 4.1249660052862143e-05, "loss": 1.2944, "step": 304500 }, { "epoch": 1.23, "learning_rate": 4.123528596669467e-05, "loss": 1.2933, "step": 305000 }, { "epoch": 1.23, "learning_rate": 4.122094062869953e-05, "loss": 1.2685, "step": 305500 }, { "epoch": 1.23, "learning_rate": 4.120656654253206e-05, "loss": 1.3184, "step": 306000 }, { "epoch": 1.23, "learning_rate": 4.119222120453692e-05, "loss": 1.3046, "step": 306500 }, { "epoch": 1.24, "learning_rate": 4.117784711836945e-05, "loss": 1.2728, "step": 307000 }, { "epoch": 1.24, "learning_rate": 4.116347303220198e-05, "loss": 1.3227, "step": 307500 }, { "epoch": 1.24, "learning_rate": 4.114909894603451e-05, "loss": 1.3138, "step": 308000 }, { "epoch": 1.24, "learning_rate": 4.113472485986704e-05, "loss": 1.3364, "step": 308500 }, { "epoch": 1.24, "learning_rate": 4.112035077369956e-05, "loss": 1.3093, "step": 309000 }, { "epoch": 1.25, "learning_rate": 4.110597668753209e-05, "loss": 1.2981, "step": 309500 }, { "epoch": 1.25, "learning_rate": 4.109160260136462e-05, "loss": 1.3095, "step": 310000 }, { "epoch": 1.25, "learning_rate": 4.107722851519715e-05, "loss": 1.3385, "step": 310500 }, { "epoch": 1.25, "learning_rate": 4.1062911925374346e-05, "loss": 1.2851, "step": 311000 }, { "epoch": 1.25, "learning_rate": 4.104853783920688e-05, "loss": 1.3389, "step": 311500 }, { "epoch": 1.26, "learning_rate": 4.10341637530394e-05, "loss": 1.321, "step": 312000 }, { "epoch": 1.26, "learning_rate": 4.101978966687193e-05, "loss": 1.3117, "step": 312500 }, { "epoch": 1.26, "learning_rate": 4.100541558070446e-05, "loss": 1.3384, "step": 313000 }, { "epoch": 1.26, "learning_rate": 4.099104149453699e-05, "loss": 1.3208, "step": 313500 }, { "epoch": 1.26, "learning_rate": 4.097666740836951e-05, "loss": 1.2958, "step": 314000 }, { "epoch": 1.27, "learning_rate": 4.0962322070374376e-05, "loss": 1.3443, "step": 314500 }, { "epoch": 1.27, "learning_rate": 4.094794798420691e-05, "loss": 1.3363, "step": 315000 }, { "epoch": 1.27, "learning_rate": 4.093357389803943e-05, "loss": 1.2784, "step": 315500 }, { "epoch": 1.27, "learning_rate": 4.091919981187196e-05, "loss": 1.3149, "step": 316000 }, { "epoch": 1.27, "learning_rate": 4.090482572570449e-05, "loss": 1.3311, "step": 316500 }, { "epoch": 1.28, "learning_rate": 4.089045163953702e-05, "loss": 1.3024, "step": 317000 }, { "epoch": 1.28, "learning_rate": 4.087607755336955e-05, "loss": 1.2816, "step": 317500 }, { "epoch": 1.28, "learning_rate": 4.086170346720207e-05, "loss": 1.3256, "step": 318000 }, { "epoch": 1.28, "learning_rate": 4.084735812920694e-05, "loss": 1.3071, "step": 318500 }, { "epoch": 1.28, "learning_rate": 4.083298404303947e-05, "loss": 1.3082, "step": 319000 }, { "epoch": 1.29, "learning_rate": 4.081860995687199e-05, "loss": 1.325, "step": 319500 }, { "epoch": 1.29, "learning_rate": 4.0804235870704524e-05, "loss": 1.3001, "step": 320000 }, { "epoch": 1.29, "learning_rate": 4.078989053270939e-05, "loss": 1.2427, "step": 320500 }, { "epoch": 1.29, "learning_rate": 4.077551644654191e-05, "loss": 1.3231, "step": 321000 }, { "epoch": 1.29, "learning_rate": 4.076114236037444e-05, "loss": 1.2805, "step": 321500 }, { "epoch": 1.3, "learning_rate": 4.074676827420697e-05, "loss": 1.303, "step": 322000 }, { "epoch": 1.3, "learning_rate": 4.07323941880395e-05, "loss": 1.3498, "step": 322500 }, { "epoch": 1.3, "learning_rate": 4.071804885004436e-05, "loss": 1.3334, "step": 323000 }, { "epoch": 1.3, "learning_rate": 4.070367476387689e-05, "loss": 1.3092, "step": 323500 }, { "epoch": 1.3, "learning_rate": 4.068930067770942e-05, "loss": 1.3639, "step": 324000 }, { "epoch": 1.31, "learning_rate": 4.067492659154194e-05, "loss": 1.2504, "step": 324500 }, { "epoch": 1.31, "learning_rate": 4.066055250537447e-05, "loss": 1.3246, "step": 325000 }, { "epoch": 1.31, "learning_rate": 4.064620716737934e-05, "loss": 1.3483, "step": 325500 }, { "epoch": 1.31, "learning_rate": 4.063183308121186e-05, "loss": 1.322, "step": 326000 }, { "epoch": 1.31, "learning_rate": 4.061745899504439e-05, "loss": 1.299, "step": 326500 }, { "epoch": 1.32, "learning_rate": 4.0603084908876924e-05, "loss": 1.269, "step": 327000 }, { "epoch": 1.32, "learning_rate": 4.058873957088178e-05, "loss": 1.3026, "step": 327500 }, { "epoch": 1.32, "learning_rate": 4.057436548471431e-05, "loss": 1.3286, "step": 328000 }, { "epoch": 1.32, "learning_rate": 4.055999139854684e-05, "loss": 1.3145, "step": 328500 }, { "epoch": 1.32, "learning_rate": 4.054561731237937e-05, "loss": 1.3127, "step": 329000 }, { "epoch": 1.33, "learning_rate": 4.053127197438423e-05, "loss": 1.3519, "step": 329500 }, { "epoch": 1.33, "learning_rate": 4.0516897888216757e-05, "loss": 1.3121, "step": 330000 }, { "epoch": 1.33, "learning_rate": 4.050252380204929e-05, "loss": 1.3452, "step": 330500 }, { "epoch": 1.33, "learning_rate": 4.048817846405415e-05, "loss": 1.2979, "step": 331000 }, { "epoch": 1.33, "learning_rate": 4.0473804377886676e-05, "loss": 1.348, "step": 331500 }, { "epoch": 1.34, "learning_rate": 4.045943029171921e-05, "loss": 1.337, "step": 332000 }, { "epoch": 1.34, "learning_rate": 4.044505620555173e-05, "loss": 1.2797, "step": 332500 }, { "epoch": 1.34, "learning_rate": 4.043068211938426e-05, "loss": 1.301, "step": 333000 }, { "epoch": 1.34, "learning_rate": 4.0416308033216786e-05, "loss": 1.3433, "step": 333500 }, { "epoch": 1.34, "learning_rate": 4.040193394704932e-05, "loss": 1.2581, "step": 334000 }, { "epoch": 1.35, "learning_rate": 4.038755986088185e-05, "loss": 1.3248, "step": 334500 }, { "epoch": 1.35, "learning_rate": 4.037318577471437e-05, "loss": 1.2734, "step": 335000 }, { "epoch": 1.35, "learning_rate": 4.035884043671924e-05, "loss": 1.3842, "step": 335500 }, { "epoch": 1.35, "learning_rate": 4.034446635055177e-05, "loss": 1.2941, "step": 336000 }, { "epoch": 1.35, "learning_rate": 4.033009226438429e-05, "loss": 1.3248, "step": 336500 }, { "epoch": 1.36, "learning_rate": 4.031574692638916e-05, "loss": 1.3258, "step": 337000 }, { "epoch": 1.36, "learning_rate": 4.030137284022169e-05, "loss": 1.3629, "step": 337500 }, { "epoch": 1.36, "learning_rate": 4.028699875405421e-05, "loss": 1.2897, "step": 338000 }, { "epoch": 1.36, "learning_rate": 4.027262466788674e-05, "loss": 1.33, "step": 338500 }, { "epoch": 1.36, "learning_rate": 4.025825058171927e-05, "loss": 1.3211, "step": 339000 }, { "epoch": 1.37, "learning_rate": 4.024390524372413e-05, "loss": 1.3184, "step": 339500 }, { "epoch": 1.37, "learning_rate": 4.022953115755666e-05, "loss": 1.3217, "step": 340000 }, { "epoch": 1.37, "learning_rate": 4.021518581956153e-05, "loss": 1.2926, "step": 340500 }, { "epoch": 1.37, "learning_rate": 4.020081173339405e-05, "loss": 1.304, "step": 341000 }, { "epoch": 1.37, "learning_rate": 4.0186437647226576e-05, "loss": 1.2539, "step": 341500 }, { "epoch": 1.38, "learning_rate": 4.0172063561059107e-05, "loss": 1.3232, "step": 342000 }, { "epoch": 1.38, "learning_rate": 4.015768947489164e-05, "loss": 1.3217, "step": 342500 }, { "epoch": 1.38, "learning_rate": 4.014331538872416e-05, "loss": 1.2563, "step": 343000 }, { "epoch": 1.38, "learning_rate": 4.012894130255669e-05, "loss": 1.2927, "step": 343500 }, { "epoch": 1.38, "learning_rate": 4.0114567216389224e-05, "loss": 1.2866, "step": 344000 }, { "epoch": 1.39, "learning_rate": 4.010019313022175e-05, "loss": 1.3148, "step": 344500 }, { "epoch": 1.39, "learning_rate": 4.008584779222661e-05, "loss": 1.3109, "step": 345000 }, { "epoch": 1.39, "learning_rate": 4.0071473706059137e-05, "loss": 1.2924, "step": 345500 }, { "epoch": 1.39, "learning_rate": 4.005709961989167e-05, "loss": 1.3149, "step": 346000 }, { "epoch": 1.39, "learning_rate": 4.00427255337242e-05, "loss": 1.3123, "step": 346500 }, { "epoch": 1.4, "learning_rate": 4.002835144755672e-05, "loss": 1.3045, "step": 347000 }, { "epoch": 1.4, "learning_rate": 4.0013977361389254e-05, "loss": 1.3122, "step": 347500 }, { "epoch": 1.4, "learning_rate": 3.999963202339411e-05, "loss": 1.3002, "step": 348000 }, { "epoch": 1.4, "learning_rate": 3.998525793722664e-05, "loss": 1.314, "step": 348500 }, { "epoch": 1.4, "learning_rate": 3.997088385105917e-05, "loss": 1.2549, "step": 349000 }, { "epoch": 1.41, "learning_rate": 3.99565097648917e-05, "loss": 1.288, "step": 349500 }, { "epoch": 1.41, "learning_rate": 3.994213567872423e-05, "loss": 1.3279, "step": 350000 }, { "epoch": 1.41, "learning_rate": 3.992776159255676e-05, "loss": 1.2969, "step": 350500 }, { "epoch": 1.41, "learning_rate": 3.9913387506389284e-05, "loss": 1.2879, "step": 351000 }, { "epoch": 1.41, "learning_rate": 3.989901342022181e-05, "loss": 1.3131, "step": 351500 }, { "epoch": 1.42, "learning_rate": 3.988469683039901e-05, "loss": 1.3212, "step": 352000 }, { "epoch": 1.42, "learning_rate": 3.987032274423154e-05, "loss": 1.3022, "step": 352500 }, { "epoch": 1.42, "learning_rate": 3.985594865806406e-05, "loss": 1.313, "step": 353000 }, { "epoch": 1.42, "learning_rate": 3.984157457189659e-05, "loss": 1.2912, "step": 353500 }, { "epoch": 1.42, "learning_rate": 3.982720048572912e-05, "loss": 1.2996, "step": 354000 }, { "epoch": 1.43, "learning_rate": 3.981282639956165e-05, "loss": 1.2747, "step": 354500 }, { "epoch": 1.43, "learning_rate": 3.979845231339418e-05, "loss": 1.3257, "step": 355000 }, { "epoch": 1.43, "learning_rate": 3.978407822722671e-05, "loss": 1.2859, "step": 355500 }, { "epoch": 1.43, "learning_rate": 3.976970414105923e-05, "loss": 1.3374, "step": 356000 }, { "epoch": 1.43, "learning_rate": 3.97553588030641e-05, "loss": 1.324, "step": 356500 }, { "epoch": 1.44, "learning_rate": 3.974098471689662e-05, "loss": 1.2956, "step": 357000 }, { "epoch": 1.44, "learning_rate": 3.972661063072915e-05, "loss": 1.3514, "step": 357500 }, { "epoch": 1.44, "learning_rate": 3.9712236544561684e-05, "loss": 1.2666, "step": 358000 }, { "epoch": 1.44, "learning_rate": 3.969789120656655e-05, "loss": 1.3184, "step": 358500 }, { "epoch": 1.44, "learning_rate": 3.968351712039907e-05, "loss": 1.3575, "step": 359000 }, { "epoch": 1.45, "learning_rate": 3.96691430342316e-05, "loss": 1.3093, "step": 359500 }, { "epoch": 1.45, "learning_rate": 3.965476894806413e-05, "loss": 1.3171, "step": 360000 }, { "epoch": 1.45, "learning_rate": 3.964039486189666e-05, "loss": 1.3405, "step": 360500 }, { "epoch": 1.45, "learning_rate": 3.962604952390152e-05, "loss": 1.3328, "step": 361000 }, { "epoch": 1.45, "learning_rate": 3.961167543773405e-05, "loss": 1.2607, "step": 361500 }, { "epoch": 1.46, "learning_rate": 3.959730135156657e-05, "loss": 1.2771, "step": 362000 }, { "epoch": 1.46, "learning_rate": 3.95829272653991e-05, "loss": 1.2848, "step": 362500 }, { "epoch": 1.46, "learning_rate": 3.9568553179231634e-05, "loss": 1.2904, "step": 363000 }, { "epoch": 1.46, "learning_rate": 3.955417909306416e-05, "loss": 1.309, "step": 363500 }, { "epoch": 1.47, "learning_rate": 3.953980500689669e-05, "loss": 1.3402, "step": 364000 }, { "epoch": 1.47, "learning_rate": 3.9525459668901547e-05, "loss": 1.2667, "step": 364500 }, { "epoch": 1.47, "learning_rate": 3.9511085582734084e-05, "loss": 1.332, "step": 365000 }, { "epoch": 1.47, "learning_rate": 3.949671149656661e-05, "loss": 1.336, "step": 365500 }, { "epoch": 1.47, "learning_rate": 3.948233741039913e-05, "loss": 1.357, "step": 366000 }, { "epoch": 1.48, "learning_rate": 3.9467963324231664e-05, "loss": 1.2626, "step": 366500 }, { "epoch": 1.48, "learning_rate": 3.9453589238064195e-05, "loss": 1.291, "step": 367000 }, { "epoch": 1.48, "learning_rate": 3.943921515189672e-05, "loss": 1.3226, "step": 367500 }, { "epoch": 1.48, "learning_rate": 3.942484106572924e-05, "loss": 1.2919, "step": 368000 }, { "epoch": 1.48, "learning_rate": 3.941046697956178e-05, "loss": 1.3069, "step": 368500 }, { "epoch": 1.49, "learning_rate": 3.939612164156664e-05, "loss": 1.3492, "step": 369000 }, { "epoch": 1.49, "learning_rate": 3.938174755539917e-05, "loss": 1.2857, "step": 369500 }, { "epoch": 1.49, "learning_rate": 3.9367402217404034e-05, "loss": 1.296, "step": 370000 }, { "epoch": 1.49, "learning_rate": 3.935302813123656e-05, "loss": 1.3178, "step": 370500 }, { "epoch": 1.49, "learning_rate": 3.933865404506908e-05, "loss": 1.3425, "step": 371000 }, { "epoch": 1.5, "learning_rate": 3.932427995890161e-05, "loss": 1.2953, "step": 371500 }, { "epoch": 1.5, "learning_rate": 3.9309905872734144e-05, "loss": 1.3248, "step": 372000 }, { "epoch": 1.5, "learning_rate": 3.929553178656667e-05, "loss": 1.2808, "step": 372500 }, { "epoch": 1.5, "learning_rate": 3.92811577003992e-05, "loss": 1.2778, "step": 373000 }, { "epoch": 1.5, "learning_rate": 3.926678361423173e-05, "loss": 1.3322, "step": 373500 }, { "epoch": 1.51, "learning_rate": 3.9252409528064255e-05, "loss": 1.3141, "step": 374000 }, { "epoch": 1.51, "learning_rate": 3.923806419006912e-05, "loss": 1.3462, "step": 374500 }, { "epoch": 1.51, "learning_rate": 3.922369010390164e-05, "loss": 1.303, "step": 375000 }, { "epoch": 1.51, "learning_rate": 3.9209316017734174e-05, "loss": 1.2846, "step": 375500 }, { "epoch": 1.51, "learning_rate": 3.9194941931566705e-05, "loss": 1.2977, "step": 376000 }, { "epoch": 1.52, "learning_rate": 3.918059659357157e-05, "loss": 1.3207, "step": 376500 }, { "epoch": 1.52, "learning_rate": 3.9166222507404094e-05, "loss": 1.3579, "step": 377000 }, { "epoch": 1.52, "learning_rate": 3.915184842123662e-05, "loss": 1.2612, "step": 377500 }, { "epoch": 1.52, "learning_rate": 3.913747433506915e-05, "loss": 1.2684, "step": 378000 }, { "epoch": 1.52, "learning_rate": 3.912310024890168e-05, "loss": 1.3118, "step": 378500 }, { "epoch": 1.53, "learning_rate": 3.9108754910906545e-05, "loss": 1.2694, "step": 379000 }, { "epoch": 1.53, "learning_rate": 3.90944095729114e-05, "loss": 1.3283, "step": 379500 }, { "epoch": 1.53, "learning_rate": 3.908003548674393e-05, "loss": 1.2881, "step": 380000 }, { "epoch": 1.53, "learning_rate": 3.906566140057646e-05, "loss": 1.2595, "step": 380500 }, { "epoch": 1.53, "learning_rate": 3.905128731440899e-05, "loss": 1.3154, "step": 381000 }, { "epoch": 1.54, "learning_rate": 3.903691322824152e-05, "loss": 1.2472, "step": 381500 }, { "epoch": 1.54, "learning_rate": 3.9022539142074044e-05, "loss": 1.2501, "step": 382000 }, { "epoch": 1.54, "learning_rate": 3.900816505590657e-05, "loss": 1.2928, "step": 382500 }, { "epoch": 1.54, "learning_rate": 3.8993790969739106e-05, "loss": 1.3238, "step": 383000 }, { "epoch": 1.54, "learning_rate": 3.897944563174396e-05, "loss": 1.323, "step": 383500 }, { "epoch": 1.55, "learning_rate": 3.8965071545576494e-05, "loss": 1.3102, "step": 384000 }, { "epoch": 1.55, "learning_rate": 3.895069745940902e-05, "loss": 1.2707, "step": 384500 }, { "epoch": 1.55, "learning_rate": 3.893635212141388e-05, "loss": 1.3345, "step": 385000 }, { "epoch": 1.55, "learning_rate": 3.892197803524641e-05, "loss": 1.3071, "step": 385500 }, { "epoch": 1.55, "learning_rate": 3.890760394907894e-05, "loss": 1.3225, "step": 386000 }, { "epoch": 1.56, "learning_rate": 3.889322986291147e-05, "loss": 1.3136, "step": 386500 }, { "epoch": 1.56, "learning_rate": 3.887885577674399e-05, "loss": 1.3166, "step": 387000 }, { "epoch": 1.56, "learning_rate": 3.886448169057652e-05, "loss": 1.3347, "step": 387500 }, { "epoch": 1.56, "learning_rate": 3.8850107604409055e-05, "loss": 1.3325, "step": 388000 }, { "epoch": 1.56, "learning_rate": 3.883573351824158e-05, "loss": 1.244, "step": 388500 }, { "epoch": 1.57, "learning_rate": 3.8821359432074104e-05, "loss": 1.2885, "step": 389000 }, { "epoch": 1.57, "learning_rate": 3.880701409407897e-05, "loss": 1.2643, "step": 389500 }, { "epoch": 1.57, "learning_rate": 3.87926400079115e-05, "loss": 1.2765, "step": 390000 }, { "epoch": 1.57, "learning_rate": 3.877826592174403e-05, "loss": 1.2574, "step": 390500 }, { "epoch": 1.57, "learning_rate": 3.8763891835576554e-05, "loss": 1.3173, "step": 391000 }, { "epoch": 1.58, "learning_rate": 3.874954649758142e-05, "loss": 1.2948, "step": 391500 }, { "epoch": 1.58, "learning_rate": 3.873517241141394e-05, "loss": 1.2861, "step": 392000 }, { "epoch": 1.58, "learning_rate": 3.8720798325246474e-05, "loss": 1.3029, "step": 392500 }, { "epoch": 1.58, "learning_rate": 3.8706424239079005e-05, "loss": 1.3286, "step": 393000 }, { "epoch": 1.58, "learning_rate": 3.869207890108387e-05, "loss": 1.3104, "step": 393500 }, { "epoch": 1.59, "learning_rate": 3.8677704814916394e-05, "loss": 1.3286, "step": 394000 }, { "epoch": 1.59, "learning_rate": 3.866333072874892e-05, "loss": 1.2915, "step": 394500 }, { "epoch": 1.59, "learning_rate": 3.864895664258145e-05, "loss": 1.3365, "step": 395000 }, { "epoch": 1.59, "learning_rate": 3.863458255641398e-05, "loss": 1.3339, "step": 395500 }, { "epoch": 1.59, "learning_rate": 3.8620237218418844e-05, "loss": 1.3178, "step": 396000 }, { "epoch": 1.6, "learning_rate": 3.860586313225137e-05, "loss": 1.341, "step": 396500 }, { "epoch": 1.6, "learning_rate": 3.859148904608389e-05, "loss": 1.2945, "step": 397000 }, { "epoch": 1.6, "learning_rate": 3.857714370808876e-05, "loss": 1.2954, "step": 397500 }, { "epoch": 1.6, "learning_rate": 3.856276962192129e-05, "loss": 1.2809, "step": 398000 }, { "epoch": 1.6, "learning_rate": 3.854839553575382e-05, "loss": 1.2982, "step": 398500 }, { "epoch": 1.61, "learning_rate": 3.853402144958634e-05, "loss": 1.2972, "step": 399000 }, { "epoch": 1.61, "learning_rate": 3.851964736341887e-05, "loss": 1.3654, "step": 399500 }, { "epoch": 1.61, "learning_rate": 3.8505273277251405e-05, "loss": 1.2658, "step": 400000 }, { "epoch": 1.61, "learning_rate": 3.849089919108393e-05, "loss": 1.292, "step": 400500 }, { "epoch": 1.61, "learning_rate": 3.8476525104916454e-05, "loss": 1.3061, "step": 401000 }, { "epoch": 1.62, "learning_rate": 3.8462151018748985e-05, "loss": 1.2735, "step": 401500 }, { "epoch": 1.62, "learning_rate": 3.8447776932581516e-05, "loss": 1.3375, "step": 402000 }, { "epoch": 1.62, "learning_rate": 3.843340284641404e-05, "loss": 1.2889, "step": 402500 }, { "epoch": 1.62, "learning_rate": 3.8419028760246564e-05, "loss": 1.3407, "step": 403000 }, { "epoch": 1.62, "learning_rate": 3.840468342225143e-05, "loss": 1.309, "step": 403500 }, { "epoch": 1.63, "learning_rate": 3.839030933608396e-05, "loss": 1.2736, "step": 404000 }, { "epoch": 1.63, "learning_rate": 3.837593524991649e-05, "loss": 1.2761, "step": 404500 }, { "epoch": 1.63, "learning_rate": 3.8361589911921355e-05, "loss": 1.333, "step": 405000 }, { "epoch": 1.63, "learning_rate": 3.834721582575388e-05, "loss": 1.2702, "step": 405500 }, { "epoch": 1.63, "learning_rate": 3.83328417395864e-05, "loss": 1.3226, "step": 406000 }, { "epoch": 1.64, "learning_rate": 3.831846765341894e-05, "loss": 1.3432, "step": 406500 }, { "epoch": 1.64, "learning_rate": 3.8304093567251465e-05, "loss": 1.2957, "step": 407000 }, { "epoch": 1.64, "learning_rate": 3.828971948108399e-05, "loss": 1.3351, "step": 407500 }, { "epoch": 1.64, "learning_rate": 3.827534539491652e-05, "loss": 1.3261, "step": 408000 }, { "epoch": 1.64, "learning_rate": 3.826097130874905e-05, "loss": 1.3281, "step": 408500 }, { "epoch": 1.65, "learning_rate": 3.8246625970753916e-05, "loss": 1.3111, "step": 409000 }, { "epoch": 1.65, "learning_rate": 3.823225188458644e-05, "loss": 1.3541, "step": 409500 }, { "epoch": 1.65, "learning_rate": 3.8217877798418964e-05, "loss": 1.2615, "step": 410000 }, { "epoch": 1.65, "learning_rate": 3.8203503712251495e-05, "loss": 1.2716, "step": 410500 }, { "epoch": 1.65, "learning_rate": 3.818915837425635e-05, "loss": 1.3619, "step": 411000 }, { "epoch": 1.66, "learning_rate": 3.817478428808889e-05, "loss": 1.3252, "step": 411500 }, { "epoch": 1.66, "learning_rate": 3.8160410201921415e-05, "loss": 1.2901, "step": 412000 }, { "epoch": 1.66, "learning_rate": 3.814603611575394e-05, "loss": 1.2943, "step": 412500 }, { "epoch": 1.66, "learning_rate": 3.813166202958648e-05, "loss": 1.2663, "step": 413000 }, { "epoch": 1.66, "learning_rate": 3.8117316691591335e-05, "loss": 1.2747, "step": 413500 }, { "epoch": 1.67, "learning_rate": 3.8102942605423866e-05, "loss": 1.3128, "step": 414000 }, { "epoch": 1.67, "learning_rate": 3.808856851925639e-05, "loss": 1.3107, "step": 414500 }, { "epoch": 1.67, "learning_rate": 3.8074194433088914e-05, "loss": 1.3281, "step": 415000 }, { "epoch": 1.67, "learning_rate": 3.805984909509378e-05, "loss": 1.2913, "step": 415500 }, { "epoch": 1.67, "learning_rate": 3.804547500892631e-05, "loss": 1.3163, "step": 416000 }, { "epoch": 1.68, "learning_rate": 3.803112967093117e-05, "loss": 1.2811, "step": 416500 }, { "epoch": 1.68, "learning_rate": 3.8016755584763705e-05, "loss": 1.3121, "step": 417000 }, { "epoch": 1.68, "learning_rate": 3.800238149859623e-05, "loss": 1.2716, "step": 417500 }, { "epoch": 1.68, "learning_rate": 3.798800741242875e-05, "loss": 1.3418, "step": 418000 }, { "epoch": 1.68, "learning_rate": 3.797366207443362e-05, "loss": 1.2755, "step": 418500 }, { "epoch": 1.69, "learning_rate": 3.795928798826615e-05, "loss": 1.2791, "step": 419000 }, { "epoch": 1.69, "learning_rate": 3.794491390209868e-05, "loss": 1.2886, "step": 419500 }, { "epoch": 1.69, "learning_rate": 3.7930539815931204e-05, "loss": 1.2725, "step": 420000 }, { "epoch": 1.69, "learning_rate": 3.791616572976373e-05, "loss": 1.2872, "step": 420500 }, { "epoch": 1.69, "learning_rate": 3.7901791643596266e-05, "loss": 1.2549, "step": 421000 }, { "epoch": 1.7, "learning_rate": 3.788741755742879e-05, "loss": 1.3114, "step": 421500 }, { "epoch": 1.7, "learning_rate": 3.7873043471261314e-05, "loss": 1.2449, "step": 422000 }, { "epoch": 1.7, "learning_rate": 3.785869813326618e-05, "loss": 1.2821, "step": 422500 }, { "epoch": 1.7, "learning_rate": 3.78443240470987e-05, "loss": 1.3374, "step": 423000 }, { "epoch": 1.7, "learning_rate": 3.782994996093124e-05, "loss": 1.2604, "step": 423500 }, { "epoch": 1.71, "learning_rate": 3.7815575874763765e-05, "loss": 1.2585, "step": 424000 }, { "epoch": 1.71, "learning_rate": 3.780123053676863e-05, "loss": 1.306, "step": 424500 }, { "epoch": 1.71, "learning_rate": 3.7786856450601154e-05, "loss": 1.2987, "step": 425000 }, { "epoch": 1.71, "learning_rate": 3.777248236443368e-05, "loss": 1.3073, "step": 425500 }, { "epoch": 1.71, "learning_rate": 3.7758108278266216e-05, "loss": 1.298, "step": 426000 }, { "epoch": 1.72, "learning_rate": 3.774373419209874e-05, "loss": 1.301, "step": 426500 }, { "epoch": 1.72, "learning_rate": 3.7729360105931264e-05, "loss": 1.3247, "step": 427000 }, { "epoch": 1.72, "learning_rate": 3.7714986019763795e-05, "loss": 1.2696, "step": 427500 }, { "epoch": 1.72, "learning_rate": 3.770064068176866e-05, "loss": 1.3062, "step": 428000 }, { "epoch": 1.72, "learning_rate": 3.768626659560119e-05, "loss": 1.2945, "step": 428500 }, { "epoch": 1.73, "learning_rate": 3.7671892509433715e-05, "loss": 1.2748, "step": 429000 }, { "epoch": 1.73, "learning_rate": 3.765751842326624e-05, "loss": 1.2795, "step": 429500 }, { "epoch": 1.73, "learning_rate": 3.764314433709877e-05, "loss": 1.3429, "step": 430000 }, { "epoch": 1.73, "learning_rate": 3.76287702509313e-05, "loss": 1.365, "step": 430500 }, { "epoch": 1.73, "learning_rate": 3.7614396164763825e-05, "loss": 1.3175, "step": 431000 }, { "epoch": 1.74, "learning_rate": 3.7600022078596356e-05, "loss": 1.2859, "step": 431500 }, { "epoch": 1.74, "learning_rate": 3.7585676740601214e-05, "loss": 1.2628, "step": 432000 }, { "epoch": 1.74, "learning_rate": 3.757130265443375e-05, "loss": 1.2823, "step": 432500 }, { "epoch": 1.74, "learning_rate": 3.7556928568266276e-05, "loss": 1.2831, "step": 433000 }, { "epoch": 1.74, "learning_rate": 3.75425544820988e-05, "loss": 1.31, "step": 433500 }, { "epoch": 1.75, "learning_rate": 3.7528209144103664e-05, "loss": 1.2884, "step": 434000 }, { "epoch": 1.75, "learning_rate": 3.751383505793619e-05, "loss": 1.3053, "step": 434500 }, { "epoch": 1.75, "learning_rate": 3.7499460971768726e-05, "loss": 1.288, "step": 435000 }, { "epoch": 1.75, "learning_rate": 3.748508688560125e-05, "loss": 1.3179, "step": 435500 }, { "epoch": 1.75, "learning_rate": 3.7470712799433775e-05, "loss": 1.3117, "step": 436000 }, { "epoch": 1.76, "learning_rate": 3.7456338713266306e-05, "loss": 1.3112, "step": 436500 }, { "epoch": 1.76, "learning_rate": 3.7441964627098837e-05, "loss": 1.2802, "step": 437000 }, { "epoch": 1.76, "learning_rate": 3.742759054093136e-05, "loss": 1.2868, "step": 437500 }, { "epoch": 1.76, "learning_rate": 3.741321645476389e-05, "loss": 1.2857, "step": 438000 }, { "epoch": 1.76, "learning_rate": 3.739887111676875e-05, "loss": 1.2808, "step": 438500 }, { "epoch": 1.77, "learning_rate": 3.738449703060128e-05, "loss": 1.309, "step": 439000 }, { "epoch": 1.77, "learning_rate": 3.737012294443381e-05, "loss": 1.3257, "step": 439500 }, { "epoch": 1.77, "learning_rate": 3.7355748858266336e-05, "loss": 1.3181, "step": 440000 }, { "epoch": 1.77, "learning_rate": 3.73414035202712e-05, "loss": 1.3189, "step": 440500 }, { "epoch": 1.77, "learning_rate": 3.7327029434103724e-05, "loss": 1.2565, "step": 441000 }, { "epoch": 1.78, "learning_rate": 3.731265534793626e-05, "loss": 1.2688, "step": 441500 }, { "epoch": 1.78, "learning_rate": 3.729831000994112e-05, "loss": 1.2885, "step": 442000 }, { "epoch": 1.78, "learning_rate": 3.728393592377365e-05, "loss": 1.3076, "step": 442500 }, { "epoch": 1.78, "learning_rate": 3.7269561837606175e-05, "loss": 1.2664, "step": 443000 }, { "epoch": 1.78, "learning_rate": 3.7255187751438706e-05, "loss": 1.3208, "step": 443500 }, { "epoch": 1.79, "learning_rate": 3.724081366527124e-05, "loss": 1.2682, "step": 444000 }, { "epoch": 1.79, "learning_rate": 3.722643957910376e-05, "loss": 1.3087, "step": 444500 }, { "epoch": 1.79, "learning_rate": 3.7212065492936285e-05, "loss": 1.2755, "step": 445000 }, { "epoch": 1.79, "learning_rate": 3.7197691406768816e-05, "loss": 1.3005, "step": 445500 }, { "epoch": 1.8, "learning_rate": 3.7183374816946014e-05, "loss": 1.2834, "step": 446000 }, { "epoch": 1.8, "learning_rate": 3.716900073077854e-05, "loss": 1.2735, "step": 446500 }, { "epoch": 1.8, "learning_rate": 3.715462664461107e-05, "loss": 1.3396, "step": 447000 }, { "epoch": 1.8, "learning_rate": 3.71402525584436e-05, "loss": 1.2835, "step": 447500 }, { "epoch": 1.8, "learning_rate": 3.7125878472276125e-05, "loss": 1.2776, "step": 448000 }, { "epoch": 1.81, "learning_rate": 3.7111504386108656e-05, "loss": 1.2976, "step": 448500 }, { "epoch": 1.81, "learning_rate": 3.7097159048113513e-05, "loss": 1.3103, "step": 449000 }, { "epoch": 1.81, "learning_rate": 3.708278496194605e-05, "loss": 1.3003, "step": 449500 }, { "epoch": 1.81, "learning_rate": 3.7068410875778575e-05, "loss": 1.305, "step": 450000 }, { "epoch": 1.81, "learning_rate": 3.70540367896111e-05, "loss": 1.2685, "step": 450500 }, { "epoch": 1.82, "learning_rate": 3.703966270344363e-05, "loss": 1.2824, "step": 451000 }, { "epoch": 1.82, "learning_rate": 3.7025317365448495e-05, "loss": 1.2703, "step": 451500 }, { "epoch": 1.82, "learning_rate": 3.7010943279281026e-05, "loss": 1.2796, "step": 452000 }, { "epoch": 1.82, "learning_rate": 3.699656919311355e-05, "loss": 1.2921, "step": 452500 }, { "epoch": 1.82, "learning_rate": 3.6982195106946074e-05, "loss": 1.2714, "step": 453000 }, { "epoch": 1.83, "learning_rate": 3.6967821020778605e-05, "loss": 1.2959, "step": 453500 }, { "epoch": 1.83, "learning_rate": 3.6953446934611136e-05, "loss": 1.283, "step": 454000 }, { "epoch": 1.83, "learning_rate": 3.693907284844366e-05, "loss": 1.3516, "step": 454500 }, { "epoch": 1.83, "learning_rate": 3.6924727510448525e-05, "loss": 1.2841, "step": 455000 }, { "epoch": 1.83, "learning_rate": 3.691035342428105e-05, "loss": 1.3314, "step": 455500 }, { "epoch": 1.84, "learning_rate": 3.689597933811358e-05, "loss": 1.3287, "step": 456000 }, { "epoch": 1.84, "learning_rate": 3.688160525194611e-05, "loss": 1.3005, "step": 456500 }, { "epoch": 1.84, "learning_rate": 3.6867231165778635e-05, "loss": 1.2771, "step": 457000 }, { "epoch": 1.84, "learning_rate": 3.6852857079611166e-05, "loss": 1.3057, "step": 457500 }, { "epoch": 1.84, "learning_rate": 3.68384829934437e-05, "loss": 1.299, "step": 458000 }, { "epoch": 1.85, "learning_rate": 3.682410890727622e-05, "loss": 1.2904, "step": 458500 }, { "epoch": 1.85, "learning_rate": 3.6809734821108746e-05, "loss": 1.2533, "step": 459000 }, { "epoch": 1.85, "learning_rate": 3.679538948311361e-05, "loss": 1.3131, "step": 459500 }, { "epoch": 1.85, "learning_rate": 3.678101539694614e-05, "loss": 1.2769, "step": 460000 }, { "epoch": 1.85, "learning_rate": 3.676664131077867e-05, "loss": 1.3289, "step": 460500 }, { "epoch": 1.86, "learning_rate": 3.675229597278354e-05, "loss": 1.3292, "step": 461000 }, { "epoch": 1.86, "learning_rate": 3.673792188661606e-05, "loss": 1.2642, "step": 461500 }, { "epoch": 1.86, "learning_rate": 3.6723547800448585e-05, "loss": 1.2938, "step": 462000 }, { "epoch": 1.86, "learning_rate": 3.6709173714281116e-05, "loss": 1.3634, "step": 462500 }, { "epoch": 1.86, "learning_rate": 3.669479962811365e-05, "loss": 1.2554, "step": 463000 }, { "epoch": 1.87, "learning_rate": 3.668042554194617e-05, "loss": 1.2683, "step": 463500 }, { "epoch": 1.87, "learning_rate": 3.66660514557787e-05, "loss": 1.2845, "step": 464000 }, { "epoch": 1.87, "learning_rate": 3.665167736961123e-05, "loss": 1.3046, "step": 464500 }, { "epoch": 1.87, "learning_rate": 3.663733203161609e-05, "loss": 1.2429, "step": 465000 }, { "epoch": 1.87, "learning_rate": 3.662295794544862e-05, "loss": 1.2988, "step": 465500 }, { "epoch": 1.88, "learning_rate": 3.6608583859281146e-05, "loss": 1.3175, "step": 466000 }, { "epoch": 1.88, "learning_rate": 3.659423852128601e-05, "loss": 1.279, "step": 466500 }, { "epoch": 1.88, "learning_rate": 3.6579893183290875e-05, "loss": 1.2663, "step": 467000 }, { "epoch": 1.88, "learning_rate": 3.65655190971234e-05, "loss": 1.2988, "step": 467500 }, { "epoch": 1.88, "learning_rate": 3.655114501095593e-05, "loss": 1.3224, "step": 468000 }, { "epoch": 1.89, "learning_rate": 3.653677092478846e-05, "loss": 1.2751, "step": 468500 }, { "epoch": 1.89, "learning_rate": 3.6522396838620985e-05, "loss": 1.2846, "step": 469000 }, { "epoch": 1.89, "learning_rate": 3.6508022752453516e-05, "loss": 1.3111, "step": 469500 }, { "epoch": 1.89, "learning_rate": 3.649364866628604e-05, "loss": 1.3045, "step": 470000 }, { "epoch": 1.89, "learning_rate": 3.647927458011857e-05, "loss": 1.2615, "step": 470500 }, { "epoch": 1.9, "learning_rate": 3.6464900493951096e-05, "loss": 1.264, "step": 471000 }, { "epoch": 1.9, "learning_rate": 3.6450526407783627e-05, "loss": 1.276, "step": 471500 }, { "epoch": 1.9, "learning_rate": 3.643615232161616e-05, "loss": 1.2798, "step": 472000 }, { "epoch": 1.9, "learning_rate": 3.642177823544868e-05, "loss": 1.2926, "step": 472500 }, { "epoch": 1.9, "learning_rate": 3.640740414928121e-05, "loss": 1.2795, "step": 473000 }, { "epoch": 1.91, "learning_rate": 3.639303006311374e-05, "loss": 1.239, "step": 473500 }, { "epoch": 1.91, "learning_rate": 3.6378713473290935e-05, "loss": 1.3506, "step": 474000 }, { "epoch": 1.91, "learning_rate": 3.6364339387123466e-05, "loss": 1.3075, "step": 474500 }, { "epoch": 1.91, "learning_rate": 3.6349965300956e-05, "loss": 1.3345, "step": 475000 }, { "epoch": 1.91, "learning_rate": 3.633559121478852e-05, "loss": 1.3233, "step": 475500 }, { "epoch": 1.92, "learning_rate": 3.632121712862105e-05, "loss": 1.2644, "step": 476000 }, { "epoch": 1.92, "learning_rate": 3.630687179062591e-05, "loss": 1.2507, "step": 476500 }, { "epoch": 1.92, "learning_rate": 3.629249770445844e-05, "loss": 1.2983, "step": 477000 }, { "epoch": 1.92, "learning_rate": 3.627812361829097e-05, "loss": 1.296, "step": 477500 }, { "epoch": 1.92, "learning_rate": 3.6263749532123496e-05, "loss": 1.297, "step": 478000 }, { "epoch": 1.93, "learning_rate": 3.624937544595603e-05, "loss": 1.3237, "step": 478500 }, { "epoch": 1.93, "learning_rate": 3.6235030107960885e-05, "loss": 1.2638, "step": 479000 }, { "epoch": 1.93, "learning_rate": 3.6220656021793416e-05, "loss": 1.2876, "step": 479500 }, { "epoch": 1.93, "learning_rate": 3.620628193562595e-05, "loss": 1.2688, "step": 480000 }, { "epoch": 1.93, "learning_rate": 3.619193659763081e-05, "loss": 1.2915, "step": 480500 }, { "epoch": 1.94, "learning_rate": 3.6177562511463335e-05, "loss": 1.2574, "step": 481000 }, { "epoch": 1.94, "learning_rate": 3.6163188425295866e-05, "loss": 1.2486, "step": 481500 }, { "epoch": 1.94, "learning_rate": 3.614881433912839e-05, "loss": 1.3007, "step": 482000 }, { "epoch": 1.94, "learning_rate": 3.613444025296092e-05, "loss": 1.2465, "step": 482500 }, { "epoch": 1.94, "learning_rate": 3.6120066166793446e-05, "loss": 1.272, "step": 483000 }, { "epoch": 1.95, "learning_rate": 3.610569208062598e-05, "loss": 1.3023, "step": 483500 }, { "epoch": 1.95, "learning_rate": 3.609131799445851e-05, "loss": 1.2473, "step": 484000 }, { "epoch": 1.95, "learning_rate": 3.607694390829103e-05, "loss": 1.2563, "step": 484500 }, { "epoch": 1.95, "learning_rate": 3.606256982212356e-05, "loss": 1.2937, "step": 485000 }, { "epoch": 1.95, "learning_rate": 3.604819573595609e-05, "loss": 1.3179, "step": 485500 }, { "epoch": 1.96, "learning_rate": 3.603382164978862e-05, "loss": 1.2751, "step": 486000 }, { "epoch": 1.96, "learning_rate": 3.601947631179348e-05, "loss": 1.2914, "step": 486500 }, { "epoch": 1.96, "learning_rate": 3.600510222562601e-05, "loss": 1.3037, "step": 487000 }, { "epoch": 1.96, "learning_rate": 3.599072813945854e-05, "loss": 1.2894, "step": 487500 }, { "epoch": 1.96, "learning_rate": 3.597635405329106e-05, "loss": 1.2917, "step": 488000 }, { "epoch": 1.97, "learning_rate": 3.5962008715295926e-05, "loss": 1.2776, "step": 488500 }, { "epoch": 1.97, "learning_rate": 3.594763462912846e-05, "loss": 1.3065, "step": 489000 }, { "epoch": 1.97, "learning_rate": 3.5933289291133315e-05, "loss": 1.3115, "step": 489500 }, { "epoch": 1.97, "learning_rate": 3.5918915204965846e-05, "loss": 1.2485, "step": 490000 }, { "epoch": 1.97, "learning_rate": 3.590454111879838e-05, "loss": 1.2972, "step": 490500 }, { "epoch": 1.98, "learning_rate": 3.58901670326309e-05, "loss": 1.2865, "step": 491000 }, { "epoch": 1.98, "learning_rate": 3.587579294646343e-05, "loss": 1.3067, "step": 491500 }, { "epoch": 1.98, "learning_rate": 3.5861418860295956e-05, "loss": 1.2642, "step": 492000 }, { "epoch": 1.98, "learning_rate": 3.584704477412849e-05, "loss": 1.3229, "step": 492500 }, { "epoch": 1.98, "learning_rate": 3.583267068796101e-05, "loss": 1.2988, "step": 493000 }, { "epoch": 1.99, "learning_rate": 3.581835409813821e-05, "loss": 1.2913, "step": 493500 }, { "epoch": 1.99, "learning_rate": 3.580398001197074e-05, "loss": 1.2787, "step": 494000 }, { "epoch": 1.99, "learning_rate": 3.5789634673975605e-05, "loss": 1.2615, "step": 494500 }, { "epoch": 1.99, "learning_rate": 3.577526058780813e-05, "loss": 1.2474, "step": 495000 }, { "epoch": 1.99, "learning_rate": 3.576088650164066e-05, "loss": 1.2448, "step": 495500 }, { "epoch": 2.0, "learning_rate": 3.5746512415473184e-05, "loss": 1.3201, "step": 496000 }, { "epoch": 2.0, "learning_rate": 3.5732138329305715e-05, "loss": 1.3178, "step": 496500 }, { "epoch": 2.0, "eval_cer": 0.24798620372209454, "eval_loss": 1.0018064975738525, "eval_runtime": 10926.7858, "eval_samples_per_second": 8.946, "eval_steps_per_second": 1.118, "step": 496926 }, { "epoch": 2.0, "learning_rate": 3.5717764243138246e-05, "loss": 1.304, "step": 497000 }, { "epoch": 2.0, "learning_rate": 3.570339015697077e-05, "loss": 1.2501, "step": 497500 }, { "epoch": 2.0, "learning_rate": 3.56890160708033e-05, "loss": 1.2717, "step": 498000 }, { "epoch": 2.01, "learning_rate": 3.5674641984635826e-05, "loss": 1.2588, "step": 498500 }, { "epoch": 2.01, "learning_rate": 3.566026789846836e-05, "loss": 1.2931, "step": 499000 }, { "epoch": 2.01, "learning_rate": 3.564589381230089e-05, "loss": 1.2257, "step": 499500 }, { "epoch": 2.01, "learning_rate": 3.5631548474305745e-05, "loss": 1.2627, "step": 500000 }, { "epoch": 2.01, "learning_rate": 3.5617174388138276e-05, "loss": 1.2737, "step": 500500 }, { "epoch": 2.02, "learning_rate": 3.56028003019708e-05, "loss": 1.2805, "step": 501000 }, { "epoch": 2.02, "learning_rate": 3.558842621580333e-05, "loss": 1.2255, "step": 501500 }, { "epoch": 2.02, "learning_rate": 3.557405212963586e-05, "loss": 1.2769, "step": 502000 }, { "epoch": 2.02, "learning_rate": 3.555967804346839e-05, "loss": 1.259, "step": 502500 }, { "epoch": 2.02, "learning_rate": 3.554533270547325e-05, "loss": 1.2578, "step": 503000 }, { "epoch": 2.03, "learning_rate": 3.553095861930578e-05, "loss": 1.2479, "step": 503500 }, { "epoch": 2.03, "learning_rate": 3.5516584533138306e-05, "loss": 1.2287, "step": 504000 }, { "epoch": 2.03, "learning_rate": 3.550221044697084e-05, "loss": 1.2565, "step": 504500 }, { "epoch": 2.03, "learning_rate": 3.548783636080336e-05, "loss": 1.2534, "step": 505000 }, { "epoch": 2.03, "learning_rate": 3.547346227463589e-05, "loss": 1.2356, "step": 505500 }, { "epoch": 2.04, "learning_rate": 3.545911693664076e-05, "loss": 1.2739, "step": 506000 }, { "epoch": 2.04, "learning_rate": 3.544474285047328e-05, "loss": 1.2215, "step": 506500 }, { "epoch": 2.04, "learning_rate": 3.543036876430581e-05, "loss": 1.2267, "step": 507000 }, { "epoch": 2.04, "learning_rate": 3.5415994678138336e-05, "loss": 1.2708, "step": 507500 }, { "epoch": 2.04, "learning_rate": 3.540162059197087e-05, "loss": 1.299, "step": 508000 }, { "epoch": 2.05, "learning_rate": 3.538727525397573e-05, "loss": 1.2797, "step": 508500 }, { "epoch": 2.05, "learning_rate": 3.5372901167808256e-05, "loss": 1.254, "step": 509000 }, { "epoch": 2.05, "learning_rate": 3.535852708164079e-05, "loss": 1.239, "step": 509500 }, { "epoch": 2.05, "learning_rate": 3.534415299547331e-05, "loss": 1.2227, "step": 510000 }, { "epoch": 2.05, "learning_rate": 3.532977890930584e-05, "loss": 1.2489, "step": 510500 }, { "epoch": 2.06, "learning_rate": 3.531540482313837e-05, "loss": 1.2213, "step": 511000 }, { "epoch": 2.06, "learning_rate": 3.530105948514323e-05, "loss": 1.2131, "step": 511500 }, { "epoch": 2.06, "learning_rate": 3.528668539897576e-05, "loss": 1.2442, "step": 512000 }, { "epoch": 2.06, "learning_rate": 3.5272311312808286e-05, "loss": 1.2223, "step": 512500 }, { "epoch": 2.06, "learning_rate": 3.525793722664082e-05, "loss": 1.2384, "step": 513000 }, { "epoch": 2.07, "learning_rate": 3.524356314047335e-05, "loss": 1.2598, "step": 513500 }, { "epoch": 2.07, "learning_rate": 3.522921780247821e-05, "loss": 1.3033, "step": 514000 }, { "epoch": 2.07, "learning_rate": 3.521484371631074e-05, "loss": 1.2349, "step": 514500 }, { "epoch": 2.07, "learning_rate": 3.520046963014327e-05, "loss": 1.2263, "step": 515000 }, { "epoch": 2.07, "learning_rate": 3.518609554397579e-05, "loss": 1.2533, "step": 515500 }, { "epoch": 2.08, "learning_rate": 3.517172145780832e-05, "loss": 1.2289, "step": 516000 }, { "epoch": 2.08, "learning_rate": 3.515734737164085e-05, "loss": 1.2387, "step": 516500 }, { "epoch": 2.08, "learning_rate": 3.514300203364571e-05, "loss": 1.2407, "step": 517000 }, { "epoch": 2.08, "learning_rate": 3.512862794747824e-05, "loss": 1.2843, "step": 517500 }, { "epoch": 2.08, "learning_rate": 3.511425386131077e-05, "loss": 1.2666, "step": 518000 }, { "epoch": 2.09, "learning_rate": 3.50998797751433e-05, "loss": 1.2308, "step": 518500 }, { "epoch": 2.09, "learning_rate": 3.508550568897582e-05, "loss": 1.2481, "step": 519000 }, { "epoch": 2.09, "learning_rate": 3.507113160280835e-05, "loss": 1.2184, "step": 519500 }, { "epoch": 2.09, "learning_rate": 3.5056757516640884e-05, "loss": 1.2723, "step": 520000 }, { "epoch": 2.09, "learning_rate": 3.504241217864574e-05, "loss": 1.248, "step": 520500 }, { "epoch": 2.1, "learning_rate": 3.502803809247827e-05, "loss": 1.2647, "step": 521000 }, { "epoch": 2.1, "learning_rate": 3.50136640063108e-05, "loss": 1.2281, "step": 521500 }, { "epoch": 2.1, "learning_rate": 3.499928992014333e-05, "loss": 1.2436, "step": 522000 }, { "epoch": 2.1, "learning_rate": 3.498491583397586e-05, "loss": 1.2255, "step": 522500 }, { "epoch": 2.1, "learning_rate": 3.497054174780838e-05, "loss": 1.2615, "step": 523000 }, { "epoch": 2.11, "learning_rate": 3.4956167661640914e-05, "loss": 1.238, "step": 523500 }, { "epoch": 2.11, "learning_rate": 3.4941793575473445e-05, "loss": 1.2343, "step": 524000 }, { "epoch": 2.11, "learning_rate": 3.49274482374783e-05, "loss": 1.2505, "step": 524500 }, { "epoch": 2.11, "learning_rate": 3.491310289948317e-05, "loss": 1.2915, "step": 525000 }, { "epoch": 2.12, "learning_rate": 3.48987288133157e-05, "loss": 1.2477, "step": 525500 }, { "epoch": 2.12, "learning_rate": 3.488435472714822e-05, "loss": 1.2653, "step": 526000 }, { "epoch": 2.12, "learning_rate": 3.486998064098075e-05, "loss": 1.2362, "step": 526500 }, { "epoch": 2.12, "learning_rate": 3.485563530298561e-05, "loss": 1.2141, "step": 527000 }, { "epoch": 2.12, "learning_rate": 3.484126121681814e-05, "loss": 1.2283, "step": 527500 }, { "epoch": 2.13, "learning_rate": 3.482688713065067e-05, "loss": 1.2756, "step": 528000 }, { "epoch": 2.13, "learning_rate": 3.48125130444832e-05, "loss": 1.2044, "step": 528500 }, { "epoch": 2.13, "learning_rate": 3.479813895831573e-05, "loss": 1.2843, "step": 529000 }, { "epoch": 2.13, "learning_rate": 3.478376487214826e-05, "loss": 1.2615, "step": 529500 }, { "epoch": 2.13, "learning_rate": 3.476941953415312e-05, "loss": 1.3152, "step": 530000 }, { "epoch": 2.14, "learning_rate": 3.475504544798565e-05, "loss": 1.2859, "step": 530500 }, { "epoch": 2.14, "learning_rate": 3.474067136181817e-05, "loss": 1.2408, "step": 531000 }, { "epoch": 2.14, "learning_rate": 3.47262972756507e-05, "loss": 1.251, "step": 531500 }, { "epoch": 2.14, "learning_rate": 3.4711923189483234e-05, "loss": 1.1937, "step": 532000 }, { "epoch": 2.14, "learning_rate": 3.469754910331576e-05, "loss": 1.2696, "step": 532500 }, { "epoch": 2.15, "learning_rate": 3.468317501714828e-05, "loss": 1.2626, "step": 533000 }, { "epoch": 2.15, "learning_rate": 3.466880093098081e-05, "loss": 1.2667, "step": 533500 }, { "epoch": 2.15, "learning_rate": 3.465445559298568e-05, "loss": 1.2286, "step": 534000 }, { "epoch": 2.15, "learning_rate": 3.464011025499054e-05, "loss": 1.3034, "step": 534500 }, { "epoch": 2.15, "learning_rate": 3.4625736168823066e-05, "loss": 1.2727, "step": 535000 }, { "epoch": 2.16, "learning_rate": 3.46113620826556e-05, "loss": 1.3009, "step": 535500 }, { "epoch": 2.16, "learning_rate": 3.459698799648812e-05, "loss": 1.2333, "step": 536000 }, { "epoch": 2.16, "learning_rate": 3.458261391032065e-05, "loss": 1.2414, "step": 536500 }, { "epoch": 2.16, "learning_rate": 3.4568239824153183e-05, "loss": 1.2648, "step": 537000 }, { "epoch": 2.16, "learning_rate": 3.455386573798571e-05, "loss": 1.2345, "step": 537500 }, { "epoch": 2.17, "learning_rate": 3.453949165181824e-05, "loss": 1.2002, "step": 538000 }, { "epoch": 2.17, "learning_rate": 3.4525146313823096e-05, "loss": 1.2801, "step": 538500 }, { "epoch": 2.17, "learning_rate": 3.451077222765563e-05, "loss": 1.2502, "step": 539000 }, { "epoch": 2.17, "learning_rate": 3.449639814148816e-05, "loss": 1.2946, "step": 539500 }, { "epoch": 2.17, "learning_rate": 3.448202405532068e-05, "loss": 1.2442, "step": 540000 }, { "epoch": 2.18, "learning_rate": 3.446770746549788e-05, "loss": 1.2646, "step": 540500 }, { "epoch": 2.18, "learning_rate": 3.445333337933041e-05, "loss": 1.2592, "step": 541000 }, { "epoch": 2.18, "learning_rate": 3.4438959293162936e-05, "loss": 1.2476, "step": 541500 }, { "epoch": 2.18, "learning_rate": 3.442458520699547e-05, "loss": 1.2312, "step": 542000 }, { "epoch": 2.18, "learning_rate": 3.4410211120828e-05, "loss": 1.2613, "step": 542500 }, { "epoch": 2.19, "learning_rate": 3.439586578283286e-05, "loss": 1.2881, "step": 543000 }, { "epoch": 2.19, "learning_rate": 3.4381491696665386e-05, "loss": 1.2507, "step": 543500 }, { "epoch": 2.19, "learning_rate": 3.436711761049791e-05, "loss": 1.2611, "step": 544000 }, { "epoch": 2.19, "learning_rate": 3.435274352433044e-05, "loss": 1.2903, "step": 544500 }, { "epoch": 2.19, "learning_rate": 3.433836943816297e-05, "loss": 1.2557, "step": 545000 }, { "epoch": 2.2, "learning_rate": 3.432402410016784e-05, "loss": 1.2547, "step": 545500 }, { "epoch": 2.2, "learning_rate": 3.430965001400036e-05, "loss": 1.2438, "step": 546000 }, { "epoch": 2.2, "learning_rate": 3.4295275927832886e-05, "loss": 1.2705, "step": 546500 }, { "epoch": 2.2, "learning_rate": 3.4280901841665416e-05, "loss": 1.2638, "step": 547000 }, { "epoch": 2.2, "learning_rate": 3.426652775549795e-05, "loss": 1.246, "step": 547500 }, { "epoch": 2.21, "learning_rate": 3.425215366933047e-05, "loss": 1.259, "step": 548000 }, { "epoch": 2.21, "learning_rate": 3.4237808331335336e-05, "loss": 1.2738, "step": 548500 }, { "epoch": 2.21, "learning_rate": 3.422343424516786e-05, "loss": 1.2558, "step": 549000 }, { "epoch": 2.21, "learning_rate": 3.420906015900039e-05, "loss": 1.2246, "step": 549500 }, { "epoch": 2.21, "learning_rate": 3.419468607283292e-05, "loss": 1.193, "step": 550000 }, { "epoch": 2.22, "learning_rate": 3.4180311986665446e-05, "loss": 1.2253, "step": 550500 }, { "epoch": 2.22, "learning_rate": 3.416593790049798e-05, "loss": 1.2249, "step": 551000 }, { "epoch": 2.22, "learning_rate": 3.415156381433051e-05, "loss": 1.2014, "step": 551500 }, { "epoch": 2.22, "learning_rate": 3.413718972816303e-05, "loss": 1.2901, "step": 552000 }, { "epoch": 2.22, "learning_rate": 3.41228443901679e-05, "loss": 1.259, "step": 552500 }, { "epoch": 2.23, "learning_rate": 3.410847030400042e-05, "loss": 1.2049, "step": 553000 }, { "epoch": 2.23, "learning_rate": 3.4094124966005286e-05, "loss": 1.2618, "step": 553500 }, { "epoch": 2.23, "learning_rate": 3.407975087983782e-05, "loss": 1.2664, "step": 554000 }, { "epoch": 2.23, "learning_rate": 3.406537679367035e-05, "loss": 1.2458, "step": 554500 }, { "epoch": 2.23, "learning_rate": 3.405100270750287e-05, "loss": 1.2345, "step": 555000 }, { "epoch": 2.24, "learning_rate": 3.4036657369507737e-05, "loss": 1.2623, "step": 555500 }, { "epoch": 2.24, "learning_rate": 3.402228328334026e-05, "loss": 1.2062, "step": 556000 }, { "epoch": 2.24, "learning_rate": 3.400790919717279e-05, "loss": 1.2831, "step": 556500 }, { "epoch": 2.24, "learning_rate": 3.399353511100532e-05, "loss": 1.2745, "step": 557000 }, { "epoch": 2.24, "learning_rate": 3.397916102483785e-05, "loss": 1.233, "step": 557500 }, { "epoch": 2.25, "learning_rate": 3.396481568684271e-05, "loss": 1.2692, "step": 558000 }, { "epoch": 2.25, "learning_rate": 3.3950441600675236e-05, "loss": 1.2596, "step": 558500 }, { "epoch": 2.25, "learning_rate": 3.3936067514507767e-05, "loss": 1.2565, "step": 559000 }, { "epoch": 2.25, "learning_rate": 3.39216934283403e-05, "loss": 1.2362, "step": 559500 }, { "epoch": 2.25, "learning_rate": 3.390731934217282e-05, "loss": 1.2155, "step": 560000 }, { "epoch": 2.26, "learning_rate": 3.3892974004177686e-05, "loss": 1.266, "step": 560500 }, { "epoch": 2.26, "learning_rate": 3.387859991801021e-05, "loss": 1.2511, "step": 561000 }, { "epoch": 2.26, "learning_rate": 3.386422583184274e-05, "loss": 1.2623, "step": 561500 }, { "epoch": 2.26, "learning_rate": 3.384985174567527e-05, "loss": 1.216, "step": 562000 }, { "epoch": 2.26, "learning_rate": 3.383550640768014e-05, "loss": 1.2525, "step": 562500 }, { "epoch": 2.27, "learning_rate": 3.382113232151266e-05, "loss": 1.2496, "step": 563000 }, { "epoch": 2.27, "learning_rate": 3.3806758235345185e-05, "loss": 1.2253, "step": 563500 }, { "epoch": 2.27, "learning_rate": 3.3792384149177716e-05, "loss": 1.288, "step": 564000 }, { "epoch": 2.27, "learning_rate": 3.377801006301025e-05, "loss": 1.242, "step": 564500 }, { "epoch": 2.27, "learning_rate": 3.376366472501511e-05, "loss": 1.2785, "step": 565000 }, { "epoch": 2.28, "learning_rate": 3.3749290638847636e-05, "loss": 1.2483, "step": 565500 }, { "epoch": 2.28, "learning_rate": 3.373491655268016e-05, "loss": 1.2293, "step": 566000 }, { "epoch": 2.28, "learning_rate": 3.37205424665127e-05, "loss": 1.2759, "step": 566500 }, { "epoch": 2.28, "learning_rate": 3.3706197128517556e-05, "loss": 1.2912, "step": 567000 }, { "epoch": 2.28, "learning_rate": 3.3691823042350087e-05, "loss": 1.2564, "step": 567500 }, { "epoch": 2.29, "learning_rate": 3.367744895618261e-05, "loss": 1.2192, "step": 568000 }, { "epoch": 2.29, "learning_rate": 3.3663074870015135e-05, "loss": 1.2384, "step": 568500 }, { "epoch": 2.29, "learning_rate": 3.364870078384767e-05, "loss": 1.2578, "step": 569000 }, { "epoch": 2.29, "learning_rate": 3.363435544585253e-05, "loss": 1.2857, "step": 569500 }, { "epoch": 2.29, "learning_rate": 3.361998135968506e-05, "loss": 1.236, "step": 570000 }, { "epoch": 2.3, "learning_rate": 3.3605607273517586e-05, "loss": 1.262, "step": 570500 }, { "epoch": 2.3, "learning_rate": 3.3591233187350117e-05, "loss": 1.2389, "step": 571000 }, { "epoch": 2.3, "learning_rate": 3.357685910118265e-05, "loss": 1.2537, "step": 571500 }, { "epoch": 2.3, "learning_rate": 3.356248501501517e-05, "loss": 1.2869, "step": 572000 }, { "epoch": 2.3, "learning_rate": 3.3548110928847696e-05, "loss": 1.2332, "step": 572500 }, { "epoch": 2.31, "learning_rate": 3.353373684268023e-05, "loss": 1.2552, "step": 573000 }, { "epoch": 2.31, "learning_rate": 3.351936275651276e-05, "loss": 1.281, "step": 573500 }, { "epoch": 2.31, "learning_rate": 3.350498867034528e-05, "loss": 1.2591, "step": 574000 }, { "epoch": 2.31, "learning_rate": 3.349061458417781e-05, "loss": 1.2511, "step": 574500 }, { "epoch": 2.31, "learning_rate": 3.3476240498010344e-05, "loss": 1.2287, "step": 575000 }, { "epoch": 2.32, "learning_rate": 3.346189516001521e-05, "loss": 1.2665, "step": 575500 }, { "epoch": 2.32, "learning_rate": 3.344752107384773e-05, "loss": 1.1707, "step": 576000 }, { "epoch": 2.32, "learning_rate": 3.343314698768026e-05, "loss": 1.2328, "step": 576500 }, { "epoch": 2.32, "learning_rate": 3.341877290151279e-05, "loss": 1.2742, "step": 577000 }, { "epoch": 2.32, "learning_rate": 3.3404456311689986e-05, "loss": 1.247, "step": 577500 }, { "epoch": 2.33, "learning_rate": 3.339008222552251e-05, "loss": 1.2432, "step": 578000 }, { "epoch": 2.33, "learning_rate": 3.337570813935504e-05, "loss": 1.2333, "step": 578500 }, { "epoch": 2.33, "learning_rate": 3.336133405318757e-05, "loss": 1.2269, "step": 579000 }, { "epoch": 2.33, "learning_rate": 3.3346959967020096e-05, "loss": 1.2434, "step": 579500 }, { "epoch": 2.33, "learning_rate": 3.333258588085262e-05, "loss": 1.2411, "step": 580000 }, { "epoch": 2.34, "learning_rate": 3.331821179468516e-05, "loss": 1.2331, "step": 580500 }, { "epoch": 2.34, "learning_rate": 3.330386645669002e-05, "loss": 1.2312, "step": 581000 }, { "epoch": 2.34, "learning_rate": 3.328949237052255e-05, "loss": 1.2393, "step": 581500 }, { "epoch": 2.34, "learning_rate": 3.327511828435507e-05, "loss": 1.2778, "step": 582000 }, { "epoch": 2.34, "learning_rate": 3.32607441981876e-05, "loss": 1.2703, "step": 582500 }, { "epoch": 2.35, "learning_rate": 3.324637011202013e-05, "loss": 1.296, "step": 583000 }, { "epoch": 2.35, "learning_rate": 3.323199602585266e-05, "loss": 1.2469, "step": 583500 }, { "epoch": 2.35, "learning_rate": 3.321762193968518e-05, "loss": 1.2738, "step": 584000 }, { "epoch": 2.35, "learning_rate": 3.320324785351772e-05, "loss": 1.2696, "step": 584500 }, { "epoch": 2.35, "learning_rate": 3.318890251552258e-05, "loss": 1.2645, "step": 585000 }, { "epoch": 2.36, "learning_rate": 3.317452842935511e-05, "loss": 1.2552, "step": 585500 }, { "epoch": 2.36, "learning_rate": 3.316015434318763e-05, "loss": 1.2484, "step": 586000 }, { "epoch": 2.36, "learning_rate": 3.3145780257020156e-05, "loss": 1.2692, "step": 586500 }, { "epoch": 2.36, "learning_rate": 3.313143491902502e-05, "loss": 1.2243, "step": 587000 }, { "epoch": 2.36, "learning_rate": 3.3117089581029885e-05, "loss": 1.213, "step": 587500 }, { "epoch": 2.37, "learning_rate": 3.3102715494862416e-05, "loss": 1.2585, "step": 588000 }, { "epoch": 2.37, "learning_rate": 3.308834140869495e-05, "loss": 1.2533, "step": 588500 }, { "epoch": 2.37, "learning_rate": 3.307396732252747e-05, "loss": 1.2547, "step": 589000 }, { "epoch": 2.37, "learning_rate": 3.3059593236359996e-05, "loss": 1.2081, "step": 589500 }, { "epoch": 2.37, "learning_rate": 3.304521915019253e-05, "loss": 1.2561, "step": 590000 }, { "epoch": 2.38, "learning_rate": 3.303084506402506e-05, "loss": 1.2344, "step": 590500 }, { "epoch": 2.38, "learning_rate": 3.301647097785758e-05, "loss": 1.287, "step": 591000 }, { "epoch": 2.38, "learning_rate": 3.300209689169011e-05, "loss": 1.2357, "step": 591500 }, { "epoch": 2.38, "learning_rate": 3.2987722805522644e-05, "loss": 1.2741, "step": 592000 }, { "epoch": 2.38, "learning_rate": 3.297334871935517e-05, "loss": 1.2655, "step": 592500 }, { "epoch": 2.39, "learning_rate": 3.295897463318769e-05, "loss": 1.2085, "step": 593000 }, { "epoch": 2.39, "learning_rate": 3.294460054702023e-05, "loss": 1.2697, "step": 593500 }, { "epoch": 2.39, "learning_rate": 3.293025520902509e-05, "loss": 1.226, "step": 594000 }, { "epoch": 2.39, "learning_rate": 3.291588112285762e-05, "loss": 1.2169, "step": 594500 }, { "epoch": 2.39, "learning_rate": 3.290150703669014e-05, "loss": 1.2318, "step": 595000 }, { "epoch": 2.4, "learning_rate": 3.288713295052267e-05, "loss": 1.2192, "step": 595500 }, { "epoch": 2.4, "learning_rate": 3.287278761252753e-05, "loss": 1.2522, "step": 596000 }, { "epoch": 2.4, "learning_rate": 3.285841352636006e-05, "loss": 1.2731, "step": 596500 }, { "epoch": 2.4, "learning_rate": 3.284403944019259e-05, "loss": 1.2572, "step": 597000 }, { "epoch": 2.4, "learning_rate": 3.282966535402512e-05, "loss": 1.2339, "step": 597500 }, { "epoch": 2.41, "learning_rate": 3.281532001602998e-05, "loss": 1.283, "step": 598000 }, { "epoch": 2.41, "learning_rate": 3.2800945929862506e-05, "loss": 1.2274, "step": 598500 }, { "epoch": 2.41, "learning_rate": 3.278660059186737e-05, "loss": 1.2292, "step": 599000 }, { "epoch": 2.41, "learning_rate": 3.27722265056999e-05, "loss": 1.2474, "step": 599500 }, { "epoch": 2.41, "learning_rate": 3.275785241953243e-05, "loss": 1.278, "step": 600000 }, { "epoch": 2.42, "learning_rate": 3.274347833336496e-05, "loss": 1.2438, "step": 600500 }, { "epoch": 2.42, "learning_rate": 3.272910424719748e-05, "loss": 1.2174, "step": 601000 }, { "epoch": 2.42, "learning_rate": 3.271473016103002e-05, "loss": 1.2063, "step": 601500 }, { "epoch": 2.42, "learning_rate": 3.270035607486254e-05, "loss": 1.188, "step": 602000 }, { "epoch": 2.42, "learning_rate": 3.268601073686741e-05, "loss": 1.2207, "step": 602500 }, { "epoch": 2.43, "learning_rate": 3.267163665069993e-05, "loss": 1.233, "step": 603000 }, { "epoch": 2.43, "learning_rate": 3.2657262564532456e-05, "loss": 1.2776, "step": 603500 }, { "epoch": 2.43, "learning_rate": 3.2642888478364994e-05, "loss": 1.2963, "step": 604000 }, { "epoch": 2.43, "learning_rate": 3.262851439219752e-05, "loss": 1.2479, "step": 604500 }, { "epoch": 2.43, "learning_rate": 3.261414030603004e-05, "loss": 1.1801, "step": 605000 }, { "epoch": 2.44, "learning_rate": 3.259976621986258e-05, "loss": 1.2522, "step": 605500 }, { "epoch": 2.44, "learning_rate": 3.2585392133695104e-05, "loss": 1.2482, "step": 606000 }, { "epoch": 2.44, "learning_rate": 3.257104679569997e-05, "loss": 1.2325, "step": 606500 }, { "epoch": 2.44, "learning_rate": 3.255667270953249e-05, "loss": 1.3044, "step": 607000 }, { "epoch": 2.45, "learning_rate": 3.254232737153736e-05, "loss": 1.2587, "step": 607500 }, { "epoch": 2.45, "learning_rate": 3.252795328536988e-05, "loss": 1.2764, "step": 608000 }, { "epoch": 2.45, "learning_rate": 3.251357919920241e-05, "loss": 1.2882, "step": 608500 }, { "epoch": 2.45, "learning_rate": 3.249920511303494e-05, "loss": 1.2507, "step": 609000 }, { "epoch": 2.45, "learning_rate": 3.248483102686747e-05, "loss": 1.1949, "step": 609500 }, { "epoch": 2.46, "learning_rate": 3.247045694069999e-05, "loss": 1.2653, "step": 610000 }, { "epoch": 2.46, "learning_rate": 3.245608285453253e-05, "loss": 1.291, "step": 610500 }, { "epoch": 2.46, "learning_rate": 3.2441708768365054e-05, "loss": 1.1922, "step": 611000 }, { "epoch": 2.46, "learning_rate": 3.242736343036992e-05, "loss": 1.2725, "step": 611500 }, { "epoch": 2.46, "learning_rate": 3.241298934420244e-05, "loss": 1.1928, "step": 612000 }, { "epoch": 2.47, "learning_rate": 3.239861525803497e-05, "loss": 1.2518, "step": 612500 }, { "epoch": 2.47, "learning_rate": 3.2384241171867504e-05, "loss": 1.229, "step": 613000 }, { "epoch": 2.47, "learning_rate": 3.236989583387237e-05, "loss": 1.2946, "step": 613500 }, { "epoch": 2.47, "learning_rate": 3.235552174770489e-05, "loss": 1.256, "step": 614000 }, { "epoch": 2.47, "learning_rate": 3.234114766153742e-05, "loss": 1.2156, "step": 614500 }, { "epoch": 2.48, "learning_rate": 3.232677357536995e-05, "loss": 1.2669, "step": 615000 }, { "epoch": 2.48, "learning_rate": 3.231239948920248e-05, "loss": 1.2413, "step": 615500 }, { "epoch": 2.48, "learning_rate": 3.2298025403035e-05, "loss": 1.2777, "step": 616000 }, { "epoch": 2.48, "learning_rate": 3.228368006503987e-05, "loss": 1.2184, "step": 616500 }, { "epoch": 2.48, "learning_rate": 3.226930597887239e-05, "loss": 1.2222, "step": 617000 }, { "epoch": 2.49, "learning_rate": 3.225493189270492e-05, "loss": 1.2481, "step": 617500 }, { "epoch": 2.49, "learning_rate": 3.2240557806537454e-05, "loss": 1.2333, "step": 618000 }, { "epoch": 2.49, "learning_rate": 3.222618372036998e-05, "loss": 1.2376, "step": 618500 }, { "epoch": 2.49, "learning_rate": 3.221183838237484e-05, "loss": 1.2284, "step": 619000 }, { "epoch": 2.49, "learning_rate": 3.219746429620737e-05, "loss": 1.2159, "step": 619500 }, { "epoch": 2.5, "learning_rate": 3.21830902100399e-05, "loss": 1.2351, "step": 620000 }, { "epoch": 2.5, "learning_rate": 3.216871612387243e-05, "loss": 1.2163, "step": 620500 }, { "epoch": 2.5, "learning_rate": 3.215434203770495e-05, "loss": 1.2584, "step": 621000 }, { "epoch": 2.5, "learning_rate": 3.2139967951537484e-05, "loss": 1.2286, "step": 621500 }, { "epoch": 2.5, "learning_rate": 3.2125593865370015e-05, "loss": 1.2537, "step": 622000 }, { "epoch": 2.51, "learning_rate": 3.211124852737488e-05, "loss": 1.2097, "step": 622500 }, { "epoch": 2.51, "learning_rate": 3.2096874441207404e-05, "loss": 1.2062, "step": 623000 }, { "epoch": 2.51, "learning_rate": 3.208250035503993e-05, "loss": 1.2346, "step": 623500 }, { "epoch": 2.51, "learning_rate": 3.206812626887246e-05, "loss": 1.2543, "step": 624000 }, { "epoch": 2.51, "learning_rate": 3.205375218270499e-05, "loss": 1.2531, "step": 624500 }, { "epoch": 2.52, "learning_rate": 3.2039406844709854e-05, "loss": 1.2416, "step": 625000 }, { "epoch": 2.52, "learning_rate": 3.202503275854238e-05, "loss": 1.2237, "step": 625500 }, { "epoch": 2.52, "learning_rate": 3.20106586723749e-05, "loss": 1.2425, "step": 626000 }, { "epoch": 2.52, "learning_rate": 3.1996284586207434e-05, "loss": 1.2313, "step": 626500 }, { "epoch": 2.52, "learning_rate": 3.1981910500039965e-05, "loss": 1.2602, "step": 627000 }, { "epoch": 2.53, "learning_rate": 3.196753641387249e-05, "loss": 1.2338, "step": 627500 }, { "epoch": 2.53, "learning_rate": 3.195316232770501e-05, "loss": 1.2783, "step": 628000 }, { "epoch": 2.53, "learning_rate": 3.193881698970988e-05, "loss": 1.2234, "step": 628500 }, { "epoch": 2.53, "learning_rate": 3.192444290354241e-05, "loss": 1.2668, "step": 629000 }, { "epoch": 2.53, "learning_rate": 3.191006881737494e-05, "loss": 1.2136, "step": 629500 }, { "epoch": 2.54, "learning_rate": 3.1895694731207464e-05, "loss": 1.23, "step": 630000 }, { "epoch": 2.54, "learning_rate": 3.1881320645039995e-05, "loss": 1.218, "step": 630500 }, { "epoch": 2.54, "learning_rate": 3.1866946558872526e-05, "loss": 1.2594, "step": 631000 }, { "epoch": 2.54, "learning_rate": 3.185257247270505e-05, "loss": 1.2191, "step": 631500 }, { "epoch": 2.54, "learning_rate": 3.1838227134709914e-05, "loss": 1.2261, "step": 632000 }, { "epoch": 2.55, "learning_rate": 3.182385304854244e-05, "loss": 1.2288, "step": 632500 }, { "epoch": 2.55, "learning_rate": 3.180947896237497e-05, "loss": 1.2882, "step": 633000 }, { "epoch": 2.55, "learning_rate": 3.17951048762075e-05, "loss": 1.2466, "step": 633500 }, { "epoch": 2.55, "learning_rate": 3.1780730790040025e-05, "loss": 1.2221, "step": 634000 }, { "epoch": 2.55, "learning_rate": 3.176635670387255e-05, "loss": 1.2453, "step": 634500 }, { "epoch": 2.56, "learning_rate": 3.175198261770508e-05, "loss": 1.2455, "step": 635000 }, { "epoch": 2.56, "learning_rate": 3.173760853153761e-05, "loss": 1.2269, "step": 635500 }, { "epoch": 2.56, "learning_rate": 3.1723263193542475e-05, "loss": 1.2535, "step": 636000 }, { "epoch": 2.56, "learning_rate": 3.1708889107375e-05, "loss": 1.2515, "step": 636500 }, { "epoch": 2.56, "learning_rate": 3.169451502120753e-05, "loss": 1.239, "step": 637000 }, { "epoch": 2.57, "learning_rate": 3.168014093504006e-05, "loss": 1.2717, "step": 637500 }, { "epoch": 2.57, "learning_rate": 3.1665766848872586e-05, "loss": 1.2414, "step": 638000 }, { "epoch": 2.57, "learning_rate": 3.165142151087745e-05, "loss": 1.2811, "step": 638500 }, { "epoch": 2.57, "learning_rate": 3.1637047424709974e-05, "loss": 1.2393, "step": 639000 }, { "epoch": 2.57, "learning_rate": 3.1622673338542505e-05, "loss": 1.1999, "step": 639500 }, { "epoch": 2.58, "learning_rate": 3.1608299252375036e-05, "loss": 1.2494, "step": 640000 }, { "epoch": 2.58, "learning_rate": 3.159392516620756e-05, "loss": 1.247, "step": 640500 }, { "epoch": 2.58, "learning_rate": 3.1579551080040085e-05, "loss": 1.2276, "step": 641000 }, { "epoch": 2.58, "learning_rate": 3.1565176993872616e-05, "loss": 1.2768, "step": 641500 }, { "epoch": 2.58, "learning_rate": 3.1550802907705146e-05, "loss": 1.2675, "step": 642000 }, { "epoch": 2.59, "learning_rate": 3.153642882153767e-05, "loss": 1.269, "step": 642500 }, { "epoch": 2.59, "learning_rate": 3.152211223171487e-05, "loss": 1.2432, "step": 643000 }, { "epoch": 2.59, "learning_rate": 3.15077381455474e-05, "loss": 1.2798, "step": 643500 }, { "epoch": 2.59, "learning_rate": 3.1493364059379924e-05, "loss": 1.2195, "step": 644000 }, { "epoch": 2.59, "learning_rate": 3.1478989973212455e-05, "loss": 1.2509, "step": 644500 }, { "epoch": 2.6, "learning_rate": 3.1464615887044986e-05, "loss": 1.2541, "step": 645000 }, { "epoch": 2.6, "learning_rate": 3.145024180087751e-05, "loss": 1.2381, "step": 645500 }, { "epoch": 2.6, "learning_rate": 3.143586771471004e-05, "loss": 1.274, "step": 646000 }, { "epoch": 2.6, "learning_rate": 3.1421493628542565e-05, "loss": 1.2519, "step": 646500 }, { "epoch": 2.6, "learning_rate": 3.140717703871976e-05, "loss": 1.2487, "step": 647000 }, { "epoch": 2.61, "learning_rate": 3.1392802952552294e-05, "loss": 1.2223, "step": 647500 }, { "epoch": 2.61, "learning_rate": 3.1378428866384825e-05, "loss": 1.2576, "step": 648000 }, { "epoch": 2.61, "learning_rate": 3.136405478021735e-05, "loss": 1.2481, "step": 648500 }, { "epoch": 2.61, "learning_rate": 3.1349680694049874e-05, "loss": 1.236, "step": 649000 }, { "epoch": 2.61, "learning_rate": 3.1335306607882405e-05, "loss": 1.1997, "step": 649500 }, { "epoch": 2.62, "learning_rate": 3.132096126988727e-05, "loss": 1.2026, "step": 650000 }, { "epoch": 2.62, "learning_rate": 3.13065871837198e-05, "loss": 1.2536, "step": 650500 }, { "epoch": 2.62, "learning_rate": 3.1292213097552324e-05, "loss": 1.2429, "step": 651000 }, { "epoch": 2.62, "learning_rate": 3.127783901138485e-05, "loss": 1.2246, "step": 651500 }, { "epoch": 2.62, "learning_rate": 3.126349367338971e-05, "loss": 1.2687, "step": 652000 }, { "epoch": 2.63, "learning_rate": 3.1249119587222244e-05, "loss": 1.2562, "step": 652500 }, { "epoch": 2.63, "learning_rate": 3.1234745501054775e-05, "loss": 1.2347, "step": 653000 }, { "epoch": 2.63, "learning_rate": 3.12203714148873e-05, "loss": 1.2105, "step": 653500 }, { "epoch": 2.63, "learning_rate": 3.120599732871983e-05, "loss": 1.2172, "step": 654000 }, { "epoch": 2.63, "learning_rate": 3.1191623242552354e-05, "loss": 1.2471, "step": 654500 }, { "epoch": 2.64, "learning_rate": 3.1177249156384885e-05, "loss": 1.2841, "step": 655000 }, { "epoch": 2.64, "learning_rate": 3.116290381838975e-05, "loss": 1.2743, "step": 655500 }, { "epoch": 2.64, "learning_rate": 3.1148529732222274e-05, "loss": 1.2504, "step": 656000 }, { "epoch": 2.64, "learning_rate": 3.1134155646054805e-05, "loss": 1.2194, "step": 656500 }, { "epoch": 2.64, "learning_rate": 3.1119781559887336e-05, "loss": 1.2181, "step": 657000 }, { "epoch": 2.65, "learning_rate": 3.110540747371986e-05, "loss": 1.294, "step": 657500 }, { "epoch": 2.65, "learning_rate": 3.1091033387552384e-05, "loss": 1.248, "step": 658000 }, { "epoch": 2.65, "learning_rate": 3.1076659301384915e-05, "loss": 1.2075, "step": 658500 }, { "epoch": 2.65, "learning_rate": 3.1062285215217446e-05, "loss": 1.2646, "step": 659000 }, { "epoch": 2.65, "learning_rate": 3.104791112904997e-05, "loss": 1.2633, "step": 659500 }, { "epoch": 2.66, "learning_rate": 3.10335370428825e-05, "loss": 1.2457, "step": 660000 }, { "epoch": 2.66, "learning_rate": 3.1019191704887366e-05, "loss": 1.224, "step": 660500 }, { "epoch": 2.66, "learning_rate": 3.100481761871989e-05, "loss": 1.2108, "step": 661000 }, { "epoch": 2.66, "learning_rate": 3.099044353255242e-05, "loss": 1.2735, "step": 661500 }, { "epoch": 2.66, "learning_rate": 3.0976069446384945e-05, "loss": 1.2517, "step": 662000 }, { "epoch": 2.67, "learning_rate": 3.096172410838981e-05, "loss": 1.2133, "step": 662500 }, { "epoch": 2.67, "learning_rate": 3.094735002222234e-05, "loss": 1.2374, "step": 663000 }, { "epoch": 2.67, "learning_rate": 3.09330046842272e-05, "loss": 1.2218, "step": 663500 }, { "epoch": 2.67, "learning_rate": 3.091863059805973e-05, "loss": 1.2535, "step": 664000 }, { "epoch": 2.67, "learning_rate": 3.090425651189226e-05, "loss": 1.2373, "step": 664500 }, { "epoch": 2.68, "learning_rate": 3.0889882425724785e-05, "loss": 1.2154, "step": 665000 }, { "epoch": 2.68, "learning_rate": 3.087553708772965e-05, "loss": 1.2471, "step": 665500 }, { "epoch": 2.68, "learning_rate": 3.0861163001562173e-05, "loss": 1.1906, "step": 666000 }, { "epoch": 2.68, "learning_rate": 3.0846788915394704e-05, "loss": 1.2542, "step": 666500 }, { "epoch": 2.68, "learning_rate": 3.0832414829227235e-05, "loss": 1.2244, "step": 667000 }, { "epoch": 2.69, "learning_rate": 3.081804074305976e-05, "loss": 1.2348, "step": 667500 }, { "epoch": 2.69, "learning_rate": 3.080366665689229e-05, "loss": 1.2351, "step": 668000 }, { "epoch": 2.69, "learning_rate": 3.078929257072482e-05, "loss": 1.2391, "step": 668500 }, { "epoch": 2.69, "learning_rate": 3.0774918484557346e-05, "loss": 1.2348, "step": 669000 }, { "epoch": 2.69, "learning_rate": 3.0760544398389877e-05, "loss": 1.234, "step": 669500 }, { "epoch": 2.7, "learning_rate": 3.07461703122224e-05, "loss": 1.2389, "step": 670000 }, { "epoch": 2.7, "learning_rate": 3.073179622605493e-05, "loss": 1.2821, "step": 670500 }, { "epoch": 2.7, "learning_rate": 3.0717422139887456e-05, "loss": 1.2589, "step": 671000 }, { "epoch": 2.7, "learning_rate": 3.070307680189232e-05, "loss": 1.2413, "step": 671500 }, { "epoch": 2.7, "learning_rate": 3.068870271572485e-05, "loss": 1.2379, "step": 672000 }, { "epoch": 2.71, "learning_rate": 3.0674328629557376e-05, "loss": 1.2484, "step": 672500 }, { "epoch": 2.71, "learning_rate": 3.0659954543389907e-05, "loss": 1.2199, "step": 673000 }, { "epoch": 2.71, "learning_rate": 3.064560920539477e-05, "loss": 1.2932, "step": 673500 }, { "epoch": 2.71, "learning_rate": 3.0631235119227295e-05, "loss": 1.3011, "step": 674000 }, { "epoch": 2.71, "learning_rate": 3.0616861033059826e-05, "loss": 1.2379, "step": 674500 }, { "epoch": 2.72, "learning_rate": 3.060248694689235e-05, "loss": 1.2357, "step": 675000 }, { "epoch": 2.72, "learning_rate": 3.0588141608897215e-05, "loss": 1.2198, "step": 675500 }, { "epoch": 2.72, "learning_rate": 3.0573767522729746e-05, "loss": 1.2762, "step": 676000 }, { "epoch": 2.72, "learning_rate": 3.055939343656227e-05, "loss": 1.2504, "step": 676500 }, { "epoch": 2.72, "learning_rate": 3.05450193503948e-05, "loss": 1.2453, "step": 677000 }, { "epoch": 2.73, "learning_rate": 3.0530645264227325e-05, "loss": 1.2447, "step": 677500 }, { "epoch": 2.73, "learning_rate": 3.051629992623219e-05, "loss": 1.213, "step": 678000 }, { "epoch": 2.73, "learning_rate": 3.050192584006472e-05, "loss": 1.2488, "step": 678500 }, { "epoch": 2.73, "learning_rate": 3.048755175389725e-05, "loss": 1.1872, "step": 679000 }, { "epoch": 2.73, "learning_rate": 3.0473177667729773e-05, "loss": 1.2731, "step": 679500 }, { "epoch": 2.74, "learning_rate": 3.0458832329734637e-05, "loss": 1.2094, "step": 680000 }, { "epoch": 2.74, "learning_rate": 3.0444458243567165e-05, "loss": 1.2595, "step": 680500 }, { "epoch": 2.74, "learning_rate": 3.0430084157399696e-05, "loss": 1.2945, "step": 681000 }, { "epoch": 2.74, "learning_rate": 3.0415710071232223e-05, "loss": 1.2121, "step": 681500 }, { "epoch": 2.74, "learning_rate": 3.040133598506475e-05, "loss": 1.2368, "step": 682000 }, { "epoch": 2.75, "learning_rate": 3.0386961898897282e-05, "loss": 1.22, "step": 682500 }, { "epoch": 2.75, "learning_rate": 3.037261656090214e-05, "loss": 1.2889, "step": 683000 }, { "epoch": 2.75, "learning_rate": 3.035824247473467e-05, "loss": 1.2652, "step": 683500 }, { "epoch": 2.75, "learning_rate": 3.0343868388567198e-05, "loss": 1.2319, "step": 684000 }, { "epoch": 2.75, "learning_rate": 3.0329494302399726e-05, "loss": 1.2568, "step": 684500 }, { "epoch": 2.76, "learning_rate": 3.0315120216232257e-05, "loss": 1.2336, "step": 685000 }, { "epoch": 2.76, "learning_rate": 3.0300774878237114e-05, "loss": 1.3042, "step": 685500 }, { "epoch": 2.76, "learning_rate": 3.0286400792069645e-05, "loss": 1.2508, "step": 686000 }, { "epoch": 2.76, "learning_rate": 3.0272026705902173e-05, "loss": 1.2772, "step": 686500 }, { "epoch": 2.76, "learning_rate": 3.02576526197347e-05, "loss": 1.2556, "step": 687000 }, { "epoch": 2.77, "learning_rate": 3.024327853356723e-05, "loss": 1.2766, "step": 687500 }, { "epoch": 2.77, "learning_rate": 3.022890444739976e-05, "loss": 1.2361, "step": 688000 }, { "epoch": 2.77, "learning_rate": 3.0214530361232287e-05, "loss": 1.2118, "step": 688500 }, { "epoch": 2.77, "learning_rate": 3.0200185023237148e-05, "loss": 1.2504, "step": 689000 }, { "epoch": 2.78, "learning_rate": 3.0185810937069675e-05, "loss": 1.2209, "step": 689500 }, { "epoch": 2.78, "learning_rate": 3.0171436850902206e-05, "loss": 1.2519, "step": 690000 }, { "epoch": 2.78, "learning_rate": 3.0157062764734734e-05, "loss": 1.2479, "step": 690500 }, { "epoch": 2.78, "learning_rate": 3.014268867856726e-05, "loss": 1.2803, "step": 691000 }, { "epoch": 2.78, "learning_rate": 3.0128314592399792e-05, "loss": 1.2125, "step": 691500 }, { "epoch": 2.79, "learning_rate": 3.011396925440465e-05, "loss": 1.3017, "step": 692000 }, { "epoch": 2.79, "learning_rate": 3.009959516823718e-05, "loss": 1.2246, "step": 692500 }, { "epoch": 2.79, "learning_rate": 3.008522108206971e-05, "loss": 1.2735, "step": 693000 }, { "epoch": 2.79, "learning_rate": 3.0070846995902236e-05, "loss": 1.2631, "step": 693500 }, { "epoch": 2.79, "learning_rate": 3.0056472909734767e-05, "loss": 1.1929, "step": 694000 }, { "epoch": 2.8, "learning_rate": 3.0042098823567295e-05, "loss": 1.1861, "step": 694500 }, { "epoch": 2.8, "learning_rate": 3.002772473739982e-05, "loss": 1.2358, "step": 695000 }, { "epoch": 2.8, "learning_rate": 3.0013350651232347e-05, "loss": 1.2334, "step": 695500 }, { "epoch": 2.8, "learning_rate": 2.999900531323721e-05, "loss": 1.1998, "step": 696000 }, { "epoch": 2.8, "learning_rate": 2.9984631227069742e-05, "loss": 1.2429, "step": 696500 }, { "epoch": 2.81, "learning_rate": 2.997025714090227e-05, "loss": 1.2357, "step": 697000 }, { "epoch": 2.81, "learning_rate": 2.9955883054734797e-05, "loss": 1.2471, "step": 697500 }, { "epoch": 2.81, "learning_rate": 2.994153771673966e-05, "loss": 1.2215, "step": 698000 }, { "epoch": 2.81, "learning_rate": 2.9927192378744523e-05, "loss": 1.219, "step": 698500 }, { "epoch": 2.81, "learning_rate": 2.991281829257705e-05, "loss": 1.23, "step": 699000 }, { "epoch": 2.82, "learning_rate": 2.989844420640958e-05, "loss": 1.2233, "step": 699500 }, { "epoch": 2.82, "learning_rate": 2.988407012024211e-05, "loss": 1.2892, "step": 700000 }, { "epoch": 2.82, "learning_rate": 2.9869696034074633e-05, "loss": 1.2555, "step": 700500 }, { "epoch": 2.82, "learning_rate": 2.9855350696079498e-05, "loss": 1.2268, "step": 701000 }, { "epoch": 2.82, "learning_rate": 2.9841005358084362e-05, "loss": 1.2577, "step": 701500 }, { "epoch": 2.83, "learning_rate": 2.982663127191689e-05, "loss": 1.2226, "step": 702000 }, { "epoch": 2.83, "learning_rate": 2.9812257185749414e-05, "loss": 1.2568, "step": 702500 }, { "epoch": 2.83, "learning_rate": 2.979788309958195e-05, "loss": 1.2055, "step": 703000 }, { "epoch": 2.83, "learning_rate": 2.9783509013414473e-05, "loss": 1.2711, "step": 703500 }, { "epoch": 2.83, "learning_rate": 2.9769134927247e-05, "loss": 1.2103, "step": 704000 }, { "epoch": 2.84, "learning_rate": 2.975476084107953e-05, "loss": 1.2523, "step": 704500 }, { "epoch": 2.84, "learning_rate": 2.974041550308439e-05, "loss": 1.2298, "step": 705000 }, { "epoch": 2.84, "learning_rate": 2.9726041416916923e-05, "loss": 1.2213, "step": 705500 }, { "epoch": 2.84, "learning_rate": 2.9711667330749447e-05, "loss": 1.2054, "step": 706000 }, { "epoch": 2.84, "learning_rate": 2.9697293244581975e-05, "loss": 1.2266, "step": 706500 }, { "epoch": 2.85, "learning_rate": 2.9682919158414506e-05, "loss": 1.2166, "step": 707000 }, { "epoch": 2.85, "learning_rate": 2.9668545072247034e-05, "loss": 1.2413, "step": 707500 }, { "epoch": 2.85, "learning_rate": 2.965417098607956e-05, "loss": 1.2661, "step": 708000 }, { "epoch": 2.85, "learning_rate": 2.9639796899912085e-05, "loss": 1.243, "step": 708500 }, { "epoch": 2.85, "learning_rate": 2.962542281374462e-05, "loss": 1.2561, "step": 709000 }, { "epoch": 2.86, "learning_rate": 2.961107747574948e-05, "loss": 1.2337, "step": 709500 }, { "epoch": 2.86, "learning_rate": 2.959670338958201e-05, "loss": 1.2169, "step": 710000 }, { "epoch": 2.86, "learning_rate": 2.9582329303414536e-05, "loss": 1.229, "step": 710500 }, { "epoch": 2.86, "learning_rate": 2.9567955217247067e-05, "loss": 1.2684, "step": 711000 }, { "epoch": 2.86, "learning_rate": 2.9553581131079595e-05, "loss": 1.2043, "step": 711500 }, { "epoch": 2.87, "learning_rate": 2.9539207044912122e-05, "loss": 1.2192, "step": 712000 }, { "epoch": 2.87, "learning_rate": 2.9524861706916983e-05, "loss": 1.2479, "step": 712500 }, { "epoch": 2.87, "learning_rate": 2.951048762074951e-05, "loss": 1.1925, "step": 713000 }, { "epoch": 2.87, "learning_rate": 2.9496113534582042e-05, "loss": 1.2559, "step": 713500 }, { "epoch": 2.87, "learning_rate": 2.948173944841457e-05, "loss": 1.2398, "step": 714000 }, { "epoch": 2.88, "learning_rate": 2.9467365362247097e-05, "loss": 1.2269, "step": 714500 }, { "epoch": 2.88, "learning_rate": 2.945299127607962e-05, "loss": 1.2711, "step": 715000 }, { "epoch": 2.88, "learning_rate": 2.9438617189912155e-05, "loss": 1.2233, "step": 715500 }, { "epoch": 2.88, "learning_rate": 2.942424310374468e-05, "loss": 1.2588, "step": 716000 }, { "epoch": 2.88, "learning_rate": 2.9409897765749544e-05, "loss": 1.2265, "step": 716500 }, { "epoch": 2.89, "learning_rate": 2.9395523679582072e-05, "loss": 1.23, "step": 717000 }, { "epoch": 2.89, "learning_rate": 2.9381149593414596e-05, "loss": 1.1803, "step": 717500 }, { "epoch": 2.89, "learning_rate": 2.936677550724713e-05, "loss": 1.2335, "step": 718000 }, { "epoch": 2.89, "learning_rate": 2.9352430169251995e-05, "loss": 1.2416, "step": 718500 }, { "epoch": 2.89, "learning_rate": 2.933805608308452e-05, "loss": 1.2309, "step": 719000 }, { "epoch": 2.9, "learning_rate": 2.9323681996917047e-05, "loss": 1.2603, "step": 719500 }, { "epoch": 2.9, "learning_rate": 2.9309307910749578e-05, "loss": 1.2577, "step": 720000 }, { "epoch": 2.9, "learning_rate": 2.9294933824582105e-05, "loss": 1.2358, "step": 720500 }, { "epoch": 2.9, "learning_rate": 2.9280559738414633e-05, "loss": 1.2698, "step": 721000 }, { "epoch": 2.9, "learning_rate": 2.9266185652247157e-05, "loss": 1.2195, "step": 721500 }, { "epoch": 2.91, "learning_rate": 2.925181156607969e-05, "loss": 1.2116, "step": 722000 }, { "epoch": 2.91, "learning_rate": 2.9237466228084552e-05, "loss": 1.3009, "step": 722500 }, { "epoch": 2.91, "learning_rate": 2.922309214191708e-05, "loss": 1.2121, "step": 723000 }, { "epoch": 2.91, "learning_rate": 2.9208718055749608e-05, "loss": 1.2387, "step": 723500 }, { "epoch": 2.91, "learning_rate": 2.9194343969582132e-05, "loss": 1.2452, "step": 724000 }, { "epoch": 2.92, "learning_rate": 2.9179998631586996e-05, "loss": 1.2374, "step": 724500 }, { "epoch": 2.92, "learning_rate": 2.9165624545419527e-05, "loss": 1.2658, "step": 725000 }, { "epoch": 2.92, "learning_rate": 2.9151250459252055e-05, "loss": 1.205, "step": 725500 }, { "epoch": 2.92, "learning_rate": 2.913690512125692e-05, "loss": 1.2435, "step": 726000 }, { "epoch": 2.92, "learning_rate": 2.9122531035089447e-05, "loss": 1.2219, "step": 726500 }, { "epoch": 2.93, "learning_rate": 2.910815694892197e-05, "loss": 1.2025, "step": 727000 }, { "epoch": 2.93, "learning_rate": 2.9093782862754506e-05, "loss": 1.2128, "step": 727500 }, { "epoch": 2.93, "learning_rate": 2.9079437524759367e-05, "loss": 1.2164, "step": 728000 }, { "epoch": 2.93, "learning_rate": 2.9065063438591894e-05, "loss": 1.2084, "step": 728500 }, { "epoch": 2.93, "learning_rate": 2.9050689352424422e-05, "loss": 1.2484, "step": 729000 }, { "epoch": 2.94, "learning_rate": 2.9036315266256946e-05, "loss": 1.2232, "step": 729500 }, { "epoch": 2.94, "learning_rate": 2.902194118008948e-05, "loss": 1.1881, "step": 730000 }, { "epoch": 2.94, "learning_rate": 2.9007567093922005e-05, "loss": 1.2966, "step": 730500 }, { "epoch": 2.94, "learning_rate": 2.899322175592687e-05, "loss": 1.2549, "step": 731000 }, { "epoch": 2.94, "learning_rate": 2.8978847669759397e-05, "loss": 1.2204, "step": 731500 }, { "epoch": 2.95, "learning_rate": 2.896447358359192e-05, "loss": 1.2822, "step": 732000 }, { "epoch": 2.95, "learning_rate": 2.8950099497424455e-05, "loss": 1.1844, "step": 732500 }, { "epoch": 2.95, "learning_rate": 2.893572541125698e-05, "loss": 1.2724, "step": 733000 }, { "epoch": 2.95, "learning_rate": 2.8921351325089507e-05, "loss": 1.2292, "step": 733500 }, { "epoch": 2.95, "learning_rate": 2.8906977238922038e-05, "loss": 1.2345, "step": 734000 }, { "epoch": 2.96, "learning_rate": 2.8892603152754565e-05, "loss": 1.1832, "step": 734500 }, { "epoch": 2.96, "learning_rate": 2.887825781475943e-05, "loss": 1.2625, "step": 735000 }, { "epoch": 2.96, "learning_rate": 2.8863883728591958e-05, "loss": 1.2347, "step": 735500 }, { "epoch": 2.96, "learning_rate": 2.8849509642424482e-05, "loss": 1.237, "step": 736000 }, { "epoch": 2.96, "learning_rate": 2.8835135556257016e-05, "loss": 1.249, "step": 736500 }, { "epoch": 2.97, "learning_rate": 2.8820790218261874e-05, "loss": 1.2297, "step": 737000 }, { "epoch": 2.97, "learning_rate": 2.8806416132094405e-05, "loss": 1.2588, "step": 737500 }, { "epoch": 2.97, "learning_rate": 2.8792042045926932e-05, "loss": 1.2088, "step": 738000 }, { "epoch": 2.97, "learning_rate": 2.8777667959759457e-05, "loss": 1.2394, "step": 738500 }, { "epoch": 2.97, "learning_rate": 2.876329387359199e-05, "loss": 1.2174, "step": 739000 }, { "epoch": 2.98, "learning_rate": 2.8748919787424515e-05, "loss": 1.2442, "step": 739500 }, { "epoch": 2.98, "learning_rate": 2.8734545701257043e-05, "loss": 1.245, "step": 740000 }, { "epoch": 2.98, "learning_rate": 2.872017161508957e-05, "loss": 1.2265, "step": 740500 }, { "epoch": 2.98, "learning_rate": 2.870582627709443e-05, "loss": 1.2302, "step": 741000 }, { "epoch": 2.98, "learning_rate": 2.8691452190926966e-05, "loss": 1.233, "step": 741500 }, { "epoch": 2.99, "learning_rate": 2.867707810475949e-05, "loss": 1.2119, "step": 742000 }, { "epoch": 2.99, "learning_rate": 2.8662704018592018e-05, "loss": 1.2313, "step": 742500 }, { "epoch": 2.99, "learning_rate": 2.864832993242455e-05, "loss": 1.2186, "step": 743000 }, { "epoch": 2.99, "learning_rate": 2.8633955846257076e-05, "loss": 1.2117, "step": 743500 }, { "epoch": 2.99, "learning_rate": 2.8619581760089604e-05, "loss": 1.2814, "step": 744000 }, { "epoch": 3.0, "learning_rate": 2.860520767392213e-05, "loss": 1.2375, "step": 744500 }, { "epoch": 3.0, "learning_rate": 2.8590862335926992e-05, "loss": 1.237, "step": 745000 }, { "epoch": 3.0, "eval_cer": 0.2424137811448767, "eval_loss": 0.976024329662323, "eval_runtime": 10946.8543, "eval_samples_per_second": 8.929, "eval_steps_per_second": 1.116, "step": 745389 }, { "epoch": 3.0, "learning_rate": 2.8576488249759527e-05, "loss": 1.2107, "step": 745500 }, { "epoch": 3.0, "learning_rate": 2.856211416359205e-05, "loss": 1.1992, "step": 746000 }, { "epoch": 3.0, "learning_rate": 2.854774007742458e-05, "loss": 1.1948, "step": 746500 }, { "epoch": 3.01, "learning_rate": 2.8533365991257106e-05, "loss": 1.1713, "step": 747000 }, { "epoch": 3.01, "learning_rate": 2.8519020653261967e-05, "loss": 1.1483, "step": 747500 }, { "epoch": 3.01, "learning_rate": 2.85046465670945e-05, "loss": 1.2257, "step": 748000 }, { "epoch": 3.01, "learning_rate": 2.849030122909936e-05, "loss": 1.1686, "step": 748500 }, { "epoch": 3.01, "learning_rate": 2.847592714293189e-05, "loss": 1.1807, "step": 749000 }, { "epoch": 3.02, "learning_rate": 2.8461553056764418e-05, "loss": 1.1587, "step": 749500 }, { "epoch": 3.02, "learning_rate": 2.8447178970596946e-05, "loss": 1.2438, "step": 750000 }, { "epoch": 3.02, "learning_rate": 2.8432833632601807e-05, "loss": 1.1855, "step": 750500 }, { "epoch": 3.02, "learning_rate": 2.841845954643434e-05, "loss": 1.1944, "step": 751000 }, { "epoch": 3.02, "learning_rate": 2.84041142084392e-05, "loss": 1.1879, "step": 751500 }, { "epoch": 3.03, "learning_rate": 2.838974012227173e-05, "loss": 1.1839, "step": 752000 }, { "epoch": 3.03, "learning_rate": 2.8375366036104257e-05, "loss": 1.2428, "step": 752500 }, { "epoch": 3.03, "learning_rate": 2.836099194993678e-05, "loss": 1.2279, "step": 753000 }, { "epoch": 3.03, "learning_rate": 2.8346617863769316e-05, "loss": 1.2226, "step": 753500 }, { "epoch": 3.03, "learning_rate": 2.833224377760184e-05, "loss": 1.2016, "step": 754000 }, { "epoch": 3.04, "learning_rate": 2.8317869691434368e-05, "loss": 1.1895, "step": 754500 }, { "epoch": 3.04, "learning_rate": 2.8303495605266895e-05, "loss": 1.2318, "step": 755000 }, { "epoch": 3.04, "learning_rate": 2.8289121519099426e-05, "loss": 1.2281, "step": 755500 }, { "epoch": 3.04, "learning_rate": 2.8274747432931954e-05, "loss": 1.206, "step": 756000 }, { "epoch": 3.04, "learning_rate": 2.8260373346764478e-05, "loss": 1.1901, "step": 756500 }, { "epoch": 3.05, "learning_rate": 2.8245999260597012e-05, "loss": 1.1949, "step": 757000 }, { "epoch": 3.05, "learning_rate": 2.823165392260187e-05, "loss": 1.1934, "step": 757500 }, { "epoch": 3.05, "learning_rate": 2.82172798364344e-05, "loss": 1.1726, "step": 758000 }, { "epoch": 3.05, "learning_rate": 2.820290575026693e-05, "loss": 1.1841, "step": 758500 }, { "epoch": 3.05, "learning_rate": 2.8188531664099456e-05, "loss": 1.1345, "step": 759000 }, { "epoch": 3.06, "learning_rate": 2.8174186326104317e-05, "loss": 1.1785, "step": 759500 }, { "epoch": 3.06, "learning_rate": 2.8159812239936845e-05, "loss": 1.2117, "step": 760000 }, { "epoch": 3.06, "learning_rate": 2.8145438153769376e-05, "loss": 1.2459, "step": 760500 }, { "epoch": 3.06, "learning_rate": 2.813109281577424e-05, "loss": 1.2031, "step": 761000 }, { "epoch": 3.06, "learning_rate": 2.8116718729606768e-05, "loss": 1.1813, "step": 761500 }, { "epoch": 3.07, "learning_rate": 2.8102344643439292e-05, "loss": 1.1901, "step": 762000 }, { "epoch": 3.07, "learning_rate": 2.8087970557271827e-05, "loss": 1.1793, "step": 762500 }, { "epoch": 3.07, "learning_rate": 2.807359647110435e-05, "loss": 1.2231, "step": 763000 }, { "epoch": 3.07, "learning_rate": 2.8059222384936878e-05, "loss": 1.1651, "step": 763500 }, { "epoch": 3.07, "learning_rate": 2.8044848298769406e-05, "loss": 1.1767, "step": 764000 }, { "epoch": 3.08, "learning_rate": 2.8030474212601937e-05, "loss": 1.1947, "step": 764500 }, { "epoch": 3.08, "learning_rate": 2.801615762277913e-05, "loss": 1.2363, "step": 765000 }, { "epoch": 3.08, "learning_rate": 2.800178353661166e-05, "loss": 1.2033, "step": 765500 }, { "epoch": 3.08, "learning_rate": 2.798740945044419e-05, "loss": 1.1903, "step": 766000 }, { "epoch": 3.08, "learning_rate": 2.7973035364276718e-05, "loss": 1.1984, "step": 766500 }, { "epoch": 3.09, "learning_rate": 2.7958690026281582e-05, "loss": 1.2454, "step": 767000 }, { "epoch": 3.09, "learning_rate": 2.7944315940114106e-05, "loss": 1.1979, "step": 767500 }, { "epoch": 3.09, "learning_rate": 2.7929941853946634e-05, "loss": 1.1888, "step": 768000 }, { "epoch": 3.09, "learning_rate": 2.7915567767779165e-05, "loss": 1.1791, "step": 768500 }, { "epoch": 3.1, "learning_rate": 2.7901193681611693e-05, "loss": 1.2505, "step": 769000 }, { "epoch": 3.1, "learning_rate": 2.7886848343616557e-05, "loss": 1.2186, "step": 769500 }, { "epoch": 3.1, "learning_rate": 2.787247425744908e-05, "loss": 1.1915, "step": 770000 }, { "epoch": 3.1, "learning_rate": 2.7858100171281616e-05, "loss": 1.2007, "step": 770500 }, { "epoch": 3.1, "learning_rate": 2.784372608511414e-05, "loss": 1.1768, "step": 771000 }, { "epoch": 3.11, "learning_rate": 2.7829380747119004e-05, "loss": 1.1771, "step": 771500 }, { "epoch": 3.11, "learning_rate": 2.7815006660951532e-05, "loss": 1.165, "step": 772000 }, { "epoch": 3.11, "learning_rate": 2.780063257478406e-05, "loss": 1.2091, "step": 772500 }, { "epoch": 3.11, "learning_rate": 2.778625848861659e-05, "loss": 1.205, "step": 773000 }, { "epoch": 3.11, "learning_rate": 2.7771884402449118e-05, "loss": 1.1734, "step": 773500 }, { "epoch": 3.12, "learning_rate": 2.775753906445398e-05, "loss": 1.2123, "step": 774000 }, { "epoch": 3.12, "learning_rate": 2.7743164978286507e-05, "loss": 1.2031, "step": 774500 }, { "epoch": 3.12, "learning_rate": 2.7728790892119034e-05, "loss": 1.208, "step": 775000 }, { "epoch": 3.12, "learning_rate": 2.7714416805951565e-05, "loss": 1.2273, "step": 775500 }, { "epoch": 3.12, "learning_rate": 2.7700042719784093e-05, "loss": 1.1497, "step": 776000 }, { "epoch": 3.13, "learning_rate": 2.7685668633616617e-05, "loss": 1.2239, "step": 776500 }, { "epoch": 3.13, "learning_rate": 2.767132329562148e-05, "loss": 1.1874, "step": 777000 }, { "epoch": 3.13, "learning_rate": 2.765694920945401e-05, "loss": 1.168, "step": 777500 }, { "epoch": 3.13, "learning_rate": 2.764257512328654e-05, "loss": 1.2032, "step": 778000 }, { "epoch": 3.13, "learning_rate": 2.7628201037119068e-05, "loss": 1.1673, "step": 778500 }, { "epoch": 3.14, "learning_rate": 2.761385569912393e-05, "loss": 1.2331, "step": 779000 }, { "epoch": 3.14, "learning_rate": 2.7599481612956456e-05, "loss": 1.2009, "step": 779500 }, { "epoch": 3.14, "learning_rate": 2.7585107526788984e-05, "loss": 1.1931, "step": 780000 }, { "epoch": 3.14, "learning_rate": 2.7570733440621515e-05, "loss": 1.185, "step": 780500 }, { "epoch": 3.14, "learning_rate": 2.7556359354454043e-05, "loss": 1.2052, "step": 781000 }, { "epoch": 3.15, "learning_rate": 2.7542014016458907e-05, "loss": 1.1839, "step": 781500 }, { "epoch": 3.15, "learning_rate": 2.752763993029143e-05, "loss": 1.2023, "step": 782000 }, { "epoch": 3.15, "learning_rate": 2.751326584412396e-05, "loss": 1.2242, "step": 782500 }, { "epoch": 3.15, "learning_rate": 2.749889175795649e-05, "loss": 1.1282, "step": 783000 }, { "epoch": 3.15, "learning_rate": 2.7484546419961354e-05, "loss": 1.1759, "step": 783500 }, { "epoch": 3.16, "learning_rate": 2.7470172333793882e-05, "loss": 1.1955, "step": 784000 }, { "epoch": 3.16, "learning_rate": 2.7455798247626406e-05, "loss": 1.1752, "step": 784500 }, { "epoch": 3.16, "learning_rate": 2.7441424161458934e-05, "loss": 1.2358, "step": 785000 }, { "epoch": 3.16, "learning_rate": 2.7427050075291465e-05, "loss": 1.1965, "step": 785500 }, { "epoch": 3.16, "learning_rate": 2.7412675989123992e-05, "loss": 1.1938, "step": 786000 }, { "epoch": 3.17, "learning_rate": 2.739830190295652e-05, "loss": 1.1775, "step": 786500 }, { "epoch": 3.17, "learning_rate": 2.7383956564961384e-05, "loss": 1.2015, "step": 787000 }, { "epoch": 3.17, "learning_rate": 2.736958247879391e-05, "loss": 1.1769, "step": 787500 }, { "epoch": 3.17, "learning_rate": 2.7355208392626443e-05, "loss": 1.1922, "step": 788000 }, { "epoch": 3.17, "learning_rate": 2.7340834306458967e-05, "loss": 1.1821, "step": 788500 }, { "epoch": 3.18, "learning_rate": 2.7326460220291495e-05, "loss": 1.2219, "step": 789000 }, { "epoch": 3.18, "learning_rate": 2.731211488229636e-05, "loss": 1.1915, "step": 789500 }, { "epoch": 3.18, "learning_rate": 2.729774079612889e-05, "loss": 1.1633, "step": 790000 }, { "epoch": 3.18, "learning_rate": 2.7283366709961418e-05, "loss": 1.194, "step": 790500 }, { "epoch": 3.18, "learning_rate": 2.7268992623793942e-05, "loss": 1.2103, "step": 791000 }, { "epoch": 3.19, "learning_rate": 2.725461853762647e-05, "loss": 1.1906, "step": 791500 }, { "epoch": 3.19, "learning_rate": 2.7240273199631334e-05, "loss": 1.1998, "step": 792000 }, { "epoch": 3.19, "learning_rate": 2.7225899113463865e-05, "loss": 1.2401, "step": 792500 }, { "epoch": 3.19, "learning_rate": 2.7211525027296393e-05, "loss": 1.1735, "step": 793000 }, { "epoch": 3.19, "learning_rate": 2.7197150941128917e-05, "loss": 1.1767, "step": 793500 }, { "epoch": 3.2, "learning_rate": 2.7182776854961444e-05, "loss": 1.2027, "step": 794000 }, { "epoch": 3.2, "learning_rate": 2.7168402768793975e-05, "loss": 1.1756, "step": 794500 }, { "epoch": 3.2, "learning_rate": 2.7154028682626503e-05, "loss": 1.2002, "step": 795000 }, { "epoch": 3.2, "learning_rate": 2.713965459645903e-05, "loss": 1.2255, "step": 795500 }, { "epoch": 3.2, "learning_rate": 2.7125309258463895e-05, "loss": 1.2232, "step": 796000 }, { "epoch": 3.21, "learning_rate": 2.711093517229642e-05, "loss": 1.1852, "step": 796500 }, { "epoch": 3.21, "learning_rate": 2.7096561086128954e-05, "loss": 1.1968, "step": 797000 }, { "epoch": 3.21, "learning_rate": 2.7082215748133815e-05, "loss": 1.2214, "step": 797500 }, { "epoch": 3.21, "learning_rate": 2.7067841661966342e-05, "loss": 1.1843, "step": 798000 }, { "epoch": 3.21, "learning_rate": 2.705346757579887e-05, "loss": 1.1879, "step": 798500 }, { "epoch": 3.22, "learning_rate": 2.7039093489631394e-05, "loss": 1.1862, "step": 799000 }, { "epoch": 3.22, "learning_rate": 2.702471940346393e-05, "loss": 1.1976, "step": 799500 }, { "epoch": 3.22, "learning_rate": 2.7010345317296453e-05, "loss": 1.2242, "step": 800000 }, { "epoch": 3.22, "learning_rate": 2.699597123112898e-05, "loss": 1.2212, "step": 800500 }, { "epoch": 3.22, "learning_rate": 2.698159714496151e-05, "loss": 1.1921, "step": 801000 }, { "epoch": 3.23, "learning_rate": 2.6967251806966376e-05, "loss": 1.1946, "step": 801500 }, { "epoch": 3.23, "learning_rate": 2.6952877720798903e-05, "loss": 1.1886, "step": 802000 }, { "epoch": 3.23, "learning_rate": 2.6938503634631427e-05, "loss": 1.2393, "step": 802500 }, { "epoch": 3.23, "learning_rate": 2.6924129548463955e-05, "loss": 1.1932, "step": 803000 }, { "epoch": 3.23, "learning_rate": 2.6909755462296486e-05, "loss": 1.1931, "step": 803500 }, { "epoch": 3.24, "learning_rate": 2.689541012430135e-05, "loss": 1.1557, "step": 804000 }, { "epoch": 3.24, "learning_rate": 2.6881036038133878e-05, "loss": 1.203, "step": 804500 }, { "epoch": 3.24, "learning_rate": 2.6866661951966406e-05, "loss": 1.1944, "step": 805000 }, { "epoch": 3.24, "learning_rate": 2.685228786579893e-05, "loss": 1.2017, "step": 805500 }, { "epoch": 3.24, "learning_rate": 2.6837913779631464e-05, "loss": 1.2312, "step": 806000 }, { "epoch": 3.25, "learning_rate": 2.682353969346399e-05, "loss": 1.2017, "step": 806500 }, { "epoch": 3.25, "learning_rate": 2.6809194355468853e-05, "loss": 1.1889, "step": 807000 }, { "epoch": 3.25, "learning_rate": 2.679482026930138e-05, "loss": 1.2188, "step": 807500 }, { "epoch": 3.25, "learning_rate": 2.6780446183133905e-05, "loss": 1.1738, "step": 808000 }, { "epoch": 3.25, "learning_rate": 2.676610084513877e-05, "loss": 1.1761, "step": 808500 }, { "epoch": 3.26, "learning_rate": 2.67517267589713e-05, "loss": 1.1507, "step": 809000 }, { "epoch": 3.26, "learning_rate": 2.6737352672803828e-05, "loss": 1.1887, "step": 809500 }, { "epoch": 3.26, "learning_rate": 2.6722978586636355e-05, "loss": 1.2401, "step": 810000 }, { "epoch": 3.26, "learning_rate": 2.6708604500468886e-05, "loss": 1.2165, "step": 810500 }, { "epoch": 3.26, "learning_rate": 2.6694230414301414e-05, "loss": 1.1777, "step": 811000 }, { "epoch": 3.27, "learning_rate": 2.667985632813394e-05, "loss": 1.17, "step": 811500 }, { "epoch": 3.27, "learning_rate": 2.6665482241966466e-05, "loss": 1.209, "step": 812000 }, { "epoch": 3.27, "learning_rate": 2.6651108155799e-05, "loss": 1.1625, "step": 812500 }, { "epoch": 3.27, "learning_rate": 2.663676281780386e-05, "loss": 1.1877, "step": 813000 }, { "epoch": 3.27, "learning_rate": 2.662238873163639e-05, "loss": 1.2107, "step": 813500 }, { "epoch": 3.28, "learning_rate": 2.6608014645468916e-05, "loss": 1.2071, "step": 814000 }, { "epoch": 3.28, "learning_rate": 2.659364055930144e-05, "loss": 1.1744, "step": 814500 }, { "epoch": 3.28, "learning_rate": 2.6579266473133975e-05, "loss": 1.1599, "step": 815000 }, { "epoch": 3.28, "learning_rate": 2.6564921135138836e-05, "loss": 1.1937, "step": 815500 }, { "epoch": 3.28, "learning_rate": 2.6550547048971364e-05, "loss": 1.1839, "step": 816000 }, { "epoch": 3.29, "learning_rate": 2.653617296280389e-05, "loss": 1.231, "step": 816500 }, { "epoch": 3.29, "learning_rate": 2.6521798876636415e-05, "loss": 1.1725, "step": 817000 }, { "epoch": 3.29, "learning_rate": 2.650742479046895e-05, "loss": 1.2099, "step": 817500 }, { "epoch": 3.29, "learning_rate": 2.6493050704301474e-05, "loss": 1.1811, "step": 818000 }, { "epoch": 3.29, "learning_rate": 2.647870536630634e-05, "loss": 1.2299, "step": 818500 }, { "epoch": 3.3, "learning_rate": 2.6464331280138866e-05, "loss": 1.1692, "step": 819000 }, { "epoch": 3.3, "learning_rate": 2.6449957193971394e-05, "loss": 1.2198, "step": 819500 }, { "epoch": 3.3, "learning_rate": 2.6435583107803924e-05, "loss": 1.2231, "step": 820000 }, { "epoch": 3.3, "learning_rate": 2.6421209021636452e-05, "loss": 1.2063, "step": 820500 }, { "epoch": 3.3, "learning_rate": 2.6406834935468976e-05, "loss": 1.2099, "step": 821000 }, { "epoch": 3.31, "learning_rate": 2.639248959747384e-05, "loss": 1.2221, "step": 821500 }, { "epoch": 3.31, "learning_rate": 2.6378115511306372e-05, "loss": 1.2231, "step": 822000 }, { "epoch": 3.31, "learning_rate": 2.63637414251389e-05, "loss": 1.2349, "step": 822500 }, { "epoch": 3.31, "learning_rate": 2.6349367338971427e-05, "loss": 1.2288, "step": 823000 }, { "epoch": 3.31, "learning_rate": 2.633499325280395e-05, "loss": 1.2305, "step": 823500 }, { "epoch": 3.32, "learning_rate": 2.6320619166636485e-05, "loss": 1.1755, "step": 824000 }, { "epoch": 3.32, "learning_rate": 2.6306273828641347e-05, "loss": 1.2109, "step": 824500 }, { "epoch": 3.32, "learning_rate": 2.6291899742473874e-05, "loss": 1.2093, "step": 825000 }, { "epoch": 3.32, "learning_rate": 2.6277525656306402e-05, "loss": 1.1682, "step": 825500 }, { "epoch": 3.32, "learning_rate": 2.6263151570138926e-05, "loss": 1.1864, "step": 826000 }, { "epoch": 3.33, "learning_rate": 2.624877748397146e-05, "loss": 1.2548, "step": 826500 }, { "epoch": 3.33, "learning_rate": 2.6234403397803984e-05, "loss": 1.1723, "step": 827000 }, { "epoch": 3.33, "learning_rate": 2.6220029311636512e-05, "loss": 1.1182, "step": 827500 }, { "epoch": 3.33, "learning_rate": 2.6205655225469043e-05, "loss": 1.2548, "step": 828000 }, { "epoch": 3.33, "learning_rate": 2.6191309887473904e-05, "loss": 1.1878, "step": 828500 }, { "epoch": 3.34, "learning_rate": 2.6176935801306435e-05, "loss": 1.2124, "step": 829000 }, { "epoch": 3.34, "learning_rate": 2.6162561715138963e-05, "loss": 1.1506, "step": 829500 }, { "epoch": 3.34, "learning_rate": 2.6148187628971487e-05, "loss": 1.1477, "step": 830000 }, { "epoch": 3.34, "learning_rate": 2.613384229097635e-05, "loss": 1.1757, "step": 830500 }, { "epoch": 3.34, "learning_rate": 2.611946820480888e-05, "loss": 1.2022, "step": 831000 }, { "epoch": 3.35, "learning_rate": 2.610509411864141e-05, "loss": 1.1714, "step": 831500 }, { "epoch": 3.35, "learning_rate": 2.6090748780646275e-05, "loss": 1.2133, "step": 832000 }, { "epoch": 3.35, "learning_rate": 2.60763746944788e-05, "loss": 1.1997, "step": 832500 }, { "epoch": 3.35, "learning_rate": 2.6062000608311326e-05, "loss": 1.1625, "step": 833000 }, { "epoch": 3.35, "learning_rate": 2.6047626522143857e-05, "loss": 1.1425, "step": 833500 }, { "epoch": 3.36, "learning_rate": 2.603328118414872e-05, "loss": 1.1886, "step": 834000 }, { "epoch": 3.36, "learning_rate": 2.601890709798125e-05, "loss": 1.1943, "step": 834500 }, { "epoch": 3.36, "learning_rate": 2.6004533011813777e-05, "loss": 1.1922, "step": 835000 }, { "epoch": 3.36, "learning_rate": 2.59901589256463e-05, "loss": 1.1883, "step": 835500 }, { "epoch": 3.36, "learning_rate": 2.5975784839478835e-05, "loss": 1.1884, "step": 836000 }, { "epoch": 3.37, "learning_rate": 2.596141075331136e-05, "loss": 1.2316, "step": 836500 }, { "epoch": 3.37, "learning_rate": 2.5947036667143887e-05, "loss": 1.2248, "step": 837000 }, { "epoch": 3.37, "learning_rate": 2.5932691329148752e-05, "loss": 1.2145, "step": 837500 }, { "epoch": 3.37, "learning_rate": 2.5918317242981276e-05, "loss": 1.2003, "step": 838000 }, { "epoch": 3.37, "learning_rate": 2.590394315681381e-05, "loss": 1.2374, "step": 838500 }, { "epoch": 3.38, "learning_rate": 2.5889569070646335e-05, "loss": 1.194, "step": 839000 }, { "epoch": 3.38, "learning_rate": 2.5875194984478862e-05, "loss": 1.2262, "step": 839500 }, { "epoch": 3.38, "learning_rate": 2.5860849646483727e-05, "loss": 1.1755, "step": 840000 }, { "epoch": 3.38, "learning_rate": 2.584647556031625e-05, "loss": 1.193, "step": 840500 }, { "epoch": 3.38, "learning_rate": 2.5832101474148785e-05, "loss": 1.2141, "step": 841000 }, { "epoch": 3.39, "learning_rate": 2.581772738798131e-05, "loss": 1.1863, "step": 841500 }, { "epoch": 3.39, "learning_rate": 2.5803382049986174e-05, "loss": 1.1763, "step": 842000 }, { "epoch": 3.39, "learning_rate": 2.57890079638187e-05, "loss": 1.1525, "step": 842500 }, { "epoch": 3.39, "learning_rate": 2.577463387765123e-05, "loss": 1.1811, "step": 843000 }, { "epoch": 3.39, "learning_rate": 2.576025979148376e-05, "loss": 1.1807, "step": 843500 }, { "epoch": 3.4, "learning_rate": 2.5745885705316288e-05, "loss": 1.2043, "step": 844000 }, { "epoch": 3.4, "learning_rate": 2.573154036732115e-05, "loss": 1.1559, "step": 844500 }, { "epoch": 3.4, "learning_rate": 2.5717166281153676e-05, "loss": 1.2187, "step": 845000 }, { "epoch": 3.4, "learning_rate": 2.5702792194986204e-05, "loss": 1.1387, "step": 845500 }, { "epoch": 3.4, "learning_rate": 2.5688418108818735e-05, "loss": 1.165, "step": 846000 }, { "epoch": 3.41, "learning_rate": 2.5674044022651262e-05, "loss": 1.2342, "step": 846500 }, { "epoch": 3.41, "learning_rate": 2.5659698684656124e-05, "loss": 1.1913, "step": 847000 }, { "epoch": 3.41, "learning_rate": 2.564532459848865e-05, "loss": 1.2385, "step": 847500 }, { "epoch": 3.41, "learning_rate": 2.563095051232118e-05, "loss": 1.2333, "step": 848000 }, { "epoch": 3.41, "learning_rate": 2.561657642615371e-05, "loss": 1.2223, "step": 848500 }, { "epoch": 3.42, "learning_rate": 2.5602202339986237e-05, "loss": 1.2245, "step": 849000 }, { "epoch": 3.42, "learning_rate": 2.55878570019911e-05, "loss": 1.2579, "step": 849500 }, { "epoch": 3.42, "learning_rate": 2.5573482915823626e-05, "loss": 1.2277, "step": 850000 }, { "epoch": 3.42, "learning_rate": 2.5559108829656154e-05, "loss": 1.2517, "step": 850500 }, { "epoch": 3.43, "learning_rate": 2.5544763491661018e-05, "loss": 1.1861, "step": 851000 }, { "epoch": 3.43, "learning_rate": 2.553038940549355e-05, "loss": 1.2176, "step": 851500 }, { "epoch": 3.43, "learning_rate": 2.5516015319326077e-05, "loss": 1.208, "step": 852000 }, { "epoch": 3.43, "learning_rate": 2.55016412331586e-05, "loss": 1.169, "step": 852500 }, { "epoch": 3.43, "learning_rate": 2.5487267146991135e-05, "loss": 1.1972, "step": 853000 }, { "epoch": 3.44, "learning_rate": 2.547289306082366e-05, "loss": 1.1865, "step": 853500 }, { "epoch": 3.44, "learning_rate": 2.5458518974656187e-05, "loss": 1.1974, "step": 854000 }, { "epoch": 3.44, "learning_rate": 2.5444144888488715e-05, "loss": 1.1921, "step": 854500 }, { "epoch": 3.44, "learning_rate": 2.5429799550493576e-05, "loss": 1.1845, "step": 855000 }, { "epoch": 3.44, "learning_rate": 2.541542546432611e-05, "loss": 1.1969, "step": 855500 }, { "epoch": 3.45, "learning_rate": 2.5401051378158634e-05, "loss": 1.1579, "step": 856000 }, { "epoch": 3.45, "learning_rate": 2.5386677291991162e-05, "loss": 1.2033, "step": 856500 }, { "epoch": 3.45, "learning_rate": 2.537230320582369e-05, "loss": 1.1873, "step": 857000 }, { "epoch": 3.45, "learning_rate": 2.5357957867828554e-05, "loss": 1.262, "step": 857500 }, { "epoch": 3.45, "learning_rate": 2.5343612529833415e-05, "loss": 1.2345, "step": 858000 }, { "epoch": 3.46, "learning_rate": 2.5329238443665943e-05, "loss": 1.1708, "step": 858500 }, { "epoch": 3.46, "learning_rate": 2.5314864357498474e-05, "loss": 1.1934, "step": 859000 }, { "epoch": 3.46, "learning_rate": 2.5300490271331e-05, "loss": 1.2051, "step": 859500 }, { "epoch": 3.46, "learning_rate": 2.528611618516353e-05, "loss": 1.1626, "step": 860000 }, { "epoch": 3.46, "learning_rate": 2.527177084716839e-05, "loss": 1.2027, "step": 860500 }, { "epoch": 3.47, "learning_rate": 2.5257425509173255e-05, "loss": 1.1948, "step": 861000 }, { "epoch": 3.47, "learning_rate": 2.5243051423005782e-05, "loss": 1.1753, "step": 861500 }, { "epoch": 3.47, "learning_rate": 2.5228677336838313e-05, "loss": 1.1782, "step": 862000 }, { "epoch": 3.47, "learning_rate": 2.521430325067084e-05, "loss": 1.1961, "step": 862500 }, { "epoch": 3.47, "learning_rate": 2.5199929164503365e-05, "loss": 1.2003, "step": 863000 }, { "epoch": 3.48, "learning_rate": 2.51855550783359e-05, "loss": 1.2066, "step": 863500 }, { "epoch": 3.48, "learning_rate": 2.5171180992168423e-05, "loss": 1.1561, "step": 864000 }, { "epoch": 3.48, "learning_rate": 2.515680690600095e-05, "loss": 1.1935, "step": 864500 }, { "epoch": 3.48, "learning_rate": 2.514243281983348e-05, "loss": 1.1848, "step": 865000 }, { "epoch": 3.48, "learning_rate": 2.512805873366601e-05, "loss": 1.223, "step": 865500 }, { "epoch": 3.49, "learning_rate": 2.5113684647498537e-05, "loss": 1.2462, "step": 866000 }, { "epoch": 3.49, "learning_rate": 2.5099310561331065e-05, "loss": 1.1886, "step": 866500 }, { "epoch": 3.49, "learning_rate": 2.5084936475163596e-05, "loss": 1.2163, "step": 867000 }, { "epoch": 3.49, "learning_rate": 2.5070562388996123e-05, "loss": 1.1928, "step": 867500 }, { "epoch": 3.49, "learning_rate": 2.5056188302828647e-05, "loss": 1.1466, "step": 868000 }, { "epoch": 3.5, "learning_rate": 2.5041842964833512e-05, "loss": 1.1692, "step": 868500 }, { "epoch": 3.5, "learning_rate": 2.502746887866604e-05, "loss": 1.181, "step": 869000 }, { "epoch": 3.5, "learning_rate": 2.501309479249857e-05, "loss": 1.1604, "step": 869500 }, { "epoch": 3.5, "learning_rate": 2.4998720706331098e-05, "loss": 1.1684, "step": 870000 }, { "epoch": 3.5, "learning_rate": 2.4984346620163622e-05, "loss": 1.1951, "step": 870500 }, { "epoch": 3.51, "learning_rate": 2.4970001282168487e-05, "loss": 1.1963, "step": 871000 }, { "epoch": 3.51, "learning_rate": 2.4955627196001018e-05, "loss": 1.1888, "step": 871500 }, { "epoch": 3.51, "learning_rate": 2.4941253109833542e-05, "loss": 1.1906, "step": 872000 }, { "epoch": 3.51, "learning_rate": 2.4926879023666073e-05, "loss": 1.1896, "step": 872500 }, { "epoch": 3.51, "learning_rate": 2.4912504937498597e-05, "loss": 1.2349, "step": 873000 }, { "epoch": 3.52, "learning_rate": 2.4898130851331128e-05, "loss": 1.1894, "step": 873500 }, { "epoch": 3.52, "learning_rate": 2.4883785513335993e-05, "loss": 1.1912, "step": 874000 }, { "epoch": 3.52, "learning_rate": 2.4869411427168517e-05, "loss": 1.1669, "step": 874500 }, { "epoch": 3.52, "learning_rate": 2.4855037341001048e-05, "loss": 1.2058, "step": 875000 }, { "epoch": 3.52, "learning_rate": 2.4840663254833575e-05, "loss": 1.212, "step": 875500 }, { "epoch": 3.53, "learning_rate": 2.4826317916838436e-05, "loss": 1.1972, "step": 876000 }, { "epoch": 3.53, "learning_rate": 2.4811943830670967e-05, "loss": 1.1659, "step": 876500 }, { "epoch": 3.53, "learning_rate": 2.4797569744503495e-05, "loss": 1.1717, "step": 877000 }, { "epoch": 3.53, "learning_rate": 2.4783195658336022e-05, "loss": 1.1978, "step": 877500 }, { "epoch": 3.53, "learning_rate": 2.4768821572168553e-05, "loss": 1.1735, "step": 878000 }, { "epoch": 3.54, "learning_rate": 2.475447623417341e-05, "loss": 1.2148, "step": 878500 }, { "epoch": 3.54, "learning_rate": 2.4740102148005942e-05, "loss": 1.1704, "step": 879000 }, { "epoch": 3.54, "learning_rate": 2.472572806183847e-05, "loss": 1.2026, "step": 879500 }, { "epoch": 3.54, "learning_rate": 2.4711353975670997e-05, "loss": 1.1933, "step": 880000 }, { "epoch": 3.54, "learning_rate": 2.4696979889503528e-05, "loss": 1.2241, "step": 880500 }, { "epoch": 3.55, "learning_rate": 2.4682605803336052e-05, "loss": 1.169, "step": 881000 }, { "epoch": 3.55, "learning_rate": 2.4668231717168583e-05, "loss": 1.1719, "step": 881500 }, { "epoch": 3.55, "learning_rate": 2.465385763100111e-05, "loss": 1.1823, "step": 882000 }, { "epoch": 3.55, "learning_rate": 2.4639512293005972e-05, "loss": 1.1706, "step": 882500 }, { "epoch": 3.55, "learning_rate": 2.4625166955010837e-05, "loss": 1.1814, "step": 883000 }, { "epoch": 3.56, "learning_rate": 2.4610792868843364e-05, "loss": 1.2014, "step": 883500 }, { "epoch": 3.56, "learning_rate": 2.4596418782675892e-05, "loss": 1.1884, "step": 884000 }, { "epoch": 3.56, "learning_rate": 2.4582044696508423e-05, "loss": 1.1569, "step": 884500 }, { "epoch": 3.56, "learning_rate": 2.4567699358513284e-05, "loss": 1.2318, "step": 885000 }, { "epoch": 3.56, "learning_rate": 2.455332527234581e-05, "loss": 1.2312, "step": 885500 }, { "epoch": 3.57, "learning_rate": 2.4538951186178343e-05, "loss": 1.1868, "step": 886000 }, { "epoch": 3.57, "learning_rate": 2.45246058481832e-05, "loss": 1.1478, "step": 886500 }, { "epoch": 3.57, "learning_rate": 2.451023176201573e-05, "loss": 1.1534, "step": 887000 }, { "epoch": 3.57, "learning_rate": 2.449585767584826e-05, "loss": 1.2182, "step": 887500 }, { "epoch": 3.57, "learning_rate": 2.4481483589680786e-05, "loss": 1.1811, "step": 888000 }, { "epoch": 3.58, "learning_rate": 2.4467109503513317e-05, "loss": 1.2205, "step": 888500 }, { "epoch": 3.58, "learning_rate": 2.445273541734584e-05, "loss": 1.1841, "step": 889000 }, { "epoch": 3.58, "learning_rate": 2.4438361331178373e-05, "loss": 1.194, "step": 889500 }, { "epoch": 3.58, "learning_rate": 2.4424015993183237e-05, "loss": 1.1959, "step": 890000 }, { "epoch": 3.58, "learning_rate": 2.440964190701576e-05, "loss": 1.1874, "step": 890500 }, { "epoch": 3.59, "learning_rate": 2.4395267820848292e-05, "loss": 1.1992, "step": 891000 }, { "epoch": 3.59, "learning_rate": 2.4380893734680816e-05, "loss": 1.1908, "step": 891500 }, { "epoch": 3.59, "learning_rate": 2.4366519648513347e-05, "loss": 1.1908, "step": 892000 }, { "epoch": 3.59, "learning_rate": 2.4352145562345875e-05, "loss": 1.2308, "step": 892500 }, { "epoch": 3.59, "learning_rate": 2.4337771476178403e-05, "loss": 1.1986, "step": 893000 }, { "epoch": 3.6, "learning_rate": 2.4323397390010933e-05, "loss": 1.2446, "step": 893500 }, { "epoch": 3.6, "learning_rate": 2.4309023303843458e-05, "loss": 1.2184, "step": 894000 }, { "epoch": 3.6, "learning_rate": 2.4294677965848322e-05, "loss": 1.1718, "step": 894500 }, { "epoch": 3.6, "learning_rate": 2.428030387968085e-05, "loss": 1.226, "step": 895000 }, { "epoch": 3.6, "learning_rate": 2.4265929793513377e-05, "loss": 1.1927, "step": 895500 }, { "epoch": 3.61, "learning_rate": 2.425155570734591e-05, "loss": 1.2361, "step": 896000 }, { "epoch": 3.61, "learning_rate": 2.4237181621178433e-05, "loss": 1.1985, "step": 896500 }, { "epoch": 3.61, "learning_rate": 2.4222836283183297e-05, "loss": 1.1943, "step": 897000 }, { "epoch": 3.61, "learning_rate": 2.4208462197015828e-05, "loss": 1.1602, "step": 897500 }, { "epoch": 3.61, "learning_rate": 2.419411685902069e-05, "loss": 1.1945, "step": 898000 }, { "epoch": 3.62, "learning_rate": 2.4179742772853217e-05, "loss": 1.1917, "step": 898500 }, { "epoch": 3.62, "learning_rate": 2.4165368686685748e-05, "loss": 1.1912, "step": 899000 }, { "epoch": 3.62, "learning_rate": 2.4150994600518272e-05, "loss": 1.1658, "step": 899500 }, { "epoch": 3.62, "learning_rate": 2.4136620514350803e-05, "loss": 1.1596, "step": 900000 }, { "epoch": 3.62, "learning_rate": 2.412224642818333e-05, "loss": 1.1588, "step": 900500 }, { "epoch": 3.63, "learning_rate": 2.4107872342015858e-05, "loss": 1.1778, "step": 901000 }, { "epoch": 3.63, "learning_rate": 2.4093498255848386e-05, "loss": 1.2307, "step": 901500 }, { "epoch": 3.63, "learning_rate": 2.4079152917853247e-05, "loss": 1.2047, "step": 902000 }, { "epoch": 3.63, "learning_rate": 2.4064778831685778e-05, "loss": 1.2131, "step": 902500 }, { "epoch": 3.63, "learning_rate": 2.4050404745518305e-05, "loss": 1.1606, "step": 903000 }, { "epoch": 3.64, "learning_rate": 2.4036030659350833e-05, "loss": 1.1774, "step": 903500 }, { "epoch": 3.64, "learning_rate": 2.4021685321355697e-05, "loss": 1.1936, "step": 904000 }, { "epoch": 3.64, "learning_rate": 2.4007311235188225e-05, "loss": 1.2016, "step": 904500 }, { "epoch": 3.64, "learning_rate": 2.3992937149020753e-05, "loss": 1.2103, "step": 905000 }, { "epoch": 3.64, "learning_rate": 2.397856306285328e-05, "loss": 1.2546, "step": 905500 }, { "epoch": 3.65, "learning_rate": 2.3964188976685808e-05, "loss": 1.1628, "step": 906000 }, { "epoch": 3.65, "learning_rate": 2.3949843638690672e-05, "loss": 1.2363, "step": 906500 }, { "epoch": 3.65, "learning_rate": 2.39354695525232e-05, "loss": 1.1835, "step": 907000 }, { "epoch": 3.65, "learning_rate": 2.3921095466355727e-05, "loss": 1.1611, "step": 907500 }, { "epoch": 3.65, "learning_rate": 2.3906721380188255e-05, "loss": 1.1875, "step": 908000 }, { "epoch": 3.66, "learning_rate": 2.3892347294020783e-05, "loss": 1.1809, "step": 908500 }, { "epoch": 3.66, "learning_rate": 2.3877973207853314e-05, "loss": 1.1983, "step": 909000 } ], "logging_steps": 500, "max_steps": 1739241, "num_input_tokens_seen": 0, "num_train_epochs": 7, "save_steps": 9000, "total_flos": 8.699092914175696e+20, "train_batch_size": 8, "trial_name": null, "trial_params": null }