Tzefa-Word-OCR-TrOCR / trainer_state.json
WARAJA's picture
Upload trainer_state.json
ca9e6e3 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.6584924113449486,
"eval_steps": 500,
"global_step": 909000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 4.9985712158349534e-05,
"loss": 2.2557,
"step": 500
},
{
"epoch": 0.0,
"learning_rate": 4.997133807218206e-05,
"loss": 2.049,
"step": 1000
},
{
"epoch": 0.01,
"learning_rate": 4.995699273418692e-05,
"loss": 2.0487,
"step": 1500
},
{
"epoch": 0.01,
"learning_rate": 4.9942618648019454e-05,
"loss": 2.0357,
"step": 2000
},
{
"epoch": 0.01,
"learning_rate": 4.9928244561851985e-05,
"loss": 2.0048,
"step": 2500
},
{
"epoch": 0.01,
"learning_rate": 4.991387047568451e-05,
"loss": 1.919,
"step": 3000
},
{
"epoch": 0.01,
"learning_rate": 4.989949638951703e-05,
"loss": 1.9217,
"step": 3500
},
{
"epoch": 0.02,
"learning_rate": 4.988512230334957e-05,
"loss": 1.825,
"step": 4000
},
{
"epoch": 0.02,
"learning_rate": 4.9870748217182095e-05,
"loss": 1.8015,
"step": 4500
},
{
"epoch": 0.02,
"learning_rate": 4.985637413101462e-05,
"loss": 1.7936,
"step": 5000
},
{
"epoch": 0.02,
"learning_rate": 4.984200004484715e-05,
"loss": 1.7456,
"step": 5500
},
{
"epoch": 0.02,
"learning_rate": 4.9827654706852015e-05,
"loss": 1.7414,
"step": 6000
},
{
"epoch": 0.03,
"learning_rate": 4.9813280620684546e-05,
"loss": 1.728,
"step": 6500
},
{
"epoch": 0.03,
"learning_rate": 4.979890653451707e-05,
"loss": 1.7282,
"step": 7000
},
{
"epoch": 0.03,
"learning_rate": 4.9784532448349594e-05,
"loss": 1.7469,
"step": 7500
},
{
"epoch": 0.03,
"learning_rate": 4.97702158585268e-05,
"loss": 1.6573,
"step": 8000
},
{
"epoch": 0.03,
"learning_rate": 4.975584177235932e-05,
"loss": 1.7118,
"step": 8500
},
{
"epoch": 0.04,
"learning_rate": 4.974146768619185e-05,
"loss": 1.6161,
"step": 9000
},
{
"epoch": 0.04,
"learning_rate": 4.9727093600024385e-05,
"loss": 1.6275,
"step": 9500
},
{
"epoch": 0.04,
"learning_rate": 4.971271951385691e-05,
"loss": 1.6875,
"step": 10000
},
{
"epoch": 0.04,
"learning_rate": 4.9698345427689434e-05,
"loss": 1.6544,
"step": 10500
},
{
"epoch": 0.04,
"learning_rate": 4.9683971341521965e-05,
"loss": 1.6431,
"step": 11000
},
{
"epoch": 0.05,
"learning_rate": 4.9669597255354495e-05,
"loss": 1.6536,
"step": 11500
},
{
"epoch": 0.05,
"learning_rate": 4.965522316918702e-05,
"loss": 1.6471,
"step": 12000
},
{
"epoch": 0.05,
"learning_rate": 4.9640877831191884e-05,
"loss": 1.5942,
"step": 12500
},
{
"epoch": 0.05,
"learning_rate": 4.962650374502441e-05,
"loss": 1.6318,
"step": 13000
},
{
"epoch": 0.05,
"learning_rate": 4.9612129658856946e-05,
"loss": 1.5797,
"step": 13500
},
{
"epoch": 0.06,
"learning_rate": 4.959775557268947e-05,
"loss": 1.5792,
"step": 14000
},
{
"epoch": 0.06,
"learning_rate": 4.9583410234694335e-05,
"loss": 1.5361,
"step": 14500
},
{
"epoch": 0.06,
"learning_rate": 4.956903614852686e-05,
"loss": 1.6009,
"step": 15000
},
{
"epoch": 0.06,
"learning_rate": 4.9554690810531724e-05,
"loss": 1.5841,
"step": 15500
},
{
"epoch": 0.06,
"learning_rate": 4.954031672436425e-05,
"loss": 1.5948,
"step": 16000
},
{
"epoch": 0.07,
"learning_rate": 4.952594263819678e-05,
"loss": 1.6366,
"step": 16500
},
{
"epoch": 0.07,
"learning_rate": 4.951156855202931e-05,
"loss": 1.5873,
"step": 17000
},
{
"epoch": 0.07,
"learning_rate": 4.9497194465861834e-05,
"loss": 1.6046,
"step": 17500
},
{
"epoch": 0.07,
"learning_rate": 4.94828491278667e-05,
"loss": 1.6193,
"step": 18000
},
{
"epoch": 0.07,
"learning_rate": 4.946847504169922e-05,
"loss": 1.5759,
"step": 18500
},
{
"epoch": 0.08,
"learning_rate": 4.9454100955531754e-05,
"loss": 1.5702,
"step": 19000
},
{
"epoch": 0.08,
"learning_rate": 4.9439726869364285e-05,
"loss": 1.5179,
"step": 19500
},
{
"epoch": 0.08,
"learning_rate": 4.942535278319681e-05,
"loss": 1.5536,
"step": 20000
},
{
"epoch": 0.08,
"learning_rate": 4.941097869702934e-05,
"loss": 1.5235,
"step": 20500
},
{
"epoch": 0.08,
"learning_rate": 4.939660461086187e-05,
"loss": 1.5325,
"step": 21000
},
{
"epoch": 0.09,
"learning_rate": 4.9382230524694395e-05,
"loss": 1.5385,
"step": 21500
},
{
"epoch": 0.09,
"learning_rate": 4.936788518669926e-05,
"loss": 1.5189,
"step": 22000
},
{
"epoch": 0.09,
"learning_rate": 4.9353511100531784e-05,
"loss": 1.548,
"step": 22500
},
{
"epoch": 0.09,
"learning_rate": 4.9339137014364315e-05,
"loss": 1.5116,
"step": 23000
},
{
"epoch": 0.09,
"learning_rate": 4.9324762928196846e-05,
"loss": 1.5404,
"step": 23500
},
{
"epoch": 0.1,
"learning_rate": 4.931041759020171e-05,
"loss": 1.5425,
"step": 24000
},
{
"epoch": 0.1,
"learning_rate": 4.929607225220657e-05,
"loss": 1.5627,
"step": 24500
},
{
"epoch": 0.1,
"learning_rate": 4.92816981660391e-05,
"loss": 1.5173,
"step": 25000
},
{
"epoch": 0.1,
"learning_rate": 4.926732407987162e-05,
"loss": 1.5033,
"step": 25500
},
{
"epoch": 0.1,
"learning_rate": 4.925294999370415e-05,
"loss": 1.4979,
"step": 26000
},
{
"epoch": 0.11,
"learning_rate": 4.9238575907536685e-05,
"loss": 1.5482,
"step": 26500
},
{
"epoch": 0.11,
"learning_rate": 4.922420182136921e-05,
"loss": 1.5178,
"step": 27000
},
{
"epoch": 0.11,
"learning_rate": 4.920982773520173e-05,
"loss": 1.5487,
"step": 27500
},
{
"epoch": 0.11,
"learning_rate": 4.919545364903427e-05,
"loss": 1.5682,
"step": 28000
},
{
"epoch": 0.11,
"learning_rate": 4.918110831103913e-05,
"loss": 1.5289,
"step": 28500
},
{
"epoch": 0.12,
"learning_rate": 4.916673422487166e-05,
"loss": 1.5002,
"step": 29000
},
{
"epoch": 0.12,
"learning_rate": 4.9152360138704184e-05,
"loss": 1.5392,
"step": 29500
},
{
"epoch": 0.12,
"learning_rate": 4.913798605253671e-05,
"loss": 1.5209,
"step": 30000
},
{
"epoch": 0.12,
"learning_rate": 4.9123611966369246e-05,
"loss": 1.4972,
"step": 30500
},
{
"epoch": 0.12,
"learning_rate": 4.910923788020177e-05,
"loss": 1.4937,
"step": 31000
},
{
"epoch": 0.13,
"learning_rate": 4.9094863794034294e-05,
"loss": 1.4849,
"step": 31500
},
{
"epoch": 0.13,
"learning_rate": 4.9080489707866825e-05,
"loss": 1.4916,
"step": 32000
},
{
"epoch": 0.13,
"learning_rate": 4.9066115621699356e-05,
"loss": 1.4975,
"step": 32500
},
{
"epoch": 0.13,
"learning_rate": 4.905174153553188e-05,
"loss": 1.4753,
"step": 33000
},
{
"epoch": 0.13,
"learning_rate": 4.903742494570908e-05,
"loss": 1.4382,
"step": 33500
},
{
"epoch": 0.14,
"learning_rate": 4.902305085954161e-05,
"loss": 1.5305,
"step": 34000
},
{
"epoch": 0.14,
"learning_rate": 4.9008676773374134e-05,
"loss": 1.4588,
"step": 34500
},
{
"epoch": 0.14,
"learning_rate": 4.8994302687206665e-05,
"loss": 1.4831,
"step": 35000
},
{
"epoch": 0.14,
"learning_rate": 4.897995734921152e-05,
"loss": 1.5033,
"step": 35500
},
{
"epoch": 0.14,
"learning_rate": 4.896558326304406e-05,
"loss": 1.5217,
"step": 36000
},
{
"epoch": 0.15,
"learning_rate": 4.8951209176876584e-05,
"loss": 1.4881,
"step": 36500
},
{
"epoch": 0.15,
"learning_rate": 4.893683509070911e-05,
"loss": 1.4689,
"step": 37000
},
{
"epoch": 0.15,
"learning_rate": 4.892246100454164e-05,
"loss": 1.5241,
"step": 37500
},
{
"epoch": 0.15,
"learning_rate": 4.890808691837417e-05,
"loss": 1.5023,
"step": 38000
},
{
"epoch": 0.15,
"learning_rate": 4.8893712832206695e-05,
"loss": 1.4979,
"step": 38500
},
{
"epoch": 0.16,
"learning_rate": 4.887933874603922e-05,
"loss": 1.4906,
"step": 39000
},
{
"epoch": 0.16,
"learning_rate": 4.8864964659871757e-05,
"loss": 1.4677,
"step": 39500
},
{
"epoch": 0.16,
"learning_rate": 4.8850619321876614e-05,
"loss": 1.4625,
"step": 40000
},
{
"epoch": 0.16,
"learning_rate": 4.8836245235709145e-05,
"loss": 1.4539,
"step": 40500
},
{
"epoch": 0.17,
"learning_rate": 4.882187114954167e-05,
"loss": 1.456,
"step": 41000
},
{
"epoch": 0.17,
"learning_rate": 4.8807497063374194e-05,
"loss": 1.4562,
"step": 41500
},
{
"epoch": 0.17,
"learning_rate": 4.879315172537906e-05,
"loss": 1.4844,
"step": 42000
},
{
"epoch": 0.17,
"learning_rate": 4.877877763921159e-05,
"loss": 1.4885,
"step": 42500
},
{
"epoch": 0.17,
"learning_rate": 4.876440355304412e-05,
"loss": 1.4746,
"step": 43000
},
{
"epoch": 0.18,
"learning_rate": 4.8750029466876644e-05,
"loss": 1.4728,
"step": 43500
},
{
"epoch": 0.18,
"learning_rate": 4.873568412888151e-05,
"loss": 1.4834,
"step": 44000
},
{
"epoch": 0.18,
"learning_rate": 4.872131004271403e-05,
"loss": 1.4744,
"step": 44500
},
{
"epoch": 0.18,
"learning_rate": 4.8706935956546564e-05,
"loss": 1.496,
"step": 45000
},
{
"epoch": 0.18,
"learning_rate": 4.8692561870379095e-05,
"loss": 1.4814,
"step": 45500
},
{
"epoch": 0.19,
"learning_rate": 4.867821653238396e-05,
"loss": 1.479,
"step": 46000
},
{
"epoch": 0.19,
"learning_rate": 4.8663842446216484e-05,
"loss": 1.4949,
"step": 46500
},
{
"epoch": 0.19,
"learning_rate": 4.864946836004901e-05,
"loss": 1.4751,
"step": 47000
},
{
"epoch": 0.19,
"learning_rate": 4.8635094273881546e-05,
"loss": 1.4633,
"step": 47500
},
{
"epoch": 0.19,
"learning_rate": 4.8620748935886403e-05,
"loss": 1.419,
"step": 48000
},
{
"epoch": 0.2,
"learning_rate": 4.8606374849718934e-05,
"loss": 1.4838,
"step": 48500
},
{
"epoch": 0.2,
"learning_rate": 4.859200076355146e-05,
"loss": 1.4311,
"step": 49000
},
{
"epoch": 0.2,
"learning_rate": 4.857765542555632e-05,
"loss": 1.5105,
"step": 49500
},
{
"epoch": 0.2,
"learning_rate": 4.856328133938885e-05,
"loss": 1.4417,
"step": 50000
},
{
"epoch": 0.2,
"learning_rate": 4.854890725322138e-05,
"loss": 1.4828,
"step": 50500
},
{
"epoch": 0.21,
"learning_rate": 4.853453316705391e-05,
"loss": 1.4745,
"step": 51000
},
{
"epoch": 0.21,
"learning_rate": 4.8520159080886433e-05,
"loss": 1.4476,
"step": 51500
},
{
"epoch": 0.21,
"learning_rate": 4.8505784994718964e-05,
"loss": 1.4907,
"step": 52000
},
{
"epoch": 0.21,
"learning_rate": 4.849143965672382e-05,
"loss": 1.4435,
"step": 52500
},
{
"epoch": 0.21,
"learning_rate": 4.847706557055635e-05,
"loss": 1.4265,
"step": 53000
},
{
"epoch": 0.22,
"learning_rate": 4.8462691484388884e-05,
"loss": 1.4731,
"step": 53500
},
{
"epoch": 0.22,
"learning_rate": 4.844831739822141e-05,
"loss": 1.4475,
"step": 54000
},
{
"epoch": 0.22,
"learning_rate": 4.843394331205394e-05,
"loss": 1.4976,
"step": 54500
},
{
"epoch": 0.22,
"learning_rate": 4.841956922588647e-05,
"loss": 1.4172,
"step": 55000
},
{
"epoch": 0.22,
"learning_rate": 4.8405223887891335e-05,
"loss": 1.4651,
"step": 55500
},
{
"epoch": 0.23,
"learning_rate": 4.839084980172386e-05,
"loss": 1.4469,
"step": 56000
},
{
"epoch": 0.23,
"learning_rate": 4.837647571555638e-05,
"loss": 1.4732,
"step": 56500
},
{
"epoch": 0.23,
"learning_rate": 4.8362101629388914e-05,
"loss": 1.4417,
"step": 57000
},
{
"epoch": 0.23,
"learning_rate": 4.8347727543221445e-05,
"loss": 1.4229,
"step": 57500
},
{
"epoch": 0.23,
"learning_rate": 4.833335345705397e-05,
"loss": 1.4677,
"step": 58000
},
{
"epoch": 0.24,
"learning_rate": 4.83189793708865e-05,
"loss": 1.4511,
"step": 58500
},
{
"epoch": 0.24,
"learning_rate": 4.830460528471903e-05,
"loss": 1.4449,
"step": 59000
},
{
"epoch": 0.24,
"learning_rate": 4.8290231198551555e-05,
"loss": 1.4342,
"step": 59500
},
{
"epoch": 0.24,
"learning_rate": 4.827588586055642e-05,
"loss": 1.4212,
"step": 60000
},
{
"epoch": 0.24,
"learning_rate": 4.8261511774388944e-05,
"loss": 1.4428,
"step": 60500
},
{
"epoch": 0.25,
"learning_rate": 4.824716643639381e-05,
"loss": 1.4625,
"step": 61000
},
{
"epoch": 0.25,
"learning_rate": 4.823279235022633e-05,
"loss": 1.4529,
"step": 61500
},
{
"epoch": 0.25,
"learning_rate": 4.8218418264058864e-05,
"loss": 1.4375,
"step": 62000
},
{
"epoch": 0.25,
"learning_rate": 4.8204044177891395e-05,
"loss": 1.4338,
"step": 62500
},
{
"epoch": 0.25,
"learning_rate": 4.818967009172392e-05,
"loss": 1.4556,
"step": 63000
},
{
"epoch": 0.26,
"learning_rate": 4.817529600555645e-05,
"loss": 1.4141,
"step": 63500
},
{
"epoch": 0.26,
"learning_rate": 4.816092191938898e-05,
"loss": 1.4264,
"step": 64000
},
{
"epoch": 0.26,
"learning_rate": 4.8146547833221505e-05,
"loss": 1.4223,
"step": 64500
},
{
"epoch": 0.26,
"learning_rate": 4.813220249522637e-05,
"loss": 1.4082,
"step": 65000
},
{
"epoch": 0.26,
"learning_rate": 4.8117828409058894e-05,
"loss": 1.4546,
"step": 65500
},
{
"epoch": 0.27,
"learning_rate": 4.8103454322891425e-05,
"loss": 1.4737,
"step": 66000
},
{
"epoch": 0.27,
"learning_rate": 4.808910898489629e-05,
"loss": 1.4299,
"step": 66500
},
{
"epoch": 0.27,
"learning_rate": 4.807473489872882e-05,
"loss": 1.4541,
"step": 67000
},
{
"epoch": 0.27,
"learning_rate": 4.8060360812561344e-05,
"loss": 1.444,
"step": 67500
},
{
"epoch": 0.27,
"learning_rate": 4.804598672639387e-05,
"loss": 1.4292,
"step": 68000
},
{
"epoch": 0.28,
"learning_rate": 4.80316126402264e-05,
"loss": 1.3933,
"step": 68500
},
{
"epoch": 0.28,
"learning_rate": 4.801723855405893e-05,
"loss": 1.4456,
"step": 69000
},
{
"epoch": 0.28,
"learning_rate": 4.8002864467891455e-05,
"loss": 1.4617,
"step": 69500
},
{
"epoch": 0.28,
"learning_rate": 4.7988490381723986e-05,
"loss": 1.4573,
"step": 70000
},
{
"epoch": 0.28,
"learning_rate": 4.7974145043728843e-05,
"loss": 1.4704,
"step": 70500
},
{
"epoch": 0.29,
"learning_rate": 4.7959770957561374e-05,
"loss": 1.4401,
"step": 71000
},
{
"epoch": 0.29,
"learning_rate": 4.794542561956624e-05,
"loss": 1.4131,
"step": 71500
},
{
"epoch": 0.29,
"learning_rate": 4.793105153339877e-05,
"loss": 1.4041,
"step": 72000
},
{
"epoch": 0.29,
"learning_rate": 4.7916677447231294e-05,
"loss": 1.4603,
"step": 72500
},
{
"epoch": 0.29,
"learning_rate": 4.7902303361063825e-05,
"loss": 1.444,
"step": 73000
},
{
"epoch": 0.3,
"learning_rate": 4.788792927489635e-05,
"loss": 1.4544,
"step": 73500
},
{
"epoch": 0.3,
"learning_rate": 4.787355518872888e-05,
"loss": 1.4553,
"step": 74000
},
{
"epoch": 0.3,
"learning_rate": 4.7859181102561404e-05,
"loss": 1.417,
"step": 74500
},
{
"epoch": 0.3,
"learning_rate": 4.784483576456627e-05,
"loss": 1.4277,
"step": 75000
},
{
"epoch": 0.3,
"learning_rate": 4.78304616783988e-05,
"loss": 1.4746,
"step": 75500
},
{
"epoch": 0.31,
"learning_rate": 4.781608759223133e-05,
"loss": 1.4245,
"step": 76000
},
{
"epoch": 0.31,
"learning_rate": 4.7801713506063855e-05,
"loss": 1.429,
"step": 76500
},
{
"epoch": 0.31,
"learning_rate": 4.778733941989638e-05,
"loss": 1.39,
"step": 77000
},
{
"epoch": 0.31,
"learning_rate": 4.777296533372891e-05,
"loss": 1.4128,
"step": 77500
},
{
"epoch": 0.31,
"learning_rate": 4.775859124756144e-05,
"loss": 1.4054,
"step": 78000
},
{
"epoch": 0.32,
"learning_rate": 4.7744217161393965e-05,
"loss": 1.4737,
"step": 78500
},
{
"epoch": 0.32,
"learning_rate": 4.772987182339883e-05,
"loss": 1.4217,
"step": 79000
},
{
"epoch": 0.32,
"learning_rate": 4.7715497737231354e-05,
"loss": 1.4723,
"step": 79500
},
{
"epoch": 0.32,
"learning_rate": 4.770115239923622e-05,
"loss": 1.4435,
"step": 80000
},
{
"epoch": 0.32,
"learning_rate": 4.768677831306875e-05,
"loss": 1.438,
"step": 80500
},
{
"epoch": 0.33,
"learning_rate": 4.767240422690128e-05,
"loss": 1.4572,
"step": 81000
},
{
"epoch": 0.33,
"learning_rate": 4.7658030140733805e-05,
"loss": 1.3813,
"step": 81500
},
{
"epoch": 0.33,
"learning_rate": 4.7643656054566336e-05,
"loss": 1.45,
"step": 82000
},
{
"epoch": 0.33,
"learning_rate": 4.7629310716571193e-05,
"loss": 1.4768,
"step": 82500
},
{
"epoch": 0.33,
"learning_rate": 4.7614936630403724e-05,
"loss": 1.3856,
"step": 83000
},
{
"epoch": 0.34,
"learning_rate": 4.7600562544236255e-05,
"loss": 1.4351,
"step": 83500
},
{
"epoch": 0.34,
"learning_rate": 4.758618845806878e-05,
"loss": 1.4182,
"step": 84000
},
{
"epoch": 0.34,
"learning_rate": 4.757181437190131e-05,
"loss": 1.4368,
"step": 84500
},
{
"epoch": 0.34,
"learning_rate": 4.7557440285733835e-05,
"loss": 1.4184,
"step": 85000
},
{
"epoch": 0.34,
"learning_rate": 4.7543066199566366e-05,
"loss": 1.432,
"step": 85500
},
{
"epoch": 0.35,
"learning_rate": 4.752869211339889e-05,
"loss": 1.4598,
"step": 86000
},
{
"epoch": 0.35,
"learning_rate": 4.7514346775403754e-05,
"loss": 1.4246,
"step": 86500
},
{
"epoch": 0.35,
"learning_rate": 4.7499972689236285e-05,
"loss": 1.4359,
"step": 87000
},
{
"epoch": 0.35,
"learning_rate": 4.7485598603068816e-05,
"loss": 1.4266,
"step": 87500
},
{
"epoch": 0.35,
"learning_rate": 4.747122451690134e-05,
"loss": 1.4182,
"step": 88000
},
{
"epoch": 0.36,
"learning_rate": 4.7456879178906205e-05,
"loss": 1.4571,
"step": 88500
},
{
"epoch": 0.36,
"learning_rate": 4.744250509273873e-05,
"loss": 1.4457,
"step": 89000
},
{
"epoch": 0.36,
"learning_rate": 4.742813100657126e-05,
"loss": 1.4522,
"step": 89500
},
{
"epoch": 0.36,
"learning_rate": 4.741375692040379e-05,
"loss": 1.4193,
"step": 90000
},
{
"epoch": 0.36,
"learning_rate": 4.7399382834236315e-05,
"loss": 1.3954,
"step": 90500
},
{
"epoch": 0.37,
"learning_rate": 4.7385008748068846e-05,
"loss": 1.4555,
"step": 91000
},
{
"epoch": 0.37,
"learning_rate": 4.7370663410073704e-05,
"loss": 1.3915,
"step": 91500
},
{
"epoch": 0.37,
"learning_rate": 4.7356289323906235e-05,
"loss": 1.4263,
"step": 92000
},
{
"epoch": 0.37,
"learning_rate": 4.7341915237738766e-05,
"loss": 1.4067,
"step": 92500
},
{
"epoch": 0.37,
"learning_rate": 4.732754115157129e-05,
"loss": 1.4035,
"step": 93000
},
{
"epoch": 0.38,
"learning_rate": 4.7313195813576155e-05,
"loss": 1.4041,
"step": 93500
},
{
"epoch": 0.38,
"learning_rate": 4.729882172740868e-05,
"loss": 1.4253,
"step": 94000
},
{
"epoch": 0.38,
"learning_rate": 4.728444764124121e-05,
"loss": 1.4418,
"step": 94500
},
{
"epoch": 0.38,
"learning_rate": 4.727007355507374e-05,
"loss": 1.4457,
"step": 95000
},
{
"epoch": 0.38,
"learning_rate": 4.7255728217078605e-05,
"loss": 1.4415,
"step": 95500
},
{
"epoch": 0.39,
"learning_rate": 4.724135413091113e-05,
"loss": 1.4302,
"step": 96000
},
{
"epoch": 0.39,
"learning_rate": 4.722698004474366e-05,
"loss": 1.4659,
"step": 96500
},
{
"epoch": 0.39,
"learning_rate": 4.7212605958576185e-05,
"loss": 1.411,
"step": 97000
},
{
"epoch": 0.39,
"learning_rate": 4.7198231872408716e-05,
"loss": 1.4336,
"step": 97500
},
{
"epoch": 0.39,
"learning_rate": 4.718385778624124e-05,
"loss": 1.3472,
"step": 98000
},
{
"epoch": 0.4,
"learning_rate": 4.716948370007377e-05,
"loss": 1.4557,
"step": 98500
},
{
"epoch": 0.4,
"learning_rate": 4.71551096139063e-05,
"loss": 1.3925,
"step": 99000
},
{
"epoch": 0.4,
"learning_rate": 4.714076427591116e-05,
"loss": 1.3936,
"step": 99500
},
{
"epoch": 0.4,
"learning_rate": 4.712639018974369e-05,
"loss": 1.427,
"step": 100000
},
{
"epoch": 0.4,
"learning_rate": 4.7112016103576215e-05,
"loss": 1.4013,
"step": 100500
},
{
"epoch": 0.41,
"learning_rate": 4.7097642017408746e-05,
"loss": 1.4052,
"step": 101000
},
{
"epoch": 0.41,
"learning_rate": 4.708329667941361e-05,
"loss": 1.4341,
"step": 101500
},
{
"epoch": 0.41,
"learning_rate": 4.7068922593246134e-05,
"loss": 1.3709,
"step": 102000
},
{
"epoch": 0.41,
"learning_rate": 4.7054548507078665e-05,
"loss": 1.3834,
"step": 102500
},
{
"epoch": 0.41,
"learning_rate": 4.704017442091119e-05,
"loss": 1.4163,
"step": 103000
},
{
"epoch": 0.42,
"learning_rate": 4.702580033474372e-05,
"loss": 1.4131,
"step": 103500
},
{
"epoch": 0.42,
"learning_rate": 4.701142624857625e-05,
"loss": 1.4448,
"step": 104000
},
{
"epoch": 0.42,
"learning_rate": 4.699708091058111e-05,
"loss": 1.4091,
"step": 104500
},
{
"epoch": 0.42,
"learning_rate": 4.698270682441364e-05,
"loss": 1.3743,
"step": 105000
},
{
"epoch": 0.42,
"learning_rate": 4.6968361486418505e-05,
"loss": 1.4378,
"step": 105500
},
{
"epoch": 0.43,
"learning_rate": 4.695398740025103e-05,
"loss": 1.3978,
"step": 106000
},
{
"epoch": 0.43,
"learning_rate": 4.693961331408356e-05,
"loss": 1.4279,
"step": 106500
},
{
"epoch": 0.43,
"learning_rate": 4.692523922791609e-05,
"loss": 1.4038,
"step": 107000
},
{
"epoch": 0.43,
"learning_rate": 4.691089388992095e-05,
"loss": 1.3691,
"step": 107500
},
{
"epoch": 0.43,
"learning_rate": 4.689651980375348e-05,
"loss": 1.4098,
"step": 108000
},
{
"epoch": 0.44,
"learning_rate": 4.6882145717586004e-05,
"loss": 1.4601,
"step": 108500
},
{
"epoch": 0.44,
"learning_rate": 4.6867771631418535e-05,
"loss": 1.4051,
"step": 109000
},
{
"epoch": 0.44,
"learning_rate": 4.6853397545251066e-05,
"loss": 1.3787,
"step": 109500
},
{
"epoch": 0.44,
"learning_rate": 4.683902345908359e-05,
"loss": 1.3862,
"step": 110000
},
{
"epoch": 0.44,
"learning_rate": 4.682464937291612e-05,
"loss": 1.3977,
"step": 110500
},
{
"epoch": 0.45,
"learning_rate": 4.6810275286748645e-05,
"loss": 1.4151,
"step": 111000
},
{
"epoch": 0.45,
"learning_rate": 4.679592994875351e-05,
"loss": 1.4034,
"step": 111500
},
{
"epoch": 0.45,
"learning_rate": 4.678155586258604e-05,
"loss": 1.3965,
"step": 112000
},
{
"epoch": 0.45,
"learning_rate": 4.6767181776418565e-05,
"loss": 1.4523,
"step": 112500
},
{
"epoch": 0.45,
"learning_rate": 4.6752807690251096e-05,
"loss": 1.4077,
"step": 113000
},
{
"epoch": 0.46,
"learning_rate": 4.673843360408362e-05,
"loss": 1.4365,
"step": 113500
},
{
"epoch": 0.46,
"learning_rate": 4.672405951791615e-05,
"loss": 1.371,
"step": 114000
},
{
"epoch": 0.46,
"learning_rate": 4.670968543174868e-05,
"loss": 1.3935,
"step": 114500
},
{
"epoch": 0.46,
"learning_rate": 4.6695311345581206e-05,
"loss": 1.4184,
"step": 115000
},
{
"epoch": 0.46,
"learning_rate": 4.668096600758607e-05,
"loss": 1.394,
"step": 115500
},
{
"epoch": 0.47,
"learning_rate": 4.6666591921418595e-05,
"loss": 1.3801,
"step": 116000
},
{
"epoch": 0.47,
"learning_rate": 4.6652217835251126e-05,
"loss": 1.3832,
"step": 116500
},
{
"epoch": 0.47,
"learning_rate": 4.663784374908366e-05,
"loss": 1.4032,
"step": 117000
},
{
"epoch": 0.47,
"learning_rate": 4.662346966291618e-05,
"loss": 1.4205,
"step": 117500
},
{
"epoch": 0.47,
"learning_rate": 4.660909557674871e-05,
"loss": 1.3854,
"step": 118000
},
{
"epoch": 0.48,
"learning_rate": 4.6594750238753576e-05,
"loss": 1.3982,
"step": 118500
},
{
"epoch": 0.48,
"learning_rate": 4.65803761525861e-05,
"loss": 1.3992,
"step": 119000
},
{
"epoch": 0.48,
"learning_rate": 4.656600206641863e-05,
"loss": 1.348,
"step": 119500
},
{
"epoch": 0.48,
"learning_rate": 4.6551627980251156e-05,
"loss": 1.4251,
"step": 120000
},
{
"epoch": 0.48,
"learning_rate": 4.653725389408369e-05,
"loss": 1.3713,
"step": 120500
},
{
"epoch": 0.49,
"learning_rate": 4.652290855608855e-05,
"loss": 1.4152,
"step": 121000
},
{
"epoch": 0.49,
"learning_rate": 4.6508534469921075e-05,
"loss": 1.4396,
"step": 121500
},
{
"epoch": 0.49,
"learning_rate": 4.6494160383753606e-05,
"loss": 1.412,
"step": 122000
},
{
"epoch": 0.49,
"learning_rate": 4.647978629758613e-05,
"loss": 1.4279,
"step": 122500
},
{
"epoch": 0.5,
"learning_rate": 4.646541221141866e-05,
"loss": 1.4773,
"step": 123000
},
{
"epoch": 0.5,
"learning_rate": 4.645103812525119e-05,
"loss": 1.4025,
"step": 123500
},
{
"epoch": 0.5,
"learning_rate": 4.643666403908372e-05,
"loss": 1.4126,
"step": 124000
},
{
"epoch": 0.5,
"learning_rate": 4.642228995291625e-05,
"loss": 1.3657,
"step": 124500
},
{
"epoch": 0.5,
"learning_rate": 4.6407944614921105e-05,
"loss": 1.4284,
"step": 125000
},
{
"epoch": 0.51,
"learning_rate": 4.639359927692597e-05,
"loss": 1.4351,
"step": 125500
},
{
"epoch": 0.51,
"learning_rate": 4.63792251907585e-05,
"loss": 1.4265,
"step": 126000
},
{
"epoch": 0.51,
"learning_rate": 4.6364851104591025e-05,
"loss": 1.3681,
"step": 126500
},
{
"epoch": 0.51,
"learning_rate": 4.6350477018423556e-05,
"loss": 1.3806,
"step": 127000
},
{
"epoch": 0.51,
"learning_rate": 4.633610293225608e-05,
"loss": 1.3462,
"step": 127500
},
{
"epoch": 0.52,
"learning_rate": 4.632172884608861e-05,
"loss": 1.4283,
"step": 128000
},
{
"epoch": 0.52,
"learning_rate": 4.630735475992114e-05,
"loss": 1.4105,
"step": 128500
},
{
"epoch": 0.52,
"learning_rate": 4.6292980673753666e-05,
"loss": 1.3929,
"step": 129000
},
{
"epoch": 0.52,
"learning_rate": 4.627863533575853e-05,
"loss": 1.4379,
"step": 129500
},
{
"epoch": 0.52,
"learning_rate": 4.626426124959106e-05,
"loss": 1.4175,
"step": 130000
},
{
"epoch": 0.53,
"learning_rate": 4.6249887163423586e-05,
"loss": 1.3625,
"step": 130500
},
{
"epoch": 0.53,
"learning_rate": 4.623551307725612e-05,
"loss": 1.4202,
"step": 131000
},
{
"epoch": 0.53,
"learning_rate": 4.622113899108864e-05,
"loss": 1.4135,
"step": 131500
},
{
"epoch": 0.53,
"learning_rate": 4.620676490492117e-05,
"loss": 1.4075,
"step": 132000
},
{
"epoch": 0.53,
"learning_rate": 4.61923908187537e-05,
"loss": 1.3812,
"step": 132500
},
{
"epoch": 0.54,
"learning_rate": 4.617804548075856e-05,
"loss": 1.3856,
"step": 133000
},
{
"epoch": 0.54,
"learning_rate": 4.616367139459109e-05,
"loss": 1.3601,
"step": 133500
},
{
"epoch": 0.54,
"learning_rate": 4.6149297308423616e-05,
"loss": 1.3832,
"step": 134000
},
{
"epoch": 0.54,
"learning_rate": 4.613492322225615e-05,
"loss": 1.3954,
"step": 134500
},
{
"epoch": 0.54,
"learning_rate": 4.612057788426101e-05,
"loss": 1.3669,
"step": 135000
},
{
"epoch": 0.55,
"learning_rate": 4.6106203798093536e-05,
"loss": 1.4357,
"step": 135500
},
{
"epoch": 0.55,
"learning_rate": 4.609182971192607e-05,
"loss": 1.4157,
"step": 136000
},
{
"epoch": 0.55,
"learning_rate": 4.607745562575859e-05,
"loss": 1.3635,
"step": 136500
},
{
"epoch": 0.55,
"learning_rate": 4.606308153959112e-05,
"loss": 1.4251,
"step": 137000
},
{
"epoch": 0.55,
"learning_rate": 4.604870745342365e-05,
"loss": 1.3557,
"step": 137500
},
{
"epoch": 0.56,
"learning_rate": 4.603436211542852e-05,
"loss": 1.3947,
"step": 138000
},
{
"epoch": 0.56,
"learning_rate": 4.601998802926104e-05,
"loss": 1.3881,
"step": 138500
},
{
"epoch": 0.56,
"learning_rate": 4.6005613943093566e-05,
"loss": 1.3912,
"step": 139000
},
{
"epoch": 0.56,
"learning_rate": 4.59912398569261e-05,
"loss": 1.3857,
"step": 139500
},
{
"epoch": 0.56,
"learning_rate": 4.597689451893096e-05,
"loss": 1.4202,
"step": 140000
},
{
"epoch": 0.57,
"learning_rate": 4.596252043276349e-05,
"loss": 1.3871,
"step": 140500
},
{
"epoch": 0.57,
"learning_rate": 4.5948146346596016e-05,
"loss": 1.3563,
"step": 141000
},
{
"epoch": 0.57,
"learning_rate": 4.593380100860088e-05,
"loss": 1.4027,
"step": 141500
},
{
"epoch": 0.57,
"learning_rate": 4.5919455670605745e-05,
"loss": 1.399,
"step": 142000
},
{
"epoch": 0.57,
"learning_rate": 4.590508158443827e-05,
"loss": 1.4522,
"step": 142500
},
{
"epoch": 0.58,
"learning_rate": 4.58907074982708e-05,
"loss": 1.4005,
"step": 143000
},
{
"epoch": 0.58,
"learning_rate": 4.587633341210333e-05,
"loss": 1.3592,
"step": 143500
},
{
"epoch": 0.58,
"learning_rate": 4.5861959325935856e-05,
"loss": 1.401,
"step": 144000
},
{
"epoch": 0.58,
"learning_rate": 4.584758523976838e-05,
"loss": 1.3642,
"step": 144500
},
{
"epoch": 0.58,
"learning_rate": 4.583321115360091e-05,
"loss": 1.3673,
"step": 145000
},
{
"epoch": 0.59,
"learning_rate": 4.581883706743344e-05,
"loss": 1.3732,
"step": 145500
},
{
"epoch": 0.59,
"learning_rate": 4.5804491729438306e-05,
"loss": 1.3784,
"step": 146000
},
{
"epoch": 0.59,
"learning_rate": 4.579011764327083e-05,
"loss": 1.3617,
"step": 146500
},
{
"epoch": 0.59,
"learning_rate": 4.5775743557103355e-05,
"loss": 1.4072,
"step": 147000
},
{
"epoch": 0.59,
"learning_rate": 4.5761369470935886e-05,
"loss": 1.387,
"step": 147500
},
{
"epoch": 0.6,
"learning_rate": 4.574699538476842e-05,
"loss": 1.4189,
"step": 148000
},
{
"epoch": 0.6,
"learning_rate": 4.573262129860094e-05,
"loss": 1.4086,
"step": 148500
},
{
"epoch": 0.6,
"learning_rate": 4.571824721243347e-05,
"loss": 1.4118,
"step": 149000
},
{
"epoch": 0.6,
"learning_rate": 4.5703873126266e-05,
"loss": 1.3505,
"step": 149500
},
{
"epoch": 0.6,
"learning_rate": 4.568949904009853e-05,
"loss": 1.3993,
"step": 150000
},
{
"epoch": 0.61,
"learning_rate": 4.567515370210339e-05,
"loss": 1.3766,
"step": 150500
},
{
"epoch": 0.61,
"learning_rate": 4.5660779615935916e-05,
"loss": 1.3773,
"step": 151000
},
{
"epoch": 0.61,
"learning_rate": 4.564640552976845e-05,
"loss": 1.3799,
"step": 151500
},
{
"epoch": 0.61,
"learning_rate": 4.563203144360098e-05,
"loss": 1.4175,
"step": 152000
},
{
"epoch": 0.61,
"learning_rate": 4.56176573574335e-05,
"loss": 1.3748,
"step": 152500
},
{
"epoch": 0.62,
"learning_rate": 4.560328327126603e-05,
"loss": 1.4009,
"step": 153000
},
{
"epoch": 0.62,
"learning_rate": 4.558893793327089e-05,
"loss": 1.3603,
"step": 153500
},
{
"epoch": 0.62,
"learning_rate": 4.557456384710342e-05,
"loss": 1.3207,
"step": 154000
},
{
"epoch": 0.62,
"learning_rate": 4.556018976093595e-05,
"loss": 1.4082,
"step": 154500
},
{
"epoch": 0.62,
"learning_rate": 4.554581567476848e-05,
"loss": 1.3811,
"step": 155000
},
{
"epoch": 0.63,
"learning_rate": 4.553144158860101e-05,
"loss": 1.4071,
"step": 155500
},
{
"epoch": 0.63,
"learning_rate": 4.551706750243354e-05,
"loss": 1.3726,
"step": 156000
},
{
"epoch": 0.63,
"learning_rate": 4.550269341626606e-05,
"loss": 1.4198,
"step": 156500
},
{
"epoch": 0.63,
"learning_rate": 4.548831933009859e-05,
"loss": 1.3945,
"step": 157000
},
{
"epoch": 0.63,
"learning_rate": 4.547394524393112e-05,
"loss": 1.4092,
"step": 157500
},
{
"epoch": 0.64,
"learning_rate": 4.545957115776365e-05,
"loss": 1.3685,
"step": 158000
},
{
"epoch": 0.64,
"learning_rate": 4.5445225819768513e-05,
"loss": 1.3967,
"step": 158500
},
{
"epoch": 0.64,
"learning_rate": 4.543085173360104e-05,
"loss": 1.3583,
"step": 159000
},
{
"epoch": 0.64,
"learning_rate": 4.541647764743356e-05,
"loss": 1.3435,
"step": 159500
},
{
"epoch": 0.64,
"learning_rate": 4.5402132309438426e-05,
"loss": 1.3974,
"step": 160000
},
{
"epoch": 0.65,
"learning_rate": 4.538775822327096e-05,
"loss": 1.3452,
"step": 160500
},
{
"epoch": 0.65,
"learning_rate": 4.537338413710349e-05,
"loss": 1.3704,
"step": 161000
},
{
"epoch": 0.65,
"learning_rate": 4.535901005093601e-05,
"loss": 1.4065,
"step": 161500
},
{
"epoch": 0.65,
"learning_rate": 4.534463596476854e-05,
"loss": 1.3301,
"step": 162000
},
{
"epoch": 0.65,
"learning_rate": 4.5330261878601074e-05,
"loss": 1.4155,
"step": 162500
},
{
"epoch": 0.66,
"learning_rate": 4.53158877924336e-05,
"loss": 1.3922,
"step": 163000
},
{
"epoch": 0.66,
"learning_rate": 4.530151370626612e-05,
"loss": 1.424,
"step": 163500
},
{
"epoch": 0.66,
"learning_rate": 4.5287139620098654e-05,
"loss": 1.3749,
"step": 164000
},
{
"epoch": 0.66,
"learning_rate": 4.5272765533931185e-05,
"loss": 1.3787,
"step": 164500
},
{
"epoch": 0.66,
"learning_rate": 4.525839144776371e-05,
"loss": 1.4292,
"step": 165000
},
{
"epoch": 0.67,
"learning_rate": 4.5244046109768573e-05,
"loss": 1.3708,
"step": 165500
},
{
"epoch": 0.67,
"learning_rate": 4.52296720236011e-05,
"loss": 1.3503,
"step": 166000
},
{
"epoch": 0.67,
"learning_rate": 4.521529793743363e-05,
"loss": 1.3939,
"step": 166500
},
{
"epoch": 0.67,
"learning_rate": 4.520092385126616e-05,
"loss": 1.3795,
"step": 167000
},
{
"epoch": 0.67,
"learning_rate": 4.5186578513271024e-05,
"loss": 1.317,
"step": 167500
},
{
"epoch": 0.68,
"learning_rate": 4.517223317527589e-05,
"loss": 1.3537,
"step": 168000
},
{
"epoch": 0.68,
"learning_rate": 4.515785908910841e-05,
"loss": 1.3657,
"step": 168500
},
{
"epoch": 0.68,
"learning_rate": 4.514348500294094e-05,
"loss": 1.3708,
"step": 169000
},
{
"epoch": 0.68,
"learning_rate": 4.512911091677347e-05,
"loss": 1.4122,
"step": 169500
},
{
"epoch": 0.68,
"learning_rate": 4.5114736830606e-05,
"loss": 1.3816,
"step": 170000
},
{
"epoch": 0.69,
"learning_rate": 4.510036274443852e-05,
"loss": 1.3861,
"step": 170500
},
{
"epoch": 0.69,
"learning_rate": 4.508598865827105e-05,
"loss": 1.3518,
"step": 171000
},
{
"epoch": 0.69,
"learning_rate": 4.5071614572103585e-05,
"loss": 1.3967,
"step": 171500
},
{
"epoch": 0.69,
"learning_rate": 4.505726923410844e-05,
"loss": 1.3642,
"step": 172000
},
{
"epoch": 0.69,
"learning_rate": 4.504292389611331e-05,
"loss": 1.353,
"step": 172500
},
{
"epoch": 0.7,
"learning_rate": 4.502854980994584e-05,
"loss": 1.3855,
"step": 173000
},
{
"epoch": 0.7,
"learning_rate": 4.501417572377836e-05,
"loss": 1.3736,
"step": 173500
},
{
"epoch": 0.7,
"learning_rate": 4.499980163761089e-05,
"loss": 1.3663,
"step": 174000
},
{
"epoch": 0.7,
"learning_rate": 4.498542755144342e-05,
"loss": 1.4056,
"step": 174500
},
{
"epoch": 0.7,
"learning_rate": 4.497105346527595e-05,
"loss": 1.3703,
"step": 175000
},
{
"epoch": 0.71,
"learning_rate": 4.495667937910847e-05,
"loss": 1.3636,
"step": 175500
},
{
"epoch": 0.71,
"learning_rate": 4.4942305292941004e-05,
"loss": 1.3887,
"step": 176000
},
{
"epoch": 0.71,
"learning_rate": 4.4927931206773535e-05,
"loss": 1.3568,
"step": 176500
},
{
"epoch": 0.71,
"learning_rate": 4.49135858687784e-05,
"loss": 1.4145,
"step": 177000
},
{
"epoch": 0.71,
"learning_rate": 4.4899211782610923e-05,
"loss": 1.3641,
"step": 177500
},
{
"epoch": 0.72,
"learning_rate": 4.488483769644345e-05,
"loss": 1.3802,
"step": 178000
},
{
"epoch": 0.72,
"learning_rate": 4.487046361027598e-05,
"loss": 1.3609,
"step": 178500
},
{
"epoch": 0.72,
"learning_rate": 4.4856118272280836e-05,
"loss": 1.3743,
"step": 179000
},
{
"epoch": 0.72,
"learning_rate": 4.48417729342857e-05,
"loss": 1.3967,
"step": 179500
},
{
"epoch": 0.72,
"learning_rate": 4.482739884811823e-05,
"loss": 1.3887,
"step": 180000
},
{
"epoch": 0.73,
"learning_rate": 4.481302476195076e-05,
"loss": 1.3803,
"step": 180500
},
{
"epoch": 0.73,
"learning_rate": 4.479865067578329e-05,
"loss": 1.3843,
"step": 181000
},
{
"epoch": 0.73,
"learning_rate": 4.478427658961582e-05,
"loss": 1.368,
"step": 181500
},
{
"epoch": 0.73,
"learning_rate": 4.4769931251620676e-05,
"loss": 1.4185,
"step": 182000
},
{
"epoch": 0.73,
"learning_rate": 4.4755557165453214e-05,
"loss": 1.3608,
"step": 182500
},
{
"epoch": 0.74,
"learning_rate": 4.474118307928574e-05,
"loss": 1.3724,
"step": 183000
},
{
"epoch": 0.74,
"learning_rate": 4.472680899311826e-05,
"loss": 1.3864,
"step": 183500
},
{
"epoch": 0.74,
"learning_rate": 4.471243490695079e-05,
"loss": 1.3729,
"step": 184000
},
{
"epoch": 0.74,
"learning_rate": 4.4698060820783324e-05,
"loss": 1.3833,
"step": 184500
},
{
"epoch": 0.74,
"learning_rate": 4.468368673461585e-05,
"loss": 1.3593,
"step": 185000
},
{
"epoch": 0.75,
"learning_rate": 4.466931264844837e-05,
"loss": 1.3443,
"step": 185500
},
{
"epoch": 0.75,
"learning_rate": 4.465496731045324e-05,
"loss": 1.3494,
"step": 186000
},
{
"epoch": 0.75,
"learning_rate": 4.464059322428577e-05,
"loss": 1.3314,
"step": 186500
},
{
"epoch": 0.75,
"learning_rate": 4.46262191381183e-05,
"loss": 1.3666,
"step": 187000
},
{
"epoch": 0.75,
"learning_rate": 4.461184505195082e-05,
"loss": 1.4079,
"step": 187500
},
{
"epoch": 0.76,
"learning_rate": 4.459749971395569e-05,
"loss": 1.4069,
"step": 188000
},
{
"epoch": 0.76,
"learning_rate": 4.458312562778821e-05,
"loss": 1.3429,
"step": 188500
},
{
"epoch": 0.76,
"learning_rate": 4.4568780289793076e-05,
"loss": 1.365,
"step": 189000
},
{
"epoch": 0.76,
"learning_rate": 4.455440620362561e-05,
"loss": 1.4167,
"step": 189500
},
{
"epoch": 0.76,
"learning_rate": 4.454003211745814e-05,
"loss": 1.3715,
"step": 190000
},
{
"epoch": 0.77,
"learning_rate": 4.452565803129066e-05,
"loss": 1.3698,
"step": 190500
},
{
"epoch": 0.77,
"learning_rate": 4.4511283945123186e-05,
"loss": 1.3936,
"step": 191000
},
{
"epoch": 0.77,
"learning_rate": 4.4496909858955724e-05,
"loss": 1.3526,
"step": 191500
},
{
"epoch": 0.77,
"learning_rate": 4.448253577278825e-05,
"loss": 1.3401,
"step": 192000
},
{
"epoch": 0.77,
"learning_rate": 4.446819043479311e-05,
"loss": 1.4083,
"step": 192500
},
{
"epoch": 0.78,
"learning_rate": 4.445381634862564e-05,
"loss": 1.3706,
"step": 193000
},
{
"epoch": 0.78,
"learning_rate": 4.443944226245816e-05,
"loss": 1.3702,
"step": 193500
},
{
"epoch": 0.78,
"learning_rate": 4.44250681762907e-05,
"loss": 1.3915,
"step": 194000
},
{
"epoch": 0.78,
"learning_rate": 4.441069409012322e-05,
"loss": 1.3347,
"step": 194500
},
{
"epoch": 0.78,
"learning_rate": 4.439632000395575e-05,
"loss": 1.4214,
"step": 195000
},
{
"epoch": 0.79,
"learning_rate": 4.438197466596061e-05,
"loss": 1.3836,
"step": 195500
},
{
"epoch": 0.79,
"learning_rate": 4.4367600579793136e-05,
"loss": 1.3125,
"step": 196000
},
{
"epoch": 0.79,
"learning_rate": 4.4353226493625674e-05,
"loss": 1.3719,
"step": 196500
},
{
"epoch": 0.79,
"learning_rate": 4.43388524074582e-05,
"loss": 1.388,
"step": 197000
},
{
"epoch": 0.79,
"learning_rate": 4.432447832129072e-05,
"loss": 1.3519,
"step": 197500
},
{
"epoch": 0.8,
"learning_rate": 4.431010423512325e-05,
"loss": 1.352,
"step": 198000
},
{
"epoch": 0.8,
"learning_rate": 4.4295730148955784e-05,
"loss": 1.3897,
"step": 198500
},
{
"epoch": 0.8,
"learning_rate": 4.428135606278831e-05,
"loss": 1.3947,
"step": 199000
},
{
"epoch": 0.8,
"learning_rate": 4.426701072479317e-05,
"loss": 1.4083,
"step": 199500
},
{
"epoch": 0.8,
"learning_rate": 4.42526366386257e-05,
"loss": 1.3361,
"step": 200000
},
{
"epoch": 0.81,
"learning_rate": 4.4238262552458235e-05,
"loss": 1.3415,
"step": 200500
},
{
"epoch": 0.81,
"learning_rate": 4.422388846629076e-05,
"loss": 1.4054,
"step": 201000
},
{
"epoch": 0.81,
"learning_rate": 4.420951438012328e-05,
"loss": 1.3871,
"step": 201500
},
{
"epoch": 0.81,
"learning_rate": 4.419516904212815e-05,
"loss": 1.3916,
"step": 202000
},
{
"epoch": 0.82,
"learning_rate": 4.418079495596067e-05,
"loss": 1.3449,
"step": 202500
},
{
"epoch": 0.82,
"learning_rate": 4.416642086979321e-05,
"loss": 1.4411,
"step": 203000
},
{
"epoch": 0.82,
"learning_rate": 4.4152046783625734e-05,
"loss": 1.3792,
"step": 203500
},
{
"epoch": 0.82,
"learning_rate": 4.41377014456306e-05,
"loss": 1.3843,
"step": 204000
},
{
"epoch": 0.82,
"learning_rate": 4.412332735946312e-05,
"loss": 1.3514,
"step": 204500
},
{
"epoch": 0.83,
"learning_rate": 4.410895327329565e-05,
"loss": 1.3531,
"step": 205000
},
{
"epoch": 0.83,
"learning_rate": 4.4094579187128184e-05,
"loss": 1.3575,
"step": 205500
},
{
"epoch": 0.83,
"learning_rate": 4.408023384913305e-05,
"loss": 1.3671,
"step": 206000
},
{
"epoch": 0.83,
"learning_rate": 4.406585976296557e-05,
"loss": 1.3818,
"step": 206500
},
{
"epoch": 0.83,
"learning_rate": 4.40514856767981e-05,
"loss": 1.3625,
"step": 207000
},
{
"epoch": 0.84,
"learning_rate": 4.403711159063063e-05,
"loss": 1.2734,
"step": 207500
},
{
"epoch": 0.84,
"learning_rate": 4.402273750446316e-05,
"loss": 1.4119,
"step": 208000
},
{
"epoch": 0.84,
"learning_rate": 4.4008392166468024e-05,
"loss": 1.3444,
"step": 208500
},
{
"epoch": 0.84,
"learning_rate": 4.399401808030055e-05,
"loss": 1.3591,
"step": 209000
},
{
"epoch": 0.84,
"learning_rate": 4.397964399413307e-05,
"loss": 1.3544,
"step": 209500
},
{
"epoch": 0.85,
"learning_rate": 4.39652699079656e-05,
"loss": 1.3843,
"step": 210000
},
{
"epoch": 0.85,
"learning_rate": 4.3950895821798134e-05,
"loss": 1.361,
"step": 210500
},
{
"epoch": 0.85,
"learning_rate": 4.393652173563066e-05,
"loss": 1.3929,
"step": 211000
},
{
"epoch": 0.85,
"learning_rate": 4.392217639763552e-05,
"loss": 1.3484,
"step": 211500
},
{
"epoch": 0.85,
"learning_rate": 4.390780231146805e-05,
"loss": 1.3612,
"step": 212000
},
{
"epoch": 0.86,
"learning_rate": 4.389342822530058e-05,
"loss": 1.3857,
"step": 212500
},
{
"epoch": 0.86,
"learning_rate": 4.387905413913311e-05,
"loss": 1.3092,
"step": 213000
},
{
"epoch": 0.86,
"learning_rate": 4.386468005296563e-05,
"loss": 1.3349,
"step": 213500
},
{
"epoch": 0.86,
"learning_rate": 4.38503347149705e-05,
"loss": 1.3484,
"step": 214000
},
{
"epoch": 0.86,
"learning_rate": 4.383596062880302e-05,
"loss": 1.3718,
"step": 214500
},
{
"epoch": 0.87,
"learning_rate": 4.3821615290807887e-05,
"loss": 1.371,
"step": 215000
},
{
"epoch": 0.87,
"learning_rate": 4.380724120464042e-05,
"loss": 1.3532,
"step": 215500
},
{
"epoch": 0.87,
"learning_rate": 4.379286711847295e-05,
"loss": 1.3346,
"step": 216000
},
{
"epoch": 0.87,
"learning_rate": 4.377849303230547e-05,
"loss": 1.4022,
"step": 216500
},
{
"epoch": 0.87,
"learning_rate": 4.3764118946138e-05,
"loss": 1.2966,
"step": 217000
},
{
"epoch": 0.88,
"learning_rate": 4.3749744859970535e-05,
"loss": 1.3618,
"step": 217500
},
{
"epoch": 0.88,
"learning_rate": 4.373537077380306e-05,
"loss": 1.3389,
"step": 218000
},
{
"epoch": 0.88,
"learning_rate": 4.372099668763558e-05,
"loss": 1.4182,
"step": 218500
},
{
"epoch": 0.88,
"learning_rate": 4.370665134964045e-05,
"loss": 1.3107,
"step": 219000
},
{
"epoch": 0.88,
"learning_rate": 4.369230601164531e-05,
"loss": 1.3784,
"step": 219500
},
{
"epoch": 0.89,
"learning_rate": 4.3677931925477836e-05,
"loss": 1.3293,
"step": 220000
},
{
"epoch": 0.89,
"learning_rate": 4.3663557839310374e-05,
"loss": 1.3381,
"step": 220500
},
{
"epoch": 0.89,
"learning_rate": 4.36491837531429e-05,
"loss": 1.3526,
"step": 221000
},
{
"epoch": 0.89,
"learning_rate": 4.363483841514776e-05,
"loss": 1.3405,
"step": 221500
},
{
"epoch": 0.89,
"learning_rate": 4.362046432898029e-05,
"loss": 1.3746,
"step": 222000
},
{
"epoch": 0.9,
"learning_rate": 4.360609024281281e-05,
"loss": 1.3648,
"step": 222500
},
{
"epoch": 0.9,
"learning_rate": 4.359171615664535e-05,
"loss": 1.413,
"step": 223000
},
{
"epoch": 0.9,
"learning_rate": 4.357734207047787e-05,
"loss": 1.3731,
"step": 223500
},
{
"epoch": 0.9,
"learning_rate": 4.356299673248274e-05,
"loss": 1.3905,
"step": 224000
},
{
"epoch": 0.9,
"learning_rate": 4.354862264631526e-05,
"loss": 1.3992,
"step": 224500
},
{
"epoch": 0.91,
"learning_rate": 4.3534248560147786e-05,
"loss": 1.3332,
"step": 225000
},
{
"epoch": 0.91,
"learning_rate": 4.3519874473980324e-05,
"loss": 1.3735,
"step": 225500
},
{
"epoch": 0.91,
"learning_rate": 4.350550038781285e-05,
"loss": 1.3732,
"step": 226000
},
{
"epoch": 0.91,
"learning_rate": 4.349112630164537e-05,
"loss": 1.3131,
"step": 226500
},
{
"epoch": 0.91,
"learning_rate": 4.3476780963650237e-05,
"loss": 1.3553,
"step": 227000
},
{
"epoch": 0.92,
"learning_rate": 4.346240687748277e-05,
"loss": 1.3415,
"step": 227500
},
{
"epoch": 0.92,
"learning_rate": 4.34480327913153e-05,
"loss": 1.4036,
"step": 228000
},
{
"epoch": 0.92,
"learning_rate": 4.343365870514782e-05,
"loss": 1.3283,
"step": 228500
},
{
"epoch": 0.92,
"learning_rate": 4.341928461898035e-05,
"loss": 1.368,
"step": 229000
},
{
"epoch": 0.92,
"learning_rate": 4.340491053281288e-05,
"loss": 1.3641,
"step": 229500
},
{
"epoch": 0.93,
"learning_rate": 4.339053644664541e-05,
"loss": 1.3759,
"step": 230000
},
{
"epoch": 0.93,
"learning_rate": 4.337616236047793e-05,
"loss": 1.3909,
"step": 230500
},
{
"epoch": 0.93,
"learning_rate": 4.3361788274310464e-05,
"loss": 1.337,
"step": 231000
},
{
"epoch": 0.93,
"learning_rate": 4.334744293631532e-05,
"loss": 1.3323,
"step": 231500
},
{
"epoch": 0.93,
"learning_rate": 4.333306885014786e-05,
"loss": 1.3528,
"step": 232000
},
{
"epoch": 0.94,
"learning_rate": 4.3318694763980384e-05,
"loss": 1.3617,
"step": 232500
},
{
"epoch": 0.94,
"learning_rate": 4.330434942598525e-05,
"loss": 1.353,
"step": 233000
},
{
"epoch": 0.94,
"learning_rate": 4.328997533981777e-05,
"loss": 1.3511,
"step": 233500
},
{
"epoch": 0.94,
"learning_rate": 4.3275601253650297e-05,
"loss": 1.3907,
"step": 234000
},
{
"epoch": 0.94,
"learning_rate": 4.3261227167482834e-05,
"loss": 1.3828,
"step": 234500
},
{
"epoch": 0.95,
"learning_rate": 4.324685308131536e-05,
"loss": 1.3867,
"step": 235000
},
{
"epoch": 0.95,
"learning_rate": 4.323250774332022e-05,
"loss": 1.4029,
"step": 235500
},
{
"epoch": 0.95,
"learning_rate": 4.321813365715275e-05,
"loss": 1.336,
"step": 236000
},
{
"epoch": 0.95,
"learning_rate": 4.320375957098528e-05,
"loss": 1.3467,
"step": 236500
},
{
"epoch": 0.95,
"learning_rate": 4.318938548481781e-05,
"loss": 1.3369,
"step": 237000
},
{
"epoch": 0.96,
"learning_rate": 4.317501139865033e-05,
"loss": 1.3248,
"step": 237500
},
{
"epoch": 0.96,
"learning_rate": 4.316063731248286e-05,
"loss": 1.3209,
"step": 238000
},
{
"epoch": 0.96,
"learning_rate": 4.314626322631539e-05,
"loss": 1.3802,
"step": 238500
},
{
"epoch": 0.96,
"learning_rate": 4.313188914014792e-05,
"loss": 1.3491,
"step": 239000
},
{
"epoch": 0.96,
"learning_rate": 4.3117543802152784e-05,
"loss": 1.3705,
"step": 239500
},
{
"epoch": 0.97,
"learning_rate": 4.310319846415765e-05,
"loss": 1.3495,
"step": 240000
},
{
"epoch": 0.97,
"learning_rate": 4.308882437799017e-05,
"loss": 1.3667,
"step": 240500
},
{
"epoch": 0.97,
"learning_rate": 4.30744502918227e-05,
"loss": 1.3464,
"step": 241000
},
{
"epoch": 0.97,
"learning_rate": 4.306007620565523e-05,
"loss": 1.3318,
"step": 241500
},
{
"epoch": 0.97,
"learning_rate": 4.304570211948776e-05,
"loss": 1.3779,
"step": 242000
},
{
"epoch": 0.98,
"learning_rate": 4.303132803332028e-05,
"loss": 1.3211,
"step": 242500
},
{
"epoch": 0.98,
"learning_rate": 4.301695394715281e-05,
"loss": 1.3281,
"step": 243000
},
{
"epoch": 0.98,
"learning_rate": 4.3002579860985345e-05,
"loss": 1.4052,
"step": 243500
},
{
"epoch": 0.98,
"learning_rate": 4.298820577481787e-05,
"loss": 1.357,
"step": 244000
},
{
"epoch": 0.98,
"learning_rate": 4.297383168865039e-05,
"loss": 1.3943,
"step": 244500
},
{
"epoch": 0.99,
"learning_rate": 4.295948635065526e-05,
"loss": 1.338,
"step": 245000
},
{
"epoch": 0.99,
"learning_rate": 4.294511226448779e-05,
"loss": 1.3568,
"step": 245500
},
{
"epoch": 0.99,
"learning_rate": 4.2930766926492647e-05,
"loss": 1.3254,
"step": 246000
},
{
"epoch": 0.99,
"learning_rate": 4.291639284032518e-05,
"loss": 1.3355,
"step": 246500
},
{
"epoch": 0.99,
"learning_rate": 4.290201875415771e-05,
"loss": 1.3979,
"step": 247000
},
{
"epoch": 1.0,
"learning_rate": 4.288764466799023e-05,
"loss": 1.3396,
"step": 247500
},
{
"epoch": 1.0,
"learning_rate": 4.2873270581822764e-05,
"loss": 1.3746,
"step": 248000
},
{
"epoch": 1.0,
"eval_cer": 0.2530746846497492,
"eval_loss": 1.026394009590149,
"eval_runtime": 10837.1839,
"eval_samples_per_second": 9.02,
"eval_steps_per_second": 1.128,
"step": 248463
},
{
"epoch": 1.0,
"learning_rate": 4.2858896495655295e-05,
"loss": 1.3325,
"step": 248500
},
{
"epoch": 1.0,
"learning_rate": 4.284452240948782e-05,
"loss": 1.3003,
"step": 249000
},
{
"epoch": 1.0,
"learning_rate": 4.283014832332034e-05,
"loss": 1.3294,
"step": 249500
},
{
"epoch": 1.01,
"learning_rate": 4.2815774237152874e-05,
"loss": 1.3038,
"step": 250000
},
{
"epoch": 1.01,
"learning_rate": 4.280142889915774e-05,
"loss": 1.3098,
"step": 250500
},
{
"epoch": 1.01,
"learning_rate": 4.278705481299027e-05,
"loss": 1.2883,
"step": 251000
},
{
"epoch": 1.01,
"learning_rate": 4.2772709474995134e-05,
"loss": 1.3391,
"step": 251500
},
{
"epoch": 1.01,
"learning_rate": 4.275833538882766e-05,
"loss": 1.342,
"step": 252000
},
{
"epoch": 1.02,
"learning_rate": 4.274396130266018e-05,
"loss": 1.2917,
"step": 252500
},
{
"epoch": 1.02,
"learning_rate": 4.272958721649271e-05,
"loss": 1.3136,
"step": 253000
},
{
"epoch": 1.02,
"learning_rate": 4.2715213130325244e-05,
"loss": 1.2974,
"step": 253500
},
{
"epoch": 1.02,
"learning_rate": 4.270083904415777e-05,
"loss": 1.2851,
"step": 254000
},
{
"epoch": 1.02,
"learning_rate": 4.26864649579903e-05,
"loss": 1.2778,
"step": 254500
},
{
"epoch": 1.03,
"learning_rate": 4.267209087182283e-05,
"loss": 1.3019,
"step": 255000
},
{
"epoch": 1.03,
"learning_rate": 4.265774553382769e-05,
"loss": 1.2946,
"step": 255500
},
{
"epoch": 1.03,
"learning_rate": 4.264337144766022e-05,
"loss": 1.3452,
"step": 256000
},
{
"epoch": 1.03,
"learning_rate": 4.262899736149274e-05,
"loss": 1.3797,
"step": 256500
},
{
"epoch": 1.03,
"learning_rate": 4.2614623275325274e-05,
"loss": 1.3143,
"step": 257000
},
{
"epoch": 1.04,
"learning_rate": 4.260027793733013e-05,
"loss": 1.2851,
"step": 257500
},
{
"epoch": 1.04,
"learning_rate": 4.258590385116266e-05,
"loss": 1.3339,
"step": 258000
},
{
"epoch": 1.04,
"learning_rate": 4.2571529764995194e-05,
"loss": 1.327,
"step": 258500
},
{
"epoch": 1.04,
"learning_rate": 4.255718442700006e-05,
"loss": 1.3036,
"step": 259000
},
{
"epoch": 1.04,
"learning_rate": 4.254281034083258e-05,
"loss": 1.2892,
"step": 259500
},
{
"epoch": 1.05,
"learning_rate": 4.2528436254665114e-05,
"loss": 1.3411,
"step": 260000
},
{
"epoch": 1.05,
"learning_rate": 4.2514062168497645e-05,
"loss": 1.3209,
"step": 260500
},
{
"epoch": 1.05,
"learning_rate": 4.24997168305025e-05,
"loss": 1.321,
"step": 261000
},
{
"epoch": 1.05,
"learning_rate": 4.248534274433503e-05,
"loss": 1.3048,
"step": 261500
},
{
"epoch": 1.05,
"learning_rate": 4.247096865816756e-05,
"loss": 1.2998,
"step": 262000
},
{
"epoch": 1.06,
"learning_rate": 4.245659457200009e-05,
"loss": 1.3151,
"step": 262500
},
{
"epoch": 1.06,
"learning_rate": 4.244222048583262e-05,
"loss": 1.3393,
"step": 263000
},
{
"epoch": 1.06,
"learning_rate": 4.2427846399665144e-05,
"loss": 1.3427,
"step": 263500
},
{
"epoch": 1.06,
"learning_rate": 4.241347231349767e-05,
"loss": 1.3462,
"step": 264000
},
{
"epoch": 1.06,
"learning_rate": 4.23990982273302e-05,
"loss": 1.2871,
"step": 264500
},
{
"epoch": 1.07,
"learning_rate": 4.238472414116273e-05,
"loss": 1.3217,
"step": 265000
},
{
"epoch": 1.07,
"learning_rate": 4.2370378803167594e-05,
"loss": 1.3224,
"step": 265500
},
{
"epoch": 1.07,
"learning_rate": 4.235600471700012e-05,
"loss": 1.3642,
"step": 266000
},
{
"epoch": 1.07,
"learning_rate": 4.234163063083264e-05,
"loss": 1.2768,
"step": 266500
},
{
"epoch": 1.07,
"learning_rate": 4.2327256544665174e-05,
"loss": 1.3201,
"step": 267000
},
{
"epoch": 1.08,
"learning_rate": 4.231291120667004e-05,
"loss": 1.3396,
"step": 267500
},
{
"epoch": 1.08,
"learning_rate": 4.22985658686749e-05,
"loss": 1.3086,
"step": 268000
},
{
"epoch": 1.08,
"learning_rate": 4.2284191782507434e-05,
"loss": 1.3079,
"step": 268500
},
{
"epoch": 1.08,
"learning_rate": 4.226981769633996e-05,
"loss": 1.3209,
"step": 269000
},
{
"epoch": 1.08,
"learning_rate": 4.225544361017248e-05,
"loss": 1.3255,
"step": 269500
},
{
"epoch": 1.09,
"learning_rate": 4.224106952400501e-05,
"loss": 1.3124,
"step": 270000
},
{
"epoch": 1.09,
"learning_rate": 4.2226695437837544e-05,
"loss": 1.3312,
"step": 270500
},
{
"epoch": 1.09,
"learning_rate": 4.221232135167007e-05,
"loss": 1.3202,
"step": 271000
},
{
"epoch": 1.09,
"learning_rate": 4.21979472655026e-05,
"loss": 1.3048,
"step": 271500
},
{
"epoch": 1.09,
"learning_rate": 4.218357317933513e-05,
"loss": 1.2913,
"step": 272000
},
{
"epoch": 1.1,
"learning_rate": 4.216922784133999e-05,
"loss": 1.3403,
"step": 272500
},
{
"epoch": 1.1,
"learning_rate": 4.215485375517252e-05,
"loss": 1.3155,
"step": 273000
},
{
"epoch": 1.1,
"learning_rate": 4.214047966900504e-05,
"loss": 1.3483,
"step": 273500
},
{
"epoch": 1.1,
"learning_rate": 4.2126105582837574e-05,
"loss": 1.2946,
"step": 274000
},
{
"epoch": 1.1,
"learning_rate": 4.211176024484244e-05,
"loss": 1.3293,
"step": 274500
},
{
"epoch": 1.11,
"learning_rate": 4.209738615867496e-05,
"loss": 1.3216,
"step": 275000
},
{
"epoch": 1.11,
"learning_rate": 4.2083012072507494e-05,
"loss": 1.3089,
"step": 275500
},
{
"epoch": 1.11,
"learning_rate": 4.206863798634002e-05,
"loss": 1.2769,
"step": 276000
},
{
"epoch": 1.11,
"learning_rate": 4.205429264834488e-05,
"loss": 1.3592,
"step": 276500
},
{
"epoch": 1.11,
"learning_rate": 4.203994731034975e-05,
"loss": 1.3546,
"step": 277000
},
{
"epoch": 1.12,
"learning_rate": 4.202557322418227e-05,
"loss": 1.2827,
"step": 277500
},
{
"epoch": 1.12,
"learning_rate": 4.20111991380148e-05,
"loss": 1.3685,
"step": 278000
},
{
"epoch": 1.12,
"learning_rate": 4.199682505184733e-05,
"loss": 1.3143,
"step": 278500
},
{
"epoch": 1.12,
"learning_rate": 4.198245096567986e-05,
"loss": 1.3236,
"step": 279000
},
{
"epoch": 1.12,
"learning_rate": 4.196810562768472e-05,
"loss": 1.3198,
"step": 279500
},
{
"epoch": 1.13,
"learning_rate": 4.1953731541517246e-05,
"loss": 1.298,
"step": 280000
},
{
"epoch": 1.13,
"learning_rate": 4.193935745534978e-05,
"loss": 1.2766,
"step": 280500
},
{
"epoch": 1.13,
"learning_rate": 4.192498336918231e-05,
"loss": 1.2751,
"step": 281000
},
{
"epoch": 1.13,
"learning_rate": 4.191060928301483e-05,
"loss": 1.3115,
"step": 281500
},
{
"epoch": 1.13,
"learning_rate": 4.189629269319203e-05,
"loss": 1.3157,
"step": 282000
},
{
"epoch": 1.14,
"learning_rate": 4.188191860702456e-05,
"loss": 1.3184,
"step": 282500
},
{
"epoch": 1.14,
"learning_rate": 4.1867544520857085e-05,
"loss": 1.3392,
"step": 283000
},
{
"epoch": 1.14,
"learning_rate": 4.1853170434689616e-05,
"loss": 1.3014,
"step": 283500
},
{
"epoch": 1.14,
"learning_rate": 4.183879634852215e-05,
"loss": 1.3342,
"step": 284000
},
{
"epoch": 1.15,
"learning_rate": 4.182442226235467e-05,
"loss": 1.2772,
"step": 284500
},
{
"epoch": 1.15,
"learning_rate": 4.18100481761872e-05,
"loss": 1.3493,
"step": 285000
},
{
"epoch": 1.15,
"learning_rate": 4.179567409001973e-05,
"loss": 1.3381,
"step": 285500
},
{
"epoch": 1.15,
"learning_rate": 4.178130000385226e-05,
"loss": 1.3231,
"step": 286000
},
{
"epoch": 1.15,
"learning_rate": 4.176695466585712e-05,
"loss": 1.3612,
"step": 286500
},
{
"epoch": 1.16,
"learning_rate": 4.1752580579689646e-05,
"loss": 1.3584,
"step": 287000
},
{
"epoch": 1.16,
"learning_rate": 4.173820649352218e-05,
"loss": 1.3164,
"step": 287500
},
{
"epoch": 1.16,
"learning_rate": 4.172383240735471e-05,
"loss": 1.3132,
"step": 288000
},
{
"epoch": 1.16,
"learning_rate": 4.170945832118723e-05,
"loss": 1.3136,
"step": 288500
},
{
"epoch": 1.16,
"learning_rate": 4.1695084235019763e-05,
"loss": 1.3275,
"step": 289000
},
{
"epoch": 1.17,
"learning_rate": 4.168076764519696e-05,
"loss": 1.3111,
"step": 289500
},
{
"epoch": 1.17,
"learning_rate": 4.1666393559029486e-05,
"loss": 1.3326,
"step": 290000
},
{
"epoch": 1.17,
"learning_rate": 4.165201947286202e-05,
"loss": 1.3284,
"step": 290500
},
{
"epoch": 1.17,
"learning_rate": 4.163764538669454e-05,
"loss": 1.3265,
"step": 291000
},
{
"epoch": 1.17,
"learning_rate": 4.162327130052707e-05,
"loss": 1.3103,
"step": 291500
},
{
"epoch": 1.18,
"learning_rate": 4.1608897214359596e-05,
"loss": 1.2961,
"step": 292000
},
{
"epoch": 1.18,
"learning_rate": 4.159452312819213e-05,
"loss": 1.2697,
"step": 292500
},
{
"epoch": 1.18,
"learning_rate": 4.158014904202466e-05,
"loss": 1.3562,
"step": 293000
},
{
"epoch": 1.18,
"learning_rate": 4.156577495585718e-05,
"loss": 1.3253,
"step": 293500
},
{
"epoch": 1.18,
"learning_rate": 4.155142961786205e-05,
"loss": 1.3282,
"step": 294000
},
{
"epoch": 1.19,
"learning_rate": 4.153705553169457e-05,
"loss": 1.3074,
"step": 294500
},
{
"epoch": 1.19,
"learning_rate": 4.15226814455271e-05,
"loss": 1.2868,
"step": 295000
},
{
"epoch": 1.19,
"learning_rate": 4.150830735935963e-05,
"loss": 1.3628,
"step": 295500
},
{
"epoch": 1.19,
"learning_rate": 4.149393327319216e-05,
"loss": 1.3665,
"step": 296000
},
{
"epoch": 1.19,
"learning_rate": 4.147955918702469e-05,
"loss": 1.2772,
"step": 296500
},
{
"epoch": 1.2,
"learning_rate": 4.146518510085721e-05,
"loss": 1.3005,
"step": 297000
},
{
"epoch": 1.2,
"learning_rate": 4.145081101468974e-05,
"loss": 1.3234,
"step": 297500
},
{
"epoch": 1.2,
"learning_rate": 4.1436436928522274e-05,
"loss": 1.3169,
"step": 298000
},
{
"epoch": 1.2,
"learning_rate": 4.142209159052713e-05,
"loss": 1.3383,
"step": 298500
},
{
"epoch": 1.2,
"learning_rate": 4.140771750435966e-05,
"loss": 1.3168,
"step": 299000
},
{
"epoch": 1.21,
"learning_rate": 4.1393343418192194e-05,
"loss": 1.3092,
"step": 299500
},
{
"epoch": 1.21,
"learning_rate": 4.137896933202472e-05,
"loss": 1.2855,
"step": 300000
},
{
"epoch": 1.21,
"learning_rate": 4.136462399402958e-05,
"loss": 1.3017,
"step": 300500
},
{
"epoch": 1.21,
"learning_rate": 4.135024990786211e-05,
"loss": 1.3185,
"step": 301000
},
{
"epoch": 1.21,
"learning_rate": 4.133587582169464e-05,
"loss": 1.3136,
"step": 301500
},
{
"epoch": 1.22,
"learning_rate": 4.132150173552717e-05,
"loss": 1.302,
"step": 302000
},
{
"epoch": 1.22,
"learning_rate": 4.130712764935969e-05,
"loss": 1.3137,
"step": 302500
},
{
"epoch": 1.22,
"learning_rate": 4.1292753563192224e-05,
"loss": 1.3458,
"step": 303000
},
{
"epoch": 1.22,
"learning_rate": 4.127840822519708e-05,
"loss": 1.3394,
"step": 303500
},
{
"epoch": 1.22,
"learning_rate": 4.126403413902961e-05,
"loss": 1.3058,
"step": 304000
},
{
"epoch": 1.23,
"learning_rate": 4.1249660052862143e-05,
"loss": 1.2944,
"step": 304500
},
{
"epoch": 1.23,
"learning_rate": 4.123528596669467e-05,
"loss": 1.2933,
"step": 305000
},
{
"epoch": 1.23,
"learning_rate": 4.122094062869953e-05,
"loss": 1.2685,
"step": 305500
},
{
"epoch": 1.23,
"learning_rate": 4.120656654253206e-05,
"loss": 1.3184,
"step": 306000
},
{
"epoch": 1.23,
"learning_rate": 4.119222120453692e-05,
"loss": 1.3046,
"step": 306500
},
{
"epoch": 1.24,
"learning_rate": 4.117784711836945e-05,
"loss": 1.2728,
"step": 307000
},
{
"epoch": 1.24,
"learning_rate": 4.116347303220198e-05,
"loss": 1.3227,
"step": 307500
},
{
"epoch": 1.24,
"learning_rate": 4.114909894603451e-05,
"loss": 1.3138,
"step": 308000
},
{
"epoch": 1.24,
"learning_rate": 4.113472485986704e-05,
"loss": 1.3364,
"step": 308500
},
{
"epoch": 1.24,
"learning_rate": 4.112035077369956e-05,
"loss": 1.3093,
"step": 309000
},
{
"epoch": 1.25,
"learning_rate": 4.110597668753209e-05,
"loss": 1.2981,
"step": 309500
},
{
"epoch": 1.25,
"learning_rate": 4.109160260136462e-05,
"loss": 1.3095,
"step": 310000
},
{
"epoch": 1.25,
"learning_rate": 4.107722851519715e-05,
"loss": 1.3385,
"step": 310500
},
{
"epoch": 1.25,
"learning_rate": 4.1062911925374346e-05,
"loss": 1.2851,
"step": 311000
},
{
"epoch": 1.25,
"learning_rate": 4.104853783920688e-05,
"loss": 1.3389,
"step": 311500
},
{
"epoch": 1.26,
"learning_rate": 4.10341637530394e-05,
"loss": 1.321,
"step": 312000
},
{
"epoch": 1.26,
"learning_rate": 4.101978966687193e-05,
"loss": 1.3117,
"step": 312500
},
{
"epoch": 1.26,
"learning_rate": 4.100541558070446e-05,
"loss": 1.3384,
"step": 313000
},
{
"epoch": 1.26,
"learning_rate": 4.099104149453699e-05,
"loss": 1.3208,
"step": 313500
},
{
"epoch": 1.26,
"learning_rate": 4.097666740836951e-05,
"loss": 1.2958,
"step": 314000
},
{
"epoch": 1.27,
"learning_rate": 4.0962322070374376e-05,
"loss": 1.3443,
"step": 314500
},
{
"epoch": 1.27,
"learning_rate": 4.094794798420691e-05,
"loss": 1.3363,
"step": 315000
},
{
"epoch": 1.27,
"learning_rate": 4.093357389803943e-05,
"loss": 1.2784,
"step": 315500
},
{
"epoch": 1.27,
"learning_rate": 4.091919981187196e-05,
"loss": 1.3149,
"step": 316000
},
{
"epoch": 1.27,
"learning_rate": 4.090482572570449e-05,
"loss": 1.3311,
"step": 316500
},
{
"epoch": 1.28,
"learning_rate": 4.089045163953702e-05,
"loss": 1.3024,
"step": 317000
},
{
"epoch": 1.28,
"learning_rate": 4.087607755336955e-05,
"loss": 1.2816,
"step": 317500
},
{
"epoch": 1.28,
"learning_rate": 4.086170346720207e-05,
"loss": 1.3256,
"step": 318000
},
{
"epoch": 1.28,
"learning_rate": 4.084735812920694e-05,
"loss": 1.3071,
"step": 318500
},
{
"epoch": 1.28,
"learning_rate": 4.083298404303947e-05,
"loss": 1.3082,
"step": 319000
},
{
"epoch": 1.29,
"learning_rate": 4.081860995687199e-05,
"loss": 1.325,
"step": 319500
},
{
"epoch": 1.29,
"learning_rate": 4.0804235870704524e-05,
"loss": 1.3001,
"step": 320000
},
{
"epoch": 1.29,
"learning_rate": 4.078989053270939e-05,
"loss": 1.2427,
"step": 320500
},
{
"epoch": 1.29,
"learning_rate": 4.077551644654191e-05,
"loss": 1.3231,
"step": 321000
},
{
"epoch": 1.29,
"learning_rate": 4.076114236037444e-05,
"loss": 1.2805,
"step": 321500
},
{
"epoch": 1.3,
"learning_rate": 4.074676827420697e-05,
"loss": 1.303,
"step": 322000
},
{
"epoch": 1.3,
"learning_rate": 4.07323941880395e-05,
"loss": 1.3498,
"step": 322500
},
{
"epoch": 1.3,
"learning_rate": 4.071804885004436e-05,
"loss": 1.3334,
"step": 323000
},
{
"epoch": 1.3,
"learning_rate": 4.070367476387689e-05,
"loss": 1.3092,
"step": 323500
},
{
"epoch": 1.3,
"learning_rate": 4.068930067770942e-05,
"loss": 1.3639,
"step": 324000
},
{
"epoch": 1.31,
"learning_rate": 4.067492659154194e-05,
"loss": 1.2504,
"step": 324500
},
{
"epoch": 1.31,
"learning_rate": 4.066055250537447e-05,
"loss": 1.3246,
"step": 325000
},
{
"epoch": 1.31,
"learning_rate": 4.064620716737934e-05,
"loss": 1.3483,
"step": 325500
},
{
"epoch": 1.31,
"learning_rate": 4.063183308121186e-05,
"loss": 1.322,
"step": 326000
},
{
"epoch": 1.31,
"learning_rate": 4.061745899504439e-05,
"loss": 1.299,
"step": 326500
},
{
"epoch": 1.32,
"learning_rate": 4.0603084908876924e-05,
"loss": 1.269,
"step": 327000
},
{
"epoch": 1.32,
"learning_rate": 4.058873957088178e-05,
"loss": 1.3026,
"step": 327500
},
{
"epoch": 1.32,
"learning_rate": 4.057436548471431e-05,
"loss": 1.3286,
"step": 328000
},
{
"epoch": 1.32,
"learning_rate": 4.055999139854684e-05,
"loss": 1.3145,
"step": 328500
},
{
"epoch": 1.32,
"learning_rate": 4.054561731237937e-05,
"loss": 1.3127,
"step": 329000
},
{
"epoch": 1.33,
"learning_rate": 4.053127197438423e-05,
"loss": 1.3519,
"step": 329500
},
{
"epoch": 1.33,
"learning_rate": 4.0516897888216757e-05,
"loss": 1.3121,
"step": 330000
},
{
"epoch": 1.33,
"learning_rate": 4.050252380204929e-05,
"loss": 1.3452,
"step": 330500
},
{
"epoch": 1.33,
"learning_rate": 4.048817846405415e-05,
"loss": 1.2979,
"step": 331000
},
{
"epoch": 1.33,
"learning_rate": 4.0473804377886676e-05,
"loss": 1.348,
"step": 331500
},
{
"epoch": 1.34,
"learning_rate": 4.045943029171921e-05,
"loss": 1.337,
"step": 332000
},
{
"epoch": 1.34,
"learning_rate": 4.044505620555173e-05,
"loss": 1.2797,
"step": 332500
},
{
"epoch": 1.34,
"learning_rate": 4.043068211938426e-05,
"loss": 1.301,
"step": 333000
},
{
"epoch": 1.34,
"learning_rate": 4.0416308033216786e-05,
"loss": 1.3433,
"step": 333500
},
{
"epoch": 1.34,
"learning_rate": 4.040193394704932e-05,
"loss": 1.2581,
"step": 334000
},
{
"epoch": 1.35,
"learning_rate": 4.038755986088185e-05,
"loss": 1.3248,
"step": 334500
},
{
"epoch": 1.35,
"learning_rate": 4.037318577471437e-05,
"loss": 1.2734,
"step": 335000
},
{
"epoch": 1.35,
"learning_rate": 4.035884043671924e-05,
"loss": 1.3842,
"step": 335500
},
{
"epoch": 1.35,
"learning_rate": 4.034446635055177e-05,
"loss": 1.2941,
"step": 336000
},
{
"epoch": 1.35,
"learning_rate": 4.033009226438429e-05,
"loss": 1.3248,
"step": 336500
},
{
"epoch": 1.36,
"learning_rate": 4.031574692638916e-05,
"loss": 1.3258,
"step": 337000
},
{
"epoch": 1.36,
"learning_rate": 4.030137284022169e-05,
"loss": 1.3629,
"step": 337500
},
{
"epoch": 1.36,
"learning_rate": 4.028699875405421e-05,
"loss": 1.2897,
"step": 338000
},
{
"epoch": 1.36,
"learning_rate": 4.027262466788674e-05,
"loss": 1.33,
"step": 338500
},
{
"epoch": 1.36,
"learning_rate": 4.025825058171927e-05,
"loss": 1.3211,
"step": 339000
},
{
"epoch": 1.37,
"learning_rate": 4.024390524372413e-05,
"loss": 1.3184,
"step": 339500
},
{
"epoch": 1.37,
"learning_rate": 4.022953115755666e-05,
"loss": 1.3217,
"step": 340000
},
{
"epoch": 1.37,
"learning_rate": 4.021518581956153e-05,
"loss": 1.2926,
"step": 340500
},
{
"epoch": 1.37,
"learning_rate": 4.020081173339405e-05,
"loss": 1.304,
"step": 341000
},
{
"epoch": 1.37,
"learning_rate": 4.0186437647226576e-05,
"loss": 1.2539,
"step": 341500
},
{
"epoch": 1.38,
"learning_rate": 4.0172063561059107e-05,
"loss": 1.3232,
"step": 342000
},
{
"epoch": 1.38,
"learning_rate": 4.015768947489164e-05,
"loss": 1.3217,
"step": 342500
},
{
"epoch": 1.38,
"learning_rate": 4.014331538872416e-05,
"loss": 1.2563,
"step": 343000
},
{
"epoch": 1.38,
"learning_rate": 4.012894130255669e-05,
"loss": 1.2927,
"step": 343500
},
{
"epoch": 1.38,
"learning_rate": 4.0114567216389224e-05,
"loss": 1.2866,
"step": 344000
},
{
"epoch": 1.39,
"learning_rate": 4.010019313022175e-05,
"loss": 1.3148,
"step": 344500
},
{
"epoch": 1.39,
"learning_rate": 4.008584779222661e-05,
"loss": 1.3109,
"step": 345000
},
{
"epoch": 1.39,
"learning_rate": 4.0071473706059137e-05,
"loss": 1.2924,
"step": 345500
},
{
"epoch": 1.39,
"learning_rate": 4.005709961989167e-05,
"loss": 1.3149,
"step": 346000
},
{
"epoch": 1.39,
"learning_rate": 4.00427255337242e-05,
"loss": 1.3123,
"step": 346500
},
{
"epoch": 1.4,
"learning_rate": 4.002835144755672e-05,
"loss": 1.3045,
"step": 347000
},
{
"epoch": 1.4,
"learning_rate": 4.0013977361389254e-05,
"loss": 1.3122,
"step": 347500
},
{
"epoch": 1.4,
"learning_rate": 3.999963202339411e-05,
"loss": 1.3002,
"step": 348000
},
{
"epoch": 1.4,
"learning_rate": 3.998525793722664e-05,
"loss": 1.314,
"step": 348500
},
{
"epoch": 1.4,
"learning_rate": 3.997088385105917e-05,
"loss": 1.2549,
"step": 349000
},
{
"epoch": 1.41,
"learning_rate": 3.99565097648917e-05,
"loss": 1.288,
"step": 349500
},
{
"epoch": 1.41,
"learning_rate": 3.994213567872423e-05,
"loss": 1.3279,
"step": 350000
},
{
"epoch": 1.41,
"learning_rate": 3.992776159255676e-05,
"loss": 1.2969,
"step": 350500
},
{
"epoch": 1.41,
"learning_rate": 3.9913387506389284e-05,
"loss": 1.2879,
"step": 351000
},
{
"epoch": 1.41,
"learning_rate": 3.989901342022181e-05,
"loss": 1.3131,
"step": 351500
},
{
"epoch": 1.42,
"learning_rate": 3.988469683039901e-05,
"loss": 1.3212,
"step": 352000
},
{
"epoch": 1.42,
"learning_rate": 3.987032274423154e-05,
"loss": 1.3022,
"step": 352500
},
{
"epoch": 1.42,
"learning_rate": 3.985594865806406e-05,
"loss": 1.313,
"step": 353000
},
{
"epoch": 1.42,
"learning_rate": 3.984157457189659e-05,
"loss": 1.2912,
"step": 353500
},
{
"epoch": 1.42,
"learning_rate": 3.982720048572912e-05,
"loss": 1.2996,
"step": 354000
},
{
"epoch": 1.43,
"learning_rate": 3.981282639956165e-05,
"loss": 1.2747,
"step": 354500
},
{
"epoch": 1.43,
"learning_rate": 3.979845231339418e-05,
"loss": 1.3257,
"step": 355000
},
{
"epoch": 1.43,
"learning_rate": 3.978407822722671e-05,
"loss": 1.2859,
"step": 355500
},
{
"epoch": 1.43,
"learning_rate": 3.976970414105923e-05,
"loss": 1.3374,
"step": 356000
},
{
"epoch": 1.43,
"learning_rate": 3.97553588030641e-05,
"loss": 1.324,
"step": 356500
},
{
"epoch": 1.44,
"learning_rate": 3.974098471689662e-05,
"loss": 1.2956,
"step": 357000
},
{
"epoch": 1.44,
"learning_rate": 3.972661063072915e-05,
"loss": 1.3514,
"step": 357500
},
{
"epoch": 1.44,
"learning_rate": 3.9712236544561684e-05,
"loss": 1.2666,
"step": 358000
},
{
"epoch": 1.44,
"learning_rate": 3.969789120656655e-05,
"loss": 1.3184,
"step": 358500
},
{
"epoch": 1.44,
"learning_rate": 3.968351712039907e-05,
"loss": 1.3575,
"step": 359000
},
{
"epoch": 1.45,
"learning_rate": 3.96691430342316e-05,
"loss": 1.3093,
"step": 359500
},
{
"epoch": 1.45,
"learning_rate": 3.965476894806413e-05,
"loss": 1.3171,
"step": 360000
},
{
"epoch": 1.45,
"learning_rate": 3.964039486189666e-05,
"loss": 1.3405,
"step": 360500
},
{
"epoch": 1.45,
"learning_rate": 3.962604952390152e-05,
"loss": 1.3328,
"step": 361000
},
{
"epoch": 1.45,
"learning_rate": 3.961167543773405e-05,
"loss": 1.2607,
"step": 361500
},
{
"epoch": 1.46,
"learning_rate": 3.959730135156657e-05,
"loss": 1.2771,
"step": 362000
},
{
"epoch": 1.46,
"learning_rate": 3.95829272653991e-05,
"loss": 1.2848,
"step": 362500
},
{
"epoch": 1.46,
"learning_rate": 3.9568553179231634e-05,
"loss": 1.2904,
"step": 363000
},
{
"epoch": 1.46,
"learning_rate": 3.955417909306416e-05,
"loss": 1.309,
"step": 363500
},
{
"epoch": 1.47,
"learning_rate": 3.953980500689669e-05,
"loss": 1.3402,
"step": 364000
},
{
"epoch": 1.47,
"learning_rate": 3.9525459668901547e-05,
"loss": 1.2667,
"step": 364500
},
{
"epoch": 1.47,
"learning_rate": 3.9511085582734084e-05,
"loss": 1.332,
"step": 365000
},
{
"epoch": 1.47,
"learning_rate": 3.949671149656661e-05,
"loss": 1.336,
"step": 365500
},
{
"epoch": 1.47,
"learning_rate": 3.948233741039913e-05,
"loss": 1.357,
"step": 366000
},
{
"epoch": 1.48,
"learning_rate": 3.9467963324231664e-05,
"loss": 1.2626,
"step": 366500
},
{
"epoch": 1.48,
"learning_rate": 3.9453589238064195e-05,
"loss": 1.291,
"step": 367000
},
{
"epoch": 1.48,
"learning_rate": 3.943921515189672e-05,
"loss": 1.3226,
"step": 367500
},
{
"epoch": 1.48,
"learning_rate": 3.942484106572924e-05,
"loss": 1.2919,
"step": 368000
},
{
"epoch": 1.48,
"learning_rate": 3.941046697956178e-05,
"loss": 1.3069,
"step": 368500
},
{
"epoch": 1.49,
"learning_rate": 3.939612164156664e-05,
"loss": 1.3492,
"step": 369000
},
{
"epoch": 1.49,
"learning_rate": 3.938174755539917e-05,
"loss": 1.2857,
"step": 369500
},
{
"epoch": 1.49,
"learning_rate": 3.9367402217404034e-05,
"loss": 1.296,
"step": 370000
},
{
"epoch": 1.49,
"learning_rate": 3.935302813123656e-05,
"loss": 1.3178,
"step": 370500
},
{
"epoch": 1.49,
"learning_rate": 3.933865404506908e-05,
"loss": 1.3425,
"step": 371000
},
{
"epoch": 1.5,
"learning_rate": 3.932427995890161e-05,
"loss": 1.2953,
"step": 371500
},
{
"epoch": 1.5,
"learning_rate": 3.9309905872734144e-05,
"loss": 1.3248,
"step": 372000
},
{
"epoch": 1.5,
"learning_rate": 3.929553178656667e-05,
"loss": 1.2808,
"step": 372500
},
{
"epoch": 1.5,
"learning_rate": 3.92811577003992e-05,
"loss": 1.2778,
"step": 373000
},
{
"epoch": 1.5,
"learning_rate": 3.926678361423173e-05,
"loss": 1.3322,
"step": 373500
},
{
"epoch": 1.51,
"learning_rate": 3.9252409528064255e-05,
"loss": 1.3141,
"step": 374000
},
{
"epoch": 1.51,
"learning_rate": 3.923806419006912e-05,
"loss": 1.3462,
"step": 374500
},
{
"epoch": 1.51,
"learning_rate": 3.922369010390164e-05,
"loss": 1.303,
"step": 375000
},
{
"epoch": 1.51,
"learning_rate": 3.9209316017734174e-05,
"loss": 1.2846,
"step": 375500
},
{
"epoch": 1.51,
"learning_rate": 3.9194941931566705e-05,
"loss": 1.2977,
"step": 376000
},
{
"epoch": 1.52,
"learning_rate": 3.918059659357157e-05,
"loss": 1.3207,
"step": 376500
},
{
"epoch": 1.52,
"learning_rate": 3.9166222507404094e-05,
"loss": 1.3579,
"step": 377000
},
{
"epoch": 1.52,
"learning_rate": 3.915184842123662e-05,
"loss": 1.2612,
"step": 377500
},
{
"epoch": 1.52,
"learning_rate": 3.913747433506915e-05,
"loss": 1.2684,
"step": 378000
},
{
"epoch": 1.52,
"learning_rate": 3.912310024890168e-05,
"loss": 1.3118,
"step": 378500
},
{
"epoch": 1.53,
"learning_rate": 3.9108754910906545e-05,
"loss": 1.2694,
"step": 379000
},
{
"epoch": 1.53,
"learning_rate": 3.90944095729114e-05,
"loss": 1.3283,
"step": 379500
},
{
"epoch": 1.53,
"learning_rate": 3.908003548674393e-05,
"loss": 1.2881,
"step": 380000
},
{
"epoch": 1.53,
"learning_rate": 3.906566140057646e-05,
"loss": 1.2595,
"step": 380500
},
{
"epoch": 1.53,
"learning_rate": 3.905128731440899e-05,
"loss": 1.3154,
"step": 381000
},
{
"epoch": 1.54,
"learning_rate": 3.903691322824152e-05,
"loss": 1.2472,
"step": 381500
},
{
"epoch": 1.54,
"learning_rate": 3.9022539142074044e-05,
"loss": 1.2501,
"step": 382000
},
{
"epoch": 1.54,
"learning_rate": 3.900816505590657e-05,
"loss": 1.2928,
"step": 382500
},
{
"epoch": 1.54,
"learning_rate": 3.8993790969739106e-05,
"loss": 1.3238,
"step": 383000
},
{
"epoch": 1.54,
"learning_rate": 3.897944563174396e-05,
"loss": 1.323,
"step": 383500
},
{
"epoch": 1.55,
"learning_rate": 3.8965071545576494e-05,
"loss": 1.3102,
"step": 384000
},
{
"epoch": 1.55,
"learning_rate": 3.895069745940902e-05,
"loss": 1.2707,
"step": 384500
},
{
"epoch": 1.55,
"learning_rate": 3.893635212141388e-05,
"loss": 1.3345,
"step": 385000
},
{
"epoch": 1.55,
"learning_rate": 3.892197803524641e-05,
"loss": 1.3071,
"step": 385500
},
{
"epoch": 1.55,
"learning_rate": 3.890760394907894e-05,
"loss": 1.3225,
"step": 386000
},
{
"epoch": 1.56,
"learning_rate": 3.889322986291147e-05,
"loss": 1.3136,
"step": 386500
},
{
"epoch": 1.56,
"learning_rate": 3.887885577674399e-05,
"loss": 1.3166,
"step": 387000
},
{
"epoch": 1.56,
"learning_rate": 3.886448169057652e-05,
"loss": 1.3347,
"step": 387500
},
{
"epoch": 1.56,
"learning_rate": 3.8850107604409055e-05,
"loss": 1.3325,
"step": 388000
},
{
"epoch": 1.56,
"learning_rate": 3.883573351824158e-05,
"loss": 1.244,
"step": 388500
},
{
"epoch": 1.57,
"learning_rate": 3.8821359432074104e-05,
"loss": 1.2885,
"step": 389000
},
{
"epoch": 1.57,
"learning_rate": 3.880701409407897e-05,
"loss": 1.2643,
"step": 389500
},
{
"epoch": 1.57,
"learning_rate": 3.87926400079115e-05,
"loss": 1.2765,
"step": 390000
},
{
"epoch": 1.57,
"learning_rate": 3.877826592174403e-05,
"loss": 1.2574,
"step": 390500
},
{
"epoch": 1.57,
"learning_rate": 3.8763891835576554e-05,
"loss": 1.3173,
"step": 391000
},
{
"epoch": 1.58,
"learning_rate": 3.874954649758142e-05,
"loss": 1.2948,
"step": 391500
},
{
"epoch": 1.58,
"learning_rate": 3.873517241141394e-05,
"loss": 1.2861,
"step": 392000
},
{
"epoch": 1.58,
"learning_rate": 3.8720798325246474e-05,
"loss": 1.3029,
"step": 392500
},
{
"epoch": 1.58,
"learning_rate": 3.8706424239079005e-05,
"loss": 1.3286,
"step": 393000
},
{
"epoch": 1.58,
"learning_rate": 3.869207890108387e-05,
"loss": 1.3104,
"step": 393500
},
{
"epoch": 1.59,
"learning_rate": 3.8677704814916394e-05,
"loss": 1.3286,
"step": 394000
},
{
"epoch": 1.59,
"learning_rate": 3.866333072874892e-05,
"loss": 1.2915,
"step": 394500
},
{
"epoch": 1.59,
"learning_rate": 3.864895664258145e-05,
"loss": 1.3365,
"step": 395000
},
{
"epoch": 1.59,
"learning_rate": 3.863458255641398e-05,
"loss": 1.3339,
"step": 395500
},
{
"epoch": 1.59,
"learning_rate": 3.8620237218418844e-05,
"loss": 1.3178,
"step": 396000
},
{
"epoch": 1.6,
"learning_rate": 3.860586313225137e-05,
"loss": 1.341,
"step": 396500
},
{
"epoch": 1.6,
"learning_rate": 3.859148904608389e-05,
"loss": 1.2945,
"step": 397000
},
{
"epoch": 1.6,
"learning_rate": 3.857714370808876e-05,
"loss": 1.2954,
"step": 397500
},
{
"epoch": 1.6,
"learning_rate": 3.856276962192129e-05,
"loss": 1.2809,
"step": 398000
},
{
"epoch": 1.6,
"learning_rate": 3.854839553575382e-05,
"loss": 1.2982,
"step": 398500
},
{
"epoch": 1.61,
"learning_rate": 3.853402144958634e-05,
"loss": 1.2972,
"step": 399000
},
{
"epoch": 1.61,
"learning_rate": 3.851964736341887e-05,
"loss": 1.3654,
"step": 399500
},
{
"epoch": 1.61,
"learning_rate": 3.8505273277251405e-05,
"loss": 1.2658,
"step": 400000
},
{
"epoch": 1.61,
"learning_rate": 3.849089919108393e-05,
"loss": 1.292,
"step": 400500
},
{
"epoch": 1.61,
"learning_rate": 3.8476525104916454e-05,
"loss": 1.3061,
"step": 401000
},
{
"epoch": 1.62,
"learning_rate": 3.8462151018748985e-05,
"loss": 1.2735,
"step": 401500
},
{
"epoch": 1.62,
"learning_rate": 3.8447776932581516e-05,
"loss": 1.3375,
"step": 402000
},
{
"epoch": 1.62,
"learning_rate": 3.843340284641404e-05,
"loss": 1.2889,
"step": 402500
},
{
"epoch": 1.62,
"learning_rate": 3.8419028760246564e-05,
"loss": 1.3407,
"step": 403000
},
{
"epoch": 1.62,
"learning_rate": 3.840468342225143e-05,
"loss": 1.309,
"step": 403500
},
{
"epoch": 1.63,
"learning_rate": 3.839030933608396e-05,
"loss": 1.2736,
"step": 404000
},
{
"epoch": 1.63,
"learning_rate": 3.837593524991649e-05,
"loss": 1.2761,
"step": 404500
},
{
"epoch": 1.63,
"learning_rate": 3.8361589911921355e-05,
"loss": 1.333,
"step": 405000
},
{
"epoch": 1.63,
"learning_rate": 3.834721582575388e-05,
"loss": 1.2702,
"step": 405500
},
{
"epoch": 1.63,
"learning_rate": 3.83328417395864e-05,
"loss": 1.3226,
"step": 406000
},
{
"epoch": 1.64,
"learning_rate": 3.831846765341894e-05,
"loss": 1.3432,
"step": 406500
},
{
"epoch": 1.64,
"learning_rate": 3.8304093567251465e-05,
"loss": 1.2957,
"step": 407000
},
{
"epoch": 1.64,
"learning_rate": 3.828971948108399e-05,
"loss": 1.3351,
"step": 407500
},
{
"epoch": 1.64,
"learning_rate": 3.827534539491652e-05,
"loss": 1.3261,
"step": 408000
},
{
"epoch": 1.64,
"learning_rate": 3.826097130874905e-05,
"loss": 1.3281,
"step": 408500
},
{
"epoch": 1.65,
"learning_rate": 3.8246625970753916e-05,
"loss": 1.3111,
"step": 409000
},
{
"epoch": 1.65,
"learning_rate": 3.823225188458644e-05,
"loss": 1.3541,
"step": 409500
},
{
"epoch": 1.65,
"learning_rate": 3.8217877798418964e-05,
"loss": 1.2615,
"step": 410000
},
{
"epoch": 1.65,
"learning_rate": 3.8203503712251495e-05,
"loss": 1.2716,
"step": 410500
},
{
"epoch": 1.65,
"learning_rate": 3.818915837425635e-05,
"loss": 1.3619,
"step": 411000
},
{
"epoch": 1.66,
"learning_rate": 3.817478428808889e-05,
"loss": 1.3252,
"step": 411500
},
{
"epoch": 1.66,
"learning_rate": 3.8160410201921415e-05,
"loss": 1.2901,
"step": 412000
},
{
"epoch": 1.66,
"learning_rate": 3.814603611575394e-05,
"loss": 1.2943,
"step": 412500
},
{
"epoch": 1.66,
"learning_rate": 3.813166202958648e-05,
"loss": 1.2663,
"step": 413000
},
{
"epoch": 1.66,
"learning_rate": 3.8117316691591335e-05,
"loss": 1.2747,
"step": 413500
},
{
"epoch": 1.67,
"learning_rate": 3.8102942605423866e-05,
"loss": 1.3128,
"step": 414000
},
{
"epoch": 1.67,
"learning_rate": 3.808856851925639e-05,
"loss": 1.3107,
"step": 414500
},
{
"epoch": 1.67,
"learning_rate": 3.8074194433088914e-05,
"loss": 1.3281,
"step": 415000
},
{
"epoch": 1.67,
"learning_rate": 3.805984909509378e-05,
"loss": 1.2913,
"step": 415500
},
{
"epoch": 1.67,
"learning_rate": 3.804547500892631e-05,
"loss": 1.3163,
"step": 416000
},
{
"epoch": 1.68,
"learning_rate": 3.803112967093117e-05,
"loss": 1.2811,
"step": 416500
},
{
"epoch": 1.68,
"learning_rate": 3.8016755584763705e-05,
"loss": 1.3121,
"step": 417000
},
{
"epoch": 1.68,
"learning_rate": 3.800238149859623e-05,
"loss": 1.2716,
"step": 417500
},
{
"epoch": 1.68,
"learning_rate": 3.798800741242875e-05,
"loss": 1.3418,
"step": 418000
},
{
"epoch": 1.68,
"learning_rate": 3.797366207443362e-05,
"loss": 1.2755,
"step": 418500
},
{
"epoch": 1.69,
"learning_rate": 3.795928798826615e-05,
"loss": 1.2791,
"step": 419000
},
{
"epoch": 1.69,
"learning_rate": 3.794491390209868e-05,
"loss": 1.2886,
"step": 419500
},
{
"epoch": 1.69,
"learning_rate": 3.7930539815931204e-05,
"loss": 1.2725,
"step": 420000
},
{
"epoch": 1.69,
"learning_rate": 3.791616572976373e-05,
"loss": 1.2872,
"step": 420500
},
{
"epoch": 1.69,
"learning_rate": 3.7901791643596266e-05,
"loss": 1.2549,
"step": 421000
},
{
"epoch": 1.7,
"learning_rate": 3.788741755742879e-05,
"loss": 1.3114,
"step": 421500
},
{
"epoch": 1.7,
"learning_rate": 3.7873043471261314e-05,
"loss": 1.2449,
"step": 422000
},
{
"epoch": 1.7,
"learning_rate": 3.785869813326618e-05,
"loss": 1.2821,
"step": 422500
},
{
"epoch": 1.7,
"learning_rate": 3.78443240470987e-05,
"loss": 1.3374,
"step": 423000
},
{
"epoch": 1.7,
"learning_rate": 3.782994996093124e-05,
"loss": 1.2604,
"step": 423500
},
{
"epoch": 1.71,
"learning_rate": 3.7815575874763765e-05,
"loss": 1.2585,
"step": 424000
},
{
"epoch": 1.71,
"learning_rate": 3.780123053676863e-05,
"loss": 1.306,
"step": 424500
},
{
"epoch": 1.71,
"learning_rate": 3.7786856450601154e-05,
"loss": 1.2987,
"step": 425000
},
{
"epoch": 1.71,
"learning_rate": 3.777248236443368e-05,
"loss": 1.3073,
"step": 425500
},
{
"epoch": 1.71,
"learning_rate": 3.7758108278266216e-05,
"loss": 1.298,
"step": 426000
},
{
"epoch": 1.72,
"learning_rate": 3.774373419209874e-05,
"loss": 1.301,
"step": 426500
},
{
"epoch": 1.72,
"learning_rate": 3.7729360105931264e-05,
"loss": 1.3247,
"step": 427000
},
{
"epoch": 1.72,
"learning_rate": 3.7714986019763795e-05,
"loss": 1.2696,
"step": 427500
},
{
"epoch": 1.72,
"learning_rate": 3.770064068176866e-05,
"loss": 1.3062,
"step": 428000
},
{
"epoch": 1.72,
"learning_rate": 3.768626659560119e-05,
"loss": 1.2945,
"step": 428500
},
{
"epoch": 1.73,
"learning_rate": 3.7671892509433715e-05,
"loss": 1.2748,
"step": 429000
},
{
"epoch": 1.73,
"learning_rate": 3.765751842326624e-05,
"loss": 1.2795,
"step": 429500
},
{
"epoch": 1.73,
"learning_rate": 3.764314433709877e-05,
"loss": 1.3429,
"step": 430000
},
{
"epoch": 1.73,
"learning_rate": 3.76287702509313e-05,
"loss": 1.365,
"step": 430500
},
{
"epoch": 1.73,
"learning_rate": 3.7614396164763825e-05,
"loss": 1.3175,
"step": 431000
},
{
"epoch": 1.74,
"learning_rate": 3.7600022078596356e-05,
"loss": 1.2859,
"step": 431500
},
{
"epoch": 1.74,
"learning_rate": 3.7585676740601214e-05,
"loss": 1.2628,
"step": 432000
},
{
"epoch": 1.74,
"learning_rate": 3.757130265443375e-05,
"loss": 1.2823,
"step": 432500
},
{
"epoch": 1.74,
"learning_rate": 3.7556928568266276e-05,
"loss": 1.2831,
"step": 433000
},
{
"epoch": 1.74,
"learning_rate": 3.75425544820988e-05,
"loss": 1.31,
"step": 433500
},
{
"epoch": 1.75,
"learning_rate": 3.7528209144103664e-05,
"loss": 1.2884,
"step": 434000
},
{
"epoch": 1.75,
"learning_rate": 3.751383505793619e-05,
"loss": 1.3053,
"step": 434500
},
{
"epoch": 1.75,
"learning_rate": 3.7499460971768726e-05,
"loss": 1.288,
"step": 435000
},
{
"epoch": 1.75,
"learning_rate": 3.748508688560125e-05,
"loss": 1.3179,
"step": 435500
},
{
"epoch": 1.75,
"learning_rate": 3.7470712799433775e-05,
"loss": 1.3117,
"step": 436000
},
{
"epoch": 1.76,
"learning_rate": 3.7456338713266306e-05,
"loss": 1.3112,
"step": 436500
},
{
"epoch": 1.76,
"learning_rate": 3.7441964627098837e-05,
"loss": 1.2802,
"step": 437000
},
{
"epoch": 1.76,
"learning_rate": 3.742759054093136e-05,
"loss": 1.2868,
"step": 437500
},
{
"epoch": 1.76,
"learning_rate": 3.741321645476389e-05,
"loss": 1.2857,
"step": 438000
},
{
"epoch": 1.76,
"learning_rate": 3.739887111676875e-05,
"loss": 1.2808,
"step": 438500
},
{
"epoch": 1.77,
"learning_rate": 3.738449703060128e-05,
"loss": 1.309,
"step": 439000
},
{
"epoch": 1.77,
"learning_rate": 3.737012294443381e-05,
"loss": 1.3257,
"step": 439500
},
{
"epoch": 1.77,
"learning_rate": 3.7355748858266336e-05,
"loss": 1.3181,
"step": 440000
},
{
"epoch": 1.77,
"learning_rate": 3.73414035202712e-05,
"loss": 1.3189,
"step": 440500
},
{
"epoch": 1.77,
"learning_rate": 3.7327029434103724e-05,
"loss": 1.2565,
"step": 441000
},
{
"epoch": 1.78,
"learning_rate": 3.731265534793626e-05,
"loss": 1.2688,
"step": 441500
},
{
"epoch": 1.78,
"learning_rate": 3.729831000994112e-05,
"loss": 1.2885,
"step": 442000
},
{
"epoch": 1.78,
"learning_rate": 3.728393592377365e-05,
"loss": 1.3076,
"step": 442500
},
{
"epoch": 1.78,
"learning_rate": 3.7269561837606175e-05,
"loss": 1.2664,
"step": 443000
},
{
"epoch": 1.78,
"learning_rate": 3.7255187751438706e-05,
"loss": 1.3208,
"step": 443500
},
{
"epoch": 1.79,
"learning_rate": 3.724081366527124e-05,
"loss": 1.2682,
"step": 444000
},
{
"epoch": 1.79,
"learning_rate": 3.722643957910376e-05,
"loss": 1.3087,
"step": 444500
},
{
"epoch": 1.79,
"learning_rate": 3.7212065492936285e-05,
"loss": 1.2755,
"step": 445000
},
{
"epoch": 1.79,
"learning_rate": 3.7197691406768816e-05,
"loss": 1.3005,
"step": 445500
},
{
"epoch": 1.8,
"learning_rate": 3.7183374816946014e-05,
"loss": 1.2834,
"step": 446000
},
{
"epoch": 1.8,
"learning_rate": 3.716900073077854e-05,
"loss": 1.2735,
"step": 446500
},
{
"epoch": 1.8,
"learning_rate": 3.715462664461107e-05,
"loss": 1.3396,
"step": 447000
},
{
"epoch": 1.8,
"learning_rate": 3.71402525584436e-05,
"loss": 1.2835,
"step": 447500
},
{
"epoch": 1.8,
"learning_rate": 3.7125878472276125e-05,
"loss": 1.2776,
"step": 448000
},
{
"epoch": 1.81,
"learning_rate": 3.7111504386108656e-05,
"loss": 1.2976,
"step": 448500
},
{
"epoch": 1.81,
"learning_rate": 3.7097159048113513e-05,
"loss": 1.3103,
"step": 449000
},
{
"epoch": 1.81,
"learning_rate": 3.708278496194605e-05,
"loss": 1.3003,
"step": 449500
},
{
"epoch": 1.81,
"learning_rate": 3.7068410875778575e-05,
"loss": 1.305,
"step": 450000
},
{
"epoch": 1.81,
"learning_rate": 3.70540367896111e-05,
"loss": 1.2685,
"step": 450500
},
{
"epoch": 1.82,
"learning_rate": 3.703966270344363e-05,
"loss": 1.2824,
"step": 451000
},
{
"epoch": 1.82,
"learning_rate": 3.7025317365448495e-05,
"loss": 1.2703,
"step": 451500
},
{
"epoch": 1.82,
"learning_rate": 3.7010943279281026e-05,
"loss": 1.2796,
"step": 452000
},
{
"epoch": 1.82,
"learning_rate": 3.699656919311355e-05,
"loss": 1.2921,
"step": 452500
},
{
"epoch": 1.82,
"learning_rate": 3.6982195106946074e-05,
"loss": 1.2714,
"step": 453000
},
{
"epoch": 1.83,
"learning_rate": 3.6967821020778605e-05,
"loss": 1.2959,
"step": 453500
},
{
"epoch": 1.83,
"learning_rate": 3.6953446934611136e-05,
"loss": 1.283,
"step": 454000
},
{
"epoch": 1.83,
"learning_rate": 3.693907284844366e-05,
"loss": 1.3516,
"step": 454500
},
{
"epoch": 1.83,
"learning_rate": 3.6924727510448525e-05,
"loss": 1.2841,
"step": 455000
},
{
"epoch": 1.83,
"learning_rate": 3.691035342428105e-05,
"loss": 1.3314,
"step": 455500
},
{
"epoch": 1.84,
"learning_rate": 3.689597933811358e-05,
"loss": 1.3287,
"step": 456000
},
{
"epoch": 1.84,
"learning_rate": 3.688160525194611e-05,
"loss": 1.3005,
"step": 456500
},
{
"epoch": 1.84,
"learning_rate": 3.6867231165778635e-05,
"loss": 1.2771,
"step": 457000
},
{
"epoch": 1.84,
"learning_rate": 3.6852857079611166e-05,
"loss": 1.3057,
"step": 457500
},
{
"epoch": 1.84,
"learning_rate": 3.68384829934437e-05,
"loss": 1.299,
"step": 458000
},
{
"epoch": 1.85,
"learning_rate": 3.682410890727622e-05,
"loss": 1.2904,
"step": 458500
},
{
"epoch": 1.85,
"learning_rate": 3.6809734821108746e-05,
"loss": 1.2533,
"step": 459000
},
{
"epoch": 1.85,
"learning_rate": 3.679538948311361e-05,
"loss": 1.3131,
"step": 459500
},
{
"epoch": 1.85,
"learning_rate": 3.678101539694614e-05,
"loss": 1.2769,
"step": 460000
},
{
"epoch": 1.85,
"learning_rate": 3.676664131077867e-05,
"loss": 1.3289,
"step": 460500
},
{
"epoch": 1.86,
"learning_rate": 3.675229597278354e-05,
"loss": 1.3292,
"step": 461000
},
{
"epoch": 1.86,
"learning_rate": 3.673792188661606e-05,
"loss": 1.2642,
"step": 461500
},
{
"epoch": 1.86,
"learning_rate": 3.6723547800448585e-05,
"loss": 1.2938,
"step": 462000
},
{
"epoch": 1.86,
"learning_rate": 3.6709173714281116e-05,
"loss": 1.3634,
"step": 462500
},
{
"epoch": 1.86,
"learning_rate": 3.669479962811365e-05,
"loss": 1.2554,
"step": 463000
},
{
"epoch": 1.87,
"learning_rate": 3.668042554194617e-05,
"loss": 1.2683,
"step": 463500
},
{
"epoch": 1.87,
"learning_rate": 3.66660514557787e-05,
"loss": 1.2845,
"step": 464000
},
{
"epoch": 1.87,
"learning_rate": 3.665167736961123e-05,
"loss": 1.3046,
"step": 464500
},
{
"epoch": 1.87,
"learning_rate": 3.663733203161609e-05,
"loss": 1.2429,
"step": 465000
},
{
"epoch": 1.87,
"learning_rate": 3.662295794544862e-05,
"loss": 1.2988,
"step": 465500
},
{
"epoch": 1.88,
"learning_rate": 3.6608583859281146e-05,
"loss": 1.3175,
"step": 466000
},
{
"epoch": 1.88,
"learning_rate": 3.659423852128601e-05,
"loss": 1.279,
"step": 466500
},
{
"epoch": 1.88,
"learning_rate": 3.6579893183290875e-05,
"loss": 1.2663,
"step": 467000
},
{
"epoch": 1.88,
"learning_rate": 3.65655190971234e-05,
"loss": 1.2988,
"step": 467500
},
{
"epoch": 1.88,
"learning_rate": 3.655114501095593e-05,
"loss": 1.3224,
"step": 468000
},
{
"epoch": 1.89,
"learning_rate": 3.653677092478846e-05,
"loss": 1.2751,
"step": 468500
},
{
"epoch": 1.89,
"learning_rate": 3.6522396838620985e-05,
"loss": 1.2846,
"step": 469000
},
{
"epoch": 1.89,
"learning_rate": 3.6508022752453516e-05,
"loss": 1.3111,
"step": 469500
},
{
"epoch": 1.89,
"learning_rate": 3.649364866628604e-05,
"loss": 1.3045,
"step": 470000
},
{
"epoch": 1.89,
"learning_rate": 3.647927458011857e-05,
"loss": 1.2615,
"step": 470500
},
{
"epoch": 1.9,
"learning_rate": 3.6464900493951096e-05,
"loss": 1.264,
"step": 471000
},
{
"epoch": 1.9,
"learning_rate": 3.6450526407783627e-05,
"loss": 1.276,
"step": 471500
},
{
"epoch": 1.9,
"learning_rate": 3.643615232161616e-05,
"loss": 1.2798,
"step": 472000
},
{
"epoch": 1.9,
"learning_rate": 3.642177823544868e-05,
"loss": 1.2926,
"step": 472500
},
{
"epoch": 1.9,
"learning_rate": 3.640740414928121e-05,
"loss": 1.2795,
"step": 473000
},
{
"epoch": 1.91,
"learning_rate": 3.639303006311374e-05,
"loss": 1.239,
"step": 473500
},
{
"epoch": 1.91,
"learning_rate": 3.6378713473290935e-05,
"loss": 1.3506,
"step": 474000
},
{
"epoch": 1.91,
"learning_rate": 3.6364339387123466e-05,
"loss": 1.3075,
"step": 474500
},
{
"epoch": 1.91,
"learning_rate": 3.6349965300956e-05,
"loss": 1.3345,
"step": 475000
},
{
"epoch": 1.91,
"learning_rate": 3.633559121478852e-05,
"loss": 1.3233,
"step": 475500
},
{
"epoch": 1.92,
"learning_rate": 3.632121712862105e-05,
"loss": 1.2644,
"step": 476000
},
{
"epoch": 1.92,
"learning_rate": 3.630687179062591e-05,
"loss": 1.2507,
"step": 476500
},
{
"epoch": 1.92,
"learning_rate": 3.629249770445844e-05,
"loss": 1.2983,
"step": 477000
},
{
"epoch": 1.92,
"learning_rate": 3.627812361829097e-05,
"loss": 1.296,
"step": 477500
},
{
"epoch": 1.92,
"learning_rate": 3.6263749532123496e-05,
"loss": 1.297,
"step": 478000
},
{
"epoch": 1.93,
"learning_rate": 3.624937544595603e-05,
"loss": 1.3237,
"step": 478500
},
{
"epoch": 1.93,
"learning_rate": 3.6235030107960885e-05,
"loss": 1.2638,
"step": 479000
},
{
"epoch": 1.93,
"learning_rate": 3.6220656021793416e-05,
"loss": 1.2876,
"step": 479500
},
{
"epoch": 1.93,
"learning_rate": 3.620628193562595e-05,
"loss": 1.2688,
"step": 480000
},
{
"epoch": 1.93,
"learning_rate": 3.619193659763081e-05,
"loss": 1.2915,
"step": 480500
},
{
"epoch": 1.94,
"learning_rate": 3.6177562511463335e-05,
"loss": 1.2574,
"step": 481000
},
{
"epoch": 1.94,
"learning_rate": 3.6163188425295866e-05,
"loss": 1.2486,
"step": 481500
},
{
"epoch": 1.94,
"learning_rate": 3.614881433912839e-05,
"loss": 1.3007,
"step": 482000
},
{
"epoch": 1.94,
"learning_rate": 3.613444025296092e-05,
"loss": 1.2465,
"step": 482500
},
{
"epoch": 1.94,
"learning_rate": 3.6120066166793446e-05,
"loss": 1.272,
"step": 483000
},
{
"epoch": 1.95,
"learning_rate": 3.610569208062598e-05,
"loss": 1.3023,
"step": 483500
},
{
"epoch": 1.95,
"learning_rate": 3.609131799445851e-05,
"loss": 1.2473,
"step": 484000
},
{
"epoch": 1.95,
"learning_rate": 3.607694390829103e-05,
"loss": 1.2563,
"step": 484500
},
{
"epoch": 1.95,
"learning_rate": 3.606256982212356e-05,
"loss": 1.2937,
"step": 485000
},
{
"epoch": 1.95,
"learning_rate": 3.604819573595609e-05,
"loss": 1.3179,
"step": 485500
},
{
"epoch": 1.96,
"learning_rate": 3.603382164978862e-05,
"loss": 1.2751,
"step": 486000
},
{
"epoch": 1.96,
"learning_rate": 3.601947631179348e-05,
"loss": 1.2914,
"step": 486500
},
{
"epoch": 1.96,
"learning_rate": 3.600510222562601e-05,
"loss": 1.3037,
"step": 487000
},
{
"epoch": 1.96,
"learning_rate": 3.599072813945854e-05,
"loss": 1.2894,
"step": 487500
},
{
"epoch": 1.96,
"learning_rate": 3.597635405329106e-05,
"loss": 1.2917,
"step": 488000
},
{
"epoch": 1.97,
"learning_rate": 3.5962008715295926e-05,
"loss": 1.2776,
"step": 488500
},
{
"epoch": 1.97,
"learning_rate": 3.594763462912846e-05,
"loss": 1.3065,
"step": 489000
},
{
"epoch": 1.97,
"learning_rate": 3.5933289291133315e-05,
"loss": 1.3115,
"step": 489500
},
{
"epoch": 1.97,
"learning_rate": 3.5918915204965846e-05,
"loss": 1.2485,
"step": 490000
},
{
"epoch": 1.97,
"learning_rate": 3.590454111879838e-05,
"loss": 1.2972,
"step": 490500
},
{
"epoch": 1.98,
"learning_rate": 3.58901670326309e-05,
"loss": 1.2865,
"step": 491000
},
{
"epoch": 1.98,
"learning_rate": 3.587579294646343e-05,
"loss": 1.3067,
"step": 491500
},
{
"epoch": 1.98,
"learning_rate": 3.5861418860295956e-05,
"loss": 1.2642,
"step": 492000
},
{
"epoch": 1.98,
"learning_rate": 3.584704477412849e-05,
"loss": 1.3229,
"step": 492500
},
{
"epoch": 1.98,
"learning_rate": 3.583267068796101e-05,
"loss": 1.2988,
"step": 493000
},
{
"epoch": 1.99,
"learning_rate": 3.581835409813821e-05,
"loss": 1.2913,
"step": 493500
},
{
"epoch": 1.99,
"learning_rate": 3.580398001197074e-05,
"loss": 1.2787,
"step": 494000
},
{
"epoch": 1.99,
"learning_rate": 3.5789634673975605e-05,
"loss": 1.2615,
"step": 494500
},
{
"epoch": 1.99,
"learning_rate": 3.577526058780813e-05,
"loss": 1.2474,
"step": 495000
},
{
"epoch": 1.99,
"learning_rate": 3.576088650164066e-05,
"loss": 1.2448,
"step": 495500
},
{
"epoch": 2.0,
"learning_rate": 3.5746512415473184e-05,
"loss": 1.3201,
"step": 496000
},
{
"epoch": 2.0,
"learning_rate": 3.5732138329305715e-05,
"loss": 1.3178,
"step": 496500
},
{
"epoch": 2.0,
"eval_cer": 0.24798620372209454,
"eval_loss": 1.0018064975738525,
"eval_runtime": 10926.7858,
"eval_samples_per_second": 8.946,
"eval_steps_per_second": 1.118,
"step": 496926
},
{
"epoch": 2.0,
"learning_rate": 3.5717764243138246e-05,
"loss": 1.304,
"step": 497000
},
{
"epoch": 2.0,
"learning_rate": 3.570339015697077e-05,
"loss": 1.2501,
"step": 497500
},
{
"epoch": 2.0,
"learning_rate": 3.56890160708033e-05,
"loss": 1.2717,
"step": 498000
},
{
"epoch": 2.01,
"learning_rate": 3.5674641984635826e-05,
"loss": 1.2588,
"step": 498500
},
{
"epoch": 2.01,
"learning_rate": 3.566026789846836e-05,
"loss": 1.2931,
"step": 499000
},
{
"epoch": 2.01,
"learning_rate": 3.564589381230089e-05,
"loss": 1.2257,
"step": 499500
},
{
"epoch": 2.01,
"learning_rate": 3.5631548474305745e-05,
"loss": 1.2627,
"step": 500000
},
{
"epoch": 2.01,
"learning_rate": 3.5617174388138276e-05,
"loss": 1.2737,
"step": 500500
},
{
"epoch": 2.02,
"learning_rate": 3.56028003019708e-05,
"loss": 1.2805,
"step": 501000
},
{
"epoch": 2.02,
"learning_rate": 3.558842621580333e-05,
"loss": 1.2255,
"step": 501500
},
{
"epoch": 2.02,
"learning_rate": 3.557405212963586e-05,
"loss": 1.2769,
"step": 502000
},
{
"epoch": 2.02,
"learning_rate": 3.555967804346839e-05,
"loss": 1.259,
"step": 502500
},
{
"epoch": 2.02,
"learning_rate": 3.554533270547325e-05,
"loss": 1.2578,
"step": 503000
},
{
"epoch": 2.03,
"learning_rate": 3.553095861930578e-05,
"loss": 1.2479,
"step": 503500
},
{
"epoch": 2.03,
"learning_rate": 3.5516584533138306e-05,
"loss": 1.2287,
"step": 504000
},
{
"epoch": 2.03,
"learning_rate": 3.550221044697084e-05,
"loss": 1.2565,
"step": 504500
},
{
"epoch": 2.03,
"learning_rate": 3.548783636080336e-05,
"loss": 1.2534,
"step": 505000
},
{
"epoch": 2.03,
"learning_rate": 3.547346227463589e-05,
"loss": 1.2356,
"step": 505500
},
{
"epoch": 2.04,
"learning_rate": 3.545911693664076e-05,
"loss": 1.2739,
"step": 506000
},
{
"epoch": 2.04,
"learning_rate": 3.544474285047328e-05,
"loss": 1.2215,
"step": 506500
},
{
"epoch": 2.04,
"learning_rate": 3.543036876430581e-05,
"loss": 1.2267,
"step": 507000
},
{
"epoch": 2.04,
"learning_rate": 3.5415994678138336e-05,
"loss": 1.2708,
"step": 507500
},
{
"epoch": 2.04,
"learning_rate": 3.540162059197087e-05,
"loss": 1.299,
"step": 508000
},
{
"epoch": 2.05,
"learning_rate": 3.538727525397573e-05,
"loss": 1.2797,
"step": 508500
},
{
"epoch": 2.05,
"learning_rate": 3.5372901167808256e-05,
"loss": 1.254,
"step": 509000
},
{
"epoch": 2.05,
"learning_rate": 3.535852708164079e-05,
"loss": 1.239,
"step": 509500
},
{
"epoch": 2.05,
"learning_rate": 3.534415299547331e-05,
"loss": 1.2227,
"step": 510000
},
{
"epoch": 2.05,
"learning_rate": 3.532977890930584e-05,
"loss": 1.2489,
"step": 510500
},
{
"epoch": 2.06,
"learning_rate": 3.531540482313837e-05,
"loss": 1.2213,
"step": 511000
},
{
"epoch": 2.06,
"learning_rate": 3.530105948514323e-05,
"loss": 1.2131,
"step": 511500
},
{
"epoch": 2.06,
"learning_rate": 3.528668539897576e-05,
"loss": 1.2442,
"step": 512000
},
{
"epoch": 2.06,
"learning_rate": 3.5272311312808286e-05,
"loss": 1.2223,
"step": 512500
},
{
"epoch": 2.06,
"learning_rate": 3.525793722664082e-05,
"loss": 1.2384,
"step": 513000
},
{
"epoch": 2.07,
"learning_rate": 3.524356314047335e-05,
"loss": 1.2598,
"step": 513500
},
{
"epoch": 2.07,
"learning_rate": 3.522921780247821e-05,
"loss": 1.3033,
"step": 514000
},
{
"epoch": 2.07,
"learning_rate": 3.521484371631074e-05,
"loss": 1.2349,
"step": 514500
},
{
"epoch": 2.07,
"learning_rate": 3.520046963014327e-05,
"loss": 1.2263,
"step": 515000
},
{
"epoch": 2.07,
"learning_rate": 3.518609554397579e-05,
"loss": 1.2533,
"step": 515500
},
{
"epoch": 2.08,
"learning_rate": 3.517172145780832e-05,
"loss": 1.2289,
"step": 516000
},
{
"epoch": 2.08,
"learning_rate": 3.515734737164085e-05,
"loss": 1.2387,
"step": 516500
},
{
"epoch": 2.08,
"learning_rate": 3.514300203364571e-05,
"loss": 1.2407,
"step": 517000
},
{
"epoch": 2.08,
"learning_rate": 3.512862794747824e-05,
"loss": 1.2843,
"step": 517500
},
{
"epoch": 2.08,
"learning_rate": 3.511425386131077e-05,
"loss": 1.2666,
"step": 518000
},
{
"epoch": 2.09,
"learning_rate": 3.50998797751433e-05,
"loss": 1.2308,
"step": 518500
},
{
"epoch": 2.09,
"learning_rate": 3.508550568897582e-05,
"loss": 1.2481,
"step": 519000
},
{
"epoch": 2.09,
"learning_rate": 3.507113160280835e-05,
"loss": 1.2184,
"step": 519500
},
{
"epoch": 2.09,
"learning_rate": 3.5056757516640884e-05,
"loss": 1.2723,
"step": 520000
},
{
"epoch": 2.09,
"learning_rate": 3.504241217864574e-05,
"loss": 1.248,
"step": 520500
},
{
"epoch": 2.1,
"learning_rate": 3.502803809247827e-05,
"loss": 1.2647,
"step": 521000
},
{
"epoch": 2.1,
"learning_rate": 3.50136640063108e-05,
"loss": 1.2281,
"step": 521500
},
{
"epoch": 2.1,
"learning_rate": 3.499928992014333e-05,
"loss": 1.2436,
"step": 522000
},
{
"epoch": 2.1,
"learning_rate": 3.498491583397586e-05,
"loss": 1.2255,
"step": 522500
},
{
"epoch": 2.1,
"learning_rate": 3.497054174780838e-05,
"loss": 1.2615,
"step": 523000
},
{
"epoch": 2.11,
"learning_rate": 3.4956167661640914e-05,
"loss": 1.238,
"step": 523500
},
{
"epoch": 2.11,
"learning_rate": 3.4941793575473445e-05,
"loss": 1.2343,
"step": 524000
},
{
"epoch": 2.11,
"learning_rate": 3.49274482374783e-05,
"loss": 1.2505,
"step": 524500
},
{
"epoch": 2.11,
"learning_rate": 3.491310289948317e-05,
"loss": 1.2915,
"step": 525000
},
{
"epoch": 2.12,
"learning_rate": 3.48987288133157e-05,
"loss": 1.2477,
"step": 525500
},
{
"epoch": 2.12,
"learning_rate": 3.488435472714822e-05,
"loss": 1.2653,
"step": 526000
},
{
"epoch": 2.12,
"learning_rate": 3.486998064098075e-05,
"loss": 1.2362,
"step": 526500
},
{
"epoch": 2.12,
"learning_rate": 3.485563530298561e-05,
"loss": 1.2141,
"step": 527000
},
{
"epoch": 2.12,
"learning_rate": 3.484126121681814e-05,
"loss": 1.2283,
"step": 527500
},
{
"epoch": 2.13,
"learning_rate": 3.482688713065067e-05,
"loss": 1.2756,
"step": 528000
},
{
"epoch": 2.13,
"learning_rate": 3.48125130444832e-05,
"loss": 1.2044,
"step": 528500
},
{
"epoch": 2.13,
"learning_rate": 3.479813895831573e-05,
"loss": 1.2843,
"step": 529000
},
{
"epoch": 2.13,
"learning_rate": 3.478376487214826e-05,
"loss": 1.2615,
"step": 529500
},
{
"epoch": 2.13,
"learning_rate": 3.476941953415312e-05,
"loss": 1.3152,
"step": 530000
},
{
"epoch": 2.14,
"learning_rate": 3.475504544798565e-05,
"loss": 1.2859,
"step": 530500
},
{
"epoch": 2.14,
"learning_rate": 3.474067136181817e-05,
"loss": 1.2408,
"step": 531000
},
{
"epoch": 2.14,
"learning_rate": 3.47262972756507e-05,
"loss": 1.251,
"step": 531500
},
{
"epoch": 2.14,
"learning_rate": 3.4711923189483234e-05,
"loss": 1.1937,
"step": 532000
},
{
"epoch": 2.14,
"learning_rate": 3.469754910331576e-05,
"loss": 1.2696,
"step": 532500
},
{
"epoch": 2.15,
"learning_rate": 3.468317501714828e-05,
"loss": 1.2626,
"step": 533000
},
{
"epoch": 2.15,
"learning_rate": 3.466880093098081e-05,
"loss": 1.2667,
"step": 533500
},
{
"epoch": 2.15,
"learning_rate": 3.465445559298568e-05,
"loss": 1.2286,
"step": 534000
},
{
"epoch": 2.15,
"learning_rate": 3.464011025499054e-05,
"loss": 1.3034,
"step": 534500
},
{
"epoch": 2.15,
"learning_rate": 3.4625736168823066e-05,
"loss": 1.2727,
"step": 535000
},
{
"epoch": 2.16,
"learning_rate": 3.46113620826556e-05,
"loss": 1.3009,
"step": 535500
},
{
"epoch": 2.16,
"learning_rate": 3.459698799648812e-05,
"loss": 1.2333,
"step": 536000
},
{
"epoch": 2.16,
"learning_rate": 3.458261391032065e-05,
"loss": 1.2414,
"step": 536500
},
{
"epoch": 2.16,
"learning_rate": 3.4568239824153183e-05,
"loss": 1.2648,
"step": 537000
},
{
"epoch": 2.16,
"learning_rate": 3.455386573798571e-05,
"loss": 1.2345,
"step": 537500
},
{
"epoch": 2.17,
"learning_rate": 3.453949165181824e-05,
"loss": 1.2002,
"step": 538000
},
{
"epoch": 2.17,
"learning_rate": 3.4525146313823096e-05,
"loss": 1.2801,
"step": 538500
},
{
"epoch": 2.17,
"learning_rate": 3.451077222765563e-05,
"loss": 1.2502,
"step": 539000
},
{
"epoch": 2.17,
"learning_rate": 3.449639814148816e-05,
"loss": 1.2946,
"step": 539500
},
{
"epoch": 2.17,
"learning_rate": 3.448202405532068e-05,
"loss": 1.2442,
"step": 540000
},
{
"epoch": 2.18,
"learning_rate": 3.446770746549788e-05,
"loss": 1.2646,
"step": 540500
},
{
"epoch": 2.18,
"learning_rate": 3.445333337933041e-05,
"loss": 1.2592,
"step": 541000
},
{
"epoch": 2.18,
"learning_rate": 3.4438959293162936e-05,
"loss": 1.2476,
"step": 541500
},
{
"epoch": 2.18,
"learning_rate": 3.442458520699547e-05,
"loss": 1.2312,
"step": 542000
},
{
"epoch": 2.18,
"learning_rate": 3.4410211120828e-05,
"loss": 1.2613,
"step": 542500
},
{
"epoch": 2.19,
"learning_rate": 3.439586578283286e-05,
"loss": 1.2881,
"step": 543000
},
{
"epoch": 2.19,
"learning_rate": 3.4381491696665386e-05,
"loss": 1.2507,
"step": 543500
},
{
"epoch": 2.19,
"learning_rate": 3.436711761049791e-05,
"loss": 1.2611,
"step": 544000
},
{
"epoch": 2.19,
"learning_rate": 3.435274352433044e-05,
"loss": 1.2903,
"step": 544500
},
{
"epoch": 2.19,
"learning_rate": 3.433836943816297e-05,
"loss": 1.2557,
"step": 545000
},
{
"epoch": 2.2,
"learning_rate": 3.432402410016784e-05,
"loss": 1.2547,
"step": 545500
},
{
"epoch": 2.2,
"learning_rate": 3.430965001400036e-05,
"loss": 1.2438,
"step": 546000
},
{
"epoch": 2.2,
"learning_rate": 3.4295275927832886e-05,
"loss": 1.2705,
"step": 546500
},
{
"epoch": 2.2,
"learning_rate": 3.4280901841665416e-05,
"loss": 1.2638,
"step": 547000
},
{
"epoch": 2.2,
"learning_rate": 3.426652775549795e-05,
"loss": 1.246,
"step": 547500
},
{
"epoch": 2.21,
"learning_rate": 3.425215366933047e-05,
"loss": 1.259,
"step": 548000
},
{
"epoch": 2.21,
"learning_rate": 3.4237808331335336e-05,
"loss": 1.2738,
"step": 548500
},
{
"epoch": 2.21,
"learning_rate": 3.422343424516786e-05,
"loss": 1.2558,
"step": 549000
},
{
"epoch": 2.21,
"learning_rate": 3.420906015900039e-05,
"loss": 1.2246,
"step": 549500
},
{
"epoch": 2.21,
"learning_rate": 3.419468607283292e-05,
"loss": 1.193,
"step": 550000
},
{
"epoch": 2.22,
"learning_rate": 3.4180311986665446e-05,
"loss": 1.2253,
"step": 550500
},
{
"epoch": 2.22,
"learning_rate": 3.416593790049798e-05,
"loss": 1.2249,
"step": 551000
},
{
"epoch": 2.22,
"learning_rate": 3.415156381433051e-05,
"loss": 1.2014,
"step": 551500
},
{
"epoch": 2.22,
"learning_rate": 3.413718972816303e-05,
"loss": 1.2901,
"step": 552000
},
{
"epoch": 2.22,
"learning_rate": 3.41228443901679e-05,
"loss": 1.259,
"step": 552500
},
{
"epoch": 2.23,
"learning_rate": 3.410847030400042e-05,
"loss": 1.2049,
"step": 553000
},
{
"epoch": 2.23,
"learning_rate": 3.4094124966005286e-05,
"loss": 1.2618,
"step": 553500
},
{
"epoch": 2.23,
"learning_rate": 3.407975087983782e-05,
"loss": 1.2664,
"step": 554000
},
{
"epoch": 2.23,
"learning_rate": 3.406537679367035e-05,
"loss": 1.2458,
"step": 554500
},
{
"epoch": 2.23,
"learning_rate": 3.405100270750287e-05,
"loss": 1.2345,
"step": 555000
},
{
"epoch": 2.24,
"learning_rate": 3.4036657369507737e-05,
"loss": 1.2623,
"step": 555500
},
{
"epoch": 2.24,
"learning_rate": 3.402228328334026e-05,
"loss": 1.2062,
"step": 556000
},
{
"epoch": 2.24,
"learning_rate": 3.400790919717279e-05,
"loss": 1.2831,
"step": 556500
},
{
"epoch": 2.24,
"learning_rate": 3.399353511100532e-05,
"loss": 1.2745,
"step": 557000
},
{
"epoch": 2.24,
"learning_rate": 3.397916102483785e-05,
"loss": 1.233,
"step": 557500
},
{
"epoch": 2.25,
"learning_rate": 3.396481568684271e-05,
"loss": 1.2692,
"step": 558000
},
{
"epoch": 2.25,
"learning_rate": 3.3950441600675236e-05,
"loss": 1.2596,
"step": 558500
},
{
"epoch": 2.25,
"learning_rate": 3.3936067514507767e-05,
"loss": 1.2565,
"step": 559000
},
{
"epoch": 2.25,
"learning_rate": 3.39216934283403e-05,
"loss": 1.2362,
"step": 559500
},
{
"epoch": 2.25,
"learning_rate": 3.390731934217282e-05,
"loss": 1.2155,
"step": 560000
},
{
"epoch": 2.26,
"learning_rate": 3.3892974004177686e-05,
"loss": 1.266,
"step": 560500
},
{
"epoch": 2.26,
"learning_rate": 3.387859991801021e-05,
"loss": 1.2511,
"step": 561000
},
{
"epoch": 2.26,
"learning_rate": 3.386422583184274e-05,
"loss": 1.2623,
"step": 561500
},
{
"epoch": 2.26,
"learning_rate": 3.384985174567527e-05,
"loss": 1.216,
"step": 562000
},
{
"epoch": 2.26,
"learning_rate": 3.383550640768014e-05,
"loss": 1.2525,
"step": 562500
},
{
"epoch": 2.27,
"learning_rate": 3.382113232151266e-05,
"loss": 1.2496,
"step": 563000
},
{
"epoch": 2.27,
"learning_rate": 3.3806758235345185e-05,
"loss": 1.2253,
"step": 563500
},
{
"epoch": 2.27,
"learning_rate": 3.3792384149177716e-05,
"loss": 1.288,
"step": 564000
},
{
"epoch": 2.27,
"learning_rate": 3.377801006301025e-05,
"loss": 1.242,
"step": 564500
},
{
"epoch": 2.27,
"learning_rate": 3.376366472501511e-05,
"loss": 1.2785,
"step": 565000
},
{
"epoch": 2.28,
"learning_rate": 3.3749290638847636e-05,
"loss": 1.2483,
"step": 565500
},
{
"epoch": 2.28,
"learning_rate": 3.373491655268016e-05,
"loss": 1.2293,
"step": 566000
},
{
"epoch": 2.28,
"learning_rate": 3.37205424665127e-05,
"loss": 1.2759,
"step": 566500
},
{
"epoch": 2.28,
"learning_rate": 3.3706197128517556e-05,
"loss": 1.2912,
"step": 567000
},
{
"epoch": 2.28,
"learning_rate": 3.3691823042350087e-05,
"loss": 1.2564,
"step": 567500
},
{
"epoch": 2.29,
"learning_rate": 3.367744895618261e-05,
"loss": 1.2192,
"step": 568000
},
{
"epoch": 2.29,
"learning_rate": 3.3663074870015135e-05,
"loss": 1.2384,
"step": 568500
},
{
"epoch": 2.29,
"learning_rate": 3.364870078384767e-05,
"loss": 1.2578,
"step": 569000
},
{
"epoch": 2.29,
"learning_rate": 3.363435544585253e-05,
"loss": 1.2857,
"step": 569500
},
{
"epoch": 2.29,
"learning_rate": 3.361998135968506e-05,
"loss": 1.236,
"step": 570000
},
{
"epoch": 2.3,
"learning_rate": 3.3605607273517586e-05,
"loss": 1.262,
"step": 570500
},
{
"epoch": 2.3,
"learning_rate": 3.3591233187350117e-05,
"loss": 1.2389,
"step": 571000
},
{
"epoch": 2.3,
"learning_rate": 3.357685910118265e-05,
"loss": 1.2537,
"step": 571500
},
{
"epoch": 2.3,
"learning_rate": 3.356248501501517e-05,
"loss": 1.2869,
"step": 572000
},
{
"epoch": 2.3,
"learning_rate": 3.3548110928847696e-05,
"loss": 1.2332,
"step": 572500
},
{
"epoch": 2.31,
"learning_rate": 3.353373684268023e-05,
"loss": 1.2552,
"step": 573000
},
{
"epoch": 2.31,
"learning_rate": 3.351936275651276e-05,
"loss": 1.281,
"step": 573500
},
{
"epoch": 2.31,
"learning_rate": 3.350498867034528e-05,
"loss": 1.2591,
"step": 574000
},
{
"epoch": 2.31,
"learning_rate": 3.349061458417781e-05,
"loss": 1.2511,
"step": 574500
},
{
"epoch": 2.31,
"learning_rate": 3.3476240498010344e-05,
"loss": 1.2287,
"step": 575000
},
{
"epoch": 2.32,
"learning_rate": 3.346189516001521e-05,
"loss": 1.2665,
"step": 575500
},
{
"epoch": 2.32,
"learning_rate": 3.344752107384773e-05,
"loss": 1.1707,
"step": 576000
},
{
"epoch": 2.32,
"learning_rate": 3.343314698768026e-05,
"loss": 1.2328,
"step": 576500
},
{
"epoch": 2.32,
"learning_rate": 3.341877290151279e-05,
"loss": 1.2742,
"step": 577000
},
{
"epoch": 2.32,
"learning_rate": 3.3404456311689986e-05,
"loss": 1.247,
"step": 577500
},
{
"epoch": 2.33,
"learning_rate": 3.339008222552251e-05,
"loss": 1.2432,
"step": 578000
},
{
"epoch": 2.33,
"learning_rate": 3.337570813935504e-05,
"loss": 1.2333,
"step": 578500
},
{
"epoch": 2.33,
"learning_rate": 3.336133405318757e-05,
"loss": 1.2269,
"step": 579000
},
{
"epoch": 2.33,
"learning_rate": 3.3346959967020096e-05,
"loss": 1.2434,
"step": 579500
},
{
"epoch": 2.33,
"learning_rate": 3.333258588085262e-05,
"loss": 1.2411,
"step": 580000
},
{
"epoch": 2.34,
"learning_rate": 3.331821179468516e-05,
"loss": 1.2331,
"step": 580500
},
{
"epoch": 2.34,
"learning_rate": 3.330386645669002e-05,
"loss": 1.2312,
"step": 581000
},
{
"epoch": 2.34,
"learning_rate": 3.328949237052255e-05,
"loss": 1.2393,
"step": 581500
},
{
"epoch": 2.34,
"learning_rate": 3.327511828435507e-05,
"loss": 1.2778,
"step": 582000
},
{
"epoch": 2.34,
"learning_rate": 3.32607441981876e-05,
"loss": 1.2703,
"step": 582500
},
{
"epoch": 2.35,
"learning_rate": 3.324637011202013e-05,
"loss": 1.296,
"step": 583000
},
{
"epoch": 2.35,
"learning_rate": 3.323199602585266e-05,
"loss": 1.2469,
"step": 583500
},
{
"epoch": 2.35,
"learning_rate": 3.321762193968518e-05,
"loss": 1.2738,
"step": 584000
},
{
"epoch": 2.35,
"learning_rate": 3.320324785351772e-05,
"loss": 1.2696,
"step": 584500
},
{
"epoch": 2.35,
"learning_rate": 3.318890251552258e-05,
"loss": 1.2645,
"step": 585000
},
{
"epoch": 2.36,
"learning_rate": 3.317452842935511e-05,
"loss": 1.2552,
"step": 585500
},
{
"epoch": 2.36,
"learning_rate": 3.316015434318763e-05,
"loss": 1.2484,
"step": 586000
},
{
"epoch": 2.36,
"learning_rate": 3.3145780257020156e-05,
"loss": 1.2692,
"step": 586500
},
{
"epoch": 2.36,
"learning_rate": 3.313143491902502e-05,
"loss": 1.2243,
"step": 587000
},
{
"epoch": 2.36,
"learning_rate": 3.3117089581029885e-05,
"loss": 1.213,
"step": 587500
},
{
"epoch": 2.37,
"learning_rate": 3.3102715494862416e-05,
"loss": 1.2585,
"step": 588000
},
{
"epoch": 2.37,
"learning_rate": 3.308834140869495e-05,
"loss": 1.2533,
"step": 588500
},
{
"epoch": 2.37,
"learning_rate": 3.307396732252747e-05,
"loss": 1.2547,
"step": 589000
},
{
"epoch": 2.37,
"learning_rate": 3.3059593236359996e-05,
"loss": 1.2081,
"step": 589500
},
{
"epoch": 2.37,
"learning_rate": 3.304521915019253e-05,
"loss": 1.2561,
"step": 590000
},
{
"epoch": 2.38,
"learning_rate": 3.303084506402506e-05,
"loss": 1.2344,
"step": 590500
},
{
"epoch": 2.38,
"learning_rate": 3.301647097785758e-05,
"loss": 1.287,
"step": 591000
},
{
"epoch": 2.38,
"learning_rate": 3.300209689169011e-05,
"loss": 1.2357,
"step": 591500
},
{
"epoch": 2.38,
"learning_rate": 3.2987722805522644e-05,
"loss": 1.2741,
"step": 592000
},
{
"epoch": 2.38,
"learning_rate": 3.297334871935517e-05,
"loss": 1.2655,
"step": 592500
},
{
"epoch": 2.39,
"learning_rate": 3.295897463318769e-05,
"loss": 1.2085,
"step": 593000
},
{
"epoch": 2.39,
"learning_rate": 3.294460054702023e-05,
"loss": 1.2697,
"step": 593500
},
{
"epoch": 2.39,
"learning_rate": 3.293025520902509e-05,
"loss": 1.226,
"step": 594000
},
{
"epoch": 2.39,
"learning_rate": 3.291588112285762e-05,
"loss": 1.2169,
"step": 594500
},
{
"epoch": 2.39,
"learning_rate": 3.290150703669014e-05,
"loss": 1.2318,
"step": 595000
},
{
"epoch": 2.4,
"learning_rate": 3.288713295052267e-05,
"loss": 1.2192,
"step": 595500
},
{
"epoch": 2.4,
"learning_rate": 3.287278761252753e-05,
"loss": 1.2522,
"step": 596000
},
{
"epoch": 2.4,
"learning_rate": 3.285841352636006e-05,
"loss": 1.2731,
"step": 596500
},
{
"epoch": 2.4,
"learning_rate": 3.284403944019259e-05,
"loss": 1.2572,
"step": 597000
},
{
"epoch": 2.4,
"learning_rate": 3.282966535402512e-05,
"loss": 1.2339,
"step": 597500
},
{
"epoch": 2.41,
"learning_rate": 3.281532001602998e-05,
"loss": 1.283,
"step": 598000
},
{
"epoch": 2.41,
"learning_rate": 3.2800945929862506e-05,
"loss": 1.2274,
"step": 598500
},
{
"epoch": 2.41,
"learning_rate": 3.278660059186737e-05,
"loss": 1.2292,
"step": 599000
},
{
"epoch": 2.41,
"learning_rate": 3.27722265056999e-05,
"loss": 1.2474,
"step": 599500
},
{
"epoch": 2.41,
"learning_rate": 3.275785241953243e-05,
"loss": 1.278,
"step": 600000
},
{
"epoch": 2.42,
"learning_rate": 3.274347833336496e-05,
"loss": 1.2438,
"step": 600500
},
{
"epoch": 2.42,
"learning_rate": 3.272910424719748e-05,
"loss": 1.2174,
"step": 601000
},
{
"epoch": 2.42,
"learning_rate": 3.271473016103002e-05,
"loss": 1.2063,
"step": 601500
},
{
"epoch": 2.42,
"learning_rate": 3.270035607486254e-05,
"loss": 1.188,
"step": 602000
},
{
"epoch": 2.42,
"learning_rate": 3.268601073686741e-05,
"loss": 1.2207,
"step": 602500
},
{
"epoch": 2.43,
"learning_rate": 3.267163665069993e-05,
"loss": 1.233,
"step": 603000
},
{
"epoch": 2.43,
"learning_rate": 3.2657262564532456e-05,
"loss": 1.2776,
"step": 603500
},
{
"epoch": 2.43,
"learning_rate": 3.2642888478364994e-05,
"loss": 1.2963,
"step": 604000
},
{
"epoch": 2.43,
"learning_rate": 3.262851439219752e-05,
"loss": 1.2479,
"step": 604500
},
{
"epoch": 2.43,
"learning_rate": 3.261414030603004e-05,
"loss": 1.1801,
"step": 605000
},
{
"epoch": 2.44,
"learning_rate": 3.259976621986258e-05,
"loss": 1.2522,
"step": 605500
},
{
"epoch": 2.44,
"learning_rate": 3.2585392133695104e-05,
"loss": 1.2482,
"step": 606000
},
{
"epoch": 2.44,
"learning_rate": 3.257104679569997e-05,
"loss": 1.2325,
"step": 606500
},
{
"epoch": 2.44,
"learning_rate": 3.255667270953249e-05,
"loss": 1.3044,
"step": 607000
},
{
"epoch": 2.45,
"learning_rate": 3.254232737153736e-05,
"loss": 1.2587,
"step": 607500
},
{
"epoch": 2.45,
"learning_rate": 3.252795328536988e-05,
"loss": 1.2764,
"step": 608000
},
{
"epoch": 2.45,
"learning_rate": 3.251357919920241e-05,
"loss": 1.2882,
"step": 608500
},
{
"epoch": 2.45,
"learning_rate": 3.249920511303494e-05,
"loss": 1.2507,
"step": 609000
},
{
"epoch": 2.45,
"learning_rate": 3.248483102686747e-05,
"loss": 1.1949,
"step": 609500
},
{
"epoch": 2.46,
"learning_rate": 3.247045694069999e-05,
"loss": 1.2653,
"step": 610000
},
{
"epoch": 2.46,
"learning_rate": 3.245608285453253e-05,
"loss": 1.291,
"step": 610500
},
{
"epoch": 2.46,
"learning_rate": 3.2441708768365054e-05,
"loss": 1.1922,
"step": 611000
},
{
"epoch": 2.46,
"learning_rate": 3.242736343036992e-05,
"loss": 1.2725,
"step": 611500
},
{
"epoch": 2.46,
"learning_rate": 3.241298934420244e-05,
"loss": 1.1928,
"step": 612000
},
{
"epoch": 2.47,
"learning_rate": 3.239861525803497e-05,
"loss": 1.2518,
"step": 612500
},
{
"epoch": 2.47,
"learning_rate": 3.2384241171867504e-05,
"loss": 1.229,
"step": 613000
},
{
"epoch": 2.47,
"learning_rate": 3.236989583387237e-05,
"loss": 1.2946,
"step": 613500
},
{
"epoch": 2.47,
"learning_rate": 3.235552174770489e-05,
"loss": 1.256,
"step": 614000
},
{
"epoch": 2.47,
"learning_rate": 3.234114766153742e-05,
"loss": 1.2156,
"step": 614500
},
{
"epoch": 2.48,
"learning_rate": 3.232677357536995e-05,
"loss": 1.2669,
"step": 615000
},
{
"epoch": 2.48,
"learning_rate": 3.231239948920248e-05,
"loss": 1.2413,
"step": 615500
},
{
"epoch": 2.48,
"learning_rate": 3.2298025403035e-05,
"loss": 1.2777,
"step": 616000
},
{
"epoch": 2.48,
"learning_rate": 3.228368006503987e-05,
"loss": 1.2184,
"step": 616500
},
{
"epoch": 2.48,
"learning_rate": 3.226930597887239e-05,
"loss": 1.2222,
"step": 617000
},
{
"epoch": 2.49,
"learning_rate": 3.225493189270492e-05,
"loss": 1.2481,
"step": 617500
},
{
"epoch": 2.49,
"learning_rate": 3.2240557806537454e-05,
"loss": 1.2333,
"step": 618000
},
{
"epoch": 2.49,
"learning_rate": 3.222618372036998e-05,
"loss": 1.2376,
"step": 618500
},
{
"epoch": 2.49,
"learning_rate": 3.221183838237484e-05,
"loss": 1.2284,
"step": 619000
},
{
"epoch": 2.49,
"learning_rate": 3.219746429620737e-05,
"loss": 1.2159,
"step": 619500
},
{
"epoch": 2.5,
"learning_rate": 3.21830902100399e-05,
"loss": 1.2351,
"step": 620000
},
{
"epoch": 2.5,
"learning_rate": 3.216871612387243e-05,
"loss": 1.2163,
"step": 620500
},
{
"epoch": 2.5,
"learning_rate": 3.215434203770495e-05,
"loss": 1.2584,
"step": 621000
},
{
"epoch": 2.5,
"learning_rate": 3.2139967951537484e-05,
"loss": 1.2286,
"step": 621500
},
{
"epoch": 2.5,
"learning_rate": 3.2125593865370015e-05,
"loss": 1.2537,
"step": 622000
},
{
"epoch": 2.51,
"learning_rate": 3.211124852737488e-05,
"loss": 1.2097,
"step": 622500
},
{
"epoch": 2.51,
"learning_rate": 3.2096874441207404e-05,
"loss": 1.2062,
"step": 623000
},
{
"epoch": 2.51,
"learning_rate": 3.208250035503993e-05,
"loss": 1.2346,
"step": 623500
},
{
"epoch": 2.51,
"learning_rate": 3.206812626887246e-05,
"loss": 1.2543,
"step": 624000
},
{
"epoch": 2.51,
"learning_rate": 3.205375218270499e-05,
"loss": 1.2531,
"step": 624500
},
{
"epoch": 2.52,
"learning_rate": 3.2039406844709854e-05,
"loss": 1.2416,
"step": 625000
},
{
"epoch": 2.52,
"learning_rate": 3.202503275854238e-05,
"loss": 1.2237,
"step": 625500
},
{
"epoch": 2.52,
"learning_rate": 3.20106586723749e-05,
"loss": 1.2425,
"step": 626000
},
{
"epoch": 2.52,
"learning_rate": 3.1996284586207434e-05,
"loss": 1.2313,
"step": 626500
},
{
"epoch": 2.52,
"learning_rate": 3.1981910500039965e-05,
"loss": 1.2602,
"step": 627000
},
{
"epoch": 2.53,
"learning_rate": 3.196753641387249e-05,
"loss": 1.2338,
"step": 627500
},
{
"epoch": 2.53,
"learning_rate": 3.195316232770501e-05,
"loss": 1.2783,
"step": 628000
},
{
"epoch": 2.53,
"learning_rate": 3.193881698970988e-05,
"loss": 1.2234,
"step": 628500
},
{
"epoch": 2.53,
"learning_rate": 3.192444290354241e-05,
"loss": 1.2668,
"step": 629000
},
{
"epoch": 2.53,
"learning_rate": 3.191006881737494e-05,
"loss": 1.2136,
"step": 629500
},
{
"epoch": 2.54,
"learning_rate": 3.1895694731207464e-05,
"loss": 1.23,
"step": 630000
},
{
"epoch": 2.54,
"learning_rate": 3.1881320645039995e-05,
"loss": 1.218,
"step": 630500
},
{
"epoch": 2.54,
"learning_rate": 3.1866946558872526e-05,
"loss": 1.2594,
"step": 631000
},
{
"epoch": 2.54,
"learning_rate": 3.185257247270505e-05,
"loss": 1.2191,
"step": 631500
},
{
"epoch": 2.54,
"learning_rate": 3.1838227134709914e-05,
"loss": 1.2261,
"step": 632000
},
{
"epoch": 2.55,
"learning_rate": 3.182385304854244e-05,
"loss": 1.2288,
"step": 632500
},
{
"epoch": 2.55,
"learning_rate": 3.180947896237497e-05,
"loss": 1.2882,
"step": 633000
},
{
"epoch": 2.55,
"learning_rate": 3.17951048762075e-05,
"loss": 1.2466,
"step": 633500
},
{
"epoch": 2.55,
"learning_rate": 3.1780730790040025e-05,
"loss": 1.2221,
"step": 634000
},
{
"epoch": 2.55,
"learning_rate": 3.176635670387255e-05,
"loss": 1.2453,
"step": 634500
},
{
"epoch": 2.56,
"learning_rate": 3.175198261770508e-05,
"loss": 1.2455,
"step": 635000
},
{
"epoch": 2.56,
"learning_rate": 3.173760853153761e-05,
"loss": 1.2269,
"step": 635500
},
{
"epoch": 2.56,
"learning_rate": 3.1723263193542475e-05,
"loss": 1.2535,
"step": 636000
},
{
"epoch": 2.56,
"learning_rate": 3.1708889107375e-05,
"loss": 1.2515,
"step": 636500
},
{
"epoch": 2.56,
"learning_rate": 3.169451502120753e-05,
"loss": 1.239,
"step": 637000
},
{
"epoch": 2.57,
"learning_rate": 3.168014093504006e-05,
"loss": 1.2717,
"step": 637500
},
{
"epoch": 2.57,
"learning_rate": 3.1665766848872586e-05,
"loss": 1.2414,
"step": 638000
},
{
"epoch": 2.57,
"learning_rate": 3.165142151087745e-05,
"loss": 1.2811,
"step": 638500
},
{
"epoch": 2.57,
"learning_rate": 3.1637047424709974e-05,
"loss": 1.2393,
"step": 639000
},
{
"epoch": 2.57,
"learning_rate": 3.1622673338542505e-05,
"loss": 1.1999,
"step": 639500
},
{
"epoch": 2.58,
"learning_rate": 3.1608299252375036e-05,
"loss": 1.2494,
"step": 640000
},
{
"epoch": 2.58,
"learning_rate": 3.159392516620756e-05,
"loss": 1.247,
"step": 640500
},
{
"epoch": 2.58,
"learning_rate": 3.1579551080040085e-05,
"loss": 1.2276,
"step": 641000
},
{
"epoch": 2.58,
"learning_rate": 3.1565176993872616e-05,
"loss": 1.2768,
"step": 641500
},
{
"epoch": 2.58,
"learning_rate": 3.1550802907705146e-05,
"loss": 1.2675,
"step": 642000
},
{
"epoch": 2.59,
"learning_rate": 3.153642882153767e-05,
"loss": 1.269,
"step": 642500
},
{
"epoch": 2.59,
"learning_rate": 3.152211223171487e-05,
"loss": 1.2432,
"step": 643000
},
{
"epoch": 2.59,
"learning_rate": 3.15077381455474e-05,
"loss": 1.2798,
"step": 643500
},
{
"epoch": 2.59,
"learning_rate": 3.1493364059379924e-05,
"loss": 1.2195,
"step": 644000
},
{
"epoch": 2.59,
"learning_rate": 3.1478989973212455e-05,
"loss": 1.2509,
"step": 644500
},
{
"epoch": 2.6,
"learning_rate": 3.1464615887044986e-05,
"loss": 1.2541,
"step": 645000
},
{
"epoch": 2.6,
"learning_rate": 3.145024180087751e-05,
"loss": 1.2381,
"step": 645500
},
{
"epoch": 2.6,
"learning_rate": 3.143586771471004e-05,
"loss": 1.274,
"step": 646000
},
{
"epoch": 2.6,
"learning_rate": 3.1421493628542565e-05,
"loss": 1.2519,
"step": 646500
},
{
"epoch": 2.6,
"learning_rate": 3.140717703871976e-05,
"loss": 1.2487,
"step": 647000
},
{
"epoch": 2.61,
"learning_rate": 3.1392802952552294e-05,
"loss": 1.2223,
"step": 647500
},
{
"epoch": 2.61,
"learning_rate": 3.1378428866384825e-05,
"loss": 1.2576,
"step": 648000
},
{
"epoch": 2.61,
"learning_rate": 3.136405478021735e-05,
"loss": 1.2481,
"step": 648500
},
{
"epoch": 2.61,
"learning_rate": 3.1349680694049874e-05,
"loss": 1.236,
"step": 649000
},
{
"epoch": 2.61,
"learning_rate": 3.1335306607882405e-05,
"loss": 1.1997,
"step": 649500
},
{
"epoch": 2.62,
"learning_rate": 3.132096126988727e-05,
"loss": 1.2026,
"step": 650000
},
{
"epoch": 2.62,
"learning_rate": 3.13065871837198e-05,
"loss": 1.2536,
"step": 650500
},
{
"epoch": 2.62,
"learning_rate": 3.1292213097552324e-05,
"loss": 1.2429,
"step": 651000
},
{
"epoch": 2.62,
"learning_rate": 3.127783901138485e-05,
"loss": 1.2246,
"step": 651500
},
{
"epoch": 2.62,
"learning_rate": 3.126349367338971e-05,
"loss": 1.2687,
"step": 652000
},
{
"epoch": 2.63,
"learning_rate": 3.1249119587222244e-05,
"loss": 1.2562,
"step": 652500
},
{
"epoch": 2.63,
"learning_rate": 3.1234745501054775e-05,
"loss": 1.2347,
"step": 653000
},
{
"epoch": 2.63,
"learning_rate": 3.12203714148873e-05,
"loss": 1.2105,
"step": 653500
},
{
"epoch": 2.63,
"learning_rate": 3.120599732871983e-05,
"loss": 1.2172,
"step": 654000
},
{
"epoch": 2.63,
"learning_rate": 3.1191623242552354e-05,
"loss": 1.2471,
"step": 654500
},
{
"epoch": 2.64,
"learning_rate": 3.1177249156384885e-05,
"loss": 1.2841,
"step": 655000
},
{
"epoch": 2.64,
"learning_rate": 3.116290381838975e-05,
"loss": 1.2743,
"step": 655500
},
{
"epoch": 2.64,
"learning_rate": 3.1148529732222274e-05,
"loss": 1.2504,
"step": 656000
},
{
"epoch": 2.64,
"learning_rate": 3.1134155646054805e-05,
"loss": 1.2194,
"step": 656500
},
{
"epoch": 2.64,
"learning_rate": 3.1119781559887336e-05,
"loss": 1.2181,
"step": 657000
},
{
"epoch": 2.65,
"learning_rate": 3.110540747371986e-05,
"loss": 1.294,
"step": 657500
},
{
"epoch": 2.65,
"learning_rate": 3.1091033387552384e-05,
"loss": 1.248,
"step": 658000
},
{
"epoch": 2.65,
"learning_rate": 3.1076659301384915e-05,
"loss": 1.2075,
"step": 658500
},
{
"epoch": 2.65,
"learning_rate": 3.1062285215217446e-05,
"loss": 1.2646,
"step": 659000
},
{
"epoch": 2.65,
"learning_rate": 3.104791112904997e-05,
"loss": 1.2633,
"step": 659500
},
{
"epoch": 2.66,
"learning_rate": 3.10335370428825e-05,
"loss": 1.2457,
"step": 660000
},
{
"epoch": 2.66,
"learning_rate": 3.1019191704887366e-05,
"loss": 1.224,
"step": 660500
},
{
"epoch": 2.66,
"learning_rate": 3.100481761871989e-05,
"loss": 1.2108,
"step": 661000
},
{
"epoch": 2.66,
"learning_rate": 3.099044353255242e-05,
"loss": 1.2735,
"step": 661500
},
{
"epoch": 2.66,
"learning_rate": 3.0976069446384945e-05,
"loss": 1.2517,
"step": 662000
},
{
"epoch": 2.67,
"learning_rate": 3.096172410838981e-05,
"loss": 1.2133,
"step": 662500
},
{
"epoch": 2.67,
"learning_rate": 3.094735002222234e-05,
"loss": 1.2374,
"step": 663000
},
{
"epoch": 2.67,
"learning_rate": 3.09330046842272e-05,
"loss": 1.2218,
"step": 663500
},
{
"epoch": 2.67,
"learning_rate": 3.091863059805973e-05,
"loss": 1.2535,
"step": 664000
},
{
"epoch": 2.67,
"learning_rate": 3.090425651189226e-05,
"loss": 1.2373,
"step": 664500
},
{
"epoch": 2.68,
"learning_rate": 3.0889882425724785e-05,
"loss": 1.2154,
"step": 665000
},
{
"epoch": 2.68,
"learning_rate": 3.087553708772965e-05,
"loss": 1.2471,
"step": 665500
},
{
"epoch": 2.68,
"learning_rate": 3.0861163001562173e-05,
"loss": 1.1906,
"step": 666000
},
{
"epoch": 2.68,
"learning_rate": 3.0846788915394704e-05,
"loss": 1.2542,
"step": 666500
},
{
"epoch": 2.68,
"learning_rate": 3.0832414829227235e-05,
"loss": 1.2244,
"step": 667000
},
{
"epoch": 2.69,
"learning_rate": 3.081804074305976e-05,
"loss": 1.2348,
"step": 667500
},
{
"epoch": 2.69,
"learning_rate": 3.080366665689229e-05,
"loss": 1.2351,
"step": 668000
},
{
"epoch": 2.69,
"learning_rate": 3.078929257072482e-05,
"loss": 1.2391,
"step": 668500
},
{
"epoch": 2.69,
"learning_rate": 3.0774918484557346e-05,
"loss": 1.2348,
"step": 669000
},
{
"epoch": 2.69,
"learning_rate": 3.0760544398389877e-05,
"loss": 1.234,
"step": 669500
},
{
"epoch": 2.7,
"learning_rate": 3.07461703122224e-05,
"loss": 1.2389,
"step": 670000
},
{
"epoch": 2.7,
"learning_rate": 3.073179622605493e-05,
"loss": 1.2821,
"step": 670500
},
{
"epoch": 2.7,
"learning_rate": 3.0717422139887456e-05,
"loss": 1.2589,
"step": 671000
},
{
"epoch": 2.7,
"learning_rate": 3.070307680189232e-05,
"loss": 1.2413,
"step": 671500
},
{
"epoch": 2.7,
"learning_rate": 3.068870271572485e-05,
"loss": 1.2379,
"step": 672000
},
{
"epoch": 2.71,
"learning_rate": 3.0674328629557376e-05,
"loss": 1.2484,
"step": 672500
},
{
"epoch": 2.71,
"learning_rate": 3.0659954543389907e-05,
"loss": 1.2199,
"step": 673000
},
{
"epoch": 2.71,
"learning_rate": 3.064560920539477e-05,
"loss": 1.2932,
"step": 673500
},
{
"epoch": 2.71,
"learning_rate": 3.0631235119227295e-05,
"loss": 1.3011,
"step": 674000
},
{
"epoch": 2.71,
"learning_rate": 3.0616861033059826e-05,
"loss": 1.2379,
"step": 674500
},
{
"epoch": 2.72,
"learning_rate": 3.060248694689235e-05,
"loss": 1.2357,
"step": 675000
},
{
"epoch": 2.72,
"learning_rate": 3.0588141608897215e-05,
"loss": 1.2198,
"step": 675500
},
{
"epoch": 2.72,
"learning_rate": 3.0573767522729746e-05,
"loss": 1.2762,
"step": 676000
},
{
"epoch": 2.72,
"learning_rate": 3.055939343656227e-05,
"loss": 1.2504,
"step": 676500
},
{
"epoch": 2.72,
"learning_rate": 3.05450193503948e-05,
"loss": 1.2453,
"step": 677000
},
{
"epoch": 2.73,
"learning_rate": 3.0530645264227325e-05,
"loss": 1.2447,
"step": 677500
},
{
"epoch": 2.73,
"learning_rate": 3.051629992623219e-05,
"loss": 1.213,
"step": 678000
},
{
"epoch": 2.73,
"learning_rate": 3.050192584006472e-05,
"loss": 1.2488,
"step": 678500
},
{
"epoch": 2.73,
"learning_rate": 3.048755175389725e-05,
"loss": 1.1872,
"step": 679000
},
{
"epoch": 2.73,
"learning_rate": 3.0473177667729773e-05,
"loss": 1.2731,
"step": 679500
},
{
"epoch": 2.74,
"learning_rate": 3.0458832329734637e-05,
"loss": 1.2094,
"step": 680000
},
{
"epoch": 2.74,
"learning_rate": 3.0444458243567165e-05,
"loss": 1.2595,
"step": 680500
},
{
"epoch": 2.74,
"learning_rate": 3.0430084157399696e-05,
"loss": 1.2945,
"step": 681000
},
{
"epoch": 2.74,
"learning_rate": 3.0415710071232223e-05,
"loss": 1.2121,
"step": 681500
},
{
"epoch": 2.74,
"learning_rate": 3.040133598506475e-05,
"loss": 1.2368,
"step": 682000
},
{
"epoch": 2.75,
"learning_rate": 3.0386961898897282e-05,
"loss": 1.22,
"step": 682500
},
{
"epoch": 2.75,
"learning_rate": 3.037261656090214e-05,
"loss": 1.2889,
"step": 683000
},
{
"epoch": 2.75,
"learning_rate": 3.035824247473467e-05,
"loss": 1.2652,
"step": 683500
},
{
"epoch": 2.75,
"learning_rate": 3.0343868388567198e-05,
"loss": 1.2319,
"step": 684000
},
{
"epoch": 2.75,
"learning_rate": 3.0329494302399726e-05,
"loss": 1.2568,
"step": 684500
},
{
"epoch": 2.76,
"learning_rate": 3.0315120216232257e-05,
"loss": 1.2336,
"step": 685000
},
{
"epoch": 2.76,
"learning_rate": 3.0300774878237114e-05,
"loss": 1.3042,
"step": 685500
},
{
"epoch": 2.76,
"learning_rate": 3.0286400792069645e-05,
"loss": 1.2508,
"step": 686000
},
{
"epoch": 2.76,
"learning_rate": 3.0272026705902173e-05,
"loss": 1.2772,
"step": 686500
},
{
"epoch": 2.76,
"learning_rate": 3.02576526197347e-05,
"loss": 1.2556,
"step": 687000
},
{
"epoch": 2.77,
"learning_rate": 3.024327853356723e-05,
"loss": 1.2766,
"step": 687500
},
{
"epoch": 2.77,
"learning_rate": 3.022890444739976e-05,
"loss": 1.2361,
"step": 688000
},
{
"epoch": 2.77,
"learning_rate": 3.0214530361232287e-05,
"loss": 1.2118,
"step": 688500
},
{
"epoch": 2.77,
"learning_rate": 3.0200185023237148e-05,
"loss": 1.2504,
"step": 689000
},
{
"epoch": 2.78,
"learning_rate": 3.0185810937069675e-05,
"loss": 1.2209,
"step": 689500
},
{
"epoch": 2.78,
"learning_rate": 3.0171436850902206e-05,
"loss": 1.2519,
"step": 690000
},
{
"epoch": 2.78,
"learning_rate": 3.0157062764734734e-05,
"loss": 1.2479,
"step": 690500
},
{
"epoch": 2.78,
"learning_rate": 3.014268867856726e-05,
"loss": 1.2803,
"step": 691000
},
{
"epoch": 2.78,
"learning_rate": 3.0128314592399792e-05,
"loss": 1.2125,
"step": 691500
},
{
"epoch": 2.79,
"learning_rate": 3.011396925440465e-05,
"loss": 1.3017,
"step": 692000
},
{
"epoch": 2.79,
"learning_rate": 3.009959516823718e-05,
"loss": 1.2246,
"step": 692500
},
{
"epoch": 2.79,
"learning_rate": 3.008522108206971e-05,
"loss": 1.2735,
"step": 693000
},
{
"epoch": 2.79,
"learning_rate": 3.0070846995902236e-05,
"loss": 1.2631,
"step": 693500
},
{
"epoch": 2.79,
"learning_rate": 3.0056472909734767e-05,
"loss": 1.1929,
"step": 694000
},
{
"epoch": 2.8,
"learning_rate": 3.0042098823567295e-05,
"loss": 1.1861,
"step": 694500
},
{
"epoch": 2.8,
"learning_rate": 3.002772473739982e-05,
"loss": 1.2358,
"step": 695000
},
{
"epoch": 2.8,
"learning_rate": 3.0013350651232347e-05,
"loss": 1.2334,
"step": 695500
},
{
"epoch": 2.8,
"learning_rate": 2.999900531323721e-05,
"loss": 1.1998,
"step": 696000
},
{
"epoch": 2.8,
"learning_rate": 2.9984631227069742e-05,
"loss": 1.2429,
"step": 696500
},
{
"epoch": 2.81,
"learning_rate": 2.997025714090227e-05,
"loss": 1.2357,
"step": 697000
},
{
"epoch": 2.81,
"learning_rate": 2.9955883054734797e-05,
"loss": 1.2471,
"step": 697500
},
{
"epoch": 2.81,
"learning_rate": 2.994153771673966e-05,
"loss": 1.2215,
"step": 698000
},
{
"epoch": 2.81,
"learning_rate": 2.9927192378744523e-05,
"loss": 1.219,
"step": 698500
},
{
"epoch": 2.81,
"learning_rate": 2.991281829257705e-05,
"loss": 1.23,
"step": 699000
},
{
"epoch": 2.82,
"learning_rate": 2.989844420640958e-05,
"loss": 1.2233,
"step": 699500
},
{
"epoch": 2.82,
"learning_rate": 2.988407012024211e-05,
"loss": 1.2892,
"step": 700000
},
{
"epoch": 2.82,
"learning_rate": 2.9869696034074633e-05,
"loss": 1.2555,
"step": 700500
},
{
"epoch": 2.82,
"learning_rate": 2.9855350696079498e-05,
"loss": 1.2268,
"step": 701000
},
{
"epoch": 2.82,
"learning_rate": 2.9841005358084362e-05,
"loss": 1.2577,
"step": 701500
},
{
"epoch": 2.83,
"learning_rate": 2.982663127191689e-05,
"loss": 1.2226,
"step": 702000
},
{
"epoch": 2.83,
"learning_rate": 2.9812257185749414e-05,
"loss": 1.2568,
"step": 702500
},
{
"epoch": 2.83,
"learning_rate": 2.979788309958195e-05,
"loss": 1.2055,
"step": 703000
},
{
"epoch": 2.83,
"learning_rate": 2.9783509013414473e-05,
"loss": 1.2711,
"step": 703500
},
{
"epoch": 2.83,
"learning_rate": 2.9769134927247e-05,
"loss": 1.2103,
"step": 704000
},
{
"epoch": 2.84,
"learning_rate": 2.975476084107953e-05,
"loss": 1.2523,
"step": 704500
},
{
"epoch": 2.84,
"learning_rate": 2.974041550308439e-05,
"loss": 1.2298,
"step": 705000
},
{
"epoch": 2.84,
"learning_rate": 2.9726041416916923e-05,
"loss": 1.2213,
"step": 705500
},
{
"epoch": 2.84,
"learning_rate": 2.9711667330749447e-05,
"loss": 1.2054,
"step": 706000
},
{
"epoch": 2.84,
"learning_rate": 2.9697293244581975e-05,
"loss": 1.2266,
"step": 706500
},
{
"epoch": 2.85,
"learning_rate": 2.9682919158414506e-05,
"loss": 1.2166,
"step": 707000
},
{
"epoch": 2.85,
"learning_rate": 2.9668545072247034e-05,
"loss": 1.2413,
"step": 707500
},
{
"epoch": 2.85,
"learning_rate": 2.965417098607956e-05,
"loss": 1.2661,
"step": 708000
},
{
"epoch": 2.85,
"learning_rate": 2.9639796899912085e-05,
"loss": 1.243,
"step": 708500
},
{
"epoch": 2.85,
"learning_rate": 2.962542281374462e-05,
"loss": 1.2561,
"step": 709000
},
{
"epoch": 2.86,
"learning_rate": 2.961107747574948e-05,
"loss": 1.2337,
"step": 709500
},
{
"epoch": 2.86,
"learning_rate": 2.959670338958201e-05,
"loss": 1.2169,
"step": 710000
},
{
"epoch": 2.86,
"learning_rate": 2.9582329303414536e-05,
"loss": 1.229,
"step": 710500
},
{
"epoch": 2.86,
"learning_rate": 2.9567955217247067e-05,
"loss": 1.2684,
"step": 711000
},
{
"epoch": 2.86,
"learning_rate": 2.9553581131079595e-05,
"loss": 1.2043,
"step": 711500
},
{
"epoch": 2.87,
"learning_rate": 2.9539207044912122e-05,
"loss": 1.2192,
"step": 712000
},
{
"epoch": 2.87,
"learning_rate": 2.9524861706916983e-05,
"loss": 1.2479,
"step": 712500
},
{
"epoch": 2.87,
"learning_rate": 2.951048762074951e-05,
"loss": 1.1925,
"step": 713000
},
{
"epoch": 2.87,
"learning_rate": 2.9496113534582042e-05,
"loss": 1.2559,
"step": 713500
},
{
"epoch": 2.87,
"learning_rate": 2.948173944841457e-05,
"loss": 1.2398,
"step": 714000
},
{
"epoch": 2.88,
"learning_rate": 2.9467365362247097e-05,
"loss": 1.2269,
"step": 714500
},
{
"epoch": 2.88,
"learning_rate": 2.945299127607962e-05,
"loss": 1.2711,
"step": 715000
},
{
"epoch": 2.88,
"learning_rate": 2.9438617189912155e-05,
"loss": 1.2233,
"step": 715500
},
{
"epoch": 2.88,
"learning_rate": 2.942424310374468e-05,
"loss": 1.2588,
"step": 716000
},
{
"epoch": 2.88,
"learning_rate": 2.9409897765749544e-05,
"loss": 1.2265,
"step": 716500
},
{
"epoch": 2.89,
"learning_rate": 2.9395523679582072e-05,
"loss": 1.23,
"step": 717000
},
{
"epoch": 2.89,
"learning_rate": 2.9381149593414596e-05,
"loss": 1.1803,
"step": 717500
},
{
"epoch": 2.89,
"learning_rate": 2.936677550724713e-05,
"loss": 1.2335,
"step": 718000
},
{
"epoch": 2.89,
"learning_rate": 2.9352430169251995e-05,
"loss": 1.2416,
"step": 718500
},
{
"epoch": 2.89,
"learning_rate": 2.933805608308452e-05,
"loss": 1.2309,
"step": 719000
},
{
"epoch": 2.9,
"learning_rate": 2.9323681996917047e-05,
"loss": 1.2603,
"step": 719500
},
{
"epoch": 2.9,
"learning_rate": 2.9309307910749578e-05,
"loss": 1.2577,
"step": 720000
},
{
"epoch": 2.9,
"learning_rate": 2.9294933824582105e-05,
"loss": 1.2358,
"step": 720500
},
{
"epoch": 2.9,
"learning_rate": 2.9280559738414633e-05,
"loss": 1.2698,
"step": 721000
},
{
"epoch": 2.9,
"learning_rate": 2.9266185652247157e-05,
"loss": 1.2195,
"step": 721500
},
{
"epoch": 2.91,
"learning_rate": 2.925181156607969e-05,
"loss": 1.2116,
"step": 722000
},
{
"epoch": 2.91,
"learning_rate": 2.9237466228084552e-05,
"loss": 1.3009,
"step": 722500
},
{
"epoch": 2.91,
"learning_rate": 2.922309214191708e-05,
"loss": 1.2121,
"step": 723000
},
{
"epoch": 2.91,
"learning_rate": 2.9208718055749608e-05,
"loss": 1.2387,
"step": 723500
},
{
"epoch": 2.91,
"learning_rate": 2.9194343969582132e-05,
"loss": 1.2452,
"step": 724000
},
{
"epoch": 2.92,
"learning_rate": 2.9179998631586996e-05,
"loss": 1.2374,
"step": 724500
},
{
"epoch": 2.92,
"learning_rate": 2.9165624545419527e-05,
"loss": 1.2658,
"step": 725000
},
{
"epoch": 2.92,
"learning_rate": 2.9151250459252055e-05,
"loss": 1.205,
"step": 725500
},
{
"epoch": 2.92,
"learning_rate": 2.913690512125692e-05,
"loss": 1.2435,
"step": 726000
},
{
"epoch": 2.92,
"learning_rate": 2.9122531035089447e-05,
"loss": 1.2219,
"step": 726500
},
{
"epoch": 2.93,
"learning_rate": 2.910815694892197e-05,
"loss": 1.2025,
"step": 727000
},
{
"epoch": 2.93,
"learning_rate": 2.9093782862754506e-05,
"loss": 1.2128,
"step": 727500
},
{
"epoch": 2.93,
"learning_rate": 2.9079437524759367e-05,
"loss": 1.2164,
"step": 728000
},
{
"epoch": 2.93,
"learning_rate": 2.9065063438591894e-05,
"loss": 1.2084,
"step": 728500
},
{
"epoch": 2.93,
"learning_rate": 2.9050689352424422e-05,
"loss": 1.2484,
"step": 729000
},
{
"epoch": 2.94,
"learning_rate": 2.9036315266256946e-05,
"loss": 1.2232,
"step": 729500
},
{
"epoch": 2.94,
"learning_rate": 2.902194118008948e-05,
"loss": 1.1881,
"step": 730000
},
{
"epoch": 2.94,
"learning_rate": 2.9007567093922005e-05,
"loss": 1.2966,
"step": 730500
},
{
"epoch": 2.94,
"learning_rate": 2.899322175592687e-05,
"loss": 1.2549,
"step": 731000
},
{
"epoch": 2.94,
"learning_rate": 2.8978847669759397e-05,
"loss": 1.2204,
"step": 731500
},
{
"epoch": 2.95,
"learning_rate": 2.896447358359192e-05,
"loss": 1.2822,
"step": 732000
},
{
"epoch": 2.95,
"learning_rate": 2.8950099497424455e-05,
"loss": 1.1844,
"step": 732500
},
{
"epoch": 2.95,
"learning_rate": 2.893572541125698e-05,
"loss": 1.2724,
"step": 733000
},
{
"epoch": 2.95,
"learning_rate": 2.8921351325089507e-05,
"loss": 1.2292,
"step": 733500
},
{
"epoch": 2.95,
"learning_rate": 2.8906977238922038e-05,
"loss": 1.2345,
"step": 734000
},
{
"epoch": 2.96,
"learning_rate": 2.8892603152754565e-05,
"loss": 1.1832,
"step": 734500
},
{
"epoch": 2.96,
"learning_rate": 2.887825781475943e-05,
"loss": 1.2625,
"step": 735000
},
{
"epoch": 2.96,
"learning_rate": 2.8863883728591958e-05,
"loss": 1.2347,
"step": 735500
},
{
"epoch": 2.96,
"learning_rate": 2.8849509642424482e-05,
"loss": 1.237,
"step": 736000
},
{
"epoch": 2.96,
"learning_rate": 2.8835135556257016e-05,
"loss": 1.249,
"step": 736500
},
{
"epoch": 2.97,
"learning_rate": 2.8820790218261874e-05,
"loss": 1.2297,
"step": 737000
},
{
"epoch": 2.97,
"learning_rate": 2.8806416132094405e-05,
"loss": 1.2588,
"step": 737500
},
{
"epoch": 2.97,
"learning_rate": 2.8792042045926932e-05,
"loss": 1.2088,
"step": 738000
},
{
"epoch": 2.97,
"learning_rate": 2.8777667959759457e-05,
"loss": 1.2394,
"step": 738500
},
{
"epoch": 2.97,
"learning_rate": 2.876329387359199e-05,
"loss": 1.2174,
"step": 739000
},
{
"epoch": 2.98,
"learning_rate": 2.8748919787424515e-05,
"loss": 1.2442,
"step": 739500
},
{
"epoch": 2.98,
"learning_rate": 2.8734545701257043e-05,
"loss": 1.245,
"step": 740000
},
{
"epoch": 2.98,
"learning_rate": 2.872017161508957e-05,
"loss": 1.2265,
"step": 740500
},
{
"epoch": 2.98,
"learning_rate": 2.870582627709443e-05,
"loss": 1.2302,
"step": 741000
},
{
"epoch": 2.98,
"learning_rate": 2.8691452190926966e-05,
"loss": 1.233,
"step": 741500
},
{
"epoch": 2.99,
"learning_rate": 2.867707810475949e-05,
"loss": 1.2119,
"step": 742000
},
{
"epoch": 2.99,
"learning_rate": 2.8662704018592018e-05,
"loss": 1.2313,
"step": 742500
},
{
"epoch": 2.99,
"learning_rate": 2.864832993242455e-05,
"loss": 1.2186,
"step": 743000
},
{
"epoch": 2.99,
"learning_rate": 2.8633955846257076e-05,
"loss": 1.2117,
"step": 743500
},
{
"epoch": 2.99,
"learning_rate": 2.8619581760089604e-05,
"loss": 1.2814,
"step": 744000
},
{
"epoch": 3.0,
"learning_rate": 2.860520767392213e-05,
"loss": 1.2375,
"step": 744500
},
{
"epoch": 3.0,
"learning_rate": 2.8590862335926992e-05,
"loss": 1.237,
"step": 745000
},
{
"epoch": 3.0,
"eval_cer": 0.2424137811448767,
"eval_loss": 0.976024329662323,
"eval_runtime": 10946.8543,
"eval_samples_per_second": 8.929,
"eval_steps_per_second": 1.116,
"step": 745389
},
{
"epoch": 3.0,
"learning_rate": 2.8576488249759527e-05,
"loss": 1.2107,
"step": 745500
},
{
"epoch": 3.0,
"learning_rate": 2.856211416359205e-05,
"loss": 1.1992,
"step": 746000
},
{
"epoch": 3.0,
"learning_rate": 2.854774007742458e-05,
"loss": 1.1948,
"step": 746500
},
{
"epoch": 3.01,
"learning_rate": 2.8533365991257106e-05,
"loss": 1.1713,
"step": 747000
},
{
"epoch": 3.01,
"learning_rate": 2.8519020653261967e-05,
"loss": 1.1483,
"step": 747500
},
{
"epoch": 3.01,
"learning_rate": 2.85046465670945e-05,
"loss": 1.2257,
"step": 748000
},
{
"epoch": 3.01,
"learning_rate": 2.849030122909936e-05,
"loss": 1.1686,
"step": 748500
},
{
"epoch": 3.01,
"learning_rate": 2.847592714293189e-05,
"loss": 1.1807,
"step": 749000
},
{
"epoch": 3.02,
"learning_rate": 2.8461553056764418e-05,
"loss": 1.1587,
"step": 749500
},
{
"epoch": 3.02,
"learning_rate": 2.8447178970596946e-05,
"loss": 1.2438,
"step": 750000
},
{
"epoch": 3.02,
"learning_rate": 2.8432833632601807e-05,
"loss": 1.1855,
"step": 750500
},
{
"epoch": 3.02,
"learning_rate": 2.841845954643434e-05,
"loss": 1.1944,
"step": 751000
},
{
"epoch": 3.02,
"learning_rate": 2.84041142084392e-05,
"loss": 1.1879,
"step": 751500
},
{
"epoch": 3.03,
"learning_rate": 2.838974012227173e-05,
"loss": 1.1839,
"step": 752000
},
{
"epoch": 3.03,
"learning_rate": 2.8375366036104257e-05,
"loss": 1.2428,
"step": 752500
},
{
"epoch": 3.03,
"learning_rate": 2.836099194993678e-05,
"loss": 1.2279,
"step": 753000
},
{
"epoch": 3.03,
"learning_rate": 2.8346617863769316e-05,
"loss": 1.2226,
"step": 753500
},
{
"epoch": 3.03,
"learning_rate": 2.833224377760184e-05,
"loss": 1.2016,
"step": 754000
},
{
"epoch": 3.04,
"learning_rate": 2.8317869691434368e-05,
"loss": 1.1895,
"step": 754500
},
{
"epoch": 3.04,
"learning_rate": 2.8303495605266895e-05,
"loss": 1.2318,
"step": 755000
},
{
"epoch": 3.04,
"learning_rate": 2.8289121519099426e-05,
"loss": 1.2281,
"step": 755500
},
{
"epoch": 3.04,
"learning_rate": 2.8274747432931954e-05,
"loss": 1.206,
"step": 756000
},
{
"epoch": 3.04,
"learning_rate": 2.8260373346764478e-05,
"loss": 1.1901,
"step": 756500
},
{
"epoch": 3.05,
"learning_rate": 2.8245999260597012e-05,
"loss": 1.1949,
"step": 757000
},
{
"epoch": 3.05,
"learning_rate": 2.823165392260187e-05,
"loss": 1.1934,
"step": 757500
},
{
"epoch": 3.05,
"learning_rate": 2.82172798364344e-05,
"loss": 1.1726,
"step": 758000
},
{
"epoch": 3.05,
"learning_rate": 2.820290575026693e-05,
"loss": 1.1841,
"step": 758500
},
{
"epoch": 3.05,
"learning_rate": 2.8188531664099456e-05,
"loss": 1.1345,
"step": 759000
},
{
"epoch": 3.06,
"learning_rate": 2.8174186326104317e-05,
"loss": 1.1785,
"step": 759500
},
{
"epoch": 3.06,
"learning_rate": 2.8159812239936845e-05,
"loss": 1.2117,
"step": 760000
},
{
"epoch": 3.06,
"learning_rate": 2.8145438153769376e-05,
"loss": 1.2459,
"step": 760500
},
{
"epoch": 3.06,
"learning_rate": 2.813109281577424e-05,
"loss": 1.2031,
"step": 761000
},
{
"epoch": 3.06,
"learning_rate": 2.8116718729606768e-05,
"loss": 1.1813,
"step": 761500
},
{
"epoch": 3.07,
"learning_rate": 2.8102344643439292e-05,
"loss": 1.1901,
"step": 762000
},
{
"epoch": 3.07,
"learning_rate": 2.8087970557271827e-05,
"loss": 1.1793,
"step": 762500
},
{
"epoch": 3.07,
"learning_rate": 2.807359647110435e-05,
"loss": 1.2231,
"step": 763000
},
{
"epoch": 3.07,
"learning_rate": 2.8059222384936878e-05,
"loss": 1.1651,
"step": 763500
},
{
"epoch": 3.07,
"learning_rate": 2.8044848298769406e-05,
"loss": 1.1767,
"step": 764000
},
{
"epoch": 3.08,
"learning_rate": 2.8030474212601937e-05,
"loss": 1.1947,
"step": 764500
},
{
"epoch": 3.08,
"learning_rate": 2.801615762277913e-05,
"loss": 1.2363,
"step": 765000
},
{
"epoch": 3.08,
"learning_rate": 2.800178353661166e-05,
"loss": 1.2033,
"step": 765500
},
{
"epoch": 3.08,
"learning_rate": 2.798740945044419e-05,
"loss": 1.1903,
"step": 766000
},
{
"epoch": 3.08,
"learning_rate": 2.7973035364276718e-05,
"loss": 1.1984,
"step": 766500
},
{
"epoch": 3.09,
"learning_rate": 2.7958690026281582e-05,
"loss": 1.2454,
"step": 767000
},
{
"epoch": 3.09,
"learning_rate": 2.7944315940114106e-05,
"loss": 1.1979,
"step": 767500
},
{
"epoch": 3.09,
"learning_rate": 2.7929941853946634e-05,
"loss": 1.1888,
"step": 768000
},
{
"epoch": 3.09,
"learning_rate": 2.7915567767779165e-05,
"loss": 1.1791,
"step": 768500
},
{
"epoch": 3.1,
"learning_rate": 2.7901193681611693e-05,
"loss": 1.2505,
"step": 769000
},
{
"epoch": 3.1,
"learning_rate": 2.7886848343616557e-05,
"loss": 1.2186,
"step": 769500
},
{
"epoch": 3.1,
"learning_rate": 2.787247425744908e-05,
"loss": 1.1915,
"step": 770000
},
{
"epoch": 3.1,
"learning_rate": 2.7858100171281616e-05,
"loss": 1.2007,
"step": 770500
},
{
"epoch": 3.1,
"learning_rate": 2.784372608511414e-05,
"loss": 1.1768,
"step": 771000
},
{
"epoch": 3.11,
"learning_rate": 2.7829380747119004e-05,
"loss": 1.1771,
"step": 771500
},
{
"epoch": 3.11,
"learning_rate": 2.7815006660951532e-05,
"loss": 1.165,
"step": 772000
},
{
"epoch": 3.11,
"learning_rate": 2.780063257478406e-05,
"loss": 1.2091,
"step": 772500
},
{
"epoch": 3.11,
"learning_rate": 2.778625848861659e-05,
"loss": 1.205,
"step": 773000
},
{
"epoch": 3.11,
"learning_rate": 2.7771884402449118e-05,
"loss": 1.1734,
"step": 773500
},
{
"epoch": 3.12,
"learning_rate": 2.775753906445398e-05,
"loss": 1.2123,
"step": 774000
},
{
"epoch": 3.12,
"learning_rate": 2.7743164978286507e-05,
"loss": 1.2031,
"step": 774500
},
{
"epoch": 3.12,
"learning_rate": 2.7728790892119034e-05,
"loss": 1.208,
"step": 775000
},
{
"epoch": 3.12,
"learning_rate": 2.7714416805951565e-05,
"loss": 1.2273,
"step": 775500
},
{
"epoch": 3.12,
"learning_rate": 2.7700042719784093e-05,
"loss": 1.1497,
"step": 776000
},
{
"epoch": 3.13,
"learning_rate": 2.7685668633616617e-05,
"loss": 1.2239,
"step": 776500
},
{
"epoch": 3.13,
"learning_rate": 2.767132329562148e-05,
"loss": 1.1874,
"step": 777000
},
{
"epoch": 3.13,
"learning_rate": 2.765694920945401e-05,
"loss": 1.168,
"step": 777500
},
{
"epoch": 3.13,
"learning_rate": 2.764257512328654e-05,
"loss": 1.2032,
"step": 778000
},
{
"epoch": 3.13,
"learning_rate": 2.7628201037119068e-05,
"loss": 1.1673,
"step": 778500
},
{
"epoch": 3.14,
"learning_rate": 2.761385569912393e-05,
"loss": 1.2331,
"step": 779000
},
{
"epoch": 3.14,
"learning_rate": 2.7599481612956456e-05,
"loss": 1.2009,
"step": 779500
},
{
"epoch": 3.14,
"learning_rate": 2.7585107526788984e-05,
"loss": 1.1931,
"step": 780000
},
{
"epoch": 3.14,
"learning_rate": 2.7570733440621515e-05,
"loss": 1.185,
"step": 780500
},
{
"epoch": 3.14,
"learning_rate": 2.7556359354454043e-05,
"loss": 1.2052,
"step": 781000
},
{
"epoch": 3.15,
"learning_rate": 2.7542014016458907e-05,
"loss": 1.1839,
"step": 781500
},
{
"epoch": 3.15,
"learning_rate": 2.752763993029143e-05,
"loss": 1.2023,
"step": 782000
},
{
"epoch": 3.15,
"learning_rate": 2.751326584412396e-05,
"loss": 1.2242,
"step": 782500
},
{
"epoch": 3.15,
"learning_rate": 2.749889175795649e-05,
"loss": 1.1282,
"step": 783000
},
{
"epoch": 3.15,
"learning_rate": 2.7484546419961354e-05,
"loss": 1.1759,
"step": 783500
},
{
"epoch": 3.16,
"learning_rate": 2.7470172333793882e-05,
"loss": 1.1955,
"step": 784000
},
{
"epoch": 3.16,
"learning_rate": 2.7455798247626406e-05,
"loss": 1.1752,
"step": 784500
},
{
"epoch": 3.16,
"learning_rate": 2.7441424161458934e-05,
"loss": 1.2358,
"step": 785000
},
{
"epoch": 3.16,
"learning_rate": 2.7427050075291465e-05,
"loss": 1.1965,
"step": 785500
},
{
"epoch": 3.16,
"learning_rate": 2.7412675989123992e-05,
"loss": 1.1938,
"step": 786000
},
{
"epoch": 3.17,
"learning_rate": 2.739830190295652e-05,
"loss": 1.1775,
"step": 786500
},
{
"epoch": 3.17,
"learning_rate": 2.7383956564961384e-05,
"loss": 1.2015,
"step": 787000
},
{
"epoch": 3.17,
"learning_rate": 2.736958247879391e-05,
"loss": 1.1769,
"step": 787500
},
{
"epoch": 3.17,
"learning_rate": 2.7355208392626443e-05,
"loss": 1.1922,
"step": 788000
},
{
"epoch": 3.17,
"learning_rate": 2.7340834306458967e-05,
"loss": 1.1821,
"step": 788500
},
{
"epoch": 3.18,
"learning_rate": 2.7326460220291495e-05,
"loss": 1.2219,
"step": 789000
},
{
"epoch": 3.18,
"learning_rate": 2.731211488229636e-05,
"loss": 1.1915,
"step": 789500
},
{
"epoch": 3.18,
"learning_rate": 2.729774079612889e-05,
"loss": 1.1633,
"step": 790000
},
{
"epoch": 3.18,
"learning_rate": 2.7283366709961418e-05,
"loss": 1.194,
"step": 790500
},
{
"epoch": 3.18,
"learning_rate": 2.7268992623793942e-05,
"loss": 1.2103,
"step": 791000
},
{
"epoch": 3.19,
"learning_rate": 2.725461853762647e-05,
"loss": 1.1906,
"step": 791500
},
{
"epoch": 3.19,
"learning_rate": 2.7240273199631334e-05,
"loss": 1.1998,
"step": 792000
},
{
"epoch": 3.19,
"learning_rate": 2.7225899113463865e-05,
"loss": 1.2401,
"step": 792500
},
{
"epoch": 3.19,
"learning_rate": 2.7211525027296393e-05,
"loss": 1.1735,
"step": 793000
},
{
"epoch": 3.19,
"learning_rate": 2.7197150941128917e-05,
"loss": 1.1767,
"step": 793500
},
{
"epoch": 3.2,
"learning_rate": 2.7182776854961444e-05,
"loss": 1.2027,
"step": 794000
},
{
"epoch": 3.2,
"learning_rate": 2.7168402768793975e-05,
"loss": 1.1756,
"step": 794500
},
{
"epoch": 3.2,
"learning_rate": 2.7154028682626503e-05,
"loss": 1.2002,
"step": 795000
},
{
"epoch": 3.2,
"learning_rate": 2.713965459645903e-05,
"loss": 1.2255,
"step": 795500
},
{
"epoch": 3.2,
"learning_rate": 2.7125309258463895e-05,
"loss": 1.2232,
"step": 796000
},
{
"epoch": 3.21,
"learning_rate": 2.711093517229642e-05,
"loss": 1.1852,
"step": 796500
},
{
"epoch": 3.21,
"learning_rate": 2.7096561086128954e-05,
"loss": 1.1968,
"step": 797000
},
{
"epoch": 3.21,
"learning_rate": 2.7082215748133815e-05,
"loss": 1.2214,
"step": 797500
},
{
"epoch": 3.21,
"learning_rate": 2.7067841661966342e-05,
"loss": 1.1843,
"step": 798000
},
{
"epoch": 3.21,
"learning_rate": 2.705346757579887e-05,
"loss": 1.1879,
"step": 798500
},
{
"epoch": 3.22,
"learning_rate": 2.7039093489631394e-05,
"loss": 1.1862,
"step": 799000
},
{
"epoch": 3.22,
"learning_rate": 2.702471940346393e-05,
"loss": 1.1976,
"step": 799500
},
{
"epoch": 3.22,
"learning_rate": 2.7010345317296453e-05,
"loss": 1.2242,
"step": 800000
},
{
"epoch": 3.22,
"learning_rate": 2.699597123112898e-05,
"loss": 1.2212,
"step": 800500
},
{
"epoch": 3.22,
"learning_rate": 2.698159714496151e-05,
"loss": 1.1921,
"step": 801000
},
{
"epoch": 3.23,
"learning_rate": 2.6967251806966376e-05,
"loss": 1.1946,
"step": 801500
},
{
"epoch": 3.23,
"learning_rate": 2.6952877720798903e-05,
"loss": 1.1886,
"step": 802000
},
{
"epoch": 3.23,
"learning_rate": 2.6938503634631427e-05,
"loss": 1.2393,
"step": 802500
},
{
"epoch": 3.23,
"learning_rate": 2.6924129548463955e-05,
"loss": 1.1932,
"step": 803000
},
{
"epoch": 3.23,
"learning_rate": 2.6909755462296486e-05,
"loss": 1.1931,
"step": 803500
},
{
"epoch": 3.24,
"learning_rate": 2.689541012430135e-05,
"loss": 1.1557,
"step": 804000
},
{
"epoch": 3.24,
"learning_rate": 2.6881036038133878e-05,
"loss": 1.203,
"step": 804500
},
{
"epoch": 3.24,
"learning_rate": 2.6866661951966406e-05,
"loss": 1.1944,
"step": 805000
},
{
"epoch": 3.24,
"learning_rate": 2.685228786579893e-05,
"loss": 1.2017,
"step": 805500
},
{
"epoch": 3.24,
"learning_rate": 2.6837913779631464e-05,
"loss": 1.2312,
"step": 806000
},
{
"epoch": 3.25,
"learning_rate": 2.682353969346399e-05,
"loss": 1.2017,
"step": 806500
},
{
"epoch": 3.25,
"learning_rate": 2.6809194355468853e-05,
"loss": 1.1889,
"step": 807000
},
{
"epoch": 3.25,
"learning_rate": 2.679482026930138e-05,
"loss": 1.2188,
"step": 807500
},
{
"epoch": 3.25,
"learning_rate": 2.6780446183133905e-05,
"loss": 1.1738,
"step": 808000
},
{
"epoch": 3.25,
"learning_rate": 2.676610084513877e-05,
"loss": 1.1761,
"step": 808500
},
{
"epoch": 3.26,
"learning_rate": 2.67517267589713e-05,
"loss": 1.1507,
"step": 809000
},
{
"epoch": 3.26,
"learning_rate": 2.6737352672803828e-05,
"loss": 1.1887,
"step": 809500
},
{
"epoch": 3.26,
"learning_rate": 2.6722978586636355e-05,
"loss": 1.2401,
"step": 810000
},
{
"epoch": 3.26,
"learning_rate": 2.6708604500468886e-05,
"loss": 1.2165,
"step": 810500
},
{
"epoch": 3.26,
"learning_rate": 2.6694230414301414e-05,
"loss": 1.1777,
"step": 811000
},
{
"epoch": 3.27,
"learning_rate": 2.667985632813394e-05,
"loss": 1.17,
"step": 811500
},
{
"epoch": 3.27,
"learning_rate": 2.6665482241966466e-05,
"loss": 1.209,
"step": 812000
},
{
"epoch": 3.27,
"learning_rate": 2.6651108155799e-05,
"loss": 1.1625,
"step": 812500
},
{
"epoch": 3.27,
"learning_rate": 2.663676281780386e-05,
"loss": 1.1877,
"step": 813000
},
{
"epoch": 3.27,
"learning_rate": 2.662238873163639e-05,
"loss": 1.2107,
"step": 813500
},
{
"epoch": 3.28,
"learning_rate": 2.6608014645468916e-05,
"loss": 1.2071,
"step": 814000
},
{
"epoch": 3.28,
"learning_rate": 2.659364055930144e-05,
"loss": 1.1744,
"step": 814500
},
{
"epoch": 3.28,
"learning_rate": 2.6579266473133975e-05,
"loss": 1.1599,
"step": 815000
},
{
"epoch": 3.28,
"learning_rate": 2.6564921135138836e-05,
"loss": 1.1937,
"step": 815500
},
{
"epoch": 3.28,
"learning_rate": 2.6550547048971364e-05,
"loss": 1.1839,
"step": 816000
},
{
"epoch": 3.29,
"learning_rate": 2.653617296280389e-05,
"loss": 1.231,
"step": 816500
},
{
"epoch": 3.29,
"learning_rate": 2.6521798876636415e-05,
"loss": 1.1725,
"step": 817000
},
{
"epoch": 3.29,
"learning_rate": 2.650742479046895e-05,
"loss": 1.2099,
"step": 817500
},
{
"epoch": 3.29,
"learning_rate": 2.6493050704301474e-05,
"loss": 1.1811,
"step": 818000
},
{
"epoch": 3.29,
"learning_rate": 2.647870536630634e-05,
"loss": 1.2299,
"step": 818500
},
{
"epoch": 3.3,
"learning_rate": 2.6464331280138866e-05,
"loss": 1.1692,
"step": 819000
},
{
"epoch": 3.3,
"learning_rate": 2.6449957193971394e-05,
"loss": 1.2198,
"step": 819500
},
{
"epoch": 3.3,
"learning_rate": 2.6435583107803924e-05,
"loss": 1.2231,
"step": 820000
},
{
"epoch": 3.3,
"learning_rate": 2.6421209021636452e-05,
"loss": 1.2063,
"step": 820500
},
{
"epoch": 3.3,
"learning_rate": 2.6406834935468976e-05,
"loss": 1.2099,
"step": 821000
},
{
"epoch": 3.31,
"learning_rate": 2.639248959747384e-05,
"loss": 1.2221,
"step": 821500
},
{
"epoch": 3.31,
"learning_rate": 2.6378115511306372e-05,
"loss": 1.2231,
"step": 822000
},
{
"epoch": 3.31,
"learning_rate": 2.63637414251389e-05,
"loss": 1.2349,
"step": 822500
},
{
"epoch": 3.31,
"learning_rate": 2.6349367338971427e-05,
"loss": 1.2288,
"step": 823000
},
{
"epoch": 3.31,
"learning_rate": 2.633499325280395e-05,
"loss": 1.2305,
"step": 823500
},
{
"epoch": 3.32,
"learning_rate": 2.6320619166636485e-05,
"loss": 1.1755,
"step": 824000
},
{
"epoch": 3.32,
"learning_rate": 2.6306273828641347e-05,
"loss": 1.2109,
"step": 824500
},
{
"epoch": 3.32,
"learning_rate": 2.6291899742473874e-05,
"loss": 1.2093,
"step": 825000
},
{
"epoch": 3.32,
"learning_rate": 2.6277525656306402e-05,
"loss": 1.1682,
"step": 825500
},
{
"epoch": 3.32,
"learning_rate": 2.6263151570138926e-05,
"loss": 1.1864,
"step": 826000
},
{
"epoch": 3.33,
"learning_rate": 2.624877748397146e-05,
"loss": 1.2548,
"step": 826500
},
{
"epoch": 3.33,
"learning_rate": 2.6234403397803984e-05,
"loss": 1.1723,
"step": 827000
},
{
"epoch": 3.33,
"learning_rate": 2.6220029311636512e-05,
"loss": 1.1182,
"step": 827500
},
{
"epoch": 3.33,
"learning_rate": 2.6205655225469043e-05,
"loss": 1.2548,
"step": 828000
},
{
"epoch": 3.33,
"learning_rate": 2.6191309887473904e-05,
"loss": 1.1878,
"step": 828500
},
{
"epoch": 3.34,
"learning_rate": 2.6176935801306435e-05,
"loss": 1.2124,
"step": 829000
},
{
"epoch": 3.34,
"learning_rate": 2.6162561715138963e-05,
"loss": 1.1506,
"step": 829500
},
{
"epoch": 3.34,
"learning_rate": 2.6148187628971487e-05,
"loss": 1.1477,
"step": 830000
},
{
"epoch": 3.34,
"learning_rate": 2.613384229097635e-05,
"loss": 1.1757,
"step": 830500
},
{
"epoch": 3.34,
"learning_rate": 2.611946820480888e-05,
"loss": 1.2022,
"step": 831000
},
{
"epoch": 3.35,
"learning_rate": 2.610509411864141e-05,
"loss": 1.1714,
"step": 831500
},
{
"epoch": 3.35,
"learning_rate": 2.6090748780646275e-05,
"loss": 1.2133,
"step": 832000
},
{
"epoch": 3.35,
"learning_rate": 2.60763746944788e-05,
"loss": 1.1997,
"step": 832500
},
{
"epoch": 3.35,
"learning_rate": 2.6062000608311326e-05,
"loss": 1.1625,
"step": 833000
},
{
"epoch": 3.35,
"learning_rate": 2.6047626522143857e-05,
"loss": 1.1425,
"step": 833500
},
{
"epoch": 3.36,
"learning_rate": 2.603328118414872e-05,
"loss": 1.1886,
"step": 834000
},
{
"epoch": 3.36,
"learning_rate": 2.601890709798125e-05,
"loss": 1.1943,
"step": 834500
},
{
"epoch": 3.36,
"learning_rate": 2.6004533011813777e-05,
"loss": 1.1922,
"step": 835000
},
{
"epoch": 3.36,
"learning_rate": 2.59901589256463e-05,
"loss": 1.1883,
"step": 835500
},
{
"epoch": 3.36,
"learning_rate": 2.5975784839478835e-05,
"loss": 1.1884,
"step": 836000
},
{
"epoch": 3.37,
"learning_rate": 2.596141075331136e-05,
"loss": 1.2316,
"step": 836500
},
{
"epoch": 3.37,
"learning_rate": 2.5947036667143887e-05,
"loss": 1.2248,
"step": 837000
},
{
"epoch": 3.37,
"learning_rate": 2.5932691329148752e-05,
"loss": 1.2145,
"step": 837500
},
{
"epoch": 3.37,
"learning_rate": 2.5918317242981276e-05,
"loss": 1.2003,
"step": 838000
},
{
"epoch": 3.37,
"learning_rate": 2.590394315681381e-05,
"loss": 1.2374,
"step": 838500
},
{
"epoch": 3.38,
"learning_rate": 2.5889569070646335e-05,
"loss": 1.194,
"step": 839000
},
{
"epoch": 3.38,
"learning_rate": 2.5875194984478862e-05,
"loss": 1.2262,
"step": 839500
},
{
"epoch": 3.38,
"learning_rate": 2.5860849646483727e-05,
"loss": 1.1755,
"step": 840000
},
{
"epoch": 3.38,
"learning_rate": 2.584647556031625e-05,
"loss": 1.193,
"step": 840500
},
{
"epoch": 3.38,
"learning_rate": 2.5832101474148785e-05,
"loss": 1.2141,
"step": 841000
},
{
"epoch": 3.39,
"learning_rate": 2.581772738798131e-05,
"loss": 1.1863,
"step": 841500
},
{
"epoch": 3.39,
"learning_rate": 2.5803382049986174e-05,
"loss": 1.1763,
"step": 842000
},
{
"epoch": 3.39,
"learning_rate": 2.57890079638187e-05,
"loss": 1.1525,
"step": 842500
},
{
"epoch": 3.39,
"learning_rate": 2.577463387765123e-05,
"loss": 1.1811,
"step": 843000
},
{
"epoch": 3.39,
"learning_rate": 2.576025979148376e-05,
"loss": 1.1807,
"step": 843500
},
{
"epoch": 3.4,
"learning_rate": 2.5745885705316288e-05,
"loss": 1.2043,
"step": 844000
},
{
"epoch": 3.4,
"learning_rate": 2.573154036732115e-05,
"loss": 1.1559,
"step": 844500
},
{
"epoch": 3.4,
"learning_rate": 2.5717166281153676e-05,
"loss": 1.2187,
"step": 845000
},
{
"epoch": 3.4,
"learning_rate": 2.5702792194986204e-05,
"loss": 1.1387,
"step": 845500
},
{
"epoch": 3.4,
"learning_rate": 2.5688418108818735e-05,
"loss": 1.165,
"step": 846000
},
{
"epoch": 3.41,
"learning_rate": 2.5674044022651262e-05,
"loss": 1.2342,
"step": 846500
},
{
"epoch": 3.41,
"learning_rate": 2.5659698684656124e-05,
"loss": 1.1913,
"step": 847000
},
{
"epoch": 3.41,
"learning_rate": 2.564532459848865e-05,
"loss": 1.2385,
"step": 847500
},
{
"epoch": 3.41,
"learning_rate": 2.563095051232118e-05,
"loss": 1.2333,
"step": 848000
},
{
"epoch": 3.41,
"learning_rate": 2.561657642615371e-05,
"loss": 1.2223,
"step": 848500
},
{
"epoch": 3.42,
"learning_rate": 2.5602202339986237e-05,
"loss": 1.2245,
"step": 849000
},
{
"epoch": 3.42,
"learning_rate": 2.55878570019911e-05,
"loss": 1.2579,
"step": 849500
},
{
"epoch": 3.42,
"learning_rate": 2.5573482915823626e-05,
"loss": 1.2277,
"step": 850000
},
{
"epoch": 3.42,
"learning_rate": 2.5559108829656154e-05,
"loss": 1.2517,
"step": 850500
},
{
"epoch": 3.43,
"learning_rate": 2.5544763491661018e-05,
"loss": 1.1861,
"step": 851000
},
{
"epoch": 3.43,
"learning_rate": 2.553038940549355e-05,
"loss": 1.2176,
"step": 851500
},
{
"epoch": 3.43,
"learning_rate": 2.5516015319326077e-05,
"loss": 1.208,
"step": 852000
},
{
"epoch": 3.43,
"learning_rate": 2.55016412331586e-05,
"loss": 1.169,
"step": 852500
},
{
"epoch": 3.43,
"learning_rate": 2.5487267146991135e-05,
"loss": 1.1972,
"step": 853000
},
{
"epoch": 3.44,
"learning_rate": 2.547289306082366e-05,
"loss": 1.1865,
"step": 853500
},
{
"epoch": 3.44,
"learning_rate": 2.5458518974656187e-05,
"loss": 1.1974,
"step": 854000
},
{
"epoch": 3.44,
"learning_rate": 2.5444144888488715e-05,
"loss": 1.1921,
"step": 854500
},
{
"epoch": 3.44,
"learning_rate": 2.5429799550493576e-05,
"loss": 1.1845,
"step": 855000
},
{
"epoch": 3.44,
"learning_rate": 2.541542546432611e-05,
"loss": 1.1969,
"step": 855500
},
{
"epoch": 3.45,
"learning_rate": 2.5401051378158634e-05,
"loss": 1.1579,
"step": 856000
},
{
"epoch": 3.45,
"learning_rate": 2.5386677291991162e-05,
"loss": 1.2033,
"step": 856500
},
{
"epoch": 3.45,
"learning_rate": 2.537230320582369e-05,
"loss": 1.1873,
"step": 857000
},
{
"epoch": 3.45,
"learning_rate": 2.5357957867828554e-05,
"loss": 1.262,
"step": 857500
},
{
"epoch": 3.45,
"learning_rate": 2.5343612529833415e-05,
"loss": 1.2345,
"step": 858000
},
{
"epoch": 3.46,
"learning_rate": 2.5329238443665943e-05,
"loss": 1.1708,
"step": 858500
},
{
"epoch": 3.46,
"learning_rate": 2.5314864357498474e-05,
"loss": 1.1934,
"step": 859000
},
{
"epoch": 3.46,
"learning_rate": 2.5300490271331e-05,
"loss": 1.2051,
"step": 859500
},
{
"epoch": 3.46,
"learning_rate": 2.528611618516353e-05,
"loss": 1.1626,
"step": 860000
},
{
"epoch": 3.46,
"learning_rate": 2.527177084716839e-05,
"loss": 1.2027,
"step": 860500
},
{
"epoch": 3.47,
"learning_rate": 2.5257425509173255e-05,
"loss": 1.1948,
"step": 861000
},
{
"epoch": 3.47,
"learning_rate": 2.5243051423005782e-05,
"loss": 1.1753,
"step": 861500
},
{
"epoch": 3.47,
"learning_rate": 2.5228677336838313e-05,
"loss": 1.1782,
"step": 862000
},
{
"epoch": 3.47,
"learning_rate": 2.521430325067084e-05,
"loss": 1.1961,
"step": 862500
},
{
"epoch": 3.47,
"learning_rate": 2.5199929164503365e-05,
"loss": 1.2003,
"step": 863000
},
{
"epoch": 3.48,
"learning_rate": 2.51855550783359e-05,
"loss": 1.2066,
"step": 863500
},
{
"epoch": 3.48,
"learning_rate": 2.5171180992168423e-05,
"loss": 1.1561,
"step": 864000
},
{
"epoch": 3.48,
"learning_rate": 2.515680690600095e-05,
"loss": 1.1935,
"step": 864500
},
{
"epoch": 3.48,
"learning_rate": 2.514243281983348e-05,
"loss": 1.1848,
"step": 865000
},
{
"epoch": 3.48,
"learning_rate": 2.512805873366601e-05,
"loss": 1.223,
"step": 865500
},
{
"epoch": 3.49,
"learning_rate": 2.5113684647498537e-05,
"loss": 1.2462,
"step": 866000
},
{
"epoch": 3.49,
"learning_rate": 2.5099310561331065e-05,
"loss": 1.1886,
"step": 866500
},
{
"epoch": 3.49,
"learning_rate": 2.5084936475163596e-05,
"loss": 1.2163,
"step": 867000
},
{
"epoch": 3.49,
"learning_rate": 2.5070562388996123e-05,
"loss": 1.1928,
"step": 867500
},
{
"epoch": 3.49,
"learning_rate": 2.5056188302828647e-05,
"loss": 1.1466,
"step": 868000
},
{
"epoch": 3.5,
"learning_rate": 2.5041842964833512e-05,
"loss": 1.1692,
"step": 868500
},
{
"epoch": 3.5,
"learning_rate": 2.502746887866604e-05,
"loss": 1.181,
"step": 869000
},
{
"epoch": 3.5,
"learning_rate": 2.501309479249857e-05,
"loss": 1.1604,
"step": 869500
},
{
"epoch": 3.5,
"learning_rate": 2.4998720706331098e-05,
"loss": 1.1684,
"step": 870000
},
{
"epoch": 3.5,
"learning_rate": 2.4984346620163622e-05,
"loss": 1.1951,
"step": 870500
},
{
"epoch": 3.51,
"learning_rate": 2.4970001282168487e-05,
"loss": 1.1963,
"step": 871000
},
{
"epoch": 3.51,
"learning_rate": 2.4955627196001018e-05,
"loss": 1.1888,
"step": 871500
},
{
"epoch": 3.51,
"learning_rate": 2.4941253109833542e-05,
"loss": 1.1906,
"step": 872000
},
{
"epoch": 3.51,
"learning_rate": 2.4926879023666073e-05,
"loss": 1.1896,
"step": 872500
},
{
"epoch": 3.51,
"learning_rate": 2.4912504937498597e-05,
"loss": 1.2349,
"step": 873000
},
{
"epoch": 3.52,
"learning_rate": 2.4898130851331128e-05,
"loss": 1.1894,
"step": 873500
},
{
"epoch": 3.52,
"learning_rate": 2.4883785513335993e-05,
"loss": 1.1912,
"step": 874000
},
{
"epoch": 3.52,
"learning_rate": 2.4869411427168517e-05,
"loss": 1.1669,
"step": 874500
},
{
"epoch": 3.52,
"learning_rate": 2.4855037341001048e-05,
"loss": 1.2058,
"step": 875000
},
{
"epoch": 3.52,
"learning_rate": 2.4840663254833575e-05,
"loss": 1.212,
"step": 875500
},
{
"epoch": 3.53,
"learning_rate": 2.4826317916838436e-05,
"loss": 1.1972,
"step": 876000
},
{
"epoch": 3.53,
"learning_rate": 2.4811943830670967e-05,
"loss": 1.1659,
"step": 876500
},
{
"epoch": 3.53,
"learning_rate": 2.4797569744503495e-05,
"loss": 1.1717,
"step": 877000
},
{
"epoch": 3.53,
"learning_rate": 2.4783195658336022e-05,
"loss": 1.1978,
"step": 877500
},
{
"epoch": 3.53,
"learning_rate": 2.4768821572168553e-05,
"loss": 1.1735,
"step": 878000
},
{
"epoch": 3.54,
"learning_rate": 2.475447623417341e-05,
"loss": 1.2148,
"step": 878500
},
{
"epoch": 3.54,
"learning_rate": 2.4740102148005942e-05,
"loss": 1.1704,
"step": 879000
},
{
"epoch": 3.54,
"learning_rate": 2.472572806183847e-05,
"loss": 1.2026,
"step": 879500
},
{
"epoch": 3.54,
"learning_rate": 2.4711353975670997e-05,
"loss": 1.1933,
"step": 880000
},
{
"epoch": 3.54,
"learning_rate": 2.4696979889503528e-05,
"loss": 1.2241,
"step": 880500
},
{
"epoch": 3.55,
"learning_rate": 2.4682605803336052e-05,
"loss": 1.169,
"step": 881000
},
{
"epoch": 3.55,
"learning_rate": 2.4668231717168583e-05,
"loss": 1.1719,
"step": 881500
},
{
"epoch": 3.55,
"learning_rate": 2.465385763100111e-05,
"loss": 1.1823,
"step": 882000
},
{
"epoch": 3.55,
"learning_rate": 2.4639512293005972e-05,
"loss": 1.1706,
"step": 882500
},
{
"epoch": 3.55,
"learning_rate": 2.4625166955010837e-05,
"loss": 1.1814,
"step": 883000
},
{
"epoch": 3.56,
"learning_rate": 2.4610792868843364e-05,
"loss": 1.2014,
"step": 883500
},
{
"epoch": 3.56,
"learning_rate": 2.4596418782675892e-05,
"loss": 1.1884,
"step": 884000
},
{
"epoch": 3.56,
"learning_rate": 2.4582044696508423e-05,
"loss": 1.1569,
"step": 884500
},
{
"epoch": 3.56,
"learning_rate": 2.4567699358513284e-05,
"loss": 1.2318,
"step": 885000
},
{
"epoch": 3.56,
"learning_rate": 2.455332527234581e-05,
"loss": 1.2312,
"step": 885500
},
{
"epoch": 3.57,
"learning_rate": 2.4538951186178343e-05,
"loss": 1.1868,
"step": 886000
},
{
"epoch": 3.57,
"learning_rate": 2.45246058481832e-05,
"loss": 1.1478,
"step": 886500
},
{
"epoch": 3.57,
"learning_rate": 2.451023176201573e-05,
"loss": 1.1534,
"step": 887000
},
{
"epoch": 3.57,
"learning_rate": 2.449585767584826e-05,
"loss": 1.2182,
"step": 887500
},
{
"epoch": 3.57,
"learning_rate": 2.4481483589680786e-05,
"loss": 1.1811,
"step": 888000
},
{
"epoch": 3.58,
"learning_rate": 2.4467109503513317e-05,
"loss": 1.2205,
"step": 888500
},
{
"epoch": 3.58,
"learning_rate": 2.445273541734584e-05,
"loss": 1.1841,
"step": 889000
},
{
"epoch": 3.58,
"learning_rate": 2.4438361331178373e-05,
"loss": 1.194,
"step": 889500
},
{
"epoch": 3.58,
"learning_rate": 2.4424015993183237e-05,
"loss": 1.1959,
"step": 890000
},
{
"epoch": 3.58,
"learning_rate": 2.440964190701576e-05,
"loss": 1.1874,
"step": 890500
},
{
"epoch": 3.59,
"learning_rate": 2.4395267820848292e-05,
"loss": 1.1992,
"step": 891000
},
{
"epoch": 3.59,
"learning_rate": 2.4380893734680816e-05,
"loss": 1.1908,
"step": 891500
},
{
"epoch": 3.59,
"learning_rate": 2.4366519648513347e-05,
"loss": 1.1908,
"step": 892000
},
{
"epoch": 3.59,
"learning_rate": 2.4352145562345875e-05,
"loss": 1.2308,
"step": 892500
},
{
"epoch": 3.59,
"learning_rate": 2.4337771476178403e-05,
"loss": 1.1986,
"step": 893000
},
{
"epoch": 3.6,
"learning_rate": 2.4323397390010933e-05,
"loss": 1.2446,
"step": 893500
},
{
"epoch": 3.6,
"learning_rate": 2.4309023303843458e-05,
"loss": 1.2184,
"step": 894000
},
{
"epoch": 3.6,
"learning_rate": 2.4294677965848322e-05,
"loss": 1.1718,
"step": 894500
},
{
"epoch": 3.6,
"learning_rate": 2.428030387968085e-05,
"loss": 1.226,
"step": 895000
},
{
"epoch": 3.6,
"learning_rate": 2.4265929793513377e-05,
"loss": 1.1927,
"step": 895500
},
{
"epoch": 3.61,
"learning_rate": 2.425155570734591e-05,
"loss": 1.2361,
"step": 896000
},
{
"epoch": 3.61,
"learning_rate": 2.4237181621178433e-05,
"loss": 1.1985,
"step": 896500
},
{
"epoch": 3.61,
"learning_rate": 2.4222836283183297e-05,
"loss": 1.1943,
"step": 897000
},
{
"epoch": 3.61,
"learning_rate": 2.4208462197015828e-05,
"loss": 1.1602,
"step": 897500
},
{
"epoch": 3.61,
"learning_rate": 2.419411685902069e-05,
"loss": 1.1945,
"step": 898000
},
{
"epoch": 3.62,
"learning_rate": 2.4179742772853217e-05,
"loss": 1.1917,
"step": 898500
},
{
"epoch": 3.62,
"learning_rate": 2.4165368686685748e-05,
"loss": 1.1912,
"step": 899000
},
{
"epoch": 3.62,
"learning_rate": 2.4150994600518272e-05,
"loss": 1.1658,
"step": 899500
},
{
"epoch": 3.62,
"learning_rate": 2.4136620514350803e-05,
"loss": 1.1596,
"step": 900000
},
{
"epoch": 3.62,
"learning_rate": 2.412224642818333e-05,
"loss": 1.1588,
"step": 900500
},
{
"epoch": 3.63,
"learning_rate": 2.4107872342015858e-05,
"loss": 1.1778,
"step": 901000
},
{
"epoch": 3.63,
"learning_rate": 2.4093498255848386e-05,
"loss": 1.2307,
"step": 901500
},
{
"epoch": 3.63,
"learning_rate": 2.4079152917853247e-05,
"loss": 1.2047,
"step": 902000
},
{
"epoch": 3.63,
"learning_rate": 2.4064778831685778e-05,
"loss": 1.2131,
"step": 902500
},
{
"epoch": 3.63,
"learning_rate": 2.4050404745518305e-05,
"loss": 1.1606,
"step": 903000
},
{
"epoch": 3.64,
"learning_rate": 2.4036030659350833e-05,
"loss": 1.1774,
"step": 903500
},
{
"epoch": 3.64,
"learning_rate": 2.4021685321355697e-05,
"loss": 1.1936,
"step": 904000
},
{
"epoch": 3.64,
"learning_rate": 2.4007311235188225e-05,
"loss": 1.2016,
"step": 904500
},
{
"epoch": 3.64,
"learning_rate": 2.3992937149020753e-05,
"loss": 1.2103,
"step": 905000
},
{
"epoch": 3.64,
"learning_rate": 2.397856306285328e-05,
"loss": 1.2546,
"step": 905500
},
{
"epoch": 3.65,
"learning_rate": 2.3964188976685808e-05,
"loss": 1.1628,
"step": 906000
},
{
"epoch": 3.65,
"learning_rate": 2.3949843638690672e-05,
"loss": 1.2363,
"step": 906500
},
{
"epoch": 3.65,
"learning_rate": 2.39354695525232e-05,
"loss": 1.1835,
"step": 907000
},
{
"epoch": 3.65,
"learning_rate": 2.3921095466355727e-05,
"loss": 1.1611,
"step": 907500
},
{
"epoch": 3.65,
"learning_rate": 2.3906721380188255e-05,
"loss": 1.1875,
"step": 908000
},
{
"epoch": 3.66,
"learning_rate": 2.3892347294020783e-05,
"loss": 1.1809,
"step": 908500
},
{
"epoch": 3.66,
"learning_rate": 2.3877973207853314e-05,
"loss": 1.1983,
"step": 909000
}
],
"logging_steps": 500,
"max_steps": 1739241,
"num_input_tokens_seen": 0,
"num_train_epochs": 7,
"save_steps": 9000,
"total_flos": 8.699092914175696e+20,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}