{ "best_metric": 0.9997974273270536, "best_model_checkpoint": "neunit-ks-529/checkpoint-5554", "epoch": 4.998199495858841, "global_step": 6940, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 4.322766570605187e-07, "loss": 1.1106, "step": 10 }, { "epoch": 0.01, "learning_rate": 8.645533141210374e-07, "loss": 1.105, "step": 20 }, { "epoch": 0.02, "learning_rate": 1.2968299711815562e-06, "loss": 1.0854, "step": 30 }, { "epoch": 0.03, "learning_rate": 1.7291066282420748e-06, "loss": 1.0488, "step": 40 }, { "epoch": 0.04, "learning_rate": 2.1613832853025936e-06, "loss": 0.9952, "step": 50 }, { "epoch": 0.04, "learning_rate": 2.5936599423631124e-06, "loss": 0.9323, "step": 60 }, { "epoch": 0.05, "learning_rate": 3.0259365994236312e-06, "loss": 0.8507, "step": 70 }, { "epoch": 0.06, "learning_rate": 3.4582132564841496e-06, "loss": 0.7498, "step": 80 }, { "epoch": 0.06, "learning_rate": 3.890489913544669e-06, "loss": 0.6362, "step": 90 }, { "epoch": 0.07, "learning_rate": 4.322766570605187e-06, "loss": 0.5327, "step": 100 }, { "epoch": 0.08, "learning_rate": 4.7550432276657065e-06, "loss": 0.444, "step": 110 }, { "epoch": 0.09, "learning_rate": 5.187319884726225e-06, "loss": 0.3778, "step": 120 }, { "epoch": 0.09, "learning_rate": 5.619596541786744e-06, "loss": 0.346, "step": 130 }, { "epoch": 0.1, "learning_rate": 6.0518731988472625e-06, "loss": 0.3227, "step": 140 }, { "epoch": 0.11, "learning_rate": 6.484149855907781e-06, "loss": 0.2791, "step": 150 }, { "epoch": 0.12, "learning_rate": 6.916426512968299e-06, "loss": 0.2632, "step": 160 }, { "epoch": 0.12, "learning_rate": 7.3487031700288185e-06, "loss": 0.2362, "step": 170 }, { "epoch": 0.13, "learning_rate": 7.780979827089338e-06, "loss": 0.213, "step": 180 }, { "epoch": 0.14, "learning_rate": 8.213256484149856e-06, "loss": 0.1976, "step": 190 }, { "epoch": 0.14, "learning_rate": 8.645533141210375e-06, "loss": 0.1863, "step": 200 }, { "epoch": 0.15, "learning_rate": 9.077809798270893e-06, "loss": 0.1709, "step": 210 }, { "epoch": 0.16, "learning_rate": 9.510086455331413e-06, "loss": 0.1528, "step": 220 }, { "epoch": 0.17, "learning_rate": 9.942363112391931e-06, "loss": 0.1503, "step": 230 }, { "epoch": 0.17, "learning_rate": 1.037463976945245e-05, "loss": 0.133, "step": 240 }, { "epoch": 0.18, "learning_rate": 1.0806916426512968e-05, "loss": 0.122, "step": 250 }, { "epoch": 0.19, "learning_rate": 1.1239193083573488e-05, "loss": 0.1178, "step": 260 }, { "epoch": 0.19, "learning_rate": 1.1671469740634007e-05, "loss": 0.109, "step": 270 }, { "epoch": 0.2, "learning_rate": 1.2103746397694525e-05, "loss": 0.1092, "step": 280 }, { "epoch": 0.21, "learning_rate": 1.2536023054755043e-05, "loss": 0.0978, "step": 290 }, { "epoch": 0.22, "learning_rate": 1.2968299711815562e-05, "loss": 0.0872, "step": 300 }, { "epoch": 0.22, "learning_rate": 1.3400576368876082e-05, "loss": 0.0903, "step": 310 }, { "epoch": 0.23, "learning_rate": 1.3832853025936599e-05, "loss": 0.0928, "step": 320 }, { "epoch": 0.24, "learning_rate": 1.4265129682997119e-05, "loss": 0.0846, "step": 330 }, { "epoch": 0.24, "learning_rate": 1.4697406340057637e-05, "loss": 0.075, "step": 340 }, { "epoch": 0.25, "learning_rate": 1.5129682997118155e-05, "loss": 0.0416, "step": 350 }, { "epoch": 0.26, "learning_rate": 1.5561959654178675e-05, "loss": 0.0677, "step": 360 }, { "epoch": 0.27, "learning_rate": 1.5994236311239196e-05, "loss": 0.066, "step": 370 }, { "epoch": 0.27, "learning_rate": 1.6426512968299712e-05, "loss": 0.0673, "step": 380 }, { "epoch": 0.28, "learning_rate": 1.685878962536023e-05, "loss": 0.0509, "step": 390 }, { "epoch": 0.29, "learning_rate": 1.729106628242075e-05, "loss": 0.0719, "step": 400 }, { "epoch": 0.3, "learning_rate": 1.772334293948127e-05, "loss": 0.0588, "step": 410 }, { "epoch": 0.3, "learning_rate": 1.8155619596541786e-05, "loss": 0.0572, "step": 420 }, { "epoch": 0.31, "learning_rate": 1.8587896253602306e-05, "loss": 0.0717, "step": 430 }, { "epoch": 0.32, "learning_rate": 1.9020172910662826e-05, "loss": 0.0563, "step": 440 }, { "epoch": 0.32, "learning_rate": 1.9452449567723346e-05, "loss": 0.0491, "step": 450 }, { "epoch": 0.33, "learning_rate": 1.9884726224783863e-05, "loss": 0.0766, "step": 460 }, { "epoch": 0.34, "learning_rate": 2.031700288184438e-05, "loss": 0.0691, "step": 470 }, { "epoch": 0.35, "learning_rate": 2.07492795389049e-05, "loss": 0.0728, "step": 480 }, { "epoch": 0.35, "learning_rate": 2.1181556195965416e-05, "loss": 0.0781, "step": 490 }, { "epoch": 0.36, "learning_rate": 2.1613832853025936e-05, "loss": 0.0643, "step": 500 }, { "epoch": 0.37, "learning_rate": 2.2046109510086456e-05, "loss": 0.05, "step": 510 }, { "epoch": 0.37, "learning_rate": 2.2478386167146976e-05, "loss": 0.0577, "step": 520 }, { "epoch": 0.38, "learning_rate": 2.2910662824207493e-05, "loss": 0.073, "step": 530 }, { "epoch": 0.39, "learning_rate": 2.3342939481268013e-05, "loss": 0.0575, "step": 540 }, { "epoch": 0.4, "learning_rate": 2.377521613832853e-05, "loss": 0.0692, "step": 550 }, { "epoch": 0.4, "learning_rate": 2.420749279538905e-05, "loss": 0.0518, "step": 560 }, { "epoch": 0.41, "learning_rate": 2.4639769452449567e-05, "loss": 0.0647, "step": 570 }, { "epoch": 0.42, "learning_rate": 2.5072046109510087e-05, "loss": 0.0563, "step": 580 }, { "epoch": 0.42, "learning_rate": 2.5504322766570607e-05, "loss": 0.0678, "step": 590 }, { "epoch": 0.43, "learning_rate": 2.5936599423631124e-05, "loss": 0.0601, "step": 600 }, { "epoch": 0.44, "learning_rate": 2.6368876080691644e-05, "loss": 0.0635, "step": 610 }, { "epoch": 0.45, "learning_rate": 2.6801152737752164e-05, "loss": 0.0413, "step": 620 }, { "epoch": 0.45, "learning_rate": 2.7233429394812684e-05, "loss": 0.0442, "step": 630 }, { "epoch": 0.46, "learning_rate": 2.7665706051873197e-05, "loss": 0.0558, "step": 640 }, { "epoch": 0.47, "learning_rate": 2.8097982708933717e-05, "loss": 0.0544, "step": 650 }, { "epoch": 0.48, "learning_rate": 2.8530259365994237e-05, "loss": 0.0756, "step": 660 }, { "epoch": 0.48, "learning_rate": 2.8962536023054754e-05, "loss": 0.0775, "step": 670 }, { "epoch": 0.49, "learning_rate": 2.9394812680115274e-05, "loss": 0.0509, "step": 680 }, { "epoch": 0.5, "learning_rate": 2.9827089337175794e-05, "loss": 0.0619, "step": 690 }, { "epoch": 0.5, "learning_rate": 2.9971181556195965e-05, "loss": 0.0624, "step": 700 }, { "epoch": 0.51, "learning_rate": 2.9923150816522575e-05, "loss": 0.0549, "step": 710 }, { "epoch": 0.52, "learning_rate": 2.9875120076849184e-05, "loss": 0.0619, "step": 720 }, { "epoch": 0.53, "learning_rate": 2.9827089337175794e-05, "loss": 0.0696, "step": 730 }, { "epoch": 0.53, "learning_rate": 2.9779058597502404e-05, "loss": 0.055, "step": 740 }, { "epoch": 0.54, "learning_rate": 2.973102785782901e-05, "loss": 0.0614, "step": 750 }, { "epoch": 0.55, "learning_rate": 2.968299711815562e-05, "loss": 0.0717, "step": 760 }, { "epoch": 0.55, "learning_rate": 2.963496637848223e-05, "loss": 0.0547, "step": 770 }, { "epoch": 0.56, "learning_rate": 2.958693563880884e-05, "loss": 0.0467, "step": 780 }, { "epoch": 0.57, "learning_rate": 2.953890489913545e-05, "loss": 0.057, "step": 790 }, { "epoch": 0.58, "learning_rate": 2.9490874159462058e-05, "loss": 0.0646, "step": 800 }, { "epoch": 0.58, "learning_rate": 2.9442843419788664e-05, "loss": 0.0664, "step": 810 }, { "epoch": 0.59, "learning_rate": 2.9394812680115274e-05, "loss": 0.0438, "step": 820 }, { "epoch": 0.6, "learning_rate": 2.9346781940441884e-05, "loss": 0.0559, "step": 830 }, { "epoch": 0.6, "learning_rate": 2.929875120076849e-05, "loss": 0.0545, "step": 840 }, { "epoch": 0.61, "learning_rate": 2.9250720461095103e-05, "loss": 0.0772, "step": 850 }, { "epoch": 0.62, "learning_rate": 2.9202689721421713e-05, "loss": 0.0647, "step": 860 }, { "epoch": 0.63, "learning_rate": 2.915465898174832e-05, "loss": 0.0425, "step": 870 }, { "epoch": 0.63, "learning_rate": 2.910662824207493e-05, "loss": 0.0661, "step": 880 }, { "epoch": 0.64, "learning_rate": 2.9058597502401538e-05, "loss": 0.0324, "step": 890 }, { "epoch": 0.65, "learning_rate": 2.9010566762728144e-05, "loss": 0.0814, "step": 900 }, { "epoch": 0.66, "learning_rate": 2.8962536023054754e-05, "loss": 0.0815, "step": 910 }, { "epoch": 0.66, "learning_rate": 2.8914505283381367e-05, "loss": 0.0456, "step": 920 }, { "epoch": 0.67, "learning_rate": 2.8866474543707973e-05, "loss": 0.0518, "step": 930 }, { "epoch": 0.68, "learning_rate": 2.8818443804034583e-05, "loss": 0.0713, "step": 940 }, { "epoch": 0.68, "learning_rate": 2.8770413064361192e-05, "loss": 0.0473, "step": 950 }, { "epoch": 0.69, "learning_rate": 2.87223823246878e-05, "loss": 0.0372, "step": 960 }, { "epoch": 0.7, "learning_rate": 2.867435158501441e-05, "loss": 0.0422, "step": 970 }, { "epoch": 0.71, "learning_rate": 2.8626320845341018e-05, "loss": 0.0459, "step": 980 }, { "epoch": 0.71, "learning_rate": 2.857829010566763e-05, "loss": 0.0526, "step": 990 }, { "epoch": 0.72, "learning_rate": 2.8530259365994237e-05, "loss": 0.0427, "step": 1000 }, { "epoch": 0.73, "learning_rate": 2.8482228626320847e-05, "loss": 0.0503, "step": 1010 }, { "epoch": 0.73, "learning_rate": 2.8434197886647456e-05, "loss": 0.0593, "step": 1020 }, { "epoch": 0.74, "learning_rate": 2.8386167146974063e-05, "loss": 0.06, "step": 1030 }, { "epoch": 0.75, "learning_rate": 2.8338136407300672e-05, "loss": 0.0593, "step": 1040 }, { "epoch": 0.76, "learning_rate": 2.8290105667627282e-05, "loss": 0.0653, "step": 1050 }, { "epoch": 0.76, "learning_rate": 2.824207492795389e-05, "loss": 0.0673, "step": 1060 }, { "epoch": 0.77, "learning_rate": 2.81940441882805e-05, "loss": 0.0414, "step": 1070 }, { "epoch": 0.78, "learning_rate": 2.814601344860711e-05, "loss": 0.0532, "step": 1080 }, { "epoch": 0.79, "learning_rate": 2.8097982708933717e-05, "loss": 0.0452, "step": 1090 }, { "epoch": 0.79, "learning_rate": 2.8049951969260327e-05, "loss": 0.0524, "step": 1100 }, { "epoch": 0.8, "learning_rate": 2.8001921229586936e-05, "loss": 0.0541, "step": 1110 }, { "epoch": 0.81, "learning_rate": 2.7953890489913543e-05, "loss": 0.0749, "step": 1120 }, { "epoch": 0.81, "learning_rate": 2.7905859750240156e-05, "loss": 0.0567, "step": 1130 }, { "epoch": 0.82, "learning_rate": 2.7857829010566765e-05, "loss": 0.0252, "step": 1140 }, { "epoch": 0.83, "learning_rate": 2.780979827089337e-05, "loss": 0.0541, "step": 1150 }, { "epoch": 0.84, "learning_rate": 2.776176753121998e-05, "loss": 0.0294, "step": 1160 }, { "epoch": 0.84, "learning_rate": 2.771373679154659e-05, "loss": 0.058, "step": 1170 }, { "epoch": 0.85, "learning_rate": 2.7665706051873197e-05, "loss": 0.0537, "step": 1180 }, { "epoch": 0.86, "learning_rate": 2.7617675312199807e-05, "loss": 0.0466, "step": 1190 }, { "epoch": 0.86, "learning_rate": 2.756964457252642e-05, "loss": 0.0268, "step": 1200 }, { "epoch": 0.87, "learning_rate": 2.7521613832853026e-05, "loss": 0.0425, "step": 1210 }, { "epoch": 0.88, "learning_rate": 2.7473583093179636e-05, "loss": 0.0463, "step": 1220 }, { "epoch": 0.89, "learning_rate": 2.7425552353506245e-05, "loss": 0.0538, "step": 1230 }, { "epoch": 0.89, "learning_rate": 2.7377521613832855e-05, "loss": 0.0385, "step": 1240 }, { "epoch": 0.9, "learning_rate": 2.732949087415946e-05, "loss": 0.0577, "step": 1250 }, { "epoch": 0.91, "learning_rate": 2.728146013448607e-05, "loss": 0.0681, "step": 1260 }, { "epoch": 0.91, "learning_rate": 2.7233429394812684e-05, "loss": 0.0543, "step": 1270 }, { "epoch": 0.92, "learning_rate": 2.718539865513929e-05, "loss": 0.0519, "step": 1280 }, { "epoch": 0.93, "learning_rate": 2.71373679154659e-05, "loss": 0.0385, "step": 1290 }, { "epoch": 0.94, "learning_rate": 2.708933717579251e-05, "loss": 0.0342, "step": 1300 }, { "epoch": 0.94, "learning_rate": 2.7041306436119116e-05, "loss": 0.0257, "step": 1310 }, { "epoch": 0.95, "learning_rate": 2.6993275696445725e-05, "loss": 0.0358, "step": 1320 }, { "epoch": 0.96, "learning_rate": 2.6945244956772335e-05, "loss": 0.0485, "step": 1330 }, { "epoch": 0.97, "learning_rate": 2.6897214217098944e-05, "loss": 0.0277, "step": 1340 }, { "epoch": 0.97, "learning_rate": 2.6849183477425554e-05, "loss": 0.0224, "step": 1350 }, { "epoch": 0.98, "learning_rate": 2.6801152737752164e-05, "loss": 0.0162, "step": 1360 }, { "epoch": 0.99, "learning_rate": 2.675312199807877e-05, "loss": 0.0253, "step": 1370 }, { "epoch": 0.99, "learning_rate": 2.670509125840538e-05, "loss": 0.0303, "step": 1380 }, { "epoch": 1.0, "eval_accuracy": 0.9964549782234376, "eval_loss": 0.023142272606492043, "eval_runtime": 22.3983, "eval_samples_per_second": 881.586, "eval_steps_per_second": 27.591, "step": 1388 }, { "epoch": 1.0, "learning_rate": 2.665706051873199e-05, "loss": 0.0146, "step": 1390 }, { "epoch": 1.01, "learning_rate": 2.6609029779058595e-05, "loss": 0.0338, "step": 1400 }, { "epoch": 1.02, "learning_rate": 2.656099903938521e-05, "loss": 0.0312, "step": 1410 }, { "epoch": 1.02, "learning_rate": 2.6512968299711818e-05, "loss": 0.0164, "step": 1420 }, { "epoch": 1.03, "learning_rate": 2.6464937560038424e-05, "loss": 0.0088, "step": 1430 }, { "epoch": 1.04, "learning_rate": 2.6416906820365034e-05, "loss": 0.0196, "step": 1440 }, { "epoch": 1.04, "learning_rate": 2.6368876080691644e-05, "loss": 0.0186, "step": 1450 }, { "epoch": 1.05, "learning_rate": 2.632084534101825e-05, "loss": 0.0136, "step": 1460 }, { "epoch": 1.06, "learning_rate": 2.627281460134486e-05, "loss": 0.0301, "step": 1470 }, { "epoch": 1.07, "learning_rate": 2.6224783861671473e-05, "loss": 0.0224, "step": 1480 }, { "epoch": 1.07, "learning_rate": 2.6176753121998082e-05, "loss": 0.0214, "step": 1490 }, { "epoch": 1.08, "learning_rate": 2.612872238232469e-05, "loss": 0.0159, "step": 1500 }, { "epoch": 1.09, "learning_rate": 2.6080691642651298e-05, "loss": 0.0207, "step": 1510 }, { "epoch": 1.09, "learning_rate": 2.6032660902977908e-05, "loss": 0.0198, "step": 1520 }, { "epoch": 1.1, "learning_rate": 2.5984630163304514e-05, "loss": 0.0213, "step": 1530 }, { "epoch": 1.11, "learning_rate": 2.5936599423631124e-05, "loss": 0.0168, "step": 1540 }, { "epoch": 1.12, "learning_rate": 2.5888568683957737e-05, "loss": 0.0106, "step": 1550 }, { "epoch": 1.12, "learning_rate": 2.5840537944284343e-05, "loss": 0.0119, "step": 1560 }, { "epoch": 1.13, "learning_rate": 2.5792507204610952e-05, "loss": 0.0139, "step": 1570 }, { "epoch": 1.14, "learning_rate": 2.5744476464937562e-05, "loss": 0.0121, "step": 1580 }, { "epoch": 1.15, "learning_rate": 2.5696445725264168e-05, "loss": 0.0091, "step": 1590 }, { "epoch": 1.15, "learning_rate": 2.5648414985590778e-05, "loss": 0.0134, "step": 1600 }, { "epoch": 1.16, "learning_rate": 2.5600384245917388e-05, "loss": 0.0182, "step": 1610 }, { "epoch": 1.17, "learning_rate": 2.5552353506243997e-05, "loss": 0.0222, "step": 1620 }, { "epoch": 1.17, "learning_rate": 2.5504322766570607e-05, "loss": 0.0138, "step": 1630 }, { "epoch": 1.18, "learning_rate": 2.5456292026897216e-05, "loss": 0.0076, "step": 1640 }, { "epoch": 1.19, "learning_rate": 2.5408261287223823e-05, "loss": 0.014, "step": 1650 }, { "epoch": 1.2, "learning_rate": 2.5360230547550432e-05, "loss": 0.0096, "step": 1660 }, { "epoch": 1.2, "learning_rate": 2.5312199807877042e-05, "loss": 0.0119, "step": 1670 }, { "epoch": 1.21, "learning_rate": 2.5264169068203648e-05, "loss": 0.0058, "step": 1680 }, { "epoch": 1.22, "learning_rate": 2.521613832853026e-05, "loss": 0.0017, "step": 1690 }, { "epoch": 1.22, "learning_rate": 2.516810758885687e-05, "loss": 0.0121, "step": 1700 }, { "epoch": 1.23, "learning_rate": 2.5120076849183477e-05, "loss": 0.0164, "step": 1710 }, { "epoch": 1.24, "learning_rate": 2.5072046109510087e-05, "loss": 0.0148, "step": 1720 }, { "epoch": 1.25, "learning_rate": 2.5024015369836696e-05, "loss": 0.006, "step": 1730 }, { "epoch": 1.25, "learning_rate": 2.4975984630163306e-05, "loss": 0.0147, "step": 1740 }, { "epoch": 1.26, "learning_rate": 2.4927953890489912e-05, "loss": 0.0148, "step": 1750 }, { "epoch": 1.27, "learning_rate": 2.4879923150816525e-05, "loss": 0.0338, "step": 1760 }, { "epoch": 1.27, "learning_rate": 2.4831892411143135e-05, "loss": 0.0132, "step": 1770 }, { "epoch": 1.28, "learning_rate": 2.478386167146974e-05, "loss": 0.0059, "step": 1780 }, { "epoch": 1.29, "learning_rate": 2.473583093179635e-05, "loss": 0.0153, "step": 1790 }, { "epoch": 1.3, "learning_rate": 2.468780019212296e-05, "loss": 0.0159, "step": 1800 }, { "epoch": 1.3, "learning_rate": 2.4639769452449567e-05, "loss": 0.0054, "step": 1810 }, { "epoch": 1.31, "learning_rate": 2.4591738712776176e-05, "loss": 0.0184, "step": 1820 }, { "epoch": 1.32, "learning_rate": 2.454370797310279e-05, "loss": 0.0183, "step": 1830 }, { "epoch": 1.33, "learning_rate": 2.4495677233429396e-05, "loss": 0.0064, "step": 1840 }, { "epoch": 1.33, "learning_rate": 2.4447646493756005e-05, "loss": 0.0071, "step": 1850 }, { "epoch": 1.34, "learning_rate": 2.4399615754082615e-05, "loss": 0.0041, "step": 1860 }, { "epoch": 1.35, "learning_rate": 2.435158501440922e-05, "loss": 0.0131, "step": 1870 }, { "epoch": 1.35, "learning_rate": 2.430355427473583e-05, "loss": 0.0091, "step": 1880 }, { "epoch": 1.36, "learning_rate": 2.425552353506244e-05, "loss": 0.0108, "step": 1890 }, { "epoch": 1.37, "learning_rate": 2.420749279538905e-05, "loss": 0.0117, "step": 1900 }, { "epoch": 1.38, "learning_rate": 2.415946205571566e-05, "loss": 0.0085, "step": 1910 }, { "epoch": 1.38, "learning_rate": 2.411143131604227e-05, "loss": 0.0058, "step": 1920 }, { "epoch": 1.39, "learning_rate": 2.4063400576368875e-05, "loss": 0.0079, "step": 1930 }, { "epoch": 1.4, "learning_rate": 2.4015369836695485e-05, "loss": 0.0112, "step": 1940 }, { "epoch": 1.4, "learning_rate": 2.3967339097022095e-05, "loss": 0.0068, "step": 1950 }, { "epoch": 1.41, "learning_rate": 2.39193083573487e-05, "loss": 0.0064, "step": 1960 }, { "epoch": 1.42, "learning_rate": 2.3871277617675314e-05, "loss": 0.0103, "step": 1970 }, { "epoch": 1.43, "learning_rate": 2.3823246878001924e-05, "loss": 0.0174, "step": 1980 }, { "epoch": 1.43, "learning_rate": 2.377521613832853e-05, "loss": 0.0052, "step": 1990 }, { "epoch": 1.44, "learning_rate": 2.372718539865514e-05, "loss": 0.0091, "step": 2000 }, { "epoch": 1.45, "learning_rate": 2.367915465898175e-05, "loss": 0.0152, "step": 2010 }, { "epoch": 1.45, "learning_rate": 2.363112391930836e-05, "loss": 0.012, "step": 2020 }, { "epoch": 1.46, "learning_rate": 2.3583093179634965e-05, "loss": 0.0129, "step": 2030 }, { "epoch": 1.47, "learning_rate": 2.3535062439961575e-05, "loss": 0.0075, "step": 2040 }, { "epoch": 1.48, "learning_rate": 2.3487031700288188e-05, "loss": 0.0124, "step": 2050 }, { "epoch": 1.48, "learning_rate": 2.3439000960614794e-05, "loss": 0.0109, "step": 2060 }, { "epoch": 1.49, "learning_rate": 2.3390970220941404e-05, "loss": 0.0062, "step": 2070 }, { "epoch": 1.5, "learning_rate": 2.3342939481268013e-05, "loss": 0.0063, "step": 2080 }, { "epoch": 1.51, "learning_rate": 2.329490874159462e-05, "loss": 0.0082, "step": 2090 }, { "epoch": 1.51, "learning_rate": 2.324687800192123e-05, "loss": 0.0039, "step": 2100 }, { "epoch": 1.52, "learning_rate": 2.319884726224784e-05, "loss": 0.0019, "step": 2110 }, { "epoch": 1.53, "learning_rate": 2.315081652257445e-05, "loss": 0.0018, "step": 2120 }, { "epoch": 1.53, "learning_rate": 2.3102785782901058e-05, "loss": 0.0069, "step": 2130 }, { "epoch": 1.54, "learning_rate": 2.3054755043227668e-05, "loss": 0.0176, "step": 2140 }, { "epoch": 1.55, "learning_rate": 2.3006724303554274e-05, "loss": 0.0134, "step": 2150 }, { "epoch": 1.56, "learning_rate": 2.2958693563880883e-05, "loss": 0.0046, "step": 2160 }, { "epoch": 1.56, "learning_rate": 2.2910662824207493e-05, "loss": 0.0067, "step": 2170 }, { "epoch": 1.57, "learning_rate": 2.28626320845341e-05, "loss": 0.0088, "step": 2180 }, { "epoch": 1.58, "learning_rate": 2.2814601344860712e-05, "loss": 0.0157, "step": 2190 }, { "epoch": 1.58, "learning_rate": 2.2766570605187322e-05, "loss": 0.0094, "step": 2200 }, { "epoch": 1.59, "learning_rate": 2.2718539865513928e-05, "loss": 0.0074, "step": 2210 }, { "epoch": 1.6, "learning_rate": 2.2670509125840538e-05, "loss": 0.0049, "step": 2220 }, { "epoch": 1.61, "learning_rate": 2.2622478386167148e-05, "loss": 0.0085, "step": 2230 }, { "epoch": 1.61, "learning_rate": 2.2574447646493757e-05, "loss": 0.0024, "step": 2240 }, { "epoch": 1.62, "learning_rate": 2.2526416906820363e-05, "loss": 0.0038, "step": 2250 }, { "epoch": 1.63, "learning_rate": 2.2478386167146976e-05, "loss": 0.0128, "step": 2260 }, { "epoch": 1.63, "learning_rate": 2.2430355427473586e-05, "loss": 0.0115, "step": 2270 }, { "epoch": 1.64, "learning_rate": 2.2382324687800192e-05, "loss": 0.004, "step": 2280 }, { "epoch": 1.65, "learning_rate": 2.2334293948126802e-05, "loss": 0.0092, "step": 2290 }, { "epoch": 1.66, "learning_rate": 2.228626320845341e-05, "loss": 0.0104, "step": 2300 }, { "epoch": 1.66, "learning_rate": 2.2238232468780018e-05, "loss": 0.006, "step": 2310 }, { "epoch": 1.67, "learning_rate": 2.2190201729106627e-05, "loss": 0.0103, "step": 2320 }, { "epoch": 1.68, "learning_rate": 2.214217098943324e-05, "loss": 0.0068, "step": 2330 }, { "epoch": 1.69, "learning_rate": 2.2098943323727187e-05, "loss": 0.0127, "step": 2340 }, { "epoch": 1.69, "learning_rate": 2.2050912584053797e-05, "loss": 0.0074, "step": 2350 }, { "epoch": 1.7, "learning_rate": 2.2002881844380403e-05, "loss": 0.0023, "step": 2360 }, { "epoch": 1.71, "learning_rate": 2.1954851104707013e-05, "loss": 0.0066, "step": 2370 }, { "epoch": 1.71, "learning_rate": 2.1906820365033622e-05, "loss": 0.0128, "step": 2380 }, { "epoch": 1.72, "learning_rate": 2.1858789625360232e-05, "loss": 0.0031, "step": 2390 }, { "epoch": 1.73, "learning_rate": 2.181075888568684e-05, "loss": 0.0036, "step": 2400 }, { "epoch": 1.74, "learning_rate": 2.176272814601345e-05, "loss": 0.003, "step": 2410 }, { "epoch": 1.74, "learning_rate": 2.1714697406340057e-05, "loss": 0.0054, "step": 2420 }, { "epoch": 1.75, "learning_rate": 2.1666666666666667e-05, "loss": 0.0015, "step": 2430 }, { "epoch": 1.76, "learning_rate": 2.1618635926993277e-05, "loss": 0.0041, "step": 2440 }, { "epoch": 1.76, "learning_rate": 2.1570605187319883e-05, "loss": 0.0036, "step": 2450 }, { "epoch": 1.77, "learning_rate": 2.1522574447646496e-05, "loss": 0.0053, "step": 2460 }, { "epoch": 1.78, "learning_rate": 2.1474543707973106e-05, "loss": 0.0094, "step": 2470 }, { "epoch": 1.79, "learning_rate": 2.1426512968299712e-05, "loss": 0.0031, "step": 2480 }, { "epoch": 1.79, "learning_rate": 2.137848222862632e-05, "loss": 0.0022, "step": 2490 }, { "epoch": 1.8, "learning_rate": 2.133045148895293e-05, "loss": 0.0085, "step": 2500 }, { "epoch": 1.81, "learning_rate": 2.1282420749279537e-05, "loss": 0.0031, "step": 2510 }, { "epoch": 1.81, "learning_rate": 2.1234390009606147e-05, "loss": 0.004, "step": 2520 }, { "epoch": 1.82, "learning_rate": 2.118635926993276e-05, "loss": 0.0072, "step": 2530 }, { "epoch": 1.83, "learning_rate": 2.1138328530259366e-05, "loss": 0.0019, "step": 2540 }, { "epoch": 1.84, "learning_rate": 2.1090297790585976e-05, "loss": 0.0071, "step": 2550 }, { "epoch": 1.84, "learning_rate": 2.1042267050912585e-05, "loss": 0.0022, "step": 2560 }, { "epoch": 1.85, "learning_rate": 2.0994236311239192e-05, "loss": 0.0017, "step": 2570 }, { "epoch": 1.86, "learning_rate": 2.09462055715658e-05, "loss": 0.015, "step": 2580 }, { "epoch": 1.87, "learning_rate": 2.089817483189241e-05, "loss": 0.0104, "step": 2590 }, { "epoch": 1.87, "learning_rate": 2.085014409221902e-05, "loss": 0.0023, "step": 2600 }, { "epoch": 1.88, "learning_rate": 2.080211335254563e-05, "loss": 0.0071, "step": 2610 }, { "epoch": 1.89, "learning_rate": 2.075408261287224e-05, "loss": 0.0026, "step": 2620 }, { "epoch": 1.89, "learning_rate": 2.070605187319885e-05, "loss": 0.0021, "step": 2630 }, { "epoch": 1.9, "learning_rate": 2.0658021133525456e-05, "loss": 0.0116, "step": 2640 }, { "epoch": 1.91, "learning_rate": 2.0609990393852065e-05, "loss": 0.009, "step": 2650 }, { "epoch": 1.92, "learning_rate": 2.0561959654178675e-05, "loss": 0.0065, "step": 2660 }, { "epoch": 1.92, "learning_rate": 2.0513928914505285e-05, "loss": 0.0027, "step": 2670 }, { "epoch": 1.93, "learning_rate": 2.0465898174831894e-05, "loss": 0.0079, "step": 2680 }, { "epoch": 1.94, "learning_rate": 2.0417867435158504e-05, "loss": 0.0037, "step": 2690 }, { "epoch": 1.94, "learning_rate": 2.036983669548511e-05, "loss": 0.0018, "step": 2700 }, { "epoch": 1.95, "learning_rate": 2.032180595581172e-05, "loss": 0.0057, "step": 2710 }, { "epoch": 1.96, "learning_rate": 2.027377521613833e-05, "loss": 0.0058, "step": 2720 }, { "epoch": 1.97, "learning_rate": 2.0225744476464936e-05, "loss": 0.0097, "step": 2730 }, { "epoch": 1.97, "learning_rate": 2.017771373679155e-05, "loss": 0.0051, "step": 2740 }, { "epoch": 1.98, "learning_rate": 2.012968299711816e-05, "loss": 0.0041, "step": 2750 }, { "epoch": 1.99, "learning_rate": 2.0081652257444765e-05, "loss": 0.0025, "step": 2760 }, { "epoch": 1.99, "learning_rate": 2.0033621517771374e-05, "loss": 0.003, "step": 2770 }, { "epoch": 2.0, "eval_accuracy": 0.9994935683176339, "eval_loss": 0.003025745041668415, "eval_runtime": 22.1254, "eval_samples_per_second": 892.458, "eval_steps_per_second": 27.932, "step": 2777 }, { "epoch": 2.0, "learning_rate": 1.9985590778097984e-05, "loss": 0.0042, "step": 2780 }, { "epoch": 2.01, "learning_rate": 1.993756003842459e-05, "loss": 0.0042, "step": 2790 }, { "epoch": 2.02, "learning_rate": 1.98895292987512e-05, "loss": 0.0009, "step": 2800 }, { "epoch": 2.02, "learning_rate": 1.9841498559077813e-05, "loss": 0.0018, "step": 2810 }, { "epoch": 2.03, "learning_rate": 1.979346781940442e-05, "loss": 0.0034, "step": 2820 }, { "epoch": 2.04, "learning_rate": 1.974543707973103e-05, "loss": 0.0023, "step": 2830 }, { "epoch": 2.05, "learning_rate": 1.9697406340057638e-05, "loss": 0.0013, "step": 2840 }, { "epoch": 2.05, "learning_rate": 1.9649375600384245e-05, "loss": 0.0032, "step": 2850 }, { "epoch": 2.06, "learning_rate": 1.9601344860710854e-05, "loss": 0.0127, "step": 2860 }, { "epoch": 2.07, "learning_rate": 1.9553314121037464e-05, "loss": 0.0036, "step": 2870 }, { "epoch": 2.07, "learning_rate": 1.9505283381364073e-05, "loss": 0.0072, "step": 2880 }, { "epoch": 2.08, "learning_rate": 1.9457252641690683e-05, "loss": 0.0105, "step": 2890 }, { "epoch": 2.09, "learning_rate": 1.9409221902017293e-05, "loss": 0.0076, "step": 2900 }, { "epoch": 2.1, "learning_rate": 1.9361191162343902e-05, "loss": 0.0139, "step": 2910 }, { "epoch": 2.1, "learning_rate": 1.931316042267051e-05, "loss": 0.0025, "step": 2920 }, { "epoch": 2.11, "learning_rate": 1.9265129682997118e-05, "loss": 0.0059, "step": 2930 }, { "epoch": 2.12, "learning_rate": 1.9217098943323728e-05, "loss": 0.0042, "step": 2940 }, { "epoch": 2.12, "learning_rate": 1.9169068203650334e-05, "loss": 0.0005, "step": 2950 }, { "epoch": 2.13, "learning_rate": 1.9121037463976947e-05, "loss": 0.0147, "step": 2960 }, { "epoch": 2.14, "learning_rate": 1.9073006724303557e-05, "loss": 0.0065, "step": 2970 }, { "epoch": 2.15, "learning_rate": 1.9024975984630163e-05, "loss": 0.0079, "step": 2980 }, { "epoch": 2.15, "learning_rate": 1.8976945244956773e-05, "loss": 0.0046, "step": 2990 }, { "epoch": 2.16, "learning_rate": 1.8928914505283382e-05, "loss": 0.0032, "step": 3000 }, { "epoch": 2.17, "learning_rate": 1.888088376560999e-05, "loss": 0.0084, "step": 3010 }, { "epoch": 2.18, "learning_rate": 1.8832853025936598e-05, "loss": 0.0136, "step": 3020 }, { "epoch": 2.18, "learning_rate": 1.878482228626321e-05, "loss": 0.0026, "step": 3030 }, { "epoch": 2.19, "learning_rate": 1.8736791546589817e-05, "loss": 0.0044, "step": 3040 }, { "epoch": 2.2, "learning_rate": 1.8688760806916427e-05, "loss": 0.0102, "step": 3050 }, { "epoch": 2.2, "learning_rate": 1.8640730067243037e-05, "loss": 0.0039, "step": 3060 }, { "epoch": 2.21, "learning_rate": 1.8592699327569643e-05, "loss": 0.0007, "step": 3070 }, { "epoch": 2.22, "learning_rate": 1.8544668587896253e-05, "loss": 0.0091, "step": 3080 }, { "epoch": 2.23, "learning_rate": 1.8496637848222862e-05, "loss": 0.0069, "step": 3090 }, { "epoch": 2.23, "learning_rate": 1.8448607108549472e-05, "loss": 0.0031, "step": 3100 }, { "epoch": 2.24, "learning_rate": 1.840057636887608e-05, "loss": 0.0071, "step": 3110 }, { "epoch": 2.25, "learning_rate": 1.835254562920269e-05, "loss": 0.0011, "step": 3120 }, { "epoch": 2.25, "learning_rate": 1.83045148895293e-05, "loss": 0.004, "step": 3130 }, { "epoch": 2.26, "learning_rate": 1.8256484149855907e-05, "loss": 0.0006, "step": 3140 }, { "epoch": 2.27, "learning_rate": 1.8208453410182517e-05, "loss": 0.001, "step": 3150 }, { "epoch": 2.28, "learning_rate": 1.8160422670509126e-05, "loss": 0.0023, "step": 3160 }, { "epoch": 2.28, "learning_rate": 1.8112391930835736e-05, "loss": 0.0016, "step": 3170 }, { "epoch": 2.29, "learning_rate": 1.8064361191162345e-05, "loss": 0.0078, "step": 3180 }, { "epoch": 2.3, "learning_rate": 1.8016330451488955e-05, "loss": 0.0008, "step": 3190 }, { "epoch": 2.3, "learning_rate": 1.796829971181556e-05, "loss": 0.0076, "step": 3200 }, { "epoch": 2.31, "learning_rate": 1.792026897214217e-05, "loss": 0.0087, "step": 3210 }, { "epoch": 2.32, "learning_rate": 1.787223823246878e-05, "loss": 0.0015, "step": 3220 }, { "epoch": 2.33, "learning_rate": 1.7824207492795387e-05, "loss": 0.0053, "step": 3230 }, { "epoch": 2.33, "learning_rate": 1.7776176753122e-05, "loss": 0.0053, "step": 3240 }, { "epoch": 2.34, "learning_rate": 1.772814601344861e-05, "loss": 0.0042, "step": 3250 }, { "epoch": 2.35, "learning_rate": 1.7680115273775216e-05, "loss": 0.0007, "step": 3260 }, { "epoch": 2.36, "learning_rate": 1.7632084534101825e-05, "loss": 0.0083, "step": 3270 }, { "epoch": 2.36, "learning_rate": 1.7584053794428435e-05, "loss": 0.0039, "step": 3280 }, { "epoch": 2.37, "learning_rate": 1.753602305475504e-05, "loss": 0.0016, "step": 3290 }, { "epoch": 2.38, "learning_rate": 1.748799231508165e-05, "loss": 0.0081, "step": 3300 }, { "epoch": 2.38, "learning_rate": 1.7439961575408264e-05, "loss": 0.0048, "step": 3310 }, { "epoch": 2.39, "learning_rate": 1.739193083573487e-05, "loss": 0.0042, "step": 3320 }, { "epoch": 2.4, "learning_rate": 1.734390009606148e-05, "loss": 0.002, "step": 3330 }, { "epoch": 2.41, "learning_rate": 1.729586935638809e-05, "loss": 0.0041, "step": 3340 }, { "epoch": 2.41, "learning_rate": 1.7247838616714696e-05, "loss": 0.0029, "step": 3350 }, { "epoch": 2.42, "learning_rate": 1.7199807877041305e-05, "loss": 0.003, "step": 3360 }, { "epoch": 2.43, "learning_rate": 1.7151777137367915e-05, "loss": 0.0024, "step": 3370 }, { "epoch": 2.43, "learning_rate": 1.7103746397694528e-05, "loss": 0.0013, "step": 3380 }, { "epoch": 2.44, "learning_rate": 1.7055715658021134e-05, "loss": 0.0062, "step": 3390 }, { "epoch": 2.45, "learning_rate": 1.7007684918347744e-05, "loss": 0.0027, "step": 3400 }, { "epoch": 2.46, "learning_rate": 1.6959654178674353e-05, "loss": 0.0045, "step": 3410 }, { "epoch": 2.46, "learning_rate": 1.691162343900096e-05, "loss": 0.0037, "step": 3420 }, { "epoch": 2.47, "learning_rate": 1.686359269932757e-05, "loss": 0.0012, "step": 3430 }, { "epoch": 2.48, "learning_rate": 1.681556195965418e-05, "loss": 0.0029, "step": 3440 }, { "epoch": 2.48, "learning_rate": 1.676753121998079e-05, "loss": 0.0085, "step": 3450 }, { "epoch": 2.49, "learning_rate": 1.6719500480307398e-05, "loss": 0.0082, "step": 3460 }, { "epoch": 2.5, "learning_rate": 1.6671469740634008e-05, "loss": 0.004, "step": 3470 }, { "epoch": 2.51, "learning_rate": 1.6623439000960614e-05, "loss": 0.0069, "step": 3480 }, { "epoch": 2.51, "learning_rate": 1.6575408261287224e-05, "loss": 0.0059, "step": 3490 }, { "epoch": 2.52, "learning_rate": 1.6527377521613833e-05, "loss": 0.007, "step": 3500 }, { "epoch": 2.53, "learning_rate": 1.647934678194044e-05, "loss": 0.0084, "step": 3510 }, { "epoch": 2.54, "learning_rate": 1.6431316042267053e-05, "loss": 0.0033, "step": 3520 }, { "epoch": 2.54, "learning_rate": 1.6383285302593662e-05, "loss": 0.0065, "step": 3530 }, { "epoch": 2.55, "learning_rate": 1.633525456292027e-05, "loss": 0.0018, "step": 3540 }, { "epoch": 2.56, "learning_rate": 1.6287223823246878e-05, "loss": 0.0005, "step": 3550 }, { "epoch": 2.56, "learning_rate": 1.6239193083573488e-05, "loss": 0.0064, "step": 3560 }, { "epoch": 2.57, "learning_rate": 1.6191162343900094e-05, "loss": 0.0018, "step": 3570 }, { "epoch": 2.58, "learning_rate": 1.6143131604226704e-05, "loss": 0.0056, "step": 3580 }, { "epoch": 2.59, "learning_rate": 1.6095100864553317e-05, "loss": 0.0013, "step": 3590 }, { "epoch": 2.59, "learning_rate": 1.6047070124879923e-05, "loss": 0.0085, "step": 3600 }, { "epoch": 2.6, "learning_rate": 1.5999039385206533e-05, "loss": 0.002, "step": 3610 }, { "epoch": 2.61, "learning_rate": 1.5951008645533142e-05, "loss": 0.005, "step": 3620 }, { "epoch": 2.61, "learning_rate": 1.5902977905859752e-05, "loss": 0.007, "step": 3630 }, { "epoch": 2.62, "learning_rate": 1.5854947166186358e-05, "loss": 0.0045, "step": 3640 }, { "epoch": 2.63, "learning_rate": 1.5806916426512968e-05, "loss": 0.0007, "step": 3650 }, { "epoch": 2.64, "learning_rate": 1.575888568683958e-05, "loss": 0.0034, "step": 3660 }, { "epoch": 2.64, "learning_rate": 1.5710854947166187e-05, "loss": 0.0037, "step": 3670 }, { "epoch": 2.65, "learning_rate": 1.5662824207492797e-05, "loss": 0.001, "step": 3680 }, { "epoch": 2.66, "learning_rate": 1.5614793467819406e-05, "loss": 0.0022, "step": 3690 }, { "epoch": 2.66, "learning_rate": 1.5566762728146012e-05, "loss": 0.001, "step": 3700 }, { "epoch": 2.67, "learning_rate": 1.5518731988472622e-05, "loss": 0.0048, "step": 3710 }, { "epoch": 2.68, "learning_rate": 1.5470701248799232e-05, "loss": 0.0052, "step": 3720 }, { "epoch": 2.69, "learning_rate": 1.542267050912584e-05, "loss": 0.0021, "step": 3730 }, { "epoch": 2.69, "learning_rate": 1.537463976945245e-05, "loss": 0.0091, "step": 3740 }, { "epoch": 2.7, "learning_rate": 1.532660902977906e-05, "loss": 0.0054, "step": 3750 }, { "epoch": 2.71, "learning_rate": 1.5278578290105667e-05, "loss": 0.0007, "step": 3760 }, { "epoch": 2.72, "learning_rate": 1.5230547550432277e-05, "loss": 0.0006, "step": 3770 }, { "epoch": 2.72, "learning_rate": 1.5182516810758884e-05, "loss": 0.0035, "step": 3780 }, { "epoch": 2.73, "learning_rate": 1.5134486071085494e-05, "loss": 0.0038, "step": 3790 }, { "epoch": 2.74, "learning_rate": 1.5086455331412105e-05, "loss": 0.0064, "step": 3800 }, { "epoch": 2.74, "learning_rate": 1.5038424591738715e-05, "loss": 0.0007, "step": 3810 }, { "epoch": 2.75, "learning_rate": 1.4990393852065323e-05, "loss": 0.0059, "step": 3820 }, { "epoch": 2.76, "learning_rate": 1.4942363112391931e-05, "loss": 0.0008, "step": 3830 }, { "epoch": 2.77, "learning_rate": 1.489433237271854e-05, "loss": 0.0009, "step": 3840 }, { "epoch": 2.77, "learning_rate": 1.484630163304515e-05, "loss": 0.0048, "step": 3850 }, { "epoch": 2.78, "learning_rate": 1.4798270893371758e-05, "loss": 0.0004, "step": 3860 }, { "epoch": 2.79, "learning_rate": 1.4750240153698368e-05, "loss": 0.0004, "step": 3870 }, { "epoch": 2.79, "learning_rate": 1.4702209414024976e-05, "loss": 0.006, "step": 3880 }, { "epoch": 2.8, "learning_rate": 1.4654178674351585e-05, "loss": 0.0056, "step": 3890 }, { "epoch": 2.81, "learning_rate": 1.4606147934678195e-05, "loss": 0.0021, "step": 3900 }, { "epoch": 2.82, "learning_rate": 1.4558117195004803e-05, "loss": 0.0035, "step": 3910 }, { "epoch": 2.82, "learning_rate": 1.451008645533141e-05, "loss": 0.0031, "step": 3920 }, { "epoch": 2.83, "learning_rate": 1.4462055715658022e-05, "loss": 0.0015, "step": 3930 }, { "epoch": 2.84, "learning_rate": 1.441402497598463e-05, "loss": 0.0039, "step": 3940 }, { "epoch": 2.84, "learning_rate": 1.436599423631124e-05, "loss": 0.0012, "step": 3950 }, { "epoch": 2.85, "learning_rate": 1.431796349663785e-05, "loss": 0.0041, "step": 3960 }, { "epoch": 2.86, "learning_rate": 1.4269932756964457e-05, "loss": 0.003, "step": 3970 }, { "epoch": 2.87, "learning_rate": 1.4221902017291067e-05, "loss": 0.0043, "step": 3980 }, { "epoch": 2.87, "learning_rate": 1.4173871277617675e-05, "loss": 0.009, "step": 3990 }, { "epoch": 2.88, "learning_rate": 1.4125840537944285e-05, "loss": 0.0123, "step": 4000 }, { "epoch": 2.89, "learning_rate": 1.4077809798270894e-05, "loss": 0.0088, "step": 4010 }, { "epoch": 2.9, "learning_rate": 1.4029779058597502e-05, "loss": 0.0019, "step": 4020 }, { "epoch": 2.9, "learning_rate": 1.3981748318924112e-05, "loss": 0.0046, "step": 4030 }, { "epoch": 2.91, "learning_rate": 1.3933717579250721e-05, "loss": 0.0004, "step": 4040 }, { "epoch": 2.92, "learning_rate": 1.388568683957733e-05, "loss": 0.0077, "step": 4050 }, { "epoch": 2.92, "learning_rate": 1.3837656099903939e-05, "loss": 0.0038, "step": 4060 }, { "epoch": 2.93, "learning_rate": 1.3789625360230549e-05, "loss": 0.0047, "step": 4070 }, { "epoch": 2.94, "learning_rate": 1.3741594620557156e-05, "loss": 0.0049, "step": 4080 }, { "epoch": 2.95, "learning_rate": 1.3693563880883766e-05, "loss": 0.0011, "step": 4090 }, { "epoch": 2.95, "learning_rate": 1.3645533141210376e-05, "loss": 0.0087, "step": 4100 }, { "epoch": 2.96, "learning_rate": 1.3597502401536984e-05, "loss": 0.0035, "step": 4110 }, { "epoch": 2.97, "learning_rate": 1.3549471661863593e-05, "loss": 0.0042, "step": 4120 }, { "epoch": 2.97, "learning_rate": 1.3501440922190201e-05, "loss": 0.0006, "step": 4130 }, { "epoch": 2.98, "learning_rate": 1.3453410182516811e-05, "loss": 0.0033, "step": 4140 }, { "epoch": 2.99, "learning_rate": 1.340537944284342e-05, "loss": 0.0013, "step": 4150 }, { "epoch": 3.0, "learning_rate": 1.3357348703170028e-05, "loss": 0.0018, "step": 4160 }, { "epoch": 3.0, "eval_accuracy": 0.9994935683176339, "eval_loss": 0.0025780154392123222, "eval_runtime": 22.1192, "eval_samples_per_second": 892.709, "eval_steps_per_second": 27.94, "step": 4165 }, { "epoch": 3.0, "learning_rate": 1.3309317963496638e-05, "loss": 0.0004, "step": 4170 }, { "epoch": 3.01, "learning_rate": 1.3261287223823248e-05, "loss": 0.0002, "step": 4180 }, { "epoch": 3.02, "learning_rate": 1.3213256484149856e-05, "loss": 0.0002, "step": 4190 }, { "epoch": 3.02, "learning_rate": 1.3165225744476465e-05, "loss": 0.0099, "step": 4200 }, { "epoch": 3.03, "learning_rate": 1.3117195004803075e-05, "loss": 0.0034, "step": 4210 }, { "epoch": 3.04, "learning_rate": 1.3069164265129683e-05, "loss": 0.0003, "step": 4220 }, { "epoch": 3.05, "learning_rate": 1.3021133525456293e-05, "loss": 0.0105, "step": 4230 }, { "epoch": 3.05, "learning_rate": 1.2973102785782902e-05, "loss": 0.0049, "step": 4240 }, { "epoch": 3.06, "learning_rate": 1.292507204610951e-05, "loss": 0.0007, "step": 4250 }, { "epoch": 3.07, "learning_rate": 1.287704130643612e-05, "loss": 0.0082, "step": 4260 }, { "epoch": 3.08, "learning_rate": 1.2829010566762728e-05, "loss": 0.002, "step": 4270 }, { "epoch": 3.08, "learning_rate": 1.2780979827089337e-05, "loss": 0.0041, "step": 4280 }, { "epoch": 3.09, "learning_rate": 1.2732949087415947e-05, "loss": 0.004, "step": 4290 }, { "epoch": 3.1, "learning_rate": 1.2684918347742555e-05, "loss": 0.0009, "step": 4300 }, { "epoch": 3.1, "learning_rate": 1.2636887608069165e-05, "loss": 0.0031, "step": 4310 }, { "epoch": 3.11, "learning_rate": 1.2588856868395774e-05, "loss": 0.0012, "step": 4320 }, { "epoch": 3.12, "learning_rate": 1.2540826128722382e-05, "loss": 0.0026, "step": 4330 }, { "epoch": 3.13, "learning_rate": 1.2492795389048992e-05, "loss": 0.0031, "step": 4340 }, { "epoch": 3.13, "learning_rate": 1.2444764649375601e-05, "loss": 0.0004, "step": 4350 }, { "epoch": 3.14, "learning_rate": 1.239673390970221e-05, "loss": 0.0004, "step": 4360 }, { "epoch": 3.15, "learning_rate": 1.2348703170028819e-05, "loss": 0.0007, "step": 4370 }, { "epoch": 3.15, "learning_rate": 1.2300672430355429e-05, "loss": 0.0005, "step": 4380 }, { "epoch": 3.16, "learning_rate": 1.2252641690682036e-05, "loss": 0.0021, "step": 4390 }, { "epoch": 3.17, "learning_rate": 1.2204610951008646e-05, "loss": 0.0003, "step": 4400 }, { "epoch": 3.18, "learning_rate": 1.2156580211335254e-05, "loss": 0.008, "step": 4410 }, { "epoch": 3.18, "learning_rate": 1.2108549471661864e-05, "loss": 0.0003, "step": 4420 }, { "epoch": 3.19, "learning_rate": 1.2060518731988473e-05, "loss": 0.0005, "step": 4430 }, { "epoch": 3.2, "learning_rate": 1.2012487992315081e-05, "loss": 0.0034, "step": 4440 }, { "epoch": 3.2, "learning_rate": 1.1964457252641693e-05, "loss": 0.0039, "step": 4450 }, { "epoch": 3.21, "learning_rate": 1.19164265129683e-05, "loss": 0.0019, "step": 4460 }, { "epoch": 3.22, "learning_rate": 1.1868395773294908e-05, "loss": 0.0006, "step": 4470 }, { "epoch": 3.23, "learning_rate": 1.1820365033621518e-05, "loss": 0.0102, "step": 4480 }, { "epoch": 3.23, "learning_rate": 1.1772334293948128e-05, "loss": 0.0019, "step": 4490 }, { "epoch": 3.24, "learning_rate": 1.1724303554274736e-05, "loss": 0.0038, "step": 4500 }, { "epoch": 3.25, "learning_rate": 1.1676272814601345e-05, "loss": 0.0034, "step": 4510 }, { "epoch": 3.26, "learning_rate": 1.1628242074927955e-05, "loss": 0.0016, "step": 4520 }, { "epoch": 3.26, "learning_rate": 1.1580211335254563e-05, "loss": 0.0047, "step": 4530 }, { "epoch": 3.27, "learning_rate": 1.1532180595581173e-05, "loss": 0.0023, "step": 4540 }, { "epoch": 3.28, "learning_rate": 1.148414985590778e-05, "loss": 0.004, "step": 4550 }, { "epoch": 3.28, "learning_rate": 1.143611911623439e-05, "loss": 0.0046, "step": 4560 }, { "epoch": 3.29, "learning_rate": 1.1388088376561e-05, "loss": 0.0014, "step": 4570 }, { "epoch": 3.3, "learning_rate": 1.1340057636887608e-05, "loss": 0.0105, "step": 4580 }, { "epoch": 3.31, "learning_rate": 1.1292026897214219e-05, "loss": 0.0001, "step": 4590 }, { "epoch": 3.31, "learning_rate": 1.1243996157540827e-05, "loss": 0.0042, "step": 4600 }, { "epoch": 3.32, "learning_rate": 1.1195965417867435e-05, "loss": 0.004, "step": 4610 }, { "epoch": 3.33, "learning_rate": 1.1147934678194044e-05, "loss": 0.0011, "step": 4620 }, { "epoch": 3.33, "learning_rate": 1.1099903938520654e-05, "loss": 0.0052, "step": 4630 }, { "epoch": 3.34, "learning_rate": 1.1051873198847262e-05, "loss": 0.0002, "step": 4640 }, { "epoch": 3.35, "learning_rate": 1.1003842459173872e-05, "loss": 0.0005, "step": 4650 }, { "epoch": 3.36, "learning_rate": 1.095581171950048e-05, "loss": 0.0006, "step": 4660 }, { "epoch": 3.36, "learning_rate": 1.090778097982709e-05, "loss": 0.0005, "step": 4670 }, { "epoch": 3.37, "learning_rate": 1.0859750240153699e-05, "loss": 0.0004, "step": 4680 }, { "epoch": 3.38, "learning_rate": 1.0811719500480307e-05, "loss": 0.0081, "step": 4690 }, { "epoch": 3.38, "learning_rate": 1.0763688760806918e-05, "loss": 0.0038, "step": 4700 }, { "epoch": 3.39, "learning_rate": 1.0715658021133526e-05, "loss": 0.0005, "step": 4710 }, { "epoch": 3.4, "learning_rate": 1.0667627281460134e-05, "loss": 0.0043, "step": 4720 }, { "epoch": 3.41, "learning_rate": 1.0619596541786744e-05, "loss": 0.0016, "step": 4730 }, { "epoch": 3.41, "learning_rate": 1.0571565802113353e-05, "loss": 0.004, "step": 4740 }, { "epoch": 3.42, "learning_rate": 1.0523535062439961e-05, "loss": 0.0052, "step": 4750 }, { "epoch": 3.43, "learning_rate": 1.0475504322766571e-05, "loss": 0.0007, "step": 4760 }, { "epoch": 3.44, "learning_rate": 1.042747358309318e-05, "loss": 0.0042, "step": 4770 }, { "epoch": 3.44, "learning_rate": 1.0379442843419788e-05, "loss": 0.0045, "step": 4780 }, { "epoch": 3.45, "learning_rate": 1.0331412103746398e-05, "loss": 0.0017, "step": 4790 }, { "epoch": 3.46, "learning_rate": 1.0283381364073006e-05, "loss": 0.0003, "step": 4800 }, { "epoch": 3.46, "learning_rate": 1.0235350624399616e-05, "loss": 0.0003, "step": 4810 }, { "epoch": 3.47, "learning_rate": 1.0187319884726225e-05, "loss": 0.0011, "step": 4820 }, { "epoch": 3.48, "learning_rate": 1.0139289145052833e-05, "loss": 0.0028, "step": 4830 }, { "epoch": 3.49, "learning_rate": 1.0091258405379445e-05, "loss": 0.0006, "step": 4840 }, { "epoch": 3.49, "learning_rate": 1.0043227665706052e-05, "loss": 0.0019, "step": 4850 }, { "epoch": 3.5, "learning_rate": 9.99519692603266e-06, "loss": 0.0032, "step": 4860 }, { "epoch": 3.51, "learning_rate": 9.94716618635927e-06, "loss": 0.0119, "step": 4870 }, { "epoch": 3.51, "learning_rate": 9.89913544668588e-06, "loss": 0.0022, "step": 4880 }, { "epoch": 3.52, "learning_rate": 9.851104707012488e-06, "loss": 0.0022, "step": 4890 }, { "epoch": 3.53, "learning_rate": 9.803073967339097e-06, "loss": 0.0012, "step": 4900 }, { "epoch": 3.54, "learning_rate": 9.755043227665707e-06, "loss": 0.0046, "step": 4910 }, { "epoch": 3.54, "learning_rate": 9.707012487992315e-06, "loss": 0.0047, "step": 4920 }, { "epoch": 3.55, "learning_rate": 9.658981748318924e-06, "loss": 0.0012, "step": 4930 }, { "epoch": 3.56, "learning_rate": 9.610951008645532e-06, "loss": 0.0004, "step": 4940 }, { "epoch": 3.56, "learning_rate": 9.562920268972144e-06, "loss": 0.0002, "step": 4950 }, { "epoch": 3.57, "learning_rate": 9.514889529298752e-06, "loss": 0.0002, "step": 4960 }, { "epoch": 3.58, "learning_rate": 9.46685878962536e-06, "loss": 0.0043, "step": 4970 }, { "epoch": 3.59, "learning_rate": 9.418828049951971e-06, "loss": 0.0019, "step": 4980 }, { "epoch": 3.59, "learning_rate": 9.370797310278579e-06, "loss": 0.0017, "step": 4990 }, { "epoch": 3.6, "learning_rate": 9.322766570605187e-06, "loss": 0.0073, "step": 5000 }, { "epoch": 3.61, "learning_rate": 9.274735830931796e-06, "loss": 0.0022, "step": 5010 }, { "epoch": 3.62, "learning_rate": 9.226705091258406e-06, "loss": 0.0011, "step": 5020 }, { "epoch": 3.62, "learning_rate": 9.178674351585014e-06, "loss": 0.0015, "step": 5030 }, { "epoch": 3.63, "learning_rate": 9.130643611911624e-06, "loss": 0.0059, "step": 5040 }, { "epoch": 3.64, "learning_rate": 9.082612872238233e-06, "loss": 0.0012, "step": 5050 }, { "epoch": 3.64, "learning_rate": 9.034582132564841e-06, "loss": 0.0028, "step": 5060 }, { "epoch": 3.65, "learning_rate": 8.986551392891451e-06, "loss": 0.0005, "step": 5070 }, { "epoch": 3.66, "learning_rate": 8.938520653218059e-06, "loss": 0.0013, "step": 5080 }, { "epoch": 3.67, "learning_rate": 8.89048991354467e-06, "loss": 0.0045, "step": 5090 }, { "epoch": 3.67, "learning_rate": 8.842459173871278e-06, "loss": 0.0001, "step": 5100 }, { "epoch": 3.68, "learning_rate": 8.794428434197886e-06, "loss": 0.0014, "step": 5110 }, { "epoch": 3.69, "learning_rate": 8.746397694524497e-06, "loss": 0.0004, "step": 5120 }, { "epoch": 3.69, "learning_rate": 8.698366954851105e-06, "loss": 0.0015, "step": 5130 }, { "epoch": 3.7, "learning_rate": 8.650336215177713e-06, "loss": 0.0002, "step": 5140 }, { "epoch": 3.71, "learning_rate": 8.602305475504323e-06, "loss": 0.0007, "step": 5150 }, { "epoch": 3.72, "learning_rate": 8.554274735830932e-06, "loss": 0.0017, "step": 5160 }, { "epoch": 3.72, "learning_rate": 8.50624399615754e-06, "loss": 0.0002, "step": 5170 }, { "epoch": 3.73, "learning_rate": 8.45821325648415e-06, "loss": 0.0008, "step": 5180 }, { "epoch": 3.74, "learning_rate": 8.41018251681076e-06, "loss": 0.0014, "step": 5190 }, { "epoch": 3.75, "learning_rate": 8.362151777137368e-06, "loss": 0.0003, "step": 5200 }, { "epoch": 3.75, "learning_rate": 8.314121037463977e-06, "loss": 0.0117, "step": 5210 }, { "epoch": 3.76, "learning_rate": 8.266090297790585e-06, "loss": 0.0018, "step": 5220 }, { "epoch": 3.77, "learning_rate": 8.218059558117197e-06, "loss": 0.0007, "step": 5230 }, { "epoch": 3.77, "learning_rate": 8.170028818443804e-06, "loss": 0.0027, "step": 5240 }, { "epoch": 3.78, "learning_rate": 8.121998078770412e-06, "loss": 0.0004, "step": 5250 }, { "epoch": 3.79, "learning_rate": 8.073967339097024e-06, "loss": 0.0038, "step": 5260 }, { "epoch": 3.8, "learning_rate": 8.025936599423632e-06, "loss": 0.0027, "step": 5270 }, { "epoch": 3.8, "learning_rate": 7.97790585975024e-06, "loss": 0.0018, "step": 5280 }, { "epoch": 3.81, "learning_rate": 7.92987512007685e-06, "loss": 0.0032, "step": 5290 }, { "epoch": 3.82, "learning_rate": 7.881844380403459e-06, "loss": 0.0011, "step": 5300 }, { "epoch": 3.82, "learning_rate": 7.833813640730067e-06, "loss": 0.0019, "step": 5310 }, { "epoch": 3.83, "learning_rate": 7.785782901056676e-06, "loss": 0.001, "step": 5320 }, { "epoch": 3.84, "learning_rate": 7.737752161383284e-06, "loss": 0.0009, "step": 5330 }, { "epoch": 3.85, "learning_rate": 7.689721421709896e-06, "loss": 0.0016, "step": 5340 }, { "epoch": 3.85, "learning_rate": 7.641690682036504e-06, "loss": 0.004, "step": 5350 }, { "epoch": 3.86, "learning_rate": 7.593659942363112e-06, "loss": 0.0004, "step": 5360 }, { "epoch": 3.87, "learning_rate": 7.545629202689722e-06, "loss": 0.0036, "step": 5370 }, { "epoch": 3.87, "learning_rate": 7.497598463016331e-06, "loss": 0.0011, "step": 5380 }, { "epoch": 3.88, "learning_rate": 7.44956772334294e-06, "loss": 0.0014, "step": 5390 }, { "epoch": 3.89, "learning_rate": 7.401536983669548e-06, "loss": 0.0077, "step": 5400 }, { "epoch": 3.9, "learning_rate": 7.353506243996158e-06, "loss": 0.0069, "step": 5410 }, { "epoch": 3.9, "learning_rate": 7.305475504322767e-06, "loss": 0.0028, "step": 5420 }, { "epoch": 3.91, "learning_rate": 7.2574447646493765e-06, "loss": 0.0003, "step": 5430 }, { "epoch": 3.92, "learning_rate": 7.209414024975984e-06, "loss": 0.007, "step": 5440 }, { "epoch": 3.93, "learning_rate": 7.161383285302594e-06, "loss": 0.0017, "step": 5450 }, { "epoch": 3.93, "learning_rate": 7.113352545629203e-06, "loss": 0.0003, "step": 5460 }, { "epoch": 3.94, "learning_rate": 7.065321805955812e-06, "loss": 0.003, "step": 5470 }, { "epoch": 3.95, "learning_rate": 7.017291066282421e-06, "loss": 0.0007, "step": 5480 }, { "epoch": 3.95, "learning_rate": 6.96926032660903e-06, "loss": 0.0029, "step": 5490 }, { "epoch": 3.96, "learning_rate": 6.92122958693564e-06, "loss": 0.0004, "step": 5500 }, { "epoch": 3.97, "learning_rate": 6.873198847262248e-06, "loss": 0.0016, "step": 5510 }, { "epoch": 3.98, "learning_rate": 6.825168107588857e-06, "loss": 0.0002, "step": 5520 }, { "epoch": 3.98, "learning_rate": 6.777137367915466e-06, "loss": 0.0002, "step": 5530 }, { "epoch": 3.99, "learning_rate": 6.729106628242075e-06, "loss": 0.0005, "step": 5540 }, { "epoch": 4.0, "learning_rate": 6.681075888568684e-06, "loss": 0.0037, "step": 5550 }, { "epoch": 4.0, "eval_accuracy": 0.9997974273270536, "eval_loss": 0.001714603858999908, "eval_runtime": 22.1054, "eval_samples_per_second": 893.265, "eval_steps_per_second": 27.957, "step": 5554 }, { "epoch": 4.0, "learning_rate": 6.633045148895293e-06, "loss": 0.0002, "step": 5560 }, { "epoch": 4.01, "learning_rate": 6.585014409221903e-06, "loss": 0.0003, "step": 5570 }, { "epoch": 4.02, "learning_rate": 6.536983669548511e-06, "loss": 0.0042, "step": 5580 }, { "epoch": 4.03, "learning_rate": 6.4889529298751204e-06, "loss": 0.0002, "step": 5590 }, { "epoch": 4.03, "learning_rate": 6.440922190201729e-06, "loss": 0.0032, "step": 5600 }, { "epoch": 4.04, "learning_rate": 6.392891450528338e-06, "loss": 0.0003, "step": 5610 }, { "epoch": 4.05, "learning_rate": 6.344860710854947e-06, "loss": 0.0016, "step": 5620 }, { "epoch": 4.05, "learning_rate": 6.296829971181556e-06, "loss": 0.0003, "step": 5630 }, { "epoch": 4.06, "learning_rate": 6.248799231508166e-06, "loss": 0.0007, "step": 5640 }, { "epoch": 4.07, "learning_rate": 6.200768491834774e-06, "loss": 0.0004, "step": 5650 }, { "epoch": 4.08, "learning_rate": 6.152737752161384e-06, "loss": 0.0014, "step": 5660 }, { "epoch": 4.08, "learning_rate": 6.104707012487992e-06, "loss": 0.0002, "step": 5670 }, { "epoch": 4.09, "learning_rate": 6.056676272814602e-06, "loss": 0.003, "step": 5680 }, { "epoch": 4.1, "learning_rate": 6.00864553314121e-06, "loss": 0.0009, "step": 5690 }, { "epoch": 4.11, "learning_rate": 5.96061479346782e-06, "loss": 0.0057, "step": 5700 }, { "epoch": 4.11, "learning_rate": 5.912584053794429e-06, "loss": 0.0005, "step": 5710 }, { "epoch": 4.12, "learning_rate": 5.864553314121037e-06, "loss": 0.0009, "step": 5720 }, { "epoch": 4.13, "learning_rate": 5.816522574447647e-06, "loss": 0.002, "step": 5730 }, { "epoch": 4.13, "learning_rate": 5.768491834774256e-06, "loss": 0.0065, "step": 5740 }, { "epoch": 4.14, "learning_rate": 5.720461095100865e-06, "loss": 0.005, "step": 5750 }, { "epoch": 4.15, "learning_rate": 5.672430355427473e-06, "loss": 0.0062, "step": 5760 }, { "epoch": 4.16, "learning_rate": 5.624399615754083e-06, "loss": 0.0037, "step": 5770 }, { "epoch": 4.16, "learning_rate": 5.5763688760806924e-06, "loss": 0.0019, "step": 5780 }, { "epoch": 4.17, "learning_rate": 5.5283381364073e-06, "loss": 0.0013, "step": 5790 }, { "epoch": 4.18, "learning_rate": 5.48030739673391e-06, "loss": 0.0008, "step": 5800 }, { "epoch": 4.18, "learning_rate": 5.432276657060519e-06, "loss": 0.0011, "step": 5810 }, { "epoch": 4.19, "learning_rate": 5.3842459173871284e-06, "loss": 0.0003, "step": 5820 }, { "epoch": 4.2, "learning_rate": 5.336215177713736e-06, "loss": 0.0004, "step": 5830 }, { "epoch": 4.21, "learning_rate": 5.288184438040346e-06, "loss": 0.0026, "step": 5840 }, { "epoch": 4.21, "learning_rate": 5.240153698366955e-06, "loss": 0.0022, "step": 5850 }, { "epoch": 4.22, "learning_rate": 5.1921229586935636e-06, "loss": 0.0018, "step": 5860 }, { "epoch": 4.23, "learning_rate": 5.144092219020173e-06, "loss": 0.0002, "step": 5870 }, { "epoch": 4.23, "learning_rate": 5.096061479346782e-06, "loss": 0.0016, "step": 5880 }, { "epoch": 4.24, "learning_rate": 5.048030739673392e-06, "loss": 0.0003, "step": 5890 }, { "epoch": 4.25, "learning_rate": 4.9999999999999996e-06, "loss": 0.0014, "step": 5900 }, { "epoch": 4.26, "learning_rate": 4.951969260326609e-06, "loss": 0.0026, "step": 5910 }, { "epoch": 4.26, "learning_rate": 4.903938520653218e-06, "loss": 0.0004, "step": 5920 }, { "epoch": 4.27, "learning_rate": 4.855907780979827e-06, "loss": 0.0003, "step": 5930 }, { "epoch": 4.28, "learning_rate": 4.807877041306436e-06, "loss": 0.0003, "step": 5940 }, { "epoch": 4.29, "learning_rate": 4.759846301633045e-06, "loss": 0.0004, "step": 5950 }, { "epoch": 4.29, "learning_rate": 4.711815561959655e-06, "loss": 0.0006, "step": 5960 }, { "epoch": 4.3, "learning_rate": 4.663784822286263e-06, "loss": 0.0002, "step": 5970 }, { "epoch": 4.31, "learning_rate": 4.615754082612872e-06, "loss": 0.0015, "step": 5980 }, { "epoch": 4.31, "learning_rate": 4.567723342939481e-06, "loss": 0.0001, "step": 5990 }, { "epoch": 4.32, "learning_rate": 4.519692603266091e-06, "loss": 0.0004, "step": 6000 }, { "epoch": 4.33, "learning_rate": 4.4716618635927e-06, "loss": 0.0043, "step": 6010 }, { "epoch": 4.34, "learning_rate": 4.423631123919308e-06, "loss": 0.0024, "step": 6020 }, { "epoch": 4.34, "learning_rate": 4.375600384245918e-06, "loss": 0.0033, "step": 6030 }, { "epoch": 4.35, "learning_rate": 4.327569644572526e-06, "loss": 0.001, "step": 6040 }, { "epoch": 4.36, "learning_rate": 4.279538904899136e-06, "loss": 0.0002, "step": 6050 }, { "epoch": 4.36, "learning_rate": 4.231508165225744e-06, "loss": 0.0001, "step": 6060 }, { "epoch": 4.37, "learning_rate": 4.183477425552354e-06, "loss": 0.0002, "step": 6070 }, { "epoch": 4.38, "learning_rate": 4.135446685878963e-06, "loss": 0.0088, "step": 6080 }, { "epoch": 4.39, "learning_rate": 4.0874159462055716e-06, "loss": 0.0002, "step": 6090 }, { "epoch": 4.39, "learning_rate": 4.039385206532181e-06, "loss": 0.0002, "step": 6100 }, { "epoch": 4.4, "learning_rate": 3.991354466858789e-06, "loss": 0.0023, "step": 6110 }, { "epoch": 4.41, "learning_rate": 3.943323727185399e-06, "loss": 0.0019, "step": 6120 }, { "epoch": 4.41, "learning_rate": 3.8952929875120076e-06, "loss": 0.0001, "step": 6130 }, { "epoch": 4.42, "learning_rate": 3.847262247838617e-06, "loss": 0.0001, "step": 6140 }, { "epoch": 4.43, "learning_rate": 3.7992315081652264e-06, "loss": 0.0003, "step": 6150 }, { "epoch": 4.44, "learning_rate": 3.7512007684918348e-06, "loss": 0.003, "step": 6160 }, { "epoch": 4.44, "learning_rate": 3.703170028818444e-06, "loss": 0.0038, "step": 6170 }, { "epoch": 4.45, "learning_rate": 3.6551392891450528e-06, "loss": 0.0036, "step": 6180 }, { "epoch": 4.46, "learning_rate": 3.607108549471662e-06, "loss": 0.0004, "step": 6190 }, { "epoch": 4.47, "learning_rate": 3.559077809798271e-06, "loss": 0.0006, "step": 6200 }, { "epoch": 4.47, "learning_rate": 3.51104707012488e-06, "loss": 0.0003, "step": 6210 }, { "epoch": 4.48, "learning_rate": 3.463016330451489e-06, "loss": 0.0003, "step": 6220 }, { "epoch": 4.49, "learning_rate": 3.414985590778098e-06, "loss": 0.0003, "step": 6230 }, { "epoch": 4.49, "learning_rate": 3.366954851104707e-06, "loss": 0.0041, "step": 6240 }, { "epoch": 4.5, "learning_rate": 3.318924111431316e-06, "loss": 0.0003, "step": 6250 }, { "epoch": 4.51, "learning_rate": 3.270893371757925e-06, "loss": 0.0021, "step": 6260 }, { "epoch": 4.52, "learning_rate": 3.2228626320845344e-06, "loss": 0.0002, "step": 6270 }, { "epoch": 4.52, "learning_rate": 3.174831892411143e-06, "loss": 0.0002, "step": 6280 }, { "epoch": 4.53, "learning_rate": 3.1268011527377524e-06, "loss": 0.0015, "step": 6290 }, { "epoch": 4.54, "learning_rate": 3.078770413064361e-06, "loss": 0.0026, "step": 6300 }, { "epoch": 4.54, "learning_rate": 3.0307396733909704e-06, "loss": 0.0002, "step": 6310 }, { "epoch": 4.55, "learning_rate": 2.982708933717579e-06, "loss": 0.0009, "step": 6320 }, { "epoch": 4.56, "learning_rate": 2.9346781940441884e-06, "loss": 0.0002, "step": 6330 }, { "epoch": 4.57, "learning_rate": 2.8866474543707976e-06, "loss": 0.0007, "step": 6340 }, { "epoch": 4.57, "learning_rate": 2.8386167146974064e-06, "loss": 0.0036, "step": 6350 }, { "epoch": 4.58, "learning_rate": 2.7905859750240156e-06, "loss": 0.0017, "step": 6360 }, { "epoch": 4.59, "learning_rate": 2.7425552353506244e-06, "loss": 0.0003, "step": 6370 }, { "epoch": 4.59, "learning_rate": 2.6945244956772336e-06, "loss": 0.0001, "step": 6380 }, { "epoch": 4.6, "learning_rate": 2.6464937560038423e-06, "loss": 0.0002, "step": 6390 }, { "epoch": 4.61, "learning_rate": 2.5984630163304516e-06, "loss": 0.0005, "step": 6400 }, { "epoch": 4.62, "learning_rate": 2.5504322766570608e-06, "loss": 0.0006, "step": 6410 }, { "epoch": 4.62, "learning_rate": 2.5024015369836696e-06, "loss": 0.0004, "step": 6420 }, { "epoch": 4.63, "learning_rate": 2.4543707973102788e-06, "loss": 0.0005, "step": 6430 }, { "epoch": 4.64, "learning_rate": 2.4063400576368875e-06, "loss": 0.0003, "step": 6440 }, { "epoch": 4.65, "learning_rate": 2.3583093179634968e-06, "loss": 0.0069, "step": 6450 }, { "epoch": 4.65, "learning_rate": 2.3102785782901055e-06, "loss": 0.0021, "step": 6460 }, { "epoch": 4.66, "learning_rate": 2.2622478386167148e-06, "loss": 0.0004, "step": 6470 }, { "epoch": 4.67, "learning_rate": 2.2142170989433235e-06, "loss": 0.0003, "step": 6480 }, { "epoch": 4.67, "learning_rate": 2.166186359269933e-06, "loss": 0.0003, "step": 6490 }, { "epoch": 4.68, "learning_rate": 2.118155619596542e-06, "loss": 0.0025, "step": 6500 }, { "epoch": 4.69, "learning_rate": 2.0701248799231507e-06, "loss": 0.0012, "step": 6510 }, { "epoch": 4.7, "learning_rate": 2.02209414024976e-06, "loss": 0.0001, "step": 6520 }, { "epoch": 4.7, "learning_rate": 1.9740634005763687e-06, "loss": 0.0019, "step": 6530 }, { "epoch": 4.71, "learning_rate": 1.926032660902978e-06, "loss": 0.0038, "step": 6540 }, { "epoch": 4.72, "learning_rate": 1.8780019212295867e-06, "loss": 0.0002, "step": 6550 }, { "epoch": 4.72, "learning_rate": 1.829971181556196e-06, "loss": 0.0007, "step": 6560 }, { "epoch": 4.73, "learning_rate": 1.781940441882805e-06, "loss": 0.0004, "step": 6570 }, { "epoch": 4.74, "learning_rate": 1.7339097022094141e-06, "loss": 0.0007, "step": 6580 }, { "epoch": 4.75, "learning_rate": 1.6858789625360231e-06, "loss": 0.0031, "step": 6590 }, { "epoch": 4.75, "learning_rate": 1.6378482228626321e-06, "loss": 0.0007, "step": 6600 }, { "epoch": 4.76, "learning_rate": 1.5898174831892411e-06, "loss": 0.0003, "step": 6610 }, { "epoch": 4.77, "learning_rate": 1.5417867435158501e-06, "loss": 0.0005, "step": 6620 }, { "epoch": 4.77, "learning_rate": 1.4937560038424591e-06, "loss": 0.002, "step": 6630 }, { "epoch": 4.78, "learning_rate": 1.4457252641690681e-06, "loss": 0.0011, "step": 6640 }, { "epoch": 4.79, "learning_rate": 1.3976945244956773e-06, "loss": 0.0022, "step": 6650 }, { "epoch": 4.8, "learning_rate": 1.3496637848222863e-06, "loss": 0.0007, "step": 6660 }, { "epoch": 4.8, "learning_rate": 1.3016330451488953e-06, "loss": 0.0095, "step": 6670 }, { "epoch": 4.81, "learning_rate": 1.2536023054755043e-06, "loss": 0.0002, "step": 6680 }, { "epoch": 4.82, "learning_rate": 1.2055715658021135e-06, "loss": 0.0012, "step": 6690 }, { "epoch": 4.83, "learning_rate": 1.1575408261287223e-06, "loss": 0.0037, "step": 6700 }, { "epoch": 4.83, "learning_rate": 1.1095100864553313e-06, "loss": 0.0009, "step": 6710 }, { "epoch": 4.84, "learning_rate": 1.0614793467819403e-06, "loss": 0.0003, "step": 6720 }, { "epoch": 4.85, "learning_rate": 1.0134486071085495e-06, "loss": 0.0004, "step": 6730 }, { "epoch": 4.85, "learning_rate": 9.654178674351585e-07, "loss": 0.0048, "step": 6740 }, { "epoch": 4.86, "learning_rate": 9.173871277617676e-07, "loss": 0.0011, "step": 6750 }, { "epoch": 4.87, "learning_rate": 8.693563880883765e-07, "loss": 0.0014, "step": 6760 }, { "epoch": 4.88, "learning_rate": 8.213256484149856e-07, "loss": 0.0011, "step": 6770 }, { "epoch": 4.88, "learning_rate": 7.732949087415946e-07, "loss": 0.0014, "step": 6780 }, { "epoch": 4.89, "learning_rate": 7.252641690682037e-07, "loss": 0.0002, "step": 6790 }, { "epoch": 4.9, "learning_rate": 6.772334293948126e-07, "loss": 0.0005, "step": 6800 }, { "epoch": 4.9, "learning_rate": 6.292026897214217e-07, "loss": 0.0025, "step": 6810 }, { "epoch": 4.91, "learning_rate": 5.811719500480307e-07, "loss": 0.0002, "step": 6820 }, { "epoch": 4.92, "learning_rate": 5.331412103746398e-07, "loss": 0.002, "step": 6830 }, { "epoch": 4.93, "learning_rate": 4.851104707012487e-07, "loss": 0.0054, "step": 6840 }, { "epoch": 4.93, "learning_rate": 4.370797310278578e-07, "loss": 0.0013, "step": 6850 }, { "epoch": 4.94, "learning_rate": 3.890489913544669e-07, "loss": 0.0089, "step": 6860 }, { "epoch": 4.95, "learning_rate": 3.4101825168107593e-07, "loss": 0.0015, "step": 6870 }, { "epoch": 4.95, "learning_rate": 2.929875120076849e-07, "loss": 0.0032, "step": 6880 }, { "epoch": 4.96, "learning_rate": 2.44956772334294e-07, "loss": 0.0017, "step": 6890 }, { "epoch": 4.97, "learning_rate": 1.9692603266090297e-07, "loss": 0.0013, "step": 6900 }, { "epoch": 4.98, "learning_rate": 1.4889529298751202e-07, "loss": 0.0003, "step": 6910 }, { "epoch": 4.98, "learning_rate": 1.0086455331412103e-07, "loss": 0.0002, "step": 6920 }, { "epoch": 4.99, "learning_rate": 5.2833813640730064e-08, "loss": 0.0014, "step": 6930 }, { "epoch": 5.0, "learning_rate": 4.803073967339097e-09, "loss": 0.0002, "step": 6940 }, { "epoch": 5.0, "eval_accuracy": 0.9997974273270536, "eval_loss": 0.001698512933216989, "eval_runtime": 22.1369, "eval_samples_per_second": 891.996, "eval_steps_per_second": 27.917, "step": 6940 }, { "epoch": 5.0, "step": 6940, "total_flos": 1.4460593835514075e+19, "train_loss": 0.03131400679324697, "train_runtime": 2706.2965, "train_samples_per_second": 328.332, "train_steps_per_second": 2.564 } ], "max_steps": 6940, "num_train_epochs": 5, "total_flos": 1.4460593835514075e+19, "trial_name": null, "trial_params": null }