{"step": 2000, "epoch": 2, "train_loss": 4.106095605669401, "val_loss": 3.5962798564910887, "tok_f1": 0.1444776942012236, "mean_words": 4.9355, "lr": 0.0003, "wall_time": 1779222902.593431} {"step": 4000, "epoch": 3, "train_loss": 3.413264048563655, "val_loss": 3.324702338409424, "tok_f1": 0.2016711557233616, "mean_words": 4.7035, "lr": 0.0003, "wall_time": 1779223948.1597261} {"step": 6000, "epoch": 4, "train_loss": 3.2278357425950444, "val_loss": 3.215394763946533, "tok_f1": 0.22583124481727423, "mean_words": 4.8295, "lr": 0.0003, "wall_time": 1779224990.457532} {"step": 8000, "epoch": 5, "train_loss": 3.1305528082016782, "val_loss": 3.154838008880615, "tok_f1": 0.24623787544155193, "mean_words": 4.788, "lr": 0.0003, "wall_time": 1779226040.401222} {"step": 10000, "epoch": 6, "train_loss": 3.0648372187956725, "val_loss": 3.1069913497924806, "tok_f1": 0.24793856168341463, "mean_words": 4.969, "lr": 0.0003, "wall_time": 1779227077.782281} {"step": 12000, "epoch": 7, "train_loss": 3.0171820744484097, "val_loss": 3.0755839088439942, "tok_f1": 0.2596720575176457, "mean_words": 4.941, "lr": 0.0003, "wall_time": 1779228114.887825} {"step": 14000, "epoch": 8, "train_loss": 2.9800491321919766, "val_loss": 3.052339796447754, "tok_f1": 0.2559486954222248, "mean_words": 5.0645, "lr": 0.0003, "wall_time": 1779229152.6426811} {"step": 16000, "epoch": 9, "train_loss": 2.9487837862643955, "val_loss": 3.0353144744873046, "tok_f1": 0.26408666970284617, "mean_words": 4.755, "lr": 0.0003, "wall_time": 1779230163.828542} {"step": 18000, "epoch": 11, "train_loss": 2.918691721206019, "val_loss": 3.02962755279541, "tok_f1": 0.2688920691236868, "mean_words": 5.12, "lr": 0.0003, "wall_time": 1779231166.284846} {"step": 20000, "epoch": 12, "train_loss": 2.896327673036307, "val_loss": 3.018091846084595, "tok_f1": 0.2724128310415075, "mean_words": 4.878, "lr": 0.0003, "wall_time": 1779232199.0462751} {"step": 22000, "epoch": 13, "train_loss": 2.87821229420086, "val_loss": 3.0077461536407473, "tok_f1": 0.27315177722604195, "mean_words": 5.1035, "lr": 0.0003, "wall_time": 1779233254.125132} {"step": 24000, "epoch": 14, "train_loss": 2.8617876689077253, "val_loss": 2.998493883895874, "tok_f1": 0.2770256465756466, "mean_words": 4.8905, "lr": 0.0003, "wall_time": 1779234303.187704} {"step": 26000, "epoch": 15, "train_loss": 2.846088374496488, "val_loss": 2.9906312114715576, "tok_f1": 0.27703381985661396, "mean_words": 4.896, "lr": 0.0003, "wall_time": 1779235355.874115} {"step": 28000, "epoch": 16, "train_loss": 2.8328490578439105, "val_loss": 2.983960963058472, "tok_f1": 0.2795972222222222, "mean_words": 4.9435, "lr": 0.0003, "wall_time": 1779236406.483165} {"step": 30000, "epoch": 17, "train_loss": 2.820020103981039, "val_loss": 2.97227031211853, "tok_f1": 0.28214252634620285, "mean_words": 5.0595, "lr": 0.0003, "wall_time": 1779237476.096491} {"step": 32000, "epoch": 18, "train_loss": 2.8092726084687767, "val_loss": 2.968260679626465, "tok_f1": 0.28473659257409256, "mean_words": 4.924, "lr": 0.0003, "wall_time": 1779238523.0281012} {"step": 34000, "epoch": 20, "train_loss": 2.79349008795453, "val_loss": 2.977187242126465, "tok_f1": 0.2865114801864802, "mean_words": 4.9075, "lr": 0.0003, "wall_time": 1779239577.0179908} {"step": 36000, "epoch": 21, "train_loss": 2.783505980300933, "val_loss": 2.9694487785339354, "tok_f1": 0.288755238062591, "mean_words": 4.858, "lr": 0.0003, "wall_time": 1779240639.721827} {"step": 38000, "epoch": 22, "train_loss": 2.774734211295068, "val_loss": 2.965319557952881, "tok_f1": 0.2830145099181864, "mean_words": 4.9315, "lr": 0.0003, "wall_time": 1779241690.4812958} {"step": 40000, "epoch": 23, "train_loss": 2.7663396469081585, "val_loss": 2.960056104660034, "tok_f1": 0.29040886058386056, "mean_words": 4.988, "lr": 0.0003, "wall_time": 1779242737.81421} {"step": 42000, "epoch": 24, "train_loss": 2.75957179015756, "val_loss": 2.957438604736328, "tok_f1": 0.2905343975468975, "mean_words": 4.9165, "lr": 0.0003, "wall_time": 1779243786.238262} {"step": 44000, "epoch": 25, "train_loss": 2.7523164791037815, "val_loss": 2.9523234798431397, "tok_f1": 0.29058897613824086, "mean_words": 4.9375, "lr": 0.0003, "wall_time": 1779244830.177305} {"step": 46000, "epoch": 26, "train_loss": 2.7447811277795235, "val_loss": 2.9494457813262938, "tok_f1": 0.28798811188811185, "mean_words": 5.0245, "lr": 0.0003, "wall_time": 1779245868.350689} {"step": 48000, "epoch": 27, "train_loss": 2.7385771292894536, "val_loss": 2.946452843475342, "tok_f1": 0.28848719752469754, "mean_words": 4.876, "lr": 0.0003, "wall_time": 1779246903.871413} {"step": 50000, "epoch": 29, "train_loss": 2.728870005215236, "val_loss": 2.957064482879639, "tok_f1": 0.290686912515589, "mean_words": 4.911, "lr": 0.0003, "wall_time": 1779247946.015985} {"step": 52000, "epoch": 30, "train_loss": 2.7219258368258132, "val_loss": 2.9526238201141357, "tok_f1": 0.2944186653216065, "mean_words": 4.7625, "lr": 0.0003, "wall_time": 1779248976.7653491} {"step": 54000, "epoch": 31, "train_loss": 2.7171959208950165, "val_loss": 2.9489395374298097, "tok_f1": 0.28971268453768456, "mean_words": 4.812, "lr": 0.0003, "wall_time": 1779250006.8798962} {"step": 56000, "epoch": 32, "train_loss": 2.711857982278668, "val_loss": 2.949110791015625, "tok_f1": 0.29125145589704415, "mean_words": 4.9335, "lr": 0.0003, "wall_time": 1779251042.63035} {"step": 58000, "epoch": 33, "train_loss": 2.7074541541301547, "val_loss": 2.9462409435272217, "tok_f1": 0.2962148821766469, "mean_words": 4.908, "lr": 0.0003, "wall_time": 1779252071.914974} {"step": 60000, "epoch": 34, "train_loss": 2.70361461964871, "val_loss": 2.944313480758667, "tok_f1": 0.29103940960999786, "mean_words": 4.9475, "lr": 0.0003, "wall_time": 1779253094.764807} {"step": 62000, "epoch": 35, "train_loss": 2.698599462362122, "val_loss": 2.942076708984375, "tok_f1": 0.29306238744915214, "mean_words": 4.841, "lr": 0.0003, "wall_time": 1779254107.271397} {"step": 64000, "epoch": 36, "train_loss": 2.6947960017598676, "val_loss": 2.937381767654419, "tok_f1": 0.295903315556992, "mean_words": 4.934, "lr": 0.0003, "wall_time": 1779255123.1438122} {"step": 66000, "epoch": 38, "train_loss": 2.687774037942866, "val_loss": 2.948435255050659, "tok_f1": 0.2897239565989566, "mean_words": 4.964, "lr": 0.0003, "wall_time": 1779256134.4341109} {"step": 68000, "epoch": 39, "train_loss": 2.6818021759542097, "val_loss": 2.9472034103393554, "tok_f1": 0.2949354034854035, "mean_words": 4.946, "lr": 0.0003, "wall_time": 1779257196.270357} {"step": 70000, "epoch": 40, "train_loss": 2.678613240182306, "val_loss": 2.9431504138946534, "tok_f1": 0.29160234944793767, "mean_words": 5.032, "lr": 0.0003, "wall_time": 1779258283.2707899}