[ { "loss": 1.755609130859375, "grad_norm": 0.5997568368911743, "learning_rate": 5e-05, "epoch": 0.059790732436472344, "step": 10 }, { "loss": 1.6719018936157226, "grad_norm": 0.5945823192596436, "learning_rate": 5.960479567624579e-05, "epoch": 0.11958146487294469, "step": 20 }, { "loss": 1.7211877822875976, "grad_norm": 0.6304926872253418, "learning_rate": 5.825201988739524e-05, "epoch": 0.17937219730941703, "step": 30 }, { "loss": 1.646573257446289, "grad_norm": 0.6276059746742249, "learning_rate": 5.5980762113533166e-05, "epoch": 0.23916292974588937, "step": 40 }, { "loss": 1.675025749206543, "grad_norm": 0.6908546686172485, "learning_rate": 5.28648616538291e-05, "epoch": 0.29895366218236175, "step": 50 }, { "loss": 1.5153386116027832, "grad_norm": 0.7395423054695129, "learning_rate": 4.900561740481272e-05, "epoch": 0.35874439461883406, "step": 60 }, { "loss": 1.4698020935058593, "grad_norm": 0.7430984377861023, "learning_rate": 4.4528494601628586e-05, "epoch": 0.41853512705530643, "step": 70 }, { "loss": 1.561171054840088, "grad_norm": NaN, "learning_rate": 3.9579045904079395e-05, "epoch": 0.47832585949177875, "step": 80 }, { "loss": 1.4652462005615234, "grad_norm": 0.786108672618866, "learning_rate": 3.485345989658295e-05, "epoch": 0.5381165919282511, "step": 90 }, { "loss": 1.4830116271972655, "grad_norm": 0.847040057182312, "learning_rate": 2.9458375522483184e-05, "epoch": 0.5979073243647235, "step": 100 }, { "loss": 1.4077320098876953, "grad_norm": 0.8058563470840454, "learning_rate": 2.4080899529208004e-05, "epoch": 0.6576980568011959, "step": 110 }, { "loss": 1.4563824653625488, "grad_norm": 0.832401692867279, "learning_rate": 1.8895855339802568e-05, "epoch": 0.7174887892376681, "step": 120 }, { "loss": 1.3895584106445313, "grad_norm": 0.836421012878418, "learning_rate": 1.4071810359303403e-05, "epoch": 0.7772795216741405, "step": 130 }, { "loss": 1.4442138671875, "grad_norm": 0.8808762431144714, "learning_rate": 9.765595795784586e-06, "epoch": 0.8370702541106129, "step": 140 }, { "loss": 1.4237044334411622, "grad_norm": 0.9126316905021667, "learning_rate": 6.117208028830687e-06, "epoch": 0.8968609865470852, "step": 150 }, { "loss": 1.466029167175293, "grad_norm": 0.8786431550979614, "learning_rate": 3.245257283538533e-06, "epoch": 0.9566517189835575, "step": 160 }, { "loss": 1.4303228378295898, "grad_norm": 0.9314239621162415, "learning_rate": 1.2431115750767608e-06, "epoch": 1.0119581464872944, "step": 170 }, { "loss": 1.4143404006958007, "grad_norm": 0.8618354201316833, "learning_rate": 1.758612853692121e-07, "epoch": 1.0717488789237668, "step": 180 }, { "train_runtime": 16504.5683, "train_samples_per_second": 0.698, "train_steps_per_second": 0.011, "total_flos": 7.269800170720051e+16, "train_loss": 1.5220639440748427, "epoch": 1.0717488789237668, "step": 180 } ]